In [131]:
#imports
import pandas as pd
import numpy as np

#read CSVs - CSVs were already cleaned via regex using Notepad++
customerdemand = pd.read_csv('Customer_Demand.csv', index_col=False)
customerdemand = customerdemand.drop_duplicates()

weightvolume = pd.read_csv('Sku_weight_volume.csv', index_col=False)
weightvolume = weightvolume.drop_duplicates()

#rename fields so they use consistent cases for ease of us later
weightvolume = weightvolume.rename(index=str, columns={'Weight': 'weight', 'Cubic': 'cubic'})

#check
customerdemand.shape + weightvolume.shape
Out[131]:
(1264461, 4, 55799, 4)
In [132]:
#combine the two files
combine = weightvolume.merge(customerdemand, left_on='product', right_on='product', how='inner')
combine
Out[132]:
product weight cubic price type customer quantity
0 366149 3.36 13.44 4 C 10 1
1 366149 3.36 13.44 4 C 12 1
2 366149 3.36 13.44 4 C 24 2
3 366149 3.36 13.44 4 C 100 2
4 366149 3.36 13.44 4 C 104 1
5 366149 3.36 13.44 4 C 112 1
6 366149 3.36 13.44 4 C 119 1
7 366149 3.36 13.44 4 C 120 1
8 366149 3.36 13.44 4 C 129 1
9 366150 3.36 6.72 3 C 14 7
10 366150 3.36 6.72 3 C 15 1
11 366150 3.36 6.72 3 C 19 1
12 366150 3.36 6.72 3 C 21 1
13 366150 3.36 6.72 3 C 60 12
14 366150 3.36 6.72 3 C 69 1
15 366150 3.36 6.72 3 C 70 2
16 366150 3.36 6.72 3 C 78 1
17 366150 3.36 6.72 3 C 100 17
18 366150 3.36 6.72 3 C 113 1
19 366150 3.36 6.72 3 C 125 4
20 366150 3.36 6.72 3 C 129 1
21 366150 3.36 6.72 3 C 140 1
22 366151 2.56 12.80 4 C 17 2
23 366151 2.56 12.80 4 C 18 2
24 366151 2.56 12.80 4 C 19 5
25 366151 2.56 12.80 4 C 20 1
26 366151 2.56 12.80 4 C 21 4
27 366151 2.56 12.80 4 C 22 3
28 366151 2.56 12.80 4 C 23 1
29 366151 2.56 12.80 4 C 24 3
... ... ... ... ... ... ... ...
1244158 980675 4.96 9.92 6 C 106 4
1244159 980675 4.96 9.92 6 C 109 1
1244160 980675 4.96 9.92 6 C 112 35
1244161 980675 4.96 9.92 6 C 114 3
1244162 980675 4.96 9.92 6 C 115 4
1244163 980675 4.96 9.92 6 C 117 1
1244164 980675 4.96 9.92 6 C 121 2
1244165 980675 4.96 9.92 6 C 122 6
1244166 980675 4.96 9.92 6 C 128 1
1244167 980675 4.96 9.92 6 C 188 1
1244168 980676 4.96 9.92 9 C 20 2
1244169 980676 4.96 9.92 9 C 21 2
1244170 980676 4.96 9.92 9 C 24 5
1244171 980676 4.96 9.92 9 C 33 2
1244172 980676 4.96 9.92 9 C 64 1
1244173 980676 4.96 9.92 9 C 66 1
1244174 980676 4.96 9.92 9 C 68 1
1244175 980676 4.96 9.92 9 C 104 4
1244176 980676 4.96 9.92 9 C 105 6
1244177 980676 4.96 9.92 9 C 106 4
1244178 980676 4.96 9.92 9 C 108 1
1244179 980676 4.96 9.92 9 C 111 1
1244180 980676 4.96 9.92 9 C 112 31
1244181 980676 4.96 9.92 9 C 115 3
1244182 980676 4.96 9.92 9 C 117 12
1244183 980676 4.96 9.92 9 C 119 1
1244184 980676 4.96 9.92 9 C 123 1
1244185 980676 4.96 9.92 9 C 137 1
1244186 980676 4.96 9.92 9 C 139 1
1244187 980676 4.96 9.92 9 C 148 1

1244188 rows × 7 columns

In [133]:
#don't think we need these anymore since I changed to inner join from outer join
filtered = combine[(combine['customer'] != 0) & (combine['customer'].notnull())
                    & (combine['weight'] >= 0) & (combine['weight'].notnull())
                    & (combine['cubic'] >= 0) & (combine['cubic'].notnull())
                    & (combine['price'] >= 0) & (combine['price'].notnull())]

#if these are the same size then we don't need the above filters
filtered.shape + combine.shape
Out[133]:
(1244188, 7, 1244188, 7)
In [134]:
#so we didn't need the above filters!
In [135]:
#sum at the customer-weight level
groupedby = filtered.groupby(['customer','weight'], as_index=False).sum()
    #note that this sum renders the product number (SKU) meaningless

groupedby
Out[135]:
customer weight product cubic price quantity
0 10 0.01 4168349 0.24 18 20
1 10 0.03 1742061 0.30 17 2
2 10 0.04 1608729 0.40 10 5
3 10 0.08 2648522 1.84 17 57
4 10 0.09 897041 0.81 12 40
5 10 0.10 5776874 5.20 53 90
6 10 0.11 785376 0.88 12 10
7 10 0.12 403157 0.60 6 1
8 10 0.14 1378222 2.10 13 52
9 10 0.15 948349 0.90 8 8
10 10 0.16 27809949 32.64 268 1303
11 10 0.19 1787431 5.32 26 117
12 10 0.20 647951 1.00 10 4
13 10 0.21 716076 3.36 17 48
14 10 0.22 318931 1.32 1 21
15 10 0.23 1628250 1.84 12 49
16 10 0.24 2177311 5.76 32 141
17 10 0.25 513817 1.50 2 6
18 10 0.26 631372 1.04 6 92
19 10 0.28 685686 0.56 3 12
20 10 0.30 1061037 2.40 5 16
21 10 0.32 37294479 87.04 353 852
22 10 0.33 817225 1.32 6 1
23 10 0.34 793178 2.04 5 1
24 10 0.35 5418488 12.25 51 99
25 10 0.38 913713 2.28 7 2
26 10 0.40 5545261 17.20 48 187
27 10 0.41 817321 0.82 3 2
28 10 0.42 884313 2.10 7 19
29 10 0.46 223917 0.92 3 8
... ... ... ... ... ... ...
236483 188 1222.40 756974 3667.20 8 1
236484 188 1232.00 1369049 9856.00 4 7
236485 188 1248.00 863231 3744.00 3 1
236486 188 1304.00 495795 2608.00 4 2
236487 188 1312.00 374861 3936.00 7 3
236488 188 1344.00 328481 4032.00 2 1
236489 188 1360.00 470383 8160.00 6 2
236490 188 1428.00 807666 2856.00 4 1
236491 188 1440.00 495800 4320.00 5 1
236492 188 1480.00 661599 2960.00 7 2
236493 188 1552.00 368568 3104.00 2 1
236494 188 1568.00 1173562 10976.00 11 5
236495 188 1584.00 637051 3168.00 2 3
236496 188 1640.00 663887 4920.00 7 1
236497 188 1664.00 368562 4992.00 9 5
236498 188 1710.72 917218 5132.16 2 1
236499 188 1760.00 661644 8800.00 8 2
236500 188 1788.64 918495 8943.20 3 1
236501 188 1792.00 459719 3584.00 7 1
236502 188 1808.00 1740793 19888.00 4 2
236503 188 1904.00 718416 5712.00 6 1
236504 188 2000.00 1435091 16000.00 9 3
236505 188 2018.07 918809 8072.28 9 2
236506 188 2018.08 918491 10090.40 5 2
236507 188 2160.00 611911 6480.00 7 1
236508 188 2960.00 868972 5920.00 8 17
236509 188 3024.00 611910 6048.00 5 1
236510 188 4240.00 597296 12720.00 7 1
236511 188 4384.00 794990 26304.00 5 1
236512 190 0.50 606486 2.00 9 1

236513 rows × 6 columns

In [136]:
#presented chapter 6
chapter = 6

#add weight * chapter number to dataframe
assigned = groupedby.assign(wtxch = groupedby['weight']*chapter)

#conditional assign of Kp to dataframe
assigned = assigned.assign(kp = 8)  # >= 900 lbs
assigned.loc[assigned['wtxch'] < 900, 'kp'] = 16
assigned.loc[assigned['wtxch'] < 300, 'kp'] = 23
assigned.loc[assigned['wtxch'] < 60, 'kp'] = 28
assigned.loc[assigned['wtxch'] < 12, 'kp'] = 32

assigned
Out[136]:
customer weight product cubic price quantity wtxch kp
0 10 0.01 4168349 0.24 18 20 0.06 32
1 10 0.03 1742061 0.30 17 2 0.18 32
2 10 0.04 1608729 0.40 10 5 0.24 32
3 10 0.08 2648522 1.84 17 57 0.48 32
4 10 0.09 897041 0.81 12 40 0.54 32
5 10 0.10 5776874 5.20 53 90 0.60 32
6 10 0.11 785376 0.88 12 10 0.66 32
7 10 0.12 403157 0.60 6 1 0.72 32
8 10 0.14 1378222 2.10 13 52 0.84 32
9 10 0.15 948349 0.90 8 8 0.90 32
10 10 0.16 27809949 32.64 268 1303 0.96 32
11 10 0.19 1787431 5.32 26 117 1.14 32
12 10 0.20 647951 1.00 10 4 1.20 32
13 10 0.21 716076 3.36 17 48 1.26 32
14 10 0.22 318931 1.32 1 21 1.32 32
15 10 0.23 1628250 1.84 12 49 1.38 32
16 10 0.24 2177311 5.76 32 141 1.44 32
17 10 0.25 513817 1.50 2 6 1.50 32
18 10 0.26 631372 1.04 6 92 1.56 32
19 10 0.28 685686 0.56 3 12 1.68 32
20 10 0.30 1061037 2.40 5 16 1.80 32
21 10 0.32 37294479 87.04 353 852 1.92 32
22 10 0.33 817225 1.32 6 1 1.98 32
23 10 0.34 793178 2.04 5 1 2.04 32
24 10 0.35 5418488 12.25 51 99 2.10 32
25 10 0.38 913713 2.28 7 2 2.28 32
26 10 0.40 5545261 17.20 48 187 2.40 32
27 10 0.41 817321 0.82 3 2 2.46 32
28 10 0.42 884313 2.10 7 19 2.52 32
29 10 0.46 223917 0.92 3 8 2.76 32
... ... ... ... ... ... ... ... ...
236483 188 1222.40 756974 3667.20 8 1 7334.40 8
236484 188 1232.00 1369049 9856.00 4 7 7392.00 8
236485 188 1248.00 863231 3744.00 3 1 7488.00 8
236486 188 1304.00 495795 2608.00 4 2 7824.00 8
236487 188 1312.00 374861 3936.00 7 3 7872.00 8
236488 188 1344.00 328481 4032.00 2 1 8064.00 8
236489 188 1360.00 470383 8160.00 6 2 8160.00 8
236490 188 1428.00 807666 2856.00 4 1 8568.00 8
236491 188 1440.00 495800 4320.00 5 1 8640.00 8
236492 188 1480.00 661599 2960.00 7 2 8880.00 8
236493 188 1552.00 368568 3104.00 2 1 9312.00 8
236494 188 1568.00 1173562 10976.00 11 5 9408.00 8
236495 188 1584.00 637051 3168.00 2 3 9504.00 8
236496 188 1640.00 663887 4920.00 7 1 9840.00 8
236497 188 1664.00 368562 4992.00 9 5 9984.00 8
236498 188 1710.72 917218 5132.16 2 1 10264.32 8
236499 188 1760.00 661644 8800.00 8 2 10560.00 8
236500 188 1788.64 918495 8943.20 3 1 10731.84 8
236501 188 1792.00 459719 3584.00 7 1 10752.00 8
236502 188 1808.00 1740793 19888.00 4 2 10848.00 8
236503 188 1904.00 718416 5712.00 6 1 11424.00 8
236504 188 2000.00 1435091 16000.00 9 3 12000.00 8
236505 188 2018.07 918809 8072.28 9 2 12108.42 8
236506 188 2018.08 918491 10090.40 5 2 12108.48 8
236507 188 2160.00 611911 6480.00 7 1 12960.00 8
236508 188 2960.00 868972 5920.00 8 17 17760.00 8
236509 188 3024.00 611910 6048.00 5 1 18144.00 8
236510 188 4240.00 597296 12720.00 7 1 25440.00 8
236511 188 4384.00 794990 26304.00 5 1 26304.00 8
236512 190 0.50 606486 2.00 9 1 3.00 32

236513 rows × 8 columns

In [137]:
#now export the above table into CSV
assigned.to_csv(path_or_buf='groupbyexport.csv', index=False)

#let me know when export is complete
print("complete")
complete