#imports
import pandas as pd
import numpy as np
#read CSVs - CSVs were already cleaned via regex using Notepad++
customerdemand = pd.read_csv('Customer_Demand.csv', index_col=False)
customerdemand = customerdemand.drop_duplicates()
weightvolume = pd.read_csv('Sku_weight_volume.csv', index_col=False)
weightvolume = weightvolume.drop_duplicates()
#rename fields so they use consistent cases for ease of us later
weightvolume = weightvolume.rename(index=str, columns={'Weight': 'weight', 'Cubic': 'cubic'})
#check
customerdemand.shape + weightvolume.shape
(1264461, 4, 55799, 4)
#combine the two files
combine = weightvolume.merge(customerdemand, left_on='product', right_on='product', how='inner')
combine
product | weight | cubic | price | type | customer | quantity | |
---|---|---|---|---|---|---|---|
0 | 366149 | 3.36 | 13.44 | 4 | C | 10 | 1 |
1 | 366149 | 3.36 | 13.44 | 4 | C | 12 | 1 |
2 | 366149 | 3.36 | 13.44 | 4 | C | 24 | 2 |
3 | 366149 | 3.36 | 13.44 | 4 | C | 100 | 2 |
4 | 366149 | 3.36 | 13.44 | 4 | C | 104 | 1 |
5 | 366149 | 3.36 | 13.44 | 4 | C | 112 | 1 |
6 | 366149 | 3.36 | 13.44 | 4 | C | 119 | 1 |
7 | 366149 | 3.36 | 13.44 | 4 | C | 120 | 1 |
8 | 366149 | 3.36 | 13.44 | 4 | C | 129 | 1 |
9 | 366150 | 3.36 | 6.72 | 3 | C | 14 | 7 |
10 | 366150 | 3.36 | 6.72 | 3 | C | 15 | 1 |
11 | 366150 | 3.36 | 6.72 | 3 | C | 19 | 1 |
12 | 366150 | 3.36 | 6.72 | 3 | C | 21 | 1 |
13 | 366150 | 3.36 | 6.72 | 3 | C | 60 | 12 |
14 | 366150 | 3.36 | 6.72 | 3 | C | 69 | 1 |
15 | 366150 | 3.36 | 6.72 | 3 | C | 70 | 2 |
16 | 366150 | 3.36 | 6.72 | 3 | C | 78 | 1 |
17 | 366150 | 3.36 | 6.72 | 3 | C | 100 | 17 |
18 | 366150 | 3.36 | 6.72 | 3 | C | 113 | 1 |
19 | 366150 | 3.36 | 6.72 | 3 | C | 125 | 4 |
20 | 366150 | 3.36 | 6.72 | 3 | C | 129 | 1 |
21 | 366150 | 3.36 | 6.72 | 3 | C | 140 | 1 |
22 | 366151 | 2.56 | 12.80 | 4 | C | 17 | 2 |
23 | 366151 | 2.56 | 12.80 | 4 | C | 18 | 2 |
24 | 366151 | 2.56 | 12.80 | 4 | C | 19 | 5 |
25 | 366151 | 2.56 | 12.80 | 4 | C | 20 | 1 |
26 | 366151 | 2.56 | 12.80 | 4 | C | 21 | 4 |
27 | 366151 | 2.56 | 12.80 | 4 | C | 22 | 3 |
28 | 366151 | 2.56 | 12.80 | 4 | C | 23 | 1 |
29 | 366151 | 2.56 | 12.80 | 4 | C | 24 | 3 |
... | ... | ... | ... | ... | ... | ... | ... |
1244158 | 980675 | 4.96 | 9.92 | 6 | C | 106 | 4 |
1244159 | 980675 | 4.96 | 9.92 | 6 | C | 109 | 1 |
1244160 | 980675 | 4.96 | 9.92 | 6 | C | 112 | 35 |
1244161 | 980675 | 4.96 | 9.92 | 6 | C | 114 | 3 |
1244162 | 980675 | 4.96 | 9.92 | 6 | C | 115 | 4 |
1244163 | 980675 | 4.96 | 9.92 | 6 | C | 117 | 1 |
1244164 | 980675 | 4.96 | 9.92 | 6 | C | 121 | 2 |
1244165 | 980675 | 4.96 | 9.92 | 6 | C | 122 | 6 |
1244166 | 980675 | 4.96 | 9.92 | 6 | C | 128 | 1 |
1244167 | 980675 | 4.96 | 9.92 | 6 | C | 188 | 1 |
1244168 | 980676 | 4.96 | 9.92 | 9 | C | 20 | 2 |
1244169 | 980676 | 4.96 | 9.92 | 9 | C | 21 | 2 |
1244170 | 980676 | 4.96 | 9.92 | 9 | C | 24 | 5 |
1244171 | 980676 | 4.96 | 9.92 | 9 | C | 33 | 2 |
1244172 | 980676 | 4.96 | 9.92 | 9 | C | 64 | 1 |
1244173 | 980676 | 4.96 | 9.92 | 9 | C | 66 | 1 |
1244174 | 980676 | 4.96 | 9.92 | 9 | C | 68 | 1 |
1244175 | 980676 | 4.96 | 9.92 | 9 | C | 104 | 4 |
1244176 | 980676 | 4.96 | 9.92 | 9 | C | 105 | 6 |
1244177 | 980676 | 4.96 | 9.92 | 9 | C | 106 | 4 |
1244178 | 980676 | 4.96 | 9.92 | 9 | C | 108 | 1 |
1244179 | 980676 | 4.96 | 9.92 | 9 | C | 111 | 1 |
1244180 | 980676 | 4.96 | 9.92 | 9 | C | 112 | 31 |
1244181 | 980676 | 4.96 | 9.92 | 9 | C | 115 | 3 |
1244182 | 980676 | 4.96 | 9.92 | 9 | C | 117 | 12 |
1244183 | 980676 | 4.96 | 9.92 | 9 | C | 119 | 1 |
1244184 | 980676 | 4.96 | 9.92 | 9 | C | 123 | 1 |
1244185 | 980676 | 4.96 | 9.92 | 9 | C | 137 | 1 |
1244186 | 980676 | 4.96 | 9.92 | 9 | C | 139 | 1 |
1244187 | 980676 | 4.96 | 9.92 | 9 | C | 148 | 1 |
1244188 rows × 7 columns
#don't think we need these anymore since I changed to inner join from outer join
filtered = combine[(combine['customer'] != 0) & (combine['customer'].notnull())
& (combine['weight'] >= 0) & (combine['weight'].notnull())
& (combine['cubic'] >= 0) & (combine['cubic'].notnull())
& (combine['price'] >= 0) & (combine['price'].notnull())]
#if these are the same size then we don't need the above filters
filtered.shape + combine.shape
(1244188, 7, 1244188, 7)
#so we didn't need the above filters!
#sum at the customer-weight level
groupedby = filtered.groupby(['customer','weight'], as_index=False).sum()
#note that this sum renders the product number (SKU) meaningless
groupedby
customer | weight | product | cubic | price | quantity | |
---|---|---|---|---|---|---|
0 | 10 | 0.01 | 4168349 | 0.24 | 18 | 20 |
1 | 10 | 0.03 | 1742061 | 0.30 | 17 | 2 |
2 | 10 | 0.04 | 1608729 | 0.40 | 10 | 5 |
3 | 10 | 0.08 | 2648522 | 1.84 | 17 | 57 |
4 | 10 | 0.09 | 897041 | 0.81 | 12 | 40 |
5 | 10 | 0.10 | 5776874 | 5.20 | 53 | 90 |
6 | 10 | 0.11 | 785376 | 0.88 | 12 | 10 |
7 | 10 | 0.12 | 403157 | 0.60 | 6 | 1 |
8 | 10 | 0.14 | 1378222 | 2.10 | 13 | 52 |
9 | 10 | 0.15 | 948349 | 0.90 | 8 | 8 |
10 | 10 | 0.16 | 27809949 | 32.64 | 268 | 1303 |
11 | 10 | 0.19 | 1787431 | 5.32 | 26 | 117 |
12 | 10 | 0.20 | 647951 | 1.00 | 10 | 4 |
13 | 10 | 0.21 | 716076 | 3.36 | 17 | 48 |
14 | 10 | 0.22 | 318931 | 1.32 | 1 | 21 |
15 | 10 | 0.23 | 1628250 | 1.84 | 12 | 49 |
16 | 10 | 0.24 | 2177311 | 5.76 | 32 | 141 |
17 | 10 | 0.25 | 513817 | 1.50 | 2 | 6 |
18 | 10 | 0.26 | 631372 | 1.04 | 6 | 92 |
19 | 10 | 0.28 | 685686 | 0.56 | 3 | 12 |
20 | 10 | 0.30 | 1061037 | 2.40 | 5 | 16 |
21 | 10 | 0.32 | 37294479 | 87.04 | 353 | 852 |
22 | 10 | 0.33 | 817225 | 1.32 | 6 | 1 |
23 | 10 | 0.34 | 793178 | 2.04 | 5 | 1 |
24 | 10 | 0.35 | 5418488 | 12.25 | 51 | 99 |
25 | 10 | 0.38 | 913713 | 2.28 | 7 | 2 |
26 | 10 | 0.40 | 5545261 | 17.20 | 48 | 187 |
27 | 10 | 0.41 | 817321 | 0.82 | 3 | 2 |
28 | 10 | 0.42 | 884313 | 2.10 | 7 | 19 |
29 | 10 | 0.46 | 223917 | 0.92 | 3 | 8 |
... | ... | ... | ... | ... | ... | ... |
236483 | 188 | 1222.40 | 756974 | 3667.20 | 8 | 1 |
236484 | 188 | 1232.00 | 1369049 | 9856.00 | 4 | 7 |
236485 | 188 | 1248.00 | 863231 | 3744.00 | 3 | 1 |
236486 | 188 | 1304.00 | 495795 | 2608.00 | 4 | 2 |
236487 | 188 | 1312.00 | 374861 | 3936.00 | 7 | 3 |
236488 | 188 | 1344.00 | 328481 | 4032.00 | 2 | 1 |
236489 | 188 | 1360.00 | 470383 | 8160.00 | 6 | 2 |
236490 | 188 | 1428.00 | 807666 | 2856.00 | 4 | 1 |
236491 | 188 | 1440.00 | 495800 | 4320.00 | 5 | 1 |
236492 | 188 | 1480.00 | 661599 | 2960.00 | 7 | 2 |
236493 | 188 | 1552.00 | 368568 | 3104.00 | 2 | 1 |
236494 | 188 | 1568.00 | 1173562 | 10976.00 | 11 | 5 |
236495 | 188 | 1584.00 | 637051 | 3168.00 | 2 | 3 |
236496 | 188 | 1640.00 | 663887 | 4920.00 | 7 | 1 |
236497 | 188 | 1664.00 | 368562 | 4992.00 | 9 | 5 |
236498 | 188 | 1710.72 | 917218 | 5132.16 | 2 | 1 |
236499 | 188 | 1760.00 | 661644 | 8800.00 | 8 | 2 |
236500 | 188 | 1788.64 | 918495 | 8943.20 | 3 | 1 |
236501 | 188 | 1792.00 | 459719 | 3584.00 | 7 | 1 |
236502 | 188 | 1808.00 | 1740793 | 19888.00 | 4 | 2 |
236503 | 188 | 1904.00 | 718416 | 5712.00 | 6 | 1 |
236504 | 188 | 2000.00 | 1435091 | 16000.00 | 9 | 3 |
236505 | 188 | 2018.07 | 918809 | 8072.28 | 9 | 2 |
236506 | 188 | 2018.08 | 918491 | 10090.40 | 5 | 2 |
236507 | 188 | 2160.00 | 611911 | 6480.00 | 7 | 1 |
236508 | 188 | 2960.00 | 868972 | 5920.00 | 8 | 17 |
236509 | 188 | 3024.00 | 611910 | 6048.00 | 5 | 1 |
236510 | 188 | 4240.00 | 597296 | 12720.00 | 7 | 1 |
236511 | 188 | 4384.00 | 794990 | 26304.00 | 5 | 1 |
236512 | 190 | 0.50 | 606486 | 2.00 | 9 | 1 |
236513 rows × 6 columns
#presented chapter 6
chapter = 6
#add weight * chapter number to dataframe
assigned = groupedby.assign(wtxch = groupedby['weight']*chapter)
#conditional assign of Kp to dataframe
assigned = assigned.assign(kp = 8) # >= 900 lbs
assigned.loc[assigned['wtxch'] < 900, 'kp'] = 16
assigned.loc[assigned['wtxch'] < 300, 'kp'] = 23
assigned.loc[assigned['wtxch'] < 60, 'kp'] = 28
assigned.loc[assigned['wtxch'] < 12, 'kp'] = 32
assigned
customer | weight | product | cubic | price | quantity | wtxch | kp | |
---|---|---|---|---|---|---|---|---|
0 | 10 | 0.01 | 4168349 | 0.24 | 18 | 20 | 0.06 | 32 |
1 | 10 | 0.03 | 1742061 | 0.30 | 17 | 2 | 0.18 | 32 |
2 | 10 | 0.04 | 1608729 | 0.40 | 10 | 5 | 0.24 | 32 |
3 | 10 | 0.08 | 2648522 | 1.84 | 17 | 57 | 0.48 | 32 |
4 | 10 | 0.09 | 897041 | 0.81 | 12 | 40 | 0.54 | 32 |
5 | 10 | 0.10 | 5776874 | 5.20 | 53 | 90 | 0.60 | 32 |
6 | 10 | 0.11 | 785376 | 0.88 | 12 | 10 | 0.66 | 32 |
7 | 10 | 0.12 | 403157 | 0.60 | 6 | 1 | 0.72 | 32 |
8 | 10 | 0.14 | 1378222 | 2.10 | 13 | 52 | 0.84 | 32 |
9 | 10 | 0.15 | 948349 | 0.90 | 8 | 8 | 0.90 | 32 |
10 | 10 | 0.16 | 27809949 | 32.64 | 268 | 1303 | 0.96 | 32 |
11 | 10 | 0.19 | 1787431 | 5.32 | 26 | 117 | 1.14 | 32 |
12 | 10 | 0.20 | 647951 | 1.00 | 10 | 4 | 1.20 | 32 |
13 | 10 | 0.21 | 716076 | 3.36 | 17 | 48 | 1.26 | 32 |
14 | 10 | 0.22 | 318931 | 1.32 | 1 | 21 | 1.32 | 32 |
15 | 10 | 0.23 | 1628250 | 1.84 | 12 | 49 | 1.38 | 32 |
16 | 10 | 0.24 | 2177311 | 5.76 | 32 | 141 | 1.44 | 32 |
17 | 10 | 0.25 | 513817 | 1.50 | 2 | 6 | 1.50 | 32 |
18 | 10 | 0.26 | 631372 | 1.04 | 6 | 92 | 1.56 | 32 |
19 | 10 | 0.28 | 685686 | 0.56 | 3 | 12 | 1.68 | 32 |
20 | 10 | 0.30 | 1061037 | 2.40 | 5 | 16 | 1.80 | 32 |
21 | 10 | 0.32 | 37294479 | 87.04 | 353 | 852 | 1.92 | 32 |
22 | 10 | 0.33 | 817225 | 1.32 | 6 | 1 | 1.98 | 32 |
23 | 10 | 0.34 | 793178 | 2.04 | 5 | 1 | 2.04 | 32 |
24 | 10 | 0.35 | 5418488 | 12.25 | 51 | 99 | 2.10 | 32 |
25 | 10 | 0.38 | 913713 | 2.28 | 7 | 2 | 2.28 | 32 |
26 | 10 | 0.40 | 5545261 | 17.20 | 48 | 187 | 2.40 | 32 |
27 | 10 | 0.41 | 817321 | 0.82 | 3 | 2 | 2.46 | 32 |
28 | 10 | 0.42 | 884313 | 2.10 | 7 | 19 | 2.52 | 32 |
29 | 10 | 0.46 | 223917 | 0.92 | 3 | 8 | 2.76 | 32 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
236483 | 188 | 1222.40 | 756974 | 3667.20 | 8 | 1 | 7334.40 | 8 |
236484 | 188 | 1232.00 | 1369049 | 9856.00 | 4 | 7 | 7392.00 | 8 |
236485 | 188 | 1248.00 | 863231 | 3744.00 | 3 | 1 | 7488.00 | 8 |
236486 | 188 | 1304.00 | 495795 | 2608.00 | 4 | 2 | 7824.00 | 8 |
236487 | 188 | 1312.00 | 374861 | 3936.00 | 7 | 3 | 7872.00 | 8 |
236488 | 188 | 1344.00 | 328481 | 4032.00 | 2 | 1 | 8064.00 | 8 |
236489 | 188 | 1360.00 | 470383 | 8160.00 | 6 | 2 | 8160.00 | 8 |
236490 | 188 | 1428.00 | 807666 | 2856.00 | 4 | 1 | 8568.00 | 8 |
236491 | 188 | 1440.00 | 495800 | 4320.00 | 5 | 1 | 8640.00 | 8 |
236492 | 188 | 1480.00 | 661599 | 2960.00 | 7 | 2 | 8880.00 | 8 |
236493 | 188 | 1552.00 | 368568 | 3104.00 | 2 | 1 | 9312.00 | 8 |
236494 | 188 | 1568.00 | 1173562 | 10976.00 | 11 | 5 | 9408.00 | 8 |
236495 | 188 | 1584.00 | 637051 | 3168.00 | 2 | 3 | 9504.00 | 8 |
236496 | 188 | 1640.00 | 663887 | 4920.00 | 7 | 1 | 9840.00 | 8 |
236497 | 188 | 1664.00 | 368562 | 4992.00 | 9 | 5 | 9984.00 | 8 |
236498 | 188 | 1710.72 | 917218 | 5132.16 | 2 | 1 | 10264.32 | 8 |
236499 | 188 | 1760.00 | 661644 | 8800.00 | 8 | 2 | 10560.00 | 8 |
236500 | 188 | 1788.64 | 918495 | 8943.20 | 3 | 1 | 10731.84 | 8 |
236501 | 188 | 1792.00 | 459719 | 3584.00 | 7 | 1 | 10752.00 | 8 |
236502 | 188 | 1808.00 | 1740793 | 19888.00 | 4 | 2 | 10848.00 | 8 |
236503 | 188 | 1904.00 | 718416 | 5712.00 | 6 | 1 | 11424.00 | 8 |
236504 | 188 | 2000.00 | 1435091 | 16000.00 | 9 | 3 | 12000.00 | 8 |
236505 | 188 | 2018.07 | 918809 | 8072.28 | 9 | 2 | 12108.42 | 8 |
236506 | 188 | 2018.08 | 918491 | 10090.40 | 5 | 2 | 12108.48 | 8 |
236507 | 188 | 2160.00 | 611911 | 6480.00 | 7 | 1 | 12960.00 | 8 |
236508 | 188 | 2960.00 | 868972 | 5920.00 | 8 | 17 | 17760.00 | 8 |
236509 | 188 | 3024.00 | 611910 | 6048.00 | 5 | 1 | 18144.00 | 8 |
236510 | 188 | 4240.00 | 597296 | 12720.00 | 7 | 1 | 25440.00 | 8 |
236511 | 188 | 4384.00 | 794990 | 26304.00 | 5 | 1 | 26304.00 | 8 |
236512 | 190 | 0.50 | 606486 | 2.00 | 9 | 1 | 3.00 | 32 |
236513 rows × 8 columns
#now export the above table into CSV
assigned.to_csv(path_or_buf='groupbyexport.csv', index=False)
#let me know when export is complete
print("complete")
complete