#imports
import pandas as pd
import numpy as np
#read CSVs - CSVs were already cleaned via regex using Notepad++
customerdemand = pd.read_csv('Customer_Demand.csv', index_col=False)
customerdemand = customerdemand.drop_duplicates()
weightvolume = pd.read_csv('Sku_weight_volume.csv', index_col=False)
weightvolume = weightvolume.drop_duplicates()
#rename fields so they use consistent cases for ease of us later
weightvolume = weightvolume.rename(index=str, columns={'Weight': 'weight', 'Cubic': 'cubic'})
#check
customerdemand.shape + weightvolume.shape
#combine the two files
combine = weightvolume.merge(customerdemand, left_on='product', right_on='product', how='inner')
#don't think we need these anymore since I changed to inner join from outer join
filtered = combine[(combine['customer'] != 0) & (combine['customer'].notnull())
& (combine['weight'] >= 0) & (combine['weight'].notnull())
& (combine['cubic'] >= 0) & (combine['cubic'].notnull())
& (combine['price'] >= 0) & (combine['price'].notnull())]
#if these are the same size then we don't need the above filters
filtered.shape + combine.shape
#so we didn't need the above filters!
#we estimate revenue using piece price times quantity
filtered = filtered.assign(revenue = filtered['price']*filtered['quantity'])
#then sum up all revenue
filtered['revenue'].sum()