天池大賽o2o優惠券第一名程式碼解讀(3)
阿新 • • 發佈:2019-01-23
感謝大神!!!
# Extract merchant-level features for dataset 3.
# Keep only the columns that the merchant features below are derived from.
merchant3 = feature3[['merchant_id', 'coupon_id', 'distance', 'date_received', 'date']]
# One row per distinct merchant_id; this is the base frame that the
# per-merchant aggregates are left-merged onto further down.
# drop_duplicates() is used without inplace=True so that a column subset of
# `merchant3` is never mutated in place (in-place mutation of such a selection
# can raise pandas' SettingWithCopyWarning).
t = merchant3[['merchant_id']].drop_duplicates()
# Per-merchant row counts.  Each count is built by attaching a constant 1
# column and summing it per merchant_id.  .assign() adds that column on a new
# frame, so the filtered selection of `merchant3` is never written to in place
# (assigning a column directly onto such a selection raises pandas'
# SettingWithCopyWarning).

# Total sales per merchant: rows whose `date` is not the 'null' placeholder.
t1 = merchant3[merchant3.date != 'null'][['merchant_id']].assign(total_sales=1)
t1 = t1.groupby('merchant_id').agg('sum').reset_index()

# Sales made with a coupon per merchant (the positive samples): rows where
# neither `date` nor `coupon_id` is 'null'.
t2 = merchant3[(merchant3.date != 'null') & (merchant3.coupon_id != 'null')][['merchant_id']]
t2 = t2.assign(sales_use_coupon=1)
t2 = t2.groupby('merchant_id').agg('sum').reset_index()

# Total number of coupon rows per merchant: rows whose `coupon_id` is not 'null'.
t3 = merchant3[merchant3.coupon_id != 'null'][['merchant_id']].assign(total_coupon=1)
t3 = t3.groupby('merchant_id').agg('sum').reset_index()
# Distance statistics per merchant, computed over coupon-using sales
# (rows where neither `date` nor `coupon_id` is 'null').
t4 = merchant3[(merchant3.date != 'null') & (merchant3.coupon_id != 'null')][['merchant_id', 'distance']]
# Turn `distance` into numbers with missing values as NaN: 'null' is first
# mapped to the sentinel -1 so the column can be cast to int, then -1 is
# replaced by NaN.  The replacement is applied to the `distance` column only,
# so it can never rewrite merchant_id values, and .assign() builds a new frame
# instead of mutating the filtered selection in place.
t4 = t4.assign(
    distance=t4['distance'].replace('null', -1).astype('int').replace(-1, np.nan)
)

# Group once and derive every statistic from the same grouping.
distance_by_merchant = t4.groupby('merchant_id')

# Minimum distance per merchant.
t5 = distance_by_merchant.agg('min').reset_index()
t5 = t5.rename(columns={'distance': 'merchant_min_distance'})

# Maximum distance per merchant.
t6 = distance_by_merchant.agg('max').reset_index()
t6 = t6.rename(columns={'distance': 'merchant_max_distance'})

# Mean distance per merchant.
t7 = distance_by_merchant.agg('mean').reset_index()
t7 = t7.rename(columns={'distance': 'merchant_mean_distance'})

# Median distance per merchant.
t8 = distance_by_merchant.agg('median').reset_index()
t8 = t8.rename(columns={'distance': 'merchant_median_distance'})
# Assemble the merchant feature table: start from the distinct merchants in
# `t` and left-join each per-merchant aggregate on merchant_id.  With
# how='left' every merchant in `t` keeps its row; a merchant missing from an
# aggregate gets NaN in that aggregate's column.
merchant3_feature = (
    t.merge(t1, on='merchant_id', how='left')    # total_sales
    .merge(t2, on='merchant_id', how='left')     # sales_use_coupon
    .merge(t3, on='merchant_id', how='left')     # total_coupon
    .merge(t5, on='merchant_id', how='left')     # merchant_min_distance
    .merge(t6, on='merchant_id', how='left')     # merchant_max_distance
    .merge(t7, on='merchant_id', how='left')     # merchant_mean_distance
    .merge(t8, on='merchant_id', how='left')     # merchant_median_distance
)
# Merchants with no coupon-using sale have NaN here after the left merges;
# count them as 0 such sales.
merchant3_feature['sales_use_coupon'] = merchant3_feature['sales_use_coupon'].replace(np.nan, 0)

# Coupon usage rate: coupon-using sales divided by the merchant's coupon rows.
merchant3_feature['merchant_coupon_transfer_rate'] = (
    merchant3_feature['sales_use_coupon'].astype('float')
    / merchant3_feature['total_coupon']
)

# Share of the merchant's sales that used a coupon.
merchant3_feature['coupon_rate'] = (
    merchant3_feature['sales_use_coupon'].astype('float')
    / merchant3_feature['total_sales']
)

# total_coupon is zero-filled only after the rates are computed, so the
# divisions above see NaN (and yield NaN) for merchants without coupon rows
# instead of dividing by 0.
merchant3_feature['total_coupon'] = merchant3_feature['total_coupon'].replace(np.nan, 0)

# Write the feature table without the index column.
merchant3_feature.to_csv('data/merchant3_feature.csv', index=False)