天池大賽o2o優惠券第一名程式碼解讀(2)
阿新 • • 發佈:2019-02-05
感謝!!!!
# Helpers for extracting coupon-related features
def calc_discount_rate(s):
    """Convert a raw coupon discount field into a numeric discount rate.

    The field is either a plain rate such as ``"0.9"`` or a threshold
    coupon written as ``"man:jian"`` (spend ``man``, get ``jian`` off),
    for example ``"150:20"``.

    Args:
        s: Raw discount value. It is passed through ``str()`` first, so
            numeric inputs are accepted as well as strings.

    Returns:
        ``float(s)`` when the text contains no ``:``; otherwise
        ``1.0 - jian / man``.

    Raises:
        ValueError: If a field is not numeric text (e.g. ``"null"``).
        ZeroDivisionError: If the threshold field is zero.
    """
    parts = str(s).split(':')
    if len(parts) == 1:
        # No colon: the value is already a rate.
        return float(parts[0])
    # Only the first two fields are read; anything after a second colon
    # is ignored.
    return 1.0 - float(parts[1]) / float(parts[0])
def get_discount_man(s):
    """Return the spending threshold of a ``"man:jian"`` coupon.

    Args:
        s: Raw discount value; converted with ``str()`` before parsing.

    Returns:
        The integer before the first ``:`` (e.g. ``150`` for
        ``"150:20"``), or the string ``'null'`` when the value contains
        no ``:`` (a plain rate such as ``"0.9"``).

    Raises:
        ValueError: If the threshold field is not an integer literal.
    """
    parts = str(s).split(':')
    if len(parts) == 1:
        # Plain-rate coupons have no threshold; keep the 'null' sentinel.
        return 'null'
    return int(parts[0])
def get_discount_jian(s):
    """Return the amount taken off by a ``"man:jian"`` coupon.

    Args:
        s: Raw discount value; converted with ``str()`` before parsing.

    Returns:
        The integer after the first ``:`` (e.g. ``20`` for
        ``"150:20"``), or the string ``'null'`` when the value contains
        no ``:`` (a plain rate such as ``"0.9"``).

    Raises:
        ValueError: If the reduction field is not an integer literal.
    """
    parts = str(s).split(':')
    if len(parts) == 1:
        # Plain-rate coupons have no reduction; keep the 'null' sentinel.
        return 'null'
    return int(parts[1])
def is_man_jian(s):
    """Flag whether a discount value is a ``"man:jian"`` threshold coupon.

    Args:
        s: Raw discount value; converted with ``str()`` before testing.

    Returns:
        ``1`` if the text contains a ``:``, otherwise ``0``.
    """
    # Splitting on ':' yields more than one part exactly when a colon is
    # present, so a membership test is equivalent.
    return 1 if ':' in str(s) else 0
# Dataset 3: derive coupon features and write them to
# data/coupon3_feature.csv.
print(dataset3)

# Parse each received date (YYYYMMDD text) once; the three date features
# below all reuse the parsed value.
received_dates = dataset3.date_received.astype('str').apply(
    lambda x: date(int(x[0:4]), int(x[4:6]), int(x[6:8])))
# Day of the week, 1 (Monday) through 7 (Sunday).
dataset3['day_of_week'] = received_dates.apply(lambda day: day.weekday() + 1)
# Day of the month (not the month number).
dataset3['day_of_month'] = received_dates.apply(lambda day: day.day)
# Signed number of days from the reference date 2016-06-30 to the
# received date (negative when the coupon was received earlier).
dataset3['days_distance'] = received_dates.apply(
    lambda day: (day - date(2016, 6, 30)).days)

# The helpers below read the raw discount text, so they must run before
# discount_rate is overwritten with its numeric form.
# Spending threshold of a "man:jian" coupon ('null' for plain rates).
dataset3['discount_man'] = dataset3.discount_rate.apply(get_discount_man)
# Amount taken off by a "man:jian" coupon ('null' for plain rates).
dataset3['discount_jian'] = dataset3.discount_rate.apply(get_discount_jian)
# 1 when the coupon is a "man:jian" coupon, else 0.
dataset3['is_man_jian'] = dataset3.discount_rate.apply(is_man_jian)
# Numeric discount rate.
dataset3['discount_rate'] = dataset3.discount_rate.apply(calc_discount_rate)

# Number of rows per coupon_id. Counting with groupby().size() avoids
# assigning a helper column into a slice of dataset3, which triggers
# pandas' SettingWithCopyWarning.
d = dataset3.groupby('coupon_id').size().reset_index(name='coupon_count')
dataset3 = pd.merge(dataset3, d, on='coupon_id', how='left')
print(dataset3)
# index=False omits the row index, as the original index=None did.
dataset3.to_csv('data/coupon3_feature.csv', index=False)
# Dataset 2: derive coupon features and write them to
# data/coupon2_feature.csv.

# Parse each received date (YYYYMMDD text) once; the three date features
# below all reuse the parsed value.
received_dates = dataset2.date_received.astype('str').apply(
    lambda x: date(int(x[0:4]), int(x[4:6]), int(x[6:8])))
# Day of the week, 1 (Monday) through 7 (Sunday).
dataset2['day_of_week'] = received_dates.apply(lambda day: day.weekday() + 1)
# Day of the month (not the month number).
dataset2['day_of_month'] = received_dates.apply(lambda day: day.day)
# Signed number of days from the reference date 2016-05-14 to the
# received date (negative when the coupon was received earlier).
dataset2['days_distance'] = received_dates.apply(
    lambda day: (day - date(2016, 5, 14)).days)

# The helpers below read the raw discount text, so they must run before
# discount_rate is overwritten with its numeric form.
dataset2['discount_man'] = dataset2.discount_rate.apply(get_discount_man)
dataset2['discount_jian'] = dataset2.discount_rate.apply(get_discount_jian)
dataset2['is_man_jian'] = dataset2.discount_rate.apply(is_man_jian)
dataset2['discount_rate'] = dataset2.discount_rate.apply(calc_discount_rate)

# Number of rows per coupon_id. Counting with groupby().size() avoids
# assigning a helper column into a slice of dataset2, which triggers
# pandas' SettingWithCopyWarning.
d = dataset2.groupby('coupon_id').size().reset_index(name='coupon_count')
dataset2 = pd.merge(dataset2, d, on='coupon_id', how='left')
# index=False omits the row index, as the original index=None did.
dataset2.to_csv('data/coupon2_feature.csv', index=False)
# Dataset 1: derive coupon features and write them to
# data/coupon1_feature.csv.

# Parse each received date (YYYYMMDD text) once; the three date features
# below all reuse the parsed value. The column is cast to str first, as
# for the other datasets, so slicing also works when date_received was
# loaded with a numeric dtype.
received_dates = dataset1.date_received.astype('str').apply(
    lambda x: date(int(x[0:4]), int(x[4:6]), int(x[6:8])))
# Day of the week, 1 (Monday) through 7 (Sunday).
dataset1['day_of_week'] = received_dates.apply(lambda day: day.weekday() + 1)
# Day of the month (not the month number).
dataset1['day_of_month'] = received_dates.apply(lambda day: day.day)
# Signed number of days from the reference date 2016-04-13 to the
# received date (negative when the coupon was received earlier).
dataset1['days_distance'] = received_dates.apply(
    lambda day: (day - date(2016, 4, 13)).days)

# The helpers below read the raw discount text, so they must run before
# discount_rate is overwritten with its numeric form.
dataset1['discount_man'] = dataset1.discount_rate.apply(get_discount_man)
dataset1['discount_jian'] = dataset1.discount_rate.apply(get_discount_jian)
dataset1['is_man_jian'] = dataset1.discount_rate.apply(is_man_jian)
dataset1['discount_rate'] = dataset1.discount_rate.apply(calc_discount_rate)

# Number of rows per coupon_id. Counting with groupby().size() avoids
# assigning a helper column into a slice of dataset1, which triggers
# pandas' SettingWithCopyWarning.
d = dataset1.groupby('coupon_id').size().reset_index(name='coupon_count')
dataset1 = pd.merge(dataset1, d, on='coupon_id', how='left')
# index=False omits the row index, as the original index=None did.
dataset1.to_csv('data/coupon1_feature.csv', index=False)