Python:使用Jupyter Notebook操作資料庫
阿新 • • 發佈:2021-08-13
一、前提
1.預裝Python3環境
2.安裝相關庫
二、程式碼如下
import time import pymysql import pandas as pd from datetime import datetime,timedelta
# Reporting window: [yesterday 00:00, today 00:00) in local time.
# The clock is read once so t1 and t2 are always consecutive calendar days,
# even when the cell happens to run across midnight.
_now = datetime.now()
t1 = (_now - timedelta(days=1)).strftime("%Y-%m-%d")  # T-1, like '2021-06-10'
t2 = _now.strftime("%Y-%m-%d")  # T, like '2021-06-11'

# The same two boundaries as epoch milliseconds (local midnight).
a1 = int(time.mktime(time.strptime("{0} 0:0:0".format(t1), '%Y-%m-%d %H:%M:%S'))) * 1000
a2 = int(time.mktime(time.strptime("{0} 0:0:0".format(t2), '%Y-%m-%d %H:%M:%S'))) * 1000

# Notebook display of the computed window.
a1, a2, t1, t2
# Open the connection used for the payment/order queries; the credentials
# are left blank in the article and must be filled in before running.
cuor = pymysql.connect(host='', user='', passwd="", db='')
curor = cuor.cursor()

# Comma-separated column list selected from the payment-record table.
ucol = ','.join(
    'record_id pay_channel pay_config_key biz_scenario biz_serial_no pay_amount pay_description response_time pay_status stage user_id our_trans_number third_trans_number receipt create_time update_time'.split(' ')
)
def daily_user(t1, t2, stage='senior', cursor=None):
    """Print the number of registrations per platform for one stage.

    Counts rows of ``app_user_base`` whose ``account_source`` is not
    ``'DEVICE'``, whose ``user_register_stage`` equals *stage* and whose
    ``create_time`` lies between ``t1 00:00:00`` and ``t2 00:00:00``.

    Args:
        t1: Start date as ``'YYYY-MM-DD'``.
        t2: End date as ``'YYYY-MM-DD'``.
        stage: Value matched against ``user_register_stage``.
        cursor: DB-API cursor to query with. Defaults to the notebook-level
            ``cur``, which must already exist when the function is called.
    """
    if cursor is None:
        # NOTE(review): `cur` is created in a later cell of this notebook;
        # run that cell first or pass a cursor explicitly.
        cursor = cur
    sql = (
        "select platform,count(platform) from app_user_base "
        "where account_source<>'DEVICE' and user_register_stage=%s "
        "and create_time BETWEEN %s AND %s group by platform"
    )
    # Values are bound by the driver instead of being formatted into the SQL.
    cursor.execute(sql, (stage, '{0} 00:00:00'.format(t1), '{0} 00:00:00'.format(t2)))
    bss = list(cursor.fetchall())
    print(stage, sum(row[1] for row in bss), bss)


print(t1, t2)
# Senior high school
daily_user(t1, t2, stage='senior')
# Junior high school
daily_user(t1, t2, stage='junior')
# Income table: payment records joined with order, user, goods-label and
# CRM (employee / department) information.
cuor = pymysql.connect(host='', user='', passwd="", db='')
curor = cuor.cursor()


def _sql_in_list(values):
    """Render *values* as a parenthesised SQL list such as ``(1, 2)``.

    Matches ``str(tuple(values))`` for two or more items, but also yields
    valid SQL for a single item (``(1)`` rather than ``(1,)``).
    Callers must not pass an empty sequence.
    """
    return '(' + ', '.join(repr(v) for v in values) + ')'


ucol = 'record_id pay_channel pay_config_key biz_scenario biz_serial_no pay_amount pay_description response_time pay_status stage user_id our_trans_number third_trans_number receipt create_time update_time'.replace(' ', ',')
a1 = int(time.mktime(time.strptime("{0} 0:0:0".format(t1), '%Y-%m-%d %H:%M:%S'))) * 1000
a2 = int(time.mktime(time.strptime("{0} 0:0:0".format(t2), '%Y-%m-%d %H:%M:%S'))) * 1000
msql = "select {0} from mall_pay_record where response_time>={1} and response_time<{2}".format(ucol, a1, a2)

# Fetch the payment records whose response time falls inside the window.
curor.execute(msql)
data = curor.fetchall()
btd = pd.DataFrame(list(data), columns=ucol.split(','))
btd['brt'] = btd['response_time'].apply(lambda x: pd.Timestamp.fromtimestamp(x / 1000))
print(len(btd), '#len(btd)')
btd.to_excel('{0}-{1}-收入情況-原表-1.xlsx'.format(t1, t2))
btd['pay_amount'] = btd['pay_amount'].apply(float)
#btd=btd.loc[btd['pay_status']=='SUCCESS']
# .copy() so the column assignments below write to an independent frame.
btd = btd.loc[btd['pay_amount'] > 1].copy()


def cds(x):
    """Derive an order id from a payment description string.

    Returns 2 when the text contains ``'tyzx'``, 0 when it has no ``'-'``,
    1 when it is a top-up (contains ``'充值'``), and otherwise the integer
    found in the second ``'-'``-separated field (with an optional
    ``'訂單編號'`` label stripped).
    """
    if 'tyzx' in x:
        return 2
    if '-' not in x:
        return 0
    if '充值' in x:
        return 1
    return int(x.split('-')[1].replace('訂單編號', ''))


btd['order_id'] = btd['pay_description'].apply(cds)
# NOTE(review): the `> 1` filter keeps the marker value 2 returned for
# 'tyzx' descriptions - confirm whether that is intended.
oid = [i for i in set(btd['order_id']) if i > 1]
print(len(oid), '#len(oid)')


# Look up the purchase details for the collected order ids.
def mall_sql(oid, curor, ucol, od='mall_order'):
    """Select *ucol* from table *od* for the given order ids.

    Only rows with ``cash_part>0`` are returned. Ids are queried in batches
    of 2000; every batch queries the same table *od*.

    Args:
        oid: List of order ids.
        curor: DB-API cursor to execute on.
        ucol: Comma-separated column list.
        od: Table name.

    Returns:
        A list of result rows (empty when *oid* is empty).
    """
    rows = []
    for start in range(0, len(oid), 2000):
        batch = oid[start:start + 2000]
        curor.execute(
            "select {0} from {1} where cash_part>0 and order_id in {2}".format(
                ucol, od, _sql_in_list(batch)))
        rows.extend(curor.fetchall())
    return rows


ucol = 'order_id goods_id goods_stage goods_type goods_resource goods_name create_by authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'.replace(' ', ',')
data1 = mall_sql(oid, curor, ucol, od='mall_order_detail')
c01 = 'order_id goods_id goods_stage goods_type goods_resource goods_name create_by authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'
btdt = pd.DataFrame(data1, columns=c01.split())
print(len(btdt), '#len(btdt)')
bd = btd.merge(btdt, on='order_id', how='left')

ucol = 'order_id order_number user_nick order_source order_type employee_id employee_department_id order_channel actually_paid create_by'.replace(' ', ',')
data2 = mall_sql(oid, curor, ucol, od='mall_order')
c02 = 'order_id order_number user_nick order_source order_type employee_id did order_channel actually_paid1 create_by1'
btdtw = pd.DataFrame(data2, columns=c02.split())
print(len(btdtw))
bd = bd.merge(btdtw, on='order_id',how='left')
# Both merged frames carry user_id; the payment-record one becomes `uid`.
bd.rename(columns={'user_id_x': 'uid'}, inplace=True)
jlst = list(set(bd['uid']))

conn = pymysql.connect(host='', user='', passwd="", db='')
cur = conn.cursor()
ucol = 'user_id,le_id,user_register_channel,user_register_time,mobile_location,user_nick'


def uid_sql(oid, cur, ucol, od='app_user_base'):
    """Select *ucol* from table *od* for the given user ids.

    Ids are queried in batches of 2000; every batch queries the same
    table *od*.

    Args:
        oid: List of user ids.
        cur: DB-API cursor to execute on.
        ucol: Comma-separated column list.
        od: Table name.

    Returns:
        A list of result rows (empty when *oid* is empty).
    """
    print(len(oid))
    rows = []
    for start in range(0, len(oid), 2000):
        batch = oid[start:start + 2000]
        cur.execute(
            "select {0} from {1} where user_id in {2}".format(ucol, od, _sql_in_list(batch)))
        rows.extend(cur.fetchall())
    return rows


data = uid_sql(jlst, cur, ucol, od='app_user_base')
btdk = pd.DataFrame(data, columns='uid 公司號 註冊渠道 註冊時間 手機歸屬地 暱稱'.split())
btdk['註冊時間'] = btdk['註冊時間'].apply(lambda x: pd.Timestamp.fromtimestamp(x / 1000))
bd = bd.merge(btdk, on='uid', how='left')

# Goods labels: goods_id -> grade label (gln) and subject label (sln).
# `i > 0` also drops the NaN goods ids introduced by the left merge.
glst = [i for i in set(bd['goods_id']) if i > 0]
print(len(glst), '#len(glst)')
if glst:
    gsql = '''
        select goods_id,
               max(case category_keyword when 'Grade' then label_name end) as gln,
               max(case category_keyword when 'Subject' then label_name end) as sln
        from user_order.gds_goods_label gg
        left join user_order.gds_label gb on gg.label_id=gb.label_id
        where gg.goods_id in {0}
        group by goods_id'''.format(_sql_in_list(glst))
    curor.execute(gsql)
    gdata = curor.fetchall()
else:
    # An empty `in ()` list is invalid SQL, so skip the query entirely.
    gdata = []
gbtd = pd.DataFrame(list(gdata), columns='goods_id gln sln'.split())
print(len(gbtd), '#len(gbtd)')
bd = bd.merge(gbtd, on='goods_id', how='left')

# CRM lookups: employees first, then their departments.
kwsh = [i for i in set(btdtw['employee_id']) if pd.notna(i)]
conncrm = pymysql.connect(host='', user='', passwd="", db='')
curcrm = conncrm.cursor()
ucks = 'employee_id,real_name,department_name,leader_name,department_id'
if kwsh:
    sql01 = "select {0} from crm_employee_user where employee_id in {1}".format(ucks, _sql_in_list(kwsh))
    curcrm.execute(sql01)
    data = curcrm.fetchall()
else:
    data = []
ceu = pd.DataFrame(list(data), columns=ucks.split(','))
# Ids are compared as strings when merging into `bd` below.
ceu['employee_id'] = ceu['employee_id'].apply(str)

kdsh = [i for i in set(btdtw['did']) if pd.notna(i)]
ucks = 'department_id,business_line_name'
if kdsh:
    sql02 = "select {0} from crm_department where department_id in {1}".format(ucks, _sql_in_list(kdsh))
    curcrm.execute(sql02)
    data = curcrm.fetchall()
else:
    data = []
ceu1 = pd.DataFrame(list(data), columns='did,business_line_name'.split(','))
ceu1['did'] = ceu1['did'].apply(str)
#ceu=ceu.merge(ceu1,on='department_id',how='left')
bd = bd.merge(ceu, on='employee_id', how='left')
bd = bd.merge(ceu1, on='did', how='left')
t3 = t1.replace('-', '')


# Apportion order amounts across rows and fix the column order.
def filterShourdfw(sdf):
    """Split each payment's amount evenly across its duplicated rows.

    The original amount is preserved in a new ``'訂單總金額'`` column. For
    every ``biz_serial_no`` that occurs on more than one row, ``pay_amount``
    on those rows is replaced by the first row's amount divided by the
    number of rows. *sdf* is modified in place and also returned.
    """
    sdf['訂單總金額'] = sdf['pay_amount']
    # Grouped by biz_serial_no rather than order id: top-ups do not carry
    # a real order id.
    for serial in set(sdf['biz_serial_no']):
        rows = sdf.loc[sdf['biz_serial_no'] == serial]
        count = len(rows)
        if count > 1:
            avg = rows['pay_amount'].tolist()[0] / count
            print(avg, type(avg))
            sdf.loc[rows.index, 'pay_amount'] = avg
    return sdf


bd = filterShourdfw(bd)
ptt = {'junior': '初中', 'senior': '高中'}
bd['stage'] = bd['stage'].apply(lambda x: ptt.get(x, x))
bd.rename(columns={'brt': '支付時間', 'create_time': '建立時間', 'pay_channel': '支付平臺', 'leid': '公司號', 'stage': '平臺', 'business_line_name': '業務部', 'goods_name': '名稱', 'order_id': '訂單號', 'pay_amount': '收入金額', 'goods_stage': '商品所在平臺'}, inplace=True)

# Leading columns in report order; every remaining column is appended.
s_idx = '支付時間 uid 公司號 名稱 收入金額 平臺 gln real_name department_name 業務部 訂單總金額 訂單號 註冊時間 支付平臺'.split()
for c in bd.columns:
    if c in ['user_id', 'cuser_id', 'stage']:
        continue
    if c not in s_idx:
        s_idx.append(c)
bd = bd.loc[:, s_idx]  # reorder the columns
#bdg=bd.loc[bd['業務部']=='線上電銷']
#bd.to_excel('{0}-{1}-收入情況-收入表-1.xlsx'.format(t1,t2),index=False)
def srs_classify(d):
    """Map one report row to its revenue-attribution bucket.

    Reads ``order_type``, ``平臺`` (platform), ``業務部`` (business line),
    ``department_name``, ``gln`` (grade label) and ``訂單號`` (order id)
    from *d*, which may be a DataFrame row or a plain dict.

    Returns:
        A label such as ``'電銷-初中'``; ``'空'`` for an unrecognised
        platform; ``None`` when the platform is known but no business-line
        rule matches.
    """
    def grade_label(prefix, fallback,
                    grades=(('年級', '小學'), ('初', '初中'), ('高', '高中'))):
        # str() keeps a missing grade label (NaN / None after the left
        # merge) from raising; it simply matches no keyword.
        gln = str(d['gln'])
        for keyword, grade in grades:
            if keyword in gln:
                return prefix + '-' + grade
        # Unrecognised grade label: log it and use the bucket's default.
        print(d['gln'], d['訂單號'])
        return prefix + '-' + fallback

    if d['order_type'] == 'exp_center':
        return '體驗中心'

    platform = d['平臺']
    if platform == '初中':
        line = d['業務部']
        if line == '輔導老師工作臺':
            return '輔導-小初'
        if '崑山' in str(d['department_name']):
            return grade_label('崑山', '初中')
        if line == '線上電銷':
            return grade_label('電銷', '高中')
        line_text = str(line)
        if '政哥阿米巴' in line_text:
            # This bucket has no explicit senior rule; anything that is not
            # primary/junior is logged and falls back to senior.
            return grade_label('政哥阿米巴', '高中',
                               grades=(('年級', '小學'), ('初', '初中')))
        if line == '社群運營' or '阿米巴' in line_text:
            return grade_label('社群', '小學')
        return None

    if platform == '高中' or '高中' in str(platform):
        line = d['業務部']
        if line == '輔導老師工作臺':
            return '輔導-高中'
        if '崑山' in str(d['department_name']):
            return '崑山-高中'
        if line == '線上電銷':
            return '電銷-高中'
        line_text = str(line)
        if '政哥阿米巴' in line_text:
            return '政哥阿米巴-高中'
        if line == '社群運營' or '阿米巴' in line_text:
            return '社群-高中'
        return None

    # NOTE(review): treated as the fallback for an unknown platform because
    # it prints the platform - confirm against the original notebook.
    print(platform)
    return '空'
# Tag every report row with its attribution bucket.
bd['tt'] = bd.apply(srs_classify, axis=1)
def sru_pretty_pr(bd):
    """Summarise the report frame per attribution bucket.

    Returns a dict with three entries:
      * ``'gmv'``    - sum of ``'收入金額'`` per ``'tt'`` bucket,
      * ``'ucount'`` - distinct ``'uid'`` count per bucket, restricted to
        rows whose ``'訂單總金額'`` exceeds 100,
      * ``'all'``    - sum of ``'收入金額'`` over the whole frame.
    """
    revenue_by_bucket = bd.groupby('tt')['收入金額'].sum()
    large_orders = bd.loc[bd['訂單總金額'] > 100]
    buyers_by_bucket = large_orders.groupby('tt')['uid'].nunique()
    return {
        'gmv': dict(revenue_by_bucket),
        'ucount': dict(buyers_by_bucket),
        'all': bd['收入金額'].sum(),
    }


sru_pretty_pr(bd)
# Consumption table: order details created on day t1, joined with the
# order header and CRM (employee / department) information.
#t1='2020-12-31'
print(t1)


def _sql_in_list(values):
    """Render *values* as a parenthesised SQL list such as ``(1, 2)``.

    Matches ``str(tuple(values))`` for two or more items, but also yields
    valid SQL for a single item (``(1)`` rather than ``(1,)``).
    Callers must not pass an empty sequence.
    """
    return '(' + ', '.join(repr(v) for v in values) + ')'


ucol = 'order_id goods_stage goods_type goods_resource goods_name authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'.replace(' ', ',')
msql = "select {0} from mall_order_detail where mall_price>0 and cash_part>0 and date_format(create_time,'%Y%m%d')='{1}'".format(ucol, t1.replace('-', ''))
curor.execute(msql)
data = curor.fetchall()
btda = pd.DataFrame(list(data), columns='order_id goods_stage goods_type goods_resource goods_name authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'.split())
print(len(btda))
olst = list(set(btda['order_id']))
print(len(olst))
#btda.to_excel('{0}-1212111-消費1.xlsx'.format(t1))

u2 = 'order_id stage order_type order_channel pay_type actually_paid cash_part order_status pay_status employee_id paid_time express_status is_second_kill is_offline_site is_shopping_cart remark create_time update_time'.replace(' ', ',')
data1 = mall_sql(olst, curor, u2, od='mall_order')
btdw = pd.DataFrame(data1, columns=u2.split(','))
print(len(btdw))


def emp(x):
    """Return *x* as an int, or 0 when it cannot be converted."""
    try:
        return int(x)
    except (TypeError, ValueError):
        return 0


btdw['employee_i'] = btdw['employee_id'].apply(emp)
kwsh = list(set(btdw['employee_i']))
ucks = 'employee_id,real_name,department_name,leader_name,department_id'
if kwsh:
    curcrm.execute("select {0} from crm_employee_user where employee_id in {1}".format(ucks, _sql_in_list(kwsh)))
    data = curcrm.fetchall()
else:
    # An empty `in ()` list is invalid SQL, so skip the query entirely.
    data = []
ceu = pd.DataFrame(list(data), columns=ucks.split(','))
# NOTE(review): ids are stringified here, so the merge below only matches
# if btdw['employee_id'] also holds strings - confirm the column type.
ceu['employee_id'] = ceu['employee_id'].apply(str)

# Missing department ids would render as None/nan inside the SQL list.
kdsh = [i for i in set(ceu['department_id']) if pd.notna(i)]
ucks = 'department_id,business_line_name'
if kdsh:
    sql02 = "select {0} from crm_department where department_id in {1}".format(ucks, _sql_in_list(kdsh))
    curcrm.execute(sql02)
    data = curcrm.fetchall()
else:
    data = []
ceu1 = pd.DataFrame(list(data), columns=ucks.split(','))
ceu = ceu.merge(ceu1, on='department_id', how='left')
btd = btdw.merge(ceu, on='employee_id', how='left')
btd.to_excel('{0}-1212111-消費2-2.xlsx'.format(t1))
# Consumption table: filter the rows of interest and total what was paid.
is_senior = btd['stage'] == 'senior'
is_paid = btd['pay_status'] == 'paid'
# Per the original note, this condition is not strictly needed.
not_trial = btd['order_type'] != 'exp_center'
# Open question from the original: should rows with an empty employee id
# also be counted as telesales?
is_telesales = btd['business_line_name'] == '線上電銷'
btd1 = btd.loc[is_senior & is_paid & not_trial & is_telesales]
btd1['actually_paid'] = btd1['actually_paid'].apply(float)
print(btd1['actually_paid'].sum())
# Blog sign-off (not code): 不要為了追逐,而忘記當初的樣子。