Python抓取淘寶手機銷量排行榜
阿新 • • 發佈:2019-01-10
#!/usr/bin/env python
# encoding: utf-8
"""
@version: v1.0
@author: W_H_J
@license: Apache Licence
@contact: [email protected]
@site:
@software: PyCharm
@file: taoBaoSpider.py
@time: 2017/12/25 15:07
@describe: Scrape Taobao phone search-result pages and append the listings to taobao.csv.
"""
import re
import sys

import pandas

# Search-result offsets advance in steps of 44 (one page of listings).
ITEMS_PER_PAGE = 44
FIRST_PAGE = 1
LAST_PAGE = 9

# Seconds to wait for Taobao before giving up on a request.
REQUEST_TIMEOUT = 30

OUTPUT_CSV = r'taobao.csv'

SEARCH_URL = 'https://s.taobao.com/search?q=%E6%89%8B%E6%9C%BA&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306'

# NOTE(review): the cookie below is a hard-coded browser session credential.
# It will expire and should not live in source control; consider loading it
# from configuration or an environment variable.
# NOTE(review): 'path' looks like the HTTP/2 `:path` pseudo-header copied from
# browser dev tools; here it is sent as an ordinary header -- confirm whether
# it is needed at all.
REQUEST_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
    'cookie': 'thw=cn; t=be73ea5ec1ffbeb254d0a3535dd00415; cna=HqWrEpIZeG4CAbYSAEIb6bav; hng=CN%7Czh-CN%7CCNY%7C156; miid=596160490770762658; lgc=%5Cu5815%5Cu843D%5Cu4E4B%5Cu6CEAa; tracknick=%5Cu5815%5Cu843D%5Cu4E4B%5Cu6CEAa; tg=0; uc2=wuf=https%3A%2F%2Ftrade.tmall.com%2Fdetail%2ForderDetail.htm%3Fbiz_order_id%3D104827474284154168%26forward_action%3D; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; uc3=sg2=VWxidJMT8gLCYBc%2BxP5FJdYe9%2FXfUvq2%2Byf0cFWq90Q%3D&nk2=1RSXayUHM0Sl&id2=UUpkvTJ9k5HsSA%3D%3D&vt3=F8dBzLbVzPYkPml1NZk%3D&lg2=W5iHLLyFOGW7aA%3D%3D; uss=VvioJOfdaT365u5YugXSKrRnG47jUQQG9UQvstfUu5fjcHD0zxGQLEmn; _cc_=VFC%2FuZ9ajQ%3D%3D; mt=ci=67_1; tk_trace=oTRxOWSBNwn9dPy4KVJVbutfzK5InlkjwbWpxHegXyGxPdWTLVRjn23RuZzZtB1ZgD6Khe0jl%2BAoo68rryovRBE2Yp933GccTPwH%2FTbWVnqEfudSt0ozZPG%2BkA1iKeVv2L5C1tkul3c1pEAfoOzBoBsNsJySQJwqIKz2kX83uPP5e4iE9t1ZpHdHZkk218jfUuTKISIEGrGMtBctY%2B2vMCmzCRVhIqleLIl%2BRRQHs4ekW3wNcZhDfwkkQzp9RF7kjYiNbNLTbo2mRCr3Wf97aW%2FfC72uuEf9Tcc6cNT9QCiB0y7NxqzS4M5NvMkxl5KoKbA%2BorLqu5Y9jpCfT31RlA%3D%3D; cookie2=1c16eb46ef00c015dd101f731c258d77; _tb_token_=8de4c4560b63; v=0; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; swfstore=107855; JSESSIONID=ED726367865542B7BA84D801D1C72812; isg=AhcXOlKpAS4SKIXa0x_6AhsZpovNTcSrwSKOp2lEKOZNmDfacSx7DtWyjg59; uc1=cookie14=UoTdf1DFLRnICg%3D%3D',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'path': '/search?q=%E6%89%8B%E6%9C%BA&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306',
}

# One match per listing; the five groups are, in order:
# title, pic_url, price, trace, month_sales.
# Compiled once here because the pattern does not change between pages.
ITEM_PATTERN = re.compile('"title":"(.*?)","pic_url":"(.*?)","price":"(.*?)","trace":"(.*?)","month_sales":"(.*?)"')


def build_search_url(page):
    """Return the search URL for the 1-based result page *page*.

    The original script computed the offset ``44 * (page - 1)`` but never put
    it in the URL, so every iteration fetched the first page again. The
    offset is appended here as the ``s`` query parameter.
    NOTE(review): ``s`` is assumed to be Taobao's result-offset parameter --
    confirm against a live paginated search URL.
    """
    offset = ITEMS_PER_PAGE * (page - 1)
    return '%s&s=%d' % (SEARCH_URL, offset)


def parse_items(page_text):
    """Return a list of 5-tuples, one per listing matched in *page_text*.

    Each tuple is (title, pic_url, price, trace, month_sales) as raw strings.
    An empty list is returned when nothing matches.
    """
    return ITEM_PATTERN.findall(page_text)


def fetch_page(page):
    """Download result page *page* and return the response body as text."""
    # Imported here so the pure helpers above can be imported and tested
    # without the HTTP client installed.
    import requests

    response = requests.request(
        'GET',
        build_search_url(page),
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    return response.text


def main():
    """Scrape pages FIRST_PAGE..LAST_PAGE and append each page's rows to OUTPUT_CSV."""
    if sys.version_info[0] == 2:
        # Python 2 only: keep the original default-encoding workaround.
        reload(sys)  # noqa: F821 -- builtin on Python 2
        sys.setdefaultencoding("utf-8")

    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        rows = parse_items(fetch_page(page))
        if not rows:
            # Nothing to append for this page.
            print('page %d: no listings matched' % page)
            continue
        table = pandas.DataFrame(rows)
        print(table)
        # Written without a header row, in append mode, so every page
        # accumulates into the same file.
        table.to_csv(OUTPUT_CSV, header=False, index=False, mode='a+', encoding='utf-8')


if __name__ == '__main__':
    main()
銷量資訊視覺化
#!/usr/bin/env python
# encoding: utf-8
"""
@version: v1.0
@author: W_H_J
@license: Apache Licence
@contact: [email protected]
@site:
@software: PyCharm
@file: taobaoPlot.py
@time: 2017/12/25 16:12
@describe: Plot Taobao sales figures read from taobao.csv.
"""
import pandas

CSV_PATH = 'taobao.csv'

# The scraper writes five unnamed columns per listing, in this order:
# title, pic_url, price, trace, month_sales. They are labelled A..E here so
# that 'A' is the title and 'E' is the monthly sales figure.
CSV_COLUMNS = ['A', 'B', 'C', 'D', 'E']


def load_sales_by_title(csv_path=CSV_PATH):
    """Return a Series of summed column 'E' values, indexed by column 'A'.

    The CSV has no header row, so it is read with ``header=None`` and explicit
    column names. Reading it with the default header handling would consume
    the first listing as column names and leave no 'A' or 'E' column.
    """
    frame = pandas.read_csv(csv_path, header=None, names=CSV_COLUMNS)
    return frame.groupby('A')['E'].sum()


def plot_sales(sales):
    """Draw *sales* as a red line, a bar chart and a default line, then show it."""
    # Imported here so load_sales_by_title can be used without a plotting
    # backend installed.
    import matplotlib as mpl
    import matplotlib.pyplot as plt

    # SimHei provides the CJK glyphs needed for the listing titles.
    mpl.rcParams["font.sans-serif"] = ['SimHei']
    # Figure styling; set before the first plot call so it applies to the
    # figure that call creates.
    plt.rcParams["axes.labelsize"] = 16
    plt.rcParams["xtick.labelsize"] = 10
    plt.rcParams["ytick.labelsize"] = 10
    plt.rcParams["legend.fontsize"] = 10  # legend font size
    plt.rcParams["figure.figsize"] = [15, 12]

    # No `ax` is passed, so the three draws are expected to land on the same
    # current axes and overlay each other.
    sales.plot(color='r')
    sales.plot(kind='bar', rot=90)
    sales.plot(rot=90)
    plt.show()


def main():
    """Load the scraped CSV and display the sales chart."""
    plot_sales(load_sales_by_title())


if __name__ == '__main__':
    main()