Python快速下載商品資料,並連線資料庫,儲存資料
阿新 • • 發佈:2022-04-12
開發環境
python 3.8 pycharm 2021.2 專業版
程式碼實現
- 傳送請求
- 獲取資料
- 解析資料(篩選資料)
- 儲存資料
- 連線資料庫
開始程式碼
請求資料
# Disguise the request as a normal browser session: send the cookie, referer
# and user-agent captured from a logged-in Chrome tab.
# NOTE(review): the cookie below is a captured session value; presumably it
# must be replaced with your own for the request to succeed - confirm.
headers = {
    'cookie': 'miid=4137864361077413341; tracknick=%5Cu5218%5Cu6587%5Cu9F9978083283; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; cna=MNI4GicXYTQCAa8APqlAWWiS; enc=%2FWC5TlhZCGfEq7Zm4Y7wyNToESfZVxhucOmHkanuKyUkH1YNHBFXacrDRNdCFeeY9y5ztSufV535NI0AkjeX4g%3D%3D; t=ad15767ffa6febb4d2a8709edebf63d3; lgc=%5Cu5218%5Cu6587%5Cu9F9978083283; sgcookie=E100EcWpAN49d4Uc3MkldEc205AxRTa81RfV4IC8X8yOM08mjVtdhtulkYwYybKSRnCaLHGsk1mJ6lMa1TO3vTFmr7MTW3mHm92jAsN%2BOA528auARfjf2rnOV%2Bx25dm%2BYC6l; uc3=nk2=ogczBg70hCZ6AbZiWjM%3D&vt3=F8dCvCogB1%2F5Sh1kqHY%3D&lg2=Vq8l%2BKCLz3%2F65A%3D%3D&id2=UNGWOjVj4Vjzwg%3D%3D; uc4=nk4=0%40oAWoex2a2MA2%2F2I%2FjFnivZpTtTp%2F2YKSTg%3D%3D&id4=0%40UgbuMZOge7ar3lxd0xayM%2BsqyxOW; _cc_=W5iHLLyFfA%3D%3D; _m_h5_tk=ac589fc01c86be5353b640607e791528_1647451667088; _m_h5_tk_enc=7d452e4e140345814d5748c3e31fc355; xlly_s=1; x5sec=7b227365617263686170703b32223a223264393234316334363365353038663531353163633366363036346635356431434c61583635454745506163324f2f6b2b2b4b6166686f4d4d7a45774e7a4d794d6a59324e4473784d4b6546677037382f2f2f2f2f77453d227d; JSESSIONID=1F7E942AC30122D1C7DBA22C429521B9; tfstk=cKKGBRTY1F71aDbHPcs6LYjFVa0dZV2F6iSeY3hEAYkCuZxFizaUz1sbK1hS_r1..; l=eBEVp-O4gnqzSzLbBOfwnurza77OIIRAguPzaNbMiOCPO75p5zbNW60wl4L9CnGVhsTMR3lRBzU9BeYBqo44n5U62j-la1Hmn; isg=BDw8SnVxcvXZcEU4ugf-vTadDdruNeBfG0WXdBa9WicK4dxrPkd97hHTxQmZqRi3',
    'referer': 'https://s.taobao.com/search?q=%E4%B8%9D%E8%A2%9C&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20220323&ie=utf8&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s=',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="99", "Google Chrome";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36',
}

# The trailing `s=` query parameter is the result offset, computed here as
# 44 * page.
# NOTE(review): `page` is not defined in this snippet; presumably it comes
# from an enclosing `for page in range(...)` loop - confirm.
url = f'https://s.TB.com/search?q=%E4%B8%9D%E8%A2%9C&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20220323&ie=utf8&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s={44*page}'

# Without a timeout, requests waits forever on a stalled connection; bound it
# so the scraper fails fast instead of hanging.
response = requests.get(url=url, headers=headers, timeout=10)
獲取資料
# Keep the response body as decoded text so it can be searched with a regex.
html_data = response.text
解析資料(篩選資料)
# The product data is embedded in the page as a JavaScript assignment
# (`g_page_config = {...};`). Capture everything between the `=` and the
# last `;` on that line.
matches = re.findall('g_page_config = (.*);', html_data)
if not matches:
    # Fail with a readable message instead of a bare IndexError when the
    # page does not contain the expected assignment.
    raise ValueError('g_page_config was not found in the response HTML')

# The capture is a plain string, which cannot be indexed by key; convert the
# JSON text into a dict first.
json_str = matches[0]
json_dict = json.loads(json_str)

auctions = json_dict['mods']['itemlist']['data']['auctions']
for auction in auctions:
    raw_title = auction['raw_title']
    pic_url = auction['pic_url']
    detail_url = auction['detail_url']
    view_price = auction['view_price']
    item_loc = auction['item_loc']
    view_sales = auction['view_sales']
    nick = auction['nick']
    print(raw_title, pic_url, detail_url, view_price, item_loc, view_sales, nick)
儲存資料
# Append one product row to TB.csv. newline='' leaves line-ending handling to
# the csv module so no blank lines appear between rows on Windows.
# NOTE(review): this uses the per-auction variables, so it presumably belongs
# inside the `for auction in auctions` loop - confirm placement.
with open('TB.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow([raw_title, pic_url, detail_url, view_price, item_loc, view_sales, nick])
連線資料庫
import pymysql


def save_sql(title, pic_url, detail_url, view_price, item_loc, view_sales, nick):
    """Insert one product row into the ``goods`` table.

    Opens a new MySQL connection, inserts the row, commits, and closes the
    connection again.

    Args:
        title: Product title.
        pic_url: URL of the product image.
        detail_url: URL of the product detail page.
        view_price: Displayed price.
        item_loc: Seller location.
        view_sales: Displayed sales text.
        nick: Seller nickname.

    Raises:
        pymysql.MySQLError: If connecting or executing the insert fails.
    """
    count = pymysql.connect(
        host='xxx.xxx.xxx.xxx',  # database host address
        port=3306,  # database port
        user='xxxx',  # database account
        password='xxxx',  # database password
        db='xxxx',  # database (schema) name
    )

    # Use driver-side placeholders instead of formatting values into the SQL
    # text: scraped titles routinely contain quotes, which would break the
    # statement or allow SQL injection.
    sql = (
        "insert into goods(title, pic_url, detail_url, view_price, item_loc, view_sales, nick) "
        "values (%s, %s, %s, %s, %s, %s, %s)"
    )
    params = (title, pic_url, detail_url, view_price, item_loc, view_sales, nick)

    try:
        # The cursor is closed on leaving the `with` block, even on error.
        with count.cursor() as db:
            db.execute(sql, params)
        # Persist the inserted row.
        count.commit()
    finally:
        # Always release the connection; the function opens one per call.
        count.close()