
Crawling Shop Product IDs

The script below reads a list of competitor Tmall shops from MySQL, pages through each shop's search results, extracts every product id, and inserts any ids not already stored into that shop's own table.


import requests
from bs4 import BeautifulSoup
import re
import time
import random
import pymysql.cursors

# MySQL connection; DictCursor returns each row as a dict keyed by column name.
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='123',
                             db='asd',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# Request headers captured from a logged-in browser session. They must be sent
# as HTTP headers (not query parameters), and the Cookie expires, so it needs
# to be refreshed from the browser periodically.
payload = {
    "Accept-Encoding": "gzip, deflate, sdch, br",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Connection": "keep-alive",
    "Cookie": "hng=; uss=UIMY14A%2B04Bbq%2BqRxS6C9OzJWudsw14Q1kb5mDDqxW%2BQ3YG%2BUcpgrDRWnRQ%3D; uc3=sg2=AC4AfXCJ7XkLw0gCUD1tD9ZxhXFdweN2A6VfybWadxI%3D&nk2=&id2=&lg2=; t=3c0787f77a28e0854ef28fc360b2c555; cookie2=1c912d33e44bdb2008763748702a61f4; _tb_token_=78577371d8136; l=AiQkmjyCyPnG7qTN1Iu5fBqvdCgWvUgn; isg=AvDwL_qYXdDeegACSXGXiIOKwb7f2NSDXgsSOepBvMsepZFPkkmkE0aNixo_; pnm_cku822=; cna=T7gREcWMLDsCAavWmjBJPJpS; Hm_lvt_c478afee593a872fd45cb9a0d7a9da3b=1495496950; Hm_lpvt_c478afee593a872fd45cb9a0d7a9da3b=1495496950",
    "Host": "tanggulake.tmall.com",
    "Referer": "https://tanggulake.tmall.com/search.htm?spm=a220o.1000855.w5002-15900729481.1.b3kpys&search=y",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest"}

with connection.cursor() as cursor:
    # One row per competitor shop: 店鋪名稱 (shop name, which is also the name
    # of that shop's id table) and 地址 (search URL minus the page number).
    cursor.execute("SELECT * FROM 競店")
    shops = cursor.fetchall()

for shop in shops:
    # Fetch page 1 first; a "current/total" counter on the page gives the
    # total number of result pages.
    resp = requests.get(shop["地址"] + "1", headers=payload)
    soup = BeautifulSoup(resp.text, "lxml")
    spans = soup.select("div > div > div > div > span:nth-of-type(1)")
    page_total = spans[2].text.split("/")[1]
    print(page_total)

    ids = []
    for page in range(1, int(page_total) + 1):
        time.sleep(random.randrange(1, 5))  # random pause between requests
        resp = requests.get(shop["地址"] + str(page), headers=payload)
        soup = BeautifulSoup(resp.text, "lxml")
        for item in soup.select("div > div > div > dl"):
            # Each product <dl> carries its id in data-id; keep the digits only.
            ids.append(re.sub(r"\D", "", item.get("data-id")))

    with connection.cursor() as cursor:
        # Ids already stored for this shop.
        cursor.execute("SELECT id FROM " + shop["店鋪名稱"])
        known = [row["id"] for row in cursor.fetchall()]
        for w in ids:
            if w not in known:
                sql = "INSERT INTO " + shop["店鋪名稱"] + " (`id`) VALUES (%s)"
                cursor.execute(sql, (w,))
        # The connection is not autocommit by default, so commit to save
        # the inserts.
        connection.commit()
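For reference, here is a minimal sketch of the MySQL schema the script expects. Only the table and column names (競店, 地址, 店鋪名稱, and an id column per shop) come from the code above; the column types, lengths, and the sample shop table name are assumptions.

import pymysql

conn = pymysql.connect(host='localhost', user='root', password='123',
                       db='asd', charset='utf8mb4')
with conn.cursor() as cur:
    # One row per competitor shop; 店鋪名稱 doubles as the name of that
    # shop's own id table, 地址 is the search URL without the page number.
    cur.execute("""
        CREATE TABLE IF NOT EXISTS 競店 (
            店鋪名稱 VARCHAR(64)  NOT NULL,
            地址     VARCHAR(255) NOT NULL
        ) DEFAULT CHARSET=utf8mb4
    """)
    # One table like this per shop ("example_shop" is a hypothetical name;
    # the real names come from the 店鋪名稱 column).
    cur.execute("""
        CREATE TABLE IF NOT EXISTS example_shop (
            id VARCHAR(20) PRIMARY KEY
        ) DEFAULT CHARSET=utf8mb4
    """)
conn.commit()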

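The two CSS selectors are the fragile part of the script: they assume the search page exposes a "current/total" page counter in a span and one dl element with a data-id attribute per product. A self-contained sketch on made-up markup (the HTML here is an assumption for illustration, not Tmall's real page, which is why the real script has to index spans[2]) shows what the parsing relies on:

from bs4 import BeautifulSoup
import re

# Toy HTML standing in for one search page (assumed structure, not real
# Tmall markup).
html = """
<div><div><div>
  <div><span>1/3</span></div>
  <dl data-id="item-1234567"><dt>product A</dt></dl>
  <dl data-id="item-7654321"><dt>product B</dt></dl>
</div></div></div>
"""
soup = BeautifulSoup(html, "lxml")

# "current/total" counter -> total number of pages.
span = soup.select("div > div > div > div > span:nth-of-type(1)")[0]
total_pages = int(span.text.split("/")[1])

# One product id per <dl>, digits only.
ids = [re.sub(r"\D", "", dl.get("data-id"))
       for dl in soup.select("div > div > div > dl")]

print(total_pages, ids)  # 3 ['1234567', '7654321']

If Tmall changes its page layout, these two selectors are the first thing to re-check.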