1. 程式人生 > >電商工作代碼

電商工作代碼

dict earch 商品 pig cts itl pymysql word session

from selenium import webdriver
from scrapy.selector import Selector
import  time
import random
import pymysql
from urllib import parse
import re
import  os

        # a = Selector(text=webdriver.page_source)
        # if a.xpath(‘//*[@id="J_submit"]‘):
        #     time.sleep(15)
        #     for i in Selector(text=webdriver.page_source).xpath(‘//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl‘):
# bd_pig = i.xpath(".[email protected]").re(‘.*(img.*?jpg)‘) # bd_name = i.xpath(‘./dd[1]/a/text()‘).extract_first(‘‘) # bd_id = i.xpath(‘./dd[1][email protected]).extract_first(‘‘) # bd_much = i.xpath(‘./dd[1]/div/div[1]/span[2]/text()‘).extract_first(‘‘)
# bd_liang = i.xpath(‘./dd[1]/div/div[last()]/span/text()‘).extract_first(‘‘) # # sql = "INSERT INTO " + i.split(",")[0] + "( `id`,圖片鏈接,價格,標題,銷量) VALUES (%s,%s,%s,%s,%s)" # cursor.execute(sql, # (bd_id, bd_pig, bd_much, bd_name, bd_liang))
# (continuation of the disabled legacy branch above, kept for reference)
#             self.connection.commit()
#         else:
#             for i in Selector(text=webdriver.page_source).xpath(‘//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl‘):
#                 bd_pig = i.xpath(".[email protected]").re(‘.*(img.*?jpg)‘)
#                 bd_name =‘‘.join(re.findall(‘[\u4e00-\u9fa5]‘, i.xpath(‘./dd[1]/a/text()‘).extract_first(‘‘)))
#                 bd_id = ‘‘.join(re.findall(‘\d‘, i.xpath(‘./dd[1][email protected]).extract_first(‘‘)))
#                 bd_much = i.xpath(‘./dd[1]/div/div[1]/span[2]/text()‘).extract_first(‘‘)
#                 bd_liang = i.xpath(‘./dd[1]/div/div[last()]/span/text()‘).extract_first(‘‘)
#                 #
#                 sql = "INSERT INTO " + shop.split(",")[0] + "( `id`,圖片鏈接,價格,標題,銷量) VALUES (%s,%s,%s,%s,%s)"
#                 cursor.execute(sql,
#                                (bd_id, bd_pig, bd_much, bd_name, bd_liang))
#                 conection.commit()

# NOTE(review): the copy of this script that was recovered had lost its
# single-quoted string delimiters, and every "/@attribute" XPath suffix had
# been replaced by an e-mail-obfuscation placeholder.  The attribute parts of
# the selectors below are therefore reconstructions -- confirm each one
# against the live page markup before relying on the stored data.
NEXT_PAGE_XPATH = (
    '//*[@id="J_ShopSearchResult"]/div/div[2]/div[last()]/a[last()]/@href'
)  # TODO confirm: attribute presumed to be href (result is prefixed with "https:")
ITEM_IMAGE_XPATH = './/img/@src'  # TODO confirm: path and attribute were lost
ITEM_LINK_XPATH = './dd[1]/a/@href'  # TODO confirm: path tail and attribute were lost
CATEGORY_XPATH = '//*[@id="J_TMySize"]/@data-value'  # TODO confirm: attribute was lost

# NOTE(review): the Chinese literals (table/column names, '尺碼', '顏色') are
# kept exactly as found; the hosting page may have converted them from
# Simplified Chinese -- verify against the real schema and page text.


class spider(object):
    """Small helper around the shop search-result page.

    Not referenced by the rest of the visible script apart from the
    ``chuli`` instance created below.
    """

    def chul3(self, dates):
        """Return the absolute next-page URL found in the HTML text *dates*.

        The link is read from a fixed position (10th ``div``, 11th ``a``) of
        the result container; when it is missing the return value is just
        ``'https:'``.
        """
        a = Selector(text=dates)
        next_url = a.xpath(
            '//*[@id="J_ShopSearchResult"]/div/div[2]/div[10]/a[11]/@href'  # TODO confirm attribute
        ).extract_first("")
        return 'https:' + next_url


chuli = spider()

conection = pymysql.connect(
    host='localhost',
    user='root',
    password='123',
    db='7.24測試',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)

with conection.cursor() as cursor:
    sql1 = "select * from 商品id"
    cursor.execute(sql1)
    shop_id = cursor.fetchall()
    # Ids are normalised to str because the ids scraped from the page are
    # strings; a set gives O(1) membership tests in the crawl loop.
    shop_oldid = {str(row['id']) for row in shop_id}

    # Items listed exactly 1, 7 or 30 days ago, tagged with the name of the
    # sales column that has to be filled in today.
    sql1 = '''
        SELECT `商品id`.id, `上架時間`, '1天銷量' as 日期 FROM `商品id`
        WHERE TIMESTAMPDIFF(DAY,`上架時間`,CURDATE()) =1
        union
        SELECT `商品id`.id, `上架時間`, '7天銷量' as 日期 FROM `商品id`
        WHERE TIMESTAMPDIFF(DAY,`上架時間`,CURDATE()) =7
        union
        SELECT `商品id`.id, `上架時間`, '30天銷量' as 日期 FROM `商品id`
        WHERE TIMESTAMPDIFF(DAY,`上架時間`,CURDATE()) =30'''
    cursor.execute(sql1)
    shop_id = cursor.fetchall()
    shop_olxx = list(shop_id)
    conection.commit()

# Long-lived cursor shared by the functions below.
cursor = conection.cursor()

# NOTE: this rebinding shadows the imported ``selenium.webdriver`` module;
# from here on ``webdriver`` is the browser instance.  The name is kept
# because the functions below (and possibly other code) refer to it.
webdriver = webdriver.Ie()
url = 'https://login.taobao.com/member/login.jhtml?spm=a21bo.50862.754894437.1.5dcec6f76Oq9Wh&f=top&redirectURL=https%3A%2F%2Fwww.taobao.com%2F%3Fspm%3Da1z10.1-c-s.1581860521.1.559a715a3EnsHq'
webdriver.get(url)
time.sleep(20)  # fixed pause on the login page -- presumably for a manual login; TODO confirm

path = os.getcwd()


def capture(webder, save_fn="capture.png"):
    """Scroll the current page top to bottom, then save a screenshot.

    Args:
        webder: the browser driver whose current page is captured.
        save_fn: file name the screenshot is written to.

    The injected script scrolls down in 100px steps every 50ms and appends
    ``scroll-done`` to the document title when it reaches the bottom; this
    function polls the title once per second, for at most 30 seconds, before
    taking the screenshot.
    """
    webder.execute_script("""
        (function () {
            var y = 0;
            var step = 100;
            window.scroll(0, 0);
            function f() {
                if (y < document.body.scrollHeight) {
                    y += step;
                    window.scroll(0, y);
                    setTimeout(f, 50);
                } else {
                    window.scroll(0, 0);
                    document.title += "scroll-done";
                }
            }
            setTimeout(f, 1000);
        })();
    """)
    for _ in range(30):
        if "scroll-done" in webder.title:
            break
        time.sleep(1)
    webder.save_screenshot(save_fn)


def _record_new_item(brand, bd_id, bd_pig, bd_much, bd_name, bd_idlian):
    """Insert a newly seen item, enrich it from its detail page, screenshot it."""
    sql = "INSERT INTO 商品id (`品牌`, `id`,圖片鏈接,價格,標題,商品地址) VALUES (%s,%s,%s,%s,%s,%s)"
    cursor.execute(sql, (brand, bd_id, bd_pig, bd_much, bd_name, bd_idlian))
    conection.commit()

    webdriver.implicitly_wait(50)
    webdriver.get(bd_idlian)
    # Result is discarded: the lookup is kept only because, with the implicit
    # wait set above, it blocks until the price element is present.
    webdriver.find_element_by_class_name('tb-price-spec')
    time.sleep(random.randrange(2, 6))
    select_xixi = Selector(text=webdriver.page_source)

    liem = select_xixi.xpath(CATEGORY_XPATH).extract_first("")
    sql = 'update `商品id` set `商品id`.`類目` = %s where id = %s'
    cursor.execute(sql, (liem, bd_id))
    conection.commit()

    # SKU count = number of colours x number of sizes; each defaults to 1
    # when the page does not list that option group.
    c = 1
    ee = 1
    for spec in select_xixi.xpath('//*[@id="J_isku"]/div/dl'):
        b = spec.xpath('./dt/text()').extract_first("")
        if '尺碼' in b:
            aa = spec.xpath('./dd/ul/li/a/span/text()').extract()
            ee = len(aa)
            dd = ' '.join(aa)  # TODO confirm separator: the literal was lost, presumably whitespace
            sql = 'update `商品id` set `商品id`.`尺碼` = %s where id = %s'
            cursor.execute(sql, (dd, bd_id))
            conection.commit()
        if '顏色' in b:
            a = spec.xpath('./dd/ul/li/a/span/text()').extract()
            c = len(a)
            d = ' '.join(a)  # TODO confirm separator (see above)
            sql = 'update `商品id` set `商品id`.`顏色` = %s where id = %s'
            cursor.execute(sql, (d, bd_id))
            conection.commit()
    w = c * ee
    sql = 'update `商品id` set `商品id`.`sku量` = %s where id = %s'
    cursor.execute(sql, (w, bd_id))
    conection.commit()

    # The original re-derived the id from the loop variable, which by this
    # point had been rebound to a detail-page node; bd_id is the value meant.
    shot_dir = os.path.join(path, brand)
    # The screenshot cannot be written into a directory that does not exist.
    os.makedirs(shot_dir, exist_ok=True)
    title = os.path.join(shot_dir, bd_id + re.sub(r"\W", "", webdriver.title))
    capture(webdriver, title + '.jpg')


def lll(url):
    """Crawl a shop's search-result listing starting at *url*.

    For every item on every result page:
      * an item whose id is not yet known is inserted into ``商品id``,
        enriched from its detail page and screenshotted;
      * an item that is due for a 1/7/30-day sales snapshot (``shop_olxx``)
        gets the matching sales column updated.

    Pagination is followed until no next link is found or a page repeats.

    Relies on module-level state: ``webdriver``, ``cursor``, ``conection``,
    ``shop_oldid``, ``shop_olxx`` and the current CSV row ``shop`` (its first
    field is stored as the brand).
    """
    brand = shop.split(",")[0]
    seen_pages = set()
    while url and url not in seen_pages:
        seen_pages.add(url)

        webdriver.implicitly_wait(50)
        webdriver.get(url)
        # Result is discarded: kept only to block until pagination is present.
        webdriver.find_element_by_class_name('pagination')
        a = webdriver.page_source
        time.sleep(random.randrange(2, 6))
        selects = Selector(text=a)

        for item in selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
            # re_first yields a plain string; the list returned by .re() is
            # rendered by PyMySQL as "(...)", which is invalid SQL when it
            # holds zero or several matches.
            bd_pig = item.xpath(ITEM_IMAGE_XPATH).re_first('(.*)_', default='')
            bd_name = ''.join(re.findall(
                '[\u4e00-\u9fa5]',
                item.xpath('./dd[1]/a/text()').extract_first('')))
            bd_id = ''.join(re.findall(
                r'\d', item.xpath(ITEM_LINK_XPATH).extract_first('')))
            bd_much = item.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
            bd_idlian = 'http://item.taobao.com/item.htm?id=' + bd_id
            bd_liang = item.xpath(
                './dd[1]/div/div[last()]/span[last()]/text()').extract_first('')

            if bd_id not in shop_oldid:
                _record_new_item(brand, bd_id, bd_pig, bd_much, bd_name, bd_idlian)
                # Remember it so a repeated listing is not inserted twice.
                shop_oldid.add(bd_id)

            for row in shop_olxx:
                # Compare as strings: DB ids may be integers, bd_id is text.
                if str(row['id']) == bd_id:
                    sql = "UPDATE 商品id set `" + row['日期'] + "` = (%s) where id = %s"
                    cursor.execute(sql, (bd_liang, row['id']))
                    conection.commit()

        next_href = selects.xpath(NEXT_PAGE_XPATH).extract_first("")
        url = 'https:' + next_href if next_href else None


# Each row of 1.csv: first field is stored as the brand, third field is the
# URL of the shop listing to crawl.
with open(os.path.join(os.getcwd(), '1.csv'), 'r') as c:
    for shop in c:
        if not shop.strip():
            continue  # tolerate blank lines
        url = shop.split(",")[2].strip()  # drop the trailing newline
        lll(url)

電商工作代碼