電商工作代碼
阿新 • • 發佈:2017-07-24
dict earch 商品 pig cts itl pymysql word session
"""Scrape Taobao shop listings with Selenium (IE) and store them in MySQL.

For every line of ``1.csv`` in the current working directory the script
walks the shop's listing pages, inserts products whose id is not yet in the
``商品id`` table, visits each new product page to record its category,
sizes, colours and SKU count, saves a full-page screenshot, and fills the
1/7/30-day sales column for products listed exactly 1, 7 or 30 days ago.

NOTE(review): this file was recovered from a published copy in which all
indentation was lost and quotes were replaced by typographic ones. The
nesting below is reconstructed from data flow -- confirm it against the
original source.
"""
from selenium import webdriver
from scrapy.selector import Selector
import time
import random
import pymysql
from urllib import parse
import re
import os

# NOTE(review): every XPath containing "[email protected]" was damaged by
# e-mail obfuscation when the code was published; the trailing step
# (presumably an attribute selector such as "/@...") is lost.  The strings
# are kept exactly as published and MUST be restored before the script can
# run -- as they stand they are not valid XPath.
LISTING_ITEMS_XPATH = '//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'
ITEM_IMAGE_XPATH = ".[email protected]"
ITEM_LINK_XPATH = './dd[1][email protected]'
CATEGORY_XPATH = '//*[@id="J_TMySize"][email protected]'
NEXT_PAGE_XPATH = (
    '//*[@id="J_ShopSearchResult"]/div/div[2]/div[last()]/a[last()][email protected]'
)
FIXED_NEXT_PAGE_XPATH = (
    '//*[@id="J_ShopSearchResult"]/div/div[2]/div[10]/a[11][email protected]'
)


class spider(object):
    """Small helper that extracts the next-page link from listing HTML."""

    def chul3(self, dates):
        """Return ``'https:'`` + the next-page link found in HTML *dates*.

        When the link is absent the XPath yields ``""`` and the bare
        prefix ``'https:'`` is returned.
        """
        a = Selector(text=dates)
        next_url = a.xpath(FIXED_NEXT_PAGE_XPATH).extract_first("")
        return 'https:' + next_url


def capture(webder, save_fn="capture.png"):
    """Scroll the current page to the bottom, then save a screenshot.

    The injected script scrolls in 100px steps every 50 ms and appends
    "scroll-done" to the document title when it reaches the bottom
    (presumably so lazily loaded content is rendered first).  This function
    waits at most 30 seconds for that marker before taking the screenshot.

    :param webder: the Selenium driver showing the page.
    :param save_fn: path of the image file to write.
    """
    webder.execute_script("""
        (function () {
            var y = 0;
            var step = 100;
            window.scroll(0, 0);
            function f() {
                if (y < document.body.scrollHeight) {
                    y += step;
                    window.scroll(0, y);
                    setTimeout(f, 50);
                } else {
                    window.scroll(0, 0);
                    document.title += "scroll-done";
                }
            }
            setTimeout(f, 1000);
        })();
    """)
    for _ in range(30):
        if "scroll-done" in webder.title:
            break
        time.sleep(1)
    webder.save_screenshot(save_fn)


def _record_product_details(bd_id):
    """Visit the product page for *bd_id* and store its details.

    Writes the category, size list, colour list and SKU count
    (colours x sizes) to ``商品id`` and saves a screenshot of the page.
    Relies on the module globals ``webdriver``, ``cursor``, ``conection``,
    ``path`` and ``shop``.
    """
    webdriver.implicitly_wait(50)
    webdriver.get('http://item.taobao.com/item.htm?id=' + bd_id)
    # The element itself is unused: the lookup only blocks (up to the
    # implicit wait) until the price block has been rendered.
    webdriver.find_element_by_class_name('tb-price-spec')
    time.sleep(random.randrange(2, 6))
    select_xixi = Selector(text=webdriver.page_source)

    liem = select_xixi.xpath(CATEGORY_XPATH).extract_first("")
    sql = 'update `商品id` set `商品id`.`類目` = %s where id = %s'
    cursor.execute(sql, (liem, bd_id))
    conection.commit()

    # Default to 1 so that a product with only colours (or only sizes)
    # still gets a non-zero SKU count.
    colour_count = 1
    size_count = 1
    for option in select_xixi.xpath('//*[@id="J_isku"]/div/dl'):
        label = option.xpath('./dt/text()').extract_first("")
        if '尺碼' in label:
            sizes = option.xpath('./dd/ul/li/a/span/text()').extract()
            size_count = len(sizes)
            sql = 'update `商品id` set `商品id`.`尺碼` = %s where id = %s'
            cursor.execute(sql, (' '.join(sizes), bd_id))
            conection.commit()
        if '顏色' in label:
            colours = option.xpath('./dd/ul/li/a/span/text()').extract()
            colour_count = len(colours)
            sql = 'update `商品id` set `商品id`.`顏色` = %s where id = %s'
            cursor.execute(sql, (' '.join(colours), bd_id))
            conection.commit()

    sql = 'update `商品id` set `商品id`.`sku量` = %s where id = %s'
    cursor.execute(sql, (colour_count * size_count, bd_id))
    conection.commit()

    # The published code recomputed the id here from the loop variable `i`,
    # which the SKU loop above had already rebound to a different node; the
    # expression was identical to the one that produced bd_id, so reuse it.
    title = (path + r'\\' + shop.split(",")[0] + r'\\' + bd_id
             + re.sub(r"\W", "", webdriver.title))
    capture(webdriver, title + '.jpg')


def _scrape_listing_page(url):
    """Process one listing page and return the next-page link.

    Returns ``""`` when the page has no next-page link.  Relies on the
    module globals ``webdriver``, ``cursor``, ``conection``, ``shop``,
    ``shop_oldid`` and ``shop_olxx``.
    """
    webdriver.implicitly_wait(50)
    webdriver.get(url)
    # Lookup used only to wait until the pagination bar is present.
    webdriver.find_element_by_class_name('pagination')
    # Snapshot the HTML now: the driver navigates away to product pages
    # while the items below are being processed.
    selects = Selector(text=webdriver.page_source)
    time.sleep(random.randrange(2, 6))

    for item in selects.xpath(LISTING_ITEMS_XPATH):
        bd_pig = item.xpath(ITEM_IMAGE_XPATH).re('(.*)_')
        # Keep only the CJK characters of the title / the digits of the link.
        bd_name = ''.join(re.findall(
            '[\u4e00-\u9fa5]',
            item.xpath('./dd[1]/a/text()').extract_first('')))
        bd_id = ''.join(re.findall(
            r'\d', item.xpath(ITEM_LINK_XPATH).extract_first('')))
        bd_much = item.xpath(
            './dd[1]/div/div[1]/span[2]/text()').extract_first('')
        bd_idlian = 'http://item.taobao.com/item.htm?id=' + bd_id
        bd_liang = item.xpath(
            './dd[1]/div/div[last()]/span[last()]/text()').extract_first('')

        if bd_id not in shop_oldid:
            sql = ("INSERT INTO 商品id (`品牌`, `id`,圖片鏈接,價格,標題,商品地址) "
                   "VALUES (%s,%s,%s,%s,%s,%s)")
            cursor.execute(sql, (shop.split(",")[0], bd_id, bd_pig,
                                 bd_much, bd_name, bd_idlian))
            conection.commit()
            _record_product_details(bd_id)

        for row in shop_olxx:
            if row['id'] == bd_id:
                # The column name comes from the literal labels selected in
                # the startup query ('1天銷量' / '7天銷量' / '30天銷量'),
                # not from scraped data.
                sql = ("UPDATE 商品id set " + row['日期']
                       + " = (%s) where id = %s")
                cursor.execute(sql, (bd_liang, row['id']))
                conection.commit()

    return selects.xpath(NEXT_PAGE_XPATH).extract_first("")


def lll(url):
    """Scrape the shop listing at *url* and every following page.

    Pages are followed iteratively (rather than by recursion) until a page
    has no next-page link.
    """
    while True:
        next_href = _scrape_listing_page(url)
        if not next_href:
            break
        url = 'https:' + next_href


chuli = spider()

# NOTE(review): credentials are hard-coded; consider moving them to
# configuration.
conection = pymysql.connect(host='localhost', user='root', password='123',
                            db='7.24測試', charset='utf8mb4',
                            cursorclass=pymysql.cursors.DictCursor)
with conection.cursor() as cursor:
    # Ids already stored, used to detect new products.
    sql1 = "select * from 商品id"
    cursor.execute(sql1)
    shop_id = cursor.fetchall()
    shop_oldid = [row['id'] for row in shop_id]

    # Products listed exactly 1, 7 or 30 days ago, each tagged with the
    # name of the sales column that has to be filled today.
    sql1 = (
        " SELECT `商品id`.id, `上架時間`,'1天銷量' as 日期 FROM `商品id`"
        " WHERE TIMESTAMPDIFF(DAY,`上架時間`,CURDATE()) =1"
        " union SELECT `商品id`.id, `上架時間`,'7天銷量' as 日期 FROM `商品id`"
        " WHERE TIMESTAMPDIFF(DAY,`上架時間`,CURDATE()) =7"
        " union SELECT `商品id`.id, `上架時間`,'30天銷量' as 日期 FROM `商品id`"
        " WHERE TIMESTAMPDIFF(DAY,`上架時間`,CURDATE()) =30"
    )
    cursor.execute(sql1)
    shop_id = cursor.fetchall()
    shop_olxx = list(shop_id)
    conection.commit()

cursor = conection.cursor()

# NOTE: this rebinds the name `webdriver` from the selenium module to the
# driver instance; the functions above rely on that global.
webdriver = webdriver.Ie()
url = ('https://login.taobao.com/member/login.jhtml'
       '?spm=a21bo.50862.754894437.1.5dcec6f76Oq9Wh&f=top'
       '&redirectURL=https%3A%2F%2Fwww.taobao.com%2F'
       '%3Fspm%3Da1z10.1-c-s.1581860521.1.559a715a3EnsHq')
webdriver.get(url)
# Presumably leaves time for a manual login -- TODO confirm.
time.sleep(20)

path = os.getcwd()

try:
    with open(os.getcwd() + r'\1.csv', 'r') as c:
        # `shop` is deliberately a module global: the scraping helpers read
        # its first field as the brand name.
        for shop in c:
            fields = shop.split(",")
            if len(fields) < 3:
                # Blank or malformed line: no URL column to scrape.
                continue
            # The third field carries the line's trailing newline when it
            # is the last column.
            url = fields[2].strip()
            lll(url)
finally:
    # Always release the browser and the DB connection, even on failure.
    try:
        webdriver.quit()
    finally:
        conection.close()
電商工作代碼