selenium模擬登陸淘寶網並且將‘衣服’相關資訊下載儲存在mysql資料庫
阿新 • • 發佈:2018-11-13
import re
import pymysql
from lxml import etree
from selenium import webdriver
# The following three imports are used to wait until page elements have finished loading
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
# Start a Chrome browser driven by Selenium. The name `brower` (sic) is kept
# because every function in this file refers to it.
brower = webdriver.Chrome()

# Open the connection to the local MySQL database `taobao`.
con = pymysql.connect(host='localhost', user='root', password='', db='taobao', port=3306)

# Cursor shared by all statements issued by this script.
cur = con.cursor()

# Create the result table only if it is missing, so the script can be run more
# than once without failing on "table already exists".
# The price column keeps its original (misspelled) name `prince` because the
# INSERT statement in get_products() uses it. Its type is DECIMAL(10,2): the
# former FLOAT(4,2) could not represent any price of 100 or more.
cur.execute(
    "CREATE TABLE IF NOT EXISTS yifu ("
    "id int(4) NOT NULL auto_increment PRIMARY KEY ,"
    "title VARCHAR(60),"
    "prince DECIMAL(10,2),"
    "people int(10),"
    "city VARCHAR(10),"
    "shop VARCHAR(20),"
    "img VARCHAR(200))"
)
def search():
    """Search Taobao for '衣服' and scrape the first page of results.

    Steps performed:
      1. open https://www.taobao.com;
      2. wait for the search box and the search button to be present;
      3. type the keyword and click the button;
      4. wait for the pager's "total pages" label;
      5. call get_products() to store the first result page.

    Returns:
        str: the text of the "total pages" label; the caller extracts the
        number from it.

    If any wait raises TimeoutException the whole sequence is started again.
    The retry is a loop rather than a recursive call, so repeated timeouts
    cannot exhaust the interpreter's recursion limit.
    """
    while True:
        try:
            brower.get('https://www.taobao.com')  # open the Taobao home page
            # Explicit waits: block (up to 10 s) until the element is in the DOM.
            in_put = WebDriverWait(brower, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#q'))
            )
            submit = WebDriverWait(brower, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#J_TSearchForm > div.search-button > button')
                )
            )
            # Type the keyword and trigger the search.
            in_put.send_keys('衣服')
            submit.click()
            # The pager label only appears once the result page has loaded.
            total_page = WebDriverWait(brower, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total')
                )
            )
            get_products()
            return total_page.text
        except TimeoutException:
            # A wait timed out: retry the whole search from the start.
            continue
def next_page(page_num):
    """Jump to result page `page_num` and scrape it.

    The page number is typed into the pager's input box and the pager's
    submit button is clicked. Once the highlighted pager item shows
    `page_num`, get_products() is called to store that page.

    :param page_num: number of the result page to load.
    :return: None

    If any wait raises TimeoutException the jump is attempted again. The
    retry is a loop rather than a recursive call, so repeated timeouts cannot
    exhaust the interpreter's recursion limit.
    """
    while True:
        try:
            in_put = WebDriverWait(brower, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.form > input')
                )
            )
            submit = WebDriverWait(brower, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit')
                )
            )
            in_put.clear()
            in_put.send_keys(page_num)
            submit.click()
            # text_to_be_present_in_element becomes truthy once the active
            # pager item contains the requested page number, i.e. the jump
            # has actually taken effect.
            active = WebDriverWait(brower, 10).until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > ul > li.item.active > span'),
                    str(page_num),
                )
            )
            print(active)
            get_products()
            return
        except TimeoutException:
            # A wait timed out: try the same page again.
            continue
def get_products():
    """Parse the currently loaded result page and insert its products into MySQL.

    Waits for the item list to be present, parses the page source with lxml,
    and for every item extracts image URL, price, number of buyers, title,
    city and shop name. Each product is printed and inserted into table
    `yifu`; the transaction is committed once after the whole page.

    An item whose fields cannot be extracted (an xpath matching nothing, no
    digits in the buyers text, a price that is not a number) is reported and
    skipped, so a single odd listing does not abort the crawl.

    Raises:
        TimeoutException: if the item list does not appear within 10 seconds.
    """
    # Wait until the product entries are present in the DOM.
    WebDriverWait(brower, 10).until(
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, '#mainsrp-itemlist > div > div > div:nth-child(1) > div')
        )
    )
    tree = etree.HTML(brower.page_source)
    items = tree.xpath('//*[@id="mainsrp-itemlist"]/div/div/div[1]/div')

    # Loop invariants: compile the pattern and build the statement once.
    digits = re.compile(r'(\d+)')
    insert_sql = (
        "INSERT INTO yifu (title,prince,people,city,shop,img) "
        "VALUES (%s,%s,%s,%s,%s,%s)"
    )

    for item in items:
        try:
            product = {
                'img': item.xpath('./div/div/div/a/img/@data-src')[0],
                'prince': float(item.xpath('./div[2]/div/div/strong/text()')[0]),
                # First run of digits in the buyers text.
                'people': int(
                    digits.search(
                        item.xpath('./div[2]/div[1]/div[2]/text()')[0]
                    ).group(1)
                ),
                'title': item.xpath('./div/div/div/a/img/@alt')[0],
                'city': item.xpath('./div[2]/div[3]/div[2]/text()')[0],
                'shop': item.xpath('./div[2]/div[3]/div/a/span[2]/text()')[0],
            }
        except (IndexError, AttributeError, ValueError) as err:
            # IndexError: an xpath matched nothing; AttributeError: no digits
            # found (search() returned None); ValueError: price not numeric.
            print('skipping item with missing or malformed field:', repr(err))
            continue
        print(product)
        cur.execute(
            insert_sql,
            (
                product['title'],
                product['prince'],
                product['people'],
                product['city'],
                product['shop'],
                product['img'],
            ),
        )
    con.commit()  # one commit per result page
def main():
    """Run the whole crawl: the search page first, then every following page.

    search() scrapes page 1 and returns the pager's "total pages" text, from
    which the first run of digits is taken as the page count. Pages 2..total
    are then loaded and scraped with next_page().

    The database connection and the browser are released in a `finally`
    block so they are cleaned up even when the crawl raises.
    """
    try:
        total = search()
        total = int(re.compile(r'(\d+)').search(total).group(1))
        print(total)
        for i in range(2, total + 1):
            next_page(i)
    finally:
        try:
            con.close()  # close the database connection
        finally:
            # quit() ends the WebDriver session and its driver process;
            # close() would only close the current window.
            brower.quit()


if __name__ == '__main__':
    main()