python selenium簡單使用
阿新 • • 發佈:2018-11-06
selenium的簡單使用
安裝 selenium
pip install selenium
安裝 pymongo
pip install pymongo
爬取起點完本小說排行榜資料並儲存到MongoDB資料庫
程式碼如下
# Scrape the Qidian "completed novels" ranking with headless Chrome and store
# one document per book (name, author, type, bookinfo) in MongoDB.
#
# Flow: open the first ranking page, visit every book's detail window to read
# its introduction, save the record, then follow the "next page" link until it
# is missing or disabled.
import time

import pymongo
from pymongo.errors import PyMongoError
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.common.by import By


def _find_detail_window(driver, main_window, wait=0.5):
    """Return the handle of the most recently opened non-main window, or None.

    The pause comes *before* reading ``window_handles`` so the browser has
    time to register the window opened by the preceding click.
    """
    time.sleep(wait)
    other_windows = [h for h in driver.window_handles if h != main_window]
    return other_windows[-1] if other_windows else None


def _scrape_book(driver, collection, book, main_window):
    """Read one ranking entry, fetch its introduction and store it.

    ``book`` is the ``<li>`` element of the entry on the ranking page. The
    detail window opened for it is always closed again and focus is returned
    to ``main_window``. Failures to read or store the details are reported
    and skipped so a single bad entry does not abort the whole crawl.
    """
    name = book.find_element(By.XPATH, './/h4').text
    author = book.find_element(By.XPATH, './/p/a[1]').text
    category = book.find_element(By.XPATH, './/p/a[2]').text
    # NOTE(review): the link text below is Traditional Chinese; confirm it
    # matches the text actually served by the site.
    book.find_element(By.PARTIAL_LINK_TEXT, '書籍詳情').click()

    detail_window = _find_detail_window(driver, main_window)
    if detail_window is None:
        # No new window appeared; closing the current one would kill the
        # ranking page, so just report and move on.
        print(name, '未獲取到詳細內容')
        return

    driver.switch_to.window(detail_window)
    try:
        bookinfo = driver.find_element(
            By.XPATH, '//div[@class="book-intro"]').text.strip()
        collection.insert_one({
            'name': name,
            'author': author,
            'type': category,
            'bookinfo': bookinfo,
        })
    except (WebDriverException, PyMongoError):
        print(name, '未獲取到詳細內容')
    finally:
        # Close the detail window and go back to the ranking page.
        driver.close()
        driver.switch_to.window(main_window)
    print(name)


def _go_to_next_page(driver):
    """Click the "next page" link; return False when there is no next page."""
    try:
        next_page = driver.find_element(
            By.XPATH, '//a[contains(@class,"lbf-pagination-next")]')
    except NoSuchElementException:
        print('爬取完畢')
        return False

    # Token test instead of exact string equality, so class order or extra
    # classes on the link do not hide the disabled state.
    css_classes = (next_page.get_attribute('class') or '').split()
    if 'lbf-pagination-disabled' in css_classes:
        return False

    time.sleep(1)
    page = driver.find_element(
        By.CLASS_NAME, 'lbf-pagination-input').get_attribute('value')
    print('第{page}頁爬取完成'.format(page=page))
    next_page.click()
    return True


# Run Chrome without a visible window; use webdriver.Chrome() with no options
# to watch the browser while debugging.
options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome(options=options)
try:
    # Implicit wait (seconds) applied to every element lookup.
    driver.implicitly_wait(15)
    # First page of the completed-novel ranking.
    url = 'https://www.qidian.com/rank/fin?dateType=3&page=1'
    driver.get(url)
    # Handle of the ranking window, needed to return after each detail page.
    mainwindow = driver.current_window_handle

    # Open the database connection.
    mongoclient = pymongo.MongoClient(host='10.31.160.242', port=27017)
    try:
        mongodb = mongoclient['novel']
        mongocollection = mongodb['novel_collections']
        while True:
            # Scrape every entry on the current ranking page.
            books = driver.find_element(
                By.CLASS_NAME, 'book-img-text').find_elements(By.XPATH, './/li')
            for book in books:
                _scrape_book(driver, mongocollection, book, mainwindow)
            if not _go_to_next_page(driver):
                break
    finally:
        # Close the database connection even if the crawl failed.
        mongoclient.close()
finally:
    # Always shut the browser down so no headless Chrome process is leaked.
    driver.quit()