1. 程式人生 > python selenium簡單使用

python selenium簡單使用

selenium的簡單使用

安裝 selenium
pip install selenium
安裝pymongo
pip install pymongo

爬取起點完本小說排行榜資料並儲存到 MongoDB 資料庫。
程式碼如下:

import time

import pymongo
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Alternative for debugging with a visible browser window:
# driver = webdriver.Chrome()

# Configure Chrome so that no browser window is shown (headless mode).
options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome(options=options)

# Implicit wait: element lookups keep retrying for up to 15 seconds
# before they give up.
driver.implicitly_wait(15)

# First page of the Qidian completed-novels ranking.
url = 'https://www.qidian.com/rank/fin?dateType=3&page=1'
driver.get(url)

# Remember the handle of the main (ranking) window so the script can
# switch back to it after visiting a book's detail window.
mainwindow = driver.current_window_handle

# Open the database connection and select the target collection.
# NOTE(review): the host is a hard-coded private (10.x) address and
# must be adjusted for the environment the script runs in.
mongoclient = pymongo.MongoClient(host='10.31.160.242',port=27017)
mongodb = mongoclient['novel']
mongocollection = mongodb['novel_collections']

def _wait_for_detail_window(browser, main_handle, timeout=10.0, poll_interval=0.2):
    """Wait for a window other than ``main_handle`` to appear.

    Args:
        browser: WebDriver whose ``window_handles`` are polled.
        main_handle: Handle of the window that must be ignored.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between two polls.

    Returns:
        The first handle that differs from ``main_handle``, or ``None`` if
        no such window showed up before the timeout expired.
    """
    deadline = time.time() + timeout
    while True:
        for handle in browser.window_handles:
            if handle != main_handle:
                return handle
        if time.time() >= deadline:
            return None
        time.sleep(poll_interval)


try:
    while True:
        # Scrape every book listed on the current ranking page.
        book_list = driver.find_element(By.CLASS_NAME, 'book-img-text')
        for book in book_list.find_elements(By.XPATH, './/li'):
            name = book.find_element(By.XPATH, './/h4').text
            author = book.find_element(By.XPATH, './/p/a[1]').text
            category = book.find_element(By.XPATH, './/p/a[2]').text

            # NOTE(review): this link text is Traditional Chinese; confirm it
            # matches the text the live site actually renders.
            book.find_element(By.PARTIAL_LINK_TEXT, '書籍詳情').click()

            # The detail page opens in a new window. Wait until that window
            # really exists instead of trusting window_handles[-1] right
            # after the click: if it is not open yet, the "last" handle is
            # still the main window and closing it would end the whole run.
            detail_window = _wait_for_detail_window(driver, mainwindow)
            if detail_window is None:
                print(name, '未獲取到詳細內容')
                continue

            time.sleep(0.5)  # short pause so detail pages are not opened too quickly
            driver.switch_to.window(detail_window)
            try:
                bookinfo = driver.find_element(
                    By.XPATH, '//div[@class="book-intro"]').text.strip()
                # Store the record in MongoDB.
                mongocollection.insert_one({
                    'name': name,
                    'author': author,
                    'type': category,
                    'bookinfo': bookinfo,
                })
            except Exception as exc:
                # Best effort per book: report the failure and carry on with
                # the next one. KeyboardInterrupt/SystemExit still propagate.
                print(name, '未獲取到詳細內容', exc)
            finally:
                # Close the detail window (the currently focused one).
                driver.close()
                print(name)

            # Return to the main (ranking) window.
            driver.switch_to.window(mainwindow)

        try:
            next_page = driver.find_element(
                By.XPATH, '//a[contains(@class,"lbf-pagination-next")]')
        except NoSuchElementException:
            print('爬取完畢')
            break

        # The "next" button carries a disabled marker class on the last page.
        # Compare class tokens so ordering or extra classes do not matter.
        next_classes = (next_page.get_attribute('class') or '').split()
        if 'lbf-pagination-disabled' in next_classes:
            print('爬取完畢')
            break

        time.sleep(1)
        page_input = driver.find_element(By.CLASS_NAME, 'lbf-pagination-input')
        print('第{page}頁爬取完成'.format(page=page_input.get_attribute('value')))
        next_page.click()
finally:
    # Always release the database connection and quit the browser, even when
    # scraping aborted with an error; otherwise the headless Chrome process
    # would be left running.
    try:
        mongoclient.close()
    finally:
        driver.quit()