1. 程式人生 > 其它 >selenium+chrome抓取資料,執行js

selenium+chrome抓取資料,執行js

某些特殊的網站需要用 selenium 來抓取資料,例如資料經過 JS 加密、直接破解難度較大的網站。

selenium 支援 Linux 和 Windows,前提是已安裝 Python 3,並配置好執行環境(Chrome 與對應版本的 chromedriver)。

抓取程式碼:

#!/usr/bin/env python
# coding:utf-8
import time
import execjs
import random
import requests
import urllib3
import re
import base64
import json
from datetime import datetime, timedelta
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import selenium.webdriver.support.ui as ui
from selenium.webdriver.common.keys import Keys
import urllib.parse
from get_area import get_fpdm_area
requests.packages.urllib3.disable_warnings()

#print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
import sys
#print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

# Command-line arguments: four required positional strings, bound in order to
#   argv[1] -> fpdm, argv[2] -> fphm, argv[3] -> kprq, argv[4] -> kjje
# When any of them is missing, exit with a usage message on stderr (status 1)
# instead of an opaque "IndexError: list index out of range" traceback.
if len(sys.argv) < 5:
    _prog = sys.argv[0] if sys.argv else "script.py"
    sys.exit("usage: %s <fpdm> <fphm> <kprq> <kjje>" % _prog)

fpdm, fphm, kprq, kjje = sys.argv[1:5]
# ippro = sys.argv[5]  # fifth argument, currently disabled


# Build the Chrome launch options for a headless run and start the driver.
chrome_options = Options()

# Disable Chrome's sandbox; commonly needed when the process runs as root.
chrome_options.add_argument('--no-sandbox')

# Do not use /dev/shm for shared memory (it is often too small in containers).
chrome_options.add_argument('--disable-dev-shm-usage')

# Run without a visible browser window.
chrome_options.add_argument('--headless')

# Tolerate invalid or self-signed TLS certificates.
chrome_options.add_argument('--ignore-ssl-errors=yes')
chrome_options.add_argument('--ignore-certificate-errors')

# Reduce the signals that reveal the browser is automation-controlled.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
# NOTE(review): the bare switch below looks redundant with the valued one on
# the next line -- confirm before removing.
chrome_options.add_argument("--disable-blink-features")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

# Viewport size. The switch must be spelled exactly "--window-size=W,H"
# (two dashes, "window", a single "="); a misspelled switch is not applied.
chrome_options.add_argument("--window-size=1220,800")
# chrome_options.add_argument("--window-size=2220,1500")  # larger alternative
# chrome_options.add_argument('blink-settings=imagesEnabled=false')  # skip images for speed

# Recommended by Google's documentation as a workaround for a bug.
chrome_options.add_argument('--disable-gpu')

# NOTE(review): `executable_path` is the Selenium 3 calling convention;
# Selenium 4 expects a Service object -- confirm the installed version.
driver = webdriver.Chrome(options=chrome_options,executable_path='/usr/local/bin/chromedriver')

# Keep pages from detecting Selenium: register a script that is evaluated on
# every new document and makes `navigator.webdriver` read as undefined.
_hide_webdriver_js = """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {"source": _hide_webdriver_js},
)
# Load the target page in the headless browser.
url = 'http://dasfd.sdfasd.com'
driver.get(url)


# Read the inner HTML of the page's <html> element.
# NOTE(review): `html` is not used again in the visible code.
html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
# Run the JS statement that moves the scrollbar (scrollLeft).
# NOTE(review): `js` is not defined anywhere in the visible source; this line
# raises NameError unless it is defined in a part of the file not shown here.
driver.execute_script(js)


# Locate the input box with id "fpdm".
inputss = driver.find_element_by_xpath('//*[@id="fpdm"]')
# Type the value into the box and press ENTER.
# NOTE(review): `g_fpdm` is not defined in the visible source (only `fpdm` is
# read from argv) -- confirm which name is intended.
inputss.send_keys(g_fpdm + Keys.ENTER)


# Read the CSS `display` value of the element with id "checkfp".
# NOTE(review): this value is overwritten by the assignment below before it is
# ever read.
popup_container = driver.find_element_by_id('checkfp').value_of_css_property('display')


# Get the text of the element with id "popup_message".
popup_container = driver.find_element_by_xpath('//*[@id="popup_message"]').text