關於selenium中chrome被反爬的問題(1)
阿新 • • 發佈:2018-11-19
問題描述:
比如一個網站,我們在瀏覽器中可以正常開啟,但是使用selenium驅動的Chrome卻打不開,這通常是被網站反爬了。可以嘗試為Chrome新增代理外掛(擴充套件)的方法來解決這一問題。
程式碼如下:
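定義一個獲取代理外掛zip檔案的函式(注意:程式碼中用到的 CHROME_PROXY_HELPER_DIR 與 CUSTOM_CHROME_PROXY_EXTENSIONS_DIR 兩個常數需要自行定義:前者是存放 manifest.json 和 background.js 模板的目錄,後者是存放生成的zip外掛的目錄)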
import os
import re
import time
import zipfile

from selenium import webdriver
def get_chrome_proxy_extension(proxy):
    """Build (or reuse) a Chrome proxy extension zip configured for ``proxy``.

    The archive is assembled from the ``manifest.json`` and ``background.js``
    templates found in ``CHROME_PROXY_HELPER_DIR``. The placeholders
    ``YOU_PROXY_ADDRESS``, ``YOUR_PROXY_PORT``, ``YOUR_PROXY_USERNAME`` and
    ``YOUR_PROXY_PASSWORD`` in ``background.js`` are replaced with the values
    parsed from ``proxy``. The result is stored in
    ``CUSTOM_CHROME_PROXY_EXTENSIONS_DIR`` and reused on later calls with the
    same proxy string. Both directory constants are expected to be defined
    at module level by the surrounding code.

    Args:
        proxy: Proxy with credentials, formatted as
            ``username:password@ip:port`` (``ip`` made of digits and dots,
            ``port`` made of digits).

    Returns:
        Path of the extension zip file.

    Raises:
        ValueError: If ``proxy`` does not match the expected format.
    """
    m = re.search(r'([^:]+):([^@]+)@([\d.]+):(\d+)', proxy)
    if not m:
        raise ValueError('Invalid proxy format. Should be username:password@ip:port')
    username, password, ip, port = m.groups()

    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR, exist_ok=True)

    # NOTE: the file name is derived from the full proxy string, so it
    # contains the credentials; kept as-is so previously built archives
    # are still found.
    extension_file_path = os.path.join(
        CUSTOM_CHROME_PROXY_EXTENSIONS_DIR,
        '{}.zip'.format(proxy.replace(':', '_')),
    )
    if os.path.exists(extension_file_path):
        # Already built for this proxy; reuse it.
        return extension_file_path

    # Read and fill in the template before creating the archive, so a
    # missing template never leaves an empty zip behind.
    template_path = os.path.join(CHROME_PROXY_HELPER_DIR, 'background.js')
    with open(template_path, encoding='utf-8') as template:
        background_content = template.read()
    for placeholder, value in (
        ('YOU_PROXY_ADDRESS', ip),
        ('YOUR_PROXY_PORT', port),
        ('YOUR_PROXY_USERNAME', username),
        ('YOUR_PROXY_PASSWORD', password),
    ):
        background_content = background_content.replace(placeholder, value)

    try:
        with zipfile.ZipFile(extension_file_path, mode='w') as zf:
            zf.write(os.path.join(CHROME_PROXY_HELPER_DIR, 'manifest.json'), 'manifest.json')
            zf.writestr('background.js', background_content)
    except Exception:
        # A half-written archive would be treated as a valid cache entry
        # by the next call, so remove it before propagating the error.
        if os.path.exists(extension_file_path):
            os.remove(extension_file_path)
        raise
    return extension_file_path
background.js檔案內容
// Template for the proxy extension's background script. The upper-case
// placeholder tokens below are substituted with real values before the
// extension is packaged.

// Single fixed HTTP proxy used for every request.
var proxyServer = {
  scheme: "http",
  host: "YOU_PROXY_ADDRESS",
  port: parseInt(YOUR_PROXY_PORT)
};

var config = {
  mode: "fixed_servers",
  rules: {
    singleProxy: proxyServer,
    bypassList: ["foobar.com"]
  }
};

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

// Answer proxy authentication challenges with the configured credentials.
function callbackFn(details) {
  var credentials = {
    username: "YOUR_PROXY_USERNAME",
    password: "YOUR_PROXY_PASSWORD"
  };
  return { authCredentials: credentials };
}

chrome.webRequest.onAuthRequired.addListener(
  callbackFn,
  {urls: ["<all_urls>"]},
  ['blocking']
);
manifest.json檔案內容
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
目錄結構
if __name__ == "__main__":
    # Manual smoke test: start Chrome with the generated proxy extension and
    # open a page through it.
    options = webdriver.ChromeOptions()
    # Load the custom proxy extension (fixed proxy with username/password
    # authentication). Replace the placeholder with a real proxy such as
    # 'user:secret@1.2.3.4:8080' -- the literal placeholder is rejected.
    options.add_extension(get_chrome_proxy_extension(proxy='username:password@ip:port'))
    # `options=` replaces the deprecated `chrome_options=` keyword.
    driver = webdriver.Chrome(options=options, executable_path=r'F:\MyDatas\chromedriver.exe')
    try:
        # Open the target page to check that it loads through the proxy.
        driver.get('https://makeabooking.flyscoot.com/Member?at=signup&culture=zh-CN')
        # Keep the browser open long enough to inspect the result by hand.
        time.sleep(30)
    finally:
        # Always shut down the browser and the chromedriver process.
        driver.quit()
經過測試ok