1. 程式人生 > >Python爬蟲之Selenium

Python爬蟲之Selenium

目錄

安裝

  • 安裝selenium
pip install selenium
  • 安裝webdriver

    1. https://blog.csdn.net/huilan_same/article/details/51896672 檢視webdriver對應的chrome版本
    2. http://chromedriver.storage.googleapis.com/index.html下載對應版本的webdriver
    3. 把下載後的exe執行檔案放入chrome的安裝目錄 C:\Program Files (x86)\Google\Chrome\Application
    4. 配置chrome的環境變數,在使用者path裡面新增C:\Program Files (x86)\Google\Chrome\Application

基本使用

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Basic usage: search Baidu for "Python", wait for the results container,
# then print the resulting URL and the session cookies.
browser = webdriver.Chrome()  # create a browser object
try:
    browser.get("https://www.baidu.com")
    search_box = browser.find_element(By.ID, "kw")
    search_box.send_keys("Python")
    search_box.send_keys(Keys.ENTER)
    wait = WebDriverWait(browser, 10)
    # Block (up to 10 s) until the element with id "content_left" is in the DOM.
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)  # current URL
    print(browser.get_cookies())  # cookies, returned as a list of dicts
    # print(browser.page_source)  # source of the current page
finally:
    browser.close()  # always close the browser at the end
https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=Python&rsv_pq=a3409bcb00003f3f&rsv_t=2630W3R3HMFalhk4MYTvROD2e%2BPuHi9tvbMV3V75Hskz3DDFm2FmVur6%2FFI&rqlang=cn&rsv_enter=1&rsv_sug3=6&rsv_sug2=0&inputT=87&rsv_sug4=87
[{'domain': '.baidu.com', 'httpOnly': False, 'name': 'H_PS_PSSID', 'path': '/', 'secure': False, 'value': ''}, {'domain': '.baidu.com', 'expiry': 3684140058.697825, 'httpOnly': False, 'name': 'BAIDUID', 'path': '/', 'secure': False, 'value': 'E50F8737E658B64766891FD7D8BFC790:FG=1'}, {'domain': '.baidu.com', 'expiry': 3684140058.69787, 'httpOnly': False, 'name': 'BIDUPSID', 'path': '/', 'secure': False, 'value': 'E50F8737E658B64766891FD7D8BFC790'}, {'domain': '.baidu.com', 'expiry': 3684140058.697893, 'httpOnly': False, 'name': 'PSTM', 'path': '/', 'secure': False, 'value': '1536656410'}, {'domain': '.baidu.com', 'httpOnly': False, 'name': 'PSINO', 'path': '/', 'secure': False, 'value': '3'}, {'domain': 'www.baidu.com', 'expiry': 2482736412.427581, 'httpOnly': False, 'name': 'delPer', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_HOME', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'expiry': 1537520412, 'httpOnly': False, 'name': 'BD_UPN', 'path': '/', 'secure': False, 'value': '12314753'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_CK_SAM', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.baidu.com', 'expiry': 1536659005, 'httpOnly': False, 'name': 'H_PS_645EC', 'path': '/', 'secure': False, 'value': 'b48eag8Y1Gfk0tqVgKhSeRYI9d2khxoJBTDJUGDy5N%2Fhy%2BpmeQrliwqdEKI'}]

宣告瀏覽器物件

支援多種瀏覽器,以下對應谷歌,火狐,edge,蘋果瀏覽器

from selenium import webdriver

# One constructor per supported browser; in real code pick the one you need.
browser = webdriver.Chrome()
browser = webdriver.Firefox()  # the class is spelled "Firefox"; "FireFox" raises AttributeError
browser = webdriver.Edge()
browser = webdriver.Safari()

訪問頁面

from selenium import webdriver

browser = webdriver.Chrome()
browser.get("https://www.taobao.com")
# print(browser.page_source)
browser.close()

查詢元素

單個元素

通過不同方法查詢元素

  • find_element_by_id
  • find_element_by_name
  • find_element_by_xpath
  • find_element_by_link_text
  • find_element_by_partial_link_text
  • find_element_by_tag_name
  • find_element_by_class_name
  • find_element_by_css_selector

以上方法都可以通過find_element()這個方法,傳入型別然後查詢,如 find_element(By.ID,'q') 等於find_element_by_id('q')

from selenium import webdriver

browser = webdriver.Chrome()
browser.get("https://www.taobao.com")
input_first=browser.find_element_by_id('q') #通過id=q 找到淘寶首頁的搜尋框
input_second=browser.find_element_by_css_selector('#q') #通過id=q 找到淘寶首頁的搜尋框
input_third=browser.find_element_by_xpath('//*[@id="q"]') #通過id=q 找到淘寶首頁的搜尋框
print(input_first,input_second,input_third)
browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="a1b039bc947657bca5134b768aec05c2", element="0.9619639072702169-1")> <selenium.webdriver.remote.webelement.WebElement (session="a1b039bc947657bca5134b768aec05c2", element="0.9619639072702169-1")> <selenium.webdriver.remote.webelement.WebElement (session="a1b039bc947657bca5134b768aec05c2", element="0.9619639072702169-1")>
from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
browser.get("https://www.taobao.com")
input_first=browser.find_element(By.ID,'q') #通過id=q 找到淘寶首頁的搜尋框
print(input_first)
browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="0bc474ae4ceba89ddd11f3cb0f2dfd8a", element="0.06406167083782055-1")>

查詢多個元素

通過不同方法查詢多個元素

  • find_elements_by_id
  • find_elements_by_name
  • find_elements_by_xpath
  • find_elements_by_link_text
  • find_elements_by_partial_link_text
  • find_elements_by_tag_name
  • find_elements_by_class_name
  • find_elements_by_css_selector

以上方法都可以通過find_elements()這個方法,傳入型別然後查詢,如 find_elements_by_css_selector('.service-bd li') 等於browser.find_elements(By.CSS_SELECTOR,'.service-bd li')

from selenium import webdriver
from selenium.webdriver.common.by import By

# Look up several elements at once with a CSS selector and print the list.
browser = webdriver.Chrome()
browser.get("https://www.taobao.com")
lis=browser.find_elements_by_css_selector('.service-bd li') # every <li> nested inside an element with class "service-bd"
print(lis)
browser.close()
[<selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-1")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-2")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-3")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-4")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-5")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-6")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-7")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-8")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-9")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-10")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-11")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-12")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-13")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-14")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-16")>]
from selenium import webdriver
from selenium.webdriver.common.by import By

# Same lookup through the generic find_elements(By.<strategy>, value) form.
browser = webdriver.Chrome()
browser.get("https://www.taobao.com")
lis=browser.find_elements(By.CSS_SELECTOR,'.service-bd li') # every <li> nested inside an element with class "service-bd"
print(lis)
browser.close()
[<selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-1")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-2")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-3")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-4")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-5")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-6")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-7")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-8")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-9")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-10")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-11")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-12")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-13")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-14")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-16")>]

元素互動操作

from selenium import webdriver
from selenium.webdriver.common.by import By
import time

browser = webdriver.Chrome() #申明一個瀏覽器物件
try:
    browser.get("https://www.taobao.com")
    input = browser.find_element_by_id("q")
    input.send_keys("iphone")
    time.sleep(2)
    input.clear()
    input.send_keys("ipad")
    button = browser.find_element_by_class_name('btn-search')
    button.click()
#     input.send_keys(Keys.ENTER) #輸入回車,等於上面的找到搜尋按鈕然後點選
finally:
#     browser.close() #最後關閉瀏覽器
    pass

動作互動

from selenium import webdriver
from selenium.webdriver import ActionChains


# Drag one element onto another with ActionChains.
browser = webdriver.Chrome() # create a browser object
try:
    browser.get("http://www.runoob.com/try/try.php?filename=jqueryui-example-draggable-sortable")
    browser.switch_to.frame('iframeResult') # switch into the frame named "iframeResult"
    source = browser.find_element_by_id('draggable')
    target = browser.find_element_by_class_name('ui-state-default')
    actions = ActionChains(browser)
    actions.drag_and_drop(source,target)  # queue a drag from the source element to the target element
    actions.perform() # the queued actions only run when perform() is called
finally:
        pass
# NOTE(review): close() is commented out, presumably so the window stays open to inspect the result.
#     browser.close() # close the browser at the end

執行JavaScript

from selenium import webdriver
from selenium.webdriver import ActionChains


browser = webdriver.Chrome() #申明一個瀏覽器物件
try:
    browser.get("http://www.zhihu.com/explore")
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    browser.execute_script('alert("To Bottom")')
finally:
        pass

獲取元素資訊

獲取屬性

from selenium import webdriver
from selenium.webdriver import ActionChains


browser = webdriver.Chrome() #申明一個瀏覽器物件
try:
    browser.get("http://www.zhihu.com/explore")
    logo=browser.find_element_by_class_name('zu-top-link-logo')
    print(logo)
    print(logo.get_attribute('class'))
finally:

    browser.close()   #   browser.close() #最後關閉瀏覽器 
<selenium.webdriver.remote.webelement.WebElement (session="93f61de89e09fc0f7748716b40f70e21", element="0.8305888123519967-1")>
zu-top-link-logo

獲取文字值

from selenium import webdriver
from selenium.webdriver import ActionChains


browser = webdriver.Chrome() #申明一個瀏覽器物件
try:
    browser.get("http://www.zhihu.com/explore")
    input=browser.find_element_by_class_name('zu-top-add-question')
    print(input)
    print(input.text)
finally:

    browser.close()   #   browser.close() #最後關閉瀏覽器 
<selenium.webdriver.remote.webelement.WebElement (session="570d4c81969d967c3b223c44960a4888", element="0.8082889744217834-1")>
提問

獲取ID,位置標籤名,大小

from selenium import webdriver
from selenium.webdriver import ActionChains


browser = webdriver.Chrome() #申明一個瀏覽器物件
try:
    browser.get("http://www.zhihu.com/explore")
    input=browser.find_element_by_class_name('zu-top-add-question')
    print(input)
    print(input.id)
    print(input.location)
    print(input.tag_name)
    print(input.size)
finally:

    browser.close()   #   browser.close() #最後關閉瀏覽器 
<selenium.webdriver.remote.webelement.WebElement (session="baec40a386c71d25473a05bd73cc8e4b", element="0.11839270092527143-1")>
0.11839270092527143-1
{'x': 675, 'y': 7}
button
{'height': 32, 'width': 66}

Frame

如果存在多個Frame,需要切換元素所在Frame才能查詢到該元素

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.common.exceptions import NoSuchElementException
browser = webdriver.Chrome()  # 申明一個瀏覽器物件
try:
    browser.get("http://www.runoob.com/try/try.php?filename=jqueryui-example-draggable-sortable")
    browser.switch_to.frame('iframeResult')  # 切換到  iframeResult  frame
    source = browser.find_element_by_id('draggable') #查詢iframeResult  frame 裡面的元素
    print(source)
    try:
        logo = browser.find_element_by_class_name('navbar-header') #查詢父frame裡面的元素,這時候沒有切換到父 frame裡面,所以找不到
    except NoSuchElementException as e:
        print("NO LOGO", e)
    browser.switch_to.parent_frame() #切換frame 到父frame
    logo = browser.find_element_by_class_name('navbar-header') #找父frame裡面的logo 元素
    print(logo.text)
finally:

    browser.close()  # 最後關閉瀏覽器 
<selenium.webdriver.remote.webelement.WebElement (session="5fd00176e76798b0c045e6ade384fe20", element="0.09543158896097048-1")>
NO LOGO Message: no such element: Unable to locate element: {"method":"class name","selector":"navbar-header"}
  (Session info: chrome=69.0.3497.81)
  (Driver info: chromedriver=2.40.565498 (ea082db3280dd6843ebfb08a625e3eb905c4f5ab),platform=Windows NT 10.0.17134 x86_64)

RUNOOB.COM

等待(Wait)

隱式等待

當查詢元素時,沒有立即找到,指定等待時間,時間到了才丟擲沒有找到元素異常

from selenium import webdriver
from selenium.webdriver import ActionChains


# Implicit wait: configure once on the driver, applies to every element lookup.
browser = webdriver.Chrome()  # create a browser object
try:
    # Without this call there is no implicit wait at all. With it, a lookup
    # that does not match immediately keeps retrying for up to 10 seconds
    # before NoSuchElementException is raised.
    browser.implicitly_wait(10)
    browser.get("http://www.zhihu.com/explore")
    element = browser.find_element_by_class_name('zu-top-add-question')
    print(element)
    print(element.text)
finally:
    browser.close()  # always close the browser at the end

顯式等待

設定一個等待條件,程式會一直等到條件成立;如果超過最長等待時間條件仍未成立,則丟擲 TimeoutException

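顯式等待條件(selenium.webdriver.support.expected_conditions 中常用的幾個):

  • title_is / title_contains:標題等於/包含指定內容
  • presence_of_element_located:元素已載入到DOM中
  • visibility_of_element_located:元素可見
  • element_to_be_clickable:元素可點選
  • text_to_be_present_in_element:元素文字包含指定字串
  • alert_is_present:出現alert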

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC




browser = webdriver.Chrome() #申明一個瀏覽器物件
try:
    browser.get("https://www.taobao.com")
    wait = WebDriverWait(browser,10) #設定最長等待時間

    input = wait.until(EC.presence_of_element_located((By.ID,'q'))) #通過id查詢id=q的元素,直到q出現,然後賦值給input
    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'.btn-search'))) # 通過class查詢class=btn-search的元素,直到這個元素可點選,然後賦值給button
    print(input,button)
finally:

    browser.close()   #   browser.close() #最後關閉瀏覽器 
<selenium.webdriver.remote.webelement.WebElement (session="1dbcebc0b7abe4ce6a6f0802bd3d5a3d", element="0.6428976188597413-1")> <selenium.webdriver.remote.webelement.WebElement (session="1dbcebc0b7abe4ce6a6f0802bd3d5a3d", element="0.6428976188597413-2")>

前進後退

from selenium import webdriver
import time

# Navigate through three pages, then step back and forward in the history.
browser = webdriver.Chrome()  # create a browser object
try:
    browser.get("http://www.zhihu.com/explore")
    browser.get("https://www.baidu.com")
    browser.get("https://www.taobao.com")
    browser.back()  # back to the Baidu page
    time.sleep(1)  # was misspelled "seleep", which raises AttributeError
    browser.forward()  # forward to the Taobao page again
finally:
    browser.close()  # always close the browser at the end
[{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1536725947.916282, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': 'e0a07617c1a38385364125951b19eef8'}, {'domain': '.zhihu.com', 'expiry': 1631333050.496658, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AKBlVIEQMw6PTmRCyZB5vxE2JsdNH6ByB9k=|1536725047"'}, {'domain': '.zhihu.com', 'expiry': 1614485047.916317, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'vPmHo9u7YgHgueSf7uvzgghNZb5p7AKw'}, {'domain': '.zhihu.com', 'expiry': 1536726851, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1536725051'}, {'domain': '.zhihu.com', 'expiry': 1631333048.326273, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '63dd99ed3e364f8db4c0798e46d8d106|1536725045000|1536725045000'}, {'domain': '.zhihu.com', 'expiry': 1539317048.3263, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjRiZDAyMzJjNTRmNDg0MGE1NDIzNTU1MGM4MTQwZGU=|1536725045|056022ba325021c0be3df388c98dd79d453b3f35"'}, {'domain': '.zhihu.com', 'expiry': 1539317048.326314, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"NDNjMmNkOTVjM2I4NGMxMzlmNjdmYmRhODVjNzJhOTc=|1536725045|fb5eb8ddc8e7a0b50ca130320c77ef6f03697d81"'}, {'domain': '.zhihu.com', 'expiry': 1539317048.326327, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"YzY0OTRlM2ZiZDQ3NDg2OGFhODE4N2Y4ZGJhZjM5OTU=|1536725045|319ba8485432513dd385a70a8af596e955be2672"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1599797051, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': '232a387f-66f0-4000-81eb-e0935f0c6b75'}, {'domain': '.zhihu.com', 'expiry': 1599797051, 
'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.1640535724.1536725051.1536725051.1536725051.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1552493051, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1536725051.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1599797051, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180912=1'}]
新增cookie後: [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1536725947.916282, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': 'e0a07617c1a38385364125951b19eef8'}, {'domain': '.zhihu.com', 'expiry': 1631333050.496658, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AKBlVIEQMw6PTmRCyZB5vxE2JsdNH6ByB9k=|1536725047"'}, {'domain': '.zhihu.com', 'expiry': 1614485047.916317, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'vPmHo9u7YgHgueSf7uvzgghNZb5p7AKw'}, {'domain': '.zhihu.com', 'expiry': 1536726851, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1536725051'}, {'domain': '.zhihu.com', 'expiry': 1631333048.326273, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '63dd99ed3e364f8db4c0798e46d8d106|1536725045000|1536725045000'}, {'domain': '.zhihu.com', 'expiry': 1539317048.3263, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjRiZDAyMzJjNTRmNDg0MGE1NDIzNTU1MGM4MTQwZGU=|1536725045|056022ba325021c0be3df388c98dd79d453b3f35"'}, {'domain': '.zhihu.com', 'expiry': 1539317048.326314, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"NDNjMmNkOTVjM2I4NGMxMzlmNjdmYmRhODVjNzJhOTc=|1536725045|fb5eb8ddc8e7a0b50ca130320c77ef6f03697d81"'}, {'domain': '.zhihu.com', 'expiry': 1539317048.326327, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"YzY0OTRlM2ZiZDQ3NDg2OGFhODE4N2Y4ZGJhZjM5OTU=|1536725045|319ba8485432513dd385a70a8af596e955be2672"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1599797051, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': '232a387f-66f0-4000-81eb-e0935f0c6b75'}, {'domain': '.zhihu.com', 'expiry': 
1599797051, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.1640535724.1536725051.1536725051.1536725051.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1552493051, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1536725051.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1599797051, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180912=1'}]
刪除cookies後: []

Cookies

from selenium import webdriver
import time

# Read, add and delete cookies for the page that is currently loaded.
browser = webdriver.Chrome()  # create a browser object
try:
    browser.get("http://www.zhihu.com/explore")
    print(browser.get_cookies())  # cookies set by the page itself
    # The cookie domain has to match the page that is currently loaded.
    # A foreign domain (this example previously used www.baidu.com) is
    # rejected as an invalid cookie domain or never shows up in get_cookies().
    browser.add_cookie({
        'name': 'name',
        'domain': 'www.zhihu.com',
        'value': 'domain',
    })
    print("新增cookie後:", browser.get_cookies())
    browser.delete_all_cookies()  # remove every cookie visible to this page
    print("刪除cookies後:", browser.get_cookies())
finally:
    pass
# NOTE(review): close() is commented out, presumably so the window stays open for inspection.
#     browser.close()  # close the browser at the end
[{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1536726038.320244, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '931b604f0432b1e60014973b6cd4c7bc'}, {'domain': '.zhihu.com', 'expiry': 1631333140.884868, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AEDkmNkQMw6PTubplz3j5qiyQpyHUtkT0R0=|1536725137"'}, {'domain': '.zhihu.com', 'expiry': 1614485138.320367, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'EweYCl0UbeiOvsKxuFyVvpJ53SDX3EKN'}, {'domain': '.zhihu.com', 'expiry': 1536726941, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1536725141'}, {'domain': '.zhihu.com', 'expiry': 1631333138.700594, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '07261380ae7649b8a4abadd396afa901|1536725135000|1536725135000'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700726, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjA1ODJlMmE1YTljNDZiODgxYTBkNDgzNGFlMjZlMWM=|1536725135|cfbcc1f15711101e2ccbcc9bcb11e8d94312bedf"'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700823, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"NjcyMDUzM2UxNjc4NGViMTk5NTAxMWVlMzJjNzg4NzM=|1536725135|031f539a6b560c49f51881167ec9abb8b4e96b2b"'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700889, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"ZDQ1OTE1NjFkZTFjNDkwNTkwY2IxYjRkMDk3MWI1NzQ=|1536725135|0b53e0ec51816829dd82b0e817d6c66e6aa8ea9a"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.1778781766.1536725141.1536725141.1536725141.1'}, {'domain': '.zhihu.com', 
'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1552493141, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1536725141.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180912=1'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'c6014597-8821-4b60-a8b5-c3c9ed2893c6'}]
新增cookie後: [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1536726038.320244, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '931b604f0432b1e60014973b6cd4c7bc'}, {'domain': '.zhihu.com', 'expiry': 1631333140.884868, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AEDkmNkQMw6PTubplz3j5qiyQpyHUtkT0R0=|1536725137"'}, {'domain': '.zhihu.com', 'expiry': 1614485138.320367, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'EweYCl0UbeiOvsKxuFyVvpJ53SDX3EKN'}, {'domain': '.zhihu.com', 'expiry': 1536726941, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1536725141'}, {'domain': '.zhihu.com', 'expiry': 1631333138.700594, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '07261380ae7649b8a4abadd396afa901|1536725135000|1536725135000'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700726, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjA1ODJlMmE1YTljNDZiODgxYTBkNDgzNGFlMjZlMWM=|1536725135|cfbcc1f15711101e2ccbcc9bcb11e8d94312bedf"'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700823, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"NjcyMDUzM2UxNjc4NGViMTk5NTAxMWVlMzJjNzg4NzM=|1536725135|031f539a6b560c49f51881167ec9abb8b4e96b2b"'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700889, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"ZDQ1OTE1NjFkZTFjNDkwNTkwY2IxYjRkMDk3MWI1NzQ=|1536725135|0b53e0ec51816829dd82b0e817d6c66e6aa8ea9a"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.1778781766.1536725141.1536725141.1536725141.1'}, {'domain': 
'.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1552493141, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1536725141.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180912=1'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'c6014597-8821-4b60-a8b5-c3c9ed2893c6'}]
刪除cookies後: []

選項卡管理

from selenium import webdriver
import time

# Open a second tab and load a different page in each tab.
browser = webdriver.Chrome()  # create a browser object
try:
    browser.get("http://www.zhihu.com/explore")
    browser.execute_script('window.open()')  # open a new tab
    print(browser.window_handles)  # handles of all open tabs
    # switch_to.window replaces the deprecated switch_to_window helper and
    # matches the switch_to.frame style used for frames.
    browser.switch_to.window(browser.window_handles[1])  # switch to the second tab
    browser.get("https://www.baidu.com")  # load Baidu in the second tab
    browser.switch_to.window(browser.window_handles[0])  # switch back to the first tab
    browser.get("https://www.taobao.com")  # load Taobao in the first tab
finally:
    pass
# NOTE(review): close() is commented out, presumably so both tabs stay open for inspection.
#     browser.close()  # close the browser at the end
['CDwindow-540B3F1BBD7773487CAEA1C7BD9906FB', 'CDwindow-7765F4165D06DA78757475A7D3E29400']

異常處理

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException,TimeoutException

# Handle the two most common Selenium exceptions and always release the browser.
browser = webdriver.Chrome()  # create a browser object
try:
    try:
        browser.get("http://www.zhihu.com/explore")
    except TimeoutException:  # page load timed out
        print("Time OUT!")
    try:
        browser.find_element_by_id("hello")  # look up an element whose id is "hello"
    except NoSuchElementException:  # no element matched
        print("Not find element!")
finally:
    # Outer finally: the browser is closed even when get() fails with
    # something other than TimeoutException.
    browser.close()
Not find element!

1