Python爬蟲之Selenium
阿新 • • 發佈:2018-12-09
目錄
安裝
- 安裝selenium
pip install selenium
安裝webdriver
- 去 https://blog.csdn.net/huilan_same/article/details/51896672 檢視webdriver對應的chrome版本
- 去 http://chromedriver.storage.googleapis.com/index.html 下載對應版本的webdriver
- 把下載後的exe執行檔案放入chrome的安裝目錄 C:\Program Files (x86)\Google\Chrome\Application
- 配置環境變數:在使用者path裡面新增 C:\Program Files (x86)\Google\Chrome\Application
基本使用
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Start a Chrome session (the browser object used for every step below).
browser = webdriver.Chrome()
try:
    browser.get("https://www.baidu.com")
    # Named search_box rather than `input` so the builtin input() is not
    # shadowed.
    search_box = browser.find_element_by_id("kw")
    search_box.send_keys("Python")
    search_box.send_keys(Keys.ENTER)
    # Wait up to 10 seconds for the element with id "content_left" to be
    # present in the DOM before reading the page state.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)    # current URL
    print(browser.get_cookies())  # cookies, returned as a list
    # print(browser.page_source)  # uncomment to print the page source
finally:
    # Close the browser even if one of the steps above raised.
    browser.close()
https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=Python&rsv_pq=a3409bcb00003f3f&rsv_t=2630W3R3HMFalhk4MYTvROD2e%2BPuHi9tvbMV3V75Hskz3DDFm2FmVur6%2FFI&rqlang=cn&rsv_enter=1&rsv_sug3=6&rsv_sug2=0&inputT=87&rsv_sug4=87
[{'domain': '.baidu.com', 'httpOnly': False, 'name': 'H_PS_PSSID', 'path': '/', 'secure': False, 'value': ''}, {'domain': '.baidu.com', 'expiry': 3684140058.697825, 'httpOnly': False, 'name': 'BAIDUID', 'path': '/', 'secure': False, 'value': 'E50F8737E658B64766891FD7D8BFC790:FG=1'}, {'domain': '.baidu.com', 'expiry': 3684140058.69787, 'httpOnly': False, 'name': 'BIDUPSID', 'path': '/', 'secure': False, 'value': 'E50F8737E658B64766891FD7D8BFC790'}, {'domain': '.baidu.com', 'expiry': 3684140058.697893, 'httpOnly': False, 'name': 'PSTM', 'path': '/', 'secure': False, 'value': '1536656410'}, {'domain': '.baidu.com', 'httpOnly': False, 'name': 'PSINO', 'path': '/', 'secure': False, 'value': '3'}, {'domain': 'www.baidu.com', 'expiry': 2482736412.427581, 'httpOnly': False, 'name': 'delPer', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_HOME', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'expiry': 1537520412, 'httpOnly': False, 'name': 'BD_UPN', 'path': '/', 'secure': False, 'value': '12314753'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_CK_SAM', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.baidu.com', 'expiry': 1536659005, 'httpOnly': False, 'name': 'H_PS_645EC', 'path': '/', 'secure': False, 'value': 'b48eag8Y1Gfk0tqVgKhSeRYI9d2khxoJBTDJUGDy5N%2Fhy%2BpmeQrliwqdEKI'}]
宣告瀏覽器物件
支援多種瀏覽器,以下對應谷歌,火狐,edge,蘋果瀏覽器
from selenium import webdriver
# One constructor per supported browser; a real script creates only the one
# it needs (each call below starts a separate browser).
browser = webdriver.Chrome()   # Google Chrome
browser = webdriver.Firefox()  # Mozilla Firefox (class name is `Firefox`, not `FireFox`)
browser = webdriver.Edge()     # Microsoft Edge
browser = webdriver.Safari()   # Apple Safari
訪問頁面
from selenium import webdriver
browser = webdriver.Chrome()
try:
    # Navigate the browser to the given URL.
    browser.get("https://www.taobao.com")
    # print(browser.page_source)  # uncomment to print the page source
finally:
    # Close the browser even if the navigation raised.
    browser.close()
查詢元素
單個元素
通過不同方法查詢元素
- find_element_by_id
- find_element_by_name
- find_element_by_xpath
- find_element_by_link_text
- find_element_by_partial_link_text
- find_element_by_tag_name
- find_element_by_class_name
- find_element_by_css_selector
以上方法都可以通過find_element()
這個方法,傳入型別然後查詢,如
find_element(By.ID,'q')
等於find_element_by_id('q')
from selenium import webdriver
browser = webdriver.Chrome()
try:
    browser.get("https://www.taobao.com")
    # Three equivalent lookups of the element whose id is "q"
    # (the search box on the Taobao home page, per the original notes).
    input_first = browser.find_element_by_id('q')                # by id
    input_second = browser.find_element_by_css_selector('#q')    # by CSS selector
    input_third = browser.find_element_by_xpath('//*[@id="q"]')  # by XPath
    print(input_first, input_second, input_third)
finally:
    # Close the browser even if a lookup raised.
    browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="a1b039bc947657bca5134b768aec05c2", element="0.9619639072702169-1")> <selenium.webdriver.remote.webelement.WebElement (session="a1b039bc947657bca5134b768aec05c2", element="0.9619639072702169-1")> <selenium.webdriver.remote.webelement.WebElement (session="a1b039bc947657bca5134b768aec05c2", element="0.9619639072702169-1")>
from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
try:
    browser.get("https://www.taobao.com")
    # Generic form: pass the locator strategy (By.ID) and the value ('q').
    input_first = browser.find_element(By.ID, 'q')
    print(input_first)
finally:
    # Close the browser even if the lookup raised.
    browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="0bc474ae4ceba89ddd11f3cb0f2dfd8a", element="0.06406167083782055-1")>
查詢多個元素
通過不同方法查詢多個元素
- find_elements_by_id
- find_elements_by_name
- find_elements_by_xpath
- find_elements_by_link_text
- find_elements_by_partial_link_text
- find_elements_by_tag_name
- find_elements_by_class_name
- find_elements_by_css_selector
以上方法都可以通過find_elements()
這個方法,傳入型別然後查詢,如
find_elements_by_css_selector('.service-bd li')
等於browser.find_elements(By.CSS_SELECTOR,'.service-bd li')
from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
try:
    browser.get("https://www.taobao.com")
    # Collect every element matching the CSS selector '.service-bd li';
    # the result is a list of WebElement objects.
    lis = browser.find_elements_by_css_selector('.service-bd li')
    print(lis)
finally:
    # Close the browser even if the lookup raised.
    browser.close()
[<selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-1")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-2")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-3")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-4")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-5")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-6")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-7")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-8")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-9")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-10")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-11")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-12")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-13")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-14")>, <selenium.webdriver.remote.webelement.WebElement (session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="8806cb12ae2d7f0a094d752379009d56", element="0.4126457324823489-16")>]
from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
try:
    browser.get("https://www.taobao.com")
    # Generic form: pass the locator strategy (By.CSS_SELECTOR) and the
    # selector; the result is a list of WebElement objects.
    lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
    print(lis)
finally:
    # Close the browser even if the lookup raised.
    browser.close()
[<selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-1")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-2")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-3")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-4")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-5")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-6")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-7")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-8")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-9")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-10")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-11")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-12")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-13")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-14")>, <selenium.webdriver.remote.webelement.WebElement (session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="be1cdd445ae6ab42872348d5a2cef0a0", element="0.0207270034860374-16")>]
元素互動操作
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
browser = webdriver.Chrome()
browser.get("https://www.taobao.com")
# Named search_box rather than `input` so the builtin input() is not shadowed.
search_box = browser.find_element_by_id("q")
search_box.send_keys("iphone")
time.sleep(2)  # pause for two seconds before the text is cleared
search_box.clear()
search_box.send_keys("ipad")
button = browser.find_element_by_class_name('btn-search')
button.click()
# Alternative to locating and clicking the button: send Keys.ENTER to the
# search box (requires `from selenium.webdriver.common.keys import Keys`).
# The browser is deliberately left open here, as in the original example;
# call browser.close() once you are done with it.
動作互動
from selenium import webdriver
from selenium.webdriver import ActionChains
browser = webdriver.Chrome()
browser.get("http://www.runoob.com/try/try.php?filename=jqueryui-example-draggable-sortable")
# Switch into the frame named "iframeResult" before looking up its elements.
browser.switch_to.frame('iframeResult')
source = browser.find_element_by_id('draggable')
target = browser.find_element_by_class_name('ui-state-default')
actions = ActionChains(browser)
actions.drag_and_drop(source, target)  # queue a drag from source to target
actions.perform()                      # run the queued actions
# The browser is deliberately left open here, as in the original example;
# call browser.close() once you are done with it.
執行JavaScript
from selenium import webdriver
from selenium.webdriver import ActionChains
browser = webdriver.Chrome()
browser.get("http://www.zhihu.com/explore")
# Run JavaScript in the page: scroll to the bottom, then show an alert.
browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
browser.execute_script('alert("To Bottom")')
# The browser is deliberately left open here (the original example never
# closes it); call browser.close() once you are done with it.
獲取元素資訊
獲取屬性
from selenium import webdriver
from selenium.webdriver import ActionChains
browser = webdriver.Chrome()
try:
    browser.get("http://www.zhihu.com/explore")
    logo = browser.find_element_by_class_name('zu-top-link-logo')
    print(logo)
    # get_attribute returns the value of the named attribute, here "class".
    print(logo.get_attribute('class'))
finally:
    # Close the browser even if a step above raised.
    browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="93f61de89e09fc0f7748716b40f70e21", element="0.8305888123519967-1")>
zu-top-link-logo
獲取文字值
from selenium import webdriver
from selenium.webdriver import ActionChains
browser = webdriver.Chrome()
try:
    browser.get("http://www.zhihu.com/explore")
    # Named ask_button rather than `input` so the builtin input() is not
    # shadowed.
    ask_button = browser.find_element_by_class_name('zu-top-add-question')
    print(ask_button)
    print(ask_button.text)  # the element's text
finally:
    # Close the browser even if a step above raised.
    browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="570d4c81969d967c3b223c44960a4888", element="0.8082889744217834-1")>
提問
獲取ID、位置、標籤名、大小
from selenium import webdriver
from selenium.webdriver import ActionChains
browser = webdriver.Chrome()
try:
    browser.get("http://www.zhihu.com/explore")
    # Named ask_button rather than `input` so the builtin input() is not
    # shadowed.
    ask_button = browser.find_element_by_class_name('zu-top-add-question')
    print(ask_button)
    print(ask_button.id)        # element id assigned by the driver
    print(ask_button.location)  # position, e.g. {'x': ..., 'y': ...}
    print(ask_button.tag_name)  # HTML tag name
    print(ask_button.size)      # size, e.g. {'height': ..., 'width': ...}
finally:
    # Close the browser even if a step above raised.
    browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="baec40a386c71d25473a05bd73cc8e4b", element="0.11839270092527143-1")>
0.11839270092527143-1
{'x': 675, 'y': 7}
button
{'height': 32, 'width': 66}
Frame
如果存在多個Frame,需要切換元素所在Frame才能查詢到該元素
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.common.exceptions import NoSuchElementException
browser = webdriver.Chrome()
try:
    browser.get("http://www.runoob.com/try/try.php?filename=jqueryui-example-draggable-sortable")
    # Switch into the frame named "iframeResult" and look up an element there.
    browser.switch_to.frame('iframeResult')
    source = browser.find_element_by_id('draggable')
    print(source)
    try:
        # This element belongs to the parent frame; the lookup fails while
        # the driver is still inside the child frame.
        logo = browser.find_element_by_class_name('navbar-header')
    except NoSuchElementException as e:
        print("NO LOGO", e)
    # Switch back to the parent frame, where the same lookup succeeds.
    browser.switch_to.parent_frame()
    logo = browser.find_element_by_class_name('navbar-header')
    print(logo.text)
finally:
    # Close the browser even if a step above raised.
    browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="5fd00176e76798b0c045e6ade384fe20", element="0.09543158896097048-1")>
NO LOGO Message: no such element: Unable to locate element: {"method":"class name","selector":"navbar-header"}
(Session info: chrome=69.0.3497.81)
(Driver info: chromedriver=2.40.565498 (ea082db3280dd6843ebfb08a625e3eb905c4f5ab),platform=Windows NT 10.0.17134 x86_64)
RUNOOB.COM
等待(Wait)
隱式等待
查詢元素時若沒有立即找到,會在指定的等待時間內持續重試;超過等待時間仍未找到,才丟擲找不到元素的異常(NoSuchElementException)。隱式等待通過 browser.implicitly_wait(秒數) 設定。
from selenium import webdriver
from selenium.webdriver import ActionChains
browser = webdriver.Chrome()
try:
    # Implicit wait: element lookups that do not succeed immediately keep
    # retrying for up to 10 seconds before raising NoSuchElementException.
    # Without this call the lookup below fails at once if the element is
    # not yet present.
    browser.implicitly_wait(10)
    browser.get("http://www.zhihu.com/explore")
    # Named ask_button rather than `input` so the builtin input() is not
    # shadowed.
    ask_button = browser.find_element_by_class_name('zu-top-add-question')
    print(ask_button)
    print(ask_button.text)
finally:
    # Close the browser even if a step above raised.
    browser.close()
顯式等待
設定一個等待條件和最長等待時間:程式會一直等待,直到條件成立後才繼續執行;若超過最長等待時間條件仍未成立,則丟擲超時異常(TimeoutException)。
顯式等待條件:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Chrome()
try:
    browser.get("https://www.taobao.com")
    wait = WebDriverWait(browser, 10)  # maximum wait: 10 seconds
    # Wait until the element with id "q" is present, then keep a reference.
    # Named search_box rather than `input` so the builtin input() is not
    # shadowed.
    search_box = wait.until(EC.presence_of_element_located((By.ID, 'q')))
    # Wait until the element matching ".btn-search" is clickable.
    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
    print(search_box, button)
finally:
    # Close the browser even if a wait timed out.
    browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="1dbcebc0b7abe4ce6a6f0802bd3d5a3d", element="0.6428976188597413-1")> <selenium.webdriver.remote.webelement.WebElement (session="1dbcebc0b7abe4ce6a6f0802bd3d5a3d", element="0.6428976188597413-2")>
前進後退
from selenium import webdriver
import time
browser = webdriver.Chrome()
try:
    # Visit three pages so there is history to navigate.
    browser.get("http://www.zhihu.com/explore")
    browser.get("https://www.baidu.com")
    browser.get("https://www.taobao.com")
    browser.back()     # go back one page in the history
    time.sleep(1)      # was misspelled `time.seleep`, which raises AttributeError
    browser.forward()  # go forward again
finally:
    # Close the browser even if a step above raised.
    browser.close()
[{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1536725947.916282, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': 'e0a07617c1a38385364125951b19eef8'}, {'domain': '.zhihu.com', 'expiry': 1631333050.496658, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AKBlVIEQMw6PTmRCyZB5vxE2JsdNH6ByB9k=|1536725047"'}, {'domain': '.zhihu.com', 'expiry': 1614485047.916317, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'vPmHo9u7YgHgueSf7uvzgghNZb5p7AKw'}, {'domain': '.zhihu.com', 'expiry': 1536726851, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1536725051'}, {'domain': '.zhihu.com', 'expiry': 1631333048.326273, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '63dd99ed3e364f8db4c0798e46d8d106|1536725045000|1536725045000'}, {'domain': '.zhihu.com', 'expiry': 1539317048.3263, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjRiZDAyMzJjNTRmNDg0MGE1NDIzNTU1MGM4MTQwZGU=|1536725045|056022ba325021c0be3df388c98dd79d453b3f35"'}, {'domain': '.zhihu.com', 'expiry': 1539317048.326314, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"NDNjMmNkOTVjM2I4NGMxMzlmNjdmYmRhODVjNzJhOTc=|1536725045|fb5eb8ddc8e7a0b50ca130320c77ef6f03697d81"'}, {'domain': '.zhihu.com', 'expiry': 1539317048.326327, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"YzY0OTRlM2ZiZDQ3NDg2OGFhODE4N2Y4ZGJhZjM5OTU=|1536725045|319ba8485432513dd385a70a8af596e955be2672"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1599797051, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': '232a387f-66f0-4000-81eb-e0935f0c6b75'}, {'domain': '.zhihu.com', 'expiry': 1599797051, 
'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.1640535724.1536725051.1536725051.1536725051.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1552493051, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1536725051.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1599797051, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180912=1'}]
新增cookie後: [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1536725947.916282, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': 'e0a07617c1a38385364125951b19eef8'}, {'domain': '.zhihu.com', 'expiry': 1631333050.496658, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AKBlVIEQMw6PTmRCyZB5vxE2JsdNH6ByB9k=|1536725047"'}, {'domain': '.zhihu.com', 'expiry': 1614485047.916317, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'vPmHo9u7YgHgueSf7uvzgghNZb5p7AKw'}, {'domain': '.zhihu.com', 'expiry': 1536726851, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1536725051'}, {'domain': '.zhihu.com', 'expiry': 1631333048.326273, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '63dd99ed3e364f8db4c0798e46d8d106|1536725045000|1536725045000'}, {'domain': '.zhihu.com', 'expiry': 1539317048.3263, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjRiZDAyMzJjNTRmNDg0MGE1NDIzNTU1MGM4MTQwZGU=|1536725045|056022ba325021c0be3df388c98dd79d453b3f35"'}, {'domain': '.zhihu.com', 'expiry': 1539317048.326314, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"NDNjMmNkOTVjM2I4NGMxMzlmNjdmYmRhODVjNzJhOTc=|1536725045|fb5eb8ddc8e7a0b50ca130320c77ef6f03697d81"'}, {'domain': '.zhihu.com', 'expiry': 1539317048.326327, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"YzY0OTRlM2ZiZDQ3NDg2OGFhODE4N2Y4ZGJhZjM5OTU=|1536725045|319ba8485432513dd385a70a8af596e955be2672"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1599797051, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': '232a387f-66f0-4000-81eb-e0935f0c6b75'}, {'domain': '.zhihu.com', 'expiry': 
1599797051, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.1640535724.1536725051.1536725051.1536725051.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1552493051, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1536725051.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1599797051, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180912=1'}]
刪除cookies後: []
Cookies
from selenium import webdriver
import time
browser = webdriver.Chrome()
browser.get("http://www.zhihu.com/explore")
print(browser.get_cookies())  # cookies set by the page
# The cookie domain has to match the page that is loaded. The original used
# "www.baidu.com" while on zhihu.com, and its recorded "after add" output
# shows the cookie was never stored.
browser.add_cookie({
    'name': 'name',
    "domain": "www.zhihu.com",
    'value': 'domain',
})
print("新增cookie後:",browser.get_cookies())
browser.delete_all_cookies()  # remove every cookie for the session
print("刪除cookies後:",browser.get_cookies())
# The browser is deliberately left open here, as in the original example;
# call browser.close() once you are done with it.
[{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1536726038.320244, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '931b604f0432b1e60014973b6cd4c7bc'}, {'domain': '.zhihu.com', 'expiry': 1631333140.884868, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AEDkmNkQMw6PTubplz3j5qiyQpyHUtkT0R0=|1536725137"'}, {'domain': '.zhihu.com', 'expiry': 1614485138.320367, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'EweYCl0UbeiOvsKxuFyVvpJ53SDX3EKN'}, {'domain': '.zhihu.com', 'expiry': 1536726941, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1536725141'}, {'domain': '.zhihu.com', 'expiry': 1631333138.700594, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '07261380ae7649b8a4abadd396afa901|1536725135000|1536725135000'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700726, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjA1ODJlMmE1YTljNDZiODgxYTBkNDgzNGFlMjZlMWM=|1536725135|cfbcc1f15711101e2ccbcc9bcb11e8d94312bedf"'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700823, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"NjcyMDUzM2UxNjc4NGViMTk5NTAxMWVlMzJjNzg4NzM=|1536725135|031f539a6b560c49f51881167ec9abb8b4e96b2b"'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700889, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"ZDQ1OTE1NjFkZTFjNDkwNTkwY2IxYjRkMDk3MWI1NzQ=|1536725135|0b53e0ec51816829dd82b0e817d6c66e6aa8ea9a"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.1778781766.1536725141.1536725141.1536725141.1'}, {'domain': '.zhihu.com', 
'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1552493141, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1536725141.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180912=1'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'c6014597-8821-4b60-a8b5-c3c9ed2893c6'}]
新增cookie後: [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1536726038.320244, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '931b604f0432b1e60014973b6cd4c7bc'}, {'domain': '.zhihu.com', 'expiry': 1631333140.884868, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AEDkmNkQMw6PTubplz3j5qiyQpyHUtkT0R0=|1536725137"'}, {'domain': '.zhihu.com', 'expiry': 1614485138.320367, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'EweYCl0UbeiOvsKxuFyVvpJ53SDX3EKN'}, {'domain': '.zhihu.com', 'expiry': 1536726941, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1536725141'}, {'domain': '.zhihu.com', 'expiry': 1631333138.700594, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '07261380ae7649b8a4abadd396afa901|1536725135000|1536725135000'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700726, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjA1ODJlMmE1YTljNDZiODgxYTBkNDgzNGFlMjZlMWM=|1536725135|cfbcc1f15711101e2ccbcc9bcb11e8d94312bedf"'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700823, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"NjcyMDUzM2UxNjc4NGViMTk5NTAxMWVlMzJjNzg4NzM=|1536725135|031f539a6b560c49f51881167ec9abb8b4e96b2b"'}, {'domain': '.zhihu.com', 'expiry': 1539317138.700889, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"ZDQ1OTE1NjFkZTFjNDkwNTkwY2IxYjRkMDk3MWI1NzQ=|1536725135|0b53e0ec51816829dd82b0e817d6c66e6aa8ea9a"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.1778781766.1536725141.1536725141.1536725141.1'}, {'domain': 
'.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1552493141, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1536725141.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180912=1'}, {'domain': '.zhihu.com', 'expiry': 1599797141, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'c6014597-8821-4b60-a8b5-c3c9ed2893c6'}]
刪除cookies後: []
選項卡管理
from selenium import webdriver
import time
browser = webdriver.Chrome()
browser.get("http://www.zhihu.com/explore")
browser.execute_script('window.open()')  # open a new tab
print(browser.window_handles)            # handles of all open tabs
# `switch_to.window(...)` replaces the deprecated `switch_to_window(...)`
# and matches the `switch_to.frame(...)` style used elsewhere in this file.
browser.switch_to.window(browser.window_handles[1])  # second tab
browser.get("https://www.baidu.com")
browser.switch_to.window(browser.window_handles[0])  # back to the first tab
browser.get("https://www.taobao.com")
# The browser is deliberately left open here, as in the original example;
# call browser.close() once you are done with it.
['CDwindow-540B3F1BBD7773487CAEA1C7BD9906FB', 'CDwindow-7765F4165D06DA78757475A7D3E29400']
異常處理
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException,TimeoutException
browser = webdriver.Chrome()
try:
    browser.get("http://www.zhihu.com/explore")
except TimeoutException:
    # The page load timed out.
    print("Time OUT!")
try:
    # Look up an element with id "hello"; NoSuchElementException is raised
    # when no such element exists.
    browser.find_element_by_id("hello")
except NoSuchElementException:
    print("Not find element!")
finally:
    # Close the browser whether or not the element was found.
    browser.close()
Not find element!
1