python selenium webdriver 手冊文件
python selenium webdriver 手冊文件
1.安裝與配置
pip install selenium
基本使用selenium都是為了動態載入網頁內容用於爬蟲,所以一般也會用到phantomjs
mac下如果要配置phantomjs環境的話
echo $PATH
ln -s <phantomjs 執行檔的路徑> <PATH 中的任一目錄>
至於chromeDriver,配置方法類似,下載地址:
https://npm.taobao.org/mirrors/chromedriver/
2.程式碼樣例
複製程式碼
#!/usr/bin/env python
# coding=utf-8
# Sample: open Baidu in Chrome, type a keyword into the search box, click the
# search button and print the page title.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# A unicode literal behaves the same on Python 2 and Python 3, whereas
# str.decode() only exists on Python 2.
keyword = u'家有'

chrome_options = webdriver.ChromeOptions()
# Optional tweaks, enable as needed:
# chrome_options.binary_location = "C:\\Program Files (x86)\\Google\\Application\\chrome.exe"
# chrome_options.add_argument('--user-agent=iphone')
# chrome_options.add_argument('--proxy-server=http://61.155.164.110:3128')

# Other browsers can be started the same way:
# driver = webdriver.Ie()
# driver = webdriver.Firefox()
# `options=` supersedes the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=chrome_options)
driver.get('http://www.baidu.com')

# 'kw' is the search box; it is looked up again after the pause, exactly as
# before, instead of reusing the first element reference.
driver.find_element(By.ID, 'kw').clear()
time.sleep(1)
driver.find_element(By.ID, 'kw').send_keys(keyword)
time.sleep(3)

# 'su' is the search button. Pressing ENTER on it is an alternative to click():
# driver.find_element(By.ID, 'su').send_keys(Keys.ENTER)
driver.find_element(By.ID, 'su').click()
print(driver.title)

# Uncomment to close the browser when the script ends:
# driver.quit()
複製程式碼
3.api速查
3.1定位元素
3.1.1 通過id查詢:
# Look up a single element by its id attribute with the dedicated helper.
element = driver.find_element_by_id("coolestWidgetEvah")
or
from selenium.webdriver.common.by import By
# Same lookup through the generic find_element() with a By.ID locator.
element = driver.find_element(by=By.ID, value="coolestWidgetEvah")
3.1.2 通過class查詢
# Collect the elements whose class name is "cheese" (plural find_elements form).
cheeses = driver.find_elements_by_class_name("cheese")
or
from selenium.webdriver.common.by import By
# Same lookup through the generic find_elements() with a By.CLASS_NAME locator.
cheeses = driver.find_elements(By.CLASS_NAME, "cheese")
3.1.3 通過標籤名稱查詢
# Look up a single element by its tag name.
target_div = driver.find_element_by_tag_name("div")
or
from selenium.webdriver.common.by import By
# Same lookup through the generic find_element() with a By.TAG_NAME locator.
target_div = driver.find_element(By.TAG_NAME, "div")
3.1.4 通過name屬性查詢
# Look up a single element by its name attribute.
btn = driver.find_element_by_name("input_btn")
or
from selenium.webdriver.common.by import By
# Same lookup through the generic find_element() with a By.NAME locator.
btn = driver.find_element(By.NAME, "input_btn")
3.1.5 通過連結的內容查詢
# Look up a link by its link text.
next_page = driver.find_element_by_link_text("下一頁")
or
from selenium.webdriver.common.by import By
# Same lookup through the generic find_element() with a By.LINK_TEXT locator.
next_page = driver.find_element(By.LINK_TEXT, "下一頁")
3.1.6 通過連結的部分內容查詢
next_page = driver.find_element_by_partial_link_text("去下一頁")
or
from selenium.webdriver.common.by import By
next_page = driver.find_element(By.PARTIAL_LINK_TEXT, "下一頁")
3.1.7 通過css查詢
# Look up a single element with a CSS selector.
cheese = driver.find_element_by_css_selector("#food span.dairy.aged")
or
from selenium.webdriver.common.by import By
# Same lookup through the generic find_element() with a By.CSS_SELECTOR locator.
cheese = driver.find_element(By.CSS_SELECTOR, "#food span.dairy.aged")
3.1.8 通過xpath查詢
# Collect the elements matched by an XPath expression (plural find_elements form).
inputs = driver.find_elements_by_xpath("//input")
or
from selenium.webdriver.common.by import By
# Same lookup through the generic find_elements() with a By.XPATH locator.
inputs = driver.find_elements(By.XPATH, "//input")
3.1.9 通過js查詢
# Collect every <label>, then let the browser resolve each label's `for`
# attribute to the element it points at.
labels = driver.find_elements_by_tag_name("label")
# The label list is passed to the script as arguments[0]; the array returned
# by the script is assigned to `inputs`.
inputs = driver.execute_script(
    "var labels = arguments[0], inputs = []; for (var i=0; i < labels.length; i++){" +
    "inputs.push(document.getElementById(labels[i].getAttribute('for'))); } return inputs;", labels)
3.2 獲取元素的文字資訊
# Locate the element, then read its text through the `text` attribute.
element = driver.find_element_by_id("element_id")
element.text
3.3 修改userAgent
# Build a Firefox profile that overrides the browser's user-agent string,
# then start Firefox with that profile.
ua_profile = webdriver.FirefoxProfile()
ua_profile.set_preference("general.useragent.override", "some UA string")
driver = webdriver.Firefox(firefox_profile=ua_profile)
3.4 cookies
複製程式碼
# Go to the correct domain
driver.get("http://www.example.com")
# Now set the cookie. Here's one for the entire domain
# the cookie name here is 'key' and its value is 'value'
driver.add_cookie({'name':'key', 'value':'value', 'path':'/'})
# additional keys that can be passed in are:
# 'domain' -> String,
# 'secure' -> Boolean,
# 'expiry' -> Seconds since the Epoch at which it should expire.
# And now output all the available cookies for the current URL
for cookie in driver.get_cookies():
    # print() with a single pre-formatted string works on Python 2 and 3 alike.
    print("%s -> %s" % (cookie['name'], cookie['value']))
# You can delete cookies in 2 ways
# By name
driver.delete_cookie("CookieName")
# Or all of them
driver.delete_all_cookies()
最後放一個自己的程式碼樣例:功能是找到搜尋框、輸入搜尋關鍵詞並點選搜尋按鈕,接著開啟每個搜尋結果並輸出網頁原始碼。
# coding=utf-8
# Sample: run a search, click every result link, then print the HTML source of
# each window that was opened.
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait  # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC  # available since 2.26.0

# Create a new instance of the Chrome driver
driver = webdriver.Chrome()
# Everything after the browser starts runs under try/finally so the browser is
# shut down even when navigation or an element lookup fails.
try:
    # go to the home page
    driver.get("http://www.baidu.com")
    # remember the handle of the current (main) window
    nowhandle = driver.current_window_handle
    print(driver.title)

    # find the element whose name attribute is qymc (the search box)
    # NOTE(review): "qymc" / "imageField" differ from the Baidu ids ("kw", "su")
    # used in the first sample; the URL above looks like a placeholder -
    # confirm the real target site.
    inputElement = driver.find_element(By.NAME, "qymc")
    print(inputElement)
    # type in the search
    inputElement.send_keys(u"加油網")
    # The search is not a standard form and cannot be submitted with
    # inputElement.submit(), so click the search button instead.
    driver.find_element(By.NAME, "imageField").click()

    # Scroll to the bottom first: otherwise the last link can be covered by
    # another, unrelated link and the click would land on the wrong element.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # NOTE(review): the nesting below was reconstructed from a listing that had
    # lost its indentation - confirm it matches the intended flow.
    # find all result links and click them, pausing after each click
    for item in driver.find_elements(By.XPATH, '//*[@id="pagetest2"]/div/table/tbody/tr/td/a'):
        item.click()
        time.sleep(10)

    # get the handles of all windows
    allhandles = driver.window_handles
    # visit every window other than the main one and print its source, which
    # shows that the switch into the opened window really happened
    for handle in allhandles:
        if handle != nowhandle:
            driver.switch_to.window(handle)
            print(driver.page_source)
    # return to the main window
    driver.switch_to.window(nowhandle)
finally:
    driver.quit()