學習selenium寫的例子,就當是學習記錄吧
阿新 • • 發佈:2018-12-12
# Selenium learning example: log in to a site with headless Chrome, open a
# monitoring view embedded in an iframe, then poll its data table once per
# second and print each row as a JSON object keyed by the table header cells.

import json
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.remote import webelement
from selenium.webdriver.support import expected_conditions as EC  # available since 2.26.0
from selenium.webdriver.support.ui import WebDriverWait  # available since 2.4.0

# Absolute XPaths into the target page. They are tied to the exact DOM layout
# of the site and will break if the page structure changes.
MENU_ENTRY_XPATH = "/html/body/div[2]/div[2]/div[1]/div[1]/ul[1]/li[6]"
MONITOR_IFRAME_XPATH = "/html/body/div[3]/div[1]/div[1]/div[3]/iframe[1]"
MAXIMIZE_BUTTON_XPATH = "/html/body/div[1]/div[1]/div[1]/ul[1]/li[2]"
HEADER_TABLE_XPATH = "/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/table[1]"
BODY_TBODY_XPATH = "/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[2]/table[1]/tbody[1]"

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Create a new headless Chrome driver. `options=` replaces the deprecated
# `chrome_options=` keyword.
browser = webdriver.Chrome(options=chrome_options)
# browser = webdriver.Chrome()  # use this instead to watch the run in a visible window

# Everything after driver creation runs inside try/finally so the Chrome
# process is always shut down, including when a locator fails or the endless
# polling loop below is interrupted with Ctrl+C.
try:
    browser.maximize_window()

    # Open the target site ('xxxx' is a placeholder URL).
    url = 'xxxx'
    browser.get(url)

    # Clear any pre-filled text in the login form.
    browser.find_element(By.ID, "username").clear()
    browser.find_element(By.ID, "password").clear()

    # Enter the credentials.
    browser.find_element(By.ID, "username").send_keys("username")
    browser.find_element(By.ID, "password").send_keys("123456")

    # Log in.
    browser.find_element(By.CLASS_NAME, 'login-button').click()

    # The landing page content is inside the first iframe; click its first
    # link ("start using") and return to the top-level document.
    iframe = browser.find_element(By.XPATH, "//iframe")
    browser.switch_to.frame(iframe)
    browser.find_element(By.TAG_NAME, 'a').click()
    browser.switch_to.default_content()
    time.sleep(1)

    # NOTE(review): the original comment on this step was blank; presumably
    # this opens the menu entry that loads the monitoring iframe -- confirm.
    browser.find_element(By.XPATH, MENU_ENTRY_XPATH).click()
    time.sleep(1)

    moniframe = browser.find_element(By.XPATH, MONITOR_IFRAME_XPATH)
    browser.switch_to.frame(moniframe)

    # Maximize the monitoring panel.
    browser.find_element(By.XPATH, MAXIMIZE_BUTTON_XPATH).click()

    # Read the table header once: it is fetched a single time, so splitting
    # it on every poll would be wasted work.
    heads = browser.find_element(By.XPATH, HEADER_TABLE_XPATH).text
    headlist = str(heads).split(' ')

    # Poll the table body forever; stop the script with Ctrl+C.
    while True:
        datatable = browser.find_element(By.XPATH, BODY_TBODY_XPATH)
        for row_text in str(datatable.text).splitlines():
            # Pair header cells with row cells; zip() silently drops any
            # surplus cells when the two lengths differ.
            datadict = dict(zip(headlist, row_text.split(' ')))
            # ensure_ascii=False keeps non-ASCII cell text readable.
            datajson = json.dumps(datadict, ensure_ascii=False)
            print(type(datadict), datajson)
        # NOTE(review): the original indentation was lost; this pause is
        # assumed to be the per-poll interval rather than a per-row delay.
        time.sleep(1)
finally:
    browser.quit()