1. 程式人生 > 實用技巧 >整理好完美使用webdriver進行模擬驅動,再用BeautifulSoup進行提取

整理好完美使用webdriver進行模擬驅動,再用BeautifulSoup進行提取

樣例程式碼:

 1 from  selenium import webdriver
 2 from selenium.webdriver.common.by import By
 3 import time
 4 
 5 if __name__ =='__main__':
 6     #options=webdriver.ChromeOptions()
 7     #options.binary_location=r'C:\Users\13313\AppData\Local\Google\Chrome\Application\chrome.exe'
 8     driver=webdriver.Chrome('
E:\Google\Driver\chromedriver.exe') 9 #get方法 開啟指定網址 10 driver.get("http://www.baidu.com") 11 #選擇網頁元素 12 elemnt_keyword=driver.find_element_by_id('kw') 13 #element = driver.find_element(by=By.ID, value="kw") 14 15 #輸入搜尋資訊 16 elemnt_keyword.send_keys('博二爺') 17 #找到搜尋按鈕 18
element_search_button=driver.find_element_by_id('su') 19 element_search_button.click() 20 21 #等待,進行爬取。 22 time.sleep(2) 23 ret=driver.find_element_by_id('1') 24 print(ret) 25 #獲取 值 26 print(ret.text) 27 28 #獲取屬性值 29 ele = driver.find_element_by_id("1") 30 print
(ele.get_attribute('href')) 31 32 #這一條的完整資訊 33 ele = driver.find_element_by_id("baidulink") 34 print(ele.get_attribute('outerHTML ')) 35 36 #只獲取內部原始碼 37 ele = driver.find_element_by_id("baidulink") 38 print(ele.get_attribute('innerHTML')) 39 40 ''' 41 div id="food" style="margin-top:10px;color:red"> 42 <span calss="vegetable good">黃瓜</span> 43 <span calss="meat">牛肉</span> 44 <p calss="vegetable">南瓜</p> 45 <p calss="vegetable">青菜</p> 46 47 程式碼解決辦法: 48 ele = driver.find_element_by_id('food') 49 foodText = ele.get_attribute('innerHTML') 50 1, 51 ret1 = foodText.split('</span>)[1] 52 ret2 = ret1.split('"')[1] 53 2, 54 ele = driver.find_element_by_id('food') 55 html= ele.get_Attribute('innerHTML) 56 from bs4 import BeautifulSoup 57 soup = BeautifulSoup(html,'html5lib') 58 target = soup.find_all('span')[1]['class']#列表 59 print(taget) 60 61 62 獲取屬性值:print(soup.find('a')['class']) 63 獲取文字:soup.find('a')。get_text() 64 65 ''' 66 67 if ret.text.startswith("博二爺"): 68 print("成功") 69 else: 70 print("失敗") 71 72 #driver.quit()#全部退出 73 #driver.close()