1. 程式人生 > 實用技巧 > Python爬蟲:使用Selenium爬取指定上市公司(如浦發銀行)的今年公告資訊

Python爬蟲:使用Selenium爬取指定上市公司(如浦發銀行)的今年公告資訊

 1 from selenium import webdriver#匯入庫
 2 from selenium.webdriver.common.keys import Keys
 3 from bs4 import BeautifulSoup
 4 import csv,time
 5 import os,re
 6 import requests
 7 import selenium.webdriver.support.ui as ui
 8 import  urllib
 9 
# Scrape this year's announcements for a listed company (浦發銀行 / SPD Bank)
# from cninfo.com.cn with Selenium, then open each announcement page and
# trigger its PDF download.

chromeOptions = webdriver.ChromeOptions()
# Route Chrome's automatic downloads into a fixed local directory.
prefs = {"download.default_directory": "D:\\pufa"}
chromeOptions.add_experimental_option("prefs", prefs)
browser = webdriver.Chrome(chrome_options=chromeOptions)  # launch the browser

# Maps announcement title -> absolute detail-page URL.
# NOTE(review): "positon" is a typo of "positions"; kept to preserve the
# module-level name.
positon = {}


def enterinfo():
    """Open the cninfo search page and submit a query: title keyword
    '浦發銀行', date range 2020-01-01 through today."""
    url = 'http://www.cninfo.com.cn/new/commonUrl?url=disclosure/list/search'
    browser.get(url)  # open the search page

    keyword_box = browser.find_element_by_css_selector(
        'input[placeholder $= "標題關鍵字"]')
    keyword_box.send_keys('浦發銀行')

    # Clear the pre-filled date range before typing our own.
    browser.find_element_by_class_name("el-range__close-icon").click()
    start_box = browser.find_element_by_css_selector(
        'input[placeholder $= "開始日期"]')
    start_box.send_keys('2020-01-01')

    end_box = browser.find_element_by_css_selector(
        'input[placeholder $= "結束日期"]')
    today = time.strftime("%Y-%m-%d", time.localtime())
    end_box.send_keys(today)

    # Crude settle-time for the form; a WebDriverWait on the button would
    # be more robust, but this preserves the original behavior.
    time.sleep(2)

    # Click the search button.
    browser.find_elements_by_xpath(
        '//*[@id="main"]/div[2]/div[1]/div[2]/div[1]/div[2]/div[1]/button/span'
    )[0].click()
    time.sleep(2)  # wait for the first result page to render


def GainPage():
    """Parse the currently displayed result page and record every
    announcement's title and absolute URL into ``positon``."""
    source = browser.page_source  # HTML of the rendered results table
    soup = BeautifulSoup(source, 'lxml')

    table = soup.select('div.el-table__body-wrapper')[0]
    for row in table.select('tr.el-table__row'):
        link = (row.select('td.el-table_1_column_3')[0]
                   .select('span.ahover')[0]
                   .select('a')[0])
        href = link.get('href')
        # hrefs on the results page are site-relative; prefix the host.
        positon[link.text] = 'http://www.cninfo.com.cn' + href


time.sleep(2)
enterinfo()

# Collect the first 8 result pages: scrape the current page, then click
# the "next page" button. (Replaces the original while/counter/break.)
for _ in range(8):
    GainPage()
    browser.find_elements_by_xpath(
        '//*[@id="main"]/div[2]/div[1]/div[1]/div[3]/div/button[2]/i'
    )[0].click()
    # Fix: the original re-scraped immediately after clicking "next",
    # which can read the stale page twice; give the new page time to load.
    time.sleep(2)

print(len(positon))

for item in positon.items():
    print(item)

for detail_url in positon.values():
    browser.get(detail_url)  # open the announcement detail page
    # Click the "download announcement" button to save the PDF into prefs'
    # download directory.
    browser.find_elements_by_xpath(
        '//*[@id="noticeDetail"]/div/div[1]/div[3]/div[1]/button/span'
    )[0].click()