Python selenium 爬取天天基金網股票型基金
阿新 • • 發佈:2019-02-13
對於股票市場長期的判斷,普通上班族沒有多少時間和資料可以分析。那麼,就應該藉助基金機構選擇的股票來分析,藉助各基金經理管理的股票基金來統計,哪些股票是基金經理或團隊分析購買的。所以選擇的是股票型基金,最終將分析得出哪類股票是機構最多選擇的,哪隻股票是機構購買最多的。利用基金經理們分析的結果,我們可選擇相應的幾隻股票進行長期投資。
先看看股票型基金,然後遍歷某基金的股票持倉。
所以選擇股票型別的基金後,除了讀取頁面資訊,還應該讀取連結網址。比較好的是,在分頁的最右邊有一個“不分頁”的選項,點選後所有資料都在一頁中顯示,這就方便很多了!
以下是讀取頁面資訊的指令碼:
# -*- coding: utf-8 -*-
# python 3.5
"""Scrape the fund ranking table from fund.eastmoney.com into SQL Server.

The script opens the ranking page, clicks the second fund-type tab and the
"show all" checkbox, inserts one row per fund into ``TTStocks``, then visits
each fund's link and inserts its holdings rows into ``TTStocksDetial``.
"""

import re
import time
from decimal import Decimal, InvalidOperation

import pymssql
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def _to_number(text):
    """Return *text* parsed as a finite ``Decimal``, or ``None`` if not numeric.

    Used for the columns the original statement inserted unquoted, so that a
    placeholder cell is stored as NULL instead of producing malformed SQL.
    """
    try:
        value = Decimal(text.strip())
    except (InvalidOperation, AttributeError):
        return None
    return value if value.is_finite() else None


class FUND(object):
    """Browser-driven scraper that writes fund data to the ``StockDB`` database."""

    # Placeholders are bound by pymssql, never interpolated by hand.
    _INSERT_FUND_SQL = (
        "INSERT INTO [TTStocks]([id],[code],[name],[link],[date],[dwjz],[ljjz],"
        "[rzzl],[jyz],[jsy],[jly],[jyn],[jln],[jsn],[jnl],[cll],[zdy],[sxf]) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    _INSERT_HOLDING_SQL = (
        "INSERT INTO TTStocksDetial(code,name,cczb) VALUES (%s, %s, %s)"
    )
    # Highest cell index read from a ranking row is 17, so 18 cells are needed.
    _FUND_ROW_CELLS = 18

    def __init__(self, driver=None):
        """Create the scraper.

        Args:
            driver: optional Selenium WebDriver to use. When omitted a
                PhantomJS driver is started, as the original script did.
                Pass e.g. ``webdriver.Chrome(...)`` to watch the run in a
                visible browser.
        """
        self.url = 'http://fund.eastmoney.com/data/fundranking.html'
        self.driver = driver if driver is not None else webdriver.PhantomJS()
        self._conn = self.GetConnect()
        # Always define _cur so later code can test it instead of crashing.
        self._cur = self._conn.cursor() if self._conn else None

    def GetConnect(self):
        """Open the database connection.

        Returns:
            The pymssql connection, or ``None`` when connecting failed (the
            error is printed).
        """
        try:
            return pymssql.connect(host="HZC", user="kk", password="kk",
                                   database="StockDB")
        except Exception as err:
            print("連線資料庫失敗, %s" % err)
            return None

    def ExecNonQuery(self, sql, params=None):
        """Execute one statement and commit it.

        Args:
            sql: statement text; may contain ``%s`` placeholders.
            params: optional tuple of values bound to the placeholders by the
                driver. Omit it to run ``sql`` verbatim.

        Returns:
            ``True`` when the statement was committed, ``False`` otherwise.
        """
        if not self._conn or self._cur is None:
            print("執行失敗, %s" % "資料庫未連線")
            return False
        try:
            if params is None:
                self._cur.execute(sql)
            else:
                self._cur.execute(sql, params)
            self._conn.commit()
        except Exception as err:
            self._conn.rollback()
            print("執行失敗, %s" % err)
            return False
        return True

    def GetURL(self):
        """Load ``self.url`` in the browser."""
        print("[-] 開啟網址: %s" % self.url)
        self.driver.get(self.url)

    def SetURL(self, url):
        """Remember *url* as the address the next ``GetURL`` call will load."""
        print("[-] 設定網址: %s" % url)
        self.url = url

    def GetSelectStockType(self):
        """Read the page state used to decide whether the table is ready.

        Returns:
            A ``(typetext, displaytext)`` tuple: the stripped text of the
            active item in the ``types`` list, and the ``style`` attribute of
            the last ``pagebar`` div (``None`` when there is no such div).
        """
        active_tab = self.driver.find_element(
            By.XPATH, "//ul[@id='types']/li[@class='at']")
        typetext = active_tab.text.strip()
        displaytext = None
        for bar in self.driver.find_elements(By.XPATH, "//div[@id='pagebar']"):
            displaytext = bar.get_attribute('style')
        return typetext, displaytext

    def _click_when_present(self, locator, timeout=10, settle=3):
        """Wait for *locator*, click it, then pause; report failures.

        Returns ``True`` when the click happened. Failures are printed rather
        than raised so the caller stays best-effort, as it was originally.
        """
        try:
            element = WebDriverWait(self.driver, timeout).until(
                EC.presence_of_element_located(locator))
            element.click()
        except WebDriverException as err:
            print("[!] 點選失敗 %s: %s" % (locator[1], err))
            return False
        # Fixed pause kept from the original; presumably lets the page redraw.
        time.sleep(settle)
        return True

    def DoSelectStockType(self):
        """Click the second entry of the fund-type list."""
        print("[-] 選擇股票型別")
        self._click_when_present((By.XPATH, "//ul[@id='types']/li[2]"))

    def DoSelectShowAll(self):
        """Click the ``showall`` control so all rows are on one page."""
        print("[-] 顯示所有")
        self._click_when_present((By.ID, "showall"))

    def GetBaseInfo(self):
        """Insert every row of the ranking table, then scrape each fund's holdings.

        Blocks, polling once per second, until the active tab text starts
        with "股票型". Rows that do not have the expected number of cells
        are skipped.
        """
        print("[-] 基本資訊")
        links = {}
        typetext, displaytext = self.GetSelectStockType()
        # NOTE(review): re.match anchors at the start of the string, so the
        # "block" test likely never matches a style such as "display: block;".
        # Behaviour is kept as-is (only made None-safe); confirm against the
        # live page before switching to a substring test.
        while (not re.match("股票型", typetext)
               or re.match("block", displaytext or "")):
            print(" waiting……")
            time.sleep(1)
            typetext, displaytext = self.GetSelectStockType()

        table = self.driver.find_element(
            By.XPATH, "//table[@id='dbtable']/tbody")
        for row in table.find_elements(By.XPATH, ".//tr"):
            cells = row.find_elements(By.TAG_NAME, "td")
            if len(cells) < self._FUND_ROW_CELLS:
                continue
            rank = cells[1].text
            code = cells[2].text
            name = cells[3].text
            link = cells[3].find_element(By.TAG_NAME, "a").get_attribute("href")
            date = cells[4].text
            # Cells 7..17 are stored as text, exactly as scraped.
            trailing = tuple(cell.text for cell in cells[7:18])
            params = (
                _to_number(rank), code, name, link, date,
                _to_number(cells[5].text), _to_number(cells[6].text),
            ) + trailing
            self.ExecNonQuery(self._INSERT_FUND_SQL, params)
            links[code] = link
            print(rank, name)
        self.GetDetialStocks(links)

    def GetDetialStocks(self, dict):
        """Visit each fund page and insert its holdings rows.

        Args:
            dict: mapping of fund code to fund page URL. The parameter name
                shadows the builtin but is kept so existing callers work.
        """
        fund_links = dict
        for code, link in fund_links.items():
            self.SetURL(link)
            self.GetURL()
            try:
                table = self.driver.find_element(
                    By.XPATH,
                    "//li[@id='position_shares']/div[@class='poptableWrap']/table/tbody")
                marker = table.find_element(By.XPATH, ".//tr[2]").text.strip()
            except NoSuchElementException as err:
                # One fund without a holdings table must not stop the crawl.
                print("[!] 找不到股票持倉表格 %s: %s" % (code, err))
                continue
            # NOTE(review): confirm this literal matches the text the live
            # site actually renders for an empty holdings table.
            if marker == "暫無資料":
                continue
            for row in table.find_elements(By.XPATH, ".//tr"):
                cells = row.find_elements(By.TAG_NAME, "td")
                # Two cells are read below; header or notice rows have fewer.
                if len(cells) < 2:
                    continue
                print(code, cells[0].text)
                self.ExecNonQuery(self._INSERT_HOLDING_SQL,
                                  (code, cells[0].text, cells[1].text))

    def Close(self):
        """Release the database connection and shut the browser down."""
        if self._conn:
            try:
                self._conn.close()
            except Exception as err:
                print("執行失敗, %s" % err)
            self._conn = None
            self._cur = None
        self.driver.quit()


if __name__ == "__main__":
    f = FUND()
    try:
        f.GetURL()
        f.DoSelectStockType()
        f.DoSelectShowAll()
        f.GetBaseInfo()
    finally:
        # Without this the headless browser process outlives the script.
        f.Close()
接下來這些資料將存入資料庫,讀取每條記錄的同時,也將模擬開啟連結網址,讀取“股票持倉”情況。