爬蟲練習--爬取股票資料
阿新 • • 發佈:2018-11-22
爬取股票資料
步驟
- 從東方財富網找到上市公司的股票代碼並儲存
- 根據股票代碼到百度股市通查詢相關股票的具體資訊並儲存
程式碼
#-*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import traceback
import re
import xlwt
def getHTMLText(url, code="utf-8", timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Address to fetch with an HTTP GET.
        code: Encoding name assigned to the response before decoding.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            server cannot block the caller forever.

    Returns:
        The decoded body, or ``""`` when the request fails or the server
        answers with an error status (the traceback is printed).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except Exception:
        # Best-effort fetch: report the failure and let the caller move on.
        # ``Exception`` (not a bare ``except``) keeps Ctrl-C and SystemExit
        # able to stop a long crawl.
        traceback.print_exc()
        return ""
def getStockList(lst, stockURL):
    """Collect stock codes from the listing page into *lst*.

    The page at *stockURL* is fetched (decoded as GB2312) and every ``<a>``
    tag is inspected; the first ``sh``/``sz`` + six-digit code found in its
    ``href`` is appended to *lst*.  Anchors without an ``href`` or without
    a matching code are skipped.

    Args:
        lst: List that receives the codes; it is extended in place.
        stockURL: URL of the page that links to the individual stocks.
    """
    html = getHTMLText(stockURL, "GB2312")
    soup = BeautifulSoup(html, 'html.parser')
    # Compiled once because it is applied to every anchor on the page.
    codePattern = re.compile(r"[s][hz]\d{6}")
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if not isinstance(href, str):
            # No href attribute (or an unexpected value type): nothing to scan.
            continue
        match = codePattern.search(href)
        if match:
            lst.append(match.group(0))
def _parseStockInfo(html):
    """Extract one stock's fields from its detail page, or None if absent.

    Returns a dict holding the stock name under '股票名稱' plus one entry
    per ``<dt>``/``<dd>`` pair found inside the ``stock-bets`` div.  None
    is returned when that div or the stock name cannot be found.
    """
    soup = BeautifulSoup(html, 'html.parser')
    stockInfo = soup.find('div', attrs={'class': 'stock-bets'})
    if stockInfo is None:
        return None
    nameTag = stockInfo.find(attrs={'class': 'bets-name'})
    nameParts = nameTag.text.split() if nameTag is not None else []
    if not nameParts:
        return None
    infoDict = {'股票名稱': nameParts[0]}
    # zip() pairs labels with values and stops at the shorter list, so a
    # page with unequal <dt>/<dd> counts no longer raises IndexError.
    for keyTag, valueTag in zip(stockInfo.find_all('dt'),
                                stockInfo.find_all('dd')):
        infoDict[keyTag.text] = valueTag.text
    return infoDict


def getStockInfo(lst, stockURL):
    """Look up every stock code in *lst* and save the results to 'gupiao.xls'.

    For each code the page ``stockURL + code + ".html"`` is fetched and
    parsed, and the fields named in the header row are written to the next
    free row of the sheet.  The workbook is saved after every written row
    so partial results survive an interrupted run.  A progress percentage
    is printed after each stock, whether or not it produced a row.

    Args:
        lst: Stock codes to look up.
        stockURL: URL prefix of the per-stock detail pages.
    """
    # Create the Excel workbook and write the header row.
    book = xlwt.Workbook(encoding='utf-8')
    sheet1 = book.add_sheet('sheet1', cell_overwrite_ok=True)
    # NOTE(review): a column is filled only when its header string equals a
    # <dt> label on the page exactly -- confirm these match the live page.
    heads = ['股票名稱', '最高', '最低', '今開', '昨收', '成交額', '成交量', '淨值', '折價率']
    for col, head in enumerate(heads):
        sheet1.write(0, col, head)
    book.save('gupiao.xls')

    length = len(lst)
    # The sheet row is tracked separately from the progress counter so that
    # stocks which fail to load or parse do not leave blank rows.
    row = 1
    for done, stock in enumerate(lst, start=1):
        url = stockURL + stock + ".html"
        html = getHTMLText(url)
        try:
            infoDict = _parseStockInfo(html) if html else None
            if infoDict:
                # The column index comes from the header position, so a
                # missing field leaves its cell empty instead of shifting
                # the following values under the wrong headers.
                for col, head in enumerate(heads):
                    if head in infoDict:
                        sheet1.write(row, col, infoDict[head])
                row += 1
                book.save('gupiao.xls')
        except Exception:
            # Best effort per stock: report the problem and keep crawling.
            traceback.print_exc()
        print("\r當前進度: {:.2f}%".format(done * 100 / length), end="")
if __name__ == '__main__':
    # Both sites were chosen because they embed the data statically in the
    # HTML page, so it can be scraped without running any JavaScript.
    slist = []

    # Step 1: gather the stock codes from the listing page.
    stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
    getStockList(slist, stock_list_url)

    # Step 2: query each code's detail page and store the results.
    stock_info_url = 'https://gupiao.baidu.com/stock/'
    getStockInfo(slist, stock_info_url)
執行結果
- 程式執行後將結果儲存在EXCEL中,部分結果截圖如下所示