python爬蟲 股票介面爬取 DAY6
阿新 • • 發佈:2018-12-31
import re

import requests
from bs4 import BeautifulSoup

# Sentinel string returned by getHTMLText when a page cannot be fetched.
_FETCH_FAILED = '爬取異常'

# Matches an exchange-prefixed stock code: "sz" or "sh" followed by 6 digits.
_STOCK_CODE_RE = re.compile(r'[s][zh]\d{6}')


def getHTMLText(url: str, code: str) -> str:
    """Download *url* and return its body decoded with the *code* encoding.

    Args:
        url: Address of the page to download.
        code: Encoding name used to decode the response body.

    Returns:
        The decoded page text, or the sentinel string '爬取異常' when the
        request fails, times out, or the server answers with an HTTP error
        status.
    """
    try:
        # A timeout keeps one unresponsive server from stalling the crawl.
        r = requests.get(url, timeout=30)
        # Must be *called*; a bare attribute reference never raises.
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException:
        return _FETCH_FAILED


def getStocklist(ulist: list, stocklistURL: str) -> None:
    """Append every stock code linked from the listing page to *ulist*.

    The page is fetched as GB2312 and every ``<a target="_blank">`` link
    whose ``href`` contains a code such as ``sh600000`` contributes that
    code. Links without a matching ``href`` are ignored.

    Args:
        ulist: List that receives the stock codes (mutated in place).
        stocklistURL: Address of the page that lists all stocks.
    """
    html = getHTMLText(stocklistURL, 'GB2312')
    if html == _FETCH_FAILED:
        return

    soup = BeautifulSoup(html, 'html.parser')
    for anchor in soup.find_all('a', attrs={'target': "_blank"}):
        href = anchor.get('href')
        if not isinstance(href, str):
            continue
        match = _STOCK_CODE_RE.search(href)
        if match:
            ulist.append(match.group(0))


def _parseStockinfo(html: str) -> dict:
    """Build a fresh dict holding the name and quote fields of one stock page.

    Raises:
        IndexError: If the page has no ``bets-name`` link or the link text
            is blank, i.e. the page does not describe a stock.
    """
    soup = BeautifulSoup(html, 'html.parser')
    name = soup.find_all('a', attrs={'class': "bets-name"})[0].text
    info = {"股票名稱": name.split()[0]}
    for div in soup.find_all("div", attrs={'class': "bets-content"}):
        # zip() tolerates a div with no <dt> or with unequal <dt>/<dd> counts.
        for dt, dd in zip(div.find_all('dt'), div.find_all('dd')):
            info[dt.text] = dd.text
    return info


def getStockinfo(ulist: list, StockinfoURL: str, stockDic: dict,
                 fpath: str) -> None:
    """Fetch each stock's detail page and append its fields to *fpath*.

    One ``str(dict)`` line is written per stock whose page could be fetched
    and parsed; other stocks are skipped. A progress percentage is printed
    after every stock, whether or not it was written.

    Args:
        ulist: Stock codes to look up.
        StockinfoURL: URL prefix; the page address is
            ``StockinfoURL + code + '.html'``.
        stockDic: Dict used for each record. Entries present on entry are
            kept in every record; on return it holds the last record
            written.
        fpath: Output file, opened in append mode as UTF-8.

    Raises:
        OSError: If *fpath* cannot be opened for writing.
    """
    # Snapshot caller-supplied entries so each record starts from the same
    # base instead of inheriting fields from the previous stock.
    base = dict(stockDic)
    total = len(ulist)

    with open(fpath, 'a', encoding='utf-8') as f:
        for count, stock in enumerate(ulist, 1):
            html = getHTMLText(StockinfoURL + stock + '.html', 'utf-8')
            if html and html != _FETCH_FAILED:
                try:
                    info = _parseStockinfo(html)
                except IndexError:
                    # Not a recognisable stock page; skip it.
                    info = None
                if info is not None:
                    stockDic.clear()
                    stockDic.update(base)
                    stockDic.update(info)
                    f.write(str(stockDic) + '\n')
            print("\r當前進度:{:.2f}%".format(count * 100 / total), end='')


def main() -> None:
    """Collect the stock list, then dump every stock's details to a file."""
    stocklistURL = 'http://quote.eastmoney.com/stocklist.html'
    StockinfoURL = 'https://gupiao.baidu.com/stock/'
    fpath = r'C:\Users\lenovo\Desktop\stock.txt'
    stockDic = {}
    ulist = []
    getStocklist(ulist, stocklistURL)
    print('獲取列表成功!')
    getStockinfo(ulist, StockinfoURL, stockDic, fpath)


if __name__ == "__main__":
    main()