1. 程式人生 > >python爬蟲 股票介面爬取 DAY6

python爬蟲 股票介面爬取 DAY6

import requests
import re
from bs4 import BeautifulSoup

def getHTMLText(url,code,timeout=30):
    """Download ``url`` and return its body decoded with encoding ``code``.

    Args:
        url: Address of the page to fetch.
        code: Encoding name assigned to the response before its text is read.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded page text, or the sentinel string '爬取異常' when the
        request fails, times out, or the server answers with an HTTP error
        status.
    """
    try:
        # Without a timeout a single unresponsive host stalls the crawl forever.
        r = requests.get(url, timeout=timeout)
        # This has to be *called*; the bare attribute reference was a no-op,
        # so 4xx/5xx error pages were returned as if they were valid content.
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures map to the sentinel; programming errors
        # and KeyboardInterrupt are no longer silently swallowed.
        return '爬取異常'


def getStocklist(ulist,stocklistURL):
    """Append every stock code found on the listing page to ``ulist``.

    The page at ``stocklistURL`` is fetched as GB2312 and each
    ``<a target="_blank">`` link is inspected; the first substring of its
    ``href`` that looks like ``sh``/``sz`` followed by six digits is taken as
    the stock code.

    Args:
        ulist: List that receives the codes (mutated in place).
        stocklistURL: Address of the page listing all stocks.

    Returns:
        None. Links without an ``href`` or without a matching code are
        skipped, so ``ulist`` is left unchanged when nothing matches.
    """
    html = getHTMLText(stocklistURL,'GB2312')
    soup = BeautifulSoup(html,'html.parser')
    # Compile once instead of re-resolving the pattern for every link.
    code_pattern = re.compile(r'[s][zh]\d{6}')
    for anchor in soup.find_all('a',attrs={'target':"_blank"}):
        href = anchor.get('href')
        if not href:
            continue
        # Explicit "no match" handling replaces the former bare ``except``,
        # which also hid unrelated errors and KeyboardInterrupt.
        match = code_pattern.search(href)
        if match:
            ulist.append(match.group(0))

def _parseStockPage(html):
    """Extract the stock name and its dt/dd quote fields from a detail page.

    Returns:
        A dict whose first key is "股票名稱" followed by one entry per
        ``<dt>``/``<dd>`` pair found inside ``div.bets-content`` elements, or
        None when the page has no usable name or yields no fields at all.
    """
    soup = BeautifulSoup(html,'html.parser')
    name_links = soup.find_all('a',attrs={'class':"bets-name"})
    if not name_links:
        return None
    name_parts = name_links[0].text.split()
    if not name_parts:
        return None
    record = {"股票名稱":name_parts[0]}
    found_field = False
    for block in soup.find_all("div",attrs={'class':"bets-content"}):
        # zip() pairs labels with values and stops at the shorter list, so a
        # missing <dd> no longer raises IndexError and discards the stock.
        for label, value in zip(block('dt'), block('dd')):
            record[label.text] = value.text
            found_field = True
    return record if found_field else None


def getStockinfo(ulist,StockinfoURL,stockDic,fpath):
    """Fetch each stock's detail page and append its data to ``fpath``.

    For every code in ``ulist`` the page ``StockinfoURL + code + '.html'`` is
    downloaded and parsed; each successfully parsed stock is written as one
    ``str(dict)`` line, and a progress percentage is printed after every stock.

    Args:
        ulist: Stock codes to process.
        StockinfoURL: URL prefix of the per-stock detail pages.
        stockDic: Working dict (mutated in place). Entries present on entry
            are kept in every record; after the call it holds the last
            record that was written.
        fpath: Path of the UTF-8 text file records are appended to.

    Returns:
        None.

    Raises:
        OSError: If ``fpath`` cannot be opened or written. This used to be
            swallowed silently, which made a bad path look like a clean run.
    """
    # Snapshot the caller's entries so each record starts from the same base
    # instead of inheriting the previous stock's fields.
    seed = dict(stockDic)
    total = len(ulist)
    for count, stock in enumerate(ulist, start=1):
        url = StockinfoURL+stock+'.html'
        html = getHTMLText(url,'utf-8')
        record = _parseStockPage(html) if html else None
        if record is not None:
            stockDic.clear()
            stockDic.update(seed)
            stockDic.update(record)
            # One line per stock (previously one line per content <div>).
            with open(fpath,'a',encoding ='utf-8') as f:
                f.write(str(stockDic)+'\n')
        # Count every stock exactly once so progress ends at 100%.
        print("\r當前進度:{:.2f}%".format(count*100/total),end='')
        

def main(fpath=r'C:\Users\lenovo\Desktop\stock.txt'):
    """Crawl the stock list, then append every stock's details to ``fpath``.

    Args:
        fpath: Output text file; defaults to the original hard-coded location
            so existing ``main()`` calls behave as before.

    Returns:
        None. When no stock codes could be collected, a failure message is
        printed and nothing is written.
    """
    stocklistURL = 'http://quote.eastmoney.com/stocklist.html'
    StockinfoURL = 'https://gupiao.baidu.com/stock/'
    stockDic = {}
    ulist = []
    getStocklist(ulist,stocklistURL)
    if not ulist:
        # getStocklist does not signal a failed download, so an empty list is
        # the only indication that the listing could not be obtained.
        print('獲取列表失敗!')
        return
    print('獲取列表成功!')
    getStockinfo(ulist,StockinfoURL,stockDic,fpath)
    
# Start the crawl only when executed as a script, so importing this module
# does not trigger network requests and file writes.
if __name__ == "__main__":
    main()