[用科學的方法做不科學的事情系列]---分析五百萬大獎-雙色球之花落誰家?(1)
阿新 • • 發佈:2018-11-11
目的:
瞧瞧雙色球裡的各種資料.
用阿里雲的pai來分析分析雙色球相關的東西.
獲取資料
中獎公告:
http://www.cwl.gov.cn/kjxx/ssq/
環境搭建
安裝 python3
安裝 pip
安裝第三方模組
pip install beautifulsoup4
pip install requests
算了,不廢話,直接上程式碼吧.
import json
import time

import requests
from bs4 import BeautifulSoup

# Seconds to wait for the server before a request is abandoned; without a
# timeout a stalled connection would block the crawl forever.
REQUEST_TIMEOUT = 30


def url_find(url):
    """Return absolute URLs of the draw-announcement links on a list page.

    Args:
        url: Address of one announcement list page.

    Returns:
        A list of ``http://www.cwl.gov.cn`` + ``href`` strings, one for each
        ``<a>`` tag whose markup contains the announcement marker text.
    """
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, 'html.parser')
    # NOTE(review): the marker literal must match the page text exactly;
    # confirm the site does not serve a Simplified Chinese form of it.
    return [
        "http://www.cwl.gov.cn" + a.get('href')
        for a in soup.find_all('a')
        if "期開獎公告" in str(a)
    ]


def cat_text(url):
    """Scrape one draw-announcement page into a comma-separated row.

    The row is built from the page's ``<td>`` cells (picked by position), a
    slice of the ``<h2>`` text, the JSON payload of the ``var khHq`` script,
    the text of the ``qiuL`` span and the ``<dd>`` text inside the ``zjqkzy``
    div. Presumably these are the draw id, sales/prize figures, the red and
    blue ball numbers and the winners' regions -- confirm against the page.

    Args:
        url: Address of a single announcement page.

    Returns:
        The row as a string, without a trailing newline.

    Raises:
        ValueError: If the page lacks the ``var khHq`` script, the ``qiuL``
            span or the ``zjqkzy`` div.
        IndexError: If the page has fewer ``<td>`` cells than the layout
            being parsed requires.
    """
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, 'html.parser')

    y = [td.get_text() for td in soup.find_all('td')]
    x_id = str(soup.h2.get_text())[10:17]

    # Each value is taken from the last matching tag on the page.
    qiu_h = qiu_l = address = None
    for tag in soup.find_all("script"):
        if "var khHq" in str(tag):
            # Fixed-width slice that cuts the JSON array out of the tag text.
            qiu_h = json.loads(str(tag)[24:55])
    for tag in soup.find_all("span"):
        if tag.get("class") == ["qiuL"]:
            qiu_l = tag.get_text()
    for tag in soup.find_all("div"):
        if tag.get("class") == ["zjqkzy"]:
            address = tag.find("dd").get_text()

    missing = [
        label
        for label, value in (
            ("'var khHq' script", qiu_h),
            ("'qiuL' span", qiu_l),
            ("'zjqkzy' div", address),
        )
        if value is None
    ]
    if missing:
        raise ValueError(
            "page %s is missing: %s" % (url, ", ".join(missing))
        )

    # Two table layouts occur; the wider one shifts the cells by two.
    # NOTE(review): confirm these literals match the text the site serves.
    if y[3] == '- 元' or y[11] == '其中:一等獎複式投注':
        cells = [
            y[9], y[10].split("(")[0],
            y[12], y[13].split("(含")[0],
            y[15].split("(")[0], y[16],
            y[18], y[19],
            y[21], y[22],
            y[24], y[25],
        ]
    else:
        cells = [
            y[7], y[8].split("(")[0],
            "", "",  # two empty columns emitted for this layout
            y[10], y[11].split("(含")[0],
            y[13].split("(")[0], y[14],
            y[16], y[17],
            y[19], y[20],
            y[22], y[23],
        ]

    # Flatten the list repr ("['01', '02']") into "01,02".
    balls = str(qiu_h).translate(str.maketrans("", "", "[] '"))
    # Commas inside the address text would break the row, so replace them.
    place = (
        address.replace(",", "--")
        .replace("。", "")
        .replace("共", "")
        .replace("注", "")
    )

    row = [
        x_id,
        y[0],
        y[1].rstrip(" 元").replace(",", ""),
        y[2].rstrip(" 元").replace(",", ""),
    ]
    row.extend(cells)
    row.extend([balls, qiu_l, place])
    return ",".join(row)


def save_file(somea):
    """Append ``somea`` to ``./data``, always encoded as UTF-8."""
    with open('./data', 'a', encoding='utf-8') as f:
        f.write(somea)


# Collect announcement links from the first list page and pages 2 to 30.
url = 'http://www.cwl.gov.cn/kjxx/ssq/kjgg/list.shtml'
url_list = []
url_list = url_list + url_find(url)
for page in range(2, 31):
    url = 'http://www.cwl.gov.cn/kjxx/ssq/kjgg/list_' + str(page) + '.shtml'
    url_list = url_list + url_find(url)

# Scrape every announcement and append one row per draw to the data file.
for link in url_list:
    save_file(str(cat_text(link)) + "\n")