Python 爬蟲例項:爬取中國大學排名
阿新 • • 發佈:2018-12-09
import requests
from bs4 import BeautifulSoup
import bs4


def gegHTMLText(url):
    """Download *url* and return the response body as text.

    The response encoding is set from ``apparent_encoding`` before the body
    is decoded.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``""`` if the request fails for any reason
        that requests reports (connection error, timeout, HTTP error status).
    """
    try:
        # A timeout keeps the script from hanging forever on a dead server.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat an empty string as "no page".
        return ""


def fillUnivList(ulist, html):
    """Append one ``[td0, td1, td2]`` entry to *ulist* per table row in *html*.

    Walks the direct children of the first ``<tbody>`` element and, for every
    child that is a tag, stores the ``.string`` of its first three ``<td>``
    cells. Nothing is appended when *html* has no ``<tbody>`` (for example
    when the download failed and *html* is empty).

    Args:
        ulist: List that is extended in place.
        html: HTML text to parse.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        return

    for tr in tbody.children:
        # .children also yields the text nodes between rows; keep only
        # real tags.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr("td")
        if len(tds) < 3:
            # Not a full data row (e.g. a header or spacer row); skip it.
            continue
        # NOTE(review): the third cell is later printed under the "分數"
        # header; confirm tds[2] is the score column on the target page.
        ulist.append([tds[0].string, tds[1].string, tds[2].string])


def printUnivList(ulist, num):
    """Print a header line followed by at most *num* entries of *ulist*.

    Args:
        ulist: Sequence of rows, each with at least three items.
        num: Maximum number of rows to print; fewer are printed when *ulist*
            is shorter, and none when *num* is zero or negative.
    """
    row_format = "{:^10}\t{:^6}\t{:^10}"
    print(row_format.format("排名", "學校", "分數"))
    for u in ulist[:max(num, 0)]:
        # str() guards against None cells (Tag.string is None for a cell
        # with nested markup), which cannot be formatted with an alignment
        # spec.
        print(row_format.format(str(u[0]), str(u[1]), str(u[2])))


def main():
    """Fetch the 2016 ranking page and print its first 20 table rows."""
    uinfo = []
    url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html"
    html = gegHTMLText(url)
    fillUnivList(uinfo, html)
    printUnivList(uinfo, 20)


if __name__ == "__main__":
    main()