1. 程式人生 > >爬取"最好大學網站"大學排名

爬取"最好大學網站"大學排名

這是上半年學習 bs4 時寫的一個爬蟲,主要針對 table 標籤進行解析;程式碼容易修改,稍作調整即可用於爬取其他結構類似的網站。

# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import bs4

def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string if the request fails
        (connection error, timeout, or a non-success HTTP status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Decode with the encoding detected from the body itself.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures are treated as "no page"; anything else
        # (e.g. a programming error) is allowed to propagate.
        return ""


def fillUnivList(ulist, html):
    """Extract ranking rows from *html* and append them to *ulist*.

    For every ``<tr>`` tag directly under the first ``<tbody>``, the text of
    cells 0, 1 and 3 is appended to *ulist* as a three-item list.

    Args:
        ulist: List that receives one ``[cell0, cell1, cell3]`` entry per row.
            It is modified in place.
        html: Page markup. An empty string (what ``getHTMLText`` returns on
            failure) or markup without a ``<tbody>`` leaves *ulist* untouched.
    """
    if not html:
        return

    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        return

    # tbody.children iterates over the child nodes, whereas tbody itself is a
    # single Tag; the children also include bare text nodes, which are skipped.
    for tr in tbody.children:
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 4:
            # Not a full data row (index 3 is required below).
            continue
        ulist.append([tds[0].string, tds[1].string, tds[3].string])

    # Raw dump of the collected rows, kept from the original script.
    print(ulist)


def printUnivList(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Args:
        ulist: Rows as produced by ``fillUnivList``; the first three items of
            each row are printed.
        num: Maximum number of rows to print. If *ulist* is shorter, only the
            available rows are printed; a non-positive value prints none.
    """
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    # chr(12288) is the full-width (ideographic) space, used as the fill
    # character for the middle column.
    pad = chr(12288)
    print(tplt.format("排名", "學校名稱", "總分", pad))
    for u in ulist[:max(num, 0)]:
        # A cell value of None cannot be formatted with an alignment spec,
        # so render it as an empty string.
        rank, name, score = ("" if v is None else v for v in u[:3])
        print(tplt.format(rank, name, score, pad))


def main():
    """Download the 2016 ranking page and print its first 20 rows."""
    uinfo = []
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    html = getHTMLText(url)
    fillUnivList(uinfo, html)
    printUnivList(uinfo, 20)  # 20 univs


if __name__ == "__main__":
    main()