1. 程式人生 > 使用 BeautifulSoup 編寫簡單爬蟲實驗

使用 BeautifulSoup 編寫簡單爬蟲實驗

from urllib.error import HTTPError
from urllib.error import URLError
from urllib.request import urlopen

from bs4 import BeautifulSoup

def getTitle(url: str):
    """Fetch *url* and return the first ``<h1>`` tag inside its ``<body>``.

    Args:
        url: Address of the page to download.

    Returns:
        The ``<h1>`` element located by BeautifulSoup, or ``None`` when the
        page cannot be retrieved or the lookup of ``body.h1`` fails.
    """
    try:
        # Read the payload inside a ``with`` so the response is closed as
        # soon as we have the bytes, instead of leaking the connection.
        with urlopen(url) as response:
            markup = response.read()
    except URLError:
        # URLError is the base class of HTTPError, so this covers HTTP error
        # statuses as well as failures to reach the server or resource.
        return None
    try:
        title = BeautifulSoup(markup, 'lxml').body.h1
    except AttributeError:
        # Raised when ``body`` is None, i.e. no <body> element was found.
        return None
    return title

# Demo: fetch the sample page and print its <h1>, or a notice when
# getTitle() reports failure by returning None.
title = getTitle("http://www.pythonscraping.com/pages/page1.html")
# Identity comparison is the correct None check; ``==`` would go through the
# tag object's own equality logic.
if title is None:
    print("The title could not be found.")
else:
    print(title)

輸出:

<h1>An Interesting Title</h1>