使用 BeautifulSoup 編寫簡單爬蟲實驗
阿新 • • 發佈:2019-02-06
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

from bs4 import BeautifulSoup


def getTitle(url):
    """Fetch *url* and return the first ``<h1>`` tag found under ``<body>``.

    Args:
        url: Address of the page to download.

    Returns:
        The ``<h1>`` tag object produced by BeautifulSoup, or ``None`` when
        the page cannot be fetched (HTTP error status or unreachable
        server), when the parsed document has no ``<body>``, or when the
        body contains no ``<h1>``.
    """
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(url) as response:
            markup = response.read()
    except (HTTPError, URLError):
        # HTTPError: the server answered with an error status.
        # URLError: the server could not be reached at all.
        return None

    try:
        bsObj = BeautifulSoup(markup, 'lxml')
        # bsObj.body is None when the document has no <body>, so the
        # attribute access below raises AttributeError in that case.
        title = bsObj.body.h1
    except AttributeError:
        return None
    return title


title = getTitle("http://www.pythonscraping.com/pages/page1.html")
if title is None:
    print("The title could not be found.")
else:
    print(title)
輸出:
<h1>An Interesting Title</h1>