python3 spider --- BeautifulSoup替代正則表示式
阿新 • • 發佈:2021-06-17
# Demo script: parse a small HTML snippet with BeautifulSoup and query it via
# attribute navigation, find()/find_all() and CSS selectors rather than
# hand-written regular expressions.
import json
import re

import bs4
import lxml
import requests
from bs4 import BeautifulSoup  # explicit name instead of a wildcard import

# Sample document to parse. The text is kept exactly as in the original
# snippet (single spaces between tags, no closing </body>/</html>); Python
# concatenates the adjacent literals into one string.
html_doc = (
    ' <html><head><title>學習python的正確姿勢</title></head> '
    '<body> '
    '<p class="title"><b>Karl的message來了</b></p> '
    '<p class="story">有一天,Karl想告訴大家講兩個訊息 '
    '<a href="http://example.com/1" class="sister" id="link1">一個好訊息</a>, '
    '<a href="http://example.com/2" class="sister" id="link2">一個壞訊息</a> , '
    '他問大家,想聽好的還是壞的?</p> '
    '<p class="story">...</p>'
)

# Build the parse tree, asking BeautifulSoup to use the lxml parser.
soup = BeautifulSoup(html_doc, 'lxml')

# Example queries, left disabled as in the original; uncomment one to try it.

# Text content and attribute-style navigation from the root.
# print(soup.get_text())
# print(soup.title.string)
# print(soup.a.string)
# print(soup.title.parent.name)
# print(soup.body.parent.name)
# print(soup.a)
# print(soup.body)

# Searching with find_all() / find().
# print(soup.find_all('a'))
# print(soup.find(id='link2'))

# Besides the find methods, select() can also be used (CSS selectors).
# print(soup.select("title"))
# print(soup.select("body a"))
# print(soup.select("p > #link1"))