Python 爬蟲系列(3.7-使用 bs4 爬取貴州農產品資料)
一、爬取資料步驟
1、爬取網站地址:http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page=1
2、實現程式碼
import requests
from bs4 import BeautifulSoup
class Food(object):
    """Scraper for the price table on the gznw.gov.cn price-info page.

    Instance attributes (set in ``__init__``):
        url: Address of the page to fetch; its query string carries
            ``areaid=22572`` and ``page=1``.
        headers: HTTP headers sent with the request. Only a browser-like
            ``User-Agent`` is set.
    """

    # Seconds to wait for the server. requests applies no timeout by
    # default, so without this a stalled connection would block forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.url = 'http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page=1'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
        }

    def get_html(self):
        """Fetch the page at ``self.url``.

        :return: the response body as text when the status code is 200,
            otherwise an empty string.
        :raises requests.exceptions.RequestException: on network failure
            or when the server does not answer within ``REQUEST_TIMEOUT``.
        """
        response = requests.get(
            url=self.url,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            return response.text
        return ''

    def down_data(self):
        """Download the page and extract one record per table row.

        Cells 0, 1, 3 and 4 of each row are read as name, price, address
        and time; cell 2 is not used.

        :return: a list of dicts with the keys ``name``, ``price``,
            ``address`` and ``time``. The list is empty when the page could
            not be fetched or does not contain the expected table.
        """
        # get_html must be *called*; passing the bound method itself to
        # BeautifulSoup would hand it a function object instead of markup.
        html = self.get_html()
        if not html:
            return []

        soup = BeautifulSoup(html, 'lxml')
        table = soup.find('table', attrs={'class': 'table table-hover'})
        if table is None:
            return []

        # The lxml parser does not insert a missing <tbody>, so fall back to
        # searching the table itself when the markup has none.
        body = table.find('tbody')
        if body is None:
            body = table

        food_list = []
        for tr in body.find_all('tr'):
            tds = tr.find_all('td')
            # Skip rows that cannot hold all indexed cells (e.g. header rows
            # built from <th>) instead of failing with IndexError.
            if len(tds) < 5:
                continue
            food_list.append({
                'name': tds[0].get_text(),
                'price': tds[1].get_text(),
                'address': tds[3].get_text(),
                'time': tds[4].get_text(),
            })
        return food_list
if __name__ == "__main__":
    # Script entry point: scrape the price table once and print the records.
    scraper = Food()
    records = scraper.down_data()
    print(records)