爬取豆瓣電影排行榜前250
阿新 • • 發佈:2018-11-03
環境:Python 3.6 + requests + BeautifulSoup
爬取一頁的電影資訊,對應網址:https://movie.douban.com/top250
"""Scrape one page of the Douban Top 250 movie chart and save it as JSON."""

import json  # serialises the scraped records

import requests  # HTTP client used to download the page
from bs4 import BeautifulSoup  # HTML parser


# NOTE(review): Douban has been observed to reject the default
# python-requests User-Agent (HTTP 418); a browser-like value is sent
# instead. Confirm this is still required.
DEFAULT_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.77 Safari/537.36'
    ),
}


def _tag_text(tag, default=''):
    """Return ``tag.text``, or *default* when the lookup found nothing.

    ``find`` returns ``None`` for a missing element, so reading ``.text``
    directly would raise ``AttributeError``.
    """
    return default if tag is None else tag.text


def start_requests(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx status code.
    """
    r = requests.get(url, headers=DEFAULT_HEADERS, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty result.
    r.raise_for_status()
    return r.content


def parse(text):
    """Extract the movie records from the page source.

    Args:
        text: HTML source of a Top 250 page (``str`` or ``bytes``).

    Returns:
        A list of dicts with the keys ``title``, ``score``, ``quote`` and
        ``comment_num``. A field whose element is missing from the page
        is returned as an empty string.
    """
    soup = BeautifulSoup(text, 'html.parser')
    result_list = []
    for movie in soup.find_all('div', class_='item'):
        star = movie.find('div', class_='star')
        spans = star.find_all('span') if star is not None else []
        # The last <span> in the star block holds the rating count followed
        # by a three-character suffix, which the slice strips off.
        # NOTE(review): assumes the suffix is always three characters long.
        comment_num = spans[-1].text[:-3] if spans else ''

        result_list.append({
            'title': _tag_text(movie.find('span', class_='title')),
            'score': _tag_text(movie.find('span', class_='rating_num')),
            # Not every movie has a one-line quote, so this may be empty.
            'quote': _tag_text(movie.find('span', class_='inq')),
            'comment_num': comment_num,
        })
    return result_list


def write_json(result, path='movies.json'):
    """Write *result* to *path* as indented, UTF-8 encoded JSON.

    Args:
        result: JSON-serialisable data, e.g. the list returned by ``parse``.
        path: Destination file; defaults to ``movies.json``.
    """
    # ensure_ascii=False keeps non-ASCII titles readable in the output file.
    s = json.dumps(result, indent=4, ensure_ascii=False)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(s)


def main():
    """Fetch the first Top 250 page, parse it, and save the result."""
    url = 'https://movie.douban.com/top250'
    text = start_requests(url)
    result = parse(text)
    write_json(result)


if __name__ == '__main__':
    main()