程式人生 > 爬取豆瓣電影排行榜前250

爬取豆瓣電影排行榜前250

環境:python3.6 + BeautifulSoup

爬取一頁的電影資訊,對應網址:https://movie.douban.com/top250

import requests # 匯入網頁請求庫
from bs4 import BeautifulSoup # 匯入網頁解析庫
import json

# Send the HTTP request and return the page source for later parsing.
def start_requests(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is passed, so without it a stalled
            connection would block forever.

    Returns:
        The undecoded response body as ``bytes`` (``Response.content``).

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The connection failed or timed out.
    """
    # NOTE(review): the target site is reported to reject the default
    # python-requests User-Agent; a browser-like value is sent instead.
    # Confirm against the live site.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/120.0 Safari/537.36'
        ),
    }
    r = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to the
    # parser, which would otherwise produce an empty result.
    r.raise_for_status()
    return r.content

# Parse the page source and extract the wanted fields.
def parse(text):
    """Extract one record per movie entry from the listing page HTML.

    Args:
        text: Page markup (``str`` or ``bytes``) handed to BeautifulSoup.

    Returns:
        A list of dicts, one per ``<div class="item">`` in document order.
        Each dict has the string keys ``title``, ``score``, ``quote`` and
        ``comment_num``. ``quote`` is ``''`` when the entry has no
        ``<span class="inq">``.

    Raises:
        AttributeError: An entry lacks the title, score or star element.
        IndexError: The star element contains no ``<span>``.
    """
    soup = BeautifulSoup(text, 'html.parser')
    result_list = []
    for movie in soup.find_all('div', class_='item'):
        # find() returns None when nothing matches. Only the quote is treated
        # as optional; calling .text on None would abort the whole page.
        quote_tag = movie.find('span', class_='inq')
        star = movie.find('div', class_='star')
        result_list.append({
            'title': movie.find('span', class_='title').text,
            'score': movie.find('span', class_='rating_num').text,
            'quote': quote_tag.text if quote_tag is not None else '',
            # The last <span> in the star block is read and its final three
            # characters are dropped. Presumably that is the unit suffix
            # after the vote count; confirm against the live page.
            'comment_num': star.find_all('span')[-1].text[:-3],
        })
    return result_list

# Write the collected data to a JSON file.
def write_json(result):
    """Serialize *result* to ``movies.json`` in the current directory.

    The output is UTF-8 text, indented by four spaces, with non-ASCII
    characters written literally rather than as ``\\uXXXX`` escapes. An
    existing file of the same name is overwritten.

    Args:
        result: Any JSON-serializable object.
    """
    # Serialize first so a failing dump never truncates an existing file.
    serialized = json.dumps(result, ensure_ascii=False, indent=4)
    with open('movies.json', mode='w', encoding='utf-8') as out_file:
        out_file.write(serialized)

# Main driver: wires the other functions together.
def main():
    """Fetch the listing page, parse it and save the records as JSON."""
    page_source = start_requests('https://movie.douban.com/top250')
    write_json(parse(page_source))

# Standard entry-point guard: run main() only when executed as a script.
if __name__ == '__main__':
    main()