1. 程式人生 > 其它 > Python爬取網頁上想要的資料

Python爬取網頁上想要的資料

 

 

原始碼如下

from urllib.request import urlopen,Request
import urllib.request
import re
from bs4 import BeautifulSoup
from distutils.filelist import findall

# Fetch one page of Douban's movie Top 250 listing and print, for every movie
# entry found on it, the title, the rating, the rating-count text and the link.

url = 'http://movie.douban.com/top250?format=text'

# Send a desktop-browser User-Agent instead of urllib's default one.
# NOTE(review): presumably the site rejects the default urllib agent - confirm.
# The value must stay on a single logical line: a raw newline inside the
# header value is not a valid HTTP header.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/100.0.4896.60 Safari/537.36'
    ),
}
ret = Request(url, headers=headers)

# Use the response as a context manager so the connection is always closed,
# even if reading the body raises.
with urllib.request.urlopen(ret) as page:
    contents = page.read()

soup = BeautifulSoup(contents, "html.parser")

print("豆瓣電影TOP250" + "\n" + " 影片名 評分 評價人數 連結 ")
# Each <div class="info"> is treated as one movie entry.
for tag in soup.find_all('div', class_='info'):
    # First <span class="title"> inside the entry.
    m_name = tag.find('span', class_='title').get_text()
    m_rating_score = float(tag.find('span', class_='rating_num').get_text())
    m_people = tag.find('div', class_="star")
    m_span = m_people.find_all('span')
    # The 4th <span> inside the "star" div is taken as the rating-count text.
    # NOTE(review): this index depends on the page's current markup - confirm.
    m_peoplecount = m_span[3].contents[0]
    # First link inside the entry.
    m_url = tag.find('a').get('href')
    print(m_name + " " + str(m_rating_score) + " " + m_peoplecount + " " + m_url)