
Crawler: scraping web page images (with pagination)


Source code without pagination:

import requests
import re

url = 'https://www.qiushibaike.com/imgrank/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}

# Fetch the page HTML
res = requests.get(url, headers=headers).text
# print(res)

# Extract the src attribute of every illustration image on the page
urls = re.findall('<img src="(.*?)" alt=".*?" class="illustration" width="100%" height="auto">', res)
print(urls)

for url1 in urls:
    # Use the last path segment as the local file name
    filename = url1.split('/')[-1]
    # The src values are protocol-relative (they start with //), so prepend https:
    urll = 'https:' + url1
    response = requests.get(urll, headers=headers)
    with open(filename, 'wb') as f:
        f.write(response.content)
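If you would rather not dump the images into the working directory, a small variation saves them under a dedicated folder and skips failed downloads. This is only a sketch: the imgs folder name, the save_image helper, and the timeout value are assumptions for illustration, not part of the original script.

import os
import requests

def save_image(img_url, folder='imgs', headers=None):
    # Hypothetical helper: download one image into a local folder
    os.makedirs(folder, exist_ok=True)      # create the folder on first use
    filename = img_url.split('/')[-1]       # keep the original file name
    try:
        resp = requests.get(img_url, headers=headers, timeout=10)
        resp.raise_for_status()             # treat 4xx/5xx responses as failures
    except requests.RequestException as e:
        print('skip', img_url, e)
        return
    with open(os.path.join(folder, filename), 'wb') as f:
        f.write(resp.content)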


For pagination you need a generic URL template with a page-number placeholder, then fill it in inside a loop:

url = 'https://www.qiushibaike.com/imgrank/page/%d/'
for page in range(1, 4):
    newurl = url % page   # % formatting already returns a str, no extra format() needed
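Tracing the loop above, with range(1, 4) the template expands to these three page URLs:

https://www.qiushibaike.com/imgrank/page/1/
https://www.qiushibaike.com/imgrank/page/2/
https://www.qiushibaike.com/imgrank/page/3/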

import requests
import re

url = 'https://www.qiushibaike.com/imgrank/page/%d/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}

for page in range(1, 4):
    # Fill the page number into the URL template
    newurl = url % page
    res = requests.get(newurl, headers=headers).text
    # print(res)

    # The extraction and download steps must stay inside the page loop,
    # otherwise only the last fetched page gets processed
    urls = re.findall('<img src="(.*?)" alt=".*?" class="illustration" width="100%" height="auto">', res)
    print(urls)
    for url1 in urls:
        filename = url1.split('/')[-1]
        urll = 'https:' + url1
        response = requests.get(urll, headers=headers)
        with open(filename, 'wb') as f:
            f.write(response.content)
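One possible cleanup, not from the original post: wrap the per-page work in a function and pause briefly between pages so the crawler is gentler on the site. The crawl_page name, the constant names, and the one-second delay below are assumptions for illustration.

import re
import time
import requests

HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
PAGE_URL = 'https://www.qiushibaike.com/imgrank/page/%d/'
IMG_PATTERN = '<img src="(.*?)" alt=".*?" class="illustration" width="100%" height="auto">'

def crawl_page(page):
    # Hypothetical refactor (sketch): fetch one page and download all of its images
    html = requests.get(PAGE_URL % page, headers=HEADERS).text
    for src in re.findall(IMG_PATTERN, html):
        img = requests.get('https:' + src, headers=HEADERS)
        with open(src.split('/')[-1], 'wb') as f:
            f.write(img.content)

if __name__ == '__main__':
    for page in range(1, 4):
        crawl_page(page)
        time.sleep(1)   # assumed 1 s pause between pages to avoid hammering the server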