基於Python 任意頁面下的桌布批量下載
阿新 • • 發佈:2021-08-06
- 基於Python 任意頁面下的桌布批量下載
- github
- `Maxpagenum`:要爬取的頁數
- `fpath`:圖片的儲存路徑
- `url`:圖片列表頁的基礎地址
# Batch wallpaper downloader: walks the listing pages of a wallhaven.cc
# search, opens every wallpaper detail page found there and saves the
# full-size image into a local folder.
import os
import re
import time
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

import requests

# Number of listing pages to crawl (pages 1..Maxpagenum).
Maxpagenum = 10
# Delay in seconds before each detail-page request.
Sleeptime = 0.1

# Seconds to wait for any single HTTP request before giving up, so a
# stalled connection cannot hang the whole run.
_REQUEST_TIMEOUT = 30

# Matches the link to a wallpaper detail page inside a listing page.
_DETAIL_LINK_RE = re.compile(
    '<a class="preview" href="(.*?)" target="_blank" ></a>', re.S)
# Matches the full-size image URL inside a detail page.
_IMAGE_SRC_RE = re.compile('<img id="wallpaper" src="(.*?)" alt', re.S)


def creatPath(path):
    """Create the directory *path* (including parents) if it is missing.

    Prints "Creat path" when the directory has to be created.
    """
    if not os.path.exists(path):
        print("Creat path")
        # exist_ok guards against the directory appearing between the
        # existence check and the creation.
        os.makedirs(path, exist_ok=True)


def _page_url(base_url, page):
    """Return *base_url* with its ``page`` query parameter set to *page*.

    Any ``page`` parameter already present in *base_url* is replaced, so
    the request never carries two conflicting ``page`` values.
    """
    parts = urlsplit(base_url)
    query = [(key, value)
             for key, value in parse_qsl(parts.query, keep_blank_values=True)
             if key != 'page']
    query.append(('page', str(page)))
    return urlunsplit(parts._replace(query=urlencode(query)))


def _download_image(detail_url, headers, dest_dir):
    """Download the wallpaper shown on *detail_url* into *dest_dir*.

    Returns True when an image file was written, False when the page
    could not be fetched or contained no wallpaper image.
    """
    try:
        detail = requests.get(url=detail_url, headers=headers,
                              timeout=_REQUEST_TIMEOUT)
        detail.raise_for_status()
        match = _IMAGE_SRC_RE.search(detail.text)
        if match is None:
            # Page layout differs or access was denied: skip instead of
            # crashing on an empty match list.
            print("no wallpaper found on " + detail_url)
            return False
        img_url = match.group(1)
        image = requests.get(url=img_url, timeout=_REQUEST_TIMEOUT)
        image.raise_for_status()
    except requests.RequestException as err:
        print("skip " + detail_url + ": " + str(err))
        return False

    # The file name is the last path component of the image URL.
    img_name = img_url.split('/')[-1]
    # The context manager closes the file even if the write fails.
    with open(os.path.join(dest_dir, img_name), 'wb') as fp:
        fp.write(image.content)
    return True


def main():
    """Crawl Maxpagenum listing pages and download every wallpaper found."""
    # Destination folder (raw string: the backslashes are literal).
    fpath = r"D:\Download\pic"
    creatPath(path=fpath)

    # Example source addresses:
    #   'https://wallhaven.cc/search?q=id%3A2278&sorting=random&ref=fp&seed=ZYNEUQ&page=2'
    #   'https://wallhaven.cc/hot'
    #   'https://wallhaven.cc/hot?page=4' ...
    # Listing URL to crawl; its own page number is overridden per request.
    url = 'https://wallhaven.cc/search?q=id%3A4641&page=4'

    picnum = 0
    # Visit each listing page in turn.
    for pagenum in range(1, Maxpagenum + 1):
        page_url = _page_url(url, pagenum)
        headers = {
            'referer': page_url,
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36"
        }
        try:
            listing = requests.get(url=page_url, headers=headers,
                                   timeout=_REQUEST_TIMEOUT)
            listing.raise_for_status()
        except requests.RequestException as err:
            print("skip page " + str(pagenum) + ": " + str(err))
            continue

        # Follow every detail-page link and fetch the image behind it.
        for src in _DETAIL_LINK_RE.findall(listing.text):
            time.sleep(Sleeptime)
            if _download_image(src, headers, fpath):
                print("finish " + str(picnum))
                picnum += 1


if __name__ == '__main__':
    main()