# Python爬取百度圖片
# 阿新 • 發佈:2020-09-16
"""Download thumbnail images from a Baidu image search into a local folder.

Run as a script: it prompts for a keyword and a count, creates (or reuses) a
``<keyword>圖片`` folder and saves the thumbnails there as ``1.jpg``,
``2.jpg``, ...
"""

import os
import re
import sys
import urllib.parse as urps
import urllib.request as urqt
from urllib.parse import quote

try:
    import requests
except ImportError:  # optional dependency; gethtml() falls back to urllib
    requests = None

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) "
                  "Gecko/20100101 Firefox/80.0"
}

# Socket timeout, in seconds, applied to every network call.
TIMEOUT = 10

# Default parent directory for the per-keyword image folders.
BASE_DIR = r"D:\資訊\python\一些成品\百度圖片爬蟲"

# Captures the value of every "thumbURL" field embedded in the result page.
THUMB_URL_RE = re.compile(r'"thumbURL":"(.*?)"')

# Search URL up to and including "word="; the URL-quoted keyword is appended.
# The original text contained "latest=©right=": "&copy" had been rendered as
# the copyright sign, fusing the "latest" and "copyright" query parameters.
SEARCH_URL_PREFIX = (
    "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592"
    "&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1599885698346_R&pv=&ic=0&nc=1"
    "&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0"
    "&istype=2&ie=utf-8&sid=&word="
)


def gethtml(url):
    """Return the body of *url* decoded as text.

    Uses ``requests`` (10 s timeout, redirects not followed) when it is
    installed. Otherwise falls back to ``urllib``, which does follow
    redirects and decodes with the charset declared by the response
    (UTF-8 if none is declared).
    """
    if requests is not None:
        # The context manager releases the session's connection pool.
        with requests.Session() as session:
            session.headers = dict(HEADERS)
            response = session.get(url, timeout=TIMEOUT, allow_redirects=False)
            return response.text
    request = urqt.Request(url, headers=HEADERS)
    with urqt.urlopen(request, timeout=TIMEOUT) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset, errors="replace")


def getbyte(url):
    """Return the raw bytes served at *url*.

    Raises ``ValueError`` for a malformed URL and whatever ``urlopen``
    raises on network failure.
    """
    request = urqt.Request(url, headers=HEADERS)
    # A timeout keeps one stalled download from hanging the whole run.
    with urqt.urlopen(request, timeout=TIMEOUT) as response:
        return response.read()


def makejpg(url, f):
    """Write the bytes fetched from *url* into file object *f*, then close it.

    *f* is closed even when the download fails, so the handle never leaks.
    """
    try:
        f.write(getbyte(url))
    finally:
        f.close()


def getintofold(string, base_dir=BASE_DIR):
    """Change the working directory to ``<base_dir>/<string>圖片``.

    The folder is created when it does not exist yet. *base_dir* itself must
    already exist; otherwise ``FileNotFoundError`` is raised.
    """
    target = os.path.join(base_dir, string + "圖片")
    try:
        os.mkdir(target)
    except FileExistsError:
        pass  # reuse the folder left by a previous run
    os.chdir(target)


def getall(num, url):
    """Download up to *num* thumbnails found via the search page *url*.

    Files are written to the current directory as ``1.jpg`` ... ``N.jpg``.
    Further result pages are requested by appending ``&pn=<offset>`` to
    *url*. A failed download is reported and skipped.

    Returns the number of images actually saved, which is smaller than *num*
    when the results run out.
    """
    downloaded = 0
    offset = 0
    page_url = url
    while downloaded < num:
        thumb_urls = THUMB_URL_RE.findall(gethtml(page_url))
        saved_before_page = downloaded
        for thumb_url in thumb_urls:
            if downloaded >= num:
                break
            path = str(downloaded + 1) + ".jpg"
            try:
                makejpg(thumb_url, open(path, "wb"))
            except Exception:
                # Best effort per image, but Ctrl-C and SystemExit still
                # propagate because they are not Exception subclasses.
                print("錯誤")
                if os.path.exists(path):
                    os.remove(path)  # do not leave an empty or partial file
                continue
            downloaded += 1
            print("第 " + str(downloaded) + " 個已下載")
        if downloaded == saved_before_page:
            # Empty page, or every download on it failed: stop instead of
            # requesting the same results forever.
            print("沒有更多可下載的圖片")
            break
        # Advance by results seen, not by successes, so that a failed
        # download does not make the next page overlap this one.
        offset += len(thumb_urls)
        page_url = url + "&pn=" + str(offset)
    return downloaded


def endd():
    """Print a thank-you message and exit the program."""
    # The original called g.msgbox(), but no module is bound to "g" in this
    # file, so the call could only raise NameError.
    print("感謝使用")
    sys.exit()


def init():
    """Prompt for a keyword and a count, then download that many images."""
    keyword = input("請輸入想要的圖片: ")
    raw_count = input("請輸入想要的數量: ")
    try:
        count = int(raw_count)
    except ValueError:
        print("數量必須是整數")
        sys.exit(1)
    getintofold(keyword)
    url = SEARCH_URL_PREFIX + quote(keyword, encoding='utf-8')
    getall(count, url)


if __name__ == "__main__":
    init()