python3-按關鍵字爬取百度圖片
阿新 • • 發佈:2019-02-03
別說話!上程式碼:
import os
import re
from urllib.parse import quote
from urllib.request import urlretrieve

# Baidu "flip" (paginated) image-search URL. For a given page index the query
# carries pn = 20 * page and gsm = 40 + 20 * page (gsm rendered in hex).
_SEARCH_URL_TEMPLATE = (
    'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8'
    '&word={word}&pn={pn}&gsm={gsm:x}&ct=&ic=0&lm=-1&width=0&height=0'
)

# Captures the value of every "objURL" entry embedded in the result page.
_OBJ_URL_RE = re.compile(r'"objURL":"(.*?)",', re.S)

# Seconds to wait for the search page before giving up instead of hanging.
_REQUEST_TIMEOUT = 10


def download(url, filename, filepath):
    """Save the resource at *url* as ``filepath/filename``.

    Failures are reported on stdout and never raised, so one broken image
    does not abort a whole crawl.

    Args:
        url: Address of the image to fetch.
        filename: Name of the file to create inside *filepath*.
        filepath: Existing directory that receives the file.

    Returns:
        ``True`` if the file was written, ``False`` if the download failed.
    """
    path = os.path.join(filepath, filename)
    try:
        urlretrieve(url, path)
    except Exception as exc:
        # Deliberately broad (bad URL, network error, unwritable path ...),
        # but unlike a bare ``except:`` it lets KeyboardInterrupt and
        # SystemExit through, so Ctrl+C still stops the crawler.
        print('【錯誤】當前圖片無法下載', exc)
        return False
    return True


def _build_search_url(word, page):
    """Return the search URL for *word* at page index *page*."""
    # Percent-encode the keyword so characters such as '&', '#' or spaces
    # cannot break the query string.
    return _SEARCH_URL_TEMPLATE.format(
        word=quote(word, safe=''), pn=20 * page, gsm=40 + 20 * page)


def _filename_from_url(url):
    """Return the last path component of *url* without its query string."""
    return os.path.split(url)[1].split('?')[0]


def search(word="美女", localpath="data/", page=None):
    """Fetch one page of Baidu image results for *word* and download them.

    Images are stored in the directory ``localpath/word``, which is created
    if necessary.

    Args:
        word: Search keyword; also used as the sub-directory name.
        localpath: Base directory for downloads.
        page: Page index used to compute the ``pn``/``gsm`` query values.
            ``None`` is treated as ``0``.

    Returns:
        ``False`` if the result page could not be fetched, ``True``
        otherwise. Failures of individual images are printed by
        ``download`` and do not affect the return value.
    """
    # Imported lazily so the module (and ``download``) can be imported
    # without the third-party ``requests`` package being installed.
    import requests

    if page is None:
        # The default used to crash with TypeError on ``20 * page``.
        page = 0

    # os.path.join works whether or not *localpath* ends with a separator.
    target_dir = os.path.join(localpath, word)
    os.makedirs(target_dir, exist_ok=True)

    url = _build_search_url(word, page)
    print("HHHC:0====>page=%d,url=\"%s\"" % (page, url))
    try:
        html = requests.get(url, timeout=_REQUEST_TIMEOUT).text
    except requests.RequestException as exc:
        print("【錯誤】requests.get 失敗", exc)
        return False

    for image_url in _OBJ_URL_RE.findall(html):
        print(image_url)
        filename = _filename_from_url(image_url)
        # A name without exactly one dot is only reported; the download is
        # still attempted.
        if len(filename.split(".")) != 2:
            print("【錯誤】檔名異常:" + filename)
        download(image_url, filename, target_dir)
    return True


def search_20_page(word, pages=range(1, 30)):
    """Run ``search`` for *word* over several result pages.

    Args:
        word: Search keyword.
        pages: Iterable of page indices to fetch. Despite the function's
            name, the default covers pages 1 through 29 and does not
            include page 0.
    """
    for page in pages:
        search(word, "data/", page)


def test_search_list():
    """Crawl images for a fixed list of fruit names (not a unit test)."""
    obj_list = ["蘋果", "香蕉", "桔子", "橙子", "桃子", "櫻桃", "龍眼", "荔枝",
                "雪梨", "草莓", "葡萄", "獼猴桃", "菠蘿", "番石榴", "青梅"]
    for obj in obj_list:
        search_20_page(obj)


# The ``test_`` prefix is historical; tell pytest/nose not to collect this
# function, since running it starts a full crawl.
test_search_list.__test__ = False


if __name__ == '__main__':
    test_search_list()
效果: