1. 程式人生 > python3-按關鍵字爬取百度圖片

python3-按關鍵字爬取百度圖片

別說話!上程式碼:

import re,os
import requests
from urllib.request import urlretrieve


def download(url: str, filename: str, filepath: str) -> None:
    """Fetch ``url`` and store it as ``filename`` inside directory ``filepath``.

    The download is best-effort: a failure is reported on stdout and then
    ignored, so one broken image link does not abort a whole batch.

    Args:
        url: Address of the resource to fetch.
        filename: Name of the file to create.
        filepath: Directory that receives the file.

    Returns:
        None, whether or not the download succeeded.
    """
    path = os.path.join(filepath, filename)
    try:
        urlretrieve(url, path)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must still propagate so the user can stop a long crawl with Ctrl-C.
        print('【錯誤】當前圖片無法下載')


def search(word="美女", localpath="data/", page=None):
    """Download every image found on one Baidu image-search result page.

    The images are written to a sub-directory of ``localpath`` named after
    ``word``; the directory is created when missing.

    Args:
        word: Search keyword; also used as the output sub-directory name.
        localpath: Base directory for downloaded images.
        page: Zero-based result page index. ``None`` is treated as page 0.

    Returns:
        True when the result page was fetched and its image links were
        handed to ``download``; False when the page request itself failed.
    """
    # The default used to be multiplied directly, which raised TypeError.
    page = 0 if page is None else page

    # Join instead of concatenating so a base path without a trailing
    # separator still yields "<base>/<word>".
    localpath = os.path.join(localpath, word)
    os.makedirs(localpath, exist_ok=True)

    url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word={word}&pn={pn}&gsm={gsm:x}&ct=&ic=0&lm=-1&width=0&height=0'.format(
        word=word, pn=20 * page, gsm=40 + 20 * page)
    print("HHHC:0====>page=%d,url=\"%s\"" % (page, url))

    try:
        # Without a timeout an unresponsive server would block forever.
        html = requests.get(url, timeout=10).text
    except requests.RequestException:
        print("【錯誤】requests.get 失敗")
        return False

    pic_url = re.findall(r'"objURL":"(.*?)",', html, re.S)
    for url in pic_url:
        print(url)
        # Last path component with any query string stripped.
        filename = os.path.split(url)[1].split('?')[0]
        if len(filename.split(".")) != 2:
            # Only a warning: the download is still attempted, and
            # ``download`` reports its own failure if the name is unusable.
            print("【錯誤】檔名異常:" + filename)
        download(url, filename, localpath)
    return True


def search_20_page(word, first_page=1, last_page=29):
    """Crawl a run of consecutive result pages for ``word`` into ``data/``.

    Despite the name, the default range covers pages 1 through 29; that
    historical default is kept for existing callers.

    Args:
        word: Search keyword.
        first_page: First page index to fetch (inclusive).
        last_page: Last page index to fetch (inclusive).
    """
    for page in range(first_page, last_page + 1):
        search(word, "data/", page)


def test_search_list():
    """Crawl images for a fixed list of fruit keywords."""
    obj_list = ["蘋果", "香蕉", "桔子", "橙子", "桃子", "櫻桃", "龍眼", "荔枝",
                "雪梨", "草莓", "葡萄", "獼猴桃", "菠蘿", "番石榴", "青梅"]
    for obj in obj_list:
        search_20_page(obj)


if __name__ == '__main__':
    test_search_list()

效果: