爬取pexels女性圖片+百度人臉檢測過濾高顏值美女
阿新 • • 發佈:2019-01-08
pexels網站提供了大量貼圖,從中搜索美女圖片,編寫爬蟲進行下載。下載後的圖片中除了女人外,還包含男人、風景、靜物和動物,因此呼叫百度人臉檢測模組進行識別,將其中顏值不低於60分的女性圖片儲存到另外一個資料夾。爬取圖片共計1251張,最後過濾出的美女共計287張。上程式碼:
爬蟲程式:
"""Download the thumbnail images shown on Pexels search-result pages.

For every result page of ``searchWord`` the script collects the ``src`` of
each ``article > a > img`` element and stores the image in ``save_path``.
"""
from bs4 import BeautifulSoup
import requests
import os
import time

# Folder the downloaded images are written to.
save_path = 'F://photos/'
url_path = 'https://www.pexels.com/search/'
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
}
searchWord = 'beauty'
# Result pages 1..99 of the search.
urls = [url_path + searchWord + '/?page={}'.format(str(i)) for i in range(1, 100)]

# Seconds to wait on a single HTTP request before giving up, so that one
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 30


def _image_filename(src):
    """Return the last path segment of image URL ``src``, without its query string.

    The whole segment is used rather than a fixed-length tail of the URL, so
    two images whose URLs merely end in the same characters do not overwrite
    each other, and the name can never contain a ``/``.
    """
    return src.split('?')[0].rstrip('/').rsplit('/', 1)[-1]


def main():
    """Crawl every URL in ``urls`` and save the images found into ``save_path``."""
    os.makedirs(save_path, exist_ok=True)

    for page, url in enumerate(urls, start=1):
        wb_data = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        print("當前爬取頁面連結", url)
        soup = BeautifulSoup(wb_data.text, 'lxml')

        # <img> tags without a src attribute yield None; drop them because
        # requests.get(None) raises.
        candidates = (img.get('src') for img in soup.select('article > a > img'))
        img_list = [src for src in candidates if src]
        print("第{}頁,共計{}張圖片".format(page, len(img_list)))

        for item in img_list:
            name = _image_filename(item)
            if not name:
                continue
            data = requests.get(item, headers=headers, timeout=REQUEST_TIMEOUT)
            # Do not store an HTTP error page under an image file name.
            if not data.ok:
                continue
            # The context manager closes the file even when the write fails.
            with open(os.path.join(save_path, name), 'wb') as fp:
                fp.write(data.content)

        # Pause between result pages.
        time.sleep(2)


if __name__ == '__main__':
    main()
顏值檢測程式:
"""Filter downloaded photos with the Baidu face-detection service.

Every file in ``file_path`` is sent to the detection API. A picture whose last
detected face is not male and scores at least ``MIN_BEAUTY`` (API score / 10)
is renamed with the score as a prefix and copied into ``copy_file_path``.
"""
from aip import AipFace
import base64
import os
import time
import shutil

# Credentials for the Baidu AI face-recognition service.
APP_ID = "換成你自己的"
API_KEY = "換成你自己的"
SECRET_KEY = "換成你自己的"

imageType = "BASE64"
options = {}
options["face_field"] = "gender,beauty"
options["face_type"] = "LIVE"

# Folder holding the downloaded pictures and folder receiving the selected ones.
file_path = 'F://photos/'
copy_file_path = 'F://highScore/'

# Lowest score (after dividing the API value by 10) that is still kept.
MIN_BEAUTY = 6


def get_file_content(filePath):
    """Return the content of the file at ``filePath`` as a Base64 string.

    The script sends images to the API with ``imageType`` set to "BASE64".
    """
    with open(filePath, 'rb') as fp:
        content = base64.b64encode(fp.read())
    return content.decode('utf-8')


def main():
    """Score every picture in ``file_path`` and copy the high scorers."""
    aipFace = AipFace(APP_ID, API_KEY, SECRET_KEY)
    # Create the destination up front; otherwise the first copy fails after
    # the source file has already been renamed.
    os.makedirs(copy_file_path, exist_ok=True)

    for file_list in os.listdir(file_path):
        src = os.path.join(file_path, file_list)
        # Sub-directories and other non-file entries cannot be read as images.
        if not os.path.isfile(src):
            continue

        result = aipFace.detect(get_file_content(src), imageType, options)
        # Throttle after every request, not only after a kept picture.
        # NOTE(review): assumes the delay exists to respect the API rate
        # limit - confirm against the account's quota.
        time.sleep(1)

        # A non-zero error code (for example 222202 for no face, 223110 for
        # too many faces) means there is nothing to score for this picture.
        if result.get('error_code'):
            continue

        # Only the response lookups are guarded, so real failures in the file
        # operations below are no longer silently swallowed.
        try:
            face = result['result']['face_list'][-1]
            sex_type = face['gender']['type']
            beauty = face['beauty']
        except (KeyError, TypeError, IndexError):
            continue

        # Keep female faces only.
        if sex_type == 'male':
            continue

        new_beauty = round(beauty / 10, 1)
        print(file_list, new_beauty)
        if new_beauty < MIN_BEAUTY:
            continue

        scored_name = str(new_beauty) + '_' + file_list
        copy_src = os.path.join(file_path, scored_name)
        copy_dst = os.path.join(copy_file_path, scored_name)
        # Rename the high-scoring photo so its name carries the score.
        os.rename(src, copy_src)
        # Copy the renamed photo into the high-score folder.
        shutil.copyfile(copy_src, copy_dst)


if __name__ == '__main__':
    main()
爬取的圖片:
過濾後的圖片:
看看最高顏值的美女:
較低顏值的女性: