requests-爬取美女圖片原始碼
阿新 • • 發佈:2019-01-14
爬取思路:
1.分析ajax請求,找到存放圖片地址的json
2.解析json資料,提取其中的圖片url
3.再次請求圖片url,通過open()和write()方法將圖片儲存至本地。
廢話少說,直接上程式碼:
前提條件是在當前.py檔案同級目錄下新建一個beauty360的資料夾用來儲存圖片
"""Download images from the image.so.com "beauty" channel.

Workflow:
1. Request the scroll endpoint, which answers with JSON describing images.
2. Walk the JSON and pull out each image URL (``qhimg_url``).
3. Request every image URL and write the bytes to ``./beauty360/<n>.jpg``.
"""
import os
import re
import time

import requests

base_url = "https://image.so.com/z?"

# Sequence number used for the next saved file name (1.jpg, 2.jpg, ...).
num = 1

# Directory the images are written to; created on demand by get_pic().
SAVE_DIR = "./beauty360"

# Seconds to wait for any single HTTP request before giving up, so one
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10

# Headers for the JSON listing request.
# NOTE(review): the Cookie value looks like it was captured from a browser
# session and will presumably expire - confirm before relying on it.
headers = {
    "Host": "image.so.com",
    "Referer": "https://image.so.com/zv?ch=beauty",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
    "Cookie": "__guid=16527278.4407656107534301000.1546852761488.196; __guid=100021698.456336978600101800.1546852883449.8489; count=2; tracker=; lightbox_thumb_visible=1; _S=ab9f5ecb680ae35247705feda8f5bda4; test_cookie_enable=null",
}

# Headers for the individual image downloads.
# NOTE(review): advertising "br" lets the server answer with Brotli, which
# requests only decodes when a brotli package is installed - confirm.
header1 = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'max-age=0',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
}


def get_json(page):
    """Fetch the JSON listing that holds the image URLs.

    Args:
        page: Value sent as the ``i`` query parameter (the main loop passes
            1..599; its exact server-side meaning is not visible here).

    Returns:
        The decoded JSON body when the request succeeds with status 200,
        otherwise ``None`` (network error, non-200 status or invalid JSON).
    """
    paras = {
        "ch": "beauty",
        "a": "jsonpViewScroll",
        "i": page,
        "count": 30,
    }
    try:
        response = requests.get(
            base_url, params=paras, headers=headers, timeout=REQUEST_TIMEOUT
        )
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON. A bare ``except``
        # would also swallow KeyboardInterrupt and hide programming errors.
        print("wrong url.")
    return None


def get_pic(json):
    """Extract image URLs from a listing and save each image locally.

    Args:
        json: Decoded listing as returned by :func:`get_json`; ``None`` or
            an empty value is accepted and simply results in no downloads.

    Side effects:
        Creates ``SAVE_DIR`` if needed, writes ``<num>.jpg`` files into it,
        prints one line per saved image and advances the global ``num``.
    """
    global num

    # get_json() returns None on failure; skip the page instead of crashing.
    if not json:
        return

    datas = json.get("data")
    if not datas:
        return

    # Make sure the target directory exists so open() cannot fail on it.
    os.makedirs(SAVE_DIR, exist_ok=True)

    for item in datas:
        for group in item.get("groupdata") or []:
            picurl = (group.get("qhimg_url") or "").strip()
            if not picurl:
                # Entry without an image URL - nothing to download.
                continue

            try:
                response = requests.get(
                    picurl, headers=header1, timeout=REQUEST_TIMEOUT
                )
            except requests.RequestException as exc:
                # One unreachable image must not abort the whole crawl.
                print("download failed: " + picurl + " (" + str(exc) + ")")
                continue

            if response.status_code != 200:
                continue

            print(str(num) + ": " + picurl)
            with open(os.path.join(SAVE_DIR, str(num) + ".jpg"), "wb") as fp:
                fp.write(response.content)
            # Advance only after a successful save so numbering starts at 1
            # and has no gaps.
            num = num + 1


if __name__ == "__main__":
    for page in range(1, 600):
        get_pic(get_json(page))
        # Pause between listing requests to avoid hammering the server.
        time.sleep(3)