批量下載某網的美女圖片
阿新 • • 發佈:2022-03-02
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
"""Bulk-download the images linked from the listing pages under a site section.

The script walks ``<url>/page/1 .. <url>/page/N``, follows every thumbnail
link found on each listing page, and saves every image found in the linked
page's content area into ``./pictures``.
"""
import os
import time
from urllib.parse import urlsplit

import requests
from lxml import etree

# Running totals, updated by get_content() and reported by main().
sum_page = 0
sum_images = 0

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) "
                  "Gecko/20100101 Firefox/61.0"
}

# Page count used when the pager's last button yields no usable number.
DEFAULT_LAST_PAGE = 30


def download_page(url):
    """Fetch *url* and return the response body as text.

    Returns None (after printing a message) when the request fails or the
    server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
    except requests.RequestException:
        # Only request failures are handled here, so Ctrl-C still stops the run.
        print('頁面訪問失敗,', url)
        return None
    return r.text


def download_pic(imgUrl):
    """Fetch *imgUrl* and return the requests response object.

    Raises whatever ``requests.get`` raises; the HTTP status is not checked.
    """
    return requests.get(imgUrl, headers=headers, timeout=10)


def mkdir(path):
    """Create directory *path* if it does not exist yet and return *path*."""
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(path, exist_ok=True)
    return path


def _parse_html(html):
    """Parse *html* into an lxml tree; return None when there is no markup."""
    if not html:
        return None
    return etree.HTML(html)


def _save_image(img_url, directory):
    """Download *img_url* into *directory*; return True if a file was written."""
    # Name the file after the URL path only, so a query string does not end
    # up in the file name.
    file_name = os.path.basename(urlsplit(img_url).path)
    if not file_name:
        return False

    try:
        response = download_pic(img_url)
    except requests.RequestException:
        print('頁面訪問失敗,', img_url)
        return False
    if not response.ok:
        # Do not store an HTTP error body as if it were an image.
        print('頁面訪問失敗,', img_url)
        return False

    with open(os.path.join(directory, file_name), 'wb') as f:
        for chunk in response.iter_content(1024):
            f.write(chunk)
    return True


def get_content(html):
    """Download all images reachable from one listing page.

    *html* is the markup of a listing page. Every thumbnail link on it is
    fetched, and every image inside that page's ``entry-content`` div is
    saved into ``./pictures``. Pages and images that cannot be fetched are
    skipped. Updates the module-level ``sum_page`` and ``sum_images``.
    """
    global sum_page, sum_images

    selector = _parse_html(html)
    if selector is None:
        return

    # Links to the detail pages listed on this listing page.
    pages = selector.xpath(
        '//div[@class="archive-row"]//ul/li//a[@class="thumb-link"]/@href')
    if not pages:
        return

    # Target directory for the images; created once per listing page.
    pictures = mkdir(os.path.join(os.path.curdir, 'pictures'))

    for page_url in pages:
        sum_page += 1
        print('這是下載的第%s個頁面,%s' % (sum_page, page_url))

        page_selector = _parse_html(download_page(page_url))
        if page_selector is None:
            continue

        # Image links inside the detail page's content area.
        img_urls = page_selector.xpath(
            '//div[@class="entry-content"]//img/@src')
        for img_url in img_urls:
            if _save_image(img_url, pictures):
                # Count only images that were actually written to disk.
                sum_images += 1


def get_content_pages(url, html):
    """Walk the paginated listing under *url* and download its images.

    *html* is the markup of the first listing page. It is used only to read
    the number of the last page from the pager and is also dumped to
    ``temp.html``. Does nothing when *html* is empty or None.
    """
    selector = _parse_html(html)
    if selector is None:
        return

    with open('temp.html', 'w', encoding='utf-8') as f:
        f.write(html)

    # The text of the pager's last button is taken as the last page number.
    last_button = selector.xpath(
        '//div[@class="btn-group"]//button[last()]//text()')
    try:
        end_page = int(''.join(last_button).strip())
    except ValueError:
        end_page = DEFAULT_LAST_PAGE

    for page_no in range(1, end_page + 1):
        page_html = download_page(url + '/page/' + str(page_no))
        if page_html:
            get_content(page_html)


def main():
    """Download everything under the configured section and print the total."""
    url = 'https://www.jder.net/mx'
    html = download_page(url)
    get_content_pages(url, html)
    print('共下載圖片數為:', sum_images)


if __name__ == '__main__':
    main()