1. 程式人生 > 其它 > 快看,這是我為你準備的 Python 爬取圖片教程

快看,這是我為你準備的Python爬取圖片教程

爬取圖片例項

• selenium + win32 爬取圖片

Python 學習交流 Q 群:903971231
"""爬取圖片"""
# Standard library
import os
import threading
import time
from ctypes import windll

# Third-party: HTTP, Windows automation (pywin32), Qt (PySide2), HTML parsing
# and browser automation. Every name from the original import list is kept,
# including the ones the visible code does not use.
import requests
import win32api
import win32clipboard
import win32con
from PySide2 import QtWidgets
from requests_html import HTMLSession, HTML
from PySide2.QtGui import QPixmap, QColor, QStandardItemModel, QStandardItem
from PySide2.QtCore import (
    QFile,
    Qt,
    QDateTime,
    QDate,
    QTime,
    QTimer,
    QStringListModel,
    QModelIndex,
)
from PySide2.QtUiTools import QUiLoader
from PySide2.QtWidgets import (
    QApplication,
    QTreeView,
    QTreeWidget,
    QHeaderView,
    QTreeWidgetItem,
    QWidget,
)
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


class Test:
    """Qt front end that scrapes wallpapers from netbian.com.

    Pages are loaded in a Selenium-driven Chrome window. Each image is saved
    by simulating a right-click and the following "save" dialog with win32
    mouse/keyboard events at fixed screen coordinates. Files found in the
    ``圖片`` directory are shown as a slideshow in the UI loaded from
    ``UI.ui``.

    NOTE(review): ``show_pic`` and ``get_img`` run on worker threads and call
    Qt widgets directly, exactly as the original code did. Qt widgets are
    normally only safe to touch from the GUI thread -- consider moving these
    updates to signals or a QTimer.
    """

    BASE_URL = 'http://www.netbian.com'
    IMAGE_DIR = '圖片'
    CACHE_FILE = '爬取圖片.html'
    # Seconds each slideshow picture stays on screen.
    SLIDE_INTERVAL = 3
    # Absolute screen position that is right-clicked to open the context menu
    # (assumed to lie on the wallpaper inside the browser window).
    IMAGE_POS = (500, 700)
    # Absolute screen position that is clicked before pasting the target path
    # (assumed to be the file-name box of the save dialog).
    FILENAME_BOX_POS = (274, 449)
    # Virtual-key code of the "V" key.
    _VK_V = 0x56

    def __init__(self):
        """Load the UI, start Chrome, fetch the first page, start the slideshow.

        Raises:
            OSError: if ``UI.ui`` cannot be opened.
        """
        super().__init__()

        # The .ui file must still be open while QUiLoader reads it; the
        # original closed it first and never checked that the open succeeded.
        ui_file = QFile('UI.ui')
        if not ui_file.open(QFile.ReadOnly):
            raise OSError('cannot open UI.ui: ' + ui_file.errorString())
        try:
            self.ui = QUiLoader().load(ui_file)
        finally:
            ui_file.close()

        self.ui.B_start.clicked.connect(self.start)
        self.ui.B_left.clicked.connect(lambda: self.change_index('left'))
        self.ui.B_right.clicked.connect(lambda: self.change_index('right'))

        # File names of the pictures available to the slideshow.
        self.img_list = []
        # Scale each picture to the label size.
        self.ui.label.setScaledContents(True)
        # Index into img_list of the picture currently displayed.
        self.index = 0

        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
        }
        # HTML source of the listing page currently being processed.
        self.text = ''

        # A visible (non-headless) browser is used on purpose: get_img saves
        # images by sending real mouse/keyboard events to the screen.
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 30)
        self.session = requests.Session()

        # The save dialog is given a path inside this directory, so make sure
        # it exists before anything is downloaded.
        os.makedirs(self.IMAGE_DIR, exist_ok=True)

        self.pull()
        self.start_show_pic()

    def _list_images(self):
        """Return the sorted file names directly inside the image directory."""
        try:
            with os.scandir(self.IMAGE_DIR) as entries:
                return sorted(e.name for e in entries if e.is_file())
        except FileNotFoundError:
            return []

    def _show_current(self):
        """Display the picture at ``self.index``; do nothing if there is none."""
        images = self.img_list  # snapshot: another thread may rebind the list
        if not images:
            return
        # Keep the index valid even if the list shrank since it was last set.
        self.index %= len(images)
        self.ui.label.setPixmap(QPixmap(self.IMAGE_DIR + '/' + images[self.index]))

    def _log(self, message):
        """Append ``message`` to the list widget and select the new row."""
        self.ui.listWidget.addItem(message)
        self.ui.listWidget.setCurrentRow(self.ui.listWidget.count() - 1)

    def change_index(self, button):
        """Step the slideshow one picture back or forward, wrapping around.

        Args:
            button: ``'left'`` shows the previous picture; any other value
                shows the next one.
        """
        if not self.img_list:
            return
        step = -1 if button == 'left' else 1
        self.index = (self.index + step) % len(self.img_list)
        self._show_current()

    def start_show_pic(self):
        """Run the slideshow loop on a daemon thread."""
        threading.Thread(target=self.show_pic, daemon=True).start()

    def show_pic(self):
        """Forever: rescan the image directory and show the next picture."""
        while True:
            self.img_list = self._list_images()
            self._show_current()
            # Always sleep, so an empty directory does not become a busy loop.
            time.sleep(self.SLIDE_INTERVAL)
            if self.img_list:
                # Wrap at len(); the original compared with '>' and so indexed
                # one element past the end of the list.
                self.index = (self.index + 1) % len(self.img_list)

    def start(self):
        """Run the scraper (``get_img``) on a daemon thread."""
        threading.Thread(target=self.get_img, daemon=True).start()

    def pull(self):
        """Load the first listing page into ``self.text``.

        A previously saved copy of the page source is read if it exists;
        otherwise the page is requested through the browser and saved for
        the next run.
        """
        if os.path.exists(self.CACHE_FILE):
            with open(self.CACHE_FILE, 'r', encoding='utf8') as f:
                self.text = f.read()
        else:
            self.browser.get('http://www.netbian.com/')
            self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.list')))
            self.text = self.browser.page_source
            with open(self.CACHE_FILE, 'w', encoding='utf8') as f:
                f.write(self.text)
            print(self.text)
        self.ui.B_start.setEnabled(True)

    def get_img(self):
        """Download every wallpaper on the current listing page and the next ones.

        Links starting with ``/desk`` are treated as wallpaper detail pages;
        the first link starting with ``/index`` is treated as the next listing
        page. Pages are followed in a loop rather than by recursion, so the
        call depth no longer grows with the number of pages, and a listing
        URL is never visited twice.
        """
        seen_pages = set()
        while True:
            listing = BeautifulSoup(self.text, 'lxml')
            anchors = listing.select('.list ul li a')
            print(anchors)

            next_page = None
            for anchor in anchors:
                href = anchor.get('href', '')
                if href.startswith('/desk'):
                    self._download_wallpaper(self.BASE_URL + href)
                elif href.startswith('/index') and next_page is None:
                    next_page = self.BASE_URL + href

            if next_page is None or next_page in seen_pages:
                return
            seen_pages.add(next_page)

            print(next_page)
            self.browser.get(next_page)
            self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.list')))
            self.text = self.browser.page_source
            self._log('下一頁')

    def _download_wallpaper(self, detail_url):
        """Open one wallpaper detail page and save its image unless present."""
        self.browser.get(detail_url)
        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.pic')))
        soup = BeautifulSoup(self.browser.page_source, 'lxml')
        img = soup.select_one('#main > div.endpage > div > p > a > img')
        if img is None or not img.get('title'):
            # Unexpected page layout: skip it instead of crashing the worker.
            print('skipped (image element not found):', detail_url)
            return

        title = img['title']
        path = os.path.join(os.getcwd(), self.IMAGE_DIR, title + '.jpg')

        # Close a second window if one is open, then return to the main one.
        if len(self.browser.window_handles) > 1:
            self.browser.switch_to.window(self.browser.window_handles[1])
            self.browser.close()
            self.browser.switch_to.window(self.browser.window_handles[0])
        time.sleep(1)

        if os.path.exists(path):
            self._log(title + '.jpg 已存在,不下載')
            return
        self._save_via_context_menu(path)
        self._log(title + '.jpg 下載完成')

    def _save_via_context_menu(self, path):
        """Save the image on screen to ``path`` by simulating user input.

        The sequence is: right-click at ``IMAGE_POS``, press ``V``
        (presumably the context-menu accelerator for "save image as" --
        confirm for the browser locale in use), click ``FILENAME_BOX_POS``,
        paste ``path`` from the clipboard and press Enter. The sleeps give
        the menu and the dialog time to appear.
        """
        key_up = win32con.KEYEVENTF_KEYUP

        # Right-click at the image position.
        windll.user32.SetCursorPos(*self.IMAGE_POS)
        win32api.mouse_event(win32con.MOUSEEVENTF_RIGHTDOWN, 0, 0, 0)
        time.sleep(0.05)
        win32api.mouse_event(win32con.MOUSEEVENTF_RIGHTUP, 0, 0, 0)
        time.sleep(1)

        # Press "V".
        win32api.keybd_event(self._VK_V, 0, 0, 0)
        win32api.keybd_event(self._VK_V, 0, key_up, 0)

        # Put the target path on the clipboard. CF_UNICODETEXT keeps the
        # non-ASCII directory name intact regardless of the ANSI code page;
        # the clipboard is always released, even on error.
        win32clipboard.OpenClipboard()
        try:
            win32clipboard.EmptyClipboard()
            win32clipboard.SetClipboardText(path, win32con.CF_UNICODETEXT)
        finally:
            win32clipboard.CloseClipboard()

        # Click the input box.
        windll.user32.SetCursorPos(*self.FILENAME_BOX_POS)
        win32api.mouse_event(win32con.MOUSEEVENTF_LEFTDOWN, 0, 0, 0)
        win32api.mouse_event(win32con.MOUSEEVENTF_LEFTUP, 0, 0, 0)
        time.sleep(1)

        # Ctrl+V to paste the path.
        win32api.keybd_event(win32con.VK_CONTROL, 0, 0, 0)
        win32api.keybd_event(self._VK_V, 0, 0, 0)
        win32api.keybd_event(self._VK_V, 0, key_up, 0)
        win32api.keybd_event(win32con.VK_CONTROL, 0, key_up, 0)
        time.sleep(3)

        # Enter to confirm.
        win32api.keybd_event(win32con.VK_RETURN, 0, 0, 0)
        win32api.keybd_event(win32con.VK_RETURN, 0, key_up, 0)
        time.sleep(2)


if __name__ == '__main__':
    app = QApplication([])
    # Use the Fusion widget style.
    app.setStyle('Fusion')
    window = Test()
    window.ui.show()
    try:
        app.exec_()
    finally:
        # Shut down Chrome and its driver process when the GUI exits.
        window.browser.quit()

最後

今天的分享到這裡就完了,祝大家五一快樂鴨!!!