Python selenium PIL 全網頁滾動截圖 && headless全網頁截圖
阿新 • • 發佈:2021-10-20
-
思路
先擷取當前螢幕(可視區域)的圖片,以其高度作為基準高度 h;再取得整個網頁 body 的總高度 H;接著每次向下滾動 h 並截圖(約 H/h 次),最後滾動到頁尾補上不足一屏的部分,再通過 PIL 將所有截圖由上到下依序拼接成一張長圖。
-
程式碼
# -*- coding:utf-8 -*-
# author: [email protected]
# software: PyCharm
import os
from time import sleep

from PIL import Image


class ScreenShot:
    """Build a full-page screenshot by scrolling and stitching viewport captures."""

    # JavaScript snippets used to move the viewport.
    __JS__ = {
        'scroll_to_bottom': "window.scroll({top:document.body.clientHeight,left:0,behavior:'auto'});",
        'scroll_to_y': "window.scroll({top:%d,left:0,behavior:'auto'});",
    }
    __base_end__ = 'tmp_end.png'
    __scroll_bottom__ = 'scroll_to_bottom'
    __scroll_y__ = 'scroll_to_y'
    __body__ = '//body'
    __height__ = 'height'
    # Kept only so existing references keep resolving; it is no longer executed
    # because it is POSIX-only and wipes every PNG in the working directory.
    __clear_shell__ = 'rm -rf *.png'
    __RGB__ = 'RGB'

    @classmethod
    def screen_shot(cls, driver, title, uploader_url='', delete=False):
        """Capture the whole page as one tall image named ``<title>.png``.

        The first viewport capture becomes the base image; the page is then
        scrolled one viewport at a time and every new capture is appended to
        the base image. A final capture taken at the bottom of the page
        supplies the part that does not fill a whole viewport.

        :param driver: Selenium WebDriver instance with the page already loaded.
        :param title: base name of the resulting image file (``<title>.png``).
        :param uploader_url: upload endpoint; currently unused (upload is a TODO).
        :param delete: when true, remove every image file written by this call,
            including the final stitched image.
        :return: URL of the uploaded image; always ``''`` until upload exists.
        """
        base_image = '{}.png'.format(title)
        written = [base_image]
        driver.save_screenshot(base_image)

        # find_element(by, value) works on Selenium 3 and 4; the
        # find_element_by_* helpers were removed in Selenium 4.3.
        body_h = int(driver.find_element('xpath', cls.__body__).size.get(cls.__height__))
        with Image.open(base_image) as first_frame:
            image_h = first_frame.size[1]

        # Scroll offsets are CSS pixels while the screenshot is in device
        # pixels, so derive the ratio instead of assuming a 2x display.
        # NOTE(review): assumes the screenshot covers exactly the viewport.
        viewport_h = int(driver.execute_script('return window.innerHeight;') or 0)
        if viewport_h <= 0:
            # Fall back to the previous hard-coded 2x assumption.
            viewport_h = max(image_h // 2, 1)
        scale = image_h / viewport_h

        full_frames, remainder = divmod(body_h, viewport_h)
        for i in range(1, full_frames):
            driver.execute_script(cls.__JS__[cls.__scroll_y__] % (viewport_h * i))
            sleep(.5)  # give the page time to settle after scrolling
            frame = f'tmp_{i}.png'
            driver.save_screenshot(frame)
            written.append(frame)
            cls.__join_images__(base_image, frame, 0, base_image, scale=scale)

        # A bottom frame is needed only when the page is taller than one
        # viewport and is not an exact multiple of it; otherwise the frames
        # above already cover everything and appending would duplicate content.
        if full_frames >= 1 and remainder:
            driver.execute_script(cls.__JS__[cls.__scroll_bottom__])
            sleep(.5)
            driver.save_screenshot(cls.__base_end__)
            written.append(cls.__base_end__)
            cls.__join_images__(base_image, cls.__base_end__,
                                viewport_h - remainder, base_image, scale=scale)

        # TODO: upload the image
        url = ''

        # Remove only the files this call wrote.
        if delete:
            for path in written:
                try:
                    os.remove(path)
                except FileNotFoundError:
                    pass  # already gone; nothing left to clean up
        return url

    @classmethod
    def __join_images__(cls, png1, png2, size=0, output='result.png', scale=2):
        """Stack ``png2`` below ``png1`` and save the result to ``output``.

        :param png1: path of the upper image.
        :param png2: path of the lower image.
        :param size: vertical overlap between the two images, in CSS pixels;
            that many rows at the top of ``png2`` are hidden behind ``png1``.
        :param output: path of the stitched image (may be the same as ``png1``).
        :param scale: device pixels per CSS pixel, used to convert ``size``
            into image rows; defaults to the former hard-coded value 2.
        :return: None
        """
        overlap_px = int(round(size * scale))
        with Image.open(png1) as upper, Image.open(png2) as lower:
            width, upper_h = upper.size
            lower_h = lower.size[1]
            joint = Image.new(cls.__RGB__, (width, upper_h + lower_h - overlap_px))
            # Paste the lower image first so that, in the overlap, the upper
            # image ends up on top.
            joint.paste(lower, (0, upper_h - overlap_px))
            joint.paste(upper, (0, 0))
        # Save after both inputs are closed, since ``output`` may be ``png1``.
        joint.save(output)


if __name__ == '__main__':
    from selenium import webdriver

    driver = webdriver.Chrome()
    try:
        driver.get("https://www.cnblogs.com/worldline/")
        ScreenShot.screen_shot(driver, 'worldline')
    finally:
        # Always end the session so no browser process is left behind.
        driver.quit()
-
其他
如果是在 headless 模式,可以先把瀏覽器視窗大小設為整個頁面的寬高,再直接截圖一次,即可得到全網頁截圖:
def get_image(url, pic_name):
    """Save a full-page screenshot of ``url`` using headless Chrome.

    The window is resized to the document's scroll width and height, so a
    single screenshot covers the whole page.

    :param url: address of the page to capture.
    :param pic_name: path of the image file to write.
    :return: None
    """
    # Imported locally so the snippet is runnable on its own.
    import time

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    chrome_options = Options()
    chrome_options.add_argument('headless')
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        time.sleep(.5)  # let the page render before measuring it
        width = driver.execute_script("return document.documentElement.scrollWidth")
        height = driver.execute_script("return document.documentElement.scrollHeight")
        print(width, height)
        driver.set_window_size(width, height)
        time.sleep(.5)  # let the layout settle after the resize
        driver.save_screenshot(pic_name)
    finally:
        # quit() ends the whole session; close() would only close the window
        # and leave the driver process running.
        driver.quit()