1. 程式人生 > 實用技巧 >親測,完全有效,使用webdriver,自寫,裁判文書網,批量全部下載

親測,完全有效,使用webdriver,自寫,裁判文書網,批量全部下載

直接上程式碼(注意:執行前請把程式中的「手機號」與「密碼」換成自己的登入資訊)

 1 """程式說明"""
 2 # -*-  coding: utf-8 -*-
 3 # Author: cao wang
 4 # Datetime : 2020
 5 # software: PyCharm
 6 # 收穫:
 7 from selenium import webdriver
 8 from selenium.webdriver.common.by import By
 9 from selenium.webdriver.support import expected_conditions as EC
10 from selenium.webdriver.support.wait import
WebDriverWait 11 import math 12 import time 13 import logging 14 from selenium.webdriver.firefox.options import Options 15 import os 16 from crawler_tools import user_agent as u 17 from datetime import datetime 18 from selenium.common.exceptions import * 19 import pyautogui 20 import random 21 from selenium.webdriver import
ActionChains 22 from retrying import retry 23 24 25 def login(driver): 26 """登入""" 27 # 切換框架 28 wait = WebDriverWait(driver, 20) 29 driver.refresh() 30 frame = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="contentIframe"]'))) 31 driver.switch_to.frame(frame) 32
33 click = wait.until(EC.presence_of_element_located( 34 (By.XPATH, '//*[@id="phoneNumber"]'))) 35 click.send_keys("手機號") 36 time.sleep(1) 37 click1 = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/app-root/div/app-login/div/div/form/div/div[2]/input'))) 38 # click1.clear() 39 click1.send_keys("密碼") 40 time.sleep(1) # 等一秒是最優選擇,短了網路錯誤 41 button1 = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.custom-button'))) 42 button1.click() 43 # 必須加上表單退出,否者就是死元素無法定位 44 driver.switch_to.default_content() 45 46 # 通過輸入,進行進入資料 47 select_value = wait.until(EC.presence_of_element_located( 48 (By.XPATH, '//*[@id="_view_1540966814000"]/div/div[1]/div[2]/input')))#//*[@id="_view_1540966814000"]/div/div[1]/div[2]/input 49 select_value.send_keys("合同糾紛") 50 time.sleep(2) # 等一秒是最優選擇,短了網路錯誤 51 driver.get( 52 "https://wenshu.court.gov.cn/website/wenshu/181217BMTKHNT2W0/index.html?pageId=b67ff15b548ff825d1e09dc899ecf778&s21=%E5%90%88%E5%90%8C%E7%BA%A0%E7%BA%B7") 53 five_to_15(driver) 54 down_load(driver) 55 while(1): 56 next_page(driver) 57 time.sleep(2) 58 down_load(driver) 59 60 def five_to_15(driver): 61 wait = WebDriverWait(driver, 20) 62 button_ = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="_view_1545184311000"]/div[8]/div/select')))#//*[@id="_view_1545184311000"]/div[8]/div/select 63 button_.click() 64 # time.sleep(1) 65 button_ = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="_view_1545184311000"]/div[8]/div/select/option[3]')))#//*[@id="_view_1545184311000"]/div[8]/div/select/option[3] 66 button_.click() 67 time.sleep(1) 68 69 def down_load(driver): 70 wait = WebDriverWait(driver, 20) 71 button_select = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="_view_1545184311000"]/div[2]/div[4]/a[1]'))) 72 button_select.click() 73 time.sleep(2) # 等一秒是最優選擇,短了網路錯誤 74 button_download = wait.until( 75 EC.element_to_be_clickable((By.XPATH, '//*[@id="_view_1545184311000"]/div[2]/div[4]/a[3]'))) 76 button_download.click() 77 78 def 
next_page(driver): 79 wait = WebDriverWait(driver, 20) 80 button_ = wait.until(EC.element_to_be_clickable((By.XPATH, '//div[@class="left_7_3"]/a[last()]'))) 81 time.sleep(2) 82 button_.click() 83 84 85 86 if __name__ =="__main__": 87 # 讀取限定詞目錄 88 driver = webdriver.Chrome('E:\Google\Driver\chromedriver.exe') 89 driver.get("https://wenshu.court.gov.cn/website/wenshu/181217BMTKHNT2W0/index.html?pageId=b67ff15b548ff825d1e09dc899ecf778&s21=%E5%90%88%E5%90%8C%E7%BA%A0%E7%BA%B7") 90 time.sleep(5) 91 login(driver)