1. 程式人生 > selenium 批量下載檔案,json,重新命名

selenium 批量下載檔案,json,重新命名

from selenium import webdriver
import time
import json
import os
import datetime
# Index into the configured names; new_name() reads it and advances it by one
# on every call.
number = 0

# Set the download path.
# An image-recognition library was tried for solving the captcha, but its
# success rate was too low, so the captcha is entered manually instead.
# https://segmentfault.com/a/1190000015489113?utm_source=tag-newest
options = webdriver.ChromeOptions()
# Every backslash in the directory is escaped explicitly; a lone "\A" is an
# invalid escape sequence that newer Python versions warn about.
prefs = {'profile.default_content_settings.popups': 0,
         'download.default_directory': 'C:\\Users\\Administrator\\Desktop\\requests_pwrd'}
options.add_experimental_option('prefs', prefs)
# NOTE(review): positional driver path and `chrome_options=` look like the
# Selenium 3 calling convention - confirm the installed Selenium version.
driver = webdriver.Chrome(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe",chrome_options=options)


def get_json():
    """Read the keys and values of the JSON settings file.

    The file ``haiwai_settings.json`` is resolved against the current working
    directory and must contain a JSON object. Elsewhere in this file the keys
    are passed to the browser as URLs and the values are used as parts of
    file names.

    Returns:
        A ``(keys_list, values_list)`` tuple of two lists of strings, in the
        order the entries appear in the file.

    Raises:
        OSError: If the settings file cannot be opened.
        AttributeError: If the top-level JSON value is not an object.
    """
    # The context manager closes the handle even if parsing fails.
    # The platform default encoding is kept on purpose so existing settings
    # files are read exactly as before.
    with open('haiwai_settings.json', 'r') as settings_file:
        settings = json.load(settings_file)
    keys_list = [str(key) for key in settings.keys()]
    values_list = [str(value) for value in settings.values()]
    return keys_list, values_list


def get_url_file():
    """Open the first configured URL, wait for the operator, then download.

    Maximizes the browser window, loads the first URL from the settings file,
    sends (empty) text to the first two ``controls`` input elements, pauses on
    a console prompt, and finally hands over to ``get_file()``.

    Raises:
        IndexError: If the settings file has no entries or the page exposes
            fewer than two matching input elements.
    """
    driver.maximize_window()
    # Element lookups may poll for up to 10 seconds before giving up.
    driver.implicitly_wait(10)
    url_link, _ = get_json()
    driver.get(url_link[0])
    # Look the inputs up once and reuse the result for both fields.
    # NOTE(review): presumably the account and password boxes; the values
    # sent here are blank - confirm what should be typed.
    form_inputs = driver.find_elements_by_xpath('//*[@class="controls"]/input')
    form_inputs[0].send_keys('')
    form_inputs[1].send_keys('')
    # The typed text is not forwarded to the page; the prompt only blocks
    # until the operator presses Enter.
    # NOTE(review): confirm whether the captcha should be sent to a form field.
    input('請輸入網頁中的驗證碼')
    get_file()


def get_file():
    """Visit every configured URL and rename the file found after each visit.

    The URLs are the keys of the settings file, visited in file order.
    """
    urls, _ = get_json()
    for target in urls:
        driver.get(target)
        # Fixed pause before looking for the newest file.
        # NOTE(review): one second may be too short for a slow or large
        # download - confirm.
        time.sleep(1)
        rename_file()


def rename_file():
    """Rename the newest file in the download directory to its configured name.

    The old name comes from ``sort_file()`` and the new name from
    ``new_name()``. Both are bare file names, so they are joined with the
    download directory before renaming.

    Raises:
        OSError: If the rename fails (for example, the target already exists
            on Windows).
    """
    oldname = sort_file()
    newname = new_name()
    dir_link = 'C:\\Users\\Administrator\\Desktop\\requests_pwrd'
    # Rename by full path instead of os.chdir(): changing the working
    # directory would make the relative settings-file lookup in get_json()
    # fail on later calls.
    os.rename(os.path.join(dir_link, oldname), os.path.join(dir_link, newname))


def sort_file(dir_link='C:\\Users\\Administrator\\Desktop\\requests_pwrd'):
    """Return the name of the most recently modified entry in a directory.

    Args:
        dir_link: Directory to inspect. Defaults to the download directory
            used by the rest of this script.

    Returns:
        The bare entry name (not a full path) with the latest modification
        time.

    Raises:
        IndexError: If the directory contains no entries.
        OSError: If the directory cannot be listed.
    """
    names = os.listdir(dir_link)
    if not names:
        raise IndexError('no files found in {!r}'.format(dir_link))
    # Order oldest -> newest by modification time; the last item is the newest.
    names.sort(key=lambda fn: os.path.getmtime(os.path.join(dir_link, fn)))
    return names[-1]


def new_name():
    """Build the file name for the next download.

    The name is today's date as ``yymmdd``, followed by the settings value at
    position ``number``, followed by ``.xls``. The module-level ``number`` is
    advanced by one after the value has been read.

    Raises:
        IndexError: If ``number`` is past the last configured value.
    """
    global number
    date_prefix = datetime.date.today().strftime('%y%m%d')
    # Fetch the configured values; only the one at the current index is used.
    _, labels = get_json()
    label = labels[number]
    number += 1
    return date_prefix + label + '.xls'


if __name__ == '__main__':
    try:
        get_url_file()
    finally:
        # Always end the browser session, even when a step above fails, so
        # the browser and driver processes are not left running.
        driver.quit()