1. 程式人生 > >Python爬蟲:Selenium常用操作,下載youtube視訊例項

Python爬蟲:Selenium常用操作,下載youtube視訊例項

selenium常用操作:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Common Selenium operations. `driver`, `url` and `arg` are not defined in
# this snippet; they must already exist (see the full example further down).
driver.get(url)

# Type text into an input field.
driver.find_element(By.ID, "sf_url").send_keys(arg)

# Click an element with the mouse. The original called the non-existent
# `find_element_by_class`; the locator now matches the full example, which
# finds the submit button by id.
driver.find_element(By.ID, "sf_submit").click()

# Fetch a lazily loaded element: poll for up to 20 seconds until an element
# matching the CSS selector is present in the DOM.
element = WebDriverWait(driver, 20).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "[class='row title']"))
)
print(element)
print(element.get_attribute("title"))
print("text:", element.text)

常用API:
https://selenium-python-zh.readthedocs.io/en/latest/api.html#locate-elements-by


下面的例子是下載youtube視訊(利用https://zh.savefrom.net/網站得到下載地址):

if __name__ == "__main__":
    # Look up download links for one YouTube video through zh.savefrom.net:
    # submit the video URL, wait for the result title, list the links found,
    # let the user pick one, and print a ready-to-run wget command.
    arg = "https://www.youtube.com/watch?v=***"
    driver = get_selenium_driver()
    try:
        url = "https://zh.savefrom.net/"
        driver.get(url)
        driver.find_element(By.ID, "sf_url").send_keys(arg)
        driver.find_element(By.ID, "sf_submit").click()

        # Wait up to 20 seconds for the result title to become visible.
        element = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "[class='row title']")
            )
        )
        title = element.text.replace(" ", "_")
        print(title)

        # Parent of the parent of the title element.
        pele = element.find_element(By.XPATH, "./../..")

        # All <a> elements whose title attribute contains the marker text.
        # NOTE(review): an XPath starting with "//" is evaluated against the
        # whole document even when called on `pele`; use ".//a[...]" if only
        # descendants of `pele` are wanted -- confirm against the page.
        tag_as = pele.find_elements(By.XPATH, "//a[contains(@title,'視訊格式')]")

        # Map link title -> (download URL, data-type attribute). Later links
        # with a duplicate title overwrite earlier ones.
        m = {
            tag_a.get_attribute("title"): (
                tag_a.get_attribute("href"),
                tag_a.get_attribute("data-type"),
            )
            for tag_a in tag_as
        }

        li = list(m)
        for idx, label in enumerate(li):
            print("%s: %s" % (idx, label))

        i = int(input("intpu:"))
        href, dt = m[li[i]]
    finally:
        # Always close the browser, even if the wait times out or the user
        # enters an invalid choice.
        driver.quit()

    print("""\nwget -O "%s.%s" "%s" """ % (title, dt, href))