1. 程式人生 > 實用技巧 > 爬取電影網站

爬取電影網站

code

import os
import shutil
import sys
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


BASE_URL = "http://zimiyy.com"
LIST_URL_TEMPLATE = BASE_URL + "/mov/0/0/all/{}.html"
# The original script noted that the listing had 719 pages.
LAST_LIST_PAGE = 719


def asleep(driver):
    """Set the driver's implicit wait to 3.5 seconds, then pause for 2 seconds.

    Args:
        driver: The Selenium WebDriver instance to configure.
    """
    driver.implicitly_wait(3.5)
    time.sleep(2)


def absolute_url(href, base=BASE_URL):
    """Resolve *href* against *base*.

    Site-relative hrefs ("/mov/1.html") get the site prefix; hrefs that are
    already absolute are returned unchanged, so the result is always safe to
    hand to ``driver.get``.
    """
    return urljoin(base, href)


def collect_play_links(driver, play_items):
    """Open every play page and return ``{link text: video URL or None}``.

    Args:
        driver: The Selenium WebDriver used to open the play pages.
        play_items: Parsed ``<li>`` elements, each expected to wrap one
            ``<a>`` pointing at a play page.

    Returns:
        A dict keyed by the anchor text of each play link. The value is the
        text of the player's info-panel span, or ``None`` when that span
        could not be read.
    """
    links = {}
    for item in play_items:
        anchor = item.find("a")
        # An <li> without a usable link has no play page to visit.
        if anchor is None or not anchor.get("href"):
            continue
        source_label = anchor.string
        # Enter the play page.
        driver.get(absolute_url(anchor.get("href")))
        # Read the video link. This is deliberately best-effort: a play page
        # without the panel must not abort the whole crawl.
        try:
            video_url = driver.find_element(
                By.XPATH, "//span[@class='dplayer-info-panel-item-data']"
            ).text
        except Exception as exc:
            print(exc)
            video_url = None
        # Record the result under the link's label.
        links[source_label] = video_url
    return links


def crawl_movie(driver, anchor):
    """Print one movie's details and its play links.

    Args:
        driver: The Selenium WebDriver used to open the pages.
        anchor: Parsed ``<a>`` element from the listing page; its ``href``
            must be present (the caller filters out anchors without one).
    """
    movie_page_url = absolute_url(anchor.get("href"))
    print(movie_page_url)
    print(anchor.get("title"))
    poster = anchor.find("img")
    # Anchors without a poster image are reported as None instead of crashing.
    print(poster.get("src") if poster is not None else None)
    time.sleep(2)

    # Enter the detail page.
    driver.get(movie_page_url)
    # Grab the description and the play-list markup.
    description_html = driver.find_element(
        By.CLASS_NAME, "info"
    ).get_attribute('innerHTML')
    play_list_html = driver.find_element(
        By.ID, "stab_1_71"
    ).get_attribute('innerHTML')
    play_items = BeautifulSoup(play_list_html, 'html.parser').findAll('li')
    print(description_html)

    print(collect_play_links(driver, play_items))
    print("*"*17)
    time.sleep(3)


def crawl(driver, first_page=1, last_page=LAST_LIST_PAGE):
    """Walk the listing pages ``first_page..last_page`` (inclusive).

    Every anchor found in the listing block of a page is treated as a movie
    and passed to ``crawl_movie``.
    """
    for page_number in range(first_page, last_page + 1):
        driver.get(LIST_URL_TEMPLATE.format(page_number))
        listing_html = driver.find_element(
            By.XPATH, "//div[@class='index-tj mb clearfix']/ul"
        ).get_attribute('innerHTML')
        for anchor in BeautifulSoup(listing_html, 'html.parser').findAll('a'):
            if not anchor.get("href"):
                continue
            crawl_movie(driver, anchor)


def main():
    """Start Chrome, crawl the whole listing, and always close the browser."""
    driver = webdriver.Chrome()
    try:
        asleep(driver)
        crawl(driver)
    finally:
        # Close the browser even when the crawl is interrupted by an error.
        driver.quit()


if __name__ == "__main__":
    main()