1. 程式人生 > 其它 >python爬蟲爬取騰訊視訊

python爬蟲爬取騰訊視訊

技術標籤:Python爬蟲、python庫、python資料視覺化、騰訊、xpath、python、selenium

python爬蟲爬取騰訊視訊

話不多說,直接上程式碼:

import requests
from lxml import etree
from selenium import webdriver
from fake_useragent import UserAgent

class tencent_movie(object):
    """Interactive helper: search v.qq.com for a title and open it in Chrome.

    The flow is: ask for a title, fetch the search page, follow the first
    search hit, optionally let the user pick an episode, then point a
    Selenium-driven Chrome window at a resolver URL built from the chosen
    link.
    """

    # Prefix prepended to the chosen video link before opening the browser.
    RESOLVER_PREFIX = 'https://api.akmov.net/?url='
    # Seconds to wait on each HTTP request so a stalled server cannot hang us.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Pick a random User-Agent and store it as the request headers."""
        ua = UserAgent(verify_ssl=False)
        # A single draw is sufficient: the headers dict only ever keeps one
        # value, so drawing repeatedly in a loop was wasted work.
        self.headers = {'User-Agent': ua.random}

    @staticmethod
    def _read_int(prompt):
        """Prompt the user and return the answer as an int, or None if invalid."""
        try:
            return int(input(prompt))
        except ValueError:
            return None

    def get_html(self, url):
        """Download ``url`` with the stored headers and return it as UTF-8 text.

        Args:
            url: Address of the page to fetch.

        Returns:
            The response body decoded as UTF-8.
        """
        response = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        return response.content.decode('utf-8')

    def parse_html_tengxun(self, html):
        """Follow the first search hit in ``html`` and open it in Chrome.

        Args:
            html: Markup of a v.qq.com search result page.

        Returns:
            None. On success the browser is left open on ``self.driver``;
            when there is no search hit or the user input is invalid, a
            message is printed and no browser is started.
        """
        target = etree.HTML(html)
        links = target.xpath('//h2[@class="result_title"]/a/@href')
        if not links:
            # Previously an IndexError: nothing matched the search.
            print('沒有找到相關結果')
            return
        host = links[0]

        new_html = etree.HTML(self.get_html(host))

        first_select = self._read_int('1.電視劇\n2.電影\n')
        if first_select is None:
            print('輸入無效,請輸入數字')
            return

        if first_select == 1:
            titles = new_html.xpath(
                '//div[@class="mod_episode"]/span/a/span/text()'
            )
            new_links = new_html.xpath('//div[@class="mod_episode"]/span/a/@href')
            for title in titles:
                print('第%s集' % title)

            select = self._read_int('你要看第幾集:(輸入數字即可)')
            if select is None:
                print('輸入無效,請輸入數字')
                return
            # Reject 0 and negatives explicitly: Python would otherwise
            # index from the end of the list and open the wrong episode.
            if not 1 <= select <= len(new_links):
                print('沒有這一集')
                return
            last_host = self.RESOLVER_PREFIX + new_links[select - 1]
        else:
            last_host = self.RESOLVER_PREFIX + host

        # The browser is intentionally left open so the video can be watched.
        self.driver = webdriver.Chrome()
        self.driver.maximize_window()
        self.driver.get(last_host)

    def main(self):
        """Ask for a title, run the search and hand the page to the parser."""
        from urllib.parse import quote

        name = str(input('請輸入電視劇或電影名:'))
        # Percent-encode the title so characters such as '&', '#' or spaces
        # cannot break out of the ``q`` query parameter.
        url = 'https://v.qq.com/x/search/?q={}&stag=0&smartbox_ab='.format(
            quote(name, safe='')
        )
        html = self.get_html(url)
        self.parse_html_tengxun(html)


if __name__ == '__main__':
    spider = tencent_movie()
    spider.main()

在這裡插入圖片描述