python爬蟲爬取騰訊視訊
阿新 • • 發佈:2021-02-11
技術標籤:Python爬蟲、python庫、python資料視覺化、騰訊、xpath、python、selenium
python爬蟲爬取騰訊視訊
話不多說,直接上程式碼:
import requests
from lxml import etree
from selenium import webdriver
from fake_useragent import UserAgent
class tencent_movie(object):
    """Interactive helper that searches v.qq.com and opens a result in Chrome.

    Flow (driven by ``main``): prompt for a title, download the search
    result page, take the first result link, optionally let the user pick
    an episode, then open ``PLAYER_PREFIX + <chosen link>`` in a
    Selenium-controlled Chrome window.
    """

    # Search page template; ``{}`` receives the percent-encoded title.
    SEARCH_URL = 'https://v.qq.com/x/search/?q={}&stag=0&smartbox_ab='
    # Prefix the selected video link is appended to before it is opened.
    PLAYER_PREFIX = 'https://api.akmov.net/?url='
    # Seconds to wait on each HTTP request; without a timeout ``requests``
    # can block forever on an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Pick one random User-Agent and store it as the request headers."""
        ua = UserAgent(verify_ssl=False)
        # A single draw is enough: the headers dict is reused for every
        # request made by this instance.
        self.headers = {'User-Agent': ua.random}

    def get_html(self, url):
        """Download ``url`` and return the response body decoded as UTF-8.

        Args:
            url: Address to fetch with a GET request.

        Returns:
            The response body as ``str``.
        """
        response = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        return response.content.decode('utf-8')

    def parse_html_tengxun(self, html):
        """Pick a video from a search result page and open it in Chrome.

        The first link matching ``//h2[@class="result_title"]/a/@href`` is
        used. The user is asked whether it is a TV series (``1``) or a
        movie (anything else). For a series the detail page is downloaded,
        its episode list is printed and the user picks an episode number
        (1-based). If the search page has no result, the episode list is
        empty, or the chosen number is out of range, a message is printed
        and no browser is started.

        Args:
            html: Markup of the search result page.

        Raises:
            ValueError: If a prompt is answered with something ``int()``
                cannot parse.
        """
        target = etree.HTML(html)
        links = target.xpath('//h2[@class="result_title"]/a/@href')
        if not links:
            print('沒有找到相關結果')
            return
        host = links[0]

        first_select = int(input('1.電視劇\n2.電影\n'))
        if first_select == 1:
            # Only the series path needs the detail page, so it is fetched
            # here rather than unconditionally.
            new_html = etree.HTML(self.get_html(host))
            titles = new_html.xpath(
                '//div[@class="mod_episode"]/span/a/span/text()'
            )
            new_links = new_html.xpath(
                '//div[@class="mod_episode"]/span/a/@href'
            )
            if not new_links:
                print('沒有找到劇集列表')
                return
            for title in titles:
                print('第%s集' % title)
            select = int(input('你要看第幾集:(輸入數字即可)'))
            # Reject 0 and negatives explicitly: they would otherwise wrap
            # around and silently select an episode from the end.
            if not 1 <= select <= len(new_links):
                print('輸入的集數超出範圍')
                return
            last_host = self.PLAYER_PREFIX + new_links[select - 1]
        else:
            last_host = self.PLAYER_PREFIX + host

        # The driver is kept on the instance and intentionally not quit so
        # the browser window stays open after this method returns.
        self.driver = webdriver.Chrome()
        self.driver.maximize_window()
        self.driver.get(last_host)

    @classmethod
    def _search_url(cls, name):
        """Return the search URL for ``name`` with the title percent-encoded."""
        from urllib.parse import quote

        # safe='' also encodes '/', '&', '#', ... so that no character of
        # the title can be mistaken for URL structure.
        return cls.SEARCH_URL.format(quote(name, safe=''))

    def main(self):
        """Prompt for a title, run the search and hand the page to the parser."""
        name = input('請輸入電視劇或電影名:')
        html = self.get_html(self._search_url(name))
        self.parse_html_tengxun(html)
if __name__ == '__main__':
    # Script entry point: build the crawler and start the interactive flow.
    tencent_movie().main()