爬取攝圖網裡的 音樂和視訊 攝圖網模擬登陸
阿新 • • 發佈:2018-12-17
# First, install selenium
from selenium import webdriver
import time
from lxml import etree
import json
#新增顯示等待
from selenium.webdriver.support.ui import WebDriverWait
#根據條件尋找對應節點
from selenium.webdriver.support import expected_conditions as EC
import requests
import re
import urllib.parse
import urllib
# HTTP headers sent with every request; the value is a desktop Chrome
# User-Agent string.
header = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/68.0.3440.106 Safari/537.36"
    ),
}
#設定無頭瀏覽器
# options=webdriver.ChromeOptions()
# options.set_headless()
#建立瀏覽器驅動
# driver = webdriver.Chrome(
# executable_path='/home/lbc/Documents/chromedriver',
# )
# # options=options
# driver.get('http://699pic.com/soundtrack/?sem=1&sem_kid=206316&sem_type=3')
# #獲取cookie
# cookies = driver.get_cookies()
# cookie_dict = {}
# for cookie in cookies:
# cookie_dict[cookie['name']] = cookie['value']
# # print(cookie_dict)
# #匯入滑鼠移入
# from selenium.webdriver import ActionChains
# #用xpath解析並拖拽滑鼠進行點選
# # 點選登入
# element = driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div/a[2]')
# #將滑鼠移動到指定的節點
# ActionChains(driver).move_to_element(element).perform()
# #將滑鼠移動到指定的節點並且點選該節點(單擊)
# ActionChains(driver).move_to_element(element).click(element).perform()
# # 手機號登入
# element = driver.find_element_by_xpath('//*[@id="alert-action-login"]/div/div/div/div[1]/div[2]/p[2]/a[1]')
# #將滑鼠移動到指定的節點
# ActionChains(driver).move_to_element(element).perform()
# #將滑鼠移動到指定的節點並且點選該節點(單擊)
# ActionChains(driver).move_to_element(element).click(element).perform()
# driver.find_element_by_name('phone').send_keys('15326245558')
# #隱式等待
# driver.find_element_by_name('passwd').send_keys('q134679.')
# # 輸入賬號密碼點選登入
# element = driver.find_element_by_xpath('//*[@id="alert-action-login"]/div/div/div/div[2]/div[1]/div/label[3]/a')
# #將滑鼠移動到指定的節點
# ActionChains(driver).move_to_element(element).perform()
# #將滑鼠移動到指定的節點並且點選該節點(單擊)
# ActionChains(driver).move_to_element(element).click(element).perform()
def qingqiu(url):
    """Fetch one listing page and crawl every track page linked from it.

    Prints the HTTP status code of the listing request, then calls
    ``music`` once per list item with the absolute URL of that item's
    ``soundEffect-name`` link.

    Args:
        url: URL of a soundtrack listing page.
    """
    # A timeout keeps a stalled connection from hanging the crawl forever.
    response = requests.get(url, headers=header, timeout=30)
    print(response.status_code)
    page = etree.HTML(response.text)
    items = page.xpath(
        '//div[@class="audio-list"]/ul[@class="soundEffect-block clearfix"]/li'
    )
    for item in items:
        hrefs = item.xpath('.//a[@class="soundEffect-name"]/@href')
        if not hrefs:
            # A list item without a track link: skip it rather than
            # aborting the whole page with an IndexError.
            continue
        # hrefs may be relative; resolve against the final (post-redirect) URL.
        music(urllib.parse.urljoin(response.url, hrefs[0]))
def music(url):
    """Fetch a track detail page and download every audio source on it.

    Prints the HTTP status code of the request, reads the page heading as
    the track title and calls ``song`` for each ``<source src=...>`` found
    under the ``audio0`` player.

    Args:
        url: Absolute URL of a track detail page.
    """
    # A timeout keeps a stalled connection from hanging the crawl forever.
    response = requests.get(url, headers=header, timeout=30)
    print(response.status_code)
    page = etree.HTML(response.text)
    headings = page.xpath('//div[@class="photo-content fl"]/h1/text()')
    # xpath() returns a list of text nodes; pass a plain string on so the
    # saved file is not named after the list's repr (e.g. "['x'].mp3").
    title = headings[0].strip() if headings else 'untitled'
    boxes = page.xpath(
        '//div[@class="audio-body"]/div[@class="audio-bodyBg"]'
        '/div[@class="audio-box clearfix"]'
    )
    for box in boxes:
        for src in box.xpath('./audio[@id="audio0"]/source/@src'):
            # src may be relative or protocol-relative ("//host/x.mp3");
            # urljoin leaves an already-absolute URL unchanged.
            song(urllib.parse.urljoin(response.url, src), title)
def song(t, title):
    """Download the audio file at ``t`` and save it as ``<title>.mp3``.

    The file is written relative to the current working directory. If the
    server does not answer with a success status, the status code is
    printed and nothing is written.

    Args:
        t: URL of the audio file.
        title: Track title used for the file name. May be a string or a
            list/tuple of strings (such as a raw xpath result), in which
            case the first element is used.
    """
    if isinstance(title, (list, tuple)):
        title = title[0] if title else ''
    # Replace path separators and other characters that are not valid in
    # file names so the title cannot escape the directory or break open().
    name = re.sub(r'[\\/:*?"<>|\r\n\t]+', '_', str(title)).strip() or 'untitled'
    # A timeout keeps a stalled connection from hanging the crawl forever.
    response = requests.get(t, headers=header, timeout=30)
    if not response.ok:
        # Do not save an error page under an .mp3 name.
        print(response.status_code)
        return
    with open('{}.mp3'.format(name), 'wb') as f:
        f.write(response.content)
if __name__ == '__main__':
    # range(3, 4) yields only 3, so exactly one listing page is crawled.
    for page_no in range(3, 4):
        listing_url = 'http://699pic.com/media/soundtrack-so-%s-0-0-0-0-0-0-0.html' % str(page_no)
        qingqiu(listing_url)