新手的一個小漫畫爬蟲
阿新 • • 發佈:2021-02-01
技術標籤:python
@[TOC](新手的一個小漫畫爬蟲)
一、爬蟲小程式碼
爬取的是 http://www.js518.net/ 的漫畫,目前只能一話一話地下載,在此記錄一下程式碼的成長。
#coding: UTF-8
from bs4 import BeautifulSoup
from selenium import webdriver
import urllib.request
import os
import time
def Requ(url):
    """Fetch ``url`` and return the response body as bytes.

    The request carries a desktop-Chrome ``User-agent`` header instead of
    urllib's default one.

    Args:
        url: Address of the resource to download (the script passes image
            URLs).

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated unchanged from ``urlopen`` when
            the request cannot be completed.
    """
    headers = {
        "User-agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/86.0.4240.22 Safari/537.36"
        )
    }
    requ = urllib.request.Request(url, headers=headers)
    # Read inside a ``with`` block so the response (and its underlying
    # connection) is closed even if ``read()`` raises.
    with urllib.request.urlopen(requ) as response:
        return response.read()
def PicUrl(url, bq, id):
    """Load ``url`` in Chrome and look up one element by tag name and id.

    A fresh Chrome WebDriver session is started for every call, the page
    source is captured after ``driver.get`` returns, and the session is
    shut down before the HTML is parsed.

    Args:
        url: Page to open in the browser.
        bq: Tag name to search for (the script passes ``'span'`` and
            ``'img'``).
        id: Value of the element's ``id`` attribute. The name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        Whatever ``soup.find(bq, id=id)`` returns: the first matching
        element, or ``None`` when nothing matches (per BeautifulSoup's
        documented behaviour).
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        html = driver.page_source
    finally:
        # Always shut the browser down; otherwise a failed page load would
        # leave a Chrome process running.
        driver.quit()
    soup = BeautifulSoup(html, 'html.parser')
    return soup.find(bq, id=id)
if __name__ == '__main__':
    # Interactive entry point: ask for a chapter URL, then save every page
    # of that chapter as <cwd>/2/<page number>.jpg.
    print("漫畫網址:******,搜素你要看的漫畫後,將網址輸入:")
    url = input()

    # The node immediately following the opening <span id="k_total"> tag is
    # interpreted as the total page count of the chapter.
    a = PicUrl(url, 'span', 'k_total').next_element

    # Build the target directory portably and make sure it exists; the
    # hard-coded '\\2\\' separators only worked on Windows, and only when
    # the folder had been created by hand.
    save_dir = os.path.join(os.getcwd(), '2')
    os.makedirs(save_dir, exist_ok=True)

    for i in range(1, int(a) + 1):
        rullurl = url + r'?p=' + str(i)
        picurl = PicUrl(rullurl, 'img', 'qTcms_pic').get('src')
        # Download before opening the file so a failed request does not
        # leave an empty .jpg behind.
        data = Requ(picurl)
        with open(os.path.join(save_dir, '%d.jpg' % i), 'wb') as f:
            f.write(data)
        # Pause between pages (presumably to go easy on the site).
        time.sleep(1)
        print("save %d pic" % i)
    print('save end!')
`