簡單的圖片批量爬取
阿新 • • 發佈:2020-12-19
技術標籤:python學習筆記
一、獲取html頁面內容
用 requests 獲取頁面內容,再用 BeautifulSoup 解析
import os
import requests
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
# Fetch the listing page and parse it into `soup` for the extraction step.

# Use a randomly chosen browser User-Agent string instead of the HTTP
# library's default one.
ua = UserAgent(verify_ssl=False)
user_agent = ua.random

# Listing page whose <img> tags are scraped later in the script.
url = 'https://www.yunbtv.com/vodtype/oumeiju.html'
headers = {
    'User-Agent': user_agent,
}

# Without a timeout requests waits indefinitely on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
# Stop here on 4xx/5xx instead of parsing an error page as if it were the
# listing (which would silently yield zero images).
response.raise_for_status()
html_content = response.text
soup = BeautifulSoup(html_content, 'lxml')
二、建立資料夾,用於存放圖片
# Directory that will receive the downloaded images: an "imgs" folder placed
# two levels up from this file, i.e. in the parent of the script's directory.
curpath = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
imgs = os.path.join(curpath, 'imgs')
# exist_ok=True replaces the separate exists() check, so there is no window
# in which another process can create the directory between check and mkdir.
os.makedirs(imgs, exist_ok=True)
三、提取圖片並存入資料夾
# Download every lazily-loaded image on the page into the `imgs` directory.
# The real image URL is read from the tag's `data-original` attribute and
# the file is named after its `alt` text.
img = soup.find_all('img', {'class': 'lazy'})
for i in img:
    try:
        jpg_url = i['data-original']
        name = i['alt']
        # Path separators in the alt text would point outside `imgs` or at a
        # non-existent sub-directory, so neutralise them.
        safe_name = name.replace('/', '_').replace('\\', '_')
        r = requests.get(jpg_url, timeout=10)
        # Do not save an HTML error body under a .jpg name.
        r.raise_for_status()
        # `with` closes the file even if the write fails part-way.
        with open(os.path.join(imgs, '%s.jpg' % safe_name), 'wb') as f:
            f.write(r.content)
    except (KeyError, requests.RequestException, OSError) as exc:
        # Best-effort: report the failed image and carry on with the rest.
        # KeyError covers a tag missing `data-original` or `alt`.
        print('skipped image: %r' % (exc,))