Day11 (黑客成長日記) 爬取網站圖片
阿新 • • 發佈:2018-11-03
#匯入第三方庫
# coding:utf-8
import requests,re
#找到需要爬取的網站'http://www.qqjia.com/sucai/sucai1210.htm'
#1>獲取網站 2>正則表示式匹配不同圖片的地址 3>找到所有圖片的URL
#開發講究見名識意
#1.1 定義一個函式get到url
def get_urls(page_url='http://www.qqjia.com/sucai/sucai1210.htm'):
    """Fetch *page_url* and return the list of image URLs scraped from it.

    The page is matched with a fixed regex against raw HTML; only images
    embedded as ``<img border="0" alt="" src="..." /></p>`` are captured.

    Args:
        page_url: Page to scrape. Defaults to the original tutorial URL,
            so existing zero-argument callers behave the same.

    Returns:
        list[str]: The captured ``src`` attribute values (possibly empty).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within 10 seconds.
    """
    # timeout prevents the script from hanging forever on a dead server.
    response = requests.get(page_url, timeout=10)
    response.raise_for_status()  # fail loudly instead of scraping an error page
    # Non-greedy group captures just the src value between the fixed markers.
    url_pattern = r'<img border="0" alt="" src="(.*?)" /></p>'
    url_list = re.findall(url_pattern, response.text)
    print(url_list)  # quick visual verification, as in the original tutorial
    return url_list
#第四步,下載網頁資料
#再定義一個函式 目的:下載資料
def get_gif(url, name):
    """Download the image at *url* and save it as ``D:\\pygif\\<name>.gif``.

    Args:
        url: Direct URL of the image to download.
        name: Integer used as the file name (formatted with ``%d``).

    Raises:
        requests.Timeout: If the download does not complete within 30 seconds.
        OSError: If ``D:\\pygif`` does not exist or is not writable.
    """
    response = requests.get(url, timeout=30)
    # Raw string: the original 'D:\pygif\%d.gif' relied on '\p' not being a
    # recognized escape, which is a SyntaxWarning on modern Python.
    with open(r'D:\pygif\%d.gif' % name, 'wb') as ft:
        # .content is the raw bytes of the response (vs .text, the decoded str)
        ft.write(response.content)
# (5)啟動這個程式
if __name__ == '__main__':
    # Scrape the image URLs, then download each one, numbering files from 1.
    for index, image_url in enumerate(get_urls(), start=1):
        get_gif(image_url, index)
備註:response.text 是解碼後的網頁原始碼(字串),response.content 是未解碼的二進位制資料(bytes)。
import requests, re


def url_get():
    """Fetch the listing page and return the image URLs scraped from it.

    Returns:
        list[str]: ``src`` values matched by the fixed regex (possibly empty).
    """
    response = requests.get('http://qq.yh31.com/zjbq/2920180.html')
    # Non-greedy group captures only the src attribute value.
    url_add = r'<img border="0" alt="" src="(.*?)" />'
    url_list = re.findall(url_add, response.text)
    print(url_list)  # visual check of what was matched
    return url_list


def download(url, name):
    """Download *url* and save its bytes as ``D:\\pygif\\<name>``.

    Args:
        url: Direct URL of the resource to download.
        name: Integer used as the file name (no extension, as in the original).
    """
    response = requests.get(url)
    # Raw string fixes the invalid '\p' escape in the original literal.
    with open(r'D:\pygif\%d' % name, 'wb') as ft:
        ft.write(response.content)


if __name__ == '__main__':
    url_list = url_get()
    # The page stores relative paths; prepend the image host to form full URLs.
    for a, url in enumerate(url_list, start=1):
        com_url = 'http://mm.yh31.com:88' + url
        download(com_url, a)