Python 抓取靜態網頁圖片
阿新 • • 發佈:2019-02-10
以抓取東北大學官網(http://www.neu.edu.cn/)首頁的圖片為例(以下程式碼適用於 Python 2):
#coding=utf-8
import os
import re
import time
import urllib2
import urlparse
# Download every picture referenced by an <img src="..."> tag on a static
# page and store the files in ./img/.  This script targets Python 2
# (urllib2 / urlparse).

PAGE_URL = 'http://www.neu.edu.cn/'
IMG_DIR = './img'
TIMEOUT = 10  # seconds; without a timeout urlopen can block indefinitely
DELAY = 1     # seconds to pause between image requests

# The character class [^"]* keeps each match inside one src="..." value.
# A greedy ".*" would run on to the last matching quote on the line and
# glue several attributes or tags into a single bogus "URL".
IMG_SRC = re.compile(r'<img src="([^"]*\.(?:jpg|png|jpeg))"')


def _fetch(address):
    """Return the raw body served at *address*, always closing the response."""
    response = urllib2.urlopen(address, timeout=TIMEOUT)
    try:
        return response.read()
    finally:
        response.close()


html = _fetch(PAGE_URL)

# urljoin resolves relative, root-relative and already-absolute src values
# against the page URL; plain string concatenation only handles the first.
image_urls = [urlparse.urljoin(PAGE_URL, src) for src in IMG_SRC.findall(html)]

# open() does not create missing directories, so make sure the target exists.
if not os.path.isdir(IMG_DIR):
    os.makedirs(IMG_DIR)

for image_url in image_urls:
    try:
        data = _fetch(image_url)
    except IOError as err:
        # urllib2.URLError (and HTTPError) derive from IOError; one broken
        # image should not abort the remaining downloads.
        print('skipped %s: %s' % (image_url, err))
    else:
        # Name the local file after the last path segment of the URL.
        filename = os.path.join(IMG_DIR, image_url[image_url.rfind('/') + 1:])
        print(filename)
        with open(filename, 'wb') as fw:
            fw.write(data)
    # Throttle requests so the server is not hammered.
    time.sleep(DELAY)