簡單的python網路爬蟲實現
阿新 • • 發佈:2019-02-11
import urllib2
import urllib
import re
import time


def getHtml(url):
    """Download the page at *url* and return its raw body.

    The request carries a fixed mobile-Chrome ``User-Agent`` header
    instead of the default one sent by ``urllib2``.

    Args:
        url: Address of the page to fetch.

    Returns:
        Whatever ``response.read()`` yields for the whole response body.

    Raises:
        Any error raised by ``urllib2.urlopen`` (e.g. ``urllib2.URLError``)
        is propagated unchanged to the caller.
    """
    request = urllib2.Request(url)
    request.add_header(
        'User-Agent',
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.36',
    )
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        # Release the underlying socket even if reading fails part-way.
        response.close()
def getImage(html, save_dir="/home/qiracle/douyu/"):
    """Download every JPEG referenced by a ``data-original`` attribute.

    Scans *html* for ``data-original="...jpg"`` / ``"...jpeg"`` attributes,
    prints how many were found, and saves each image into *save_dir* as
    ``0.jpg``, ``1.jpeg``, ... in the order the attributes appear.

    Args:
        html: Page markup to search.
        save_dir: Directory the images are written to. It is created when
            missing and at least one image was found.

    Returns:
        None.

    Raises:
        Any error raised by ``urllib.urlretrieve`` or by creating
        *save_dir* is propagated unchanged.
    """
    import os

    # The dot before the extension is escaped so that only real
    # ".jpg" / ".jpeg" suffixes match (an unescaped "." matches any char).
    imglist = re.findall(r'data-original="(.*?\.(jpg|jpeg))"', html)
    print(len(imglist))
    if not imglist:
        return

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    # Each match is a (full_url, extension) tuple because the pattern has
    # two capture groups.
    for index, (img_url, extension) in enumerate(imglist):
        target = os.path.join(save_dir, str(index) + "." + extension)
        urllib.urlretrieve(img_url, target)
        # Pause between downloads to avoid hammering the server.
        time.sleep(1)
# Script driver: fetch the listing page, then download the images it references.
START_URL = "https://www.douyu.com/directory/game/yz"
html = getHtml(START_URL)
getImage(html)
最終爬到的結果如下: