使用 Python 對圖片進行爬取
阿新 • • 發佈:2018-12-22
對京東上面的手機圖片進行爬取:按 F12 找到相應的程式碼之後進行模式匹配,並過濾掉沒用的資訊,實現程式碼如下:
import os
import re
import sys
import urllib
import urllib.error
import urllib.request as request

# Directory the original script saved into (a Windows path).
DEFAULT_SAVE_DIR = "F:\\C\\mobilepic\\"

# Lazily matches from the product-list container up to the pagination bar.
# DOTALL is needed because the decoded page contains real newlines.
GOODS_LIST_RE = re.compile(
    r'<div id="J_goodsList".+?<div class="page clearfix">', re.S
)

# Matches each lazily loaded 220x220 product thumbnail and captures its
# address without the leading "//" (the attribute is protocol-relative).
IMAGE_RE = re.compile(
    r'<img width="220" height="220" class="err-product" data-img="1" '
    r'source-data-lazy-img="//(.+?\.jpg)" />',
    re.S,
)


def craw(url, page, save_dir=DEFAULT_SAVE_DIR):
    """Download every product thumbnail found on one search-result page.

    The page at ``url`` is fetched, the product-list section is isolated,
    and each thumbnail in it is saved as ``<page>-<n>.jpg`` inside
    ``save_dir``, where ``n`` is the 1-based position of the image in the
    list. An image that cannot be downloaded is reported on stderr and
    skipped; the remaining images keep their own numbers.

    Args:
        url: Address of the search-result page to scan.
        page: Page label used as the first part of each file name.
        save_dir: Directory the images are written to. It is created on
            demand, and only when at least one image was found.

    Returns:
        None.

    Raises:
        urllib.error.URLError: If the result page itself cannot be fetched.
    """
    with request.urlopen(url) as response:
        # Decode instead of str(bytes): matching against the b'...' repr
        # only worked by accident of how newlines and quotes are escaped.
        html = response.read().decode("utf-8", errors="replace")

    goods_list = GOODS_LIST_RE.search(html)
    if goods_list is None:
        # The layout changed or the request was blocked; nothing to do.
        print("No product list found on page", page, file=sys.stderr)
        return

    image_paths = IMAGE_RE.findall(goods_list.group(0))
    if not image_paths:
        return

    os.makedirs(save_dir, exist_ok=True)
    # enumerate() gives every image a unique, stable number. The original
    # hand-maintained counter was reset by "x = + 1" in the error handler,
    # which made later downloads overwrite earlier files.
    for index, image_path in enumerate(image_paths, start=1):
        imagename = os.path.join(save_dir, "{}-{}.jpg".format(page, index))
        imageurl = "http://" + image_path
        try:
            request.urlretrieve(imageurl, filename=imagename)
        except urllib.error.URLError as e:
            # Best effort: report the failure and move on to the next image.
            print("Skipped", imageurl, "-", e, file=sys.stderr)
            continue
        print(imagename)


def main():
    """Crawl the phone search-result pages labelled 1 and 2."""
    for i in range(1, 3):
        # NOTE(review): the literal '3' right after str(i) makes the query
        # ask for page=13 and page=23. It looks like a leftover from the
        # URL this was copied from -- confirm the intended page numbering.
        url = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&cid2=653&cid3=655&page=" + str(
            i) + '3&s=58&click=0'
        craw(url, i)
        print("Finish:", i)


if __name__ == "__main__":
    main()
結果: