為愛而碼
阿新 • 發佈:2018-11-09
下載微信文章中圖片
downloadIMage.py
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re,os
import urllib,urllib2;
import time
import sys
# Reload the sys module and then set the interpreter-wide default string
# encoding to utf8.
# NOTE(review): this is a Python 2-only idiom (reload() is not a builtin in
# Python 3) and it affects every module in the process - confirm it is still
# wanted before porting.
reload(sys)
sys.setdefaultencoding('utf8')
def getHtml(url):
    """Download ``url`` and return the response body.

    The request is sent with a browser-like ``User-Agent`` header so that the
    server does not treat it as coming from a robot.

    :param url: address of the page to fetch.
    :return: the body exactly as returned by the response's ``read()``.
    :raises: any error raised by ``urllib2.urlopen`` or ``read()``; nothing
        is caught here.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    page = urllib2.urlopen(request)
    try:
        return page.read()
    finally:
        # Close the response even when read() fails part-way through, instead
        # of leaving the connection to be reclaimed by garbage collection.
        page.close()
def getImg(html, savePath):
    """Download every image referenced by a ``data-src`` attribute in ``html``.

    Files are written into ``savePath`` as ``0.jpg``, ``1.jpg``, ... in the
    order the addresses appear. The directory is not created here, so it has
    to exist already. Downloading is best effort: a failed image is reported
    with a ``[-]`` line and the loop carries on with the next address.

    :param html: page source to scan for image addresses.
    :param savePath: existing directory that receives the downloaded files.
    :return: None.
    """
    # Example of an address this pattern picks up:
    # http://mmbiz.qpic.cn/mmbiz_jpg/wlJkphkR2NMibwTo1cqHwdhLTMYmbV0IOw5vCaJuTsbvTdukCQwUicPClXRibcnY8RCsszAfBYlrJnfz8icUIBWWGw/640?wx_fmt=jpeg
    imgre = re.compile(r'data-src="(.*?)"')
    x = 0
    for imgurl in imgre.findall(html):
        if not imgurl:
            # An empty attribute can never be fetched; skip it without using
            # up a file number or waiting.
            continue
        # os.path.join picks the right separator instead of a hard-coded
        # backslash.
        target = os.path.join(savePath, '%s.jpg' % x)
        try:
            urllib.urlretrieve(imgurl, target)
            print("[+] imgurl =%s" % imgurl)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and SystemExit
            # must still be able to stop a long download run.
            print("[-] imgurl =%s" % imgurl)
        # The number advances after a failure too, so each file number keeps
        # matching the position of its address among the non-empty ones.
        x += 1
        # Wait one second before the next request.
        time.sleep(1)
def bookUrl(html, saveRoot=r'C:\Users\pradmin\Desktop\downloadImage\images',
            limit=6):
    """Follow the first links in ``html`` and download each linked page's images.

    Every ``<a href="..." target="_blank">text</a>`` match yields a page
    address and a link text. The page is fetched with ``getHtml`` and its
    images are stored by ``getImg`` in a sub-directory of ``saveRoot`` named
    after the link text. The sub-directory is created when it is missing.

    :param html: source of the index page that contains the links.
    :param saveRoot: directory under which one sub-directory per link is
        created; defaults to the location the script has always used.
    :param limit: maximum number of links to follow, ``None`` for all of them.
    :return: None.
    :raises: errors from ``getHtml`` or from creating the directory are not
        caught and stop the run.
    """
    reg = r'<a href=\"(.*?)\" target=\"_blank\">(.*?)<\/a>'
    linkre = re.compile(reg)
    for url, bookName in linkre.findall(html)[:limit]:
        # NOTE(review): attribute values cut out of raw HTML normally carry
        # "&" escaped as "&amp;"; undo that so the query string is sent as
        # intended. Confirm against a real page.
        url = url.replace('&amp;', '&')
        # Decode byte strings only; text that is already unicode is kept.
        if isinstance(bookName, bytes):
            bookName = bookName.decode("utf-8")
        # The link text becomes a directory name, so replace the characters
        # Windows does not allow in file names (this also removes path
        # separators coming from the page).
        dirName = re.sub(r'[\\/:*?"<>|]', '_', bookName).strip()
        savePath = os.path.join(saveRoot, dirName)
        print("[+] url =%s" % url)
        page = getHtml(url)
        # getImg does not create its target directory; without this step
        # every download would fail.
        if not os.path.isdir(savePath):
            os.makedirs(savePath)
        getImg(page, savePath)
# Index article whose links are followed when this file is run as a script.
originUrl="http://mp.weixin.qq.com/s?__biz=MzA4NjQzNzY4Mw==&mid=2454531002&idx=4&sn=67826657f4486bfa0cb8f195262a86f9&chksm=887131e6bf06b8f09b2ec821f49c71c64536cf585d9f17664709fcfc533d39c976c30da91a8d&mpshare=1&scene=1&srcid=1215qBhkFwNhLrfnZlSMmZSj#rd"

if __name__ == '__main__':
    # Start downloading only when executed directly, so that importing the
    # module for its functions does not trigger network traffic.
    html = getHtml(originUrl)
    bookUrl(html)