1. 程式人生 > >為愛而碼

為愛而碼

下載微信文章中圖片
downloadIMage.py

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re,os
import urllib,urllib2;
import time
import sys
# Python 2 only: site.py deletes sys.setdefaultencoding at interpreter startup,
# so the sys module is reloaded to get the function back before calling it.
reload(sys)
# Make UTF-8 (instead of ASCII) the codec used for implicit str <-> unicode
# conversions in this process.
sys.setdefaultencoding('utf8')

# Fetch a web page by URL.
def getHtml(url):
    """Return the body of *url* exactly as read from the HTTP response.

    A browser-like User-Agent header is sent so that the server does not
    treat the request as coming from a bot.  Errors raised by urllib2
    propagate to the caller.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    page = urllib2.urlopen(request)
    try:
        return page.read()
    finally:
        # Release the connection even when read() fails.
        page.close()


# Extract the image addresses with a regular expression and download them.
def getImg(html, savePath):
    """Download every ``data-src`` image referenced in *html* into *savePath*.

    Files are written as ``<index>.jpg`` where the index starts at 0 and
    advances for each non-empty address, whether or not its download
    succeeded.  *savePath* is created when it does not exist yet.  A failed
    download is reported with a ``[-]`` line and skipped.  Returns None.
    """
    # Example of an address this pattern captures:
    # http://mmbiz.qpic.cn/mmbiz_jpg/wlJkphkR2NMibwTo1cqHwdhLTMYmbV0IOw5vCaJuTsbvTdukCQwUicPClXRibcnY8RCsszAfBYlrJnfz8icUIBWWGw/640?wx_fmt=jpeg
    reg = r'data-src="(.*?)"'
    imgre = re.compile(reg)
    imglist = imgre.findall(html)

    # urlretrieve cannot write into a missing directory, so create it first.
    try:
        os.makedirs(savePath)
    except OSError as err:
        # An already existing directory is fine; anything else means no image
        # could be stored, so report it once instead of failing per image.
        if not os.path.isdir(savePath):
            print("[-] savePath =%s (%s)" % (savePath, err))
            return

    x = 0
    for imgurl in imglist:
        try:
            urllib.urlretrieve(imgurl, os.path.join(savePath, '%s.jpg' % x))
            print("[+] imgurl =%s" % imgurl)
        except Exception:
            # Best effort: one broken image must not stop the others.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            print("[-] imgurl =%s" % imgurl)
        if imgurl != '':
            x = x + 1
        # Pause between requests to avoid hammering the image server.
        time.sleep(1)


def bookUrl(html, saveRoot=r'C:\Users\pradmin\Desktop\downloadImage\images',
            maxBooks=6):
    """Follow the article links found in *html* and download their images.

    Every ``<a href="..." target="_blank">title</a>`` link is treated as one
    article.  The images of each article are stored in a sub-directory of
    *saveRoot* named after the link text.

    html     -- source of the index page (a UTF-8 encoded byte string).
    saveRoot -- directory under which one folder per article is created.
    maxBooks -- number of leading links to process; None processes all.
    """
    reg = r'<a href=\"(.*?)\" target=\"_blank\">(.*?)<\/a>'
    imgre = re.compile(reg)
    for url, bookName in imgre.findall(html)[:maxBooks]:
        # NOTE: the link text is used verbatim as the directory name.
        savePath = os.path.join(saveRoot, bookName.decode("utf-8"))
        print("[+] url =%s" % url)
        getImg(getHtml(url), savePath)


originUrl = "http://mp.weixin.qq.com/s?__biz=MzA4NjQzNzY4Mw==&mid=2454531002&idx=4&sn=67826657f4486bfa0cb8f195262a86f9&chksm=887131e6bf06b8f09b2ec821f49c71c64536cf585d9f17664709fcfc533d39c976c30da91a8d&mpshare=1&scene=1&srcid=1215qBhkFwNhLrfnZlSMmZSj#rd"

# Only hit the network when executed as a script, not when imported.
if __name__ == "__main__":
    html = getHtml(originUrl)
    bookUrl(html)