1. 程式人生 > 實用技巧 > 爬取必應首頁大圖 - Python

爬取必應首頁大圖 - Python

不廢話,直接上程式碼

# -*- coding: utf-8 -*-
# @Author: Wang Hongbin
# @Email:   [email protected]
# @Date:   2018-03-16 14:19:27
# @Last Modified by:   Wang Hongbin
# @Last Modified time: 2018-03-28 16:26:07
import requests 
import re 
import os
import time #時間模組

# Date prefix for saved file names (YYYY-MM-DD followed by an underscore),
# evaluated once when the module is loaded.
local = time.strftime("%Y-%m-%d") + "_"

# Site root: passed as the page to scan and prefixed to the relative image path.
baseUrl = "https://cn.bing.com"

# Browser-style User-Agent header (a Chrome 64 on Windows 10 string).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/64.0.3282.186 Safari/537.36'
    ),
}

def getImgUrl(url):
    """Fetch a page and return the site-relative path of its wallpaper image.

    Args:
        url: Address of the page to scan (the script passes ``baseUrl``).

    Returns:
        The first substring of the page text that starts with
        ``/az/hprichbg/rb/`` and ends with ``.jpg``. The path is relative to
        the site root; the caller is expected to prepend the host.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an HTTP error status.
        IndexError: The page text contains no matching wallpaper path.
    """
    # The dot before "jpg" is escaped so only a literal ".jpg" ends the match;
    # unescaped, it would accept any character in that position.
    reg1 = r"(/az/hprichbg/rb/.*?\.jpg)"

    # Send the module-level browser User-Agent and bound the wait so a stalled
    # connection cannot hang an unattended (scheduled) run forever.
    con = requests.get(url, headers=headers, timeout=30)
    con.raise_for_status()

    matches = re.findall(reg1, con.text, re.S)
    if not matches:
        # Same exception type as indexing an empty list, but with a message
        # that says what was actually missing.
        raise IndexError("no '/az/hprichbg/rb/*.jpg' path found in " + url)
    return matches[0]

def getFilePath(filePath='C:/Users/Administrator/Pictures/MyDesktop/'):
    """Make sure the download directory exists and return its path.

    Args:
        filePath: Directory to save into. It should end with a path separator
            because the caller appends the file name by plain string
            concatenation. Defaults to the script's Windows pictures folder;
            on Linux pass something like
            ``'/var/www/html/biYinPic/images/' + time.strftime("%Y%m%d") + '/'``.

    Returns:
        ``filePath`` unchanged.

    Raises:
        OSError: The directory could not be created (for example, the path
            exists but is a regular file, or permission is denied).
    """
    # makedirs also creates missing parent folders, and exist_ok=True removes
    # the check-then-create race of a separate os.path.exists() test.
    os.makedirs(filePath, exist_ok=True)
    return filePath

def getImgName(url):
    """Build the local file name for a wallpaper path.

    Args:
        url: Image path of the form ``/az/hprichbg/rb/<name>_...``.

    Returns:
        The module-level ``local`` prefix, followed by the text between
        ``/az/hprichbg/rb/`` and the next underscore, followed by ``.jpg``.

    Raises:
        IndexError: ``url`` does not contain the expected pattern.
    """
    name_pattern = r"/az/hprichbg/rb/(.*?)_"
    candidates = re.findall(name_pattern, url, re.S)
    stem = candidates[0]
    return ''.join((local, stem, '.jpg'))


def downloadByPic(url):
    """Download the wallpaper referenced by a page and save it to disk.

    The target file is ``getFilePath() + getImgName(...)``; an existing file
    with the same name is overwritten.

    Args:
        url: Page to scan for the wallpaper path.

    Returns:
        None.

    Raises:
        requests.RequestException: The image request failed, timed out, or
            returned an HTTP error status.
        OSError: The target file could not be opened or written.

    Exceptions raised by ``getImgUrl``, ``getImgName`` and ``getFilePath``
    propagate unchanged.
    """
    imgUrl = getImgUrl(url)
    imgName = getImgName(imgUrl)
    filePath = getFilePath()
    fileName = filePath + imgName

    # getImgUrl returns a site-relative path, so prefix the host.
    picUrl = baseUrl + imgUrl
    read = requests.get(picUrl, headers=headers, timeout=30)
    # Fail before touching the disk instead of saving an error page as .jpg.
    read.raise_for_status()

    # The context manager closes the file even if the write raises.
    with open(fileName, 'wb') as f:
        f.write(read.content)

# reg3 = r'<div class=\"hplaCata\"><div class=\"hplatt\">(.*)</div><div class=\"hplats\">(.*)</div><div id=\"hplaSnippet\">(.*)</div><div class=\"hplaPvd\">(.*)</div>'

# Script entry: fetch the current wallpaper from the Bing home page, then
# print a short confirmation once the download call has returned.
downloadByPic(url=baseUrl)
print('is ok!')

爬取結果

下圖是七月份至今爬取的圖片。因為程式是在 Windows 上執行的,電腦沒開機的日子就不會執行,所以那幾天沒有圖片。程式碼放在 Linux 上執行也沒問題(需先把 getFilePath 中的儲存路徑改成 Linux 路徑),再用 crontab 設定一個定時任務就行了。