Python Crawler Series: A WeChat Mini-Program in Practice
阿新 · Published 2018-12-04
Crawling WeChat mini-program data with the Scrapy framework.
First, you need a packet-capture tool; I recommend Charles. If time permits I will write up a worked example of using it later.
- The most important step is interface analysis: work out what each endpoint does, chain the endpoints together into a request flow, and then walk that flow through the Spider's callbacks, parsing the data step by step.
- The capture and analysis process itself is not demonstrated here; it mainly comes down to inspecting the request headers and query parameters.
- The full spider code follows below. It is only lightly commented, but the flow is clear; message me if anything is unclear.
- To test it, capture your own traffic and substitute your own token and session key in the request headers; the values below have been altered and will not work as-is. A quick way to check your captured values is to replay one endpoint outside Scrapy, as in the sketch right after this list.
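Before wiring the endpoints into the Spider, it helps to replay one of them by hand to confirm that the captured headers actually work. Here is a minimal sketch using the requests library (my own addition; the endpoint URL and header names come from the capture, while the placeholder values must be replaced with your own):

# -*- coding:utf-8 -*-
# Replay one captured endpoint outside Scrapy to validate the headers.
import requests

headers = {
    "x-bell-token": "<x-bell-token from your own capture>",
    "x-session-key": "<x-session-key from your own capture>",
    "User-Agent": "Mozilla/5.0 (Linux; Android 8.0.0; MI 5 Build/OPR1.170623.032; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/appbrand0"
}

resp = requests.post("https://bell-mall.yunshanmeicai.com/mall/gis/get-city-list", headers=headers)
print(resp.status_code)
print(resp.json())  # with valid tokens the body carries 'ret' and 'data' keys

If that call returns data, the same headers can go straight into the Scrapy requests. The full spider: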
# -*- coding:utf-8 -*-
import json

import scrapy

'''
@Author     : 王磊
@Date       : 2018/12/3
@Description: Nationwide product-data crawl of the 美家優享 WeChat mini program
'''


class MeiJiaSpider(scrapy.spiders.Spider):
    name = "MeiJiaSpider"

    def __init__(self):
        super().__init__()
        # Shared request headers; token and session key come from your own capture
        self.headers = {
            "x-bell-token": "ef4d705aabf4909db847b6de6068605c-4",
            "x-session-key": "ab7f2b8673429d5e779c7f5c8b4a8524",
            "User-Agent": "Mozilla/5.0 (Linux; Android 8.0.0; MI 5 Build/OPR1.170623.032; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/appbrand0"
        }
    def start_requests(self):
        '''
        Fetch the city list.
        :return:
        '''
        url = 'https://bell-mall.yunshanmeicai.com/mall/gis/get-city-list'
        yield scrapy.FormRequest(
            url=url,
            headers=self.headers,
            dont_filter=True,
            callback=self.getCityChild
        )
    def getCityChild(self, response):
        '''
        From the city list, fetch each city's sub-list to obtain
        latitude/longitude data for the entries.
        :param response:
        :return:
        '''
        # The response is JSON, so parse it with json.loads rather than eval
        datas = json.loads(response.text)
        if datas['ret']:
            url = 'https://bell-mall.yunshanmeicai.com/mall/gis/address-search'
            for _ in datas['data']:
                name = _['name']
                data = {
                    "key_words": name,
                    "city": name
                }
                yield scrapy.FormRequest(
                    url=url,
                    headers=self.headers,
                    formdata=data,
                    dont_filter=True,
                    callback=self.sellerParse
                )
    def sellerParse(self, response):
        '''
        Use the latitude/longitude to fetch the list of sellers near that location.
        :param response:
        :return:
        '''
        res = json.loads(response.text)
        if res['ret']:
            datas = res['data']
            for _ in datas:
                locationData = {"lat": str(_['location']['lat']), "lng": str(_['location']['lng'])}
                urlNearby = 'https://bell-mall.yunshanmeicai.com/mall/gis/get-nearby-team'
                yield scrapy.FormRequest(
                    url=urlNearby,
                    headers=self.headers,
                    formdata=locationData,
                    dont_filter=True,
                    callback=self.sellerInfoParse
                )
    def sellerInfoParse(self, response):
        '''
        Fetch seller details: store id, phone number, region, and so on.
        (If you need nothing beyond the store id, this step can be skipped,
        since the seller list already exposes the store id as its id field.)
        :param response:
        :return:
        '''
        res = json.loads(response.text)
        if res['ret']:
            datas = res['data']
            urlClass = 'https://bell-mall.yunshanmeicai.com/cart/cart/get-list'
            for _ in datas:
                query = {}
                # Per-store headers: x-group-token carries the store id
                headers = {
                    "x-bell-token": "0b5e5bcf70c973b080f39cb7b4ec2306-4",
                    "x-session-key": "3e76463e81d9551826fc132b10c27794",
                    "x-group-token": str(_['id']),
                    "User-Agent": "Mozilla/5.0 (Linux; Android 8.0.0; MI 5 Build/OPR1.170623.032; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/appbrand0"
                }
                yield scrapy.FormRequest(
                    url=urlClass,
                    headers=headers,
                    formdata=query,
                    dont_filter=True,
                    callback=self.storeClassParse
                )
    def storeClassParse(self, response):
        '''
        Use the store id to fetch the store's category list.
        :param response:
        :return:
        '''
        res = json.loads(response.text)
        if res['ret']:
            urlClass = 'https://bell-mall.yunshanmeicai.com/mall/home/get-home-class'
            version = {"version": "1.0.0"}
            headers = {
                "x-bell-token": "0b5e5bcf70c973b080f39cb7b4ec2306-4",
                "x-session-key": "3e76463e81d9551826fc132b10c27794",
                "x-group-token": str(res['data']['store_id']),
                "User-Agent": "Mozilla/5.0 (Linux; Android 8.0.0; MI 5 Build/OPR1.170623.032; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/appbrand0"
            }
            yield scrapy.FormRequest(
                url=urlClass,
                headers=headers,
                formdata=version,
                dont_filter=True,
                callback=self.goodsListParse,
                # Pass the store id along so the next callback can reuse it
                meta={"store_id": str(res['data']['store_id'])}
            )
    def goodsListParse(self, response):
        '''
        Use each category id to fetch that category's goods list.
        :param response:
        :return:
        '''
        # json.loads handles null/true/false natively, so the old
        # eval() + replace('null', 'None') workaround is unnecessary
        res = json.loads(response.text)
        if res['ret']:
            if res['data']['list']:
                data = res['data']['list']
                goodsUrl = 'https://bell-mall.yunshanmeicai.com/mall/home/index'
                headers = {
                    "x-bell-token": "0b5e5bcf70c973b080f39cb7b4ec2306-4",
                    "x-session-key": "3e76463e81d9551826fc132b10c27794",
                    "x-group-token": str(response.meta['store_id']),
                    "User-Agent": "Mozilla/5.0 (Linux; Android 8.0.0; MI 5 Build/OPR1.170623.032; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/appbrand0"
                }
                for _ in data:
                    query = {"page": "1", "class_id": str(_['id']), "version": "1.0.2"}
                    yield scrapy.FormRequest(
                        url=goodsUrl,
                        headers=headers,
                        formdata=query,
                        dont_filter=True,
                        callback=self.goodsParse
                    )
    def goodsParse(self, response):
        '''
        Parse the final goods data.
        :param response:
        :return:
        '''
        goodsList = json.loads(response.text)
        if goodsList['ret']:
            if goodsList['data']['list']:
                lists = goodsList['data']['list']
                for _ in lists:
                    start_time = str(_['start_time'])
                    end_time = str(_['end_time'])
                    product_id = str(_['product_id'])
                    product_name = _['product_name']
                    group_product_name = _['group_product_name']
                    group_id = str(_['group_id'])
                    group_type = str(_['group_type'])
                    product_short_desc = _['product_short_desc']
                    product_desc = _['product_desc']
                    product_format_id = str(_['product_format_id'])
                    already_txt = _['already_txt']
                    already_nums = str(_['already_nums'])
                    left_txt = _['left_txt']
                    left_num = str(_['left_num'])
                    real_left_num = str(_['real_left_num'])
                    group_price = str(_['group_price'])
                    line_price = str(_['line_price'])
                    product_sales_num = str(_['product_sales_num'])
                    identify = _['identify']
                    print(
                        "start_time: %s ,end_time: %s ,product_id: %s ,product_name: %s ,group_product_name: %s ,group_id: %s ,group_type: %s ,product_short_desc: %s ,product_format_id: %s ,already_txt: %s ,already_nums: %s ,real_left_num: %s ,group_price: %s ,line_price: %s ,product_sales_num: %s ,identify: %s " % (
                            start_time, end_time, product_id, product_name, group_product_name, group_id, group_type,
                            product_short_desc, product_format_id, already_txt, already_nums, real_left_num, group_price,
                            line_price, product_sales_num, identify)
                    )
'''
Additional fields available on each goods record (sample from a captured response):

"text_label_list": [
    {
        "label_content": "#fe3113",
        "label_name": "熱銷",
        "label_id": 10
    }
],
"pic_label_list": [
    {
        "label_content": "https:\\/\\/img-oss.yunshanmeicai.com\\/xfresh\\/product\\/69cf3401b000504ea33d9e8b80bfc467.png",
        "label_name": "美家福利",
        "label_id": 52
    }
],
"loop_pics": [
    "https:\\/\\/img-oss.yunshanmeicai.com\\/xfresh\\/product\\/03df45319b36070f67edf4562d6ec74f.jpg"
],
"new_loop_pics": "https:\\/\\/img-oss.yunshanmeicai.com\\/xfresh\\/product\\/03df45319b36070f67edf4562d6ec74f.jpg?x-oss-process=image\\/resize,w_360"
'''
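The spider above only prints each product. To persist the records instead, one option (my own sketch, not part of the original post) is to have goodsParse yield a dict of the parsed fields and attach a small item pipeline; the class name JsonLinesPipeline and the goods.jl output file below are assumptions:

import json

# Appends every yielded product to a JSON-lines file. To feed it, replace the
# print(...) in goodsParse with something like:
#     yield {"product_id": product_id, "product_name": product_name,
#            "group_price": group_price, "line_price": line_price}
# and enable this class via the ITEM_PIPELINES setting.
class JsonLinesPipeline:
    def open_spider(self, spider):
        self.fp = open('goods.jl', 'w', encoding='utf-8')

    def close_spider(self, spider):
        self.fp.close()

    def process_item(self, item, spider):
        self.fp.write(json.dumps(dict(item), ensure_ascii=False) + '\n')
        return item

Once goodsParse yields dicts, Scrapy's built-in feed exports work as well, with no pipeline code at all: scrapy runspider meijia_spider.py -o goods.json (the module file name here is whatever you saved the spider as).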
More small Python crawler examples in this series are on the way; feel free to follow along.