1. 程式人生 > 獲取微博廣告博文數據

獲取微博廣告博文數據

frame count div tope comm spa sin post .data

import requests
import json
import pandas as pd
import time
import re 

# HTTP request headers passed to requests.get() in get_ad().
# Both values are placeholders; substitute a real User-Agent string and
# Cookie before running the script.
headers = {
    "User-Agent": "XXXX",
    "Cookie": "XXX",
}

# Feed endpoint; the page index is substituted into the since_id parameter.
_FEED_URL = (
    "https://m.weibo.cn/api/container/getIndex"
    "?containerid=102803&openApp=0&since_id={}"
)

# Values of an mblog's "from_cateid" field that this script keeps as ad posts.
_AD_CATEGORIES = ("Brand", "Sfst", "FanstopExtend", "Wax")

# Non-greedy match for anything between angle brackets, used to strip markup
# from the post text. Compiled once instead of once per matching card.
_TAG_PATTERN = re.compile(r"<.*?>")

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the whole crawl.
_REQUEST_TIMEOUT = 10


def _extract_ads(cards, page):
    """Return one row dict per card in *cards* whose mblog is an ad category."""
    rows = []
    for position, card in enumerate(cards):
        mblog = card.get("mblog")
        if mblog is None:
            # Not every card necessarily carries a post; skip instead of
            # raising KeyError and losing the rest of the page.
            continue
        if mblog.get("from_cateid") not in _AD_CATEGORIES:
            continue
        user = mblog["user"]
        rows.append({
            "uid": user["id"],
            "昵稱": user["screen_name"],
            # Rank is the 1-based position among ALL cards on the page,
            # not only among the ad cards.
            "排名": position + 1,
            "出現頁數": page + 1,
            "博文": _TAG_PATTERN.sub("", mblog["text"]),
            "轉發數": mblog["reposts_count"],
            "評論數": mblog["comments_count"],
            "點贊數": mblog["attitudes_count"],
        })
    return rows


def get_ad(page):
    """Fetch one page of the feed and return its ad posts as row dicts.

    Args:
        page: Zero-based page index; it is substituted into the ``since_id``
            query parameter of the request URL and reported as ``page + 1``
            in the output rows.

    Returns:
        A list (possibly empty) of dicts with the keys ``uid``, ``昵稱``,
        ``排名``, ``出現頁數``, ``博文``, ``轉發數``, ``評論數`` and ``點贊數``.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response body is not valid JSON.
        KeyError: If the payload lacks the ``data`` -> ``cards`` structure or
            an ad post lacks one of the expected fields.
    """
    url = _FEED_URL.format(page)
    res = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    data = json.loads(res.text)
    return _extract_ads(data["data"]["cards"], page)


# Guarded so that importing this module does not start a 50-request crawl;
# running the file directly behaves as before.
if __name__ == "__main__":
    all_data = []
    for i in range(50):
        time.sleep(1)  # pause between consecutive requests
        print(len(all_data))  # progress: rows collected so far
        fina_data = get_ad(i)
        if fina_data:
            all_data += fina_data

    df1 = pd.DataFrame(all_data)
    # Timestamped file name so repeated runs do not overwrite each other.
    df1.to_excel(
        "result" + time.strftime("%Y%m%d%H%M%S") + ".xlsx",
        index=False,
    )
    print("done")

獲取微博廣告博文數據