獲取微博廣告博文數據
阿新 • • 發佈:2018-05-18
frame count div tope comm spa sin post .data
import json
import re
import time

import pandas as pd

# Request headers sent with every feed request. Both values are placeholders
# and must be replaced with real ones before running the script.
headers = {
    'User-Agent': 'XXXX',
    'Cookie': 'XXX',
}

# Feed endpoint; ``{}`` is filled with the zero-based page index.
FEED_URL = 'https://m.weibo.cn/api/container/getIndex?containerid=102803&openApp=0&since_id={}'

# ``from_cateid`` values that mark a post as an advertisement.
AD_CATEGORIES = ('Brand', 'Sfst', 'FanstopExtend', 'Wax')

# Strips HTML tags from the post body; compiled once instead of per card.
_TAG_RE = re.compile(r'<.*?>')

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

# Number of feed pages fetched by ``main``.
PAGE_COUNT = 50


def extract_ads(cards, page):
    """Return one record per advertisement found in *cards*.

    Args:
        cards: Sequence of card dicts as found under ``data.cards`` in the
            feed response.
        page: Zero-based page index the cards came from.

    Returns:
        A list of dicts, one per ad post, in feed order. ``排名`` is the
        1-based position of the card among *all* cards on the page (ads and
        non-ads alike) and ``出現頁數`` is the 1-based page number.
    """
    ads = []
    for position, card in enumerate(cards, start=1):
        # Some cards carry no post at all; skip them instead of raising
        # KeyError.
        mblog = card.get('mblog')
        if not mblog or mblog.get('from_cateid') not in AD_CATEGORIES:
            continue
        user = mblog['user']
        ads.append({
            'uid': user['id'],
            '昵稱': user['screen_name'],
            '排名': position,
            '出現頁數': page + 1,
            '博文': _TAG_RE.sub('', mblog['text']),
            '轉發數': mblog['reposts_count'],
            '評論數': mblog['comments_count'],
            '點贊數': mblog['attitudes_count'],
        })
    return ads


def get_ad(page):
    """Fetch one feed page and return its advertisement records.

    Args:
        page: Zero-based page index, substituted into ``since_id``.

    Returns:
        The list produced by :func:`extract_ads`; empty when the response
        contains no ``data.cards``.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Imported here so the parsing helper above remains importable in
    # environments where ``requests`` is not installed.
    import requests

    res = requests.get(
        FEED_URL.format(page),
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    res.raise_for_status()
    data = json.loads(res.text)
    # A response without ``data``/``cards`` yields no ads rather than a
    # KeyError.
    cards = (data.get('data') or {}).get('cards') or []
    return extract_ads(cards, page)


def main():
    """Crawl ``PAGE_COUNT`` pages and write all ad records to an Excel file."""
    all_data = []
    for i in range(PAGE_COUNT):
        # Pause between requests to avoid hammering the server.
        time.sleep(1)
        print(len(all_data))
        all_data.extend(get_ad(i))

    df1 = pd.DataFrame(all_data)
    df1.to_excel(
        'result' + time.strftime("%Y%m%d%H%M%S") + '.xlsx',
        index=False,
    )
    print('done')


if __name__ == '__main__':
    main()
獲取微博廣告博文數據