1. 程式人生 > >爬蟲-微博移動端評論遞迴問題

爬蟲-微博移動端評論遞迴問題

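# The comments endpoint is paginated through max_id: every follow-up request
# must carry the max_id value returned by the previous response, so the pages
# have to be fetched one after another. Recursion is a natural way to express
# this, but it is easy to get wrong -- the key is a well-defined stop
# condition. In this example the stop condition is max_id == 0, which means
# there is no next page.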

import json
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

import requests
from lxml import etree

# Initial pagination cursor; down() does not read this module-level value
# in the visible code.
max_id = 0

# down() appends the decoded response of every comment page it fetches here.
html_contents = []
# Headers sent with every comment request.
# NOTE(review): the cookie below is a hard-coded login session checked into
# source; it presumably expires and should come from configuration or the
# environment instead -- confirm before reuse.
_HEADERS = {
    # The original value had spaces around every "/", which is not a valid
    # media-type list.
    "accept": "application/json, text/plain, */*",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
    "cookie": "_T_WM=74b5406b79cd18adabbcaac40f997914; WEIBOCN_FROM=1110006030; MLOGIN=1; SSOLoginState=1546235890; ALF=1548827890; SCF=Arj6zmmKiOmQAk_IgSYwafWcdI6LlAtTIuAWJCXnxyWffuZOwcMEjITykhpkEIjdpvk1Tl-MAFRtjZPwLBkKg7w.; SUB=_2A25xLd-iDeRhGeBG41IS9yzJzD2IHXVS0eHqrDV6PUNbktAKLRHTkW1NQeU4KyxGbCrkBPK46ssmM7owlLLmzyNw; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WF6hmlpjTzkNkQzFAuzj21D5JpX5KMhUgL.FoqR1h50S0zfS022dJLoIp7LxKML1KBLBKnLxKqL1hnLBoMXShBfehzRe0eX; SUHB=03oFS1TMqpmO_Q; M_WEIBOCN_PARAMS=oid%3D4323281584327025%26luicode%3D20000174%26lfid%3D4323281584327025%26uicode%3D20000061%26fid%3D4323281584327025",
}

# Without a timeout a stalled connection would block the crawl forever.
_TIMEOUT_SECONDS = 10


def _next_page_url(url, max_id):
    """Return *url* with its ``max_id`` query parameter set to *max_id*."""
    parts = urlsplit(url)
    params = [
        (key, value)
        for key, value in parse_qsl(parts.query, keep_blank_values=True)
        if key != "max_id"
    ]
    params.append(("max_id", str(max_id)))
    return urlunsplit(parts._replace(query=urlencode(params)))


def down(url):
    """Fetch every comment page reachable from *url* and collect the payloads.

    Each response carries ``data.max_id``, the cursor that has to be sent as
    the ``max_id`` query parameter of the next request; ``0`` means there is
    no next page. Pages are followed in a loop instead of by recursion so a
    long comment thread cannot exhaust the interpreter's recursion limit, and
    the next URL is derived from *url* itself so the function works for any
    post, not only one hard-coded id.

    Side effects: the raw body of every response and every non-zero cursor
    are printed, and each decoded page is appended to the module-level
    ``html_contents`` list (always the decoded object itself, never wrapped
    in an extra list). A response without a ``data`` object is printed but
    not recorded, and ends the crawl.

    Args:
        url: URL of the first comment page to request.

    Returns:
        ``0`` if the first page has no next page (or no ``data`` object),
        ``1`` if at least one further page was requested.

    Raises:
        requests.RequestException: on network failure or timeout.
        json.JSONDecodeError: if a response body is not valid JSON.
    """
    followed_next_page = False
    # Guards against a server that keeps returning an already-used cursor,
    # which would otherwise loop forever.
    seen_cursors = set()

    while True:
        html = requests.get(url, headers=_HEADERS, timeout=_TIMEOUT_SECONDS).text
        print(html)

        payload = json.loads(html)  # decode once and reuse
        data = payload.get("data") if isinstance(payload, dict) else None
        if not isinstance(data, dict):
            # No comment data in this response: nothing to record, stop here.
            break

        html_contents.append(payload)

        next_id = data.get("max_id", 0)
        if not next_id or next_id in seen_cursors:
            break
        seen_cursors.add(next_id)
        print(next_id)

        url = _next_page_url(url, next_id)
        followed_next_page = True

    return 1 if followed_next_page else 0

# Start crawling at the first comment page (the URL carries no max_id yet).
first_page_url = "https://m.weibo.cn/comments/hotflow?id=4323281584327025&mid=4323281584327025&max_id_type=0"

# down() returns 0 when the first page has no next page and 1 otherwise.
crawl_status = down(first_page_url)
print(crawl_status)

# Dump everything that down() collected.
print(html_contents)