爬蟲-微博移動端評論遞迴問題
阿新 • • 發佈:2018-12-31
# Crawl every page of comments for one Weibo post via the mobile endpoint.
#
# The comment endpoint is paginated by ``max_id``: each response carries the
# ``max_id`` that must be sent with the next request, so pages can only be
# fetched one after another. The stop condition is ``max_id == 0``, which
# means there is no next page.
import json

import requests
from lxml import etree

# Retained module-level name from the original script; down() keeps its own
# pagination cursor and does not read or write this value.
max_id = 0

# Parsed JSON payload (a dict) of every page fetched, in request order.
html_contents = []

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 10

# URL of every page after the first; ``{}`` receives the previous page's
# ``max_id``. The post id is hard-coded, exactly as in the original script.
_NEXT_PAGE_URL = (
    "https://m.weibo.cn/comments/hotflow"
    "?id=4323281584327025&mid=4323281584327025&max_id={}&max_id_type=0"
)

_HEADERS = {
    # Media types must not contain whitespace around "/", so the value is
    # written in its canonical form.
    "accept": "application/json, text/plain, */*",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
    # NOTE(review): this is a captured login session hard-coded in source.
    # It will expire and should not live in version control; consider
    # loading it from the environment instead.
    "cookie": (
        "_T_WM=74b5406b79cd18adabbcaac40f997914; "
        "WEIBOCN_FROM=1110006030; "
        "MLOGIN=1; "
        "SSOLoginState=1546235890; "
        "ALF=1548827890; "
        "SCF=Arj6zmmKiOmQAk_IgSYwafWcdI6LlAtTIuAWJCXnxyWffuZOwcMEjITykhpkEIjdpvk1Tl-MAFRtjZPwLBkKg7w.; "
        "SUB=_2A25xLd-iDeRhGeBG41IS9yzJzD2IHXVS0eHqrDV6PUNbktAKLRHTkW1NQeU4KyxGbCrkBPK46ssmM7owlLLmzyNw; "
        "SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WF6hmlpjTzkNkQzFAuzj21D5JpX5KMhUgL.FoqR1h50S0zfS022dJLoIp7LxKML1KBLBKnLxKqL1hnLBoMXShBfehzRe0eX; "
        "SUHB=03oFS1TMqpmO_Q; "
        "M_WEIBOCN_PARAMS=oid%3D4323281584327025%26luicode%3D20000174%26lfid%3D4323281584327025%26uicode%3D20000061%26fid%3D4323281584327025"
    ),
}


def down(url):
    """Fetch ``url`` and every following comment page into ``html_contents``.

    Each response body is printed, parsed as JSON and appended to the
    module-level ``html_contents`` list as a dict. Pages are followed in a
    loop rather than by recursion, so the number of pages is not limited by
    the interpreter's recursion depth.

    Args:
        url: URL of the first comment page to request.

    Returns:
        0 if the page at ``url`` was already the last one (its ``max_id``
        is 0), otherwise 1.

    Raises:
        requests.RequestException: the HTTP request failed or timed out.
        json.JSONDecodeError: a response body was not valid JSON.
        KeyError: a response had no ``data`` / ``max_id`` entry.
        RuntimeError: the server returned a ``max_id`` that was already
            followed, which would otherwise loop forever.
    """
    status = 0
    seen_cursors = set()
    while True:
        html = requests.get(
            url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT
        ).text
        print(html)

        # Parse once and reuse; read the cursor before storing the page so a
        # malformed response is not recorded.
        page = json.loads(html)
        next_id = page['data']['max_id']
        html_contents.append(page)

        if next_id == 0:
            return status

        if next_id in seen_cursors:
            raise RuntimeError(
                "pagination cursor repeated: max_id={}".format(next_id)
            )
        seen_cursors.add(next_id)

        print(next_id)
        status = 1
        url = _NEXT_PAGE_URL.format(next_id)


print(down("https://m.weibo.cn/comments/hotflow?id=4323281584327025&mid=4323281584327025&max_id_type=0"))
print(html_contents)