Python爬取內涵段子裏的段子
阿新 • • 發佈:2018-02-24
爬蟲 內涵段子 環境:Python3.6
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# version: 3.6.4
"""Interactively page through the jokes served by the neihanshequ.com JSON feed.

Each page is fetched with a ``max_time`` cursor; the cursor returned by one
page is used to request the next one.
"""
__author__ = '杜文濤'

import json
import sys

# Feed endpoint; the ``max_time`` cursor value is appended to this prefix.
API_URL = ('http://www.neihanshequ.com/joke/'
           '?is_json=1&app_name=neihanshequ_web&max_time=')
# Cursor used for the very first request.
INITIAL_MAX_TIME = 1519404642
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def get_json_dic(url):
    """Fetch *url* and return its JSON body decoded into a dict.

    The decoded dict is also stored in the module-level ``dict_json`` name,
    which earlier versions of this script relied on.

    Raises:
        requests.RequestException: on timeout, connection or HTTP error.
        ValueError: if the response body is not valid JSON.
    """
    global dict_json
    # Imported here so the parsing helpers below remain importable even when
    # the third-party ``requests`` package is not installed.
    import requests

    # Without a timeout a stalled server would hang the script forever.
    response = requests.get(url=url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of feeding an error page to the decoder.
    response.raise_for_status()
    dict_json = json.loads(response.content.decode())
    return dict_json


def get_joke(dict_json):
    """Print the jokes of one page, asking for confirmation before each.

    Prints the number of entries on the page, then for every entry prompts
    the user; ``Y``/``y`` prints the joke text, any other answer terminates
    the program via ``sys.exit()``.

    Args:
        dict_json: decoded page; entries are read from
            ``dict_json['data']['data']`` and the text of each entry from
            ``entry['group']['text']``.
    """
    entries = dict_json['data']['data']
    print(len(entries))
    # Iterate over what the page actually contains: a fixed range(1, 20)
    # skipped the first entry and overran pages with fewer than 20 entries.
    for entry in entries:
        # NOTE(review): entries without a 'group'/'text' are presumably
        # non-joke items (e.g. ads); they are skipped rather than crashing.
        text = (entry.get('group') or {}).get('text')
        if text is None:
            continue
        answer = input('是否繼續,Y/N')
        if answer not in ('Y', 'y'):
            sys.exit()
        print(text)


def get_max_time(dic_json):
    """Return the ``max_time`` cursor carried by the page *dic_json*.

    The value is also stored in the module-level ``max_time`` name, which
    earlier versions of this script relied on.
    """
    global max_time
    # Read from the argument, not from the global ``dict_json``.
    max_time = dic_json['data']['max_time']
    return max_time


def main():
    """Fetch and display pages until the user quits or the feed runs dry."""
    cursor = INITIAL_MAX_TIME
    first_page = True
    while True:
        url = API_URL + str(cursor)
        page = get_json_dic(url)
        if not page['data']['data']:
            # An empty page would otherwise be re-requested in a tight loop.
            break
        cursor = get_max_time(dic_json=page)
        get_joke(dict_json=page)
        # The URL is echoed for every page after the first one.
        if not first_page:
            print(url)
        first_page = False


if __name__ == '__main__':
    main()
微信公眾號:
Python爬取內涵段子裏的段子