1. 程式人生 > Python爬取內涵段子裏的段子

Python爬取內涵段子裏的段子

爬蟲 內涵段子

環境:Python3.6



#!/usr/bin/env python3
#-*-coding:utf-8-*-
#version:3.6.4
__author__ = '杜文濤'

import requests
import json


def get_json_dic(url, timeout=10):
    """Fetch *url* with HTTP GET and return its JSON body as a Python object.

    The parsed result is also stored in the module-level ``dict_json``,
    because the script's entry point reads it from there instead of using
    the return value.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The object produced by ``json.loads`` on the decoded response body.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
        ValueError: If the body is not valid UTF-8 or not valid JSON.
    """
    global dict_json
    response = requests.get(url=url, timeout=timeout)
    # Fail early on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    # The raw body bytes are decoded (UTF-8 by default) into a JSON string.
    json_response = response.content.decode()
    # Convert the JSON string into Python objects.
    dict_json = json.loads(json_response)

    return dict_json

def get_joke(dict_json):
    """Print the jokes held in *dict_json* one at a time, asking before each.

    The number of entries is printed first. Before every joke the user is
    prompted; answering ``Y`` or ``y`` prints the joke, and any other answer
    terminates the program.

    Args:
        dict_json: Parsed response. The entries are read from
            ``dict_json['data']['data']`` and each joke's text from
            ``entry['group']['text']``.

    Raises:
        SystemExit: When the user answers anything other than ``Y``/``y``.
        KeyError: If the expected keys are missing from *dict_json* or from
            one of its entries.
    """
    entries = dict_json['data']['data']
    print(len(entries))
    # Walk the entries that actually arrived rather than fixed indices 1..19:
    # that skipped the first entry and raised IndexError on short batches.
    for entry in entries:
        answer = input('是否繼續,Y/N')
        if answer not in ('Y', 'y'):
            # ``exit()`` is injected by the ``site`` module and may be absent;
            # raising SystemExit directly always works.
            raise SystemExit
        print(entry['group']['text'])

def get_max_time(dic_json):
    """Return the ``max_time`` value carried by a parsed response.

    The value is also stored in the module-level ``max_time``, which the
    script's entry point uses to build the next request URL.

    Args:
        dic_json: Parsed response; the value is read from
            ``dic_json['data']['max_time']``.

    Returns:
        The ``max_time`` value found in *dic_json*.

    Raises:
        KeyError: If ``'data'`` or ``'max_time'`` is missing.
    """
    global max_time
    # Read from the argument; the body used to read the global ``dict_json``
    # by mistake, which ignored whatever the caller passed in.
    max_time = dic_json['data']['max_time']
    return max_time

if __name__ == '__main__':
    # Seed value for the first request; get_max_time() overwrites the global
    # ``max_time`` after every fetch, so each pass requests a new URL.
    max_time = 1519404642
    base_url = 'http://www.neihanshequ.com/joke/?is_json=1&app_name=neihanshequ_web&max_time='
    first_batch = True

    # Runs until get_joke() terminates the program at the user's request.
    while True:
        url = base_url + str(max_time)
        get_json_dic(url)                  # stores the response in ``dict_json``
        get_max_time(dic_json=dict_json)   # updates ``max_time`` for the next pass
        get_joke(dict_json=dict_json)
        # The URL is echoed after every batch except the very first one.
        if first_batch:
            first_batch = False
        else:
            print(url)

微信公眾號:

技術分享圖片

Python爬取內涵段子裏的段子