1. 程式人生 > json API通用爬取模塊

json API通用爬取模塊

python 爬蟲

&

import requests
def jsonRequest(url,postdata):
    """
    POST ``postdata`` to ``url`` with ``requests`` and return the decoded JSON body.

    :param url: address of the API endpoint to call
    :param postdata: payload handed to ``requests.post`` as its ``data`` argument
    :return: the decoded JSON body when the status code is 200, otherwise
        the fixed marker string ``"requests faild"``
    """
    reply = requests.post(url, data=postdata)
    if reply.status_code != 200:
        # Every non-200 status is reported with the same marker string.
        return "requests faild"
    return reply.json()


def extractionData(jsondata,keynote):
    """
    Look up ``keynote`` in ``jsondata``, print what was found and return it.

    What gets printed depends on the type of the value found:

    * dict -- one ``key value`` line per entry;
    * list -- one ``key value`` line per entry of every dict element
      (elements that are not dicts are skipped silently);
    * anything else -- the value itself.

    :param jsondata: already-decoded JSON data, indexed with ``keynote``
    :param keynote: key whose value should be extracted
    :return: ``jsondata[keynote]``, unchanged
    :raises KeyError: propagated from the lookup when ``jsondata`` is a
        dict that does not contain ``keynote``
    """
    extractionValue = jsondata[keynote]
    # Each print() call receives exactly one pre-formatted argument so the
    # output is the same whether ``print`` is the Python 2 statement or the
    # Python 3 function.
    if isinstance(extractionValue, dict):
        for key in extractionValue.keys():
            print("%s %s" % (key, extractionValue[key]))
    elif isinstance(extractionValue, list):
        for item in extractionValue:
            if isinstance(item, dict):
                for key in item.keys():
                    print("%s %s" % (key, item[key]))
    else:
        print(extractionValue)
    return extractionValue




def jsonRequestPost(url,postdata):
    """
    POST ``postdata`` to ``url`` and wrap the outcome in a status dict.

    :param url: address of the API endpoint to call
    :param postdata: payload handed to ``requests.post`` as its ``data`` argument
    :return: dict with the keys ``status``, ``msg`` and ``result``;
        ``status`` is 0 and ``result`` is the decoded JSON body when the
        status code is 200, otherwise ``status`` is 400 and ``result`` is ``''``
    """
    reply = requests.post(url, data=postdata)
    if reply.status_code == 200:
        return {
            'status': 0,
            'msg': "請求url成功",
            'result': reply.json(),
        }
    # Every non-200 status code is reported as 400 with an empty result.
    return {
        'status': 400,
        'msg': "請求url失敗",
        'result': '',
    }

def jsonRequestGet(url):
    """
    GET ``url`` with a browser user-agent header and wrap the outcome in a status dict.

    :param url: address of the API endpoint to call
    :return: dict with the keys ``status``, ``msg`` and ``result``;
        ``status`` is 0 and ``result`` is the decoded JSON body when the
        status code is 200, otherwise ``status`` is 400 and ``result`` is ``''``
    """
    browser_headers = {
        "user-agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.3964.2 Safari/537.36",
    }
    reply = requests.get(url, headers=browser_headers)
    succeeded = reply.status_code == 200
    # The body is decoded only on success; failures carry an empty result.
    return {
        'status': 0 if succeeded else 400,
        'msg': "請求url成功" if succeeded else "請求url失敗",
        'result': reply.json() if succeeded else '',
    }


def makePostData(params):
    """
    Turn a sequence of ``{'key': ..., 'value': ...}`` entries into one flat dict.

    :param params: iterable of mappings, each with a ``'key'`` and a ``'value'`` entry
    :return: dict mapping every entry's ``'key'`` to its ``'value'``;
        when a key occurs more than once the last entry wins
    """
    return {entry['key']: entry['value'] for entry in params}


def getTableField(jsonrequestdata,responseListKey,sourceField):
    """
    Collect one field from every record of a list nested inside a JSON response.

    :param jsonrequestdata: decoded JSON response data
    :param responseListKey: ``|``-separated keys leading from the top of
        ``jsonrequestdata`` to the iterable of records, e.g. ``"data|list"``
    :param sourceField: ``|``-separated path evaluated inside each record;
        a step may carry an index in brackets, e.g. ``"networks[1]|ip"``
        or ``"serverInfo|hostname"``
    :return: list with one entry per record -- the value found at
        ``sourceField``, or the string ``"None"`` when any step of the path
        cannot be resolved for that record
    :raises KeyError, IndexError, TypeError: lookups along
        ``responseListKey`` are not guarded and propagate to the caller
    """
    records = jsonrequestdata
    for key in responseListKey.split('|'):
        records = records[key]

    # The path is the same for every record, so split it only once.
    steps = sourceField.split('|')
    values = []
    for record in records:
        value = record
        for step in steps:
            try:
                if '[' in step:
                    # "name[3]" -> look up "name", then element 3 of it.
                    pieces = step.split('[')
                    position = pieces[1].split(']')[0]
                    value = value[pieces[0]][int(position)]
                else:
                    value = value[step]
            except (KeyError, IndexError, TypeError, ValueError):
                # Missing key, index out of range, non-indexable value or a
                # non-numeric index: report the "None" placeholder. Further
                # steps could only fail again, so stop walking the path.
                value = "None"
                break
        values.append(value)
    return values





if __name__ == "__main__":
    # Demo: fetch a JSON document and print the "movieName" field of every
    # record found under data -> list.
    url = "https://box.maoyan.com/promovie/api/box/second.json"
    params = [{"value": "1", "key": "flag"},
              {"value": "12", "key": "userid"},
              {"value": "1524234956", "key": "expiretime"},
              {"value": "1d5df8ff087815336a8e0f299c9811fe", "key": "token"},
              {"value": "7", "key": "roleid"},
              {"value": "190", "key": "projectid"}
              ]
    # NOTE: postdata only demonstrates makePostData(); the GET request below
    # does not send it.
    postdata = makePostData(params)
    response = jsonRequestGet(url)
    if response['status'] != 0:
        # On failure 'result' is an empty string, which getTableField cannot
        # walk into; report the failure message instead of crashing.
        print(response['msg'])
    else:
        jsonrequestdata = response['result']
        responseListKey = "data|list"
        sourceField = "movieName"
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(getTableField(jsonrequestdata, responseListKey, sourceField))


&

json API通用爬取模塊