python爬餓了麼外賣資料(1)
阿新 • • 發佈:2019-01-06
# Scrape Ele.me food-delivery data -- collection of delivery areas.
# Sample request:
# https://mainsite-restapi.ele.me/v2/pois?
# extras%5B%5D=count&geohash=wx4g0bmjetr7&keyword=%E6%9C%9D%E9%98%B3&limit=20&type=nearby
import json
import os
import string
import urllib.parse
import urllib.request

from openpyxl import Workbook
from openpyxl import load_workbook

# Workbook that stores the places found for each search keyword.
# Raw string: the path contains backslashes that must not be read as escapes.
keywordExcel = r"D:\worksapce\python\爬蟲餓了麼\keyword.xlsx"

# Search keywords; each keyword gets its own worksheet.
# NOTE(review): the sample URL above encodes the Simplified form "朝阳",
# while these literals are Traditional -- confirm which form the API expects.
keywords = ["朝陽", "奧體"]

# One entry per spreadsheet column: (header text, JSON key, column width).
_COLUMNS = (
    ("ID", "id", 30.0),
    ("城市", "city", 10.0),
    ("geohash", "geohash", 18.0),
    ("名稱", "name", 20.0),
    ("地址", "address", 50.0),
    ("商家總數", "count", 10.0),
    ("經度", "longitude", 10.0),
    ("緯度", "latitude", 10.0),
    ("request_id", "request_id", 25.0),
    ("short_address", "short_address", 40.0),
)


def reqsetting():
    """Build the base request: fixed headers plus the root URL.

    The URL carries no query string yet; the caller appends the encoded
    parameters for each keyword before sending the request.

    Returns:
        urllib.request.Request: request pointing at the bare ``pois?`` URL.
    """
    weburl = "https://mainsite-restapi.ele.me/v2/pois?"
    webheaders = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Connection": "keep-alive",
        "Cookie": "ubt_ssid=plds7ye19rj2rghg3oaar8hkt89yy7f1_2017-02-07; _utrace=ac9073c509bedb74b28a1482bd95a9d8_2017-02-07",
        "Host": "mainsite-restapi.ele.me",
        "Origin": "https://www.ele.me",
        "Referer": "https://www.ele.me/place/wx4g4h5shqf",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
    }
    return urllib.request.Request(url=weburl, headers=webheaders)


def write2Excel(jsondata, title):
    """Write the records for one keyword into a new sheet of ``keywordExcel``.

    The workbook is loaded if the file already exists and created otherwise,
    so successive calls accumulate one sheet per keyword.

    Args:
        jsondata: iterable of dicts; each must contain every key listed in
            ``_COLUMNS`` (a missing key raises ``KeyError``).
        title: name of the worksheet to create.
    """
    fileName = keywordExcel
    if os.path.exists(fileName):
        wb = load_workbook(fileName)
    else:
        # A fresh Workbook already holds one default sheet; it is left as-is.
        wb = Workbook()

    ws = wb.create_sheet(title)
    ws.append([header for header, _, _ in _COLUMNS])
    # Columns are lettered A, B, C, ... in the order of _COLUMNS.
    for letter, (_, _, width) in zip(string.ascii_uppercase, _COLUMNS):
        ws.column_dimensions[letter].width = width

    for row in jsondata:
        ws.append([row[key] for _, key, _ in _COLUMNS])

    wb.save(fileName)


def main():
    """Query the API once per keyword and store each result set in Excel."""
    # Start from a clean workbook on every run.
    try:
        os.remove(keywordExcel)
    except FileNotFoundError:
        pass

    req = reqsetting()
    base_url = req.get_full_url()
    for keyword in keywords:
        # Build the query string for this keyword and attach it to the URL.
        query = urllib.parse.urlencode({
            "extras[]": "count",
            "geohash": "wx4g0bmjetr7",
            "keyword": keyword,
            "limit": "20",
            "type": "nearby",
        })
        req.full_url = base_url + query

        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(req) as webpage:
            content = webpage.read().decode("utf-8")

        write2Excel(json.loads(content), keyword)


if __name__ == '__main__':  # script entry point
    main()