Python 遞迴解析 JSON 並轉換為 Excel 輸出
阿新 • • 發佈:2018-12-20
參考了此部落格的內容,在此基礎上再對資料進行處理:
程式碼如下:
import json

import pandas as pd


def dict_generator(indict, pre=None):
    """Recursively flatten a parsed JSON value into key-path/value lists.

    Each yielded item is a list whose last element is a leaf value and whose
    preceding elements are the dictionary keys leading to it, e.g.
    ``{'a': {'b': 1}}`` yields ``['a', 'b', 1]``.

    Args:
        indict: The parsed JSON value (dict, list, tuple or scalar).
        pre: Key path accumulated so far; ``None`` means an empty path.

    Yields:
        ``pre + [key, ..., value]`` for every leaf. Empty containers that are
        dictionary values (or list elements, for lists/tuples) are reported
        with the placeholder strings ``'{}'``, ``'[]'`` and ``'()'``.
    """
    pre = list(pre) if pre else []
    if isinstance(indict, dict):
        for key, value in indict.items():
            path = pre + [key]
            if isinstance(value, dict) and not value:
                yield path + ['{}']
            else:
                yield from dict_generator(value, path)
    elif isinstance(indict, (list, tuple)):
        if indict:
            # List elements do not add a path component: every element is
            # reported under the key of the enclosing list.
            for item in indict:
                yield from dict_generator(item, pre)
        else:
            yield pre + ['[]' if isinstance(indict, list) else '()']
    else:
        # Keep the key path for scalars too (e.g. elements of a list of
        # strings); yielding the bare value would lose the key and break
        # callers that slice the result into path and value.
        yield pre + [indict]


def get_all_record_list(read_file_name, index_key='hits.hits._index',
                        encoding='utf-8'):
    """Read a JSON file and split its flattened content into records.

    The file is flattened with ``dict_generator``; key paths are joined with
    ``'.'``. Every occurrence of ``index_key`` starts a new record, so any
    fields seen before the first occurrence form a record of their own.
    If the same key occurs several times inside one record (for example a
    list of scalars), the last value wins.

    Args:
        read_file_name: Path of the JSON file to read.
        index_key: Dotted key that marks the beginning of a new record.
        encoding: Text encoding used to open the file.

    Returns:
        A tuple ``(all_record_list, columns_set)``: the list of record dicts
        and the set of every dotted key that was seen.
    """
    all_record_list = []
    columns_set = set()
    record_dict = {}

    # The context manager guarantees the handle is closed, even on errors.
    with open(read_file_name, 'r', encoding=encoding) as fh:
        parsed = json.load(fh)

    for line in dict_generator(parsed):
        key = '.'.join(line[:-1])
        value = line[-1]
        columns_set.add(key)
        # Close the previous record *before* storing the new index value;
        # otherwise the finished record would carry the next record's index.
        if key == index_key and record_dict:
            all_record_list.append(record_dict)
            record_dict = {}
        record_dict[key] = value

    all_record_list.append(record_dict)
    return all_record_list, columns_set


def list_convert_df(all_record_list, columns_set):
    """Build a DataFrame from record dicts, filling missing fields with ''.

    Args:
        all_record_list: Iterable of dicts mapping column name to value.
        columns_set: Column names. A set is sorted to get a reproducible
            column order; any other iterable keeps its own order.

    Returns:
        A ``pandas.DataFrame`` with one row per record and one column per
        name in ``columns_set``.
    """
    # Sets have no stable order, so fix the order once and use the same
    # list for both the row values and the column labels.
    if isinstance(columns_set, (set, frozenset)):
        columns = sorted(columns_set)
    else:
        columns = list(columns_set)

    rows = [
        [record.get(column, '') for column in columns]
        for record in all_record_list
    ]
    df = pd.DataFrame(rows, columns=columns)
    print("write over")
    return df


def change_id_to_first(df, id_key='hits.hits._id'):
    """Return a copy of ``df`` with the ``id_key`` column moved to the front.

    Args:
        df: The DataFrame to reorder; it is not modified.
        id_key: Name of the column to move to position 0.

    Returns:
        A new DataFrame with ``id_key`` as its first column.

    Raises:
        KeyError: If ``df`` has no column named ``id_key``.
    """
    id_column = df[id_key]
    reordered = df.drop(id_key, axis=1)
    reordered.insert(0, id_key, id_column)
    return reordered


if __name__ == "__main__":
    read_file_name = 'file/esdata20181030.txt'
    all_record_list, columns_set = get_all_record_list(read_file_name)
    df = list_convert_df(all_record_list, columns_set)
    df = change_id_to_first(df)
    df.to_excel('file/excel_to_python.xlsx', sheet_name='mysheet')