1. 程式人生 > 實用技巧 > 關鍵資訊讀寫指令碼

關鍵資訊讀寫指令碼

記錄一個小的指令碼

"""
    Function: extract the key info
    Author: dyx
    DateTime: 20200805
"""
import pandas as pd

# Default workbook paths handed to AnalysisTool.get_info when this file is
# executed as a script.
current_file = "./data/original.xlsx"  # workbook that is read
to_save = "./data/abc.xlsx"  # workbook that is written


class AnalysisTool():
    """Pull key fields out of message texts in a spreadsheet and save them.

    ``get_info`` reads the ``messageid`` and ``sms_message`` columns of an
    Excel workbook, parses every message with ``parse_message``, parses the
    remark line of each message with ``ner_deal_data`` and writes one output
    row per message to a second workbook.
    """

    # Chinese amount unit searched for in balance lines and in the
    # deposit/withdraw line.
    # NOTE(review): the published listing carried an empty string here, which
    # made ``str.split('')`` raise ValueError for every row.  '萬' is inferred
    # from the parallel 'million(s)' branch and the surviving '0 萬' literal;
    # confirm against real message data.
    CN_UNIT = '萬'

    # Markers used by ``ner_deal_data``.
    # NOTE(review): the published listing also carried empty strings for
    # these, and the intended values cannot be inferred.  The defaults below
    # reproduce the as-published behaviour exactly: '' is contained in every
    # token (so the last token becomes the group) and no single character
    # ever equals '' (so no unit suffix is ever attached to the amount).
    # Override them in a subclass once the real markers are known.
    GROUP_MARKERS = ('',)
    MONEY_UNIT_CHARS = ()

    def get_info(self, analy_file, save_file):
        """Parse every message in *analy_file* and write the result table.

        Args:
            analy_file: Excel workbook to read; it must contain the columns
                ``sms_message`` and ``messageid``.
            save_file: path of the Excel workbook to write.

        Raises:
            KeyError: if one of the two required columns is missing.
            IndexError: if a message does not have the section/line layout
                that ``parse_message`` indexes into.

        A failure while writing *save_file* is reported on stdout and not
        re-raised, as in the original script.
        """
        df = pd.read_excel(analy_file)
        message_ids = list(df['messageid'])
        texts = df['sms_message'].tolist()

        parsed = [self.parse_message(text) for text in texts]
        # The last field of every parsed message is its raw remark line.
        remark_entries = self.ner_deal_data([fields[-1] for fields in parsed])

        records = []
        for msg_id, fields, entry in zip(message_ids, parsed, remark_entries):
            # ner_deal_data returns a one-element list for remarks without
            # "]"; those (and plain remark headers) carry no name/group/money.
            if len(entry) < 3 or '備註' in entry[0] or 'Notes' in entry[0]:
                remark_fields = (None, None, None)
            else:
                remark_fields = (entry[0], entry[1], entry[2])
            # The final column holds the parsed remark entry itself, as the
            # original code did after rebinding ``remarks``.
            records.append((msg_id,) + fields[:-1] + remark_fields + (entry,))

        # NOTE(review): three column names are empty and '金額' appears twice;
        # they are kept exactly as published because the intended names are
        # unknown.
        df = pd.DataFrame(
            data=records,
            columns=['item1', 'item2', '編號', '', '', '金額', '單位',
                     '資訊', '', '日期', '姓名', '組名', '金額', '備註'],
        )

        try:
            df.to_excel(save_file)
        except Exception as exc:
            # Best-effort save: report the failure (with its cause) instead
            # of hiding it behind a bare ``except``.
            print('exe is wrong:', exc)
        else:
            print('OK. \nanalysis result has generate!')

    def parse_message(self, text):
        """Split one message text into its per-row fields.

        The text is expected to consist of sections separated by a blank
        line: a header section (lines 2-5 are read), a balance section and a
        trailing section holding the input date.  A fourth section, if
        present, is merged into the third.

        Returns:
            A 10-tuple ``(group, group_id, group_name, save_type, money_num,
            money_type, guest, balance_lines, input_date, remark)``.
            ``money_num``/``money_type`` are ``None`` when the amount line
            mentions neither unit, ``input_date`` is ``None`` when no date
            line is found, and missing name parts are ``None``.

        Raises:
            IndexError: if the text has fewer than three sections or the
                header has fewer than six lines.
        """
        sections = text.split('\n\n')
        if len(sections) == 4:
            sections = [sections[0], sections[1],
                        sections[2] + '\n' + sections[3]]

        header = sections[0].split('\n')
        name_parts = header[2].split(' ')
        # Pad so that a short name line yields None instead of IndexError.
        name_parts += [None] * (3 - len(name_parts))
        save_get_info = header[3]
        guest = header[4].split(': ')[-1]
        remark = header[5]

        # NOTE(review): the substring test '0 萬' also rejects amounts such
        # as '10 萬'; confirm whether only zero balances were meant.
        balance_lines = []
        for line in sections[1].split('\n'):
            if ('0 萬' not in line and '存結' not in line
                    and self.CN_UNIT in line):
                balance_lines.append(line)
            if ('0 million(s)' not in line
                    and 'currency balance' not in line
                    and 'million' in line):
                balance_lines.append(line)

        # Reset per message so a value from a previous row is never reused.
        input_date = None
        for line in sections[2].split('\n'):
            if '入數日期' in line or 'Input Date' in line:
                input_date = line.split(': ')[1]

        amount_text = save_get_info.split(': ')[-1]
        save_type = amount_text.split(' ')[0]
        # str.split rejects an empty separator, so only strip the type
        # prefix when there is one.
        if save_type:
            amount_tail = amount_text.split(save_type)[-1]
        else:
            amount_tail = amount_text

        money_num = None
        money_type = None
        if self.CN_UNIT in save_get_info:
            parts = amount_tail.split(self.CN_UNIT)
            money_num = parts[0].strip() + self.CN_UNIT
            money_type = parts[-1].strip()
        elif 'million' in save_get_info:
            parts = amount_tail.split('million(s)')
            money_num = parts[0].strip() + 'million(s)'
            money_type = parts[-1].strip()

        return (name_parts[0], name_parts[1], name_parts[2], save_type,
                money_num, money_type, guest, balance_lines, input_date,
                remark)

    def ner_deal_data(self, data):
        """Extract ``[name, group, money]`` from each remark line.

        Only the text before the first "]" of a line is examined:

        * name  - the text after the last digit, stripped ("" if there is
          no digit);
        * group - the last space-separated token containing one of
          ``GROUP_MARKERS`` ("" if none);
        * money - a run of digits/dots; if it is directly followed by a
          character from ``MONEY_UNIT_CHARS`` that character is appended and
          scanning stops, otherwise any other character resets the run.

        Args:
            data: iterable of remark strings.

        Returns:
            A list with one entry per line: ``[name, group, money]``, or
            ``[line]`` unchanged when the line contains no "]".
        """
        all_list = []
        for line in data:
            if "]" not in line:
                all_list.append([line])
                continue
            head = line.split("]")[0]

            name = ""
            for idx in range(len(head) - 1, -1, -1):
                if head[idx].isdigit():
                    name = head[idx + 1:]
                    break

            group = ""
            for token in head.split(" "):
                if any(marker in token for marker in self.GROUP_MARKERS):
                    group = token

            money = ""
            for ch in head:
                if ch.isdigit() or ch == ".":
                    money += ch
                elif money and ch in self.MONEY_UNIT_CHARS:
                    money += ch
                    break
                else:
                    money = ""

            all_list.append([name.strip(), group, money])
        return all_list


if __name__ == '__main__':
    at = AnalysisTool()
    # execute
    at.get_info(current_file, to_save)