Python 結合 Pandas 與 NumPy:在百萬條資料中取某一條資料並寫入 CSV 檔案
阿新 • • 發佈:2021-09-13
Python 結合 Pandas 與 NumPy,在百萬條資料中取某一條資料並寫入 CSV 檔案。本人通過此方法,在短短幾分鐘內恢復了三十多萬條資料!
Python 結合 Pandas 與 NumPy:在百萬條資料中取某一條資料並寫入 CSV 檔案
Pandas:用於資料處理,是 Python 的一個資料分析包。
Numpy:是數值計算的擴充套件包,它能高效處理N維陣列,複雜函式,線性代數。
import csv
import os
import time

import numpy as np
import pandas as pd

# Column order of the output CSV.
data_header_list = ["x", "y", "speed"]


def csv_writer(data_list, data_header_list, file_path):
    """Append rows to a CSV file, emitting the header only for a new/empty file.

    Args:
        data_list: list of dicts, one per row, keyed by the column names.
        data_header_list: list of column names, in output order.
        file_path: path of the CSV file to append to (created if missing).

    Raises:
        ValueError: if ``data_list`` or ``data_header_list`` is not a list.
    """
    if not isinstance(data_list, list):
        raise ValueError("data_list is no list")
    if not isinstance(data_header_list, list):
        raise ValueError("data_header_list is no list")

    # Decide whether this is the first write by looking at the file size
    # rather than reading through an append-mode handle, whose initial read
    # position is platform-dependent.
    need_header = (
        not os.path.exists(file_path) or os.path.getsize(file_path) == 0
    )

    # newline='' is required by the csv module so that it controls line
    # endings itself (otherwise blank lines appear on Windows).
    with open(file_path, 'a', newline='') as csv_fi:
        writer = csv.DictWriter(csv_fi, data_header_list)
        if need_header:
            writer.writeheader()
        writer.writerows(data_list)


def _lookup_motion(data_frame, timestamp, at_id):
    """Return ``(x, y, speed)`` for the unique reference row, else zeros.

    A row matches when its ``time`` equals ``timestamp`` and its ``at_id``
    equals ``at_id``. Zero matches, several matches, missing columns or
    non-numeric values all yield ``(0, 0, 0)``.
    """
    try:
        match = data_frame.loc[
            (data_frame['time'] == timestamp)
            & (data_frame['at_id'] == at_id)
        ]
        if len(match) != 1:
            return 0, 0, 0
        row = match.iloc[0]
        return float(row['x']), float(row['y']), float(row['speed_m_s'])
    except (KeyError, TypeError, ValueError):
        return 0, 0, 0


def main(read_path="/home/read.csv",
         write_path='/home/write.csv',
         reference_path="/home/reference.csv",
         delay=0.001):
    """Look up every record of ``read_path`` in the reference CSV.

    For each data row of ``read_path`` (the first line is skipped as a
    header) the last column is taken as the timestamp and the second column
    as ``at_id``; the matching ``x``/``y``/``speed_m_s`` values from
    ``reference_path`` are appended to ``write_path`` one row at a time.

    Args:
        read_path: CSV file with the records to look up.
        write_path: CSV file the results are appended to.
        reference_path: CSV file holding the reference data.
        delay: seconds to sleep after each written row (0 disables it).
    """
    # ndmin=2 keeps the array two-dimensional even for a single-line file,
    # so slicing off the header row always works.
    fin = np.loadtxt(read_path, dtype=str, delimiter=',', ndmin=2)
    data_frame = pd.read_csv(reference_path)

    for record in fin[1:].tolist():
        timestamp = int(float(record[-1]))
        # NOTE(review): at_id is compared as a string; if the reference
        # column is parsed as numeric nothing will match - confirm.
        at_id = record[1]
        x, y, speed = _lookup_motion(data_frame, timestamp, at_id)
        csv_writer(
            [{"x": x, "y": y, "speed": speed}],
            data_header_list,
            write_path,
        )
        if delay:
            time.sleep(delay)


if __name__ == "__main__":
    main()