1. 程式人生 > 其它 > Python 結合 Pandas 與 NumPy 在百萬條資料中取某一條資料並寫入 csv 檔案

Python 結合 Pandas 與 NumPy 在百萬條資料中取某一條資料並寫入 csv 檔案

Python 結合 Pandas 與 NumPy 在百萬條資料中取某一條資料並寫入 csv 檔案。本人通過此方法,在短短幾分鐘內恢復了三十多萬條資料!

Python 結合 Pandas 與 NumPy 在百萬條資料中取某一條資料並寫入 csv 檔案

Pandas:用於資料處理,是 Python 的一個資料分析套件。

NumPy:數值計算的擴充套件,能高效處理 N 維陣列、複雜函式與線性代數。

import csv
import time

import numpy as np
import pandas as pd
# Column names of the output CSV, in the order they are written.
data_header_list = ["x", "y", "speed"]

def csv_writer(data_list, data_header_list, file_path):
    """Append rows to a CSV file, writing the header only if the file is empty.

    Args:
        data_list: List of dicts, one per row, keyed by the column names in
            ``data_header_list``.
        data_header_list: List of column names; also fixes the column order.
        file_path: Path of the CSV file to append to. It is created when it
            does not exist yet.

    Raises:
        ValueError: If ``data_list`` or ``data_header_list`` is not a list.
    """
    if not isinstance(data_list, list):
        raise ValueError("data_list is no list")
    if not isinstance(data_header_list, list):
        raise ValueError("data_header_list is no list")

    # newline='' is what the csv module expects, so that it controls the line
    # endings itself (otherwise blank lines can appear on some platforms).
    with open(file_path, 'a', newline='') as csv_fi:
        writer = csv.DictWriter(csv_fi, data_header_list)

        # Decide whether this is the first write by looking at the file size:
        # move to the end (whence=2) and check the offset. Reading from an
        # append-mode stream is not a reliable way to detect existing content.
        csv_fi.seek(0, 2)
        if csv_fi.tell() == 0:
            writer.writeheader()
        writer.writerows(data_list)
            
# CSV file whose rows are looked up in the reference data. Every field is read
# as a string; ndmin=2 keeps the result two-dimensional even when the file
# holds a single line, so the row slicing below stays valid.
fin = np.loadtxt("/home/read.csv", dtype=str, delimiter=',', ndmin=2)

file_path = '/home/write.csv'  # CSV file the results are appended to

data_frame = pd.read_csv("/home/reference.csv")  # CSV file with the reference data

# Drop the first line of read.csv (presumably a header row -- confirm).
data = fin[1:].tolist()

for record in data:
    # The last column holds the timestamp; going through float() first accepts
    # values written with a decimal part such as "1600000000.0".
    timestamp = int(float(record[-1]))
    at_id = record[1]

    # NOTE(review): at_id is a string here. If pandas parsed the reference
    # 'at_id' column as a numeric dtype this comparison never matches --
    # confirm the column's dtype against the real data.
    matched = data_frame.loc[
        (data_frame['time'] == timestamp) & (data_frame['at_id'] == at_id)
    ]

    # Fall back to zeros unless exactly one reference row matches and its
    # values are numeric. A missing column raises KeyError instead of being
    # silently turned into zeros for every row.
    x = y = speed = 0
    if len(matched) == 1:
        hit = matched.iloc[0]
        try:
            x, y, speed = float(hit['x']), float(hit['y']), float(hit['speed_m_s'])
        except (TypeError, ValueError):
            x = y = speed = 0

    csv_writer([{"x": x, "y": y, "speed": speed}], data_header_list, file_path)
    # Short pause between writes, kept from the original script.
    time.sleep(0.001)