讀取CSV檔案內容-筆記
阿新 • • 發佈:2019-02-06
1、每一次只能讀取同一種資料型別,不能讀取字串。
2、每次讀取會返回一個 CSV資料結構,其中包含源資料、二維陣列以及行列數資訊
3、可以轉換二維陣列,但總大小不能變
123.csv
Month,"CO2 (ppm) mauna loa, 1965-1980",,,CO2 (ppm) mauna loa
Jan-65,319.32,1,,,
Feb-65,320.36,2,,,
Mar-65,320.82,3,,,
Apr-65,322.06,4,,,
May-65,322.17,5,,,
Jun-65,321.95,6,,,
標頭檔案 read_csv_data.h
#include <stdio.h>
#include <memory.h> // for memset
#include <stdlib.h> // for malloc, free
#include <string.h> // for strtok
// Size in bytes of the line buffer passed to fgets(); one read returns at most MAX_LINE_SIZE - 1 characters.
#define MAX_LINE_SIZE 1024
// Result of splitting a string on a delimiter (filled by str_split_func, released by str_split_free).
struct str_split
{
int count; // number of fields referenced by str_array
char *str; // heap copy of the source string in which each delimiter is overwritten with '\0'
char **str_array; // pointers to the start of each field inside str
};
// Variant cell type: a single storage slot that can hold any one of the supported numeric types.
// Which member is meaningful depends on the type code that was used when the value was stored.
typedef union dtype_u{
long int s32; // NOTE: the width of long int is platform-dependent, despite the "32" in the name
char s8;
short int s16;
unsigned char u8;
unsigned short int u16;
unsigned long int u32; // NOTE: the width of unsigned long int is platform-dependent as well
float f32;
double f64;
} Dtype;
// Parsed CSV data: a flat cell buffer plus a table of row pointers into that buffer.
struct csv_s{
Dtype **darray; // row-pointer table (two-dimensional view); darray[r] points at the first cell of row r inside data
int drow; // number of rows
int dcol; // number of columns
int dnum; // total number of cells (drow * dcol)
Dtype data[]; // flexible array member holding the cells row after row
};
// Type codes that select which Dtype representation strToData() stores.
enum{
S8, // char
S16, // short int
S32, // long int
U8, // unsigned char
U16, // unsigned short int
U32, // unsigned long int
F32, // float
F64, // double
DEF = S32 // default type code
};
/*
Release a CSV structure together with its row-pointer table and set the caller's pointer to NULL.
For usage, see testFile().
*/
void FreeCsvData(struct csv_s **csv_ptr);
/*
Re-shape the two-dimensional view of the CSV data to row x col by building a new row-pointer table.
row * col must equal the existing number of cells; a negative value is returned otherwise.
For usage, see testFile().
*/
int setNewArray( struct csv_s *tt, int row, int col);
/*
Read numeric data from a CSV file and build a CSV structure. For usage, see testFile().
csvFilePath: path of the file
delimiter: field separator character
skiprows: number of leading lines to skip
ColumnList: comma-separated list of the columns to select, counted from column 0; NULL is meant to select every column
dclass: data type code (S8 ... F64) applied to every selected cell
*/
struct csv_s *ReadCsvData(char* csvFilePath, char delimiter, int skiprows, char *ColumnList, int dclass);
原始碼 read_csv_data.c
#include "read_csv_data.h"
/*
 * Split src on delimiter into an array of fields.
 *
 * split is zeroed first. On success it receives a private heap copy of src
 * (split->str) in which every delimiter has been replaced by '\0', an array of
 * pointers to the start of each field (split->str_array) and the number of
 * fields (split->count). Adjacent delimiters produce empty fields and nothing
 * is trimmed, so a trailing newline stays part of the last field.
 * Release the result with str_split_free().
 *
 * Returns 0 on success and also when there is nothing to split (split or src is
 * NULL, or src is empty; count stays 0). Returns -1 when memory cannot be
 * allocated; split is then left zeroed, so count is 0 and nothing is owned.
 */
int str_split_func(struct str_split *split, char * src, char delimiter)
{
    size_t len;
    int count = 1;
    char *p;
    char **field;

    if (split != NULL) {
        memset(split, 0, sizeof(struct str_split));
    }
    if (split == NULL || src == NULL || src[0] == '\0') {
        return 0;
    }

    /* A single pass over src yields both its length and the number of fields. */
    for (len = 0; src[len] != '\0'; len++) {
        if (src[len] == delimiter) {
            count++;
        }
    }

    /*
     * malloc + memcpy rather than strdup: strdup is not declared in strict
     * ISO C modes, where the implicit int return would truncate the pointer.
     */
    split->str = malloc(len + 1);
    if (split->str == NULL) {
        return -1;
    }
    memcpy(split->str, src, len + 1);

    split->str_array = malloc((size_t)count * sizeof *split->str_array);
    if (split->str_array == NULL) {
        /* Do not hand back a half-built result. */
        free(split->str);
        split->str = NULL;
        return -1;
    }
    split->count = count;

    /* Terminate each field in the copy and record where the next one begins. */
    field = split->str_array;
    *field = split->str;
    for (p = split->str; *p != '\0'; p++) {
        if (*p == delimiter) {
            *p = '\0';
            *++field = p + 1;
        }
    }
    return 0;
}
/*
 * Release the buffers owned by a str_split and reset both pointers to NULL.
 * A NULL split is accepted and ignored. The count field is left untouched.
 * Always returns 0.
 */
int str_split_free(struct str_split *split)
{
    if (split != NULL) {
        /* free(NULL) is a no-op, so the members need no individual checks. */
        free(split->str);
        free(split->str_array);
        split->str = NULL;
        split->str_array = NULL;
    }
    return 0;
}
// Report the total size of the CSV file in bytes: the offset ftell() gives after seeking to the end.
// Returns -1 when fp is NULL. The stream is left positioned at the end of the file.
int GetTotalSize(FILE * fp)
{
    long end_pos;

    if (fp == NULL) {
        return -1;
    }
    fseek(fp, 0, SEEK_END);
    end_pos = ftell(fp);
    return (int)end_pos;
}
// Count the lines of the CSV file by counting successful fgets() reads into a MAX_LINE_SIZE buffer.
// A physical line that does not fit into one read is therefore counted more than once.
// The stream is rewound to the start before counting and again before returning.
int GetTotalLineCount(FILE * fp)
{
    char line_buf[MAX_LINE_SIZE];
    int lines = 0;

    fseek(fp, 0, SEEK_SET);
    while (fgets(line_buf, MAX_LINE_SIZE, fp) != NULL) {
        lines++;
    }
    fseek(fp, 0, SEEK_SET);
    return lines;
}
//計算csv檔案中的總列數(以第一行的列數為基準)
int GetTotalColCount(FILE * fp, char delimiter)
{
int i = 0;
char strLine[MAX_LINE_SIZE];
struct str_split tss;
fseek(fp,0,SEEK_SET);
char *str=NULL;
str = fgets(strLine, MAX_LINE_SIZE, fp);
if (str)
{
str_split_func(&tss, strLine, delimiter);
i = tss.count;
str_split_free(&tss);
}
return i;
}
// Convert the decimal text in str and store it in *d using the representation selected by dtpye.
//
// d:     destination cell
// str:   text to convert; leading whitespace is skipped and conversion stops at the first
//        character that cannot be part of the number, so empty or non-numeric text yields 0
// dtpye: one of the type codes S8, S16, S32, U8, U16, U32, F32, F64
//
// Integer codes narrower than long are converted with strtol() and then narrowed by a cast.
// U32 is converted with strtoul() so that values above LONG_MAX are not clamped.
//
// Returns 0 on success, -1 when d or str is NULL or when dtpye is not a known type code
// (a message is printed in the latter case).
int strToData(Dtype *d, char *str, int dtpye)
{
    if (d == NULL || str == NULL) {
        return -1;
    }

    switch (dtpye) {
    case S8:
        d->s8 = (char)strtol(str, NULL, 10);
        break;
    case S16:
        d->s16 = (short int)strtol(str, NULL, 10);
        break;
    case S32:
        d->s32 = strtol(str, NULL, 10);
        break;
    case U8:
        d->u8 = (unsigned char)strtol(str, NULL, 10);
        break;
    case U16:
        d->u16 = (unsigned short int)strtol(str, NULL, 10);
        break;
    case U32:
        /* Unsigned parse: strtol() would saturate at LONG_MAX for large values. */
        d->u32 = strtoul(str, NULL, 10);
        break;
    case F32:
        d->f32 = (float)strtod(str, NULL);
        break;
    case F64:
        d->f64 = strtod(str, NULL);
        break;
    default:
        printf("讀取資料型別不對\n");
        return -1;
    }
    return 0;
}
// Re-shape the two-dimensional view of a CSV structure to row x col.
//
// The cell buffer tt->data is not touched; only the row-pointer table tt->darray is rebuilt so
// that darray[r] points at cell r * col. Any previous table is freed, so tt->darray must be
// either NULL or a pointer obtained from an earlier call.
//
// Returns 0 on success,
//        -1 when tt is NULL,
//        -2 when row or col is negative or row * col differs from tt->dnum,
//        -3 when the new table cannot be allocated (the old table is kept in that case).
int setNewArray( struct csv_s *tt, int row, int col)
{
    Dtype **rows;
    int i;

    if (tt == NULL) {
        return -1;
    }
    /* Multiply in a wider type so that a huge row * col cannot overflow int. */
    if (row < 0 || col < 0 || (long long)row * (long long)col != (long long)tt->dnum) {
        return -2;
    }

    /* Ask for at least one slot so that an empty table still gets a valid pointer. */
    rows = malloc((size_t)(row > 0 ? row : 1) * sizeof *rows);
    if (rows == NULL) {
        return -3;
    }

    free(tt->darray);
    tt->darray = rows;
    tt->drow = row;
    tt->dcol = col;
    tt->dnum = row * col;
    for (i = 0; i < row; i++) {
        rows[i] = tt->data + (size_t)i * (size_t)col;
    }
    return 0;
}
// Free a CSV structure (its row-pointer table first, then the structure itself)
// and store NULL in the caller's pointer. *csv_ptr may already be NULL.
void FreeCsvData(struct csv_s **csv_ptr)
{
    struct csv_s *csv = *csv_ptr;

    if (csv != NULL) {
        /* free(NULL) is a no-op, so an absent table needs no special case. */
        free(csv->darray);
        free(csv);
    }
    *csv_ptr = NULL;
}
// Read numeric data from a CSV file.
//
// csvFilePath: path of the file to read
// delimiter:   field separator character
// skiprows:    number of leading lines to skip (0 up to the number of lines in the file)
// ColumnList:  comma-separated column indices to extract, counted from 0, in output order
//              (e.g. "2,1" returns column 2 followed by column 1); NULL selects every column
// dclass:      type code (S8 ... F64) used to convert every selected cell
//
// Lines are read with fgets() into a MAX_LINE_SIZE buffer, so a longer physical line is treated
// as several rows. The number of columns is taken from the first line of the file.
//
// Returns a newly allocated structure that the caller releases with FreeCsvData(), or NULL
// (after printing a message) when the file cannot be opened, the arguments do not fit the file,
// a row lacks one of the selected columns, a cell cannot be converted, or memory runs out.
struct csv_s *ReadCsvData(char* csvFilePath, char delimiter, int skiprows, char *ColumnList, int dclass)
{
    FILE *fCsv = NULL;
    struct csv_s *csv = NULL;
    int *columns = NULL;        // selected column indices, in output order
    int column_count = 0;
    struct str_split tss;
    char strLine[MAX_LINE_SIZE];
    int rowTotal;
    int colTotal;
    int rows_read = 0;
    int cell = 0;               // index of the next cell to fill in csv->data
    int i;
    int j;

    // Start from an empty split so the error path can always call str_split_free().
    memset(&tss, 0, sizeof tss);

    // fopen() already fails for a missing or unreadable file; no separate existence probe is needed.
    if (csvFilePath != NULL) {
        fCsv = fopen(csvFilePath, "r");
    }
    if (fCsv == NULL) {
        printf("open file %s failed", csvFilePath ? csvFilePath : "(null)");
        goto label_error;
    }

    rowTotal = GetTotalLineCount(fCsv);
    colTotal = GetTotalColCount(fCsv, delimiter);
    if (rowTotal < 1 || colTotal < 1 || skiprows < 0 || skiprows > rowTotal) {
        printf("資料不對,有%d行,%d列\n", rowTotal, colTotal);
        goto label_error;
    }

    // Build the list of selected columns.
    if (ColumnList == NULL) {
        column_count = colTotal;
        columns = malloc((size_t)column_count * sizeof *columns);
        if (columns == NULL) {
            printf("分配記憶體失敗 \n");
            goto label_error;
        }
        for (i = 0; i < column_count; i++) {
            columns[i] = i;
        }
    } else {
        str_split_func(&tss, ColumnList, ',');
        column_count = tss.count;
        if (column_count < 1) {
            printf("[%s] invalid column list\n", ColumnList);
            goto label_error;
        }
        if (column_count > colTotal) {
            printf("[%s] 超過最大列數 %d\n", ColumnList, colTotal);
            goto label_error;
        }
        columns = malloc((size_t)column_count * sizeof *columns);
        if (columns == NULL) {
            printf("分配記憶體失敗 \n");
            goto label_error;
        }
        for (i = 0; i < column_count; i++) {
            // Valid indices are 0 .. colTotal - 1; check before narrowing to int.
            long idx = strtol(tss.str_array[i], NULL, 10);
            if (idx < 0 || idx >= colTotal) {
                printf("[%s] 超過最大列數 %d\n", ColumnList, colTotal);
                goto label_error;
            }
            columns[i] = (int)idx;
        }
        str_split_free(&tss);
    }

    // Allocate the structure together with room for every selected cell.
    rowTotal -= skiprows;
    csv = malloc(sizeof *csv + (size_t)rowTotal * (size_t)column_count * sizeof(Dtype));
    if (csv == NULL) {
        printf("分配記憶體失敗 \n");
        goto label_error;
    }
    csv->darray = NULL;

    // Skip the leading lines, then convert at most rowTotal rows.
    fseek(fCsv, 0, SEEK_SET);
    for (i = skiprows; i > 0 && fgets(strLine, MAX_LINE_SIZE, fCsv) != NULL; i--) {
        // nothing to do: the line is simply discarded
    }
    while (rows_read < rowTotal && fgets(strLine, MAX_LINE_SIZE, fCsv) != NULL) {
        str_split_func(&tss, strLine, delimiter);
        for (j = 0; j < column_count; j++) {
            // A row may hold fewer fields than the first line did.
            if (columns[j] >= tss.count) {
                printf("read error\n");
                goto label_error;
            }
            if (strToData(csv->data + cell, tss.str_array[columns[j]], dclass) < 0) {
                printf("str to data error\n");
                goto label_error;
            }
            cell++;
        }
        str_split_free(&tss);
        rows_read++;
    }

    // Describe only the rows that were actually filled in.
    csv->drow = rows_read;
    csv->dcol = column_count;
    csv->dnum = rows_read * column_count;
    if (setNewArray(csv, csv->drow, csv->dcol) < 0) {
        printf("分配記憶體失敗 \n");
        goto label_error;
    }

    fclose(fCsv);
    free(columns);
    return csv;

label_error:
    str_split_free(&tss);
    if (fCsv != NULL) {
        fclose(fCsv);
    }
    free(csv);
    free(columns);
    return NULL;
}
// Self-test for setNewArray() on a hand-built structure of 1000 double cells:
// views the data as 500 x 2, then as 100 x 10, and prints the last row of each view.
// Only cells 1 and 900..999 are initialised; the printed rows lie inside that range.
// Always returns 0.
int testData()
{
    int size = 1000;
    int row;
    int col;
    int i;
    int j;
    struct csv_s *stdata = malloc(sizeof *stdata + (size_t)size * sizeof(Dtype));

    if (stdata == NULL) {
        printf("分配記憶體失敗 \n");
        return 0;
    }
    // setNewArray() frees the previous table, so it must start out as NULL.
    stdata->darray = NULL;

    // Initialise the tail of the buffer.
    for (i = 900; i < size; i++) {
        stdata->data[i].f64 = i * 1.0;
    }
    // Spot check on a single cell.
    stdata->data[1].f64 = 10 * 1.0;
    printf("%f asdfasfasdf\n", stdata->data[1].f64);

    // First view: 500 x 2.
    row = 500;
    col = 2;
    stdata->drow = row;
    stdata->dcol = col;
    stdata->dnum = row * col;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray %d error\n", j);
        FreeCsvData(&stdata);
        return 0;
    }
    printf("顯示定義結構最後一行資料 \n");
    for (j = 0; j < col; j++) {
        printf("%f\n", stdata->darray[row - 1][j].f64);
    }

    // Second view: 100 x 10.
    row = 100;
    col = 10;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 轉換 %d error\n", j);
        FreeCsvData(&stdata);
        return 0;
    }
    printf("顯示結構轉換最後一行資料 \n");
    for (j = 0; j < col; j++) {
        printf("%f\n", stdata->darray[row - 1][j].f64);
    }

    FreeCsvData(&stdata);
    return 0;
}
// Demo for floating-point data: reads columns 2 and 1 of 123.csv as F32 (skipping one header
// line), prints the last row, re-shapes the data to 3 x 4, then attempts a 3 x 2 re-shape that
// is expected to be rejected unless the data happens to hold 6 cells, and finally frees it all.
void testFile()
{
    struct csv_s *stdata;
    Dtype **show;
    int row;
    int col;
    int j;

    stdata = ReadCsvData("123.csv", ',', 1, "2,1", F32);
    if (stdata == NULL) {
        // Nothing was read (missing file, bad arguments, ...); there is nothing to show.
        printf("ReadCsvData failed\n");
        return;
    }
    printf("testFile row %d col %d\n", stdata->drow, stdata->dcol);

    // darray is a two-dimensional view with drow rows and dcol columns.
    show = stdata->darray;
    printf("顯示最後一行資料 \n");
    if (stdata->drow > 0) {
        for (j = 0; j < stdata->dcol; j++) {
            printf("%f\n", show[stdata->drow - 1][j].f32);
        }
    }

    // Re-shape the view.
    row = 3;
    col = 4;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 轉換 %d error\n", j);
    } else {
        printf("顯示結構轉換最後一行資料 \n");
        for (j = 0; j < col; j++) {
            printf("%f\n", stdata->darray[row - 1][j].f32);
        }
    }

    // Re-shape with a size that normally does not match the number of cells.
    printf("轉換失敗測試\n");
    row = 3;
    col = 2;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 轉換 %d error\n", j);
    }

    // Release everything; FreeCsvData() also resets the pointer.
    FreeCsvData(&stdata);
    if (stdata == NULL)
        printf("ok t is null\n");
    else
        printf("ok t is not null\n");
}
// Demo for integer data: same sequence as testFile(), but the cells are read with the default
// type code DEF (S32, stored in the long int member s32) and printed as integers.
void testFile2()
{
    struct csv_s *stdata;
    Dtype **show;
    int row;
    int col;
    int j;

    stdata = ReadCsvData("123.csv", ',', 1, "2,1", DEF);
    if (stdata == NULL) {
        // Nothing was read (missing file, bad arguments, ...); there is nothing to show.
        printf("ReadCsvData failed\n");
        return;
    }
    printf("testFile row %d col %d\n", stdata->drow, stdata->dcol);

    // darray is a two-dimensional view with drow rows and dcol columns.
    show = stdata->darray;
    printf("顯示最後一行資料 \n");
    if (stdata->drow > 0) {
        for (j = 0; j < stdata->dcol; j++) {
            // Print the member that was stored; passing the whole union to %d is undefined.
            printf("%ld\n", show[stdata->drow - 1][j].s32);
        }
    }

    // Re-shape the view.
    row = 3;
    col = 4;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 轉換 %d error\n", j);
    } else {
        printf("顯示結構轉換最後一行資料 \n");
        for (j = 0; j < col; j++) {
            printf("%ld\n", stdata->darray[row - 1][j].s32);
        }
    }

    // Re-shape with a size that normally does not match the number of cells.
    printf("轉換失敗測試\n");
    row = 3;
    col = 2;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 轉換 %d error\n", j);
    }

    // Release everything; FreeCsvData() also resets the pointer.
    FreeCsvData(&stdata);
    if (stdata == NULL)
        printf("ok t is null\n");
    else
        printf("ok t is not null\n");
}
// Program entry point: runs the two file-based demos. Command-line arguments are not used,
// so the parameterless standard signature is declared.
int main(void)
{
    // testData();
    testFile();
    testFile2();
    return 0;
}