1. 程式人生 > >讀取CSV檔案內容-筆記

讀取CSV檔案內容-筆記

1、每一次只能讀取同一種資料型別,不能讀取字串。
2、每次讀取會返回一個 CSV 資料結構,其中包含源資料、二維陣列以及行列數資訊
3、可以轉換二維陣列,但總大小不能變

這裡寫圖片描述

123.csv

Month,"CO2 (ppm) mauna loa, 1965-1980",,,CO2 (ppm) mauna loa
Jan-65,319.32,1,,,
Feb-65,320.36,2,,,
Mar-65,320.82,3,,,
Apr-65,322.06,4,,,
May-65,322.17,5,,,
Jun-65,321.95,6,,,

標頭檔案 read_csv_data.h


/*
 * read_csv_data.h - declarations for a small reader of numeric CSV files.
 *
 * Every cell of a file is converted to one numeric type selected by the
 * caller; text columns cannot be stored.
 */
#ifndef READ_CSV_DATA_H
#define READ_CSV_DATA_H

#include <stdio.h>
#include <memory.h> // for memset
#include <stdlib.h> // for malloc, free
#include <string.h> // for strtok

/* Maximum number of bytes a single fgets call reads (terminator included). */
#define MAX_LINE_SIZE 1024

/* Result of splitting a string on a delimiter. */
struct str_split {
    int count;          /* number of entries in str_array */
    char *str;          /* private copy of the input; delimiters are overwritten with '\0' */
    char **str_array;   /* pointers to the start of each field inside str */
};

/*
 * One CSV cell. Only the member that matches the type requested when the
 * file was read holds a meaningful value.
 * NOTE(review): s32/u32 are declared as (unsigned) long int, whose width
 * depends on the platform and may be more than 32 bits.
 */
typedef union dtype_u {
    long int s32;
    char s8;
    short int s16;
    unsigned char u8;
    unsigned short int u16;
    unsigned long int u32;
    float f32;
    double f64;
} Dtype;

/* Parsed CSV content: flat cell storage plus a row-pointer view onto it. */
struct csv_s {
    Dtype **darray; /* two-dimensional view: darray[row][col] points into data */
    int drow;       /* number of rows in the view */
    int dcol;       /* number of columns in the view */
    int dnum;       /* total number of cells */
    Dtype data[];   /* cell storage, row after row */
};

/* Type selectors accepted as the dclass argument of ReadCsvData. */
enum {
    S8,
    S16,
    S32,
    U8,
    U16,
    U32,
    F32,
    F64,
    DEF = S32   /* default type */
};

/*
 * Release a CSV structure and set *csv_ptr to NULL.
 * See testFile() for usage.
 */
void FreeCsvData(struct csv_s **csv_ptr);

/*
 * Re-shape the two-dimensional view of a CSV structure into row x col.
 * The total number of cells must not change. See testFile() for usage.
 */
int setNewArray( struct csv_s *tt, int row, int col);

/*
 * Read a CSV file and build a CSV structure. See testFile() for usage.
 *   csvFilePath: path of the file
 *   delimiter:   field separator
 *   skiprows:    number of leading rows to skip
 *   ColumnList:  comma separated list of the columns to keep, counted from 0
 *   dclass:      data type of every cell (S8 ... F64)
 */
struct csv_s *ReadCsvData(char* csvFilePath, char delimiter, int skiprows, char *ColumnList, int dclass);

#endif /* READ_CSV_DATA_H */

原始碼 read_csv_data.c

#include "read_csv_data.h"

#include <limits.h>



/*
 * Split `src` at every occurrence of `delimiter`.
 *
 * `src` itself is never modified: the function works on a private copy that
 * is stored in split->str, with each delimiter replaced by '\0'.
 * split->str_array receives one pointer per field (empty fields included)
 * and split->count the number of fields. Release the result with
 * str_split_free().
 *
 * `split` is always reset first, so on any early exit it is left zeroed
 * (count == 0, both pointers NULL).
 *
 * Returns 0 on success and when there is nothing to split (NULL `split`,
 * NULL or empty `src`); returns -1 when memory cannot be allocated.
 */
int str_split_func(struct str_split *split, char * src, char delimiter)
{
    size_t len;
    size_t pos;
    int count = 1;      /* a non-empty string always has at least one field */
    int next = 1;       /* index of the next free slot in fields */
    char *copy;
    char **fields;

    if (NULL == split) {
        return 0;
    }
    memset(split, 0, sizeof(struct str_split));

    if (NULL == src || src[0] == '\0') {
        return 0;
    }

    len = strlen(src);
    for (pos = 0; pos < len; pos++) {
        if (src[pos] == delimiter) {
            count++;
        }
    }

    /* malloc + memcpy instead of strdup, which is not part of ISO C. */
    copy = malloc(len + 1);
    if (NULL == copy) {
        return -1;
    }
    memcpy(copy, src, len + 1);

    fields = malloc((size_t)count * sizeof *fields);
    if (NULL == fields) {
        free(copy);     /* do not hand back a half-built result */
        return -1;
    }

    /* Cut the copy in place and remember where every field starts. */
    fields[0] = copy;
    for (pos = 0; pos < len; pos++) {
        if (copy[pos] == delimiter) {
            copy[pos] = '\0';
            fields[next++] = copy + pos + 1;
        }
    }

    split->str = copy;
    split->str_array = fields;
    split->count = count;
    return 0;
}
/*
 * Release the buffers held by a split result and reset it to the empty
 * state (count == 0, both pointers NULL), so that it can be freed again or
 * reused safely. A NULL `split` is accepted. Always returns 0.
 */
int str_split_free(struct str_split *split)
{
    if (split == NULL) {
        return 0;
    }

    /* free(NULL) is a no-op, so no per-pointer guard is needed. */
    free(split->str);
    free(split->str_array);

    split->str = NULL;
    split->str_array = NULL;
    split->count = 0;   /* keep the count in step with the released array */
    return 0;
}


/*
 * Return the size of the CSV file in bytes.
 *
 * The stream is left positioned at its end. Returns -1 when `fp` is NULL,
 * when seeking or querying the position fails, or when the size does not
 * fit in an int.
 */
int GetTotalSize(FILE * fp)
{
    long size;

    if (fp == NULL) {
        return -1;
    }
    /* If the seek fails, ftell would report the old position, not the size. */
    if (fseek(fp, 0, SEEK_END) != 0) {
        return -1;
    }
    size = ftell(fp);
    if (size < 0 || size > INT_MAX) {
        return -1;
    }
    return (int)size;
}

/*
 * Count the lines of the CSV file.
 *
 * The stream is rewound before counting and again afterwards. A "line" is
 * one successful fgets call with a MAX_LINE_SIZE buffer, so a physical line
 * that does not fit in the buffer is counted once per chunk.
 *
 * Returns the number of lines, or -1 when `fp` is NULL or cannot be
 * rewound.
 */
int GetTotalLineCount(FILE * fp)
{
    char strLine[MAX_LINE_SIZE];
    int lines = 0;

    if (fp == NULL) {
        return -1;
    }
    if (fseek(fp, 0, SEEK_SET) != 0) {
        return -1;
    }

    while (fgets(strLine, MAX_LINE_SIZE, fp) != NULL) {
        lines++;
    }

    if (fseek(fp, 0, SEEK_SET) != 0) {
        return -1;
    }
    return lines;
}
/*
 * Count the columns of the CSV file, using its first line as the reference.
 *
 * The stream is rewound first and is left positioned just after the line
 * that was read. Fields are separated by plain `delimiter` characters;
 * quoting is not interpreted.
 *
 * Returns the number of fields on the first line, 0 when no line could be
 * read, or -1 when `fp` is NULL or cannot be rewound.
 */
int GetTotalColCount(FILE * fp, char delimiter)
{
    char strLine[MAX_LINE_SIZE];
    struct str_split tss;
    int columns = 0;

    if (fp == NULL) {
        return -1;
    }
    if (fseek(fp, 0, SEEK_SET) != 0) {
        return -1;
    }

    if (fgets(strLine, MAX_LINE_SIZE, fp) != NULL) {
        str_split_func(&tss, strLine, delimiter);
        columns = tss.count;    /* copy before the split result is released */
        str_split_free(&tss);
    }

    return columns;
}
/*
 * Convert the text `str` to the numeric type selected by `dtpye` and store
 * it in the matching member of `*d`.
 *
 * Integer types are parsed in base 10: signed ones with strtol, unsigned
 * ones with strtoul, so that values above LONG_MAX are not clamped.
 * Floating-point types are parsed with strtod. Text that does not start
 * with a number is stored as 0.
 *
 * Returns 0 on success, -1 when `d` or `str` is NULL or `dtpye` is not one
 * of S8 ... F64 (a message is printed in the latter case).
 */
int strToData(Dtype *d, char *str, int dtpye)
{
    if (d == NULL || str == NULL) {
        return -1;
    }

    switch (dtpye) {
    case S8:
        d->s8 = (char)strtol(str, NULL, 10);
        break;
    case S16:
        d->s16 = (short int)strtol(str, NULL, 10);
        break;
    case S32:
        d->s32 = strtol(str, NULL, 10);
        break;
    case U8:
        d->u8 = (unsigned char)strtoul(str, NULL, 10);
        break;
    case U16:
        d->u16 = (unsigned short int)strtoul(str, NULL, 10);
        break;
    case U32:
        d->u32 = strtoul(str, NULL, 10);
        break;
    case F32:
        d->f32 = (float)strtod(str, NULL);
        break;
    case F64:
        d->f64 = strtod(str, NULL);
        break;
    default:
        printf("讀取資料型別不對\n");
        return -1;
    }

    return 0;
}

/*
 * Re-shape the two-dimensional view of a CSV structure.
 *
 * Builds a new table of `row` pointers, each pointing at the start of a run
 * of `col` cells inside tt->data, and installs it as tt->darray after
 * freeing the previous table. The cell data itself is not moved.
 * tt->darray must be NULL or a table obtained from an earlier call.
 *
 * Returns  0 on success,
 *         -1 when `tt` is NULL,
 *         -2 when `row` or `col` is not positive, or row * col differs from
 *            tt->dnum (the structure is left untouched),
 *         -3 when the pointer table cannot be allocated (left untouched).
 */
int setNewArray( struct csv_s *tt, int row, int col)
{
    Dtype **rows;
    int i;

    if (tt == NULL) {
        return -1;
    }
    /* Reject non-positive shapes before they reach the allocation size. */
    if (row <= 0 || col <= 0) {
        return -2;
    }
    /* Same test as row * col == dnum, but it cannot overflow. */
    if (tt->dnum % col != 0 || tt->dnum / col != row) {
        return -2;
    }

    rows = malloc((size_t)row * sizeof *rows);
    if (rows == NULL) {
        return -3;
    }
    for (i = 0; i < row; i++) {
        rows[i] = tt->data + (size_t)i * (size_t)col;
    }

    free(tt->darray);
    tt->darray = rows;
    tt->drow = row;
    tt->dcol = col;
    tt->dnum = row * col;
    return 0;
}

/*
 * Release a CSV structure together with its row-pointer table and set the
 * caller's pointer to NULL. Both a NULL `csv_ptr` and a NULL `*csv_ptr` are
 * accepted.
 */
void FreeCsvData(struct csv_s **csv_ptr)
{
    if (csv_ptr == NULL) {
        return;
    }
    if (*csv_ptr != NULL) {
        free((*csv_ptr)->darray);   /* free(NULL) is a no-op */
        free(*csv_ptr);
    }
    *csv_ptr = NULL;
}

/*
 * Build the list of column indices to extract from each row.
 *
 * With a NULL `ColumnList` every column 0 .. colTotal-1 is selected.
 * Otherwise `ColumnList` is a comma separated list of zero-based indices,
 * each of which must lie in [0, colTotal), and the list may not be longer
 * than colTotal.
 *
 * Returns a malloc'ed array (the caller frees it) and stores its length in
 * *count_out, or returns NULL after printing a message.
 */
static int *csv_build_column_list(char *ColumnList, int colTotal, int *count_out)
{
    struct str_split tss;
    int *cols;
    int n;
    int i;

    if (ColumnList == NULL) {
        cols = malloc((size_t)colTotal * sizeof *cols);
        if (cols == NULL) {
            printf("分配記憶體失敗 \n");
            return NULL;
        }
        for (i = 0; i < colTotal; i++) {
            cols[i] = i;
        }
        *count_out = colTotal;
        return cols;
    }

    str_split_func(&tss, ColumnList, ',');
    n = tss.count;
    if (n < 1) {
        printf("ColumnList 不能為空\n");
        str_split_free(&tss);
        return NULL;
    }
    if (n > colTotal) {
        printf("[%s] 超過最大列數 %d\n", ColumnList, colTotal);
        str_split_free(&tss);
        return NULL;
    }

    cols = malloc((size_t)n * sizeof *cols);
    if (cols == NULL) {
        printf("分配記憶體失敗 \n");
        str_split_free(&tss);
        return NULL;
    }
    for (i = 0; i < n; i++) {
        long idx = strtol(tss.str_array[i], NULL, 10);

        /* Indices are zero-based, so colTotal itself is already too large. */
        if (idx < 0 || idx >= colTotal) {
            printf("[%s] 超過最大列數 %d\n", ColumnList, colTotal);
            free(cols);
            str_split_free(&tss);
            return NULL;
        }
        cols[i] = (int)idx;
    }
    str_split_free(&tss);

    *count_out = n;
    return cols;
}

/*
 * Read numeric data from a CSV file.
 *
 *   csvFilePath: path of the file to read
 *   delimiter:   field separator (quoting is not interpreted)
 *   skiprows:    number of leading lines to skip, 0 .. number of lines
 *   ColumnList:  comma separated zero-based column indices to extract, in
 *                the order they should appear in the result (e.g. "2,1");
 *                NULL selects every column of the first line
 *   dclass:      type every cell is converted to (S8 ... F64)
 *
 * The number of columns is taken from the first line of the file. Every
 * remaining line must contain each selected column.
 *
 * Returns a newly allocated structure that the caller releases with
 * FreeCsvData(), or NULL after printing a message when the file cannot be
 * opened, the arguments do not match the file, a row is too short, a cell
 * cannot be converted or memory runs out.
 */
struct csv_s *ReadCsvData(char* csvFilePath, char delimiter, int skiprows, char *ColumnList, int dclass)
{
    FILE *fCsv = NULL;
    struct csv_s *temp_csv = NULL;
    int *columns = NULL;
    int column_count = 0;
    struct str_split tss;
    int split_live = 0;     /* non-zero while tss owns buffers that must be freed */
    char strLine[MAX_LINE_SIZE];
    int rowTotal;
    int colTotal;
    int rows_read = 0;
    int cur_ptr = 0;        /* index of the next cell to fill in temp_csv->data */
    int i;
    int j;

    if (csvFilePath == NULL) {
        printf("open file %s failed\n", "(null)");
        goto label_error;
    }

    /* fopen reports a missing file by itself; no separate existence check. */
    fCsv = fopen(csvFilePath, "r");
    if (fCsv == NULL) {
        printf("open file %s failed\n", csvFilePath);
        goto label_error;
    }

    rowTotal = GetTotalLineCount(fCsv);
    colTotal = GetTotalColCount(fCsv, delimiter);
    if (rowTotal < 1 || colTotal < 1 || skiprows < 0 || skiprows > rowTotal) {
        printf("資料不對,有%d行,%d列\n", rowTotal, colTotal);
        goto label_error;
    }

    columns = csv_build_column_list(ColumnList, colTotal, &column_count);
    if (columns == NULL) {
        goto label_error;
    }

    /* Allocate zeroed storage for every cell that will be read. */
    rowTotal = rowTotal - skiprows;
    if (rowTotal > INT_MAX / column_count) {
        printf("分配記憶體失敗 \n");
        goto label_error;
    }
    temp_csv = calloc(1, sizeof(struct csv_s)
                         + (size_t)rowTotal * (size_t)column_count * sizeof(Dtype));
    if (temp_csv == NULL) {
        printf("分配記憶體失敗 \n");
        goto label_error;
    }

    /* Skip the leading lines. */
    if (fseek(fCsv, 0, SEEK_SET) != 0) {
        printf("read error\n");
        goto label_error;
    }
    for (i = skiprows; i > 0 && fgets(strLine, MAX_LINE_SIZE, fCsv) != NULL; i--) {
        /* nothing to do: the line is simply discarded */
    }

    /* Never read more rows than storage was allocated for. */
    while (rows_read < rowTotal && fgets(strLine, MAX_LINE_SIZE, fCsv) != NULL) {
        str_split_func(&tss, strLine, delimiter);
        split_live = 1;

        for (j = 0; j < column_count; j++) {
            int col = columns[j];

            /* A short row must not be indexed past its last field. */
            if (col >= tss.count) {
                printf("read error\n");
                goto label_error;
            }
            if (strToData(temp_csv->data + cur_ptr, tss.str_array[col], dclass) < 0) {
                printf("str to data error\n");
                goto label_error;
            }
            cur_ptr++;
        }

        str_split_free(&tss);
        split_live = 0;
        rows_read++;
    }

    temp_csv->drow = rows_read;
    temp_csv->dcol = column_count;
    temp_csv->dnum = rows_read * column_count;
    temp_csv->darray = NULL;
    /* With no data rows there is nothing to index; darray stays NULL. */
    if (temp_csv->dnum > 0
        && setNewArray(temp_csv, temp_csv->drow, temp_csv->dcol) < 0) {
        printf("分配記憶體失敗 \n");
        goto label_error;
    }

    fclose(fCsv);
    free(columns);
    return temp_csv;

label_error:
    if (split_live) {
        str_split_free(&tss);
    }
    if (fCsv != NULL) {
        fclose(fCsv);
    }
    free(temp_csv);
    free(columns);
    return NULL;
}

/*
 * Self-test for setNewArray() on a hand-built structure of 1000 cells:
 * views the cells as 500 x 2, then as 100 x 10, and prints the last row of
 * each view. Only cells 900..999 and cell 1 are given values, which covers
 * every cell that is printed. Always returns 0.
 */
int testData()
{
    int size = 1000;
    int row;
    int col;
    int i;
    int j;
    struct csv_s *stdata = malloc(sizeof(struct csv_s) + size * sizeof(Dtype));

    if (stdata == NULL) {
        printf("分配記憶體失敗 \n");
        return 0;
    }
    /* setNewArray frees the previous table, so it must start out as NULL. */
    stdata->darray = NULL;

    /* Initialise the tail of the data. */
    for (i = 900; i < size; i++) {
        stdata->data[i].f64 = i * 1.0;
    }
    /* Spot check on a single cell. */
    stdata->data[1].f64 = 10 * 1.0;
    printf("%f asdfasfasdf\n", stdata->data[1].f64);

    /* First view: 500 x 2. */
    row = 500;
    col = 2;
    stdata->drow = row;
    stdata->dcol = col;
    stdata->dnum = row * col;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray %d error\n", j);
        FreeCsvData(&stdata);
        return 0;
    }

    printf("顯示定義結構最後一行資料 \n");
    for (j = 0; j < col; j++) {
        printf("%f\n", stdata->darray[row - 1][j].f64);
    }

    /* Second view of the same cells: 100 x 10. */
    row = 100;
    col = 10;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 轉換  %d error\n", j);
        FreeCsvData(&stdata);
        return 0;
    }
    printf("顯示結構轉換最後一行資料 \n");
    for (j = 0; j < col; j++) {
        printf("%f\n", stdata->darray[row - 1][j].f64);
    }

    FreeCsvData(&stdata);
    return 0;
}

/*
 * Floating-point demo: reads columns 2 and 1 of "123.csv" as F32 (skipping
 * the first line), prints the last row, re-shapes the data to 3 x 4, then
 * attempts a 3 x 2 re-shape that is expected to be rejected, and finally
 * releases the structure.
 */
void testFile()
{
    struct csv_s *stdata;
    Dtype **show;
    int row;
    int col;
    int j;

    //stdata = ReadCsvData("123.csv", ',', 1, "1,2", F32);
    stdata = ReadCsvData("123.csv", ',', 1, "2,1", F32);
    if (stdata == NULL) {
        /* ReadCsvData has already printed the reason. */
        return;
    }
    printf("testFile row %d  col %d\n", stdata->drow, stdata->dcol);

    /* darray is indexed as show[row][col], with drow rows and dcol columns. */
    show = stdata->darray;
    printf("顯示最後一行資料 \n");
    for (j = 0; j < stdata->dcol; j++) {
        printf("%f\n", show[stdata->drow - 1][j].f32);
    }

    /* Re-shape to 3 x 4. */
    row = 3;
    col = 4;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 轉換 %d error\n", j);
    } else {
        printf("顯示結構轉換最後一行資料 \n");
        for (j = 0; j < col; j++) {
            printf("%f\n", stdata->darray[row - 1][j].f32);
        }
    }

    /* Re-shape with a mismatching cell count: this one is meant to fail. */
    printf("轉換失敗測試\n");
    row = 3;
    col = 2;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 轉換 %d error\n", j);
    }

    /* Release the memory; the pointer must come back as NULL. */
    FreeCsvData(&stdata);
    if (stdata == NULL)
        printf("ok t is null\n");
    else
        printf("ok t is not null\n");
}

/*
 * Integer demo: same sequence as testFile(), but the cells are read with
 * the default type DEF (S32), which is stored in the `long int` member s32
 * and therefore printed with %ld.
 */
void testFile2()
{
    struct csv_s *stdata;
    Dtype **show;
    int row;
    int col;
    int j;

    //stdata = ReadCsvData("123.csv", ',', 1, "1,2", F32);
    stdata = ReadCsvData("123.csv", ',', 1, "2,1", DEF);
    if (stdata == NULL) {
        /* ReadCsvData has already printed the reason. */
        return;
    }
    printf("testFile row %d  col %d\n", stdata->drow, stdata->dcol);

    /* darray is indexed as show[row][col], with drow rows and dcol columns. */
    show = stdata->darray;
    printf("顯示最後一行資料 \n");
    for (j = 0; j < stdata->dcol; j++) {
        /* Pass the member, not the whole union, to printf. */
        printf("%ld\n", show[stdata->drow - 1][j].s32);
    }

    /* Re-shape to 3 x 4. */
    row = 3;
    col = 4;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 轉換 %d error\n", j);
    } else {
        printf("顯示結構轉換最後一行資料 \n");
        for (j = 0; j < col; j++) {
            printf("%ld\n", stdata->darray[row - 1][j].s32);
        }
    }

    /* Re-shape with a mismatching cell count: this one is meant to fail. */
    printf("轉換失敗測試\n");
    row = 3;
    col = 2;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 轉換 %d error\n", j);
    }

    /* Release the memory; the pointer must come back as NULL. */
    FreeCsvData(&stdata);
    if (stdata == NULL)
        printf("ok t is null\n");
    else
        printf("ok t is not null\n");
}

/*
 * Entry point: runs the floating-point demo followed by the integer demo.
 * The command-line arguments are not used.
 */
int main(int args, char **argv)
{
    (void)args;
    (void)argv;

//  testData();
    testFile();
    testFile2();
    return 0;
}