1. 程式人生 > 60.大數據創建索引,並實現大文件的二分查找,遷移實現分層

60.大數據創建索引,並實現大文件的二分查找,遷移實現分層

sizeof post alloc can sys 數據 define sprint !=

  • index.h
     #define  _CRT_SECURE_NO_WARNINGS
     #include<stdio.h>
     #include<stdlib.h>
     #include<string.h>
     4 #define N 10000000
     5 
     /*
      * Offset table for the sorted data file.
      * init() fills pindex[i] with the running total of the line lengths that
      * precede line i, i.e. the byte position where line i starts.
      */
     struct index {
         int *pindex;    /* heap array of line start offsets */
         int length;     /* number of entries in pindex */
     };
    11 
    /*
     * Program-wide state. These are declarations only: each object is
     * defined exactly once, in index.c. Defining them here as well would put
     * a definition in every translation unit that includes this header.
     */
    extern char **g_pp;             /* array of pointers to the loaded lines */
    extern char filepath[256];      /* input data file, read by initmem() */
    extern char sortpath[256];      /* sorted data file, written by writetofile() */
    extern char indexpath[256];     /* binary offset index, written by init() */
    extern struct index allindex;   /* the offset index itself */

    /* Helper functions implemented in index.c. */
    int getN(void);         /* number of lines in "file.txt", or -1 if it cannot be opened */
    void eatg(char *str);   /* overwrite every '-' in str with '\0' */
    void eatN(char *str);   /* overwrite every '\r' and '\n' in str with '\0' */

  • index.c
     1 #include"index.h"
     2 
     3 char **g_pp = NULL;//保存指針數組
     4 char filepath[256] = { 0 };
     5 char sortpath[256] = { 0 };
     6 char indexpath[256] = { 0 };
     7 struct index allindex = { 0 };//索引
     8 
     /*
      * Count the lines in "file.txt" (looked up in the current directory).
      *
      * A final line without a trailing newline is counted as well. Counting is
      * done per newline character, so a line longer than any fixed buffer is
      * still counted once and an empty file yields 0.
      *
      * Returns the number of lines, or -1 if the file cannot be opened.
      */
     int getN()
     {
         FILE *pf = fopen("file.txt", "r");
         if (pf == NULL)
         {
             return -1;
         }

         int lines = 0;
         int prev = '\n';    /* so that an empty file counts as zero lines */
         int ch;
         while ((ch = fgetc(pf)) != EOF)
         {
             if (ch == '\n')
             {
                 lines++;
             }
             prev = ch;
         }
         if (prev != '\n')
         {
             lines++;        /* last line had no terminating newline */
         }

         fclose(pf);
         return lines;
     }

     /*
      * Replace every '-' in str with '\0', in place.
      * Read as a C string, str afterwards ends just before its first '-'.
      */
     void eatg(char *str)
     {
         while (*str != '\0')
         {
             if (*str == '-')
             {
                 *str = '\0';
             }
             str++;
         }
     }

     /*
      * Replace every '\r' and '\n' in str with '\0', in place.
      * Used to strip the line terminator that fgets() leaves in the buffer.
      */
     void eatN(char *str)
     {
         while (*str != '\0')
         {
             if (*str == '\r' || *str == '\n')
             {
                 *str = '\0';
             }
             str++;
         }
     }

  • createsort.h
    1 #include "index.h"
    2 
    /* Load the lines of filepath into the global pointer array g_pp. */
    void initmem(void);
    /* Compare two array elements (each a char *) by the strings they point to. */
    int com(void *p1, void*p2);
    /* Sort g_pp in ascending strcmp() order. */
    void sort(void);
    /* Print every string in g_pp to stdout. */
    void show(void);
    /* Write every string in g_pp to sortpath, one per line. */
    void writetofile(void);

  • createsort.c
     1 #include "createsort.h"
     2 void initmem()
     3 {
     4     g_pp = calloc(N, sizeof(char*));//分配指針數組
     5     FILE *pf = fopen(filepath, "r");
     6     if (pf == NULL)
     7     {
     8         return -1;
     9     }
    10     else
    11     {
    12         for (int i = 0; i < N; i++)
    13         {
    14             char str[50] = { 0 };
    15             fgets(str, 50, pf);//讀取
    16             g_pp[i] = calloc(strlen(str) + 1, sizeof(char));//分配
    17             if (g_pp[i]!=NULL)
    18             {
    19                 //sprintf(g_pp[i], str);//打印進去
    20                 strcpy(g_pp[i], str);
    21                 eatN(g_pp[i]);
    22             }
    23             
    24             //printf("%s", g_pp[i]);//顯示測試
    25 
    26 
    27         }
    28 
    29 
    30         fclose(pf);
    31 
    32 
    33     }
    34 
    35 
    36 
    37 
    38 
    39 
    40 }
    41 
    /*
     * Comparison callback for an array of C strings.
     * p1 and p2 each point at an array element (a char *); the result is
     * strcmp() of the two strings those elements point to.
     */
    int com(void *p1, void*p2)
    {
        char *left = *(char **)p1;
        char *right = *(char **)p2;

        return strcmp(left, right);
    }
    50 
    51 void sort()
    52 {
    53     qsort(g_pp, N, sizeof(char*), com);
    54 
    55 
    56 }
    57 void show()
    58 {
    59     printf("\n此時狀態\n");
    60     for (int i = 0; i < N; i++)
    61     {
    62         printf("\n%s", g_pp[i]);
    63     }
    64 }
    65 void writetofile()
    66 {
    67     FILE *pf = fopen(sortpath, "w");
    68     for (int i = 0; i < N; i++)
    69     {
    70         char temp[100] = { 0 };
    71     //    printf("\n%s", g_pp[i]);
    72         sprintf(temp, "%s\n", g_pp[i]);
    73     //    printf("\n%s", temp);
    74         fputs(temp, pf);
    75     }
    76 
    77     fclose(pf);
    78 }

  • createindex.h
    1 #include "index.h"
    /* Build the line-offset index of the sorted file and write it to indexpath. */
    void init(void);
    /* Allocate allindex and load a previously written index file into it. */
    void qucik(void);

  • createindex.c
     1 #include "createindex.h"
     2 
     3 
     4 void init()
     5 {
     6     printf("\n索引數組開始分配");
     7     allindex.length = N;
     8     allindex.pindex = calloc(N, sizeof(int));//分配內存
     9     printf("\n索引數組完成分配");
    10 
    11     printf("\n開始讀取");
    12     FILE *pf = fopen(sortpath, "rb");//\r\n->\n
    13     if (pf == NULL)
    14     {
    15         return -1;
    16     }
    17     else
    18     {
    19         int alllength = 0;
    20         for (int i = 0; i < N; i++)
    21         {
    22             char str[50] = { 0 };
    23             fgets(str, 50, pf);
    24             allindex.pindex[i] = alllength;//錯位從0開始
    25 
    26             int length = strlen(str);
    27             alllength += length;
    28 
    29         }
    30 
    31         fclose(pf);
    32     }
    33     printf("\n結束讀取");
    34 
    35     printf("\n開始寫入");
    36     FILE *pfw = fopen(indexpath, "wb");//寫入索引
    37     fwrite(allindex.pindex, sizeof(int), allindex.length, pfw);
    38     fclose(pfw);//關閉
    39     printf("\n結束寫入");
    40 
    41 
    42     free(allindex.pindex);
    43 
    44 }
    45 void qucik()
    46 {
    47     printf("\n索引數組開始分配");
    48     allindex.length = N;
    49     allindex.pindex = calloc(N, sizeof(int));//分配內存
    50     printf("\n索引數組完成分配");
    51 
    52     printf("\n開始讀取");
    53     FILE *pfw = fopen("index.txt", "rb");//寫入索引
    54     fread(allindex.pindex, sizeof(int), allindex.length, pfw);
    55     fclose(pfw);//關閉
    56     printf("\n結束讀取");
    57 }

  • binsearch.h
    1 #include "index.h"
    /*
     * Binary-search the sorted data file for a line whose text before the
     * first '-' equals searchstr. Prints the matching line followed by
     * "find", or " not find" when there is no match.
     */
    extern void binsearch(char *searchstr);

  • binsearch.c
     1 #include "binsearch.h"
     2 
     3 void binsearch(char *searchstr)
     4 {
     5     int tou = 0;
     6     int wei = N - 1;
     7     int flag = 0;
     8     while (tou <= wei)
     9     {
    10         int zhong = (tou + wei) / 2;
    11         char zhongstr[256] = { 0 };
    12         {
    13             FILE *pf1 = fopen(indexpath, "rb");
    14             FILE *pf2 = fopen(sortpath, "rb");
    15 
    16 
    17             int indexnum = 0;
    18             fseek(pf1, zhong*sizeof(int), SEEK_SET);
    19             fread(&indexnum, sizeof(int), 1, pf1);//讀索引zhong到indexnum
    20 
    21             fseek(pf2, indexnum, SEEK_SET);
    22             fgets(zhongstr, 128, pf2);//讀取
    23 
    24             fclose(pf1);
    25             fclose(pf2);
    26         }
    27         eatN(zhongstr);
    28         char pnewzhongstr[256] = { 0 };
    29         sprintf(pnewzhongstr, zhongstr);
    30         eatg(pnewzhongstr);//遇到-終止
    31         int res = strcmp(pnewzhongstr, searchstr);//1 0  -1
    32 
    33 
    34         if (res == 0)
    35         {
    36             flag = 1;
    37             printf("%s", zhongstr);
    38             break;
    39         }
    40         else if (res == 1)
    41         {
    42             wei = zhong - 1;
    43         }
    44         else
    45         {
    46             tou = zhong + 1;
    47         }
    48 
    49 
    50     }
    51 
    52 
    53     if (flag)
    54     {
    55         printf("\nfind");
    56     }
    57     else
    58     {
    59         printf("\n not find");
    60     }
    61 
    62 
    63 }

  • main.c
     1 #include "binsearch.h"
     2 void initall()
     3 {
     4     strcpy(filepath, "1E~001OK.txt");
     5     strcpy(sortpath, "1E~001sort.txt");
     6     strcpy(indexpath, "1E~001index.txt");
     7 
     8 }
     9 
    /*
     * These functions are defined in createsort.c and createindex.c. This file
     * includes only binsearch.h, so they are declared here to avoid calling
     * undeclared functions.
     */
    void initmem();
    void sort();
    void writetofile();
    void init();

    /*
     * Program entry point: load and sort the data, write the sorted file,
     * build the index, then look up every word read from standard input
     * until input ends.
     */
    int main(void)
    {
        initall();
        /* load the data into memory */
        initmem();
        /* sort it */
        sort();
        /* write the sorted data to a file */
        writetofile();

        /* build the index */
        init();

        /* binary search, one query per input word */
        for (;;)
        {
            char str[256] = { 0 };
            if (scanf("%255s", str) != 1)   /* width keeps the read inside str */
            {
                break;                      /* end of input or read error */
            }
            binsearch(str);
        }
        system("pause");

        return 0;
    }

60.大數據創建索引,並實現大文件的二分查找,遷移實現分層