60.大數據創建索引,並實現大文件的二分查找,遷移實現分層
阿新 • • 發佈:2018-02-10
sizeof post alloc can sys 數據 define sprint !=
- index.h
/*
 * index.h - shared configuration, types and global state for the
 * sort / index / binary-search tools.
 *
 * NOTE(review): other files call getN(), eatN() and eatg(), but no
 * prototypes for them are visible in this header. Add prototypes that
 * match their definitions once those are confirmed.
 */
#ifndef INDEX_H
#define INDEX_H

/* Must be defined before <stdio.h>; commonly used with MSVC to silence the
 * "unsafe CRT function" warnings for fopen, strcpy, sprintf, ... */
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* strlen, strcpy, strcmp are used by every .c file */

/* Number of lines (records) the tools read, sort and index. */
#define N 10000000

/* Offset table for the sorted file. */
struct index
{
    int *pindex; /* one starting byte offset per line of the sorted file */
    int length;  /* number of entries in pindex */
};

/*
 * Shared globals. They are only DECLARED here; index.c holds the single
 * definition of each. Defining them in the header would create one copy
 * per translation unit that includes it.
 */
extern char **g_pp;          /* array of pointers to the loaded lines */
extern char filepath[256];   /* path of the unsorted input file */
extern char sortpath[256];   /* path of the sorted output file */
extern char indexpath[256];  /* path of the binary offset-index file */
extern struct index allindex;

#endif /* INDEX_H */
- index.c
1 #include"index.h" 2 3 char **g_pp = NULL;//保存指針數組 4 char filepath[256] = { 0 }; 5 char sortpath[256] = { 0 }; 6 char indexpath[256] = { 0 }; 7 struct index allindex = { 0 };//索引 8 9 int getN() 10 { 11 FILE *pf = fopen("
- createsort.h
1 #include "index.h" 2 3 void initmem(); 4 int com(void *p1, void*p2); 5 void sort(); 6 void show(); 7 void writetofile();
- createsort.c
1 #include "createsort.h" 2 void initmem() 3 { 4 g_pp = calloc(N, sizeof(char*));//分配指針數組 5 FILE *pf = fopen(filepath, "r"); 6 if (pf == NULL) 7 { 8 return -1; 9 } 10 else 11 { 12 for (int i = 0; i < N; i++) 13 { 14 char str[50] = { 0 }; 15 fgets(str, 50, pf);//讀取 16 g_pp[i] = calloc(strlen(str) + 1, sizeof(char));//分配 17 if (g_pp[i]!=NULL) 18 { 19 //sprintf(g_pp[i], str);//打印進去 20 strcpy(g_pp[i], str); 21 eatN(g_pp[i]); 22 } 23 24 //printf("%s", g_pp[i]);//顯示測試 25 26 27 } 28 29 30 fclose(pf); 31 32 33 } 34 35 36 37 38 39 40 } 41 42 int com(void *p1, void*p2) 43 { 44 char **pp1 = p1; 45 char **pp2 = p2; 46 47 return strcmp(*pp1, *pp2); 48 49 } 50 51 void sort() 52 { 53 qsort(g_pp, N, sizeof(char*), com); 54 55 56 } 57 void show() 58 { 59 printf("\n此時狀態\n"); 60 for (int i = 0; i < N; i++) 61 { 62 printf("\n%s", g_pp[i]); 63 } 64 } 65 void writetofile() 66 { 67 FILE *pf = fopen(sortpath, "w"); 68 for (int i = 0; i < N; i++) 69 { 70 char temp[100] = { 0 }; 71 // printf("\n%s", g_pp[i]); 72 sprintf(temp, "%s\n", g_pp[i]); 73 // printf("\n%s", temp); 74 fputs(temp, pf); 75 } 76 77 fclose(pf); 78 }
- createindex.h
1 #include "index.h" 2 void init(); 3 void qucik();
- createindex.c
1 #include "createindex.h" 2 3 4 void init() 5 { 6 printf("\n索引數組開始分配"); 7 allindex.length = N; 8 allindex.pindex = calloc(N, sizeof(int));//分配內存 9 printf("\n索引數組完成分配"); 10 11 printf("\n開始讀取"); 12 FILE *pf = fopen(sortpath, "rb");//\r\n->\n 13 if (pf == NULL) 14 { 15 return -1; 16 } 17 else 18 { 19 int alllength = 0; 20 for (int i = 0; i < N; i++) 21 { 22 char str[50] = { 0 }; 23 fgets(str, 50, pf); 24 allindex.pindex[i] = alllength;//錯位從0開始 25 26 int length = strlen(str); 27 alllength += length; 28 29 } 30 31 fclose(pf); 32 } 33 printf("\n結束讀取"); 34 35 printf("\n開始寫入"); 36 FILE *pfw = fopen(indexpath, "wb");//寫入索引 37 fwrite(allindex.pindex, sizeof(int), allindex.length, pfw); 38 fclose(pfw);//關閉 39 printf("\n結束寫入"); 40 41 42 free(allindex.pindex); 43 44 } 45 void qucik() 46 { 47 printf("\n索引數組開始分配"); 48 allindex.length = N; 49 allindex.pindex = calloc(N, sizeof(int));//分配內存 50 printf("\n索引數組完成分配"); 51 52 printf("\n開始讀取"); 53 FILE *pfw = fopen("index.txt", "rb");//寫入索引 54 fread(allindex.pindex, sizeof(int), allindex.length, pfw); 55 fclose(pfw);//關閉 56 printf("\n結束讀取"); 57 }
- binsearch.h
1 #include "index.h" 2 void binsearch(char *searchstr);
- binsearch.c
1 #include "binsearch.h" 2 3 void binsearch(char *searchstr) 4 { 5 int tou = 0; 6 int wei = N - 1; 7 int flag = 0; 8 while (tou <= wei) 9 { 10 int zhong = (tou + wei) / 2; 11 char zhongstr[256] = { 0 }; 12 { 13 FILE *pf1 = fopen(indexpath, "rb"); 14 FILE *pf2 = fopen(sortpath, "rb"); 15 16 17 int indexnum = 0; 18 fseek(pf1, zhong*sizeof(int), SEEK_SET); 19 fread(&indexnum, sizeof(int), 1, pf1);//讀索引zhong到indexnum 20 21 fseek(pf2, indexnum, SEEK_SET); 22 fgets(zhongstr, 128, pf2);//讀取 23 24 fclose(pf1); 25 fclose(pf2); 26 } 27 eatN(zhongstr); 28 char pnewzhongstr[256] = { 0 }; 29 sprintf(pnewzhongstr, zhongstr); 30 eatg(pnewzhongstr);//遇到-終止 31 int res = strcmp(pnewzhongstr, searchstr);//1 0 -1 32 33 34 if (res == 0) 35 { 36 flag = 1; 37 printf("%s", zhongstr); 38 break; 39 } 40 else if (res == 1) 41 { 42 wei = zhong - 1; 43 } 44 else 45 { 46 tou = zhong + 1; 47 } 48 49 50 } 51 52 53 if (flag) 54 { 55 printf("\nfind"); 56 } 57 else 58 { 59 printf("\n not find"); 60 } 61 62 63 }
- main.c
1 #include "binsearch.h" 2 void initall() 3 { 4 strcpy(filepath, "1E~001OK.txt"); 5 strcpy(sortpath, "1E~001sort.txt"); 6 strcpy(indexpath, "1E~001index.txt"); 7 8 } 9 10 void main() 11 { 12 initall(); 13 //初始化內存 14 initmem(); 15 //排序 16 sort(); 17 //寫入文件 18 writetofile(); 19 20 //初始化索引 21 init(); 22 23 //二分查找 24 while (1) 25 { 26 char str[256] = { 0 }; 27 scanf("%s", str); 28 binsearch(str); 29 } 30 system("pause"); 31 32 }
60.大數據創建索引,並實現大文件的二分查找,遷移實現分層