- index.h
/*
 * index.h - shared constants, global state and small string helpers used by
 * the sort / index / binary-search modules.
 *
 * The globals are only DECLARED here (extern); index.c holds the single
 * definition of each one, so the header can be included from several
 * translation units without producing duplicate definitions at link time.
 */
#ifndef INDEX_H
#define INDEX_H

#define _CRT_SECURE_NO_WARNINGS /* must precede the CRT headers (MSVC) */
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* strlen / strcpy / strcmp are used by every module */

/* Number of records every module loads, sorts, indexes and searches. */
#define N 10000000

/* Offset table for the sorted file. */
struct index
{
    int *pindex; /* pindex[i] = byte offset of record i in the sorted file */
    int length;  /* number of entries in pindex */
};

extern char **g_pp;            /* array of pointers to the loaded records */
extern char filepath[256];     /* input data file */
extern char sortpath[256];     /* sorted output file */
extern char indexpath[256];    /* binary index file */
extern struct index allindex;  /* the index */

/* Counts the records in "file.txt"; returns -1 if it cannot be opened. */
int getN(void);

/* Cuts str at its first '-' by overwriting it with '\0'. */
void eatg(char *str);

/* Cuts str at its first '\r' or '\n' by overwriting it with '\0'. */
void eatN(char *str);

#endif /* INDEX_H */
- index.c
1 #include"index.h" 2 3 char **g_pp = NULL;//保存指针数组 4 char filepath[256] = { 0 }; 5 char sortpath[256] = { 0 }; 6 char indexpath[256] = { 0 }; 7 struct index allindex = { 0 };//索引 8 9 int getN() 10 { 11 FILE *pf = fopen("file.txt", "r"); 12 if (pf == NULL) 13 { 14 return -1; 15 } 16 else 17 { 18 int i = 0; 19 while (!feof(pf)) 20 { 21 char str[50] = { 0 }; 22 fgets(str, 50, pf);//读取 23 i++; 24 } 25 fclose(pf); 26 return i; 27 28 } 29 } 30 void eatg(char *str) 31 { 32 while (*str != ‘\0‘) 33 { 34 35 if (*str == ‘-‘) 36 { 37 *str = ‘\0‘; 38 } 39 str++; 40 } 41 42 } 43 void eatN(char *str) 44 { 45 while (*str != ‘\0‘) 46 { 47 if (*str == ‘\r‘ || *str == ‘\n‘) 48 { 49 *str = ‘\0‘; 50 } 51 52 str++; 53 } 54 55 }
- createsort.h
1 #include "index.h" 2 3 void initmem(); 4 int com(void *p1, void*p2); 5 void sort(); 6 void show(); 7 void writetofile();
- createsort.c
1 #include "createsort.h" 2 void initmem() 3 { 4 g_pp = calloc(N, sizeof(char*));//分配指针数组 5 FILE *pf = fopen(filepath, "r"); 6 if (pf == NULL) 7 { 8 return -1; 9 } 10 else 11 { 12 for (int i = 0; i < N; i++) 13 { 14 char str[50] = { 0 }; 15 fgets(str, 50, pf);//读取 16 g_pp[i] = calloc(strlen(str) + 1, sizeof(char));//分配 17 if (g_pp[i]!=NULL) 18 { 19 //sprintf(g_pp[i], str);//打印进去 20 strcpy(g_pp[i], str); 21 eatN(g_pp[i]); 22 } 23 24 //printf("%s", g_pp[i]);//显示测试 25 26 27 } 28 29 30 fclose(pf); 31 32 33 } 34 35 36 37 38 39 40 } 41 42 int com(void *p1, void*p2) 43 { 44 char **pp1 = p1; 45 char **pp2 = p2; 46 47 return strcmp(*pp1, *pp2); 48 49 } 50 51 void sort() 52 { 53 qsort(g_pp, N, sizeof(char*), com); 54 55 56 } 57 void show() 58 { 59 printf("\n此时状态\n"); 60 for (int i = 0; i < N; i++) 61 { 62 printf("\n%s", g_pp[i]); 63 } 64 } 65 void writetofile() 66 { 67 FILE *pf = fopen(sortpath, "w"); 68 for (int i = 0; i < N; i++) 69 { 70 char temp[100] = { 0 }; 71 // printf("\n%s", g_pp[i]); 72 sprintf(temp, "%s\n", g_pp[i]); 73 // printf("\n%s", temp); 74 fputs(temp, pf); 75 } 76 77 fclose(pf); 78 }
- createindex.h
1 #include "index.h" 2 void init(); 3 void qucik();
- createindex.c
1 #include "createindex.h" 2 3 4 void init() 5 { 6 printf("\n索引数组开始分配"); 7 allindex.length = N; 8 allindex.pindex = calloc(N, sizeof(int));//分配内存 9 printf("\n索引数组完成分配"); 10 11 printf("\n开始读取"); 12 FILE *pf = fopen(sortpath, "rb");//\r\n->\n 13 if (pf == NULL) 14 { 15 return -1; 16 } 17 else 18 { 19 int alllength = 0; 20 for (int i = 0; i < N; i++) 21 { 22 char str[50] = { 0 }; 23 fgets(str, 50, pf); 24 allindex.pindex[i] = alllength;//错位从0开始 25 26 int length = strlen(str); 27 alllength += length; 28 29 } 30 31 fclose(pf); 32 } 33 printf("\n结束读取"); 34 35 printf("\n开始写入"); 36 FILE *pfw = fopen(indexpath, "wb");//写入索引 37 fwrite(allindex.pindex, sizeof(int), allindex.length, pfw); 38 fclose(pfw);//关闭 39 printf("\n结束写入"); 40 41 42 free(allindex.pindex); 43 44 } 45 void qucik() 46 { 47 printf("\n索引数组开始分配"); 48 allindex.length = N; 49 allindex.pindex = calloc(N, sizeof(int));//分配内存 50 printf("\n索引数组完成分配"); 51 52 printf("\n开始读取"); 53 FILE *pfw = fopen("index.txt", "rb");//写入索引 54 fread(allindex.pindex, sizeof(int), allindex.length, pfw); 55 fclose(pfw);//关闭 56 printf("\n结束读取"); 57 }
- binsearch.h
1 #include "index.h" 2 void binsearch(char *searchstr);
- binsearch.c
1 #include "binsearch.h" 2 3 void binsearch(char *searchstr) 4 { 5 int tou = 0; 6 int wei = N - 1; 7 int flag = 0; 8 while (tou <= wei) 9 { 10 int zhong = (tou + wei) / 2; 11 char zhongstr[256] = { 0 }; 12 { 13 FILE *pf1 = fopen(indexpath, "rb"); 14 FILE *pf2 = fopen(sortpath, "rb"); 15 16 17 int indexnum = 0; 18 fseek(pf1, zhong*sizeof(int), SEEK_SET); 19 fread(&indexnum, sizeof(int), 1, pf1);//读索引zhong到indexnum 20 21 fseek(pf2, indexnum, SEEK_SET); 22 fgets(zhongstr, 128, pf2);//读取 23 24 fclose(pf1); 25 fclose(pf2); 26 } 27 eatN(zhongstr); 28 char pnewzhongstr[256] = { 0 }; 29 sprintf(pnewzhongstr, zhongstr); 30 eatg(pnewzhongstr);//遇到-终止 31 int res = strcmp(pnewzhongstr, searchstr);//1 0 -1 32 33 34 if (res == 0) 35 { 36 flag = 1; 37 printf("%s", zhongstr); 38 break; 39 } 40 else if (res == 1) 41 { 42 wei = zhong - 1; 43 } 44 else 45 { 46 tou = zhong + 1; 47 } 48 49 50 } 51 52 53 if (flag) 54 { 55 printf("\nfind"); 56 } 57 else 58 { 59 printf("\n not find"); 60 } 61 62 63 }
- main.c
1 #include "binsearch.h" 2 void initall() 3 { 4 strcpy(filepath, "1E~001OK.txt"); 5 strcpy(sortpath, "1E~001sort.txt"); 6 strcpy(indexpath, "1E~001index.txt"); 7 8 } 9 10 void main() 11 { 12 initall(); 13 //初始化内存 14 initmem(); 15 //排序 16 sort(); 17 //写入文件 18 writetofile(); 19 20 //初始化索引 21 init(); 22 23 //二分查找 24 while (1) 25 { 26 char str[256] = { 0 }; 27 scanf("%s", str); 28 binsearch(str); 29 } 30 system("pause"); 31 32 }
60.大数据创建索引,并实现大文件的二分查找,迁移实现分层
Posted 喵小喵~
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了60.大数据创建索引,并实现大文件的二分查找,迁移实现分层相关的知识,希望对你有一定的参考价值。
以上是关于60.大数据创建索引,并实现大文件的二分查找,迁移实现分层的主要内容,如果未能解决你的问题,请参考以下文章