trie树查找和hash查找比较(大量数据)
Posted semen
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了trie树查找和hash查找比较(大量数据)相关的知识,希望对你有一定的参考价值。
trie树代码
// Trie (prefix tree) word-frequency demo: builds a trie from the words of a
// text file and reports how many times the word "the" occurs.
//
// Define TRIE_DEMO_NO_MAIN to compile the classes without the demo driver
// (for example when embedding them in a test harness).
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

/// Number of outgoing edges per node: one per lowercase letter 'a'..'z'.
static const int kAlphabetSize = 26;

/// Maps a lowercase ASCII letter to its branch index (0..25).
/// @return the index, or -1 when @p c is not in 'a'..'z'.
static int letter_index(char c) {
    return (c >= 'a' && c <= 'z') ? c - 'a' : -1;
}

/// One trie node. Owns its `word` buffer and all of its child nodes.
class trienode {
public:
    char *word;                       // NUL-terminated copy of the key ending here, or NULL
    int count;                        // number of times that key was inserted (word frequency)
    trienode *branch[kAlphabetSize];  // child per letter, NULL when absent

public:
    trienode() : word(NULL), count(0) {
        for (int i = 0; i < kAlphabetSize; i++) {
            branch[i] = NULL;
        }
    }

    /// Releases the stored word and, recursively, the whole subtree.
    ~trienode() {
        delete[] word;
        for (int i = 0; i < kAlphabetSize; i++) {
            delete branch[i];
        }
    }

private:
    // Non-copyable: a member-wise copy would free the subtree twice.
    trienode(const trienode &);
    trienode &operator=(const trienode &);
};

/// Trie over lowercase ASCII words that also counts insertions per word.
class trie {
public:
    trienode *root;

public:
    trie();
    ~trie();
    void Insert(const char *str);
    bool Search(const char *str, int &count);  // exact lookup
    void printall(trienode *root);             // prints a subtree in alphabetical order
    void printpre(const char *str);            // prefix match

private:
    // Non-copyable: the trie owns its root node.
    trie(const trie &);
    trie &operator=(const trie &);
};

trie::trie() {
    root = new trienode();
}

trie::~trie() {
    delete root;  // trienode's destructor frees the rest of the tree
}

/// Inserts @p str, or bumps its count when it is already present.
/// Words containing anything other than 'a'..'z' (and NULL) are ignored.
void trie::Insert(const char *str) {
    if (str == NULL) {
        return;
    }
    // Validate the whole word first so a rejected word leaves no orphan
    // nodes behind.
    for (const char *p = str; *p; ++p) {
        if (letter_index(*p) < 0) {
            return;
        }
    }

    trienode *node = root;
    for (const char *p = str; *p; ++p) {
        trienode *&next = node->branch[letter_index(*p)];
        if (next == NULL) {
            next = new trienode();
        }
        node = next;
    }

    if (node->word == NULL) {
        const std::size_t bytes = std::strlen(str) + 1;
        node->word = new char[bytes];
        std::memcpy(node->word, str, bytes);
    }
    node->count++;
}

/// Looks up @p str as a complete word.
/// @param count receives the word's insertion count; untouched on failure.
/// @return true when @p str was inserted at least once.
bool trie::Search(const char *str, int &count) {
    if (str == NULL) {
        return false;
    }
    const trienode *node = root;
    for (; node && *str; ++str) {
        const int index = letter_index(*str);
        if (index < 0) {
            return false;
        }
        node = node->branch[index];
    }
    if (node && node->word) {
        count = node->count;
        return true;
    }
    return false;
}

/// Prints every word stored in the subtree at @p root, one per line.
/// Children are visited 'a'..'z', so the output is in alphabetical order.
void trie::printall(trienode *root) {
    if (!root) {
        return;
    }
    if (root->word) {
        std::cout << root->word << '\n';
    }
    for (int i = 0; i < kAlphabetSize; i++) {
        printall(root->branch[i]);
    }
}

/// Prints every stored word that starts with the prefix @p str.
void trie::printpre(const char *str) {
    if (str == NULL) {
        return;
    }
    trienode *node = root;
    for (; node && *str; ++str) {
        const int index = letter_index(*str);
        if (index < 0) {
            return;
        }
        node = node->branch[index];
    }
    if (node) {
        printall(node);
    }
}

/// True for the punctuation characters the demo removes from its input.
static bool is_stripped_punctuation(char c) {
    return c == '.' || c == ',' || c == '(' || c == ')';
}

/// Removes every '.', ',', '(' and ')' from @p text in one pass, so
/// consecutive punctuation characters are all removed.
static void strip_punctuation(std::string &text) {
    text.erase(std::remove_if(text.begin(), text.end(), is_stripped_punctuation),
               text.end());
}

/// Reads @p path, strips punctuation and appends the whitespace-separated
/// words to @p words.
/// @return false when the file cannot be opened.
static bool load_words(const char *path, std::vector<std::string> &words) {
    std::ifstream in(path);
    if (!in) {
        return false;
    }
    std::string text;
    std::string line;
    while (std::getline(in, line)) {
        text += line;
        text += ' ';  // keep words on adjacent lines separated
    }
    strip_punctuation(text);

    std::istringstream tokens(text);
    std::string word;
    while (tokens >> word) {
        words.push_back(word);
    }
    return true;
}

#ifndef TRIE_DEMO_NO_MAIN
/// Usage: program [text-file]. Without an argument the original hard-coded
/// path is used.
int main(int argc, char *argv[]) {
    const char *path = (argc > 1) ? argv[1] : "C:/Users/ww/Desktop/string.txt";
    const std::clock_t startTime = std::clock();

    std::vector<std::string> words;
    if (!load_words(path, words)) {
        std::cerr << "cannot open " << path << std::endl;
        return EXIT_FAILURE;
    }

    trie t;
    for (std::size_t i = 0; i < words.size(); i++) {
        t.Insert(words[i].c_str());
    }

    int val = -1;
    if (t.Search("the", val)) {
        std::cout << val << std::endl;
    } else {
        std::cout << "empty" << std::endl;
    }

    const std::clock_t endTime = std::clock();
    // clock() counts implementation-defined ticks; convert to milliseconds.
    const double elapsedMs = 1000.0 * (double)(endTime - startTime) / CLOCKS_PER_SEC;
    std::cout << "the running time is " << elapsedMs << " ms" << std::endl;
    return 0;
}
#endif  // TRIE_DEMO_NO_MAIN
hash代码
// Chained hash-set demo: stores the words of a text file in a fixed-size
// hash table and reports whether the word "the" is present.
//
// Define HASH_DEMO_NO_MAIN to compile the classes without the demo driver
// (for example when embedding them in a test harness).
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

/// Number of buckets in the table (fixed; the table never resizes).
static const int kBucketCount = 1000;

/// Singly linked chain node. Bucket heads are sentinels whose `p` is NULL.
class hashnode {
public:
    char *p;         // NUL-terminated copy of the stored key (NULL in a sentinel)
    hashnode *next;  // next node in the same bucket, or NULL

    hashnode() : p(NULL), next(NULL) {}
};

/// Fixed-size hash set of C strings using separate chaining.
class hashmap {
public:
    hashnode *hashps[kBucketCount];  // one sentinel head node per bucket

public:
    hashmap();
    ~hashmap();
    int String2Int(const char *p);
    void Insert(const char *p);
    bool Find(const char *p);

private:
    // Non-copyable: the table owns every node and key buffer.
    hashmap(const hashmap &);
    hashmap &operator=(const hashmap &);
};

hashmap::hashmap() {
    for (int i = 0; i < kBucketCount; i++) {
        hashps[i] = new hashnode();
    }
}

/// Frees every chain node, its key copy, and the sentinel heads.
hashmap::~hashmap() {
    for (int i = 0; i < kBucketCount; i++) {
        hashnode *node = hashps[i];
        while (node) {
            hashnode *next = node->next;
            delete[] node->p;
            delete node;
            node = next;
        }
    }
}

/// Hashes @p p to a bucket index: the sum of its bytes modulo the bucket
/// count. Bytes are read as unsigned so non-ASCII input cannot yield a
/// negative index, and the sum is reduced as it goes so it cannot overflow.
/// @return an index in [0, kBucketCount); 0 for NULL or the empty string.
int hashmap::String2Int(const char *p) {
    unsigned int sum = 0;
    if (p != NULL) {
        for (; *p; ++p) {
            sum = (sum + (unsigned char)*p) % kBucketCount;
        }
    }
    return (int)sum;
}

/// Stores a copy of @p p. A key that is already present is not stored again,
/// so chain length depends on distinct keys rather than on repetitions.
/// NULL is ignored.
void hashmap::Insert(const char *p) {
    if (p == NULL) {
        return;
    }
    hashnode *head = hashps[String2Int(p)];
    for (const hashnode *node = head->next; node; node = node->next) {
        if (std::strcmp(p, node->p) == 0) {
            return;
        }
    }

    hashnode *fresh = new hashnode();
    const std::size_t bytes = std::strlen(p) + 1;
    fresh->p = new char[bytes];
    std::memcpy(fresh->p, p, bytes);
    // Push at the front of the chain, right after the sentinel.
    fresh->next = head->next;
    head->next = fresh;
}

/// @return true when @p p has been inserted; false otherwise (also for NULL).
bool hashmap::Find(const char *p) {
    if (p == NULL) {
        return false;
    }
    for (const hashnode *node = hashps[String2Int(p)]->next; node; node = node->next) {
        if (std::strcmp(p, node->p) == 0) {
            return true;
        }
    }
    return false;
}

/// Returns the int that @p p points to. Not used by the demo itself.
int re(int *p) {
    return *p;
}

/// True for the punctuation characters the demo removes from its input.
static bool is_stripped_punctuation(char c) {
    return c == '.' || c == ',' || c == '(' || c == ')';
}

/// Removes every '.', ',', '(' and ')' from @p text in one pass, so
/// consecutive punctuation characters are all removed.
static void strip_punctuation(std::string &text) {
    text.erase(std::remove_if(text.begin(), text.end(), is_stripped_punctuation),
               text.end());
}

/// Reads @p path, strips punctuation and appends the whitespace-separated
/// words to @p words.
/// @return false when the file cannot be opened.
static bool load_words(const char *path, std::vector<std::string> &words) {
    std::ifstream in(path);
    if (!in) {
        return false;
    }
    std::string text;
    std::string line;
    while (std::getline(in, line)) {
        text += line;
        text += ' ';  // keep words on adjacent lines separated
    }
    strip_punctuation(text);

    std::istringstream tokens(text);
    std::string word;
    while (tokens >> word) {
        words.push_back(word);
    }
    return true;
}

#ifndef HASH_DEMO_NO_MAIN
/// Usage: program [text-file]. Without an argument the original hard-coded
/// path is used.
int main(int argc, char *argv[]) {
    const char *path = (argc > 1) ? argv[1] : "C:/Users/ww/Desktop/string.txt";
    const std::clock_t startTime = std::clock();

    std::vector<std::string> words;
    if (!load_words(path, words)) {
        std::cerr << "cannot open " << path << std::endl;
        return EXIT_FAILURE;
    }

    hashmap t;
    for (std::size_t i = 0; i < words.size(); i++) {
        t.Insert(words[i].c_str());
    }

    std::cout << "the result is: " << t.Find("the") << std::endl;

    const std::clock_t endTime = std::clock();
    // clock() counts implementation-defined ticks; convert to milliseconds.
    const double elapsedMs = 1000.0 * (double)(endTime - startTime) / CLOCKS_PER_SEC;
    std::cout << "the running time is " << elapsedMs << " ms" << std::endl;
    return 0;
}
#endif  // HASH_DEMO_NO_MAIN
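trie树的查找时间是O(L),L是字符串长度,与已存入的数据量无关;hash查找需要先用O(L)的时间计算哈希值,再遍历对应哈希地址上的冲突链表,链表长度记为LL,每次比较最坏需要O(L)。注意本例的桶数固定为1000,因此LL会随存入的关键字数量增长,只有当桶数随数据量扩容时LL才可视为常数。在冲突链表较短的前提下,两种查找都很高效。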
以上是关于trie树查找和hash查找比较(大量数据)的主要内容,如果未能解决你的问题,请参考以下文章