trie树查找和hash查找比较(大量数据)

Posted semen

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了trie树查找和hash查找比较(大量数据)相关的知识,希望对你有一定的参考价值。

trie树代码

#include<iostream>
#include<stdio.h>
#include<iostream>
#include<string>
#include<stdlib.h>
#include<fstream>
#include<sstream>
#include<vector>
#include<string>
#include<time.h>
using namespace std;
// One node of the trie. A node that terminates an inserted word owns a copy
// of that word and counts how many times it was inserted.
class trienode
{
public:
    char *word;            // copy of the word ending at this node; NULL if no inserted word ends here
    int count;             // word frequency: how many times `word` has been inserted
    trienode *branch[26];  // child links, one slot per letter; NULL where there is no child
public:
    // Creates an empty node: no word, zero count, no children.
    trienode()
        : word(NULL), count(0)
    {
        // A plain loop replaces the old memset() call, which passed NULL as
        // the int fill value and relied on <cstring> without including it.
        for (int i = 0; i < 26; i++)
        {
            branch[i] = NULL;
        }
    }
};
// Trie of words with per-word insertion counts. All members are public.
class trie
{
public:
    // Allocates the root node.
    trie();
    ~trie();

    // Adds one occurrence of `str` to the trie.
    void Insert(char *str);

    // Lookup: returns true and stores the word's frequency in `count` when
    // `str` was inserted before; returns false otherwise.
    bool Search(char *str, int &count);

    // Prints every word stored at or below `root`, visiting the branches in
    // slot order, so the words come out sorted.
    void printall(trienode *root);

    // Prefix match: prints every stored word that starts with `str`.
    void printpre(char *str);

    trienode *root;  // entry node of the trie, created by the constructor
};
// Builds an empty trie consisting of a single freshly allocated root node.
trie::trie()
    : root(new trienode())
{
}
trie::~trie() {}
// Adds one occurrence of `str` to the trie.
//
// Only words made up entirely of the letters 'a'..'z' are stored; a word
// containing any other character (or a NULL pointer) is ignored. The first
// insertion of a word stores a private copy of it in the terminal node; every
// insertion increments that node's count.
void trie::Insert(char *str)
{
    if (!str)
    {
        return;
    }

    // Validate the whole word before touching the tree, so that a rejected
    // word does not leave a chain of empty nodes behind. The valid range is
    // 0..25: the previous `index > 26` test let index 26 through and wrote
    // one element past the end of branch[].
    size_t len = 0;
    for (; str[len]; len++)
    {
        const int index = str[len] - 'a';
        if (index < 0 || index >= 26)
        {
            return;
        }
    }

    // Walk down from the root, creating missing nodes along the way.
    trienode *tt = root;
    for (size_t i = 0; i < len; i++)
    {
        const int index = str[i] - 'a';
        if (tt->branch[index] == NULL)
        {
            tt->branch[index] = new trienode();
        }
        tt = tt->branch[index];
    }

    tt->count++;
    if (!tt->word)
    {
        // First occurrence: keep a copy of the word, terminator included.
        // char_traits::copy comes from <string>, which the file includes,
        // and replaces the MSVC-only strcpy_s.
        tt->word = new char[len + 1];
        std::char_traits<char>::copy(tt->word, str, len + 1);
    }
}
// Looks up `str` in the trie.
//
// Returns true and stores the word's frequency in `count` when `str` was
// inserted before. Returns false, leaving `count` untouched, when the word is
// absent, is NULL, or contains a character outside 'a'..'z'.
bool trie::Search(char *str, int &count)
{
    if (!str)
    {
        return false;
    }
    trienode *tt = root;
    for (; tt && *str; str++)
    {
        const int index = *str - 'a';
        // Valid slots are 0..25; the previous `index > 26` test accepted 26
        // and read one element past the end of branch[].
        if (index < 0 || index >= 26)
        {
            return false;
        }
        tt = tt->branch[index];
    }
    // The path exists only as a prefix of longer words unless `word` is set.
    if (tt && tt->word)
    {
        count = tt->count;
        return true;
    }
    return false;
}
// Prints, one per line, every word stored at `root` or anywhere below it.
// The node's own word is printed first and the children are then visited in
// slot order, so the output is sorted. A NULL `root` prints nothing.
void trie::printall(trienode *root)
{
    if (root == NULL)
    {
        return;
    }

    if (root->word != NULL)
    {
        cout << root->word << endl;
    }

    int slot = 0;
    while (slot < 26)
    {
        printall(root->branch[slot]);
        ++slot;
    }
}
// Prints every stored word that starts with the prefix `str`.
//
// Nothing is printed when `str` is NULL, contains a character outside
// 'a'..'z', or is not a prefix of any stored word. An empty prefix prints the
// whole trie.
void trie::printpre(char *str)
{
    if (!str)
    {
        return;
    }
    trienode *t = root;
    for (; t && *str; str++)
    {
        const int index = *str - 'a';
        // Valid slots are 0..25; the previous `index > 26` test accepted 26
        // and read one element past the end of branch[].
        if (index < 0 || index >= 26)
        {
            return;
        }
        t = t->branch[index];
    }
    if (t)
    {
        printall(t);
    }
}
int main()
{
    clock_t startTime, endTime;
    startTime = clock();
    trie *t = new trie();
    ifstream it("C:/Users/ww/Desktop/string.txt");
    string sline;
    string str = "";
    while (it&&getline(it, sline))
    {
        str += sline + " ";
    }
    it.close();
    for (int i = 0; i < str.length(); i++)
    {
        if (str[i] == . || str[i] == , || str[i] == ( || str[i] == ()
        {
            str.erase(i, 1);
        }
    }
    string word;
    stringstream ss(str);
    vector<string> vec;
    while (ss >> word)
    {
        vec.push_back(word);
    }
    vector<string>::iterator iter;
    for (iter = vec.begin(); iter != vec.end(); iter++)
    {
        t->Insert((char*)(*iter).data());
    }
    int val = -1;
    if (t->Search("the", val))
    {
        cout << val << endl;
    }
    else
    {
        cout << "empty" << endl;
    }
    endTime = clock();
    cout << "the running time is " << (double)(endTime - startTime) << endl;
    return 0;
}

hash代码

#include<stdlib.h>
#include<time.h>
#include<cstring>
#include<fstream>
#include<iostream>
#include<sstream>
#include<string>
#include<vector>
using namespace std;
// One link of a bucket chain in the hash table.
struct hashnode
{
    char *p;         // the string held by this link
    hashnode *next;  // following link in the same chain; NULL at the end
};
// Fixed-size chained hash table of C strings. All members are public.
class hashmap
{
public:
    // Allocates a head node for each of the 1000 buckets.
    hashmap();
    ~hashmap();

    // Hash function: maps the string `p` to a bucket index.
    int String2Int(char *p);

    // Stores a copy of the string `p` in its bucket.
    void Insert(char *p);

    // Reports whether a string equal to `p` has been inserted.
    bool Find(char *p);

    hashnode *hashps[1000];  // one head node per bucket; entries hang off head->next
};
// Gives every bucket its own head node with an empty chain behind it.
hashmap::hashmap()
{
    int slot = 0;
    while (slot < 1000)
    {
        hashnode *head = new hashnode();
        head->next = NULL;
        hashps[slot] = head;
        ++slot;
    }
}
// Releases every bucket: the head node, each chained entry and the string
// buffer each entry owns. The destructor used to be empty, which leaked the
// whole table.
// NOTE: the class still has the compiler-generated copy operations, so a
// hashmap must not be copied; two copies would both try to free the same nodes.
hashmap::~hashmap()
{
    for (int i = 0; i < 1000; i++)
    {
        hashnode *node = hashps[i];
        while (node)
        {
            hashnode *following = node->next;
            // Entries get `p` from new char[] in Insert; a head node created
            // with `new hashnode()` has a NULL `p`, for which delete[] is a no-op.
            delete[] node->p;
            delete node;
            node = following;
        }
        hashps[i] = NULL;
    }
}
// Hash function: returns the sum of the string's byte values modulo 1000,
// i.e. a bucket index in the range 0..999.
//
// Each byte is read as unsigned char and summed in an unsigned accumulator.
// Where plain char is signed, bytes above 0x7F used to be added as negative
// numbers, so the result could be negative and index hashps[] out of bounds.
// Strings made only of ASCII characters hash exactly as before.
int hashmap::String2Int(char *p)
{
    unsigned int sum = 0;
    for (; *p; p++)
    {
        sum += static_cast<unsigned char>(*p);
    }
    return static_cast<int>(sum % 1000);
}
void hashmap::Insert(char *p)
{
    int index = String2Int(p);
    hashnode *hash = hashps[index];
    hashnode *newr = new hashnode();
    newr->p = new char[strlen(p) + 1];
    strcpy_s(newr->p, strlen(p) + 1, p);
    newr->next = hash->next;
    hash->next = newr;
}
// Returns true when a string equal to `p` is stored in the table, false
// otherwise.
bool hashmap::Find(char *p)
{
    // Entries start after the bucket's head node.
    for (hashnode *w = hashps[String2Int(p)]->next; w; w = w->next)
    {
        if (std::strcmp(p, w->p) == 0)
        {
            return true;
        }
    }
    // Previously a non-empty chain without a match fell off the end of the
    // function with no return statement, which is undefined behaviour.
    return false;
}
// Returns the int that `p` points to. `p` is dereferenced unconditionally.
int re(int *p)
{
    const int value = p[0];
    return value;
}
int main()
{
    clock_t startTime, endTime;
    startTime = clock();
    hashmap *t = new hashmap();
    ifstream it("C:/Users/ww/Desktop/string.txt");
    string sline;
    string str = "";
    while (it&&getline(it, sline))
    {
        str += sline + " ";
    }
    it.close();
    for (int i = 0; i < str.length(); i++)
    {
        if (str[i] == . || str[i] == , || str[i] == ( || str[i] == ()
        {
            str.erase(i, 1);
        }
    }
    stringstream ss(str);
    string word;
    vector<string> vec;
    while (ss >> word)
    {
        vec.push_back(word);
    }
    vector<string>::iterator iter;
    for (iter = vec.begin(); iter != vec.end(); iter++)
    {
        t->Insert((char*)(*iter).data());
    }
    cout << "the result is: " << t->Find("the") << endl;
    endTime = clock();
    cout << "the running time is " << (double)(endTime - startTime) << endl;
    return 0;
}

trie树的查找时间是O(L),L是待查字符串的长度,与数据量的大小无关;hash的查找时间是O(LL),LL是关键字对应哈希地址上的链表长度。需要注意的是,本例的哈希表固定为1000个桶,LL会随着数据量的增大而变长;只有在链表较短(冲突较少)时,hash查找才同样高效。

以上是关于trie树查找和hash查找比较(大量数据)的主要内容,如果未能解决你的问题,请参考以下文章

Trie树,又称单词查找树字典

大数据处理-Trie树

算法与数据结构Trie树简介及应用

Trie树

Trie树(字典树)

01字典树