如何在 C# 中创建 trie [关闭]

Posted

技术标签:

【中文标题】如何在 C# 中创建 trie [关闭]【英文标题】:How to create a trie in c# [closed] 【发布时间】:2011-09-18 22:51:03 【问题描述】:

有谁知道我在哪里可以找到如何在 C# 中构造 trie 的示例?我正在尝试获取字典/单词列表并用它创建一个 trie。

【问题讨论】:

他绝对是指trie。 简单,你用谷歌搜索 C# trie 实现;前 3 个结果很成功 @Gregory 我显然在发布之前就这样做了。我希望有人可以为我指出一个更好的遍历或提供一个更简单的示例来构建一个 trie。 几周前我在codereview 上发布了一篇文章。您可以从理论上进行评估(并结合答案中提供的建议)。 @GregoryPakosz 如果您在 Google 上搜索“C# trie implementation”,this 是最佳结果。 【参考方案1】:

快速谷歌搜索结果: 取自:Trie Generic 作者:格伦·斯莱登 归功于 Kerry D. Wong

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

/// <summary>
/// A character trie that maps string keys to values of type <typeparamref name="TValue"/>.
/// Nodes start out as array-backed <see cref="TrieNode"/> instances; <see cref="OptimizeSparseNodes"/>
/// converts sparsely populated nodes to dictionary-backed <see cref="SparseTrieNode"/> instances.
/// Enumerating the trie yields every node, starting with the root.
/// </summary>
public class Trie<TValue> : System.Collections.IEnumerable, IEnumerable<Trie<TValue>.TrieNodeBase>
{
    /// <summary>
    /// Common contract for trie nodes: an optional value plus a set of children keyed by character.
    /// </summary>
    public abstract class TrieNodeBase
    {
        protected TValue m_value = default(TValue);

        /// <summary>Value stored at this node (default(TValue) when none was assigned).</summary>
        public TValue Value
        {
            get { return m_value; }
            set { m_value = value; }
        }

        /// <summary>True when the stored value differs from default(TValue).</summary>
        public bool HasValue { get { return !Object.Equals(m_value, default(TValue)); } }

        /// <summary>True when the node has no child storage.</summary>
        public abstract bool IsLeaf { get; }

        /// <summary>Child reached through <paramref name="c"/>, or null when there is none.</summary>
        public abstract TrieNodeBase this[char c] { get; }

        /// <summary>Child storage; null for a leaf. May contain null slots for array-backed nodes.</summary>
        public abstract TrieNodeBase[] Nodes { get; }

        /// <summary>Drops all children, turning the node into a leaf.</summary>
        public abstract void SetLeaf();

        /// <summary>Number of non-null children.</summary>
        public abstract int ChildCount { get; }

        /// <summary>True when the node would benefit from conversion to a sparse node.</summary>
        public abstract bool ShouldOptimize { get; }

        /// <summary>Snapshot of the (character, child) pairs of all non-null children.</summary>
        public abstract KeyValuePair<Char, TrieNodeBase>[] CharNodePairs();

        /// <summary>
        /// Returns the child for <paramref name="c"/>, creating it (and incrementing
        /// <paramref name="node_count"/>) when it does not exist yet.
        /// </summary>
        public abstract TrieNodeBase AddChild(char c, ref int node_count);

        /// <summary>
        /// Includes current node value
        /// </summary>
        /// <returns>Non-null values of this node and all of its descendants, in pre-order.</returns>
        public IEnumerable<TValue> SubsumedValues()
        {
            if (Value != null)
                yield return Value;

            TrieNodeBase[] children = Nodes;
            if (children == null)
                yield break;

            foreach (TrieNodeBase child in children)
            {
                if (child == null)
                    continue;
                foreach (TValue t in child.SubsumedValues())
                    yield return t;
            }
        }

        /// <summary>
        /// Includes current node
        /// </summary>
        /// <returns>This node followed by all of its descendants, in pre-order.</returns>
        public IEnumerable<TrieNodeBase> SubsumedNodes()
        {
            yield return this;
            foreach (TrieNodeBase n in SubsumedNodesExceptThis())
                yield return n;
        }

        /// <summary>
        /// Doesn't include current node
        /// </summary>
        /// <returns>All descendants of this node, in pre-order.</returns>
        public IEnumerable<TrieNodeBase> SubsumedNodesExceptThis()
        {
            TrieNodeBase[] children = Nodes;
            if (children == null)
                yield break;

            foreach (TrieNodeBase child in children)
            {
                if (child == null)
                    continue;
                foreach (TrieNodeBase n in child.SubsumedNodes())
                    yield return n;
            }
        }

        /// <summary>
        /// Replaces every descendant for which <see cref="ShouldOptimize"/> is true with a
        /// <see cref="SparseTrieNode"/> holding the same children and value.
        /// Note: doesn't de-optimize optimized nodes if re-run later
        /// </summary>
        public void OptimizeChildNodes()
        {
            if (Nodes == null)
                return;

            // CharNodePairs() is a snapshot, so ReplaceChild below cannot invalidate the iteration.
            foreach (var q in CharNodePairs())
            {
                TrieNodeBase child = q.Value;
                if (child.ShouldOptimize)
                {
                    TrieNodeBase sparse = new SparseTrieNode(child.CharNodePairs());
                    sparse.m_value = child.m_value;
                    Trie<TValue>.c_sparse_nodes++;
                    ReplaceChild(q.Key, sparse);

                    // Continue with the node that is now part of the tree. Recursing into the
                    // replaced node would store deeper replacements in a discarded object.
                    child = sparse;
                }
                child.OptimizeChildNodes();
            }
        }

        /// <summary>Stores <paramref name="n"/> as the child reached through <paramref name="c"/>.</summary>
        public abstract void ReplaceChild(Char c, TrieNodeBase n);
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    ///
    /// Sparse Trie Node
    ///
    /// Children are kept in a dictionary. Sparse nodes are only created by the optimization pass from nodes
    /// that already have children, so IsLeaf is normally 'false'; it only becomes 'true' after SetLeaf().
    /// All members tolerate that state.
    ///
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    public class SparseTrieNode : TrieNodeBase
    {
        Dictionary<Char, TrieNodeBase> d;

        /// <summary>Creates a sparse node holding the given (character, child) pairs.</summary>
        public SparseTrieNode(IEnumerable<KeyValuePair<Char, TrieNodeBase>> ie)
        {
            d = new Dictionary<char, TrieNodeBase>();
            foreach (var kvp in ie)
                d.Add(kvp.Key, kvp.Value);
        }

        public override TrieNodeBase this[Char c]
        {
            get
            {
                TrieNodeBase node;
                return (d != null && d.TryGetValue(c, out node)) ? node : null;
            }
        }

        /// <summary>A fresh array of the children on every call; null after SetLeaf().</summary>
        public override TrieNodeBase[] Nodes { get { return d == null ? null : d.Values.ToArray(); } }

        /// <summary>
        /// Discards the child dictionary; the node then reports itself as a leaf.
        /// </summary>
        public override void SetLeaf() { d = null; }

        public override int ChildCount { get { return d == null ? 0 : d.Count; } }

        public override KeyValuePair<Char, TrieNodeBase>[] CharNodePairs()
        {
            return d == null ? new KeyValuePair<Char, TrieNodeBase>[0] : d.ToArray();
        }

        public override TrieNodeBase AddChild(char c, ref int node_count)
        {
            // Re-create the storage when the node was turned into a leaf earlier.
            if (d == null)
                d = new Dictionary<char, TrieNodeBase>();

            TrieNodeBase node;
            if (!d.TryGetValue(c, out node))
            {
                node = new TrieNode();
                node_count++;
                d.Add(c, node);
            }
            return node;
        }

        public override void ReplaceChild(Char c, TrieNodeBase n)
        {
            if (d == null)
                throw new InvalidOperationException("Cannot replace a child of a leaf node.");
            d[c] = n;
        }

        public override bool ShouldOptimize { get { return false; } }
        public override bool IsLeaf { get { return d == null; } }
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    ///
    /// Non-sparse Trie Node
    ///
    /// Children live in an array covering the character range [m_base, m_base + nodes.Length);
    /// unused slots are null. 'nodes' itself is null for a leaf.
    ///
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    public class TrieNode : TrieNodeBase
    {
        private TrieNodeBase[] nodes = null;
        private Char m_base;

        public override int ChildCount { get { return (nodes != null) ? nodes.Count(e => e != null) : 0; } }

        /// <summary>Length of the child array, including empty slots.</summary>
        public int AllocatedChildCount { get { return (nodes != null) ? nodes.Length : 0; } }

        public override TrieNodeBase[] Nodes { get { return nodes; } }

        public override void SetLeaf() { nodes = null; }

        public override KeyValuePair<Char, TrieNodeBase>[] CharNodePairs()
        {
            // A leaf has no storage at all; report "no children" instead of dereferencing null.
            if (nodes == null)
                return new KeyValuePair<Char, TrieNodeBase>[0];

            KeyValuePair<Char, TrieNodeBase>[] rg = new KeyValuePair<char, TrieNodeBase>[ChildCount];
            Char ch = m_base;
            int i = 0;
            foreach (TrieNodeBase child in nodes)
            {
                if (child != null)
                    rg[i++] = new KeyValuePair<char, TrieNodeBase>(ch, child);
                ch++;
            }
            return rg;
        }

        public override TrieNodeBase this[char c]
        {
            get
            {
                if (nodes != null && m_base <= c && c < m_base + nodes.Length)
                    return nodes[c - m_base];
                return null;
            }
        }

        public override TrieNodeBase AddChild(char c, ref int node_count)
        {
            if (nodes == null)
            {
                m_base = c;
                nodes = new TrieNodeBase[1];
            }
            else if (c >= m_base + nodes.Length)
            {
                // Grow upwards so that c becomes the last slot.
                Array.Resize(ref nodes, c - m_base + 1);
            }
            else if (c < m_base)
            {
                // Grow downwards: shift the existing children right and make c the new base.
                int shift = m_base - c;
                TrieNodeBase[] tmp = new TrieNodeBase[nodes.Length + shift];
                nodes.CopyTo(tmp, shift);
                m_base = c;
                nodes = tmp;
            }

            TrieNodeBase node = nodes[c - m_base];
            if (node == null)
            {
                node = new TrieNode();
                node_count++;
                nodes[c - m_base] = node;
            }
            return node;
        }

        public override void ReplaceChild(Char c, TrieNodeBase n)
        {
            if (nodes == null || c >= m_base + nodes.Length || c < m_base)
                throw new InvalidOperationException("No child slot exists for the given character.");
            nodes[c - m_base] = n;
        }

        public override bool ShouldOptimize
        {
            get
            {
                if (nodes == null)
                    return false;
                return (ChildCount * 9 < nodes.Length);     // empirically determined optimal value (space & time)
            }
        }

        public override bool IsLeaf { get { return nodes == null; } }
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    /// 
    /// Trie proper begins here
    ///
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    private TrieNodeBase _root = new TrieNode();

    /// <summary>Number of nodes created through <see cref="Add"/> (the root is not counted).</summary>
    public int c_nodes = 0;

    /// <summary>Number of sparse nodes created by optimization; static, so shared per TValue.</summary>
    public static int c_sparse_nodes = 0;

    // in combination with Add(...), enables C# 3.0 initialization syntax, even though it never seems to call it
    public System.Collections.IEnumerator GetEnumerator()
    {
        return _root.SubsumedNodes().GetEnumerator();
    }

    IEnumerator<TrieNodeBase> IEnumerable<TrieNodeBase>.GetEnumerator()
    {
        return _root.SubsumedNodes().GetEnumerator();
    }

    /// <summary>All non-null values stored in the trie.</summary>
    public IEnumerable<TValue> Values { get { return _root.SubsumedValues(); } }

    /// <summary>
    /// Converts the root and every descendant whose <see cref="TrieNodeBase.ShouldOptimize"/> is true
    /// into sparse nodes.
    /// </summary>
    public void OptimizeSparseNodes()
    {
        if (_root.ShouldOptimize)
        {
            TrieNodeBase sparse = new SparseTrieNode(_root.CharNodePairs());
            sparse.Value = _root.Value;     // keep the value stored for the empty key
            _root = sparse;
            c_sparse_nodes++;
        }
        _root.OptimizeChildNodes();
    }

    public TrieNodeBase Root { get { return _root; } }

    /// <summary>
    /// Stores <paramref name="v"/> under key <paramref name="s"/>, creating nodes as needed.
    /// </summary>
    /// <returns>The node that terminates the key.</returns>
    public TrieNodeBase Add(String s, TValue v)
    {
        TrieNodeBase node = _root;
        foreach (Char c in s)
            node = node.AddChild(c, ref c_nodes);

        node.Value = v;
        return node;
    }

    /// <summary>True when the full key exists and its node holds a non-default value.</summary>
    public bool Contains(String s)
    {
        TrieNodeBase node = FindNode(s);
        return node != null && node.HasValue;
    }

    /// <summary>
    /// Debug only; this is hideously inefficient
    /// </summary>
    /// <returns>The key leading to <paramref name="seek"/>, or null when the node is not found.</returns>
    public String GetKey(TrieNodeBase seek)
    {
        return FindKey(n => n == seek);
    }

    /// <summary>
    /// Debug only; this is hideously inefficient
    /// </summary>
    /// <returns>The key of the first node (depth-first) whose value equals <paramref name="seek"/>, or null.</returns>
    public String GetKey(TValue seek)
    {
        return FindKey(n => n.Value != null && n.Value.Equals(seek));
    }

    // Depth-first search for the first non-root node accepted by is_match; returns its key or null.
    private String FindKey(Func<TrieNodeBase, bool> is_match)
    {
        List<Char> path = new List<Char>();
        return FindKeyFrom(_root, path, is_match) ? new String(path.ToArray()) : null;
    }

    // Recursive worker for FindKey; on success 'path' holds the characters leading to the match.
    private static bool FindKeyFrom(TrieNodeBase cur, List<Char> path, Func<TrieNodeBase, bool> is_match)
    {
        foreach (var kvp in cur.CharNodePairs())
        {
            path.Add(kvp.Key);
            if (is_match(kvp.Value))
                return true;
            if (kvp.Value.Nodes != null && FindKeyFrom(kvp.Value, path, is_match))
                return true;
            path.RemoveAt(path.Count - 1);
        }
        return false;
    }

    /// <summary>Node reached by following <paramref name="s_in"/> from the root, or null.</summary>
    public TrieNodeBase FindNode(String s_in)
    {
        TrieNodeBase node = _root;
        foreach (Char c in s_in)
        {
            node = node[c];
            if (node == null)
                return null;
        }
        return node;
    }

    /// <summary>
    /// If continuation from the terminal node is possible with a different input string, then that node is not
    /// returned as a 'last' node for the given input. In other words, 'last' nodes must be leaf nodes, where
    /// continuation possibility is truly unknown. The presence of a nodes array that we couldn't match to 
    /// means the search fails; it is not the design of the 'OrLast' feature to provide 'closest' or 'best'
    /// matching but rather to enable truncated tails still in the context of exact prefix matching.
    /// </summary>
    public TrieNodeBase FindNodeOrLast(String s_in, out bool f_exact)
    {
        f_exact = false;
        TrieNodeBase node = _root;
        foreach (Char c in s_in)
        {
            if (node.IsLeaf)
                return node;

            node = node[c];
            if (node == null)
                return null;
        }
        f_exact = true;
        return node;
    }

    // even though I found some articles that attest that using a foreach enumerator with arrays (and Lists)
    // returns a value type, thus avoiding spurious garbage, I had already changed the code to not use enumerator.
    /// <summary>Value stored under <paramref name="s_in"/>, or default(TValue) when the key is absent.</summary>
    public unsafe TValue Find(String s_in)
    {
        TrieNodeBase node = _root;
        fixed (Char* pin_s = s_in)
        {
            Char* p = pin_s;
            Char* p_end = p + s_in.Length;
            while (p < p_end)
            {
                if ((node = node[*p]) == null)
                    return default(TValue);
                p++;
            }
            return node.Value;
        }
    }

    /// <summary>
    /// Same as <see cref="Find(String)"/> for a key given as a pointer to <paramref name="cb_ctag"/> characters.
    /// </summary>
    public unsafe TValue Find(Char* p_tag, int cb_ctag)
    {
        TrieNodeBase node = _root;
        Char* p_end = p_tag + cb_ctag;
        while (p_tag < p_end)
        {
            if ((node = node[*p_tag]) == null)
                return default(TValue);
            p_tag++;
        }
        return node.Value;
    }

    /// <summary>Non-null values of every node visited while following <paramref name="s_in"/>.</summary>
    public IEnumerable<TValue> FindAll(String s_in)
    {
        TrieNodeBase node = _root;
        foreach (Char c in s_in)
        {
            node = node[c];
            if (node == null)
                break;
            if (node.Value != null)
                yield return node.Value;
        }
    }

    /// <summary>Values at or below the node for <paramref name="s"/>; empty when the key is absent.</summary>
    public IEnumerable<TValue> SubsumedValues(String s)
    {
        TrieNodeBase node = FindNode(s);
        if (node == null)
            return Enumerable.Empty<TValue>();
        return node.SubsumedValues();
    }

    /// <summary>Nodes at or below the node for <paramref name="s"/>; empty when the key is absent.</summary>
    public IEnumerable<TrieNodeBase> SubsumedNodes(String s)
    {
        TrieNodeBase node = FindNode(s);
        if (node == null)
            return Enumerable.Empty<TrieNodeBase>();
        return node.SubsumedNodes();
    }

    /// <summary>
    /// For every start position in <paramref name="s"/>, yields the non-null values of all keys that
    /// match the text beginning at that position.
    /// </summary>
    public IEnumerable<TValue> AllSubstringValues(String s)
    {
        for (int start = 0; start < s.Length; start++)
        {
            TrieNodeBase node = _root;
            for (int i = start; i < s.Length; i++)
            {
                node = node[s[i]];
                if (node == null)
                    break;
                if (node.Value != null)
                    yield return node.Value;
            }
        }
    }

    /// <summary>
    /// Invokes <paramref name="callback"/> with (key, node) for nodes below the root; descendants are
    /// reported before their ancestors.
    /// note: only returns nodes with non-null values
    /// </summary>
    /// <exception cref="InvalidOperationException">A leaf node without a value was found.</exception>
    public void DepthFirstTraverse(Action<String, TrieNodeBase> callback)
    {
        Char[] rgch = new Char[100];
        int depth = 0;

        Action<TrieNodeBase> fn = null;
        fn = (TrieNodeBase cur) =>
        {
            // Double the key buffer whenever the traversal gets deeper than its capacity.
            if (depth >= rgch.Length)
                Array.Resize(ref rgch, rgch.Length * 2);

            foreach (var kvp in cur.CharNodePairs())
            {
                rgch[depth] = kvp.Key;
                TrieNodeBase n = kvp.Value;
                if (n.Nodes != null)
                {
                    depth++;
                    fn(n);
                    depth--;
                }
                else if (n.Value == null)       // leaf nodes should always have a value
                    throw new InvalidOperationException("Leaf node without a value.");

                if (n.Value != null)
                    callback(new String(rgch, 0, depth + 1), n);
            }
        };

        fn(_root);
    }

    /// <summary>
    /// Invokes <paramref name="callback"/> once per leaf node with the leaf's key and the stack of nodes
    /// on the path to it. The same stack instance is passed on every call and keeps changing afterwards.
    /// note: only returns nodes with non-null values
    /// </summary>
    /// <exception cref="InvalidOperationException">A leaf node without a value was found.</exception>
    public void EnumerateLeafPaths(Action<String, IEnumerable<TrieNodeBase>> callback)
    {
        Stack<TrieNodeBase> stk = new Stack<TrieNodeBase>();
        Char[] rgch = new Char[100];

        Action<TrieNodeBase> fn = null;
        fn = (TrieNodeBase cur) =>
        {
            if (stk.Count >= rgch.Length)
                Array.Resize(ref rgch, rgch.Length * 2);

            foreach (var kvp in cur.CharNodePairs())
            {
                rgch[stk.Count] = kvp.Key;
                TrieNodeBase n = kvp.Value;
                stk.Push(n);
                if (n.Nodes != null)
                {
                    fn(n);
                }
                else
                {
                    if (n.Value == null)        // leaf nodes should always have a value
                        throw new InvalidOperationException("Leaf node without a value.");
                    callback(new String(rgch, 0, stk.Count), stk);
                }
                stk.Pop();
            }
        };

        fn(_root);
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    ///
    /// Convert a trie with one value type to another
    ///
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    public Trie<TNew> ToTrie<TNew>(Func<TValue, TNew> value_converter)
    {
        Trie<TNew> t = new Trie<TNew>();
        DepthFirstTraverse((s, n) =>
        {
            t.Add(s, value_converter(n.Value));
        });
        return t;
    }
}

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Extension helpers for building tries from sequences and dictionaries.
/// </summary>
public static class TrieExtension
{
    /// <summary>
    /// Builds a trie keyed by the strings of <paramref name="src"/>; each value is produced by
    /// <paramref name="selector"/> from the string and its zero-based position in the sequence.
    /// </summary>
    public static Trie<TValue> ToTrie<TValue>(this IEnumerable<String> src, Func<String, int, TValue> selector)
    {
        Trie<TValue> t = new Trie<TValue>();
        int idx = 0;
        foreach (String s in src)
        {
            t.Add(s, selector(s, idx));
            idx++;
        }
        return t;
    }

    /// <summary>
    /// Builds a trie containing every key/value pair of <paramref name="src"/>.
    /// </summary>
    public static Trie<TValue> ToTrie<TValue>(this Dictionary<String, TValue> src)
    {
        Trie<TValue> t = new Trie<TValue>();
        foreach (var kvp in src)
            t.Add(kvp.Key, kvp.Value);
        return t;
    }

    /// <summary>
    /// Convenience wrapper for <c>trie.AllSubstringValues(s)</c>.
    /// </summary>
    public static IEnumerable<TValue> AllSubstringValues<TValue>(this String s, Trie<TValue> trie)
    {
        return trie.AllSubstringValues(s);
    }

    /// <summary>
    /// Adds <paramref name="v"/> to the set stored under <paramref name="k"/>, creating the set
    /// when the key is not present yet.
    /// </summary>
    public static void AddToValueHashset<TKey, TValue>(this Dictionary<TKey, HashSet<TValue>> d, TKey k, TValue v)
    {
        HashSet<TValue> hs;
        if (d.TryGetValue(k, out hs))
            hs.Add(v);
        else
            d.Add(k, new HashSet<TValue> { v });
    }
}

【讨论】:

谷歌没有提供这个。请注明原始来源。 它错过了Util.SetStringChar 函数 - 无法编译【参考方案2】:

这是我自己的代码,来自我对How to find a word from arrays of characters? 的回答:

/// <summary>
/// Trie over the letters listed in <see cref="Letter.Chars"/>. Each node that ends a word stores
/// that word in <see cref="Node.Word"/>.
/// </summary>
public class Trie
{
  /// <summary>
  /// A letter identified by its position in <see cref="Chars"/>. Characters that do not occur in
  /// <see cref="Chars"/> convert to Index -1.
  /// </summary>
  public struct Letter
  {
    public const string Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    public static implicit operator Letter(char c)
    {
      return new Letter() { Index = Chars.IndexOf(c) };
    }
    public int Index;
    public char ToChar()
    {
      return Chars[Index];
    }
    public override string ToString()
    {
      return Chars[Index].ToString();
    }
  }

  /// <summary>
  /// Trie node: outgoing edges keyed by letter, plus the complete word when a word ends here.
  /// </summary>
  public class Node
  {
    public string Word;
    public bool IsTerminal { get { return Word != null; } }
    public Dictionary<Letter, Node> Edges = new Dictionary<Letter, Node>();
  }

  public Node Root = new Node();

  /// <summary>
  /// Builds the trie from <paramref name="words"/>. Words may be given in any order.
  /// </summary>
  /// <exception cref="System.ArgumentNullException"><paramref name="words"/> is null.</exception>
  public Trie(string[] words)
  {
    if (words == null) throw new System.ArgumentNullException("words");

    foreach (var word in words)
    {
      var node = Root;
      foreach (char c in word)
      {
        Letter letter = c;
        Node next;
        if (!node.Edges.TryGetValue(letter, out next))
        {
          next = new Node();
          node.Edges.Add(letter, next);
        }
        node = next;
      }

      // Mark the final node even when it already existed; otherwise a word that is a prefix of
      // an earlier word (e.g. "CAR" after "CARS") would never become terminal.
      if (word.Length > 0)
      {
        node.Word = word;
      }
    }
  }
}

【讨论】:

糟糕,我忘了包括 Letter 的定义。您可以从原始问题中得到它。或者你可以使用 int 之类的其他东西。 +1 但是有一个小错误:为了能够使用“未排序”的词,你应该将if (len == word.Length)移到if (!node.Edges.TryGetValue(之外 这个实现的一个问题(对其他人来说可能很明显,但可能不是)是它只支持 26 个 ASCII 字母,这对于某些应用程序可能并不令人满意。 如果您首先添加包含较短单词的较长单词,则稍后添加较短单词不会将该路径标记为终端。终止条件的检查应该发生在所有迭代中,而不仅仅是在添加新节点时。【参考方案3】:

看看这个 codeplex 项目:

https://github.com/gmamaladze/trienet

它是一个库,其中包含几个经过良好测试的通用 c# trie 类的不同变体,包括 patricia trie 和并行 trie。

Trie - 简单的 trie,只允许前缀搜索,例如 .Where(s => s.StartsWith(searchString)) SuffixTrie - 也允许中缀搜索,例如 .Where(s => s.Contains(searchString)) PatriciaTrie – 压缩的 trie,更紧凑,在查找过程中效率更高,但在构建期间速度相当慢。 SuffixPatriciaTrie - 与 PatriciaTrie 相同,也启用中缀搜索。 ParallelTrie – 非常原始地实现了并行数据结构,允许同时从不同线程添加数据和检索结果。

【讨论】:

【参考方案4】:

这是一个 Trie 和一个扫描仪合二为一(取自 Resin 代码库):

using System;
using System.Collections.Generic;
using System.IO;

namespace Resin
{
    /// <summary>
    /// Small usage example for <see cref="Trie"/>.
    /// </summary>
    public class UseTrie
    {
        public void Main()
        {
            var words = new[] { "pre", "prefix" };
            var trie = new Trie(words);

            // Print "pre" and "prefix"
            foreach (var word in trie.GetTokens("pr"))
            {
                Console.WriteLine(word);
            }
        }
    }

    /// <summary>
    /// A trie node that doubles as the trie itself: the root holds no character, every other node
    /// holds one character in <see cref="Value"/> and sets <see cref="Eow"/> when a word ends there.
    /// </summary>
    public class Trie
    {
        /// <summary>Character represented by this node (unset on the root).</summary>
        public char Value { get; set; }

        /// <summary>True when a word ends at this node.</summary>
        public bool Eow { get; set; }

        public IDictionary<char, Trie> Children { get; set; }

        public bool Root { get; set; }

        /// <summary>Creates an empty node, optionally flagged as root.</summary>
        public Trie(bool isRoot)
        {
            Root = isRoot;
            Children = new Dictionary<char, Trie>();
        }

        /// <summary>Creates a root node and adds every word of <paramref name="words"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
        public Trie(IList<string> words)
        {
            if (words == null) throw new ArgumentNullException("words");

            Root = true;
            Children = new Dictionary<char, Trie>();

            foreach (var word in words)
            {
                AppendToDescendants(word);
            }
        }

        /// <summary>
        /// Creates the node for the first character of <paramref name="text"/> and the chain of
        /// descendants for the remaining characters.
        /// </summary>
        /// <exception cref="ArgumentException"><paramref name="text"/> is null or whitespace.</exception>
        public Trie(string text)
        {
            if (string.IsNullOrWhiteSpace(text))
            {
                throw new ArgumentException("text");
            }

            Value = text[0];

            Children = new Dictionary<char, Trie>();

            if (text.Length > 1)
            {
                AppendToDescendants(text.Substring(1));
            }
            else
            {
                Eow = true;
            }
        }

        /// <summary>
        /// Returns every stored word that starts with <paramref name="prefix"/>.
        /// Intended to be called on the root node.
        /// </summary>
        /// <exception cref="ArgumentException"><paramref name="prefix"/> is null or empty.</exception>
        public IEnumerable<string> GetTokens(string prefix)
        {
            if (string.IsNullOrEmpty(prefix)) throw new ArgumentException("prefix");

            var words = new List<string>();
            Trie child;
            if (Children.TryGetValue(prefix[0], out child))
            {
                child.Scan(prefix, prefix, words);
            }
            return words;
        }

        // Walks down while 'prefix' still has unmatched characters; once the last prefix character
        // is matched, collects every word in the subtree. 'originalPrefix' is the text accumulated
        // along the path and becomes the word when a node with Eow is reached.
        private void Scan(string originalPrefix, string prefix, List<string> words)
        {
            // Only reject null/empty: a node may legitimately represent a whitespace character.
            if (string.IsNullOrEmpty(prefix)) throw new ArgumentException("prefix");

            if (prefix[0] != Value)
            {
                return;
            }

            if (prefix.Length == 1)
            {
                // The scan has reached its destination. Find words derived from this node.
                if (Eow) words.Add(originalPrefix);
                foreach (var node in Children.Values)
                {
                    node.Scan(originalPrefix + node.Value, node.Value.ToString(), words);
                }
            }
            else
            {
                Trie child;
                if (Children.TryGetValue(prefix[1], out child))
                {
                    child.Scan(originalPrefix, prefix.Substring(1), words);
                }
            }
        }

        /// <summary>
        /// Adds <paramref name="text"/> below this node, reusing the child for its first character
        /// when one exists.
        /// </summary>
        /// <exception cref="ArgumentException"><paramref name="text"/> is null or whitespace.</exception>
        public void AppendToDescendants(string text)
        {
            if (string.IsNullOrWhiteSpace(text)) throw new ArgumentException("text");

            Trie child;
            if (!Children.TryGetValue(text[0], out child))
            {
                child = new Trie(text);
                Children.Add(text[0], child);
            }
            else
            {
                child.Append(text);
            }
        }

        /// <summary>
        /// Adds <paramref name="text"/>, whose first character must equal <see cref="Value"/>,
        /// starting at this node.
        /// </summary>
        /// <exception cref="ArgumentException"><paramref name="text"/> is null or whitespace.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The first character does not match this node.</exception>
        /// <exception cref="InvalidOperationException">Called on the root node.</exception>
        public void Append(string text)
        {
            if (string.IsNullOrWhiteSpace(text)) throw new ArgumentException("text");
            if (text[0] != Value) throw new ArgumentOutOfRangeException("text");
            if (Root) throw new InvalidOperationException("When appending from the root, use AppendToDescendants.");

            var overflow = text.Substring(1);
            if (overflow.Length > 0)
            {
                AppendToDescendants(overflow);
            }
            else
            {
                // The word ends on this already existing node (e.g. "pre" added after "prefix").
                Eow = true;
            }
        }
    }
}

【讨论】:

【参考方案5】:

我刚刚在 C# 中创建了一个 Trie 实现:

https://github.com/TomGullen/C-Sharp-Trie/tree/master

代码:

/*
    Copyright (c) 2016 Scirra Ltd
    www.scirra.com

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in all
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    SOFTWARE.
 */
/// <summary>
/// Word trie with autocomplete support. Words are trimmed and, unless the trie is case sensitive,
/// lower-cased before they are stored or looked up.
/// </summary>
public class Trie
{
    private class Node
    {
        public bool Terminal { get; set; }
        public Dictionary<char, Node> Nodes { get; private set; }
        public Node ParentNode { get; private set; }
        public char C { get; private set; }

        /// <summary>
        /// String word represented by this node
        /// </summary>
        public string Word
        {
            get
            {
                var b = new StringBuilder();
                // Stop before the root: its placeholder character is not part of any word.
                for (var n = this; n.ParentNode != null; n = n.ParentNode)
                {
                    b.Insert(0, n.C);
                }
                return b.ToString();
            }
        }

        public Node(Node parent, char c)
        {
            C = c;
            ParentNode = parent;
            Terminal = false;
            Nodes = new Dictionary<char, Node>();
        }

        /// <summary>
        /// Return list of terminal nodes under this node
        /// </summary>
        /// <param name="ignoreChar">Direct child to skip; deeper levels are never filtered.</param>
        public IEnumerable<Node> TerminalNodes(char? ignoreChar = null)
        {
            var r = new List<Node>();
            if (Terminal) r.Add(this);
            foreach (var node in Nodes.Values)
            {
                if (ignoreChar != null && node.C == ignoreChar) continue;
                r.AddRange(node.TerminalNodes());
            }
            return r;
        }
    }

    // Root of the trie; its character is a placeholder and never part of a word.
    private readonly Node _topNode = new Node(null, ' ');
    private Node TopNode
    {
        get { return _topNode; }
    }
    private bool CaseSensitive { get; set; }

    /// <summary>
    /// Get list of all words in trie that start with
    /// </summary>
    /// <param name="wordStart">Prefix to complete; normalised like stored words.</param>
    /// <param name="fetchMax">Maximum number of suggestions; must be positive.</param>
    /// <returns>
    /// Up to <paramref name="fetchMax"/> words. When fewer words share the prefix, words below
    /// the parent of the last prefix character are added to fill up the result.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="fetchMax"/> is not positive.</exception>
    public HashSet<string> GetAutocompleteSuggestions(string wordStart, int fetchMax = 10)
    {
        if (fetchMax <= 0) throw new ArgumentOutOfRangeException("fetchMax", "Fetch max must be positive integer.");

        wordStart = NormaliseWord(wordStart);

        var r = new HashSet<string>();

        var selectedNode = TopNode;
        foreach (var c in wordStart)
        {
            // Nothing starting with this word
            Node next;
            if (!selectedNode.Nodes.TryGetValue(c, out next)) return r;
            selectedNode = next;
        }

        // Get terminal nodes for this node
        foreach (var node in selectedNode.TerminalNodes().Take(fetchMax))
        {
            r.Add(node.Word);
        }

        // Go up a node if not found enough suggestions
        if (r.Count < fetchMax)
        {
            var parentNode = selectedNode.ParentNode;
            if (parentNode != null)
            {
                var remainingToFetch = fetchMax - r.Count;
                foreach (var node in parentNode.TerminalNodes(selectedNode.C).Take(remainingToFetch))
                {
                    r.Add(node.Word);
                }
            }
        }

        return r;
    }

    /// <summary>
    /// Initialise instance of trie with set of words
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
    public Trie(IEnumerable<string> words, bool caseSensitive = false)
    {
        if (words == null) throw new ArgumentNullException("words");

        CaseSensitive = caseSensitive;
        foreach (var word in words)
        {
            AddWord(word);
        }
    }

    /// <summary>
    /// Add a single word to the trie
    /// </summary>
    public void AddWord(string word)
    {
        word = NormaliseWord(word);

        // Null/blank input normalises to an empty word; storing it would only mark the root.
        if (word.Length == 0) return;

        var selectedNode = TopNode;
        foreach (var c in word)
        {
            Node next;
            if (!selectedNode.Nodes.TryGetValue(c, out next))
            {
                next = new Node(selectedNode, c);
                selectedNode.Nodes.Add(c, next);
            }
            selectedNode = next;
        }
        selectedNode.Terminal = true;
    }

    /// <summary>
    /// Normalise word for trie
    /// </summary>
    /// <returns>The trimmed word, lower-cased when the trie is not case sensitive; never null.</returns>
    private string NormaliseWord(string word)
    {
        if (String.IsNullOrWhiteSpace(word)) return String.Empty;

        word = word.Trim();
        return CaseSensitive ? word : word.ToLowerInvariant();
    }

    /// <summary>
    /// Does this word exist in this trie?
    /// </summary>
    public bool IsWordInTrie(string word)
    {
        word = NormaliseWord(word);
        if (word.Length == 0) return false;

        var selectedNode = TopNode;
        foreach (var c in word)
        {
            Node next;
            if (!selectedNode.Nodes.TryGetValue(c, out next)) return false;
            selectedNode = next;
        }
        return selectedNode.Terminal;
    }
}

示例用法:

// Build a trie from a fixed word list and write every suggestion for the prefix "ha".
var trie = new Trie(new String[] { "hello", "help", "he-man", "happy", "hoppy", "tom" });

var autoCompleteSuggestions = trie.GetAutocompleteSuggestions("ha");
foreach (var s in autoCompleteSuggestions)
{
    Response.Write(s + "\n");
}

【讨论】:

【参考方案6】:

一个简单的 Trie 实现。

http://github.com/bharathkumarms/AlgorithmsMadeEasy/blob/master/AlgorithmsMadeEasy/Tries.cs

using System;
using System.Collections.Generic;
using System.Linq;

namespace AlgorithmsMadeEasy
{
    /// <summary>
    /// Character trie: stores words as paths of <see cref="TrieNode"/> objects and
    /// answers whole-word and prefix membership queries.
    /// </summary>
    class Tries
    {
        // Allocated up front so the trie works even if CreateRoot() is never called.
        TrieNode root = new TrieNode();

        /// <summary>
        /// Replaces the root with a new empty node, dropping every word stored so far.
        /// </summary>
        public void CreateRoot()
        {
            root = new TrieNode();
        }

        /// <summary>
        /// Stores a word in the trie.
        /// </summary>
        /// <param name="chars">Characters of the word; an empty array is ignored.</param>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
        public void Add(char[] chars)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            if (chars.Length == 0)
            {
                return;
            }

            TrieNode current = root;
            foreach (char c in chars)
            {
                TrieNode next;
                if (!current.children.TryGetValue(c, out next))
                {
                    next = new TrieNode();
                    current.children.Add(c, next);
                }
                current = next;
            }

            // Mark the final node unconditionally. Setting the flag only on newly
            // created nodes loses words that are prefixes of earlier ones
            // (e.g. adding "abc" after "abcd").
            current.endOfWord = true;
        }

        /// <summary>
        /// Tells whether some stored word starts with the given characters.
        /// </summary>
        /// <param name="chars">Prefix to look for; an empty prefix always matches.</param>
        /// <returns>True when the whole prefix can be followed from the root.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
        public bool FindPrefix(char[] chars)
        {
            return Walk(chars) != null;
        }

        /// <summary>
        /// Tells whether exactly this word was stored with <see cref="Add"/>.
        /// </summary>
        /// <param name="chars">Word to look for; an empty word is never found.</param>
        /// <returns>True when the path exists and its last node ends a word.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
        public bool FindWord(char[] chars)
        {
            TrieNode last = Walk(chars);
            if (chars.Length == 0)
            {
                return false;
            }
            return last != null && last.endOfWord;
        }

        // Follows chars from the root; returns the node reached, or null when the path breaks.
        private TrieNode Walk(char[] chars)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }

            TrieNode current = root;
            foreach (char c in chars)
            {
                TrieNode next;
                if (!current.children.TryGetValue(c, out next))
                {
                    return null;
                }
                current = next;
            }
            return current;
        }
    }

    /// <summary>
    /// One trie node: outgoing edges keyed by character, plus an end-of-word flag.
    /// </summary>
    public class TrieNode
    {
        // Child nodes, keyed by the character that leads to them.
        public Dictionary<char, TrieNode> children = new Dictionary<char, TrieNode>();

        // True when the path from the root to this node spells a stored word.
        public bool endOfWord;
    }
}

/*
Calling Code:
    Tries t = new Tries();
    t.CreateRoot();
    t.Add("abc".ToCharArray());
    t.Add("abgl".ToCharArray());
    t.Add("cdf".ToCharArray());
    t.Add("abcd".ToCharArray());
    t.Add("lmn".ToCharArray());

    bool findPrefix1 = t.FindPrefix("ab".ToCharArray());
    bool findPrefix2 = t.FindPrefix("lo".ToCharArray());

    bool findWord1 = t.FindWord("lmn".ToCharArray());
    bool findWord2 = t.FindWord("ab".ToCharArray());
    bool findWord3 = t.FindWord("cdf".ToCharArray());
    bool findWord4 = t.FindWord("ghi".ToCharArray());
*/

【讨论】:

【参考方案7】:

下面链接中的文章给出了一个很好的实现,并与其他 .NET 集合做了比较。文章还说明了在哪些场景下应该使用 Trie,而不是 .NET 内置的集合(如 HashSet、SortedList 等)。

https://visualstudiomagazine.com/articles/2015/10/20/text-pattern-search-trie-class-net.aspx

【讨论】:

【参考方案8】:

要从 trie 数据结构中获取即时建议,可以在用字符串数组加载数据之后使用下面的实现。(检索更快)

/// <summary>
/// Trie over the letters A-Z (case-insensitive) that returns stored words
/// beginning with a given prefix.
/// </summary>
public class Trie
{
    /// <summary>
    /// Edge label: the position of a character in <see cref="Chars"/>.
    /// Characters outside A-Z all get Index -1 and therefore share one edge;
    /// ToChar/ToString throw for such a value.
    /// </summary>
    public struct Letter : System.IEquatable<Letter>
    {
        public const string Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        public int Index;

        public static implicit operator Letter(char c)
        {
            // char.ToUpperInvariant avoids the string/array allocations of
            // c.ToString().ToUpper().ToCharArray().First() and does not depend
            // on the current culture.
            return new Letter() { Index = Chars.IndexOf(char.ToUpperInvariant(c)) };
        }

        public char ToChar()
        {
            return Chars[Index];
        }

        public override string ToString()
        {
            return Chars[Index].ToString();
        }

        // Explicit equality members: Letter is used as a dictionary key.
        public bool Equals(Letter other)
        {
            return Index == other.Index;
        }

        public override bool Equals(object obj)
        {
            return obj is Letter && Equals((Letter)obj);
        }

        public override int GetHashCode()
        {
            return Index;
        }
    }

    /// <summary>
    /// Trie node. <see cref="Word"/> is non-null only on nodes that end a stored word.
    /// </summary>
    public class Node
    {
        public string Word;
        public bool IsTerminal { get { return Word != null; } }
        public Dictionary<Letter, Node> Edges = new Dictionary<Letter, Node>();
    }

    public Node Root = new Node();

    /// <summary>
    /// Builds the trie from a word list.
    /// </summary>
    /// <param name="words">Words to store; null or empty entries are skipped.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="words"/> is null.</exception>
    public Trie(string[] words)
    {
        if (words == null)
        {
            throw new System.ArgumentNullException("words");
        }

        foreach (var word in words)
        {
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            var node = Root;
            foreach (var c in word)
            {
                Letter letter = c;
                Node next;
                if (!node.Edges.TryGetValue(letter, out next))
                {
                    next = new Node();
                    node.Edges.Add(letter, next);
                }
                node = next;
            }

            // Mark the final node even when it already existed; otherwise a word that
            // is a prefix of an earlier one ("help" after "helper") is never stored.
            // The first spelling seen for a node is kept.
            if (node.Word == null)
            {
                node.Word = word;
            }
        }
    }

    /// <summary>
    /// Returns up to <paramref name="max"/> stored words that start with <paramref name="word"/>.
    /// </summary>
    /// <param name="word">Prefix to complete; null is treated as an empty prefix.</param>
    /// <param name="max">Maximum number of words to return.</param>
    /// <returns>The matching words, or an empty list when the prefix is not in the trie.</returns>
    public List<string> GetSuggestions(string word, int max)
    {
        List<string> outPut = new List<string>();

        var node = Root;
        foreach (var l in word ?? string.Empty)
        {
            Node cNode;
            // Stop at the first letter that has no edge. Skipping unmatched letters
            // would return completions for a different prefix (e.g. "zzh" -> "h...").
            if (!node.Edges.TryGetValue(l, out cNode))
            {
                return outPut;
            }
            node = cNode;
        }

        GetChildWords(node, ref outPut, max);

        return outPut;
    }

    /// <summary>
    /// Appends the words stored at <paramref name="n"/> and below to <paramref name="outWords"/>
    /// until it holds <paramref name="Max"/> entries.
    /// </summary>
    /// <param name="n">Node to start from.</param>
    /// <param name="outWords">List that receives the words.</param>
    /// <param name="Max">Upper bound on the size of <paramref name="outWords"/>.</param>
    public void GetChildWords(Node n, ref List<string> outWords, int Max)
    {
        // Once the list is full nothing further can be added, so stop descending.
        if (outWords.Count >= Max)
        {
            return;
        }

        if (n.IsTerminal)
        {
            outWords.Add(n.Word);
        }

        foreach (var item in n.Edges)
        {
            if (outWords.Count >= Max)
            {
                break;
            }
            GetChildWords(item.Value, ref outWords, Max);
        }
    }
}

【讨论】:

c.ToString().ToUpper().ToCharArray().First() 是我今天见过的最糟糕的写法。直接用 char.ToUpperInvariant(c) 有什么问题?前者要分配两个字符串、一个字符数组和一个枚举器,后者只是对两个字节做一点算术运算。 更好的做法是根本不必转换为大写,直接写 return new Letter { Index = Chars.IndexOf(c, StringComparison.OrdinalIgnoreCase) };【参考方案9】:

简洁美观:

/// <summary>
/// Prefix tree in which every node carries a complete string key and a value;
/// a node's descendants are the entries whose keys start with that node's key.
/// </summary>
class Trie
{
    private readonly string _key;
    private string _value;
    private List<Trie> _children;

    /// <summary>
    /// Creates a node with no children. The defaults describe a root node.
    /// </summary>
    public Trie(string key = "root", string value = "root_val")
    {
        this._key = key;
        this._value = value;
        this._children = new List<Trie>();
    }

    /// <summary>
    /// Adds every entry of <paramref name="nodes"/> below this node.
    /// </summary>
    /// <param name="nodes">Key/value pairs to insert.</param>
    /// <param name="keyLength">Forwarded unchanged to <see cref="Add"/>.</param>
    public void Initialize(Dictionary<string, string> nodes, int keyLength = 1)
    {
        foreach (var node in nodes)
        {
            this.Add(node, keyLength);
        }
    }

    /// <summary>
    /// Inserts an entry below this node.
    /// </summary>
    /// <param name="node">Entry to insert; its key must not be null.</param>
    /// <param name="keyLength">
    /// Kept for signature compatibility and passed down incremented; the target
    /// child is chosen by prefix match, not by this length.
    /// </param>
    /// <exception cref="ArgumentException">The entry's key is null.</exception>
    public void Add(KeyValuePair<string, string> node, int keyLength = 1)
    {
        string key = node.Key;
        if (key == null)
        {
            throw new ArgumentException("Key must not be null.", "node");
        }

        // Pick the child with the same prefix test that decides whether one exists.
        // Deciding with StartsWith but selecting with Substring(0, keyLength) threw
        // whenever an intermediate level was missing (e.g. "a", "adf", then "adfg").
        Trie owner = this._children.FirstOrDefault(ch => key.StartsWith(ch._key, StringComparison.Ordinal));

        if (owner == null)
        {
            //For any item that could be a child of newly added item
            Predicate<Trie> possibleChildren = (Trie ch) => { return ch._key.StartsWith(key, StringComparison.Ordinal); };

            var newChild = new Trie(key, node.Value);
            newChild._children.AddRange(this._children.FindAll(possibleChildren));

            this._children.RemoveAll(possibleChildren);
            this._children.Add(newChild);
        }
        else if (owner._key == key)
        {
            // Same key added again: replace the value instead of nesting a duplicate node.
            owner._value = node.Value;
        }
        else
        {
            owner.Add(node, keyLength + 1);
        }
    }

    /// <summary>
    /// Removes every node below this one whose key equals <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Key of the node(s) to remove.</param>
    /// <param name="recursively">
    /// True removes the matching node together with its subtree; false removes only
    /// the node and re-attaches its direct children to this node.
    /// </param>
    public void Delete(string key, bool recursively = true)
    {
        // Work on a copy so the list being enumerated is never modified.
        var newChildren = new List<Trie>(this._children);
        foreach (var child in this._children)
        {
            if (child._key == key)
            {
                if (!recursively)
                {
                    newChildren.AddRange(child._children);
                }
                newChildren.Remove(child);
            }
            else
            {
                child.Delete(key, recursively);
            }
        }

        this._children = newChildren;
    }

    /// <summary>
    /// Collects this node and its descendants whose key equals the leading part of
    /// <paramref name="key"/> that corresponds to their depth.
    /// </summary>
    /// <param name="key">Key to search for.</param>
    /// <param name="keyLength">
    /// One more than the number of leading characters of <paramref name="key"/> this
    /// node is compared with; it grows by one per level.
    /// </param>
    /// <returns>The matching nodes, parents before children.</returns>
    /// <remarks>
    /// NOTE(review): matching is positional, so it only finds nodes when each level
    /// extends the key by exactly one character, as in the sample data.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="key"/> is null.</exception>
    public List<Trie> Find(string key, int keyLength = 1)
    {
        if (key == null)
        {
            throw new ArgumentNullException("key");
        }

        // A local list replaces the former _path field, which was needless shared state.
        var path = new List<Trie>();

        if (key.Length >= keyLength - 1 && this._key == key.Substring(0, keyLength - 1))
        {
            path.Add(this);
        }
        foreach (var child in this._children)
        {
            path.AddRange(child.Find(key, keyLength + 1));
        }

        return path;
    }
}

// Sample data: each key extends its parent's key by one character.
var items = new Dictionary<string, string>
{
    { "a", "First level item" },
    { "b", "First level item" },
    { "ad", "Second level item" },
    { "bv", "Second level item" },
    { "adf", "Third level item" },
    { "adg", "Third level item" },
    { "bvc", "Third level item" },
    { "bvr", "Third level item" }
};

var myTree = new Trie();
myTree.Initialize(items);

【讨论】:

以上是关于如何在 C# 中创建 trie [关闭]的主要内容,如果未能解决你的问题,请参考以下文章

如何在 Python 中创建一个 trie

如何在 C# 中创建 Word 文档? [关闭]

如何在Python中创建TRIE

如何在 uwp C# 中创建手电筒应用程序

如何在c#中创建数据模型Model

在 C# 中创建 Json 的问题 [关闭]