[數據結構]字典樹(Trie樹)

本文轉載自查看原文 2015-10-21 17:01 1804 ALGORITHMS

概述：

Trie是個簡單但實用的數據結構，是一種樹形結構，是一種哈希樹的變種，相鄰節點間的邊代表一個字符，這樣樹的每條分支代表一則子串，而樹的葉節點則代表完整的字符串。和普通樹不同的地方是，相同的字符串前綴共享同一條分支。

例如：pool,prize,preview,prepare,produce,progress這些關鍵詞的Trie樹

Trie Example

典型應用是用於統計，排序和保存大量的字符串（但不僅限於字符串），所以經常被搜索引擎系統用於文本詞頻統計。

它的優點是：利用字符串的公共前綴來減少查詢時間，最大限度地減少無謂的字符串比較，查詢效率比哈希樹高。

基本性質：

根節點不包含字符，除根節點外每一個節點都只包含一個字符；
從根節點到某一節點，路徑上經過的字符連接起來，為該節點對應的字符串；
每個節點的所有子節點包含的字符都不相同。

應用場景：

字典樹查找效率很高，時間複雜度是O(m)，m是要查找的單詞中包含的字母的個數，但是會浪費大量存放空指針的存儲空間，屬於以空間換時間的算法。

1、串快速檢索

給出N個單詞組成的熟詞表，以及一篇全用小寫英文書寫的文章，請你按最早出現的順序寫出所有不在熟詞表中的生詞。

2、單詞自動完成

編輯代碼時，輸入字符，自動提示可能的關鍵字、變量或函數等信息。

3、最長公共前綴

對所有串建立字典樹，對於兩個串的最長公共前綴的長度即他們所在的結點的公共祖先個數，於是，問題就轉化為最近公共祖先問題。

4、串排序方面的應用

給定N個互不相同的僅由一個單詞構成的英文名，讓你將他們按字典序從小到大輸出。用字典樹進行排序：採用數組的方式創建字典樹，這棵樹的每個結點的所有兒子很顯然地按照其字母大小排序，對這棵樹進行先序遍歷即可。

程序代碼：

#include <gtest/gtest.h>
#include <list>
using namespace std;


/// Trie (prefix tree) over byte strings that counts how many inserted
/// strings pass through each node.
///
/// Every stored string is terminated internally with STRING_END_TAG so that
/// a complete string can be told apart from a mere prefix of a longer one.
///
/// The tree owns its heap-allocated nodes and releases them in the
/// destructor, so instances are deliberately non-copyable: a member-wise
/// copy would leave two trees deleting the same nodes.
class TrieTree
{
public:

    /// Number of child buckets per node. A child for character c is stored
    /// in bucket (unsigned char)c % MAX_CHILD_KEY_COUNT.
    const static int  MAX_CHILD_KEY_COUNT = 30;

    /// Sentinel character appended to every stored string to mark its end.
    const static char STRING_END_TAG = '\xFF';

    /// One node of the trie.
    struct TrieNode
    {
        char nodeValue;  ///< Character represented by this node (0 for the root).
        int  nodeFreq;   ///< Number of inserted strings whose path goes through this node.

        /// Child nodes. To keep this array small, children are grouped into
        /// a fixed number of buckets, each bucket being a linked list.
        std::list<TrieNode*> childNodes[MAX_CHILD_KEY_COUNT];

        TrieNode()
        {
            nodeValue = 0;
            nodeFreq = 0;
        }
    };

    TrieTree();
    ~TrieTree();

public:
    /// Adds one occurrence of strVal; an empty string is ignored.
    void Insert(const std::string& strVal);

    /// Removes one occurrence of strVal if it is stored as a complete string.
    void Delete(const std::string& strVal);

    /// Returns how many times strVal is stored as a complete string.
    int  Search(const std::string& strVal);

    /// Returns how many stored strings start with strVal (0 if none or empty).
    int  CommonPrefix(const std::string& strVal);

private:
    // Copying is disabled (declared, intentionally never defined): the
    // destructor deletes the nodes reachable from m_RootNode, so a shallow
    // copy would lead to a double delete.
    TrieTree(const TrieTree&);
    TrieTree& operator=(const TrieTree&);

    /// Recursively deletes every descendant of rootNode (not rootNode itself).
    void Clean(TrieNode* rootNode);

    /// Recursive helper for Delete; returns true when the string was found.
    bool DeleteNode(TrieNode* rootNode, const std::string& strVal, int nOffset);

    TrieNode m_RootNode;  ///< Root node; carries no character of its own.
};

/// Creates an empty trie. The root node is initialised by TrieNode's own
/// default constructor, so nothing else needs to be done here.
TrieTree::TrieTree()
{
}

/// Releases every heap-allocated node below the root. The root itself is a
/// data member and is destroyed automatically.
TrieTree::~TrieTree()
{
    Clean(&m_RootNode);
}

void TrieTree::Insert(const string& strVal)
{
    if (strVal.empty())
    {
        return;
    }

    // 在字符串末尾添加一個特殊字符，以區分是前綴還是完整字符串
    string strValue(strVal);
    strValue += STRING_END_TAG;

    TrieNode* pCurrentNode = &m_RootNode;
    unsigned int nIndex = 0;
    unsigned int nLength = strValue.length();

    do
    {
        bool bExistVal = false;
        char cValue = strValue[nIndex];
        list<TrieNode*>& refListNode = pCurrentNode->childNodes[(unsigned char)cValue % MAX_CHILD_KEY_COUNT];
        if (refListNode.size())
        {
            list<TrieNode*>::iterator it = refListNode.begin();
            list<TrieNode*>::iterator itEnd = refListNode.end();
            for (; it != itEnd; ++it)
            {
                if (cValue == (*it)->nodeValue)
                {
                    (*it)->nodeFreq++;
                    bExistVal = true;
                    pCurrentNode = *it;

                    break;
                }
            }
        }

        // 當前不存在對應的字符，則新建一個
        if (!bExistVal)
        {
            TrieNode* pNewNode = new TrieNode();
            pNewNode->nodeFreq = 1;
            pNewNode->nodeValue = cValue;

            refListNode.push_back(pNewNode);            
            pCurrentNode = pNewNode;
        }

        ++nIndex;
    }
    while(nIndex < nLength);
}

void TrieTree::Delete(const string& strVal)
{    
    if (strVal.empty())
    {
        return;
    }

    string strValue(strVal);
    strValue += STRING_END_TAG;

    DeleteNode(&m_RootNode, strValue, 0);
}

/// Returns how many times strVal is stored as a complete string.
///
/// Implemented as a prefix count of strVal followed by STRING_END_TAG.
/// Returns 0 for an empty strVal.
int TrieTree::Search(const string& strVal)
{
    return strVal.empty() ? 0 : CommonPrefix(strVal + STRING_END_TAG);
}

int TrieTree::CommonPrefix(const string& strVal)
{
    if (strVal.empty())
    {
        return 0;
    }

    TrieNode* pCurrentNode = &m_RootNode;
    unsigned int nIndex = 0;
    unsigned int nLength = strVal.length();
    int nFreq = 0;

    do
    {
        bool bExistVal = false;
        char cValue = strVal[nIndex];
        list<TrieNode*>& refListNode = pCurrentNode->childNodes[(unsigned char)cValue % MAX_CHILD_KEY_COUNT];
        if (refListNode.size())
        {
            list<TrieNode*>::iterator it = refListNode.begin();
            list<TrieNode*>::iterator itEnd = refListNode.end();
            for (; it != itEnd; ++it)
            {
                if (cValue == (*it)->nodeValue)
                {
                    nFreq = (*it)->nodeFreq;
                    bExistVal = true;
                    pCurrentNode = *it;
                    break;
                }
            }
        }

        // 當前不存在對應的字符，則沒有找到
        if (!bExistVal)
        {
            nFreq = 0;
            break;
        }

        ++nIndex;
    }
    while(nIndex < nLength);

    return nFreq;
}

/// Deletes every descendant of rootNode and empties its child buckets.
/// rootNode itself is not deleted. A null rootNode is ignored.
void TrieTree::Clean(TrieNode* rootNode)
{
    if (rootNode)
    {
        for (int slot = 0; slot != MAX_CHILD_KEY_COUNT; ++slot)
        {
            list<TrieNode*>& bucket = rootNode->childNodes[slot];

            // Drain the bucket front to back, freeing each subtree first.
            while (!bucket.empty())
            {
                TrieNode* pChild = bucket.front();
                bucket.pop_front();

                Clean(pChild);
                delete pChild;
            }
        }
    }
}

/// Recursive helper for Delete.
///
/// Looks among rootNode's children for the one matching strVal[nOffset] and
/// recurses until the last character of strVal. On the way back, each node
/// on the path has its nodeFreq decremented, and a node whose count drops to
/// zero is deleted and unlinked from its bucket.
///
/// @param rootNode  node whose children are searched; null yields false
/// @param strVal    full key being removed (callers pass it with the sentinel appended)
/// @param nOffset   index into strVal of the character to match at this level
/// @return true if the remainder of strVal was found (and counts were
///         decremented); false otherwise, in which case nothing is changed.
bool TrieTree::DeleteNode(TrieNode* rootNode, const string& strVal, int nOffset)
{
    if (!rootNode)
    {
        return false;
    }

    const char ch = strVal[nOffset];
    list<TrieNode*>& bucket =
        rootNode->childNodes[static_cast<unsigned char>(ch) % MAX_CHILD_KEY_COUNT];

    // Locate the child carrying the current character.
    list<TrieNode*>::iterator itChild = bucket.begin();
    while (itChild != bucket.end() && (*itChild)->nodeValue != ch)
    {
        ++itChild;
    }
    if (itChild == bucket.end())
    {
        return false;
    }

    TrieNode* pChild = *itChild;

    // The string has more characters: the rest must be found below this child.
    const int nNext = nOffset + 1;
    if (nNext < static_cast<int>(strVal.length()) &&
        !DeleteNode(pChild, strVal, nNext))
    {
        return false;
    }

    // A count of zero means no stored string uses this node any more.
    if (--pChild->nodeFreq == 0)
    {
        delete pChild;
        bucket.erase(itChild);
    }

    return true;
}

// Exercises Insert / Search / CommonPrefix / Delete on a small word set.
TEST(Structure, tTireTree)
{
    // Populate the trie with "abc", "ab", "bd" and "dda".
    const char* const words[] = { "abc", "ab", "bd", "dda" };
    TrieTree trie;
    for (unsigned int i = 0; i < sizeof(words) / sizeof(words[0]); ++i)
    {
        trie.Insert(words[i]);
    }

    // "ab" is stored once as a whole word and is a prefix of two words.
    ASSERT_EQ(trie.Search("ab"), 1);
    ASSERT_EQ(trie.CommonPrefix("ab"), 2);

    // After removing "ab" only "abc" still carries that prefix.
    trie.Delete("ab");
    ASSERT_EQ(trie.Search("ab"), 0);
    ASSERT_EQ(trie.CommonPrefix("ab"), 1);

    // Deleting a word that was never inserted must leave the trie unchanged.
    trie.Delete("abcd");
    ASSERT_EQ(trie.Search("ab"), 0);

    // "d" is only a prefix (of "dda"), not a stored word.
    ASSERT_EQ(trie.Search("d"), 0);
    ASSERT_EQ(trie.CommonPrefix("d"), 1);

    // Unknown words: searching yields 0 and deleting is a no-op.
    ASSERT_EQ(trie.Search("fg"), 0);
    trie.Delete("fg");
}

參考引用：

百度百科“字典樹”

http://www.cnblogs.com/huangxincheng/archive/2012/11/25/2788268.html

  看書、學習、寫代碼

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 數據結構Tire 樹實際應用----過濾禁詞數據結構~trie樹（字典樹）數據結構：字典樹 (Trie) 數據結構：樹 Tire樹 Tire樹數據結構-樹【數據結構】樹數據結構-樹數據結構——樹