Trie圖（DFA），AC自動機

本文轉載自查看原文 2012-07-24 14:50 7756 字符串/ 算法筆記

Trie圖

先看一個問題：給一個很長很長的母串（長度為n），然後給m個小的模式串，求這m個模式串裡邊有多少個是母串的子串。

最先想到的是暴力O(n*m*len(m)) len(m)表示這m個模式串的平均長度。。。

顯然時間復雜度會很高。。。

再改進一些，用kmp讓每一模式串與母串進行匹配呢？時間復雜度為O((n + len(m))*m)，還算可以。

可是還有沒有更快的算法呢？

編譯原理里邊有一個很著名的思想：自動機。

這裡就要用到確定性有限狀態自動機（DFA）。可以對這m個模式串建立一個DFA，然後讓母串在DFA上跑，遇到某個模式串的終結節點則表示這個模式串出現在母串中。

就像這個圖，母串“nano”在上邊跑就能到達終止節點。

上邊說的是自動機的概念。。。還有一個要用到的是trie樹，這個不解釋了，網上資料一大堆。

這里步入正題：Trie圖

trie圖是一種DFA，可以由trie樹為基礎構造出來，
對於插入的每個模式串，其插入過程中使用的最后一個節點都作為DFA的一個終止節點。
如果要求一個母串包含哪些模式串，可以用母串作為DFA的輸入，在DFA上行走，走到終止節點，就意味著匹配了相應的模式串。

ps: AC自動機是Trie圖的一種實現，也就是說AC自動機是構造Trie圖這種DFA的一種方法。還有別的構造DFA的方法...

怎么建Trie圖？

可以回想一下，在kmp算法中是如何避免母串在匹配過程中指針回溯的？也就是說，避免指針做不必要的回退而浪費時間。

同樣的，在trie圖中也定義這樣一個概念：前綴指針。

這個前綴指針的定義是：從根節點沿邊走到節點p，我們可以得到一個字符串S；節點p的前綴指針指向樹中出現過的、S的最長真後綴（不包括S本身）所對應的節點。

構造前綴指針的步驟為：根據深度一一求出每一個節點的前綴指針。對於當前節點，設他的父節點與他的邊上的字符為Ch，如果他的父節點的前綴指針所指向的節點的兒子中，有通過Ch字符指向的兒子，那么當前節點的前綴指針指向該兒子節點，否則通過當前節點的父節點的前綴指針所指向點的前綴指針，繼續向上查找，直到到達根節點為止。

上圖構造出所有節點的前綴指針。

相信原來的問題到這裡基本已經解決了。可以再考慮一下它的時間複雜度：設m個模式串的總長度為LEN，建Trie樹和求前綴指針都是O(LEN)（字符集大小視為常數），母串在Trie圖上走一遍是O(n)。

所以算法總的時間復雜度為O(LEN + n)。比較好的效率。

模板，HDU 2222：

/*

個人感覺這樣寫更清晰一點。（動態分配內存）

*/
/*
 * One node of the pattern trie used by AC_automaton.
 *
 * The constructor produces an empty node: no children, no failure link and a
 * zero pattern count. It uses a member-initializer list, so the class does not
 * depend on any helper macro being defined before it.
 */
class Node {
public:
    Node* fail;       // failure link; NULL until AC_automaton::build() assigns it
    Node* next[26];   // child for each letter 'a'..'z'; NULL when the edge is absent
    int cnt;          // patterns ending at this node; AC_automaton::search() overwrites it with -1 once counted

    // `next()` value-initializes the array, i.e. every child pointer starts as NULL.
    Node() : fail(NULL), next(), cnt(0) {}
};

//Node* q[10000000];

class AC_automaton : public Node{
public:
    Node *root;
    int head, tail;

    void init() {
        root = new Node();
        head = tail = 0;
    }

    void insert(char* st) {
        Node* p = root;
        while(*st) {
            if(p->next[*st-'a'] == NULL) {
                p->next[*st-'a'] = new Node();
            }
            p = p->next[*st-'a'];
            st++;
        }
        p->cnt++;
    }

    void build() {
        root->fail = NULL;
        deque<Node* > q;
        q.push_back(root);

        while(!q.empty()) {
            Node* tmp = q.front();
            Node* p = NULL;
            q.pop_front();
            for(int i = 0; i < 26; ++i) {
                if(tmp->next[i] != NULL) {
                    if(tmp == root) tmp->next[i]->fail = root;
                    else {
                        p = tmp->fail;
                        while(p != NULL) {
                            if(p->next[i] != NULL) {
                                tmp->next[i]->fail = p->next[i];
                                break;
                            }
                            p = p->fail;
                        }
                        if(p == NULL)   tmp->next[i]->fail = root;
                    }
                    q.push_back(tmp->next[i]);
                }
            }
        }
    }

    int search(char* st) {
        int cnt = 0, t;
        Node* p = root;
        while(*st) {
            t = *st - 'a';
            while(p->next[t] == NULL && p != root) {
                p = p->fail;
            }
            p = p->next[t];
            if(p == NULL)   p = root;

            Node* tmp = p;
            while(tmp != root && tmp->cnt != -1) {
                cnt += tmp->cnt;
                tmp->cnt = -1;
                tmp = tmp->fail;
            }
            st++;
        }
        return cnt;
    }
}AC;

POJ 1204：

View Code

#include <iostream>
#include <cstdio>
#include <cmath>
#include <vector>
#include <cstring>
#include <algorithm>
#include <string>
#include <set>
#include <ctime>
#include <queue>
#include <map>
#include <sstream>

// Helper macros. Every expansion is fully parenthesized so that it behaves as
// a single operand inside a larger expression (e.g. Min(a, b) + 1).

// Fills a whole array with the byte `val`; `arr` must be a real array, because sizeof(arr) is used.
#define CL(arr, val)    memset((arr), (val), sizeof(arr))
// Loop headers; `i` must be an already-declared variable.
#define REP(i, n)       for((i) = 0; (i) < (n); ++(i))
#define FOR(i, l, h)    for((i) = (l); (i) <= (h); ++(i))
#define FORD(i, h, l)   for((i) = (h); (i) >= (l); --(i))
// 2*x and 2*x+1.
#define L(x)    ((x) << 1)
#define R(x)    ((x) << 1 | 1)
// Floor of the midpoint of l and r.
#define MID(l, r)   (((l) + (r)) >> 1)
// NOTE: the arguments of Min/Max are evaluated more than once.
#define Min(x, y)   ((x) < (y) ? (x) : (y))
#define Max(x, y)   ((x) < (y) ? (y) : (x))
// 2 to the power x.
#define E(x)    (1 << (x))

// Floating-point tolerance. It has to be a double: declared as `int`, 1e-6 is truncated to 0.
const double eps = 1e-6;
// Large "infinity" value, 0x3fffffff; two of them can be added without overflowing int.
const int inf = ~0u>>2;
typedef long long LL;

using namespace std;

const int N = 1024;         // dimension of the letter grid and size of the per-pattern tables
const int LET = 26;         // alphabet size ('A'..'Z')
int nNodesCount = 0;        // index of the next unused node in the pool T

struct CNode {
    CNode * ch[LET];
    CNode * pPre;
    vector<int> bstopNode;    //同一個節點可能會是多個串的終止節點。
    int num;
    CNode() {
        CL(ch, 0);
        bstopNode.clear();
        pPre = NULL;
    }
};

CNode T[100000];        // node pool; T[0] is the sentinel above the root, T[1] is the trie root
char mp[N][N];          // the letter grid, one NUL-terminated row per line
int r;                  // number of grid rows
int c;                  // number of grid columns
int m;                  // number of patterns
bool vis[N] = {};       // vis[i] becomes true once pattern i has been located

// Step (row delta, column delta) for each of the eight direction indices.
int dir[8][2] = {
    {-1,  0},
    {-1,  1},
    { 0,  1},
    { 1,  1},
    { 1,  0},
    { 1, -1},
    { 0, -1},
    {-1, -1}
};

void insert(CNode* p, char* s, int x) {
    int i, l = strlen(s);
    for(i = l - 1; i >= 0; --i) {
        if(p->ch[s[i]-'A'] == NULL) {
            p->ch[s[i]-'A'] = T + nNodesCount++;
        }
        p = p->ch[s[i] - 'A'];
    }
    p->bstopNode.push_back(x);
}

void buildDFA() {
    int i;
    for(i = 0; i < LET; ++i) {
        T[0].ch[i] = T + 1;
    }
    T[0].pPre = NULL;
    T[1].pPre = T;

    deque<CNode *> q;    //....
    q.push_back(T + 1);

    while(!q.empty()) {
        CNode * proot = q.front();
        q.pop_front();
        for(i = 0; i < LET; ++i) {
            CNode* p = proot->ch[i];

            if(p) {
                CNode* father = proot->pPre;
                while(father) {
                    if(father->ch[i]) {
                        p->pPre = father->ch[i];

                        if(p->pPre->bstopNode.size() != 0) {
                            vector<int>::iterator it;
                            for(it = p->pPre->bstopNode.begin(); it != p->pPre->bstopNode.end(); ++it)    //合並終止節點
                                p->bstopNode.push_back(*it);
                        }

                        break;
                    } else
                        father = father->pPre;
                }
                q.push_back(p);
            }
        }
    }
}

// True when (x, y) is a cell of the r-by-c grid.
bool inmap(int x, int y) {
    return 0 <= x && x < r && 0 <= y && y < c;
}

// Result for one pattern: a grid position and a direction letter.
struct node {
    int x, y;   // row and column stored by search()
    char c;     // direction letter stored by search()

    // Zero everything so a default-constructed node never holds indeterminate values.
    node() : x(0), y(0), c('\0') {}
    node(int a, int b, char d) : x(a), y(b), c(d) {}
} ans[10000];   // ans[i] is the result for pattern i

bool search(int sx, int sy, int d) {
    CNode* p = T + 1;
    int x, y;
    for(x = sx, y = sy; inmap(x, y); x += dir[d][0], y += dir[d][1]) {
        while(true) {
            if(p->ch[mp[x][y] - 'A']) {
                p = p->ch[mp[x][y] - 'A'];
                if(p->bstopNode.size() != 0) {
                    //printf("%d %d %d\n", x, y, p->num);
                    vector<int>::iterator it;
                    for(it = p->bstopNode.begin(); it != p->bstopNode.end(); ++it)
                        if(!vis[*it]) {    //記錄多個終止節點
                            ans[*it] = node(x, y, (d + 4)%8 + 'A');
                            vis[*it] = true;
                        }
                    //return true;
                }
                break;
            } else  p = p->pPre;
        }
    }
    return false;
}

// Builds the trie graph, then scans the grid: every border cell is used as a
// starting point for the three directions that lead from that edge into the
// grid. The order of the search() calls is kept exactly as listed, because
// only the first hit of a pattern is recorded.
void solve() {
    buildDFA();

    static const int fromLeft[3]   = {2, 1, 3};
    static const int fromRight[3]  = {6, 5, 7};
    static const int fromTop[3]    = {4, 5, 3};
    static const int fromBottom[3] = {0, 1, 7};

    for(int row = 0; row < r; ++row) {
        for(int k = 0; k < 3; ++k) search(row, 0, fromLeft[k]);
        for(int k = 0; k < 3; ++k) search(row, c - 1, fromRight[k]);
    }
    for(int col = 0; col < c; ++col) {
        for(int k = 0; k < 3; ++k) search(0, col, fromTop[k]);
        for(int k = 0; k < 3; ++k) search(r - 1, col, fromBottom[k]);
    }
}

int main() {
    //freopen("data.in", "r", stdin);

    int i;
    scanf("%d%d%d", &r, &c, &m);
    for(i = 0; i < r; ++i) {
        scanf("%s", mp[i]);
    }
    char st[N];
    nNodesCount = 2;
    for(i = 0; i < m; ++i) {
        scanf("%s", st);
        insert(T + 1, st, i);
    }
    solve();
    for(i = 0; i < m; ++i) {
        printf("%d %d %c\n", ans[i].x, ans[i].y, ans[i].c);
    }
    return 0;
}

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 AC自動機學習筆記-2（Trie圖&&last優化） [知識點]Trie樹和AC自動機 AC自動機 AC自動機入門 AC自動機詳解 AC自動機講解 AC自動機總結【總結】AC自動機 AC自動機題目 (轉)兩種高效過濾敏感詞算法--DFA算法和AC自動機算法