后缀自动机

Posted mrzdtz220

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了后缀自动机相关的知识,希望对你有一定的参考价值。

记录一下这几天刷的后缀自动机的题目

Glass Beads UVA - 719
求字符串 $S$ 循环同构的最小表示
将 $S$ 复制成 $SS$ 后建出后缀自动机,从根出发每次走字典序最小的转移,走 $n$ 步即可


代码

// Capacity of the automaton arrays in namespace SAM and of the input buffer.
const int N = 4e4 + 7;
// Input buffer; SAM::solve() reads one word into it per test case.
char s[N];

// Suffix automaton over the letters 'a'..'z', used to locate the
// lexicographically smallest rotation of the input string: the string is
// inserted twice and the smallest path of length n is followed from the root.
namespace SAM {
    // ch[u][c]: target of the transition from state u on letter c (0 = none).
    // len[u]:   length assigned to state u by extend().
    // link[u]:  suffix link of state u (-1 only for the root, state 0).
    int ch[N][26], len[N], link[N];
    // last: state reached by the whole text inserted so far.
    // tol:  number of states currently allocated.
    // n:    length of the string read by solve().
    int last, tol, n;

    // Resets the automaton so that it consists of the root (state 0) only.
    void init() {
        tol = last = 0;
        len[0] = 0;
        link[0] = -1;
        for (int c = 0; c < 26; ++c) ch[0][c] = 0;
        tol++;
    }

    // Allocates a fresh state with no transitions and returns its index.
    int newnode() {
        int p = tol++;
        for (int c = 0; c < 26; ++c) ch[p][c] = 0;
        len[p] = link[p] = 0;
        return p;
    }

    // Appends letter c (0..25) to the text represented by the automaton.
    void extend(int c) {
        int cur = newnode();
        len[cur] = len[last] + 1;
        int p = last;
        // Add the new transition along the suffix-link chain until a state
        // that already has a transition on c (or the end of the chain).
        while (p != -1 && !ch[p][c]) {
            ch[p][c] = cur;
            p = link[p];
        }
        if (p == -1) {
            link[cur] = 0;
        } else {
            int q = ch[p][c];
            if (len[p] + 1 == len[q]) {
                link[cur] = q;
            } else {
                // q has to be split: the clone takes over the shorter strings.
                int clone = newnode();
                len[clone] = len[p] + 1;
                memcpy(ch[clone], ch[q], sizeof(ch[q]));
                link[clone] = link[q];
                while (p != -1 && ch[p][c] == q) {
                    ch[p][c] = clone;
                    p = link[p];
                }
                link[q] = link[cur] = clone;
            }
        }
        last = cur;
    }

    // Starting from state u at depth l, repeatedly takes the transition with
    // the smallest letter until depth n is reached, and returns len[] of the
    // state where the walk stops. If a state without any transition is hit
    // first, the walk stops there instead of running off the end of the
    // function (which was undefined behaviour).
    int dfs(int u, int l) {
        while (l != n) {
            int next = 0;
            for (int c = 0; c < 26 && !next; ++c) next = ch[u][c];
            if (!next) break;
            u = next;
            ++l;
        }
        return len[u];
    }

    // Handles one test case: reads a word, builds the automaton of the word
    // written twice, and prints dfs(0, 0) - n + 1.
    void solve() {
        if (scanf("%s", s) != 1) return;  // no more input
        n = strlen(s);
        init();
        for (int round = 0; round < 2; ++round)
            for (int i = 0; i < n; ++i) extend(s[i] - 'a');
        printf("%d\n", dfs(0, 0) - n + 1);
    }
}

// Reads the number of test cases and runs SAM::solve() once per case.
int main() {
#ifdef LOCAL
    freopen("ans.out", "w", stdout);
#endif
    int T;
    // Bail out on malformed input instead of looping on an uninitialised T.
    if (scanf("%d", &T) != 1) return 0;
    while (T--) SAM::solve();
#ifdef LOCAL
    // Local runs only: report the CPU time used.
    printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
    return 0;
}

Longest Common Substring SPOJ - LCS
求两个串的最长公共子串
对第一个串建后缀自动机,第二个串在自动机上匹配:若当前状态存在对应转移,则匹配长度加一并转移到下一个状态;否则沿 fail(后缀链接)向上跳,直到存在对应转移,并把匹配长度更新为该状态的 $len$ 加一;每一步都用当前匹配长度更新答案即可


代码

// Capacity of the automaton arrays in namespace SAM and of the input buffers.
const int N = 5e5 + 7;

namespace SAM {
    int ch[N][26], len[N], link[N], sz[N];
    int last, tol;
    void init() {
        len[tol = last = 0] = 0; link[0] = -1;
        rep (i, 0, 26) ch[0][i] = 0;
        tol++;
    }
    int newnode() {
        int p = tol++;
        rep (i, 0, 26) ch[p][0] = 0;
        len[p] = link[p] = sz[p] = 0;
        return p;
    }
    void extend(int c) {
        int cur = newnode();
        len[cur] = len[last] + 1;
        sz[cur] = 1;
        int p = last;
        while (p != -1 && !ch[p][c]) {
            ch[p][c] = cur;
            p = link[p];
        }
        if (p == -1) {
            link[cur] = 0;
        } else {
            int q = ch[p][c];
            if (len[p] + 1 == len[q]) {
                link[cur] = q;
            } else {
                int clone = newnode();
                len[clone] = len[p] + 1;
                memcpy(ch[clone], ch[q], sizeof(ch[q]));
                link[clone] = link[q];
                while (p != -1 && ch[p][c] == q) {
                    ch[p][c] = clone;
                    p = link[p];
                }
                link[q] = link[cur] = clone;
            }
        }
        last = cur;
    }
    void solve(char *s) {
        int n = strlen(s);
        int cur = 0;
        int ans = 0, l = 0;
        rep (i, 0, n) {
            int d = s[i] - 'a';
            if (ch[cur][d]) {
                cur = ch[cur][d];
                l++;
            } else {
                while (cur != -1 && !ch[cur][d]) cur = link[cur];
                if (cur == -1) cur = l = 0;
                else l = len[cur] + 1, cur = ch[cur][d];
            }
            chkmax(ans, l);
        }
        printf("%d
", ans);
    }
}

// Input buffers: s is inserted into the automaton, t is matched against it.
char s[N], t[N];

// Reads two words, builds the automaton of the first and lets SAM::solve()
// match the second one against it.
int main() {
#ifdef LOCAL
    freopen("ans.out", "w", stdout);
#endif
    scanf("%s%s", s, t);
    int n = strlen(s);
    SAM::init();
    rep (i, 0, n) SAM::extend(s[i] - 'a');
    SAM::solve(t);
#ifdef LOCAL
    // Local runs only: report the CPU time used.
    printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
    return 0;
}

Substrings SPOJ - NSUBSTR
求每个长度right集合最大的大小
right集合大小通过fail树进行dfs或拓扑序求出,然后用right集合大小更新状态里最长串
最后从后往前依次用 $f_{i+1}$ 更新 $f_i$ 即可


代码

// N: capacity of the automaton arrays and of the input buffer.
// E: not referenced in the visible code; presumably consumed by the `Edg`
//    helper used inside namespace SAM — TODO confirm.
const int N = 5e5 + 7, E = 5e5 + 7;
// f[k]: largest cnt[] among states whose len is k (filled by SAM::dfs), later
// combined with f[k + 1] in main().
int f[N];
// Suffix automaton over the letters 'a'..'z' that additionally accumulates,
// for every state, a counter over the suffix-link tree and records the best
// counter per state length in f[].
namespace SAM {
    // ch[u][c]: target of the transition from state u on letter c (0 = none).
    // len[u]:   length assigned to state u by extend().
    // link[u]:  suffix link of state u (-1 only for the root, state 0).
    // cnt[u]:   1 for states created directly by extend(), 0 for clones;
    //           dfs() then adds the counters of all link-tree children.
    int ch[N][26], len[N], link[N], cnt[N];
    // last: state reached by the whole text inserted so far.
    // tol:  number of states currently allocated.
    int last, tol;

    // Resets the automaton so that it consists of the root (state 0) only.
    void init() {
        tol = last = 0;
        len[0] = 0;
        link[0] = -1;
        for (int c = 0; c < 26; ++c) ch[0][c] = 0;
        tol++;
    }

    // Allocates a fresh state with no transitions and returns its index.
    int newnode() {
        int p = tol++;
        // Clear every letter slot (the old code cleared slot 0 26 times).
        for (int c = 0; c < 26; ++c) ch[p][c] = 0;
        len[p] = link[p] = cnt[p] = 0;
        return p;
    }

    // Appends letter c (0..25) to the text represented by the automaton.
    void extend(int c) {
        int cur = newnode();
        len[cur] = len[last] + 1;
        cnt[cur] = 1;
        int p = last;
        // Add the new transition along the suffix-link chain until a state
        // that already has a transition on c (or the end of the chain).
        while (p != -1 && !ch[p][c]) {
            ch[p][c] = cur;
            p = link[p];
        }
        if (p == -1) {
            link[cur] = 0;
        } else {
            int q = ch[p][c];
            if (len[p] + 1 == len[q]) {
                link[cur] = q;
            } else {
                // q has to be split: the clone takes over the shorter strings.
                int clone = newnode();
                len[clone] = len[p] + 1;
                memcpy(ch[clone], ch[q], sizeof(ch[q]));
                link[clone] = link[q];
                while (p != -1 && ch[p][c] == q) {
                    ch[p][c] = clone;
                    p = link[p];
                }
                link[q] = link[cur] = clone;
            }
        }
        last = cur;
    }
    // NOTE(review): `Edg`, `es` and `addd` are not defined in this listing;
    // presumably adjacency-list helpers from the author's template (storage,
    // "for each edge u -> v" and "add edge") — confirm before changing them.
    Edg
    // Adds the counters of all children of u to cnt[u], then records cnt[u]
    // as a candidate for f[len[u]].
    void dfs(int u) {
        es (u, i, v) dfs(v), cnt[u] += cnt[v];
        chkmax(f[len[u]], cnt[u]);
    }
    // Links every non-root state to its suffix-link parent and runs dfs()
    // from the root.
    void count() {
        rep (i, 1, tol) addd(link[i], i);
        dfs(0);
    }
}

// Input string buffer.
char s[N];
// Length of the input string, set in main().
int n;

// Reads one word, builds its suffix automaton, lets SAM::count() fill f[],
// then prints f[1..n], one value per line.
int main() {
#ifdef LOCAL
    freopen("ans.out", "w", stdout);
#endif
    SAM::init();
    scanf("%s", s);
    n = strlen(s);
    rep (i, 0, n) SAM::extend(s[i] - 'a');
    SAM::count();
    // Combine each f[i] with f[i + 1] so that shorter lengths inherit the
    // best value found for longer ones.
    per (i, 1, n) chkmax(f[i], f[i + 1]);
    rep (i, 1, n + 1) printf("%d\n", f[i]);
#ifdef LOCAL
    // Local runs only: report the CPU time used.
    printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
    return 0;
}

Longest Common Substring II SPOJ - LCS2
求多个串之间的最长公共子串
首先对第一个串建后缀自动机,然后对后面每一个串进行匹配,保存在每个状态上能匹配长度的最大值,再按拓扑序用它更新 fail 节点能匹配的最大值(因为当前节点能匹配的长度对应该状态的一个后缀,而 fail 节点也是该状态的后缀,所以当前状态能匹配上时,fail 节点也一定能匹配上)
然后分别取min即可(因为要满足每个串都能匹配上,那么是最短的限制了它们能匹配的最长长度)


代码

// Capacity of the automaton arrays and of the input buffer.
const int N = 5e5 + 7;
// ch[u][c]: target of the transition from state u on letter c (0 = none).
// len[u]:   length assigned to state u by extend().
// link[u]:  suffix link of state u (-1 only for the root, state 0).
// cnt[u]:   1 for states created directly by extend(), 0 for clones.
int ch[N][26], len[N], link[N], cnt[N];
// last: state reached by the whole text inserted so far.
// tol:  number of states currently allocated.
int last, tol;
void init() {
    len[tol = last = 0] = 0; link[0] = -1;
    rep (i, 0, 26) ch[0][i] = 0;
    tol++;
}
int newnode() {
    int p = tol++;
    rep (i, 0, 26) ch[p][0] = 0;
    len[p] = link[p] = cnt[p] = 0;
    return p;
}
// Appends letter c (0..25) to the text represented by the automaton.
void extend(int c) {
    const int prev = last;
    const int fresh = newnode();
    len[fresh] = len[prev] + 1;
    cnt[fresh] = 1;
    last = fresh;

    // Hand the new transition to every state on the suffix-link chain that
    // does not have one on c yet.
    int walk = prev;
    for (; walk != -1 && ch[walk][c] == 0; walk = link[walk]) {
        ch[walk][c] = fresh;
    }

    // The chain ran out: the new state hangs directly below the root.
    if (walk == -1) {
        link[fresh] = 0;
        return;
    }

    const int target = ch[walk][c];
    // The existing target already has exactly the required length.
    if (len[target] == len[walk] + 1) {
        link[fresh] = target;
        return;
    }

    // Otherwise split the target: the copy keeps the shorter strings.
    const int dup = newnode();
    len[dup] = len[walk] + 1;
    memcpy(ch[dup], ch[target], sizeof(ch[target]));
    link[dup] = link[target];
    for (; walk != -1 && ch[walk][c] == target; walk = link[walk]) {
        ch[walk][c] = dup;
    }
    link[target] = dup;
    link[fresh] = dup;
}
// c[]: counting-sort buckets indexed by len; a[]: state indices sorted by len.
int c[N], a[N];
// mx[u]:  longest match ending in state u for the string currently processed.
// ans[u]: minimum of mx[u] over all strings processed so far.
int mx[N], ans[N];
// Input string buffer, reused for every string read.
char s[N];

// Reads the first word and builds its suffix automaton, then matches every
// further word against it and prints the length of the longest substring
// common to all words.
int main() {
#ifdef LOCAL
    freopen("ans.out", "w", stdout);
#endif
    memset(ans, 0x3f, sizeof(ans));
    init();
    scanf("%s", s);
    for (int i = 0; s[i]; i++) extend(s[i] - 'a');
    // Counting sort of the states by len: a[] lists them by non-decreasing len.
    rep (i, 0, tol) c[len[i]]++;
    rep (i, 1, tol) c[i] += c[i - 1];
    rep (i, 0, tol) a[--c[len[i]]] = i;
    while (scanf("%s", s) == 1) {
        int l = 0, p = 0;
        for (int i = 0; s[i]; i++) {
            int d = s[i] - 'a';
            // Shorten the match along suffix links until the letter fits.
            // len[] is only read for a valid state; the old code evaluated
            // len[-1] when the walk fell off the root.
            while (p != -1 && !ch[p][d]) {
                p = link[p];
                if (p != -1) l = len[p];
            }
            if (p == -1) p = l = 0;
            else ++l, p = ch[p][d], chkmax(mx[p], l);
        }
        // Visit the states through a[] and push each match length to the
        // suffix-link parent, capped by the parent's own len; then fold the
        // result into ans[] and clear mx[] for the next word.
        per (i, 0, tol) {
            int p = a[i];
            if (~link[p]) chkmax(mx[link[p]], std::min(len[link[p]], mx[p]));
            chkmin(ans[p], mx[p]);
            mx[p] = 0;
        }
    }
    int res = 0;
    // Cap by len[i] so that an input consisting of a single word yields that
    // word's length instead of the 0x3f3f3f3f sentinel left in ans[].
    rep (i, 0, tol) chkmax(res, std::min(ans[i], len[i]));
    printf("%d\n", res);
#ifdef LOCAL
    // Local runs only: report the CPU time used.
    printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
    return 0;
}

Lexicographical Substring Search SPOJ - SUBLEX
求出字典序第 $k$ 小的子串
每个不同的子串只计一次(即把每个状态的 right 集合大小视为 $1$),再求出从每个状态出发沿 $trans$ 边能走出的路径数,然后用类似主席树求第 $k$ 大的方法 dfs 即可


代码

// Capacity of the automaton arrays and of the input buffer.
const int N = 2e5 + 7;
// ch[u][c]: target of the transition from state u on letter c (0 = none).
// len[u]:   length assigned to state u by extend().
// link[u]:  suffix link of state u (-1 only for the root, state 0).
// cnt[u]:   1 for states created directly by extend(), 0 for clones.
int ch[N][26], len[N], link[N], cnt[N];
// f[u]: 1 plus the sum of f[] over all transition targets of u, i.e. the
// number of transition paths starting at u (including the empty one).
// A text of m letters can have about m * (m + 1) / 2 distinct substrings,
// which no longer fits a 32-bit int once m reaches ~65536, so the counters
// are 64-bit.
long long f[N];
// last: state reached by the whole text inserted so far.
// tol:  number of states currently allocated.
int last, tol;
void init() {
    len[tol = last = 0] = 0; link[0] = -1;
    rep (i, 0, 26) ch[0][i] = 0;
    tol++;
}
int newnode() {
    int p = tol++;
    rep (i, 0, 26) ch[p][0] = 0;
    len[p] = link[p] = cnt[p] = 0;
    return p;
}
// Appends letter c (0..25) to the text represented by the automaton.
void extend(int c) {
    const int prev = last;
    const int fresh = newnode();
    len[fresh] = len[prev] + 1;
    cnt[fresh] = 1;
    last = fresh;

    // Hand the new transition to every state on the suffix-link chain that
    // does not have one on c yet.
    int walk = prev;
    for (; walk != -1 && ch[walk][c] == 0; walk = link[walk]) {
        ch[walk][c] = fresh;
    }

    // The chain ran out: the new state hangs directly below the root.
    if (walk == -1) {
        link[fresh] = 0;
        return;
    }

    const int target = ch[walk][c];
    // The existing target already has exactly the required length.
    if (len[target] == len[walk] + 1) {
        link[fresh] = target;
        return;
    }

    // Otherwise split the target: the copy keeps the shorter strings.
    const int dup = newnode();
    len[dup] = len[walk] + 1;
    memcpy(ch[dup], ch[target], sizeof(ch[target]));
    link[dup] = link[target];
    for (; walk != -1 && ch[walk][c] == target; walk = link[walk]) {
        ch[walk][c] = dup;
    }
    link[target] = dup;
    link[fresh] = dup;
}

void dfs(int u, int x) {
    if (!x) return;
    rep (i, 0, 26) {
        int ne = ch[u][i];
        if (ne <= 0) continue;
        if (f[ne] >= x) {
            putchar('a' + i);
            dfs(ne, x - 1);
            return;
        }
        x -= f[ne];
    }
}
// Input string buffer.
char s[N];
// c[]: counting-sort buckets indexed by len; a[]: state indices sorted by len.
int a[N], c[N];

// Reads one word, builds its suffix automaton, computes the path counts f[]
// and answers T queries, each asking for the x-th smallest substring.
int main() {
#ifdef LOCAL
    freopen("ans.out", "w", stdout);
#endif
    init();
    scanf("%s", s);
    for (int i = 0; s[i]; i++) extend(s[i] - 'a');
    // Counting sort of the states by len: a[] lists them by non-decreasing len.
    rep (i, 0, tol) c[len[i]]++;
    rep (i, 1, tol) c[i] += c[i - 1];
    rep (i, 0, tol) a[--c[len[i]]] = i;
    // f[p] = 1 + sum of f[] over all transition targets of p; the states are
    // taken from a[] so that targets are finished before their sources.
    per (i, 0, tol) {
        int p = a[i];
        f[p] = 1;
        rep (j, 0, 26) if (ch[p][j]) f[p] += f[ch[p][j]];
    }
    int T;
    // Bail out on malformed input instead of looping on an uninitialised T.
    if (scanf("%d", &T) != 1) return 0;
    while (T--) {
        int x;
        scanf("%d", &x);
        dfs(0, x);
        puts("");
    }
#ifdef LOCAL
    // Local runs only: report the CPU time used.
    printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
    return 0;
}

以上是关于后缀自动机的主要内容,如果未能解决你的问题,请参考以下文章

JSP是什么?

我的Android进阶之旅关于Android平台获取文件的mime类型:为啥不传小写后缀名就获取不到mimeType?为啥android 4.4系统获取不到webp格式的mimeType呢?(代码片段

我的Android进阶之旅关于Android平台获取文件的mime类型:为啥不传小写后缀名就获取不到mimeType?为啥android 4.4系统获取不到webp格式的mimeType呢?(代码片段

HDU4057 Rescue the Rabbit(AC自动机+状压DP)

Sublime Text3自定义代码片段

后缀自动机多图详解(代码实现)