后缀自动机
Posted mrzdtz220
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了后缀自动机相关的知识,希望对你有一定的参考价值。
记录一下这几天刷的后缀自动机的题目
Glass Beads UVA - 719
求字符串 $S$ 的所有循环同构串中字典序最小的那个(输出它在 $S$ 中的起始位置)
将 $S$ 复制一份接成 $SS$ 后建出后缀自动机,从根出发每一步都走字典序最小的转移,走 $n=|S|$ 步即可;所到状态的 $len$ 减去 $n$ 再加 $1$ 就是起始位置
代码
// Capacity of the automaton tables and of the input buffer.
// NOTE(review): presumably sized for inputs of up to 10^4 characters (the
// doubled string needs at most two states per character) - confirm against
// the problem limits.
const int N = 4e4 + 7;
char s[N];  // current input string, NUL-terminated

// Suffix automaton over the letters 'a'..'z'. State 0 is the root; because the
// root is never the target of a transition, 0 doubles as "no transition".
namespace SAM {
int ch[N][26];  // ch[v][c]: target of the transition from v on letter c
int len[N];     // len[v]: length of the longest string of state v
int link[N];    // link[v]: suffix link of v, -1 for the root only
int last;       // state of the whole text inserted so far
int tol;        // number of states in use
int n;          // length of the (undoubled) input string

// Resets the automaton to a lone root so it can be rebuilt for a new test case.
void init() {
    tol = last = 0;
    len[0] = 0;
    link[0] = -1;
    for (int c = 0; c < 26; ++c) ch[0][c] = 0;
    tol = 1;
}

// Allocates a fresh state with no transitions and returns its id.
int newnode() {
    const int p = tol++;
    for (int c = 0; c < 26; ++c) ch[p][c] = 0;
    len[p] = link[p] = 0;
    return p;
}

// Appends letter c (0..25) to the text represented by the automaton.
void extend(int c) {
    const int cur = newnode();
    len[cur] = len[last] + 1;
    int p = last;
    // Every suffix state that lacks a c-transition now leads to the new state.
    while (p != -1 && !ch[p][c]) {
        ch[p][c] = cur;
        p = link[p];
    }
    if (p == -1) {
        link[cur] = 0;
    } else {
        const int q = ch[p][c];
        if (len[p] + 1 == len[q]) {
            link[cur] = q;
        } else {
            // q also holds strings longer than len[p] + 1: split off a clone
            // that keeps only the short ones and redirect the suffix chain.
            const int clone = newnode();
            len[clone] = len[p] + 1;
            memcpy(ch[clone], ch[q], sizeof(ch[q]));
            link[clone] = link[q];
            while (p != -1 && ch[p][c] == q) {
                ch[p][c] = clone;
                p = link[p];
            }
            link[q] = link[cur] = clone;
        }
    }
    last = cur;
}

// Starting in state u with l letters already consumed, repeatedly follows the
// smallest existing transition until n letters have been consumed, and returns
// len[] of the state reached. Written as a loop so that long inputs cannot
// exhaust the call stack, and it returns len[] of the current state (instead
// of running off the end of the function) if no transition is left.
int dfs(int u, int l) {
    while (l != n) {
        int next = 0;
        for (int c = 0; c < 26 && !next; ++c) next = ch[u][c];
        if (!next) break;
        u = next;
        ++l;
    }
    return len[u];
}

// Reads one string into s, builds the automaton of the string concatenated
// with itself and prints the 1-based start of its smallest rotation.
void solve() {
    if (scanf("%s", s) != 1) return;  // no more input: nothing to solve
    n = static_cast<int>(strlen(s));
    init();
    for (int pass = 0; pass < 2; ++pass)
        for (int i = 0; i < n; ++i) extend(s[i] - 'a');
    printf("%d\n", dfs(0, 0) - n + 1);
}
}  // namespace SAM
// Entry point: reads the number of test cases and solves them one by one.
int main() {
#ifdef LOCAL
    // Local runs redirect the output to a file.
    freopen("ans.out", "w", stdout);
#endif
    int T = 0;
    // A missing or malformed count means "no test cases" rather than a loop
    // driven by an uninitialised value.
    if (scanf("%d", &T) != 1) T = 0;
    // "> 0" keeps a negative count from looping almost forever.
    while (T-- > 0) SAM::solve();
#ifdef LOCAL
    printf("%.10f\n", static_cast<double>(clock()) / CLOCKS_PER_SEC);
#endif
    return 0;
}
Longest Common Substring SPOJ - LCS
求两个串的最长公共子串
对第一个串建后缀自动机,第二个串在自动机上匹配:若当前状态存在对应字符的转移,则匹配长度加一并转移到下一个状态;否则沿 fail(后缀链接)向上跳,直到出现该转移,此时匹配长度更新为该状态的 $len$ 加一(若一直跳出根,则回到根并把长度清零)。每读入一个字符都用当前匹配长度更新一下答案即可
代码
// Capacity of the automaton tables (and of the input buffers declared with it).
const int N = 5e5 + 7;

// Suffix automaton over the letters 'a'..'z'. State 0 is the root; because the
// root is never the target of a transition, 0 doubles as "no transition".
namespace SAM {
int ch[N][26];  // ch[v][c]: target of the transition from v on letter c
int len[N];     // len[v]: length of the longest string of state v
int link[N];    // link[v]: suffix link of v, -1 for the root only
int sz[N];      // 1 for states created directly by extend(), 0 for clones
int last;       // state of the whole text inserted so far
int tol;        // number of states in use

// Resets the automaton to a lone root.
void init() {
    tol = last = 0;
    len[0] = 0;
    link[0] = -1;
    for (int c = 0; c < 26; ++c) ch[0][c] = 0;
    tol = 1;
}

// Allocates a fresh state with no transitions and returns its id.
int newnode() {
    const int p = tol++;
    // Clear the whole row. The previous code reset ch[p][0] 26 times, which
    // left stale transitions behind whenever the tables were reused.
    for (int c = 0; c < 26; ++c) ch[p][c] = 0;
    len[p] = link[p] = sz[p] = 0;
    return p;
}

// Appends letter c (0..25) to the text represented by the automaton.
void extend(int c) {
    const int cur = newnode();
    len[cur] = len[last] + 1;
    sz[cur] = 1;
    int p = last;
    // Every suffix state that lacks a c-transition now leads to the new state.
    while (p != -1 && !ch[p][c]) {
        ch[p][c] = cur;
        p = link[p];
    }
    if (p == -1) {
        link[cur] = 0;
    } else {
        const int q = ch[p][c];
        if (len[p] + 1 == len[q]) {
            link[cur] = q;
        } else {
            // q also holds strings longer than len[p] + 1: split off a clone
            // that keeps only the short ones and redirect the suffix chain.
            const int clone = newnode();
            len[clone] = len[p] + 1;
            memcpy(ch[clone], ch[q], sizeof(ch[q]));
            link[clone] = link[q];
            while (p != -1 && ch[p][c] == q) {
                ch[p][c] = clone;
                p = link[p];
            }
            link[q] = link[cur] = clone;
        }
    }
    last = cur;
}

// Returns the length of the longest substring of s that is also a substring of
// the text held by the automaton. Bytes outside 'a'..'z' cannot match anything
// and simply restart the match.
int longest_match(const char *s) {
    int cur = 0;   // current state
    int run = 0;   // length of the match that ends at the current character
    int best = 0;  // longest match seen so far
    for (; *s; ++s) {
        const int d = *s - 'a';
        if (d < 0 || d >= 26) {
            cur = 0;
            run = 0;
            continue;
        }
        if (ch[cur][d]) {
            cur = ch[cur][d];
            ++run;
        } else {
            // Shorten the match along the suffix links until it can be
            // extended by d, or until even the empty match cannot.
            while (cur != -1 && !ch[cur][d]) cur = link[cur];
            if (cur == -1) {
                cur = 0;
                run = 0;
            } else {
                run = len[cur] + 1;
                cur = ch[cur][d];
            }
        }
        if (best < run) best = run;
    }
    return best;
}

// Prints the length of the longest common substring of s and the text held by
// the automaton, followed by a newline.
void solve(char *s) {
    printf("%d\n", longest_match(s));
}
}  // namespace SAM
char s[N], t[N];  // the two input strings, NUL-terminated

// Entry point: builds the automaton of the first string and matches the second
// string against it.
int main() {
#ifdef LOCAL
    // Local runs redirect the output to a file.
    freopen("ans.out", "w", stdout);
#endif
    // Both buffers are zero-initialised globals, so a string that is missing
    // from the input is simply treated as empty.
    scanf("%s%s", s, t);
    SAM::init();
    for (const char *p = s; *p; ++p) SAM::extend(*p - 'a');
    SAM::solve(t);
#ifdef LOCAL
    printf("%.10f\n", static_cast<double>(clock()) / CLOCKS_PER_SEC);
#endif
    return 0;
}
Substrings SPOJ - NSUBSTR
对每个长度 $i$,求所有长度为 $i$ 的子串中出现次数(right 集合大小)的最大值 $f_i$
right 集合大小可以在 fail 树上 dfs 或按拓扑序自底向上累加求出,然后用每个状态的 right 集合大小去更新该状态最长串的长度对应的 $f_{len}$
最后从后往前依次用 $f_{i+1}$ 更新 $f_i$ 即可(长串出现的地方,它的后缀也一定出现)
代码
// N: capacity of the per-state tables; E: capacity of the tree edge list.
const int N = 5e5 + 7, E = 5e5 + 7;
// f[k]: largest occurrence count among the states whose longest string has
// length k (filled in by SAM::count()).
int f[N];

// Suffix automaton over the letters 'a'..'z'. State 0 is the root; because the
// root is never the target of a transition, 0 doubles as "no transition".
namespace SAM {
int ch[N][26];  // ch[v][c]: target of the transition from v on letter c
int len[N];     // len[v]: length of the longest string of state v
int link[N];    // link[v]: suffix link of v, -1 for the root only
int cnt[N];     // 1 per non-clone state; count() turns it into the number of
                // occurrences of the strings of the state
int last;       // state of the whole text inserted so far
int tol;        // number of states in use

// Children lists of the suffix-link tree. Every non-root state has exactly one
// parent, so the list is threaded through the child ids; 0 (the root, which is
// never a child) terminates a list.
int head[N];  // head[u]: first child of u, 0 if none
int nxt[E];   // nxt[v]: next sibling of v, 0 if none
int que[N];   // scratch buffer for the traversal in dfs()

// Resets the automaton to a lone root.
void init() {
    tol = last = 0;
    len[0] = 0;
    link[0] = -1;
    for (int c = 0; c < 26; ++c) ch[0][c] = 0;
    tol = 1;
}

// Allocates a fresh state with no transitions and returns its id.
int newnode() {
    const int p = tol++;
    // Clear the whole row. The previous code reset ch[p][0] 26 times, which
    // left stale transitions behind whenever the tables were reused.
    for (int c = 0; c < 26; ++c) ch[p][c] = 0;
    len[p] = link[p] = cnt[p] = 0;
    return p;
}

// Appends letter c (0..25) to the text represented by the automaton.
void extend(int c) {
    const int cur = newnode();
    len[cur] = len[last] + 1;
    cnt[cur] = 1;
    int p = last;
    // Every suffix state that lacks a c-transition now leads to the new state.
    while (p != -1 && !ch[p][c]) {
        ch[p][c] = cur;
        p = link[p];
    }
    if (p == -1) {
        link[cur] = 0;
    } else {
        const int q = ch[p][c];
        if (len[p] + 1 == len[q]) {
            link[cur] = q;
        } else {
            // q also holds strings longer than len[p] + 1: split off a clone
            // that keeps only the short ones and redirect the suffix chain.
            const int clone = newnode();
            len[clone] = len[p] + 1;
            memcpy(ch[clone], ch[q], sizeof(ch[q]));
            link[clone] = link[q];
            while (p != -1 && ch[p][c] == q) {
                ch[p][c] = clone;
                p = link[p];
            }
            link[q] = link[cur] = clone;
        }
    }
    last = cur;
}

// Registers v as a child of u in the suffix-link tree.
void addd(int u, int v) {
    nxt[v] = head[u];
    head[u] = v;
}

// Adds the cnt[] of every descendant of u (in the suffix-link tree) into its
// ancestors up to u, and raises f[len[v]] to cnt[v] for every v in the subtree.
// Iterative on purpose: the tree can be as deep as the text is long.
void dfs(int u) {
    // Breadth-first listing: every state appears after its parent.
    int filled = 0;
    que[filled++] = u;
    for (int at = 0; at < filled; ++at)
        for (int v = head[que[at]]; v; v = nxt[v]) que[filled++] = v;
    // Walking the listing backwards therefore finishes all children of a
    // state before the state itself.
    for (int at = filled - 1; at >= 0; --at) {
        const int v = que[at];
        if (f[len[v]] < cnt[v]) f[len[v]] = cnt[v];
        if (at > 0) cnt[link[v]] += cnt[v];  // u itself has no parent here
    }
}

// Builds the suffix-link tree and computes cnt[] and f[] for the whole
// automaton.
void count() {
    for (int i = 0; i < tol; ++i) head[i] = 0;
    for (int i = 1; i < tol; ++i) addd(link[i], i);
    dfs(0);
}
}  // namespace SAM
char s[N];
int n;
int main() {
#ifdef LOCAL
freopen("ans.out", "w", stdout);
#endif
SAM::init();
scanf("%s", s);
n = strlen(s);
rep (i, 0, n) SAM::extend(s[i] - 'a');
SAM::count();
per (i, 1, n) chkmax(f[i], f[i + 1]);
rep (i, 1, n + 1) printf("%d
", f[i]);
#ifdef LOCAL
printf("%.10f
", (db)clock() / CLOCKS_PER_SEC);
#endif
return 0;
}
Longest Common Substring II SPOJ - LCS2
求多个串之间的最长公共子串
首先对第一个串建后缀自动机,然后把后面的每一个串放到自动机上匹配,在每个状态上记录该串能匹配到的最大长度,再按拓扑序(len 从大到小)用它去更新 fail 节点能匹配的最大值(因为当前状态匹配上的串是该状态的一个后缀,fail 节点对应的串也是该状态的后缀,所以当前状态能匹配上的长度,fail 节点也能匹配上,但不超过 fail 节点自身的 $len$)
然后在每个状态上对所有串分别取 min 即可(因为要满足每个串都能匹配上,最短的那个限制了它们共同能匹配的最长长度),答案是所有状态中的最大值
代码
// Capacity of every per-state table below.
const int N = 5e5 + 7;
// Suffix-automaton tables, indexed by state id:
//   ch[v][x] - target of the transition from v on letter x; 0 is treated as
//              "no transition",
//   len[v]   - length of the longest string of state v,
//   link[v]  - suffix link of v (-1 for the root, state 0),
//   cnt[v]   - set to 1 for states created directly by extend(), 0 for clones.
int ch[N][26], len[N], link[N], cnt[N];
// last: state reached after the most recent extend(); tol: number of states in use.
int last, tol;
void init() {
len[tol = last = 0] = 0; link[0] = -1;
rep (i, 0, 26) ch[0][i] = 0;
tol++;
}
int newnode() {
int p = tol++;
rep (i, 0, 26) ch[p][0] = 0;
len[p] = link[p] = cnt[p] = 0;
return p;
}
// Appends letter c (0..25) to the text represented by the automaton.
void extend(int c) {
    const int fresh = newnode();
    len[fresh] = len[last] + 1;
    cnt[fresh] = 1;

    int walker = last;
    last = fresh;

    // Suffix states without a c-transition gain one to the new state.
    for (; walker != -1 && ch[walker][c] == 0; walker = link[walker]) {
        ch[walker][c] = fresh;
    }

    // Ran past the root: the new state hangs directly below it.
    if (walker == -1) {
        link[fresh] = 0;
        return;
    }

    const int target = ch[walker][c];
    // The existing target holds nothing longer than needed: link to it as is.
    if (len[target] == len[walker] + 1) {
        link[fresh] = target;
        return;
    }

    // Otherwise split the target: the copy keeps its transitions and suffix
    // link but only the strings of length up to len[walker] + 1.
    const int copy = newnode();
    len[copy] = len[walker] + 1;
    memcpy(ch[copy], ch[target], sizeof(ch[target]));
    link[copy] = link[target];
    for (; walker != -1 && ch[walker][c] == target; walker = link[walker]) {
        ch[walker][c] = copy;
    }
    link[target] = copy;
    link[fresh] = copy;
}
int c[N], a[N];
int mx[N], ans[N];
char s[N];
int main() {
#ifdef LOCAL
freopen("ans.out", "w", stdout);
#endif
memset(ans, 0x3f, sizeof(ans));
init();
scanf("%s", s);
for (int i = 0; s[i]; i++) extend(s[i] - 'a');
rep (i, 0, tol) c[len[i]]++;
rep (i, 1, tol) c[i] += c[i - 1];
rep (i, 0, tol) a[--c[len[i]]] = i;
while (~scanf("%s", s)) {
int l = 0, p = 0;
for (int i = 0; s[i]; i++) {
int d = s[i] - 'a';
while (~p && !ch[p][d]) p = link[p], l = len[p];
if (p == -1) p = l = 0;
else ++l, p = ch[p][d], chkmax(mx[p], l);
}
per (i, 0, tol) {
int p = a[i];
if (~link[p]) chkmax(mx[link[p]], std::min(len[link[p]], mx[p]));
chkmin(ans[p], mx[p]);
mx[p] = 0;
}
}
int res = 0;
rep (i, 0, tol) chkmax(res, ans[i]);
printf("%d
", res);
#ifdef LOCAL
printf("%.10f
", (db)clock() / CLOCKS_PER_SEC);
#endif
return 0;
}
Lexicographical Substring Search SPOJ - SUBLEX
求出字典序第 $k$ 小的子串(相同的子串只算一次)
把每个状态自身的贡献记为 $1$,再按拓扑序求出从每个状态出发沿转移边($trans$)能走出的路径数;查询时从根出发按字母从小到大枚举转移,用路径数判断第 $k$ 小落在哪条边上,类似于主席树求第 $k$ 大的方法一路走下去即可
代码
// Capacity of every per-state table below.
const int N = 2e5 + 7;
// Suffix-automaton tables, indexed by state id:
//   ch[v][x] - target of the transition from v on letter x; 0 is treated as
//              "no transition",
//   len[v]   - length of the longest string of state v,
//   link[v]  - suffix link of v (-1 for the root, state 0),
//   cnt[v]   - set to 1 for states created directly by extend(), 0 for clones.
int ch[N][26], len[N], link[N], cnt[N];
// f[v]: number of distinct paths that start in v (the empty path included).
// The number of distinct substrings grows quadratically with the text length
// and exceeds 2^31 - 1 well within the table capacity, so the counters are
// 64-bit to keep the sums from overflowing.
long long f[N];
// last: state reached after the most recent extend(); tol: number of states in use.
int last, tol;
void init() {
len[tol = last = 0] = 0; link[0] = -1;
rep (i, 0, 26) ch[0][i] = 0;
tol++;
}
int newnode() {
int p = tol++;
rep (i, 0, 26) ch[p][0] = 0;
len[p] = link[p] = cnt[p] = 0;
return p;
}
void extend(int c) {
int cur = newnode();
len[cur] = len[last] + 1;
cnt[cur] = 1;
int p = last;
while (p != -1 && !ch[p][c]) {
ch[p][c] = cur;
p = link[p];
}
if (p == -1) {
link[cur] = 0;
} else {
int q = ch[p][c];
if (len[p] + 1 == len[q]) {
link[cur] = q;
} else {
int clone = newnode();
len[clone] = len[p] + 1;
memcpy(ch[clone], ch[q], sizeof(ch[q]));
link[clone] = link[q];
while (p != -1 && ch[p][c] == q) {
ch[p][c] = clone;
p = link[p];
}
link[q] = link[cur] = clone;
}
}
last = cur;
}
void dfs(int u, int x) {
if (!x) return;
rep (i, 0, 26) {
int ne = ch[u][i];
if (ne <= 0) continue;
if (f[ne] >= x) {
putchar('a' + i);
dfs(ne, x - 1);
return;
}
x -= f[ne];
}
}
char s[N];
int a[N], c[N];
int main() {
#ifdef LOCAL
freopen("ans.out", "w", stdout);
#endif
init();
scanf("%s", s);
for (int i = 0; s[i]; i++) extend(s[i] - 'a');
rep (i, 0, tol) c[len[i]]++;
rep (i, 1, tol) c[i] += c[i - 1];
rep (i, 0, tol) a[--c[len[i]]] = i;
per (i, 0, tol) {
int p = a[i];
f[p] = 1;
rep (j, 0, 26) if (ch[p][j]) f[p] += f[ch[p][j]];
}
int T;
scanf("%d", &T);
while (T--) {
int x;
scanf("%d", &x);
dfs(0, x);
puts("");
}
#ifdef LOCAL
printf("%.10f
", (db)clock() / CLOCKS_PER_SEC);
#endif
return 0;
}
以上是关于后缀自动机的主要内容,如果未能解决你的问题,请参考以下文章
我的Android进阶之旅关于Android平台获取文件的mime类型:为啥不传小写后缀名就获取不到mimeType?为啥android 4.4系统获取不到webp格式的mimeType呢?(代码片段
我的Android进阶之旅关于Android平台获取文件的mime类型:为啥不传小写后缀名就获取不到mimeType?为啥android 4.4系统获取不到webp格式的mimeType呢?(代码片段