SPOJ705 Distinct Substrings (后缀自动机&后缀数组)

Posted ---学习ing---

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了SPOJ705 Distinct Substrings (后缀自动机&后缀数组)相关的知识,希望对你有一定的参考价值。

Given a string, we need to find the total number of its distinct substrings.

Input

T - the number of test cases (T <= 20).
Each test case consists of one string whose length is <= 1000.

Output

For each test case output one number saying the number of distinct substrings.

Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

Explanation for the testcase with string ABABA: 
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

题意:

求由大写字母组成的字符串中不同子串的个数。先默写了一遍后缀自动机;今天主要是练习后缀数组。

 注意:

  • 注意字符集是大写字母还是小写字母(本题为大写,所以转移下标用 c-'A');
  • init 初始化时没有对整个 ch 数组一次性 memset(只清空了根结点),所以 add 中每新建一个结点都要先把该结点的转移数组 memset 清零,不要忘记。

后缀自动机:

技术分享图片
#include<cstdio>
#include<cstdlib>
#include<iostream>
#include<cstring>
#include<algorithm>
using namespace std;
// Capacity (in states) of the automaton and size of the input buffer.
const int maxn=10000;

/**
 * Suffix automaton over a 26-symbol alphabet.
 *
 * State 1 is the root; index 0 doubles as "no state / no transition".
 * For every state s:
 *   ch[s][c]   - target of the transition on symbol c (0 if absent),
 *   fa[s]      - suffix link of s,
 *   maxlen[s]  - length of the longest string that reaches s.
 * Last is the state reached by the whole text inserted so far and
 * sz is the index of the most recently allocated state.
 */
struct SAM
{
    int ch[maxn][26],fa[maxn],maxlen[maxn],Last,sz;

    /// Resets the automaton to a single root state with no transitions.
    void init()
    {
        sz = 1;
        Last = 1;
        maxlen[1] = 0;
        fa[1] = 0;
        for (int c = 0; c < 26; ++c) ch[1][c] = 0;
    }

    /// Appends symbol x (expected in 0..25) to the text represented so far.
    void add(int x)
    {
        const int cur = ++sz;
        int walk = Last;
        Last = cur;

        // Only the root is cleared by init(), so every fresh state wipes
        // its own transition row here.
        for (int c = 0; c < 26; ++c) ch[cur][c] = 0;
        maxlen[cur] = maxlen[walk] + 1;

        // Follow suffix links, adding the missing x-transitions.
        for (; walk != 0 && ch[walk][x] == 0; walk = fa[walk]) ch[walk][x] = cur;

        if (walk == 0) {
            // Fell off the root: x has not been seen in any suffix context.
            fa[cur] = 1;
            return;
        }

        const int target = ch[walk][x];
        if (maxlen[target] == maxlen[walk] + 1) {
            // The existing transition is "solid": link to it directly.
            fa[cur] = target;
            return;
        }

        // Otherwise split target: the clone keeps the transitions and the
        // suffix link but takes the shorter length.
        const int clone = ++sz;
        for (int c = 0; c < 26; ++c) ch[clone][c] = ch[target][c];
        maxlen[clone] = maxlen[walk] + 1;
        fa[clone] = fa[target];
        fa[target] = clone;
        fa[cur] = clone;

        // Redirect the x-transitions that pointed at target to the clone.
        for (; walk != 0 && ch[walk][x] == target; walk = fa[walk]) ch[walk][x] = clone;
    }
};
SAM Sam;
/**
 * Reads T test cases, one string each, and prints the number of distinct
 * substrings of every string, one result per line.
 *
 * Each string is fed into the global suffix automaton Sam; the answer is
 * the sum over all states of maxlen[state] - maxlen[fa[state]].
 *
 * @return 0 always; reading stops early if the input is malformed or ends.
 */
int main()
{
    char chr[maxn];
    int T,ans,i,L;
    // Without this check T would be used uninitialised on bad input.
    if(scanf("%d",&T)!=1) return 0;
    while(T--){
        Sam.init();ans=0;
        // Field width 9999 == maxn-1 keeps the read inside chr; update it
        // together with maxn.
        if(scanf("%9999s",chr)!=1) break;
        L=static_cast<int>(strlen(chr));
        // Symbols are mapped relative to the character 'A'.
        // NOTE(review): characters outside 'A'..'Z' would index past the
        // 26-wide transition table - confirm the input is uppercase only.
        for(i=0;i<L;i++) Sam.add(chr[i]-'A');
        for(i=1;i<=Sam.sz;i++) ans+=Sam.maxlen[i]-Sam.maxlen[Sam.fa[i]];
        printf("%d\n",ans);
    }
    return 0;
}
View Code

后缀数组:

 

#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<iostream>
#include<algorithm>
using namespace std;
// Capacity of the text buffer and of every work array below.
const int maxn=10000;
// Input text, 1-indexed: the string occupies ch[1..L].
char ch[maxn];
// Length of the string currently stored in ch.
int L;

/**
 * Suffix array with height (LCP) array over the global text ch[1..L].
 *
 * After sort():      sa[k]   = start index of the k-th smallest suffix (k in 1..L),
 *                    rank[i] = position of the suffix starting at i inside sa.
 * After getheight(): ht[k]   = length of the longest common prefix of the
 *                              suffixes sa[k-1] and sa[k]; ht[1] is 0.
 * cntA/cntB/A/B/tsa are scratch buffers for the counting sorts.
 */
struct SA
{
    int cntA[maxn],cntB[maxn],A[maxn],B[maxn];
    int rank[maxn],sa[maxn],tsa[maxn],ht[maxn];

    /**
     * Builds sa[] and rank[] for ch[1..L] by prefix doubling; every round is
     * a two-pass counting sort on the key pair (rank[i], rank[i+l]).
     * Does nothing when L <= 0.
     */
    void sort()
    {
         if (L <= 0) return;

         // Round 0: counting sort by the first byte. Bytes are read as
         // unsigned char so values >= 0x80 never yield a negative index.
         for (int i = 0; i < 256; i ++) cntA[i] = 0;
         for (int i = 1; i <= L; i ++) cntA[static_cast<unsigned char>(ch[i])] ++;
         for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
         for (int i = L; i; i --) sa[cntA[static_cast<unsigned char>(ch[i])] --] = i;
         rank[sa[1]] = 1;
         for (int i = 2; i <= L; i ++){
              rank[sa[i]] = rank[sa[i - 1]];
              if (ch[sa[i]] != ch[sa[i - 1]]) rank[sa[i]] ++;
         }

         // Keep doubling until all L ranks are distinct.
         for (int l = 1; rank[sa[L]] < L; l <<= 1){
              for (int i = 0; i <= L; i ++) cntA[i] = 0;
              for (int i = 0; i <= L; i ++) cntB[i] = 0;
              for (int i = 1; i <= L; i ++){
                   // First key: current rank. Second key: rank l positions
                   // further on, or 0 when that position is past the end.
                   cntA[A[i] = rank[i]] ++;
                   cntB[B[i] = (i + l <= L) ? rank[i + l] : 0] ++;
              }
              // Sort by the second key into tsa ...
              for (int i = 1; i <= L; i ++) cntB[i] += cntB[i - 1];
              for (int i = L; i; i --) tsa[cntB[B[i]] --] = i;
              // ... then stably by the first key into sa.
              for (int i = 1; i <= L; i ++) cntA[i] += cntA[i - 1];
              for (int i = L; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
              // Re-rank: equal key pairs share a rank.
              rank[sa[1]] = 1;
              for (int i = 2; i <= L; i ++){
                   rank[sa[i]] = rank[sa[i - 1]];
                   if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rank[sa[i]] ++;
              }
         }
    }

    /**
     * Fills ht[1..L] from sa[] and rank[]; sort() must have been called for
     * the current text. The match length j is carried from suffix i to
     * suffix i+1 minus one, so the text is not rescanned from scratch.
     */
    void getheight()
    {
         for (int i = 1, j = 0; i <= L; i ++){
              if (rank[i] == 1){
                   // The smallest suffix has no predecessor in sa.
                   ht[1] = 0;
                   j = 0;
                   continue;
              }
              if (j) j --;
              const int prev = sa[rank[i] - 1];
              // Explicit bounds: the scan does not depend on a terminator
              // byte following the text.
              while (i + j <= L && prev + j <= L && ch[i + j] == ch[prev + j]) j ++;
              ht[rank[i]] = j;
         }
    }
};
SA Sa;
/**
 * Reads T test cases, one string each, and prints the number of distinct
 * substrings of every string, one result per line.
 *
 * The string is stored 1-indexed in the global buffer ch and its length in
 * the global L. The suffix starting at sa[i] has L - sa[i] + 1 prefixes, of
 * which ht[i] are subtracted as shared with the preceding suffix in sorted
 * order.
 *
 * @return 0 always; reading stops early if the input is malformed or ends.
 */
int main()
{
    int T,ans,i;
    // Without this check T would be used uninitialised on bad input.
    if(scanf("%d",&T)!=1) return 0;
    while(T--){
        ans=0;
        // The text starts at ch[1], leaving maxn-1 bytes; field width 9998
        // plus the terminator fits exactly. Update it together with maxn.
        if(scanf("%9998s",ch+1)!=1) break;
        L=static_cast<int>(strlen(ch+1));
        Sa.sort();
        Sa.getheight();
        for(i=1;i<=L;i++) ans+=L-Sa.sa[i]+1-Sa.ht[i];
        printf("%d\n",ans);
    }
    return 0;
}

 

以上是关于SPOJ705 Distinct Substrings (后缀自动机&后缀数组)的主要内容,如果未能解决你的问题,请参考以下文章

SPOJ705 Distinct Substrings (后缀自动机&后缀数组)

SPOJ694&705Distinct Substrings(后缀数组)

SPOJ 694 || 705 Distinct Substrings ( 后缀数组 && 不同子串的个数 )

[SPOJ705]不同的子串

Spoj-DISUBSTR - Distinct Substrings~New Distinct Substrings SPOJ - SUBST1~(后缀数组求解子串个数)

Spoj 694 Distinct Substrings