POJ 3294 UVA 11107 Life Forms 后缀数组

Posted 逐雪

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了POJ 3294 UVA 11107 Life Forms 后缀数组相关的知识,希望对你有一定的参考价值。

相同的题目,输出格式有区别。

给定n个字符串,求最长的子串,使得它同时出现在一半以上的串中。

不熟悉后缀数组的童鞋建议先去看一看如何用后缀数组计算两个字符串的最长公共子串 Ural1517

这道题的思路也是基本相同的,都是利用了后缀数组的良好性质。

#include <iostream>  
#include <cstring>  
#include <cstdio>  
using namespace std;  
  
const int MAX = 100500;  
const int nMAX = 105;  
const int mMAX = 1005;  
  
int strnum;  
char str[nMAX][mMAX];  
int source[MAX];  
int sa[MAX], rk[MAX], height[MAX];  
int wa[MAX], wb[MAX], wv[MAX], wd[MAX];  
bool vis[nMAX];  
int id[MAX];  
int anslen, anspos[mMAX], ansnum;  
  const int MAXN=200000+100;
void radix(int *str,int *a,int *b,int n,int m)
{
 static int count[MAXN];
 memset(count,0,sizeof(count));
 for(int i=0;i<n;++i)++count[str[a[i]]];
 for(int i=1;i<=m;++i)count[i]+=count[i-1];
 for(int i=n-1;i>=0;--i)b[--count[str[a[i]]]]=a[i];
}

void sorted_suffix_array(int *str,int *sa,int n,int m)
{
 static int rank[MAXN],a[MAXN],b[MAXN];
 for(int i=0;i<n;++i)rank[i]=i;
 radix(str,rank,sa,n,m);
 
 rank[sa[0]]=0;
 for(int i=1;i<n;++i)rank[sa[i]]=rank[sa[i-1]]+(str[sa[i]]!=str[sa[i-1]]);
 for(int i=0;(1<<i) <n;++i)
 	{
 	 for(int j=0;j<n;++j)
 	 	{
 	 	 a[j]=rank[j]+1;
 	 	 b[j]=j+(1<<i)>=n? 0:rank[j+(1<<i)]+1;
 	 	 sa[j]=j;
		}
	 radix(b,sa,rank,n,n);
	 radix(a,rank,sa,n,n);
	 rank[sa[0]]=0;
	 for(int j=1;j<n;++j)
	 	{
	 	 rank[sa[j]]=rank[sa[j-1]]+(a[sa[j-1]]!=a[sa[j]]||b[sa[j-1]]!=b[sa[j]]);
		}
	}
}


void calc_height(int *str,int *sa,int *h,int n)
{
 static int Rank[MAXN];
 int k=0;
 h[0]=0;
 for(int i=0;i<n;++i)Rank[sa[i]]=i;
 for(int i=0;i<n;++i)
 	{
 	 k= k==0?0:k-1;
 	 if(Rank[i]!=0)
 	   while(str[i+k]==str[sa[Rank[i]-1]+k])++k;
 	 h[Rank[i]]=k;
	}
}

  
bool solve(int beg, int end)  
{  
    int tot = 0;  
    int t = strnum >> 1;  
    for (int i = 0; i < strnum; ++i) vis[i] = false;  
    for (int i = beg; i <= end; ++i)  
    {  
        if (!vis[id[sa[i]]])  
        {  
            vis[id[sa[i]]] = true;  
            ++tot;  
        }  
        if (tot > t) return true;  
    }  
    return false;  
}  
  
bool group(int len, int n)  
{  
    bool res = false;  
    int beg, end;  
    beg = end = 0;  
    for (int i = 1; i < n; ++i)  
    {  
        if (height[i] >= len) ++end;  
        else  
        {  
            if (solve(beg, end))   
            {  
                if (!res) ansnum = 0;  
                res = true;  
                anspos[ansnum++] = sa[beg];  
            }  
            beg = end = i;  
        }  
    }  
    if (beg < end)  
    {  
        if (solve(beg, end))   
        {  
            if (!res) ansnum = 0;  
            res = true;  
            anspos[ansnum++] = sa[beg];  
        }  
    }  
    return res;  
}  
  
int main()  
{ // freopen("t.txt","r",stdin);
 bool flg=false;
    while (scanf("%d", &strnum) && strnum != 0)  
    {   if(flg)printf("\\n");
        for (int i = 0; i < strnum; ++i) scanf("%s", str[i]);  
        int n = 0, low = 1, high = 0, mid;  
        for (int i = 0; i < strnum; ++i)  
        {  
            int j;  
            for (j = 0; str[i][j] != 0; ++j)  
            {  
                id[n] = i;  
                source[n++] = str[i][j] - \'a\' + 100;  
            }  
            if (j > high) high = j;  
            id[n] = i;  
            source[n++] = i;  
        }  
        sorted_suffix_array(source,sa,n,126);
        calc_height(source,sa,height,n);
        //suffix(source, n, 126);  
        //calheight(source, n - 1);  
        anslen = 0;  
        while (low <= high)  
        {  
            mid = (low + high) >> 1;  
            if (group(mid, n))   
            {  
                anslen = mid;  
                low = mid + 1;  
            }  
            else high = mid - 1;  
        }  
        if (anslen == 0) printf("?\\n");  
        else  
        {  
            for (int i = 0; i < ansnum; ++i)  
            {  
                for (int j = 0; j < anslen; ++j)  
                {  
                    printf("%c", source[anspos[i] + j] - 100 + \'a\');  
                }  
                printf("\\n");  
            }  
        }  
        //printf("\\n"); 
		flg=true; 
    }  
    return 0;  
}  

  

以上是关于POJ 3294 UVA 11107 Life Forms 后缀数组的主要内容,如果未能解决你的问题,请参考以下文章

UVA11107 训练指南Life Forms后缀数组

UVA-11107 Life Forms(求出现K次的子串,后缀数组+二分答案)

Life Forms POJ - 3294

poj 3294 Life Forms

POJ3294 Life Forms

POJ3294:Life Forms(后缀数组)