HDU5008 Boring String Problem(后缀数组 + 二分 + 线段树)

Posted 新博客↓

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了HDU5008 Boring String Problem(后缀数组 + 二分 + 线段树)相关的知识,希望对你有一定的参考价值。

题目

Source

http://acm.hdu.edu.cn/showproblem.php?pid=5008

Description

In this problem, you are given a string s and q queries.

For each query, you should answer that when all distinct substrings of string s were sorted lexicographically, which one is the k-th smallest.

A substring $s_{i\ldots j}$ of the string $s = a_1a_2\ldots a_n$ ($1 \le i \le j \le n$) is the string $a_ia_{i+1}\ldots a_j$. Two substrings $s_{x\ldots y}$ and $s_{z\ldots w}$ are considered to be distinct if $s_{x\ldots y} \ne s_{z\ldots w}$.

Input

The input consists of multiple test cases.Please process till EOF.

Each test case begins with a line containing a string $s$ ($|s| \le 10^5$) with only lowercase letters.

Next line contains a positive integer $q$ ($1 \le q \le 10^5$), the number of questions.

q queries are given in the next q lines. Every line contains an integer v. You should calculate the k by $k = (l \oplus r \oplus v) + 1$ (l, r is the output of previous question, at the beginning of each case l = r = 0, $0 < k < 2^{63}$, "⊕" denotes exclusive or)

Output

For each test case, output consists of q lines, the i-th line contains two integers l, r which is the answer to the i-th query. (The answer l, r satisfies that $s_{l\ldots r}$ is the k-th smallest and if there are several l, r available, output l, r which with the smallest l. If there is no l, r satisfied, output “0 0”. Note that $s_{1\ldots n}$ is the whole string)

Sample Input

aaa
4
0
2
3
5

Sample Output

1 1
1 3
1 2
0 0

 

分析

题目大概说给一个字符串,将所有不同子串从小到大排序,多次询问,每次询问输出第k个子串是哪个子串。

 

任何一个子串都是某个后缀的前缀,用后缀数组得到所有后缀的排列,然后排名第i的后缀能贡献出的新子串数目就是len-sa[i]-height[i]。

可以通过预处理出前缀和,前缀和记录的是前几个后缀贡献的总子串数目;对于各个询问,在前缀和上二分查找,就能得到所要的子串了。

 

不过,题目还要求输出子串在原串所表示的区间,并且多个方案的情况下输出l最小(起始位置最靠左)的那个。

那么,可以先找到一个子串S,然后在后缀数组上面再进行两次二分查找,查找到与S所在后缀的LCP大于等于S长度的后缀rank的下界和上界,下界和上界之间所有的后缀都以S为前缀,都是满足要求的;而为了快速找到这个区间内最小的起始位置(即最小的sa值),用个线段树RMQ一下即可。

 

代码

#include<cstdio>
#include<cstring>
#include<cmath>
#include<algorithm>
using namespace std;
#define MAXN 111111
 
// Scratch buffers used by SA(): wa and wb are swapped between the current rank
// keys and the second-key ordering, wv holds the first keys in second-key
// order, and ws holds the counting-sort buckets.
int wa[MAXN],wb[MAXN],wv[MAXN],ws[MAXN];
// Returns 1 when positions a and b carry the same key pair in r, i.e. the same
// value at the position itself and at offset l from it; returns 0 otherwise.
// The second comparison is only evaluated when the first one matches.
int cmp(int *r,int a,int b,int l){
    if(r[a]!=r[b]){
        return 0;
    }
    return r[a+l]==r[b+l];
}
// Outputs of SA(): sa[i] is the start index of the suffix with rank i,
// rnk[] maps a start index back to its rank (filled for ranks >= 1), and
// height[i] is the number of leading symbols shared by the suffixes sa[i-1]
// and sa[i].
int sa[MAXN],rnk[MAXN],height[MAXN];
// Builds sa[], rnk[] and height[] for r[0..n-1] by prefix doubling, sorting
// each round with a two-pass counting sort.
//   r: input symbols; every value must be in [0, m).
//   n: number of symbols in r (main passes the text length plus one, the last
//      symbol being a 0 sentinel).
//   m: exclusive upper bound on the symbol values; reused as the number of
//      distinct ranks after each round.
void SA(int *r,int n,int m){
    int *x=wa,*y=wb;
 
    // Initial counting sort of the suffixes by their first symbol.
    for(int i=0; i<m; ++i) ws[i]=0;
    for(int i=0; i<n; ++i) ++ws[x[i]=r[i]];
    for(int i=1; i<m; ++i) ws[i]+=ws[i-1];
    for(int i=n-1; i>=0; --i) sa[--ws[x[i]]]=i;
 
    // Each round sorts by the first 2*j symbols; p counts the distinct ranks
    // produced, and the loop stops once all n suffixes have distinct ranks.
    int p=1;
    for(int j=1; p<n; j<<=1,m=p){
        p=0;
        // y = suffixes ordered by their second key: the last j suffixes have
        // no second half and come first, the rest follow in sa order.
        for(int i=n-j; i<n; ++i) y[p++]=i;
        for(int i=0; i<n; ++i) if(sa[i]>=j) y[p++]=sa[i]-j;
        // Stable counting sort of y by first key (the rank from the last round).
        for(int i=0; i<n; ++i) wv[i]=x[y[i]];
        for(int i=0; i<m; ++i) ws[i]=0;
        for(int i=0; i<n; ++i) ++ws[wv[i]];
        for(int i=1; i<m; ++i) ws[i]+=ws[i-1];
        for(int i=n-1; i>=0; --i) sa[--ws[wv[i]]]=y[i];
        // After the swap y holds the old ranks; write the new ranks into x,
        // giving equal key pairs the same rank.
        swap(x,y); x[sa[0]]=0; p=1;
        for(int i=1; i<n; ++i) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
    }
 
    for(int i=1; i<n; ++i) rnk[sa[i]]=i;
    // Height computation: walk the suffixes in text order, carrying over the
    // previous match length minus one as the starting point.
    int k=0;
    for(int i=0; i<n-1; height[rnk[i++]]=k){
        if(k) --k;
        // No bounds check here: the scan ends at the first mismatch between
        // the two suffixes.
        for(int j=sa[rnk[i]-1]; r[i+k]==r[j+k]; ++k);
    }
}

// n: length of the current text. st[lvl][pos]: minimum of a[pos .. pos+2^lvl-1].
int n,st[17][MAXN];
// Fills the sparse table st[][] over a[1..n] (1-based). Only windows that fit
// entirely inside [1, n] are written; other entries are left untouched.
void ST(int *a){
    for(int pos=1; pos<=n; ++pos){
        st[0][pos]=a[pos];
    }
    for(int lvl=1; lvl<17; ++lvl){
        int half=1<<(lvl-1);
        // Last start position whose window of length 2*half still ends at or before n.
        int last=n-2*half+1;
        for(int pos=1; pos<=last; ++pos){
            st[lvl][pos]=min(st[lvl-1][pos],st[lvl-1][pos+half]);
        }
    }
}
// logs[len]: level of the sparse table used for a range of length len
// (filled in main).
int logs[MAXN];
// Returns the minimum of the table's base array over [a, b] (requires a <= b)
// by combining the two windows of length 2^logs[b-a+1] that start at a and
// end at b.
int rmq(int a,int b){
    int lvl=logs[b-a+1];
    int fromLeft=st[lvl][a];
    int fromRight=st[lvl][b-(1<<lvl)+1];
    return min(fromLeft,fromRight);
}
// Longest common prefix of the suffixes with ranks a and b (callers pass
// a <= b). For a == b the result is n-sa[a]; otherwise it is the range
// minimum of the table over ranks a+1..b.
int lcp(int a,int b){
	return a==b ? n-sa[a] : rmq(a+1,b);
}

// Min segment tree over positions 1..N. The globals x and y carry the
// arguments: for update() x is the position and y the value, for query()
// [x, y] is the queried range.
int tree[MAXN<<2],N,x,y;
// Sets position x to value y inside node k, which covers [i, j], and
// recomputes the minimum of every node on the path back up.
void update(int i,int j,int k){
	if(i!=j){
		int mid=(i+j)>>1;
		int lc=k<<1, rc=lc|1;
		if(x>mid){
			update(mid+1,j,rc);
		}else{
			update(i,mid,lc);
		}
		tree[k]=min(tree[lc],tree[rc]);
		return;
	}
	tree[k]=y;
}
// Returns the minimum stored over the intersection of [x, y] (globals) with
// node k's range [i, j]. MAXN is the starting value of the running minimum.
int query(int i,int j,int k){
	if(i>=x && y>=j){
		return tree[k];
	}
	int mid=(i+j)>>1;
	int best=MAXN;
	if(mid>=x){
		best=min(best,query(i,mid,k<<1));
	}
	if(mid<y){
		best=min(best,query(mid+1,j,k<<1|1));
	}
	return best;
}

// Text of the current test case, as read by scanf in main.
char str[MAXN];
// str converted to integers (str[i]-'a'+1) followed by a 0 sentinel; input to SA().
int a[MAXN];

// sum[i]: running total of n-sa[j]-height[j] over ranks j = 1..i, i.e. the
// number of distinct substrings contributed by the first i suffixes in sorted order.
long long sum[MAXN];
// dis[i]: copy of height[i], the prefix length that rank i shares with rank i-1.
int dis[MAXN];

// Reads test cases until EOF. For each text it builds the suffix array, the
// LCP sparse table, a min segment tree over sa[] and the prefix sums of new
// substrings per rank, then answers q online queries for the k-th smallest
// distinct substring, printing its 1-based [l, r] with the smallest l
// ("0 0" when k exceeds the number of distinct substrings).
int main(){
	// logs[i] = floor(log2(i)), built with integer arithmetic so the table
	// does not depend on floating-point rounding.
	logs[1]=0;
	for(int i=2; i<MAXN; ++i){
		logs[i]=logs[i>>1]+1;
	}
	while(scanf("%s",str)==1){
		n=strlen(str);
		for(int i=0; i<n; ++i){
			a[i]=str[i]-'a'+1;
		}
		// 0 sentinel, smaller than every mapped letter; SA() gets n+1 symbols.
		a[n]=0;
		SA(a,n+1,28);
		ST(height);

		// 127 in every byte makes each int 0x7f7f7f7f, larger than any sa value.
		memset(tree,127,sizeof(tree));
		for(N=1; N<n; N<<=1);
		for(int i=1; i<=n; ++i){
			x=i; y=sa[i];
			update(1,N,1);
		}

		// Rank i contributes n-sa[i]-height[i] substrings not seen at lower ranks.
		sum[0]=0;
		for(int i=1; i<=n; ++i){
			sum[i]=n-sa[i]-height[i]+sum[i-1];
			dis[i]=height[i];
		}

		int q;
		long long v,ansl=0,ansr=0;
		if(scanf("%d",&q)!=1){
			break;
		}
		while(q--){
			// %I64d is not a standard conversion; it is kept from the original,
			// which targets the HDU judge. Use %lld where the C runtime is
			// standard-conforming.
			if(scanf("%I64d",&v)!=1){
				break;
			}
			long long k=(ansl^ansr^v)+1;

			if(k>sum[n]){
				ansl=0; ansr=0;
				puts("0 0");
				continue;
			}

			// First rank whose running total reaches k; the answer is a prefix
			// of that suffix, of length height + offset inside its new substrings.
			int tmp=lower_bound(sum+1,sum+1+n,k)-sum;
			int len=dis[tmp]+(int)(k-sum[tmp-1]);

			// Lowest rank whose suffix shares at least len symbols with rank tmp.
			int l=1,r=tmp;
			while(l<r){
				int mid=(l+r)>>1;
				if(lcp(mid,tmp)>=len) r=mid;
				else l=mid+1;
			}
			x=l;

			// Highest rank whose suffix shares at least len symbols with rank tmp.
			l=tmp; r=n;
			while(l<r){
				int mid=(l+r+1)>>1;
				if(lcp(tmp,mid)>=len) l=mid;
				else r=mid-1;
			}
			y=l;

			// Smallest start index among ranks [x, y], converted to 1-based.
			ansl=query(1,N,1)+1; ansr=ansl+len-1;
			printf("%I64d %I64d\n",ansl,ansr);
		}
	}
	return 0;
}

 

以上是关于HDU5008 Boring String Problem(后缀数组 + 二分 + 线段树)的主要内容,如果未能解决你的问题,请参考以下文章

HDU - 5008 Boring String Problem(后缀树求本质不同第k大子串)

HDU - 5008 Boring String Problem(后缀树求本质不同第k大子串)

HDU - 5008 Boring String Problem(后缀树求本质不同第k大子串)

HDOJ 5008 Boring String Problem

[HDU3518]Boring counting

HDU 5056 Boring count(数学)