POJ - 3693 Maximum repetition substring (后缀数组)
Posted zgqblogs
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了POJ - 3693 Maximum repetition substring (后缀数组)相关的知识,希望对你有一定的参考价值。
The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.
Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.
Input
The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.
The last test case is followed by a line containing a '#'.
Output
For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.
Sample Input
ccabababc
daabbccaa
#
Sample Output
Case 1: ababab
Case 2: aa
题意:
求重复次数最多的连续重复子串所在的子串
思路:
https://www.cnblogs.com/ZGQblogs/p/11176264.html
通过这篇博客找出重复次数最多的连续重复子串出现的次数.
然后在后缀数组的前缀里寻找符合条件的子串.因为后缀数组已经按字典序排好序,所以找到后立即退出.
具体详见代码注释:
#include<iostream>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<map>
#include<set>
#include<cstdio>
#include<cstring>
#include<cmath>
#include<ctime>

using namespace std;

// Buffer capacity: the statement caps the input at 100,000 characters.
const int maxn = 100086;
const int inf = 0x3f3f3f3f;

char s[maxn];   // current test string
int len;        // strlen(s)
int Rank[maxn]; // Rank[i] = position of suffix i in sorted order (the empty suffix gets 0)
int sa[maxn];   // sa[r]   = start index of the suffix with rank r; sa[0] == len
int tlen;       // prefix length already ranked in the current doubling round
int tmp[maxn];  // scratch space for the next round's ranks

// Orders suffixes i and j by their first 2*tlen characters, using the ranks
// of their first tlen characters (Rank[]) as the primary key and the ranks of
// the following tlen characters as the secondary key.
bool compare_sa(int i, int j) {
    if (Rank[i] != Rank[j]) return Rank[i] < Rank[j];
    // A suffix that ends before the second half starts gets -inf: when the
    // first halves are equal, the shorter string is lexicographically smaller.
    int ri = i + tlen <= len ? Rank[i + tlen] : -inf;
    int rj = j + tlen <= len ? Rank[j + tlen] : -inf;
    return ri < rj;
}

// Builds sa[] and Rank[] for s[0..len) by prefix doubling.
void construct_sa() {
    // Round 0: rank every suffix by its first character; the empty suffix
    // (index len) sorts before everything else.
    for (int i = 0; i <= len; i++) {
        sa[i] = i;
        Rank[i] = i < len ? s[i] : -inf;
    }
    for (tlen = 1; tlen <= len; tlen *= 2) {
        sort(sa, sa + len + 1, compare_sa);
        // sa is now ordered by the first 2*tlen characters. Walk it once and
        // give each suffix the rank of its predecessor, plus one whenever it
        // compares strictly greater than that predecessor.
        tmp[sa[0]] = 0;
        for (int i = 1; i <= len; i++) {
            tmp[sa[i]] = tmp[sa[i - 1]] + (compare_sa(sa[i - 1], sa[i]) ? 1 : 0);
        }
        for (int i = 0; i <= len; i++) Rank[i] = tmp[i];
    }
}

// height[r] = length of the longest common prefix of the suffixes ranked
// r - 1 and r.
int height[maxn];

// Fills height[] by visiting suffixes in text order and reusing the previous
// match length h, which can drop by at most one between consecutive starts.
void construct_lcp() {
    int h = 0;
    height[0] = 0;
    for (int i = 0; i < len; i++) {
        // Rank[i] >= 1 for i < len because rank 0 belongs to the empty suffix.
        int j = sa[Rank[i] - 1]; // suffix sorted immediately before suffix i
        if (h > 0) h--;
        while (j + h < len && i + h < len && s[j + h] == s[i + h]) h++;
        height[Rank[i]] = h;
    }
}

int st[maxn][20]; // st[j][k] = min(height[j .. j + 2^k - 1])
int lg[maxn];     // lg[x]    = floor(log2(x)) for 1 <= x <= len

// Builds the sparse table over height[1..len] and the integer log table
// used by ask_min.
void rmq_init() {
    lg[1] = 0;
    for (int i = 2; i <= len; i++) lg[i] = lg[i >> 1] + 1;
    for (int i = 1; i <= len; i++) st[i][0] = height[i];
    for (int k = 1; (1 << k) <= len; k++) {
        int half = 1 << (k - 1);
        // Only fill windows that lie entirely inside [1, len], so no entry
        // left over from a previous test case is ever read.
        for (int j = 1; j + (1 << k) - 1 <= len; j++) {
            st[j][k] = min(st[j][k - 1], st[j + half][k - 1]);
        }
    }
}

// Returns min(height[i..j]); requires 1 <= i <= j <= len.
int ask_min(int i, int j) {
    int k = lg[j - i + 1];
    return min(st[i][k], st[j - (1 << k) + 1][k]);
}

// Returns the longest common prefix length of the suffixes starting at text
// positions a and b. Requires a != b and both in [0, len).
int lcp(int a, int b) {
    a = Rank[a], b = Rank[b];
    if (a > b) swap(a, b);
    return ask_min(a + 1, b);
}

vector<int> ansl; // every period length that attains the best repetition count

int main() {
    int cases = 0;
    // The field width keeps scanf inside s[]; it must stay equal to maxn - 1.
    while (scanf("%100085s", s) == 1) {
        len = static_cast<int>(strlen(s));
        if (len == 1 && s[0] == '#') break;
        cases++;
        ansl.clear();

        construct_sa();
        construct_lcp();
        rmq_init();

        // Phase 1: for each period length i, probe the anchors 0, i, 2i, ...
        // Any run of at least two periods of length i covers two consecutive
        // anchors, so lcp(j, j + i) measures how far that run extends to the
        // right of j.
        int ansx = 0; // best repetition count seen so far
        for (int i = 1; i <= len; i++) {
            bool flag = true; // true until i has been recorded for the current ansx
            for (int j = 0; j + i < len; j += i) {
                int ans = lcp(j, j + i);
                // The run may begin before j: shift left by exactly the
                // amount that would complete one more period and re-check.
                int k = j - (i - ans % i);
                ans = ans / i + 1;
                if (k >= 0 && lcp(k, k + i) >= i) ans++;
                if (ans == ansx) {
                    if (flag) {
                        ansl.push_back(i);
                        flag = false;
                    }
                } else if (ans > ansx) {
                    ansx = ans;
                    ansl.clear();
                    ansl.push_back(i);
                    flag = false;
                }
            }
        }

        // Phase 2: scan suffixes in lexicographic order; the first one that
        // starts with ansx copies of some recorded period is the answer.
        // ansl is in increasing order, so for a fixed start the shortest
        // valid period (a prefix of the longer candidates) is tried first.
        int start = 0;
        int outLen = len; // fallback for len == 1, where phase 1 records nothing
        bool found = false;
        for (int i = 1; i <= len && !found; i++) {
            for (size_t j = 0; j < ansl.size(); j++) {
                int l = ansl[j];
                if (sa[i] + ansx * l > len) continue; // does not fit in the string
                // With ansx == 1 every substring qualifies, including the one
                // starting at the last character, so no lcp check is needed.
                // With ansx >= 2 the fit check above guarantees
                // sa[i] + l < len, so the lcp query is valid; an lcp of at
                // least (ansx - 1) * l means suffix sa[i] begins with ansx
                // copies of its first l characters.
                if (ansx > 1 && lcp(sa[i], sa[i] + l) < (ansx - 1) * l) continue;
                start = sa[i];
                outLen = ansx * l;
                found = true;
                break;
            }
        }
        printf("Case %d: %.*s\n", cases, outLen, s + start);
    }
    return 0;
}
Maximum repetition substring
以上是关于POJ - 3693 Maximum repetition substring (后缀数组)的主要内容,如果未能解决你的问题,请参考以下文章
POJ3693Maximum repetition substring (后缀数组+RMQ)(占位)
POJ3693 Maximum repetition substring
POJ 3693 Maximum repetition substring(最多重复次数的子串)
POJ 3693 Maximum repetition substring(后缀数组[重复次数最多的连续重复子串])