字符串

Posted 冷暖知不知

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了字符串相关的知识,希望对你有一定的参考价值。

技术分享
//普通
void getnext1(char *s)
{
    int i = 0,j = -1,len = strlen(s);
    ne[0] = -1;
    while(i < len)
    {
        if(j == -1 || s[i] == s[j]) ne[++i] = ++j;
        else    j = ne[j];
    }
}

//加快预处理
void getnext2(char *s)
{
    int i = 0,j = -1,len = strlen(s);
    ne[0] = -1;
    while(i < len)
    {
        if(j == -1 || s[i] == s[j])
        {
            if(s[++i] == s[++j])    ne[i] = ne[j];
            else    ne[i] = j;
        }
        else    j = ne[j];
    }
}

// Returns the number of occurrences of x in y; occurrences may overlap.
// Rebuilds the global table ne for x via getnext1() before scanning.
int kmp(char *x,char *y)
{
    getnext1(x);
    const int leny = strlen(y);
    const int lenx = strlen(x);
    int matched = 0;    // number of characters of x currently matched
    int hits = 0;
    for(int pos = 0;pos < leny;)
    {
        if(matched != -1 && x[matched] != y[pos])
        {
            matched = ne[matched];
            continue;
        }
        ++pos;
        ++matched;
        if(matched == lenx)
        {
            // full match: count it and fall back so overlapping matches are found
            ++hits;
            matched = ne[matched];
        }
    }
    return hits;
}
KMP模版

技术分享
//求浮动匹配
//l[i]表示i结点之前(包括i结点),小于a[i]的个数
//le[i]表示i结点之前(包括i结点),小于等于a[i]的个数
//l[i] == l[j] && le[i] == le[j],则匹配
#include<bits/stdc++.h>
using namespace std;

// n, k, s: read by main() in that order; n values go into a (text), k values into b (pattern),
//          and s is the largest index update() will touch in the Fenwick tree.
// ne: failure table built over b.  tree: Fenwick tree indexed by value.
// l[i] / le[i]: how many of b[0..i] are < b[i] / <= b[i] (filled in main()).
// NOTE(review): tree has only 30 slots, so update() stays in bounds only while s <= 29.
int n,k,s,a[100005],b[25005],ne[25005],tree[30],l[100005],le[100005];

// Isolates the lowest set bit of x (0 when x is 0).
inline int lowbit(int x)
{
    const int negated = -x;
    return x & negated;
}

// Fenwick point update: adds x at index pos, touching every tree node up to s.
void update(int pos,int x)
{
    for(int idx = pos;idx <= s;idx += lowbit(idx))
    {
        tree[idx] += x;
    }
}

// Fenwick prefix query: returns the sum stored for indices 1..pos.
int getsum(int pos)
{
    int total = 0;
    for(int idx = pos;idx != 0;idx -= lowbit(idx))
    {
        total += tree[idx];
    }
    return total;
}

// Builds the failure table ne[0..k] for pattern b under rank-based matching:
// position i extends a border of length j when b[i] ranks inside the current
// window exactly as b[j] ranks inside b[0..j] (compared through l[j] and le[j]).
// While scanning, the Fenwick tree holds the values of the window b[i-j..i].
void getnext1()
{
    memset(tree,0,sizeof(tree));
    int i = 0,j = -1;
    ne[0] = -1;
    while(i < k)
    {
        // getsum(b[i]-1) / getsum(b[i]) count window values < b[i] / <= b[i]
        if(j == -1 || getsum(b[i]-1) == l[j] && getsum(b[i]) == le[j])
        {
            ne[++i] = ++j;
            // put the next element into the window before it gets compared
            if(i < k)   update(b[i],1);
        }
        else
        {
            // falling back from j to ne[j] drops the oldest j-ne[j] values of the window
            for(int t = i-j;t < i-ne[j];t++)  update(b[t],-1);
            j = ne[j];
        }
    }
}

// Scans the text a for windows of length k whose elements rank the same way as
// the pattern b, and returns the 1-based start position of every such window.
// Rebuilds ne through getnext1() first; the Fenwick tree then tracks a[i-j..i].
vector<int> kmp()
{
    getnext1();
    memset(tree,0,sizeof(tree));
    vector<int> ans;
    int i = 0,j = 0;
    // the window starts out holding just a[0]
    update(a[0],1);
    while(i < n)
    {
        // same rank test as in getnext1(), applied to the text value a[i]
        if(j == -1 || getsum(a[i]-1) == l[j] && getsum(a[i]) == le[j])
        {
            i++,j++;
            if(i < n)   update(a[i],1);
            if(j == k)
            {
                // i is already one past the match, so i-k is the 0-based start
                ans.push_back(i-k+1);
                // fall back to the border so overlapping matches are still found
                for(int t = i-j;t < i-ne[j];t++)  update(a[t],-1);
                j = ne[j];
            }
        }
        else
        {
            // mismatch: drop the oldest j-ne[j] window values and retry with border ne[j]
            for(int t = i-j;t < i-ne[j];t++)  update(a[t],-1);
            j = ne[j];
        }
    }
    return ans;
}

int main()
{
    while(~scanf("%d%d%d",&n,&k,&s))
    {
        memset(tree,0,sizeof(tree));
        memset(l,0,sizeof(l));
        memset(le,0,sizeof(le));
        memset(ne,0,sizeof(ne));
        for(int i = 0;i < n;i++)    scanf("%d",&a[i]);
        for(int i = 0;i < k;i++)
        {
            scanf("%d",&b[i]);
            update(b[i],1);
            l[i] = getsum(b[i]-1);
            le[i] = getsum(b[i]);
        }
        vector<int> v = kmp();
        printf("%d\n",v.size());
        for(int i = 0;i < v.size();i++) printf("%d\n",v[i]);
    }
    return 0;
}
浮动匹配

技术分享
void getnext(char *s)
{
    int j = 0,len = strlen(s),k = 1;
    ne[0] = len;
    while(j+1 < len && s[j] == s[j+1])  j++;
    ne[1] = j;
    for(int i = 2;i < len;i++)
    {
        if(ne[i-k]+i < ne[k]+k) ne[i] = ne[i-k];
        else
        {
            j = max(0,ne[k]+k-i);
            while(i+j < len && s[i+j] == s[j])  j++;
            ne[i] = j;
            k = i;
        }
    }
}

// Extended KMP: ex[i] becomes the length of the longest common prefix of x and
// the suffix of y starting at i. Rebuilds ne for x through getnext().
void ekmp(char *x,char *y)
{
    getnext(x);
    const int lenx = strlen(x);
    const int leny = strlen(y);
    int common = 0;
    while(common < lenx && common < leny && x[common] == y[common]) ++common;
    ex[0] = common;
    int anchor = 0;     // start whose match ex[anchor]+anchor was extended last
    for(int i = 1;i < leny;++i)
    {
        const int reach = ex[anchor]+anchor;
        const int mirrored = ne[i-anchor];
        if(mirrored+i < reach)
        {
            ex[i] = mirrored;
            continue;
        }
        int ext = max(0,reach-i);
        while(i+ext < leny && ext < lenx && y[i+ext] == x[ext]) ++ext;
        ex[i] = ext;
        anchor = i;
    }
}
扩展KMP

技术分享
//abaa
//i:    0 1 2 3 4 5 6 7 8 9
//a[i]: $ # a # b # a # a #
//p[i]: 1 1 2 1 4 1 2 3 2 1
#include<bits/stdc++.h>
using namespace std;

// s: the input word; a: s interleaved with separator characters (built in main()).
// p[i]: palindrome radius around a[i], filled by manacher().
// NOTE(review): main() writes 2*strlen(s)+2 cells of a and manacher() reads one more,
// so a/p only cover inputs up to about 100001 characters — confirm against the limits.
char s[100005],a[200005];
int p[200005];

// Manacher's algorithm over the global interleaved string a[0..len-1]:
// p[i] becomes the number of steps r for which a[i-r] == a[i+r] holds
// consecutively (r = 0 included). Relies on a[0] and a[len] differing from
// every other cell to stop the expansion.
void manacher(int len)
{
    int right = 0;      // largest i+p[i] seen so far
    int center = 0;     // the i that produced `right`; only read once right > i
    for(int i = 1;i < len;++i)
    {
        int radius = 1;
        if(right > i)   radius = min(p[2*center-i],right-i);
        while(a[i+radius] == a[i-radius])   ++radius;
        p[i] = radius;
        if(i+radius > right)
        {
            right = i+radius;
            center = i;
        }
    }
}
// Reads one whitespace-delimited word, interleaves it with '#' separators and
// prints the length of its longest palindromic substring.
int main()
{
    scanf("%s",s);
    // '$' in front and the NUL behind act as sentinels for manacher()
    // (assumes the input itself never contains '$').
    a[0] = '$';
    a[1] = '#';
    int len = 2;
    for(int i = 0;s[i];i++)
    {
        a[len++] = s[i];
        a[len++] = '#';
    }
    a[len] = '\0';
    manacher(len);
    int ans = 0;
    for(int i = 0;i < len;i++)    ans = max(ans,p[i]);
    // a radius of p[i] in the interleaved string is a palindrome of length p[i]-1 in s
    printf("%d\n",ans-1);
    return 0;
}
最长回文子串Manacher

技术分享
// Inserts the lowercase word s into the trie (root is node 0).
// Every node on the path gets its counter cnt[] incremented, so cnt[v] is the
// number of inserted words that pass through v. The parameter x is not used.
void add(char *s,int x)
{
    const int len = strlen(s);   // hoisted: strlen in the loop condition rescans s every step
    int now = 0;
    for(int i = 0;i < len;i++)
    {
        const int c = s[i]-'a';  // child slot 0..25
        if(!ch[now][c])
        {
            // 0 means "no child": allocate a fresh node
            ch[now][c] = ++sz;
            cnt[sz] = 0;
        }
        now = ch[now][c];
        cnt[now]++;
    }
}

// Returns cnt[] of the node reached by walking the lowercase word s from the
// root, i.e. how many inserted words have s as a prefix; 0 if the path is missing.
int getnum(char *s)
{
    const int len = strlen(s);   // hoisted: strlen in the loop condition rescans s every step
    int now = 0;
    for(int i = 0;i < len;i++)
    {
        const int c = s[i]-'a';  // child slot 0..25
        if(!ch[now][c]) return 0;
        now = ch[now][c];
    }
    return cnt[now];
}
trie树

技术分享
//HDU2222
//求目标中出现了几个模式串
#include<bits/stdc++.h>
using namespace std;

// Aho-Corasick automaton over the lowercase letters 'a'..'z'.
// Usage: init(), insert() every pattern, build(), then query() a text.
struct Trie
{
    // next: child / goto table (-1 = absent until build() fills every slot)
    // fail: failure links; num: how many inserted patterns end at a node
    // root: index of the root node; cnt: number of nodes allocated so far
    // All three tables share the same capacity so a node index valid for one is valid for all.
    int next[500005][26],fail[500005],num[500005],root,cnt;

    // Allocates a node with no children and no patterns; returns its index.
    int newnode()
    {
        for(int i = 0;i < 26;i++)   next[cnt][i] = -1;
        num[cnt++] = 0;
        return cnt-1;
    }

    // Resets the automaton to a single root node.
    void init()
    {
        cnt = 0;
        root = newnode();
    }

    // Adds the lowercase pattern s to the trie.
    void insert(char *s)
    {
        int now = root,len = strlen(s);
        for(int i = 0;i < len;i++)
        {
            int c = s[i]-'a';
            if(next[now][c] == -1)  next[now][c] = newnode();
            now = next[now][c];
        }
        num[now]++;
    }

    // Computes failure links breadth-first and replaces every missing child
    // with the transition of the failure node, so query() never has to follow
    // fail links just to move.
    void build()
    {
        std::queue<int> q;
        fail[root] = root;
        for(int i = 0;i < 26;i++)
        {
            if(next[root][i] == -1) next[root][i] = root;
            else
            {
                fail[next[root][i]] = root;
                q.push(next[root][i]);
            }
        }
        while(!q.empty())
        {
            int now = q.front();
            q.pop();
            for(int i = 0;i < 26;i++)
            {
                if(next[now][i] == -1)  next[now][i] = next[fail[now]][i];
                else
                {
                    fail[next[now][i]] = next[fail[now]][i];
                    q.push(next[now][i]);
                }
            }
        }
    }

    // Returns how many inserted patterns occur in the lowercase text s.
    // Counters are zeroed as they are collected, so each pattern is counted once
    // and a second query on the same automaton will not see it again.
    int query(char *s)
    {
        int now = root,ans = 0,len = strlen(s);
        for(int i = 0;i < len;i++)
        {
            now = next[now][s[i]-'a'];
            // walk the failure chain to collect every pattern ending here
            int t = now;
            while(t != root)
            {
                ans += num[t];
                num[t] = 0;
                t = fail[t];
            }
        }
        return ans;
    }

    // Prints every node with its failure link, pattern count and transitions.
    void debug()
    {
        for(int i = 0;i < cnt;i++)
        {
            printf("id = %3d,fail = %3d,num = %3d,chi = [",i,fail[i],num[i]);
            for(int j = 0;j < 26;j++)   printf("%2d",next[i][j]);
            printf("]\n");
        }
    }
};
int n;              // number of patterns in the current test case (read in main())
char s[1000001];    // input buffer reused for each pattern and then for the text
Trie ac;            // the automaton; kept global because of its size

// Driver: for each of T test cases reads n patterns and one text, then prints
// how many of the patterns occur in the text.
int main()
{
    int cases;
    scanf("%d",&cases);
    for(;cases != 0;--cases)
    {
        scanf("%d",&n);
        ac.init();
        for(;n != 0;--n)
        {
            scanf("%s",s);
            ac.insert(s);
        }
        ac.build();
        scanf("%s",s);
        const int found = ac.query(s);
        printf("%d\n",found);
    }
    return 0;
}
AC自动机

技术分享
//O(nlogn)
//待排序数组长度n,放在0~n中,最后补0
//sa[i]:每个后缀串从小到大排第i小的位置
//rank[i]:i位置的从小到大排序位置
//height[i]:sa[i]和sa[i-1]对应后缀的最长公共前缀
//n = 8
//num[i]:   1 1 2 1 1 1 1 2 0       num[8]加0
//sa[i]:      8 3 4 5 0 6 1 7 2       num[0~n]有效
//rank[i]:  4 6 8 1 2 3 5 7 0       num[0~n-1]有效
//height[i]:0 0 3 2 3 1 2 0 1       num[2~n]有效

// t1/t2/c: scratch arrays for the radix passes in da(); sa/rk/height: its outputs.
// The rank array is named rk because main() and lcp() refer to it by that name
// (and an unqualified `rank` collides with std::rank under `using namespace std`).
int t1[N],t2[N],c[N],sa[N],rk[N],height[N];
// mm: floor(log2) table; best[i][j]: index of the minimum of rmq over a block of
// length 2^i starting at j; rmq: the values the sparse table is built over.
int mm[200005],best[20][200005],rmq[200005];

// Usage example: reads one line, copies it into the int array r and builds the
// suffix array, rank and height tables with alphabet bound 128.
int main()
{
    // gets() was removed from C++ and cannot bound the read; fgets() is limited
    // to the buffer. Assumes s is a char array so sizeof gives its capacity — confirm.
    if(!fgets(s,sizeof(s),stdin))   s[0] = '\0';
    s[strcspn(s,"\n")] = '\0';      // drop the newline that gets() used to strip
    int len = strlen(s);
    for(int i = 0;i < len;i++)  r[i] = s[i];
    da(r,sa,rk,height,len,128);
    return 0;
}

// True when positions a and b agree in r both at the position itself and l cells later.
bool cmp(int *r,int a,int b,int l)
{
    if(r[a] != r[b])    return false;
    return r[a+l] == r[b+l];
}

// Suffix array by prefix doubling.
// r:      input values r[0..n-1]; r[n] is overwritten with 0 as a terminator
// sa:     output, sa[i] = start of the i-th smallest suffix (terminator included)
// rank:   output, rank[sa[i]] = i
// height: output, height[rank[i]] = common prefix length of suffix i and the
//         suffix ranked just before it
// n:      number of input values; m: exclusive upper bound on the values
// Uses the global scratch arrays t1, t2 and c.
void da(int *r,int *sa,int *rank,int *height,int n,int m)
{
    r[n] = 0;
    n++;
    int *x = t1,*y = t2;
    // counting sort of the suffixes by their first value
    for(int i = 0;i < m;i++)    c[i] = 0;
    for(int i = 0;i < n;i++)    c[x[i] = r[i]]++;
    for(int i = 1;i < m;i++)    c[i] += c[i-1];
    for(int i = n-1;i >= 0;i--) sa[--c[x[i]]] = i;
    for(int j = 1;j <= n;j <<= 1)
    {
        int p = 0;
        // y lists the suffixes ordered by their second half: the last j suffixes
        // have none and come first, the rest follow in the order of sa shifted by j
        for(int i = n-j;i < n;i++)  y[p++] = i;
        for(int i = 0;i < n;i++)
        {
            if(sa[i] >= j)  y[p++] = sa[i]-j;
        }
        // counting sort by the first-half class x, walking y backwards
        for(int i = 0;i < m;i++)    c[i] = 0;
        for(int i = 0;i < n;i++)    c[x[y[i]]]++;
        for(int i = 1;i < m;i++)    c[i] += c[i-1];
        for(int i = n-1;i >= 0;i--) sa[--c[x[y[i]]]] = y[i];
        // y now holds the old classes; x receives the classes for length 2j
        swap(x,y);
        p = 1;
        x[sa[0]] = 0;
        // NOTE(review): cmp reads y[sa[i]+j], which can index past n-1 — presumably the
        // scratch arrays are sized with slack for this; confirm.
        for(int i = 1;i < n;i++)    x[sa[i]] = cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        // every suffix has its own class: the order is final
        if(p >= n)  break;
        m = p;
    }
    int k = 0;
    n--;
    for(int i = 0;i <= n;i++)   rank[sa[i]] = i;
    // height in text order; k is carried over and lowered by at most one per step
    for(int i = 0;i < n;i++)
    {
        if(k)   k--;
        // NOTE(review): needs rank[i] >= 1, i.e. the terminator suffix must rank first —
        // presumably all input values are > 0; confirm.
        int t = sa[rank[i]-1];
        while(r[i+k] == r[t+k]) k++;
        height[rank[i]] = k;
    }
}


// Builds the sparse table over rmq[1..n-1].
// mm[i] becomes floor(log2(i)) (mm[0] = -1); best[i][j] becomes the index of the
// minimum of rmq over the 2^i cells starting at j.
void initrmq(int n)
{
    mm[0] = -1;
    // `==` binds tighter than `&`, so the power-of-two test needs its own parentheses;
    // without them mm[] stays 0 for every i >= 1.
    for(int i = 1;i < n;i++)    mm[i] = ((i&(i-1)) == 0)?mm[i-1]+1:mm[i-1];
    for(int i = 1;i < n;i++)    best[0][i] = i;
    for(int i = 1;i <= mm[n-1];i++)
    {
        for(int j = 1;j+(1<<i)-1 < n;j++)
        {
            // combine the two half-length blocks that cover [j, j+2^i)
            int a = best[i-1][j],b = best[i-1][j+(1<<(i-1))];
            best[i][j] = rmq[a] < rmq[b]?a:b;
        }
    }
}
// Returns the index of the minimum of rmq over [a, b], using two overlapping
// power-of-two blocks from the sparse table (the right block wins ties).
int askrmq(int a,int b)
{
    const int level = mm[b-a+1];
    const int left = best[level][a];
    const int right = best[level][b-(1<<level)+1];
    return rmq[left] < rmq[right]?left:right;
}

// Longest common prefix of the suffixes starting at positions a and b:
// the height entry picked by askrmq between their ranks.
int lcp(int a,int b)
{
    int lo = rk[a];
    int hi = rk[b];
    if(lo > hi) swap(lo,hi);
    return height[askrmq(lo+1,hi)];
}
后缀数组DA

技术分享
//O(n)
//所有数组开3倍
#include<bits/stdc++.h>
// F: maps a text position x with x%3 != 0 to its index in the reduced string
//    (x%3 == 1 goes to x/3, x%3 == 2 goes to x/3+tb). Uses dc3()'s local tb.
#define F(x) ((x)/3+((x)%3 == 1?0:tb))
// G: inverse of F — maps an index of the reduced string back to a text position.
#define G(x) ((x) < tb?(x)*3+1:((x)-tb)*3+2)
// N: base capacity; the arrays below are declared with 3*N cells.
#define N 2005
using namespace std;

// wa/wb: position lists shuffled between radix passes; wv: keys of the current pass
// (later the rank of each mod-1/mod-2 suffix); wss: counting-sort buckets.
// r: input values; sa/rk/height: suffix array, rank and height outputs.
int wa[3*N],wb[3*N],wv[3*N],wss[3*N],r[3*N],sa[3*N],rk[3*N],height[3*N];
// s: raw input text
char s[3*N];

// Returns 1 when the triples r[a..a+2] and r[b..b+2] are equal, 0 otherwise.
int c0(int *r,int a,int b)
{
    for(int d = 0;d < 3;++d)
    {
        if(r[a+d] != r[b+d])    return 0;
    }
    return 1;
}

// Merge comparison used by dc3(): returns 1 when the suffix at a sorts before
// the suffix at b, where k is b%3. Compares one value (k == 1) or two values
// (k == 2) directly and then decides by the ranks stored in wv.
int c12(int k,int *r,int a,int b)
{
    if(r[a] != r[b])    return r[a] < r[b];
    if(k == 2)  return c12(1,r,a+1,b+1);
    return wv[a+1] < wv[b+1];
}

// Stable counting sort: writes the n positions listed in a into b, ordered by
// their key r[position]; keys lie in [0, m). Uses the globals wv and wss.
void sort(int *r,int *a,int *b,int n,int m)
{
    for(int key = 0;key < m;++key)  wss[key] = 0;
    for(int i = 0;i < n;++i)
    {
        wv[i] = r[a[i]];
        ++wss[wv[i]];
    }
    for(int key = 1;key < m;++key)  wss[key] += wss[key-1];
    // walk backwards so that equal keys keep their relative order
    for(int i = n;i-- > 0;)     b[--wss[wv[i]]] = a[i];
}

// DC3 (skew) suffix array construction.
// r:  input values r[0..n-1], with room behind them for the reduced string
// sa: output suffix array, with room behind it for the recursive call
// n:  number of values; m: exclusive upper bound on the values
// Uses the global scratch arrays wa, wb, wv and wss.
void dc3(int *r,int *sa,int n,int m)
{
    // rn/san: reduced string and its suffix array, stored right after r and sa
    int *rn = r+n,*san = sa+n,ta = 0,tb = (n+1)/3,tbc = 0,i,j,p;
    // padding so that triples starting near the end can be read
    r[n]=r[n+1]=0;
    // collect the positions with i%3 != 0
    for(i = 0;i < n;i++)
    {
        if(i%3) wa[tbc++] = i;
    }
    // radix sort those positions by their triple, last value first
    sort(r+2,wa,wb,tbc,m);
    sort(r+1,wb,wa,tbc,m);
    sort(r,wa,wb,tbc,m);
    // name the triples: equal triples share a name, p counts distinct names
    for(p = 1,rn[F(wb[0])] = 0,i = 1;i < tbc;i++)
    {
        rn[F(wb[i])] = c0(r,wb[i-1],wb[i])?p-1:p++;
    }
    // names not unique yet: sort the reduced string recursively
    if(p < tbc) dc3(rn,san,tbc,p);
    else
    {
        // all names distinct: the names are already the ranks
        for(i = 0;i < tbc;i++)  san[rn[i]] = i;
    }
    // positions with i%3 == 0, listed in the order of the suffix that follows them
    for(i = 0;i < tbc;i++)
    {
        if(san[i] < tb) wb[ta++] = san[i]*3;
    }
    // the last position has no following sampled suffix and is added by hand
    if(n%3 == 1)    wb[ta++] = n-1;
    // a stable sort by the first value completes the order of the i%3 == 0 suffixes
    sort(r,wb,wa,ta,m);
    // wb: sampled suffixes back in text positions; wv: rank of each of them
    for(i = 0;i < tbc;i++)  wv[wb[i] = G(san[i])]=i;
    // merge the two sorted lists into sa
    for(i = 0,j = 0,p = 0;i < ta && j < tbc;p++)    sa[p] = c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++];
    for(;i < ta;p++)    sa[p] = wa[i++];
    for(;j < tbc;p++)   sa[p] = wb[j++];
}

// Builds sa, rank and height for r[0..n-1] through dc3().
// Zeroes r[n..3n-1] first, sorts n+1 suffixes (terminator included), then fills
// height[rank[i]] with the common prefix length of suffix i and the suffix
// ranked just before it.
void da(int *r,int *sa,int *rank,int *height,int n,int m)
{
    for(int i = n;i < n*3;i++)  r[i] = 0;
    dc3(r,sa,n+1,m);
    for(int pos = 0;pos <= n;pos++) rank[sa[pos]] = pos;
    int common = 0;     // carried over between iterations
    for(int i = 0;i < n;i++)
    {
        if(common != 0) --common;
        const int prev = sa[rank[i]-1];
        while(r[i+common] == r[prev+common])    ++common;
        height[rank[i]] = common;
    }
}
后缀数组DC3

技术分享
// One state of the suffix automaton.
struct samnode
{
    samnode *fa,*next[26];  // suffix link and the 26 outgoing transitions
    int len;                // length value stored for this state

    // Resets the state: no link, no transitions, length 0.
    void clear()
    {
        len = 0;
        fa = NULL;
        memset(next,0,sizeof(next));
    }
};

// root: initial state; last: state added by the latest samadd(); st: node pool.
samnode *root,*last,st[2*N];
// number of pool entries in use
int sz;
// input text
char s[10005];
// counting-sort buckets indexed by state length (used in main())
int topcnt[2*N];
// states ordered by increasing len, filled in main()
samnode *topsam[2*N];

// Resets the automaton to a single cleared root state taken from the pool.
void saminit()
{
    sz = 0;
    root = &st[sz];
    ++sz;
    last = root;
    root->clear();
}

// Extends the suffix automaton with one character, given as transition index w.
void samadd(int w)
{
    // np: the new state, one longer than the previous last state
    samnode *p = last,*np = &st[sz++];
    last = np;
    np->clear();
    np->len = p->len+1;
    // walk the suffix links, adding the missing w-transitions to np
    while(p && !p->next[w])
    {
        p->next[w] = np;
        p = p->fa;
    }
    // ran off the root: nothing on the chain had a w-transition
    if(!p)
    {
        np->fa = root;
        return;
    }
    samnode *q = p->next[w];
    // q is exactly one longer than p: it can serve as the suffix link directly
    if(q->len == p->len+1)
    {
        np->fa = q;
        return;
    }
    // otherwise clone q as nq with the shorter length and the same transitions
    samnode *nq = &st[sz++];
    nq->clear();
    nq->len = p->len+1;
    memcpy(nq->next,q->next,sizeof(q->next));
    nq->fa = q->fa;
    q->fa = nq;
    np->fa = nq;
    // redirect the w-transitions that pointed at q along the rest of the chain
    while(p && p->next[w] == q)
    {
        p->next[w] = nq;
        p = p->fa;
    }
}

// Builds the suffix automaton of the lowercase string s from scratch,
// feeding each character to samadd() as an index 0..25.
void sambuild(char *s)
{
    saminit();
    int len = strlen(s);
    for(int i = 0;i < len;i++)  samadd(s[i]-'a');
}


// Reads one line, builds its suffix automaton and counting-sorts the states by
// len into topsam, so that topsam lists them in increasing len.
int main()
{
    // gets() was removed from C++ and cannot bound the read; fgets() is limited to the buffer
    if(!fgets(s,sizeof(s),stdin))   s[0] = '\0';
    s[strcspn(s,"\n")] = '\0';      // drop the newline that gets() used to strip
    sambuild(s);
    const int len = strlen(s);      // hoisted out of the loop condition below
    memset(topcnt,0,sizeof(topcnt));
    for(int i = 0;i < sz;i++)   topcnt[st[i].len]++;
    // prefix sums turn the per-length counts into end offsets
    for(int i = 1;i <= len;i++) topcnt[i] += topcnt[i-1];
    for(int i = 0;i < sz;i++)   topsam[--topcnt[st[i].len]] = &st[i];
    return 0;
}
后缀自动机

技术分享
#include<bits/stdc++.h>
using namespace std;


const int HASH = 10007;     // number of hash buckets
const int N = 2010;         // capacity: stored keys and input length
const int SEED = 13331;     // base of the polynomial string hash
char s[N];

// Chained hash map from 64-bit keys to int ids, backed by fixed arrays.
struct HASHMAP
{
    // head[h]: first entry of bucket h, -1 when empty. It is indexed by x%HASH,
    //          so it needs HASH slots rather than N.
    // next[i]: following entry in the same bucket; f[i] / state[i]: id and key of entry i.
    // size:    number of entries stored.
    int head[HASH],next[N],size,f[N];
    unsigned long long state[N];

    // Empties the map. Every bucket must start at -1, the end-of-chain marker
    // that insert() looks for.
    void init()
    {
        size = 0;
        memset(head,-1,sizeof(head));
        memset(state,-1,sizeof(state));
    }

    // Looks x up; if present returns the id stored with it, otherwise stores
    // (x, id) and returns 0. An id of 0 is therefore indistinguishable from
    // "newly inserted". No capacity check: at most N distinct keys fit.
    int insert(unsigned long long x,int id)
    {
        int h = x%HASH;
        for(int i = head[h];i != -1;i = next[i])
        {
            if(x == state[i])   return f[i];
        }
        // not found: push the new entry at the front of the bucket
        f[size] = id;
        state[size] = x;
        next[size] = head[h];
        head[h] = size++;
        return 0;
    }
};

// Reads one line and folds it into a polynomial hash with base SEED
// (unsigned 64-bit arithmetic, wrapping on overflow).
int main()
{
    // gets() was removed from C++ and cannot bound the read; fgets() is limited to the buffer
    if(!fgets(s,sizeof(s),stdin))   s[0] = '\0';
    s[strcspn(s,"\n")] = '\0';      // drop the newline that gets() used to strip
    int len = strlen(s);
    unsigned long long t = 0;
    for(int i = 1;i <= len;i++) t = t*SEED+s[i-1];
    return 0;
}
字符串hash

 

以上是关于字符串的主要内容,如果未能解决你的问题,请参考以下文章

PHP 代码片段

JavaScript 代码片段

CSP核心代码片段记录

带有神秘附加字符的 Javascript Date getTime() 代码片段

web代码片段

vs code 自定义代码片段