字符串哈希专题

Posted luowentao

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了字符串哈希专题相关的知识,希望对你有一定的参考价值。


layout: post
title: 字符串哈希专题
author: "luowentaoaa"
catalog: true
tags:
- 字符串
mathjax: true


传送门

摘要 哈希进制转换

题意

给定一个字符串,其中不同的字符不超过NC个。考虑它所有长度为N的子串,问其中共有多少个互不相同的子串。

思路

以nc作为进制,把一个子串化为这个进制下的数,再用哈希判断

#include<cstdio>
#include<iostream>
#include<cstring>
#include<string>
#include<set>
#include<vector>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e7+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor of a and b, computed with the iterative Euclidean
// algorithm. Returns a unchanged when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result itself (a*b overflowed int for moderately large
// arguments). Returns 0 when gcd(a,b) is 0 instead of dividing by zero.
int lcm(int a,int b){
    int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
// Bases and prime moduli a Hash can be built with, selected by index.
const ull Seed_Pool[]={146527,19260817};
const ull Mod_Pool[]={1000000009,998244353};
// Polynomial prefix hash of a C string.
//   p[i] = SEED^i mod MOD
//   h[i] = hash of the first i characters, mod MOD
struct Hash{
    ull SEED,MOD;
    vector<ull>p,h;
    // Creates an empty hash. SEED and MOD are zeroed so they never hold
    // indeterminate values.
    Hash():SEED(0),MOD(0){}
    // Builds the tables for s with Seed_Pool[seed_index] and
    // Mod_Pool[mod_index]; both indices must be valid for their pool.
    Hash(const char* s,const int& seed_index,const int& mod_index){
        SEED=Seed_Pool[seed_index];
        MOD=Mod_Pool[mod_index];
        int n=strlen(s);
        p.resize(n+1),h.resize(n+1);
        p[0]=1;
        for(int i=1;i<=n;i++)p[i]=p[i-1]*SEED%MOD;
        // Each byte is hashed as unsigned char. A plain (possibly signed)
        // char >= 0x80 would convert to a value near 2^64, which is not
        // congruent to the byte modulo MOD and makes equal substrings at
        // different offsets hash differently.
        for(int i=1;i<=n;i++)h[i]=(h[i-1]*SEED%MOD+static_cast<unsigned char>(s[i-1]))%MOD;
    }
    // Hash of the half-open range [l, r) (0-based); the result is in [0, MOD).
    ull get(int l,int r)const{return (h[r]-h[l]*p[r-l]%MOD+MOD)%MOD;}
    // Hash of the m characters starting at 0-based index l.
    ull substr(int l,int m)const{return get(l,l+m);}
};
// Declared here but not referenced by the visible code of this program.
set<ull>st;
// Input text; main() reads it as one whitespace-delimited token.
char s[16000005];
// a[v] becomes true once a window whose base-nc value is v has been counted.
bool a[maxn*2];
// Maps a character code to its digit (order of first appearance in s);
// -1 means the character has not been seen.
int ha[256];
int main()
{
    int n,nc;
    cin>>n>>nc>>s;
    int cnt=0;
    memset(ha,-1,sizeof(ha));
    memset(a,false,sizeof(a));
    int len=strlen(s);
    for(int i=0;i<len;i++){
        if(ha[s[i]]==-1)ha[s[i]]=cnt++;
    }
    int res=0;
    for(int i=0;i+n<=len;i++){
        int sum=0;
        for(int j=i;j<i+n;j++){
            sum*=nc;
            sum+=ha[s[j]];
        }
        if(!a[sum])res++,a[sum]=true;
    }
    cout<<res<<endl;
    return 0;
}

C.POJ - 2774 Long Long Message

两个字符串的最长公共子串长度

题意

求两个字符串的最长公共子串的长度

题解

二分答案长度mid:先把字符串A中所有长度为mid的子串的哈希值存入数组并排序,再对字符串B中每个长度为mid的子串,在该数组中二分查找它的哈希值;只要有一个能找到,长度mid就可行。

复杂度为O(logn×N×logN)

也可以直接用后缀数组的height

#include    <cstring>
#include   <iostream>
#include  <algorithm>
#include     <string>
#include     <vector>
#include        <set>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e6+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor of a and b, computed with the iterative Euclidean
// algorithm. Returns a unchanged when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result itself (a*b overflowed int for moderately large
// arguments). Returns 0 when gcd(a,b) is 0 instead of dividing by zero.
int lcm(int a,int b){
    int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
// Base of the polynomial hash; all arithmetic wraps modulo 2^64.
const ull seed=19260817;
// Prefix polynomial hash of a string.
//   p[i] = seed^i
//   h[i] = hash of the first i characters
struct Hash{
    vector<ull>p,h;
    Hash(){}
    // Fills both tables for s in a single pass.
    Hash(const string& s){
        const int size=s.length();
        p.assign(size+1,0);
        h.assign(size+1,0);
        p[0]=1;
        for(int k=0;k<size;k++){
            p[k+1]=p[k]*seed;
            h[k+1]=h[k]*seed+s[k];
        }
    }
    // Hash of the half-open range [l, r) (0-based).
    ull get(int l,int r){
        const ull shifted_prefix=h[l]*p[r-l];
        return h[r]-shifted_prefix;
    }
    // Hash of the m characters starting at 0-based index l.
    ull substr(int l,int m){
        return get(l,l+m);
    }
}A,B;
// Lengths of the strings hashed in A and B (set in main()).
int n,m;
// Feasibility check used by the binary search in main(): returns true when
// the hash of some length-mid substring of A equals the hash of some
// length-mid substring of B.
bool ok(int mid){
    // Collect and sort every length-mid window hash of A.
    vector<ull>pool;
    int from=0;
    while(from<=n-mid){
        pool.push_back(A.substr(from,mid));
        ++from;
    }
    sort(pool.begin(),pool.end());
    // Probe each window of B until one is found in the pool.
    bool found=false;
    for(int at=0;!found&&at<=m-mid;at++){
        found=binary_search(pool.begin(),pool.end(),B.substr(at,mid));
    }
    return found;
}
// Reads two strings and prints the largest length for which ok() reports a
// shared substring hash, found by binary search over the length.
int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);
    string a,b;
    cin>>a>>b;
    n=a.length();
    m=b.length();
    // Keep the shorter string in a / A.
    if(n>m){
        swap(a,b);
        swap(n,m);
    }
    A=Hash(a);
    B=Hash(b);
    int lo=0;
    int hi=n;
    int best=0;
    while(lo<=hi){
        const int mid=(lo+hi)>>1;
        if(!ok(mid)){
            hi=mid-1;
            continue;
        }
        best=mid;
        lo=mid+1;
    }
    cout<<best<<endl;
    return 0;
}

D.URAL - 1989 Subpalindromes

线段树/树状数组和哈希应用 判断回文

题意

给定一个字符串(长度<=100000),有两个操作。 1:改变某个字符。 2:判断某个子串是否构成回文串。

题解

把字符串的正向哈希和反向哈希分别存入线段树(或树状数组)中,支持单点修改、区间求值;如果某个区间的正向值和反向值相同,那么这个子串就是回文串。

//线段树
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
#define lson (x<<1)
#define rson ((x<<1)|1)
// Greatest common divisor of a and b, computed with the iterative Euclidean
// algorithm. Returns a unchanged when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result itself (a*b overflowed int for moderately large
// arguments). Returns 0 when gcd(a,b) is 0 instead of dividing by zero.
int lcm(int a,int b){
    int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
// bit[i] holds 19260817^i (wrapping modulo 2^64); filled in main().
ull bit[maxn];
// The text being queried; tree position i (1-based) corresponds to s[i-1].
string s;
// Segment-tree node covering positions [l, r].
struct node{
    int l,r;
    // sum1 weights position i by bit[i-1] (forward direction);
    // sum2 weights position i by bit[n-i] (reverse direction).
    ull sum1,sum2;
}my[maxn<<2];
// Length of s, set in main() before the tree is built.
int n;
void pushup(int x){
    my[x].sum1=my[lson].sum1+my[rson].sum1;
    my[x].sum2=my[lson].sum2+my[rson].sum2;
}
void build(int x,int l,int r){
    my[x].l=l;my[x].r=r;
    if(my[x].l==my[x].r){
        my[x].sum1=bit[l-1]*(s[l-1]-'a');
        my[x].sum2=bit[n-l]*(s[l-1]-'a');
        return;
    }
    int mid=(l+r)>>1;
    build(lson,l,mid);
    build(rson,mid+1,r);
    pushup(x);
}
ull one,two;
void update(int x,int pos,int val){
    if(my[x].l==my[x].r){
        my[x].sum1=bit[pos-1]*val;
        my[x].sum2=bit[n-pos]*val;
        return;
    }
    int mid=(my[x].l+my[x].r)>>1;
    if(pos<=mid)
        update(lson,pos,val);
    else
        update(rson,pos,val);
    pushup(x);
}
void query(int x,int l,int r){
    if(my[x].l>=l&&my[x].r<=r){
        one+=my[x].sum1;
        two+=my[x].sum2;
        return;
    }
    int mid=(my[x].l+my[x].r)>>1;
    if(l<=mid)query(lson,l,r);
    if(r>mid)query(rson,l,r);
}
// Reads the text and t commands. A command starting with 'p' is followed by
// x y and prints Yes/No depending on whether the forward and reverse sums of
// [x, y] agree; any other command is followed by a position and a letter and
// overwrites that position.
int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);
    cin>>s;n=s.length();int t;
    cin>>t;
    bit[0]=1;
    for(int i=1;i<maxn;i++)bit[i]=bit[i-1]*19260817;
    build(1,1,n);
    while(t--){
        // Read the command word into a std::string: extracting into a fixed
        // char[50] was unbounded, and that local also shadowed the global s.
        string cmd;
        cin>>cmd;
        if(cmd[0]=='p'){
            int x,y;
            cin>>x>>y;
            one=0;two=0;
            query(1,x,y);
            // The forward sum starts at weight bit[x-1], the reverse sum at
            // bit[n-y]; scale the one with the smaller exponent so both are
            // aligned before comparing.
            if((x-1)>(n-y))two*=bit[(x-1)-(n-y)];
            else one*=bit[(n-y)-(x-1)];
            // '\n' instead of endl: no flush after every single answer.
            if(one==two)cout<<"Yes"<<'\n';
            else cout<<"No"<<'\n';
        }
        else{
            int x;char ch;
            cin>>x>>ch;
            update(1,x,ch-'a');
        }
    }
    return 0;
}
//树状数组
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor of a and b, computed with the iterative Euclidean
// algorithm. Returns a unchanged when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result itself (a*b overflowed int for moderately large
// arguments). Returns 0 when gcd(a,b) is 0 instead of dividing by zero.
int lcm(int a,int b){
    int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
// bit[i] holds 19260817^i (wrapping modulo 2^64); c[.][0] and c[.][1] are two
// Fenwick trees, filled in main() from the text and from the reversed text.
ull bit[maxn],c[maxn][2];
// n: number of commands of the current test case; len: length of the text.
int n,len;
// Isolates the lowest set bit of x; yields 0 when x is 0.
inline int lowbit(int x){
    const int negated=-x;
    return negated&x;
}
// Fenwick point update: adds val at index x of tree number flag.
void update(int x,ull val,int flag){
    for(int idx=x;idx<maxn;idx+=lowbit(idx)){
        c[idx][flag]+=val;
    }
}
// Fenwick prefix sum: total of indices 1..x in tree number flag.
ull sum(int x,int flag){
    ull total=0;
    for(int idx=x;idx!=0;idx-=lowbit(idx)){
        total+=c[idx][flag];
    }
    return total;
}
// s: command word of the current operation; str: the text of the current test case.
string s;
string str;
// For every text on input: loads it into two Fenwick trees (tree 0 from the
// text, tree 1 from the reversed text), then processes n commands. A command
// starting with 'p' is followed by l r and prints Yes/No depending on whether
// the aligned forward and reverse sums of [l, r] agree; any other command is
// followed by a position and a character and replaces that character.
int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);
    bit[0]=1;
    for(int i=1;i<maxn;i++)bit[i]=bit[i-1]*19260817;
    while(cin>>str){
        len=str.length();
        memset(c,0,sizeof(c));
        for(int i=0;i<len;i++){
            update(i+1,(str[i]-'a'+1)*bit[i],0);
            update(i+1,(str[len-i-1]-'a'+1)*bit[i],1);
        }
        cin>>n;
        int l,r;
        while(n--){
            cin>>s;
            if(s[0]=='p'){
                cin>>l>>r;
                // Scale both range sums so their lowest weight is
                // bit[len-r+l-1] before comparing them.
                ull a=(sum(r,0)-sum(l-1,0))*bit[len-r];
                ull b=(sum(len-l+1,1)-sum(len-r,1))*bit[l-1];
                // '\n' instead of endl: no flush after every single answer.
                if(a==b)cout<<"Yes"<<'\n';
                else cout<<"No"<<'\n';
            }
            else{
                int w;
                char ch;
                cin>>w>>ch;
                // Apply the character difference at position w in both trees.
                update(w,(ch-str[w-1])*bit[w-1],0);
                update(len-w+1,(ch-str[w-1])*bit[len-w],1);
                str[w-1]=ch;
            }
        }
    }
    return 0;
}

E.CodeForces - 580E Kefa and Watch

线段树+哈希

题意

给你一个长度为n的字符串s,有两种操作:

1 L R C : 把s[l,r]全部变为c;

2 L R d : 询问s[l,r]是否是周期为d的重复串。

题解

n最大为1e5,且m+k最大也为1e5,这就要求操作1和操作2都要采用logn的算法,所以用线段树.

对于更新操作,使用区间更新就可解决。

主要是如何在logn的时间内完成询问操作.

我们采用线段树维护hash值的方法.

结合于类似KMP的性质,我们发现,字符串[l,r]有长度为w的循环节,只需要使得[l,r-w]=[l+w,r]即可。证明过程看这里

这题的hash不同于普通的字符串hash,因为涉及到动态修改,所以需要预先处理出所有的base,在修改的时候直接用.

#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor of a and b, computed with the iterative Euclidean
// algorithm. Returns a unchanged when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result itself (a*b overflowed int for moderately large
// arguments). Returns 0 when gcd(a,b) is 0 instead of dividing by zero.
int lcm(int a,int b){
    int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
// Base of the polynomial hash.
ull seed=19260817;
//ull seed=10;
// s[i] = seed^i mod `mod`; filled by init().
ull s[maxn];
// fs[i] = (s[0]+...+s[i]) mod `mod`; fs[i]*k is used as the hash of a run of
// i+1 equal digits k.
ull fs[maxn];
// Input digit string, read in main().
char ss[maxn];
// Precomputes the power table s[] and its running sums fs[], both reduced
// modulo `mod`.
void init(){
    s[0]=1;
    fs[0]=1;
    for(int k=1;k<maxn;k++){
        s[k]=(s[k-1]*seed)%mod;
        fs[k]=(fs[k-1]+s[k])%mod;
    }
}
// Segment-tree node covering positions [l, r].
struct node{
    int l,r;
    // Non-zero while a range assignment still has to be pushed to the children.
    int lazy;
    // Digit written by the most recent range assignment that covered this node.
    int ok;
    // Polynomial hash (mod `mod`) of the digits in [l, r].
    ull num;
}my[maxn<<2];
// Recomputes the hash of node x from its children.
// (A commented-out debug printf used to sit here; it had been split across
// two lines, leaving its second half as uncommented text that broke the
// build, so it was removed.)
void pushup(int x){
    int mid=(my[x].l+my[x].r)>>1;
    // The right child covers my[x].r-mid positions, so the left hash is
    // shifted by seed^(my[x].r-mid) before the right hash is appended.
    my[x].num=(my[x<<1].num*s[my[x].r-mid]+my[(x<<1|1)].num)%mod;
}
void pushdown(int x){
    if(my[x].lazy){
        int mid=(my[x].l+my[x].r)>>1;
        my[x<<1].lazy=my[(x<<1)|1].lazy=my[x].lazy;
        my[x<<1].ok=my[x<<1|1].ok=my[x].ok;
        my[x<<1].num=(fs[mid-my[x].l]*my[x].ok)%mod;
        my[(x<<1)|1].num=(fs[my[x].r-mid-1]*my[x].ok)%mod;
        my[x].lazy=0;
    }
}
// Builds the subtree rooted at node x over positions [l, r] from the digit
// string ss (position i corresponds to ss[i-1]).
// (A commented-out debug printf used to sit in the leaf branch; it had been
// split across two lines, leaving uncommented text that broke the build, so
// it was removed.)
void build(int x,int l,int r){
    my[x].l=l;my[x].r=r;my[x].lazy=0;
    if(my[x].l==my[x].r){
        // A leaf's hash is simply its digit value.
        my[x].num=ss[l-1]-'0';
        return;
    }
    int mid=(l+r)>>1;
    build(x<<1,l,mid);
    build((x<<1)|1,mid+1,r);
    pushup(x);
}
void update(int x,int l,int r,int k){
    if(my[x].l>=l&&my[x].r<=r){
        my[x].num=(fs[my[x].r-my[x].l]*k)%mod;
        my[x].ok=k;
        my[x].lazy=1;
        return;
    }
    pushdown(x);
    int mid=(my[x].l+my[x].r)>>1;
    if(l<=mid)update(x<<1,l,r,k);
    if(r>mid)update(x<<1|1,l,r,k);
    pushup(x);
}
// Returns the polynomial hash (mod `mod`) of positions [l, r] below node x.
ull query(int x,int l,int r){
    if(my[x].l>=l&&my[x].r<=r)return my[x].num;
    pushdown(x);
    int mid=(my[x].l+my[x].r)>>1;
    if(l>mid)return query(x<<1|1,l,r);
    if(r<=mid)return query(x<<1,l,r);
    // The range straddles mid: shift the left part by the number of queried
    // positions that fall into the right child, then append the right part.
    ull t1=query(x<<1,l,r);
    ull t2=query(x<<1|1,l,r);
    int k=min(r,my[x].r)-mid;
    // Every path above returns, so the trailing pushup(x) of the original
    // could never run and has been dropped.
    return (t1*s[k]+t2)%mod;
}
void pri(int n){
    for(int i=1;i<=n*4;i++){
        printf("my[%d].num=%llu
",i,my[i].num);
    }
}
// Reads n, the two operation counts and the digit string, then processes all
// operations. "1 l r c" assigns digit c to [l, r]. "2 l r d" prints YES when
// [l, r-d] and [l+d, r] have equal hashes (i.e. d is a period of [l, r]) and
// NO otherwise.
int main()
{
    // Input and output go through C stdio, so no iostream tuning is done here.
    init();
    int n,q,t;
    scanf("%d%d%d",&n,&q,&t);
    q+=t;
    scanf("%s",ss);
    int len=strlen(ss);
    build(1,1,len);
    for(int i=0;i<q;i++){
        int op,l,r,d;
        scanf("%d%d%d%d",&op,&l,&r,&d);
        if(op==1)update(1,l,r,d);
        else {
            // A period equal to the whole length holds trivially; this also
            // avoids querying an empty range.
            if(d==r-l+1){
                printf("YES\n");
                continue;
            }
            ull one=query(1,l,r-d);
            ull two=query(1,l+d,r);
            // The "\n" escapes are restored; raw line breaks had split the
            // string literals and broke the build.
            if(one==two)printf("YES\n");
            else printf("NO\n");
        }
    }

    return 0;
}

H.HDU - 1686 Oulipo

哈希水题,求模式串出现次数

#include<cstdio>
#include<iostream>
#include<cstring>
#include<string>
#include<set>
#include<vector>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e7+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor of a and b, computed with the iterative Euclidean
// algorithm. Returns a unchanged when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result itself (a*b overflowed int for moderately large
// arguments). Returns 0 when gcd(a,b) is 0 instead of dividing by zero.
int lcm(int a,int b){
    int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
// Bases and prime moduli a Hash can be built with, selected by index.
const ull Seed_Pool[]={146527,19260817};
const ull Mod_Pool[]={1000000009,998244353};
// Polynomial prefix hash of a string.
//   p[i] = SEED^i mod MOD
//   h[i] = hash of the first i characters, mod MOD
struct Hash{
    ull SEED,MOD;
    vector<ull>p,h;
    // Creates an empty hash. SEED and MOD are zeroed so they never hold
    // indeterminate values.
    Hash():SEED(0),MOD(0){}
    // Builds the tables for s with Seed_Pool[seed_index] and
    // Mod_Pool[mod_index]; both indices must be valid for their pool.
    Hash(const string& s,const int& seed_index,const int& mod_index){
        SEED=Seed_Pool[seed_index];
        MOD=Mod_Pool[mod_index];
        int n=s.length();
        p.resize(n+1),h.resize(n+1);
        p[0]=1;
        for(int i=1;i<=n;i++)p[i]=p[i-1]*SEED%MOD;
        // Each byte is hashed as unsigned char. A plain (possibly signed)
        // char >= 0x80 would convert to a value near 2^64, which is not
        // congruent to the byte modulo MOD and makes equal substrings at
        // different offsets hash differently.
        for(int i=1;i<=n;i++)h[i]=(h[i-1]*SEED%MOD+static_cast<unsigned char>(s[i-1]))%MOD;
    }
    // Hash of the half-open range [l, r) (0-based); the result is in [0, MOD).
    ull get(int l,int r)const{return (h[r]-h[l]*p[r-l]%MOD+MOD)%MOD;}
    // Hash of the m characters starting at 0-based index l.
    ull substr(int l,int m)const{return get(l,l+m);}
};
int main()
{
    int t;
    ios::sync_with_stdio(false);
    cin>>t;
    while(t--){
        string s;
        cin>>s;
        int n=s.length();
        Hash aa=Hash(s,0,0);
        ull a=aa.substr(0,n);
        cin>>s;
        int nn=s.length();
        aa=Hash(s,0,0);
        //cout<<"aa="<<a<<endl;
        int sum=0;
        for(int i=0;i+n<=nn;i++){
            if(aa.substr(i,n)==a){
                //cout<<aa.substr(i,n)<<endl;
                sum++;
            }
        }
        cout<<sum<<endl;
    }
    return 0;
}

以上是关于字符串哈希专题的主要内容,如果未能解决你的问题,请参考以下文章

哈希专题--Codeforces

《寒假算法集训》(专题十三)哈希

下文中的哈希片段指的是啥?

URL片段的最大长度(哈希)

URL的PHP​​和哈希/片段部分

《寒假算法集训》(专题十八)字典树