poj2778 DNA Sequence AC自動機+矩陣快速冪
阿新 • • 發佈:2019-02-12
DNA Sequence
Suppose that a DNA sequence of a species consists only of A, C, T and G, and that the length of the sequence is a given integer n.
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one genetic disease segment; the length of each segment is at most 10.
Time Limit: 1000MS | Memory Limit: 65536K |
Total Submissions: 11721 | Accepted: 4471 |
Description
It is well known that a DNA sequence contains only the letters A, C, T and G, and it is very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to determine how many DNA sequences of a species do not contain any of those segments. Suppose that a DNA sequence of a species consists only of A, C, T and G, and that the length of the sequence is a given integer n.
Input
Each of the next m lines contains one genetic disease segment; the length of each segment is at most 10.
Output
An integer: the number of DNA sequences, modulo 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
給M個串,問有多少個長度為N的串不包含給出的M個串中的任何一個。
首先把M個串建立AC自動機,然後構造一個大小為ac.size*ac.size的矩陣,mat[i][j]代表節點i走到節點j走一步的合法路徑數,i和j應該是合法節點。這樣形成了一個一步矩陣,N步的話把這個矩陣N次方,答案是矩陣第一行的和。
#include<iostream> #include<queue> #include<cstring> #include<cstdio> #include<cmath> #include<set> #include<map> #include<vector> #include<stack> #include<algorithm> #define INF 0x3f3f3f3f #define eps 1e-9 #define MAXN 60010 #define MAXM 2000010 #define MAXNODE 105 #define MOD 100000 #define SIGMA_SIZE 4 typedef long long LL; using namespace std; int T,M,N; char str[15]; struct AC{ int ch[MAXNODE][SIGMA_SIZE],f[MAXNODE],val[MAXNODE],sz; void init(){ memset(ch[0],0,sizeof(ch[0])); val[0]=0; sz=1; } int idx(char c){ switch(c){ case 'A':return 0; case 'C':return 1; case 'T':return 2; case 'G':return 3; } } void insert(char *s,int v){ int u=0; for(int i=0;s[i];i++){ int c=idx(s[i]); if(!ch[u][c]){ memset(ch[sz],0,sizeof(ch[sz])); val[sz]=0; ch[u][c]=sz++; } u=ch[u][c]; } val[u]=1; } void get_fail(){ queue<int> q; f[0]=0; for(int c=0;c<SIGMA_SIZE;c++){ int u=ch[0][c]; if(u){ f[u]=0; q.push(u); } } while(!q.empty()){ int r=q.front(); q.pop(); for(int c=0;c<SIGMA_SIZE;c++){ int u=ch[r][c]; if(!u){ ch[r][c]=ch[f[r]][c]; continue; } q.push(u); f[u]=ch[f[r]][c]; val[u]|=val[f[u]]; } } } }ac; struct Mat{ LL mat[MAXNODE][MAXNODE]; void init(){ memset(mat,0,sizeof(mat)); } }ans; Mat operator * (Mat a,Mat b){ int i,j,k,sz=ac.sz; Mat ret; ret.init(); for(int k=0;k<sz;k++) for(int i=0;i<sz;i++){ if(!a.mat[i][k]) continue; for(int j=0;j<sz;j++) ret.mat[i][j]=(ret.mat[i][j]+a.mat[i][k]*b.mat[k][j])%MOD; } return ret; } Mat operator ^ (Mat a,int n){ Mat ret,t=a; int sz=ac.sz; for(int i=0;i<sz;i++) for(int j=0;j<sz;j++) ret.mat[i][j]=(i==j); while(n){ if(n&1) ret=ret*t; t=t*t; n>>=1; } return ret; } void get_mat(){ for(int u=0;u<ac.sz;u++) for(int c=0;c<SIGMA_SIZE;c++) if(!ac.val[u]&&!ac.val[ac.ch[u][c]]) ans.mat[u][ac.ch[u][c]]++; } int main(){ freopen("in.txt","r",stdin); while(scanf("%d%d",&M,&N)!=EOF){ ac.init(); for(int i=0;i<M;i++){ scanf("%s",str); ac.insert(str,1); } ac.get_fail(); ans.init(); get_mat(); ans=(ans^N); LL cnt=0; for(int i=0;i<ac.sz;i++) 
cnt=(cnt+ans.mat[0][i])%MOD; printf("%I64d\n",cnt); } return 0; }