1. 程式人生 > 其它 > 字串匹配演算法:BMH演算法 C實現

字串匹配演算法:BMH演算法 C實現

技術標籤:C、C演算法、字串匹配、BMH演算法

#include <stdio.h>
#include <stdlib.h>
#define MAXCHAR 256

/*
 * Build the bad-character shift table used by the Horspool (BMH) search.
 *
 * patt   pattern bytes; only patt[0 .. m-2] are read
 * m      pattern length
 * bm_bc  output table with MAXCHAR entries, indexed by byte value (0..255)
 *
 * On return, bm_bc[c] is m for every byte c that does not occur in
 * patt[0 .. m-2]; otherwise it is m-1-k for the right-most position k
 * (k < m-1) at which c occurs.  The table is traced to stdout while it
 * is being built.
 * */
void pre_process(char *patt, int m, int bm_bc[]){
	int k = 0;

	// Default shift: a byte that is absent from the pattern skips the whole pattern length.
	for ( k = 0; k < MAXCHAR; k++ ){
		bm_bc[k] = m;
		printf("bm_bc[%d]=%d \n", k, bm_bc[k]);
	}

	// The last pattern character is deliberately skipped: it keeps the default m,
	// or whatever shift an earlier occurrence in 0 .. m-2 produced.
	for ( k = 0; k < m-1; k++ ){
		// Plain char may be signed; go through unsigned char so bytes >= 0x80
		// map to 128..255 instead of a negative (out-of-bounds) index.
		unsigned char idx = (unsigned char)patt[k];

		bm_bc[idx] = m - k -1;
		printf("bm_bc[patt[%d]] = %d char=%c\n", patt[k], bm_bc[idx], patt[k]);
	}

	printf("bm_bc[patt[%d]]=%d \n", 'G', bm_bc['G']);
	printf("bm_bc[patt[%d]]=%d \n", 'T', bm_bc['T']);
	printf("bm_bc[patt[%d]]=%d \n", 'A', bm_bc['A']);
	printf("bm_bc[patt[%d]]=%d \n", 'C', bm_bc['C']);
}

/*
 * Horspool (BMH) substring search.
 *
 * src  text to search, n bytes long
 * des  pattern to look for, m bytes long
 *
 * Returns the index in src of the first position at which des matches,
 * or -1 when there is no match, when the pattern is longer than the text,
 * or when m is negative.
 */
int BMH(char *src, int n, char *des, int m){
	int bm_bc[MAXCHAR] = {0};
	int j = 0, k = 0, i = 0;

	// A negative length would make the window start before src; reject it
	// together with the "pattern longer than text" case.
	if ( m < 0 || m > n )
		return -1;

	pre_process(des, m, bm_bc);
	// k: index in src aligned with the last pattern character (advances by the shift);
	// i: current comparison position in src; j: current comparison position in des.
	k = m -1;
	while ( k < n ){
		j = m -1;
		i = k;
		// Compare right-to-left while the characters agree.
		while ( j >=0 && src[i] == des[j] ){
			j--;
			i--;
		}
		// Ran off the front of the pattern: the whole pattern matched, starting at i+1.
		if ( j == -1)
			return i+1;
		// Shift by the table entry for the text byte under the window's last position.
		// Index through unsigned char: plain char may be signed, and a byte >= 0x80
		// would otherwise produce a negative, out-of-bounds table index.
		k += bm_bc[(unsigned char)src[k]];

	}
	return -1;
}

/*
 * Demo driver: searches a fixed text for a fixed pattern with BMH and
 * prints the index that BMH returns.
 */
int main(){
	char text[]    = "GCCTCATCCUACGTTAC";
	char pattern[] = "GTTAC";

	// BMH takes both strings plus their lengths. The arrays are initialised from
	// string literals, so sizeof also counts the trailing '\0'; subtract 1.
	int text_len    = sizeof(text) - 1;
	int pattern_len = sizeof(pattern) - 1;
	int found_at    = BMH(text, text_len, pattern, pattern_len);

	printf("char2 find in char1:%d \n", found_at);
	return 0;
}