1. 程式人生 > Welcome to Smile-Huang's Blog.

Welcome to Smile-Huang's Blog.

#include<iostream>
#include<string>
#include<fstream>
using namespace std;


//A term of n characters has n+1-k k-grams.
//A word is reported as a match when its Jaccard coefficient exceeds this
//threshold.
const double threshold = 0.1;

//Number of k-grams in a term of the given length. Returns 0 when k is not
//positive or the term is shorter than k; the unguarded n+1-k would be
//negative (or meaningless) in those cases.
static int k_gram_count(std::string::size_type length, int k)
{
	if(k<=0 || length<static_cast<std::string::size_type>(k)) return 0;
	return static_cast<int>(length)+1-k;
}

//Scores one candidate word against a query keyword using k-gram overlap.
//
//  words  - candidate word (one entry of the word list)
//  kwords - query keyword
//  _k     - length of each k-gram
//
//Writes to standard output: the two k-gram counts, the number of keyword
//k-grams that occur in the candidate ("fitness"), the Jaccard coefficient
//fitness/(input count + keyword count - fitness), the candidate itself when
//the coefficient exceeds threshold, and finally an empty line.
//
//Note: every k-gram position of the keyword is counted separately, so
//repeated k-grams are not collapsed into a set and the coefficient is an
//approximation of the set-based Jaccard coefficient.
void k_gram(std::string words, std::string kwords, int _k)
{
	const int input_k_gram=k_gram_count(words.size(),_k);
	const int search_k_gram=k_gram_count(kwords.size(),_k);

	std::cout<<input_k_gram<<"    "<<search_k_gram<<'\n';

	//Visit exactly the full-length k-grams of the keyword. Bounding the loop
	//by the k-gram count (instead of size()-1) is correct for every k and
	//cannot underflow for an empty keyword.
	int fitness=0;
	for(int i=0;i<search_k_gram;i++)
	{
		if(words.find(kwords.substr(i,_k))!=std::string::npos) fitness++;
	}

	std::cout<<"fitness="<<fitness<<'\n';

	//The denominator is zero only when neither term has any k-gram; report a
	//coefficient of 0 in that case instead of dividing 0 by 0.
	const int denominator=input_k_gram+search_k_gram-fitness;
	const float Jaccard=denominator>0 ? static_cast<float>(fitness)/denominator : 0.0f;
	std::cout<<Jaccard<<'\n';

	if(Jaccard>threshold) std::cout<<words<<'\n';

	//Single flush per word keeps interactive output timely.
	std::cout<<std::endl;
}

//Program entry point: reads the query keyword and the k value from standard
//input, then scores every line of the word-list file against the keyword
//with k_gram(). Returns 0 on success and 1 when the input is invalid or the
//word-list file cannot be opened.
int main()
{
	//Read the query parameters
	std::cout<<"請輸入查詢關鍵字:";
	std::string kwords;
	if(!(std::cin>>kwords))
	{
		std::cerr<<"failed to read the query keyword"<<std::endl;
		return 1;
	}
	std::cout<<"請輸入k值:";
	int k=0;
	if(!(std::cin>>k) || k<=0)
	{
		std::cerr<<"k must be a positive integer"<<std::endl;
		return 1;
	}

	//Without this check a missing file never reaches eof and the read loop
	//would spin forever.
	std::ifstream word_file("F:\\大三下課程\\網路儲存\\課程實習\\words.txt",std::ios::in);
	if(!word_file)
	{
		std::cerr<<"cannot open the word list file"<<std::endl;
		return 1;
	}

	//Loop on the result of getline itself so that the failed read at end of
	//file does not hand a spurious empty line to k_gram().
	std::string str;
	while(std::getline(word_file,str))
	{
		k_gram(str,kwords,k);
	}

	//word_file is closed by its destructor.
	return 0;
}

其中,此處的單詞集合是以文字檔(words.txt)的形式儲存的,每一行儲存一個單詞。