C#通過編輯距離計算兩個字符串的相似度的代碼
阿新 • • 發佈:2019-01-22
將開發過程中較好的一些代碼段備份一下。下面的 C# 代碼通過編輯距離(Levenshtein distance)計算兩個字符串的相似度,應該能對碼農們有些幫助。
using System;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace Levenshtein
{
    /// <summary>
    /// Callback raised when an asynchronous similarity analysis has finished.
    /// </summary>
    /// <param name="sim">Similarity of the two strings, in the range [0, 1].</param>
    public delegate void AnalyzerCompletedHander(double sim);

    /// <summary>
    /// Computes the similarity of two strings from their Levenshtein (edit) distance:
    /// <c>similarity = 1 - distance / max(length1, length2)</c>.
    /// Both inputs are normalized first: every character that is not an ASCII letter,
    /// an ASCII digit, a CJK ideograph in U+4E00..U+9FA5, or whitespace is removed.
    /// </summary>
    public class LevenshteinDistance : IDisposable
    {
        // Characters that are stripped before comparison. The published listing had lost
        // the backslashes ("u4e00-u9fa5s"), which removed all CJK text and whitespace
        // while keeping punctuation such as ':' and '@'.
        private static readonly Regex DisallowedCharacters =
            new Regex(@"[^a-zA-Z0-9\u4e00-\u9fa5\s]", RegexOptions.Compiled);

        // Never null, so the default constructor followed by property setters is safe.
        private string str1 = string.Empty;
        private string str2 = string.Empty;

        private Task<double> task;

        /// <summary>
        /// Raised by <see cref="Start"/> once the background analysis has produced a result.
        /// </summary>
        public event AnalyzerCompletedHander AnalyzerCompleted;

        /// <summary>
        /// First string to compare. The value is normalized on assignment; null is treated as empty.
        /// </summary>
        public string Str1
        {
            get { return str1; }
            set { str1 = Format(value); }
        }

        /// <summary>
        /// Second string to compare. The value is normalized on assignment; null is treated as empty.
        /// </summary>
        public string Str2
        {
            get { return str2; }
            set { str2 = Format(value); }
        }

        /// <summary>
        /// Number of character comparisons one analysis performs
        /// (normalized length of <see cref="Str1"/> times normalized length of <see cref="Str2"/>).
        /// </summary>
        /// <remarks>
        /// NOTE(review): the accessor body was missing from the original listing, which therefore
        /// did not compile; this definition is an assumption and should be confirmed.
        /// </remarks>
        public int TotalTimes
        {
            get { return str1.Length * str2.Length; }
        }

        /// <summary>
        /// True once an analysis started with <see cref="Start"/> or <see cref="StartAyns"/> has
        /// finished; false if no analysis has been started yet.
        /// </summary>
        public bool IsCompleted
        {
            get
            {
                Task<double> current = task;
                return current != null && current.IsCompleted;
            }
        }

        /// <summary>
        /// Creates an analyzer for the two given strings.
        /// </summary>
        /// <param name="str1">First string; null is treated as empty.</param>
        /// <param name="str2">Second string; null is treated as empty.</param>
        public LevenshteinDistance(string str1, string str2)
        {
            this.str1 = Format(str1);
            this.str2 = Format(str2);
        }

        /// <summary>
        /// Creates an analyzer with two empty strings; set <see cref="Str1"/> and
        /// <see cref="Str2"/> before starting an analysis.
        /// </summary>
        public LevenshteinDistance()
        {
        }

        /// <summary>
        /// Starts the analysis on a background task and raises
        /// <see cref="AnalyzerCompleted"/> with the result when it finishes.
        /// </summary>
        public void Start()
        {
            Task<double> started = new Task<double>(Analyzer);
            task = started;
            started.Start();
            started.ContinueWith(o => Completed(o.Result));
        }

        /// <summary>
        /// Runs the analysis on a task, blocks until it finishes, and returns the result.
        /// </summary>
        /// <returns>Similarity of the two strings, in the range [0, 1].</returns>
        public double StartAyns()
        {
            Task<double> started = new Task<double>(Analyzer);
            task = started;
            started.Start();
            started.Wait();
            return started.Result;
        }

        /// <summary>
        /// Raises <see cref="AnalyzerCompleted"/> if anyone is subscribed.
        /// </summary>
        private void Completed(double s)
        {
            // Copy to a local so a concurrent unsubscribe cannot null the delegate
            // between the check and the call.
            AnalyzerCompletedHander handler = AnalyzerCompleted;
            if (handler != null)
            {
                handler(s);
            }
        }

        /// <summary>
        /// Computes the similarity of the two normalized strings.
        /// </summary>
        /// <returns>
        /// 0 when either string is empty; otherwise
        /// <c>1 - editDistance / max(length1, length2)</c>.
        /// </returns>
        private double Analyzer()
        {
            // Snapshot the fields so a setter call during the run cannot change the lengths mid-loop.
            string source = str1;
            string target = str2;

            if (source.Length == 0 || target.Length == 0)
            {
                return 0;
            }

            // Two-row dynamic programming: previousRow[j] is the distance between the first
            // (i - 1) characters of source and the first j characters of target.
            int[] previousRow = new int[target.Length + 1];
            int[] currentRow = new int[target.Length + 1];

            // Turning an empty prefix into j characters costs j insertions.
            for (int j = 0; j <= target.Length; j++)
            {
                previousRow[j] = j;
            }

            for (int i = 1; i <= source.Length; i++)
            {
                // Turning i characters into an empty prefix costs i deletions.
                currentRow[0] = i;

                for (int j = 1; j <= target.Length; j++)
                {
                    int substitutionCost = source[i - 1] == target[j - 1] ? 0 : 1;
                    currentRow[j] = Min(
                        previousRow[j] + 1,                     // deletion
                        currentRow[j - 1] + 1,                  // insertion
                        previousRow[j - 1] + substitutionCost); // match or substitution
                }

                int[] swap = previousRow;
                previousRow = currentRow;
                currentRow = swap;
            }

            // After the final swap the last computed row is in previousRow.
            int distance = previousRow[target.Length];
            int longest = Math.Max(source.Length, target.Length);

            // Computed in double; the earlier float intermediate lost precision.
            return 1.0 - (double)distance / longest;
        }

        /// <summary>
        /// Removes every character that is not an ASCII letter, ASCII digit,
        /// CJK ideograph (U+4E00..U+9FA5) or whitespace.
        /// </summary>
        /// <param name="str">Raw input; null yields an empty string.</param>
        private string Format(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return string.Empty;
            }

            return DisallowedCharacters.Replace(str, "");
        }

        /// <summary>
        /// Returns the smallest of three integers.
        /// </summary>
        private int Min(int a, int b, int c)
        {
            return Math.Min(Math.Min(a, b), c);
        }

        /// <summary>
        /// Releases the task used by the last analysis, if there is one and it has finished.
        /// </summary>
        public void Dispose()
        {
            // Task.Dispose throws InvalidOperationException on a task that has not reached a
            // final state, and Dispose must not throw, so an unfinished task is left alone.
            Task<double> current = task;
            if (current != null && current.IsCompleted)
            {
                current.Dispose();
            }
        }
    }
}
C#通過編輯距離計算兩個字符串的相似度的代碼