[原創]大資料:布隆過濾器C#版簡單實現。
阿新 • • 發佈:2018-11-29
/// <summary>
/// A simple Bloom filter for strings backed by a <see cref="BitArray"/>.
/// Each item's hash code seeds a <see cref="Random"/>, and the first
/// <see cref="BitIndexCount"/> values drawn from it select the bit positions
/// that represent the item. Lookups can yield false positives but never
/// false negatives for items added to the same instance.
/// </summary>
/// <remarks>
/// Bit positions come from <see cref="object.GetHashCode"/> of the string, which
/// .NET does not guarantee to be stable across processes or runtime versions,
/// so the bit array is only meaningful within the process that filled it.
/// </remarks>
public class BloomFilter
{
    /// <summary>The underlying bit storage (one bit per slot).</summary>
    public BitArray _BloomArray;

    /// <summary>Number of bits in the filter (m).</summary>
    public Int64 BloomArryLength { get; }

    /// <summary>Expected number of items to be stored (n); used only for the error estimate.</summary>
    public Int64 DataArrayLeng { get; }

    /// <summary>Number of bit positions set/checked per item (k).</summary>
    public Int64 BitIndexCount { get; }

    /// <summary>
    /// Creates an empty filter.
    /// </summary>
    /// <param name="BloomArryLength">Size of the bit array; must be positive.</param>
    /// <param name="DataArrayLeng">Expected number of items.</param>
    /// <param name="bitIndexCount">Number of hash positions per item; must be positive.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="BloomArryLength"/> or <paramref name="bitIndexCount"/> is not positive.
    /// </exception>
    public BloomFilter(int BloomArryLength, int DataArrayLeng, int bitIndexCount)
    {
        // Fail at construction instead of with an obscure error on the first Add.
        if (BloomArryLength <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(BloomArryLength), "The bit array length must be positive.");
        }

        // With no positions per item every lookup would trivially report "present".
        if (bitIndexCount <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(bitIndexCount), "The number of hash positions must be positive.");
        }

        _BloomArray = new BitArray(BloomArryLength);
        this.BloomArryLength = BloomArryLength;
        this.DataArrayLeng = DataArrayLeng;
        this.BitIndexCount = bitIndexCount;
    }

    /// <summary>Adds an item to the filter by setting its bit positions.</summary>
    /// <param name="str">The item to add.</param>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public void Add(string str)
    {
        Random positions = CreatePositionSource(str);
        for (int i = 0; i < BitIndexCount; i++)
        {
            _BloomArray[NextPosition(positions)] = true;
        }
    }

    /// <summary>Checks whether an item may have been added.</summary>
    /// <param name="str">The item to look up.</param>
    /// <returns>
    /// <c>false</c> if at least one of the item's bit positions is unset;
    /// <c>true</c> if all of them are set (which may be a false positive).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public bool isExist(string str)
    {
        Random positions = CreatePositionSource(str);
        for (int i = 0; i < BitIndexCount; i++)
        {
            if (!_BloomArray[NextPosition(positions)])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Returns the hash code of <paramref name="value"/>.</summary>
    /// <param name="value">The object to hash.</param>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    public int GetHashCode(object value)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        return value.GetHashCode();
    }

    /// <summary>
    /// Estimates the false-positive probability as (1 - e^(-k * n / m)) ^ k,
    /// with k = <see cref="BitIndexCount"/>, n = <see cref="DataArrayLeng"/>
    /// and m = <see cref="BloomArryLength"/>.
    /// </summary>
    public double getFalsePositiveProbability()
    {
        double exponent = -BitIndexCount * (double)DataArrayLeng / BloomArryLength;
        return Math.Pow(1 - Math.Exp(exponent), BitIndexCount);
    }

    // Builds the deterministic sequence generator for an item: the same string
    // always yields the same seed, so Add and isExist visit the same positions.
    private Random CreatePositionSource(string str)
    {
        if (str == null)
        {
            throw new ArgumentNullException(nameof(str));
        }

        return new Random(GetHashCode(str));
    }

    // Draws the next bit position. Random.Next(max) excludes max, so the full
    // length is passed to keep the last slot of the array reachable.
    private int NextPosition(Random positions)
    {
        return positions.Next((int)BloomArryLength);
    }
}
/// <summary>
/// Demo driver: fills a Bloom filter with the decimal strings of
/// 0 .. DataArrayLeng - 1, then answers membership queries read from the
/// console, printing the result, the elapsed stopwatch ticks and the
/// estimated false-positive probability. Entering "e" (or reaching the end
/// of the input stream) quits.
/// </summary>
static void Main(string[] args)
{
    // Fifty million records, 200 million bits, 3 hash positions per record.
    Bloom_Filter.BloomFilter bloom = new Bloom_Filter.BloomFilter(200000000, 50000000, 3);

    // Insert the fifty million records.
    for (int i = 0; i < bloom.DataArrayLeng; i++)
    {
        bloom.Add(i.ToString());
    }

    while (true)
    {
        string query = Console.ReadLine();

        // ReadLine returns null once the input stream is exhausted (redirected
        // stdin, Ctrl+Z / Ctrl+D); treat that like the explicit quit command
        // instead of passing null on to the filter.
        if (query == null || query == "e")
        {
            break;
        }

        Stopwatch sw = Stopwatch.StartNew();
        bool found = bloom.isExist(query);
        sw.Stop();

        Console.WriteLine($"查詢:{query}\n結果:{found}\n總耗時:{sw.ElapsedTicks}\n錯誤概率:{bloom.getFalsePositiveProbability()}");
    }
}
結果:使用記憶體27MB,單次查詢耗時一般在100毫秒以內。