1. 程式人生 > JavaScript 實現布隆過濾器(改進)

JavaScript 實現布隆過濾器(改進)

雜湊函式

/**
 * MurmurHash2 (32-bit) of a string or byte sequence.
 *
 * Reference: http://murmurhash.googlepages.com/
 *
 * Strings are hashed one UTF-16 code unit at a time, and only the low 8 bits
 * of each unit are used (the algorithm is byte oriented). Array-like inputs
 * are read as bytes in the same way (`value & 0xff`).
 *
 * @param {string|ArrayLike<number>} data - Value to hash.
 * @param {number} [offset=0] - Index of the first element to hash; everything
 *   from `offset` to the end of `data` is hashed. Clamped to `[0, data.length]`.
 * @param {number} [seed=0] - Hash seed (coerced to a 32-bit integer).
 * @returns {number} The hash as an unsigned 32-bit integer.
 */
function MurmurHash (data, offset = 0, seed = 0) {
  const m = 0x5bd1e995;
  const r = 24;

  const isString = typeof data === 'string';
  // Reads one input element as a byte; strings need charCodeAt because
  // indexing a string yields a character, not a number.
  const byteAt = (index) => (isString ? data.charCodeAt(index) : data[index]) & 0xff;

  const start = Math.min(Math.max(offset | 0, 0), data.length);
  const len = data.length - start;
  const blockCount = len >> 2;

  let h = seed ^ len;

  // Body: mix the input four bytes (little-endian) at a time.
  // Math.imul keeps every multiplication in exact 32-bit arithmetic; a plain
  // `*` would overflow into a double and lose the low-order bits.
  for (let i = 0; i < blockCount; i++) {
    const base = start + (i << 2);
    let k = byteAt(base) |
      (byteAt(base + 1) << 8) |
      (byteAt(base + 2) << 16) |
      (byteAt(base + 3) << 24);

    k = Math.imul(k, m);
    k ^= k >>> r;
    k = Math.imul(k, m);

    h = Math.imul(h, m) ^ k;
  }

  // Tail: the 0-3 bytes that did not fill a whole 4-byte block.
  const tail = start + (blockCount << 2);
  const left = len - (blockCount << 2);

  if (left >= 3) {
    h ^= byteAt(tail + 2) << 16;
  }
  if (left >= 2) {
    h ^= byteAt(tail + 1) << 8;
  }
  if (left >= 1) {
    h ^= byteAt(tail);
    h = Math.imul(h, m);
  }

  // Final avalanche.
  h ^= h >>> 13;
  h = Math.imul(h, m);
  h ^= h >>> 15;

  // Normalise to an unsigned 32-bit value so callers never see a negative hash.
  return h >>> 0;
}

BloomFilter

/**
 * BoolmFilter - a Bloom filter (the identifier's spelling is kept so existing
 * callers keep working).
 *
 * The bit array is sized from the expected number of keys `n` and the target
 * false-positive rate `p`:
 *   bits   m = ceil(n * -ln(p) / (ln 2)^2)
 *   hashes k = ceil(ln 2 * m / n)
 *
 * @param {number} maxKeys - Maximum number of keys the filter is sized for.
 * @param {number} errorRate - Target false-positive rate, strictly between 0 and 1.
 * @throws {RangeError} If `maxKeys` is not a positive finite number or
 *   `errorRate` is not strictly between 0 and 1.
 */
function BoolmFilter (maxKeys, errorRate) {
  // Out-of-range arguments would yield a zero/negative/NaN size, and a filter
  // whose lookups always report "present".
  if (!(Number.isFinite(maxKeys) && maxKeys > 0)) {
    throw new RangeError('maxKeys must be a positive finite number');
  }
  if (!(errorRate > 0 && errorRate < 1)) {
    throw new RangeError('errorRate must be strictly between 0 and 1');
  }

  // Bitmap storage: an array of integers, 31 bits used per element so every
  // stored word stays a non-negative 32-bit integer. Starts empty; words are
  // created on first write.
  this.bitMap = [];
  // Maximum number of keys the filter is sized for.
  this.maxKeys = maxKeys;
  // Target false-positive rate.
  this.errorRate = errorRate;
  // Number of bits in the filter, derived from maxKeys and errorRate.
  this.bitSize = Math.ceil(maxKeys * (-Math.log(errorRate) / (Math.log(2) * Math.log(2))));
  // Number of bit positions probed per key.
  this.hashCount = Math.ceil(Math.log(2) * (this.bitSize / maxKeys));
  // Number of keys added so far.
  this.keyCount = 0;
}

/**
 * Computes the bit positions a key maps to in the given filter.
 *
 * Two hashes are taken with MurmurHash (the second seeded with the first) and
 * combined as `hash1 + i * hash2` for i = 0 .. hashCount - 1.
 *
 * @param {BoolmFilter} filter - Filter supplying `hashCount` and `bitSize`.
 * @param {string} key - Key to map.
 * @returns {number[]} `filter.hashCount` bit positions.
 */
function bloomFilterBitIndexes (filter, key) {
  const hash1 = MurmurHash(key, 0, 0);
  const hash2 = MurmurHash(key, 0, hash1);
  const indexes = [];

  for (let i = 0; i < filter.hashCount; i++) {
    indexes.push(Math.abs(Math.floor((hash1 + i * hash2) % filter.bitSize)));
  }

  return indexes;
}

/**
 * Sets one bit in the bitmap.
 *
 * @param {number} bit - Zero-based bit position.
 */
BoolmFilter.prototype.bitSet = function (bit) {
  const wordIndex = Math.floor(bit / 31);
  const bitOffset = Math.floor(bit % 31);

  // A word that was never written is undefined; treat it as 0.
  this.bitMap[wordIndex] = (this.bitMap[wordIndex] || 0) | (1 << bitOffset);
};

/**
 * Reads one bit from the bitmap without modifying it.
 *
 * @param {number} bit - Zero-based bit position.
 * @returns {number} Non-zero (the bit's mask within its word) if the bit is
 *   set, otherwise 0.
 */
BoolmFilter.prototype.bitGet = function (bit) {
  const wordIndex = Math.floor(bit / 31);
  const bitOffset = Math.floor(bit % 31);

  // Plain `&`, not `&=`: a read must never write back to the bitmap, or it
  // would clear every other bit stored in the same word.
  return (this.bitMap[wordIndex] || 0) & (1 << bitOffset);
};

/**
 * Adds a key to the filter.
 *
 * @param {string} key - Key to add.
 * @returns {number|undefined} -1 if the filter already reports the key as
 *   present (nothing is changed); otherwise undefined after the key's bits
 *   are set and `keyCount` is incremented.
 */
BoolmFilter.prototype.add = function (key) {
  // Hash once and reuse the positions for both the membership check and the write.
  const indexes = bloomFilterBitIndexes(this, key);

  if (indexes.every((bit) => this.bitGet(bit))) {
    return -1;
  }

  for (const bit of indexes) {
    this.bitSet(bit);
  }

  this.keyCount++;
};

/**
 * Tests whether a key may have been added.
 *
 * @param {string} key - Key to look up.
 * @returns {boolean} false if the key was definitely never added; true if all
 *   of its bits are set (the key was added, or this is a false positive).
 */
BoolmFilter.prototype.contain = function (key) {
  return bloomFilterBitIndexes(this, key).every((bit) => this.bitGet(bit));
};

測試

// Demo: size a filter for 100,000,000 keys at a 1% error rate, insert one URL,
// then look the same URL up again.
let bloomFilter = new BoolmFilter(100000000, 0.01);
const sampleUrl = 'https://blog.csdn.net/whodarewin2005/article/details/51579053';

bloomFilter.add(sampleUrl);
bloomFilter.contain(sampleUrl);