LintCode Counting Bloom Filter和Standard Bloom Filter
刷題時遇到的,網上查了下資料,整理了下這兩道題,直接上程式碼
/**
 * A standard (non-counting) Bloom filter over strings, backed by a fixed-size {@link BitSet}.
 *
 * <p>Words can be added but never removed. Because bits are only ever set, {@link #contains}
 * never returns {@code false} for a word that was added; it may return {@code true} for a word
 * that was not (a false positive).
 */
public class StandardBloomFilter {
    /**
     * Number of bits in the filter: 2^29 (the same value the original expressed as
     * {@code 2 << 28}). It must remain a power of two because {@link Hash#hash(String)} reduces
     * its result with a bit mask rather than a modulo.
     * NOTE(review): the original comment claimed this holds roughly 10 million URLs at a
     * one-in-ten-million false-positive rate; that figure is not verified here.
     */
    private static final int BIT_SIZE = 1 << 29;

    /** The bit vector; bit {@code p} is set once any added word hashes to {@code p}. */
    private final BitSet bits = new BitSet(BIT_SIZE);

    /** The k hash functions, each with its own multiplier. */
    private final Hash[] func;

    /**
     * Creates a filter that uses {@code k} hash functions.
     *
     * @param k number of hash functions; with {@code k == 0} every non-null word is reported
     *          as contained
     */
    public StandardBloomFilter(int k) {
        func = new Hash[k];
        for (int i = 0; i < k; i++) {
            // Use odd multipliers >= 3. A multiplier of 1 collapses the hash to a plain
            // character sum (all anagrams collide), and even multipliers shift leading
            // characters out of the power-of-two mask.
            func[i] = new Hash(BIT_SIZE, 2 * i + 3);
        }
    }

    /**
     * Adds a word to the filter. A {@code null} word is ignored.
     *
     * @param word the word to add, may be {@code null}
     */
    public void add(String word) {
        if (word == null) {
            return;
        }
        for (Hash f : func) {
            bits.set(f.hash(word));
        }
    }

    /**
     * Tests whether a word may have been added.
     *
     * @param word the word to look up, may be {@code null}
     * @return {@code false} if the word is {@code null} or at least one of its bits is clear;
     *         {@code true} otherwise
     */
    public boolean contains(String word) {
        if (word == null) {
            return false;
        }
        for (Hash f : func) {
            if (!bits.get(f.hash(word))) {
                return false;
            }
        }
        return true;
    }

    /** A polynomial string hash reduced to a table of power-of-two size. */
    public static class Hash {
        /** Size of the bit vector; must be a power of two for the mask in {@link #hash}. */
        private final int size;
        /** Multiplier applied to the running hash before each character is added. */
        private final int seed;

        /**
         * @param cap  size of the bit vector (a power of two)
         * @param seed multiplier for the polynomial hash
         */
        public Hash(int cap, int seed) {
            this.size = cap;
            this.seed = seed;
        }

        /**
         * Computes the bit index for a string (other suitable hash functions could be used).
         *
         * @param value the string to hash; must not be {@code null}
         * @return an index in {@code [0, size)}
         */
        public int hash(String value) {
            int result = 0;
            int len = value.length();
            for (int i = 0; i < len; i++) {
                // int overflow is intentional here; the mask below keeps the index in range.
                result = seed * result + value.charAt(i);
            }
            // size - 1 is a non-negative mask, so the index is non-negative even when
            // result has wrapped to a negative value.
            return (size - 1) & result;
        }
    }
}
------------------------------------------------------------------------------------------------------------------------------------------------
/**
 * A counting Bloom filter over strings: each slot holds a counter instead of a single bit, so
 * words can be removed as well as added.
 *
 * <p>{@link #contains} may return {@code true} for a word that was never added (a false
 * positive).
 */
public class CountingBloomFilter {
    /** Number of counter slots. */
    private static final int BIT_SIZE = 100000;

    /** Per-slot counters; slot {@code p} counts how many added hashes landed on {@code p}. */
    private final int[] bits;

    /** The k hash functions, each with its own odd multiplier. */
    private final Hash[] func;

    /**
     * Creates a filter that uses {@code k} hash functions.
     *
     * @param k number of hash functions; with {@code k == 0} every non-null word is reported
     *          as contained
     */
    public CountingBloomFilter(int k) {
        func = new Hash[k];
        for (int i = 0; i < k; i++) {
            func[i] = new Hash(BIT_SIZE, 2 * i + 3);
        }
        bits = new int[BIT_SIZE];
    }

    /**
     * Adds a word by incrementing each of its counters. A {@code null} word is ignored.
     *
     * @param word the word to add, may be {@code null}
     */
    public void add(String word) {
        if (word == null) {
            return;
        }
        for (Hash f : func) {
            bits[f.hash(word)]++;
        }
    }

    /**
     * Removes one occurrence of a word by decrementing each of its counters.
     *
     * <p>Nothing happens when the word is {@code null} or is not currently reported as
     * contained. Decrementing counters for an absent word would steal counts that belong to
     * other words and make them disappear from the filter.
     *
     * @param word the word to remove, may be {@code null}
     */
    public void remove(String word) {
        if (!contains(word)) {
            return;
        }
        for (Hash f : func) {
            int pos = f.hash(word);
            // Guard against going negative when several hash functions share a slot and the
            // word was only a false positive.
            if (bits[pos] > 0) {
                bits[pos]--;
            }
        }
    }

    /**
     * Tests whether a word may be present.
     *
     * @param word the word to look up, may be {@code null}
     * @return {@code false} if the word is {@code null} or at least one of its counters is
     *         zero; {@code true} otherwise
     */
    public boolean contains(String word) {
        if (word == null) {
            return false;
        }
        for (Hash f : func) {
            if (bits[f.hash(word)] == 0) {
                return false;
            }
        }
        return true;
    }

    /** A polynomial string hash reduced modulo the table size after every character. */
    public static class Hash {
        /** Number of slots in the counter array. */
        private final int size;
        /** Multiplier applied to the running hash before each character is added. */
        private final int seed;

        /**
         * @param cap  number of slots in the counter array
         * @param seed multiplier for the polynomial hash
         */
        public Hash(int cap, int seed) {
            this.size = cap;
            this.seed = seed;
        }

        /**
         * Computes the slot index for a string (other suitable hash functions could be used).
         *
         * @param value the string to hash; must not be {@code null}
         * @return an index in {@code [0, size)} for non-negative seeds
         */
        public int hash(String value) {
            // Accumulate in long: with a large seed, seed * result no longer fits in an int,
            // and an overflowed product would yield a negative remainder and thus a negative
            // array index.
            long result = 0;
            int len = value.length();
            for (int i = 0; i < len; i++) {
                result = (seed * result + value.charAt(i)) % size;
            }
            return (int) result;
        }
    }
}
沒什麼難點,就不多講了,主要是hash函式的實現和隨機種子數的選擇