1. 程式人生 > >CharsRefIntHashMap並不比HashMap<String, Integer>快

CharsRefIntHashMap並不比HashMap<String, Integer>快

erl turn ava implement dom ati new abs bytes

我模仿lucene的BytesRef寫了一個CharsRefIntHashMap,實測效果並不如HashMap<String, Integer>。代碼例如以下:

package com.dp.arts.lucenex.utils;


import org.apache.lucene.util.CharsRef;


public interface CharsRefIntMap

{

public static abstract class CharsRefIntEntryAccessor {

public

abstract void access(char[] arr, int offset, int length, int value);

}

public void incKey(CharsRef key);

public void incKey(CharsRef key, int add);

public void incKey(char[] arr, int offset, int length);

public void incKey(char[] arr, int offset, int

length, int add);

public int get(CharsRef key);

public int get(CharsRef key, int no_entry_value);

public int get(char[] arr, int offset, int length);

public int get(char[] arr, int offset, int length, int no_entry_value);

public int size();

public void forEach(CharsRefIntEntryAccessor accesor);

}


package com.dp.arts.lucenex.utils;


import java.util.Arrays;


import org.apache.lucene.util.CharsRef;


import com.dp.arts.lucenex.utils.CharsRefIntMap.CharsRefIntEntryAccessor;


public class CharsRefIntHashMap implements CharsRefIntMap
{
public static final int DEFAULT_CAPACITY = 16;

private char[][] arrs;
private int[] offsets;
private int[] lengths;
private int[] ords;
private int[] values;

private int hashSize;
private int halfHashSize;
private int hashMask;
private int count;

public CharsRefIntHashMap() {
this(DEFAULT_CAPACITY);
}

public CharsRefIntHashMap(int capacity) {
assert capacity > 0 && ( (capacity & (capacity - 1)) == 0);

arrs = new char[capacity][];
offsets = new int[capacity];
lengths = new int[capacity];
ords = new int[capacity];
values = new int[capacity];

Arrays.fill(ords, -1);
hashSize = capacity;
halfHashSize = (capacity >>> 1);
hashMask = capacity - 1;
}


@Override
public void incKey(CharsRef key) {
int code = charsHashCode(key.chars, key.offset, key.length);
incKey(key.chars, key.offset, key.length, code, 1);
}


@Override
public void incKey(CharsRef key, int add) {
int code = charsHashCode(key.chars, key.offset, key.length);
incKey(key.chars, key.offset, key.length, code, add);
}


@Override
public void incKey(char[] arr, int offset, int length) {
int code = charsHashCode(arr, offset, length);
incKey(arr, offset, length, code, 1);
}

@Override
public void incKey(char[] arr, int offset, int length, int add) {
int code = charsHashCode(arr, offset, length);
incKey(arr, offset, length, code, add);
}

private void incKey(char[] arr, int offset, int length, int code, int add) {
int pos = (code & hashMask);
int e = ords[pos];
while (e != -1 && !charsEquals(arrs[e], offsets[e], lengths[e], arr, offset, length)) {
final int inc = ((code >> 8) + code) | 1;
code += inc;
pos = (code & hashMask);
e = ords[pos];
}
if (e == -1) {
// new entry.
arrs[count] = arr;
offsets[count] = offset;
lengths[count] = length;
values[count] = add;
ords[pos] = count;
++count;
if (count == halfHashSize) {
rehash((hashSize << 1));
}
} else {
values[e] += add;
}
}

private void rehash(int newSize) {
char[][] newArrs = new char[newSize][];
int[] newOffsets = new int[newSize];
int[] newLengths = new int[newSize];
int[] newValues = new int[newSize];
System.arraycopy(arrs, 0, newArrs, 0, halfHashSize);
System.arraycopy(offsets, 0, newOffsets, 0, halfHashSize);
System.arraycopy(lengths, 0, newLengths, 0, halfHashSize);
System.arraycopy(values, 0, newValues, 0, halfHashSize);


final int[] newOrds = new int[newSize];
Arrays.fill(newOrds, -1);
final int newHashMask = newSize - 1;
for (int i = 0; i < hashSize; ++i) {
int e0 = ords[i];
if (e0 != -1) {
char[] arr = newArrs[e0];
int offset = newOffsets[e0];
int length = newLengths[e0];
int code = charsHashCode(arr, offset, length);
int pos = code & newHashMask;
while (newOrds[pos] != -1) {
final int inc = ((code >> 8) + code) | 1;
code += inc;
pos = code & newHashMask;
}
newOrds[pos] = e0;
}
}

ords = newOrds;
arrs = newArrs;
offsets = newOffsets;
lengths = newLengths;
values = newValues;

hashSize = newSize;
halfHashSize = (newSize >> 1);
hashMask = newHashMask;
}

public int charsHashCode(char[] chars, int offset, int length) {
final int prime = 31;
int result = 0;
final int end = offset + length;
for (int i = offset; i < end; i++) {
result = prime * result + chars[i];
}
return result;
}

public boolean charsEquals(char[] lhsArr, int lhsOffset, int lhsLength, char[] rhsArr, int rhsOffset, int rhsLength) {
if (lhsLength == rhsLength) {
int otherUpto = rhsOffset;
final int end = lhsOffset + lhsLength;
for (int upto = lhsOffset; upto < end; upto++, otherUpto++) {
if (lhsArr[upto] != rhsArr[otherUpto]) {
return false;
}
}
return true;
} else {
return false;
}
}

@Override
public int get(CharsRef key) {
return get(key.chars, key.offset, key.length, 0);
}

@Override
public int get(CharsRef key, int no_entry_key) {
return get(key.chars, key.offset, key.length, no_entry_key);
}

@Override
public int get(char[] arr, int offset, int length) {
return get(arr, offset, length, 0);
}

@Override
public int get(char[] arr, int offset, int length, int no_entry_key) {
int code = charsHashCode(arr, offset, length);
int pos = (code & hashMask);
int e = ords[pos];
while (e != -1 && !charsEquals(arrs[e], offsets[e], lengths[e], arr, offset, length)) {
final int inc = ((code >> 8) + code) | 1;
code += inc;
pos = (code & hashMask);
e = ords[pos];
}
return e == -1 ? no_entry_key : values[e];
}

@Override
public void forEach(CharsRefIntEntryAccessor accessor) {
for (int i = 0; i < hashSize; ++i) {
int pos = ords[i];
if (pos != -1) {
accessor.access(arrs[pos], offsets[pos], lengths[pos], values[pos]);
}
}
}


@Override
public int size() {
return count;
}

// for test only.
public int hashSize() {
return hashSize;
}
}

package com.dp.arts.lucenex.utils;


import java.util.HashMap;

import java.util.Random;


import org.apache.lucene.util.CharsRef;


public class CharsRefIntHashMapBenchmark

{

private static Random randGen = null;

private static char[] numbersAndLetters = null;


static {

randGen = new Random();

numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +

"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();

}


private static final String randomString(int length) {

if (length < 1) {

return null;

}

char [] randBuffer = new char[length];

for (int i=0; i<randBuffer.length; i++) {

randBuffer[i] = numbersAndLetters[randGen.nextInt(71)];

}

return new String(randBuffer);

}

public static void main(String[] args) {

final int MAX = 100000;

String[] strs = new String[10000];

int[] values = new int[MAX];

for (int i = 0; i < 10000; ++i) {

strs[i] = randomString(randGen.nextInt(10) + 1);

}

for (int i = 0; i < MAX; ++i) {

values[i] = randGen.nextInt(10000);

}

char[][] arrs = new char[MAX][];

int offsets[] = new int[MAX];

int counts[] = new int[MAX];

for (int i = 0; i < MAX; ++i) {

String s = strs[values[i]];

arrs[i] = StringMisc.toCharArray(s);

offsets[i] = StringMisc.getOffset(s);

counts[i] = StringMisc.getCount(s);

}

long start = System.currentTimeMillis();

CharsRefIntHashMap map = new CharsRefIntHashMap();

for (int j = 0; j < 100; ++j) {

for (int i = 0; i < MAX; ++i) {

map.incKey(arrs[i], offsets[i], counts[i]);

}}

System.err.println("CharsRefIntHashMap time elapsed: " + (System.currentTimeMillis() - start) + "ms.");


start = System.currentTimeMillis();

HashMap<String, Integer> oldMap = new HashMap<String, Integer>();

for (int j = 0; j < 100; ++j) {

for (int i = 0; i < MAX; ++i) {

String s = strs[values[i]];

Integer v = oldMap.get(s);

if (v == null) {

v = new Integer(1);

oldMap.put(s, v);

} else {

v += 1;

}

}}

System.err.println("Origin string map time elapsed: " + (System.currentTimeMillis() - start) + "ms.");



}

}


因此這樣寫優點不過內存少用一些,性能應該更差,rehash時須要拷貝很多其它數據,對每一個數據的訪問都須要下標。實際情況下,CharsRef所須要的內存是24字節。假設使用trove的TObjectIntHashMap,插入速度相當,查詢速度是jdk hashmap的三倍。

CharsRefIntHashMap並不比HashMap&lt;String, Integer&gt;快