LongHashMap学习笔记
一个HashMap代码,来自于jive。用三个数组实现,不同于jdk中的实现方式。处理哈希冲突是采用二次哈希(再哈希)的策略,学习了一把,个别地方可能没有理解透。
一个HashMap代码,来自于jive。用三个数组实现,不同于jdk中的实现方式。处理哈希冲突是采用二次哈希(再哈希)的策略,学习了一把,个别地方可能没有理解透。
public final class LongHashMap { protected long table[];//存放键,类型为long,应该是用于特殊场所 protected Object values[];//存放值 protected byte state[];//state[i]=0,1,2表示table[i]与values[i]没有使用,已经使用,已删除 protected int freeEntries;//空闲的空间数 protected int distinct;//当前存了多少对键值 protected int lowWaterMark;//当LongHashMap存放的键值对少于此数时,将重新调整(再哈希) protected int highWaterMark;//当LongHashMap存放的键值对大于此数时,将重新调整,由容量和装载因子决定 protected double minLoadFactor;//最小装载因子 protected double maxLoadFactor;//最大装载因子 // 如果元素放得太满,就必须进行rehash(再哈希)。再哈希使空间增大,并将原有的对象重新导入新的LongHashMap中, //而原始的LongHashMap被删除。loadfactor(装载因子)决定何时要对LongHashMap进行再哈希。 protected static final int DEFAULT_CAPACITY = 277;//缺省的容量,一个素数 protected static final double DEFAULT_MIN_LOAD_FACTOR = 0.2;//缺省的最小的装载因子 protected static final double DEFAULT_MAX_LOAD_FACTOR = 0.6;//缺省的最大的装载因子 protected static final byte FREE = 0; protected static final byte FULL = 1; protected static final byte REMOVED = 2; //用缺省的容量构建HashMap public LongHashMap() { this(DEFAULT_CAPACITY); } //构造函数 public LongHashMap(int initialCapacity) { this(initialCapacity, DEFAULT_MIN_LOAD_FACTOR, DEFAULT_MAX_LOAD_FACTOR); } //构造函数 public LongHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) { setUp(initialCapacity,minLoadFactor,maxLoadFactor); } //使用指定的初始化容量,最小装载因子,最大装载因子构建LongHashMap protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) { if (initialCapacity < 0) {//参数检查 throw new IllegalArgumentException( "Initial Capacity must not be less than zero: "+ initialCapacity ); } if (minLoadFactor < 0.0 || minLoadFactor >= 1.0) { throw new IllegalArgumentException( "Illegal minLoadFactor: "+ minLoadFactor ); } if (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) { throw new IllegalArgumentException( "Illegal maxLoadFactor: "+ maxLoadFactor ); } if (minLoadFactor >= maxLoadFactor) { throw new IllegalArgumentException( "Illegal minLoadFactor: " + minLoadFactor + " and maxLoadFactor: " + maxLoadFactor ); } int capacity = initialCapacity; capacity = nextPrime(capacity);//程序将调整初始化容量,使之为素数 if (capacity==0) { capacity=1; } this.table = new long[capacity];//关键字数组 this.values = new Object[capacity];//值数组 this.state = new byte[capacity];//状态数组 this.minLoadFactor = minLoadFactor; if (capacity == LARGEST_PRIME) this.maxLoadFactor = 1.0; else this.maxLoadFactor = maxLoadFactor; this.distinct = 0;//开始时,LongHashMap中没有存入键值对 this.freeEntries = capacity; // 开始时空闲的空间数=容量 this.lowWaterMark = 0; // Math.min(capacity-2, (int) (capacity * maxLoadFactor)); this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor); } //扩容量,一个素数 private int chooseGrowCapacity(int size, double minLoad, double maxLoad) { return nextPrime(Math.max(size+1, (int) ((4*size / (3*minLoad+maxLoad))))); } public boolean put(long key, Object value) {//在LongHashMap中存放键值对 int i = indexOfInsertion(key); if (i< 0) { i = -i -1; this.values[i]=value; return false; } if (this.distinct > this.highWaterMark) {//当存的健值对超过highWaterMark时,将重新构建LongHashMap int newCapacity = chooseGrowCapacity( this.distinct+1, this.minLoadFactor, this.maxLoadFactor ); rehash(newCapacity);//用新容量重新构造 return put(key, value); } this.table[i]=key; this.values[i]=value; if (this.state[i]==FREE) this.freeEntries--;//剩余空间少了一个 this.state[i]=FULL; this.distinct++;//当前存放的键值对数目加1 if (this.freeEntries < 1) { int newCapacity = chooseGrowCapacity( this.distinct+1, this.minLoadFactor, this.maxLoadFactor ); rehash(newCapacity);//用新容量重新构造 } return true; } //求关键字key的索引值,用于插入(添加) private final int indexOfInsertion(long key) { final long tab[] = table; final byte stat[] = state; final int length = tab.length; //这就是哈希函数了 final int hash = ((int)(key ^ (key >> 32))) & 0x7FFFFFFF; int i = hash % length; //发生哈希冲突时,用于再哈希探测的步长 int decrement = (hash) % (length-2); if (decrement == 0) decrement = 1; //stat[i]有三种情况 while (stat[i] == FULL && tab[i] != key) {//第一种,发生哈希冲突,往前探测 i -= decrement; if (i< 0) i+=length; } if (stat[i] == REMOVED) {//第二种,此位置原来的键值对已删除 int j = i; //有意思,已删除的位置并不用来存新的 while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) { i -= decrement; if (i< 0) i+=length; } if (stat[i] == FREE) i = j; } if (stat[i] == FULL) {//第三种,这种情况会出现吗? return -i-1; } //第三种,stat[i]=FREE return i; } //删除key的value public boolean removeKey(long key) { int i = indexOfKey(key);//获取关键字的索引 if (i< 0) return false; this.state[i]=REMOVED;//作删除标记 this.values[i]=null; //注意:table[i]并没有置为null this.distinct--; if (this.distinct < this.lowWaterMark) {//存放的键值对少于lowWaterMark,重新调整 int newCapacity = chooseShrinkCapacity( this.distinct, this.minLoadFactor, this.maxLoadFactor ); rehash(newCapacity);//用新容量重新构造 } return true; } public final Object get(long key) {//获取关键字对应的值 int i = indexOfKey(key); if (i< 0) { return null; } else { return values[i]; } } private final int indexOfKey(long key) {//求关键字之索引值,用于查找 final long tab[] = table; final byte stat[] = state; final int length = tab.length; //这个是哈希函数 final int hash = ((int)(key ^ (key >> 32))) & 0x7FFFFFFF; int i = hash % length;//得到了关键字的索引值 //用于再哈希探测的步长 int decrement = (hash) % (length-2);//减量 if (decrement == 0) decrement = 1; while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) { i -= decrement;//往前找 if (i< 0) i+=length; } if (stat[i] == FREE) return -1; // 没有找到 return i; //找到了,返回索引值 } public void clear() {//清空 for (int i=0; i< state.length; i++) { state[i] = FREE; } for (int i=0; i< values.length-1; i++) { values[i] = null; } this.distinct = 0; this.freeEntries = table.length; trimToSize();//清空以后,容量不能太大,以节约空间 } public void trimToSize() { int newCapacity = nextPrime((int)(1 + 1.2*size())); if (table.length > newCapacity) { rehash(newCapacity); } } //是否包含key public boolean containsKey(long key) { return indexOfKey(key) >= 0; } //是否包含value public boolean containsValue(Object value) { return indexOfValue(value) >= 0; } public void ensureCapacity(int minCapacity) {//确保容量不小于minCapacity if (table.length < minCapacity) { int newCapacity = nextPrime(minCapacity); rehash(newCapacity);//再哈希 } } protected int indexOfValue(Object value) {//获取值的索引 final Object val[] = values; final byte stat[] = state; for (int i=stat.length; --i >= 0;) { if (stat[i]==FULL && val[i]==value) return i; } return -1; // not found } //获取value的第一个键值,可能的多个 public long keyOf(Object value) { int i = indexOfValue(value); if (i< 0) return Long.MIN_VALUE; return table[i]; } public long[] keys() {//所有的键 long[] elements = new long[distinct]; long[] tab = table; byte[] stat = state; int j=0; for (int i = tab.length ; i-- > 0 ;) { if (stat[i]==FULL) { elements[j++]=tab[i]; } } return elements; } public int size() {//当前存了多少对键值 return distinct; } public boolean isEmpty() { return distinct == 0; } // 如果元素放得太满,就必须进行rehash(再哈希)。再哈希使空间增大,并将原有的对象重新导入新的LongHashMap中, //而原始的LongHashMap被删除。loadfactor(装载因子)决定何时要对LongHashMap进行再哈希。 protected void rehash(int newCapacity) {//用新的容量重新构建LongHashMap int oldCapacity = table.length;//原来的容量 long oldTable[] = table;//原来的键 Object oldValues[] = values;//原来的值 byte oldState[] = state; long newTable[] = new long[newCapacity]; Object newValues[] = new Object[newCapacity]; byte newState[] = new byte[newCapacity]; //(int) (newCapacity * minLoadFactor); this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor); this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor); this.table = newTable; this.values = newValues; this.state = newState; this.freeEntries = newCapacity-this.distinct; // 当前的剩余空间 for (int i = oldCapacity ; i-- > 0 ;) { if (oldState[i]==FULL) { long element = oldTable[i]; int index = indexOfInsertion(element); newTable[index]=element; newValues[index]=oldValues[i]; newState[index]=FULL; } } } public Object[] values() { Object[] elements = new Object[distinct]; Object[] val = values; byte[] stat = state; int j=0; for (int i = stat.length ; i-- > 0 ;) { if (stat[i]==FULL) { elements[j++]=val[i]; } } return elements; } private int chooseHighWaterMark(int capacity, double maxLoad) { return Math.min(capacity-2, (int) (capacity * maxLoad)); } protected int chooseLowWaterMark(int capacity, double minLoad) { return (int) (capacity * minLoad); } protected int chooseMeanCapacity(int size, double minLoad, double maxLoad) { return nextPrime(Math.max(size+1, (int) ((2*size / (minLoad+maxLoad))))); } protected int chooseShrinkCapacity(int size, double minLoad, double maxLoad) { return nextPrime(Math.max(size+1, (int) ((4*size / (minLoad+3*maxLoad))))); } protected int nextPrime(int desiredCapacity) {//对指定的容量,在素数表中进行对半查找,返回一个素数容量 int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity); if(desiredCapacity==100) System.out.println("i="+i); if (i< 0) { i = -i -1; } return primeCapacities[i]; } private void printState(){ for(int i=0;i< state.length;i++) System.out.print(state[i]+" "); System.out.println(); } public static final int LARGEST_PRIME = Integer.MAX_VALUE; //最大的素数. private static final int[] primeCapacities = {//容量素数表 LARGEST_PRIME, //chunk #1 5,11,23,47,97,197,397,797,1597,3203,6421,12853,25717,51437,102877,205759, 411527,823117,1646237,3292489,6584983,13169977,26339969,52679969,105359939, 210719881,421439783,842879579,1685759167, //chunk #2 433,877,1759,3527,7057,14143,28289,56591,113189,226379,452759,905551,1811107, 3622219,7244441,14488931,28977863,57955739,115911563,231823147,463646329,927292699, 1854585413, //chunk #3 953,1907,3821,7643,15287,30577,61169,122347,244703,489407,978821,1957651,3915341, 7830701,15661423,31322867,62645741,125291483,250582987,501165979,1002331963, 2004663929, //chunk #4 1039,2081,4177,8363,16729,33461,66923,133853,267713,535481,1070981,2141977,4283963, 8567929,17135863,34271747,68543509,137087021,274174111,548348231,1096696463, //chunk #5 31,67,137,277,557,1117,2237,4481,8963,17929,35863,71741,143483,286973,573953, 1147921,2295859,4591721,9183457,18366923,36733847,73467739,146935499,293871013, 587742049,1175484103, //chunk #6 599,1201,2411,4831,9677,19373,38747,77509,155027,310081,620171,1240361,2480729, 4961459,9922933,19845871,39691759,79383533,158767069,317534141,635068283,1270136683, //chunk #7 311,631,1277,2557,5119,10243,20507,41017,82037,164089,328213,656429,1312867, 2625761,5251529,10503061,21006137,42012281,84024581,168049163,336098327,672196673, 1344393353, //chunk #8 3,7,17,37,79,163,331,673,1361,2729,5471,10949,21911,43853,87719,175447,350899, 701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557, 359339171,718678369,1437356741, //chunk #9 43,89,179,359,719,1439,2879,5779,11579,23159,46327,92657,185323,370661,741337, 1482707,2965421,5930887,11861791,23723597,47447201,94894427,189788857,379577741, 759155483,1518310967, //chunk #10 379,761,1523,3049,6101,12203,24407,48817,97649,195311,390647,781301,1562611, 3125257,6250537,12501169,25002389,50004791,100009607,200019221,400038451,800076929, 1600153859 }; static { java.util.Arrays.sort(primeCapacities); } public static void main(String args[]){ LongHashMap lh=new LongHashMap(5);//初始容量为5 System.out.println("size="+lh.size()); for(int i=0;i< 3;i++)//先放三个 lh.put(i, Integer.valueOf(i)); System.out.println("size="+lh.size()); lh.removeKey(1);//删除二个 lh.removeKey(2); lh.put(123,"ok");//添加一个 //看看状态 lh.printState(); lh.put(1234,"oo");//再放一个 //看看状态 lh.printState(); //取出来 System.out.println(lh.get(0)); System.out.println(lh.get(123)); System.out.println(lh.get(1234)); } }Read full article from LongHashMap学习笔记