LongHashMap学习笔记


LongHashMap学习笔记
一个HashMap代码,来自于jive。用三个数组实现,不同于jdk中的实现方式。处理哈希冲突是采用二次哈希(再哈希)的策略,学习了一把,个别地方可能没有理解透。
public final class LongHashMap {
    
    protected long table[];//存放键,类型为long,应该是用于特殊场所
    protected Object values[];//存放值
    protected byte state[];//state[i]=0,1,2表示table[i]与values[i]没有使用,已经使用,已删除
    protected int freeEntries;//空闲的空间数
    protected int distinct;//当前存了多少对键值
 
    protected int lowWaterMark;//当LongHashMap存放的键值对少于此数时,将重新调整(再哈希)
    protected int highWaterMark;//当LongHashMap存放的键值对大于此数时,将重新调整,由容量和装载因子决定
   
    protected double minLoadFactor;//最小装载因子
    protected double maxLoadFactor;//最大装载因子
   // 如果元素放得太满,就必须进行rehash(再哈希)。再哈希使空间增大,并将原有的对象重新导入新的LongHashMap中,
   //而原始的LongHashMap被删除。loadfactor(装载因子)决定何时要对LongHashMap进行再哈希。
    protected static final int DEFAULT_CAPACITY = 277;//缺省的容量,一个素数
    protected static final double DEFAULT_MIN_LOAD_FACTOR = 0.2;//缺省的最小的装载因子
    protected static final double DEFAULT_MAX_LOAD_FACTOR = 0.6;//缺省的最大的装载因子
    protected static final byte FREE = 0;
    protected static final byte FULL = 1;
    protected static final byte REMOVED = 2;
   //用缺省的容量构建HashMap
    public LongHashMap() {
        this(DEFAULT_CAPACITY);
    }
   //构造函数
    public LongHashMap(int initialCapacity) {
        this(initialCapacity, DEFAULT_MIN_LOAD_FACTOR, DEFAULT_MAX_LOAD_FACTOR);
    }
   //构造函数
    public LongHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
        setUp(initialCapacity,minLoadFactor,maxLoadFactor);
    }
    //使用指定的初始化容量,最小装载因子,最大装载因子构建LongHashMap
    protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
        if (initialCapacity < 0) {//参数检查
            throw new IllegalArgumentException(
                "Initial Capacity must not be less than zero: "+ initialCapacity
            );
        }
        if (minLoadFactor < 0.0 || minLoadFactor >= 1.0) {
                throw new IllegalArgumentException(
                    "Illegal minLoadFactor: "+ minLoadFactor
                );
        }
        if (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) {
                throw new IllegalArgumentException(
                    "Illegal maxLoadFactor: "+ maxLoadFactor
                );
        }
        if (minLoadFactor >= maxLoadFactor) {
                throw new IllegalArgumentException(
                    "Illegal minLoadFactor: " + minLoadFactor +
                    " and maxLoadFactor: " + maxLoadFactor
                );
        }
        int capacity = initialCapacity;
        capacity = nextPrime(capacity);//程序将调整初始化容量,使之为素数
      
        if (capacity==0) {
            capacity=1;
        }
        this.table = new long[capacity];//关键字数组
        this.values = new Object[capacity];//值数组
        this.state = new byte[capacity];//状态数组
       
        this.minLoadFactor = minLoadFactor;
        
        if (capacity == LARGEST_PRIME) this.maxLoadFactor = 1.0;
        else this.maxLoadFactor = maxLoadFactor;
        this.distinct = 0;//开始时,LongHashMap中没有存入键值对
        this.freeEntries = capacity; // 开始时空闲的空间数=容量
       
        this.lowWaterMark = 0;
        // Math.min(capacity-2, (int) (capacity * maxLoadFactor));
        this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
    }
     
     //扩容量,一个素数
     private int chooseGrowCapacity(int size, double minLoad, double maxLoad) {
        return nextPrime(Math.max(size+1, (int) ((4*size / (3*minLoad+maxLoad)))));
    }
     public boolean put(long key, Object value) {//在LongHashMap中存放键值对
        int i = indexOfInsertion(key);
        if (i< 0) { 
            i = -i -1;
            this.values[i]=value;
            return false;
        }
         if (this.distinct > this.highWaterMark) {//当存的健值对超过highWaterMark时,将重新构建LongHashMap
            int newCapacity = chooseGrowCapacity(
                    this.distinct+1,
                    this.minLoadFactor,
                    this.maxLoadFactor
                );
            rehash(newCapacity);//用新容量重新构造
            return put(key, value);
        }
        this.table[i]=key;
        this.values[i]=value;
        if (this.state[i]==FREE) this.freeEntries--;//剩余空间少了一个
        this.state[i]=FULL;
        this.distinct++;//当前存放的键值对数目加1
        if (this.freeEntries < 1) {
            int newCapacity = chooseGrowCapacity(
                    this.distinct+1,
                    this.minLoadFactor,
                    this.maxLoadFactor
                );
            rehash(newCapacity);//用新容量重新构造
        }
        return true;
    }
  
         //求关键字key的索引值,用于插入(添加)
    private final int indexOfInsertion(long key) {
        final long tab[] = table;
        final byte stat[] = state;
        final int length = tab.length;
        //这就是哈希函数了
        final int hash = ((int)(key ^ (key >> 32))) & 0x7FFFFFFF;
        int i = hash % length;
         //发生哈希冲突时,用于再哈希探测的步长
        int decrement = (hash) % (length-2);
      
        if (decrement == 0) decrement = 1;
        //stat[i]有三种情况
        while (stat[i] == FULL && tab[i] != key) {//第一种,发生哈希冲突,往前探测
            i -= decrement;
            if (i< 0) i+=length;
        }
        if (stat[i] == REMOVED) {//第二种,此位置原来的键值对已删除
           
            int j = i;
            //有意思,已删除的位置并不用来存新的
            while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
                i -= decrement;
                if (i< 0) i+=length;
            }
            if (stat[i] == FREE) i = j;
        }
        if (stat[i] == FULL) {//第三种,这种情况会出现吗?
          
            return -i-1;
        }
        //第三种,stat[i]=FREE
       
        return i;
    }
     //删除key的value
    public boolean removeKey(long key) {
        int i = indexOfKey(key);//获取关键字的索引
        if (i< 0) return false; 
        this.state[i]=REMOVED;//作删除标记
        this.values[i]=null; //注意:table[i]并没有置为null
        this.distinct--;
        if (this.distinct < this.lowWaterMark) {//存放的键值对少于lowWaterMark,重新调整
                int newCapacity = chooseShrinkCapacity(
                        this.distinct,
                        this.minLoadFactor,
                        this.maxLoadFactor
                    );
         rehash(newCapacity);//用新容量重新构造
        }
        return true;
    }
      public final Object get(long key) {//获取关键字对应的值
        int i = indexOfKey(key);
        if (i< 0) {
            return null;
        }
        else {
            return values[i];
        }
    }
    private final int indexOfKey(long key) {//求关键字之索引值,用于查找
        final long tab[] = table;
        final byte stat[] = state;
        final int length = tab.length;
        //这个是哈希函数
        final int hash = ((int)(key ^ (key >> 32))) & 0x7FFFFFFF;
        int i = hash % length;//得到了关键字的索引值
       
        //用于再哈希探测的步长
        int decrement = (hash) % (length-2);//减量
  
        if (decrement == 0) decrement = 1;
        while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
            i -= decrement;//往前找
            if (i< 0) i+=length;
        }
        if (stat[i] == FREE) return -1; // 没有找到
        return i; //找到了,返回索引值
    }
  
      public void clear() {//清空
        for (int i=0; i< state.length; i++) {
            state[i] = FREE;
        }
        for (int i=0; i< values.length-1; i++) {
            values[i] = null;
        }
        this.distinct = 0;
        this.freeEntries = table.length; 
        trimToSize();//清空以后,容量不能太大,以节约空间
    }
     public void trimToSize() {
       int newCapacity = nextPrime((int)(1 + 1.2*size()));
        if (table.length > newCapacity) {
            rehash(newCapacity);
        }
    }
    //是否包含key
    public boolean containsKey(long key) {
        return indexOfKey(key) >= 0;
    }
    //是否包含value
    public boolean containsValue(Object value) {
        return indexOfValue(value) >= 0;
    }
  
    public void ensureCapacity(int minCapacity) {//确保容量不小于minCapacity
        if (table.length < minCapacity) {
            int newCapacity = nextPrime(minCapacity);
            rehash(newCapacity);//再哈希
        }
    }
    protected int indexOfValue(Object value) {//获取值的索引
        final Object val[] = values;
        final byte stat[] = state;
        for (int i=stat.length; --i >= 0;) {
            if (stat[i]==FULL && val[i]==value) return i;
        }
        return -1; // not found
    }
   //获取value的第一个键值,可能的多个
    public long keyOf(Object value) {
        int i = indexOfValue(value);
        if (i< 0) return Long.MIN_VALUE;
        return table[i];
    }
 
    public long[] keys() {//所有的键
        long[] elements = new long[distinct];
        long[] tab = table;
        byte[] stat = state;
        int j=0;
        for (int i = tab.length ; i-- > 0 ;) {
            if (stat[i]==FULL) {
                elements[j++]=tab[i];
            }
        }
        return elements;
    }
  
     public int size() {//当前存了多少对键值
        return distinct;
    }
    
    public boolean isEmpty() {
        return distinct == 0;
    }
     // 如果元素放得太满,就必须进行rehash(再哈希)。再哈希使空间增大,并将原有的对象重新导入新的LongHashMap中,
   //而原始的LongHashMap被删除。loadfactor(装载因子)决定何时要对LongHashMap进行再哈希。
    protected void rehash(int newCapacity) {//用新的容量重新构建LongHashMap
        int oldCapacity = table.length;//原来的容量
        long oldTable[] = table;//原来的键
        Object oldValues[] = values;//原来的值
        byte oldState[] = state;
        long newTable[] = new long[newCapacity];
        Object newValues[] = new Object[newCapacity];
        byte newState[] = new byte[newCapacity];
        
         //(int) (newCapacity * minLoadFactor);
        this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
        this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
        this.table = newTable;
        this.values = newValues;
        this.state = newState;
        this.freeEntries = newCapacity-this.distinct; // 当前的剩余空间
        for (int i = oldCapacity ; i-- > 0 ;) {
            if (oldState[i]==FULL) {
                long element = oldTable[i];
                int index = indexOfInsertion(element);
                newTable[index]=element;
                newValues[index]=oldValues[i];
                newState[index]=FULL;
            }
        }
    }
   
  
    public Object[] values() {
        Object[] elements = new Object[distinct];
        Object[] val = values;
        byte[] stat = state;
        int j=0;
        for (int i = stat.length ; i-- > 0 ;) {
            if (stat[i]==FULL) {
                elements[j++]=val[i];
            }
        }
        return elements;
    }
   
    
    private int chooseHighWaterMark(int capacity, double maxLoad) {
          return Math.min(capacity-2, (int) (capacity * maxLoad));
    }
   
    protected int chooseLowWaterMark(int capacity, double minLoad) {
        return (int) (capacity * minLoad);
    }
    
    protected int chooseMeanCapacity(int size, double minLoad, double maxLoad) {
        return nextPrime(Math.max(size+1, (int) ((2*size / (minLoad+maxLoad)))));
    }
 
    protected int chooseShrinkCapacity(int size, double minLoad, double maxLoad) {
        return nextPrime(Math.max(size+1, (int) ((4*size / (minLoad+3*maxLoad)))));
    }
  
    protected int nextPrime(int desiredCapacity) {//对指定的容量,在素数表中进行对半查找,返回一个素数容量
        int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
        if(desiredCapacity==100) System.out.println("i="+i);
        if (i< 0) {
             i = -i -1; 
        }
        return primeCapacities[i];
    }
   private void printState(){
      for(int i=0;i< state.length;i++)
        System.out.print(state[i]+"  ");
      System.out.println();
   }
    
    public static final int LARGEST_PRIME = Integer.MAX_VALUE; //最大的素数.
   
    private static final int[] primeCapacities = {//容量素数表
        LARGEST_PRIME,
        //chunk #1
        5,11,23,47,97,197,397,797,1597,3203,6421,12853,25717,51437,102877,205759,
        411527,823117,1646237,3292489,6584983,13169977,26339969,52679969,105359939,
        210719881,421439783,842879579,1685759167,
        //chunk #2
        433,877,1759,3527,7057,14143,28289,56591,113189,226379,452759,905551,1811107,
        3622219,7244441,14488931,28977863,57955739,115911563,231823147,463646329,927292699,
        1854585413,
        //chunk #3
        953,1907,3821,7643,15287,30577,61169,122347,244703,489407,978821,1957651,3915341,
        7830701,15661423,31322867,62645741,125291483,250582987,501165979,1002331963,
        2004663929,
        //chunk #4
        1039,2081,4177,8363,16729,33461,66923,133853,267713,535481,1070981,2141977,4283963,
        8567929,17135863,34271747,68543509,137087021,274174111,548348231,1096696463,
        //chunk #5
        31,67,137,277,557,1117,2237,4481,8963,17929,35863,71741,143483,286973,573953,
        1147921,2295859,4591721,9183457,18366923,36733847,73467739,146935499,293871013,
        587742049,1175484103,
        //chunk #6
        599,1201,2411,4831,9677,19373,38747,77509,155027,310081,620171,1240361,2480729,
        4961459,9922933,19845871,39691759,79383533,158767069,317534141,635068283,1270136683,
        //chunk #7
        311,631,1277,2557,5119,10243,20507,41017,82037,164089,328213,656429,1312867,
        2625761,5251529,10503061,21006137,42012281,84024581,168049163,336098327,672196673,
        1344393353,
        //chunk #8
        3,7,17,37,79,163,331,673,1361,2729,5471,10949,21911,43853,87719,175447,350899,
        701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
        359339171,718678369,1437356741,
        //chunk #9
        43,89,179,359,719,1439,2879,5779,11579,23159,46327,92657,185323,370661,741337,
        1482707,2965421,5930887,11861791,23723597,47447201,94894427,189788857,379577741,
        759155483,1518310967,
        //chunk #10
        379,761,1523,3049,6101,12203,24407,48817,97649,195311,390647,781301,1562611,
        3125257,6250537,12501169,25002389,50004791,100009607,200019221,400038451,800076929,
        1600153859
    };
    static { 
        java.util.Arrays.sort(primeCapacities);
    }
     public static void main(String args[]){
      LongHashMap lh=new LongHashMap(5);//初始容量为5
      System.out.println("size="+lh.size());
      for(int i=0;i< 3;i++)//先放三个
        lh.put(i, Integer.valueOf(i));
      System.out.println("size="+lh.size());
      lh.removeKey(1);//删除二个
      lh.removeKey(2);
      lh.put(123,"ok");//添加一个
      //看看状态
      lh.printState();
      lh.put(1234,"oo");//再放一个
      //看看状态 
      lh.printState();
      //取出来
      System.out.println(lh.get(0));
      System.out.println(lh.get(123));
      System.out.println(lh.get(1234));
      
    }
}
Read full article from LongHashMap学习笔记

Labels

LeetCode (1432) GeeksforGeeks (1122) LeetCode - Review (1067) Review (882) Algorithm (668) to-do (609) Classic Algorithm (270) Google Interview (237) Classic Interview (222) Dynamic Programming (220) DP (186) Bit Algorithms (145) POJ (141) Math (137) Tree (132) LeetCode - Phone (129) EPI (122) Cracking Coding Interview (119) DFS (115) Difficult Algorithm (115) Lintcode (115) Different Solutions (110) Smart Algorithm (104) Binary Search (96) BFS (91) HackerRank (90) Binary Tree (86) Hard (79) Two Pointers (78) Stack (76) Company-Facebook (75) BST (72) Graph Algorithm (72) Time Complexity (69) Greedy Algorithm (68) Interval (63) Company - Google (62) Geometry Algorithm (61) Interview Corner (61) LeetCode - Extended (61) Union-Find (60) Trie (58) Advanced Data Structure (56) List (56) Priority Queue (53) Codility (52) ComProGuide (50) LeetCode Hard (50) Matrix (50) Bisection (48) Segment Tree (48) Sliding Window (48) USACO (46) Space Optimization (45) Company-Airbnb (41) Greedy (41) Mathematical Algorithm (41) Tree - Post-Order (41) ACM-ICPC (40) Algorithm Interview (40) Data Structure Design (40) Graph (40) Backtracking (39) Data Structure (39) Jobdu (39) Random (39) Codeforces (38) Knapsack (38) LeetCode - DP (38) Recursive Algorithm (38) String Algorithm (38) TopCoder (38) Sort (37) Introduction to Algorithms (36) Pre-Sort (36) Beauty of Programming (35) Must Known (34) Binary Search Tree (33) Follow Up (33) prismoskills (33) Palindrome (32) Permutation (31) Array (30) Google Code Jam (30) HDU (30) Array O(N) (29) Logic Thinking (29) Monotonic Stack (29) Puzzles (29) Code - Detail (27) Company-Zenefits (27) Microsoft 100 - July (27) Queue (27) Binary Indexed Trees (26) TreeMap (26) to-do-must (26) 1point3acres (25) GeeksQuiz (25) Merge Sort (25) Reverse Thinking (25) hihocoder (25) Company - LinkedIn (24) Hash (24) High Frequency (24) Summary (24) Divide and Conquer (23) Proof (23) Game Theory (22) Topological Sort (22) Lintcode - Review (21) Tree - Modification (21) Algorithm Game (20) CareerCup (20) Company - Twitter (20) DFS + Review (20) DP - Relation (20) Brain Teaser (19) DP - Tree (19) Left and Right Array (19) O(N) (19) Sweep Line (19) UVA (19) DP - Bit Masking (18) LeetCode - Thinking (18) KMP (17) LeetCode - TODO (17) Probabilities (17) Simulation (17) String Search (17) Codercareer (16) Company-Uber (16) Iterator (16) Number (16) O(1) Space (16) Shortest Path (16) itint5 (16) DFS+Cache (15) Dijkstra (15) Euclidean GCD (15) Heap (15) LeetCode - Hard (15) Majority (15) Number Theory (15) Rolling Hash (15) Tree Traversal (15) Brute Force (14) Bucket Sort (14) DP - Knapsack (14) DP - Probability (14) Difficult (14) Fast Power Algorithm (14) Pattern (14) Prefix Sum (14) TreeSet (14) Algorithm Videos (13) Amazon Interview (13) Basic Algorithm (13) Codechef (13) Combination (13) Computational Geometry (13) DP - Digit (13) LCA (13) LeetCode - DFS (13) Linked List (13) Long Increasing Sequence(LIS) (13) Math-Divisible (13) Reservoir Sampling (13) mitbbs (13) Algorithm - How To (12) Company - Microsoft (12) DP - Interval (12) DP - Multiple Relation (12) DP - Relation Optimization (12) LeetCode - Classic (12) Level Order Traversal (12) Prime (12) Pruning (12) Reconstruct Tree (12) Thinking (12) X Sum (12) AOJ (11) Bit Mask (11) Company-Snapchat (11) DP - Space Optimization (11) Dequeue (11) Graph DFS (11) MinMax (11) Miscs (11) Princeton (11) Quick Sort (11) Stack - Tree (11) 尺取法 (11) 挑战程序设计竞赛 (11) Coin Change (10) DFS+Backtracking (10) Facebook Hacker Cup (10) Fast Slow Pointers (10) HackerRank Easy (10) Interval Tree (10) Limited Range (10) Matrix - Traverse (10) Monotone Queue (10) SPOJ (10) Starting Point (10) States (10) Stock (10) Theory (10) Tutorialhorizon (10) Kadane - Extended (9) Mathblog (9) Max-Min Flow (9) Maze (9) Median (9) O(32N) (9) Quick Select (9) Stack Overflow (9) System Design (9) Tree - Conversion (9) Use XOR (9) Book Notes (8) Company-Amazon (8) DFS+BFS (8) DP - States (8) Expression (8) Longest Common Subsequence(LCS) (8) One Pass (8) Quadtrees (8) Traversal Once (8) Trie - Suffix (8) 穷竭搜索 (8) Algorithm Problem List (7) All Sub (7) Catalan Number (7) Cycle (7) DP - Cases (7) Facebook Interview (7) Fibonacci Numbers (7) Flood fill (7) Game Nim (7) Graph BFS (7) HackerRank Difficult (7) Hackerearth (7) Inversion (7) Kadane’s Algorithm (7) Manacher (7) Morris Traversal (7) Multiple Data Structures (7) Normalized Key (7) O(XN) (7) Radix Sort (7) Recursion (7) Sampling (7) Suffix Array (7) Tech-Queries (7) Tree - Serialization (7) Tree DP (7) Trie - Bit (7) 蓝桥杯 (7) Algorithm - Brain Teaser (6) BFS - Priority Queue (6) BFS - Unusual (6) Classic Data Structure Impl (6) DP - 2D (6) DP - Monotone Queue (6) DP - Unusual (6) DP-Space Optimization (6) Dutch Flag (6) How To (6) Interviewstreet (6) Knapsack - MultiplePack (6) Local MinMax (6) MST (6) Minimum Spanning Tree (6) Number - Reach (6) Parentheses (6) Pre-Sum (6) Probability (6) Programming Pearls (6) Rabin-Karp (6) Reverse (6) Scan from right (6) Schedule (6) Stream (6) Subset Sum (6) TSP (6) Xpost (6) n00tc0d3r (6) reddit (6) AI (5) Abbreviation (5) Anagram (5) Art Of Programming-July (5) Assumption (5) Bellman Ford (5) Big Data (5) Code - Solid (5) Code Kata (5) Codility-lessons (5) Coding (5) Company - WMware (5) Convex Hull (5) Crazyforcode (5) DFS - Multiple (5) DFS+DP (5) DP - Multi-Dimension (5) DP-Multiple Relation (5) Eulerian Cycle (5) Graph - Unusual (5) Graph Cycle (5) Hash Strategy (5) Immutability (5) Java (5) LogN (5) Manhattan Distance (5) Matrix Chain Multiplication (5) N Queens (5) Pre-Sort: Index (5) Quick Partition (5) Quora (5) Randomized Algorithms (5) Resources (5) Robot (5) SPFA(Shortest Path Faster Algorithm) (5) Shuffle (5) Sieve of Eratosthenes (5) Strongly Connected Components (5) Subarray Sum (5) Sudoku (5) Suffix Tree (5) Swap (5) Threaded (5) Tree - Creation (5) Warshall Floyd (5) Word Search (5) jiuzhang (5)

Popular Posts