Title: Hashing
1Hashing
2??????????????????????????????? hash table
- Insert
- Delete
- find (constant time)
- sort ??????
- Findmin findmax ??????
- ????????????????????? binary search tree
3Hash table
- ????? key ??? value ??????????? Map ??
- ?????? key ?????? hash function ??????????? index
????????? value - ????????????????????????
- ?????????
- ?????????????????????????????????? index ???????
(????????????????????????????????????????)
4Hash function
- ??????????????????????????? ?????????
- ????????? mod tableSize
- ?????????????? 10, 20, 30, ?????????????????????
? - ??????????????? String ??? ??????????????????
hash function ???????????
5Hash function (1st example)
- ?????? ASCII ????????????????
- public static int hash(String key, int
tableSize) - int hashVal 0
- for(int i 0 iltkey.length() i)
- hashVal key.charAt(i)
- return hashValtableSize
6- ??????????????????????????
- ?????????????????????? ?????????????????
- ASCII ???????????????????? 127
- ????????????????????????????????????? $127 \times 8$
- ???????????? ???????????
???????????
Index ????????????????????
7Hash function (2nd example)
- ??????????????????????????????????????????????????
????????????? - ???????????????????????????
- public static int hash(String key, int
tableSize) - return (key.charAt(0) 27key.charAt(1) 729
key.charAT(2))tableSize
2727
????????????? ??????????
???????????????????????????????? (10007
????????????????????????????????? prime
?????????????????????????)
8- ??????????????????????????????
- ???????????????????????
- ???????????????????????????
9Hash function (3rd example)
- ????? polynomial function ??? 37 ?????? Horners
Rule - ?????????????? $k_0 + 37 k_1 + 37^2 k_2$ ?????????
- $((k_2 \cdot 37) + k_1) \cdot 37 + k_0$
- Horner rule ???????????????? n ???
????????????????????????
10- public static int hash(String key, int
tableSize) - int hashVal 0
- for(int i 0 iltkey.length() i)
- hashVal 37hashValkey.charAt(i)
- hashVal tableSize
- if(hashVallt0)
- hashVal tableSize
- return hashVal
????? overflow
11- ??????????????????? ????????????????????
- ???????????????? ????????????
- ?????????????????????????? ???????????????????????
???????????????????????????????? - ???????? hash function ?????????????????????100
- ?????????????????????????????????? ???????????
collision - ????????????????????????????????? collision
12??????? separate chaining
- ???????????????? linked list
- ?????????? ????? hash function ???????????????????
???? - ??????????????? ?? ??? hash function
??????????????????? ??????????????????????????????
????????????????? ?????????????????????????
(?????????????????????????????????????????????????
?)
13??????????????????????
- public interface Hashable
-
- /
- Compute a hash function for this
object. - _at_param tableSize the hash table size.
- _at_return (deterministically) a number
between - 0 and tableSize-1, distributed
equitably. - /
- int hash( int tableSize )
-
14?????????????????
- Public class Student implements Hashable
- private String name
- private double number
- private int year
- public int hash(int tableSize)
- return SeparateChainingHashTable.hash(name,
tableSize) -
- public boolean equals(Object rhs)
- return name.equals(((Student)rhs).name)
-
???????????? static method ???????
15- public class SeparateChainingHashTable
-
- /
- Construct the hash table.
- /
- public SeparateChainingHashTable( )
-
- this( DEFAULT_TABLE_SIZE )
-
- /
- Construct the hash table.
- _at_param size approximate table size.
- /
- public SeparateChainingHashTable( int
size ) -
- theLists new LinkedList nextPrime(
size ) - for( int i 0 i lt theLists.length
i ) - theLists i new LinkedList(
)
16- /
- Insert into the hash table. If the
item is - already present, then do nothing.
- _at_param x the item to insert.
- /
- public void insert( Hashable x )
-
- LinkedList whichList theLists
x.hash( theLists.length ) - LinkedListItr itr whichList.find( x
) - if( itr.isPastEnd( ) )
- whichList.insert( x,
whichList.zeroth( ) ) -
- /
- Remove from the hash table.
- _at_param x the item to remove.
- /
- public void remove( Hashable x )
??? Student ????????
17- /
- Find an item in the hash table.
- _at_param x the item to search for.
- _at_return the matching item, or null if
not found. - /
- public Hashable find( Hashable x )
-
- return (Hashable)theLists x.hash(
theLists.length ) .find( x ).retrieve( ) -
- /
- Make the hash table logically empty.
- /
- public void makeEmpty( )
-
- for( int i 0 i lt theLists.length
i ) - theLists i .makeEmpty( )
-
18- /
- A hash routine for String objects.
- _at_param key the String to hash.
- _at_param tableSize the size of the hash
table. - _at_return the hash value.
- /
- public static int hash( String key, int
tableSize ) -
- int hashVal 0
- for( int i 0 i lt key.length( )
i ) - hashVal 37 hashVal
key.charAt( i ) - hashVal tableSize
- if( hashVal lt 0 )
- hashVal tableSize
- return hashVal
-
19- private static final int
DEFAULT_TABLE_SIZE 101 - / The array of Lists. /
- private LinkedList theLists
- /
- Internal method to find a prime number
at least as large as n. - _at_param n the starting number (must be
positive). - _at_return a prime number larger than or
equal to n. - /
- private static int nextPrime( int n )
-
- if( n 2 0 )
- n
- for( !isPrime( n ) n 2 )
-
- return n
20- /
- Internal method to test if a number is
prime. - Not an efficient algorithm.
- _at_param n the number to test.
- _at_return the result of the test.
- /
- private static boolean isPrime( int n )
-
- if( n 2 n 3 )
- return true
- if( n 1 n 2 0 )
- return false
- for( int i 3 i i lt n i 2 )
- if( n i 0 )
- return false
- return true
21- // Simple main
- public static void main( String args
) -
- SeparateChainingHashTable H new
SeparateChainingHashTable( ) - final int NUMS 4000
- final int GAP 37
- System.out.println( "Checking... (no
more output means success)" ) - for( int i GAP i ! 0 i ( i
GAP ) NUMS ) - H.insert( new MyInteger( i ) )
- for( int i 1 i lt NUMS i 2 )
- H.remove( new MyInteger( i ) )
- for( int i 2 i lt NUMS i2 )
- if( ((MyInteger)(H.find( new
MyInteger( i ) ))).intValue( ) ! i ) - System.out.println( "Find
fails " i )
22?????
- Load factor
- ???????????????????????????????????
- Search time = time to do hashing + time to search
list - = constant + time to search list
- Unsuccessful search
- Search time ???????????? ??? load factor
23- Successful search
- ???????????????? ??????????????????
?????????????????????????? 0 ????????? - ????????????????????????????????? N
?????????????????????? M ????? - ???????? ???????????????????????????????????? N-1
??? - ????????????????????????????????????????????????
$(N-1)/M$ ??? - $= \lambda - (1/M)$
- lambda ??????? ???????? M ????????
- ????????????? ??????????????????????????????
??????? $\lambda/2$ - ?????? ??????????????????????????????????? $1 + (\lambda/2)$
- ?????????????? ????????????????? ??????????? load
factor
24?????????? Open addressing
- ?????????????????? ???????????????
- ????? collision ??????????????????????????????????
???? - ????????????? h0(x), h1(x),
- $h_i(x) = (\mathrm{hash}(x) + f(i)) \bmod \mathrm{tableSize}$, $f(0) = 0$
- ??????????????????????? ????????????????????
???????????????????? - Load factor $< 0.5$
25Open addressing ??? linear probing
- F ?????????????????????????? i
- ????????????? $f(i) = i$
- ??????????????????????????????????????????????????
???? - ?????????? ???????????????????????????
?????????????????????? (primary clustering)
????????????? collide ?????????
??????????????????????????????????
26Open addressing ??? quadratic probing
- ????????? primary clustering
- ????????????????????????? $f(i) = i^2$
- ?????????? $h_i(x) = (\mathrm{hash}(x) + f(i)) \bmod \mathrm{tableSize}$
a
??? b ???????????????? a ????????? $1^2$
??????????????
??? c ???????????????? a ????????? $1^2$ ??????? b
????????????? $2^2$ ??????????????
27- ????????????????????????????????
?????????????????????????????????
????????????????????????????????? - ??????????????????????????????????????????????????
????????????????? ????????????????????????????????
???????
28???????
- ????????????????????????????????????????
- ??? $(h(x) + i^2) \bmod \mathrm{tableSize}$
- $= (h(x) + j^2) \bmod \mathrm{tableSize}$
- Prove by contradiction
- ???????????????????????????????????????????? i
?????????? j
???????????????????????????
29???????(???)
- $i - j = 0$ ???????????????? ??????????????????????????
????????????? - $i + j = 0$ ?????????????? ?????????????
- ?????????????????????? ???????????????
??????????????????? ?????? - ??????? ??????????????? ???????????????????
- ????????????????????????????????????????????????
?????????????????????
30???????????????????????????
- ?????????? ????????????????????????????????????
- ???????? ????????? 16 ????????????????????????????
???? - ??? quadratic probing
32
12
22
42
52
62
72
??????????????????????
31??????????????????????
- ????????????????????? ????????????????(??????
insert ?????????????????????????????)
?????????????????????????????? ???????????????????
??????????????
32
12
22
42
52
62
72
?????????????? lazy deletion ?????????????????????
???????????
32Open addressing implementation
- class HashEntry
- Hashable element // the element
- boolean isActive // false is deleted
- public HashEntry( Hashable e )
- this( e, true )
-
- public HashEntry( Hashable e, boolean i
) - element e
- isActive i
-
-
33- public class QuadraticProbingHashTable
- private static final int DEFAULT_TABLE_SIZE
11 - / The array of elements. /
- private HashEntry array // The
array of elements - private int currentSize // The
number of occupied cells -
- public QuadraticProbingHashTable( )
- this( DEFAULT_TABLE_SIZE )
-
- /
- Construct the hash table.
- _at_param size the approximate initial
size. - /
- public QuadraticProbingHashTable( int
size ) - allocateArray( size )
- makeEmpty( )
-
nonactive
null
active
34- /
- Internal method to allocate array.
- _at_param arraySize the size of the
array. - /
- private void allocateArray( int arraySize
) - array new HashEntry arraySize
-
- /
- Make the hash table logically empty.
- /
- public void makeEmpty( )
- currentSize 0
- for( int i 0 i lt array.length i
) - array i null
-
35- /
- Return true if currentPos exists and
is active. - _at_param currentPos the result of a call
to findPos. - _at_return true if currentPos is active.
- /
- private boolean isActive( int currentPos
) - return array currentPos ! null
array currentPos .isActive -
36- /
- Method that performs quadratic probing
resolution. - _at_param x the item to search for.
- _at_return the position where the search
terminates. - /
- private int findPos( Hashable x )
- / 1/ int collisionNum 0
- / 2/ int currentPos x.hash( array.length
) - / 3/ while( array currentPos ! null
- !array currentPos
.element.equals( x ) ) - / 4/ currentPos 2 collisionNum
- 1 // Compute ith probe - / 5/ if( currentPos gt array.length )
// Implement the mod - / 6/ currentPos - array.length
-
- / 7/ return currentPos
-
$f(i) = i^2 = f(i-1) + 2i - 1$
37- /
- Find an item in the hash table.
- _at_param x the item to search for.
- _at_return the matching item.
- /
- public Hashable find( Hashable x )
- int currentPos findPos( x )
- return isActive( currentPos ) ? array
currentPos .element null -
38- /
- Insert into the hash table. If the
item is - already present, do nothing.
- _at_param x the item to insert.
- /
- public void insert( Hashable x )
-
- // Insert x as active
- int currentPos findPos( x )
- if( isActive( currentPos ) )
- return //x is already
inside, so do nothing - array currentPos new HashEntry(
x, true ) - // Rehash see Section 5.5
- if( currentSize gt array.length / 2
) - rehash( )
-
39???? O(N) ??????? N ????????? rehash
???????????????????? 2N ???????????? ???????
rehash ?????????????????????????????
- /
- Expand the hash table.
- /
- private void rehash( )
-
- HashEntry oldArray array
- // Create a new double-sized,
empty table - allocateArray( nextPrime( 2
oldArray.length ) ) - currentSize 0
- // Copy table over
- for( int i 0 i lt oldArray.length
i ) - if( oldArray i ! null
oldArray i .isActive ) - insert( oldArray i .element
) - return
-
40????????? rehash
- Rehash ?????????????????
- ?????????????????????????????
- ??????? insert ?????????????????? ????
- ??????????????????????????????
- ????????????????????????????? ???? insert
???????? - ??????????????????? ???????????????????????
41hash, nextPrime, isPrime ??????????
- /
- Remove from the hash table.
- _at_param x the item to remove.
- /
- public void remove( Hashable x )
-
- int currentPos findPos( x )
- if( isActive( currentPos ) )
- array currentPos .isActive
false -
42?????????? quadratic probing
- Secondary clustering
- ?????? double hashing ???????
- $f(i) = i \cdot \mathrm{hash}_2(x)$ ?????????????????????????
hash2(x), 2 hash2(x), ????????? - ????????????????????????????????????????
- ??????????????????? $\mathrm{hash}_2(x)$ ???? $x \bmod 9$
???????????????????????? 99 ????????? 0 ???? - ????????????????????????? 0
- ???????????????????????????????????????
43???????? hash2
- $\mathrm{hash}_2(x) = R - (x \bmod R)$ ??? R ?????????????????????????
tableSize - ??????????????? 16 ?????? 9, 25, 26, 41, 42, 58
???????? hash ?????????? $x \bmod \mathrm{tableSize}$
26 9 25
25 ???????????????? $13 - (25 \bmod 13) = 1$
26 ???????????????? $13 - (26 \bmod 13) = 13$
4441 26 9 25 42
41 ???????????????? $13 - (41 \bmod 13) = 11$
42 ???????????????? $13 - (42 \bmod 13) = 10$
??? 42 ???????????? ???????????????????? $2 \times 10$
4558 41 26 9 25 42
58 ????? ?????????????? $13 - (58 \bmod 13) = 7$