Hash table. Dynamic resizing
With the growth of hash table's load factor, number of collisions increases, which leads to the decrease of overall table's performance. It is bearable for hash tables with chaining, but unacceptable for hash tables based on open addressing due to essential performance drop. The solution is to resize table, when its load factor exceeds given threshold.
As well, when table becomes too rare, it's reasonable to pack the array in order to save space.
Resizing algorithm
Remember, that hash values depend on table's size. Hence, hashes of entries are changed when resizing and algorithm can't just copy data from old storage to new one. For general hash function the only thing to do is to iterate over old hash table and add each entry to a new table.
Complexity analysis
Dynamic resizing doesn't affect amortized complexity of the hash table's operations. But resizing is done at once and operation, which triggers resizing, takes O(n) time to complete, where n is a number of entries in the table. This fact may make dynamic-sized hash tables inappropriate for real-time applications.
Code snippets
As it was mentioned above, table may need resizing in two cases: it is filled too tight (loadFactor > thresholdMax) or it is filled too rare (loadFactor < thresholdMin). We consider only the first case here to maintain simplicity. Java implementation is done for open addressing based hash table and C++ one is done for hash table with chaining. Highlighted code strings refer to functionality responsible for resizing.
Java implementation
public class HashMap {
private final int DEFAULT_TABLE_SIZE = 128;
private float threshold = 0.75f;
private int maxSize = 96;
private int size = 0;
HashEntry[] table;
HashMap() {
table = new HashEntry[DEFAULT_TABLE_SIZE];
for (int i = 0; i < DEFAULT_TABLE_SIZE; i++)
table[i] = null;
}
void setThreshold(float threshold) {
this.threshold = threshold;
maxSize = (int) (table.length * threshold);
}
public int get(int key) {
int hash = (key % table.length);
int initialHash = -1;
while (hash != initialHash
&& (table[hash] == DeletedEntry.getUniqueDeletedEntry()
|| table[hash] != null
&& table[hash].getKey() != key)) {
if (initialHash == -1)
initialHash = hash;
hash = (hash + 1) % table.length;
}
if (table[hash] == null || hash == initialHash)
return -1;
else
return table[hash].getValue();
}
public void put(int key, int value) {
int hash = (key % table.length);
int initialHash = -1;
int indexOfDeletedEntry = -1;
while (hash != initialHash
&& (table[hash] == DeletedEntry.getUniqueDeletedEntry()
|| table[hash] != null
&& table[hash].getKey() != key)) {
if (initialHash == -1)
initialHash = hash;
if (table[hash] == DeletedEntry.getUniqueDeletedEntry())
indexOfDeletedEntry = hash;
hash = (hash + 1) % table.length;
}
if ((table[hash] == null || hash == initialHash)
&& indexOfDeletedEntry != -1) {
table[indexOfDeletedEntry] = new HashEntry(key, value);
size++;
} else if (initialHash != hash)
if (table[hash] != DeletedEntry.getUniqueDeletedEntry()
&& table[hash] != null && table[hash].getKey() == key)
table[hash].setValue(value);
else {
table[hash] = new HashEntry(key, value);
size++;
}
if (size >= maxSize)
resize();
}
private void resize() {
int tableSize = 2 * table.length;
maxSize = (int) (tableSize * threshold);
HashEntry[] oldTable = table;
table = new HashEntry[tableSize];
size = 0;
for (int i = 0; i < oldTable.length; i++)
if (oldTable[i] != null
&& oldTable[i] != DeletedEntry.getUniqueDeletedEntry())
put(oldTable[i].getKey(), oldTable[i].getValue());
}
public void remove(int key) {
int hash = (key % table.length);
int initialHash = -1;
while (hash != initialHash
&& (table[hash] == DeletedEntry.getUniqueDeletedEntry()
|| table[hash] != null
&& table[hash].getKey() != key)) {
if (initialHash == -1)
initialHash = hash;
hash = (hash + 1) % table.length;
}
if (hash != initialHash && table[hash] != null) {
table[hash] = DeletedEntry.getUniqueDeletedEntry();
size--;
}
}
}
C++ implementation
const int DEFAULT_TABLE_SIZE = 128;
class HashMap {
private:
float threshold;
int maxSize;
int tableSize;
int size;
LinkedHashEntry **table;
void resize() {
int oldTableSize = tableSize;
tableSize *= 2;
maxSize = (int) (tableSize * threshold);
LinkedHashEntry **oldTable = table;
table = new LinkedHashEntry*[tableSize];
for (int i = 0; i < tableSize; i++)
table[i] = NULL;
size = 0;
for (int hash = 0; hash < oldTableSize; hash++)
if (oldTable[hash] != NULL) {
LinkedHashEntry *oldEntry;
LinkedHashEntry *entry = oldTable[hash];
while (entry != NULL) {
put(entry->getKey(), entry->getValue());
oldEntry = entry;
entry = entry->getNext();
delete oldEntry;
}
}
delete[] oldTable;
}
public:
HashMap() {
threshold = 0.75f;
maxSize = 96;
tableSize = DEFAULT_TABLE_SIZE;
size = 0;
table = new LinkedHashEntry*[tableSize];
for (int i = 0; i < tableSize; i++)
table[i] = NULL;
}
void setThreshold(float threshold) {
this->threshold = threshold;
maxSize = (int) (tableSize * threshold);
}
int get(int key) {
int hash = (key % tableSize);
if (table[hash] == NULL)
return -1;
else {
LinkedHashEntry *entry = table[hash];
while (entry != NULL && entry->getKey() != key)
entry = entry->getNext();
if (entry == NULL)
return -1;
else
return entry->getValue();
}
}
void put(int key, int value) {
int hash = (key % tableSize);
if (table[hash] == NULL) {
table[hash] = new LinkedHashEntry(key, value);
size++;
} else {
LinkedHashEntry *entry = table[hash];
while (entry->getNext() != NULL)
entry = entry->getNext();
if (entry->getKey() == key)
entry->setValue(value);
else {
entry->setNext(new LinkedHashEntry(key, value));
size++;
}
}
if (size >= maxSize)
resize();
}
void remove(int key) {
int hash = (key % tableSize);
if (table[hash] != NULL) {
LinkedHashEntry *prevEntry = NULL;
LinkedHashEntry *entry = table[hash];
while (entry->getNext() != NULL && entry->getKey() != key) {
prevEntry = entry;
entry = entry->getNext();
}
if (entry->getKey() == key) {
if (prevEntry == NULL) {
LinkedHashEntry *nextEntry = entry->getNext();
delete entry;
table[hash] = nextEntry;
} else {
LinkedHashEntry *next = entry->getNext();
delete entry;
prevEntry->setNext(next);
}
size--;
}
}
}
~HashMap() {
for (int hash = 0; hash < tableSize; hash++)
if (table[hash] != NULL) {
LinkedHashEntry *prevEntry = NULL;
LinkedHashEntry *entry = table[hash];
while (entry != NULL) {
prevEntry = entry;
entry = entry->getNext();
delete prevEntry;
}
}
delete[] table;
}
};
Previous: Open addressing | Next: More about hash functions |
Contribute to AlgoList
Liked this tutorial? Please, consider making a donation. Contribute to help us keep sharing free knowledge and write new tutorials.
Every dollar helps!