reimplementation of hash in c++, problem in rehash() method
I am asked to reimplement the hash in c++. the hash will be used for sets. I have all the code. i just need the rehash function. the goal of rehash function is to increase size of hash table based on data; so, if data are more than half-size of table, table size should be increased (based on some prime array values) and elements are redistributed in that. I can not change the public interface but i can change the private part.
#ifndef HASHSET_H
#define HASHSET_H
//
// Hash table implementation of set
// (open addressing, quadratic hashing)
#include <algorithm>
#include <functional>
#include <iterator>
#include <vector>
#include <iostream>
#include <cassert>
enum HashStatus { Occupied, Empty, Deleted };
template <class T>
struct HashEntry
{
T data;
unsigned hash;
HashStatus info;
HashEntry(): info(Empty) {}
HashEntry(const T& v, unsigned h, HashStatus status)
: data(v), hash(h), info(status) {}
private:
// Forbid copying of HashEntries
HashEntry(const HashEntry<T>&) {}
void operator=(const HashEntry<T>&) {}
// (If you think you need to copy these, you should consider
// copying pointers to them instead.)
};
template <class T, class HashFun, class CompareEQ=std::equal_to<T> >
class hash_set
{
public:
typedef ptrdiff_t size_type;
typedef T key_type;
typedef T value_type;
hash_set ();
hash_set (const hash_set<T,HashFun,CompareEQ>&);
~hash_set ();
const hash_set<T,HashFun,CompareEQ>&
operator= (const hash_set<T,HashFun,CompareEQ>&);
bool empty() const {return theSize == 0;}
size_type size() const {return theSize;}
void insert (const T& element);
unsigned count (const T& element) const;
void erase (const T& element);
void clear();
class const_iterator
{
public:
typedef std::bidirectional_iterator_tag iterator_category;
typedef T value_type;
typedef ptrdiff_t difference_type;
typedef const T* pointer;
typedef const T& reference;
const_iterator& operator ++()
{++current; advance(); return *this;}
const_iterator operator ++(int)
{
const_iterator clone = this;
++current;
advance();
return clone;
}
const_iterator & operator --()
{--current; regress(); return *this;}
const_iterator operator --(int)
{const_iterator clone = this;
--current;
regress();
return clone;}
reference operator * () { return (*current)->data; }
pointer operator -> () { return &((*current)->data); }
bool operator == (const const_iterator & rhs) const
{ return current == rhs.current; }
bool operator != (const const_iterator & rhs) const
{ return current != rhs.current; }
protected:
typename std::vector<HashEntry<T>*>::const_iterator current;
const std::vector<HashEntry<T>*>* v;
const_iterator (typename std::vector<HashEntry<T>*>::const_iterator p,
const std::vector<HashEntry<T>*>& theVector)
: current(p), v(&theVector) {}
void advance()
{
while (current != v->end()
&& ((*current) == 0
|| (*current)->info == Deleted
|| (*current)->info == Empty))
++current;
}
void regress()
{
while (current != v->begin()
&& ((*current) == 0
|| (*current)->info == Deleted
|| (*current)->info == Empty))
--current;
}
friend class hash_set<T,HashFun,CompareEQ>;
};
typedef const_iterator iterator;
const_iterator begin() const
{
const_iterator i = const_iterator(table.begin(), table);
i.advance();
return i;
}
const_iterator end() const {return const_iterator(table.end(), table);}
iterator begin()
{iterator i = iterator(table.begin(), table);
i.advance();
return i;
}
iterator end() {return iterator(table.end(), table);}
const_iterator find (const T& element) const;
iterator find (const T& element);
void printStats(std::ostream& out) const
{
using namespace std;
out << "size: " << theSize << " hSize: " << hSize << endl;
}
private:
static unsigned primes[13];
unsigned find (const T& element, unsigned h0) const;
void rehash();
std::vector<HashEntry<T>*> table;
int theSize;
unsigned hSize;
unsigned hSize_index;
HashFun hash;
CompareEQ compare;
};
template <class T, class Hash1, class Hash2, class Equals1, class Equals2>
bool operator== (const hash_set<T,Hash1,Equals1>& left,
const hash_set<T,Hash2,Equals2>& right)
{
if (left.size() == right.size())
{
for (typename hash_set<T,Hash1,Equals1>::const_iterator p = left.begin();
p != left.end(); ++p)
{
if (right.count(*p) == 0)
return false;
}
return true;
}
else
return false;
}
// Table of prime numbers: Use these for table sizes so that
// quadratic probing will succeed.
template <class T, class HashFun, class CompareEQ>
unsigned hash_set<T,HashFun,CompareEQ>::primes[13] =
{5, 13, 31, 61, 127, 521, 1279, 2281, 3217,
9689, 19937, 44497, 86243};
template <class T, class HashFun, class CompareEQ>
hash_set<T,HashFun,CompareEQ>::
hash_set (): theSize(0), hSize_index(0)
{
// hash table size is the hsize_Index_th prime number in the primes table
hSize = primes[hSize_index];
table.resize(hSize);
// Fill table with null pointers to strt with
fill (table.begin(), table.end(), (HashEntry<T>*)0);
}
template <class T, class HashFun, class CompareEQ>
hash_set<T,HashFun,CompareEQ>::
hash_set (const hash_set<T,HashFun,CompareEQ>& hs)
: hSize(hs.hSize), hSize_index(hs.hSize_index), theSize(0)
{
table.resize(hSize);
for (unsigned i = 0; i < hSize; ++i)
if ((hs.table[i] != 0) && (hs.table[i]->info == Occupied))
insert (hs.table[i]->data);
}
template <class T, class HashFun, class CompareEQ>
hash_set<T,HashFun,CompareEQ>::
~hash_set ()
{
clear();
}
template <class T, class HashFun, class CompareEQ>
const hash_set<T,HashFun,CompareEQ>&
hash_set<T,HashFun,CompareEQ>::
operator= (const hash_set<T,HashFun,CompareEQ>& hs)
{
if (this != &hs)
{
clear();
for (unsigned i = 0; i < hs.hSize; ++i)
if ((hs.table[i] != 0) && (hs.table[i]->info == Occupied))
insert (hs.table[i]->data);
}
return *this;
}
template <class T, class HashFun, class CompareEQ>
void hash_set<T,HashFun,CompareEQ>::
insert (const T& element)
{
unsigned h0 = hash(element);
// Is this item already in the table?
unsigned h = find(element, h0);
if (h == hSize)
{
// No. Find an empty or deleted slot to put it into.
unsigned count = 0;
h = h0 % hSize;
while (table[h] != 0
&& table[h]->info == Occupied
&& count < hSize)
{
++count;
h = (h0 + count*count) % hSize;
}
assert (count < hSize); // could not insert (table filled?)
delete table[h];
table[h] = new HashEntry<T>(element, h0, Occupied);
++theSize;
}
else
{ // Yes. Replace the data field.
table[h]->data = element;
}
// Is the table half full or more? If so, increase the table size
// and redistribute the data.
if (2*theSize >= hSize)
rehash();
}
template <class T, class HashFun, class C开发者_StackOverflowompareEQ>
void hash_set<T,HashFun,CompareEQ>::
rehash ()
{
//*** It should increase the size of
//*** the hash table to primes[hSize_index+1], redistributing the
//*** entries already in the table to their new positions.
}
template <class T, class HashFun, class CompareEQ>
unsigned hash_set<T,HashFun,CompareEQ>::
count (const T& element) const
{
unsigned h0 = hash(element);
unsigned h = find(element, h0);
return (h != hSize) ? 1 : 0;
}
template <class T, class HashFun, class CompareEQ>
void hash_set<T,HashFun,CompareEQ>::
erase (const T& element)
{
unsigned h0 = hash(element);
unsigned h = find(element, h0);
if (h != hSize)
{
table[h]->info = Deleted;
--theSize;
}
}
template <class T, class HashFun, class CompareEQ>
void hash_set<T,HashFun,CompareEQ>::
clear()
{
for (unsigned i = 0; i < hSize; ++i)
{
delete table[i];
table[i] = 0;
}
theSize = 0;
}
template <class T, class HashFun, class CompareEQ>
unsigned hash_set<T,HashFun,CompareEQ>::
find (const T& element, unsigned h0) const
{
unsigned h = h0 % hSize;
unsigned count = 0;
// Search for element, stopping at any empty slot
// or when we've searched the whole table
while (table[h] != 0 &&
(table[h]->info == Deleted ||
(table[h]->info == Occupied
&& (!compare(table[h]->data, element))))
&& count < hSize)
{
++count;
h = (h0 + count*count) % hSize;
}
if (count >= hSize
|| table[h] == 0
|| table[h]->info == Empty)
return hSize; // didn't find element
else
return h; // found it
}
template <class T, class HashFun, class CompareEQ>
typename hash_set<T,HashFun,CompareEQ>::const_iterator
hash_set<T,HashFun,CompareEQ>::
find (const T& element) const
{
unsigned h0 = hash(element);
unsigned h = find(element, h0);
if (h == hSize)
return end();
else
return const_iterator(table.begin()+h, table);
}
template <class T, class HashFun, class CompareEQ>
typename hash_set<T,HashFun,CompareEQ>::iterator
hash_set<T,HashFun,CompareEQ>::
find (const T& element)
{
unsigned h0 = hash(element);
unsigned h = find(element, h0);
if (h == hSize)
return end();
else
return iterator(table.begin()+h, table);
}
#endif
You create a new vector of the required size and then iterator over current table
inserting the HashEntry
s from the later into their appropriate index in former.
I notice the class retains the full hash-code in the member HashEntry::hash
, so there'll be no need to re-calculate the object hashes.
Then you need to replace table
with the new vector.
Remember to use std::vector::swap
to avoid unnecessary behind the scenes allocation and copying inside the std::vector
s.
Apart from a bit of housekeeping on hSize
and hSize_index
, I think that's about it.
There is a way of redistributing the entries within the existing table
and I think you may be wondering about that. I'm not sure there's any benefit in doing that.
精彩评论