1. 程式人生 > >STL原始碼分析之hash表(gnu-c++ 2.9)

STL原始碼分析之hash表(gnu-c++ 2.9)

1、基本概念
關於hash表的基本概念這裡不再贅述;各種hash表實作的差異主要在於雜湊函式和衝突處理方法。STL採用的是開鏈法(separate chaining):每個hash桶維護一條連結串列,hash函式計算出桶的位置後,就將節點插入該位置的連結串列上。因此,凡是底層實現為hash表的容器,其迭代器的實現都類似於deque的迭代器:不僅要能在一條連結串列上移動,還得在到達連結串列末端時,讓下一次移動進入下一個hash桶裡面的連結串列。
2、 STL雜湊表結構
開鏈法:

// A single node in a bucket's singly linked chain: a link to the following
// node plus the stored value.
template<typename _Val>
struct _Hashtable_node
{
  _Hashtable_node<_Val>* _M_next;  // following node in the same chain
  _Val _M_val;                     // value stored in this node
};

雜湊表定義時要指定陣列大小n,不過實際分配的陣列長度是一個根據n計算而來的質數(下段)。

// Sets up the bucket array for a requested size of __n: the actual bucket
// count comes from _M_next_size(__n), every bucket starts out as a null
// pointer, and the element counter is reset to zero.
void _M_initialize_buckets(size_type __n)
  {
    const size_type __bucket_count = _M_next_size(__n);
    // Reserve first so the bulk insert below does not need to reallocate.
    _M_buckets.reserve(__bucket_count);
    _M_buckets.insert(_M_buckets.end(), __bucket_count,
                      static_cast<_Node*>(0));
    _M_num_elements = 0;
  }
 // Returns the first entry of the prime table that is not less than __n.
 // If __n is larger than every entry, the last entry of the table is
 // returned instead.
 inline unsigned long
  __stl_next_prime(unsigned long __n)
  {
    const unsigned long* const __begin =
      _Hashtable_prime_list<unsigned long>::_S_get_prime_list();
    const unsigned long* const __end = __begin + (int)_S_num_primes;

    // lower_bound performs a binary search, so the table must be sorted
    // in ascending order.
    const unsigned long* const __hit = std::lower_bound(__begin, __end, __n);
    if (__hit != __end)
      return *__hit;
    return *(__end - 1);
  }

以 std::lower_bound 從 prime_list 中找到第一個不小於 n(即大於或等於 n)的質數;若 n 比表中所有質數都大,則回傳表中最後(最大)的那個質數。prime_list 是已經計算好的靜態陣列,包含了29個質數(之後版本有所改變)。

// Definition of the static table of candidate bucket counts.
// The initializer lists 29 values in ascending order; from the second entry
// on, each value is roughly twice the previous one, so moving to the next
// entry approximately doubles the bucket count.
// NOTE(review): the last entries exceed 2^31, so they assume _PrimeType is
// at least 32 bits wide and unsigned -- confirm for the target platform.
template<typename _PrimeType> const _PrimeType
  _Hashtable_prime_list<_PrimeType>::__stl_prime_list[_S_num_primes] =
{
  5ul,          53ul,         97ul,         193ul,       389ul,
  769ul,        1543ul,       3079ul,       6151ul,      12289ul,
  24593ul,      49157ul,      98317ul,      196613ul,    393241ul,
  786433ul,     1572869ul,    3145739ul,    6291469ul,   12582917ul,
  25165843ul,   50331653ul,   100663319ul,  201326611ul, 402653189ul,
  805306457ul,  1610612741ul, 3221225473ul, 4294967291ul
};

__stl_prime_list陣列中,後一個數總是大約等於前一個數的兩倍。插入資料時,如果元素總數將超過雜湊表的桶陣列長度,為了使雜湊表的負載因子(元素個數和hash桶個數之比)不超過1,就必須呼叫resize重新分配桶陣列,並把所有節點重新雜湊到新的桶中。增長速度跟vector差不多,每次重新分配後陣列長度差不多翻倍。

// Grows the bucket array so that it can hold __num_elements_hint elements,
// redistributing every existing node into the new buckets.
//
// Nothing happens unless the hint exceeds the current bucket count AND
// _M_next_size() yields a bucket count larger than the current one.
// Existing nodes are relinked, not copied: no node is allocated or freed on
// the success path.
//
// If anything inside the rehash loop throws, the nodes that were already
// moved into the temporary array are destroyed and the exception is
// rethrown; nodes not yet moved remain in _M_buckets.
// NOTE(review): this function does not adjust _M_num_elements on that error
// path -- confirm whether callers rely on the count afterwards.
template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
void
hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::
resize(size_type __num_elements_hint)
{
  const size_type __old_n = _M_buckets.size();
  if (__num_elements_hint > __old_n)
    {
      const size_type __n = _M_next_size(__num_elements_hint);
      if (__n > __old_n)
        {
          // New bucket array of __n null pointers, using the same allocator
          // as the current bucket array.
          _Vector_type __tmp(__n, (_Node*)(0), _M_buckets.get_allocator());
          // __try / __catch / __throw_exception_again are presumably the
          // library's macros wrapping try / catch / rethrow -- confirm.
          __try
            {
              for (size_type __bucket = 0; __bucket < __old_n; ++__bucket)
                {
                  _Node* __first = _M_buckets[__bucket];
                  while (__first)
                    {
                      // Bucket index of this node's value in a table of
                      // __n buckets.
                      size_type __new_bucket = _M_bkt_num(__first->_M_val,
                                                          __n);
                      // Unlink the node from the head of the old chain ...
                      _M_buckets[__bucket] = __first->_M_next;
                      // ... and push it onto the head of its new chain.
                      __first->_M_next = __tmp[__new_bucket];
                      __tmp[__new_bucket] = __first;
                      // Continue with the new head of the old chain.
                      __first = _M_buckets[__bucket];
                    }
                }
              // All nodes moved: install the new bucket array.
              _M_buckets.swap(__tmp);
            }
          __catch(...)
            {
              // Destroy every node that had already been moved into __tmp,
              // then propagate the exception.
              for (size_type __bucket = 0; __bucket < __tmp.size();
                ++__bucket)
                {
                  while (__tmp[__bucket])
                    {
                      _Node* __next = __tmp[__bucket]->_M_next;
                      _M_delete_node(__tmp[__bucket]);
                      __tmp[__bucket] = __next;
                    }
                }
              __throw_exception_again;
            }
        }
    }
}

每次新插入的元素都放在連結串列的第一個節點前面。

// Inserts __obj without resizing the bucket array, provided no element with
// an equal key is already stored in the target bucket.
//
// Returns a pair of (iterator, bool): when an equal key is found, the
// iterator refers to the existing node and the flag is false; otherwise a
// new node is linked in at the head of the bucket's chain, the element
// count is incremented, and the flag is true.
template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
pair<typename hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::iterator, bool>
hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::
insert_unique_noresize(const value_type& __obj)
{
  const size_type __bkt = _M_bkt_num(__obj);
  _Node* const __head = _M_buckets[__bkt];

  // Walk the chain looking for a node whose key compares equal.
  _Node* __scan = __head;
  while (__scan)
    {
      if (_M_equals(_M_get_key(__scan->_M_val), _M_get_key(__obj)))
        return pair<iterator, bool>(iterator(__scan, this), false);
      __scan = __scan->_M_next;
    }

  // No match: the new node becomes the first node of the chain.
  _Node* __fresh = _M_new_node(__obj);
  __fresh->_M_next = __head;
  _M_buckets[__bkt] = __fresh;
  ++_M_num_elements;
  return pair<iterator, bool>(iterator(__fresh, this), true);
}

3、 雜湊函式

// Maps a key to a bucket index for a table of __n buckets: the key's hash
// value reduced modulo __n.  __n must be non-zero.
size_type
  _M_bkt_num_key(const key_type& __key, size_t __n) const
  {
    return _M_hash(__key) % __n;
  }

(以下是 hash 仿函式針對幾種內建型別的全特化——template<> 屬於顯式全特化,而非偏特化:C 字串以 __stl_hash_string 計算雜湊值,整數型別則直接以其數值作為雜湊值)

// Hashes a NUL-terminated C string: starting from 0, each character updates
// the running value as h = 5 * h + c.  The empty string hashes to 0.
// __s must not be null; it is dereferenced unconditionally.
inline size_t
  __stl_hash_string(const char* __s)
  {
    unsigned long __acc = 0;
    while (*__s)
      {
        __acc = 5 * __acc + *__s;
        ++__s;
      }
    return size_t(__acc);
  }



// Explicit specialization for char*: hashes the characters of the
// NUL-terminated string via __stl_hash_string, not the pointer value.
template<>
struct hash<char*>
{
  size_t
  operator()(const char* __s) const
  {
    return __stl_hash_string(__s);
  }
};



// Explicit specialization for const char*: hashes the characters of the
// NUL-terminated string via __stl_hash_string, not the pointer value.
template<>
struct hash<const char*>
{
  size_t
  operator()(const char* __s) const
  {
    return __stl_hash_string(__s);
  }
};



// Explicit specialization for char: the hash is the character's own value
// converted to size_t.
template<>
struct hash<char>
{
  size_t
  operator()(char __x) const
  {
    return static_cast<size_t>(__x);
  }
};


// Explicit specialization for int: the hash is the integer's own value
// converted to size_t.
template<>
struct hash<int>
{
  size_t
  operator()(int __x) const
  {
    return static_cast<size_t>(__x);
  }
};


// Explicit specialization for unsigned int: the hash is the integer's own
// value converted to size_t.
template<>
struct hash<unsigned int>
{
  size_t
  operator()(unsigned int __x) const
  {
    return static_cast<size_t>(__x);
  }
};


// Explicit specialization for long: the hash is the integer's own value
// converted to size_t.
template<>
struct hash<long>
{
  size_t
  operator()(long __x) const
  {
    return static_cast<size_t>(__x);
  }
};