std::unordered_map
版本XcodeDefault.xctoolchain/usr/include/c++/v1
1:unordered_map typedef
例子:typedef std::unordered_map<std::string, int>
模板參數:
1 template <class _Key, class _Tp, class _Hash = hash<_Key>, class _Pred = equal_to<_Key>, 2 class _Alloc = allocator<pair<const _Key, _Tp> > > 3 class _LIBCPP_TEMPLATE_VIS unordered_map 4 { 5 public: 6 // types 7 typedef _Key key_type; 8 typedef _Tp mapped_type; 9 typedef _Hash hasher; 10 typedef _Pred key_equal; 11 typedef _Alloc allocator_type; 12 typedef pair<const key_type, mapped_type> value_type; 13 typedef value_type& reference; 14 typedef const value_type& const_reference; 15 static_assert((is_same<value_type, typename allocator_type::value_type>::value), 16 "Invalid allocator::value_type"); 17 18 private: 19 typedef __hash_value_type<key_type, mapped_type> __value_type; 20 typedef __unordered_map_hasher<key_type, __value_type, hasher> __hasher; 21 typedef __unordered_map_equal<key_type, __value_type, key_equal> __key_equal; 22 typedef typename __rebind_alloc_helper<allocator_traits<allocator_type>, 23 __value_type>::type __allocator_type; 24 25 typedef __hash_table<__value_type, __hasher, 26 __key_equal, __allocator_type> __table; 27 28 __table __table_; 29 30 ...... 31 32 }
- key_type -> _Key -> std::string
- mapped_type -> _Tp -> int
- hasher -> _Hash = hash<_Key> -> hash<std::string>
- key_equal -> _Pred = equal_to<_Key> -> equal_to<std::string>
- allocator_type -> _Alloc = allocator<pair<const _Key, _Tp> > -> allocator<pair<const std::string, int> >
unordered_map內部持有__hash_table對象。忽略__hash_value_type、__unordered_map_hasher、__unordered_map_equal以及rebind后的allocator等內部包裝類型后,std::unordered_map<std::string, int>特化模板的__hash_table類型可以簡化理解為
__hash_table<
pair<const std::string, int>,
hash<std::string>,
equal_to<std::string>,
allocator<pair<const std::string, int> >
>
1 template <class _Tp, class _Hash, class _Equal, class _Alloc> 2 class __hash_table 3 { 4 public: 5 typedef _Tp value_type; 6 typedef _Hash hasher; 7 typedef _Equal key_equal; 8 typedef _Alloc allocator_type; 9 10 private: 11 typedef unique_ptr<__next_pointer[], __bucket_list_deleter> __bucket_list; 12 // --- Member data begin --- 13 __bucket_list __bucket_list_; 14 __compressed_pair<__first_node, __node_allocator> __p1_; 15 __compressed_pair<size_type, hasher> __p2_; 16 __compressed_pair<float, key_equal> __p3_; 17 // --- Member data end --- 18 19 ...... 20 21 }
__hash_table內部持有4個成員變量,
- __bucket_list_,__next_pointer數組,儲存插入節點node,內部含有多個bucket(node節點的集合),以node節點的形式鏈式組織
- __p1_,head node -- node分配器;
- __p2_,node總數量 -- hash key size_t計算器;每成功插入一個node,node總數量+1
- __p3_,負載因子 -- 數據比較器;負載因子調整bucket的數量(rehash方法),數據比較器用於比較參數和bucket node中_Key是否相同(因為bucket是鏈式儲存,在hash key size_t映射到bucket index之后,會從bucket的頭node開始,逐一比較node是否和參數相同)
模板推導出類型后,就可以得知unordered_map的幾個關鍵要點
- __p2_->second, hash<std::string>,提供string到hash key size_t的計算
- __bucket_list_,unordered_map的存儲區
- __p3_->first, 負載因子, rehash,決定bucket數量
- hash key size_t -> bucket index, __constrain_hash
- __p3_ -> second, equal_to<std::string>,數據的比較器
2: 散列計算器,string -> hash
hash<std::string>, 在std::string實現。提供operator()操作符,作為計算hash數值的入口方法
1 template <class _CharT, class _Allocator> 2 struct _LIBCPP_TEMPLATE_VIS 3 hash<basic_string<_CharT, char_traits<_CharT>, _Allocator> > 4 : public unary_function< 5 basic_string<_CharT, char_traits<_CharT>, _Allocator>, size_t> 6 { 7 size_t 8 operator()(const basic_string<_CharT, char_traits<_CharT>, _Allocator>& __val) const _NOEXCEPT 9 { return __do_string_hash(__val.data(), __val.data() + __val.size()); } 10 };
hash<std::string>::operator() 調用 __do_string_hash
__do_string_hash 調用 __murmur2_or_cityhash<size_t>::operator()(const void* __key, _Size __len)
__murmur2_or_cityhash<size_t>::operator()(const void* __key, _Size __len) 按照字符串長度__len,分成若干種情況分別計算
1 template <class _Size> 2 _Size 3 __murmur2_or_cityhash<_Size, 64>::operator()(const void* __key, _Size __len) 4 { 5 const char* __s = static_cast<const char*>(__key); 6 if (__len <= 32) { 7 if (__len <= 16) { 8 return __hash_len_0_to_16(__s, __len); 9 } else { 10 return __hash_len_17_to_32(__s, __len); 11 } 12 } else if (__len <= 64) { 13 return __hash_len_33_to_64(__s, __len); 14 } 15 16 // For strings over 64 bytes we hash the end first, and then as we 17 // loop we keep 56 bytes of state: v, w, x, y, and z. 18 _Size __x = __loadword<_Size>(__s + __len - 40); 19 _Size __y = __loadword<_Size>(__s + __len - 16) + 20 __loadword<_Size>(__s + __len - 56); 21 _Size __z = __hash_len_16(__loadword<_Size>(__s + __len - 48) + __len, 22 __loadword<_Size>(__s + __len - 24)); 23 pair<_Size, _Size> __v = __weak_hash_len_32_with_seeds(__s + __len - 64, __len, __z); 24 pair<_Size, _Size> __w = __weak_hash_len_32_with_seeds(__s + __len - 32, __y + __k1, __x); 25 __x = __x * __k1 + __loadword<_Size>(__s); 26 27 // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. 28 __len = (__len - 1) & ~static_cast<_Size>(63); 29 do { 30 __x = __rotate(__x + __y + __v.first + __loadword<_Size>(__s + 8), 37) * __k1; 31 __y = __rotate(__y + __v.second + __loadword<_Size>(__s + 48), 42) * __k1; 32 __x ^= __w.second; 33 __y += __v.first + __loadword<_Size>(__s + 40); 34 __z = __rotate(__z + __w.first, 33) * __k1; 35 __v = __weak_hash_len_32_with_seeds(__s, __v.second * __k1, __x + __w.first); 36 __w = __weak_hash_len_32_with_seeds(__s + 32, __z + __w.second, 37 __y + __loadword<_Size>(__s + 16)); 38 std::swap(__z, __x); 39 __s += 64; 40 __len -= 64; 41 } while (__len != 0); 42 return __hash_len_16( 43 __hash_len_16(__v.first, __w.first) + __shift_mix(__y) * __k1 + __z, 44 __hash_len_16(__v.second, __w.second) + __x); 45 }
舉例,__hash_len_0_to_16
1 static _Size __hash_len_0_to_16(const char* __s, _Size __len) 2 _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK 3 { 4 if (__len > 8) { 5 const _Size __a = __loadword<_Size>(__s); 6 const _Size __b = __loadword<_Size>(__s + __len - 8); 7 return __hash_len_16(__a, __rotate_by_at_least_1(__b + __len, __len)) ^ __b; 8 } 9 if (__len >= 4) { 10 const uint32_t __a = __loadword<uint32_t>(__s); 11 const uint32_t __b = __loadword<uint32_t>(__s + __len - 4); 12 return __hash_len_16(__len + (__a << 3), __b); 13 } 14 if (__len > 0) { 15 const unsigned char __a = __s[0]; 16 const unsigned char __b = __s[__len >> 1]; 17 const unsigned char __c = __s[__len - 1]; 18 const uint32_t __y = static_cast<uint32_t>(__a) + 19 (static_cast<uint32_t>(__b) << 8); 20 const uint32_t __z = __len + (static_cast<uint32_t>(__c) << 2); 21 return __shift_mix(__y * __k2 ^ __z * __k3) * __k2; 22 } 23 return __k2; 24 }
同理,其余類型type均實現hash<type>::operator()方法
3:構造bucket
__p3_->first, 負載因子, rehash,決定bucket數量
1 template <class _Tp, class _Hash, class _Equal, class _Alloc> 2 _LIBCPP_INLINE_VISIBILITY 3 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer 4 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_prepare( 5 size_t __hash, value_type& __value) 6 { 7 size_type __bc = bucket_count(); 8 9 if (__bc != 0) 10 { 11 size_t __chash = __constrain_hash(__hash, __bc); 12 __next_pointer __ndptr = __bucket_list_[__chash]; 13 if (__ndptr != nullptr) 14 { 15 for (__ndptr = __ndptr->__next_; __ndptr != nullptr && 16 __constrain_hash(__ndptr->__hash(), __bc) == __chash; 17 __ndptr = __ndptr->__next_) 18 { 19 if (key_eq()(__ndptr->__upcast()->__value_, __value)) 20 return __ndptr; 21 } 22 } 23 } 24 if (size()+1 > __bc * max_load_factor() || __bc == 0) 25 { 26 rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), 27 size_type(ceil(float(size() + 1) / max_load_factor())))); 28 } 29 return nullptr; 30 }
插入node時,如果滿足公式
size()+1 > __bc * max_load_factor() || __bc == 0,則調用rehash方法,傳入的新bucket數量為
_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), size_type(ceil(float(size() + 1) / max_load_factor())))
__hash_table默認構造函數提供的負載因子是1;首次插入時__bc == 0、size() == 0,ceil((0 + 1) / 1) = 1,所以rehash傳入的參數為1
1 template <class _Tp, class _Hash, class _Equal, class _Alloc> 2 void 3 __hash_table<_Tp, _Hash, _Equal, _Alloc>::rehash(size_type __n) 4 _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK 5 { 6 if (__n == 1) 7 __n = 2; 8 else if (__n & (__n - 1)) 9 __n = __next_prime(__n); 10 size_type __bc = bucket_count(); 11 if (__n > __bc) 12 __rehash(__n); 13 else if (__n < __bc) 14 { 15 __n = _VSTD::max<size_type> 16 ( 17 __n, 18 __is_hash_power2(__bc) ? __next_hash_pow2(size_t(ceil(float(size()) / max_load_factor()))) : 19 __next_prime(size_t(ceil(float(size()) / max_load_factor()))) 20 ); 21 if (__n < __bc) 22 __rehash(__n); 23 } 24 }
rehash內部接收到__n == 1,調整__n = 2。然后調用__rehash方法創建2個bucket
1 template <class _Tp, class _Hash, class _Equal, class _Alloc> 2 void 3 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __nbc) 4 { 5 #if _LIBCPP_DEBUG_LEVEL >= 2 6 __get_db()->__invalidate_all(this); 7 #endif // _LIBCPP_DEBUG_LEVEL >= 2 8 __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc(); 9 __bucket_list_.reset(__nbc > 0 ? 10 __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr); 11 __bucket_list_.get_deleter().size() = __nbc; 12 if (__nbc > 0) 13 { 14 for (size_type __i = 0; __i < __nbc; ++__i) 15 __bucket_list_[__i] = nullptr; 16 __next_pointer __pp = __p1_.first().__ptr(); 17 __next_pointer __cp = __pp->__next_; 18 if (__cp != nullptr) 19 { 20 size_type __chash = __constrain_hash(__cp->__hash(), __nbc); 21 __bucket_list_[__chash] = __pp; 22 size_type __phash = __chash; 23 for (__pp = __cp, __cp = __cp->__next_; __cp != nullptr; 24 __cp = __pp->__next_) 25 { 26 __chash = __constrain_hash(__cp->__hash(), __nbc); 27 if (__chash == __phash) 28 __pp = __cp; 29 else 30 { 31 if (__bucket_list_[__chash] == nullptr) 32 { 33 __bucket_list_[__chash] = __pp; 34 __pp = __cp; 35 __phash = __chash; 36 } 37 else 38 { 39 __next_pointer __np = __cp; 40 for (; __np->__next_ != nullptr && 41 key_eq()(__cp->__upcast()->__value_, 42 __np->__next_->__upcast()->__value_); 43 __np = __np->__next_) 44 ; 45 __pp->__next_ = __np->__next_; 46 __np->__next_ = __bucket_list_[__chash]->__next_; 47 __bucket_list_[__chash]->__next_ = __cp; 48 49 } 50 } 51 } 52 } 53 } 54 }
4:插入操作(碰撞沖突,鏈地址法)
hash key size_t定位到bucket index的計算方法
1 inline _LIBCPP_INLINE_VISIBILITY
2 size_t 3 __constrain_hash(size_t __h, size_t __bc) 4 { 5 return !(__bc & (__bc - 1)) ? __h & (__bc - 1) : 6 (__h < __bc ? __h : __h % __bc); 7 }
第一個參數為hash值,第二個參數為bucket數量
!(__bc & (__bc - 1)) -> 滿足表達式為true,則__bc為2的N次方
__h & (__bc - 1) -> __bc - 1的二進制形如0b111(低N位全為1),按位與后取__h的低N位作為bucket index,結果等價於__h % __bc
(__h < __bc ? __h : __h % __bc) -> 如果__h < __bc,則直接獲取__h 作為存儲地址;否則,取模運算為存儲地址,__h % __bc(除留余數法)
插入bucket之前,先探測bucket內是否已存在相同key的node。
bucket內部采用鏈表存儲node,從bucket的第一個node開始,只要node仍屬於當前bucket(__constrain_hash的結果等於__chash),就使用key_eq逐一比較具體數值
1 // Prepare the container for an insertion of the value __value with the hash 2 // __hash. This does a lookup into the container to see if __value is already 3 // present, and performs a rehash if necessary. Returns a pointer to the 4 // existing element if it exists, otherwise nullptr. 5 // 6 // Note that this function does forward exceptions if key_eq() throws, and never 7 // mutates __value or actually inserts into the map. 8 template <class _Tp, class _Hash, class _Equal, class _Alloc> 9 _LIBCPP_INLINE_VISIBILITY 10 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer 11 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_prepare( 12 size_t __hash, value_type& __value) 13 { 14 size_type __bc = bucket_count(); 15 16 if (__bc != 0) 17 { 18 size_t __chash = __constrain_hash(__hash, __bc); 19 __next_pointer __ndptr = __bucket_list_[__chash]; 20 if (__ndptr != nullptr) 21 { 22 for (__ndptr = __ndptr->__next_; __ndptr != nullptr && 23 __constrain_hash(__ndptr->__hash(), __bc) == __chash; 24 __ndptr = __ndptr->__next_) 25 { 26 if (key_eq()(__ndptr->__upcast()->__value_, __value)) 27 return __ndptr; 28 } 29 } 30 } 31 if (size()+1 > __bc * max_load_factor() || __bc == 0) 32 { 33 rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), 34 size_type(ceil(float(size() + 1) / max_load_factor())))); 35 } 36 return nullptr; 37 }
如果未發現相同key的node(__node_insert_unique_prepare返回nullptr),則插入節點
1 // Insert the node __nd into the container by pushing it into the right bucket, 2 // and updating size(). Assumes that __nd->__hash is up-to-date, and that 3 // rehashing has already occurred and that no element with the same key exists 4 // in the map. 5 template <class _Tp, class _Hash, class _Equal, class _Alloc> 6 _LIBCPP_INLINE_VISIBILITY 7 void 8 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_perform( 9 __node_pointer __nd) _NOEXCEPT 10 { 11 size_type __bc = bucket_count(); 12 size_t __chash = __constrain_hash(__nd->__hash(), __bc); 13 // insert_after __bucket_list_[__chash], or __first_node if bucket is null 14 __next_pointer __pn = __bucket_list_[__chash]; 15 if (__pn == nullptr) 16 { 17 __pn =__p1_.first().__ptr(); 18 __nd->__next_ = __pn->__next_; 19 __pn->__next_ = __nd->__ptr(); 20 // fix up __bucket_list_ 21 __bucket_list_[__chash] = __pn; 22 if (__nd->__next_ != nullptr) 23 __bucket_list_[__constrain_hash(__nd->__next_->__hash(), __bc)] = __nd->__ptr(); 24 } 25 else 26 { 27 __nd->__next_ = __pn->__next_; 28 __pn->__next_ = __nd->__ptr(); 29 } 30 ++size(); 31 }
將新建節點插入bucket頭部
__nd->__next_ = __pn->__next_;
__pn->__next_ = __nd->__ptr();
5:查找操作
__p3_ -> second, equal_to<std::string>,數據的比較器
1 template <class _Tp, class _Hash, class _Equal, class _Alloc> 2 template <class _Key> 3 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator 4 __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) 5 { 6 size_t __hash = hash_function()(__k); 7 size_type __bc = bucket_count(); 8 if (__bc != 0) 9 { 10 size_t __chash = __constrain_hash(__hash, __bc); 11 __next_pointer __nd = __bucket_list_[__chash]; 12 if (__nd != nullptr) 13 { 14 for (__nd = __nd->__next_; __nd != nullptr && 15 (__nd->__hash() == __hash 16 || __constrain_hash(__nd->__hash(), __bc) == __chash); 17 __nd = __nd->__next_) 18 { 19 if ((__nd->__hash() == __hash) 20 && key_eq()(__nd->__upcast()->__value_, __k)) 21 #if _LIBCPP_DEBUG_LEVEL >= 2 22 return iterator(__nd, this); 23 #else 24 return iterator(__nd); 25 #endif 26 } 27 } 28 } 29 return end(); 30 }
查找方法:
- 生成入參hash key size_t : size_t __hash= hash_function()(__k);
- 獲取bucket數量:size_type __bc = bucket_count();
- 生成bucket index:size_t __chash = __constrain_hash(__hash, __bc);
- 獲取bucket頭節點指針:__next_pointer __nd = __bucket_list_[__chash];
- 循環比較node hash key size_t 和 入參hash key size_t
- 比較入參 和 node key:key_eq()(__nd->__upcast()->__value_, __k)
- 返回結果