44
55#pragma once
66
7- // #include <cassert>
7+ #include < cassert>
88#include < cstring>
99#include < string_view>
1010
11+ #include " base/hash.h"
1112#include " base/logging.h"
1213
1314extern " C" {
1415#include " redis/zmalloc.h"
1516}
1617
1718namespace dfly {
19+
20+ static uint64_t Hash (std::string_view str) {
21+ constexpr XXH64_hash_t kHashSeed = 24061983 ;
22+ return XXH3_64bits_withSeed (str.data (), str.size (), kHashSeed );
23+ }
24+
// Maps a 64-bit hash to a bucket index by keeping its `capacity_log` most
// significant bits.
//
// `capacity_log` must be greater than zero: shifting a 64-bit value by 64 is
// undefined behaviour, hence the assert. The result is narrowed to 32 bits,
// so values of `capacity_log` above 32 lose the upper bits of the index.
inline uint32_t BucketId(uint64_t hash, uint32_t capacity_log) {
  assert(capacity_log > 0);
  return static_cast<uint32_t>(hash >> (64 - capacity_log));
}
// ISLEntry doesn't own its memory; it must be created and released manually
1930class ISLEntry {
2031 friend class IntrusiveStringList ;
@@ -30,7 +41,7 @@ class ISLEntry {
3041
// The extended hash is a small tag kept in the top bits of the entry's
// pointer-sized word; it allows us to reduce key comparisons.
static constexpr size_t kExtHashShift = 56;       // position of the tag inside the word
static constexpr uint32_t kExtHashSize = 8;       // width of the tag in bits
static constexpr size_t kExtHashMask = 0xFFULL;   // mask of the tag once shifted down
static constexpr size_t kExtHashShiftedMask = kExtHashMask << kExtHashShift;  // mask in place
// Keep the three constants consistent if any of them is ever changed.
static_assert(kExtHashMask == (1ULL << kExtHashSize) - 1, "mask must cover exactly kExtHashSize bits");
static_assert(kExtHashShift + kExtHashSize == 64, "tag must occupy the top bits of a 64-bit word");
3647
@@ -86,28 +97,74 @@ class ISLEntry {
8697 return (uptr () & kExtHashShiftedMask ) >> kExtHashShift ;
8798 }
8899
89- bool CheckBucketAffiliation (uint32_t bucket_id, uint32_t capacity_log, uint32_t shift_log) const {
100+ bool CheckBucketAffiliation (uint32_t bucket_id, uint32_t capacity_log, uint32_t shift_log) {
90101 uint32_t bucket_id_hash_part = capacity_log > shift_log ? shift_log : capacity_log;
91102 uint32_t bucket_mask = (1 << bucket_id_hash_part) - 1 ;
92103 bucket_id &= bucket_mask;
93- uint32_t stored_bucket_id = GetExtendedHash () >> (ext_hash_bit_size - bucket_id_hash_part);
104+ auto stored_hash = GetExtendedHash ();
105+ if (!stored_hash) {
106+ stored_hash = SetExtendedHash (Hash (Key ()), capacity_log, shift_log);
107+ }
108+ uint32_t stored_bucket_id = stored_hash >> (kExtHashSize - bucket_id_hash_part);
94109 return bucket_id == stored_bucket_id;
95110 }
96111
97- bool CheckExtendedHash (uint64_t hash, uint32_t capacity_log, uint32_t shift_log) const {
98- uint32_t start_hash_bit = capacity_log > shift_log ? capacity_log - shift_log : 0 ;
99- uint32_t ext_hash_shift = 64 - start_hash_bit - ext_hash_bit_size;
100- uint64_t ext_hash = (hash >> ext_hash_shift) & kExtHashMask ;
101- return GetExtendedHash () == ext_hash;
112+ bool CheckExtendedHash (uint64_t hash, uint32_t capacity_log, uint32_t shift_log) {
113+ const uint32_t start_hash_bit = capacity_log > shift_log ? capacity_log - shift_log : 0 ;
114+ const uint32_t ext_hash_shift = 64 - start_hash_bit - kExtHashSize ;
115+ const uint64_t ext_hash = (hash >> ext_hash_shift) & kExtHashMask ;
116+ auto stored_hash = GetExtendedHash ();
117+ if (!stored_hash) {
118+ stored_hash = SetExtendedHash (Hash (Key ()), capacity_log, shift_log);
119+ }
120+ return stored_hash == ext_hash;
102121 }
103122
104123 // TODO rename to SetHash
105124 // shift_log identify which bucket the element belongs to
106- void SetExtendedHash (uint64_t hash, uint32_t capacity_log, uint32_t shift_log) {
107- uint32_t start_hash_bit = capacity_log > shift_log ? capacity_log - shift_log : 0 ;
108- uint32_t ext_hash_shift = 64 - start_hash_bit - ext_hash_bit_size;
109- uint64_t ext_hash = ((hash >> ext_hash_shift) << kExtHashShift ) & kExtHashShiftedMask ;
125+ uint64_t SetExtendedHash (uint64_t hash, uint32_t capacity_log, uint32_t shift_log) {
126+ const uint32_t start_hash_bit = capacity_log > shift_log ? capacity_log - shift_log : 0 ;
127+ const uint32_t ext_hash_shift = 64 - start_hash_bit - kExtHashSize ;
128+ const uint64_t result_hash = (hash >> ext_hash_shift) & kExtHashMask ;
129+ const uint64_t ext_hash = result_hash << kExtHashShift ;
110130 data_ = (char *)((uptr () & ~kExtHashShiftedMask ) | ext_hash);
131+ return result_hash;
132+ }
133+
134+ void ClearHash () {
135+ data_ = (char *)((uptr () & ~kExtHashShiftedMask ));
136+ }
137+
138+ // return new bucket_id
139+ uint32_t Rehash (uint32_t current_bucket_id, uint32_t prev_capacity_log, uint32_t new_capacity_log,
140+ uint32_t shift_log) {
141+ auto stored_hash = GetExtendedHash ();
142+
143+ const uint32_t logs_diff = new_capacity_log - prev_capacity_log;
144+ const uint32_t prev_significant_bits =
145+ prev_capacity_log > shift_log ? shift_log : prev_capacity_log;
146+ const uint32_t needed_hash_bits = prev_significant_bits + logs_diff;
147+
148+ if (!stored_hash || needed_hash_bits > kExtHashSize ) {
149+ auto hash = Hash (Key ());
150+ SetExtendedHash (hash, new_capacity_log, shift_log);
151+ return BucketId (hash, new_capacity_log);
152+ }
153+
154+ const uint32_t real_bucket_end = stored_hash >> (kExtHashSize - prev_significant_bits);
155+ const uint32_t prev_shift_mask = (1 << prev_significant_bits) - 1 ;
156+ const uint32_t curr_shift = (current_bucket_id - real_bucket_end) & prev_shift_mask;
157+ const uint32_t prev_bucket_mask = (1 << prev_capacity_log) - 1 ;
158+ const uint32_t base_bucket_id = (current_bucket_id - curr_shift) & prev_bucket_mask;
159+
160+ const uint32_t last_bits_mask = (1 << logs_diff) - 1 ;
161+ const uint32_t stored_hash_shift = kExtHashSize - needed_hash_bits;
162+ const uint32_t last_bits = (stored_hash >> stored_hash_shift) & last_bits_mask;
163+ const uint32_t new_bucket_id = (base_bucket_id << logs_diff) | last_bits;
164+
165+ ClearHash (); // the cache is invalid after rehash operation
166+
167+ return new_bucket_id;
111168 }
112169
113170 protected:
@@ -227,7 +284,6 @@ class ISLEntry {
227284 return HasTtl () ? sizeof (std::uint32_t ) : 0 ;
228285 }
229286
230- // TODO consider use SDS strings or other approach
231287 // TODO add optimization for big keys
// memory data layout: [ISLEntry*, TTL, key_size, key]
233289 char * data_ = nullptr ;
@@ -248,6 +304,7 @@ class UniqueISLEntry : private ISLEntry {
248304 using ISLEntry::ExpiryTime;
249305 using ISLEntry::HasExpiry;
250306 using ISLEntry::Key;
307+ using ISLEntry::Rehash;
251308 using ISLEntry::operator bool ;
252309
253310 ISLEntry Release () {
@@ -304,6 +361,14 @@ class IntrusiveStringList {
304361 return false ;
305362 }
306363
364+ uint32_t Rehash (uint32_t current_bucket_id, uint32_t prev_capacity_log,
365+ uint32_t new_capacity_log, uint32_t shift_log) {
366+ auto entry = prev_.Next ();
367+ auto res = entry->Rehash (current_bucket_id, prev_capacity_log, new_capacity_log, shift_log);
368+ prev_.SetNext (entry);
369+ return res;
370+ }
371+
307372 bool HasExpiry () const {
308373 return prev_.Next ().HasExpiry ();
309374 }
0 commit comments