Mirror of https://github.com/dragonflydb/dragonfly.git
feat: Defragmentation for hash values (#1727)
* feat: defrag hash
* fix: change tests to real scenarios
* fix: handle link nodes

Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>
Parent: e4ea868175
Commit: 731f36edd2
5 changed files with 183 additions and 1 deletion
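The change applies one pattern throughout: ask the allocator whether the page backing an allocation is mostly empty, and if so copy the data into a fresh allocation (which lands on a denser page) and free the original. A minimal, self-contained sketch of that pattern follows; page_is_underutilized is a hypothetical stand-in for Dragonfly's zmalloc_page_is_underutilized and simply returns true so the sketch compiles and runs on its own.

#include <cstdlib>
#include <cstring>

// Hypothetical stand-in for zmalloc_page_is_underutilized(): the real check asks
// mimalloc whether the page holding `ptr` is filled below `ratio`. Here it always
// says yes so the sketch is runnable without the allocator internals.
static bool page_is_underutilized(void* /*ptr*/, float /*ratio*/) { return true; }

// Defrag-by-copy: if the backing page is sparse, move the blob so the old page
// can eventually be returned to the OS.
static void* defrag_blob(void* ptr, size_t size, float ratio) {
  if (!page_is_underutilized(ptr, ratio))
    return ptr;                             // page is dense enough, keep it in place
  void* replacement = std::malloc(size);    // fresh block, likely on a denser page
  std::memcpy(replacement, ptr, size);
  std::free(ptr);
  return replacement;                       // caller must store the new pointer
}

DefragHash below applies exactly this to listpack-encoded hashes; for the StringMap encoding the copy happens per key/value node instead.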
@@ -36,6 +36,7 @@ using namespace std;
using absl::GetFlag;
using detail::binpacked_len;
using MemoryResource = detail::RobjWrapper::MemoryResource;

namespace {

constexpr XXH64_hash_t kHashSeed = 24061983;
@@ -140,6 +141,38 @@ inline void FreeObjZset(unsigned encoding, void* ptr) {
  }
}

// Iterates over allocations of internal hash data structures and re-allocates
// them if their pages are underutilized.
void* DefragHash(MemoryResource* mr, unsigned encoding, void* ptr, float ratio) {
  switch (encoding) {
    // Listpack is stored as a single contiguous array
    case kEncodingListPack: {
      uint8_t* lp = (uint8_t*)ptr;
      if (!zmalloc_page_is_underutilized(lp, ratio))
        return lp;

      size_t lp_bytes = lpBytes(lp);
      uint8_t* replacement = lpNew(lpBytes(lp));
      memcpy(replacement, lp, lp_bytes);
      lpFree(lp);

      return replacement;
    };

    // StringMap supports re-allocation of its internal nodes
    case kEncodingStrMap2: {
      StringMap* sm = (StringMap*)ptr;
      for (auto it = sm->begin(); it != sm->end(); ++it)
        it.ReallocIfNeeded(ratio);

      return sm;
    }

    default:
      ABSL_UNREACHABLE();
  };
}

inline void FreeObjStream(void* ptr) {
  freeStream((stream*)ptr);
}
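The listpack branch works because a listpack is a single self-contained byte array (header, packed entries, terminator) with no out-of-line pointers, so copying lpBytes() bytes is a full deep copy. A small sketch of just that copy step, assuming the bundled Redis listpack API (lpNew, lpBytes, lpFree); the include path is a guess and may differ in the actual tree:

#include <cstdint>
#include <cstring>
extern "C" {
#include "redis/listpack.h"  // assumed path to the bundled listpack header
}

// Copy a listpack into a fresh allocation, as the kEncodingListPack branch above
// does. The new allocation is at least lpBytes(lp) bytes, and the copied header
// already records the correct total size, so no further fixup is needed.
static uint8_t* CopyListpack(uint8_t* lp) {
  size_t bytes = lpBytes(lp);
  uint8_t* replacement = lpNew(bytes);
  std::memcpy(replacement, lp, bytes);
  return replacement;  // caller decides when to lpFree() the original
}

The StringMap branch, by contrast, returns the same top-level pointer and lets the iterator reallocate individual key/value nodes.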
@@ -359,10 +392,12 @@ void RobjWrapper::SetString(string_view s, MemoryResource* mr) {
}

bool RobjWrapper::DefragIfNeeded(float ratio) {
  if (type() == OBJ_STRING) {
    if (zmalloc_page_is_underutilized(inner_obj(), ratio)) {
      return Reallocate(tl.local_mr);
    }
  } else if (type() == OBJ_HASH) {
    inner_obj_ = DefragHash(tl.local_mr, encoding_, inner_obj_, ratio);
  }
  return false;
}
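Worth noting: the OBJ_HASH branch above rewires inner_obj_ in place and then falls through to return false, so a caller that counts defragmented objects by the return value only sees moved strings. A hypothetical caller sketch (this is not Dragonfly's actual defrag task; the include path and the CompactObj wrapper forwarding the return value are assumptions):

#include <cstddef>
#include <vector>
#include "core/compact_object.h"  // assumed include path

// Offer every value to DefragIfNeeded() with the configured utilization ratio.
static size_t DefragPass(std::vector<dfly::CompactObj*>& values, float ratio) {
  size_t moved = 0;
  for (auto* v : values)
    moved += v->DefragIfNeeded(ratio);  // true only when a string was relocated
  return moved;
}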
@@ -601,6 +601,62 @@ TEST_F(CompactObjectTest, JsonTypeWithPathTest) {
  }
}

// Test listpack defragmentation.
// StringMap has built-in defragmentation that is tested in its own test suite.
TEST_F(CompactObjectTest, DefragHash) {
  auto build_str = [](size_t i) { return string(111, 'v') + to_string(i); };

  vector<uint8_t*> lps(10'00);

  for (size_t i = 0; i < lps.size(); i++) {
    uint8_t* lp = lpNew(100);
    for (size_t j = 0; j < 100; j++) {
      auto s = build_str(j);
      lp = lpAppend(lp, reinterpret_cast<const unsigned char*>(s.data()), s.length());
    }
    DCHECK_EQ(lpLength(lp), 100u);
    lps[i] = lp;
  }

  for (size_t i = 0; i < lps.size(); i++) {
    if (i % 10 == 0)
      continue;
    lpFree(lps[i]);
  }

  // Find a listpack that is located on an underutilized page
  uint8_t* target_lp = nullptr;
  for (size_t i = 0; i < lps.size(); i += 10) {
    if (zmalloc_page_is_underutilized(lps[i], 0.8))
      target_lp = lps[i];
  }
  CHECK_NE(target_lp, nullptr);

  // Trigger re-allocation
  cobj_.InitRobj(OBJ_HASH, kEncodingListPack, target_lp);
  cobj_.DefragIfNeeded(0.8);

  // Check the pointer changes as the listpack needed defragmentation
  auto lp = (uint8_t*)cobj_.RObjPtr();
  CHECK_NE(lp, target_lp);

  uint8_t* fptr = lpFirst(lp);
  for (size_t i = 0; i < 100; i++) {
    int64_t len;
    auto* s = lpGet(fptr, &len, nullptr);

    string_view sv{reinterpret_cast<const char*>(s), static_cast<uint64_t>(len)};
    EXPECT_EQ(sv, build_str(i));

    fptr = lpNext(lp, fptr);
  }

  for (size_t i = 0; i < lps.size(); i += 10) {
    if (lps[i] != target_lp)
      lpFree(lps[i]);
  }
}

static void ascii_pack_naive(const char* ascii, size_t len, uint8_t* bin) {
  const char* end = ascii + len;
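The setup above creates fragmentation deliberately: 1000 same-sized listpacks are allocated, then 9 out of every 10 are freed, so the pages holding the survivors are left roughly 10% occupied and fall below the 0.8 ratio. The same effect can be reproduced and measured with mimalloc's public heap-walking API, which the StringMap test further below also uses; a self-contained sketch:

#include <mimalloc.h>
#include <cstdio>
#include <vector>

// The visitor must be a plain function pointer, hence the file-scope accumulator.
static size_t g_wasted = 0;

static bool CountWaste(const mi_heap_t*, const mi_heap_area_t* area, void*,
                       size_t block_size, void*) {
  g_wasted += area->committed - area->used * block_size;  // committed but unused bytes
  return true;
}

int main() {
  std::vector<void*> blocks(1000);
  for (auto& b : blocks)
    b = mi_malloc(128);
  for (size_t i = 0; i < blocks.size(); i++)
    if (i % 10 != 0)
      mi_free(blocks[i]);  // keep only every 10th block, as the test does

  mi_heap_collect(mi_heap_get_backing(), /*force=*/true);
  mi_heap_visit_blocks(mi_heap_get_backing(), /*visit_all_blocks=*/false, CountWaste, nullptr);
  std::printf("wasted bytes after freeing 90%%: %zu\n", g_wasted);
  // Copying the survivors to fresh allocations and freeing the originals
  // (what DefragHash does for listpacks) lets these sparse pages drain.
  return 0;
}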
@@ -101,6 +101,36 @@ sds StringMap::Find(std::string_view key) {
  return GetValue(str);
}

sds StringMap::ReallocIfNeeded(void* obj, float ratio) {
  sds key = (sds)obj;
  size_t key_len = sdslen(key);

  auto* value_ptr = key + key_len + 1;
  uint64_t value_tag = absl::little_endian::Load64(value_ptr);
  sds value = (sds)(uint64_t(value_tag) & kValMask);

  // If the allocated value is underutilized, re-allocate it and update the pointer inside the key
  if (zmalloc_page_is_underutilized(value, ratio)) {
    size_t value_len = sdslen(value);
    sds new_value = sdsnewlen(value, value_len);
    memcpy(new_value, value, value_len);
    uint64_t new_value_tag = (uint64_t(new_value) & kValMask) | (value_tag & ~kValMask);
    absl::little_endian::Store64(value_ptr, new_value_tag);
    sdsfree(value);
  }

  if (!zmalloc_page_is_underutilized(key, ratio))
    return key;

  size_t space_size = 8 /* value ptr */ + ((value_tag & kValTtlBit) ? 4 : 0) /* optional expiry */;

  sds new_key = AllocSdsWithSpace(key_len, space_size);
  memcpy(new_key, key, key_len + 1 /* \0 */ + space_size);
  sdsfree(key);

  return new_key;
}

uint64_t StringMap::Hash(const void* obj, uint32_t cookie) const {
  DCHECK_LT(cookie, 2u);
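ReallocIfNeeded manipulates StringMap's packed entry layout directly: the key's sds bytes, the terminating '\0', then an 8-byte little-endian tagged pointer to the value sds, followed by a 4-byte expiry when the TTL bit of the tag is set (this is also what the space_size computation accounts for). A standalone sketch of that layout, using plain memcpy in place of absl::little_endian and illustrative placeholder constants rather than the real kValTtlBit/kValMask values:

#include <cstdint>
#include <cstring>

// Illustrative placeholders -- not StringMap's real kValTtlBit/kValMask values.
constexpr uint64_t kTtlBitSketch = 1ULL << 63;
constexpr uint64_t kPtrMaskSketch = ~kTtlBitSketch;

// Entry layout: | key bytes | '\0' | 8-byte LE tagged value ptr | 4-byte expiry if TTL bit set |
struct ValueSlot {
  uint64_t tag;   // pointer bits plus flag bits
  bool has_ttl;
};

static ValueSlot LoadSlot(const char* key, size_t key_len) {
  ValueSlot s;
  std::memcpy(&s.tag, key + key_len + 1, sizeof(s.tag));  // assumes a little-endian host
  s.has_ttl = (s.tag & kTtlBitSketch) != 0;
  return s;
}

// After re-allocating the value, write the new pointer back while keeping the
// flag bits intact -- the Store64() step in ReallocIfNeeded.
static void StoreSlot(char* key, size_t key_len, const void* new_value, uint64_t old_tag) {
  uint64_t tag = (reinterpret_cast<uintptr_t>(new_value) & kPtrMaskSketch) |
                 (old_tag & ~kPtrMaskSketch);
  std::memcpy(key + key_len + 1, &tag, sizeof(tag));
}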
@@ -146,6 +176,7 @@ uint32_t StringMap::ObjExpireTime(const void* obj) const {
  const char* valptr = str + sdslen(str) + 1;

  uint64_t val = absl::little_endian::Load64(valptr);

  DCHECK(val & kValTtlBit);
  if (val & kValTtlBit) {
    return absl::little_endian::Load32(valptr + 8);
@@ -62,6 +62,16 @@ class StringMap : public DenseSet {
      return BreakToPair(ptr);
    }

    void ReallocIfNeeded(float ratio) {
      // Unwrap all links to correctly call SetObject()
      auto* ptr = curr_entry_;
      while (ptr->IsLink())
        ptr = ptr->AsLink();

      auto* obj = ptr->GetObject();
      ptr->SetObject(static_cast<StringMap*>(owner_)->ReallocIfNeeded(obj, ratio));
    }

    iterator& operator++() {
      Advance();
      return *this;
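The iterator method above has to cope with DenseSet's chaining: a bucket entry may be a link to another node rather than the node that owns the object, so the code walks the chain before calling SetObject() with the possibly re-allocated pointer. A generic illustration with hypothetical node types (not DenseSet's real layout):

#include <cstddef>

// Hypothetical chained-bucket node, only to illustrate the unwrapping step.
struct Node {
  bool is_link = false;
  Node* next = nullptr;    // meaningful when is_link is true
  void* object = nullptr;  // meaningful when is_link is false
};

// Walk to the node that actually owns the object, then store the (possibly new)
// pointer returned by the realloc routine -- the shape of ReallocIfNeeded() above.
static void ReplaceOwnedObject(Node* entry, void* (*realloc_fn)(void*, float), float ratio) {
  while (entry->is_link)
    entry = entry->next;
  entry->object = realloc_fn(entry->object, ratio);
}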
@@ -104,6 +114,10 @@ class StringMap : public DenseSet {
  }

 private:
  // Reallocate key and/or value if their pages are underutilized.
  // Returns the new object pointer (unchanged if utilization is sufficient).
  sds ReallocIfNeeded(void* obj, float ratio);

  uint64_t Hash(const void* obj, uint32_t cookie) const final;
  bool ObjEqual(const void* left, const void* right, uint32_t right_cookie) const final;
  size_t ObjectAllocSize(const void* obj) const final;
@@ -117,4 +117,50 @@ TEST_F(StringMapTest, Ttl) {
  EXPECT_TRUE(it == sm_->end());
}

unsigned total_wasted_memory = 0;

TEST_F(StringMapTest, ReallocIfNeeded) {
  auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };

  auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,
                        size_t block_size, void* arg) {
    size_t used = block_size * area->used;
    total_wasted_memory += area->committed - used;
    return true;
  };

  for (size_t i = 0; i < 10'000; i++)
    sm_->AddOrUpdate(build_str(i), build_str(i + 1), i * 10 + 1);

  for (size_t i = 0; i < 10'000; i++) {
    if (i % 10 == 0)
      continue;
    sm_->Erase(build_str(i));
  }

  mi_heap_collect(mi_heap_get_backing(), true);
  mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);
  size_t wasted_before = total_wasted_memory;

  size_t underutilized = 0;
  for (auto it = sm_->begin(); it != sm_->end(); ++it) {
    underutilized += zmalloc_page_is_underutilized(it->first, 0.9);
    it.ReallocIfNeeded(0.9);
  }
  // Check there are underutilized pages
  CHECK_GT(underutilized, 0u);

  total_wasted_memory = 0;
  mi_heap_collect(mi_heap_get_backing(), true);
  mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);
  size_t wasted_after = total_wasted_memory;

  // Check we waste significantly less now
  EXPECT_GT(wasted_before, wasted_after * 2);

  EXPECT_EQ(sm_->Size(), 1000);
  for (size_t i = 0; i < 1000; i++)
    EXPECT_EQ(sm_->Find(build_str(i * 10)), build_str(i * 10 + 1));
}

} // namespace dfly
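One detail in the test above: total_wasted_memory is a file-scope variable because mi_heap_visit_blocks takes a plain function pointer, so the count_waste lambda must stay captureless. The visitor's final void* parameter is forwarded unchanged and can carry the accumulator instead; a sketch using the same mimalloc calls:

#include <mimalloc.h>
#include <cstddef>

static bool CountWasteInto(const mi_heap_t*, const mi_heap_area_t* area, void*,
                           size_t block_size, void* arg) {
  auto* total = static_cast<size_t*>(arg);
  *total += area->committed - area->used * block_size;
  return true;
}

static size_t MeasureWaste() {
  size_t wasted = 0;
  mi_heap_collect(mi_heap_get_backing(), /*force=*/true);
  mi_heap_visit_blocks(mi_heap_get_backing(), /*visit_all_blocks=*/false, CountWasteInto, &wasted);
  return wasted;
}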