feat: Defragmentation for hash values (#1727)

* feat: defrag hash

Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>

* fix: change tests to real scenarios

Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>

* fix: handle link nodes

Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>

---------

Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>
This commit is contained in:
Vladislav 2023-08-24 18:17:28 +03:00 committed by GitHub
parent e4ea868175
commit 731f36edd2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 183 additions and 1 deletions

View file

@ -36,6 +36,7 @@ using namespace std;
using absl::GetFlag;
using detail::binpacked_len;
using MemoryResource = detail::RobjWrapper::MemoryResource;
namespace {
constexpr XXH64_hash_t kHashSeed = 24061983;
@ -140,6 +141,38 @@ inline void FreeObjZset(unsigned encoding, void* ptr) {
}
}
// Iterates over allocations of the internal hash data structures and re-allocates
// them if their pages are underutilized.
void* DefragHash(MemoryResource* mr, unsigned encoding, void* ptr, float ratio) {
  switch (encoding) {
    // Listpack is stored as a single contiguous array, so defragmentation
    // means copying the whole blob into a freshly allocated replacement.
    case kEncodingListPack: {
      uint8_t* lp = (uint8_t*)ptr;
      if (!zmalloc_page_is_underutilized(lp, ratio))
        return lp;
      size_t lp_bytes = lpBytes(lp);
      // Reuse the computed size instead of calling lpBytes() a second time.
      uint8_t* replacement = lpNew(lp_bytes);
      memcpy(replacement, lp, lp_bytes);
      lpFree(lp);
      return replacement;
    }
    // StringMap supports re-allocation of its internal nodes in place,
    // so we simply walk all entries and let each one fix itself up.
    case kEncodingStrMap2: {
      StringMap* sm = (StringMap*)ptr;
      for (auto it = sm->begin(); it != sm->end(); ++it)
        it.ReallocIfNeeded(ratio);
      return sm;
    }
    default:
      ABSL_UNREACHABLE();
  }
}
inline void FreeObjStream(void* ptr) {
freeStream((stream*)ptr);
}
@ -359,10 +392,12 @@ void RobjWrapper::SetString(string_view s, MemoryResource* mr) {
}
// Re-allocates the wrapped object if its memory page is underutilized.
// Returns true only when a string object was actually moved.
bool RobjWrapper::DefragIfNeeded(float ratio) {
if (type() == OBJ_STRING) { // only applicable to strings
if (type() == OBJ_STRING) {
// Strings are a single allocation: move the whole blob when its page is underutilized.
if (zmalloc_page_is_underutilized(inner_obj(), ratio)) {
return Reallocate(tl.local_mr);
}
} else if (type() == OBJ_HASH) {
// Hashes delegate to DefragHash, which may replace the inner pointer.
// NOTE(review): this path always falls through to `return false`, so callers
// cannot tell whether a hash re-allocation happened — confirm this is intended.
inner_obj_ = DefragHash(tl.local_mr, encoding_, inner_obj_, ratio);
}
return false;
}

View file

@ -601,6 +601,62 @@ TEST_F(CompactObjectTest, JsonTypeWithPathTest) {
}
}
// Test listpack defragmentation.
// StringMap has built-in defragmentation that is tested in its own test suite.
TEST_F(CompactObjectTest, DefragHash) {
  auto build_str = [](size_t i) { return string(111, 'v') + to_string(i); };

  // Allocate 1000 listpacks with 100 entries each.
  // (Fix odd digit grouping: 10'00 -> 1'000; same value, conventional grouping.)
  vector<uint8_t*> lps(1'000);
  for (size_t i = 0; i < lps.size(); i++) {
    uint8_t* lp = lpNew(100);
    for (size_t j = 0; j < 100; j++) {
      auto s = build_str(j);
      lp = lpAppend(lp, reinterpret_cast<const unsigned char*>(s.data()), s.length());
    }
    DCHECK_EQ(lpLength(lp), 100u);
    lps[i] = lp;
  }

  // Free 90% of the listpacks to leave the surviving ones on sparse pages.
  for (size_t i = 0; i < lps.size(); i++) {
    if (i % 10 == 0)
      continue;
    lpFree(lps[i]);
  }

  // Find a listpack that is located on an underutilized page.
  uint8_t* target_lp = nullptr;
  for (size_t i = 0; i < lps.size(); i += 10) {
    if (zmalloc_page_is_underutilized(lps[i], 0.8))
      target_lp = lps[i];
  }
  CHECK_NE(target_lp, nullptr);

  // Trigger re-allocation through the compact object wrapper.
  cobj_.InitRobj(OBJ_HASH, kEncodingListPack, target_lp);
  cobj_.DefragIfNeeded(0.8);

  // The pointer must change because the listpack needed defragmentation.
  auto lp = (uint8_t*)cobj_.RObjPtr();
  CHECK_NE(lp, target_lp);

  // Verify all 100 entries survived the copy intact.
  uint8_t* fptr = lpFirst(lp);
  for (size_t i = 0; i < 100; i++) {
    int64_t len;
    auto* s = lpGet(fptr, &len, nullptr);
    string_view sv{reinterpret_cast<const char*>(s), static_cast<uint64_t>(len)};
    EXPECT_EQ(sv, build_str(i));
    fptr = lpNext(lp, fptr);
  }

  // Free the surviving listpacks; target_lp was already freed by the defrag.
  for (size_t i = 0; i < lps.size(); i += 10) {
    if (lps[i] != target_lp)
      lpFree(lps[i]);
  }
}
static void ascii_pack_naive(const char* ascii, size_t len, uint8_t* bin) {
const char* end = ascii + len;

View file

@ -101,6 +101,36 @@ sds StringMap::Find(std::string_view key) {
return GetValue(str);
}
// Re-allocates the key and/or value of a single entry if their pages are
// underutilized. Returns the (possibly new) key pointer; the value pointer
// stored inside the key blob is patched in place when the value moves.
sds StringMap::ReallocIfNeeded(void* obj, float ratio) {
  sds key = (sds)obj;
  size_t key_len = sdslen(key);

  // The tagged value pointer is stored right after the key's null terminator.
  auto* value_ptr = key + key_len + 1;
  uint64_t value_tag = absl::little_endian::Load64(value_ptr);
  sds value = (sds)(uint64_t(value_tag) & kValMask);

  // If the allocated value is underutilized, re-allocate it and update the
  // pointer inside the key. sdsnewlen() already copies value_len bytes from
  // the source buffer, so the previous explicit memcpy was redundant.
  if (zmalloc_page_is_underutilized(value, ratio)) {
    size_t value_len = sdslen(value);
    sds new_value = sdsnewlen(value, value_len);
    // Preserve the tag bits (e.g. TTL flag) while swapping the pointer bits.
    uint64_t new_value_tag = (uint64_t(new_value) & kValMask) | (value_tag & ~kValMask);
    absl::little_endian::Store64(value_ptr, new_value_tag);
    sdsfree(value);
  }

  if (!zmalloc_page_is_underutilized(key, ratio))
    return key;

  // Re-allocate the key together with its trailing value pointer and the
  // optional 4-byte expiry that follows it when the TTL bit is set.
  size_t space_size = 8 /* value ptr */ + ((value_tag & kValTtlBit) ? 4 : 0) /* optional expiry */;
  sds new_key = AllocSdsWithSpace(key_len, space_size);
  memcpy(new_key, key, key_len + 1 /* \0 */ + space_size);
  sdsfree(key);
  return new_key;
}
uint64_t StringMap::Hash(const void* obj, uint32_t cookie) const {
DCHECK_LT(cookie, 2u);
@ -146,6 +176,7 @@ uint32_t StringMap::ObjExpireTime(const void* obj) const {
const char* valptr = str + sdslen(str) + 1;
uint64_t val = absl::little_endian::Load64(valptr);
DCHECK(val & kValTtlBit);
if (val & kValTtlBit) {
return absl::little_endian::Load32(valptr + 8);

View file

@ -62,6 +62,16 @@ class StringMap : public DenseSet {
return BreakToPair(ptr);
}
void ReallocIfNeeded(float ratio) {
// Unwrap all links to correctly call SetObject()
auto* ptr = curr_entry_;
while (ptr->IsLink())
ptr = ptr->AsLink();
auto* obj = ptr->GetObject();
ptr->SetObject(static_cast<StringMap*>(owner_)->ReallocIfNeeded(obj, ratio));
}
iterator& operator++() {
Advance();
return *this;
@ -104,6 +114,10 @@ class StringMap : public DenseSet {
}
private:
// Reallocate key and/or value if their pages are underutilized.
// Returns new object pointer (stays the same if utilization is enough).
sds ReallocIfNeeded(void* obj, float ratio);
uint64_t Hash(const void* obj, uint32_t cookie) const final;
bool ObjEqual(const void* left, const void* right, uint32_t right_cookie) const final;
size_t ObjectAllocSize(const void* obj) const final;

View file

@ -117,4 +117,50 @@ TEST_F(StringMapTest, Ttl) {
EXPECT_TRUE(it == sm_->end());
}
// Accumulator shared with the capture-less visitor lambda below; file scope
// because the lambda is passed as a plain function pointer to mimalloc.
unsigned total_wasted_memory = 0;
// Fragments the heap by erasing 90% of entries, then verifies that calling
// ReallocIfNeeded() on every surviving entry significantly reduces waste.
TEST_F(StringMapTest, ReallocIfNeeded) {
auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };
// Per-area waste = committed bytes minus bytes occupied by live blocks.
auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,
size_t block_size, void* arg) {
size_t used = block_size * area->used;
total_wasted_memory += area->committed - used;
return true;
};
// Insert 10'000 entries; third argument is presumably a TTL — confirm with AddOrUpdate.
for (size_t i = 0; i < 10'000; i++)
sm_->AddOrUpdate(build_str(i), build_str(i + 1), i * 10 + 1);
// Erase all but every 10th entry to leave survivors on sparse pages.
for (size_t i = 0; i < 10'000; i++) {
if (i % 10 == 0)
continue;
sm_->Erase(build_str(i));
}
// Measure waste before defragmentation.
mi_heap_collect(mi_heap_get_backing(), true);
mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);
size_t wasted_before = total_wasted_memory;
// Re-allocate every surviving entry, counting how many sat on sparse pages.
size_t underutilized = 0;
for (auto it = sm_->begin(); it != sm_->end(); ++it) {
underutilized += zmalloc_page_is_underutilized(it->first, 0.9);
it.ReallocIfNeeded(0.9);
}
// Check there are underutilized pages
CHECK_GT(underutilized, 0u);
// Measure waste again after defragmentation.
total_wasted_memory = 0;
mi_heap_collect(mi_heap_get_backing(), true);
mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);
size_t wasted_after = total_wasted_memory;
// Check we waste significantly less now
EXPECT_GT(wasted_before, wasted_after * 2);
// Content must be unchanged: 1000 survivors with their original values.
EXPECT_EQ(sm_->Size(), 1000);
for (size_t i = 0; i < 1000; i++)
EXPECT_EQ(sm_->Find(build_str(i * 10)), build_str(i * 10 + 1));
}
} // namespace dfly