mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-10 18:05:44 +02:00
chore: add defrag for StringSet (#4308)
* add defrag logic for StringSet * add test
This commit is contained in:
parent
d88bdb8bc3
commit
fbd785cbc7
4 changed files with 103 additions and 2 deletions
|
@ -276,6 +276,15 @@ pair<void*, bool> DefragSortedMap(detail::SortedMap* sm, float ratio) {
|
||||||
return {sm, reallocated};
|
return {sm, reallocated};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pair<void*, bool> DefragStrSet(StringSet* ss, float ratio) {
|
||||||
|
bool realloced = false;
|
||||||
|
|
||||||
|
for (auto it = ss->begin(); it != ss->end(); ++it)
|
||||||
|
realloced |= it.ReallocIfNeeded(ratio);
|
||||||
|
|
||||||
|
return {ss, realloced};
|
||||||
|
}
|
||||||
|
|
||||||
// Iterates over allocations of internal hash data structures and re-allocates
|
// Iterates over allocations of internal hash data structures and re-allocates
|
||||||
// them if their pages are underutilized.
|
// them if their pages are underutilized.
|
||||||
// Returns pointer to new object ptr and whether any re-allocations happened.
|
// Returns pointer to new object ptr and whether any re-allocations happened.
|
||||||
|
@ -304,8 +313,7 @@ pair<void*, bool> DefragSet(unsigned encoding, void* ptr, float ratio) {
|
||||||
}
|
}
|
||||||
|
|
||||||
case kEncodingStrMap2: {
|
case kEncodingStrMap2: {
|
||||||
// Still not implemented
|
return DefragStrSet((StringSet*)ptr, ratio);
|
||||||
return {ptr, false};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -165,4 +165,44 @@ sds StringSet::MakeSetSds(string_view src, uint32_t ttl_sec) const {
|
||||||
return sdsnewlen(src.data(), src.size());
|
return sdsnewlen(src.data(), src.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Does not release obj. Callers must deallocate with sdsfree explicitly
|
||||||
|
pair<sds, bool> StringSet::DuplicateEntryIfFragmented(void* obj, float ratio) {
|
||||||
|
sds key = (sds)obj;
|
||||||
|
|
||||||
|
if (!zmalloc_page_is_underutilized(key, ratio))
|
||||||
|
return {key, false};
|
||||||
|
|
||||||
|
size_t key_len = sdslen(key);
|
||||||
|
bool has_ttl = MayHaveTtl(key);
|
||||||
|
|
||||||
|
if (has_ttl) {
|
||||||
|
sds res = AllocSdsWithSpace(key_len, sizeof(uint32_t));
|
||||||
|
std::memcpy(res, key, key_len + sizeof(uint32_t));
|
||||||
|
return {res, true};
|
||||||
|
}
|
||||||
|
|
||||||
|
return {sdsnewlen(key, key_len), true};
|
||||||
|
}
|
||||||
|
|
||||||
|
bool StringSet::iterator::ReallocIfNeeded(float ratio) {
|
||||||
|
auto* ptr = curr_entry_;
|
||||||
|
if (ptr->IsLink()) {
|
||||||
|
ptr = ptr->AsLink();
|
||||||
|
}
|
||||||
|
|
||||||
|
DCHECK(!ptr->IsEmpty());
|
||||||
|
DCHECK(ptr->IsObject());
|
||||||
|
|
||||||
|
auto* obj = ptr->GetObject();
|
||||||
|
auto [new_obj, realloced] =
|
||||||
|
static_cast<StringSet*>(owner_)->DuplicateEntryIfFragmented(obj, ratio);
|
||||||
|
|
||||||
|
if (realloced) {
|
||||||
|
ptr->SetObject(new_obj);
|
||||||
|
sdsfree((sds)obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
return realloced;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace dfly
|
} // namespace dfly
|
||||||
|
|
|
@ -85,6 +85,10 @@ class StringSet : public DenseSet {
|
||||||
using IteratorBase::ExpiryTime;
|
using IteratorBase::ExpiryTime;
|
||||||
using IteratorBase::HasExpiry;
|
using IteratorBase::HasExpiry;
|
||||||
using IteratorBase::SetExpiryTime;
|
using IteratorBase::SetExpiryTime;
|
||||||
|
|
||||||
|
// Try reducing memory fragmentation of the value by re-allocating. Returns true if
|
||||||
|
// re-allocation happened.
|
||||||
|
bool ReallocIfNeeded(float ratio);
|
||||||
};
|
};
|
||||||
|
|
||||||
iterator begin() {
|
iterator begin() {
|
||||||
|
@ -114,6 +118,9 @@ class StringSet : public DenseSet {
|
||||||
void ObjDelete(void* obj, bool has_ttl) const override;
|
void ObjDelete(void* obj, bool has_ttl) const override;
|
||||||
void* ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const override;
|
void* ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const override;
|
||||||
sds MakeSetSds(std::string_view src, uint32_t ttl_sec) const;
|
sds MakeSetSds(std::string_view src, uint32_t ttl_sec) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::pair<sds, bool> DuplicateEntryIfFragmented(void* obj, float ratio);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T> unsigned StringSet::AddMany(absl::Span<T> span, uint32_t ttl_sec) {
|
template <typename T> unsigned StringSet::AddMany(absl::Span<T> span, uint32_t ttl_sec) {
|
||||||
|
|
|
@ -657,4 +657,50 @@ void BM_Grow(benchmark::State& state) {
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_Grow);
|
BENCHMARK(BM_Grow);
|
||||||
|
|
||||||
|
unsigned total_wasted_memory = 0;
|
||||||
|
|
||||||
|
TEST_F(StringSetTest, ReallocIfNeeded) {
|
||||||
|
auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };
|
||||||
|
|
||||||
|
auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,
|
||||||
|
size_t block_size, void* arg) {
|
||||||
|
size_t used = block_size * area->used;
|
||||||
|
total_wasted_memory += area->committed - used;
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 10'000; i++)
|
||||||
|
ss_->Add(build_str(i));
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 10'000; i++) {
|
||||||
|
if (i % 10 == 0)
|
||||||
|
continue;
|
||||||
|
ss_->Erase(build_str(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
mi_heap_collect(mi_heap_get_backing(), true);
|
||||||
|
mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);
|
||||||
|
size_t wasted_before = total_wasted_memory;
|
||||||
|
|
||||||
|
size_t underutilized = 0;
|
||||||
|
for (auto it = ss_->begin(); it != ss_->end(); ++it) {
|
||||||
|
underutilized += zmalloc_page_is_underutilized(*it, 0.9);
|
||||||
|
it.ReallocIfNeeded(0.9);
|
||||||
|
}
|
||||||
|
// Check there are underutilized pages
|
||||||
|
CHECK_GT(underutilized, 0u);
|
||||||
|
|
||||||
|
total_wasted_memory = 0;
|
||||||
|
mi_heap_collect(mi_heap_get_backing(), true);
|
||||||
|
mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);
|
||||||
|
size_t wasted_after = total_wasted_memory;
|
||||||
|
|
||||||
|
// Check we waste significanlty less now
|
||||||
|
EXPECT_GT(wasted_before, wasted_after * 2);
|
||||||
|
|
||||||
|
EXPECT_EQ(ss_->UpperBoundSize(), 1000);
|
||||||
|
for (size_t i = 0; i < 1000; i++)
|
||||||
|
EXPECT_EQ(*ss_->Find(build_str(i * 10)), build_str(i * 10));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace dfly
|
} // namespace dfly
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue