From b1222cb260074662015696533bf5810d9071c38a Mon Sep 17 00:00:00 2001 From: Borys Date: Wed, 5 Mar 2025 11:44:03 +0200 Subject: [PATCH 01/12] feat: add intrusive string set entry --- src/core/CMakeLists.txt | 1 + src/core/intrusive_string_set.h | 156 ++++++++++++++++++++++++++ src/core/intrusive_string_set_test.cc | 55 +++++++++ 3 files changed, 212 insertions(+) create mode 100644 src/core/intrusive_string_set.h create mode 100644 src/core/intrusive_string_set_test.cc diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f3bcbd567..6d12a74f4 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -40,6 +40,7 @@ cxx_test(interpreter_test dfly_core LABELS DFLY) cxx_test(string_set_test dfly_core LABELS DFLY) cxx_test(string_map_test dfly_core LABELS DFLY) +cxx_test(intrusive_string_set_test dfly_core LABELS DFLY) cxx_test(sorted_map_test dfly_core redis_test_lib LABELS DFLY) cxx_test(bptree_set_test dfly_core LABELS DFLY) cxx_test(score_map_test dfly_core LABELS DFLY) diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h new file mode 100644 index 000000000..1692e7bb6 --- /dev/null +++ b/src/core/intrusive_string_set.h @@ -0,0 +1,156 @@ +// Copyright 2024, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#pragma once + +#include +#include +#include +#include + +namespace dfly { + +class ISSEntry { + public: + ISSEntry(std::string_view key) { + ISSEntry* next = nullptr; + uint32_t key_size = key.size(); + + auto size = sizeof(next) + sizeof(key_size) + key_size; + + data_ = (char*)malloc(size); + + std::memcpy(data_, &next, sizeof(next)); + + auto* key_size_pos = data_ + sizeof(next); + std::memcpy(key_size_pos, &key_size, sizeof(key_size)); + + auto* key_pos = key_size_pos + sizeof(key_size); + std::memcpy(key_pos, key.data(), key_size); + } + + std::string_view Key() const { + return {GetKeyData(), GetKeySize()}; + } + + ISSEntry* Next() const { + ISSEntry* next = nullptr; + std::memcpy(&next, data_, sizeof(next)); + return next; + } + + void SetNext(ISSEntry* next) { + std::memcpy(data_, &next, sizeof(next)); + } + + private: + const char* GetKeyData() const { + return data_ + sizeof(ISSEntry*) + sizeof(uint32_t); + } + + uint32_t GetKeySize() const { + uint32_t size = 0; + std::memcpy(&size, data_ + sizeof(ISSEntry*), sizeof(size)); + return size; + } + + // TODO consider use SDS strings or other approach + // TODO add optimization for big keys + // memory daya layout [ISSEntry*, key_size, key] + char* data_; +}; + +class ISMEntry { + public: + ISMEntry(std::string_view key, std::string_view val) { + ISMEntry* next = nullptr; + uint32_t key_size = key.size(); + uint32_t val_size = val.size(); + + auto size = sizeof(next) + sizeof(key_size) + sizeof(val_size) + key_size + val_size; + + data_ = (char*)malloc(size); + + std::memcpy(data_, &next, sizeof(next)); + + auto* key_size_pos = data_ + sizeof(next); + std::memcpy(key_size_pos, &key_size, sizeof(key_size)); + + auto* val_size_pos = key_size_pos + sizeof(key_size); + std::memcpy(val_size_pos, &val_size, sizeof(val_size)); + + auto* key_pos = val_size_pos + sizeof(val_size); + std::memcpy(key_pos, key.data(), key_size); + + auto* val_pos = key_pos + key_size; + std::memcpy(val_pos, val.data(), val_size); + } + + std::string_view Key() const { + return {GetKeyData(), GetKeySize()}; + } + + std::string_view Val() const { + return {GetValData(), GetValSize()}; + } + + ISMEntry* Next() const { + ISMEntry* next = nullptr; + std::memcpy(&next, data_, sizeof(next)); + return next; + } + + void SetVal(std::string_view val) { + // TODO add optimization for the same size key + uint32_t val_size = val.size(); + auto new_size = + sizeof(ISMEntry*) + sizeof(uint32_t) + sizeof(uint32_t) + GetKeySize() + val_size; + + data_ = (char*)realloc(data_, new_size); + + auto* val_size_pos = data_ + sizeof(ISMEntry*) + sizeof(uint32_t); + std::memcpy(val_size_pos, &val_size, sizeof(val_size)); + + auto* val_pos = val_size_pos + sizeof(val_size) + GetKeySize(); + std::memcpy(val_pos, val.data(), val_size); + } + + void SetNext(ISMEntry* next) { + std::memcpy(data_, &next, sizeof(next)); + } + + private: + const char* GetKeyData() const { + return data_ + sizeof(ISMEntry*) + sizeof(uint32_t) + sizeof(uint32_t); + } + + uint32_t GetKeySize() const { + uint32_t size = 0; + std::memcpy(&size, data_ + sizeof(ISMEntry*), sizeof(size)); + return size; + } + + const char* GetValData() const { + return GetKeyData() + GetKeySize(); + } + + uint32_t GetValSize() const { + uint32_t size = 0; + std::memcpy(&size, data_ + sizeof(ISMEntry*) + sizeof(uint32_t), sizeof(size)); + return size; + } + + // TODO consider use SDS strings or other approach + // TODO add optimization for big keys + // memory daya layout [ISMEntry*, key_size, val_size, key, val] + char* data_; +}; + +template class IntrusiveStringSet { + public: + private: + std::vector entries_; +}; + +} // namespace dfly diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc new file mode 100644 index 000000000..e73ebca8f --- /dev/null +++ b/src/core/intrusive_string_set_test.cc @@ -0,0 +1,55 @@ +// Copyright 2022, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#include "core/intrusive_string_set.h" + +#include "base/gtest.h" + +namespace dfly { + +using namespace std; + +class IntrusiveStringSetTest : public ::testing::Test { + protected: + static void SetUpTestSuite() { + } + + static void TearDownTestSuite() { + } + + void SetUp() override { + } + + void TearDown() override { + } +}; + +TEST_F(IntrusiveStringSetTest, ISSEntryTest) { + ISSEntry test("0123456789"); + + EXPECT_EQ(test.Key(), "0123456789"sv); + EXPECT_EQ(test.Next(), nullptr); + + test.SetNext(&test); + + EXPECT_EQ(test.Key(), "0123456789"sv); + EXPECT_EQ(test.Next(), &test); +} + +TEST_F(IntrusiveStringSetTest, ISMEntryTest) { + ISMEntry test("0123456789", "qwertyuiopasdfghjklzxcvbnm"); + + EXPECT_EQ(test.Key(), "0123456789"sv); + EXPECT_EQ(test.Val(), "qwertyuiopasdfghjklzxcvbnm"sv); + EXPECT_EQ(test.Next(), nullptr); + + test.SetVal("QWERTYUIOPASDFGHJKLZXCVBNM"); + test.SetNext(&test); + + EXPECT_EQ(test.Key(), "0123456789"sv); + EXPECT_EQ(test.Val(), "QWERTYUIOPASDFGHJKLZXCVBNM"sv); + EXPECT_EQ(test.Next(), &test); +} + +} // namespace dfly From 844c4ef7cfa4c98cc558ee9ae37b3ae5a35dfb46 Mon Sep 17 00:00:00 2001 From: Borys Date: Wed, 5 Mar 2025 16:39:40 +0200 Subject: [PATCH 02/12] feat: add IntrusiveStringSet --- src/core/intrusive_string_set.h | 252 ++++++++++++++++---------- src/core/intrusive_string_set_test.cc | 33 ++-- 2 files changed, 176 insertions(+), 109 deletions(-) diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index 1692e7bb6..66dc64d4e 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -4,153 +4,221 @@ #pragma once +#include #include #include #include #include +#include "base/hash.h" + namespace dfly { -class ISSEntry { +class ISLEntry { + friend class IntrusiveStringList; + public: - ISSEntry(std::string_view key) { - ISSEntry* next = nullptr; + ISLEntry() = default; + + ISLEntry(char* data) { + data_ = data; + } + + operator bool() const { + return data_; + } + + std::string_view Key() const { + return {GetKeyData(), GetKeySize()}; + } + + private: + static ISLEntry Create(std::string_view key) { + char* next = nullptr; uint32_t key_size = key.size(); auto size = sizeof(next) + sizeof(key_size) + key_size; - data_ = (char*)malloc(size); + char* data = (char*)malloc(size); - std::memcpy(data_, &next, sizeof(next)); + std::memcpy(data, &next, sizeof(next)); - auto* key_size_pos = data_ + sizeof(next); + auto* key_size_pos = data + sizeof(next); std::memcpy(key_size_pos, &key_size, sizeof(key_size)); auto* key_pos = key_size_pos + sizeof(key_size); std::memcpy(key_pos, key.data(), key_size); + + return ISLEntry(data); } - std::string_view Key() const { - return {GetKeyData(), GetKeySize()}; + static void Destroy(ISLEntry entry) { + free(entry.data_); } - ISSEntry* Next() const { - ISSEntry* next = nullptr; - std::memcpy(&next, data_, sizeof(next)); + ISLEntry Next() const { + ISLEntry next; + std::memcpy(&next.data_, data_, sizeof(next)); return next; } - void SetNext(ISSEntry* next) { + void SetNext(ISLEntry next) { std::memcpy(data_, &next, sizeof(next)); } - private: const char* GetKeyData() const { - return data_ + sizeof(ISSEntry*) + sizeof(uint32_t); + return data_ + sizeof(ISLEntry*) + sizeof(uint32_t); } uint32_t GetKeySize() const { uint32_t size = 0; - std::memcpy(&size, data_ + sizeof(ISSEntry*), sizeof(size)); + std::memcpy(&size, data_ + sizeof(ISLEntry*), sizeof(size)); return size; } // TODO consider use SDS strings or other approach // TODO add optimization for big keys - // memory daya layout [ISSEntry*, key_size, key] - char* data_; + // memory daya layout [ISLEntry*, key_size, key] + char* data_ = nullptr; }; -class ISMEntry { - public: - ISMEntry(std::string_view key, std::string_view val) { - ISMEntry* next = nullptr; - uint32_t key_size = key.size(); - uint32_t val_size = val.size(); - - auto size = sizeof(next) + sizeof(key_size) + sizeof(val_size) + key_size + val_size; - - data_ = (char*)malloc(size); - - std::memcpy(data_, &next, sizeof(next)); - - auto* key_size_pos = data_ + sizeof(next); - std::memcpy(key_size_pos, &key_size, sizeof(key_size)); - - auto* val_size_pos = key_size_pos + sizeof(key_size); - std::memcpy(val_size_pos, &val_size, sizeof(val_size)); - - auto* key_pos = val_size_pos + sizeof(val_size); - std::memcpy(key_pos, key.data(), key_size); - - auto* val_pos = key_pos + key_size; - std::memcpy(val_pos, val.data(), val_size); - } - - std::string_view Key() const { - return {GetKeyData(), GetKeySize()}; - } - - std::string_view Val() const { - return {GetValData(), GetValSize()}; - } - - ISMEntry* Next() const { - ISMEntry* next = nullptr; - std::memcpy(&next, data_, sizeof(next)); - return next; - } - - void SetVal(std::string_view val) { - // TODO add optimization for the same size key - uint32_t val_size = val.size(); - auto new_size = - sizeof(ISMEntry*) + sizeof(uint32_t) + sizeof(uint32_t) + GetKeySize() + val_size; - - data_ = (char*)realloc(data_, new_size); - - auto* val_size_pos = data_ + sizeof(ISMEntry*) + sizeof(uint32_t); - std::memcpy(val_size_pos, &val_size, sizeof(val_size)); - - auto* val_pos = val_size_pos + sizeof(val_size) + GetKeySize(); - std::memcpy(val_pos, val.data(), val_size); - } - - void SetNext(ISMEntry* next) { - std::memcpy(data_, &next, sizeof(next)); +class FakePrevISLEntry : public ISLEntry { + FakePrevISLEntry(ISLEntry) { + fake_allocated_mem_ = ; } private: - const char* GetKeyData() const { - return data_ + sizeof(ISMEntry*) + sizeof(uint32_t) + sizeof(uint32_t); + void* fake_allocated_mem_; +} + +class IntrusiveStringList { + public: + ~IntrusiveStringList() { + while (start_) { + auto next = start_.Next(); + ISLEntry::Destroy(start_); + start_ = next; + } } - uint32_t GetKeySize() const { - uint32_t size = 0; - std::memcpy(&size, data_ + sizeof(ISMEntry*), sizeof(size)); - return size; + ISLEntry Insert(ISLEntry e) { + e.SetNext(start_); + start_ = e; + return start_; } - const char* GetValData() const { - return GetKeyData() + GetKeySize(); + ISLEntry Emplace(std::string_view key) { + return Insert(ISLEntry::Create(key)); } - uint32_t GetValSize() const { - uint32_t size = 0; - std::memcpy(&size, data_ + sizeof(ISMEntry*) + sizeof(uint32_t), sizeof(size)); - return size; + ISLEntry Find(std::string_view str) { + auto it = start_; + for (; it && it.Key() != str; it = it.Next()) + ; + return it; } - // TODO consider use SDS strings or other approach - // TODO add optimization for big keys - // memory daya layout [ISMEntry*, key_size, val_size, key, val] - char* data_; + bool Erase(std::string_view str) { + if (!start_) { + return false; + } + auto it = start_; + if (it.Key() == str) { + start_ = it.Next(); + ISLEntry::Destroy(it); + return true; + } + + auto prev = it; + for (it = it.Next(); it; prev = it, it = it.Next()) { + if (it.Key() == str) { + prev.SetNext(it.Next()); + ISLEntry::Destroy(it); + return true; + } + } + return false; + } + + void MoveNext(ISLEntry& prev) { + auto next = prev.Next(); + prev.SetNext(next.Next()); + Insert(next); + } + + private: + ISLEntry start_; }; -template class IntrusiveStringSet { +class IntrusiveStringSet { public: + // TODO add TTL processing + ISLEntry Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) { + if (size_ >= entries_.size()) { + Grow(); + } + auto bucket_id = BucketId(Hash(str)); + auto& bucket = entries_[bucket_id]; + + if (auto existed_item = bucket.Find(str); existed_item) { + // TODO consider common implementation for key value pair + return ISLEntry(); + } + + ++size_; + return bucket.Emplace(str); + } + + bool Erase(std::string_view str) { + auto bucket_id = BucketId(Hash(str)); + return entries_[bucket_id].Erase(str); + } + + ISLEntry Find(std::string_view member) { + auto bucket_id = BucketId(Hash(member)); + return entries_[bucket_id].Find(member); + } + + // Returns the number of elements in the map. Note that it might be that some of these elements + // have expired and can't be accessed. + size_t UpperBoundSize() const { + return size_; + } + + bool Empty() const { + return size_ == 0; + } + private: - std::vector entries_; + std::uint32_t Capacity() const { + return 1 << capacity_log_; + } + + void Grow() { + ++capacity_log_; + entries_.resize(Capacity()); + + // TODO rehashing + } + + uint32_t BucketId(uint64_t hash) const { + assert(capacity_log_ > 0); + return hash >> (64 - capacity_log_); + } + + uint64_t Hash(std::string_view str) const { + constexpr XXH64_hash_t kHashSeed = 24061983; + return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed); + } + + private: + static constexpr size_t kMinSizeShift = 2; + std::uint32_t capacity_log_ = 1; + std::uint32_t size_ = 0; // number of elements in the set. + + static_assert(sizeof(IntrusiveStringList) == sizeof(void*), + "IntrusiveStringList should be just a pointer"); + std::vector entries_; }; } // namespace dfly diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc index e73ebca8f..604f770da 100644 --- a/src/core/intrusive_string_set_test.cc +++ b/src/core/intrusive_string_set_test.cc @@ -25,31 +25,30 @@ class IntrusiveStringSetTest : public ::testing::Test { } }; -TEST_F(IntrusiveStringSetTest, ISSEntryTest) { - ISSEntry test("0123456789"); +TEST_F(IntrusiveStringSetTest, IntrusiveStringListTest) { + IntrusiveStringList isl; + ISLEntry test = isl.Emplace("0123456789"); EXPECT_EQ(test.Key(), "0123456789"sv); - EXPECT_EQ(test.Next(), nullptr); - test.SetNext(&test); + test = isl.Emplace("123456789"); - EXPECT_EQ(test.Key(), "0123456789"sv); - EXPECT_EQ(test.Next(), &test); -} + EXPECT_EQ(test.Key(), "123456789"sv); -TEST_F(IntrusiveStringSetTest, ISMEntryTest) { - ISMEntry test("0123456789", "qwertyuiopasdfghjklzxcvbnm"); + test = isl.Emplace("23456789"); - EXPECT_EQ(test.Key(), "0123456789"sv); - EXPECT_EQ(test.Val(), "qwertyuiopasdfghjklzxcvbnm"sv); - EXPECT_EQ(test.Next(), nullptr); + EXPECT_EQ(isl.Find("0123456789").Key(), "0123456789"sv); + EXPECT_EQ(isl.Find("23456789").Key(), "23456789"sv); + EXPECT_EQ(isl.Find("123456789").Key(), "123456789"sv); + EXPECT_EQ(isl.Find("test"), ISLEntry()); - test.SetVal("QWERTYUIOPASDFGHJKLZXCVBNM"); - test.SetNext(&test); + EXPECT_TRUE(isl.Erase("23456789")); + EXPECT_EQ(isl.Find("23456789"), ISLEntry()); + EXPECT_FALSE(isl.Erase("test")); + EXPECT_EQ(isl.Find("test"), ISLEntry()); - EXPECT_EQ(test.Key(), "0123456789"sv); - EXPECT_EQ(test.Val(), "QWERTYUIOPASDFGHJKLZXCVBNM"sv); - EXPECT_EQ(test.Next(), &test); + IntrusiveStringList isl2; + isl2.MoveNext() } } // namespace dfly From 04dc4559e6e3e57f99ab7ca49ccac8d0eba1b9f1 Mon Sep 17 00:00:00 2001 From: Borys Date: Thu, 10 Apr 2025 16:49:02 +0300 Subject: [PATCH 03/12] feat: add Rehash method to IntrusiveStringSet --- src/core/intrusive_string_set.h | 56 +++++++++++++++------------ src/core/intrusive_string_set_test.cc | 24 +++++++++++- 2 files changed, 53 insertions(+), 27 deletions(-) diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index 66dc64d4e..dcc23a6b1 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -82,15 +82,6 @@ class ISLEntry { char* data_ = nullptr; }; -class FakePrevISLEntry : public ISLEntry { - FakePrevISLEntry(ISLEntry) { - fake_allocated_mem_ = ; - } - - private: - void* fake_allocated_mem_; -} - class IntrusiveStringList { public: ~IntrusiveStringList() { @@ -101,17 +92,32 @@ class IntrusiveStringList { } } + IntrusiveStringList() = default; + IntrusiveStringList(IntrusiveStringList&& r) { + start_ = r.start_; + r.start_ = {}; + } + ISLEntry Insert(ISLEntry e) { e.SetNext(start_); start_ = e; return start_; } + ISLEntry Pop() { + auto res = start_; + if (start_) { + start_ = start_.Next(); + // TODO consider to res.SetNext(nullptr); for now it looks superfluous + } + return res; + } + ISLEntry Emplace(std::string_view key) { return Insert(ISLEntry::Create(key)); } - ISLEntry Find(std::string_view str) { + ISLEntry Find(std::string_view str) const { auto it = start_; for (; it && it.Key() != str; it = it.Next()) ; @@ -140,12 +146,6 @@ class IntrusiveStringList { return false; } - void MoveNext(ISLEntry& prev) { - auto next = prev.Next(); - prev.SetNext(next.Next()); - Insert(next); - } - private: ISLEntry start_; }; @@ -155,7 +155,10 @@ class IntrusiveStringSet { // TODO add TTL processing ISLEntry Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) { if (size_ >= entries_.size()) { - Grow(); + auto prev_size = entries_.size(); + ++capacity_log_; + entries_.resize(Capacity()); + Rehash(prev_size); } auto bucket_id = BucketId(Hash(str)); auto& bucket = entries_[bucket_id]; @@ -174,7 +177,7 @@ class IntrusiveStringSet { return entries_[bucket_id].Erase(str); } - ISLEntry Find(std::string_view member) { + ISLEntry Find(std::string_view member) const { auto bucket_id = BucketId(Hash(member)); return entries_[bucket_id].Find(member); } @@ -189,16 +192,20 @@ class IntrusiveStringSet { return size_ == 0; } - private: std::uint32_t Capacity() const { return 1 << capacity_log_; } - void Grow() { - ++capacity_log_; - entries_.resize(Capacity()); - - // TODO rehashing + private: + // was Grow in StringSet + void Rehash(size_t prev_size) { + for (int64_t i = prev_size - 1; i >= 0; --i) { + auto list = std::move(entries_[i]); + for (auto entry = list.Pop(); entry; entry = list.Pop()) { + auto bucket_id = BucketId(Hash(entry.Key())); + entries_[bucket_id].Insert(entry); + } + } } uint32_t BucketId(uint64_t hash) const { @@ -212,7 +219,6 @@ class IntrusiveStringSet { } private: - static constexpr size_t kMinSizeShift = 2; std::uint32_t capacity_log_ = 1; std::uint32_t size_ = 0; // number of elements in the set. diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc index 604f770da..161d57d59 100644 --- a/src/core/intrusive_string_set_test.cc +++ b/src/core/intrusive_string_set_test.cc @@ -4,6 +4,8 @@ #include "core/intrusive_string_set.h" +#include + #include "base/gtest.h" namespace dfly { @@ -46,9 +48,27 @@ TEST_F(IntrusiveStringSetTest, IntrusiveStringListTest) { EXPECT_EQ(isl.Find("23456789"), ISLEntry()); EXPECT_FALSE(isl.Erase("test")); EXPECT_EQ(isl.Find("test"), ISLEntry()); +} - IntrusiveStringList isl2; - isl2.MoveNext() +TEST_F(IntrusiveStringSetTest, IntrusiveStringSetAddFindTest) { + IntrusiveStringSet ss; + std::set test_set; + + for (int i = 0; i < 10000; ++i) { + test_set.insert(base::RandStr(20)); + } + + for (const auto& s : test_set) { + auto e = ss.Add(s); + EXPECT_EQ(e.Key(), s); + } + + for (const auto& s : test_set) { + auto e = ss.Find(s); + EXPECT_EQ(e.Key(), s); + } + + EXPECT_EQ(ss.Capacity(), 16384); } } // namespace dfly From 3ee31b4206f5d6116a4eb1a2837ef73c750f38c7 Mon Sep 17 00:00:00 2001 From: Borys Date: Wed, 16 Apr 2025 11:20:59 +0300 Subject: [PATCH 04/12] tests: copy tests from StringSetTest to IntrusiveStringSetTest --- src/core/intrusive_string_set.h | 11 +- src/core/intrusive_string_set_test.cc | 730 ++++++++++++++++++++++++++ 2 files changed, 740 insertions(+), 1 deletion(-) diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index dcc23a6b1..bc60322ba 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -11,6 +11,7 @@ #include #include "base/hash.h" +#include "base/pmr/memory_resource.h" namespace dfly { @@ -152,6 +153,10 @@ class IntrusiveStringList { class IntrusiveStringSet { public: + explicit IntrusiveStringSet(PMR_NS::memory_resource* mr = PMR_NS::get_default_resource()) + : entries_(mr) { + } + // TODO add TTL processing ISLEntry Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) { if (size_ >= entries_.size()) { @@ -182,6 +187,10 @@ class IntrusiveStringSet { return entries_[bucket_id].Find(member); } + bool Contains(std::string_view member) const { + return Find(member); + } + // Returns the number of elements in the map. Note that it might be that some of these elements // have expired and can't be accessed. size_t UpperBoundSize() const { @@ -224,7 +233,7 @@ class IntrusiveStringSet { static_assert(sizeof(IntrusiveStringList) == sizeof(void*), "IntrusiveStringList should be just a pointer"); - std::vector entries_; + std::vector> entries_; }; } // namespace dfly diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc index 161d57d59..86d1cbf37 100644 --- a/src/core/intrusive_string_set_test.cc +++ b/src/core/intrusive_string_set_test.cc @@ -4,27 +4,74 @@ #include "core/intrusive_string_set.h" +#include + +#include #include +#include #include "base/gtest.h" +#include "core/mi_memory_resource.h" + +extern "C" { +#include "redis/zmalloc.h" +} namespace dfly { using namespace std; +class ISSAllocator : public PMR_NS::memory_resource { + public: + bool all_freed() const { + return alloced_ == 0; + } + + void* do_allocate(size_t bytes, size_t alignment) override { + alloced_ += bytes; + void* p = PMR_NS::new_delete_resource()->allocate(bytes, alignment); + return p; + } + + void do_deallocate(void* p, size_t bytes, size_t alignment) override { + alloced_ -= bytes; + return PMR_NS::new_delete_resource()->deallocate(p, bytes, alignment); + } + + bool do_is_equal(const PMR_NS::memory_resource& other) const noexcept override { + return PMR_NS::new_delete_resource()->is_equal(other); + } + + private: + size_t alloced_ = 0; +}; + class IntrusiveStringSetTest : public ::testing::Test { protected: static void SetUpTestSuite() { + auto* tlh = mi_heap_get_backing(); + init_zmalloc_threadlocal(tlh); } static void TearDownTestSuite() { } void SetUp() override { + ss_ = new IntrusiveStringSet(&alloc_); + generator_.seed(0); } void TearDown() override { + delete ss_; + + // ensure there are no memory leaks after every test + EXPECT_TRUE(alloc_.all_freed()); + EXPECT_EQ(zmalloc_used_memory_tl, 0); } + + IntrusiveStringSet* ss_; + ISSAllocator alloc_; + mt19937 generator_; }; TEST_F(IntrusiveStringSetTest, IntrusiveStringListTest) { @@ -71,4 +118,687 @@ TEST_F(IntrusiveStringSetTest, IntrusiveStringSetAddFindTest) { EXPECT_EQ(ss.Capacity(), 16384); } +TEST_F(IntrusiveStringSetTest, Basic) { + EXPECT_TRUE(ss_->Add("foo"sv)); + EXPECT_TRUE(ss_->Add("bar"sv)); + EXPECT_FALSE(ss_->Add("foo"sv)); + EXPECT_FALSE(ss_->Add("bar"sv)); + EXPECT_TRUE(ss_->Contains("foo"sv)); + EXPECT_TRUE(ss_->Contains("bar"sv)); + EXPECT_EQ(2, ss_->UpperBoundSize()); +} + +TEST_F(IntrusiveStringSetTest, StandardAddErase) { + EXPECT_TRUE(ss_->Add("@@@@@@@@@@@@@@@@")); + EXPECT_TRUE(ss_->Add("A@@@@@@@@@@@@@@@")); + EXPECT_TRUE(ss_->Add("AA@@@@@@@@@@@@@@")); + EXPECT_TRUE(ss_->Add("AAA@@@@@@@@@@@@@")); + EXPECT_TRUE(ss_->Add("AAAAAAAAA@@@@@@@")); + EXPECT_TRUE(ss_->Add("AAAAAAAAAA@@@@@@")); + EXPECT_TRUE(ss_->Add("AAAAAAAAAAAAAAA@")); + EXPECT_TRUE(ss_->Add("AAAAAAAAAAAAAAAA")); + EXPECT_TRUE(ss_->Add("AAAAAAAAAAAAAAAD")); + EXPECT_TRUE(ss_->Add("BBBBBAAAAAAAAAAA")); + EXPECT_TRUE(ss_->Add("BBBBBBBBAAAAAAAA")); + EXPECT_TRUE(ss_->Add("CCCCCBBBBBBBBBBB")); + + // Remove link in the middle of chain + EXPECT_TRUE(ss_->Erase("BBBBBBBBAAAAAAAA")); + // Remove start of a chain + EXPECT_TRUE(ss_->Erase("CCCCCBBBBBBBBBBB")); + // Remove end of link + EXPECT_TRUE(ss_->Erase("AAA@@@@@@@@@@@@@")); + // Remove only item in chain + EXPECT_TRUE(ss_->Erase("AA@@@@@@@@@@@@@@")); + EXPECT_TRUE(ss_->Erase("AAAAAAAAA@@@@@@@")); + EXPECT_TRUE(ss_->Erase("AAAAAAAAAA@@@@@@")); + EXPECT_TRUE(ss_->Erase("AAAAAAAAAAAAAAA@")); +} + +// TEST_F(IntrusiveStringSetTest, DisplacedBug) { +// string_view vals[] = {"imY", "OVl", "NhH", "BCe", "YDL", "lpb", +// "nhF", "xod", "zYR", "PSa", "hce", "cTR"}; +// ss_->AddMany(absl::MakeSpan(vals), UINT32_MAX, false); + +// ss_->Add("fIc"); +// ss_->Erase("YDL"); +// ss_->Add("fYs"); +// ss_->Erase("hce"); +// ss_->Erase("nhF"); +// ss_->Add("dye"); +// ss_->Add("xZT"); +// ss_->Add("LVK"); +// ss_->Erase("zYR"); +// ss_->Erase("fYs"); +// ss_->Add("ueB"); +// ss_->Erase("PSa"); +// ss_->Erase("OVl"); +// ss_->Add("cga"); +// ss_->Add("too"); +// ss_->Erase("ueB"); +// ss_->Add("HZe"); +// ss_->Add("oQn"); +// ss_->Erase("too"); +// ss_->Erase("HZe"); +// ss_->Erase("xZT"); +// ss_->Erase("cga"); +// ss_->Erase("cTR"); +// ss_->Erase("BCe"); +// ss_->Add("eua"); +// ss_->Erase("lpb"); +// ss_->Add("OXK"); +// ss_->Add("QmO"); +// ss_->Add("SzV"); +// ss_->Erase("QmO"); +// ss_->Add("jbe"); +// ss_->Add("BPN"); +// ss_->Add("OfH"); +// ss_->Add("Muf"); +// ss_->Add("CwP"); +// ss_->Erase("Muf"); +// ss_->Erase("xod"); +// ss_->Add("Cis"); +// ss_->Add("Xvd"); +// ss_->Erase("SzV"); +// ss_->Erase("eua"); +// ss_->Add("DGb"); +// ss_->Add("leD"); +// ss_->Add("MVX"); +// ss_->Add("HPq"); +// } + +// static string random_string(mt19937& rand, unsigned len) { +// const string_view alpanum = "1234567890abcdefghijklmnopqrstuvwxyz"; +// string ret; +// ret.reserve(len); + +// for (size_t i = 0; i < len; ++i) { +// ret += alpanum[rand() % alpanum.size()]; +// } + +// return ret; +// } + +// TEST_F(IntrusiveStringSetTest, Resizing) { +// constexpr size_t num_strs = 4096; +// unordered_set strs; +// while (strs.size() != num_strs) { +// auto str = random_string(generator_, 10); +// strs.insert(str); +// } + +// unsigned size = 0; +// for (auto it = strs.begin(); it != strs.end(); ++it) { +// const auto& str = *it; +// EXPECT_TRUE(ss_->Add(str, 1)); +// EXPECT_EQ(ss_->UpperBoundSize(), size + 1); + +// // make sure we haven't lost any items after a grow +// // which happens every power of 2 +// if ((size & (size - 1)) == 0) { +// for (auto j = strs.begin(); j != it; ++j) { +// const auto& str = *j; +// auto it = ss_->Find(str); +// ASSERT_TRUE(it != ss_->end()); +// EXPECT_TRUE(it.HasExpiry()); +// EXPECT_EQ(it.ExpiryTime(), ss_->time_now() + 1); +// } +// } +// ++size; +// } +// } + +// TEST_F(IntrusiveStringSetTest, SimpleScan) { +// unordered_set info = {"foo", "bar"}; +// unordered_set seen; + +// for (auto str : info) { +// EXPECT_TRUE(ss_->Add(str)); +// } + +// uint32_t cursor = 0; +// do { +// cursor = ss_->Scan(cursor, [&](const sds ptr) { +// sds s = (sds)ptr; +// string_view str{s, sdslen(s)}; +// EXPECT_TRUE(info.count(str)); +// seen.insert(str); +// }); +// } while (cursor != 0); + +// EXPECT_TRUE(seen.size() == info.size() && equal(seen.begin(), seen.end(), info.begin())); +// } + +// // Ensure REDIS scan guarantees are met +// TEST_F(IntrusiveStringSetTest, ScanGuarantees) { +// unordered_set to_be_seen = {"foo", "bar"}; +// unordered_set not_be_seen = {"AAA", "BBB"}; +// unordered_set maybe_seen = {"AA@@@@@@@@@@@@@@", "AAA@@@@@@@@@@@@@", +// "AAAAAAAAA@@@@@@@", "AAAAAAAAAA@@@@@@"}; +// unordered_set seen; + +// auto scan_callback = [&](const sds ptr) { +// sds s = (sds)ptr; +// string_view str{s, sdslen(s)}; +// EXPECT_TRUE(to_be_seen.count(str) || maybe_seen.count(str)); +// EXPECT_FALSE(not_be_seen.count(str)); +// if (to_be_seen.count(str)) { +// seen.insert(str); +// } +// }; + +// EXPECT_EQ(ss_->Scan(0, scan_callback), 0); + +// for (auto str : not_be_seen) { +// EXPECT_TRUE(ss_->Add(str)); +// } + +// for (auto str : not_be_seen) { +// EXPECT_TRUE(ss_->Erase(str)); +// } + +// for (auto str : to_be_seen) { +// EXPECT_TRUE(ss_->Add(str)); +// } + +// // should reach at least the first item in the set +// uint32_t cursor = ss_->Scan(0, scan_callback); + +// for (auto str : maybe_seen) { +// EXPECT_TRUE(ss_->Add(str)); +// } + +// while (cursor != 0) { +// cursor = ss_->Scan(cursor, scan_callback); +// } + +// EXPECT_TRUE(seen.size() == to_be_seen.size()); +// } + +// TEST_F(IntrusiveStringSetTest, IntOnly) { +// constexpr size_t num_ints = 8192; +// unordered_set numbers; +// for (size_t i = 0; i < num_ints; ++i) { +// numbers.insert(i); +// EXPECT_TRUE(ss_->Add(to_string(i))); +// } + +// for (size_t i = 0; i < num_ints; ++i) { +// ASSERT_FALSE(ss_->Add(to_string(i))); +// } + +// size_t num_remove = generator_() % 4096; +// unordered_set removed; + +// for (size_t i = 0; i < num_remove; ++i) { +// auto remove_int = generator_() % num_ints; +// auto remove = to_string(remove_int); +// if (numbers.count(remove_int)) { +// ASSERT_TRUE(ss_->Contains(remove)) << remove_int; +// EXPECT_TRUE(ss_->Erase(remove)); +// numbers.erase(remove_int); +// } else { +// EXPECT_FALSE(ss_->Erase(remove)); +// } + +// EXPECT_FALSE(ss_->Contains(remove)); +// removed.insert(remove); +// } + +// size_t expected_seen = 0; +// auto scan_callback = [&](const sds ptr) { +// string str{ptr, sdslen(ptr)}; +// EXPECT_FALSE(removed.count(str)); + +// if (numbers.count(atoi(str.data()))) { +// ++expected_seen; +// } +// }; + +// uint32_t cursor = 0; +// do { +// cursor = ss_->Scan(cursor, scan_callback); +// // randomly throw in some new numbers +// uint32_t val = generator_(); +// VLOG(1) << "Val " << val; +// ss_->Add(to_string(val)); +// } while (cursor != 0); + +// EXPECT_GE(expected_seen + removed.size(), num_ints); +// } + +// TEST_F(IntrusiveStringSetTest, XtremeScanGrow) { +// unordered_set to_see, force_grow, seen; + +// while (to_see.size() != 8) { +// to_see.insert(random_string(generator_, 10)); +// } + +// while (force_grow.size() != 8192) { +// string str = random_string(generator_, 10); + +// if (to_see.count(str)) { +// continue; +// } + +// force_grow.insert(random_string(generator_, 10)); +// } + +// for (auto& str : to_see) { +// EXPECT_TRUE(ss_->Add(str)); +// } + +// auto scan_callback = [&](const sds ptr) { +// sds s = (sds)ptr; +// string_view str{s, sdslen(s)}; +// if (to_see.count(string(str))) { +// seen.insert(string(str)); +// } +// }; + +// uint32_t cursor = ss_->Scan(0, scan_callback); + +// // force approx 10 grows +// for (auto& s : force_grow) { +// EXPECT_TRUE(ss_->Add(s)); +// } + +// while (cursor != 0) { +// cursor = ss_->Scan(cursor, scan_callback); +// } + +// EXPECT_EQ(seen.size(), to_see.size()); +// } + +// TEST_F(IntrusiveStringSetTest, Pop) { +// constexpr size_t num_items = 8; +// unordered_set to_insert; + +// while (to_insert.size() != num_items) { +// auto str = random_string(generator_, 10); +// if (to_insert.count(str)) { +// continue; +// } + +// to_insert.insert(str); +// EXPECT_TRUE(ss_->Add(str)); +// } + +// while (!ss_->Empty()) { +// size_t size = ss_->UpperBoundSize(); +// auto str = ss_->Pop(); +// DCHECK(ss_->UpperBoundSize() == to_insert.size() - 1); +// DCHECK(str.has_value()); +// DCHECK(to_insert.count(str.value())); +// DCHECK_EQ(ss_->UpperBoundSize(), size - 1); +// to_insert.erase(str.value()); +// } + +// DCHECK(ss_->Empty()); +// DCHECK(to_insert.empty()); +// } + +// TEST_F(IntrusiveStringSetTest, Iteration) { +// ss_->Add("foo"); +// for (const sds ptr : *ss_) { +// LOG(INFO) << ptr; +// } +// ss_->Clear(); +// constexpr size_t num_items = 8192; +// unordered_set to_insert; + +// while (to_insert.size() != num_items) { +// auto str = random_string(generator_, 10); +// if (to_insert.count(str)) { +// continue; +// } + +// to_insert.insert(str); +// EXPECT_TRUE(ss_->Add(str)); +// } + +// for (const sds ptr : *ss_) { +// string str{ptr, sdslen(ptr)}; +// EXPECT_TRUE(to_insert.count(str)); +// to_insert.erase(str); +// } + +// EXPECT_EQ(to_insert.size(), 0); +// } + +// TEST_F(IntrusiveStringSetTest, SetFieldExpireHasExpiry) { +// EXPECT_TRUE(ss_->Add("k1", 100)); +// auto k = ss_->Find("k1"); +// EXPECT_TRUE(k.HasExpiry()); +// EXPECT_EQ(k.ExpiryTime(), 100); +// k.SetExpiryTime(1); +// EXPECT_TRUE(k.HasExpiry()); +// EXPECT_EQ(k.ExpiryTime(), 1); +// } + +// TEST_F(IntrusiveStringSetTest, SetFieldExpireNoHasExpiry) { +// EXPECT_TRUE(ss_->Add("k1")); +// auto k = ss_->Find("k1"); +// EXPECT_FALSE(k.HasExpiry()); +// k.SetExpiryTime(10); +// EXPECT_TRUE(k.HasExpiry()); +// EXPECT_EQ(k.ExpiryTime(), 10); +// } + +// TEST_F(IntrusiveStringSetTest, Ttl) { +// EXPECT_TRUE(ss_->Add("bla"sv, 1)); +// EXPECT_FALSE(ss_->Add("bla"sv, 1)); +// auto it = ss_->Find("bla"sv); +// EXPECT_EQ(1u, it.ExpiryTime()); + +// ss_->set_time(1); +// EXPECT_TRUE(ss_->Add("bla"sv, 1)); +// EXPECT_EQ(1u, ss_->UpperBoundSize()); + +// for (unsigned i = 0; i < 100; ++i) { +// EXPECT_TRUE(ss_->Add(StrCat("foo", i), 1)); +// } +// EXPECT_EQ(101u, ss_->UpperBoundSize()); +// it = ss_->Find("foo50"); +// EXPECT_STREQ("foo50", *it); +// EXPECT_EQ(2u, it.ExpiryTime()); + +// ss_->set_time(2); +// for (unsigned i = 0; i < 100; ++i) { +// EXPECT_TRUE(ss_->Add(StrCat("bar", i))); +// } +// it = ss_->Find("bar50"); +// EXPECT_FALSE(it.HasExpiry()); + +// for (auto it = ss_->begin(); it != ss_->end(); ++it) { +// ASSERT_TRUE(absl::StartsWith(*it, "bar")) << *it; +// string str = *it; +// VLOG(1) << *it; +// } +// } + +// TEST_F(IntrusiveStringSetTest, Grow) { +// for (size_t j = 0; j < 10; ++j) { +// for (size_t i = 0; i < 4098; ++i) { +// ss_->Reserve(generator_() % 256); +// auto str = random_string(generator_, 3); +// ss_->Add(str); +// } +// ss_->Clear(); +// } +// } + +// TEST_F(IntrusiveStringSetTest, Reserve) { +// vector strs; + +// for (size_t i = 0; i < 10; ++i) { +// strs.push_back(random_string(generator_, 10)); +// ss_->Add(strs.back()); +// } + +// for (size_t j = 2; j < 20; j += 3) { +// ss_->Reserve(j * 20); +// for (size_t i = 0; i < 10; ++i) { +// ASSERT_TRUE(ss_->Contains(strs[i])); +// } +// } +// } + +// TEST_F(IntrusiveStringSetTest, Fill) { +// for (size_t i = 0; i < 100; ++i) { +// ss_->Add(random_string(generator_, 10)); +// } +// StringSet s2; +// ss_->Fill(&s2); +// EXPECT_EQ(s2.UpperBoundSize(), ss_->UpperBoundSize()); +// for (sds str : *ss_) { +// EXPECT_TRUE(s2.Contains(str)); +// } +// } + +// TEST_F(IntrusiveStringSetTest, IterateEmpty) { +// for (const auto& s : *ss_) { +// // We're iterating to make sure there is no crash. However, if we got here, it's a bug +// CHECK(false) << "Found entry " << s << " in empty set"; +// } +// } + +// size_t memUsed(StringSet& obj) { +// return obj.ObjMallocUsed() + obj.SetMallocUsed(); +// } + +// void BM_Clone(benchmark::State& state) { +// vector strs; +// mt19937 generator(0); +// StringSet ss1, ss2; +// unsigned elems = state.range(0); +// for (size_t i = 0; i < elems; ++i) { +// string str = random_string(generator, 10); +// ss1.Add(str); +// } +// ss2.Reserve(ss1.UpperBoundSize()); +// while (state.KeepRunning()) { +// for (auto src : ss1) { +// ss2.Add(src); +// } +// state.PauseTiming(); +// ss2.Clear(); +// ss2.Reserve(ss1.UpperBoundSize()); +// state.ResumeTiming(); +// } +// } +// BENCHMARK(BM_Clone)->ArgName("elements")->Arg(32000); + +// void BM_Fill(benchmark::State& state) { +// unsigned elems = state.range(0); +// vector strs; +// mt19937 generator(0); +// StringSet ss1, ss2; +// for (size_t i = 0; i < elems; ++i) { +// string str = random_string(generator, 10); +// ss1.Add(str); +// } + +// while (state.KeepRunning()) { +// ss1.Fill(&ss2); +// state.PauseTiming(); +// ss2.Clear(); +// state.ResumeTiming(); +// } +// } +// BENCHMARK(BM_Fill)->ArgName("elements")->Arg(32000); + +// void BM_Clear(benchmark::State& state) { +// unsigned elems = state.range(0); +// mt19937 generator(0); +// StringSet ss; +// while (state.KeepRunning()) { +// state.PauseTiming(); +// for (size_t i = 0; i < elems; ++i) { +// string str = random_string(generator, 16); +// ss.Add(str); +// } +// state.ResumeTiming(); +// ss.Clear(); +// } +// } +// BENCHMARK(BM_Clear)->ArgName("elements")->Arg(32000); + +// void BM_Add(benchmark::State& state) { +// vector strs; +// mt19937 generator(0); +// StringSet ss; +// unsigned elems = state.range(0); +// unsigned keySize = state.range(1); +// for (size_t i = 0; i < elems; ++i) { +// string str = random_string(generator, keySize); +// strs.push_back(str); +// } +// ss.Reserve(elems); +// while (state.KeepRunning()) { +// for (auto& str : strs) +// ss.Add(str); +// state.PauseTiming(); +// state.counters["Memory_Used"] = memUsed(ss); +// ss.Clear(); +// ss.Reserve(elems); +// state.ResumeTiming(); +// } +// } +// BENCHMARK(BM_Add) +// ->ArgNames({"elements", "Key Size"}) +// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}}); + +// void BM_AddMany(benchmark::State& state) { +// vector strs; +// mt19937 generator(0); +// StringSet ss; +// unsigned elems = state.range(0); +// unsigned keySize = state.range(1); +// for (size_t i = 0; i < elems; ++i) { +// string str = random_string(generator, keySize); +// strs.push_back(str); +// } +// ss.Reserve(elems); +// vector svs; +// for (const auto& str : strs) { +// svs.push_back(str); +// } +// while (state.KeepRunning()) { +// ss.AddMany(absl::MakeSpan(svs), UINT32_MAX, false); +// state.PauseTiming(); +// CHECK_EQ(ss.UpperBoundSize(), elems); +// state.counters["Memory_Used"] = memUsed(ss); +// ss.Clear(); +// ss.Reserve(elems); +// state.ResumeTiming(); +// } +// } +// BENCHMARK(BM_AddMany) +// ->ArgNames({"elements", "Key Size"}) +// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}}); + +// void BM_Erase(benchmark::State& state) { +// std::vector strs; +// mt19937 generator(0); +// StringSet ss; +// auto elems = state.range(0); +// auto keySize = state.range(1); +// for (long int i = 0; i < elems; ++i) { +// std::string str = random_string(generator, keySize); +// strs.push_back(str); +// ss.Add(str); +// } +// state.counters["Memory_Before_Erase"] = memUsed(ss); +// while (state.KeepRunning()) { +// for (auto& str : strs) { +// ss.Erase(str); +// } +// state.PauseTiming(); +// state.counters["Memory_After_Erase"] = memUsed(ss); +// for (auto& str : strs) { +// ss.Add(str); +// } +// state.ResumeTiming(); +// } +// } +// BENCHMARK(BM_Erase) +// ->ArgNames({"elements", "Key Size"}) +// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}}); + +// void BM_Get(benchmark::State& state) { +// std::vector strs; +// mt19937 generator(0); +// StringSet ss; +// auto elems = state.range(0); +// auto keySize = state.range(1); +// for (long int i = 0; i < elems; ++i) { +// std::string str = random_string(generator, keySize); +// strs.push_back(str); +// ss.Add(str); +// } +// while (state.KeepRunning()) { +// for (auto& str : strs) { +// ss.Find(str); +// } +// } +// } +// BENCHMARK(BM_Get) +// ->ArgNames({"elements", "Key Size"}) +// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}}); + +// void BM_Grow(benchmark::State& state) { +// vector strs; +// mt19937 generator(0); +// StringSet src; +// unsigned elems = 1 << 18; +// for (size_t i = 0; i < elems; ++i) { +// src.Add(random_string(generator, 16), UINT32_MAX); +// strs.push_back(random_string(generator, 16)); +// } + +// while (state.KeepRunning()) { +// state.PauseTiming(); +// StringSet tmp; +// src.Fill(&tmp); +// CHECK_EQ(tmp.BucketCount(), elems); +// state.ResumeTiming(); +// for (const auto& str : strs) { +// tmp.Add(str); +// if (tmp.BucketCount() > elems) { +// break; // we grew +// } +// } + +// CHECK_GT(tmp.BucketCount(), elems); +// } +// } +// BENCHMARK(BM_Grow); + +// unsigned total_wasted_memory = 0; + +// TEST_F(IntrusiveStringSetTest, ReallocIfNeeded) { +// auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); }; + +// auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block, +// size_t block_size, void* arg) { +// size_t used = block_size * area->used; +// total_wasted_memory += area->committed - used; +// return true; +// }; + +// for (size_t i = 0; i < 10'000; i++) +// ss_->Add(build_str(i)); + +// for (size_t i = 0; i < 10'000; i++) { +// if (i % 10 == 0) +// continue; +// ss_->Erase(build_str(i)); +// } + +// mi_heap_collect(mi_heap_get_backing(), true); +// mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr); +// size_t wasted_before = total_wasted_memory; + +// size_t underutilized = 0; +// for (auto it = ss_->begin(); it != ss_->end(); ++it) { +// underutilized += zmalloc_page_is_underutilized(*it, 0.9); +// it.ReallocIfNeeded(0.9); +// } +// // Check there are underutilized pages +// CHECK_GT(underutilized, 0u); + +// total_wasted_memory = 0; +// mi_heap_collect(mi_heap_get_backing(), true); +// mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr); +// size_t wasted_after = total_wasted_memory; + +// // Check we waste significanlty less now +// EXPECT_GT(wasted_before, wasted_after * 2); + +// EXPECT_EQ(ss_->UpperBoundSize(), 1000); +// for (size_t i = 0; i < 1000; i++) +// EXPECT_EQ(*ss_->Find(build_str(i * 10)), build_str(i * 10)); +// } + } // namespace dfly From a713ff9e3d55580251472cf84451709d3e6170c5 Mon Sep 17 00:00:00 2001 From: Borys Date: Wed, 16 Apr 2025 12:52:51 +0300 Subject: [PATCH 05/12] feat: add AddMany() --- src/core/intrusive_string_set.h | 47 ++++++++++-- src/core/intrusive_string_set_test.cc | 100 +++++++++++++------------- 2 files changed, 91 insertions(+), 56 deletions(-) diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index bc60322ba..085f68785 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -4,6 +4,9 @@ #pragma once +#include +#include + #include #include #include @@ -159,13 +162,10 @@ class IntrusiveStringSet { // TODO add TTL processing ISLEntry Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) { - if (size_ >= entries_.size()) { - auto prev_size = entries_.size(); - ++capacity_log_; - entries_.resize(Capacity()); - Rehash(prev_size); + if (entries_.empty() || size_ >= entries_.size()) { + Resize(Capacity() * 2); } - auto bucket_id = BucketId(Hash(str)); + const auto bucket_id = BucketId(Hash(str)); auto& bucket = entries_[bucket_id]; if (auto existed_item = bucket.Find(str); existed_item) { @@ -173,16 +173,51 @@ class IntrusiveStringSet { return ISLEntry(); } + return AddUnique(str, bucket, ttl_sec); + } + + void Resize(size_t sz) { + sz = absl::bit_ceil(sz); + if (sz > entries_.size()) { + size_t prev_size = entries_.size(); + capacity_log_ = absl::bit_width(sz) - 1; + entries_.resize(sz); + Rehash(prev_size); + } + } + + ISLEntry AddUnique(std::string_view str, IntrusiveStringList& bucket, + uint32_t ttl_sec = UINT32_MAX) { ++size_; return bucket.Emplace(str); } + unsigned AddMany(absl::Span span, uint32_t ttl_sec, bool keepttl) { + Resize(span.size()); + unsigned res = 0; + for (auto& s : span) { + const auto bucket_id = BucketId(Hash(s)); + auto& bucket = entries_[bucket_id]; + if (auto existed_item = bucket.Find(s); existed_item) { + // TODO update TTL + } else { + ++res; + AddUnique(s, bucket, ttl_sec); + } + } + return res; + } + bool Erase(std::string_view str) { + if (entries_.empty()) + return false; auto bucket_id = BucketId(Hash(str)); return entries_[bucket_id].Erase(str); } ISLEntry Find(std::string_view member) const { + if (entries_.empty()) + return {}; auto bucket_id = BucketId(Hash(member)); return entries_[bucket_id].Find(member); } diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc index 86d1cbf37..db3029463 100644 --- a/src/core/intrusive_string_set_test.cc +++ b/src/core/intrusive_string_set_test.cc @@ -155,57 +155,57 @@ TEST_F(IntrusiveStringSetTest, StandardAddErase) { EXPECT_TRUE(ss_->Erase("AAAAAAAAAAAAAAA@")); } -// TEST_F(IntrusiveStringSetTest, DisplacedBug) { -// string_view vals[] = {"imY", "OVl", "NhH", "BCe", "YDL", "lpb", -// "nhF", "xod", "zYR", "PSa", "hce", "cTR"}; -// ss_->AddMany(absl::MakeSpan(vals), UINT32_MAX, false); +TEST_F(IntrusiveStringSetTest, DisplacedBug) { + string_view vals[] = {"imY", "OVl", "NhH", "BCe", "YDL", "lpb", + "nhF", "xod", "zYR", "PSa", "hce", "cTR"}; + ss_->AddMany(absl::MakeSpan(vals), UINT32_MAX, false); -// ss_->Add("fIc"); -// ss_->Erase("YDL"); -// ss_->Add("fYs"); -// ss_->Erase("hce"); -// ss_->Erase("nhF"); -// ss_->Add("dye"); -// ss_->Add("xZT"); -// ss_->Add("LVK"); -// ss_->Erase("zYR"); -// ss_->Erase("fYs"); -// ss_->Add("ueB"); -// ss_->Erase("PSa"); -// ss_->Erase("OVl"); -// ss_->Add("cga"); -// ss_->Add("too"); -// ss_->Erase("ueB"); -// ss_->Add("HZe"); -// ss_->Add("oQn"); -// ss_->Erase("too"); -// ss_->Erase("HZe"); -// ss_->Erase("xZT"); -// ss_->Erase("cga"); -// ss_->Erase("cTR"); -// ss_->Erase("BCe"); -// ss_->Add("eua"); -// ss_->Erase("lpb"); -// ss_->Add("OXK"); -// ss_->Add("QmO"); -// ss_->Add("SzV"); -// ss_->Erase("QmO"); -// ss_->Add("jbe"); -// ss_->Add("BPN"); -// ss_->Add("OfH"); -// ss_->Add("Muf"); -// ss_->Add("CwP"); -// ss_->Erase("Muf"); -// ss_->Erase("xod"); -// ss_->Add("Cis"); -// ss_->Add("Xvd"); -// ss_->Erase("SzV"); -// ss_->Erase("eua"); -// ss_->Add("DGb"); -// ss_->Add("leD"); -// ss_->Add("MVX"); -// ss_->Add("HPq"); -// } + ss_->Add("fIc"); + ss_->Erase("YDL"); + ss_->Add("fYs"); + ss_->Erase("hce"); + ss_->Erase("nhF"); + ss_->Add("dye"); + ss_->Add("xZT"); + ss_->Add("LVK"); + ss_->Erase("zYR"); + ss_->Erase("fYs"); + ss_->Add("ueB"); + ss_->Erase("PSa"); + ss_->Erase("OVl"); + ss_->Add("cga"); + ss_->Add("too"); + ss_->Erase("ueB"); + ss_->Add("HZe"); + ss_->Add("oQn"); + ss_->Erase("too"); + ss_->Erase("HZe"); + ss_->Erase("xZT"); + ss_->Erase("cga"); + ss_->Erase("cTR"); + ss_->Erase("BCe"); + ss_->Add("eua"); + ss_->Erase("lpb"); + ss_->Add("OXK"); + ss_->Add("QmO"); + ss_->Add("SzV"); + ss_->Erase("QmO"); + ss_->Add("jbe"); + ss_->Add("BPN"); + ss_->Add("OfH"); + ss_->Add("Muf"); + ss_->Add("CwP"); + ss_->Erase("Muf"); + ss_->Erase("xod"); + ss_->Add("Cis"); + ss_->Add("Xvd"); + ss_->Erase("SzV"); + ss_->Erase("eua"); + ss_->Add("DGb"); + ss_->Add("leD"); + ss_->Add("MVX"); + ss_->Add("HPq"); +} // static string random_string(mt19937& rand, unsigned len) { // const string_view alpanum = "1234567890abcdefghijklmnopqrstuvwxyz"; From 9ae71e509a655c84bbf3b2df0a381ab4f6064d33 Mon Sep 17 00:00:00 2001 From: Borys Date: Wed, 16 Apr 2025 15:59:07 +0300 Subject: [PATCH 06/12] feat: add TTL --- src/core/intrusive_string_set.h | 93 +++++++++++++++++++++++---- src/core/intrusive_string_set_test.cc | 70 ++++++++++---------- 2 files changed, 115 insertions(+), 48 deletions(-) diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index 085f68785..0e6832c2e 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -21,6 +21,13 @@ namespace dfly { class ISLEntry { friend class IntrusiveStringList; + // we can assume that high 12 bits of user address space + // can be used for tagging. At most 52 bits of address are reserved for + // some configurations, and usually it's 48 bits. + // https://docs.kernel.org/arch/arm64/memory.html + static constexpr size_t kTtlBit = 1ULL << 55; + static constexpr size_t kTagMask = 4095ULL << 52; // we reserve 12 high bits. + public: ISLEntry() = default; @@ -36,50 +43,95 @@ class ISLEntry { return {GetKeyData(), GetKeySize()}; } + bool HasExpiry() const { + return HasTtl(); + } + + // returns the expiry time of the current entry or UINT32_MAX if no ttl is set. + uint32_t ExpiryTime() const { + std::uint32_t res = UINT32_MAX; + if (HasTtl()) { + std::memcpy(&res, Raw() + sizeof(ISLEntry*), sizeof(res)); + } + return res; + } + private: - static ISLEntry Create(std::string_view key) { + static ISLEntry Create(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { char* next = nullptr; uint32_t key_size = key.size(); - auto size = sizeof(next) + sizeof(key_size) + key_size; + bool has_ttl = ttl_sec != UINT32_MAX; + + auto size = sizeof(next) + sizeof(key_size) + key_size + has_ttl * sizeof(ttl_sec); char* data = (char*)malloc(size); + ISLEntry res(data); std::memcpy(data, &next, sizeof(next)); - auto* key_size_pos = data + sizeof(next); + auto* ttl_pos = data + sizeof(next); + if (has_ttl) { + res.SetTtlBit(true); + std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec)); + } + + auto* key_size_pos = ttl_pos + res.GetTtlSize(); std::memcpy(key_size_pos, &key_size, sizeof(key_size)); auto* key_pos = key_size_pos + sizeof(key_size); std::memcpy(key_pos, key.data(), key_size); - return ISLEntry(data); + return res; } static void Destroy(ISLEntry entry) { - free(entry.data_); + free(entry.Raw()); } ISLEntry Next() const { ISLEntry next; - std::memcpy(&next.data_, data_, sizeof(next)); + std::memcpy(&next.data_, Raw(), sizeof(next)); return next; } void SetNext(ISLEntry next) { - std::memcpy(data_, &next, sizeof(next)); + std::memcpy(Raw(), &next, sizeof(next)); } const char* GetKeyData() const { - return data_ + sizeof(ISLEntry*) + sizeof(uint32_t); + return Raw() + sizeof(ISLEntry*) + sizeof(uint32_t) + GetTtlSize(); } uint32_t GetKeySize() const { uint32_t size = 0; - std::memcpy(&size, data_ + sizeof(ISLEntry*), sizeof(size)); + std::memcpy(&size, Raw() + sizeof(ISLEntry*) + GetTtlSize(), sizeof(size)); return size; } + uint64_t uptr() const { + return uint64_t(data_); + } + + char* Raw() const { + return (char*)(uptr() & ~kTagMask); + } + + void SetTtlBit(bool b) { + if (b) + data_ = (char*)(uptr() | kTtlBit); + else + data_ = (char*)(uptr() & (~kTtlBit)); + } + + bool HasTtl() const { + return (uptr() & kTtlBit) != 0; + } + + std::uint32_t GetTtlSize() const { + return HasTtl() ? sizeof(std::uint32_t) : 0; + } + // TODO consider use SDS strings or other approach // TODO add optimization for big keys // memory daya layout [ISLEntry*, key_size, key] @@ -117,8 +169,8 @@ class IntrusiveStringList { return res; } - ISLEntry Emplace(std::string_view key) { - return Insert(ISLEntry::Create(key)); + ISLEntry Emplace(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { + return Insert(ISLEntry::Create(key, ttl_sec)); } ISLEntry Find(std::string_view str) const { @@ -189,7 +241,7 @@ class IntrusiveStringSet { ISLEntry AddUnique(std::string_view str, IntrusiveStringList& bucket, uint32_t ttl_sec = UINT32_MAX) { ++size_; - return bucket.Emplace(str); + return bucket.Emplace(str, ttl_sec); } unsigned AddMany(absl::Span span, uint32_t ttl_sec, bool keepttl) { @@ -219,7 +271,12 @@ class IntrusiveStringSet { if (entries_.empty()) return {}; auto bucket_id = BucketId(Hash(member)); - return entries_[bucket_id].Find(member); + auto res = entries_[bucket_id].Find(member); + if (!res) { + bucket_id = BucketId(Hash(member)); + res = entries_[bucket_id].Find(member); + } + return res; } bool Contains(std::string_view member) const { @@ -240,6 +297,15 @@ class IntrusiveStringSet { return 1 << capacity_log_; } + // set an abstract time that allows expiry. + void set_time(uint32_t val) { + time_now_ = val; + } + + uint32_t time_now() const { + return time_now_; + } + private: // was Grow in StringSet void Rehash(size_t prev_size) { @@ -265,6 +331,7 @@ class IntrusiveStringSet { private: std::uint32_t capacity_log_ = 1; std::uint32_t size_ = 0; // number of elements in the set. + std::uint32_t time_now_ = 0; static_assert(sizeof(IntrusiveStringList) == sizeof(void*), "IntrusiveStringList should be just a pointer"); diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc index db3029463..054793fdd 100644 --- a/src/core/intrusive_string_set_test.cc +++ b/src/core/intrusive_string_set_test.cc @@ -207,46 +207,46 @@ TEST_F(IntrusiveStringSetTest, DisplacedBug) { ss_->Add("HPq"); } -// static string random_string(mt19937& rand, unsigned len) { -// const string_view alpanum = "1234567890abcdefghijklmnopqrstuvwxyz"; -// string ret; -// ret.reserve(len); +static string random_string(mt19937& rand, unsigned len) { + const string_view alpanum = "1234567890abcdefghijklmnopqrstuvwxyz"; + string ret; + ret.reserve(len); -// for (size_t i = 0; i < len; ++i) { -// ret += alpanum[rand() % alpanum.size()]; -// } + for (size_t i = 0; i < len; ++i) { + ret += alpanum[rand() % alpanum.size()]; + } -// return ret; -// } + return ret; +} -// TEST_F(IntrusiveStringSetTest, Resizing) { -// constexpr size_t num_strs = 4096; -// unordered_set strs; -// while (strs.size() != num_strs) { -// auto str = random_string(generator_, 10); -// strs.insert(str); -// } +TEST_F(IntrusiveStringSetTest, Resizing) { + constexpr size_t num_strs = 4096; + unordered_set strs; + while (strs.size() != num_strs) { + auto str = random_string(generator_, 10); + strs.insert(str); + } -// unsigned size = 0; -// for (auto it = strs.begin(); it != strs.end(); ++it) { -// const auto& str = *it; -// EXPECT_TRUE(ss_->Add(str, 1)); -// EXPECT_EQ(ss_->UpperBoundSize(), size + 1); + unsigned size = 0; + for (auto it = strs.begin(); it != strs.end(); ++it) { + const auto& str = *it; + EXPECT_TRUE(ss_->Add(str, 1)); + EXPECT_EQ(ss_->UpperBoundSize(), size + 1); -// // make sure we haven't lost any items after a grow -// // which happens every power of 2 -// if ((size & (size - 1)) == 0) { -// for (auto j = strs.begin(); j != it; ++j) { -// const auto& str = *j; -// auto it = ss_->Find(str); -// ASSERT_TRUE(it != ss_->end()); -// EXPECT_TRUE(it.HasExpiry()); -// EXPECT_EQ(it.ExpiryTime(), ss_->time_now() + 1); -// } -// } -// ++size; -// } -// } + // make sure we haven't lost any items after a grow + // which happens every power of 2 + if ((size & (size - 1)) == 0) { + for (auto j = strs.begin(); j != it; ++j) { + const auto& str = *j; + auto it = ss_->Find(str); + ASSERT_TRUE(it); + EXPECT_TRUE(it.HasExpiry()); + EXPECT_EQ(it.ExpiryTime(), ss_->time_now() + 1); + } + } + ++size; + } +} // TEST_F(IntrusiveStringSetTest, SimpleScan) { // unordered_set info = {"foo", "bar"}; From 9da231da6a76b96ebe61ff41d8a759038fe345bf Mon Sep 17 00:00:00 2001 From: Borys Date: Thu, 17 Apr 2025 14:33:13 +0300 Subject: [PATCH 07/12] feat: add Scan --- src/core/intrusive_string_set.h | 74 +++++++- src/core/intrusive_string_set_test.cc | 251 +++++++++++++------------- 2 files changed, 191 insertions(+), 134 deletions(-) diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index 0e6832c2e..ccd1ddd62 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -169,6 +169,10 @@ class IntrusiveStringList { return res; } + bool Empty() { + return start_; + } + ISLEntry Emplace(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { return Insert(ISLEntry::Create(key, ttl_sec)); } @@ -181,19 +185,24 @@ class IntrusiveStringList { } bool Erase(std::string_view str) { + auto cond = [str](const ISLEntry e) { return str == e.Key(); }; + return Erase(cond); + } + + template >* = nullptr> + bool Erase(const T& cond) { if (!start_) { return false; } - auto it = start_; - if (it.Key() == str) { + + if (auto it = start_; cond(it)) { start_ = it.Next(); ISLEntry::Destroy(it); return true; } - auto prev = it; - for (it = it.Next(); it; prev = it, it = it.Next()) { - if (it.Key() == str) { + for (auto prev = start_, it = start_.Next(); it; prev = it, it = it.Next()) { + if (cond(it)) { prev.SetNext(it.Next()); ISLEntry::Destroy(it); return true; @@ -202,6 +211,25 @@ class IntrusiveStringList { return false; } + template >* = nullptr> + bool Scan(const T& cb, uint32_t curr_time) { + for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) { + start_ = it.Next(); + ISLEntry::Destroy(it); + } + + for (auto curr = start_, next = start_; curr; curr = next) { + cb(curr.Key()); + next = curr.Next(); + for (auto tmp = next; tmp && tmp.ExpiryTime() < curr_time; tmp = next) { + next = next.Next(); + ISLEntry::Destroy(tmp); + } + curr.SetNext(next); + } + return start_; + } + private: ISLEntry start_; }; @@ -260,6 +288,40 @@ class IntrusiveStringSet { return res; } + /** + * stable scanning api. has the same guarantees as redis scan command. + * we avoid doing bit-reverse by using a different function to derive a bucket id + * from hash values. By using msb part of hash we make it "stable" with respect to + * rehashes. For example, with table log size 4 (size 16), entries in bucket id + * 1110 come from hashes 1110XXXXX.... When a table grows to log size 5, + * these entries can move either to 11100 or 11101. So if we traversed with our cursor + * range [0000-1110], it's guaranteed that in grown table we do not need to cover again + * [00000-11100]. Similarly with shrinkage, if a table is shrunk to log size 3, + * keys from 1110 and 1111 will move to bucket 111. Again, it's guaranteed that we + * covered the range [000-111] (all keys in that case). + * Returns: next cursor or 0 if reached the end of scan. + * cursor = 0 - initiates a new scan. + */ + + using ItemCb = std::function; + + uint32_t Scan(uint32_t cursor, const ItemCb& cb) { + uint32_t entries_idx = cursor >> (32 - capacity_log_); + + // First find the bucket to scan, skip empty buckets. + for (; entries_idx < entries_.size(); ++entries_idx) { + if (entries_[entries_idx].Scan(cb, time_now_)) { + break; + } + } + + if (++entries_idx >= entries_.size()) { + return 0; + } + + return entries_idx << (32 - capacity_log_); + } + bool Erase(std::string_view str) { if (entries_.empty()) return false; @@ -329,7 +391,7 @@ class IntrusiveStringSet { } private: - std::uint32_t capacity_log_ = 1; + std::uint32_t capacity_log_ = 0; std::uint32_t size_ = 0; // number of elements in the set. std::uint32_t time_now_ = 0; diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc index 054793fdd..65cfdfd03 100644 --- a/src/core/intrusive_string_set_test.cc +++ b/src/core/intrusive_string_set_test.cc @@ -248,167 +248,162 @@ TEST_F(IntrusiveStringSetTest, Resizing) { } } -// TEST_F(IntrusiveStringSetTest, SimpleScan) { -// unordered_set info = {"foo", "bar"}; -// unordered_set seen; +TEST_F(IntrusiveStringSetTest, SimpleScan) { + unordered_set info = {"foo", "bar"}; + unordered_set seen; -// for (auto str : info) { -// EXPECT_TRUE(ss_->Add(str)); -// } + for (auto str : info) { + EXPECT_TRUE(ss_->Add(str)); + } -// uint32_t cursor = 0; -// do { -// cursor = ss_->Scan(cursor, [&](const sds ptr) { -// sds s = (sds)ptr; -// string_view str{s, sdslen(s)}; -// EXPECT_TRUE(info.count(str)); -// seen.insert(str); -// }); -// } while (cursor != 0); + uint32_t cursor = 0; + do { + cursor = ss_->Scan(cursor, [&](std::string_view str) { + EXPECT_TRUE(info.count(str)); + seen.insert(str); + }); + } while (cursor != 0); -// EXPECT_TRUE(seen.size() == info.size() && equal(seen.begin(), seen.end(), info.begin())); -// } + EXPECT_EQ(seen.size(), info.size()); + EXPECT_TRUE(equal(seen.begin(), seen.end(), info.begin())); +} -// // Ensure REDIS scan guarantees are met -// TEST_F(IntrusiveStringSetTest, ScanGuarantees) { -// unordered_set to_be_seen = {"foo", "bar"}; -// unordered_set not_be_seen = {"AAA", "BBB"}; -// unordered_set maybe_seen = {"AA@@@@@@@@@@@@@@", "AAA@@@@@@@@@@@@@", -// "AAAAAAAAA@@@@@@@", "AAAAAAAAAA@@@@@@"}; -// unordered_set seen; +// Ensure REDIS scan guarantees are met +TEST_F(IntrusiveStringSetTest, ScanGuarantees) { + unordered_set to_be_seen = {"foo", "bar"}; + unordered_set not_be_seen = {"AAA", "BBB"}; + unordered_set maybe_seen = {"AA@@@@@@@@@@@@@@", "AAA@@@@@@@@@@@@@", + "AAAAAAAAA@@@@@@@", "AAAAAAAAAA@@@@@@"}; + unordered_set seen; -// auto scan_callback = [&](const sds ptr) { -// sds s = (sds)ptr; -// string_view str{s, sdslen(s)}; -// EXPECT_TRUE(to_be_seen.count(str) || maybe_seen.count(str)); -// EXPECT_FALSE(not_be_seen.count(str)); -// if (to_be_seen.count(str)) { -// seen.insert(str); -// } -// }; + auto scan_callback = [&](std::string_view str) { + EXPECT_TRUE(to_be_seen.count(str) || maybe_seen.count(str)); + EXPECT_FALSE(not_be_seen.count(str)); + if (to_be_seen.count(str)) { + seen.insert(str); + } + }; -// EXPECT_EQ(ss_->Scan(0, scan_callback), 0); + EXPECT_EQ(ss_->Scan(0, scan_callback), 0); -// for (auto str : not_be_seen) { -// EXPECT_TRUE(ss_->Add(str)); -// } + for (auto str : not_be_seen) { + EXPECT_TRUE(ss_->Add(str)); + } -// for (auto str : not_be_seen) { -// EXPECT_TRUE(ss_->Erase(str)); -// } + for (auto str : not_be_seen) { + EXPECT_TRUE(ss_->Erase(str)); + } -// for (auto str : to_be_seen) { -// EXPECT_TRUE(ss_->Add(str)); -// } + for (auto str : to_be_seen) { + EXPECT_TRUE(ss_->Add(str)); + } -// // should reach at least the first item in the set -// uint32_t cursor = ss_->Scan(0, scan_callback); + // should reach at least the first item in the set + uint32_t cursor = ss_->Scan(0, scan_callback); -// for (auto str : maybe_seen) { -// EXPECT_TRUE(ss_->Add(str)); -// } + for (auto str : maybe_seen) { + EXPECT_TRUE(ss_->Add(str)); + } -// while (cursor != 0) { -// cursor = ss_->Scan(cursor, scan_callback); -// } + while (cursor != 0) { + cursor = ss_->Scan(cursor, scan_callback); + } -// EXPECT_TRUE(seen.size() == to_be_seen.size()); -// } + EXPECT_TRUE(seen.size() == to_be_seen.size()); +} -// TEST_F(IntrusiveStringSetTest, IntOnly) { -// constexpr size_t num_ints = 8192; -// unordered_set numbers; -// for (size_t i = 0; i < num_ints; ++i) { -// numbers.insert(i); -// EXPECT_TRUE(ss_->Add(to_string(i))); -// } +TEST_F(IntrusiveStringSetTest, IntOnly) { + constexpr size_t num_ints = 8192; + unordered_set numbers; + for (size_t i = 0; i < num_ints; ++i) { + numbers.insert(i); + EXPECT_TRUE(ss_->Add(to_string(i))); + } -// for (size_t i = 0; i < num_ints; ++i) { -// ASSERT_FALSE(ss_->Add(to_string(i))); -// } + for (size_t i = 0; i < num_ints; ++i) { + ASSERT_FALSE(ss_->Add(to_string(i))); + } -// size_t num_remove = generator_() % 4096; -// unordered_set removed; + size_t num_remove = generator_() % 4096; + unordered_set removed; -// for (size_t i = 0; i < num_remove; ++i) { -// auto remove_int = generator_() % num_ints; -// auto remove = to_string(remove_int); -// if (numbers.count(remove_int)) { -// ASSERT_TRUE(ss_->Contains(remove)) << remove_int; -// EXPECT_TRUE(ss_->Erase(remove)); -// numbers.erase(remove_int); -// } else { -// EXPECT_FALSE(ss_->Erase(remove)); -// } + for (size_t i = 0; i < num_remove; ++i) { + auto remove_int = generator_() % num_ints; + auto remove = to_string(remove_int); + if (numbers.count(remove_int)) { + ASSERT_TRUE(ss_->Contains(remove)) << remove_int; + EXPECT_TRUE(ss_->Erase(remove)); + numbers.erase(remove_int); + } else { + EXPECT_FALSE(ss_->Erase(remove)); + } -// EXPECT_FALSE(ss_->Contains(remove)); -// removed.insert(remove); -// } + EXPECT_FALSE(ss_->Contains(remove)); + removed.insert(remove); + } -// size_t expected_seen = 0; -// auto scan_callback = [&](const sds ptr) { -// string str{ptr, sdslen(ptr)}; -// EXPECT_FALSE(removed.count(str)); + size_t expected_seen = 0; + auto scan_callback = [&](std::string_view str_v) { + std::string str(str_v); + EXPECT_FALSE(removed.count(str)); -// if (numbers.count(atoi(str.data()))) { -// ++expected_seen; -// } -// }; + if (numbers.count(std::atoi(str.data()))) { + ++expected_seen; + } + }; -// uint32_t cursor = 0; -// do { -// cursor = ss_->Scan(cursor, scan_callback); -// // randomly throw in some new numbers -// uint32_t val = generator_(); -// VLOG(1) << "Val " << val; -// ss_->Add(to_string(val)); -// } while (cursor != 0); + uint32_t cursor = 0; + do { + cursor = ss_->Scan(cursor, scan_callback); + // randomly throw in some new numbers + uint32_t val = generator_(); + ss_->Add(to_string(val)); + } while (cursor != 0); -// EXPECT_GE(expected_seen + removed.size(), num_ints); -// } + EXPECT_GE(expected_seen + removed.size(), num_ints); +} -// TEST_F(IntrusiveStringSetTest, XtremeScanGrow) { -// unordered_set to_see, force_grow, seen; +TEST_F(IntrusiveStringSetTest, XtremeScanGrow) { + unordered_set to_see, force_grow, seen; -// while (to_see.size() != 8) { -// to_see.insert(random_string(generator_, 10)); -// } + while (to_see.size() != 8) { + to_see.insert(random_string(generator_, 10)); + } -// while (force_grow.size() != 8192) { -// string str = random_string(generator_, 10); + while (force_grow.size() != 8192) { + string str = random_string(generator_, 10); -// if (to_see.count(str)) { -// continue; -// } + if (to_see.count(str)) { + continue; + } -// force_grow.insert(random_string(generator_, 10)); -// } + force_grow.insert(random_string(generator_, 10)); + } -// for (auto& str : to_see) { -// EXPECT_TRUE(ss_->Add(str)); -// } + for (auto& str : to_see) { + EXPECT_TRUE(ss_->Add(str)); + } -// auto scan_callback = [&](const sds ptr) { -// sds s = (sds)ptr; -// string_view str{s, sdslen(s)}; -// if (to_see.count(string(str))) { -// seen.insert(string(str)); -// } -// }; + auto scan_callback = [&](string_view strv) { + std::string str(strv); + if (to_see.count(str)) { + seen.insert(str); + } + }; -// uint32_t cursor = ss_->Scan(0, scan_callback); + uint32_t cursor = ss_->Scan(0, scan_callback); -// // force approx 10 grows -// for (auto& s : force_grow) { -// EXPECT_TRUE(ss_->Add(s)); -// } + // force approx 10 grows + for (auto& s : force_grow) { + EXPECT_TRUE(ss_->Add(s)); + } -// while (cursor != 0) { -// cursor = ss_->Scan(cursor, scan_callback); -// } + while (cursor != 0) { + cursor = ss_->Scan(cursor, scan_callback); + } -// EXPECT_EQ(seen.size(), to_see.size()); -// } + EXPECT_EQ(seen.size(), to_see.size()); +} // TEST_F(IntrusiveStringSetTest, Pop) { // constexpr size_t num_items = 8; From 6237fbdde30216e525dd0a38fb82335ea1eefaa1 Mon Sep 17 00:00:00 2001 From: Borys Date: Fri, 18 Apr 2025 16:35:08 +0300 Subject: [PATCH 08/12] feat: add Clear() and tests --- src/core/intrusive_string_set.h | 53 +++++++++--- src/core/intrusive_string_set_test.cc | 119 +++++++++++++------------- 2 files changed, 103 insertions(+), 69 deletions(-) diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index ccd1ddd62..d8dae9fdc 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -35,6 +35,10 @@ class ISLEntry { data_ = data; } + static void Destroy(ISLEntry entry) { + free(entry.Raw()); + } + operator bool() const { return data_; } @@ -56,6 +60,17 @@ class ISLEntry { return res; } + void SetExpiryTime(uint32_t ttl_sec) { + if (HasExpiry()) { + auto* ttl_pos = Raw() + sizeof(char*); + std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec)); + } else { + ISLEntry new_entry = Create(Key(), ttl_sec); + std::swap(data_, new_entry.data_); + Destroy(new_entry); + } + } + private: static ISLEntry Create(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { char* next = nullptr; @@ -85,10 +100,6 @@ class ISLEntry { return res; } - static void Destroy(ISLEntry entry) { - free(entry.Raw()); - } - ISLEntry Next() const { ISLEntry next; std::memcpy(&next.data_, Raw(), sizeof(next)); @@ -160,7 +171,12 @@ class IntrusiveStringList { return start_; } - ISLEntry Pop() { + // TODO rewrite, because it's dangerous operations, ISLEntry shouldn't returned without owner + [[nodiscard]] ISLEntry Pop(uint32_t curr_time) { + for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) { + start_ = it.Next(); + ISLEntry::Destroy(it); + } auto res = start_; if (start_) { start_ = start_.Next(); @@ -173,10 +189,12 @@ class IntrusiveStringList { return start_; } + // TODO consider to wrap ISLEntry to prevent usage out of the list ISLEntry Emplace(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { return Insert(ISLEntry::Create(key, ttl_sec)); } + // TODO consider to wrap ISLEntry to prevent usage out of the list ISLEntry Find(std::string_view str) const { auto it = start_; for (; it && it.Key() != str; it = it.Next()) @@ -240,10 +258,9 @@ class IntrusiveStringSet { : entries_(mr) { } - // TODO add TTL processing ISLEntry Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) { if (entries_.empty() || size_ >= entries_.size()) { - Resize(Capacity() * 2); + Reserve(Capacity() * 2); } const auto bucket_id = BucketId(Hash(str)); auto& bucket = entries_[bucket_id]; @@ -256,7 +273,7 @@ class IntrusiveStringSet { return AddUnique(str, bucket, ttl_sec); } - void Resize(size_t sz) { + void Reserve(size_t sz) { sz = absl::bit_ceil(sz); if (sz > entries_.size()) { size_t prev_size = entries_.size(); @@ -266,6 +283,11 @@ class IntrusiveStringSet { } } + void Clear() { + capacity_log_ = 0; + entries_.resize(0); + } + ISLEntry AddUnique(std::string_view str, IntrusiveStringList& bucket, uint32_t ttl_sec = UINT32_MAX) { ++size_; @@ -273,7 +295,7 @@ class IntrusiveStringSet { } unsigned AddMany(absl::Span span, uint32_t ttl_sec, bool keepttl) { - Resize(span.size()); + Reserve(span.size()); unsigned res = 0; for (auto& s : span) { const auto bucket_id = BucketId(Hash(s)); @@ -322,6 +344,17 @@ class IntrusiveStringSet { return entries_idx << (32 - capacity_log_); } + // return unowned ISLEntry, so it should be destroyed manually + [[nodiscard]] ISLEntry Pop() { + for (auto& bucket : entries_) { + if (auto res = bucket.Pop(time_now_); res) { + --size_; + return res; + } + } + return {}; + } + bool Erase(std::string_view str) { if (entries_.empty()) return false; @@ -373,7 +406,7 @@ class IntrusiveStringSet { void Rehash(size_t prev_size) { for (int64_t i = prev_size - 1; i >= 0; --i) { auto list = std::move(entries_[i]); - for (auto entry = list.Pop(); entry; entry = list.Pop()) { + for (auto entry = list.Pop(time_now_); entry; entry = list.Pop(time_now_)) { auto bucket_id = BucketId(Hash(entry.Key())); entries_[bucket_id].Insert(entry); } diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc index 65cfdfd03..5b53b1611 100644 --- a/src/core/intrusive_string_set_test.cc +++ b/src/core/intrusive_string_set_test.cc @@ -12,6 +12,7 @@ #include "base/gtest.h" #include "core/mi_memory_resource.h" +#include "glog/logging.h" extern "C" { #include "redis/zmalloc.h" @@ -405,33 +406,33 @@ TEST_F(IntrusiveStringSetTest, XtremeScanGrow) { EXPECT_EQ(seen.size(), to_see.size()); } -// TEST_F(IntrusiveStringSetTest, Pop) { -// constexpr size_t num_items = 8; -// unordered_set to_insert; +TEST_F(IntrusiveStringSetTest, Pop) { + constexpr size_t num_items = 8; + unordered_set to_insert; -// while (to_insert.size() != num_items) { -// auto str = random_string(generator_, 10); -// if (to_insert.count(str)) { -// continue; -// } + while (to_insert.size() != num_items) { + auto str = random_string(generator_, 10); + if (to_insert.count(str)) { + continue; + } -// to_insert.insert(str); -// EXPECT_TRUE(ss_->Add(str)); -// } + to_insert.insert(str); + EXPECT_TRUE(ss_->Add(str)); + } -// while (!ss_->Empty()) { -// size_t size = ss_->UpperBoundSize(); -// auto str = ss_->Pop(); -// DCHECK(ss_->UpperBoundSize() == to_insert.size() - 1); -// DCHECK(str.has_value()); -// DCHECK(to_insert.count(str.value())); -// DCHECK_EQ(ss_->UpperBoundSize(), size - 1); -// to_insert.erase(str.value()); -// } + while (!ss_->Empty()) { + size_t size = ss_->UpperBoundSize(); + auto str = ss_->Pop(); + DCHECK(ss_->UpperBoundSize() == to_insert.size() - 1); + DCHECK(str); + DCHECK(to_insert.count(std::string(str.Key()))); + DCHECK_EQ(ss_->UpperBoundSize(), size - 1); + to_insert.erase(std::string(str.Key())); + } -// DCHECK(ss_->Empty()); -// DCHECK(to_insert.empty()); -// } + DCHECK(ss_->Empty()); + DCHECK(to_insert.empty()); +} // TEST_F(IntrusiveStringSetTest, Iteration) { // ss_->Add("foo"); @@ -461,15 +462,15 @@ TEST_F(IntrusiveStringSetTest, XtremeScanGrow) { // EXPECT_EQ(to_insert.size(), 0); // } -// TEST_F(IntrusiveStringSetTest, SetFieldExpireHasExpiry) { -// EXPECT_TRUE(ss_->Add("k1", 100)); -// auto k = ss_->Find("k1"); -// EXPECT_TRUE(k.HasExpiry()); -// EXPECT_EQ(k.ExpiryTime(), 100); -// k.SetExpiryTime(1); -// EXPECT_TRUE(k.HasExpiry()); -// EXPECT_EQ(k.ExpiryTime(), 1); -// } +TEST_F(IntrusiveStringSetTest, SetFieldExpireHasExpiry) { + EXPECT_TRUE(ss_->Add("k1", 100)); + auto k = ss_->Find("k1"); + EXPECT_TRUE(k.HasExpiry()); + EXPECT_EQ(k.ExpiryTime(), 100); + k.SetExpiryTime(1); + EXPECT_TRUE(k.HasExpiry()); + EXPECT_EQ(k.ExpiryTime(), 1); +} // TEST_F(IntrusiveStringSetTest, SetFieldExpireNoHasExpiry) { // EXPECT_TRUE(ss_->Add("k1")); @@ -558,14 +559,14 @@ TEST_F(IntrusiveStringSetTest, XtremeScanGrow) { // } // } -// size_t memUsed(StringSet& obj) { +// size_t memUsed(IntrusiveStringSet& obj) { // return obj.ObjMallocUsed() + obj.SetMallocUsed(); // } // void BM_Clone(benchmark::State& state) { // vector strs; // mt19937 generator(0); -// StringSet ss1, ss2; +// IntrusiveStringSet ss1, ss2; // unsigned elems = state.range(0); // for (size_t i = 0; i < elems; ++i) { // string str = random_string(generator, 10); @@ -588,7 +589,7 @@ TEST_F(IntrusiveStringSetTest, XtremeScanGrow) { // unsigned elems = state.range(0); // vector strs; // mt19937 generator(0); -// StringSet ss1, ss2; +// IntrusiveStringSet ss1, ss2; // for (size_t i = 0; i < elems; ++i) { // string str = random_string(generator, 10); // ss1.Add(str); @@ -619,30 +620,30 @@ TEST_F(IntrusiveStringSetTest, XtremeScanGrow) { // } // BENCHMARK(BM_Clear)->ArgName("elements")->Arg(32000); -// void BM_Add(benchmark::State& state) { -// vector strs; -// mt19937 generator(0); -// StringSet ss; -// unsigned elems = state.range(0); -// unsigned keySize = state.range(1); -// for (size_t i = 0; i < elems; ++i) { -// string str = random_string(generator, keySize); -// strs.push_back(str); -// } -// ss.Reserve(elems); -// while (state.KeepRunning()) { -// for (auto& str : strs) -// ss.Add(str); -// state.PauseTiming(); -// state.counters["Memory_Used"] = memUsed(ss); -// ss.Clear(); -// ss.Reserve(elems); -// state.ResumeTiming(); -// } -// } -// BENCHMARK(BM_Add) -// ->ArgNames({"elements", "Key Size"}) -// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}}); +void BM_Add(benchmark::State& state) { + vector strs; + mt19937 generator(0); + IntrusiveStringSet ss; + unsigned elems = state.range(0); + unsigned keySize = state.range(1); + for (size_t i = 0; i < elems; ++i) { + string str = random_string(generator, keySize); + strs.push_back(str); + } + ss.Reserve(elems); + while (state.KeepRunning()) { + for (auto& str : strs) + ss.Add(str); + state.PauseTiming(); + // state.counters["Memory_Used"] = memUsed(ss); + ss.Clear(); + ss.Reserve(elems); + state.ResumeTiming(); + } +} +BENCHMARK(BM_Add) + ->ArgNames({"elements", "Key Size"}) + ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}}); // void BM_AddMany(benchmark::State& state) { // vector strs; From b1b52db27fb0e4857f3393c214a9776f9ff85030 Mon Sep 17 00:00:00 2001 From: Borys Date: Tue, 22 Apr 2025 12:24:46 +0300 Subject: [PATCH 09/12] fix: size_ during clear --- src/core/intrusive_string_set.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index d8dae9fdc..f01e9da00 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -16,6 +16,10 @@ #include "base/hash.h" #include "base/pmr/memory_resource.h" +extern "C" { +#include "redis/zmalloc.h" +} + namespace dfly { class ISLEntry { @@ -36,7 +40,7 @@ class ISLEntry { } static void Destroy(ISLEntry entry) { - free(entry.Raw()); + zfree(entry.Raw()); } operator bool() const { @@ -80,7 +84,7 @@ class ISLEntry { auto size = sizeof(next) + sizeof(key_size) + key_size + has_ttl * sizeof(ttl_sec); - char* data = (char*)malloc(size); + char* data = (char*)zmalloc(size); ISLEntry res(data); std::memcpy(data, &next, sizeof(next)); @@ -286,6 +290,7 @@ class IntrusiveStringSet { void Clear() { capacity_log_ = 0; entries_.resize(0); + size_ = 0; } ISLEntry AddUnique(std::string_view str, IntrusiveStringList& bucket, From 9600f2a93e00ff3dd261ec21ced90c8f24cf0f24 Mon Sep 17 00:00:00 2001 From: Borys Date: Thu, 24 Apr 2025 10:22:40 +0300 Subject: [PATCH 10/12] refactor: move intrusive string list in to separate file --- src/core/intrusive_string_list.h | 251 ++++++++++++++++++++++++++++ src/core/intrusive_string_set.h | 276 ++++--------------------------- 2 files changed, 284 insertions(+), 243 deletions(-) create mode 100644 src/core/intrusive_string_list.h diff --git a/src/core/intrusive_string_list.h b/src/core/intrusive_string_list.h new file mode 100644 index 000000000..48e301015 --- /dev/null +++ b/src/core/intrusive_string_list.h @@ -0,0 +1,251 @@ +// Copyright 2024, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#pragma once + +#include +#include +#include + +extern "C" { +#include "redis/zmalloc.h" +} + +namespace dfly { + +class ISLEntry { + friend class IntrusiveStringList; + + // we can assume that high 12 bits of user address space + // can be used for tagging. At most 52 bits of address are reserved for + // some configurations, and usually it's 48 bits. + // https://docs.kernel.org/arch/arm64/memory.html + static constexpr size_t kTtlBit = 1ULL << 55; + static constexpr size_t kTagMask = 4095ULL << 52; // we reserve 12 high bits. + + public: + ISLEntry() = default; + + ISLEntry(char* data) { + data_ = data; + } + + static void Destroy(ISLEntry entry) { + zfree(entry.Raw()); + } + + operator bool() const { + return data_; + } + + std::string_view Key() const { + return {GetKeyData(), GetKeySize()}; + } + + bool HasExpiry() const { + return HasTtl(); + } + + // returns the expiry time of the current entry or UINT32_MAX if no ttl is set. + uint32_t ExpiryTime() const { + std::uint32_t res = UINT32_MAX; + if (HasTtl()) { + std::memcpy(&res, Raw() + sizeof(ISLEntry*), sizeof(res)); + } + return res; + } + + void SetExpiryTime(uint32_t ttl_sec) { + if (HasExpiry()) { + auto* ttl_pos = Raw() + sizeof(char*); + std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec)); + } else { + ISLEntry new_entry = Create(Key(), ttl_sec); + std::swap(data_, new_entry.data_); + Destroy(new_entry); + } + } + + private: + static ISLEntry Create(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { + char* next = nullptr; + uint32_t key_size = key.size(); + + bool has_ttl = ttl_sec != UINT32_MAX; + + auto size = sizeof(next) + sizeof(key_size) + key_size + has_ttl * sizeof(ttl_sec); + + char* data = (char*)zmalloc(size); + ISLEntry res(data); + + std::memcpy(data, &next, sizeof(next)); + + auto* ttl_pos = data + sizeof(next); + if (has_ttl) { + res.SetTtlBit(true); + std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec)); + } + + auto* key_size_pos = ttl_pos + res.GetTtlSize(); + std::memcpy(key_size_pos, &key_size, sizeof(key_size)); + + auto* key_pos = key_size_pos + sizeof(key_size); + std::memcpy(key_pos, key.data(), key_size); + + return res; + } + + ISLEntry Next() const { + ISLEntry next; + std::memcpy(&next.data_, Raw(), sizeof(next)); + return next; + } + + void SetNext(ISLEntry next) { + std::memcpy(Raw(), &next, sizeof(next)); + } + + const char* GetKeyData() const { + return Raw() + sizeof(ISLEntry*) + sizeof(uint32_t) + GetTtlSize(); + } + + uint32_t GetKeySize() const { + uint32_t size = 0; + std::memcpy(&size, Raw() + sizeof(ISLEntry*) + GetTtlSize(), sizeof(size)); + return size; + } + + uint64_t uptr() const { + return uint64_t(data_); + } + + char* Raw() const { + return (char*)(uptr() & ~kTagMask); + } + + void SetTtlBit(bool b) { + if (b) + data_ = (char*)(uptr() | kTtlBit); + else + data_ = (char*)(uptr() & (~kTtlBit)); + } + + bool HasTtl() const { + return (uptr() & kTtlBit) != 0; + } + + std::uint32_t GetTtlSize() const { + return HasTtl() ? sizeof(std::uint32_t) : 0; + } + + // TODO consider use SDS strings or other approach + // TODO add optimization for big keys + // memory daya layout [ISLEntry*, key_size, key] + char* data_ = nullptr; +}; + +class IntrusiveStringList { + public: + ~IntrusiveStringList() { + while (start_) { + auto next = start_.Next(); + ISLEntry::Destroy(start_); + start_ = next; + } + } + + IntrusiveStringList() = default; + IntrusiveStringList(IntrusiveStringList&& r) { + start_ = r.start_; + r.start_ = {}; + } + + ISLEntry Insert(ISLEntry e) { + e.SetNext(start_); + start_ = e; + return start_; + } + + // TODO rewrite, because it's dangerous operations, ISLEntry shouldn't returned without owner + [[nodiscard]] ISLEntry Pop(uint32_t curr_time) { + for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) { + start_ = it.Next(); + ISLEntry::Destroy(it); + } + auto res = start_; + if (start_) { + start_ = start_.Next(); + // TODO consider to res.SetNext(nullptr); for now it looks superfluous + } + return res; + } + + bool Empty() { + return start_; + } + + // TODO consider to wrap ISLEntry to prevent usage out of the list + ISLEntry Emplace(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { + return Insert(ISLEntry::Create(key, ttl_sec)); + } + + // TODO consider to wrap ISLEntry to prevent usage out of the list + ISLEntry Find(std::string_view str) const { + auto it = start_; + for (; it && it.Key() != str; it = it.Next()) + ; + return it; + } + + bool Erase(std::string_view str) { + auto cond = [str](const ISLEntry e) { return str == e.Key(); }; + return Erase(cond); + } + + template >* = nullptr> + bool Erase(const T& cond) { + if (!start_) { + return false; + } + + if (auto it = start_; cond(it)) { + start_ = it.Next(); + ISLEntry::Destroy(it); + return true; + } + + for (auto prev = start_, it = start_.Next(); it; prev = it, it = it.Next()) { + if (cond(it)) { + prev.SetNext(it.Next()); + ISLEntry::Destroy(it); + return true; + } + } + return false; + } + + template >* = nullptr> + bool Scan(const T& cb, uint32_t curr_time) { + for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) { + start_ = it.Next(); + ISLEntry::Destroy(it); + } + + for (auto curr = start_, next = start_; curr; curr = next) { + cb(curr.Key()); + next = curr.Next(); + for (auto tmp = next; tmp && tmp.ExpiryTime() < curr_time; tmp = next) { + next = next.Next(); + ISLEntry::Destroy(tmp); + } + curr.SetNext(next); + } + return start_; + } + + private: + ISLEntry start_; +}; + +} // namespace dfly diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index f01e9da00..a1a3b86e5 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -7,257 +7,47 @@ #include #include -#include -#include -#include -#include #include #include "base/hash.h" #include "base/pmr/memory_resource.h" - -extern "C" { -#include "redis/zmalloc.h" -} +#include "intrusive_string_list.h" namespace dfly { -class ISLEntry { - friend class IntrusiveStringList; - - // we can assume that high 12 bits of user address space - // can be used for tagging. At most 52 bits of address are reserved for - // some configurations, and usually it's 48 bits. - // https://docs.kernel.org/arch/arm64/memory.html - static constexpr size_t kTtlBit = 1ULL << 55; - static constexpr size_t kTagMask = 4095ULL << 52; // we reserve 12 high bits. - - public: - ISLEntry() = default; - - ISLEntry(char* data) { - data_ = data; - } - - static void Destroy(ISLEntry entry) { - zfree(entry.Raw()); - } - - operator bool() const { - return data_; - } - - std::string_view Key() const { - return {GetKeyData(), GetKeySize()}; - } - - bool HasExpiry() const { - return HasTtl(); - } - - // returns the expiry time of the current entry or UINT32_MAX if no ttl is set. - uint32_t ExpiryTime() const { - std::uint32_t res = UINT32_MAX; - if (HasTtl()) { - std::memcpy(&res, Raw() + sizeof(ISLEntry*), sizeof(res)); - } - return res; - } - - void SetExpiryTime(uint32_t ttl_sec) { - if (HasExpiry()) { - auto* ttl_pos = Raw() + sizeof(char*); - std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec)); - } else { - ISLEntry new_entry = Create(Key(), ttl_sec); - std::swap(data_, new_entry.data_); - Destroy(new_entry); - } - } - - private: - static ISLEntry Create(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { - char* next = nullptr; - uint32_t key_size = key.size(); - - bool has_ttl = ttl_sec != UINT32_MAX; - - auto size = sizeof(next) + sizeof(key_size) + key_size + has_ttl * sizeof(ttl_sec); - - char* data = (char*)zmalloc(size); - ISLEntry res(data); - - std::memcpy(data, &next, sizeof(next)); - - auto* ttl_pos = data + sizeof(next); - if (has_ttl) { - res.SetTtlBit(true); - std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec)); - } - - auto* key_size_pos = ttl_pos + res.GetTtlSize(); - std::memcpy(key_size_pos, &key_size, sizeof(key_size)); - - auto* key_pos = key_size_pos + sizeof(key_size); - std::memcpy(key_pos, key.data(), key_size); - - return res; - } - - ISLEntry Next() const { - ISLEntry next; - std::memcpy(&next.data_, Raw(), sizeof(next)); - return next; - } - - void SetNext(ISLEntry next) { - std::memcpy(Raw(), &next, sizeof(next)); - } - - const char* GetKeyData() const { - return Raw() + sizeof(ISLEntry*) + sizeof(uint32_t) + GetTtlSize(); - } - - uint32_t GetKeySize() const { - uint32_t size = 0; - std::memcpy(&size, Raw() + sizeof(ISLEntry*) + GetTtlSize(), sizeof(size)); - return size; - } - - uint64_t uptr() const { - return uint64_t(data_); - } - - char* Raw() const { - return (char*)(uptr() & ~kTagMask); - } - - void SetTtlBit(bool b) { - if (b) - data_ = (char*)(uptr() | kTtlBit); - else - data_ = (char*)(uptr() & (~kTtlBit)); - } - - bool HasTtl() const { - return (uptr() & kTtlBit) != 0; - } - - std::uint32_t GetTtlSize() const { - return HasTtl() ? sizeof(std::uint32_t) : 0; - } - - // TODO consider use SDS strings or other approach - // TODO add optimization for big keys - // memory daya layout [ISLEntry*, key_size, key] - char* data_ = nullptr; -}; - -class IntrusiveStringList { - public: - ~IntrusiveStringList() { - while (start_) { - auto next = start_.Next(); - ISLEntry::Destroy(start_); - start_ = next; - } - } - - IntrusiveStringList() = default; - IntrusiveStringList(IntrusiveStringList&& r) { - start_ = r.start_; - r.start_ = {}; - } - - ISLEntry Insert(ISLEntry e) { - e.SetNext(start_); - start_ = e; - return start_; - } - - // TODO rewrite, because it's dangerous operations, ISLEntry shouldn't returned without owner - [[nodiscard]] ISLEntry Pop(uint32_t curr_time) { - for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) { - start_ = it.Next(); - ISLEntry::Destroy(it); - } - auto res = start_; - if (start_) { - start_ = start_.Next(); - // TODO consider to res.SetNext(nullptr); for now it looks superfluous - } - return res; - } - - bool Empty() { - return start_; - } - - // TODO consider to wrap ISLEntry to prevent usage out of the list - ISLEntry Emplace(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { - return Insert(ISLEntry::Create(key, ttl_sec)); - } - - // TODO consider to wrap ISLEntry to prevent usage out of the list - ISLEntry Find(std::string_view str) const { - auto it = start_; - for (; it && it.Key() != str; it = it.Next()) - ; - return it; - } - - bool Erase(std::string_view str) { - auto cond = [str](const ISLEntry e) { return str == e.Key(); }; - return Erase(cond); - } - - template >* = nullptr> - bool Erase(const T& cond) { - if (!start_) { - return false; - } - - if (auto it = start_; cond(it)) { - start_ = it.Next(); - ISLEntry::Destroy(it); - return true; - } - - for (auto prev = start_, it = start_.Next(); it; prev = it, it = it.Next()) { - if (cond(it)) { - prev.SetNext(it.Next()); - ISLEntry::Destroy(it); - return true; - } - } - return false; - } - - template >* = nullptr> - bool Scan(const T& cb, uint32_t curr_time) { - for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) { - start_ = it.Next(); - ISLEntry::Destroy(it); - } - - for (auto curr = start_, next = start_; curr; curr = next) { - cb(curr.Key()); - next = curr.Next(); - for (auto tmp = next; tmp && tmp.ExpiryTime() < curr_time; tmp = next) { - next = next.Next(); - ISLEntry::Destroy(tmp); - } - curr.SetNext(next); - } - return start_; - } - - private: - ISLEntry start_; -}; - class IntrusiveStringSet { + using Buckets = + std::vector>; + public: + class Iterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = std::string_view; + using pointer = std::string_view*; + using reference = std::string_view&; + + Iterator(Buckets::iterator it, ISLEntry prev) : buckets_it_(it), prev_(prev) { + } + + // uint32_t ExpiryTime() const { + // return prev.Next()->ExpiryTime(); + // } + + // void SetExpiryTime(uint32_t ttl_sec); + + // bool HasExpiry() const { + // return curr_entry_.HasExpiry(); + // } + + // void Advance(); + + private: + Buckets::iterator buckets_it_; + ISLEntry prev_; + }; + explicit IntrusiveStringSet(PMR_NS::memory_resource* mr = PMR_NS::get_default_resource()) : entries_(mr) { } @@ -435,7 +225,7 @@ class IntrusiveStringSet { static_assert(sizeof(IntrusiveStringList) == sizeof(void*), "IntrusiveStringList should be just a pointer"); - std::vector> entries_; + Buckets entries_; }; } // namespace dfly From 33dba31e816b236abb55b89457bbe9082ab8dd2b Mon Sep 17 00:00:00 2001 From: Borys Date: Thu, 24 Apr 2025 13:26:52 +0300 Subject: [PATCH 11/12] feat: add setExpiry and iterators --- src/core/CMakeLists.txt | 2 +- src/core/intrusive_string_list.cc | 10 ++ src/core/intrusive_string_list.h | 148 ++++++++++++++++++++++---- src/core/intrusive_string_set.h | 54 +++++++--- src/core/intrusive_string_set_test.cc | 44 ++++---- 5 files changed, 196 insertions(+), 62 deletions(-) create mode 100644 src/core/intrusive_string_list.cc diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6d12a74f4..d6604b8b8 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -21,7 +21,7 @@ add_subdirectory(json) set(SEARCH_LIB query_parser) add_library(dfly_core allocation_tracker.cc bloom.cc compact_object.cc dense_set.cc - dragonfly_core.cc extent_tree.cc + dragonfly_core.cc extent_tree.cc intrusive_string_list.cc interpreter.cc glob_matcher.cc mi_memory_resource.cc qlist.cc sds_utils.cc segment_allocator.cc score_map.cc small_string.cc sorted_map.cc task_queue.cc tx_queue.cc string_set.cc string_map.cc top_keys.cc detail/bitpacking.cc) diff --git a/src/core/intrusive_string_list.cc b/src/core/intrusive_string_list.cc new file mode 100644 index 000000000..abbc531d9 --- /dev/null +++ b/src/core/intrusive_string_list.cc @@ -0,0 +1,10 @@ +// Copyright 2025, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#include "intrusive_string_list.h" + +namespace dfly { +ISLEntry IntrusiveStringList::Iterator::end_; + +} // namespace dfly diff --git a/src/core/intrusive_string_list.h b/src/core/intrusive_string_list.h index 48e301015..4d13127f8 100644 --- a/src/core/intrusive_string_list.h +++ b/src/core/intrusive_string_list.h @@ -4,16 +4,18 @@ #pragma once -#include +// #include #include #include +#include "base/logging.h" + extern "C" { #include "redis/zmalloc.h" } namespace dfly { - +// doesn't possess memory, it should be created and release manually class ISLEntry { friend class IntrusiveStringList; @@ -31,8 +33,17 @@ class ISLEntry { data_ = data; } - static void Destroy(ISLEntry entry) { - zfree(entry.Raw()); + ISLEntry(const ISLEntry& e) = default; + ISLEntry(ISLEntry&& e) { + data_ = e.data_; + e.data_ = nullptr; + } + + ISLEntry& operator=(const ISLEntry& e) = default; + ISLEntry& operator=(ISLEntry&& e) { + data_ = e.data_; + e.data_ = nullptr; + return *this; } operator bool() const { @@ -56,20 +67,14 @@ class ISLEntry { return res; } - void SetExpiryTime(uint32_t ttl_sec) { - if (HasExpiry()) { - auto* ttl_pos = Raw() + sizeof(char*); - std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec)); - } else { - ISLEntry new_entry = Create(Key(), ttl_sec); - std::swap(data_, new_entry.data_); - Destroy(new_entry); - } + // TODO consider another option to implement iterator + ISLEntry* operator->() { + return this; } - private: - static ISLEntry Create(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { - char* next = nullptr; + protected: + static ISLEntry Create(std::string_view key, char* next = nullptr, + uint32_t ttl_sec = UINT32_MAX) { uint32_t key_size = key.size(); bool has_ttl = ttl_sec != UINT32_MAX; @@ -96,12 +101,20 @@ class ISLEntry { return res; } + static void Destroy(ISLEntry& entry) { + zfree(entry.Raw()); + } + ISLEntry Next() const { ISLEntry next; std::memcpy(&next.data_, Raw(), sizeof(next)); return next; } + ISLEntry FakePrev() { + return ISLEntry(reinterpret_cast(&data_)); + } + void SetNext(ISLEntry next) { std::memcpy(Raw(), &next, sizeof(next)); } @@ -135,6 +148,15 @@ class ISLEntry { return (uptr() & kTtlBit) != 0; } + [[nodiscard]] bool UpdateTtl(uint32_t ttl_sec) { + if (HasTtl()) { + auto* ttl_pos = Raw() + sizeof(char*); + std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec)); + return true; + } + return false; + } + std::uint32_t GetTtlSize() const { return HasTtl() ? sizeof(std::uint32_t) : 0; } @@ -145,8 +167,87 @@ class ISLEntry { char* data_ = nullptr; }; +class UniqueISLEntry : private ISLEntry { + public: + ~UniqueISLEntry() { + Destroy(*this); + } + + UniqueISLEntry() = default; + UniqueISLEntry(ISLEntry e) : ISLEntry(e) { + } + UniqueISLEntry(UniqueISLEntry&& e) = default; + UniqueISLEntry& operator=(UniqueISLEntry&& e) = default; + + using ISLEntry::ExpiryTime; + using ISLEntry::HasExpiry; + using ISLEntry::Key; + using ISLEntry::operator bool; + + ISLEntry Release() { + ISLEntry res = *this; + data_ = nullptr; + return res; + } + + private: + UniqueISLEntry(const UniqueISLEntry&) = delete; +}; + class IntrusiveStringList { public: + class Iterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = ISLEntry; + using pointer = ISLEntry*; + using reference = ISLEntry&; + + Iterator(ISLEntry prev = end_.FakePrev()) : prev_(prev) { + DCHECK(prev); + } + + uint32_t ExpiryTime() const { + return prev_.Next().ExpiryTime(); + } + + void SetExpiryTime(uint32_t ttl_sec) { + auto entry = prev_.Next(); + + if (!entry.UpdateTtl(ttl_sec)) { + ISLEntry new_entry = ISLEntry::Create(entry.Key(), entry.Next().data_, ttl_sec); + ISLEntry::Destroy(entry); + prev_.SetNext(new_entry); + } + } + + bool HasExpiry() const { + return prev_.Next().HasExpiry(); + } + + Iterator& operator++() { + prev_ = prev_.Next(); + return *this; + } + + operator bool() const { + return prev_.Next(); + } + + value_type operator*() { + return prev_.Next(); + } + + value_type operator->() { + return prev_.Next(); + } + + private: + ISLEntry prev_; + static ISLEntry end_; + }; + ~IntrusiveStringList() { while (start_) { auto next = start_.Next(); @@ -161,14 +262,17 @@ class IntrusiveStringList { r.start_ = {}; } + Iterator begin() { + return start_.FakePrev(); + } + ISLEntry Insert(ISLEntry e) { e.SetNext(start_); start_ = e; return start_; } - // TODO rewrite, because it's dangerous operations, ISLEntry shouldn't returned without owner - [[nodiscard]] ISLEntry Pop(uint32_t curr_time) { + UniqueISLEntry Pop(uint32_t curr_time) { for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) { start_ = it.Next(); ISLEntry::Destroy(it); @@ -187,13 +291,13 @@ class IntrusiveStringList { // TODO consider to wrap ISLEntry to prevent usage out of the list ISLEntry Emplace(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { - return Insert(ISLEntry::Create(key, ttl_sec)); + return Insert(ISLEntry::Create(key, nullptr, ttl_sec)); } // TODO consider to wrap ISLEntry to prevent usage out of the list - ISLEntry Find(std::string_view str) const { - auto it = start_; - for (; it && it.Key() != str; it = it.Next()) + IntrusiveStringList::Iterator Find(std::string_view str) { + auto it = begin(); + for (; it && it->Key() != str; ++it) ; return it; } diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index a1a3b86e5..fd18d27d8 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -20,7 +20,7 @@ class IntrusiveStringSet { std::vector>; public: - class Iterator { + class iterator { public: using iterator_category = std::forward_iterator_tag; using difference_type = std::ptrdiff_t; @@ -28,14 +28,18 @@ class IntrusiveStringSet { using pointer = std::string_view*; using reference = std::string_view&; - Iterator(Buckets::iterator it, ISLEntry prev) : buckets_it_(it), prev_(prev) { + iterator(Buckets::iterator it, + IntrusiveStringList::Iterator entry = IntrusiveStringList::Iterator()) + : buckets_it_(it), entry_(entry) { } // uint32_t ExpiryTime() const { // return prev.Next()->ExpiryTime(); // } - // void SetExpiryTime(uint32_t ttl_sec); + void SetExpiryTime(uint32_t ttl_sec) { + entry_.SetExpiryTime(ttl_sec); + } // bool HasExpiry() const { // return curr_entry_.HasExpiry(); @@ -43,11 +47,31 @@ class IntrusiveStringSet { // void Advance(); + bool operator==(const iterator& r) const { + return buckets_it_ == r.buckets_it_; + } + + bool operator!=(const iterator& r) const { + return !operator==(r); + } + + IntrusiveStringList::Iterator::value_type operator*() { + return *entry_; + } + + IntrusiveStringList::Iterator operator->() { + return entry_; + } + private: Buckets::iterator buckets_it_; - ISLEntry prev_; + IntrusiveStringList::Iterator entry_; }; + iterator end() { + return iterator(entries_.end()); + } + explicit IntrusiveStringSet(PMR_NS::memory_resource* mr = PMR_NS::get_default_resource()) : entries_(mr) { } @@ -139,8 +163,7 @@ class IntrusiveStringSet { return entries_idx << (32 - capacity_log_); } - // return unowned ISLEntry, so it should be destroyed manually - [[nodiscard]] ISLEntry Pop() { + UniqueISLEntry Pop() { for (auto& bucket : entries_) { if (auto res = bucket.Pop(time_now_); res) { --size_; @@ -157,20 +180,17 @@ class IntrusiveStringSet { return entries_[bucket_id].Erase(str); } - ISLEntry Find(std::string_view member) const { + iterator Find(std::string_view member) { if (entries_.empty()) - return {}; + return iterator(entries_.end()); auto bucket_id = BucketId(Hash(member)); - auto res = entries_[bucket_id].Find(member); - if (!res) { - bucket_id = BucketId(Hash(member)); - res = entries_[bucket_id].Find(member); - } - return res; + auto entry_it = entries_.begin() + bucket_id; + auto res = entry_it->Find(member); + return iterator(res ? entry_it : entries_.end(), res); } - bool Contains(std::string_view member) const { - return Find(member); + bool Contains(std::string_view member) { + return Find(member) != end(); } // Returns the number of elements in the map. Note that it might be that some of these elements @@ -203,7 +223,7 @@ class IntrusiveStringSet { auto list = std::move(entries_[i]); for (auto entry = list.Pop(time_now_); entry; entry = list.Pop(time_now_)) { auto bucket_id = BucketId(Hash(entry.Key())); - entries_[bucket_id].Insert(entry); + entries_[bucket_id].Insert(entry.Release()); } } } diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc index 5b53b1611..ec7f29495 100644 --- a/src/core/intrusive_string_set_test.cc +++ b/src/core/intrusive_string_set_test.cc @@ -87,15 +87,15 @@ TEST_F(IntrusiveStringSetTest, IntrusiveStringListTest) { test = isl.Emplace("23456789"); - EXPECT_EQ(isl.Find("0123456789").Key(), "0123456789"sv); - EXPECT_EQ(isl.Find("23456789").Key(), "23456789"sv); - EXPECT_EQ(isl.Find("123456789").Key(), "123456789"sv); - EXPECT_EQ(isl.Find("test"), ISLEntry()); + EXPECT_EQ(isl.Find("0123456789")->Key(), "0123456789"sv); + EXPECT_EQ(isl.Find("23456789")->Key(), "23456789"sv); + EXPECT_EQ(isl.Find("123456789")->Key(), "123456789"sv); + EXPECT_FALSE(isl.Find("test")); EXPECT_TRUE(isl.Erase("23456789")); - EXPECT_EQ(isl.Find("23456789"), ISLEntry()); + EXPECT_FALSE(isl.Find("23456789")); EXPECT_FALSE(isl.Erase("test")); - EXPECT_EQ(isl.Find("test"), ISLEntry()); + EXPECT_FALSE(isl.Find("test")); } TEST_F(IntrusiveStringSetTest, IntrusiveStringSetAddFindTest) { @@ -113,7 +113,7 @@ TEST_F(IntrusiveStringSetTest, IntrusiveStringSetAddFindTest) { for (const auto& s : test_set) { auto e = ss.Find(s); - EXPECT_EQ(e.Key(), s); + EXPECT_EQ(e->Key(), s); } EXPECT_EQ(ss.Capacity(), 16384); @@ -240,9 +240,9 @@ TEST_F(IntrusiveStringSetTest, Resizing) { for (auto j = strs.begin(); j != it; ++j) { const auto& str = *j; auto it = ss_->Find(str); - ASSERT_TRUE(it); - EXPECT_TRUE(it.HasExpiry()); - EXPECT_EQ(it.ExpiryTime(), ss_->time_now() + 1); + ASSERT_NE(it, ss_->end()); + EXPECT_TRUE(it->HasExpiry()); + EXPECT_EQ(it->ExpiryTime(), ss_->time_now() + 1); } } ++size; @@ -465,21 +465,21 @@ TEST_F(IntrusiveStringSetTest, Pop) { TEST_F(IntrusiveStringSetTest, SetFieldExpireHasExpiry) { EXPECT_TRUE(ss_->Add("k1", 100)); auto k = ss_->Find("k1"); - EXPECT_TRUE(k.HasExpiry()); - EXPECT_EQ(k.ExpiryTime(), 100); + EXPECT_TRUE(k->HasExpiry()); + EXPECT_EQ(k->ExpiryTime(), 100); k.SetExpiryTime(1); - EXPECT_TRUE(k.HasExpiry()); - EXPECT_EQ(k.ExpiryTime(), 1); + EXPECT_TRUE(k->HasExpiry()); + EXPECT_EQ(k->ExpiryTime(), 1); } -// TEST_F(IntrusiveStringSetTest, SetFieldExpireNoHasExpiry) { -// EXPECT_TRUE(ss_->Add("k1")); -// auto k = ss_->Find("k1"); -// EXPECT_FALSE(k.HasExpiry()); -// k.SetExpiryTime(10); -// EXPECT_TRUE(k.HasExpiry()); -// EXPECT_EQ(k.ExpiryTime(), 10); -// } +TEST_F(IntrusiveStringSetTest, SetFieldExpireNoHasExpiry) { + EXPECT_TRUE(ss_->Add("k1")); + auto k = ss_->Find("k1"); + EXPECT_FALSE(k->HasExpiry()); + k.SetExpiryTime(10); + EXPECT_TRUE(k->HasExpiry()); + EXPECT_EQ(k->ExpiryTime(), 10); +} // TEST_F(IntrusiveStringSetTest, Ttl) { // EXPECT_TRUE(ss_->Add("bla"sv, 1)); From fe3b3a24f2c14c910d8c7f56c75a16429f65ead1 Mon Sep 17 00:00:00 2001 From: Borys Date: Fri, 25 Apr 2025 11:49:02 +0300 Subject: [PATCH 12/12] feat: add iterators --- src/core/intrusive_string_list.cc | 2 +- src/core/intrusive_string_list.h | 26 +++++++--- src/core/intrusive_string_set.h | 56 ++++++++++++++++----- src/core/intrusive_string_set_test.cc | 70 +++++++++++++-------------- 4 files changed, 98 insertions(+), 56 deletions(-) diff --git a/src/core/intrusive_string_list.cc b/src/core/intrusive_string_list.cc index abbc531d9..52fd4c6ae 100644 --- a/src/core/intrusive_string_list.cc +++ b/src/core/intrusive_string_list.cc @@ -5,6 +5,6 @@ #include "intrusive_string_list.h" namespace dfly { -ISLEntry IntrusiveStringList::Iterator::end_; +ISLEntry IntrusiveStringList::end_; } // namespace dfly diff --git a/src/core/intrusive_string_list.h b/src/core/intrusive_string_list.h index 4d13127f8..0bed5ae2a 100644 --- a/src/core/intrusive_string_list.h +++ b/src/core/intrusive_string_list.h @@ -196,7 +196,7 @@ class UniqueISLEntry : private ISLEntry { class IntrusiveStringList { public: - class Iterator { + class iterator { public: using iterator_category = std::forward_iterator_tag; using difference_type = std::ptrdiff_t; @@ -204,7 +204,7 @@ class IntrusiveStringList { using pointer = ISLEntry*; using reference = ISLEntry&; - Iterator(ISLEntry prev = end_.FakePrev()) : prev_(prev) { + iterator(ISLEntry prev = end_.FakePrev()) : prev_(prev) { DCHECK(prev); } @@ -226,7 +226,7 @@ class IntrusiveStringList { return prev_.Next().HasExpiry(); } - Iterator& operator++() { + iterator& operator++() { prev_ = prev_.Next(); return *this; } @@ -243,9 +243,16 @@ class IntrusiveStringList { return prev_.Next(); } + bool operator==(const iterator& r) { + return prev_.Next() == r.prev_.Next(); + } + + bool operator!=(const iterator& r) { + return !operator==(r); + } + private: ISLEntry prev_; - static ISLEntry end_; }; ~IntrusiveStringList() { @@ -262,10 +269,14 @@ class IntrusiveStringList { r.start_ = {}; } - Iterator begin() { + iterator begin() { return start_.FakePrev(); } + static iterator end() { + return end_.FakePrev(); + } + ISLEntry Insert(ISLEntry e) { e.SetNext(start_); start_ = e; @@ -286,7 +297,7 @@ class IntrusiveStringList { } bool Empty() { - return start_; + return !start_; } // TODO consider to wrap ISLEntry to prevent usage out of the list @@ -295,7 +306,7 @@ class IntrusiveStringList { } // TODO consider to wrap ISLEntry to prevent usage out of the list - IntrusiveStringList::Iterator Find(std::string_view str) { + IntrusiveStringList::iterator Find(std::string_view str) { auto it = begin(); for (; it && it->Key() != str; ++it) ; @@ -350,6 +361,7 @@ class IntrusiveStringList { private: ISLEntry start_; + static ISLEntry end_; }; } // namespace dfly diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h index fd18d27d8..ad4b5d6b1 100644 --- a/src/core/intrusive_string_set.h +++ b/src/core/intrusive_string_set.h @@ -24,13 +24,16 @@ class IntrusiveStringSet { public: using iterator_category = std::forward_iterator_tag; using difference_type = std::ptrdiff_t; - using value_type = std::string_view; - using pointer = std::string_view*; - using reference = std::string_view&; + using value_type = ISLEntry; + using pointer = ISLEntry*; + using reference = ISLEntry&; - iterator(Buckets::iterator it, - IntrusiveStringList::Iterator entry = IntrusiveStringList::Iterator()) - : buckets_it_(it), entry_(entry) { + iterator(Buckets::iterator it, Buckets::iterator end, IntrusiveStringList::iterator entry) + : buckets_it_(it), end_(end), entry_(entry) { + } + + iterator(Buckets::iterator it, Buckets::iterator end) : buckets_it_(it), end_(end) { + SetEntryIt(); } // uint32_t ExpiryTime() const { @@ -47,29 +50,56 @@ class IntrusiveStringSet { // void Advance(); + iterator& operator++() { + // TODO add expiration logic + if (entry_) + ++entry_; + if (!entry_) { + ++buckets_it_; + SetEntryIt(); + } + return *this; + } + bool operator==(const iterator& r) const { - return buckets_it_ == r.buckets_it_; + return buckets_it_ == r.buckets_it_ && entry_ == r.entry_; } bool operator!=(const iterator& r) const { return !operator==(r); } - IntrusiveStringList::Iterator::value_type operator*() { + IntrusiveStringList::iterator::value_type operator*() { return *entry_; } - IntrusiveStringList::Iterator operator->() { + IntrusiveStringList::iterator operator->() { return entry_; } + private: + // find valid entry_ iterator starting from buckets_it_ and set it + void SetEntryIt() { + for (; buckets_it_ != end_; ++buckets_it_) { + if (!buckets_it_->Empty()) { + entry_ = buckets_it_->begin(); + break; + } + } + } + private: Buckets::iterator buckets_it_; - IntrusiveStringList::Iterator entry_; + Buckets::iterator end_; + IntrusiveStringList::iterator entry_; }; + iterator begin() { + return iterator(entries_.begin(), entries_.end()); + } + iterator end() { - return iterator(entries_.end()); + return iterator(entries_.end(), entries_.end()); } explicit IntrusiveStringSet(PMR_NS::memory_resource* mr = PMR_NS::get_default_resource()) @@ -182,11 +212,11 @@ class IntrusiveStringSet { iterator Find(std::string_view member) { if (entries_.empty()) - return iterator(entries_.end()); + return end(); auto bucket_id = BucketId(Hash(member)); auto entry_it = entries_.begin() + bucket_id; auto res = entry_it->Find(member); - return iterator(res ? entry_it : entries_.end(), res); + return iterator(res ? entry_it : entries_.end(), entries_.end(), res); } bool Contains(std::string_view member) { diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc index ec7f29495..d4f2fe9b8 100644 --- a/src/core/intrusive_string_set_test.cc +++ b/src/core/intrusive_string_set_test.cc @@ -75,6 +75,18 @@ class IntrusiveStringSetTest : public ::testing::Test { mt19937 generator_; }; +static string random_string(mt19937& rand, unsigned len) { + const string_view alpanum = "1234567890abcdefghijklmnopqrstuvwxyz"; + string ret; + ret.reserve(len); + + for (size_t i = 0; i < len; ++i) { + ret += alpanum[rand() % alpanum.size()]; + } + + return ret; +} + TEST_F(IntrusiveStringSetTest, IntrusiveStringListTest) { IntrusiveStringList isl; ISLEntry test = isl.Emplace("0123456789"); @@ -208,18 +220,6 @@ TEST_F(IntrusiveStringSetTest, DisplacedBug) { ss_->Add("HPq"); } -static string random_string(mt19937& rand, unsigned len) { - const string_view alpanum = "1234567890abcdefghijklmnopqrstuvwxyz"; - string ret; - ret.reserve(len); - - for (size_t i = 0; i < len; ++i) { - ret += alpanum[rand() % alpanum.size()]; - } - - return ret; -} - TEST_F(IntrusiveStringSetTest, Resizing) { constexpr size_t num_strs = 4096; unordered_set strs; @@ -434,33 +434,33 @@ TEST_F(IntrusiveStringSetTest, Pop) { DCHECK(to_insert.empty()); } -// TEST_F(IntrusiveStringSetTest, Iteration) { -// ss_->Add("foo"); -// for (const sds ptr : *ss_) { -// LOG(INFO) << ptr; -// } -// ss_->Clear(); -// constexpr size_t num_items = 8192; -// unordered_set to_insert; +TEST_F(IntrusiveStringSetTest, Iteration) { + ss_->Add("foo"); + for (const auto& ptr : *ss_) { + LOG(INFO) << ptr; + } + ss_->Clear(); + constexpr size_t num_items = 8192; + unordered_set to_insert; -// while (to_insert.size() != num_items) { -// auto str = random_string(generator_, 10); -// if (to_insert.count(str)) { -// continue; -// } + while (to_insert.size() != num_items) { + auto str = random_string(generator_, 10); + if (to_insert.count(str)) { + continue; + } -// to_insert.insert(str); -// EXPECT_TRUE(ss_->Add(str)); -// } + to_insert.insert(str); + EXPECT_TRUE(ss_->Add(str)); + } -// for (const sds ptr : *ss_) { -// string str{ptr, sdslen(ptr)}; -// EXPECT_TRUE(to_insert.count(str)); -// to_insert.erase(str); -// } + for (const auto& ptr : *ss_) { + std::string str(ptr.Key()); + EXPECT_TRUE(to_insert.count(str)); + to_insert.erase(str); + } -// EXPECT_EQ(to_insert.size(), 0); -// } + EXPECT_EQ(to_insert.size(), 0); +} TEST_F(IntrusiveStringSetTest, SetFieldExpireHasExpiry) { EXPECT_TRUE(ss_->Add("k1", 100));