diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f3bcbd567..d6604b8b8 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -21,7 +21,7 @@ add_subdirectory(json) set(SEARCH_LIB query_parser) add_library(dfly_core allocation_tracker.cc bloom.cc compact_object.cc dense_set.cc - dragonfly_core.cc extent_tree.cc + dragonfly_core.cc extent_tree.cc intrusive_string_list.cc interpreter.cc glob_matcher.cc mi_memory_resource.cc qlist.cc sds_utils.cc segment_allocator.cc score_map.cc small_string.cc sorted_map.cc task_queue.cc tx_queue.cc string_set.cc string_map.cc top_keys.cc detail/bitpacking.cc) @@ -40,6 +40,7 @@ cxx_test(interpreter_test dfly_core LABELS DFLY) cxx_test(string_set_test dfly_core LABELS DFLY) cxx_test(string_map_test dfly_core LABELS DFLY) +cxx_test(intrusive_string_set_test dfly_core LABELS DFLY) cxx_test(sorted_map_test dfly_core redis_test_lib LABELS DFLY) cxx_test(bptree_set_test dfly_core LABELS DFLY) cxx_test(score_map_test dfly_core LABELS DFLY) diff --git a/src/core/intrusive_string_list.cc b/src/core/intrusive_string_list.cc new file mode 100644 index 000000000..52fd4c6ae --- /dev/null +++ b/src/core/intrusive_string_list.cc @@ -0,0 +1,10 @@ +// Copyright 2025, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#include "intrusive_string_list.h" + +namespace dfly { +ISLEntry IntrusiveStringList::end_; + +} // namespace dfly diff --git a/src/core/intrusive_string_list.h b/src/core/intrusive_string_list.h new file mode 100644 index 000000000..0bed5ae2a --- /dev/null +++ b/src/core/intrusive_string_list.h @@ -0,0 +1,367 @@ +// Copyright 2024, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#pragma once + +// #include +#include +#include + +#include "base/logging.h" + +extern "C" { +#include "redis/zmalloc.h" +} + +namespace dfly { +// doesn't possess memory, it should be created and release manually +class ISLEntry { + friend class IntrusiveStringList; + + // we can assume that high 12 bits of user address space + // can be used for tagging. At most 52 bits of address are reserved for + // some configurations, and usually it's 48 bits. + // https://docs.kernel.org/arch/arm64/memory.html + static constexpr size_t kTtlBit = 1ULL << 55; + static constexpr size_t kTagMask = 4095ULL << 52; // we reserve 12 high bits. + + public: + ISLEntry() = default; + + ISLEntry(char* data) { + data_ = data; + } + + ISLEntry(const ISLEntry& e) = default; + ISLEntry(ISLEntry&& e) { + data_ = e.data_; + e.data_ = nullptr; + } + + ISLEntry& operator=(const ISLEntry& e) = default; + ISLEntry& operator=(ISLEntry&& e) { + data_ = e.data_; + e.data_ = nullptr; + return *this; + } + + operator bool() const { + return data_; + } + + std::string_view Key() const { + return {GetKeyData(), GetKeySize()}; + } + + bool HasExpiry() const { + return HasTtl(); + } + + // returns the expiry time of the current entry or UINT32_MAX if no ttl is set. + uint32_t ExpiryTime() const { + std::uint32_t res = UINT32_MAX; + if (HasTtl()) { + std::memcpy(&res, Raw() + sizeof(ISLEntry*), sizeof(res)); + } + return res; + } + + // TODO consider another option to implement iterator + ISLEntry* operator->() { + return this; + } + + protected: + static ISLEntry Create(std::string_view key, char* next = nullptr, + uint32_t ttl_sec = UINT32_MAX) { + uint32_t key_size = key.size(); + + bool has_ttl = ttl_sec != UINT32_MAX; + + auto size = sizeof(next) + sizeof(key_size) + key_size + has_ttl * sizeof(ttl_sec); + + char* data = (char*)zmalloc(size); + ISLEntry res(data); + + std::memcpy(data, &next, sizeof(next)); + + auto* ttl_pos = data + sizeof(next); + if (has_ttl) { + res.SetTtlBit(true); + std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec)); + } + + auto* key_size_pos = ttl_pos + res.GetTtlSize(); + std::memcpy(key_size_pos, &key_size, sizeof(key_size)); + + auto* key_pos = key_size_pos + sizeof(key_size); + std::memcpy(key_pos, key.data(), key_size); + + return res; + } + + static void Destroy(ISLEntry& entry) { + zfree(entry.Raw()); + } + + ISLEntry Next() const { + ISLEntry next; + std::memcpy(&next.data_, Raw(), sizeof(next)); + return next; + } + + ISLEntry FakePrev() { + return ISLEntry(reinterpret_cast(&data_)); + } + + void SetNext(ISLEntry next) { + std::memcpy(Raw(), &next, sizeof(next)); + } + + const char* GetKeyData() const { + return Raw() + sizeof(ISLEntry*) + sizeof(uint32_t) + GetTtlSize(); + } + + uint32_t GetKeySize() const { + uint32_t size = 0; + std::memcpy(&size, Raw() + sizeof(ISLEntry*) + GetTtlSize(), sizeof(size)); + return size; + } + + uint64_t uptr() const { + return uint64_t(data_); + } + + char* Raw() const { + return (char*)(uptr() & ~kTagMask); + } + + void SetTtlBit(bool b) { + if (b) + data_ = (char*)(uptr() | kTtlBit); + else + data_ = (char*)(uptr() & (~kTtlBit)); + } + + bool HasTtl() const { + return (uptr() & kTtlBit) != 0; + } + + [[nodiscard]] bool UpdateTtl(uint32_t ttl_sec) { + if (HasTtl()) { + auto* ttl_pos = Raw() + sizeof(char*); + std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec)); + return true; + } + return false; + } + + std::uint32_t GetTtlSize() const { + return HasTtl() ? sizeof(std::uint32_t) : 0; + } + + // TODO consider use SDS strings or other approach + // TODO add optimization for big keys + // memory daya layout [ISLEntry*, key_size, key] + char* data_ = nullptr; +}; + +class UniqueISLEntry : private ISLEntry { + public: + ~UniqueISLEntry() { + Destroy(*this); + } + + UniqueISLEntry() = default; + UniqueISLEntry(ISLEntry e) : ISLEntry(e) { + } + UniqueISLEntry(UniqueISLEntry&& e) = default; + UniqueISLEntry& operator=(UniqueISLEntry&& e) = default; + + using ISLEntry::ExpiryTime; + using ISLEntry::HasExpiry; + using ISLEntry::Key; + using ISLEntry::operator bool; + + ISLEntry Release() { + ISLEntry res = *this; + data_ = nullptr; + return res; + } + + private: + UniqueISLEntry(const UniqueISLEntry&) = delete; +}; + +class IntrusiveStringList { + public: + class iterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = ISLEntry; + using pointer = ISLEntry*; + using reference = ISLEntry&; + + iterator(ISLEntry prev = end_.FakePrev()) : prev_(prev) { + DCHECK(prev); + } + + uint32_t ExpiryTime() const { + return prev_.Next().ExpiryTime(); + } + + void SetExpiryTime(uint32_t ttl_sec) { + auto entry = prev_.Next(); + + if (!entry.UpdateTtl(ttl_sec)) { + ISLEntry new_entry = ISLEntry::Create(entry.Key(), entry.Next().data_, ttl_sec); + ISLEntry::Destroy(entry); + prev_.SetNext(new_entry); + } + } + + bool HasExpiry() const { + return prev_.Next().HasExpiry(); + } + + iterator& operator++() { + prev_ = prev_.Next(); + return *this; + } + + operator bool() const { + return prev_.Next(); + } + + value_type operator*() { + return prev_.Next(); + } + + value_type operator->() { + return prev_.Next(); + } + + bool operator==(const iterator& r) { + return prev_.Next() == r.prev_.Next(); + } + + bool operator!=(const iterator& r) { + return !operator==(r); + } + + private: + ISLEntry prev_; + }; + + ~IntrusiveStringList() { + while (start_) { + auto next = start_.Next(); + ISLEntry::Destroy(start_); + start_ = next; + } + } + + IntrusiveStringList() = default; + IntrusiveStringList(IntrusiveStringList&& r) { + start_ = r.start_; + r.start_ = {}; + } + + iterator begin() { + return start_.FakePrev(); + } + + static iterator end() { + return end_.FakePrev(); + } + + ISLEntry Insert(ISLEntry e) { + e.SetNext(start_); + start_ = e; + return start_; + } + + UniqueISLEntry Pop(uint32_t curr_time) { + for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) { + start_ = it.Next(); + ISLEntry::Destroy(it); + } + auto res = start_; + if (start_) { + start_ = start_.Next(); + // TODO consider to res.SetNext(nullptr); for now it looks superfluous + } + return res; + } + + bool Empty() { + return !start_; + } + + // TODO consider to wrap ISLEntry to prevent usage out of the list + ISLEntry Emplace(std::string_view key, uint32_t ttl_sec = UINT32_MAX) { + return Insert(ISLEntry::Create(key, nullptr, ttl_sec)); + } + + // TODO consider to wrap ISLEntry to prevent usage out of the list + IntrusiveStringList::iterator Find(std::string_view str) { + auto it = begin(); + for (; it && it->Key() != str; ++it) + ; + return it; + } + + bool Erase(std::string_view str) { + auto cond = [str](const ISLEntry e) { return str == e.Key(); }; + return Erase(cond); + } + + template >* = nullptr> + bool Erase(const T& cond) { + if (!start_) { + return false; + } + + if (auto it = start_; cond(it)) { + start_ = it.Next(); + ISLEntry::Destroy(it); + return true; + } + + for (auto prev = start_, it = start_.Next(); it; prev = it, it = it.Next()) { + if (cond(it)) { + prev.SetNext(it.Next()); + ISLEntry::Destroy(it); + return true; + } + } + return false; + } + + template >* = nullptr> + bool Scan(const T& cb, uint32_t curr_time) { + for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) { + start_ = it.Next(); + ISLEntry::Destroy(it); + } + + for (auto curr = start_, next = start_; curr; curr = next) { + cb(curr.Key()); + next = curr.Next(); + for (auto tmp = next; tmp && tmp.ExpiryTime() < curr_time; tmp = next) { + next = next.Next(); + ISLEntry::Destroy(tmp); + } + curr.SetNext(next); + } + return start_; + } + + private: + ISLEntry start_; + static ISLEntry end_; +}; + +} // namespace dfly diff --git a/src/core/intrusive_string_set.h b/src/core/intrusive_string_set.h new file mode 100644 index 000000000..ad4b5d6b1 --- /dev/null +++ b/src/core/intrusive_string_set.h @@ -0,0 +1,281 @@ +// Copyright 2024, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#pragma once + +#include +#include + +#include + +#include "base/hash.h" +#include "base/pmr/memory_resource.h" +#include "intrusive_string_list.h" + +namespace dfly { + +class IntrusiveStringSet { + using Buckets = + std::vector>; + + public: + class iterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = ISLEntry; + using pointer = ISLEntry*; + using reference = ISLEntry&; + + iterator(Buckets::iterator it, Buckets::iterator end, IntrusiveStringList::iterator entry) + : buckets_it_(it), end_(end), entry_(entry) { + } + + iterator(Buckets::iterator it, Buckets::iterator end) : buckets_it_(it), end_(end) { + SetEntryIt(); + } + + // uint32_t ExpiryTime() const { + // return prev.Next()->ExpiryTime(); + // } + + void SetExpiryTime(uint32_t ttl_sec) { + entry_.SetExpiryTime(ttl_sec); + } + + // bool HasExpiry() const { + // return curr_entry_.HasExpiry(); + // } + + // void Advance(); + + iterator& operator++() { + // TODO add expiration logic + if (entry_) + ++entry_; + if (!entry_) { + ++buckets_it_; + SetEntryIt(); + } + return *this; + } + + bool operator==(const iterator& r) const { + return buckets_it_ == r.buckets_it_ && entry_ == r.entry_; + } + + bool operator!=(const iterator& r) const { + return !operator==(r); + } + + IntrusiveStringList::iterator::value_type operator*() { + return *entry_; + } + + IntrusiveStringList::iterator operator->() { + return entry_; + } + + private: + // find valid entry_ iterator starting from buckets_it_ and set it + void SetEntryIt() { + for (; buckets_it_ != end_; ++buckets_it_) { + if (!buckets_it_->Empty()) { + entry_ = buckets_it_->begin(); + break; + } + } + } + + private: + Buckets::iterator buckets_it_; + Buckets::iterator end_; + IntrusiveStringList::iterator entry_; + }; + + iterator begin() { + return iterator(entries_.begin(), entries_.end()); + } + + iterator end() { + return iterator(entries_.end(), entries_.end()); + } + + explicit IntrusiveStringSet(PMR_NS::memory_resource* mr = PMR_NS::get_default_resource()) + : entries_(mr) { + } + + ISLEntry Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) { + if (entries_.empty() || size_ >= entries_.size()) { + Reserve(Capacity() * 2); + } + const auto bucket_id = BucketId(Hash(str)); + auto& bucket = entries_[bucket_id]; + + if (auto existed_item = bucket.Find(str); existed_item) { + // TODO consider common implementation for key value pair + return ISLEntry(); + } + + return AddUnique(str, bucket, ttl_sec); + } + + void Reserve(size_t sz) { + sz = absl::bit_ceil(sz); + if (sz > entries_.size()) { + size_t prev_size = entries_.size(); + capacity_log_ = absl::bit_width(sz) - 1; + entries_.resize(sz); + Rehash(prev_size); + } + } + + void Clear() { + capacity_log_ = 0; + entries_.resize(0); + size_ = 0; + } + + ISLEntry AddUnique(std::string_view str, IntrusiveStringList& bucket, + uint32_t ttl_sec = UINT32_MAX) { + ++size_; + return bucket.Emplace(str, ttl_sec); + } + + unsigned AddMany(absl::Span span, uint32_t ttl_sec, bool keepttl) { + Reserve(span.size()); + unsigned res = 0; + for (auto& s : span) { + const auto bucket_id = BucketId(Hash(s)); + auto& bucket = entries_[bucket_id]; + if (auto existed_item = bucket.Find(s); existed_item) { + // TODO update TTL + } else { + ++res; + AddUnique(s, bucket, ttl_sec); + } + } + return res; + } + + /** + * stable scanning api. has the same guarantees as redis scan command. + * we avoid doing bit-reverse by using a different function to derive a bucket id + * from hash values. By using msb part of hash we make it "stable" with respect to + * rehashes. For example, with table log size 4 (size 16), entries in bucket id + * 1110 come from hashes 1110XXXXX.... When a table grows to log size 5, + * these entries can move either to 11100 or 11101. So if we traversed with our cursor + * range [0000-1110], it's guaranteed that in grown table we do not need to cover again + * [00000-11100]. Similarly with shrinkage, if a table is shrunk to log size 3, + * keys from 1110 and 1111 will move to bucket 111. Again, it's guaranteed that we + * covered the range [000-111] (all keys in that case). + * Returns: next cursor or 0 if reached the end of scan. + * cursor = 0 - initiates a new scan. + */ + + using ItemCb = std::function; + + uint32_t Scan(uint32_t cursor, const ItemCb& cb) { + uint32_t entries_idx = cursor >> (32 - capacity_log_); + + // First find the bucket to scan, skip empty buckets. + for (; entries_idx < entries_.size(); ++entries_idx) { + if (entries_[entries_idx].Scan(cb, time_now_)) { + break; + } + } + + if (++entries_idx >= entries_.size()) { + return 0; + } + + return entries_idx << (32 - capacity_log_); + } + + UniqueISLEntry Pop() { + for (auto& bucket : entries_) { + if (auto res = bucket.Pop(time_now_); res) { + --size_; + return res; + } + } + return {}; + } + + bool Erase(std::string_view str) { + if (entries_.empty()) + return false; + auto bucket_id = BucketId(Hash(str)); + return entries_[bucket_id].Erase(str); + } + + iterator Find(std::string_view member) { + if (entries_.empty()) + return end(); + auto bucket_id = BucketId(Hash(member)); + auto entry_it = entries_.begin() + bucket_id; + auto res = entry_it->Find(member); + return iterator(res ? entry_it : entries_.end(), entries_.end(), res); + } + + bool Contains(std::string_view member) { + return Find(member) != end(); + } + + // Returns the number of elements in the map. Note that it might be that some of these elements + // have expired and can't be accessed. + size_t UpperBoundSize() const { + return size_; + } + + bool Empty() const { + return size_ == 0; + } + + std::uint32_t Capacity() const { + return 1 << capacity_log_; + } + + // set an abstract time that allows expiry. + void set_time(uint32_t val) { + time_now_ = val; + } + + uint32_t time_now() const { + return time_now_; + } + + private: + // was Grow in StringSet + void Rehash(size_t prev_size) { + for (int64_t i = prev_size - 1; i >= 0; --i) { + auto list = std::move(entries_[i]); + for (auto entry = list.Pop(time_now_); entry; entry = list.Pop(time_now_)) { + auto bucket_id = BucketId(Hash(entry.Key())); + entries_[bucket_id].Insert(entry.Release()); + } + } + } + + uint32_t BucketId(uint64_t hash) const { + assert(capacity_log_ > 0); + return hash >> (64 - capacity_log_); + } + + uint64_t Hash(std::string_view str) const { + constexpr XXH64_hash_t kHashSeed = 24061983; + return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed); + } + + private: + std::uint32_t capacity_log_ = 0; + std::uint32_t size_ = 0; // number of elements in the set. + std::uint32_t time_now_ = 0; + + static_assert(sizeof(IntrusiveStringList) == sizeof(void*), + "IntrusiveStringList should be just a pointer"); + Buckets entries_; +}; + +} // namespace dfly diff --git a/src/core/intrusive_string_set_test.cc b/src/core/intrusive_string_set_test.cc new file mode 100644 index 000000000..d4f2fe9b8 --- /dev/null +++ b/src/core/intrusive_string_set_test.cc @@ -0,0 +1,800 @@ +// Copyright 2022, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#include "core/intrusive_string_set.h" + +#include + +#include +#include +#include + +#include "base/gtest.h" +#include "core/mi_memory_resource.h" +#include "glog/logging.h" + +extern "C" { +#include "redis/zmalloc.h" +} + +namespace dfly { + +using namespace std; + +class ISSAllocator : public PMR_NS::memory_resource { + public: + bool all_freed() const { + return alloced_ == 0; + } + + void* do_allocate(size_t bytes, size_t alignment) override { + alloced_ += bytes; + void* p = PMR_NS::new_delete_resource()->allocate(bytes, alignment); + return p; + } + + void do_deallocate(void* p, size_t bytes, size_t alignment) override { + alloced_ -= bytes; + return PMR_NS::new_delete_resource()->deallocate(p, bytes, alignment); + } + + bool do_is_equal(const PMR_NS::memory_resource& other) const noexcept override { + return PMR_NS::new_delete_resource()->is_equal(other); + } + + private: + size_t alloced_ = 0; +}; + +class IntrusiveStringSetTest : public ::testing::Test { + protected: + static void SetUpTestSuite() { + auto* tlh = mi_heap_get_backing(); + init_zmalloc_threadlocal(tlh); + } + + static void TearDownTestSuite() { + } + + void SetUp() override { + ss_ = new IntrusiveStringSet(&alloc_); + generator_.seed(0); + } + + void TearDown() override { + delete ss_; + + // ensure there are no memory leaks after every test + EXPECT_TRUE(alloc_.all_freed()); + EXPECT_EQ(zmalloc_used_memory_tl, 0); + } + + IntrusiveStringSet* ss_; + ISSAllocator alloc_; + mt19937 generator_; +}; + +static string random_string(mt19937& rand, unsigned len) { + const string_view alpanum = "1234567890abcdefghijklmnopqrstuvwxyz"; + string ret; + ret.reserve(len); + + for (size_t i = 0; i < len; ++i) { + ret += alpanum[rand() % alpanum.size()]; + } + + return ret; +} + +TEST_F(IntrusiveStringSetTest, IntrusiveStringListTest) { + IntrusiveStringList isl; + ISLEntry test = isl.Emplace("0123456789"); + + EXPECT_EQ(test.Key(), "0123456789"sv); + + test = isl.Emplace("123456789"); + + EXPECT_EQ(test.Key(), "123456789"sv); + + test = isl.Emplace("23456789"); + + EXPECT_EQ(isl.Find("0123456789")->Key(), "0123456789"sv); + EXPECT_EQ(isl.Find("23456789")->Key(), "23456789"sv); + EXPECT_EQ(isl.Find("123456789")->Key(), "123456789"sv); + EXPECT_FALSE(isl.Find("test")); + + EXPECT_TRUE(isl.Erase("23456789")); + EXPECT_FALSE(isl.Find("23456789")); + EXPECT_FALSE(isl.Erase("test")); + EXPECT_FALSE(isl.Find("test")); +} + +TEST_F(IntrusiveStringSetTest, IntrusiveStringSetAddFindTest) { + IntrusiveStringSet ss; + std::set test_set; + + for (int i = 0; i < 10000; ++i) { + test_set.insert(base::RandStr(20)); + } + + for (const auto& s : test_set) { + auto e = ss.Add(s); + EXPECT_EQ(e.Key(), s); + } + + for (const auto& s : test_set) { + auto e = ss.Find(s); + EXPECT_EQ(e->Key(), s); + } + + EXPECT_EQ(ss.Capacity(), 16384); +} + +TEST_F(IntrusiveStringSetTest, Basic) { + EXPECT_TRUE(ss_->Add("foo"sv)); + EXPECT_TRUE(ss_->Add("bar"sv)); + EXPECT_FALSE(ss_->Add("foo"sv)); + EXPECT_FALSE(ss_->Add("bar"sv)); + EXPECT_TRUE(ss_->Contains("foo"sv)); + EXPECT_TRUE(ss_->Contains("bar"sv)); + EXPECT_EQ(2, ss_->UpperBoundSize()); +} + +TEST_F(IntrusiveStringSetTest, StandardAddErase) { + EXPECT_TRUE(ss_->Add("@@@@@@@@@@@@@@@@")); + EXPECT_TRUE(ss_->Add("A@@@@@@@@@@@@@@@")); + EXPECT_TRUE(ss_->Add("AA@@@@@@@@@@@@@@")); + EXPECT_TRUE(ss_->Add("AAA@@@@@@@@@@@@@")); + EXPECT_TRUE(ss_->Add("AAAAAAAAA@@@@@@@")); + EXPECT_TRUE(ss_->Add("AAAAAAAAAA@@@@@@")); + EXPECT_TRUE(ss_->Add("AAAAAAAAAAAAAAA@")); + EXPECT_TRUE(ss_->Add("AAAAAAAAAAAAAAAA")); + EXPECT_TRUE(ss_->Add("AAAAAAAAAAAAAAAD")); + EXPECT_TRUE(ss_->Add("BBBBBAAAAAAAAAAA")); + EXPECT_TRUE(ss_->Add("BBBBBBBBAAAAAAAA")); + EXPECT_TRUE(ss_->Add("CCCCCBBBBBBBBBBB")); + + // Remove link in the middle of chain + EXPECT_TRUE(ss_->Erase("BBBBBBBBAAAAAAAA")); + // Remove start of a chain + EXPECT_TRUE(ss_->Erase("CCCCCBBBBBBBBBBB")); + // Remove end of link + EXPECT_TRUE(ss_->Erase("AAA@@@@@@@@@@@@@")); + // Remove only item in chain + EXPECT_TRUE(ss_->Erase("AA@@@@@@@@@@@@@@")); + EXPECT_TRUE(ss_->Erase("AAAAAAAAA@@@@@@@")); + EXPECT_TRUE(ss_->Erase("AAAAAAAAAA@@@@@@")); + EXPECT_TRUE(ss_->Erase("AAAAAAAAAAAAAAA@")); +} + +TEST_F(IntrusiveStringSetTest, DisplacedBug) { + string_view vals[] = {"imY", "OVl", "NhH", "BCe", "YDL", "lpb", + "nhF", "xod", "zYR", "PSa", "hce", "cTR"}; + ss_->AddMany(absl::MakeSpan(vals), UINT32_MAX, false); + + ss_->Add("fIc"); + ss_->Erase("YDL"); + ss_->Add("fYs"); + ss_->Erase("hce"); + ss_->Erase("nhF"); + ss_->Add("dye"); + ss_->Add("xZT"); + ss_->Add("LVK"); + ss_->Erase("zYR"); + ss_->Erase("fYs"); + ss_->Add("ueB"); + ss_->Erase("PSa"); + ss_->Erase("OVl"); + ss_->Add("cga"); + ss_->Add("too"); + ss_->Erase("ueB"); + ss_->Add("HZe"); + ss_->Add("oQn"); + ss_->Erase("too"); + ss_->Erase("HZe"); + ss_->Erase("xZT"); + ss_->Erase("cga"); + ss_->Erase("cTR"); + ss_->Erase("BCe"); + ss_->Add("eua"); + ss_->Erase("lpb"); + ss_->Add("OXK"); + ss_->Add("QmO"); + ss_->Add("SzV"); + ss_->Erase("QmO"); + ss_->Add("jbe"); + ss_->Add("BPN"); + ss_->Add("OfH"); + ss_->Add("Muf"); + ss_->Add("CwP"); + ss_->Erase("Muf"); + ss_->Erase("xod"); + ss_->Add("Cis"); + ss_->Add("Xvd"); + ss_->Erase("SzV"); + ss_->Erase("eua"); + ss_->Add("DGb"); + ss_->Add("leD"); + ss_->Add("MVX"); + ss_->Add("HPq"); +} + +TEST_F(IntrusiveStringSetTest, Resizing) { + constexpr size_t num_strs = 4096; + unordered_set strs; + while (strs.size() != num_strs) { + auto str = random_string(generator_, 10); + strs.insert(str); + } + + unsigned size = 0; + for (auto it = strs.begin(); it != strs.end(); ++it) { + const auto& str = *it; + EXPECT_TRUE(ss_->Add(str, 1)); + EXPECT_EQ(ss_->UpperBoundSize(), size + 1); + + // make sure we haven't lost any items after a grow + // which happens every power of 2 + if ((size & (size - 1)) == 0) { + for (auto j = strs.begin(); j != it; ++j) { + const auto& str = *j; + auto it = ss_->Find(str); + ASSERT_NE(it, ss_->end()); + EXPECT_TRUE(it->HasExpiry()); + EXPECT_EQ(it->ExpiryTime(), ss_->time_now() + 1); + } + } + ++size; + } +} + +TEST_F(IntrusiveStringSetTest, SimpleScan) { + unordered_set info = {"foo", "bar"}; + unordered_set seen; + + for (auto str : info) { + EXPECT_TRUE(ss_->Add(str)); + } + + uint32_t cursor = 0; + do { + cursor = ss_->Scan(cursor, [&](std::string_view str) { + EXPECT_TRUE(info.count(str)); + seen.insert(str); + }); + } while (cursor != 0); + + EXPECT_EQ(seen.size(), info.size()); + EXPECT_TRUE(equal(seen.begin(), seen.end(), info.begin())); +} + +// Ensure REDIS scan guarantees are met +TEST_F(IntrusiveStringSetTest, ScanGuarantees) { + unordered_set to_be_seen = {"foo", "bar"}; + unordered_set not_be_seen = {"AAA", "BBB"}; + unordered_set maybe_seen = {"AA@@@@@@@@@@@@@@", "AAA@@@@@@@@@@@@@", + "AAAAAAAAA@@@@@@@", "AAAAAAAAAA@@@@@@"}; + unordered_set seen; + + auto scan_callback = [&](std::string_view str) { + EXPECT_TRUE(to_be_seen.count(str) || maybe_seen.count(str)); + EXPECT_FALSE(not_be_seen.count(str)); + if (to_be_seen.count(str)) { + seen.insert(str); + } + }; + + EXPECT_EQ(ss_->Scan(0, scan_callback), 0); + + for (auto str : not_be_seen) { + EXPECT_TRUE(ss_->Add(str)); + } + + for (auto str : not_be_seen) { + EXPECT_TRUE(ss_->Erase(str)); + } + + for (auto str : to_be_seen) { + EXPECT_TRUE(ss_->Add(str)); + } + + // should reach at least the first item in the set + uint32_t cursor = ss_->Scan(0, scan_callback); + + for (auto str : maybe_seen) { + EXPECT_TRUE(ss_->Add(str)); + } + + while (cursor != 0) { + cursor = ss_->Scan(cursor, scan_callback); + } + + EXPECT_TRUE(seen.size() == to_be_seen.size()); +} + +TEST_F(IntrusiveStringSetTest, IntOnly) { + constexpr size_t num_ints = 8192; + unordered_set numbers; + for (size_t i = 0; i < num_ints; ++i) { + numbers.insert(i); + EXPECT_TRUE(ss_->Add(to_string(i))); + } + + for (size_t i = 0; i < num_ints; ++i) { + ASSERT_FALSE(ss_->Add(to_string(i))); + } + + size_t num_remove = generator_() % 4096; + unordered_set removed; + + for (size_t i = 0; i < num_remove; ++i) { + auto remove_int = generator_() % num_ints; + auto remove = to_string(remove_int); + if (numbers.count(remove_int)) { + ASSERT_TRUE(ss_->Contains(remove)) << remove_int; + EXPECT_TRUE(ss_->Erase(remove)); + numbers.erase(remove_int); + } else { + EXPECT_FALSE(ss_->Erase(remove)); + } + + EXPECT_FALSE(ss_->Contains(remove)); + removed.insert(remove); + } + + size_t expected_seen = 0; + auto scan_callback = [&](std::string_view str_v) { + std::string str(str_v); + EXPECT_FALSE(removed.count(str)); + + if (numbers.count(std::atoi(str.data()))) { + ++expected_seen; + } + }; + + uint32_t cursor = 0; + do { + cursor = ss_->Scan(cursor, scan_callback); + // randomly throw in some new numbers + uint32_t val = generator_(); + ss_->Add(to_string(val)); + } while (cursor != 0); + + EXPECT_GE(expected_seen + removed.size(), num_ints); +} + +TEST_F(IntrusiveStringSetTest, XtremeScanGrow) { + unordered_set to_see, force_grow, seen; + + while (to_see.size() != 8) { + to_see.insert(random_string(generator_, 10)); + } + + while (force_grow.size() != 8192) { + string str = random_string(generator_, 10); + + if (to_see.count(str)) { + continue; + } + + force_grow.insert(random_string(generator_, 10)); + } + + for (auto& str : to_see) { + EXPECT_TRUE(ss_->Add(str)); + } + + auto scan_callback = [&](string_view strv) { + std::string str(strv); + if (to_see.count(str)) { + seen.insert(str); + } + }; + + uint32_t cursor = ss_->Scan(0, scan_callback); + + // force approx 10 grows + for (auto& s : force_grow) { + EXPECT_TRUE(ss_->Add(s)); + } + + while (cursor != 0) { + cursor = ss_->Scan(cursor, scan_callback); + } + + EXPECT_EQ(seen.size(), to_see.size()); +} + +TEST_F(IntrusiveStringSetTest, Pop) { + constexpr size_t num_items = 8; + unordered_set to_insert; + + while (to_insert.size() != num_items) { + auto str = random_string(generator_, 10); + if (to_insert.count(str)) { + continue; + } + + to_insert.insert(str); + EXPECT_TRUE(ss_->Add(str)); + } + + while (!ss_->Empty()) { + size_t size = ss_->UpperBoundSize(); + auto str = ss_->Pop(); + DCHECK(ss_->UpperBoundSize() == to_insert.size() - 1); + DCHECK(str); + DCHECK(to_insert.count(std::string(str.Key()))); + DCHECK_EQ(ss_->UpperBoundSize(), size - 1); + to_insert.erase(std::string(str.Key())); + } + + DCHECK(ss_->Empty()); + DCHECK(to_insert.empty()); +} + +TEST_F(IntrusiveStringSetTest, Iteration) { + ss_->Add("foo"); + for (const auto& ptr : *ss_) { + LOG(INFO) << ptr; + } + ss_->Clear(); + constexpr size_t num_items = 8192; + unordered_set to_insert; + + while (to_insert.size() != num_items) { + auto str = random_string(generator_, 10); + if (to_insert.count(str)) { + continue; + } + + to_insert.insert(str); + EXPECT_TRUE(ss_->Add(str)); + } + + for (const auto& ptr : *ss_) { + std::string str(ptr.Key()); + EXPECT_TRUE(to_insert.count(str)); + to_insert.erase(str); + } + + EXPECT_EQ(to_insert.size(), 0); +} + +TEST_F(IntrusiveStringSetTest, SetFieldExpireHasExpiry) { + EXPECT_TRUE(ss_->Add("k1", 100)); + auto k = ss_->Find("k1"); + EXPECT_TRUE(k->HasExpiry()); + EXPECT_EQ(k->ExpiryTime(), 100); + k.SetExpiryTime(1); + EXPECT_TRUE(k->HasExpiry()); + EXPECT_EQ(k->ExpiryTime(), 1); +} + +TEST_F(IntrusiveStringSetTest, SetFieldExpireNoHasExpiry) { + EXPECT_TRUE(ss_->Add("k1")); + auto k = ss_->Find("k1"); + EXPECT_FALSE(k->HasExpiry()); + k.SetExpiryTime(10); + EXPECT_TRUE(k->HasExpiry()); + EXPECT_EQ(k->ExpiryTime(), 10); +} + +// TEST_F(IntrusiveStringSetTest, Ttl) { +// EXPECT_TRUE(ss_->Add("bla"sv, 1)); +// EXPECT_FALSE(ss_->Add("bla"sv, 1)); +// auto it = ss_->Find("bla"sv); +// EXPECT_EQ(1u, it.ExpiryTime()); + +// ss_->set_time(1); +// EXPECT_TRUE(ss_->Add("bla"sv, 1)); +// EXPECT_EQ(1u, ss_->UpperBoundSize()); + +// for (unsigned i = 0; i < 100; ++i) { +// EXPECT_TRUE(ss_->Add(StrCat("foo", i), 1)); +// } +// EXPECT_EQ(101u, ss_->UpperBoundSize()); +// it = ss_->Find("foo50"); +// EXPECT_STREQ("foo50", *it); +// EXPECT_EQ(2u, it.ExpiryTime()); + +// ss_->set_time(2); +// for (unsigned i = 0; i < 100; ++i) { +// EXPECT_TRUE(ss_->Add(StrCat("bar", i))); +// } +// it = ss_->Find("bar50"); +// EXPECT_FALSE(it.HasExpiry()); + +// for (auto it = ss_->begin(); it != ss_->end(); ++it) { +// ASSERT_TRUE(absl::StartsWith(*it, "bar")) << *it; +// string str = *it; +// VLOG(1) << *it; +// } +// } + +// TEST_F(IntrusiveStringSetTest, Grow) { +// for (size_t j = 0; j < 10; ++j) { +// for (size_t i = 0; i < 4098; ++i) { +// ss_->Reserve(generator_() % 256); +// auto str = random_string(generator_, 3); +// ss_->Add(str); +// } +// ss_->Clear(); +// } +// } + +// TEST_F(IntrusiveStringSetTest, Reserve) { +// vector strs; + +// for (size_t i = 0; i < 10; ++i) { +// strs.push_back(random_string(generator_, 10)); +// ss_->Add(strs.back()); +// } + +// for (size_t j = 2; j < 20; j += 3) { +// ss_->Reserve(j * 20); +// for (size_t i = 0; i < 10; ++i) { +// ASSERT_TRUE(ss_->Contains(strs[i])); +// } +// } +// } + +// TEST_F(IntrusiveStringSetTest, Fill) { +// for (size_t i = 0; i < 100; ++i) { +// ss_->Add(random_string(generator_, 10)); +// } +// StringSet s2; +// ss_->Fill(&s2); +// EXPECT_EQ(s2.UpperBoundSize(), ss_->UpperBoundSize()); +// for (sds str : *ss_) { +// EXPECT_TRUE(s2.Contains(str)); +// } +// } + +// TEST_F(IntrusiveStringSetTest, IterateEmpty) { +// for (const auto& s : *ss_) { +// // We're iterating to make sure there is no crash. However, if we got here, it's a bug +// CHECK(false) << "Found entry " << s << " in empty set"; +// } +// } + +// size_t memUsed(IntrusiveStringSet& obj) { +// return obj.ObjMallocUsed() + obj.SetMallocUsed(); +// } + +// void BM_Clone(benchmark::State& state) { +// vector strs; +// mt19937 generator(0); +// IntrusiveStringSet ss1, ss2; +// unsigned elems = state.range(0); +// for (size_t i = 0; i < elems; ++i) { +// string str = random_string(generator, 10); +// ss1.Add(str); +// } +// ss2.Reserve(ss1.UpperBoundSize()); +// while (state.KeepRunning()) { +// for (auto src : ss1) { +// ss2.Add(src); +// } +// state.PauseTiming(); +// ss2.Clear(); +// ss2.Reserve(ss1.UpperBoundSize()); +// state.ResumeTiming(); +// } +// } +// BENCHMARK(BM_Clone)->ArgName("elements")->Arg(32000); + +// void BM_Fill(benchmark::State& state) { +// unsigned elems = state.range(0); +// vector strs; +// mt19937 generator(0); +// IntrusiveStringSet ss1, ss2; +// for (size_t i = 0; i < elems; ++i) { +// string str = random_string(generator, 10); +// ss1.Add(str); +// } + +// while (state.KeepRunning()) { +// ss1.Fill(&ss2); +// state.PauseTiming(); +// ss2.Clear(); +// state.ResumeTiming(); +// } +// } +// BENCHMARK(BM_Fill)->ArgName("elements")->Arg(32000); + +// void BM_Clear(benchmark::State& state) { +// unsigned elems = state.range(0); +// mt19937 generator(0); +// StringSet ss; +// while (state.KeepRunning()) { +// state.PauseTiming(); +// for (size_t i = 0; i < elems; ++i) { +// string str = random_string(generator, 16); +// ss.Add(str); +// } +// state.ResumeTiming(); +// ss.Clear(); +// } +// } +// BENCHMARK(BM_Clear)->ArgName("elements")->Arg(32000); + +void BM_Add(benchmark::State& state) { + vector strs; + mt19937 generator(0); + IntrusiveStringSet ss; + unsigned elems = state.range(0); + unsigned keySize = state.range(1); + for (size_t i = 0; i < elems; ++i) { + string str = random_string(generator, keySize); + strs.push_back(str); + } + ss.Reserve(elems); + while (state.KeepRunning()) { + for (auto& str : strs) + ss.Add(str); + state.PauseTiming(); + // state.counters["Memory_Used"] = memUsed(ss); + ss.Clear(); + ss.Reserve(elems); + state.ResumeTiming(); + } +} +BENCHMARK(BM_Add) + ->ArgNames({"elements", "Key Size"}) + ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}}); + +// void BM_AddMany(benchmark::State& state) { +// vector strs; +// mt19937 generator(0); +// StringSet ss; +// unsigned elems = state.range(0); +// unsigned keySize = state.range(1); +// for (size_t i = 0; i < elems; ++i) { +// string str = random_string(generator, keySize); +// strs.push_back(str); +// } +// ss.Reserve(elems); +// vector svs; +// for (const auto& str : strs) { +// svs.push_back(str); +// } +// while (state.KeepRunning()) { +// ss.AddMany(absl::MakeSpan(svs), UINT32_MAX, false); +// state.PauseTiming(); +// CHECK_EQ(ss.UpperBoundSize(), elems); +// state.counters["Memory_Used"] = memUsed(ss); +// ss.Clear(); +// ss.Reserve(elems); +// state.ResumeTiming(); +// } +// } +// BENCHMARK(BM_AddMany) +// ->ArgNames({"elements", "Key Size"}) +// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}}); + +// void BM_Erase(benchmark::State& state) { +// std::vector strs; +// mt19937 generator(0); +// StringSet ss; +// auto elems = state.range(0); +// auto keySize = state.range(1); +// for (long int i = 0; i < elems; ++i) { +// std::string str = random_string(generator, keySize); +// strs.push_back(str); +// ss.Add(str); +// } +// state.counters["Memory_Before_Erase"] = memUsed(ss); +// while (state.KeepRunning()) { +// for (auto& str : strs) { +// ss.Erase(str); +// } +// state.PauseTiming(); +// state.counters["Memory_After_Erase"] = memUsed(ss); +// for (auto& str : strs) { +// ss.Add(str); +// } +// state.ResumeTiming(); +// } +// } +// BENCHMARK(BM_Erase) +// ->ArgNames({"elements", "Key Size"}) +// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}}); + +// void BM_Get(benchmark::State& state) { +// std::vector strs; +// mt19937 generator(0); +// StringSet ss; +// auto elems = state.range(0); +// auto keySize = state.range(1); +// for (long int i = 0; i < elems; ++i) { +// std::string str = random_string(generator, keySize); +// strs.push_back(str); +// ss.Add(str); +// } +// while (state.KeepRunning()) { +// for (auto& str : strs) { +// ss.Find(str); +// } +// } +// } +// BENCHMARK(BM_Get) +// ->ArgNames({"elements", "Key Size"}) +// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}}); + +// void BM_Grow(benchmark::State& state) { +// vector strs; +// mt19937 generator(0); +// StringSet src; +// unsigned elems = 1 << 18; +// for (size_t i = 0; i < elems; ++i) { +// src.Add(random_string(generator, 16), UINT32_MAX); +// strs.push_back(random_string(generator, 16)); +// } + +// while (state.KeepRunning()) { +// state.PauseTiming(); +// StringSet tmp; +// src.Fill(&tmp); +// CHECK_EQ(tmp.BucketCount(), elems); +// state.ResumeTiming(); +// for (const auto& str : strs) { +// tmp.Add(str); +// if (tmp.BucketCount() > elems) { +// break; // we grew +// } +// } + +// CHECK_GT(tmp.BucketCount(), elems); +// } +// } +// BENCHMARK(BM_Grow); + +// unsigned total_wasted_memory = 0; + +// TEST_F(IntrusiveStringSetTest, ReallocIfNeeded) { +// auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); }; + +// auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block, +// size_t block_size, void* arg) { +// size_t used = block_size * area->used; +// total_wasted_memory += area->committed - used; +// return true; +// }; + +// for (size_t i = 0; i < 10'000; i++) +// ss_->Add(build_str(i)); + +// for (size_t i = 0; i < 10'000; i++) { +// if (i % 10 == 0) +// continue; +// ss_->Erase(build_str(i)); +// } + +// mi_heap_collect(mi_heap_get_backing(), true); +// mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr); +// size_t wasted_before = total_wasted_memory; + +// size_t underutilized = 0; +// for (auto it = ss_->begin(); it != ss_->end(); ++it) { +// underutilized += zmalloc_page_is_underutilized(*it, 0.9); +// it.ReallocIfNeeded(0.9); +// } +// // Check there are underutilized pages +// CHECK_GT(underutilized, 0u); + +// total_wasted_memory = 0; +// mi_heap_collect(mi_heap_get_backing(), true); +// mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr); +// size_t wasted_after = total_wasted_memory; + +// // Check we waste significanlty less now +// EXPECT_GT(wasted_before, wasted_after * 2); + +// EXPECT_EQ(ss_->UpperBoundSize(), 1000); +// for (size_t i = 0; i < 1000; i++) +// EXPECT_EQ(*ss_->Find(build_str(i * 10)), build_str(i * 10)); +// } + +} // namespace dfly