This commit is contained in:
Borys 2025-05-06 14:31:11 +03:00 committed by GitHub
commit c2180dab5b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 1460 additions and 1 deletions

View file

@ -21,7 +21,7 @@ add_subdirectory(json)
set(SEARCH_LIB query_parser) set(SEARCH_LIB query_parser)
add_library(dfly_core allocation_tracker.cc bloom.cc compact_object.cc dense_set.cc add_library(dfly_core allocation_tracker.cc bloom.cc compact_object.cc dense_set.cc
dragonfly_core.cc extent_tree.cc dragonfly_core.cc extent_tree.cc intrusive_string_list.cc
interpreter.cc glob_matcher.cc mi_memory_resource.cc qlist.cc sds_utils.cc interpreter.cc glob_matcher.cc mi_memory_resource.cc qlist.cc sds_utils.cc
segment_allocator.cc score_map.cc small_string.cc sorted_map.cc task_queue.cc segment_allocator.cc score_map.cc small_string.cc sorted_map.cc task_queue.cc
tx_queue.cc string_set.cc string_map.cc top_keys.cc detail/bitpacking.cc) tx_queue.cc string_set.cc string_map.cc top_keys.cc detail/bitpacking.cc)
@ -40,6 +40,7 @@ cxx_test(interpreter_test dfly_core LABELS DFLY)
cxx_test(string_set_test dfly_core LABELS DFLY) cxx_test(string_set_test dfly_core LABELS DFLY)
cxx_test(string_map_test dfly_core LABELS DFLY) cxx_test(string_map_test dfly_core LABELS DFLY)
cxx_test(intrusive_string_set_test dfly_core LABELS DFLY)
cxx_test(sorted_map_test dfly_core redis_test_lib LABELS DFLY) cxx_test(sorted_map_test dfly_core redis_test_lib LABELS DFLY)
cxx_test(bptree_set_test dfly_core LABELS DFLY) cxx_test(bptree_set_test dfly_core LABELS DFLY)
cxx_test(score_map_test dfly_core LABELS DFLY) cxx_test(score_map_test dfly_core LABELS DFLY)

View file

@ -0,0 +1,10 @@
// Copyright 2025, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "intrusive_string_list.h"
namespace dfly {
ISLEntry IntrusiveStringList::end_;
} // namespace dfly

View file

@ -0,0 +1,367 @@
// Copyright 2024, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
// #include <cassert>
#include <cstring>
#include <string_view>
#include "base/logging.h"
extern "C" {
#include "redis/zmalloc.h"
}
namespace dfly {
// doesn't possess memory, it should be created and release manually
class ISLEntry {
friend class IntrusiveStringList;
// we can assume that high 12 bits of user address space
// can be used for tagging. At most 52 bits of address are reserved for
// some configurations, and usually it's 48 bits.
// https://docs.kernel.org/arch/arm64/memory.html
static constexpr size_t kTtlBit = 1ULL << 55;
static constexpr size_t kTagMask = 4095ULL << 52; // we reserve 12 high bits.
public:
ISLEntry() = default;
ISLEntry(char* data) {
data_ = data;
}
ISLEntry(const ISLEntry& e) = default;
ISLEntry(ISLEntry&& e) {
data_ = e.data_;
e.data_ = nullptr;
}
ISLEntry& operator=(const ISLEntry& e) = default;
ISLEntry& operator=(ISLEntry&& e) {
data_ = e.data_;
e.data_ = nullptr;
return *this;
}
operator bool() const {
return data_;
}
std::string_view Key() const {
return {GetKeyData(), GetKeySize()};
}
bool HasExpiry() const {
return HasTtl();
}
// returns the expiry time of the current entry or UINT32_MAX if no ttl is set.
uint32_t ExpiryTime() const {
std::uint32_t res = UINT32_MAX;
if (HasTtl()) {
std::memcpy(&res, Raw() + sizeof(ISLEntry*), sizeof(res));
}
return res;
}
// TODO consider another option to implement iterator
ISLEntry* operator->() {
return this;
}
protected:
static ISLEntry Create(std::string_view key, char* next = nullptr,
uint32_t ttl_sec = UINT32_MAX) {
uint32_t key_size = key.size();
bool has_ttl = ttl_sec != UINT32_MAX;
auto size = sizeof(next) + sizeof(key_size) + key_size + has_ttl * sizeof(ttl_sec);
char* data = (char*)zmalloc(size);
ISLEntry res(data);
std::memcpy(data, &next, sizeof(next));
auto* ttl_pos = data + sizeof(next);
if (has_ttl) {
res.SetTtlBit(true);
std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec));
}
auto* key_size_pos = ttl_pos + res.GetTtlSize();
std::memcpy(key_size_pos, &key_size, sizeof(key_size));
auto* key_pos = key_size_pos + sizeof(key_size);
std::memcpy(key_pos, key.data(), key_size);
return res;
}
static void Destroy(ISLEntry& entry) {
zfree(entry.Raw());
}
ISLEntry Next() const {
ISLEntry next;
std::memcpy(&next.data_, Raw(), sizeof(next));
return next;
}
ISLEntry FakePrev() {
return ISLEntry(reinterpret_cast<char*>(&data_));
}
void SetNext(ISLEntry next) {
std::memcpy(Raw(), &next, sizeof(next));
}
const char* GetKeyData() const {
return Raw() + sizeof(ISLEntry*) + sizeof(uint32_t) + GetTtlSize();
}
uint32_t GetKeySize() const {
uint32_t size = 0;
std::memcpy(&size, Raw() + sizeof(ISLEntry*) + GetTtlSize(), sizeof(size));
return size;
}
uint64_t uptr() const {
return uint64_t(data_);
}
char* Raw() const {
return (char*)(uptr() & ~kTagMask);
}
void SetTtlBit(bool b) {
if (b)
data_ = (char*)(uptr() | kTtlBit);
else
data_ = (char*)(uptr() & (~kTtlBit));
}
bool HasTtl() const {
return (uptr() & kTtlBit) != 0;
}
[[nodiscard]] bool UpdateTtl(uint32_t ttl_sec) {
if (HasTtl()) {
auto* ttl_pos = Raw() + sizeof(char*);
std::memcpy(ttl_pos, &ttl_sec, sizeof(ttl_sec));
return true;
}
return false;
}
std::uint32_t GetTtlSize() const {
return HasTtl() ? sizeof(std::uint32_t) : 0;
}
// TODO consider use SDS strings or other approach
// TODO add optimization for big keys
// memory daya layout [ISLEntry*, key_size, key]
char* data_ = nullptr;
};
class UniqueISLEntry : private ISLEntry {
public:
~UniqueISLEntry() {
Destroy(*this);
}
UniqueISLEntry() = default;
UniqueISLEntry(ISLEntry e) : ISLEntry(e) {
}
UniqueISLEntry(UniqueISLEntry&& e) = default;
UniqueISLEntry& operator=(UniqueISLEntry&& e) = default;
using ISLEntry::ExpiryTime;
using ISLEntry::HasExpiry;
using ISLEntry::Key;
using ISLEntry::operator bool;
ISLEntry Release() {
ISLEntry res = *this;
data_ = nullptr;
return res;
}
private:
UniqueISLEntry(const UniqueISLEntry&) = delete;
};
class IntrusiveStringList {
public:
class iterator {
public:
using iterator_category = std::forward_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = ISLEntry;
using pointer = ISLEntry*;
using reference = ISLEntry&;
iterator(ISLEntry prev = end_.FakePrev()) : prev_(prev) {
DCHECK(prev);
}
uint32_t ExpiryTime() const {
return prev_.Next().ExpiryTime();
}
void SetExpiryTime(uint32_t ttl_sec) {
auto entry = prev_.Next();
if (!entry.UpdateTtl(ttl_sec)) {
ISLEntry new_entry = ISLEntry::Create(entry.Key(), entry.Next().data_, ttl_sec);
ISLEntry::Destroy(entry);
prev_.SetNext(new_entry);
}
}
bool HasExpiry() const {
return prev_.Next().HasExpiry();
}
iterator& operator++() {
prev_ = prev_.Next();
return *this;
}
operator bool() const {
return prev_.Next();
}
value_type operator*() {
return prev_.Next();
}
value_type operator->() {
return prev_.Next();
}
bool operator==(const iterator& r) {
return prev_.Next() == r.prev_.Next();
}
bool operator!=(const iterator& r) {
return !operator==(r);
}
private:
ISLEntry prev_;
};
~IntrusiveStringList() {
while (start_) {
auto next = start_.Next();
ISLEntry::Destroy(start_);
start_ = next;
}
}
IntrusiveStringList() = default;
IntrusiveStringList(IntrusiveStringList&& r) {
start_ = r.start_;
r.start_ = {};
}
iterator begin() {
return start_.FakePrev();
}
static iterator end() {
return end_.FakePrev();
}
ISLEntry Insert(ISLEntry e) {
e.SetNext(start_);
start_ = e;
return start_;
}
UniqueISLEntry Pop(uint32_t curr_time) {
for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) {
start_ = it.Next();
ISLEntry::Destroy(it);
}
auto res = start_;
if (start_) {
start_ = start_.Next();
// TODO consider to res.SetNext(nullptr); for now it looks superfluous
}
return res;
}
bool Empty() {
return !start_;
}
// TODO consider to wrap ISLEntry to prevent usage out of the list
ISLEntry Emplace(std::string_view key, uint32_t ttl_sec = UINT32_MAX) {
return Insert(ISLEntry::Create(key, nullptr, ttl_sec));
}
// TODO consider to wrap ISLEntry to prevent usage out of the list
IntrusiveStringList::iterator Find(std::string_view str) {
auto it = begin();
for (; it && it->Key() != str; ++it)
;
return it;
}
bool Erase(std::string_view str) {
auto cond = [str](const ISLEntry e) { return str == e.Key(); };
return Erase(cond);
}
template <class T, std::enable_if_t<std::is_invocable_v<T, ISLEntry>>* = nullptr>
bool Erase(const T& cond) {
if (!start_) {
return false;
}
if (auto it = start_; cond(it)) {
start_ = it.Next();
ISLEntry::Destroy(it);
return true;
}
for (auto prev = start_, it = start_.Next(); it; prev = it, it = it.Next()) {
if (cond(it)) {
prev.SetNext(it.Next());
ISLEntry::Destroy(it);
return true;
}
}
return false;
}
template <class T, std::enable_if_t<std::is_invocable_v<T, std::string_view>>* = nullptr>
bool Scan(const T& cb, uint32_t curr_time) {
for (auto it = start_; it && it.ExpiryTime() < curr_time; it = start_) {
start_ = it.Next();
ISLEntry::Destroy(it);
}
for (auto curr = start_, next = start_; curr; curr = next) {
cb(curr.Key());
next = curr.Next();
for (auto tmp = next; tmp && tmp.ExpiryTime() < curr_time; tmp = next) {
next = next.Next();
ISLEntry::Destroy(tmp);
}
curr.SetNext(next);
}
return start_;
}
private:
ISLEntry start_;
static ISLEntry end_;
};
} // namespace dfly

View file

@ -0,0 +1,281 @@
// Copyright 2024, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#include <absl/numeric/bits.h>
#include <absl/types/span.h>
#include <vector>
#include "base/hash.h"
#include "base/pmr/memory_resource.h"
#include "intrusive_string_list.h"
namespace dfly {
class IntrusiveStringSet {
using Buckets =
std::vector<IntrusiveStringList, PMR_NS::polymorphic_allocator<IntrusiveStringList>>;
public:
class iterator {
public:
using iterator_category = std::forward_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = ISLEntry;
using pointer = ISLEntry*;
using reference = ISLEntry&;
iterator(Buckets::iterator it, Buckets::iterator end, IntrusiveStringList::iterator entry)
: buckets_it_(it), end_(end), entry_(entry) {
}
iterator(Buckets::iterator it, Buckets::iterator end) : buckets_it_(it), end_(end) {
SetEntryIt();
}
// uint32_t ExpiryTime() const {
// return prev.Next()->ExpiryTime();
// }
void SetExpiryTime(uint32_t ttl_sec) {
entry_.SetExpiryTime(ttl_sec);
}
// bool HasExpiry() const {
// return curr_entry_.HasExpiry();
// }
// void Advance();
iterator& operator++() {
// TODO add expiration logic
if (entry_)
++entry_;
if (!entry_) {
++buckets_it_;
SetEntryIt();
}
return *this;
}
bool operator==(const iterator& r) const {
return buckets_it_ == r.buckets_it_ && entry_ == r.entry_;
}
bool operator!=(const iterator& r) const {
return !operator==(r);
}
IntrusiveStringList::iterator::value_type operator*() {
return *entry_;
}
IntrusiveStringList::iterator operator->() {
return entry_;
}
private:
// find valid entry_ iterator starting from buckets_it_ and set it
void SetEntryIt() {
for (; buckets_it_ != end_; ++buckets_it_) {
if (!buckets_it_->Empty()) {
entry_ = buckets_it_->begin();
break;
}
}
}
private:
Buckets::iterator buckets_it_;
Buckets::iterator end_;
IntrusiveStringList::iterator entry_;
};
iterator begin() {
return iterator(entries_.begin(), entries_.end());
}
iterator end() {
return iterator(entries_.end(), entries_.end());
}
explicit IntrusiveStringSet(PMR_NS::memory_resource* mr = PMR_NS::get_default_resource())
: entries_(mr) {
}
ISLEntry Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) {
if (entries_.empty() || size_ >= entries_.size()) {
Reserve(Capacity() * 2);
}
const auto bucket_id = BucketId(Hash(str));
auto& bucket = entries_[bucket_id];
if (auto existed_item = bucket.Find(str); existed_item) {
// TODO consider common implementation for key value pair
return ISLEntry();
}
return AddUnique(str, bucket, ttl_sec);
}
void Reserve(size_t sz) {
sz = absl::bit_ceil(sz);
if (sz > entries_.size()) {
size_t prev_size = entries_.size();
capacity_log_ = absl::bit_width(sz) - 1;
entries_.resize(sz);
Rehash(prev_size);
}
}
void Clear() {
capacity_log_ = 0;
entries_.resize(0);
size_ = 0;
}
ISLEntry AddUnique(std::string_view str, IntrusiveStringList& bucket,
uint32_t ttl_sec = UINT32_MAX) {
++size_;
return bucket.Emplace(str, ttl_sec);
}
unsigned AddMany(absl::Span<std::string_view> span, uint32_t ttl_sec, bool keepttl) {
Reserve(span.size());
unsigned res = 0;
for (auto& s : span) {
const auto bucket_id = BucketId(Hash(s));
auto& bucket = entries_[bucket_id];
if (auto existed_item = bucket.Find(s); existed_item) {
// TODO update TTL
} else {
++res;
AddUnique(s, bucket, ttl_sec);
}
}
return res;
}
/**
* stable scanning api. has the same guarantees as redis scan command.
* we avoid doing bit-reverse by using a different function to derive a bucket id
* from hash values. By using msb part of hash we make it "stable" with respect to
* rehashes. For example, with table log size 4 (size 16), entries in bucket id
* 1110 come from hashes 1110XXXXX.... When a table grows to log size 5,
* these entries can move either to 11100 or 11101. So if we traversed with our cursor
* range [0000-1110], it's guaranteed that in grown table we do not need to cover again
* [00000-11100]. Similarly with shrinkage, if a table is shrunk to log size 3,
* keys from 1110 and 1111 will move to bucket 111. Again, it's guaranteed that we
* covered the range [000-111] (all keys in that case).
* Returns: next cursor or 0 if reached the end of scan.
* cursor = 0 - initiates a new scan.
*/
using ItemCb = std::function<void(std::string_view)>;
uint32_t Scan(uint32_t cursor, const ItemCb& cb) {
uint32_t entries_idx = cursor >> (32 - capacity_log_);
// First find the bucket to scan, skip empty buckets.
for (; entries_idx < entries_.size(); ++entries_idx) {
if (entries_[entries_idx].Scan(cb, time_now_)) {
break;
}
}
if (++entries_idx >= entries_.size()) {
return 0;
}
return entries_idx << (32 - capacity_log_);
}
UniqueISLEntry Pop() {
for (auto& bucket : entries_) {
if (auto res = bucket.Pop(time_now_); res) {
--size_;
return res;
}
}
return {};
}
bool Erase(std::string_view str) {
if (entries_.empty())
return false;
auto bucket_id = BucketId(Hash(str));
return entries_[bucket_id].Erase(str);
}
iterator Find(std::string_view member) {
if (entries_.empty())
return end();
auto bucket_id = BucketId(Hash(member));
auto entry_it = entries_.begin() + bucket_id;
auto res = entry_it->Find(member);
return iterator(res ? entry_it : entries_.end(), entries_.end(), res);
}
bool Contains(std::string_view member) {
return Find(member) != end();
}
// Returns the number of elements in the map. Note that it might be that some of these elements
// have expired and can't be accessed.
size_t UpperBoundSize() const {
return size_;
}
bool Empty() const {
return size_ == 0;
}
std::uint32_t Capacity() const {
return 1 << capacity_log_;
}
// set an abstract time that allows expiry.
void set_time(uint32_t val) {
time_now_ = val;
}
uint32_t time_now() const {
return time_now_;
}
private:
// was Grow in StringSet
void Rehash(size_t prev_size) {
for (int64_t i = prev_size - 1; i >= 0; --i) {
auto list = std::move(entries_[i]);
for (auto entry = list.Pop(time_now_); entry; entry = list.Pop(time_now_)) {
auto bucket_id = BucketId(Hash(entry.Key()));
entries_[bucket_id].Insert(entry.Release());
}
}
}
uint32_t BucketId(uint64_t hash) const {
assert(capacity_log_ > 0);
return hash >> (64 - capacity_log_);
}
uint64_t Hash(std::string_view str) const {
constexpr XXH64_hash_t kHashSeed = 24061983;
return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed);
}
private:
std::uint32_t capacity_log_ = 0;
std::uint32_t size_ = 0; // number of elements in the set.
std::uint32_t time_now_ = 0;
static_assert(sizeof(IntrusiveStringList) == sizeof(void*),
"IntrusiveStringList should be just a pointer");
Buckets entries_;
};
} // namespace dfly

View file

@ -0,0 +1,800 @@
// Copyright 2022, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "core/intrusive_string_set.h"
#include <mimalloc.h>
#include <random>
#include <set>
#include <unordered_set>
#include "base/gtest.h"
#include "core/mi_memory_resource.h"
#include "glog/logging.h"
extern "C" {
#include "redis/zmalloc.h"
}
namespace dfly {
using namespace std;
class ISSAllocator : public PMR_NS::memory_resource {
public:
bool all_freed() const {
return alloced_ == 0;
}
void* do_allocate(size_t bytes, size_t alignment) override {
alloced_ += bytes;
void* p = PMR_NS::new_delete_resource()->allocate(bytes, alignment);
return p;
}
void do_deallocate(void* p, size_t bytes, size_t alignment) override {
alloced_ -= bytes;
return PMR_NS::new_delete_resource()->deallocate(p, bytes, alignment);
}
bool do_is_equal(const PMR_NS::memory_resource& other) const noexcept override {
return PMR_NS::new_delete_resource()->is_equal(other);
}
private:
size_t alloced_ = 0;
};
class IntrusiveStringSetTest : public ::testing::Test {
protected:
static void SetUpTestSuite() {
auto* tlh = mi_heap_get_backing();
init_zmalloc_threadlocal(tlh);
}
static void TearDownTestSuite() {
}
void SetUp() override {
ss_ = new IntrusiveStringSet(&alloc_);
generator_.seed(0);
}
void TearDown() override {
delete ss_;
// ensure there are no memory leaks after every test
EXPECT_TRUE(alloc_.all_freed());
EXPECT_EQ(zmalloc_used_memory_tl, 0);
}
IntrusiveStringSet* ss_;
ISSAllocator alloc_;
mt19937 generator_;
};
static string random_string(mt19937& rand, unsigned len) {
const string_view alpanum = "1234567890abcdefghijklmnopqrstuvwxyz";
string ret;
ret.reserve(len);
for (size_t i = 0; i < len; ++i) {
ret += alpanum[rand() % alpanum.size()];
}
return ret;
}
TEST_F(IntrusiveStringSetTest, IntrusiveStringListTest) {
IntrusiveStringList isl;
ISLEntry test = isl.Emplace("0123456789");
EXPECT_EQ(test.Key(), "0123456789"sv);
test = isl.Emplace("123456789");
EXPECT_EQ(test.Key(), "123456789"sv);
test = isl.Emplace("23456789");
EXPECT_EQ(isl.Find("0123456789")->Key(), "0123456789"sv);
EXPECT_EQ(isl.Find("23456789")->Key(), "23456789"sv);
EXPECT_EQ(isl.Find("123456789")->Key(), "123456789"sv);
EXPECT_FALSE(isl.Find("test"));
EXPECT_TRUE(isl.Erase("23456789"));
EXPECT_FALSE(isl.Find("23456789"));
EXPECT_FALSE(isl.Erase("test"));
EXPECT_FALSE(isl.Find("test"));
}
TEST_F(IntrusiveStringSetTest, IntrusiveStringSetAddFindTest) {
IntrusiveStringSet ss;
std::set<std::string> test_set;
for (int i = 0; i < 10000; ++i) {
test_set.insert(base::RandStr(20));
}
for (const auto& s : test_set) {
auto e = ss.Add(s);
EXPECT_EQ(e.Key(), s);
}
for (const auto& s : test_set) {
auto e = ss.Find(s);
EXPECT_EQ(e->Key(), s);
}
EXPECT_EQ(ss.Capacity(), 16384);
}
TEST_F(IntrusiveStringSetTest, Basic) {
EXPECT_TRUE(ss_->Add("foo"sv));
EXPECT_TRUE(ss_->Add("bar"sv));
EXPECT_FALSE(ss_->Add("foo"sv));
EXPECT_FALSE(ss_->Add("bar"sv));
EXPECT_TRUE(ss_->Contains("foo"sv));
EXPECT_TRUE(ss_->Contains("bar"sv));
EXPECT_EQ(2, ss_->UpperBoundSize());
}
TEST_F(IntrusiveStringSetTest, StandardAddErase) {
EXPECT_TRUE(ss_->Add("@@@@@@@@@@@@@@@@"));
EXPECT_TRUE(ss_->Add("A@@@@@@@@@@@@@@@"));
EXPECT_TRUE(ss_->Add("AA@@@@@@@@@@@@@@"));
EXPECT_TRUE(ss_->Add("AAA@@@@@@@@@@@@@"));
EXPECT_TRUE(ss_->Add("AAAAAAAAA@@@@@@@"));
EXPECT_TRUE(ss_->Add("AAAAAAAAAA@@@@@@"));
EXPECT_TRUE(ss_->Add("AAAAAAAAAAAAAAA@"));
EXPECT_TRUE(ss_->Add("AAAAAAAAAAAAAAAA"));
EXPECT_TRUE(ss_->Add("AAAAAAAAAAAAAAAD"));
EXPECT_TRUE(ss_->Add("BBBBBAAAAAAAAAAA"));
EXPECT_TRUE(ss_->Add("BBBBBBBBAAAAAAAA"));
EXPECT_TRUE(ss_->Add("CCCCCBBBBBBBBBBB"));
// Remove link in the middle of chain
EXPECT_TRUE(ss_->Erase("BBBBBBBBAAAAAAAA"));
// Remove start of a chain
EXPECT_TRUE(ss_->Erase("CCCCCBBBBBBBBBBB"));
// Remove end of link
EXPECT_TRUE(ss_->Erase("AAA@@@@@@@@@@@@@"));
// Remove only item in chain
EXPECT_TRUE(ss_->Erase("AA@@@@@@@@@@@@@@"));
EXPECT_TRUE(ss_->Erase("AAAAAAAAA@@@@@@@"));
EXPECT_TRUE(ss_->Erase("AAAAAAAAAA@@@@@@"));
EXPECT_TRUE(ss_->Erase("AAAAAAAAAAAAAAA@"));
}
TEST_F(IntrusiveStringSetTest, DisplacedBug) {
string_view vals[] = {"imY", "OVl", "NhH", "BCe", "YDL", "lpb",
"nhF", "xod", "zYR", "PSa", "hce", "cTR"};
ss_->AddMany(absl::MakeSpan(vals), UINT32_MAX, false);
ss_->Add("fIc");
ss_->Erase("YDL");
ss_->Add("fYs");
ss_->Erase("hce");
ss_->Erase("nhF");
ss_->Add("dye");
ss_->Add("xZT");
ss_->Add("LVK");
ss_->Erase("zYR");
ss_->Erase("fYs");
ss_->Add("ueB");
ss_->Erase("PSa");
ss_->Erase("OVl");
ss_->Add("cga");
ss_->Add("too");
ss_->Erase("ueB");
ss_->Add("HZe");
ss_->Add("oQn");
ss_->Erase("too");
ss_->Erase("HZe");
ss_->Erase("xZT");
ss_->Erase("cga");
ss_->Erase("cTR");
ss_->Erase("BCe");
ss_->Add("eua");
ss_->Erase("lpb");
ss_->Add("OXK");
ss_->Add("QmO");
ss_->Add("SzV");
ss_->Erase("QmO");
ss_->Add("jbe");
ss_->Add("BPN");
ss_->Add("OfH");
ss_->Add("Muf");
ss_->Add("CwP");
ss_->Erase("Muf");
ss_->Erase("xod");
ss_->Add("Cis");
ss_->Add("Xvd");
ss_->Erase("SzV");
ss_->Erase("eua");
ss_->Add("DGb");
ss_->Add("leD");
ss_->Add("MVX");
ss_->Add("HPq");
}
TEST_F(IntrusiveStringSetTest, Resizing) {
constexpr size_t num_strs = 4096;
unordered_set<string> strs;
while (strs.size() != num_strs) {
auto str = random_string(generator_, 10);
strs.insert(str);
}
unsigned size = 0;
for (auto it = strs.begin(); it != strs.end(); ++it) {
const auto& str = *it;
EXPECT_TRUE(ss_->Add(str, 1));
EXPECT_EQ(ss_->UpperBoundSize(), size + 1);
// make sure we haven't lost any items after a grow
// which happens every power of 2
if ((size & (size - 1)) == 0) {
for (auto j = strs.begin(); j != it; ++j) {
const auto& str = *j;
auto it = ss_->Find(str);
ASSERT_NE(it, ss_->end());
EXPECT_TRUE(it->HasExpiry());
EXPECT_EQ(it->ExpiryTime(), ss_->time_now() + 1);
}
}
++size;
}
}
TEST_F(IntrusiveStringSetTest, SimpleScan) {
unordered_set<string_view> info = {"foo", "bar"};
unordered_set<string_view> seen;
for (auto str : info) {
EXPECT_TRUE(ss_->Add(str));
}
uint32_t cursor = 0;
do {
cursor = ss_->Scan(cursor, [&](std::string_view str) {
EXPECT_TRUE(info.count(str));
seen.insert(str);
});
} while (cursor != 0);
EXPECT_EQ(seen.size(), info.size());
EXPECT_TRUE(equal(seen.begin(), seen.end(), info.begin()));
}
// Ensure REDIS scan guarantees are met
TEST_F(IntrusiveStringSetTest, ScanGuarantees) {
unordered_set<string_view> to_be_seen = {"foo", "bar"};
unordered_set<string_view> not_be_seen = {"AAA", "BBB"};
unordered_set<string_view> maybe_seen = {"AA@@@@@@@@@@@@@@", "AAA@@@@@@@@@@@@@",
"AAAAAAAAA@@@@@@@", "AAAAAAAAAA@@@@@@"};
unordered_set<string_view> seen;
auto scan_callback = [&](std::string_view str) {
EXPECT_TRUE(to_be_seen.count(str) || maybe_seen.count(str));
EXPECT_FALSE(not_be_seen.count(str));
if (to_be_seen.count(str)) {
seen.insert(str);
}
};
EXPECT_EQ(ss_->Scan(0, scan_callback), 0);
for (auto str : not_be_seen) {
EXPECT_TRUE(ss_->Add(str));
}
for (auto str : not_be_seen) {
EXPECT_TRUE(ss_->Erase(str));
}
for (auto str : to_be_seen) {
EXPECT_TRUE(ss_->Add(str));
}
// should reach at least the first item in the set
uint32_t cursor = ss_->Scan(0, scan_callback);
for (auto str : maybe_seen) {
EXPECT_TRUE(ss_->Add(str));
}
while (cursor != 0) {
cursor = ss_->Scan(cursor, scan_callback);
}
EXPECT_TRUE(seen.size() == to_be_seen.size());
}
TEST_F(IntrusiveStringSetTest, IntOnly) {
constexpr size_t num_ints = 8192;
unordered_set<unsigned int> numbers;
for (size_t i = 0; i < num_ints; ++i) {
numbers.insert(i);
EXPECT_TRUE(ss_->Add(to_string(i)));
}
for (size_t i = 0; i < num_ints; ++i) {
ASSERT_FALSE(ss_->Add(to_string(i)));
}
size_t num_remove = generator_() % 4096;
unordered_set<string> removed;
for (size_t i = 0; i < num_remove; ++i) {
auto remove_int = generator_() % num_ints;
auto remove = to_string(remove_int);
if (numbers.count(remove_int)) {
ASSERT_TRUE(ss_->Contains(remove)) << remove_int;
EXPECT_TRUE(ss_->Erase(remove));
numbers.erase(remove_int);
} else {
EXPECT_FALSE(ss_->Erase(remove));
}
EXPECT_FALSE(ss_->Contains(remove));
removed.insert(remove);
}
size_t expected_seen = 0;
auto scan_callback = [&](std::string_view str_v) {
std::string str(str_v);
EXPECT_FALSE(removed.count(str));
if (numbers.count(std::atoi(str.data()))) {
++expected_seen;
}
};
uint32_t cursor = 0;
do {
cursor = ss_->Scan(cursor, scan_callback);
// randomly throw in some new numbers
uint32_t val = generator_();
ss_->Add(to_string(val));
} while (cursor != 0);
EXPECT_GE(expected_seen + removed.size(), num_ints);
}
TEST_F(IntrusiveStringSetTest, XtremeScanGrow) {
unordered_set<string> to_see, force_grow, seen;
while (to_see.size() != 8) {
to_see.insert(random_string(generator_, 10));
}
while (force_grow.size() != 8192) {
string str = random_string(generator_, 10);
if (to_see.count(str)) {
continue;
}
force_grow.insert(random_string(generator_, 10));
}
for (auto& str : to_see) {
EXPECT_TRUE(ss_->Add(str));
}
auto scan_callback = [&](string_view strv) {
std::string str(strv);
if (to_see.count(str)) {
seen.insert(str);
}
};
uint32_t cursor = ss_->Scan(0, scan_callback);
// force approx 10 grows
for (auto& s : force_grow) {
EXPECT_TRUE(ss_->Add(s));
}
while (cursor != 0) {
cursor = ss_->Scan(cursor, scan_callback);
}
EXPECT_EQ(seen.size(), to_see.size());
}
TEST_F(IntrusiveStringSetTest, Pop) {
constexpr size_t num_items = 8;
unordered_set<string> to_insert;
while (to_insert.size() != num_items) {
auto str = random_string(generator_, 10);
if (to_insert.count(str)) {
continue;
}
to_insert.insert(str);
EXPECT_TRUE(ss_->Add(str));
}
while (!ss_->Empty()) {
size_t size = ss_->UpperBoundSize();
auto str = ss_->Pop();
DCHECK(ss_->UpperBoundSize() == to_insert.size() - 1);
DCHECK(str);
DCHECK(to_insert.count(std::string(str.Key())));
DCHECK_EQ(ss_->UpperBoundSize(), size - 1);
to_insert.erase(std::string(str.Key()));
}
DCHECK(ss_->Empty());
DCHECK(to_insert.empty());
}
TEST_F(IntrusiveStringSetTest, Iteration) {
ss_->Add("foo");
for (const auto& ptr : *ss_) {
LOG(INFO) << ptr;
}
ss_->Clear();
constexpr size_t num_items = 8192;
unordered_set<string> to_insert;
while (to_insert.size() != num_items) {
auto str = random_string(generator_, 10);
if (to_insert.count(str)) {
continue;
}
to_insert.insert(str);
EXPECT_TRUE(ss_->Add(str));
}
for (const auto& ptr : *ss_) {
std::string str(ptr.Key());
EXPECT_TRUE(to_insert.count(str));
to_insert.erase(str);
}
EXPECT_EQ(to_insert.size(), 0);
}
TEST_F(IntrusiveStringSetTest, SetFieldExpireHasExpiry) {
EXPECT_TRUE(ss_->Add("k1", 100));
auto k = ss_->Find("k1");
EXPECT_TRUE(k->HasExpiry());
EXPECT_EQ(k->ExpiryTime(), 100);
k.SetExpiryTime(1);
EXPECT_TRUE(k->HasExpiry());
EXPECT_EQ(k->ExpiryTime(), 1);
}
TEST_F(IntrusiveStringSetTest, SetFieldExpireNoHasExpiry) {
EXPECT_TRUE(ss_->Add("k1"));
auto k = ss_->Find("k1");
EXPECT_FALSE(k->HasExpiry());
k.SetExpiryTime(10);
EXPECT_TRUE(k->HasExpiry());
EXPECT_EQ(k->ExpiryTime(), 10);
}
// TEST_F(IntrusiveStringSetTest, Ttl) {
// EXPECT_TRUE(ss_->Add("bla"sv, 1));
// EXPECT_FALSE(ss_->Add("bla"sv, 1));
// auto it = ss_->Find("bla"sv);
// EXPECT_EQ(1u, it.ExpiryTime());
// ss_->set_time(1);
// EXPECT_TRUE(ss_->Add("bla"sv, 1));
// EXPECT_EQ(1u, ss_->UpperBoundSize());
// for (unsigned i = 0; i < 100; ++i) {
// EXPECT_TRUE(ss_->Add(StrCat("foo", i), 1));
// }
// EXPECT_EQ(101u, ss_->UpperBoundSize());
// it = ss_->Find("foo50");
// EXPECT_STREQ("foo50", *it);
// EXPECT_EQ(2u, it.ExpiryTime());
// ss_->set_time(2);
// for (unsigned i = 0; i < 100; ++i) {
// EXPECT_TRUE(ss_->Add(StrCat("bar", i)));
// }
// it = ss_->Find("bar50");
// EXPECT_FALSE(it.HasExpiry());
// for (auto it = ss_->begin(); it != ss_->end(); ++it) {
// ASSERT_TRUE(absl::StartsWith(*it, "bar")) << *it;
// string str = *it;
// VLOG(1) << *it;
// }
// }
// TEST_F(IntrusiveStringSetTest, Grow) {
// for (size_t j = 0; j < 10; ++j) {
// for (size_t i = 0; i < 4098; ++i) {
// ss_->Reserve(generator_() % 256);
// auto str = random_string(generator_, 3);
// ss_->Add(str);
// }
// ss_->Clear();
// }
// }
// TEST_F(IntrusiveStringSetTest, Reserve) {
// vector<string> strs;
// for (size_t i = 0; i < 10; ++i) {
// strs.push_back(random_string(generator_, 10));
// ss_->Add(strs.back());
// }
// for (size_t j = 2; j < 20; j += 3) {
// ss_->Reserve(j * 20);
// for (size_t i = 0; i < 10; ++i) {
// ASSERT_TRUE(ss_->Contains(strs[i]));
// }
// }
// }
// TEST_F(IntrusiveStringSetTest, Fill) {
// for (size_t i = 0; i < 100; ++i) {
// ss_->Add(random_string(generator_, 10));
// }
// StringSet s2;
// ss_->Fill(&s2);
// EXPECT_EQ(s2.UpperBoundSize(), ss_->UpperBoundSize());
// for (sds str : *ss_) {
// EXPECT_TRUE(s2.Contains(str));
// }
// }
// TEST_F(IntrusiveStringSetTest, IterateEmpty) {
// for (const auto& s : *ss_) {
// // We're iterating to make sure there is no crash. However, if we got here, it's a bug
// CHECK(false) << "Found entry " << s << " in empty set";
// }
// }
// size_t memUsed(IntrusiveStringSet& obj) {
// return obj.ObjMallocUsed() + obj.SetMallocUsed();
// }
// void BM_Clone(benchmark::State& state) {
// vector<string> strs;
// mt19937 generator(0);
// IntrusiveStringSet ss1, ss2;
// unsigned elems = state.range(0);
// for (size_t i = 0; i < elems; ++i) {
// string str = random_string(generator, 10);
// ss1.Add(str);
// }
// ss2.Reserve(ss1.UpperBoundSize());
// while (state.KeepRunning()) {
// for (auto src : ss1) {
// ss2.Add(src);
// }
// state.PauseTiming();
// ss2.Clear();
// ss2.Reserve(ss1.UpperBoundSize());
// state.ResumeTiming();
// }
// }
// BENCHMARK(BM_Clone)->ArgName("elements")->Arg(32000);
// void BM_Fill(benchmark::State& state) {
// unsigned elems = state.range(0);
// vector<string> strs;
// mt19937 generator(0);
// IntrusiveStringSet ss1, ss2;
// for (size_t i = 0; i < elems; ++i) {
// string str = random_string(generator, 10);
// ss1.Add(str);
// }
// while (state.KeepRunning()) {
// ss1.Fill(&ss2);
// state.PauseTiming();
// ss2.Clear();
// state.ResumeTiming();
// }
// }
// BENCHMARK(BM_Fill)->ArgName("elements")->Arg(32000);
// void BM_Clear(benchmark::State& state) {
// unsigned elems = state.range(0);
// mt19937 generator(0);
// StringSet ss;
// while (state.KeepRunning()) {
// state.PauseTiming();
// for (size_t i = 0; i < elems; ++i) {
// string str = random_string(generator, 16);
// ss.Add(str);
// }
// state.ResumeTiming();
// ss.Clear();
// }
// }
// BENCHMARK(BM_Clear)->ArgName("elements")->Arg(32000);
void BM_Add(benchmark::State& state) {
vector<string> strs;
mt19937 generator(0);
IntrusiveStringSet ss;
unsigned elems = state.range(0);
unsigned keySize = state.range(1);
for (size_t i = 0; i < elems; ++i) {
string str = random_string(generator, keySize);
strs.push_back(str);
}
ss.Reserve(elems);
while (state.KeepRunning()) {
for (auto& str : strs)
ss.Add(str);
state.PauseTiming();
// state.counters["Memory_Used"] = memUsed(ss);
ss.Clear();
ss.Reserve(elems);
state.ResumeTiming();
}
}
BENCHMARK(BM_Add)
->ArgNames({"elements", "Key Size"})
->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});
// void BM_AddMany(benchmark::State& state) {
// vector<string> strs;
// mt19937 generator(0);
// StringSet ss;
// unsigned elems = state.range(0);
// unsigned keySize = state.range(1);
// for (size_t i = 0; i < elems; ++i) {
// string str = random_string(generator, keySize);
// strs.push_back(str);
// }
// ss.Reserve(elems);
// vector<string_view> svs;
// for (const auto& str : strs) {
// svs.push_back(str);
// }
// while (state.KeepRunning()) {
// ss.AddMany(absl::MakeSpan(svs), UINT32_MAX, false);
// state.PauseTiming();
// CHECK_EQ(ss.UpperBoundSize(), elems);
// state.counters["Memory_Used"] = memUsed(ss);
// ss.Clear();
// ss.Reserve(elems);
// state.ResumeTiming();
// }
// }
// BENCHMARK(BM_AddMany)
// ->ArgNames({"elements", "Key Size"})
// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});
// void BM_Erase(benchmark::State& state) {
// std::vector<std::string> strs;
// mt19937 generator(0);
// StringSet ss;
// auto elems = state.range(0);
// auto keySize = state.range(1);
// for (long int i = 0; i < elems; ++i) {
// std::string str = random_string(generator, keySize);
// strs.push_back(str);
// ss.Add(str);
// }
// state.counters["Memory_Before_Erase"] = memUsed(ss);
// while (state.KeepRunning()) {
// for (auto& str : strs) {
// ss.Erase(str);
// }
// state.PauseTiming();
// state.counters["Memory_After_Erase"] = memUsed(ss);
// for (auto& str : strs) {
// ss.Add(str);
// }
// state.ResumeTiming();
// }
// }
// BENCHMARK(BM_Erase)
// ->ArgNames({"elements", "Key Size"})
// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});
// void BM_Get(benchmark::State& state) {
// std::vector<std::string> strs;
// mt19937 generator(0);
// StringSet ss;
// auto elems = state.range(0);
// auto keySize = state.range(1);
// for (long int i = 0; i < elems; ++i) {
// std::string str = random_string(generator, keySize);
// strs.push_back(str);
// ss.Add(str);
// }
// while (state.KeepRunning()) {
// for (auto& str : strs) {
// ss.Find(str);
// }
// }
// }
// BENCHMARK(BM_Get)
// ->ArgNames({"elements", "Key Size"})
// ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});
// void BM_Grow(benchmark::State& state) {
// vector<string> strs;
// mt19937 generator(0);
// StringSet src;
// unsigned elems = 1 << 18;
// for (size_t i = 0; i < elems; ++i) {
// src.Add(random_string(generator, 16), UINT32_MAX);
// strs.push_back(random_string(generator, 16));
// }
// while (state.KeepRunning()) {
// state.PauseTiming();
// StringSet tmp;
// src.Fill(&tmp);
// CHECK_EQ(tmp.BucketCount(), elems);
// state.ResumeTiming();
// for (const auto& str : strs) {
// tmp.Add(str);
// if (tmp.BucketCount() > elems) {
// break; // we grew
// }
// }
// CHECK_GT(tmp.BucketCount(), elems);
// }
// }
// BENCHMARK(BM_Grow);
// unsigned total_wasted_memory = 0;
// TEST_F(IntrusiveStringSetTest, ReallocIfNeeded) {
// auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };
// auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,
// size_t block_size, void* arg) {
// size_t used = block_size * area->used;
// total_wasted_memory += area->committed - used;
// return true;
// };
// for (size_t i = 0; i < 10'000; i++)
// ss_->Add(build_str(i));
// for (size_t i = 0; i < 10'000; i++) {
// if (i % 10 == 0)
// continue;
// ss_->Erase(build_str(i));
// }
// mi_heap_collect(mi_heap_get_backing(), true);
// mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);
// size_t wasted_before = total_wasted_memory;
// size_t underutilized = 0;
// for (auto it = ss_->begin(); it != ss_->end(); ++it) {
// underutilized += zmalloc_page_is_underutilized(*it, 0.9);
// it.ReallocIfNeeded(0.9);
// }
// // Check there are underutilized pages
// CHECK_GT(underutilized, 0u);
// total_wasted_memory = 0;
// mi_heap_collect(mi_heap_get_backing(), true);
// mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);
// size_t wasted_after = total_wasted_memory;
// // Check we waste significanlty less now
// EXPECT_GT(wasted_before, wasted_after * 2);
// EXPECT_EQ(ss_->UpperBoundSize(), 1000);
// for (size_t i = 0; i < 1000; i++)
// EXPECT_EQ(*ss_->Find(build_str(i * 10)), build_str(i * 10));
// }
} // namespace dfly