From 014a86fc888aef7995ec51d36b1330a27baf775c Mon Sep 17 00:00:00 2001 From: adiholden Date: Sun, 7 Jan 2024 21:51:46 +0200 Subject: [PATCH] feat(lru): add generic lru class (#2351) Signed-off-by: adi_holden --- src/core/CMakeLists.txt | 4 +- src/core/compact_object.h | 9 ++ src/core/lru.h | 200 ++++++++++++++++++++++++++++ src/core/lru_test.cc | 173 ++++++++++++++++++++++++ src/core/simple_lru_counter.cc | 90 ------------- src/core/simple_lru_counter.h | 44 ------ src/core/simple_lru_counter_test.cc | 74 ---------- src/server/db_slice.cc | 4 +- src/server/db_slice.h | 2 +- src/server/table.h | 6 - src/server/tiered_storage.cc | 2 +- 11 files changed, 388 insertions(+), 220 deletions(-) create mode 100644 src/core/lru.h create mode 100644 src/core/lru_test.cc delete mode 100644 src/core/simple_lru_counter.cc delete mode 100644 src/core/simple_lru_counter.h delete mode 100644 src/core/simple_lru_counter_test.cc diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 2a27594b9..b857e70bd 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -3,7 +3,7 @@ set(SEARCH_LIB query_parser) add_library(dfly_core compact_object.cc dragonfly_core.cc extent_tree.cc external_alloc.cc interpreter.cc json_object.cc mi_memory_resource.cc sds_utils.cc - segment_allocator.cc simple_lru_counter.cc score_map.cc small_string.cc sorted_map.cc + segment_allocator.cc score_map.cc small_string.cc sorted_map.cc tx_queue.cc dense_set.cc string_set.cc string_map.cc detail/bitpacking.cc) @@ -20,7 +20,7 @@ cxx_test(external_alloc_test dfly_core LABELS DFLY) cxx_test(dash_test dfly_core file DATA testdata/ids.txt LABELS DFLY) cxx_test(interpreter_test dfly_core LABELS DFLY) cxx_test(json_test dfly_core TRDP::jsoncons LABELS DFLY) -cxx_test(simple_lru_counter_test dfly_core LABELS DFLY) +cxx_test(lru_test dfly_core LABELS DFLY) cxx_test(string_set_test dfly_core LABELS DFLY) cxx_test(string_map_test dfly_core LABELS DFLY) cxx_test(sorted_map_test dfly_core LABELS DFLY) 
diff --git a/src/core/compact_object.h b/src/core/compact_object.h
index 98eebf243..071e883e0 100644
--- a/src/core/compact_object.h
+++ b/src/core/compact_object.h
@@ -449,3 +449,12 @@ class CompactObjectView {
 };
 
 }  // namespace dfly
+
+namespace std {
+template <> struct hash<dfly::CompactObjectView> {  // delegates to CompactObjectView::Hash().
+  std::size_t operator()(const dfly::CompactObjectView& obj) const {
+    return obj.Hash();
+  }
+};
+
+}  // namespace std
diff --git a/src/core/lru.h b/src/core/lru.h
new file mode 100644
index 000000000..1c923887c
--- /dev/null
+++ b/src/core/lru.h
@@ -0,0 +1,200 @@
+// Copyright 2023, DragonflyDB authors. All rights reserved.
+// See LICENSE for licensing terms.
+//
+#pragma once
+
+#include <absl/container/node_hash_map.h>
+
+#include <optional>
+
+#include "base/logging.h"
+#include "base/pmr/memory_resource.h"
+
+namespace dfly {
+
+enum class Position {
+  kHead,  // front of the list; the default position for Put().
+  kTail,  // back of the list.
+};
+
+template <typename T> class Lru {
+  struct Node {  // element of a circular doubly-linked list; links are indices into node_arr_.
+    const T* data_ptr;  // points at the key stored inside table_.
+
+    uint32_t prev;
+    uint32_t next;
+
+    Node() : prev(0), next(0) {
+    }
+  };
+
+ public:
+  explicit Lru(uint32_t capacity, PMR_NS::memory_resource* mr) : table_(mr), node_arr_(mr) {
+    CHECK_GT(capacity, 1u);
+    node_arr_.reserve(capacity);  // only a size hint: Put() never evicts on its own.
+  }
+
+  // Returns the item preceding data; the predecessor of head is tail. nullopt if data is absent.
+  std::optional<T> GetPrev(const T& data) const;
+  std::optional<T> GetTail() const;
+  std::optional<T> GetHead() const;
+  // Inserts data at position, or moves it there if it is already present.
+  void Put(const T& data, Position position = Position::kHead);
+  bool Remove(const T& data);  // returns false if data was not present.
+
+  size_t Size() const {
+    DCHECK_EQ(table_.size(), node_arr_.size());
+    return table_.size();
+  }
+
+ private:
+  void MoveToPosition(uint32_t index, Position position);
+  using AllocatorType = PMR_NS::polymorphic_allocator<std::pair<T, uint32_t>>;
+  absl::node_hash_map<T, uint32_t, absl::Hash<T>, std::equal_to<>, AllocatorType>
+      table_;  // map from item to index in node arr
+  std::vector<Node, PMR_NS::polymorphic_allocator<Node>> node_arr_;
+  uint32_t head_ = 0;
+};
+
+template <typename T> std::optional<T> Lru<T>::GetPrev(const T& data) const {
+  auto it = table_.find(data);
+  if (it == table_.end()) {
+    return std::nullopt;
+  }
+  DCHECK_GT(node_arr_.size(), it->second);
+  const auto& node = node_arr_[it->second];
+
+  DCHECK_EQ(node.data_ptr, &it->first);
+  const auto& node_prev = node_arr_[node.prev];
+
+  return *node_prev.data_ptr;
+}
+
+template <typename T> std::optional<T> Lru<T>::GetTail() const {
+  if (table_.size() == 0) {
+    return std::nullopt;
+  }
+  unsigned tail = node_arr_[head_].prev;  // the list is circular: tail is head's predecessor.
+  return *node_arr_[tail].data_ptr;
+}
+
+template <typename T> std::optional<T> Lru<T>::GetHead() const {
+  if (table_.size() == 0) {
+    return std::nullopt;
+  }
+  return *node_arr_[head_].data_ptr;
+}
+
+template <typename T> void Lru<T>::Put(const T& data, Position position) {
+  DCHECK_EQ(table_.size(), node_arr_.size());
+  auto [it, inserted] = table_.emplace(data, table_.size());
+  if (inserted) {
+    unsigned tail = 0;
+    if (node_arr_.size() > 0) {
+      tail = node_arr_[head_].prev;
+    }
+
+    Node node;
+    // add new item between head and tail.
+    node.prev = tail;
+    node.next = head_;
+    node.data_ptr = &(it->first);
+    node_arr_.push_back(node);  // append before linking: on first insert slot 0 must exist.
+
+    node_arr_[tail].next = it->second;
+    node_arr_[head_].prev = it->second;
+
+    if (position == Position::kHead) {
+      head_ = it->second;
+    }
+  } else {  // not inserted.
+    MoveToPosition(it->second, position);
+  }
+}
+
+template <typename T> bool Lru<T>::Remove(const T& data) {
+  auto it = table_.find(data);
+  if (it == table_.end()) {
+    return false;
+  }
+  uint32_t remove_index = it->second;
+  auto& node = node_arr_[remove_index];
+
+  // remove from list
+  node_arr_[node.prev].next = node.next;
+  node_arr_[node.next].prev = node.prev;
+
+  // remove item from table.
+  if (remove_index == head_) {
+    head_ = node.next;
+  }
+  table_.erase(it);
+
+  if (table_.size() == remove_index) {
+    node_arr_.pop_back();
+    DCHECK_EQ(table_.size(), node_arr_.size());
+    return true;  // if the removed item was the last in the node array nothing else to do.
+  }
+
+  // move last item from node array to the removed index
+  uint32_t move_index = table_.size();
+  auto& node_to_move = node_arr_[move_index];
+  it = table_.find(*node_to_move.data_ptr);
+  CHECK(it != table_.end());
+
+  it->second = remove_index;
+  // now update the next and prev to point to it
+  node_arr_[node_to_move.prev].next = remove_index;
+  node_arr_[node_to_move.next].prev = remove_index;
+
+  // move the data from the node to the removed node.
+  node = node_to_move;
+  node_arr_.pop_back();
+
+  if (head_ == move_index) {
+    head_ = remove_index;
+  }
+  DCHECK_EQ(table_.size(), node_arr_.size());
+  return true;
+}
+
+template <typename T> void Lru<T>::MoveToPosition(uint32_t index, Position position) {
+  DCHECK_LT(index, node_arr_.size());
+  uint32_t tail = node_arr_[head_].prev;
+  uint32_t curr_node_index = position == Position::kHead ? head_ : tail;
+  if (index == curr_node_index) {  // the index is already head/tail. nothing to change.
+    return;
+  }
+
+  auto& node = node_arr_[index];
+  if (position == Position::kHead && index == tail) {
+    head_ = index;  // just shift the cycle.
+    return;
+  }
+  if (position == Position::kTail && index == head_) {
+    head_ = node.next;  // just shift the cycle.
+    return;
+  }
+
+  CHECK_NE(node.prev, node.next);  // neither head nor tail here, so at least 3 items.
+
+  // remove from list
+  node_arr_[node.prev].next = node.next;
+  node_arr_[node.next].prev = node.prev;
+
+  // update node next and prev
+  node.prev = tail;
+  node.next = head_;
+
+  // update tail to point to new head
+  node_arr_[tail].next = index;
+
+  // update last head to point to new head
+  node_arr_[head_].prev = index;
+
+  if (position == Position::kHead) {
+    head_ = index;
+  }
+}
+
+}  // namespace dfly
diff --git a/src/core/lru_test.cc b/src/core/lru_test.cc
new file mode 100644
index 000000000..d0da7907f
--- /dev/null
+++ b/src/core/lru_test.cc
@@ -0,0 +1,173 @@
+// Copyright 2023, DragonflyDB authors. All rights reserved.
+// See LICENSE for licensing terms.
+//
+
+#include "core/lru.h"
+
+#include "base/gtest.h"
+#include "base/logging.h"
+#include "core/compact_object.h"
+#include "core/mi_memory_resource.h"
+
+using namespace std;
+
+namespace dfly {
+
+class StringLruTest : public ::testing::Test {
+ protected:
+  StringLruTest() : mr_(mi_heap_get_backing()), cache_(kSize, &mr_) {
+  }
+
+  const size_t kSize = 4;
+  MiMemoryResource mr_;
+  Lru<std::string> cache_;
+};
+
+TEST_F(StringLruTest, PutAndGet) {
+  cache_.Put("a");
+  ASSERT_EQ("a", cache_.GetHead());
+  ASSERT_EQ("a", cache_.GetPrev("a"));
+  ASSERT_EQ("a", cache_.GetTail());
+  cache_.Put("a");
+  ASSERT_EQ("a", cache_.GetHead());
+  ASSERT_EQ("a", cache_.GetTail());
+  cache_.Put("b");
+  ASSERT_EQ("b", cache_.GetHead());
+  ASSERT_EQ("a", cache_.GetTail());
+  cache_.Put("c");
+  ASSERT_EQ("c", cache_.GetHead());
+  ASSERT_EQ("a", cache_.GetTail());
+  cache_.Put("d");
+  ASSERT_EQ("d", cache_.GetHead());
+  ASSERT_EQ("a", cache_.GetTail());
+  cache_.Put("a");
+  ASSERT_EQ("a", cache_.GetHead());
+  ASSERT_EQ("b", cache_.GetTail());
+  cache_.Put("e");
+  ASSERT_EQ("e", cache_.GetHead());
+  ASSERT_EQ("b", cache_.GetTail());
+  cache_.Put("f");
+  ASSERT_EQ("f", cache_.GetHead());
+  ASSERT_EQ("b", cache_.GetTail());
+}
+
+TEST_F(StringLruTest, PutAndPutTail) {
+  cache_.Put("a");
+  cache_.Put("a");  // a
+  cache_.Put("b");  // b -> a
+  cache_.Put("c");  // c -> b -> a
+  cache_.Put("d");  // d-> c -> b -> a
+  ASSERT_EQ("a", cache_.GetTail());
+  cache_.Put("a");  // a -> d -> c -> b
+  ASSERT_EQ("b", cache_.GetTail());
+  ASSERT_EQ("c", cache_.GetPrev("b"));
+  ASSERT_EQ("d", cache_.GetPrev("c"));
+  ASSERT_EQ("b", cache_.GetPrev("a"));
+  cache_.Put("d", Position::kTail);  // a -> c -> b -> d
+  ASSERT_EQ("d", cache_.GetTail());
+  ASSERT_EQ("b", cache_.GetPrev("d"));
+  ASSERT_EQ("c", cache_.GetPrev("b"));
+  ASSERT_EQ("a", cache_.GetPrev("c"));
+  ASSERT_EQ("d", cache_.GetPrev("a"));
+  cache_.Put("a");  // a -> c -> b -> d
+  ASSERT_EQ("d", cache_.GetTail());
+  cache_.Put("e", Position::kTail);  // a -> c -> b -> d -> e
+  ASSERT_EQ("e", cache_.GetTail());
+  ASSERT_EQ("d", cache_.GetPrev("e"));
+  ASSERT_EQ("b", cache_.GetPrev("d"));
+  ASSERT_EQ("a", cache_.GetPrev("c"));
+  ASSERT_EQ("e", cache_.GetPrev("a"));
+  cache_.Put("e", Position::kTail);  // a -> c -> b -> d -> e
+  ASSERT_EQ("e", cache_.GetTail());
+  ASSERT_EQ("d", cache_.GetPrev("e"));
+  ASSERT_EQ("b", cache_.GetPrev("d"));
+  ASSERT_EQ("a", cache_.GetPrev("c"));
+  ASSERT_EQ("e", cache_.GetPrev("a"));
+}
+
+TEST_F(StringLruTest, BumpTest) {
+  cache_.Put("a");
+  cache_.Put("b");
+  cache_.Put("c");
+  cache_.Put("d");
+  ASSERT_EQ("a", cache_.GetTail());
+  cache_.Put("c");
+  ASSERT_EQ("a", cache_.GetTail());
+  ASSERT_EQ("d", cache_.GetPrev("b"));
+  ASSERT_EQ("c", cache_.GetPrev("d"));
+}
+
+TEST_F(StringLruTest, DifferentOrder) {
+  for (uint32_t i = 0; i < kSize * 2; ++i) {
+    cache_.Put(absl::StrCat(i));
+  }
+  ASSERT_EQ("0", cache_.GetTail());
+
+  for (uint32_t i = kSize; i > 0; --i) {
+    cache_.Put(absl::StrCat(i));
+  }
+  ASSERT_EQ("0", cache_.GetTail());
+  cache_.Put("0");
+  ASSERT_EQ("5", cache_.GetTail());
+}
+
+TEST_F(StringLruTest, Delete) {
+  cache_.Put("a");  // a
+  cache_.Put("b");  // b -> a
+  cache_.Put("c");  // c -> b -> a
+  cache_.Put("d");  // d-> c -> b -> a
+  cache_.Put("e");  // e -> d-> c -> b -> a
+  ASSERT_EQ("e", cache_.GetHead());
+  ASSERT_TRUE(cache_.Remove("e"));  // d-> c -> b -> a
+  ASSERT_EQ("d", cache_.GetHead());
+  ASSERT_EQ("a", cache_.GetTail());
+  ASSERT_EQ("b", cache_.GetPrev("a"));
+  ASSERT_EQ("c", cache_.GetPrev("b"));
+  ASSERT_EQ("d", cache_.GetPrev("c"));
+  ASSERT_EQ("a", cache_.GetPrev("d"));
+  ASSERT_FALSE(cache_.Remove("e"));  // d-> c -> b -> a
+
+  ASSERT_TRUE(cache_.Remove("c"));  // d -> b -> a
+  ASSERT_EQ("d", cache_.GetHead());
+  ASSERT_EQ("a", cache_.GetTail());
+  ASSERT_EQ("b", cache_.GetPrev("a"));
+  ASSERT_EQ("d", cache_.GetPrev("b"));
+  ASSERT_EQ("a", cache_.GetPrev("d"));
+  cache_.Put("c");  // c -> d -> b -> a
+  ASSERT_EQ("c", cache_.GetHead());
+  ASSERT_EQ("a", cache_.GetTail());
+  ASSERT_EQ("b", cache_.GetPrev("a"));
+  ASSERT_EQ("d", cache_.GetPrev("b"));
+  ASSERT_EQ("c", cache_.GetPrev("d"));
+  ASSERT_EQ("a", cache_.GetPrev("c"));
+  ASSERT_TRUE(cache_.Remove("a"));  // c -> d -> b
+  ASSERT_EQ("b", cache_.GetTail());
+  ASSERT_EQ("d", cache_.GetPrev("b"));
+  ASSERT_EQ("c", cache_.GetPrev("d"));
+  ASSERT_EQ("b", cache_.GetPrev("c"));
+}
+
+class COVLruTest : public ::testing::Test {
+ protected:
+  COVLruTest() : mr_(mi_heap_get_backing()), cache_(kSize, &mr_) {
+  }
+
+  const size_t kSize = 100;
+  MiMemoryResource mr_;
+  Lru<CompactObjectView> cache_;
+};
+
+TEST_F(COVLruTest, MemoryUsagePrint) {
+  size_t before = mr_.used();
+  std::array<CompactObj, 100> obj_arr;
+  for (int i = 0; i < 100; ++i) {
+    obj_arr[i].SetString(absl::StrCat(i));
+    cache_.Put(obj_arr[i]);
+  }
+
+  size_t after = mr_.used();
+  LOG(INFO) << "CompactObjectView lru 100 items memory : " << absl::StrCat(after - before)
+            << " bytes";
+}
+
+}  // namespace dfly
diff --git a/src/core/simple_lru_counter.cc b/src/core/simple_lru_counter.cc
deleted file mode 100644
index e80982781..000000000
--- a/src/core/simple_lru_counter.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2023, DragonflyDB authors. All rights reserved.
-// See LICENSE for licensing terms.
-// - -#include "core/simple_lru_counter.h" - -#include "base/logging.h" - -namespace dfly { - -using namespace std; - -SimpleLruCounter::SimpleLruCounter(size_t capacity) : head_(0) { - CHECK_GT(capacity, 1u); - node_arr_.resize(capacity); -} - -SimpleLruCounter::~SimpleLruCounter() { -} - -optional SimpleLruCounter::Get(string_view key) const { - auto it = table_.find(key); - if (it == table_.end()) { - return nullopt; - } - const auto& node = node_arr_[it->second]; - - DCHECK_EQ(node.key, key); - - return node.count; -} - -void SimpleLruCounter::Put(string_view key, uint64_t value) { - auto [it, inserted] = table_.emplace(key, table_.size()); - - if (inserted) { - unsigned tail = node_arr_[head_].prev; // 0 if we had 1 or 0 elements. - - if (it->second < node_arr_.size()) { - auto& node = node_arr_[it->second]; - // add new head. - node.prev = tail; - node.next = head_; - node_arr_[tail].next = it->second; - node_arr_[head_].prev = it->second; - head_ = it->second; - } else { - // Cache is full, remove the tail. - size_t res = table_.erase(string_view(node_arr_[tail].key)); - DCHECK(res == 1); - - it->second = tail; - - DCHECK_EQ(table_.size(), node_arr_.size()); - } - - auto& node = node_arr_[it->second]; - node.key = it->first; // reference the key. We need it to erase the key referencing tail above. - node.count = value; - } else { // not inserted. - auto& node = node_arr_[it->second]; - node.count = value; - } - - if (it->second != head_) { // bump up to head. - BumpToHead(it->second); - } -} - -void SimpleLruCounter::BumpToHead(uint32_t index) { - DCHECK_LT(index, node_arr_.size()); - DCHECK_NE(index, head_); - - unsigned tail = node_arr_[head_].prev; - if (index == tail) { - head_ = index; // just shift the whole cycle. 
- return; - } - - auto& node = node_arr_[index]; - - DCHECK(node.prev != node.next); - - node_arr_[node.prev].next = node.next; - node_arr_[node.next].prev = node.prev; - node.prev = tail; - node.next = head_; - head_ = index; -} -}; // namespace dfly diff --git a/src/core/simple_lru_counter.h b/src/core/simple_lru_counter.h deleted file mode 100644 index 75b7676f6..000000000 --- a/src/core/simple_lru_counter.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2023, DragonflyDB authors. All rights reserved. -// See LICENSE for licensing terms. -// -#pragma once - -#include - -#include "base/string_view_sso.h" - -namespace dfly { - -class SimpleLruCounter { - struct Node { - base::string_view_sso key; // key to the table. - - uint32_t prev; - uint32_t next; - - uint64_t count; - - Node() : prev(0), next(0), count(0) { - } - }; - - public: - explicit SimpleLruCounter(size_t capacity); - ~SimpleLruCounter(); - - std::optional Get(std::string_view key) const; - void Put(std::string_view key, uint64_t count); - - size_t Size() const { - return table_.size(); - } - - private: - void BumpToHead(uint32_t index); - - absl::flat_hash_map table_; - std::vector node_arr_; - uint32_t head_; -}; - -}; // namespace dfly diff --git a/src/core/simple_lru_counter_test.cc b/src/core/simple_lru_counter_test.cc deleted file mode 100644 index c976c1090..000000000 --- a/src/core/simple_lru_counter_test.cc +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2023, DragonflyDB authors. All rights reserved. -// See LICENSE for licensing terms. 
-// - -#include "core/simple_lru_counter.h" - -#include "base/gtest.h" -#include "base/logging.h" - -using namespace std; - -namespace dfly { - -class SimpleLruTest : public ::testing::Test { - protected: - SimpleLruTest() : cache_(kSize) { - } - - const size_t kSize = 4; - SimpleLruCounter cache_; -}; - -TEST_F(SimpleLruTest, Basic) { - cache_.Put("a", 1); - cache_.Put("b", 2); - cache_.Put("c", 3); - cache_.Put("d", 4); - cache_.Put("a", 1); - - ASSERT_EQ(1, cache_.Get("a")); - ASSERT_EQ(2, cache_.Get("b")); - ASSERT_EQ(3, cache_.Get("c")); - ASSERT_EQ(4, cache_.Get("d")); - - ASSERT_EQ(nullopt, cache_.Get("e")); - cache_.Put("e", 5); - - ASSERT_EQ(nullopt, cache_.Get("b")); - ASSERT_EQ(3, cache_.Get("c")); - ASSERT_EQ(4, cache_.Get("d")); - ASSERT_EQ(5, cache_.Get("e")); - - cache_.Put("f", 6); - ASSERT_EQ(nullopt, cache_.Get("c")); - ASSERT_EQ(5, cache_.Get("e")); - ASSERT_EQ(6, cache_.Get("f")); -} - -TEST_F(SimpleLruTest, DifferentOrder) { - for (uint32_t i = 0; i < kSize * 2; ++i) { - cache_.Put(absl::StrCat(i), i); - } - - for (uint32_t i = 0; i < kSize; ++i) { - EXPECT_EQ(nullopt, cache_.Get(absl::StrCat(i))); - } - for (uint32_t i = kSize; i < kSize * 2; ++i) { - EXPECT_EQ(i, cache_.Get(absl::StrCat(i))); - } - - for (uint32_t i = kSize; i > 0; --i) { - cache_.Put(absl::StrCat(i), i); - } - cache_.Put("0", 0); - - for (uint32_t i = 0; i < kSize; ++i) { - EXPECT_EQ(i, cache_.Get(absl::StrCat(i))); - } - for (uint32_t i = kSize; i < kSize * 2; ++i) { - EXPECT_EQ(nullopt, cache_.Get(absl::StrCat(i))); - } -} - -} // namespace dfly diff --git a/src/server/db_slice.cc b/src/server/db_slice.cc index fbccb9462..39bc7eaaf 100644 --- a/src/server/db_slice.cc +++ b/src/server/db_slice.cc @@ -179,7 +179,7 @@ class PrimeEvictionPolicy { class PrimeBumpPolicy { public: - PrimeBumpPolicy(const absl::flat_hash_set& bumped_items) + PrimeBumpPolicy(const absl::flat_hash_set& bumped_items) : bumped_items_(bumped_items) { } // returns true if key can be made less important 
for eviction (opposite of bump up) @@ -188,7 +188,7 @@ class PrimeBumpPolicy { } private: - const absl::flat_hash_set& bumped_items_; + const absl::flat_hash_set& bumped_items_; }; bool PrimeEvictionPolicy::CanGrow(const PrimeTable& tbl) const { diff --git a/src/server/db_slice.h b/src/server/db_slice.h index 21016458e..664597708 100644 --- a/src/server/db_slice.h +++ b/src/server/db_slice.h @@ -454,7 +454,7 @@ class DbSlice { std::vector> change_cb_; // Used in temporary computations in Find item and CbFinish - mutable absl::flat_hash_set bumped_items_; + mutable absl::flat_hash_set bumped_items_; // Registered by shard indices on when first document index is created. DocDeletionCallback doc_del_cb_; diff --git a/src/server/table.h b/src/server/table.h index 00471f11f..76dd55b89 100644 --- a/src/server/table.h +++ b/src/server/table.h @@ -47,12 +47,6 @@ inline bool IsValid(ExpireConstIterator it) { return !it.is_done(); } -struct PrimeHasher { - size_t operator()(const PrimeKey& o) const { - return o.HashCode(); - } -}; - struct SlotStats { uint64_t key_count = 0; uint64_t total_reads = 0; diff --git a/src/server/tiered_storage.cc b/src/server/tiered_storage.cc index d5f9947da..1bc2a50c9 100644 --- a/src/server/tiered_storage.cc +++ b/src/server/tiered_storage.cc @@ -127,7 +127,7 @@ struct TieredStorage::PerDb { struct BinRecord { // Those that wait to be serialized. Must be less than NumEntriesInSmallBin for each bin. - absl::flat_hash_set pending_entries; + absl::flat_hash_set pending_entries; // Entries that were scheduled to write but have not completed yet. InflightMap enqueued_entries;