feat(lru): add generic lru class (#2351)

Signed-off-by: adi_holden <adi@dragonflydb.io>
This commit is contained in:
adiholden 2024-01-07 21:51:46 +02:00 committed by GitHub
parent a1d85b7cb2
commit 014a86fc88
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 388 additions and 220 deletions

View file

@ -3,7 +3,7 @@ set(SEARCH_LIB query_parser)
add_library(dfly_core compact_object.cc dragonfly_core.cc extent_tree.cc
external_alloc.cc interpreter.cc json_object.cc mi_memory_resource.cc sds_utils.cc
segment_allocator.cc simple_lru_counter.cc score_map.cc small_string.cc sorted_map.cc
segment_allocator.cc score_map.cc small_string.cc sorted_map.cc
tx_queue.cc dense_set.cc
string_set.cc string_map.cc detail/bitpacking.cc)
@ -20,7 +20,7 @@ cxx_test(external_alloc_test dfly_core LABELS DFLY)
cxx_test(dash_test dfly_core file DATA testdata/ids.txt LABELS DFLY)
cxx_test(interpreter_test dfly_core LABELS DFLY)
cxx_test(json_test dfly_core TRDP::jsoncons LABELS DFLY)
cxx_test(simple_lru_counter_test dfly_core LABELS DFLY)
cxx_test(lru_test dfly_core LABELS DFLY)
cxx_test(string_set_test dfly_core LABELS DFLY)
cxx_test(string_map_test dfly_core LABELS DFLY)
cxx_test(sorted_map_test dfly_core LABELS DFLY)

View file

@ -449,3 +449,12 @@ class CompactObjectView {
};
} // namespace dfly
namespace std {

// std::hash specialization for dfly::CompactObjectView: the hash value is
// whatever the view's own Hash() method reports.
template <> struct hash<dfly::CompactObjectView> {
  std::size_t operator()(const dfly::CompactObjectView& obj) const {
    const std::size_t digest = obj.Hash();
    return digest;
  }
};

}  // namespace std

200
src/core/lru.h Normal file
View file

@ -0,0 +1,200 @@
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#include <absl/container/node_hash_map.h>
#include <optional>
#include "base/logging.h"
#include "base/pmr/memory_resource.h"
namespace dfly {
// Selects which end of the LRU list an item is placed at by Lru::Put.
enum class Position {
// The item becomes the head of the list.
kHead,
// The item becomes the tail of the list.
kTail,
};
// Generic LRU ordering of items of type T.
//
// Items are stored as keys of `table_`; each key maps to the index of its Node in `node_arr_`.
// The nodes form a circular doubly linked list expressed with indices instead of pointers:
// `head_` is the index of the head node and the tail is always `node_arr_[head_].prev`.
// `node_arr_` is kept dense: table_.size() == node_arr_.size() between calls.
//
// Put() never evicts anything; `capacity` is only used to pre-reserve the node array.
//
// NOTE(review): nodes keep raw pointers to the keys owned by `table_`, so a copied Lru would
// have nodes pointing into the source object's table - avoid copying instances.
template <typename T> class Lru {
  struct Node {
    // Points to the key stored inside table_. Relies on the node-based map keeping the
    // address of its elements stable.
    const T* data_ptr = nullptr;
    uint32_t prev = 0;  // index of the previous node in node_arr_
    uint32_t next = 0;  // index of the next node in node_arr_
  };

 public:
  // capacity - number of nodes to reserve up front, must be greater than 1.
  // mr - memory resource used by both the table and the node array.
  explicit Lru(uint32_t capacity, PMR_NS::memory_resource* mr) : table_(mr), node_arr_(mr) {
    CHECK_GT(capacity, 1u);
    node_arr_.reserve(capacity);
  }

  // Get prev item. In case item is head return tail.
  // Returns nullopt if data is not stored.
  std::optional<T> GetPrev(const T& data) const;

  // Returns the tail/head item, or nullopt if the container is empty.
  std::optional<T> GetTail() const;
  std::optional<T> GetHead() const;

  // Inserts data at the requested position, or moves it there if it is already stored.
  void Put(const T& data, Position position = Position::kHead);

  // Removes data. Returns false if it was not stored.
  bool Remove(const T& data);

  size_t Size() const {
    DCHECK_EQ(table_.size(), node_arr_.size());
    return table_.size();
  }

 private:
  void MoveToPosition(uint32_t index, Position position);

  using AllocatorType = PMR_NS::polymorphic_allocator<std::pair<T, uint32_t>>;
  absl::node_hash_map<T, uint32_t, absl::Hash<T>, std::equal_to<>, AllocatorType>
      table_;  // map from item to index in node arr
  std::vector<Node, PMR_NS::polymorphic_allocator<Node>> node_arr_;
  uint32_t head_ = 0;
};
// Returns the item whose node precedes `data` in the circular list, or nullopt when `data`
// is not stored. The list is circular, so the item before the head is the tail.
template <typename T> std::optional<T> Lru<T>::GetPrev(const T& data) const {
  const auto found = table_.find(data);
  if (found != table_.end()) {
    const uint32_t index = found->second;
    DCHECK_GT(node_arr_.size(), index);

    const Node& current = node_arr_[index];
    DCHECK_EQ(current.data_ptr, &found->first);

    return *node_arr_[current.prev].data_ptr;
  }
  return std::nullopt;
}
// Returns a copy of the tail item, or nullopt when nothing is stored.
template <typename T> std::optional<T> Lru<T>::GetTail() const {
  if (table_.size() != 0) {
    // The tail is the node right before the head in the circular list.
    const Node& head_node = node_arr_[head_];
    return *node_arr_[head_node.prev].data_ptr;
  }
  return std::nullopt;
}
// Returns a copy of the head item, or nullopt when nothing is stored.
template <typename T> std::optional<T> Lru<T>::GetHead() const {
  if (table_.size() != 0) {
    const Node& head_node = node_arr_[head_];
    return *head_node.data_ptr;
  }
  return std::nullopt;
}
// Stores `data` at `position`, or moves it there when it is already stored.
//
// A new item is appended to node_arr_ and linked between the current tail and head of the
// circular list. That slot is simultaneously "after the tail" and "before the head", so the
// new node is the tail unless head_ is switched to it (Position::kHead).
template <typename T> void Lru<T>::Put(const T& data, Position position) {
  DCHECK_EQ(table_.size(), node_arr_.size());
  auto [it, inserted] = table_.emplace(data, table_.size());
  if (!inserted) {
    MoveToPosition(it->second, position);
    return;
  }

  const uint32_t index = it->second;  // == node_arr_.size(), the slot the new node will occupy.
  Node node;
  node.data_ptr = &(it->first);

  if (node_arr_.empty()) {
    // First item: it is its own neighbour and both head and tail. There is no existing node
    // to relink, so node_arr_ must not be indexed before the push_back.
    node.prev = index;
    node.next = index;
    node_arr_.push_back(node);
    head_ = index;
    return;
  }

  const uint32_t tail = node_arr_[head_].prev;
  // add new item between head and tail.
  node.prev = tail;
  node.next = head_;
  node_arr_.push_back(node);
  node_arr_[tail].next = index;
  node_arr_[head_].prev = index;

  if (position == Position::kHead) {
    head_ = index;
  }
}
// Removes `data` from the container. Returns false if it was not stored, true otherwise.
//
// node_arr_ is kept dense: unless the removed node occupied the last slot, the node from the
// last slot is relocated into the freed slot and every index that referred to it (its table_
// entry, its neighbours' links and possibly head_) is rewritten.
template <typename T> bool Lru<T>::Remove(const T& data) {
auto it = table_.find(data);
if (it == table_.end()) {
return false;
}
uint32_t remove_index = it->second;
auto& node = node_arr_[remove_index];
// remove from list
node_arr_[node.prev].next = node.next;
node_arr_[node.next].prev = node.prev;
// remove item from table.
// If the head is being removed, the node following it becomes the new head.
if (remove_index == head_) {
head_ = node.next;
}
table_.erase(it);
// table_ has already shrunk by one, so its size equals the index of the last node_arr_ slot.
if (table_.size() == remove_index) {
node_arr_.pop_back();
DCHECK_EQ(table_.size(), node_arr_.size());
return true; // if the removed item was the last in the node array nothing else to do.
}
// move last item from node array to the removed index
uint32_t move_index = table_.size();
auto& node_to_move = node_arr_[move_index];
// Re-point the table entry of the relocated item to its new slot.
it = table_.find(*node_to_move.data_ptr);
CHECK(it != table_.end());
it->second = remove_index;
// now update the next and prev to point to it
node_arr_[node_to_move.prev].next = remove_index;
node_arr_[node_to_move.next].prev = remove_index;
// move the data from the node to the removed node.
node = node_to_move;
node_arr_.pop_back();
// head_ may refer to the relocated node (including when it was just set to node.next above).
if (head_ == move_index) {
head_ = remove_index;
}
DCHECK_EQ(table_.size(), node_arr_.size());
return true;
}
// Moves the already stored node at `index` to the head or the tail of the circular list.
//
// When the node sits at the opposite end, the move is done by shifting head_ only: in a
// circular list the tail directly precedes the head, so rotating the head by one step swaps
// the roles without relinking anything. This also covers the two-node list, where
// node.prev == node.next holds for both nodes.
template <typename T> void Lru<T>::MoveToPosition(uint32_t index, Position position) {
  DCHECK_LT(index, node_arr_.size());
  const uint32_t tail = node_arr_[head_].prev;
  const uint32_t target_index = position == Position::kHead ? head_ : tail;
  if (index == target_index) {  // the index is already head/tail. nothing to change.
    return;
  }

  auto& node = node_arr_[index];

  if (position == Position::kHead && index == tail) {
    head_ = index;  // just shift the cycle.
    return;
  }
  if (position == Position::kTail && index == head_) {
    head_ = node.next;  // just shift the cycle.
    return;
  }

  // The node is neither head nor tail here, so the list holds at least 3 nodes and the
  // neighbours of this node must differ. The check has to come after the shortcuts above:
  // with exactly two nodes prev == next is the normal state.
  CHECK_NE(node.prev, node.next);

  // remove from list
  node_arr_[node.prev].next = node.next;
  node_arr_[node.next].prev = node.prev;

  // relink the node between the tail and the head.
  node.prev = tail;
  node.next = head_;
  node_arr_[tail].next = index;
  node_arr_[head_].prev = index;

  // Between tail and head the node is the new tail, unless the head is switched to it.
  if (position == Position::kHead) {
    head_ = index;
  }
}
}; // namespace dfly

173
src/core/lru_test.cc Normal file
View file

@ -0,0 +1,173 @@
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "core/lru.h"
#include "base/gtest.h"
#include "base/logging.h"
#include "core/compact_object.h"
#include "core/mi_memory_resource.h"
using namespace std;
namespace dfly {
// Fixture providing an Lru<std::string> backed by a MiMemoryResource.
class StringLruTest : public ::testing::Test {
protected:
StringLruTest() : mr_(mi_heap_get_backing()), cache_(kSize, &mr_) {
}
// Capacity passed to the Lru constructor. Declared before cache_ so that it is already
// initialized when cache_ is constructed.
const size_t kSize = 4;
MiMemoryResource mr_;
Lru<std::string> cache_;
};
// Checks head/tail after each Put at the default (head) position, including re-putting an
// existing item and inserting more items than kSize.
TEST_F(StringLruTest, PutAndGet) {
cache_.Put("a");  // a
ASSERT_EQ("a", cache_.GetHead());
// With a single item the list is circular onto itself.
ASSERT_EQ("a", cache_.GetPrev("a"));
ASSERT_EQ("a", cache_.GetTail());
cache_.Put("a");  // a
ASSERT_EQ("a", cache_.GetHead());
ASSERT_EQ("a", cache_.GetTail());
cache_.Put("b");  // b -> a
ASSERT_EQ("b", cache_.GetHead());
ASSERT_EQ("a", cache_.GetTail());
cache_.Put("c");  // c -> b -> a
ASSERT_EQ("c", cache_.GetHead());
ASSERT_EQ("a", cache_.GetTail());
cache_.Put("d");  // d -> c -> b -> a
ASSERT_EQ("d", cache_.GetHead());
ASSERT_EQ("a", cache_.GetTail());
cache_.Put("a");  // a -> d -> c -> b
ASSERT_EQ("a", cache_.GetHead());
ASSERT_EQ("b", cache_.GetTail());
cache_.Put("e");  // e -> a -> d -> c -> b
ASSERT_EQ("e", cache_.GetHead());
ASSERT_EQ("b", cache_.GetTail());
cache_.Put("f");  // f -> e -> a -> d -> c -> b
ASSERT_EQ("f", cache_.GetHead());
ASSERT_EQ("b", cache_.GetTail());
}
// Mixes Put at the head with Put at Position::kTail and walks the list backwards with
// GetPrev. The trailing comments show the expected order, head first.
TEST_F(StringLruTest, PutAndPutTail) {
cache_.Put("a");  // a
cache_.Put("a"); // a
cache_.Put("b"); // b -> a
cache_.Put("c"); // c -> b -> a
cache_.Put("d"); // d-> c -> b -> a
ASSERT_EQ("a", cache_.GetTail());
cache_.Put("a"); // a -> d -> c -> b
ASSERT_EQ("b", cache_.GetTail());
ASSERT_EQ("c", cache_.GetPrev("b"));
ASSERT_EQ("d", cache_.GetPrev("c"));
ASSERT_EQ("b", cache_.GetPrev("a"));
cache_.Put("d", Position::kTail); // a -> c -> b -> d
ASSERT_EQ("d", cache_.GetTail());
ASSERT_EQ("b", cache_.GetPrev("d"));
ASSERT_EQ("c", cache_.GetPrev("b"));
ASSERT_EQ("a", cache_.GetPrev("c"));
ASSERT_EQ("d", cache_.GetPrev("a"));
// "a" is already the head, so the order does not change.
cache_.Put("a"); // a -> c -> b -> d
ASSERT_EQ("d", cache_.GetTail());
// New item inserted directly at the tail.
cache_.Put("e", Position::kTail); // a -> c -> b -> d -> e
ASSERT_EQ("e", cache_.GetTail());
ASSERT_EQ("d", cache_.GetPrev("e"));
ASSERT_EQ("b", cache_.GetPrev("d"));
ASSERT_EQ("a", cache_.GetPrev("c"));
ASSERT_EQ("e", cache_.GetPrev("a"));
// "e" is already the tail, so the order does not change.
cache_.Put("e", Position::kTail); // a -> c -> b -> d -> e
ASSERT_EQ("e", cache_.GetTail());
ASSERT_EQ("d", cache_.GetPrev("e"));
ASSERT_EQ("b", cache_.GetPrev("d"));
ASSERT_EQ("a", cache_.GetPrev("c"));
ASSERT_EQ("e", cache_.GetPrev("a"));
}
// Bumps an item from the middle of the list to the head and verifies the remaining links.
TEST_F(StringLruTest, BumpTest) {
cache_.Put("a");  // a
cache_.Put("b");  // b -> a
cache_.Put("c");  // c -> b -> a
cache_.Put("d");  // d -> c -> b -> a
ASSERT_EQ("a", cache_.GetTail());
cache_.Put("c");  // c -> d -> b -> a
ASSERT_EQ("a", cache_.GetTail());
ASSERT_EQ("d", cache_.GetPrev("b"));
ASSERT_EQ("c", cache_.GetPrev("d"));
}
// Inserts twice as many items as kSize, then bumps some of them in reverse order.
TEST_F(StringLruTest, DifferentOrder) {
// 7 -> 6 -> 5 -> 4 -> 3 -> 2 -> 1 -> 0
for (uint32_t i = 0; i < kSize * 2; ++i) {
cache_.Put(absl::StrCat(i));
}
ASSERT_EQ("0", cache_.GetTail());
// Bump "4", "3", "2", "1" to the head, in that order: 1 -> 2 -> 3 -> 4 -> 7 -> 6 -> 5 -> 0
for (uint32_t i = kSize; i > 0; --i) {
cache_.Put(absl::StrCat(i));
}
ASSERT_EQ("0", cache_.GetTail());
// Bumping the tail makes its predecessor the new tail: 0 -> 1 -> 2 -> 3 -> 4 -> 7 -> 6 -> 5
cache_.Put("0");
ASSERT_EQ("5", cache_.GetTail());
}
// Removes the head, a middle item and the tail, re-inserts a removed item, and verifies the
// list order after every step. The trailing comments show the expected order, head first.
TEST_F(StringLruTest, Delete) {
cache_.Put("a"); // a
cache_.Put("b"); // b -> a
cache_.Put("c"); // c -> b -> a
cache_.Put("d"); // d-> c -> b -> a
cache_.Put("e"); // e -> d-> c -> b -> a
ASSERT_EQ("e", cache_.GetHead());
// Remove the head.
ASSERT_TRUE(cache_.Remove("e")); // d-> c -> b -> a
ASSERT_EQ("d", cache_.GetHead());
ASSERT_EQ("a", cache_.GetTail());
ASSERT_EQ("b", cache_.GetPrev("a"));
ASSERT_EQ("c", cache_.GetPrev("b"));
ASSERT_EQ("d", cache_.GetPrev("c"));
ASSERT_EQ("a", cache_.GetPrev("d"));
// Removing an item that is no longer stored reports false and changes nothing.
ASSERT_FALSE(cache_.Remove("e")); // d-> c -> b -> a
// Remove an item from the middle of the list.
ASSERT_TRUE(cache_.Remove("c")); // d -> b -> a
ASSERT_EQ("d", cache_.GetHead());
ASSERT_EQ("a", cache_.GetTail());
ASSERT_EQ("b", cache_.GetPrev("a"));
ASSERT_EQ("d", cache_.GetPrev("b"));
ASSERT_EQ("a", cache_.GetPrev("d"));
// Re-insert a previously removed item.
cache_.Put("c"); // c -> d -> b -> a
ASSERT_EQ("c", cache_.GetHead());
ASSERT_EQ("a", cache_.GetTail());
ASSERT_EQ("b", cache_.GetPrev("a"));
ASSERT_EQ("d", cache_.GetPrev("b"));
ASSERT_EQ("c", cache_.GetPrev("d"));
ASSERT_EQ("a", cache_.GetPrev("c"));
// Remove the tail.
ASSERT_TRUE(cache_.Remove("a")); // c -> d -> b
ASSERT_EQ("b", cache_.GetTail());
ASSERT_EQ("d", cache_.GetPrev("b"));
ASSERT_EQ("c", cache_.GetPrev("d"));
ASSERT_EQ("b", cache_.GetPrev("c"));
}
// Fixture providing an Lru<CompactObjectView> backed by a MiMemoryResource.
class COVLruTest : public ::testing::Test {
protected:
COVLruTest() : mr_(mi_heap_get_backing()), cache_(kSize, &mr_) {
}
// Capacity passed to the Lru constructor. Declared before cache_ so that it is already
// initialized when cache_ is constructed.
const size_t kSize = 100;
MiMemoryResource mr_;
Lru<CompactObjectView> cache_;
};
// Logs the growth of mr_.used() caused by putting 100 items into the Lru.
// The test has no assertions; it only prints the measurement.
TEST_F(COVLruTest, MemoryUsagePrint) {
size_t before = mr_.used();
// The objects are kept in a local array; the Lru receives each obj_arr[i] via Put.
std::array<CompactObj, 100> obj_arr;
for (int i = 0; i < 100; ++i) {
obj_arr[i].SetString(absl::StrCat(i));
cache_.Put(obj_arr[i]);
}
size_t after = mr_.used();
LOG(INFO) << "CompactObjectView lru 100 items memory : " << absl::StrCat(after - before)
<< " bytes";
}
} // namespace dfly

View file

@ -1,90 +0,0 @@
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "core/simple_lru_counter.h"
#include "base/logging.h"
namespace dfly {
using namespace std;
SimpleLruCounter::SimpleLruCounter(size_t capacity) : head_(0) {
CHECK_GT(capacity, 1u);
node_arr_.resize(capacity);
}
// Nothing to release explicitly: the members clean up after themselves.
SimpleLruCounter::~SimpleLruCounter() = default;
// Returns the count stored for `key`, or nullopt when the key is not present.
// Does not change the LRU order.
optional<uint64_t> SimpleLruCounter::Get(string_view key) const {
  const auto found = table_.find(key);
  if (found != table_.end()) {
    const auto& entry = node_arr_[found->second];
    DCHECK_EQ(entry.key, key);
    return entry.count;
  }
  return nullopt;
}
// Stores `value` for `key` and makes the key the head of the LRU list.
// When the key is new and all node slots are taken, the tail key is erased from the table
// and its node slot is reused for the new key.
void SimpleLruCounter::Put(string_view key, uint64_t value) {
// For a new key the tentative node index is the table size before the insertion.
auto [it, inserted] = table_.emplace(key, table_.size());
if (inserted) {
unsigned tail = node_arr_[head_].prev; // 0 if we had 1 or 0 elements.
if (it->second < node_arr_.size()) {
// There is still a free node slot: link it in as the new head.
auto& node = node_arr_[it->second];
// add new head.
node.prev = tail;
node.next = head_;
node_arr_[tail].next = it->second;
node_arr_[head_].prev = it->second;
head_ = it->second;
} else {
// Cache is full, remove the tail.
size_t res = table_.erase(string_view(node_arr_[tail].key));
DCHECK(res == 1);
// Reuse the tail's node slot for the new key; its links stay as they are.
it->second = tail;
DCHECK_EQ(table_.size(), node_arr_.size());
}
auto& node = node_arr_[it->second];
node.key = it->first; // reference the key. We need it to erase the key referencing tail above.
node.count = value;
} else { // not inserted.
auto& node = node_arr_[it->second];
node.count = value;
}
if (it->second != head_) { // bump up to head.
BumpToHead(it->second);
}
}
// Makes the node at `index` the head of the circular list. `index` must not already be the
// head.
void SimpleLruCounter::BumpToHead(uint32_t index) {
  DCHECK_LT(index, node_arr_.size());
  DCHECK_NE(index, head_);

  unsigned tail = node_arr_[head_].prev;
  if (index == tail) {
    head_ = index;  // just shift the whole cycle.
    return;
  }

  auto& node = node_arr_[index];
  DCHECK(node.prev != node.next);

  // Unlink the node from its current place.
  node_arr_[node.prev].next = node.next;
  node_arr_[node.next].prev = node.prev;

  // Relink it between the tail and the current head. Both neighbours must be updated as
  // well, otherwise the tail's next and the old head's prev would still point at each other
  // and the ring would be inconsistent.
  node.prev = tail;
  node.next = head_;
  node_arr_[tail].next = index;
  node_arr_[head_].prev = index;
  head_ = index;
}
}; // namespace dfly

View file

@ -1,44 +0,0 @@
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#include <absl/container/flat_hash_map.h>
#include "base/string_view_sso.h"
namespace dfly {
// Maps string keys to 64-bit counts and keeps the keys in LRU order.
// The order is a circular doubly linked list stored as indices into node_arr_;
// head_ is the index of the head node.
class SimpleLruCounter {
  struct Node {
    base::string_view_sso key;  // key to the table.
    uint32_t prev = 0;
    uint32_t next = 0;
    uint64_t count = 0;
  };

 public:
  explicit SimpleLruCounter(size_t capacity);
  ~SimpleLruCounter();

  // Returns the count stored for key, if any.
  std::optional<uint64_t> Get(std::string_view key) const;

  // Stores count for key.
  void Put(std::string_view key, uint64_t count);

  // Number of keys currently stored.
  size_t Size() const {
    return table_.size();
  }

 private:
  void BumpToHead(uint32_t index);

  absl::flat_hash_map<std::string, uint32_t> table_;  // key -> index in node_arr_
  std::vector<Node> node_arr_;
  uint32_t head_;
};
}; // namespace dfly

View file

@ -1,74 +0,0 @@
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "core/simple_lru_counter.h"
#include "base/gtest.h"
#include "base/logging.h"
using namespace std;
namespace dfly {
// Fixture providing a SimpleLruCounter constructed with capacity kSize.
class SimpleLruTest : public ::testing::Test {
protected:
SimpleLruTest() : cache_(kSize) {
}
// Declared before cache_ so that it is already initialized when cache_ is constructed.
const size_t kSize = 4;
SimpleLruCounter cache_;
};
// Fills the cache to capacity, re-puts the oldest key, then adds new keys and checks which
// keys are no longer found.
TEST_F(SimpleLruTest, Basic) {
cache_.Put("a", 1);
cache_.Put("b", 2);
cache_.Put("c", 3);
cache_.Put("d", 4);
// Re-putting "a" makes "b" the least recently used key.
cache_.Put("a", 1);
ASSERT_EQ(1, cache_.Get("a"));
ASSERT_EQ(2, cache_.Get("b"));
ASSERT_EQ(3, cache_.Get("c"));
ASSERT_EQ(4, cache_.Get("d"));
ASSERT_EQ(nullopt, cache_.Get("e"));
// The cache is full: adding "e" drops "b".
cache_.Put("e", 5);
ASSERT_EQ(nullopt, cache_.Get("b"));
ASSERT_EQ(3, cache_.Get("c"));
ASSERT_EQ(4, cache_.Get("d"));
ASSERT_EQ(5, cache_.Get("e"));
// Adding "f" drops "c".
cache_.Put("f", 6);
ASSERT_EQ(nullopt, cache_.Get("c"));
ASSERT_EQ(5, cache_.Get("e"));
ASSERT_EQ(6, cache_.Get("f"));
}
// Puts twice as many keys as the capacity and checks that only the most recently put kSize
// keys are found, then repeats this with keys put in a different order.
TEST_F(SimpleLruTest, DifferentOrder) {
for (uint32_t i = 0; i < kSize * 2; ++i) {
cache_.Put(absl::StrCat(i), i);
}
// The first kSize keys are expected to be gone ...
for (uint32_t i = 0; i < kSize; ++i) {
EXPECT_EQ(nullopt, cache_.Get(absl::StrCat(i)));
}
// ... and the last kSize keys are expected to be present.
for (uint32_t i = kSize; i < kSize * 2; ++i) {
EXPECT_EQ(i, cache_.Get(absl::StrCat(i)));
}
// Put keys kSize, kSize - 1, ..., 1 and finally 0.
for (uint32_t i = kSize; i > 0; --i) {
cache_.Put(absl::StrCat(i), i);
}
cache_.Put("0", 0);
// Now keys 0 .. kSize - 1 are expected to be present and the rest gone.
for (uint32_t i = 0; i < kSize; ++i) {
EXPECT_EQ(i, cache_.Get(absl::StrCat(i)));
}
for (uint32_t i = kSize; i < kSize * 2; ++i) {
EXPECT_EQ(nullopt, cache_.Get(absl::StrCat(i)));
}
}
} // namespace dfly

View file

@ -179,7 +179,7 @@ class PrimeEvictionPolicy {
class PrimeBumpPolicy {
public:
PrimeBumpPolicy(const absl::flat_hash_set<CompactObjectView, PrimeHasher>& bumped_items)
PrimeBumpPolicy(const absl::flat_hash_set<CompactObjectView>& bumped_items)
: bumped_items_(bumped_items) {
}
// returns true if key can be made less important for eviction (opposite of bump up)
@ -188,7 +188,7 @@ class PrimeBumpPolicy {
}
private:
const absl::flat_hash_set<CompactObjectView, PrimeHasher>& bumped_items_;
const absl::flat_hash_set<CompactObjectView>& bumped_items_;
};
bool PrimeEvictionPolicy::CanGrow(const PrimeTable& tbl) const {

View file

@ -454,7 +454,7 @@ class DbSlice {
std::vector<std::pair<uint64_t, ChangeCallback>> change_cb_;
// Used in temporary computations in Find item and CbFinish
mutable absl::flat_hash_set<CompactObjectView, PrimeHasher> bumped_items_;
mutable absl::flat_hash_set<CompactObjectView> bumped_items_;
// Registered by shard indices on when first document index is created.
DocDeletionCallback doc_del_cb_;

View file

@ -47,12 +47,6 @@ inline bool IsValid(ExpireConstIterator it) {
return !it.is_done();
}
// Hash functor for PrimeKey: the hash is the key's own HashCode().
struct PrimeHasher {
  size_t operator()(const PrimeKey& o) const {
    const size_t code = o.HashCode();
    return code;
  }
};
struct SlotStats {
uint64_t key_count = 0;
uint64_t total_reads = 0;

View file

@ -127,7 +127,7 @@ struct TieredStorage::PerDb {
struct BinRecord {
// Those that wait to be serialized. Must be less than NumEntriesInSmallBin for each bin.
absl::flat_hash_set<CompactObjectView, PrimeHasher> pending_entries;
absl::flat_hash_set<CompactObjectView> pending_entries;
// Entries that were scheduled to write but have not completed yet.
InflightMap enqueued_entries;