feat(tiering): add background offload step (#2504)

* feat(tiering): add background offload step

Signed-off-by: adi_holden <adi@dragonflydb.io>
This commit is contained in:
adiholden 2024-02-14 14:28:41 +02:00 committed by GitHub
parent b18fe8c0a8
commit 32e8d49123
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 261 additions and 56 deletions

View file

@@ -18,6 +18,7 @@ extern "C" {
#include "server/journal/journal.h"
#include "server/server_state.h"
#include "server/tiered_storage.h"
#include "strings/human_readable.h"
ABSL_FLAG(bool, enable_heartbeat_eviction, true,
"Enable eviction during heartbeat when memory is under pressure.");
@@ -229,7 +230,7 @@ DbStats& DbStats::operator+=(const DbStats& o) {
}
SliceEvents& SliceEvents::operator+=(const SliceEvents& o) {
static_assert(sizeof(SliceEvents) == 96, "You should update this function with new fields");
static_assert(sizeof(SliceEvents) == 112, "You should update this function with new fields");
ADD(evicted_keys);
ADD(hard_evictions);
@@ -243,6 +244,8 @@ SliceEvents& SliceEvents::operator+=(const SliceEvents& o) {
ADD(mutations);
ADD(insertion_rejections);
ADD(update);
ADD(ram_hits);
ADD(ram_misses);
return *this;
}
@@ -472,16 +475,21 @@ OpResult<DbSlice::ItAndExp> DbSlice::FindInternal(const Context& cntx, std::stri
if (TieredStorage* tiered = shard_owner()->tiered_storage();
tiered && load_mode == LoadExternalMode::kLoad) {
if (res.it->second.HasIoPending()) {
tiered->CancelIo(cntx.db_index, res.it);
} else if (res.it->second.IsExternal()) {
if (res.it->second.IsExternal()) {
// Load reads data from disk therefore we will preempt in this function.
// We will update the iterator if it changed during the preemption
res.it = tiered->Load(cntx.db_index, res.it, key);
if (!IsValid(res.it)) {
return OpStatus::KEY_NOTFOUND;
}
events_.ram_misses++;
} else {
if (res.it->second.HasIoPending()) {
tiered->CancelIo(cntx.db_index, res.it);
}
events_.ram_hits++;
}
res.it->first.SetTouched(true);
}
FiberAtomicGuard fg;
@@ -975,7 +983,7 @@ bool DbSlice::Acquire(IntentLock::Mode mode, const KeyLockArgs& lock_args) {
void DbSlice::ReleaseNormalized(IntentLock::Mode mode, DbIndex db_index, std::string_view key) {
DCHECK_EQ(key, KeyLockArgs::GetLockKey(key));
DVLOG(1) << "Release " << IntentLock::ModeName(mode) << " "
DVLOG(2) << "Release " << IntentLock::ModeName(mode) << " "
<< " for " << key;
auto& lt = db_arr_[db_index]->trans_locks;
@@ -1198,6 +1206,37 @@ int32_t DbSlice::GetNextSegmentForEviction(int32_t segment_id, DbIndex db_ind) c
db_arr_[db_ind]->prime.GetSegmentCount();
}
// Scans the prime table of `db_indx` and schedules cold (untouched) entries for
// offload to tiered storage, stopping once roughly `increase_goal_bytes` worth
// of offload I/O has been scheduled. Requires tiered storage to be enabled
// (DCHECK below). Entries' "touched" bits are cleared as a side effect, so an
// entry must be accessed again before a later pass to be considered hot.
void DbSlice::ScheduleForOffloadStep(DbIndex db_indx, size_t increase_goal_bytes) {
VLOG(1) << "ScheduleForOffloadStep increase_goal_bytes:"
<< strings::HumanReadableNumBytes(increase_goal_bytes);
DCHECK(shard_owner()->tiered_storage());
// Prevent fiber preemption while we walk the table and mutate entry flags.
FiberAtomicGuard guard;
PrimeTable& pt = db_arr_[db_indx]->prime;
// NOTE(review): function-local static — this cursor is shared process-wide,
// not per DbSlice / per db index; confirm that is intended.
static PrimeTable::Cursor cursor;
size_t offloaded_bytes = 0;
auto cb = [&](PrimeIterator it) {
// TBD check we did not lock it for future transaction
// If the item is cold (not touched) and can be externalized, schedule it for offload.
if (increase_goal_bytes > offloaded_bytes && !(it->first.WasTouched()) &&
TieredStorage::CanExternalizeEntry(it)) {
shard_owner()->tiered_storage()->ScheduleOffload(db_indx, it);
// ScheduleOffload may decline; only count the bytes if I/O was actually queued.
if (it->second.HasIoPending()) {
offloaded_bytes += it->second.Size();
VLOG(2) << "ScheduleOffload bytes:" << offloaded_bytes;
}
}
// Clear the touch bit so only entries accessed after this pass stay "hot".
it->first.SetTouched(false);
};
// Advance the scan by a bounded number of traversal steps (60) per call; the
// persistent cursor lets successive heartbeat calls continue where we stopped.
for (int i = 0; i < 60; ++i) {
cursor = pt.TraverseBySegmentOrder(cursor, cb);
}
}
void DbSlice::FreeMemWithEvictionStep(DbIndex db_ind, size_t increase_goal_bytes) {
DCHECK(!owner_->IsReplica());
if ((!caching_mode_) || !expire_allowed_ || !GetFlag(FLAGS_enable_heartbeat_eviction))