mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-11 10:25:47 +02:00
DashTable::Traverse is error-prone when the callback passed preempts because the segment might change. This is problematic and we need atomicity while traversing segments with preemption. The fix is to add Traverse in DbSlice and protect the traversal via ThreadLocalMutex. * add ConditionFlag to DbSlice * add Traverse in DbSlice and protect it with the ConditionFlag * remove condition flag from snapshot * remove condition flag from streamer --------- Signed-off-by: kostas <kostas@dragonflydb.io>
476 lines
12 KiB
C++
476 lines
12 KiB
C++
// Copyright 2022, DragonflyDB authors. All rights reserved.
|
|
// See LICENSE for licensing terms.
|
|
//
|
|
|
|
#include "server/common.h"
|
|
|
|
#include <absl/strings/match.h>
|
|
#include <absl/strings/str_cat.h>
|
|
#include <fast_float/fast_float.h>
|
|
|
|
#include <system_error>
|
|
|
|
extern "C" {
|
|
#include "redis/rdb.h"
|
|
#include "redis/util.h"
|
|
}
|
|
|
|
#include "base/flags.h"
|
|
#include "base/logging.h"
|
|
#include "core/compact_object.h"
|
|
#include "server/engine_shard_set.h"
|
|
#include "server/error.h"
|
|
#include "server/journal/journal.h"
|
|
#include "server/server_state.h"
|
|
#include "server/transaction.h"
|
|
#include "strings/human_readable.h"
|
|
|
|
// We've generalized "hashtags" so that users can specify custom delimiter and closures, see below.
|
|
// If I had a time machine, I'd rename this to lock_on_tags.
|
|
ABSL_FLAG(bool, lock_on_hashtags, false,
|
|
"When true, locks are done in the {hashtag} level instead of key level. Hashtag "
|
|
"extraction can be further configured with locktag_* flags.");
|
|
|
|
// We would have used `char` instead of `string`, but that's impossible.
|
|
ABSL_FLAG(
|
|
std::string, locktag_delimiter, "",
|
|
"If set, this char is used to extract a lock tag by looking at delimiters, like hash tags. If "
|
|
"unset, regular hashtag extraction is done (with {}). Must be used with --lock_on_hashtags");
|
|
|
|
ABSL_FLAG(unsigned, locktag_skip_n_end_delimiters, 0,
|
|
"How many closing tag delimiters should we skip when extracting lock tags. 0 for no "
|
|
"skipping. For example, when delimiter is ':' and this flag is 2, the locktag for "
|
|
"':a:b:c:d:e' will be 'a:b:c'.");
|
|
|
|
ABSL_FLAG(std::string, locktag_prefix, "",
|
|
"Only keys with this prefix participate in tag extraction.");
|
|
|
|
namespace dfly {
|
|
|
|
using namespace std;
|
|
using namespace util;
|
|
|
|
namespace {
|
|
|
|
// Thread-local cache with static linkage.
|
|
thread_local std::optional<LockTagOptions> locktag_lock_options;
|
|
|
|
} // namespace
|
|
|
|
// Test-only helper: clears the cached lock tag options on the calling thread and then dispatches
// the same reset through the shard set's pool, so that the next LockTagOptions::instance() call
// rebuilds the options from the current flag values.
// Requires `shard_set` to be initialized (enforced with CHECK).
void TEST_InvalidateLockTagOptions() {
  // The calling thread (the test's main thread) is handled directly.
  locktag_lock_options.reset();

  CHECK(shard_set != nullptr);
  shard_set->pool()->AwaitBrief(
      [](ShardId /*shard*/, ProactorBase* /*proactor*/) { locktag_lock_options.reset(); });
}
|
|
|
|
// Returns the calling thread's LockTagOptions, building them from the command line flags on first
// use and serving the cached copy afterwards.
//
// The delimiter flag is normalized into an (open, close) pair:
//   - empty     -> "{}" (classic hashtags);
//   - one char  -> that char is used for both sides;
//   - otherwise -> an error is logged and the process exits with status -1.
const LockTagOptions& LockTagOptions::instance() {
  if (locktag_lock_options.has_value()) {
    return *locktag_lock_options;
  }

  string delimiter = absl::GetFlag(FLAGS_locktag_delimiter);
  switch (delimiter.size()) {
    case 0:
      delimiter = "{}";
      break;
    case 1:
      // Duplicate the char (e.g. ":" -> "::") so open/close can be read uniformly below.
      delimiter.push_back(delimiter.front());
      break;
    default:
      LOG(ERROR) << "Invalid value for locktag_delimiter - must be a single char";
      exit(-1);
  }

  locktag_lock_options = LockTagOptions{
      .enabled = absl::GetFlag(FLAGS_lock_on_hashtags),
      .open_locktag = delimiter[0],
      .close_locktag = delimiter[1],
      .skip_n_end_delimiters = absl::GetFlag(FLAGS_locktag_skip_n_end_delimiters),
      .prefix = absl::GetFlag(FLAGS_locktag_prefix),
  };
  return *locktag_lock_options;
}
|
|
|
|
std::string_view LockTagOptions::Tag(std::string_view key) const {
|
|
if (!absl::StartsWith(key, prefix)) {
|
|
return key;
|
|
}
|
|
|
|
const size_t start = key.find(open_locktag);
|
|
if (start == key.npos) {
|
|
return key;
|
|
}
|
|
|
|
size_t end = start;
|
|
for (unsigned i = 0; i <= skip_n_end_delimiters; ++i) {
|
|
size_t next = end + 1;
|
|
end = key.find(close_locktag, next);
|
|
if (end == key.npos || end == next) {
|
|
return key;
|
|
}
|
|
}
|
|
|
|
return key.substr(start + 1, end - start - 1);
|
|
}
|
|
|
|
// Global atomic memory counters, all starting at zero.
// NOTE(review): judging by the names these track current/peak used memory and RSS - confirm
// against the writers, which are not part of this file section.
atomic_uint64_t used_mem_peak{0};
atomic_uint64_t used_mem_current{0};
atomic_uint64_t rss_mem_current{0};
atomic_uint64_t rss_mem_peak{0};

// Plain (non-atomic) globals, zero until assigned elsewhere.
unsigned kernel_version{0};
size_t max_memory_limit{0};
|
|
|
|
// Maps a GlobalState value to its display string ("ACTIVE", "LOADING", "SHUTTING DOWN",
// "TAKEN OVER"). Any value outside the enumerators handled below is treated as unreachable.
const char* GlobalStateName(GlobalState s) {
  const char* name = nullptr;
  switch (s) {
    case GlobalState::ACTIVE:
      name = "ACTIVE";
      break;
    case GlobalState::LOADING:
      name = "LOADING";
      break;
    case GlobalState::SHUTTING_DOWN:
      name = "SHUTTING DOWN";
      break;
    case GlobalState::TAKEN_OVER:
      name = "TAKEN OVER";
      break;
  }

  if (name != nullptr)
    return name;
  ABSL_UNREACHABLE();
}
|
|
|
|
const char* RdbTypeName(unsigned type) {
|
|
switch (type) {
|
|
case RDB_TYPE_STRING:
|
|
return "string";
|
|
case RDB_TYPE_LIST:
|
|
return "list";
|
|
case RDB_TYPE_SET:
|
|
return "set";
|
|
case RDB_TYPE_ZSET:
|
|
return "zset";
|
|
case RDB_TYPE_HASH:
|
|
return "hash";
|
|
case RDB_TYPE_STREAM_LISTPACKS:
|
|
return "stream";
|
|
}
|
|
return "other";
|
|
}
|
|
|
|
// Parses a human-readable byte quantity such as "512", "1.5K", "10MB" or "-2GiB".
//
// The numeric part is parsed with strtod. Only the first character after the number is
// inspected, so "1G", "1GB", "1GiB" and "1Gigabytes" are all accepted. Units are binary
// (K = 2^10, M = 2^20, ..., E = 2^60) and case-insensitive; 'B', 'b' or no suffix means plain
// bytes. A single leading '-' negates the result.
//
// Returns true and stores the value, rounded to the nearest integer, in *num_bytes.
// Returns false and leaves *num_bytes untouched when:
//   - `str` is empty or does not start with a number;
//   - the suffix character is not a known unit;
//   - the scaled magnitude is NaN, negative, or does not fit into int64_t.
bool ParseHumanReadableBytes(std::string_view str, int64_t* num_bytes) {
  if (str.empty())
    return false;

  // A string_view is not guaranteed to be null-terminated, but both strtod and the suffix lookup
  // below need a terminator to stay inside the input. Work on a terminated copy.
  const std::string buf(str);
  const char* cstr = buf.c_str();

  const bool neg = (*cstr == '-');
  if (neg) {
    ++cstr;
  }

  char* end = nullptr;
  double d = std::strtod(cstr, &end);
  if (end == cstr)  // strtod consumed nothing: there is no number here.
    return false;

  // Power-of-two exponent selected by the unit suffix.
  int shift = 0;
  switch (*end) {
    case 'E':
    case 'e':
      shift = 60;
      break;
    case 'P':
    case 'p':
      shift = 50;
      break;
    case 'T':
    case 't':
      shift = 40;
      break;
    case 'G':
    case 'g':
      shift = 30;
      break;
    case 'M':
    case 'm':
      shift = 20;
      break;
    case 'K':
    case 'k':
      shift = 10;
      break;
    case 'B':
    case 'b':
    case '\0':
      break;
    default:
      return false;
  }

  d *= static_cast<double>(int64_t{1} << shift);

  // 2^63 is the first magnitude that no longer fits into int64_t. The range is checked on the
  // double itself because converting an out-of-range (or NaN) double to an integer is undefined
  // behavior; the negated comparison also rejects NaN.
  constexpr double kInt64Limit = 9223372036854775808.0;
  if (!(d >= 0 && d < kInt64Limit))
    return false;

  const int64_t rounded = static_cast<int64_t>(d + 0.5);
  *num_bytes = neg ? -rounded : rounded;
  return true;
}
|
|
|
|
// Parses `src` as a double into *value.
//
// "-inf" and "+inf" (any letter case) map to -HUGE_VAL / HUGE_VAL. Everything else goes through
// fast_float::from_chars and is accepted only when the whole input was consumed without error
// and the result is not NaN. Returns false for empty input. Note that *value may have been
// written even when false is returned.
bool ParseDouble(string_view src, double* value) {
  if (src.empty())
    return false;

  const bool is_neg_inf = absl::EqualsIgnoreCase(src, "-inf");
  if (is_neg_inf || absl::EqualsIgnoreCase(src, "+inf")) {
    *value = is_neg_inf ? -HUGE_VAL : HUGE_VAL;
    return true;
  }

  const char* const last = src.data() + src.size();
  const fast_float::from_chars_result parsed = fast_float::from_chars(src.data(), last, *value);

  // A NaN may be spelled "nan" in any letter case and parses successfully, so it has to be
  // rejected explicitly.
  const bool clean_parse = (parsed.ec == std::errc{}) && (parsed.ptr == last);
  return clean_parse && !isnan(*value);
}
|
|
|
|
#define ADD(x) (x) += o.x
|
|
|
|
TieredStats& TieredStats::operator+=(const TieredStats& o) {
|
|
static_assert(sizeof(TieredStats) == 128);
|
|
|
|
ADD(total_stashes);
|
|
ADD(total_fetches);
|
|
ADD(total_cancels);
|
|
ADD(total_deletes);
|
|
ADD(total_defrags);
|
|
ADD(total_uploads);
|
|
ADD(total_heap_buf_allocs);
|
|
ADD(total_registered_buf_allocs);
|
|
|
|
ADD(allocated_bytes);
|
|
ADD(capacity_bytes);
|
|
|
|
ADD(pending_read_cnt);
|
|
ADD(pending_stash_cnt);
|
|
|
|
ADD(small_bins_cnt);
|
|
ADD(small_bins_entries_cnt);
|
|
ADD(small_bins_filling_bytes);
|
|
ADD(total_stash_overflows);
|
|
ADD(cold_storage_bytes);
|
|
|
|
return *this;
|
|
}
|
|
|
|
SearchStats& SearchStats::operator+=(const SearchStats& o) {
|
|
static_assert(sizeof(SearchStats) == 24);
|
|
ADD(used_memory);
|
|
ADD(num_entries);
|
|
|
|
DCHECK(num_indices == 0 || num_indices == o.num_indices);
|
|
num_indices = std::max(num_indices, o.num_indices);
|
|
return *this;
|
|
}
|
|
|
|
#undef ADD
|
|
|
|
// Builds ScanOpts from a list of `<OPTION> <value>` pairs.
//
// Option names are upper-cased in place inside `args` before matching. Recognized options:
//   COUNT  <n>       - stored in `limit`; 0 becomes 1 and values above 4096 become 4096.
//   MATCH  <pattern> - stored in `pattern`; a lone "*" is stored as an empty pattern.
//   TYPE   <name>    - resolved through ObjTypeFromString and stored in `type_filter`.
//   BUCKET <id>      - stored in `bucket_id`.
//
// Returns SYNTAX_ERR for an unknown option, an option without a value, or an unknown type name,
// and INVALID_INT when a COUNT/BUCKET value cannot be parsed as an integer.
OpResult<ScanOpts> ScanOpts::TryFrom(CmdArgList args) {
  ScanOpts parsed;

  const size_t argc = args.size();
  for (size_t pos = 0; pos < argc; pos += 2) {
    ToUpper(&args[pos]);
    const string_view name = ArgS(args, pos);

    // Every option must be followed by its value.
    if (pos + 1 == argc) {
      return facade::OpStatus::SYNTAX_ERR;
    }
    const string_view value = ArgS(args, pos + 1);

    if (name == "COUNT") {
      if (!absl::SimpleAtoi(value, &parsed.limit)) {
        return facade::OpStatus::INVALID_INT;
      }
      if (parsed.limit == 0) {
        parsed.limit = 1;
      } else if (parsed.limit > 4096) {
        parsed.limit = 4096;
      }
      continue;
    }

    if (name == "MATCH") {
      parsed.pattern = value;
      if (parsed.pattern == "*") {
        parsed.pattern = string_view{};
      }
      continue;
    }

    if (name == "TYPE") {
      auto obj_type = ObjTypeFromString(value);
      if (!obj_type) {
        return facade::OpStatus::SYNTAX_ERR;
      }
      parsed.type_filter = obj_type;
      continue;
    }

    if (name == "BUCKET") {
      if (!absl::SimpleAtoi(value, &parsed.bucket_id)) {
        return facade::OpStatus::INVALID_INT;
      }
      continue;
    }

    return facade::OpStatus::SYNTAX_ERR;
  }

  return parsed;
}
|
|
|
|
bool ScanOpts::Matches(std::string_view val_name) const {
|
|
if (pattern.empty())
|
|
return true;
|
|
return stringmatchlen(pattern.data(), pattern.size(), val_name.data(), val_name.size(), 0) == 1;
|
|
}
|
|
|
|
// Conversion to the stored error code; the textual details are not part of the result.
GenericError::operator std::error_code() const {
  return ec_;
}
|
|
|
|
// True when the object carries an error: either a non-zero error code or non-empty details.
GenericError::operator bool() const {
  const bool has_code = static_cast<bool>(ec_);
  return has_code || !details_.empty();
}
|
|
|
|
std::string GenericError::Format() const {
|
|
if (!ec_ && details_.empty())
|
|
return "";
|
|
|
|
if (details_.empty())
|
|
return ec_.message();
|
|
else if (!ec_)
|
|
return details_;
|
|
else
|
|
return absl::StrCat(ec_.message(), ": ", details_);
|
|
}
|
|
|
|
// The error handler fiber is expected to have been joined before destruction (asserted with
// DCHECK); JoinIfNeeded() is still called afterwards as a safety net.
Context::~Context() {
  DCHECK(!err_handler_fb_.IsJoinable());
  err_handler_fb_.JoinIfNeeded();
}
|
|
|
|
// Returns a copy of the currently stored error, taken while holding err_mu_.
GenericError Context::GetError() const {
  std::lock_guard guard{err_mu_};
  return err_;
}
|
|
|
|
// Exposes this context through its Cancellation interface; the result points at this object.
const Cancellation* Context::GetCancellation() const {
  const Cancellation* as_cancellation = this;
  return as_cancellation;
}
|
|
|
|
// Cancels the context by reporting an `operation_canceled` error through ReportError; the
// returned error is ignored.
void Context::Cancel() {
  const std::error_code cancelled = std::make_error_code(errc::operation_canceled);
  ReportError(cancelled, "Context cancelled");
}
|
|
|
|
// Returns the context to a clean state: clears the stored error, installs `handler` as the new
// error handler and clears the cancellation flag.
//
// A handler fiber left over from the previous round is detached from the context while err_mu_
// is held, but joined only after the lock has been released.
void Context::Reset(ErrHandler handler) {
  fb2::Fiber previous_handler_fb;

  {
    std::lock_guard guard{err_mu_};
    err_ = {};
    err_handler_ = std::move(handler);
    Cancellation::flag_.store(false, std::memory_order_relaxed);
    previous_handler_fb.swap(err_handler_fb_);
  }

  previous_handler_fb.JoinIfNeeded();
}
|
|
|
|
// Replaces the error handler with `handler`, but only while no error has been reported yet.
// Returns the stored error: empty when the switch happened, or the already reported error (in
// which case the handler is left unchanged).
GenericError Context::SwitchErrorHandler(ErrHandler handler) {
  std::lock_guard guard{err_mu_};

  if (err_)
    return err_;

  // With no error set the handler cannot be running, so it is safe to replace it without
  // checking on the handler fiber.
  err_handler_ = std::move(handler);
  return err_;
}
|
|
|
|
void Context::JoinErrorHandler() {
|
|
fb2::Fiber fb;
|
|
unique_lock lk{err_mu_};
|
|
fb.swap(err_handler_fb_);
|
|
lk.unlock();
|
|
fb.JoinIfNeeded();
|
|
}
|
|
|
|
// Records `err` as the context's error unless one is already stored; only the first report wins.
//
// On the first report the error handler (if any) is launched in a fiber named
// "report_internal_error" with a copy of the error, and the context is then cancelled.
// Returns the stored error, i.e. the earlier one when this call lost the race.
GenericError Context::ReportErrorInternal(GenericError&& err) {
  lock_guard guard{err_mu_};

  const bool already_failed = static_cast<bool>(err_);
  if (already_failed)
    return err_;

  err_ = std::move(err);

  // The context is either fresh or went through Reset, which joins the handler fiber, so no
  // handler fiber may exist at this point.
  CHECK(!err_handler_fb_.IsJoinable());

  DVLOG(1) << "ReportError: " << err_.Format();

  // Moving out of err_handler_ is fine: the handler is meant to run at most once.
  if (err_handler_) {
    err_handler_fb_ = fb2::Fiber("report_internal_error", std::move(err_handler_), err_);
  }

  Cancellation::Cancel();
  return err_;
}
|
|
|
|
bool AbslParseFlag(std::string_view in, dfly::MemoryBytesFlag* flag, std::string* err) {
|
|
int64_t val;
|
|
if (dfly::ParseHumanReadableBytes(in, &val) && val >= 0) {
|
|
flag->value = val;
|
|
return true;
|
|
}
|
|
|
|
*err = "Use human-readable format, eg.: 500MB, 1G, 1TB";
|
|
return false;
|
|
}
|
|
|
|
// Abseil flag formatting hook for MemoryBytesFlag: renders the stored value with
// strings::HumanReadableNumBytes.
std::string AbslUnparseFlag(const dfly::MemoryBytesFlag& flag) {
  std::string text = strings::HumanReadableNumBytes(flag.value);
  return text;
}
|
|
|
|
std::ostream& operator<<(std::ostream& os, const GlobalState& state) {
|
|
return os << GlobalStateName(state);
|
|
}
|
|
|
|
NonUniquePicksGenerator::NonUniquePicksGenerator(RandomPick max_range) : max_range_(max_range) {
|
|
CHECK_GT(max_range, RandomPick(0));
|
|
}
|
|
|
|
// Draws one value with absl::Uniform over 0u..max_range_. Successive calls are independent, so
// the same value may come back more than once.
RandomPick NonUniquePicksGenerator::Generate() {
  const RandomPick pick = absl::Uniform(bitgen_, 0u, max_range_);
  return pick;
}
|
|
|
|
UniquePicksGenerator::UniquePicksGenerator(std::uint32_t picks_count, RandomPick max_range)
|
|
: remaining_picks_count_(picks_count), picked_indexes_(picks_count) {
|
|
CHECK_GE(max_range, picks_count);
|
|
current_random_limit_ = max_range - picks_count;
|
|
}
|
|
|
|
// Returns a value that no earlier call on this generator has returned.
//
// Each call draws a candidate no larger than the current limit and then raises the limit by one.
// A fresh candidate is returned as is; if it was already handed out, the limit value itself is
// returned instead - it cannot have been produced before, because it was outside the range of
// all earlier draws. (This looks like Floyd's sampling scheme.)
// Must not be called more often than the `picks_count` given at construction (DCHECK).
RandomPick UniquePicksGenerator::Generate() {
  DCHECK_GT(remaining_picks_count_, 0u);
  --remaining_picks_count_;

  const RandomPick limit = current_random_limit_;
  ++current_random_limit_;

  const RandomPick candidate = absl::Uniform(bitgen_, 0u, limit + 1u);

  const bool newly_inserted = picked_indexes_.emplace(candidate).second;
  if (!newly_inserted) {
    // Collision with an earlier pick: fall back to the limit value.
    picked_indexes_.insert(limit);
    return limit;
  }

  return candidate;
}
|
|
|
|
// Remembers the EngineShard of the constructing thread; the other members verify in debug builds
// that they run on a thread with that same shard.
ThreadLocalMutex::ThreadLocalMutex() {
  EngineShard* const current_shard = EngineShard::tlocal();
  shard_ = current_shard;
}
|
|
|
|
// Debug-only check that destruction happens on a thread whose shard matches the one recorded at
// construction.
ThreadLocalMutex::~ThreadLocalMutex() {
  DCHECK_EQ(EngineShard::tlocal(), shard_);
}
|
|
|
|
void ThreadLocalMutex::lock() {
|
|
DCHECK_EQ(EngineShard::tlocal(), shard_);
|
|
util::fb2::NoOpLock noop_lk_;
|
|
cond_var_.wait(noop_lk_, [this]() { return !flag_; });
|
|
flag_ = true;
|
|
}
|
|
|
|
// Releases the mutex: clears `flag_` and wakes one waiter blocked in lock(), if any.
void ThreadLocalMutex::unlock() {
  DCHECK_EQ(EngineShard::tlocal(), shard_);

  flag_ = false;
  cond_var_.notify_one();
}
|
|
|
|
} // namespace dfly
|