mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-11 10:25:47 +02:00
chore: add oom stats to /metrics (#2680)
* chore: add oom stats to /metrics

Expose oom/cmd errors when a command is rejected because the server has reached the OOM state (controlled by the oom_deny_ratio flag).
Expose oom/insert errors when a new key is not inserted or a dashtable is not grown (controlled by table_growth_margin).

Move the OOM command check to a place that covers all types of transactions, including multi and squashing transactions.

---------

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
parent
7c443f3a15
commit
0c1150956b
4 changed files with 25 additions and 12 deletions
|
@ -968,7 +968,19 @@ static optional<ErrorReply> VerifyConnectionAclStatus(const CommandId* cid,
|
||||||
optional<ErrorReply> Service::VerifyCommandExecution(const CommandId* cid,
|
optional<ErrorReply> Service::VerifyCommandExecution(const CommandId* cid,
|
||||||
const ConnectionContext* cntx,
|
const ConnectionContext* cntx,
|
||||||
CmdArgList tail_args) {
|
CmdArgList tail_args) {
|
||||||
// TODO: Move OOM check here
|
ServerState& etl = *ServerState::tlocal();
|
||||||
|
|
||||||
|
if ((cid->opt_mask() & CO::DENYOOM) && etl.is_master) {
|
||||||
|
uint64_t start_ns = absl::GetCurrentTimeNanos();
|
||||||
|
|
||||||
|
uint64_t used_memory = etl.GetUsedMemory(start_ns);
|
||||||
|
double oom_deny_ratio = GetFlag(FLAGS_oom_deny_ratio);
|
||||||
|
if (used_memory > (max_memory_limit * oom_deny_ratio)) {
|
||||||
|
etl.stats.oom_error_cmd_cnt++;
|
||||||
|
return facade::ErrorReply{kOutOfMemory};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return VerifyConnectionAclStatus(cid, cntx, "ACL rules changed between the MULTI and EXEC",
|
return VerifyConnectionAclStatus(cid, cntx, "ACL rules changed between the MULTI and EXEC",
|
||||||
tail_args);
|
tail_args);
|
||||||
}
|
}
|
||||||
|
@ -1136,16 +1148,6 @@ void Service::DispatchCommand(CmdArgList args, facade::ConnectionContext* cntx)
|
||||||
return cntx->SendSimpleString("QUEUED");
|
return cntx->SendSimpleString("QUEUED");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cid->opt_mask() & CO::DENYOOM && etl.is_master) {
|
|
||||||
uint64_t start_ns = absl::GetCurrentTimeNanos();
|
|
||||||
|
|
||||||
uint64_t used_memory = etl.GetUsedMemory(start_ns);
|
|
||||||
double oom_deny_ratio = GetFlag(FLAGS_oom_deny_ratio);
|
|
||||||
if (used_memory > (max_memory_limit * oom_deny_ratio)) {
|
|
||||||
return cntx->reply_builder()->SendError(kOutOfMemory);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create command transaction
|
// Create command transaction
|
||||||
intrusive_ptr<Transaction> dist_trans;
|
intrusive_ptr<Transaction> dist_trans;
|
||||||
|
|
||||||
|
|
|
@ -1025,6 +1025,13 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
|
||||||
&resp->body());
|
&resp->body());
|
||||||
AppendMetricWithoutLabels("memory_max_bytes", "", max_memory_limit, MetricType::GAUGE,
|
AppendMetricWithoutLabels("memory_max_bytes", "", max_memory_limit, MetricType::GAUGE,
|
||||||
&resp->body());
|
&resp->body());
|
||||||
|
|
||||||
|
if (m.events.insertion_rejections | m.coordinator_stats.oom_error_cmd_cnt) {
|
||||||
|
AppendMetricValue("oom_errors_total", m.events.insertion_rejections, {"type"}, {"insert"},
|
||||||
|
&resp->body());
|
||||||
|
AppendMetricValue("oom_errors_total", m.coordinator_stats.oom_error_cmd_cnt, {"type"}, {"cmd"},
|
||||||
|
&resp->body());
|
||||||
|
}
|
||||||
if (sdata_res.has_value()) {
|
if (sdata_res.has_value()) {
|
||||||
size_t rss = sdata_res->vm_rss + sdata_res->hugetlb_pages;
|
size_t rss = sdata_res->vm_rss + sdata_res->hugetlb_pages;
|
||||||
AppendMetricWithoutLabels("used_memory_rss_bytes", "", rss, MetricType::GAUGE, &resp->body());
|
AppendMetricWithoutLabels("used_memory_rss_bytes", "", rss, MetricType::GAUGE, &resp->body());
|
||||||
|
|
|
@ -28,7 +28,7 @@ ServerState::Stats::Stats(unsigned num_shards) : tx_width_freq_arr(num_shards) {
|
||||||
}
|
}
|
||||||
|
|
||||||
ServerState::Stats& ServerState::Stats::Add(const ServerState::Stats& other) {
|
ServerState::Stats& ServerState::Stats::Add(const ServerState::Stats& other) {
|
||||||
static_assert(sizeof(Stats) == 14 * 8, "Stats size mismatch");
|
static_assert(sizeof(Stats) == 15 * 8, "Stats size mismatch");
|
||||||
|
|
||||||
for (int i = 0; i < NUM_TX_TYPES; ++i) {
|
for (int i = 0; i < NUM_TX_TYPES; ++i) {
|
||||||
this->tx_type_cnt[i] += other.tx_type_cnt[i];
|
this->tx_type_cnt[i] += other.tx_type_cnt[i];
|
||||||
|
@ -44,6 +44,7 @@ ServerState::Stats& ServerState::Stats::Add(const ServerState::Stats& other) {
|
||||||
this->multi_squash_exec_reply_usec += other.multi_squash_exec_reply_usec;
|
this->multi_squash_exec_reply_usec += other.multi_squash_exec_reply_usec;
|
||||||
|
|
||||||
this->blocked_on_interpreter += other.blocked_on_interpreter;
|
this->blocked_on_interpreter += other.blocked_on_interpreter;
|
||||||
|
this->oom_error_cmd_cnt += other.oom_error_cmd_cnt;
|
||||||
|
|
||||||
if (this->tx_width_freq_arr.size() > 0) {
|
if (this->tx_width_freq_arr.size() > 0) {
|
||||||
DCHECK_EQ(this->tx_width_freq_arr.size(), other.tx_width_freq_arr.size());
|
DCHECK_EQ(this->tx_width_freq_arr.size(), other.tx_width_freq_arr.size());
|
||||||
|
|
|
@ -118,6 +118,9 @@ class ServerState { // public struct - to allow initialization.
|
||||||
|
|
||||||
uint64_t blocked_on_interpreter = 0;
|
uint64_t blocked_on_interpreter = 0;
|
||||||
|
|
||||||
|
// Number of times we rejected command dispatch due to OOM condition.
|
||||||
|
uint64_t oom_error_cmd_cnt = 0;
|
||||||
|
|
||||||
std::valarray<uint64_t> tx_width_freq_arr;
|
std::valarray<uint64_t> tx_width_freq_arr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue