chore(metrics): add rdb_bgsave_in_progress and rdb_last_bgsave_status (#5061)

* add metric rdb_bgsave_in_progress
* add metric rdb_last_bgsave_status
This commit is contained in:
Kostas Kyrimis 2025-05-07 10:15:14 +03:00 committed by GitHub
parent 3f3d232211
commit 843a40dba9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 39 additions and 7 deletions

View file

@ -35,6 +35,8 @@ struct SaveStagesInputs {
Service* service_;
util::fb2::FiberQueueThreadPool* fq_threadpool_;
std::shared_ptr<SnapshotStorage> snapshot_storage_;
// true if the command that triggered this flow is bgsave. false otherwise.
bool is_bg_save_;
};
class RdbSnapshot {
@ -77,7 +79,7 @@ class RdbSnapshot {
};
struct SaveStagesController : public SaveStagesInputs {
SaveStagesController(SaveStagesInputs&& input);
explicit SaveStagesController(SaveStagesInputs&& input);
// Objects of this class are used concurrently. Call this function
// in a mutually exclusive context to avoid data races.
// Also call this function before any call to `WaitAllSnapshots`
@ -97,6 +99,10 @@ struct SaveStagesController : public SaveStagesInputs {
uint32_t GetCurrentSaveDuration();
RdbSaver::SnapshotStats GetCurrentSnapshotProgress() const;
bool IsBgSave() const {
return is_bg_save_;
}
private:
// In the new version (.dfs) we store a file for every shard and one more summary file.
// Summary file is always last in snapshots array.
@ -126,7 +132,6 @@ struct SaveStagesController : public SaveStagesInputs {
void RunStage(void (SaveStagesController::*cb)(unsigned));
private:
time_t start_time_;
std::filesystem::path full_path_;
@ -135,6 +140,7 @@ struct SaveStagesController : public SaveStagesInputs {
absl::flat_hash_map<string_view, size_t> rdb_name_map_;
util::fb2::Mutex rdb_name_map_mu_;
bool is_bg_save_ = false;
};
GenericError ValidateFilename(const std::filesystem::path& filename, bool new_version);

View file

@ -1707,7 +1707,8 @@ GenericError ServerFamily::DoSave(bool ignore_state) {
}
GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts,
Transaction* trans, bool ignore_state) {
Transaction* trans, DoSaveCheckAndStartOpts opts) {
auto [ignore_state, bg_save] = opts;
auto state = ServerState::tlocal()->gstate();
// In some cases we want to create a snapshot even if server is not active, f.e in takeover
@ -1728,7 +1729,7 @@ GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_op
save_controller_ = make_unique<SaveStagesController>(detail::SaveStagesInputs{
save_cmd_opts.new_version, save_cmd_opts.cloud_uri, save_cmd_opts.basename, trans,
&service_, fq_threadpool_.get(), snapshot_storage});
&service_, fq_threadpool_.get(), snapshot_storage, opts.bg_save});
auto res = save_controller_->InitResourcesAndStart();
@ -1736,8 +1737,13 @@ GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_op
DCHECK_EQ(res->error, true);
last_save_info_.SetLastSaveError(*res);
save_controller_.reset();
if (bg_save) {
last_save_info_.last_bgsave_status = false;
}
return res->error;
}
last_save_info_.bgsave_in_progress = bg_save;
}
return {};
}
@ -1751,6 +1757,11 @@ GenericError ServerFamily::WaitUntilSaveFinished(Transaction* trans, bool ignore
util::fb2::LockGuard lk(save_mu_);
save_info = save_controller_->Finalize();
if (save_controller_->IsBgSave()) {
last_save_info_.bgsave_in_progress = false;
last_save_info_.last_bgsave_status = !save_info.error;
}
if (save_info.error) {
last_save_info_.SetLastSaveError(save_info);
} else {
@ -1767,7 +1778,8 @@ GenericError ServerFamily::WaitUntilSaveFinished(Transaction* trans, bool ignore
GenericError ServerFamily::DoSave(const SaveCmdOptions& save_cmd_opts, Transaction* trans,
bool ignore_state) {
if (auto ec = DoSaveCheckAndStart(save_cmd_opts, trans, ignore_state); ec) {
DoSaveCheckAndStartOpts opts{.ignore_state = ignore_state};
if (auto ec = DoSaveCheckAndStart(save_cmd_opts, trans, opts); ec) {
return ec;
}
@ -2182,7 +2194,8 @@ void ServerFamily::BgSave(CmdArgList args, const CommandContext& cmd_cntx) {
return;
}
if (auto ec = DoSaveCheckAndStart(*maybe_res, cmd_cntx.tx); ec) {
DoSaveCheckAndStartOpts opts{.bg_save = true};
if (auto ec = DoSaveCheckAndStart(*maybe_res, cmd_cntx.tx, opts); ec) {
cmd_cntx.rb->SendError(ec.Format());
return;
}
@ -2621,6 +2634,11 @@ string ServerFamily::FormatInfoMetrics(const Metrics& m, std::string_view sectio
}
append("rdb_changes_since_last_success_save", m.events.update);
auto save = GetLastSaveInfo();
append("rdb_bgsave_in_progress", static_cast<int>(save.bgsave_in_progress));
std::string val = save.last_bgsave_status ? "ok" : "err";
append("rdb_last_bgsave_status", val);
// time and error of the last failed save
append("last_failed_save", save_info.last_error_time);
append("last_error", save_info.last_error.Format());

View file

@ -146,6 +146,9 @@ struct LastSaveInfo {
GenericError last_error;
time_t last_error_time = 0; // epoch time in seconds.
time_t failed_duration_sec = 0; // epoch time in seconds.
// false if last attempt failed
bool last_bgsave_status = true;
bool bgsave_in_progress = false;
};
struct SnapshotSpec {
@ -340,8 +343,13 @@ class ServerFamily {
void BgSaveFb(boost::intrusive_ptr<Transaction> trans);
// Option bundle passed to DoSaveCheckAndStart. Both flags default to false, so callers only
// spell out the one they need. Field order is significant: the struct is unpacked positionally
// with a structured binding.
struct DoSaveCheckAndStartOpts {
  // Forwarded to the server-state check performed before a save starts.
  // NOTE(review): presumably lets a snapshot proceed while the server is not active
  // (e.g. during takeover) -- confirm against the implementation.
  bool ignore_state{false};

  // Set when the save was requested via BGSAVE; used to maintain the bgsave-specific
  // fields of the last-save info.
  bool bg_save{false};
};
GenericError DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts, Transaction* trans,
bool ignore_state = false) ABSL_LOCKS_EXCLUDED(save_mu_);
DoSaveCheckAndStartOpts opts) ABSL_LOCKS_EXCLUDED(save_mu_);
GenericError WaitUntilSaveFinished(Transaction* trans,
bool ignore_state = false) ABSL_NO_THREAD_SAFETY_ANALYSIS;