chore(metrics): add rdb_bgsave_in_progress and rdb_last_bgsave_status (#5061)

* add metric rdb_bgsave_in_progress
* add metric rdb_last_bgsave_status
This commit is contained in:
Kostas Kyrimis 2025-05-07 10:15:14 +03:00 committed by GitHub
parent 3f3d232211
commit 843a40dba9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 39 additions and 7 deletions

View file

@ -35,6 +35,8 @@ struct SaveStagesInputs {
Service* service_; Service* service_;
util::fb2::FiberQueueThreadPool* fq_threadpool_; util::fb2::FiberQueueThreadPool* fq_threadpool_;
std::shared_ptr<SnapshotStorage> snapshot_storage_; std::shared_ptr<SnapshotStorage> snapshot_storage_;
// true if the command that triggered this flow is bgsave. false otherwise.
bool is_bg_save_;
}; };
class RdbSnapshot { class RdbSnapshot {
@ -77,7 +79,7 @@ class RdbSnapshot {
}; };
struct SaveStagesController : public SaveStagesInputs { struct SaveStagesController : public SaveStagesInputs {
SaveStagesController(SaveStagesInputs&& input); explicit SaveStagesController(SaveStagesInputs&& input);
// Objects of this class are used concurrently. Call this function // Objects of this class are used concurrently. Call this function
// in a mutually exclusive context to avoid data races. // in a mutually exclusive context to avoid data races.
// Also call this function before any call to `WaitAllSnapshots` // Also call this function before any call to `WaitAllSnapshots`
@ -97,6 +99,10 @@ struct SaveStagesController : public SaveStagesInputs {
uint32_t GetCurrentSaveDuration(); uint32_t GetCurrentSaveDuration();
RdbSaver::SnapshotStats GetCurrentSnapshotProgress() const; RdbSaver::SnapshotStats GetCurrentSnapshotProgress() const;
bool IsBgSave() const {
return is_bg_save_;
}
private: private:
// In the new version (.dfs) we store a file for every shard and one more summary file. // In the new version (.dfs) we store a file for every shard and one more summary file.
// Summary file is always last in snapshots array. // Summary file is always last in snapshots array.
@ -126,7 +132,6 @@ struct SaveStagesController : public SaveStagesInputs {
void RunStage(void (SaveStagesController::*cb)(unsigned)); void RunStage(void (SaveStagesController::*cb)(unsigned));
private:
time_t start_time_; time_t start_time_;
std::filesystem::path full_path_; std::filesystem::path full_path_;
@ -135,6 +140,7 @@ struct SaveStagesController : public SaveStagesInputs {
absl::flat_hash_map<string_view, size_t> rdb_name_map_; absl::flat_hash_map<string_view, size_t> rdb_name_map_;
util::fb2::Mutex rdb_name_map_mu_; util::fb2::Mutex rdb_name_map_mu_;
bool is_bg_save_ = false;
}; };
GenericError ValidateFilename(const std::filesystem::path& filename, bool new_version); GenericError ValidateFilename(const std::filesystem::path& filename, bool new_version);

View file

@ -1707,7 +1707,8 @@ GenericError ServerFamily::DoSave(bool ignore_state) {
} }
GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts, GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts,
Transaction* trans, bool ignore_state) { Transaction* trans, DoSaveCheckAndStartOpts opts) {
auto [ignore_state, bg_save] = opts;
auto state = ServerState::tlocal()->gstate(); auto state = ServerState::tlocal()->gstate();
// In some cases we want to create a snapshot even if server is not active, e.g. in takeover // In some cases we want to create a snapshot even if server is not active, e.g. in takeover
@ -1728,7 +1729,7 @@ GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_op
save_controller_ = make_unique<SaveStagesController>(detail::SaveStagesInputs{ save_controller_ = make_unique<SaveStagesController>(detail::SaveStagesInputs{
save_cmd_opts.new_version, save_cmd_opts.cloud_uri, save_cmd_opts.basename, trans, save_cmd_opts.new_version, save_cmd_opts.cloud_uri, save_cmd_opts.basename, trans,
&service_, fq_threadpool_.get(), snapshot_storage}); &service_, fq_threadpool_.get(), snapshot_storage, opts.bg_save});
auto res = save_controller_->InitResourcesAndStart(); auto res = save_controller_->InitResourcesAndStart();
@ -1736,8 +1737,13 @@ GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_op
DCHECK_EQ(res->error, true); DCHECK_EQ(res->error, true);
last_save_info_.SetLastSaveError(*res); last_save_info_.SetLastSaveError(*res);
save_controller_.reset(); save_controller_.reset();
if (bg_save) {
last_save_info_.last_bgsave_status = false;
}
return res->error; return res->error;
} }
last_save_info_.bgsave_in_progress = bg_save;
} }
return {}; return {};
} }
@ -1751,6 +1757,11 @@ GenericError ServerFamily::WaitUntilSaveFinished(Transaction* trans, bool ignore
util::fb2::LockGuard lk(save_mu_); util::fb2::LockGuard lk(save_mu_);
save_info = save_controller_->Finalize(); save_info = save_controller_->Finalize();
if (save_controller_->IsBgSave()) {
last_save_info_.bgsave_in_progress = false;
last_save_info_.last_bgsave_status = !save_info.error;
}
if (save_info.error) { if (save_info.error) {
last_save_info_.SetLastSaveError(save_info); last_save_info_.SetLastSaveError(save_info);
} else { } else {
@ -1767,7 +1778,8 @@ GenericError ServerFamily::WaitUntilSaveFinished(Transaction* trans, bool ignore
GenericError ServerFamily::DoSave(const SaveCmdOptions& save_cmd_opts, Transaction* trans, GenericError ServerFamily::DoSave(const SaveCmdOptions& save_cmd_opts, Transaction* trans,
bool ignore_state) { bool ignore_state) {
if (auto ec = DoSaveCheckAndStart(save_cmd_opts, trans, ignore_state); ec) { DoSaveCheckAndStartOpts opts{.ignore_state = ignore_state};
if (auto ec = DoSaveCheckAndStart(save_cmd_opts, trans, opts); ec) {
return ec; return ec;
} }
@ -2182,7 +2194,8 @@ void ServerFamily::BgSave(CmdArgList args, const CommandContext& cmd_cntx) {
return; return;
} }
if (auto ec = DoSaveCheckAndStart(*maybe_res, cmd_cntx.tx); ec) { DoSaveCheckAndStartOpts opts{.bg_save = true};
if (auto ec = DoSaveCheckAndStart(*maybe_res, cmd_cntx.tx, opts); ec) {
cmd_cntx.rb->SendError(ec.Format()); cmd_cntx.rb->SendError(ec.Format());
return; return;
} }
@ -2621,6 +2634,11 @@ string ServerFamily::FormatInfoMetrics(const Metrics& m, std::string_view sectio
} }
append("rdb_changes_since_last_success_save", m.events.update); append("rdb_changes_since_last_success_save", m.events.update);
auto save = GetLastSaveInfo();
append("rdb_bgsave_in_progress", static_cast<int>(save.bgsave_in_progress));
std::string val = save.last_bgsave_status ? "ok" : "err";
append("rdb_last_bgsave_status", val);
// when last failed save // when last failed save
append("last_failed_save", save_info.last_error_time); append("last_failed_save", save_info.last_error_time);
append("last_error", save_info.last_error.Format()); append("last_error", save_info.last_error.Format());

View file

@ -146,6 +146,9 @@ struct LastSaveInfo {
GenericError last_error; GenericError last_error;
time_t last_error_time = 0; // epoch time in seconds. time_t last_error_time = 0; // epoch time in seconds.
time_t failed_duration_sec = 0; // epoch time in seconds. time_t failed_duration_sec = 0; // epoch time in seconds.
// false if last attempt failed
bool last_bgsave_status = true;
bool bgsave_in_progress = false;
}; };
struct SnapshotSpec { struct SnapshotSpec {
@ -340,8 +343,13 @@ class ServerFamily {
void BgSaveFb(boost::intrusive_ptr<Transaction> trans); void BgSaveFb(boost::intrusive_ptr<Transaction> trans);
// Option bundle accepted by DoSaveCheckAndStart.
// Must remain a plain aggregate with this member order: call sites build it
// with designated initializers and the callee unpacks it positionally via a
// structured binding.
struct DoSaveCheckAndStartOpts {
  // Forwarded from DoSave's `ignore_state` argument; off unless requested.
  bool ignore_state{false};
  // Set to true by the BGSAVE command path so the save is tracked as a
  // background save in the last-save bookkeeping.
  bool bg_save{false};
};
GenericError DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts, Transaction* trans, GenericError DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts, Transaction* trans,
bool ignore_state = false) ABSL_LOCKS_EXCLUDED(save_mu_); DoSaveCheckAndStartOpts opts) ABSL_LOCKS_EXCLUDED(save_mu_);
GenericError WaitUntilSaveFinished(Transaction* trans, GenericError WaitUntilSaveFinished(Transaction* trans,
bool ignore_state = false) ABSL_NO_THREAD_SAFETY_ANALYSIS; bool ignore_state = false) ABSL_NO_THREAD_SAFETY_ANALYSIS;