mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-11 10:25:47 +02:00
chore(metrics): add rdb_bgsave_in_progress and rdb_last_bgsave_status (#5061)
* add metric rdb_bgsave_in_progress * add metric rdb_last_bgsave_status
This commit is contained in:
parent
3f3d232211
commit
843a40dba9
3 changed files with 39 additions and 7 deletions
|
@ -35,6 +35,8 @@ struct SaveStagesInputs {
|
||||||
Service* service_;
|
Service* service_;
|
||||||
util::fb2::FiberQueueThreadPool* fq_threadpool_;
|
util::fb2::FiberQueueThreadPool* fq_threadpool_;
|
||||||
std::shared_ptr<SnapshotStorage> snapshot_storage_;
|
std::shared_ptr<SnapshotStorage> snapshot_storage_;
|
||||||
|
// true if the command that triggered this flow is bgsave. false otherwise.
|
||||||
|
bool is_bg_save_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RdbSnapshot {
|
class RdbSnapshot {
|
||||||
|
@ -77,7 +79,7 @@ class RdbSnapshot {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SaveStagesController : public SaveStagesInputs {
|
struct SaveStagesController : public SaveStagesInputs {
|
||||||
SaveStagesController(SaveStagesInputs&& input);
|
explicit SaveStagesController(SaveStagesInputs&& input);
|
||||||
// Objects of this class are used concurrently. Call this function
|
// Objects of this class are used concurrently. Call this function
|
||||||
// in a mutually exclusive context to avoid data races.
|
// in a mutually exclusive context to avoid data races.
|
||||||
// Also call this function before any call to `WaitAllSnapshots`
|
// Also call this function before any call to `WaitAllSnapshots`
|
||||||
|
@ -97,6 +99,10 @@ struct SaveStagesController : public SaveStagesInputs {
|
||||||
uint32_t GetCurrentSaveDuration();
|
uint32_t GetCurrentSaveDuration();
|
||||||
RdbSaver::SnapshotStats GetCurrentSnapshotProgress() const;
|
RdbSaver::SnapshotStats GetCurrentSnapshotProgress() const;
|
||||||
|
|
||||||
|
bool IsBgSave() const {
|
||||||
|
return is_bg_save_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// In the new version (.dfs) we store a file for every shard and one more summary file.
|
// In the new version (.dfs) we store a file for every shard and one more summary file.
|
||||||
// Summary file is always last in snapshots array.
|
// Summary file is always last in snapshots array.
|
||||||
|
@ -126,7 +132,6 @@ struct SaveStagesController : public SaveStagesInputs {
|
||||||
|
|
||||||
void RunStage(void (SaveStagesController::*cb)(unsigned));
|
void RunStage(void (SaveStagesController::*cb)(unsigned));
|
||||||
|
|
||||||
private:
|
|
||||||
time_t start_time_;
|
time_t start_time_;
|
||||||
std::filesystem::path full_path_;
|
std::filesystem::path full_path_;
|
||||||
|
|
||||||
|
@ -135,6 +140,7 @@ struct SaveStagesController : public SaveStagesInputs {
|
||||||
|
|
||||||
absl::flat_hash_map<string_view, size_t> rdb_name_map_;
|
absl::flat_hash_map<string_view, size_t> rdb_name_map_;
|
||||||
util::fb2::Mutex rdb_name_map_mu_;
|
util::fb2::Mutex rdb_name_map_mu_;
|
||||||
|
bool is_bg_save_ = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
GenericError ValidateFilename(const std::filesystem::path& filename, bool new_version);
|
GenericError ValidateFilename(const std::filesystem::path& filename, bool new_version);
|
||||||
|
|
|
@ -1707,7 +1707,8 @@ GenericError ServerFamily::DoSave(bool ignore_state) {
|
||||||
}
|
}
|
||||||
|
|
||||||
GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts,
|
GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts,
|
||||||
Transaction* trans, bool ignore_state) {
|
Transaction* trans, DoSaveCheckAndStartOpts opts) {
|
||||||
|
auto [ignore_state, bg_save] = opts;
|
||||||
auto state = ServerState::tlocal()->gstate();
|
auto state = ServerState::tlocal()->gstate();
|
||||||
|
|
||||||
// In some cases we want to create a snapshot even if server is not active, e.g. in takeover
|
// In some cases we want to create a snapshot even if server is not active, e.g. in takeover
|
||||||
|
@ -1728,7 +1729,7 @@ GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_op
|
||||||
|
|
||||||
save_controller_ = make_unique<SaveStagesController>(detail::SaveStagesInputs{
|
save_controller_ = make_unique<SaveStagesController>(detail::SaveStagesInputs{
|
||||||
save_cmd_opts.new_version, save_cmd_opts.cloud_uri, save_cmd_opts.basename, trans,
|
save_cmd_opts.new_version, save_cmd_opts.cloud_uri, save_cmd_opts.basename, trans,
|
||||||
&service_, fq_threadpool_.get(), snapshot_storage});
|
&service_, fq_threadpool_.get(), snapshot_storage, opts.bg_save});
|
||||||
|
|
||||||
auto res = save_controller_->InitResourcesAndStart();
|
auto res = save_controller_->InitResourcesAndStart();
|
||||||
|
|
||||||
|
@ -1736,8 +1737,13 @@ GenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_op
|
||||||
DCHECK_EQ(res->error, true);
|
DCHECK_EQ(res->error, true);
|
||||||
last_save_info_.SetLastSaveError(*res);
|
last_save_info_.SetLastSaveError(*res);
|
||||||
save_controller_.reset();
|
save_controller_.reset();
|
||||||
|
if (bg_save) {
|
||||||
|
last_save_info_.last_bgsave_status = false;
|
||||||
|
}
|
||||||
return res->error;
|
return res->error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
last_save_info_.bgsave_in_progress = bg_save;
|
||||||
}
|
}
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
@ -1751,6 +1757,11 @@ GenericError ServerFamily::WaitUntilSaveFinished(Transaction* trans, bool ignore
|
||||||
util::fb2::LockGuard lk(save_mu_);
|
util::fb2::LockGuard lk(save_mu_);
|
||||||
save_info = save_controller_->Finalize();
|
save_info = save_controller_->Finalize();
|
||||||
|
|
||||||
|
if (save_controller_->IsBgSave()) {
|
||||||
|
last_save_info_.bgsave_in_progress = false;
|
||||||
|
last_save_info_.last_bgsave_status = !save_info.error;
|
||||||
|
}
|
||||||
|
|
||||||
if (save_info.error) {
|
if (save_info.error) {
|
||||||
last_save_info_.SetLastSaveError(save_info);
|
last_save_info_.SetLastSaveError(save_info);
|
||||||
} else {
|
} else {
|
||||||
|
@ -1767,7 +1778,8 @@ GenericError ServerFamily::WaitUntilSaveFinished(Transaction* trans, bool ignore
|
||||||
|
|
||||||
GenericError ServerFamily::DoSave(const SaveCmdOptions& save_cmd_opts, Transaction* trans,
|
GenericError ServerFamily::DoSave(const SaveCmdOptions& save_cmd_opts, Transaction* trans,
|
||||||
bool ignore_state) {
|
bool ignore_state) {
|
||||||
if (auto ec = DoSaveCheckAndStart(save_cmd_opts, trans, ignore_state); ec) {
|
DoSaveCheckAndStartOpts opts{.ignore_state = ignore_state};
|
||||||
|
if (auto ec = DoSaveCheckAndStart(save_cmd_opts, trans, opts); ec) {
|
||||||
return ec;
|
return ec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2182,7 +2194,8 @@ void ServerFamily::BgSave(CmdArgList args, const CommandContext& cmd_cntx) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto ec = DoSaveCheckAndStart(*maybe_res, cmd_cntx.tx); ec) {
|
DoSaveCheckAndStartOpts opts{.bg_save = true};
|
||||||
|
if (auto ec = DoSaveCheckAndStart(*maybe_res, cmd_cntx.tx, opts); ec) {
|
||||||
cmd_cntx.rb->SendError(ec.Format());
|
cmd_cntx.rb->SendError(ec.Format());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2621,6 +2634,11 @@ string ServerFamily::FormatInfoMetrics(const Metrics& m, std::string_view sectio
|
||||||
}
|
}
|
||||||
append("rdb_changes_since_last_success_save", m.events.update);
|
append("rdb_changes_since_last_success_save", m.events.update);
|
||||||
|
|
||||||
|
auto save = GetLastSaveInfo();
|
||||||
|
append("rdb_bgsave_in_progress", static_cast<int>(save.bgsave_in_progress));
|
||||||
|
std::string val = save.last_bgsave_status ? "ok" : "err";
|
||||||
|
append("rdb_last_bgsave_status", val);
|
||||||
|
|
||||||
// when last failed save
|
// when last failed save
|
||||||
append("last_failed_save", save_info.last_error_time);
|
append("last_failed_save", save_info.last_error_time);
|
||||||
append("last_error", save_info.last_error.Format());
|
append("last_error", save_info.last_error.Format());
|
||||||
|
|
|
@ -146,6 +146,9 @@ struct LastSaveInfo {
|
||||||
GenericError last_error;
|
GenericError last_error;
|
||||||
time_t last_error_time = 0; // epoch time in seconds.
|
time_t last_error_time = 0; // epoch time in seconds.
|
||||||
time_t failed_duration_sec = 0; // duration in seconds.
|
time_t failed_duration_sec = 0; // duration in seconds.
|
||||||
|
// false if last attempt failed
|
||||||
|
bool last_bgsave_status = true;
|
||||||
|
bool bgsave_in_progress = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SnapshotSpec {
|
struct SnapshotSpec {
|
||||||
|
@ -340,8 +343,13 @@ class ServerFamily {
|
||||||
|
|
||||||
void BgSaveFb(boost::intrusive_ptr<Transaction> trans);
|
void BgSaveFb(boost::intrusive_ptr<Transaction> trans);
|
||||||
|
|
||||||
|
struct DoSaveCheckAndStartOpts {
|
||||||
|
bool ignore_state = false;
|
||||||
|
bool bg_save = false;
|
||||||
|
};
|
||||||
|
|
||||||
GenericError DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts, Transaction* trans,
|
GenericError DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts, Transaction* trans,
|
||||||
bool ignore_state = false) ABSL_LOCKS_EXCLUDED(save_mu_);
|
DoSaveCheckAndStartOpts opts) ABSL_LOCKS_EXCLUDED(save_mu_);
|
||||||
|
|
||||||
GenericError WaitUntilSaveFinished(Transaction* trans,
|
GenericError WaitUntilSaveFinished(Transaction* trans,
|
||||||
bool ignore_state = false) ABSL_NO_THREAD_SAFETY_ANALYSIS;
|
bool ignore_state = false) ABSL_NO_THREAD_SAFETY_ANALYSIS;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue