mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-11 10:25:47 +02:00
feat(info): add new persistence section fields (#2396)
* feat(info): add new persistence section fields. Implements #2386. Added fields: 1) last_failed_save, 2) last_error, 3) last_failed_save_duration_sec, 4) saving, 5) current_save_duration_sec.
This commit is contained in:
parent
8d09478474
commit
7b61268533
8 changed files with 68 additions and 40 deletions
|
@ -694,7 +694,7 @@ TEST_F(ClusterFamilyTest, ClusterFirstConfigCallDropsEntriesNotOwnedByNode) {
|
|||
EXPECT_EQ(Run({"save", "df"}), "OK");
|
||||
|
||||
auto save_info = service_->server_family().GetLastSaveInfo();
|
||||
EXPECT_EQ(Run({"debug", "load", save_info->file_name}), "OK");
|
||||
EXPECT_EQ(Run({"debug", "load", save_info.file_name}), "OK");
|
||||
EXPECT_EQ(CheckedInt({"dbsize"}), 50000);
|
||||
|
||||
EXPECT_EQ(RunPrivileged({"dflycluster", "config", R"json(
|
||||
|
|
|
@ -350,7 +350,7 @@ void DebugCmd::Reload(CmdArgList args) {
|
|||
}
|
||||
}
|
||||
|
||||
string last_save_file = sf_.GetLastSaveInfo()->file_name;
|
||||
string last_save_file = sf_.GetLastSaveInfo().file_name;
|
||||
Load(last_save_file);
|
||||
}
|
||||
|
||||
|
|
|
@ -178,7 +178,6 @@ GenericError SaveStagesController::Save() {
|
|||
|
||||
FinalizeFileMovement();
|
||||
|
||||
if (!shared_err_)
|
||||
UpdateSaveInfo();
|
||||
|
||||
return *shared_err_;
|
||||
|
@ -266,26 +265,32 @@ void SaveStagesController::SaveRdb() {
|
|||
}
|
||||
|
||||
void SaveStagesController::UpdateSaveInfo() {
|
||||
auto seconds = (absl::Now() - start_time_) / absl::Seconds(1);
|
||||
if (shared_err_) {
|
||||
lock_guard lk{*save_mu_};
|
||||
last_save_info_->last_error = *shared_err_;
|
||||
last_save_info_->last_error_time = absl::ToUnixSeconds(start_time_);
|
||||
last_save_info_->failed_duration_sec = seconds;
|
||||
return;
|
||||
}
|
||||
|
||||
fs::path resulting_path = full_path_;
|
||||
if (use_dfs_format_)
|
||||
SetExtension("summary", ".dfs", &resulting_path);
|
||||
else
|
||||
resulting_path.replace_extension(); // remove .tmp
|
||||
|
||||
double seconds = double(absl::ToInt64Milliseconds(absl::Now() - start_time_)) / 1000;
|
||||
LOG(INFO) << "Saving " << resulting_path << " finished after "
|
||||
<< strings::HumanReadableElapsedTime(seconds);
|
||||
|
||||
auto save_info = make_shared<LastSaveInfo>();
|
||||
for (const auto& k_v : rdb_name_map_) {
|
||||
save_info->freq_map.emplace_back(k_v);
|
||||
}
|
||||
save_info->save_time = absl::ToUnixSeconds(start_time_);
|
||||
save_info->file_name = resulting_path.generic_string();
|
||||
save_info->duration_sec = uint32_t(seconds);
|
||||
|
||||
lock_guard lk{*save_mu_};
|
||||
last_save_info_->swap(save_info); // swap - to deallocate the old version outstide of the lock.
|
||||
last_save_info_->freq_map.clear();
|
||||
for (const auto& k_v : rdb_name_map_) {
|
||||
last_save_info_->freq_map.emplace_back(k_v);
|
||||
}
|
||||
last_save_info_->save_time = absl::ToUnixSeconds(start_time_);
|
||||
last_save_info_->file_name = resulting_path.generic_string();
|
||||
last_save_info_->success_duration_sec = seconds;
|
||||
}
|
||||
|
||||
GenericError SaveStagesController::InitResources() {
|
||||
|
|
|
@ -26,7 +26,7 @@ struct SaveStagesInputs {
|
|||
Service* service_;
|
||||
std::atomic_bool* is_saving_;
|
||||
util::fb2::FiberQueueThreadPool* fq_threadpool_;
|
||||
std::shared_ptr<LastSaveInfo>* last_save_info_;
|
||||
LastSaveInfo* last_save_info_ ABSL_GUARDED_BY(save_mu_);
|
||||
util::fb2::Mutex* save_mu_;
|
||||
std::function<size_t()>* save_bytes_cb_;
|
||||
std::shared_ptr<SnapshotStorage> snapshot_storage_;
|
||||
|
|
|
@ -636,7 +636,7 @@ TEST_F(GenericFamilyTest, Info) {
|
|||
InitWithDbFilename(); // Needed for `save`
|
||||
|
||||
auto get_rdb_changes_since_last_save = [](const string& str) -> size_t {
|
||||
const string matcher = "rdb_changes_since_last_save:";
|
||||
const string matcher = "rdb_changes_since_last_success_save:";
|
||||
const auto pos = str.find(matcher) + matcher.size();
|
||||
const auto sub = str.substr(pos, 1);
|
||||
return atoi(sub.c_str());
|
||||
|
|
|
@ -166,7 +166,7 @@ TEST_F(RdbTest, ComressionModeSaveDragonflyAndReload) {
|
|||
ASSERT_EQ(resp, "OK");
|
||||
|
||||
auto save_info = service_->server_family().GetLastSaveInfo();
|
||||
resp = Run({"debug", "load", save_info->file_name});
|
||||
resp = Run({"debug", "load", save_info.file_name});
|
||||
ASSERT_EQ(resp, "OK");
|
||||
ASSERT_EQ(50000, CheckedInt({"dbsize"}));
|
||||
}
|
||||
|
@ -181,7 +181,7 @@ TEST_F(RdbTest, RdbLoaderOnReadCompressedDataShouldNotEnterEnsureReadFlow) {
|
|||
ASSERT_EQ(resp, "OK");
|
||||
|
||||
auto save_info = service_->server_family().GetLastSaveInfo();
|
||||
resp = Run({"debug", "load", save_info->file_name});
|
||||
resp = Run({"debug", "load", save_info.file_name});
|
||||
ASSERT_EQ(resp, "OK");
|
||||
}
|
||||
|
||||
|
@ -323,8 +323,8 @@ TEST_F(RdbTest, SaveFlush) {
|
|||
Run({"flushdb"});
|
||||
save_fb.Join();
|
||||
auto save_info = service_->server_family().GetLastSaveInfo();
|
||||
ASSERT_EQ(1, save_info->freq_map.size());
|
||||
auto& k_v = save_info->freq_map.front();
|
||||
ASSERT_EQ(1, save_info.freq_map.size());
|
||||
auto& k_v = save_info.freq_map.front();
|
||||
EXPECT_EQ("string", k_v.first);
|
||||
EXPECT_EQ(500000, k_v.second);
|
||||
}
|
||||
|
@ -360,8 +360,8 @@ TEST_F(RdbTest, SaveManyDbs) {
|
|||
save_fb.Join();
|
||||
|
||||
auto save_info = service_->server_family().GetLastSaveInfo();
|
||||
ASSERT_EQ(1, save_info->freq_map.size());
|
||||
auto& k_v = save_info->freq_map.front();
|
||||
ASSERT_EQ(1, save_info.freq_map.size());
|
||||
auto& k_v = save_info.freq_map.front();
|
||||
|
||||
EXPECT_EQ("string", k_v.first);
|
||||
EXPECT_EQ(60000, k_v.second);
|
||||
|
|
|
@ -521,8 +521,7 @@ std::string_view GetOSString() {
|
|||
|
||||
ServerFamily::ServerFamily(Service* service) : service_(*service) {
|
||||
start_time_ = time(NULL);
|
||||
last_save_info_ = make_shared<LastSaveInfo>();
|
||||
last_save_info_->save_time = start_time_;
|
||||
last_save_info_.save_time = start_time_;
|
||||
script_mgr_.reset(new ScriptMgr());
|
||||
journal_.reset(new journal::Journal());
|
||||
|
||||
|
@ -1227,10 +1226,18 @@ GenericError ServerFamily::DoSave(bool new_version, string_view basename, Transa
|
|||
StrCat(GlobalStateName(new_state), " - can not save database")};
|
||||
}
|
||||
}
|
||||
{
|
||||
std::lock_guard lck(save_mu_);
|
||||
start_save_time_ = absl::Now();
|
||||
}
|
||||
SaveStagesController sc{detail::SaveStagesInputs{
|
||||
new_version, basename, trans, &service_, &is_saving_, fq_threadpool_.get(), &last_save_info_,
|
||||
&save_mu_, &save_bytes_cb_, snapshot_storage_}};
|
||||
auto res = sc.Save();
|
||||
{
|
||||
std::lock_guard lck(save_mu_);
|
||||
start_save_time_.reset();
|
||||
}
|
||||
if (!ignore_state)
|
||||
service_.SwitchState(GlobalState::SAVING, GlobalState::ACTIVE);
|
||||
return res;
|
||||
|
@ -1251,7 +1258,7 @@ error_code ServerFamily::Drakarys(Transaction* transaction, DbIndex db_ind) {
|
|||
return error_code{};
|
||||
}
|
||||
|
||||
shared_ptr<const LastSaveInfo> ServerFamily::GetLastSaveInfo() const {
|
||||
LastSaveInfo ServerFamily::GetLastSaveInfo() const {
|
||||
lock_guard lk(save_mu_);
|
||||
return last_save_info_;
|
||||
}
|
||||
|
@ -1782,22 +1789,30 @@ void ServerFamily::Info(CmdArgList args, ConnectionContext* cntx) {
|
|||
}
|
||||
|
||||
if (should_enter("PERSISTENCE", true)) {
|
||||
decltype(last_save_info_) save_info;
|
||||
{
|
||||
lock_guard lk(save_mu_);
|
||||
save_info = last_save_info_;
|
||||
}
|
||||
// when when last save
|
||||
append("last_save", save_info->save_time);
|
||||
append("last_save_duration_sec", save_info->duration_sec);
|
||||
append("last_save_file", save_info->file_name);
|
||||
auto save_info = GetLastSaveInfo();
|
||||
|
||||
// when last success save
|
||||
append("last_success_save", save_info.save_time);
|
||||
append("last_saved_file", save_info.file_name);
|
||||
append("last_success_save_duration_sec", save_info.success_duration_sec);
|
||||
|
||||
size_t is_loading = service_.GetGlobalState() == GlobalState::LOADING;
|
||||
append("loading", is_loading);
|
||||
|
||||
for (const auto& k_v : save_info->freq_map) {
|
||||
auto curent_durration_sec =
|
||||
start_save_time_ ? (absl::Now() - *start_save_time_) / absl::Seconds(1) : 0;
|
||||
append("saving", curent_durration_sec != 0);
|
||||
append("current_save_duration_sec", curent_durration_sec);
|
||||
|
||||
for (const auto& k_v : save_info.freq_map) {
|
||||
append(StrCat("rdb_", k_v.first), k_v.second);
|
||||
}
|
||||
append("rdb_changes_since_last_save", m.events.update);
|
||||
append("rdb_changes_since_last_success_save", m.events.update);
|
||||
|
||||
// when last failed save
|
||||
append("last_failed_save", save_info.last_error_time);
|
||||
append("last_error", save_info.last_error.Format());
|
||||
append("last_failed_save_duration_sec", save_info.failed_duration_sec);
|
||||
}
|
||||
|
||||
if (should_enter("TRANSACTION", true)) {
|
||||
|
@ -2289,7 +2304,7 @@ void ServerFamily::LastSave(CmdArgList args, ConnectionContext* cntx) {
|
|||
time_t save_time;
|
||||
{
|
||||
lock_guard lk(save_mu_);
|
||||
save_time = last_save_info_->save_time;
|
||||
save_time = last_save_info_.save_time;
|
||||
}
|
||||
cntx->SendLong(save_time);
|
||||
}
|
||||
|
|
|
@ -102,10 +102,15 @@ struct Metrics {
|
|||
};
|
||||
|
||||
struct LastSaveInfo {
|
||||
// last success save info
|
||||
time_t save_time = 0; // epoch time in seconds.
|
||||
uint32_t duration_sec = 0;
|
||||
uint32_t success_duration_sec = 0;
|
||||
std::string file_name; //
|
||||
std::vector<std::pair<std::string_view, size_t>> freq_map; // RDB_TYPE_xxx -> count mapping.
|
||||
// last error save info
|
||||
GenericError last_error;
|
||||
time_t last_error_time = 0; // epoch time in seconds.
|
||||
time_t failed_duration_sec = 0; // duration of the last failed save, in seconds.
|
||||
};
|
||||
|
||||
struct SnapshotSpec {
|
||||
|
@ -158,7 +163,7 @@ class ServerFamily {
|
|||
// if kDbAll is passed, burns all the databases to the ground.
|
||||
std::error_code Drakarys(Transaction* transaction, DbIndex db_ind);
|
||||
|
||||
std::shared_ptr<const LastSaveInfo> GetLastSaveInfo() const;
|
||||
LastSaveInfo GetLastSaveInfo() const;
|
||||
|
||||
// Load snapshot from file (.rdb file or summary.dfs file) and return
|
||||
// future with error_code.
|
||||
|
@ -272,8 +277,11 @@ class ServerFamily {
|
|||
|
||||
time_t start_time_ = 0; // in seconds, epoch time.
|
||||
|
||||
std::shared_ptr<LastSaveInfo> last_save_info_; // protected by save_mu_;
|
||||
LastSaveInfo last_save_info_ ABSL_GUARDED_BY(save_mu_);
|
||||
std::atomic_bool is_saving_{false};
|
||||
// This field duplicates SaveStagesController::start_save_time_.
|
||||
// TODO: make SaveStagesController a member of this class.
|
||||
std::optional<absl::Time> start_save_time_;
|
||||
// If a save operation is currently in progress, calling this function will provide information
|
||||
// about the memory consumption during the save operation.
|
||||
std::function<size_t()> save_bytes_cb_ = nullptr;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue