mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-11 10:25:47 +02:00
parent
c42b3dc02f
commit
7666aae6dc
5 changed files with 38 additions and 39 deletions
|
@ -494,6 +494,8 @@ void ClusterFamily::DflyClusterConfig(CmdArgList args, ConnectionContext* cntx)
|
||||||
|
|
||||||
lock_guard gu(set_config_mu);
|
lock_guard gu(set_config_mu);
|
||||||
|
|
||||||
|
lock_guard config_update_lk(
|
||||||
|
config_update_mu_); // to prevent simultaneous update config from outgoing migration
|
||||||
// TODO we shouldn't provide cntx into StartSlotMigrations
|
// TODO we shouldn't provide cntx into StartSlotMigrations
|
||||||
if (!StartSlotMigrations(new_config->GetNewOutgoingMigrations(tl_cluster_config), cntx)) {
|
if (!StartSlotMigrations(new_config->GetNewOutgoingMigrations(tl_cluster_config), cntx)) {
|
||||||
return cntx->SendError("Can't start the migration");
|
return cntx->SendError("Can't start the migration");
|
||||||
|
@ -706,19 +708,17 @@ void ClusterFamily::DflyMigrate(CmdArgList args, ConnectionContext* cntx) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IncomingSlotMigration* ClusterFamily::CreateIncomingMigration(std::string source_id,
|
std::shared_ptr<IncomingSlotMigration> ClusterFamily::CreateIncomingMigration(std::string source_id,
|
||||||
SlotRanges slots,
|
SlotRanges slots,
|
||||||
uint32_t shards_num) {
|
uint32_t shards_num) {
|
||||||
lock_guard lk(migration_mu_);
|
lock_guard lk(migration_mu_);
|
||||||
for (const auto& mj : incoming_migrations_jobs_) {
|
for (const auto& mj : incoming_migrations_jobs_) {
|
||||||
if (mj->GetSourceID() == source_id) {
|
if (mj->GetSourceID() == source_id) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return incoming_migrations_jobs_
|
return incoming_migrations_jobs_.emplace_back(make_shared<IncomingSlotMigration>(
|
||||||
.emplace_back(make_shared<IncomingSlotMigration>(
|
std::move(source_id), &server_family_->service(), std::move(slots), shards_num));
|
||||||
std::move(source_id), &server_family_->service(), std::move(slots), shards_num))
|
|
||||||
.get();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<IncomingSlotMigration> ClusterFamily::GetIncomingMigration(
|
std::shared_ptr<IncomingSlotMigration> ClusterFamily::GetIncomingMigration(
|
||||||
|
@ -742,7 +742,7 @@ void ClusterFamily::RemoveOutgoingMigrations(const std::vector<MigrationInfo>& m
|
||||||
OutgoingMigration& migration = *it->get();
|
OutgoingMigration& migration = *it->get();
|
||||||
LOG(INFO) << "Outgoing migration cancelled: slots " << SlotRange::ToString(migration.GetSlots())
|
LOG(INFO) << "Outgoing migration cancelled: slots " << SlotRange::ToString(migration.GetSlots())
|
||||||
<< " to " << migration.GetHostIp() << ":" << migration.GetPort();
|
<< " to " << migration.GetHostIp() << ":" << migration.GetPort();
|
||||||
migration.Cancel();
|
migration.Finish();
|
||||||
outgoing_migration_jobs_.erase(it);
|
outgoing_migration_jobs_.erase(it);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -833,8 +833,10 @@ void ClusterFamily::DflyMigrateFlow(CmdArgList args, ConnectionContext* cntx) {
|
||||||
cntx->conn()->SetName(absl::StrCat("migration_flow_", source_id));
|
cntx->conn()->SetName(absl::StrCat("migration_flow_", source_id));
|
||||||
|
|
||||||
auto migration = GetIncomingMigration(source_id);
|
auto migration = GetIncomingMigration(source_id);
|
||||||
if (!migration)
|
if (!migration) {
|
||||||
|
// TODO process error when migration is canceled
|
||||||
return cntx->SendError(kIdNotFound);
|
return cntx->SendError(kIdNotFound);
|
||||||
|
}
|
||||||
|
|
||||||
DCHECK(cntx->sync_dispatch);
|
DCHECK(cntx->sync_dispatch);
|
||||||
// we do this to be ignored by the dispatch tracker
|
// we do this to be ignored by the dispatch tracker
|
||||||
|
@ -847,7 +849,7 @@ void ClusterFamily::DflyMigrateFlow(CmdArgList args, ConnectionContext* cntx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ClusterFamily::UpdateConfig(const std::vector<SlotRange>& slots, bool enable) {
|
void ClusterFamily::UpdateConfig(const std::vector<SlotRange>& slots, bool enable) {
|
||||||
lock_guard gu(set_config_mu);
|
lock_guard gu(config_update_mu_);
|
||||||
|
|
||||||
auto new_config = tl_cluster_config->CloneWithChanges(slots, enable);
|
auto new_config = tl_cluster_config->CloneWithChanges(slots, enable);
|
||||||
|
|
||||||
|
@ -870,6 +872,7 @@ void ClusterFamily::DflyMigrateAck(CmdArgList args, ConnectionContext* cntx) {
|
||||||
[source_id](const auto& m) { return m.node_id == source_id; });
|
[source_id](const auto& m) { return m.node_id == source_id; });
|
||||||
if (m_it == in_migrations.end()) {
|
if (m_it == in_migrations.end()) {
|
||||||
LOG(WARNING) << "migration isn't in config";
|
LOG(WARNING) << "migration isn't in config";
|
||||||
|
// TODO process error if migration was canceled
|
||||||
return cntx->SendLong(OutgoingMigration::kInvalidAttempt);
|
return cntx->SendLong(OutgoingMigration::kInvalidAttempt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -73,8 +73,9 @@ class ClusterFamily {
|
||||||
void DflyMigrateAck(CmdArgList args, ConnectionContext* cntx);
|
void DflyMigrateAck(CmdArgList args, ConnectionContext* cntx);
|
||||||
|
|
||||||
// create a IncomingSlotMigration entity which will execute migration
|
// create a IncomingSlotMigration entity which will execute migration
|
||||||
IncomingSlotMigration* CreateIncomingMigration(std::string source_id, SlotRanges slots,
|
std::shared_ptr<IncomingSlotMigration> CreateIncomingMigration(std::string source_id,
|
||||||
uint32_t shards_num);
|
SlotRanges slots,
|
||||||
|
uint32_t shards_num);
|
||||||
|
|
||||||
std::shared_ptr<IncomingSlotMigration> GetIncomingMigration(std::string_view source_id);
|
std::shared_ptr<IncomingSlotMigration> GetIncomingMigration(std::string_view source_id);
|
||||||
|
|
||||||
|
@ -97,6 +98,8 @@ class ClusterFamily {
|
||||||
private:
|
private:
|
||||||
ClusterShardInfo GetEmulatedShardInfo(ConnectionContext* cntx) const;
|
ClusterShardInfo GetEmulatedShardInfo(ConnectionContext* cntx) const;
|
||||||
|
|
||||||
|
mutable util::fb2::Mutex config_update_mu_;
|
||||||
|
|
||||||
std::string id_;
|
std::string id_;
|
||||||
|
|
||||||
ServerFamily* server_family_ = nullptr;
|
ServerFamily* server_family_ = nullptr;
|
||||||
|
|
|
@ -81,15 +81,12 @@ OutgoingMigration::~OutgoingMigration() {
|
||||||
main_sync_fb_.JoinIfNeeded();
|
main_sync_fb_.JoinIfNeeded();
|
||||||
}
|
}
|
||||||
|
|
||||||
void OutgoingMigration::Cancel() {
|
void OutgoingMigration::Finish() {
|
||||||
state_.store(MigrationState::C_CANCELLED);
|
shard_set->pool()->AwaitFiberOnAll([this](util::ProactorBase* pb) {
|
||||||
|
if (const auto* shard = EngineShard::tlocal(); shard)
|
||||||
auto start_cb = [this](util::ProactorBase* pb) {
|
|
||||||
if (auto* shard = EngineShard::tlocal(); shard) {
|
|
||||||
slot_migrations_[shard->shard_id()]->Cancel();
|
slot_migrations_[shard->shard_id()]->Cancel();
|
||||||
}
|
});
|
||||||
};
|
state_.store(MigrationState::C_FINISHED);
|
||||||
shard_set->pool()->AwaitFiberOnAll(std::move(start_cb));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MigrationState OutgoingMigration::GetState() const {
|
MigrationState OutgoingMigration::GetState() const {
|
||||||
|
@ -108,8 +105,6 @@ void OutgoingMigration::SyncFb() {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
state_.store(MigrationState::C_SYNC);
|
|
||||||
|
|
||||||
shard_set->pool()->AwaitFiberOnAll(std::move(start_cb));
|
shard_set->pool()->AwaitFiberOnAll(std::move(start_cb));
|
||||||
|
|
||||||
for (auto& migration : slot_migrations_) {
|
for (auto& migration : slot_migrations_) {
|
||||||
|
@ -121,13 +116,13 @@ void OutgoingMigration::SyncFb() {
|
||||||
// TODO implement blocking on migrated slots only
|
// TODO implement blocking on migrated slots only
|
||||||
|
|
||||||
long attempt = 0;
|
long attempt = 0;
|
||||||
while (state_.load() != MigrationState::C_CANCELLED && !FinishMigration(++attempt)) {
|
while (state_.load() != MigrationState::C_FINISHED && !FinalyzeMigration(++attempt)) {
|
||||||
// process commands that were on pause and try again
|
// process commands that were on pause and try again
|
||||||
ThisFiber::SleepFor(500ms);
|
ThisFiber::SleepFor(500ms);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OutgoingMigration::FinishMigration(long attempt) {
|
bool OutgoingMigration::FinalyzeMigration(long attempt) {
|
||||||
bool is_block_active = true;
|
bool is_block_active = true;
|
||||||
auto is_pause_in_progress = [&is_block_active] { return is_block_active; };
|
auto is_pause_in_progress = [&is_block_active] { return is_block_active; };
|
||||||
auto pause_fb_opt = Pause(server_family_->GetNonPriviligedListeners(), nullptr,
|
auto pause_fb_opt = Pause(server_family_->GetNonPriviligedListeners(), nullptr,
|
||||||
|
@ -181,12 +176,8 @@ bool OutgoingMigration::FinishMigration(long attempt) {
|
||||||
}
|
}
|
||||||
} while (attempt_res != attempt);
|
} while (attempt_res != attempt);
|
||||||
|
|
||||||
shard_set->pool()->AwaitFiberOnAll([this](util::ProactorBase* pb) {
|
Finish();
|
||||||
if (const auto* shard = EngineShard::tlocal(); shard)
|
|
||||||
slot_migrations_[shard->shard_id()]->Cancel();
|
|
||||||
});
|
|
||||||
|
|
||||||
state_.store(MigrationState::C_FINISHED);
|
|
||||||
cf_->UpdateConfig(migration_info_.slot_ranges, false);
|
cf_->UpdateConfig(migration_info_.slot_ranges, false);
|
||||||
VLOG(1) << "Config is updated for " << cf_->MyID();
|
VLOG(1) << "Config is updated for " << cf_->MyID();
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -28,10 +28,8 @@ class OutgoingMigration : private ProtocolClient {
|
||||||
// start migration process, sends INIT command to the target node
|
// start migration process, sends INIT command to the target node
|
||||||
std::error_code Start(ConnectionContext* cntx);
|
std::error_code Start(ConnectionContext* cntx);
|
||||||
|
|
||||||
// should be run for all shards
|
// mark migration as FINISHED and cancel migration if it's not finished yet
|
||||||
void StartFlow(journal::Journal* journal, io::Sink* dest);
|
void Finish();
|
||||||
|
|
||||||
void Cancel();
|
|
||||||
|
|
||||||
MigrationState GetState() const;
|
MigrationState GetState() const;
|
||||||
|
|
||||||
|
@ -54,18 +52,21 @@ class OutgoingMigration : private ProtocolClient {
|
||||||
static constexpr long kInvalidAttempt = -1;
|
static constexpr long kInvalidAttempt = -1;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// should be run for all shards
|
||||||
|
void StartFlow(journal::Journal* journal, io::Sink* dest);
|
||||||
|
|
||||||
MigrationState GetStateImpl() const;
|
MigrationState GetStateImpl() const;
|
||||||
// SliceSlotMigration manages state and data transfering for the corresponding shard
|
// SliceSlotMigration manages state and data transfering for the corresponding shard
|
||||||
class SliceSlotMigration;
|
class SliceSlotMigration;
|
||||||
|
|
||||||
void SyncFb();
|
void SyncFb();
|
||||||
bool FinishMigration(long attempt);
|
bool FinalyzeMigration(long attempt);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MigrationInfo migration_info_;
|
MigrationInfo migration_info_;
|
||||||
Context cntx_;
|
Context cntx_;
|
||||||
mutable util::fb2::Mutex flows_mu_;
|
mutable util::fb2::Mutex finish_mu_;
|
||||||
std::vector<std::unique_ptr<SliceSlotMigration>> slot_migrations_ ABSL_GUARDED_BY(flows_mu_);
|
std::vector<std::unique_ptr<SliceSlotMigration>> slot_migrations_;
|
||||||
ServerFamily* server_family_;
|
ServerFamily* server_family_;
|
||||||
ClusterFamily* cf_;
|
ClusterFamily* cf_;
|
||||||
|
|
||||||
|
|
|
@ -112,11 +112,12 @@ RestoreStreamer::~RestoreStreamer() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RestoreStreamer::Cancel() {
|
void RestoreStreamer::Cancel() {
|
||||||
if (snapshot_version_ != 0) {
|
auto sver = snapshot_version_;
|
||||||
|
snapshot_version_ = 0; // to prevent double cancel in another fiber
|
||||||
|
if (sver != 0) {
|
||||||
fiber_cancellation_.Cancel();
|
fiber_cancellation_.Cancel();
|
||||||
db_slice_->UnregisterOnChange(snapshot_version_);
|
db_slice_->UnregisterOnChange(sver);
|
||||||
JournalStreamer::Cancel();
|
JournalStreamer::Cancel();
|
||||||
snapshot_version_ = 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue