Mirror of https://github.com/dragonflydb/dragonfly.git
fix: Fix test_network_disconnect_during_migration test (#4224)
There are actually a few failures fixed in this PR, only one of which is a test bug:

* `db_slice_->Traverse()` can yield, causing `fiber_cancelled_`'s value to change
* When a migration is cancelled, it may never finish `WaitForInflightToComplete()`, because it has `in_flight_bytes_` that will never reach the destination due to the cancellation
* `IterateMap()` with numeric keys/values overrode the key's buffer with the value's buffer

Fixes #4207
parent dcee9a9874
commit 779bba71f9
5 changed files with 23 additions and 8 deletions
@@ -37,7 +37,7 @@ class OutgoingMigration::SliceSlotMigration : private ProtocolClient {
   }

   ~SliceSlotMigration() {
-    streamer_.Cancel();
+    Cancel();
     cntx_.JoinErrorHandler();
   }

@@ -81,6 +81,7 @@ class OutgoingMigration::SliceSlotMigration : private ProtocolClient {
   }

   void Cancel() {
+    cntx_.Cancel();
     streamer_.Cancel();
   }

@@ -274,13 +274,13 @@ bool IterateMap(const PrimeValue& pv, const IterateKVFunc& func) {
   bool finished = true;

   if (pv.Encoding() == kEncodingListPack) {
-    uint8_t intbuf[LP_INTBUF_SIZE];
+    uint8_t k_intbuf[LP_INTBUF_SIZE], v_intbuf[LP_INTBUF_SIZE];
     uint8_t* lp = (uint8_t*)pv.RObjPtr();
     uint8_t* fptr = lpFirst(lp);
     while (fptr) {
-      string_view key = LpGetView(fptr, intbuf);
+      string_view key = LpGetView(fptr, k_intbuf);
       fptr = lpNext(lp, fptr);
-      string_view val = LpGetView(fptr, intbuf);
+      string_view val = LpGetView(fptr, v_intbuf);
       fptr = lpNext(lp, fptr);
       if (!func(ContainerEntry{key.data(), key.size()}, ContainerEntry{val.data(), val.size()})) {
         finished = false;
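Note on the IterateMap() change above: LpGetView() decodes integer-encoded listpack entries into the caller-supplied scratch buffer, so the returned string_view aliases that buffer; reusing one buffer for the key and then the value overwrites the key's bytes. Below is a minimal standalone sketch of that aliasing, where DecodeIntoBuf() is a hypothetical stand-in for the real listpack decoding, not the Dragonfly/Redis API:

#include <cstddef>
#include <cstdio>
#include <string_view>

// Hypothetical helper standing in for LpGetView()/lpGet(): for integer-encoded
// entries the decoder writes the decimal text into the caller's scratch buffer
// and the returned view points INTO that buffer.
std::string_view DecodeIntoBuf(long value, char* buf, size_t buf_len) {
  int n = std::snprintf(buf, buf_len, "%ld", value);
  return std::string_view(buf, static_cast<size_t>(n));
}

int main() {
  char shared[32];
  std::string_view key = DecodeIntoBuf(111, shared, sizeof(shared));
  std::string_view val = DecodeIntoBuf(222, shared, sizeof(shared));  // clobbers key's bytes
  std::printf("key=%.*s val=%.*s\n", int(key.size()), key.data(), int(val.size()), val.data());
  // Prints "key=222 val=222": the key view now aliases the value's text.

  char k_buf[32], v_buf[32];  // separate buffers, as in the fixed IterateMap()
  key = DecodeIntoBuf(111, k_buf, sizeof(k_buf));
  val = DecodeIntoBuf(222, v_buf, sizeof(v_buf));
  std::printf("key=%.*s val=%.*s\n", int(key.size()), key.data(), int(val.size()), val.data());
  return 0;
}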
@@ -41,7 +41,9 @@ JournalStreamer::JournalStreamer(journal::Journal* journal, Context* cntx)
 }

 JournalStreamer::~JournalStreamer() {
-  DCHECK_EQ(in_flight_bytes_, 0u);
+  if (!cntx_->IsCancelled()) {
+    DCHECK_EQ(in_flight_bytes_, 0u);
+  }
   VLOG(1) << "~JournalStreamer";
 }

@@ -79,7 +81,9 @@ void JournalStreamer::Cancel() {
   VLOG(1) << "JournalStreamer::Cancel";
   waker_.notifyAll();
   journal_->UnregisterOnChange(journal_cb_id_);
-  WaitForInflightToComplete();
+  if (!cntx_->IsCancelled()) {
+    WaitForInflightToComplete();
+  }
 }

 size_t JournalStreamer::GetTotalBufferCapacities() const {
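Note on the JournalStreamer::Cancel() change above: once the context is cancelled, the peer will never acknowledge the remaining in_flight_bytes_, so unconditionally calling WaitForInflightToComplete() can hang forever (the second bullet in the commit message). A minimal sketch of the guarded-drain pattern, using a plain mutex/condition variable rather than Dragonfly's fiber primitives; all names here are illustrative:

#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <mutex>

// Illustrative only; names mirror the patch but this is not Dragonfly's API.
class InflightTracker {
 public:
  void OnSend(size_t bytes) {
    std::lock_guard<std::mutex> lk(mu_);
    in_flight_bytes_ += bytes;
  }
  void OnAck(size_t bytes) {
    std::lock_guard<std::mutex> lk(mu_);
    in_flight_bytes_ -= bytes;
    if (in_flight_bytes_ == 0)
      cv_.notify_all();
  }
  void Cancel() {
    cancelled_ = true;  // after this, no further acks are expected
  }
  void Shutdown() {
    // Mirrors the patched JournalStreamer::Cancel(): only drain in-flight bytes
    // when not cancelled, because a cancelled peer never acknowledges the rest.
    if (!cancelled_) {
      std::unique_lock<std::mutex> lk(mu_);
      cv_.wait(lk, [this] { return in_flight_bytes_ == 0; });
    }
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  size_t in_flight_bytes_ = 0;
  std::atomic<bool> cancelled_{false};
};

int main() {
  InflightTracker t;
  t.OnSend(100);
  t.OnAck(100);
  t.Shutdown();  // returns immediately: nothing in flight
  t.Cancel();
  t.OnSend(50);  // these bytes will never be acked
  t.Shutdown();  // skips the wait instead of blocking forever
  return 0;
}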
@@ -215,8 +219,15 @@ void RestoreStreamer::Run() {
       return;

     cursor = db_slice_->Traverse(pt, cursor, [&](PrimeTable::bucket_iterator it) {
+      if (fiber_cancelled_)  // Could be cancelled any time as Traverse may preempt
+        return;
+
       db_slice_->FlushChangeToEarlierCallbacks(0 /*db_id always 0 for cluster*/,
                                                DbSlice::Iterator::FromPrime(it), snapshot_version_);
+
+      if (fiber_cancelled_)  // Could have been cancelled in above call too
+        return;
+
       WriteBucket(it);
     });

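Note on the RestoreStreamer::Run() change above: the bucket callback runs under cooperative scheduling, and both Traverse() and FlushChangeToEarlierCallbacks() may preempt, so fiber_cancelled_ has to be re-read after every potentially preempting call rather than checked once. A rough standalone sketch of that hazard, using a plain thread and a short sleep to stand in for fiber preemption; the helpers below are illustrative only:

#include <atomic>
#include <chrono>
#include <cstdio>
#include <thread>

std::atomic<bool> fiber_cancelled{false};

// Stand-in for a call like FlushChangeToEarlierCallbacks() that may preempt the
// current fiber; here a short sleep gives the "canceller" a chance to run.
void MaybePreempt() {
  std::this_thread::sleep_for(std::chrono::milliseconds(1));
}

void WriteBucket(int bucket) {
  std::printf("writing bucket %d\n", bucket);
}

int main() {
  std::thread canceller([] {
    std::this_thread::sleep_for(std::chrono::milliseconds(5));
    fiber_cancelled = true;
  });

  for (int bucket = 0; bucket < 100; ++bucket) {
    if (fiber_cancelled)  // may have flipped while the previous bucket was flushed
      break;
    MaybePreempt();
    if (fiber_cancelled)  // re-check: the call above is a preemption point
      break;
    WriteBucket(bucket);
  }
  canceller.join();
  return 0;
}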
@@ -112,6 +112,8 @@ MultiCommandSquasher::SquashResult MultiCommandSquasher::TrySquash(StoredCmd* cm

   cmd->Fill(&tmp_keylist_);
   auto args = absl::MakeSpan(tmp_keylist_);
+  if (args.empty())
+    return SquashResult::NOT_SQUASHED;

   auto keys = DetermineKeys(cmd->Cid(), args);
   if (!keys.ok())
@@ -1290,7 +1290,7 @@ async def test_migration_with_key_ttl(df_factory):


 @dfly_args({"proactor_threads": 4, "cluster_mode": "yes"})
-async def test_network_disconnect_during_migration(df_factory, df_seeder_factory):
+async def test_network_disconnect_during_migration(df_factory):
     instances = [
         df_factory.create(port=BASE_PORT + i, admin_port=BASE_PORT + i + 1000) for i in range(2)
     ]
@@ -1328,7 +1328,7 @@ async def test_network_disconnect_during_migration(df_factory, df_seeder_factory

     await proxy.start()

-    await wait_for_status(nodes[0].admin_client, nodes[1].id, "FINISHED", 20)
+    await wait_for_status(nodes[0].admin_client, nodes[1].id, "FINISHED", 60)
     nodes[0].migrations = []
     nodes[0].slots = []
     nodes[1].slots = [(0, 16383)]
@@ -1336,6 +1336,7 @@ async def test_network_disconnect_during_migration(df_factory, df_seeder_factory
     await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])

     assert (await StaticSeeder.capture(nodes[1].client)) == start_capture
+    await proxy.close()


 @pytest.mark.parametrize(