mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-11 10:25:47 +02:00
fix: preemption in atomic section of heartbeat (#4720)
The bug is that expiring keys during heartbeat should not preempt while writing to the journal, and we assert this with a FiberAtomicGuard. However, this atomicity guarantee is violated because the journal callback acquires a lock on a mutex that is already locked by OnJournalEntry(). The fix is to release the lock when OnJournalEntry() preempts. Signed-off-by: kostas <kostas@dragonflydb.io>
This commit is contained in:
parent
e6fabfef6f
commit
9e52438862
2 changed files with 35 additions and 3 deletions
|
@ -410,10 +410,13 @@ void SliceSnapshot::OnJournalEntry(const journal::JournalItem& item, bool await)
|
||||||
// To enable journal flushing to sync after non auto journal command is executed we call
|
// To enable journal flushing to sync after non auto journal command is executed we call
|
||||||
// TriggerJournalWriteToSink. This call uses the NOOP opcode with await=true. Since there is no
|
// TriggerJournalWriteToSink. This call uses the NOOP opcode with await=true. Since there is no
|
||||||
// additional journal change to serialize, it simply invokes PushSerialized.
|
// additional journal change to serialize, it simply invokes PushSerialized.
|
||||||
|
{
|
||||||
|
// We should release the lock after we preempt
|
||||||
std::lock_guard guard(big_value_mu_);
|
std::lock_guard guard(big_value_mu_);
|
||||||
if (item.opcode != journal::Op::NOOP) {
|
if (item.opcode != journal::Op::NOOP) {
|
||||||
serializer_->WriteJournalEntry(item.data);
|
serializer_->WriteJournalEntry(item.data);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (await) {
|
if (await) {
|
||||||
// This is the only place that flushes in streaming mode
|
// This is the only place that flushes in streaming mode
|
||||||
|
|
|
@ -2770,3 +2770,32 @@ async def test_stream_approximate_trimming(df_factory):
|
||||||
master_data = await StaticSeeder.capture(c_master)
|
master_data = await StaticSeeder.capture(c_master)
|
||||||
replica_data = await StaticSeeder.capture(c_replica)
|
replica_data = await StaticSeeder.capture(c_replica)
|
||||||
assert master_data == replica_data
|
assert master_data == replica_data
|
||||||
|
|
||||||
|
|
||||||
|
async def test_preempt_in_atomic_section_of_heartbeat(df_factory: DflyInstanceFactory):
    """Regression test: heartbeat key expiration must not deadlock/violate atomicity
    when OnJournalEntry() preempts while holding the big-value mutex.

    A single-threaded master is populated with many large sets, half of which are
    given short TTLs so the heartbeat expires keys while a seeder keeps writing.
    The huge serialization_max_chunk_size forces big-value serialization through
    the journal path, and full sync of two replicas exercises OnJournalEntry()
    concurrently with heartbeat expiration.
    """
    # Huge chunk size ensures values are serialized via the journal big-value path.
    master = df_factory.create(proactor_threads=1, serialization_max_chunk_size=100000000000)
    replicas = [df_factory.create(proactor_threads=1) for _ in range(2)]

    # Start instances and connect clients.
    df_factory.start_all([master] + replicas)
    c_master = master.client()
    c_replicas = [replica.client() for replica in replicas]

    total = 100000
    await c_master.execute_command(f"DEBUG POPULATE {total} tmp 100 TYPE SET ELEMENTS 100")

    # Give half of the keys short, randomized TTLs so heartbeat expiration
    # overlaps with replication traffic.
    threshold = 50000
    for i in range(threshold):
        rand = random.randint(1, 10)
        await c_master.execute_command(f"EXPIRE tmp:{i} {rand} NX")

    # Keep writing in the background while replicas perform full sync.
    seeder = StaticSeeder(key_target=10000)
    fill_task = asyncio.create_task(seeder.run(master.client()))

    for replica in c_replicas:
        await replica.execute_command(f"REPLICAOF LOCALHOST {master.port}")

    async with async_timeout.timeout(240):
        await wait_for_replicas_state(*c_replicas)

    await fill_task
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue