mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-10 18:05:44 +02:00
fix(replication): Correctly replicate commands even when OOM (#2428)
* fix(replication): Correctly replicate commands even when OOM

  Before this change, an OOM in shard callbacks could lead to data inconsistency between the master and the replica. For example, a command such as `LMOVE` could mutate data on one shard but fail on another. After this change, callbacks that result in an OOM correctly replicate their work (none, partial or complete) to replicas.

  Note that `MSET` and `MSETNX` required special handling: they are the only commands that can _create_ multiple keys, so some of those key creations can fail while others succeed.

  Fixes #2381
* fixes
* test fix
* RecordJournal
* UNDO idiotnessness
* 2 shards
* fix pytest
This commit is contained in:
parent
b66db852f9
commit
2f0287429d
4 changed files with 96 additions and 69 deletions
|
@ -555,7 +555,7 @@ bool Transaction::RunInShard(EngineShard* shard, bool txq_ooo) {
|
|||
|
||||
// Log to the journal only once the command has finished running
|
||||
if (is_concluding || (multi_ && multi_->concluding))
|
||||
LogAutoJournalOnShard(shard);
|
||||
LogAutoJournalOnShard(shard, result);
|
||||
|
||||
// If we're the head of tx queue (txq_ooo is false), we remove ourselves upon first invocation
|
||||
// and successive hops are run by continuation_trans_ in engine shard.
|
||||
|
@ -1047,6 +1047,10 @@ ShardId Transaction::GetUniqueShard() const {
|
|||
return unique_shard_id_;
|
||||
}
|
||||
|
||||
optional<SlotId> Transaction::GetUniqueSlotId() const {
|
||||
return unique_slot_checker_.GetUniqueSlotId();
|
||||
}
|
||||
|
||||
KeyLockArgs Transaction::GetLockArgs(ShardId sid) const {
|
||||
KeyLockArgs res;
|
||||
res.db_index = db_index_;
|
||||
|
@ -1089,7 +1093,7 @@ bool Transaction::ScheduleUniqueShard(EngineShard* shard) {
|
|||
DCHECK_EQ(sd.is_armed, false);
|
||||
unlocked_keys = false;
|
||||
} else {
|
||||
LogAutoJournalOnShard(shard);
|
||||
LogAutoJournalOnShard(shard, result);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1327,7 +1331,7 @@ OpStatus Transaction::RunSquashedMultiCb(RunnableType cb) {
|
|||
auto* shard = EngineShard::tlocal();
|
||||
auto result = cb(this, shard);
|
||||
shard->db_slice().OnCbFinish();
|
||||
LogAutoJournalOnShard(shard);
|
||||
LogAutoJournalOnShard(shard, result);
|
||||
|
||||
DCHECK_EQ(result.flags, 0); // if it's sophisticated, we shouldn't squash it
|
||||
return result;
|
||||
|
@ -1454,7 +1458,7 @@ optional<string_view> Transaction::GetWakeKey(ShardId sid) const {
|
|||
return GetShardArgs(sid).at(sd.wake_key_pos);
|
||||
}
|
||||
|
||||
void Transaction::LogAutoJournalOnShard(EngineShard* shard) {
|
||||
void Transaction::LogAutoJournalOnShard(EngineShard* shard, RunnableResult result) {
|
||||
// TODO: For now, we ignore non-shard coordination.
|
||||
if (shard == nullptr)
|
||||
return;
|
||||
|
@ -1467,14 +1471,22 @@ void Transaction::LogAutoJournalOnShard(EngineShard* shard) {
|
|||
if (cid_->IsWriteOnly() == 0 && (cid_->opt_mask() & CO::NO_KEY_TRANSACTIONAL) == 0)
|
||||
return;
|
||||
|
||||
// If autojournaling was disabled and not re-enabled, skip it
|
||||
if ((cid_->opt_mask() & CO::NO_AUTOJOURNAL) && !renabled_auto_journal_.load(memory_order_relaxed))
|
||||
return;
|
||||
|
||||
auto journal = shard->journal();
|
||||
if (journal == nullptr)
|
||||
return;
|
||||
|
||||
if (result.status != OpStatus::OK) {
|
||||
// We log NOOP even for NO_AUTOJOURNAL commands because the non-success status could have been
|
||||
// due to OOM in a single shard, while other shards succeeded
|
||||
journal->RecordEntry(txid_, journal::Op::NOOP, db_index_, unique_shard_cnt_,
|
||||
unique_slot_checker_.GetUniqueSlotId(), journal::Entry::Payload{}, true);
|
||||
return;
|
||||
}
|
||||
|
||||
// If autojournaling was disabled and not re-enabled, skip it
|
||||
if ((cid_->opt_mask() & CO::NO_AUTOJOURNAL) && !renabled_auto_journal_.load(memory_order_relaxed))
|
||||
return;
|
||||
|
||||
// TODO: Handle complex commands like LMPOP correctly once they are implemented.
|
||||
journal::Entry::Payload entry_payload;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue