fix(replication): Correctly replicate commands even when OOM (#2428)

* fix(replication): Correctly replicate commands even when OOM

Before this change, an OOM in a shard callback could lead to data
inconsistency between the master and the replica. This affected, for
example, commands such as `LMOVE` that mutate data on one shard but fail
on another.

After this change, callbacks that result in an OOM will correctly
replicate their work (none, partial or complete) to replicas.

Note that `MSET` and `MSETNX` required special handling because they are
the only commands that can _create_ multiple keys, so creating some of
those keys can fail.

Fixes #2381

* fixes

* test fix

* RecordJournal

* UNDO idiotnessness

* 2 shards

* fix pytest
This commit is contained in:
Shahar Mike 2024-01-18 12:29:59 +02:00 committed by GitHub
parent b66db852f9
commit 2f0287429d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 96 additions and 69 deletions

View file

@ -555,7 +555,7 @@ bool Transaction::RunInShard(EngineShard* shard, bool txq_ooo) {
// Log to the journal only once the command has finished running
if (is_concluding || (multi_ && multi_->concluding))
LogAutoJournalOnShard(shard);
LogAutoJournalOnShard(shard, result);
// If we're the head of tx queue (txq_ooo is false), we remove ourselves upon first invocation
// and successive hops are run by continuation_trans_ in engine shard.
@ -1047,6 +1047,10 @@ ShardId Transaction::GetUniqueShard() const {
return unique_shard_id_;
}
// Returns the unique slot id as reported by unique_slot_checker_; this is a
// plain pass-through with no extra logic.
// NOTE(review): presumably the result is empty when the transaction's keys do
// not map to a single slot - confirm against the checker's implementation.
optional<SlotId> Transaction::GetUniqueSlotId() const {
return unique_slot_checker_.GetUniqueSlotId();
}
KeyLockArgs Transaction::GetLockArgs(ShardId sid) const {
KeyLockArgs res;
res.db_index = db_index_;
@ -1089,7 +1093,7 @@ bool Transaction::ScheduleUniqueShard(EngineShard* shard) {
DCHECK_EQ(sd.is_armed, false);
unlocked_keys = false;
} else {
LogAutoJournalOnShard(shard);
LogAutoJournalOnShard(shard, result);
}
}
@ -1327,7 +1331,7 @@ OpStatus Transaction::RunSquashedMultiCb(RunnableType cb) {
auto* shard = EngineShard::tlocal();
auto result = cb(this, shard);
shard->db_slice().OnCbFinish();
LogAutoJournalOnShard(shard);
LogAutoJournalOnShard(shard, result);
DCHECK_EQ(result.flags, 0); // if it's sophisticated, we shouldn't squash it
return result;
@ -1454,7 +1458,7 @@ optional<string_view> Transaction::GetWakeKey(ShardId sid) const {
return GetShardArgs(sid).at(sd.wake_key_pos);
}
void Transaction::LogAutoJournalOnShard(EngineShard* shard) {
void Transaction::LogAutoJournalOnShard(EngineShard* shard, RunnableResult result) {
// TODO: For now, we ignore non-shard coordination.
if (shard == nullptr)
return;
@ -1467,14 +1471,22 @@ void Transaction::LogAutoJournalOnShard(EngineShard* shard) {
if (cid_->IsWriteOnly() == 0 && (cid_->opt_mask() & CO::NO_KEY_TRANSACTIONAL) == 0)
return;
// If autojournaling was disabled and not re-enabled, skip it
if ((cid_->opt_mask() & CO::NO_AUTOJOURNAL) && !renabled_auto_journal_.load(memory_order_relaxed))
return;
auto journal = shard->journal();
if (journal == nullptr)
return;
if (result.status != OpStatus::OK) {
// We log NOOP even for NO_AUTOJOURNAL commands because the non-success status could have been
// due to OOM in a single shard, while other shards succeeded
journal->RecordEntry(txid_, journal::Op::NOOP, db_index_, unique_shard_cnt_,
unique_slot_checker_.GetUniqueSlotId(), journal::Entry::Payload{}, true);
return;
}
// If autojournaling was disabled and not re-enabled, skip it
if ((cid_->opt_mask() & CO::NO_AUTOJOURNAL) && !renabled_auto_journal_.load(memory_order_relaxed))
return;
// TODO: Handle complex commands like LMPOP correctly once they are implemented.
journal::Entry::Payload entry_payload;