fix(replication): Correctly replicate commands even when OOM (#2428)

* fix(replication): Correctly replicate commands even when OOM

  Before this change, an OOM in shard callbacks could have led to data inconsistency between the master and the replica. For example, commands such as `LMOVE` could mutate data on one shard but fail on another. After this change, callbacks that result in an OOM will correctly replicate their work (none, partial or complete) to replicas.

  Note that `MSET` and `MSETNX` required special handling, because they are the only commands that can _create_ multiple keys, and so some of those key creations can fail.

  Fixes #2381

* fixes
* test fix
* RecordJournal
* UNDO idiotnessness
* 2 shards
* fix pytest
2025-05-10 18:05:44 +02:00 · 2024-01-18 12:29:59 +02:00 · 2024-01-18 12:29:59 +02:00 · 2f0287429d
commit 2f0287429d
parent b66db852f9
4 changed files with 96 additions and 69 deletions
--- a/src/server/transaction.cc
+++ b/src/server/transaction.cc
@ -555,7 +555,7 @@ bool Transaction::RunInShard(EngineShard* shard, bool txq_ooo) {

  // Log to jounrnal only once the command finished running
  if (is_concluding || (multi_ && multi_->concluding))
-    LogAutoJournalOnShard(shard);
+    LogAutoJournalOnShard(shard, result);

  // If we're the head of tx queue (txq_ooo is false), we remove ourselves upon first invocation
  // and successive hops are run by continuation_trans_ in engine shard.
@ -1047,6 +1047,10 @@ ShardId Transaction::GetUniqueShard() const {
  return unique_shard_id_;
 }

+optional<SlotId> Transaction::GetUniqueSlotId() const {
+  return unique_slot_checker_.GetUniqueSlotId();
+}
+
 KeyLockArgs Transaction::GetLockArgs(ShardId sid) const {
  KeyLockArgs res;
  res.db_index = db_index_;
@ -1089,7 +1093,7 @@ bool Transaction::ScheduleUniqueShard(EngineShard* shard) {
      DCHECK_EQ(sd.is_armed, false);
      unlocked_keys = false;
    } else {
-      LogAutoJournalOnShard(shard);
+      LogAutoJournalOnShard(shard, result);
    }
  }

@ -1327,7 +1331,7 @@ OpStatus Transaction::RunSquashedMultiCb(RunnableType cb) {
  auto* shard = EngineShard::tlocal();
  auto result = cb(this, shard);
  shard->db_slice().OnCbFinish();
-  LogAutoJournalOnShard(shard);
+  LogAutoJournalOnShard(shard, result);

  DCHECK_EQ(result.flags, 0);  // if it's sophisticated, we shouldn't squash it
  return result;
@ -1454,7 +1458,7 @@ optional<string_view> Transaction::GetWakeKey(ShardId sid) const {
  return GetShardArgs(sid).at(sd.wake_key_pos);
 }

-void Transaction::LogAutoJournalOnShard(EngineShard* shard) {
+void Transaction::LogAutoJournalOnShard(EngineShard* shard, RunnableResult result) {
  // TODO: For now, we ignore non shard coordination.
  if (shard == nullptr)
    return;
@ -1467,14 +1471,22 @@ void Transaction::LogAutoJournalOnShard(EngineShard* shard) {
  if (cid_->IsWriteOnly() == 0 && (cid_->opt_mask() & CO::NO_KEY_TRANSACTIONAL) == 0)
    return;

-  // If autojournaling was disabled and not re-enabled, skip it
-  if ((cid_->opt_mask() & CO::NO_AUTOJOURNAL) && !renabled_auto_journal_.load(memory_order_relaxed))
-    return;
-
  auto journal = shard->journal();
  if (journal == nullptr)
    return;

+  if (result.status != OpStatus::OK) {
+    // We log NOOP even for NO_AUTOJOURNAL commands because the non-success status could have been
+    // due to OOM in a single shard, while other shards succeeded
+    journal->RecordEntry(txid_, journal::Op::NOOP, db_index_, unique_shard_cnt_,
+                         unique_slot_checker_.GetUniqueSlotId(), journal::Entry::Payload{}, true);
+    return;
+  }
+
+  // If autojournaling was disabled and not re-enabled, skip it
+  if ((cid_->opt_mask() & CO::NO_AUTOJOURNAL) && !renabled_auto_journal_.load(memory_order_relaxed))
+    return;
+
  // TODO: Handle complex commands like LMPOP correctly once they are implemented.
  journal::Entry::Payload entry_payload;