Kostas Kyrimis 2025-05-09 12:14:30 +03:00 committed by GitHub
commit 1ea231a440
4 changed files with 75 additions and 3 deletions


@@ -1435,6 +1435,7 @@ size_t Service::DispatchManyCommands(absl::Span<CmdArgList> args_list, SinkReply
MultiCommandSquasher::Opts opts;
opts.verify_commands = true;
opts.max_squash_size = ss->max_squash_cmd_num;
opts.is_mult_non_atomic = true;
size_t squashed_num = MultiCommandSquasher::Execute(absl::MakeSpan(stored_cmds),
static_cast<RedisReplyBuilder*>(builder),
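The hunk above cuts off mid-call. Based on the MultiCommandSquasher::Execute signature declared in the header later in this commit, the complete call presumably has the following shape; this is a hedged reconstruction, not part of the diff, and the cntx and service locals are assumed from the surrounding function:

// Hedged reconstruction of the truncated call above; cntx and service are
// assumed to be in scope in DispatchManyCommands but are not shown in this hunk.
size_t squashed_num = MultiCommandSquasher::Execute(
    absl::MakeSpan(stored_cmds), static_cast<RedisReplyBuilder*>(builder),
    cntx, service, opts);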


@@ -6,6 +6,7 @@
#include <absl/container/inlined_vector.h>
#include "base/flags.h"
#include "base/logging.h"
#include "core/overloaded.h"
#include "facade/dragonfly_connection.h"
@@ -15,6 +16,10 @@
#include "server/transaction.h"
#include "server/tx_base.h"
ABSL_FLAG(size_t, squashed_reply_size_limit, 0,
          "Max bytes allowed for squashing_current_reply_size. If this limit is reached, "
          "connections dispatching via pipelines will block until the accumulated reply "
          "size drops below the limit. 0 disables the limit.");

namespace dfly {
using namespace std;
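Since squashed_reply_size_limit is a regular absl flag, it can be set on the server command line. The values below mirror the integration test at the bottom of this commit (the binary name is an assumption):

dragonfly --squashed_reply_size_limit=500000000 --vmodule=multi_command_squasher=2

With the default of 0, the thread-local reply_size_limit_ stays zero; the over-limit check in the header requires a positive limit, so throttling is disabled by default.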
@@ -63,6 +68,9 @@ size_t Size(const facade::CapturingReplyBuilder::Payload& payload) {
} // namespace
atomic_uint64_t MultiCommandSquasher::current_reply_size_ = 0;
thread_local size_t MultiCommandSquasher::reply_size_limit_ =
absl::GetFlag(FLAGS_squashed_reply_size_limit);
util::fb2::EventCount MultiCommandSquasher::ec_;
MultiCommandSquasher::MultiCommandSquasher(absl::Span<StoredCmd> cmds, ConnectionContext* cntx,
Service* service, const Opts& opts)
@@ -208,6 +216,15 @@ bool MultiCommandSquasher::ExecuteSquashed(facade::RedisReplyBuilder* rb) {
if (order_.empty())
return true;
// Multi non-atomic does not lock ahead, so it's safe to preempt here while we haven't
// really started the transaction.
// This is not true for `multi/exec`, which also uses `Execute()` but locks ahead before
// it calls `ScheduleSingleHop` below.
// TODO: Investigate the side effects of allowing this in `lock ahead` mode.
if (opts_.is_mult_non_atomic) {
MultiCommandSquasher::ec_.await([]() { return !MultiCommandSquasher::IsReplySizeOverLimit(); });
}
unsigned num_shards = 0;
for (auto& sd : sharded_) {
if (!sd.dispatched.empty())
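To make the coordination explicit, here is a self-contained toy model of the protocol this hunk implements: connection fibers wait on an EventCount until the shared reply budget has room, reply capture adds bytes, and the flush path returns them and wakes waiters. All names are invented stand-ins rather than Dragonfly code, and the increment site in particular is an assumption, since this diff only shows the decrement:

#include <atomic>
#include <cstdint>

#include "util/fibers/synchronization.h"  // util::fb2::EventCount, as used in this file

namespace toy {

std::atomic<uint64_t> current_reply_size{0};       // grows as shard replies are captured
constexpr uint64_t kReplySizeLimit = 500'000'000;  // stand-in for the runtime flag value
util::fb2::EventCount ec;

bool OverLimit() {
  return current_reply_size.load(std::memory_order_relaxed) > kReplySizeLimit;
}

// Dispatcher side: runs in a connection fiber before a squashed batch is scheduled.
// await() suspends only this fiber, so the proactor thread keeps serving other connections.
void ThrottleBeforeDispatch() {
  ec.await([] { return !OverLimit(); });
}

// Capture side (assumed): each captured shard reply adds its size to the shared counter.
void OnReplyCaptured(uint64_t bytes) {
  current_reply_size.fetch_add(bytes, std::memory_order_relaxed);
}

// Flush side: once a batch has been streamed to the client, return its bytes to the
// budget and wake any fibers blocked in ThrottleBeforeDispatch() to re-check the predicate.
void OnBatchFlushed(uint64_t batch_bytes) {
  current_reply_size.fetch_sub(batch_bytes, std::memory_order_relaxed);
  ec.notifyAll();
}

}  // namespace toy

Note that a waiter is admitted as soon as the total drops below the limit and then adds its own batch on top, so peak usage can overshoot the limit by roughly one batch of replies; the limit bounds the steady state rather than the absolute peak.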
@@ -246,6 +263,7 @@ bool MultiCommandSquasher::ExecuteSquashed(facade::RedisReplyBuilder* rb) {
uint64_t after_hop = proactor->GetMonotonicTimeNs();
bool aborted = false;
size_t size = 0;
for (auto idx : order_) {
auto& sinfo = sharded_[idx];
DCHECK_LT(sinfo.reply_id, sinfo.dispatched.size());
@@ -258,6 +276,9 @@ bool MultiCommandSquasher::ExecuteSquashed(facade::RedisReplyBuilder* rb) {
if (aborted)
break;
}
current_reply_size_.fetch_sub(size, std::memory_order_relaxed);
MultiCommandSquasher::ec_.notifyAll();
uint64_t after_reply = proactor->GetMonotonicTimeNs();
ServerState::SafeTLocal()->stats.multi_squash_exec_hop_usec += (after_hop - start) / 1000;
ServerState::SafeTLocal()->stats.multi_squash_exec_reply_usec += (after_reply - after_hop) / 1000;


@@ -7,6 +7,7 @@
#include "facade/reply_capture.h"
#include "server/conn_context.h"
#include "server/main_service.h"
#include "util/fibers/synchronization.h"
namespace dfly {
@@ -23,11 +24,12 @@ namespace dfly {
class MultiCommandSquasher {
public:
struct Opts {
bool verify_commands = false;  // Whether commands need to be verified before execution
bool error_abort = false;      // Abort upon receiving error
// Set if MultiCommandSquasher is used from a pipeline and not from a multi/exec block
bool is_mult_non_atomic = false;
unsigned max_squash_size = 32;  // How many commands to squash at once
};
// Returns number of processed commands.
static size_t Execute(absl::Span<StoredCmd> cmds, facade::RedisReplyBuilder* rb,
ConnectionContext* cntx, Service* service, const Opts& opts) {
@@ -38,6 +40,14 @@ class MultiCommandSquasher {
return current_reply_size_.load(std::memory_order_relaxed);
}
static bool IsReplySizeOverLimit() {
const bool over_limit = reply_size_limit_ > 0 &&
current_reply_size_.load(std::memory_order_relaxed) > reply_size_limit_;
VLOG_IF(2, over_limit) << "MultiCommandSquasher overlimit: " << reply_size_limit_
<< " current reply size " << current_reply_size_;
return over_limit;
}
private:
// Per-shard execution info.
struct ShardExecInfo {
@@ -97,6 +107,10 @@ class MultiCommandSquasher {
// we increase size in one thread and decrease in another
static atomic_uint64_t current_reply_size_;
// Used to throttle when memory is tight
static util::fb2::EventCount ec_;
static thread_local size_t reply_size_limit_;
};
} // namespace dfly
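The header makes the split visible: only the pipeline path opts into throttling. A hedged sketch of the two configurations follows; the multi/exec call site is not part of this diff, so its exact shape is an assumption:

// Pipeline dispatch (as in the first file of this commit): may block on the
// reply-size budget before scheduling, because nothing is locked yet.
MultiCommandSquasher::Opts pipeline_opts;
pipeline_opts.verify_commands = true;
pipeline_opts.is_mult_non_atomic = true;

// MULTI/EXEC execution (assumed call site, not shown in this diff): keeps the
// default is_mult_non_atomic = false, so ExecuteSquashed never waits there;
// the transaction has already locked ahead and blocking could hold those locks.
MultiCommandSquasher::Opts exec_opts;
exec_opts.verify_commands = true;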


@@ -1,4 +1,5 @@
import pytest
import asyncio
from redis import asyncio as aioredis
from .utility import *
import logging
@@ -222,3 +223,38 @@ async def test_cache_eviction_with_rss_deny_oom(
)
stats_info = await async_client.info("stats")
logging.info(f'Current evicted: {stats_info["evicted_keys"]}. Total keys: {num_keys}.')
@pytest.mark.asyncio
async def test_throttle_on_commands_squashing_replies_bytes(df_factory: DflyInstanceFactory):
    df = df_factory.create(
        proactor_threads=2,
        squashed_reply_size_limit=500_000_000,
        vmodule="multi_command_squasher=2",
    )
    df.start()

    client = df.client()
    # 0.5gb
    await client.execute_command("debug populate 64 test 3125 rand type hash elements 500")

    async def poll():
        # At any point we should not cross this limit
        assert df.rss < 1_500_000_000
        cl = df.client()
        pipe = cl.pipeline(transaction=False)
        for i in range(64):
            pipe.execute_command(f"hgetall test:{i}")

        await pipe.execute()

    tasks = []
    for i in range(20):
        tasks.append(asyncio.create_task(poll()))

    for task in tasks:
        await task

    df.stop()

    found = df.find_in_logs("MultiCommandSquasher overlimit: ")
    assert len(found) > 0