From 0832d23f13c8a49582f266e2851938cc6f12f96d Mon Sep 17 00:00:00 2001
From: Borys
Date: Thu, 2 Jan 2025 13:50:21 +0200
Subject: [PATCH] fix: allow cluster node to load snapshot bigger than maxmemory (#4394)

---
 src/server/rdb_load.cc          | 11 ++++--
 tests/dragonfly/cluster_test.py | 59 +++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/src/server/rdb_load.cc b/src/server/rdb_load.cc
index d227cd823..383b218da 100644
--- a/src/server/rdb_load.cc
+++ b/src/server/rdb_load.cc
@@ -2464,9 +2464,14 @@ error_code RdbLoader::HandleAux() {
     if (absl::SimpleAtoi(auxval, &usedmem)) {
       VLOG(1) << "RDB memory usage when created " << strings::HumanReadableNumBytes(usedmem);
       if (usedmem > ssize_t(max_memory_limit)) {
-        LOG(WARNING) << "Could not load snapshot - its used memory is " << usedmem
-                     << " but the limit is " << max_memory_limit;
-        return RdbError(errc::out_of_memory);
+        if (cluster::IsClusterEnabled()) {
+          LOG(INFO) << "Attempting to load a snapshot of size " << usedmem
+                    << ", despite memory limit of " << max_memory_limit;
+        } else {
+          LOG(WARNING) << "Could not load snapshot - its used memory is " << usedmem
+                       << " but the limit is " << max_memory_limit;
+          return RdbError(errc::out_of_memory);
+        }
       }
     }
   } else if (auxkey == "aof-preamble") {
diff --git a/tests/dragonfly/cluster_test.py b/tests/dragonfly/cluster_test.py
index b90d48665..5d9f9e6a9 100644
--- a/tests/dragonfly/cluster_test.py
+++ b/tests/dragonfly/cluster_test.py
@@ -2499,3 +2499,62 @@ async def test_migration_timeout_on_sync(df_factory: DflyInstanceFactory, df_see
     await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])
 
     assert (await StaticSeeder.capture(nodes[1].client)) == start_capture
+
+
+@pytest.mark.slow
+@dfly_args({"proactor_threads": 4, "cluster_mode": "yes"})
+async def test_snapshot_bigger_than_maxmemory(df_factory: DflyInstanceFactory, df_seeder_factory):
+    """
+    Test loading a snapshot that is bigger than maxmemory but spans more slots than the node owns; it should load without OOM:
+
+    1) Create a snapshot
+    2) Split the slots between 2 instances and reduce maxmemory
+    3) Load the snapshot into both instances
+
+    The end result: together, the instances contain all the data that was in the snapshot
+    """
+    dbfilename = f"dump_{tmp_file_name()}"
+    instances = [
+        df_factory.create(
+            port=next(next_port), admin_port=next(next_port), maxmemory="3G", dbfilename=dbfilename
+        ),
+        df_factory.create(port=next(next_port), admin_port=next(next_port), maxmemory="1G"),
+    ]
+    df_factory.start_all(instances)
+
+    nodes = [await create_node_info(n) for n in instances]
+
+    nodes[0].slots = [(0, 16383)]
+    nodes[1].slots = []
+
+    logging.debug("Push initial config")
+    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])
+
+    logging.debug("Create data")
+    seeder = df_seeder_factory.create(
+        keys=30000, val_size=10000, port=nodes[0].instance.port, cluster_mode=True
+    )
+    await seeder.run(target_deviation=0.05)
+    capture = await seeder.capture()
+
+    logging.debug("SAVE")
+    await nodes[0].client.execute_command("SAVE", "rdb")
+
+    logging.debug("Flush data and reduce maxmemory")
+    for node in nodes:
+        await node.client.execute_command("flushall")
+        await node.client.execute_command("CONFIG SET maxmemory 1G")
+
+    nodes[0].slots = [(0, 8191)]
+    nodes[1].slots = [(8192, 16383)]
+
+    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])
+
+    for node in nodes:
+        await node.client.execute_command("DFLY", "LOAD", f"{dbfilename}.rdb")
+
+    assert await seeder.compare(capture, nodes[0].instance.port)
+
+    # prevent saving during shutdown
+    for node in nodes:
+        await node.client.execute_command("flushall")
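
Editor's note: the sketch below shows how the patched code path could be exercised by hand. It is a minimal, illustrative example, not part of the change: it assumes redis-py's asyncio client, a Dragonfly node started with --cluster_mode=yes, and placeholder values for the port and RDB filename. The apparent rationale for the relaxed check, per the test's docstring, is that the snapshot spans more slots than a single cluster node owns, so the snapshot's recorded used-mem overestimates the node's actual post-load footprint.

    import asyncio
    import redis.asyncio as redis

    async def main():
        # Port is a placeholder; point it at a cluster-mode Dragonfly node
        # whose maxmemory is below the snapshot's recorded used-mem.
        client = redis.Redis(port=6379, decode_responses=True)

        # With this patch the node logs "Attempting to load a snapshot of
        # size ..." and proceeds; without it (or in non-cluster mode) the
        # load fails with an out_of_memory error. Filename is a placeholder.
        await client.execute_command("DFLY", "LOAD", "dump.rdb")

        print("keys loaded:", await client.dbsize())
        await client.aclose()

    asyncio.run(main())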