From 0832d23f13c8a49582f266e2851938cc6f12f96d Mon Sep 17 00:00:00 2001
From: Borys
Date: Thu, 2 Jan 2025 13:50:21 +0200
Subject: [PATCH] fix: allow cluster node to load snapshot bigger than maxmemory (#4394)

---
 src/server/rdb_load.cc          | 11 ++++--
 tests/dragonfly/cluster_test.py | 59 +++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/src/server/rdb_load.cc b/src/server/rdb_load.cc
index d227cd823..383b218da 100644
--- a/src/server/rdb_load.cc
+++ b/src/server/rdb_load.cc
@@ -2464,9 +2464,14 @@ error_code RdbLoader::HandleAux() {
     if (absl::SimpleAtoi(auxval, &usedmem)) {
       VLOG(1) << "RDB memory usage when created " << strings::HumanReadableNumBytes(usedmem);
       if (usedmem > ssize_t(max_memory_limit)) {
-        LOG(WARNING) << "Could not load snapshot - its used memory is " << usedmem
-                     << " but the limit is " << max_memory_limit;
-        return RdbError(errc::out_of_memory);
+        if (cluster::IsClusterEnabled()) {
+          LOG(INFO) << "Attempting to load a snapshot of size " << usedmem
+                    << ", despite memory limit of " << max_memory_limit;
+        } else {
+          LOG(WARNING) << "Could not load snapshot - its used memory is " << usedmem
+                       << " but the limit is " << max_memory_limit;
+          return RdbError(errc::out_of_memory);
+        }
       }
     }
   } else if (auxkey == "aof-preamble") {
diff --git a/tests/dragonfly/cluster_test.py b/tests/dragonfly/cluster_test.py
index b90d48665..5d9f9e6a9 100644
--- a/tests/dragonfly/cluster_test.py
+++ b/tests/dragonfly/cluster_test.py
@@ -2499,3 +2499,62 @@ async def test_migration_timeout_on_sync(df_factory: DflyInstanceFactory, df_see
     await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])
 
     assert (await StaticSeeder.capture(nodes[1].client)) == start_capture
+
+
+@pytest.mark.slow
+@dfly_args({"proactor_threads": 4, "cluster_mode": "yes"})
+async def test_snapshot_bigger_than_maxmemory(df_factory: DflyInstanceFactory, df_seeder_factory):
+    """
+    Test loading a snapshot that is bigger than maxmemory but spans more slots than the node owns; it should load without OOM:
+
+    1) Create a snapshot
+    2) Split the slots between 2 instances and reduce maxmemory
+    3) Load the snapshot into both instances
+
+    The end result: together, the instances contain all the data that was in the snapshot
+    """
+    dbfilename = f"dump_{tmp_file_name()}"
+    instances = [
+        df_factory.create(
+            port=next(next_port), admin_port=next(next_port), maxmemory="3G", dbfilename=dbfilename
+        ),
+        df_factory.create(port=next(next_port), admin_port=next(next_port), maxmemory="1G"),
+    ]
+    df_factory.start_all(instances)
+
+    nodes = [await create_node_info(n) for n in instances]
+
+    nodes[0].slots = [(0, 16383)]
+    nodes[1].slots = []
+
+    logging.debug("Push initial config")
+    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])
+
+    logging.debug("Create data")
+    seeder = df_seeder_factory.create(
+        keys=30000, val_size=10000, port=nodes[0].instance.port, cluster_mode=True
+    )
+    await seeder.run(target_deviation=0.05)
+    capture = await seeder.capture()
+
+    logging.debug("SAVE")
+    await nodes[0].client.execute_command("SAVE", "rdb")
+
+    logging.debug("Flush data and reduce maxmemory")
+    for node in nodes:
+        await node.client.execute_command("flushall")
+        await node.client.execute_command("CONFIG SET maxmemory 1G")
+
+    nodes[0].slots = [(0, 8191)]
+    nodes[1].slots = [(8192, 16383)]
+
+    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])
+
+    for node in nodes:
+        await node.client.execute_command("DFLY", "LOAD", f"{dbfilename}.rdb")
+
+    assert await seeder.compare(capture, nodes[0].instance.port)
+
+    # prevent saving during shutdown
+    for node in nodes:
+        await node.client.execute_command("flushall")
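
Editor's note: the sketch below shows how the patched code path could be exercised by hand. It is a minimal, illustrative example, not part of the change: it assumes redis-py's asyncio client, a Dragonfly node started with --cluster_mode=yes, and placeholder values for the port and RDB filename. The apparent rationale for the relaxed check, per the test's docstring, is that the snapshot spans more slots than a single cluster node owns, so the snapshot's recorded used-mem overestimates the node's actual post-load footprint.

    import asyncio
    import redis.asyncio as redis

    async def main():
        # Port is a placeholder; point it at a cluster-mode Dragonfly node
        # whose maxmemory is below the snapshot's recorded used-mem.
        client = redis.Redis(port=6379, decode_responses=True)

        # With this patch the node logs "Attempting to load a snapshot of
        # size ..." and proceeds; without it (or in non-cluster mode) the
        # load fails with an out_of_memory error. Filename is a placeholder.
        await client.execute_command("DFLY", "LOAD", "dump.rdb")

        print("keys loaded:", await client.dbsize())
        await client.aclose()

    asyncio.run(main())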