fix(cluster): Don't miss updates in FLUSHSLOTS (#2783)

* fix(flushslots): Don't miss updates in `FLUSHSLOTS`

This PR registers for PreUpdate() from inside the `FLUSHSLOTS` fiber so
that any attempt to update a to-be-deleted key will work as expected
(first delete, then apply the change).

This fixes several issues:

* Any attempt to touch bucket B (like insert a key), where another key
  in B should be removed, caused us to _not_ remove the latter key
* Commands which use an existing value but do not completely override it,
  like `APPEND` and `LPUSH`, did not treat the key as removed but instead
  used the original value

Fixes #2771

* fix flushslots syntax in test

* EXPECT_EQ(key:0, xxxx)

* dbsize
This commit is contained in:
Shahar Mike 2024-03-31 15:47:38 +03:00 committed by GitHub
parent 7d093460f0
commit 1d04683c48
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 116 additions and 157 deletions

View file

@ -36,6 +36,28 @@ class ClusterFamilyTest : public BaseFamilyTest {
string GetMyId() { string GetMyId() {
return RunPrivileged({"dflycluster", "myid"}).GetString(); return RunPrivileged({"dflycluster", "myid"}).GetString();
} }
void ConfigSingleNodeCluster(string id) {
string config_template = R"json(
[
{
"slot_ranges": [
{
"start": 0,
"end": 16383
}
],
"master": {
"id": "$0",
"ip": "10.0.0.1",
"port": 7000
},
"replicas": []
}
])json";
string config = absl::Substitute(config_template, id);
EXPECT_EQ(RunPrivileged({"dflycluster", "config", config}), "OK");
}
}; };
TEST_F(ClusterFamilyTest, ClusterConfigInvalidJSON) { TEST_F(ClusterFamilyTest, ClusterConfigInvalidJSON) {
@ -136,25 +158,7 @@ TEST_F(ClusterFamilyTest, ClusterConfigInvalidOverlappingSlots) {
} }
TEST_F(ClusterFamilyTest, ClusterConfigNoReplicas) { TEST_F(ClusterFamilyTest, ClusterConfigNoReplicas) {
EXPECT_EQ(RunPrivileged({"dflycluster", "config", R"json( ConfigSingleNodeCluster("abcd1234");
[
{
"slot_ranges": [
{
"start": 0,
"end": 16383
}
],
"master": {
"id": "abcd1234",
"ip": "10.0.0.1",
"port": 7000
},
"replicas": []
}
])json"}),
"OK");
string cluster_info = Run({"cluster", "info"}).GetString(); string cluster_info = Run({"cluster", "info"}).GetString();
EXPECT_THAT(cluster_info, HasSubstr("cluster_state:ok")); EXPECT_THAT(cluster_info, HasSubstr("cluster_state:ok"));
EXPECT_THAT(cluster_info, HasSubstr("cluster_slots_assigned:16384")); EXPECT_THAT(cluster_info, HasSubstr("cluster_slots_assigned:16384"));
@ -422,26 +426,7 @@ TEST_F(ClusterFamilyTest, ClusterGetSlotInfoInvalid) {
} }
TEST_F(ClusterFamilyTest, ClusterGetSlotInfo) { TEST_F(ClusterFamilyTest, ClusterGetSlotInfo) {
string config_template = R"json( ConfigSingleNodeCluster(GetMyId());
[
{
"slot_ranges": [
{
"start": 0,
"end": 16383
}
],
"master": {
"id": "$0",
"ip": "10.0.0.1",
"port": 7000
},
"replicas": []
}
])json";
string config = absl::Substitute(config_template, GetMyId());
EXPECT_EQ(RunPrivileged({"dflycluster", "config", config}), "OK");
constexpr string_view kKey = "some-key"; constexpr string_view kKey = "some-key";
const SlotId slot = ClusterConfig::KeySlot(kKey); const SlotId slot = ClusterConfig::KeySlot(kKey);
@ -480,26 +465,7 @@ TEST_F(ClusterFamilyTest, ClusterGetSlotInfo) {
} }
TEST_F(ClusterFamilyTest, ClusterSlotsPopulate) { TEST_F(ClusterFamilyTest, ClusterSlotsPopulate) {
string config_template = R"json( ConfigSingleNodeCluster(GetMyId());
[
{
"slot_ranges": [
{
"start": 0,
"end": 16383
}
],
"master": {
"id": "$0",
"ip": "10.0.0.1",
"port": 7000
},
"replicas": []
}
])json";
string config = absl::Substitute(config_template, GetMyId());
EXPECT_EQ(RunPrivileged({"dflycluster", "config", config}), "OK");
Run({"debug", "populate", "10000", "key", "4", "SLOTS", "0", "1000"}); Run({"debug", "populate", "10000", "key", "4", "SLOTS", "0", "1000"});
@ -515,26 +481,7 @@ TEST_F(ClusterFamilyTest, ClusterSlotsPopulate) {
} }
TEST_F(ClusterFamilyTest, ClusterConfigDeleteSlots) { TEST_F(ClusterFamilyTest, ClusterConfigDeleteSlots) {
string config_template = R"json( ConfigSingleNodeCluster(GetMyId());
[
{
"slot_ranges": [
{
"start": 0,
"end": 16383
}
],
"master": {
"id": "$0",
"ip": "10.0.0.1",
"port": 7000
},
"replicas": []
}
])json";
string config = absl::Substitute(config_template, GetMyId());
EXPECT_EQ(RunPrivileged({"dflycluster", "config", config}), "OK");
Run({"debug", "populate", "100000"}); Run({"debug", "populate", "100000"});
@ -546,8 +493,7 @@ TEST_F(ClusterFamilyTest, ClusterConfigDeleteSlots) {
RespArray(ElementsAre(IntArg(2), "key_count", Not(IntArg(0)), "total_reads", IntArg(0), RespArray(ElementsAre(IntArg(2), "key_count", Not(IntArg(0)), "total_reads", IntArg(0),
"total_writes", Not(IntArg(0)), "memory_bytes", IntArg(0)))))); "total_writes", Not(IntArg(0)), "memory_bytes", IntArg(0))))));
config = absl::Substitute(config_template, "abc"); ConfigSingleNodeCluster("abc");
EXPECT_EQ(RunPrivileged({"dflycluster", "config", config}), "OK");
ExpectConditionWithinTimeout([&]() { return CheckedInt({"dbsize"}) == 0; }); ExpectConditionWithinTimeout([&]() { return CheckedInt({"dbsize"}) == 0; });
@ -562,26 +508,7 @@ TEST_F(ClusterFamilyTest, ClusterConfigDeleteSlots) {
// Test issue #1302 // Test issue #1302
TEST_F(ClusterFamilyTest, ClusterConfigDeleteSlotsNoCrashOnShutdown) { TEST_F(ClusterFamilyTest, ClusterConfigDeleteSlotsNoCrashOnShutdown) {
string config_template = R"json( ConfigSingleNodeCluster(GetMyId());
[
{
"slot_ranges": [
{
"start": 0,
"end": 16383
}
],
"master": {
"id": "$0",
"ip": "10.0.0.1",
"port": 7000
},
"replicas": []
}
])json";
string config = absl::Substitute(config_template, GetMyId());
EXPECT_EQ(RunPrivileged({"dflycluster", "config", config}), "OK");
Run({"debug", "populate", "100000"}); Run({"debug", "populate", "100000"});
@ -593,10 +520,9 @@ TEST_F(ClusterFamilyTest, ClusterConfigDeleteSlotsNoCrashOnShutdown) {
RespArray(ElementsAre(IntArg(2), "key_count", Not(IntArg(0)), "total_reads", IntArg(0), RespArray(ElementsAre(IntArg(2), "key_count", Not(IntArg(0)), "total_reads", IntArg(0),
"total_writes", Not(IntArg(0)), "memory_bytes", IntArg(0)))))); "total_writes", Not(IntArg(0)), "memory_bytes", IntArg(0))))));
config = absl::Substitute(config_template, "abc");
// After running the new config we start a fiber that removes all slots from current instance // After running the new config we start a fiber that removes all slots from current instance
// we immediately shut down to test that we do not crash. // we immediately shut down to test that we do not crash.
EXPECT_EQ(RunPrivileged({"dflycluster", "config", config}), "OK"); ConfigSingleNodeCluster("abc");
} }
TEST_F(ClusterFamilyTest, ClusterConfigDeleteSomeSlots) { TEST_F(ClusterFamilyTest, ClusterConfigDeleteSomeSlots) {
@ -674,24 +600,7 @@ TEST_F(ClusterFamilyTest, ClusterFirstConfigCallDropsEntriesNotOwnedByNode) {
EXPECT_EQ(Run({"debug", "load", save_info.file_name}), "OK"); EXPECT_EQ(Run({"debug", "load", save_info.file_name}), "OK");
EXPECT_EQ(CheckedInt({"dbsize"}), 50000); EXPECT_EQ(CheckedInt({"dbsize"}), 50000);
EXPECT_EQ(RunPrivileged({"dflycluster", "config", R"json( ConfigSingleNodeCluster("abcd1234");
[
{
"slot_ranges": [
{
"start": 0,
"end": 16383
}
],
"master": {
"id": "abcd1234",
"ip": "10.0.0.1",
"port": 7000
},
"replicas": []
}
])json"}),
"OK");
// Make sure `dbsize` all slots were removed // Make sure `dbsize` all slots were removed
ExpectConditionWithinTimeout([&]() { return CheckedInt({"dbsize"}) == 0; }); ExpectConditionWithinTimeout([&]() { return CheckedInt({"dbsize"}) == 0; });
@ -739,27 +648,36 @@ TEST_F(ClusterFamilyTest, FlushSlots) {
_, "total_writes", _, "memory_bytes", _))))); _, "total_writes", _, "memory_bytes", _)))));
} }
TEST_F(ClusterFamilyTest, ClusterCrossSlot) { TEST_F(ClusterFamilyTest, FlushSlotsAndImmediatelySetValue) {
string config_template = R"json( for (int count : {1, 10, 100, 1000, 10000, 100000}) {
[ ConfigSingleNodeCluster(GetMyId());
{
"slot_ranges": [ EXPECT_EQ(Run({"debug", "populate", absl::StrCat(count), "key", "4"}), "OK");
{ EXPECT_EQ(Run({"get", "key:0"}), "xxxx");
"start": 0,
"end": 16383 EXPECT_THAT(Run({"cluster", "keyslot", "key:0"}), IntArg(2592));
} EXPECT_THAT(Run({"dbsize"}), IntArg(count));
], auto slot_size_response = Run({"dflycluster", "getslotinfo", "slots", "2592"});
"master": { EXPECT_THAT(slot_size_response, RespArray(ElementsAre(_, "key_count", _, "total_reads", _,
"id": "$0", "total_writes", _, "memory_bytes", _)));
"ip": "10.0.0.1", auto slot_size = slot_size_response.GetVec()[2].GetInt();
"port": 7000 EXPECT_TRUE(slot_size.has_value());
},
"replicas": [] EXPECT_EQ(Run({"dflycluster", "flushslots", "2592", "2592"}), "OK");
} // key:0 should have been removed, so APPEND will end up with key:0 == ZZZZ
])json"; EXPECT_THAT(Run({"append", "key:0", "ZZZZ"}), IntArg(4));
string config = absl::Substitute(config_template, GetMyId()); EXPECT_EQ(Run({"get", "key:0"}), "ZZZZ");
// db size should be count - (size of slot 2592) + 1, where 1 is for 'key:0'
ExpectConditionWithinTimeout(
[&]() { return CheckedInt({"dbsize"}) == (count - *slot_size + 1); });
ResetService();
}
}
TEST_F(ClusterFamilyTest, ClusterCrossSlot) {
ConfigSingleNodeCluster(GetMyId());
EXPECT_EQ(RunPrivileged({"dflycluster", "config", config}), "OK");
EXPECT_EQ(Run({"SET", "key", "value"}), "OK"); EXPECT_EQ(Run({"SET", "key", "value"}), "OK");
EXPECT_EQ(Run({"GET", "key"}), "value"); EXPECT_EQ(Run({"GET", "key"}), "value");

View file

@ -409,12 +409,17 @@ OpResult<DbSlice::ItAndUpdater> DbSlice::FindMutableInternal(const Context& cntx
} }
PreUpdate(cntx.db_index, res->it); PreUpdate(cntx.db_index, res->it);
return {{res->it, res->exp_it, // PreUpdate() might have caused a deletion of `it`
AutoUpdater({.action = AutoUpdater::DestructorAction::kRun, if (res->it.IsOccupied()) {
.db_slice = this, return {{res->it, res->exp_it,
.db_ind = cntx.db_index, AutoUpdater({.action = AutoUpdater::DestructorAction::kRun,
.it = res->it, .db_slice = this,
.key = key})}}; .db_ind = cntx.db_index,
.it = res->it,
.key = key})}};
} else {
return OpStatus::KEY_NOTFOUND;
}
} }
DbSlice::ItAndExpConst DbSlice::FindReadOnly(const Context& cntx, std::string_view key) { DbSlice::ItAndExpConst DbSlice::FindReadOnly(const Context& cntx, std::string_view key) {
@ -577,15 +582,20 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
if (res.ok()) { if (res.ok()) {
PreUpdate(cntx.db_index, res->it); PreUpdate(cntx.db_index, res->it);
return DbSlice::AddOrFindResult{ // PreUpdate() might have caused a deletion of `it`
.it = res->it, if (res->it.IsOccupied()) {
.exp_it = res->exp_it, return DbSlice::AddOrFindResult{
.is_new = false, .it = res->it,
.post_updater = AutoUpdater({.action = AutoUpdater::DestructorAction::kRun, .exp_it = res->exp_it,
.db_slice = this, .is_new = false,
.db_ind = cntx.db_index, .post_updater = AutoUpdater({.action = AutoUpdater::DestructorAction::kRun,
.it = res->it, .db_slice = this,
.key = key})}; .db_ind = cntx.db_index,
.it = res->it,
.key = key})};
} else {
res = OpStatus::KEY_NOTFOUND;
}
} }
auto status = res.status(); auto status = res.status();
CHECK(status == OpStatus::KEY_NOTFOUND || status == OpStatus::OUT_OF_MEMORY) << status; CHECK(status == OpStatus::KEY_NOTFOUND || status == OpStatus::OUT_OF_MEMORY) << status;
@ -706,7 +716,8 @@ void DbSlice::FlushSlotsFb(const SlotSet& slot_ids) {
// Slot deletion can take time as it traverses all the database, hence it runs in fiber. // Slot deletion can take time as it traverses all the database, hence it runs in fiber.
// We want to flush all the data of a slot that was added till the time the call to FlushSlotsFb // We want to flush all the data of a slot that was added till the time the call to FlushSlotsFb
// was made. Therefore we delete slots entries with version < next_version // was made. Therefore we delete slots entries with version < next_version
uint64_t next_version = NextVersion(); uint64_t next_version = 0;
std::string tmp; std::string tmp;
auto del_entry_cb = [&](PrimeTable::iterator it) { auto del_entry_cb = [&](PrimeTable::iterator it) {
std::string_view key = it->first.GetSlice(&tmp); std::string_view key = it->first.GetSlice(&tmp);
@ -717,6 +728,33 @@ void DbSlice::FlushSlotsFb(const SlotSet& slot_ids) {
return true; return true;
}; };
auto on_change = [&](DbIndex db_index, const ChangeReq& req) {
FiberAtomicGuard fg;
PrimeTable* table = GetTables(db_index).first;
auto iterate_bucket = [&](DbIndex db_index, PrimeTable::bucket_iterator it) {
while (!it.is_done()) {
del_entry_cb(it);
++it;
}
};
if (const PrimeTable::bucket_iterator* bit = req.update()) {
if (bit->GetVersion() < next_version) {
iterate_bucket(db_index, *bit);
}
} else {
string_view key = get<string_view>(req.change);
table->CVCUponInsert(
next_version, key,
[this, db_index, next_version, iterate_bucket](PrimeTable::bucket_iterator it) {
DCHECK_LT(it.GetVersion(), next_version);
iterate_bucket(db_index, it);
});
}
};
next_version = RegisterOnChange(std::move(on_change));
ServerState& etl = *ServerState::tlocal(); ServerState& etl = *ServerState::tlocal();
PrimeTable* pt = &db_arr_[0]->prime; PrimeTable* pt = &db_arr_[0]->prime;
PrimeTable::Cursor cursor; PrimeTable::Cursor cursor;
@ -730,6 +768,9 @@ void DbSlice::FlushSlotsFb(const SlotSet& slot_ids) {
} }
} while (cursor && etl.gstate() != GlobalState::SHUTTING_DOWN); } while (cursor && etl.gstate() != GlobalState::SHUTTING_DOWN);
UnregisterOnChange(next_version);
etl.DecommitMemory(ServerState::kDataHeap); etl.DecommitMemory(ServerState::kDataHeap);
} }