fix: ExternalAllocator::Free with large sizes (#4388)

ExternalAllocator allocates large sizes directly from the extent tree bypassing segment data structure.
Unfortunately, we forgot to align Free() the same way. This PR:

1. Make sure that we add back the allocated range to the extent tree in Free.
2. Rewrite and simplify ExtentTree::Add implementation.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2024-12-31 18:41:14 +02:00 committed by GitHub
parent 5e7acbb396
commit 2abe2c0ac2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 41 additions and 35 deletions

View file

@ -15,47 +15,39 @@ void ExtentTree::Add(size_t start, size_t len) {
DCHECK_GT(len, 0u);
DCHECK_EQ(len_extents_.size(), extents_.size());
size_t end = start + len;
if (extents_.empty()) {
extents_.emplace(start, end);
len_extents_.emplace(len, start);
return;
}
auto it = extents_.lower_bound(start);
bool merged = false;
optional<absl::btree_map<size_t, size_t>::iterator> prev_extent;
if (it != extents_.begin()) {
auto prev = it;
--prev;
DCHECK_LE(prev->second, start);
if (prev->second == start) { // [first, second = start, end)
merged = true;
len_extents_.erase(pair{prev->second - prev->first, prev->first});
// check if we join: prev, [start, end), [it->first, it->second]
if (it != extents_.end() && end == it->first) { // [first, end = it->first, it->second)
prev->second = it->second;
len_extents_.erase(pair{it->second - it->first, it->first});
extents_.erase(it);
} else {
prev->second = end; // just extend prev
}
len_extents_.emplace(prev->second - prev->first, prev->first);
if (prev->second == start) { // combine with the previous extent
size_t prev_len = prev->second - prev->first;
CHECK_EQ(1u, len_extents_.erase(pair{prev_len, prev->first}));
prev->second += len;
start = prev->first;
len += prev_len;
prev_extent = prev;
}
}
if (!merged) {
if (end == it->first) { // [start, end), [it->first, it->second]
len_extents_.erase(pair{it->second - it->first, it->first});
end = it->second;
if (it != extents_.end()) {
DCHECK_GE(it->first, start + len);
if (start + len == it->first) { // merge with the next extent
size_t it_len = it->second - it->first;
CHECK_EQ(1u, len_extents_.erase(pair{it_len, it->first}));
extents_.erase(it);
len += it_len;
}
extents_.emplace(start, end);
len_extents_.emplace(end - start, start);
}
len_extents_.emplace(len, start);
if (prev_extent) {
(*prev_extent)->second = start + len;
} else {
extents_.emplace(start, start + len);
}
}

View file

@ -28,28 +28,28 @@ TEST_F(ExtentTreeTest, Basic) {
tree_.Add(0, 256);
auto op = tree_.GetRange(64, 16);
EXPECT_TRUE(op);
EXPECT_THAT(*op, testing::Pair(0, 64));
EXPECT_THAT(*op, testing::Pair(0, 64)); // [64, 256)
tree_.Add(56, 8);
op = tree_.GetRange(64, 16);
EXPECT_TRUE(op);
EXPECT_THAT(*op, testing::Pair(64, 128));
EXPECT_THAT(*op, testing::Pair(64, 128)); // {[56, 64), [128, 256)}
op = tree_.GetRange(18, 2);
EXPECT_TRUE(op);
EXPECT_THAT(*op, testing::Pair(128, 146));
EXPECT_THAT(*op, testing::Pair(128, 146)); // {[56, 64), [146, 256)}
op = tree_.GetRange(80, 16);
EXPECT_TRUE(op);
EXPECT_THAT(*op, testing::Pair(160, 240));
EXPECT_THAT(*op, testing::Pair(160, 240)); // {[56, 64), [146, 160), [240, 256)}
op = tree_.GetRange(4, 1);
EXPECT_TRUE(op);
EXPECT_THAT(*op, testing::Pair(56, 60));
EXPECT_THAT(*op, testing::Pair(56, 60)); // {[60, 64), [146, 160), [240, 256)}
op = tree_.GetRange(32, 1);
EXPECT_FALSE(op);
tree_.Add(64, 240 - 64);
tree_.Add(64, 146 - 64);
op = tree_.GetRange(32, 4);
EXPECT_TRUE(op);
EXPECT_THAT(*op, testing::Pair(60, 92));

View file

@ -367,6 +367,12 @@ int64_t ExternalAllocator::Malloc(size_t sz) {
}
void ExternalAllocator::Free(size_t offset, size_t sz) {
if (sz > kMediumObjMaxSize) {
size_t align_sz = alignup(sz, 4_KB);
extent_tree_.Add(offset, align_sz);
return;
}
size_t idx = offset / 256_MB;
size_t delta = offset % 256_MB;
CHECK_LT(idx, segments_.size());

View file

@ -146,4 +146,12 @@ TEST_F(ExternalAllocatorTest, EmptyFull) {
EXPECT_GT(ext_alloc_.Malloc(kAllocSize), 0u);
}
TEST_F(ExternalAllocatorTest, AllocLarge) {
ext_alloc_.AddStorage(0, kSegSize);
off_t offs = ext_alloc_.Malloc(2_MB - 1);
EXPECT_EQ(offs, 0);
ext_alloc_.Free(offs, 2_MB - 1);
}
} // namespace dfly::tiering