From 2abe2c0ac2d921f539e1dfe52b9c739818350bf6 Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Tue, 31 Dec 2024 18:41:14 +0200 Subject: [PATCH] fix: ExternalAllocator::Free with large sizes (#4388) ExternalAllocator allocates large sizes directly from the extent tree bypassing segment data structure. Unfortunately, we forgot to align Free() the same way. This PR: 1. Make sure that we add back the allocated range to the extent tree in Free. 2. Rewrite and simplify ExtentTree::Add implementation. Signed-off-by: Roman Gershman --- src/core/extent_tree.cc | 50 ++++++++++------------- src/core/extent_tree_test.cc | 12 +++--- src/server/tiering/external_alloc.cc | 6 +++ src/server/tiering/external_alloc_test.cc | 8 ++++ 4 files changed, 41 insertions(+), 35 deletions(-) diff --git a/src/core/extent_tree.cc b/src/core/extent_tree.cc index 555dddf65..f05bb8475 100644 --- a/src/core/extent_tree.cc +++ b/src/core/extent_tree.cc @@ -15,47 +15,39 @@ void ExtentTree::Add(size_t start, size_t len) { DCHECK_GT(len, 0u); DCHECK_EQ(len_extents_.size(), extents_.size()); - size_t end = start + len; - - if (extents_.empty()) { - extents_.emplace(start, end); - len_extents_.emplace(len, start); - - return; - } - auto it = extents_.lower_bound(start); - bool merged = false; + optional::iterator> prev_extent; if (it != extents_.begin()) { auto prev = it; --prev; DCHECK_LE(prev->second, start); - if (prev->second == start) { // [first, second = start, end) - merged = true; - len_extents_.erase(pair{prev->second - prev->first, prev->first}); - - // check if we join: prev, [start, end), [it->first, it->second] - if (it != extents_.end() && end == it->first) { // [first, end = it->first, it->second) - prev->second = it->second; - len_extents_.erase(pair{it->second - it->first, it->first}); - extents_.erase(it); - } else { - prev->second = end; // just extend prev - } - len_extents_.emplace(prev->second - prev->first, prev->first); + if (prev->second == start) { // combine with the previous extent + size_t prev_len = prev->second - prev->first; + CHECK_EQ(1u, len_extents_.erase(pair{prev_len, prev->first})); + prev->second += len; + start = prev->first; + len += prev_len; + prev_extent = prev; } } - if (!merged) { - if (end == it->first) { // [start, end), [it->first, it->second] - len_extents_.erase(pair{it->second - it->first, it->first}); - end = it->second; + if (it != extents_.end()) { + DCHECK_GE(it->first, start + len); + if (start + len == it->first) { // merge with the next extent + size_t it_len = it->second - it->first; + CHECK_EQ(1u, len_extents_.erase(pair{it_len, it->first})); extents_.erase(it); + len += it_len; } - extents_.emplace(start, end); - len_extents_.emplace(end - start, start); + } + + len_extents_.emplace(len, start); + if (prev_extent) { + (*prev_extent)->second = start + len; + } else { + extents_.emplace(start, start + len); } } diff --git a/src/core/extent_tree_test.cc b/src/core/extent_tree_test.cc index 74f682a73..6a4ccc652 100644 --- a/src/core/extent_tree_test.cc +++ b/src/core/extent_tree_test.cc @@ -28,28 +28,28 @@ TEST_F(ExtentTreeTest, Basic) { tree_.Add(0, 256); auto op = tree_.GetRange(64, 16); EXPECT_TRUE(op); - EXPECT_THAT(*op, testing::Pair(0, 64)); + EXPECT_THAT(*op, testing::Pair(0, 64)); // [64, 256) tree_.Add(56, 8); op = tree_.GetRange(64, 16); EXPECT_TRUE(op); - EXPECT_THAT(*op, testing::Pair(64, 128)); + EXPECT_THAT(*op, testing::Pair(64, 128)); // {[56, 64), [128, 256)} op = tree_.GetRange(18, 2); EXPECT_TRUE(op); - EXPECT_THAT(*op, testing::Pair(128, 146)); + EXPECT_THAT(*op, testing::Pair(128, 146)); // {[56, 64), [146, 256)} op = tree_.GetRange(80, 16); EXPECT_TRUE(op); - EXPECT_THAT(*op, testing::Pair(160, 240)); + EXPECT_THAT(*op, testing::Pair(160, 240)); // {[56, 64), [146, 160), [240, 256)} op = tree_.GetRange(4, 1); EXPECT_TRUE(op); - EXPECT_THAT(*op, testing::Pair(56, 60)); + EXPECT_THAT(*op, testing::Pair(56, 60)); // {[60, 64), [146, 160), [240, 256)} op = tree_.GetRange(32, 1); EXPECT_FALSE(op); - tree_.Add(64, 240 - 64); + tree_.Add(64, 146 - 64); op = tree_.GetRange(32, 4); EXPECT_TRUE(op); EXPECT_THAT(*op, testing::Pair(60, 92)); diff --git a/src/server/tiering/external_alloc.cc b/src/server/tiering/external_alloc.cc index fd8b3c8e0..37f016c72 100644 --- a/src/server/tiering/external_alloc.cc +++ b/src/server/tiering/external_alloc.cc @@ -367,6 +367,12 @@ int64_t ExternalAllocator::Malloc(size_t sz) { } void ExternalAllocator::Free(size_t offset, size_t sz) { + if (sz > kMediumObjMaxSize) { + size_t align_sz = alignup(sz, 4_KB); + extent_tree_.Add(offset, align_sz); + return; + } + size_t idx = offset / 256_MB; size_t delta = offset % 256_MB; CHECK_LT(idx, segments_.size()); diff --git a/src/server/tiering/external_alloc_test.cc b/src/server/tiering/external_alloc_test.cc index dccd44140..76db86dc0 100644 --- a/src/server/tiering/external_alloc_test.cc +++ b/src/server/tiering/external_alloc_test.cc @@ -146,4 +146,12 @@ TEST_F(ExternalAllocatorTest, EmptyFull) { EXPECT_GT(ext_alloc_.Malloc(kAllocSize), 0u); } +TEST_F(ExternalAllocatorTest, AllocLarge) { + ext_alloc_.AddStorage(0, kSegSize); + + off_t offs = ext_alloc_.Malloc(2_MB - 1); + EXPECT_EQ(offs, 0); + ext_alloc_.Free(offs, 2_MB - 1); +} + } // namespace dfly::tiering