feat(server): Memory tracker (#2501)

* feat(server): Memory tracker

* PR comments
This commit is contained in:
Shahar Mike 2024-01-30 12:44:07 +02:00 committed by GitHub
parent ad90602bc2
commit b2bdb0f683
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 388 additions and 37 deletions

View file

@ -4,7 +4,7 @@ set(SEARCH_LIB query_parser)
add_library(dfly_core compact_object.cc dragonfly_core.cc extent_tree.cc
external_alloc.cc interpreter.cc json_object.cc mi_memory_resource.cc sds_utils.cc
segment_allocator.cc score_map.cc small_string.cc sorted_map.cc
tx_queue.cc dense_set.cc task_queue.cc
tx_queue.cc dense_set.cc allocation_tracker.cc task_queue.cc
string_set.cc string_map.cc detail/bitpacking.cc)
cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua lua_modules

View file

@ -0,0 +1,79 @@
// Copyright 2024, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "core/allocation_tracker.h"
#include "absl/random/random.h"
#include "base/logging.h"
#include "util/fibers/stacktrace.h"
namespace dfly {
namespace {
// Per-thread tracker instance; AllocationTracker::Get() returns a reference to it.
thread_local AllocationTracker g_tracker;
// Per-thread random generator; ProcessNew() draws its sampling value from it.
thread_local absl::InsecureBitGen g_bitgen;
} // namespace
// Hands out the calling thread's tracker instance.
AllocationTracker& AllocationTracker::Get() {
  AllocationTracker& per_thread_instance = g_tracker;
  return per_thread_instance;
}
// Registers a new size band on this tracker.
//
// Returns false, leaving the tracker untouched, when the maximum number of bands is already
// registered; returns true once `info` has been appended.
bool AllocationTracker::Add(const TrackingInfo& info) {
  // max_size() is only the theoretical element limit of the container, so comparing against it
  // never rejects anything. Enforce the documented limit of 4 parallel bands instead; it equals
  // the inline capacity `tracking_` is declared with.
  constexpr size_t kMaxBands = 4;
  if (tracking_.size() >= kMaxBands) {
    return false;
  }
  tracking_.push_back(info);
  return true;
}
// Removes every band whose bounds are exactly [lower_bound, upper_bound].
//
// Returns true if at least one band was removed, false if no band matched.
bool AllocationTracker::Remove(size_t lower_bound, size_t upper_bound) {
  const size_t before_size = tracking_.size();
  tracking_.erase(std::remove_if(tracking_.begin(), tracking_.end(),
                                 [&](const TrackingInfo& info) {
                                   return info.lower_bound == lower_bound &&
                                          info.upper_bound == upper_bound;
                                 }),
                  tracking_.end());
  // The vector shrinks only when something was erased. Callers treat `false` as failure, so
  // success must be reported as "size changed", not "size unchanged".
  return tracking_.size() != before_size;
}
void AllocationTracker::Clear() {
tracking_.clear();
}
// Exposes a read-only view over the bands currently registered on this tracker.
absl::Span<const AllocationTracker::TrackingInfo> AllocationTracker::GetRanges() const {
  const auto& bands = tracking_;
  return absl::MakeConstSpan(bands);
}
void AllocationTracker::ProcessNew(void* ptr, size_t size) {
if (tracking_.empty()) {
return;
}
thread_local bool inside_process_new = false;
if (inside_process_new) {
return;
}
// Prevent endless recursion, in case logging allocates memory
inside_process_new = true;
double random = absl::Uniform(g_bitgen, 0.0, 1.0);
for (const auto& band : tracking_) {
if (random >= band.sample_odds || size > band.upper_bound || size < band.lower_bound) {
continue;
}
LOG(INFO) << "Allocating " << size << " bytes (" << ptr
<< "). Stack: " << util::fb2::GetStacktrace();
}
inside_process_new = false;
}
// Hook for deallocations. Deletes are currently not handled, so this is a no-op.
void AllocationTracker::ProcessDelete(void* ptr) {
  static_cast<void>(ptr);
}
} // namespace dfly

View file

@ -0,0 +1,154 @@
// Copyright 2024, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#include <absl/container/inlined_vector.h>
#include <mimalloc.h>
#include <cstddef>
namespace dfly {
// Allows "tracking" of memory allocations by size bands. Tracking is naive in that it only prints
// the stack trace of the memory allocation, if matched by size & sampling criteria.
// Supports up to 4 different bands in parallel.
//
// Thread-local. Must be configured in all relevant threads separately.
//
// #define INJECT_ALLOCATION_TRACKER before #include exactly once to override new/delete
class AllocationTracker {
public:
// Describes one tracked band of allocation sizes.
struct TrackingInfo {
// Smallest allocation size that belongs to the band (inclusive).
size_t lower_bound = 0;
// Largest allocation size that belongs to the band (inclusive).
size_t upper_bound = 0;
// Sampling threshold: an allocation is logged only when a value drawn uniformly from [0, 1) is
// below it.
double sample_odds = 0.0;
};
// Returns a thread-local reference.
static AllocationTracker& Get();
// Will track memory allocations in range [lower, upper]. Sample odds must be between [0, 1],
// where 1 means all allocations are tracked and 0 means none.
// Returns false, without adding anything, when the band list is considered full; true otherwise.
bool Add(const TrackingInfo& info);
// Removes all tracking exactly matching lower_bound and upper_bound.
// The MEMORY TRACK REMOVE handler reports an error when this returns false.
// NOTE(review): confirm the exact meaning of the return value against the implementation.
bool Remove(size_t lower_bound, size_t upper_bound);
// Clears *all* tracking.
void Clear();
// Returns a read-only view over the currently registered bands.
absl::Span<const TrackingInfo> GetRanges() const;
// Called by the operator new overrides below with the allocated pointer and the requested size.
// Logs the allocation together with a stack trace when it matches a band and passes sampling.
void ProcessNew(void* ptr, size_t size);
// Called by the operator delete overrides below. Deletes are currently not handled.
void ProcessDelete(void* ptr);
private:
// Registered bands; up to 4 are stored inline.
absl::InlinedVector<TrackingInfo, 4> tracking_;
};
} // namespace dfly
#ifdef INJECT_ALLOCATION_TRACKER
// Code here is copied from mimalloc-new-delete, and modified to add tracking
// Unsized scalar delete: notify the tracker, then release through mimalloc.
void operator delete(void* p) noexcept {
  auto& tracker = dfly::AllocationTracker::Get();
  tracker.ProcessDelete(p);
  mi_free(p);
}

// Unsized array delete.
void operator delete[](void* p) noexcept {
  auto& tracker = dfly::AllocationTracker::Get();
  tracker.ProcessDelete(p);
  mi_free(p);
}

// Unsized scalar delete, nothrow overload.
void operator delete(void* p, const std::nothrow_t&) noexcept {
  auto& tracker = dfly::AllocationTracker::Get();
  tracker.ProcessDelete(p);
  mi_free(p);
}

// Unsized array delete, nothrow overload.
void operator delete[](void* p, const std::nothrow_t&) noexcept {
  auto& tracker = dfly::AllocationTracker::Get();
  tracker.ProcessDelete(p);
  mi_free(p);
}
// Throwing scalar new: allocate through mimalloc, then report the allocation to the tracker.
void* operator new(std::size_t n) noexcept(false) {
  void* const mem = mi_new(n);
  dfly::AllocationTracker::Get().ProcessNew(mem, n);
  return mem;
}

// Throwing array new.
void* operator new[](std::size_t n) noexcept(false) {
  void* const mem = mi_new(n);
  dfly::AllocationTracker::Get().ProcessNew(mem, n);
  return mem;
}

// Nothrow scalar new; the tracker is notified with whatever mi_new_nothrow returned.
void* operator new(std::size_t n, const std::nothrow_t& tag) noexcept {
  static_cast<void>(tag);
  void* const mem = mi_new_nothrow(n);
  dfly::AllocationTracker::Get().ProcessNew(mem, n);
  return mem;
}

// Nothrow array new; the tracker is notified with whatever mi_new_nothrow returned.
void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept {
  static_cast<void>(tag);
  void* const mem = mi_new_nothrow(n);
  dfly::AllocationTracker::Get().ProcessNew(mem, n);
  return mem;
}
#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
// Sized scalar delete: notify the tracker, then release through mimalloc with the size hint.
void operator delete(void* p, std::size_t n) noexcept {
  auto& tracker = dfly::AllocationTracker::Get();
  tracker.ProcessDelete(p);
  mi_free_size(p, n);
}

// Sized array delete.
void operator delete[](void* p, std::size_t n) noexcept {
  auto& tracker = dfly::AllocationTracker::Get();
  tracker.ProcessDelete(p);
  mi_free_size(p, n);
}
#endif
#if (__cplusplus > 201402L || defined(__cpp_aligned_new))
// Aligned scalar delete: notify the tracker, then release through mimalloc.
void operator delete(void* p, std::align_val_t al) noexcept {
  dfly::AllocationTracker::Get().ProcessDelete(p);
  const auto alignment = static_cast<size_t>(al);
  mi_free_aligned(p, alignment);
}

// Aligned array delete.
void operator delete[](void* p, std::align_val_t al) noexcept {
  dfly::AllocationTracker::Get().ProcessDelete(p);
  const auto alignment = static_cast<size_t>(al);
  mi_free_aligned(p, alignment);
}

// Sized and aligned scalar delete.
void operator delete(void* p, std::size_t n, std::align_val_t al) noexcept {
  dfly::AllocationTracker::Get().ProcessDelete(p);
  const auto alignment = static_cast<size_t>(al);
  mi_free_size_aligned(p, n, alignment);
}

// Sized and aligned array delete.
void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept {
  dfly::AllocationTracker::Get().ProcessDelete(p);
  const auto alignment = static_cast<size_t>(al);
  mi_free_size_aligned(p, n, alignment);
}

// Aligned scalar delete, nothrow overload.
void operator delete(void* p, std::align_val_t al, const std::nothrow_t&) noexcept {
  dfly::AllocationTracker::Get().ProcessDelete(p);
  const auto alignment = static_cast<size_t>(al);
  mi_free_aligned(p, alignment);
}

// Aligned array delete, nothrow overload.
void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept {
  dfly::AllocationTracker::Get().ProcessDelete(p);
  const auto alignment = static_cast<size_t>(al);
  mi_free_aligned(p, alignment);
}
// Throwing aligned scalar new: allocate through mimalloc, then report to the tracker.
void* operator new(std::size_t n, std::align_val_t al) noexcept(false) {
  const auto alignment = static_cast<size_t>(al);
  void* const mem = mi_new_aligned(n, alignment);
  dfly::AllocationTracker::Get().ProcessNew(mem, n);
  return mem;
}

// Throwing aligned array new.
void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) {
  const auto alignment = static_cast<size_t>(al);
  void* const mem = mi_new_aligned(n, alignment);
  dfly::AllocationTracker::Get().ProcessNew(mem, n);
  return mem;
}

// Nothrow aligned scalar new; the tracker is notified with whatever the allocator returned.
void* operator new(std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept {
  const auto alignment = static_cast<size_t>(al);
  void* const mem = mi_new_aligned_nothrow(n, alignment);
  dfly::AllocationTracker::Get().ProcessNew(mem, n);
  return mem;
}

// Nothrow aligned array new; the tracker is notified with whatever the allocator returned.
void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept {
  const auto alignment = static_cast<size_t>(al);
  void* const mem = mi_new_aligned_nothrow(n, alignment);
  dfly::AllocationTracker::Get().ProcessNew(mem, n);
  return mem;
}
#endif
#endif // INJECT_ALLOCATION_TRACKER

View file

@ -1,6 +1,11 @@
add_executable(dragonfly dfly_main.cc version_monitor.cc)
cxx_link(dragonfly base dragonfly_lib)
# Build-time switch (ON by default) for allocation tracking via the MEMORY TRACK command.
# When enabled, DFLY_ENABLE_MEMORY_TRACKING is defined while compiling the dragonfly executable.
option(DF_ENABLE_MEMORY_TRACKING "Adds memory tracking debugging via MEMORY TRACK command" ON)
if (DF_ENABLE_MEMORY_TRACKING)
target_compile_definitions(dragonfly PRIVATE DFLY_ENABLE_MEMORY_TRACKING)
endif()
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_BUILD_TYPE STREQUAL "Release")
# Add core2 only to this file, thus avoiding instructions in this object file that
# can cause SIGILL.
@ -47,6 +52,10 @@ add_library(dragonfly_lib engine_shard_set.cc channel_store.cc
acl/user.cc acl/user_registry.cc acl/acl_family.cc
acl/validator.cc acl/helpers.cc)
# Also define DFLY_ENABLE_MEMORY_TRACKING for dragonfly_lib when the option is enabled.
if (DF_ENABLE_MEMORY_TRACKING)
target_compile_definitions(dragonfly_lib PRIVATE DFLY_ENABLE_MEMORY_TRACKING)
endif()
cxx_link(dfly_transaction dfly_core strings_lib TRDP::fast_float)
option(PRINT_STACKTRACES_ON_SIGNAL "Enables DF to print all fiber stacktraces on SIGUSR1" OFF)

View file

@ -3,13 +3,6 @@
// See LICENSE for licensing terms.
//
#include "absl/cleanup/cleanup.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/numbers.h"
#ifdef NDEBUG
#include <mimalloc-new-delete.h>
#endif
#include <absl/flags/parse.h>
#include <absl/flags/usage.h>
#include <absl/flags/usage_config.h>
@ -18,6 +11,17 @@
#include <absl/strings/str_split.h>
#include <absl/strings/strip.h>
#include "absl/cleanup/cleanup.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/numbers.h"
#ifdef DFLY_ENABLE_MEMORY_TRACKING
#define INJECT_ALLOCATION_TRACKER
#include "core/allocation_tracker.h"
#else
#include <mimalloc-new-delete.h>
#endif
#ifdef __linux__
#include <liburing.h>
#endif

View file

@ -8,6 +8,9 @@
#include <mimalloc.h>
#include "base/io_buf.h"
#include "base/logging.h"
#include "core/allocation_tracker.h"
#include "facade/cmd_arg_parser.h"
#include "facade/dragonfly_connection.h"
#include "facade/error.h"
#include "server/engine_shard_set.h"
@ -41,7 +44,7 @@ bool MiArenaVisit(const mi_heap_t* heap, const mi_heap_area_t* area, void* block
return true;
};
std::string MallocStats(bool backing, unsigned tid) {
std::string MallocStatsCb(bool backing, unsigned tid) {
string str;
uint64_t start = absl::GetCurrentTimeNanos();
@ -121,35 +124,12 @@ void MemoryCmd::Run(CmdArgList args) {
}
if (sub_cmd == "MALLOC-STATS") {
uint32_t tid = 0;
bool backing = false;
if (args.size() >= 2) {
ToUpper(&args[1]);
return MallocStats(args);
}
unsigned tid_indx = 1;
if (ArgS(args, tid_indx) == "BACKING") {
++tid_indx;
backing = true;
}
if (args.size() > tid_indx && !absl::SimpleAtoi(ArgS(args, tid_indx), &tid)) {
return cntx_->SendError(kInvalidIntErr);
}
}
if (backing && tid >= shard_set->pool()->size()) {
return cntx_->SendError(
absl::StrCat("Thread id must be less than ", shard_set->pool()->size()));
}
if (!backing && tid >= shard_set->size()) {
return cntx_->SendError(absl::StrCat("Thread id must be less than ", shard_set->size()));
}
string res = shard_set->pool()->at(tid)->AwaitBrief([=] { return MallocStats(backing, tid); });
auto* rb = static_cast<RedisReplyBuilder*>(cntx_->reply_builder());
return rb->SendVerbatimString(res);
if (sub_cmd == "TRACK") {
args.remove_prefix(1);
return Track(args);
}
string err = UnknownSubCmd(sub_cmd, "MEMORY");
@ -270,6 +250,38 @@ void MemoryCmd::Stats() {
}
}
// Handles MEMORY MALLOC-STATS [BACKING] [thread-id].
//
// `args` still contains the sub-command at index 0. An optional "BACKING" keyword and an optional
// numeric thread id follow. The statistics are collected on the selected thread and sent back as
// a verbatim string.
void MemoryCmd::MallocStats(CmdArgList args) {
  bool backing = false;
  uint32_t tid = 0;

  if (args.size() >= 2) {
    ToUpper(&args[1]);

    unsigned next_arg = 1;
    if (ArgS(args, next_arg) == "BACKING") {
      backing = true;
      ++next_arg;
    }

    if (next_arg < args.size() && !absl::SimpleAtoi(ArgS(args, next_arg), &tid)) {
      return cntx_->SendError(kInvalidIntErr);
    }
  }

  // The valid thread-id range depends on whether BACKING was given.
  if (backing) {
    if (tid >= shard_set->pool()->size()) {
      return cntx_->SendError(
          absl::StrCat("Thread id must be less than ", shard_set->pool()->size()));
    }
  } else if (tid >= shard_set->size()) {
    return cntx_->SendError(absl::StrCat("Thread id must be less than ", shard_set->size()));
  }

  string res = shard_set->pool()->at(tid)->AwaitBrief([=] { return MallocStatsCb(backing, tid); });
  auto* rb = static_cast<RedisReplyBuilder*>(cntx_->reply_builder());
  return rb->SendVerbatimString(res);
}
void MemoryCmd::Usage(std::string_view key) {
ShardId sid = Shard(key, shard_set->size());
ssize_t memory_usage = shard_set->pool()->at(sid)->AwaitBrief([key, this]() -> ssize_t {
@ -289,4 +301,95 @@ void MemoryCmd::Usage(std::string_view key) {
rb->SendLong(memory_usage);
}
// Allow tracking of memory allocation via `new` and `delete` based on input criteria.
//
// MEMORY TRACK ADD <lower-bound> <upper-bound> <sample-odds>
// - Sets up tracking memory allocations in the (inclusive) range [lower, upper]
// - sample-odds indicates how many of the allocations will be logged, where 0 means none, 1 means
// all, and everything in between is linear
// - At most 4 trackings can be active in parallel
//
// MEMORY TRACK REMOVE <lower-bound> <upper-bound>
// - Removes all memory tracking added which match bounds
// - Could remove 0, 1 or more
//
// MEMORY TRACK CLEAR
// - Removes all memory tracking
//
// MEMORY TRACK GET
// - Returns an array with all active tracking
//
// This command is not documented in `MEMORY HELP` because it's meant to be used internally.
// Dispatches the MEMORY TRACK sub-commands (ADD / REMOVE / CLEAR / GET).
//
// `args` starts at the TRACK sub-command name. ADD, REMOVE and CLEAR are applied to the
// thread-local AllocationTracker of every thread in the pool; GET reports the bands of the
// calling thread's tracker only. Replies with an error on parse failures, on unknown
// sub-commands, or when any thread reports a failed Add()/Remove().
void MemoryCmd::Track(CmdArgList args) {
#ifndef DFLY_ENABLE_MEMORY_TRACKING
  return cntx_->SendError("MEMORY TRACK must be enabled at build time.");
#endif

  CmdArgParser parser(args);

  string_view sub_cmd = parser.ToUpper().Next();
  if (parser.HasError()) {
    return cntx_->SendError(parser.Error()->MakeReply());
  }

  if (sub_cmd == "ADD") {
    auto [lower_bound, upper_bound, odds] = parser.Next<size_t, size_t, double>();
    if (parser.HasError()) {
      return cntx_->SendError(parser.Error()->MakeReply());
    }

    // Must be explicitly initialized: before C++20 a default-constructed atomic holds an
    // indeterminate value, which made the check below report spurious failures.
    atomic_bool error{false};
    shard_set->pool()->Await([&](unsigned index, auto*) {
      if (!AllocationTracker::Get().Add(
              {.lower_bound = lower_bound, .upper_bound = upper_bound, .sample_odds = odds})) {
        error.store(true);
      }
    });

    if (error.load()) {
      return cntx_->SendError("Unable to add tracker");
    } else {
      return cntx_->SendOk();
    }
  }

  if (sub_cmd == "REMOVE") {
    auto [lower_bound, upper_bound] = parser.Next<size_t, size_t>();
    if (parser.HasError()) {
      return cntx_->SendError(parser.Error()->MakeReply());
    }

    // Explicitly initialized for the same reason as in ADD above.
    atomic_bool error{false};
    shard_set->pool()->Await([&](unsigned index, auto*) {
      if (!AllocationTracker::Get().Remove(lower_bound, upper_bound)) {
        error.store(true);
      }
    });

    if (error.load()) {
      return cntx_->SendError("Unable to remove tracker");
    } else {
      return cntx_->SendOk();
    }
  }

  if (sub_cmd == "CLEAR") {
    shard_set->pool()->Await([&](unsigned index, auto*) { AllocationTracker::Get().Clear(); });
    return cntx_->SendOk();
  }

  if (sub_cmd == "GET") {
    // Only the current thread's tracker is inspected here.
    auto ranges = AllocationTracker::Get().GetRanges();
    auto* rb = static_cast<facade::RedisReplyBuilder*>(cntx_->reply_builder());
    rb->StartArray(ranges.size());
    for (const auto& range : ranges) {
      rb->SendSimpleString(
          absl::StrCat(range.lower_bound, ",", range.upper_bound, ",", range.sample_odds));
    }
    return;
  }

  return cntx_->SendError(kSyntaxErrType);
}
} // namespace dfly

View file

@ -18,7 +18,9 @@ class MemoryCmd {
private:
void Stats();
void MallocStats(CmdArgList args);
void Usage(std::string_view key);
void Track(CmdArgList args);
ConnectionContext* cntx_;
ServerFamily* owner_;