chore: allow setting huffman tables via DEBUG COMPRESSION SET (#5083)

Also, revert the impact of performance bomb from #2564

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2025-05-08 22:10:59 +03:00 committed by GitHub
parent 9a05343b5f
commit b8ef7cdf69
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 80 additions and 24 deletions

View file

@ -0,0 +1,22 @@
{
"name": "ubuntu24",
"image": "ghcr.io/romange/ubuntu-dev:24",
"customizations": {
"vscode": {
"extensions": [
"ms-vscode.cpptools",
"ms-vscode.cmake-tools",
"ms-vscode.cpptools-themes",
"twxs.cmake"
],
"settings": {
"cmake.buildDirectory": "/build",
"extensions.ignoreRecommendations": true
}
}
},
"mounts": [
"source=ubuntu24-vol,target=/build,type=volume"
],
"postCreateCommand": ".devcontainer/ubuntu24/post-create.sh ${containerWorkspaceFolder}"
}

View file

@ -0,0 +1 @@
../ubuntu22/post-create.sh

View file

@ -57,7 +57,7 @@ if (SUPPORT_USAN AND WITH_USAN)
endif()
if(SANITIZERS)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -rtlib=compiler-rt")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
endif()
include(third_party)

View file

@ -759,7 +759,7 @@ bool CompactObj::InitHuffmanThreadLocal(std::string_view hufftable) {
return false;
}
if (tl.huff_decoder.Load(hufftable, &err_msg)) {
if (!tl.huff_decoder.Load(hufftable, &err_msg)) {
LOG(DFATAL) << "Failed to load huffman table: " << err_msg;
return false;
}
@ -1105,18 +1105,20 @@ void CompactObj::GetString(char* dest) const {
CHECK(!IsExternal());
if (IsInline()) {
if (mask_bits_.encoding) {
size_t decoded_len = taglen_ + 2;
// must be this because we either shortened 17 or 18.
DCHECK_EQ(mask_bits_.encoding, ASCII2_ENC);
DCHECK_EQ(decoded_len, ascii_len(taglen_));
detail::ascii_unpack(to_byte(u_.inline_str), decoded_len, dest);
} else {
memcpy(dest, u_.inline_str, taglen_);
switch (mask_bits_.encoding) {
case ASCII2_ENC:
DCHECK_EQ(taglen_ + 2u, ascii_len(taglen_));
detail::ascii_unpack(to_byte(u_.inline_str), taglen_ + 2, dest);
break;
case HUFFMAN_ENC:
tl.huff_decoder.Decode(u_.inline_str, taglen_, dest);
break;
case NONE_ENC:
memcpy(dest, u_.inline_str, taglen_);
break;
default:
DLOG(FATAL) << "should not reach " << int(mask_bits_.encoding);
}
return;
}
@ -1135,15 +1137,17 @@ void CompactObj::GetString(char* dest) const {
} else if (taglen_ == SMALL_TAG) {
size_t decoded_len = DecodedLen(u_.small_str.size());
// we left some space on the left to allow inplace ascii unpacking.
size_t space_left = decoded_len - u_.small_str.size();
string_view slices[2];
unsigned num = u_.small_str.GetV(slices);
DCHECK_EQ(2u, num);
std::string tmp(decoded_len, ' ');
char* next = tmp.data();
char* next = dest + space_left;
memcpy(next, slices[0].data(), slices[0].size());
next += slices[0].size();
memcpy(next, slices[1].data(), slices[1].size());
detail::ascii_unpack_simd(reinterpret_cast<uint8_t*>(tmp.data()), decoded_len, dest);
detail::ascii_unpack_simd(reinterpret_cast<uint8_t*>(dest + space_left), decoded_len, dest);
} else {
LOG(FATAL) << "Unsupported tag " << int(taglen_);
}

View file

@ -622,6 +622,12 @@ TEST_F(CompactObjectTest, RawInterface) {
}
}
TEST_F(CompactObjectTest, AsanTriggerReadOverflow) {
cobj_.SetString(string(32, 'a'));
auto dest = make_unique<char[]>(32);
cobj_.GetString(dest.get());
}
TEST_F(CompactObjectTest, lpGetInteger) {
int64_t val = -1;
uint8_t* lp = lpNew(0);

View file

@ -269,6 +269,11 @@ void ascii_unpack(const uint8_t* bin, size_t ascii_len, char* ascii) {
}
}
// In some cases we may read 2 bytes more than "allowed" (16 bytes during mm_loadu_si128
// instead of 14 bytes that we actually need). Since we ignore these two bytes, there is no harm.
// See CompactObjectTest.AsanTriggerReadOverflow for more details.
// __attribute__((no_sanitize_address)) does not work here for some reason, so I had
// to put it in sse_port.h.
void ascii_unpack_simd(const uint8_t* bin, size_t ascii_len, char* ascii) {
#ifdef __SSSE3__

View file

@ -16,8 +16,13 @@
namespace dfly {
#if defined(__clang__)
__attribute__((no_sanitize_address))
#endif
#ifndef __s390x__
inline __m128i mm_loadu_si128(const __m128i* ptr) {
inline __m128i
mm_loadu_si128(const __m128i* ptr) {
#if defined(__aarch64__)
__m128i res;
memcpy(&res, ptr, sizeof(res));

View file

@ -52,8 +52,6 @@ namespace dfly {
using namespace util;
using boost::intrusive_ptr;
using namespace facade;
namespace fs = std::filesystem;
using absl::GetFlag;
using absl::StrAppend;
using absl::StrCat;
@ -297,9 +295,13 @@ void DoComputeHist(CompactObjType type, EngineShard* shard, ConnectionContext* c
cursor = table.Traverse(cursor, [&](PrimeIterator it) {
scratch.clear();
if (type == kInvalidCompactObjType) { // KEYSPACE
it->first.GetString(&scratch);
if (it->first.MallocUsed() > 0) {
it->first.GetString(&scratch);
}
} else if (type == OBJ_STRING && it->second.ObjType() == OBJ_STRING) {
it->second.GetString(&scratch);
if (it->first.MallocUsed() > 0) {
it->second.GetString(&scratch);
}
} else if (type == OBJ_ZSET && it->second.ObjType() == OBJ_ZSET) {
container_utils::IterateSortedSet(
it->second.GetRobjWrapper(), [&](container_utils::ContainerEntry entry, double) {
@ -328,9 +330,10 @@ void DoComputeHist(CompactObjType type, EngineShard* shard, ConnectionContext* c
});
}
size_t len = std::min(scratch.size(), kMaxLen);
if (len > 16) // filtering out values that are too small and hosted inline.
if (scratch.size() > 0) {
size_t len = std::min(scratch.size(), kMaxLen);
HIST_add(dest->hist.data(), scratch.data(), len);
}
});
if (steps >= 20000) {
@ -598,7 +601,7 @@ void DebugCmd::Run(CmdArgList args, facade::SinkReplyBuilder* builder) {
" traffic logging is stopped.",
"RECVSIZE [<tid> | ENABLE | DISABLE]",
" Prints the histogram of the received request sizes on the given thread",
"COMPRESSION [IMPORT <bintable> | EXPORT] [type]",
"COMPRESSION [IMPORT <bintable> | EXPORT | SET <bintable>] [type]",
" Estimate the compressibility of values of the given type. if no type is given, ",
" checks compressibility of keys. If IN is specified, then the provided ",
" bintable is used to check compressibility. If OUT is specified, then ",
@ -1290,6 +1293,16 @@ void DebugCmd::Compression(CmdArgList args, facade::SinkReplyBuilder* builder) {
string bintable;
bool print_bintable = false;
if (parser.Check("SET", &bintable)) {
atomic_bool succeed = true;
shard_set->RunBriefInParallel([&](EngineShard* shard) {
if (!CompactObj::InitHuffmanThreadLocal(bintable)) {
succeed = false;
}
});
return succeed ? builder->SendOk() : builder->SendError("Failed to set bintable");
}
if (parser.Check("EXPORT")) {
print_bintable = true;
} else {