feat: support lz4 compression method (#4610)

The feature is enabled via the QList::set_compr_method() call, but since no production
code calls it yet, lz4 compression is still disabled in Dragonfly.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2025-02-14 13:17:41 +02:00 committed by GitHub
parent ff7a0d58e6
commit eade07ab3f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 247 additions and 94 deletions

View file

@ -81,3 +81,4 @@ Checks: >
# modernize-use-nullptr, # modernize-use-nullptr,
# modernize-use-equals-default, # modernize-use-equals-default,
# readability-qualified-auto, # readability-qualified-auto,
cppcoreguidelines-narrowing-conversions.WarnOnIntegerNarrowingConversion: 'false'

View file

@ -10,7 +10,7 @@ add_library(dfly_core allocation_tracker.cc bloom.cc compact_object.cc dense_set
tx_queue.cc string_set.cc string_map.cc detail/bitpacking.cc) tx_queue.cc string_set.cc string_map.cc detail/bitpacking.cc)
cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua lua_modules cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua lua_modules
fibers2 ${SEARCH_LIB} jsonpath OpenSSL::Crypto TRDP::dconv) fibers2 ${SEARCH_LIB} jsonpath OpenSSL::Crypto TRDP::dconv TRDP::lz4)
add_executable(dash_bench dash_bench.cc) add_executable(dash_bench dash_bench.cc)
cxx_link(dash_bench dfly_core redis_test_lib) cxx_link(dash_bench dfly_core redis_test_lib)

View file

@ -14,6 +14,7 @@ extern "C" {
#include <absl/base/optimization.h> #include <absl/base/optimization.h>
#include <absl/strings/escaping.h> #include <absl/strings/escaping.h>
#include <absl/strings/str_cat.h> #include <absl/strings/str_cat.h>
#include <lz4frame.h>
#include "base/logging.h" #include "base/logging.h"
@ -33,12 +34,12 @@ using namespace std;
#define SIZE_ESTIMATE_OVERHEAD 8 #define SIZE_ESTIMATE_OVERHEAD 8
/* Minimum listpack size in bytes for attempting compression. */ /* Minimum listpack size in bytes for attempting compression. */
#define MIN_COMPRESS_BYTES 48 #define MIN_COMPRESS_BYTES 256
/* Minimum size reduction in bytes to store compressed quicklistNode data. /* Minimum size reduction in bytes to store compressed quicklistNode data.
* This also prevents us from storing compression if the compression * This also prevents us from storing compression if the compression
* resulted in a larger size than the original data. */ * resulted in a larger size than the original data. */
#define MIN_COMPRESS_IMPROVE 8 #define MIN_COMPRESS_IMPROVE 32
/* This macro is used to compress a node. /* This macro is used to compress a node.
* *
@ -49,19 +50,22 @@ using namespace std;
* *
* If the 'recompress' flag of the node is false, we check whether the node is * If the 'recompress' flag of the node is false, we check whether the node is
* within the range of compress depth before compressing it. */ * within the range of compress depth before compressing it. */
#define quicklistCompress(_node) \ #define quicklistCompress(_node) \
do { \ do { \
if ((_node)->recompress) \ if ((_node)->recompress) \
CompressNode((_node)); \ CompressNode((_node), this->compr_method_); \
else \ else \
this->Compress(_node); \ this->Compress(_node); \
} while (0) } while (0)
#define QLIST_NODE_ENCODING_LZ4 3
namespace dfly { namespace dfly {
namespace { namespace {
static_assert(sizeof(QList) == 32); static_assert(sizeof(QList) == 32);
static_assert(sizeof(QList::Node) == 40);
enum IterDir : uint8_t { FWD = 1, REV = 0 }; enum IterDir : uint8_t { FWD = 1, REV = 0 };
@ -181,10 +185,78 @@ inline ssize_t NodeSetEntry(QList::Node* node, uint8_t* entry) {
return diff; return diff;
} }
// Reinterprets the entry buffer of a compressed node as its quicklistLZF header.
// The same header layout is shared by the LZF and LZ4 encodings; debug builds
// verify that the node carries one of those two encodings.
inline quicklistLZF* GetLzf(QList::Node* node) {
  DCHECK(node->encoding == QLIST_NODE_ENCODING_LZ4 ||
         node->encoding == QUICKLIST_NODE_ENCODING_LZF);
  return reinterpret_cast<quicklistLZF*>(node->entry);
}
// Tries to replace the raw payload of 'node' with its LZF-compressed form.
// Returns true and switches the node encoding to LZF when the compressed data is
// at least MIN_COMPRESS_IMPROVE bytes smaller than the raw data; otherwise leaves
// the node untouched, records a bad compression attempt and returns false.
bool CompressLZF(QList::Node* node) {
  // A single allocation holds, back to back: the quicklistLZF header, an output
  // area of node->sz bytes and the LZF_STATE scratch table used during compression.
  const size_t scratch_offset = sizeof(quicklistLZF) + node->sz;
  char* block = static_cast<char*>(zmalloc(scratch_offset + sizeof(LZF_STATE)));
  auto* packed = reinterpret_cast<quicklistLZF*>(block);
  auto* scratch = reinterpret_cast<LZF_HSLOT*>(block + scratch_offset);

  packed->sz = lzf_compress(node->entry, node->sz, packed->compressed, node->sz, scratch);

  // A zero result means lzf_compress rejected the input; a non-zero result must
  // still beat the raw size by the minimum margin to be worth keeping.
  const bool worth_keeping = packed->sz != 0 && packed->sz + MIN_COMPRESS_IMPROVE < node->sz;
  if (!worth_keeping) {
    DVLOG(2) << "Uncompressable " << node->sz << " vs " << packed->sz;
    zfree(packed);
    QList::stats.bad_compression_attempts++;
    return false;
  }

  DVLOG(2) << "Compressed " << node->sz << " to " << packed->sz;
  QList::stats.compressed_bytes += packed->sz;
  QList::stats.raw_compressed_bytes += node->sz;

  // Shrink the block to header + compressed payload, dropping the unused tail
  // of the output area and the scratch table.
  packed = static_cast<quicklistLZF*>(zrealloc(packed, sizeof(quicklistLZF) + packed->sz));

  zfree(node->entry);
  node->entry = reinterpret_cast<unsigned char*>(packed);
  node->encoding = QUICKLIST_NODE_ENCODING_LZF;
  return true;
}
// Tries to replace the raw payload of 'node' with an LZ4 frame.
// Returns true and switches the node encoding to LZ4 when the frame is at least
// MIN_COMPRESS_IMPROVE bytes smaller than the raw data; otherwise leaves the node
// untouched, records a bad compression attempt and returns false.
// Any error reported by the LZ4 frame API aborts via CHECK.
bool CompressLZ4(QList::Node* node) {
  LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;
  prefs.compressionLevel = -1;
  prefs.frameInfo.contentSize = node->sz;

  LZ4F_cctx* cctx;
  LZ4F_errorCode_t rc = LZ4F_createCompressionContext(&cctx, LZ4F_VERSION);
  CHECK(!LZ4F_isError(rc));

  // The quicklistLZF struct doubles as the header for LZ4 payloads; the output
  // area is sized to the worst case reported by the frame API.
  const size_t capacity = LZ4F_compressFrameBound(node->sz, &prefs);
  auto* packed = static_cast<quicklistLZF*>(zmalloc(sizeof(quicklistLZF) + capacity));

  const size_t frame_sz = LZ4F_compressFrame_usingCDict(cctx, packed->compressed, capacity,
                                                        node->entry, node->sz,
                                                        nullptr /* dict */, &prefs);
  CHECK(!LZ4F_isError(frame_sz));

  rc = LZ4F_freeCompressionContext(cctx);
  CHECK(!LZ4F_isError(rc));

  const bool worth_keeping = frame_sz + MIN_COMPRESS_IMPROVE < node->sz;
  if (!worth_keeping) {
    QList::stats.bad_compression_attempts++;
    zfree(packed);
    return false;
  }

  // Record the payload size, then trim the allocation to header + frame.
  packed->sz = frame_sz;
  packed = static_cast<quicklistLZF*>(zrealloc(packed, sizeof(quicklistLZF) + frame_sz));

  QList::stats.compressed_bytes += frame_sz;
  QList::stats.raw_compressed_bytes += node->sz;

  zfree(node->entry);
  node->entry = reinterpret_cast<unsigned char*>(packed);
  node->encoding = QLIST_NODE_ENCODING_LZ4;
  return true;
}
/* Compress the listpack in 'node' and update encoding details. /* Compress the listpack in 'node' and update encoding details.
* Returns true if listpack compressed successfully. * Returns true if listpack compressed successfully.
* Returns false if compression failed or if listpack too small to compress. */ * Returns false if compression failed or if listpack too small to compress. */
bool CompressNode(QList::Node* node) { bool CompressNode(QList::Node* node, unsigned method) {
DCHECK(node->encoding == QUICKLIST_NODE_ENCODING_RAW); DCHECK(node->encoding == QUICKLIST_NODE_ENCODING_RAW);
DCHECK(!node->dont_compress); DCHECK(!node->dont_compress);
@ -197,34 +269,22 @@ bool CompressNode(QList::Node* node) {
if (node->sz < MIN_COMPRESS_BYTES) if (node->sz < MIN_COMPRESS_BYTES)
return false; return false;
// We allocate LZF_STATE on heap, piggy-backing on the existing allocation. QList::stats.compression_attempts++;
char* uptr = (char*)zmalloc(sizeof(quicklistLZF) + node->sz + sizeof(LZF_STATE)); if (method == static_cast<unsigned>(QList::LZF)) {
quicklistLZF* lzf = (quicklistLZF*)uptr; return CompressLZF(node);
LZF_HSLOT* sdata = (LZF_HSLOT*)(uptr + sizeof(quicklistLZF) + node->sz);
/* Cancel if compression fails or doesn't compress small enough */
if (((lzf->sz = lzf_compress(node->entry, node->sz, lzf->compressed, node->sz, sdata)) == 0) ||
lzf->sz + MIN_COMPRESS_IMPROVE >= node->sz) {
/* lzf_compress aborts/rejects compression if value not compressible. */
DVLOG(2) << "Uncompressable " << node->sz << " vs " << lzf->sz;
zfree(lzf);
return false;
} }
DVLOG(2) << "Compressed " << node->sz << " to " << lzf->sz;
lzf = (quicklistLZF*)zrealloc(lzf, sizeof(*lzf) + lzf->sz); return CompressLZ4(node);
zfree(node->entry);
node->entry = (unsigned char*)lzf;
node->encoding = QUICKLIST_NODE_ENCODING_LZF;
return true;
} }
ssize_t CompressNodeIfNeeded(QList::Node* node) { ssize_t CompressNodeIfNeeded(QList::Node* node, unsigned method) {
DCHECK(node); DCHECK(node);
if (node->encoding == QUICKLIST_NODE_ENCODING_RAW && !node->dont_compress) { if (node->encoding == QUICKLIST_NODE_ENCODING_RAW) {
if (CompressNode(node)) node->attempted_compress = 1;
return ((quicklistLZF*)node->entry)->sz - node->sz; if (!node->dont_compress) {
if (CompressNode(node, method))
return ssize_t(GetLzf(node)->sz) - node->sz;
}
} }
return 0; return 0;
} }
@ -232,14 +292,34 @@ ssize_t CompressNodeIfNeeded(QList::Node* node) {
/* Uncompress the listpack in 'node' and update encoding details. /* Uncompress the listpack in 'node' and update encoding details.
* Returns 1 on successful decode, 0 on failure to decode. */ * Returns 1 on successful decode, 0 on failure to decode. */
bool DecompressNode(bool recompress, QList::Node* node) { bool DecompressNode(bool recompress, QList::Node* node) {
DCHECK(node->encoding == QUICKLIST_NODE_ENCODING_LZF ||
node->encoding == QLIST_NODE_ENCODING_LZ4);
node->recompress = int(recompress); node->recompress = int(recompress);
void* decompressed = zmalloc(node->sz); void* decompressed = zmalloc(node->sz);
quicklistLZF* lzf = (quicklistLZF*)node->entry; quicklistLZF* lzf = GetLzf(node);
if (lzf_decompress(lzf->compressed, lzf->sz, decompressed, node->sz) == 0) { QList::stats.decompression_calls++;
/* Someone requested decompress, but we can't decompress. Not good. */ QList::stats.compressed_bytes -= lzf->sz;
zfree(decompressed); QList::stats.raw_compressed_bytes -= node->sz;
return false;
if (node->encoding == QLIST_NODE_ENCODING_LZ4) {
LZ4F_dctx* dctx = nullptr;
LZ4F_errorCode_t code = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
CHECK(!LZ4F_isError(code));
size_t decompressed_sz = node->sz;
size_t left =
LZ4F_decompress(dctx, decompressed, &decompressed_sz, lzf->compressed, &lzf->sz, nullptr);
CHECK_EQ(left, 0u);
CHECK_EQ(decompressed_sz, node->sz);
LZ4F_freeDecompressionContext(dctx);
} else {
if (lzf_decompress(lzf->compressed, lzf->sz, decompressed, node->sz) == 0) {
LOG(DFATAL) << "Invalid LZF compressed data";
/* Someone requested decompress, but we can't decompress. Not good. */
zfree(decompressed);
return false;
}
} }
zfree(lzf); zfree(lzf);
node->entry = (uint8_t*)decompressed; node->entry = (uint8_t*)decompressed;
@ -252,8 +332,8 @@ bool DecompressNode(bool recompress, QList::Node* node) {
returns by how much the size of the node has increased. returns by how much the size of the node has increased.
*/ */
ssize_t DecompressNodeIfNeeded(bool recompress, QList::Node* node) { ssize_t DecompressNodeIfNeeded(bool recompress, QList::Node* node) {
if ((node) && (node)->encoding == QUICKLIST_NODE_ENCODING_LZF) { if (node && node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
size_t compressed_sz = ((quicklistLZF*)node->entry)->sz; size_t compressed_sz = GetLzf(node)->sz;
if (DecompressNode(recompress, node)) { if (DecompressNode(recompress, node)) {
return node->sz - compressed_sz; return node->sz - compressed_sz;
} }
@ -261,10 +341,10 @@ ssize_t DecompressNodeIfNeeded(bool recompress, QList::Node* node) {
return 0; return 0;
} }
ssize_t RecompressOnly(QList::Node* node) { ssize_t RecompressOnly(QList::Node* node, unsigned method) {
if (node->recompress && !node->dont_compress) { if (node->recompress && !node->dont_compress) {
if (CompressNode(node)) if (CompressNode(node, method))
return ((quicklistLZF*)node->entry)->sz - node->sz; return (GetLzf(node))->sz - node->sz;
} }
return 0; return 0;
} }
@ -299,11 +379,14 @@ QList::Node* SplitNode(QList::Node* node, int offset, bool after, ssize_t* diff)
} // namespace } // namespace
__thread QList::Stats QList::stats;
void QList::SetPackedThreshold(unsigned threshold) { void QList::SetPackedThreshold(unsigned threshold) {
packed_threshold = threshold; packed_threshold = threshold;
} }
QList::QList(int fill, int compress) : fill_(fill), compress_(compress), bookmark_count_(0) { QList::QList(int fill, int compress) : fill_(fill), compress_(compress), bookmark_count_(0) {
compr_method_ = 0;
} }
QList::QList(QList&& other) QList::QList(QList&& other)
@ -342,7 +425,11 @@ void QList::Clear() {
while (len_) { while (len_) {
Node* next = current->next; Node* next = current->next;
if (current->encoding != QUICKLIST_NODE_ENCODING_RAW) {
quicklistLZF* lzf = (quicklistLZF*)current->entry;
QList::stats.compressed_bytes -= lzf->sz;
QList::stats.raw_compressed_bytes -= current->sz;
}
zfree(current->entry); zfree(current->entry);
zfree(current); zfree(current);
@ -587,7 +674,7 @@ void QList::Insert(Iterator it, std::string_view elem, InsertOpt insert_opt) {
uint8_t* new_entry = LP_Insert(node->entry, elem, it.zi_, after ? LP_AFTER : LP_BEFORE); uint8_t* new_entry = LP_Insert(node->entry, elem, it.zi_, after ? LP_AFTER : LP_BEFORE);
malloc_size_ += NodeSetEntry(node, new_entry); malloc_size_ += NodeSetEntry(node, new_entry);
node->count++; node->count++;
malloc_size_ += RecompressOnly(node); malloc_size_ += RecompressOnly(node, compr_method_);
} else { } else {
bool insert_tail = at_tail && after; bool insert_tail = at_tail && after;
bool insert_head = at_head && !after; bool insert_head = at_head && !after;
@ -598,8 +685,8 @@ void QList::Insert(Iterator it, std::string_view elem, InsertOpt insert_opt) {
malloc_size_ += DecompressNodeIfNeeded(true, new_node); malloc_size_ += DecompressNodeIfNeeded(true, new_node);
malloc_size_ += NodeSetEntry(new_node, LP_Prepend(new_node->entry, elem)); malloc_size_ += NodeSetEntry(new_node, LP_Prepend(new_node->entry, elem));
new_node->count++; new_node->count++;
malloc_size_ += RecompressOnly(new_node); malloc_size_ += RecompressOnly(new_node, compr_method_);
malloc_size_ += RecompressOnly(node); malloc_size_ += RecompressOnly(node, compr_method_);
} else if (insert_head && avail_prev) { } else if (insert_head && avail_prev) {
/* If we are: at head, previous has free space, and inserting before: /* If we are: at head, previous has free space, and inserting before:
* - insert entry at tail of previous node. */ * - insert entry at tail of previous node. */
@ -607,8 +694,8 @@ void QList::Insert(Iterator it, std::string_view elem, InsertOpt insert_opt) {
malloc_size_ += DecompressNodeIfNeeded(true, new_node); malloc_size_ += DecompressNodeIfNeeded(true, new_node);
malloc_size_ += NodeSetEntry(new_node, LP_Append(new_node->entry, elem)); malloc_size_ += NodeSetEntry(new_node, LP_Append(new_node->entry, elem));
new_node->count++; new_node->count++;
malloc_size_ += RecompressOnly(new_node); malloc_size_ += RecompressOnly(new_node, compr_method_);
malloc_size_ += RecompressOnly(node); malloc_size_ += RecompressOnly(node, compr_method_);
} else if (insert_tail || insert_head) { } else if (insert_tail || insert_head) {
/* If we are: full, and our prev/next has no available space, then: /* If we are: full, and our prev/next has no available space, then:
* - create new node and attach to qlist */ * - create new node and attach to qlist */
@ -732,12 +819,12 @@ void QList::Compress(Node* node) {
reverse = reverse->prev; reverse = reverse->prev;
} }
if (!in_depth && node) if (!in_depth && node) {
malloc_size_ += CompressNodeIfNeeded(node); malloc_size_ += CompressNodeIfNeeded(node, this->compr_method_);
}
/* At this point, forward and reverse are one node beyond depth */ /* At this point, forward and reverse are one node beyond depth */
malloc_size_ += CompressNodeIfNeeded(forward); malloc_size_ += CompressNodeIfNeeded(forward, this->compr_method_);
malloc_size_ += CompressNodeIfNeeded(reverse); malloc_size_ += CompressNodeIfNeeded(reverse, this->compr_method_);
} }
/* Attempt to merge listpacks within two nodes on either side of 'center'. /* Attempt to merge listpacks within two nodes on either side of 'center'.
@ -1063,7 +1150,7 @@ bool QList::Erase(const long start, unsigned count) {
if (node->count == 0) { if (node->count == 0) {
DelNode(node); DelNode(node);
} else { } else {
malloc_size_ += RecompressOnly(node); malloc_size_ += RecompressOnly(node, compr_method_);
} }
} }

View file

@ -19,9 +19,10 @@ namespace dfly {
class QList { class QList {
public: public:
enum Where { TAIL, HEAD }; enum Where { TAIL, HEAD };
enum COMPR_METHOD { LZF = 0, LZ4 = 1 };
/* Node is a 32 byte struct describing a listpack for a quicklist. /* Node is a 40 byte struct describing a listpack for a quicklist.
* We use bit fields keep the Node at 32 bytes. * We use bit fields keep the Node at 40 bytes.
* count: 16 bits, max 65536 (max lp bytes is 65k, so max count actually < 32k). * count: 16 bits, max 65536 (max lp bytes is 65k, so max count actually < 32k).
* encoding: 2 bits, RAW=1, LZF=2, LZ4=3.
* container: 2 bits, PLAIN=1 (a single item as char array), PACKED=2 (listpack with multiple * container: 2 bits, PLAIN=1 (a single item as char array), PACKED=2 (listpack with multiple
@ -43,7 +44,7 @@ class QList {
unsigned int recompress : 1; /* was this node previous compressed? */ unsigned int recompress : 1; /* was this node previous compressed? */
unsigned int attempted_compress : 1; /* node can't compress; too small */ unsigned int attempted_compress : 1; /* node can't compress; too small */
unsigned int dont_compress : 1; /* prevent compression of entry that will be used later */ unsigned int dont_compress : 1; /* prevent compression of entry that will be used later */
unsigned int extra : 9; /* more bits to steal for future usage */ unsigned int extra : 25; /* more bits to steal for future usage */
} Node; } Node;
// Provides wrapper around the references to the listpack entries. // Provides wrapper around the references to the listpack entries.
@ -208,8 +209,30 @@ class QList {
fill_ = fill; fill_ = fill;
} }
// Selects the compression method stored in compr_method_ for this list.
void set_compr_method(COMPR_METHOD cm) {
  const unsigned method_code = static_cast<unsigned>(cm);
  compr_method_ = method_code;
}
static void SetPackedThreshold(unsigned threshold); static void SetPackedThreshold(unsigned threshold);
// Compression counters for QList nodes, exposed through the thread-local QList::stats.
struct Stats {
// Number of times node compression was actually attempted.
uint64_t compression_attempts = 0;
// Compression attempts whose result was not sufficiently smaller than the raw data
// to be kept. Subset of compression_attempts.
uint64_t bad_compression_attempts = 0;
// Number of node decompressions performed.
uint64_t decompression_calls = 0;
// How many bytes we currently keep compressed (sum of compressed payload sizes).
size_t compressed_bytes = 0;
// How many raw (uncompressed) bytes the currently compressed payloads were produced from.
// Compressed savings are calculated as raw_compressed_bytes - compressed_bytes.
size_t raw_compressed_bytes = 0;
};
static __thread Stats stats;
private: private:
bool AllowCompression() const { bool AllowCompression() const {
return compress_ != 0; return compress_ != 0;
@ -242,7 +265,8 @@ class QList {
uint32_t count_ = 0; /* total count of all entries in all listpacks */ uint32_t count_ = 0; /* total count of all entries in all listpacks */
uint32_t len_ = 0; /* number of quicklistNodes */ uint32_t len_ = 0; /* number of quicklistNodes */
int fill_ : QL_FILL_BITS; /* fill factor for individual nodes */ int fill_ : QL_FILL_BITS; /* fill factor for individual nodes */
int reserved1_ : 16; int compr_method_ : 2; // 0 - lzf, 1 - lz4
int reserved1_ : 14;
unsigned compress_ : QL_COMP_BITS; /* depth of end nodes not to compress;0=off */ unsigned compress_ : QL_COMP_BITS; /* depth of end nodes not to compress;0=off */
unsigned bookmark_count_ : QL_BM_BITS; unsigned bookmark_count_ : QL_BM_BITS;
unsigned reserved2_ : 12; unsigned reserved2_ : 12;

View file

@ -8,6 +8,7 @@
#include <absl/strings/str_cat.h> #include <absl/strings/str_cat.h>
#include <absl/strings/str_format.h> #include <absl/strings/str_format.h>
#include <gmock/gmock.h> #include <gmock/gmock.h>
#include <mimalloc.h>
#include "base/gtest.h" #include "base/gtest.h"
#include "base/logging.h" #include "base/logging.h"
@ -125,6 +126,7 @@ static void SetupMalloc() {
// configure redis lib zmalloc which requires mimalloc heap to work. // configure redis lib zmalloc which requires mimalloc heap to work.
auto* tlh = mi_heap_get_backing(); auto* tlh = mi_heap_get_backing();
init_zmalloc_threadlocal(tlh); init_zmalloc_threadlocal(tlh);
mi_option_set(mi_option_purge_delay, -1); // disable purging of segments (affects benchmarks)
} }
class QListTest : public ::testing::Test { class QListTest : public ::testing::Test {
@ -309,15 +311,17 @@ TEST_F(QListTest, RemoveListpack) {
ASSERT_FALSE(it.Next()); ASSERT_FALSE(it.Next());
} }
using FillCompress = tuple<int, unsigned>; using FillCompress = tuple<int, unsigned, QList::COMPR_METHOD>;
class PrintToFillCompress { class PrintToFillCompress {
public: public:
std::string operator()(const TestParamInfo<FillCompress>& info) const { std::string operator()(const TestParamInfo<FillCompress>& info) const {
int fill = get<0>(info.param); int fill = get<0>(info.param);
int compress = get<1>(info.param); int compress = get<1>(info.param);
QList::COMPR_METHOD method = get<2>(info.param);
string fill_str = fill >= 0 ? absl::StrCat("f", fill) : absl::StrCat("fminus", -fill); string fill_str = fill >= 0 ? absl::StrCat("f", fill) : absl::StrCat("fminus", -fill);
return absl::StrCat(fill_str, "compress", compress); string method_str = method == QList::LZF ? "lzf" : "lz4";
return absl::StrCat(fill_str, "compr", compress, method_str);
} }
}; };
@ -325,12 +329,13 @@ class OptionsTest : public QListTest, public WithParamInterface<FillCompress> {}
INSTANTIATE_TEST_SUITE_P(Matrix, OptionsTest, INSTANTIATE_TEST_SUITE_P(Matrix, OptionsTest,
Combine(Values(-5, -4, -3, -2, -1, 0, 1, 2, 32, 66, 128, 999), Combine(Values(-5, -4, -3, -2, -1, 0, 1, 2, 32, 66, 128, 999),
Values(0, 1, 2, 3, 4, 5, 6, 10)), Values(0, 1, 2, 3, 4, 5, 6, 10), Values(QList::LZF, QList::LZ4)),
PrintToFillCompress()); PrintToFillCompress());
TEST_P(OptionsTest, Numbers) { TEST_P(OptionsTest, Numbers) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
ql_.set_compr_method(method);
array<int64_t, 5000> nums; array<int64_t, 5000> nums;
for (unsigned i = 0; i < nums.size(); i++) { for (unsigned i = 0; i < nums.size(); i++) {
@ -352,8 +357,9 @@ TEST_P(OptionsTest, Numbers) {
} }
TEST_P(OptionsTest, NumbersIndex) { TEST_P(OptionsTest, NumbersIndex) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
ql_.set_compr_method(method);
long long nums[5000]; long long nums[5000];
for (int i = 0; i < 760; i++) { for (int i = 0; i < 760; i++) {
@ -371,8 +377,9 @@ TEST_P(OptionsTest, NumbersIndex) {
} }
TEST_P(OptionsTest, DelRangeA) { TEST_P(OptionsTest, DelRangeA) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
ql_.set_compr_method(method);
long long nums[5000]; long long nums[5000];
for (int i = 0; i < 33; i++) { for (int i = 0; i < 33; i++) {
nums[i] = -5157318210846258176 + i; nums[i] = -5157318210846258176 + i;
@ -395,8 +402,9 @@ TEST_P(OptionsTest, DelRangeA) {
} }
TEST_P(OptionsTest, DelRangeB) { TEST_P(OptionsTest, DelRangeB) {
auto [fill, _] = GetParam(); auto [fill, _, method] = GetParam();
ql_ = QList(fill, QUICKLIST_NOCOMPRESS); // ignore compress parameter ql_ = QList(fill, QUICKLIST_NOCOMPRESS); // ignore compress parameter
ql_.set_compr_method(method);
long long nums[5000]; long long nums[5000];
for (int i = 0; i < 33; i++) { for (int i = 0; i < 33; i++) {
@ -434,8 +442,10 @@ TEST_P(OptionsTest, DelRangeB) {
} }
TEST_P(OptionsTest, DelRangeC) { TEST_P(OptionsTest, DelRangeC) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
ql_.set_compr_method(method);
long long nums[5000]; long long nums[5000];
for (int i = 0; i < 33; i++) { for (int i = 0; i < 33; i++) {
nums[i] = -5157318210846258176 + i; nums[i] = -5157318210846258176 + i;
@ -457,8 +467,10 @@ TEST_P(OptionsTest, DelRangeC) {
} }
TEST_P(OptionsTest, DelRangeD) { TEST_P(OptionsTest, DelRangeD) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
ql_.set_compr_method(method);
long long nums[5000]; long long nums[5000];
for (int i = 0; i < 33; i++) { for (int i = 0; i < 33; i++) {
nums[i] = -5157318210846258176 + i; nums[i] = -5157318210846258176 + i;
@ -473,8 +485,9 @@ TEST_P(OptionsTest, DelRangeD) {
} }
TEST_P(OptionsTest, DelRangeNode) { TEST_P(OptionsTest, DelRangeNode) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(-2, compress); ql_ = QList(-2, compress);
ql_.set_compr_method(method);
for (int i = 0; i < 32; i++) for (int i = 0; i < 32; i++)
ql_.Push(StrCat("hello", i), QList::HEAD); ql_.Push(StrCat("hello", i), QList::HEAD);
@ -485,8 +498,9 @@ TEST_P(OptionsTest, DelRangeNode) {
} }
TEST_P(OptionsTest, DelRangeNodeOverflow) { TEST_P(OptionsTest, DelRangeNodeOverflow) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(-2, compress); ql_ = QList(-2, compress);
ql_.set_compr_method(method);
for (int i = 0; i < 32; i++) for (int i = 0; i < 32; i++)
ql_.Push(StrCat("hello", i), QList::HEAD); ql_.Push(StrCat("hello", i), QList::HEAD);
@ -496,7 +510,7 @@ TEST_P(OptionsTest, DelRangeNodeOverflow) {
} }
TEST_P(OptionsTest, DelRangeMiddle100of500) { TEST_P(OptionsTest, DelRangeMiddle100of500) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(32, compress); ql_ = QList(32, compress);
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
@ -508,7 +522,7 @@ TEST_P(OptionsTest, DelRangeMiddle100of500) {
} }
TEST_P(OptionsTest, DelLessFillAcrossNodes) { TEST_P(OptionsTest, DelLessFillAcrossNodes) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(32, compress); ql_ = QList(32, compress);
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
@ -519,7 +533,7 @@ TEST_P(OptionsTest, DelLessFillAcrossNodes) {
} }
TEST_P(OptionsTest, DelNegOne) { TEST_P(OptionsTest, DelNegOne) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(32, compress); ql_ = QList(32, compress);
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
ql_.Push(StrCat("hello", i + 1), QList::TAIL); ql_.Push(StrCat("hello", i + 1), QList::TAIL);
@ -529,7 +543,7 @@ TEST_P(OptionsTest, DelNegOne) {
} }
TEST_P(OptionsTest, DelNegOneOverflow) { TEST_P(OptionsTest, DelNegOneOverflow) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(32, compress); ql_ = QList(32, compress);
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
ql_.Push(StrCat("hello", i + 1), QList::TAIL); ql_.Push(StrCat("hello", i + 1), QList::TAIL);
@ -541,7 +555,7 @@ TEST_P(OptionsTest, DelNegOneOverflow) {
} }
TEST_P(OptionsTest, DelNeg100From500) { TEST_P(OptionsTest, DelNeg100From500) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(32, compress); ql_ = QList(32, compress);
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
ql_.Push(StrCat("hello", i + 1), QList::TAIL); ql_.Push(StrCat("hello", i + 1), QList::TAIL);
@ -554,7 +568,7 @@ TEST_P(OptionsTest, DelNeg100From500) {
} }
TEST_P(OptionsTest, DelMin10_5_from50) { TEST_P(OptionsTest, DelMin10_5_from50) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(32, compress); ql_ = QList(32, compress);
for (int i = 0; i < 50; i++) for (int i = 0; i < 50; i++)
@ -565,7 +579,7 @@ TEST_P(OptionsTest, DelMin10_5_from50) {
} }
TEST_P(OptionsTest, DelElems) { TEST_P(OptionsTest, DelElems) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
const char* words[] = {"abc", "foo", "bar", "foobar", "foobared", "zap", "bar", "test", "foo"}; const char* words[] = {"abc", "foo", "bar", "foobar", "foobared", "zap", "bar", "test", "foo"};
@ -605,7 +619,7 @@ TEST_P(OptionsTest, DelElems) {
} }
TEST_P(OptionsTest, IterateReverse) { TEST_P(OptionsTest, IterateReverse) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(32, compress); ql_ = QList(32, compress);
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
@ -621,7 +635,7 @@ TEST_P(OptionsTest, IterateReverse) {
} }
TEST_P(OptionsTest, Iterate500) { TEST_P(OptionsTest, Iterate500) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(32, compress); ql_ = QList(32, compress);
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
ql_.Push(StrCat("hello", i), QList::HEAD); ql_.Push(StrCat("hello", i), QList::HEAD);
@ -647,7 +661,7 @@ TEST_P(OptionsTest, Iterate500) {
} }
TEST_P(OptionsTest, IterateAfterOne) { TEST_P(OptionsTest, IterateAfterOne) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(-2, compress); ql_ = QList(-2, compress);
ql_.Push("hello", QList::HEAD); ql_.Push("hello", QList::HEAD);
@ -668,7 +682,7 @@ TEST_P(OptionsTest, IterateAfterOne) {
} }
TEST_P(OptionsTest, IterateDelete) { TEST_P(OptionsTest, IterateDelete) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
ql_.Push("abc", QList::TAIL); ql_.Push("abc", QList::TAIL);
@ -692,7 +706,7 @@ TEST_P(OptionsTest, IterateDelete) {
} }
TEST_P(OptionsTest, InsertBeforeOne) { TEST_P(OptionsTest, InsertBeforeOne) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(-2, compress); ql_ = QList(-2, compress);
ql_.Push("hello", QList::HEAD); ql_.Push("hello", QList::HEAD);
@ -712,7 +726,7 @@ TEST_P(OptionsTest, InsertBeforeOne) {
} }
TEST_P(OptionsTest, InsertWithHeadFull) { TEST_P(OptionsTest, InsertWithHeadFull) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(4, compress); ql_ = QList(4, compress);
for (int i = 0; i < 10; i++) for (int i = 0; i < 10; i++)
@ -728,7 +742,7 @@ TEST_P(OptionsTest, InsertWithHeadFull) {
} }
TEST_P(OptionsTest, InsertWithTailFull) { TEST_P(OptionsTest, InsertWithTailFull) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(4, compress); ql_ = QList(4, compress);
for (int i = 0; i < 10; i++) for (int i = 0; i < 10; i++)
ql_.Push(StrCat("hello", i), QList::HEAD); ql_.Push(StrCat("hello", i), QList::HEAD);
@ -743,7 +757,7 @@ TEST_P(OptionsTest, InsertWithTailFull) {
} }
TEST_P(OptionsTest, InsertOnceWhileIterating) { TEST_P(OptionsTest, InsertOnceWhileIterating) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
ql_.Push("abc", QList::TAIL); ql_.Push("abc", QList::TAIL);
@ -767,7 +781,7 @@ TEST_P(OptionsTest, InsertOnceWhileIterating) {
} }
TEST_P(OptionsTest, InsertBefore250NewInMiddleOf500Elements) { TEST_P(OptionsTest, InsertBefore250NewInMiddleOf500Elements) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
for (int i = 0; i < 500; i++) { for (int i = 0; i < 500; i++) {
string val = StrCat("hello", i); string val = StrCat("hello", i);
@ -787,7 +801,7 @@ TEST_P(OptionsTest, InsertBefore250NewInMiddleOf500Elements) {
} }
TEST_P(OptionsTest, InsertAfter250NewInMiddleOf500Elements) { TEST_P(OptionsTest, InsertAfter250NewInMiddleOf500Elements) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
ql_.Push(StrCat("hello", i), QList::HEAD); ql_.Push(StrCat("hello", i), QList::HEAD);
@ -806,7 +820,7 @@ TEST_P(OptionsTest, InsertAfter250NewInMiddleOf500Elements) {
} }
TEST_P(OptionsTest, NextPlain) { TEST_P(OptionsTest, NextPlain) {
auto [_, compress] = GetParam(); auto [_, compress, method] = GetParam();
ql_ = QList(-2, compress); ql_ = QList(-2, compress);
QList::SetPackedThreshold(3); QList::SetPackedThreshold(3);
@ -826,7 +840,7 @@ TEST_P(OptionsTest, NextPlain) {
} }
TEST_P(OptionsTest, IndexFrom500) { TEST_P(OptionsTest, IndexFrom500) {
auto [fill, compress] = GetParam(); auto [fill, compress, method] = GetParam();
ql_ = QList(fill, compress); ql_ = QList(fill, compress);
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
ql_.Push(StrCat("hello", i + 1), QList::TAIL); ql_.Push(StrCat("hello", i + 1), QList::TAIL);
@ -867,18 +881,22 @@ static void BM_QListCompress(benchmark::State& state) {
lines.push_back(string(line)); lines.push_back(string(line));
} }
VLOG(1) << "Read " << lines.size() << " lines " << state.range(0);
while (state.KeepRunning()) { while (state.KeepRunning()) {
QList ql(-2, state.range(0)); // uses differrent compression modes, see below. QList ql(-2, state.range(0)); // uses differrent compression modes, see below.
ql.set_compr_method(state.range(1) == 0 ? QList::LZF : QList::LZ4);
for (const string& l : lines) { for (const string& l : lines) {
ql.Push(l, QList::TAIL); ql.Push(l, QList::TAIL);
} }
DVLOG(1) << ql.node_count() << ", " << ql.MallocUsed(true); DVLOG(1) << ql.node_count() << ", " << ql.MallocUsed(true);
} }
CHECK_EQ(0, zmalloc_used_memory_tl);
} }
BENCHMARK(BM_QListCompress) BENCHMARK(BM_QListCompress)
->Arg(0) // no compression ->ArgsProduct({{1, 4, 0}, {0, 1}}); // x - compression depth, y compression method.
->Arg(1) // compress all nodes but edges. // x = 0 no compression, 1 - compress all nodes but edges,
->Arg(4); // compress all nodes but 4 nodes from edges. // 4 - compress all but 4 nodes from edges.
static void BM_QListUncompress(benchmark::State& state) { static void BM_QListUncompress(benchmark::State& state) {
SetupMalloc(); SetupMalloc();
@ -889,18 +907,41 @@ static void BM_QListUncompress(benchmark::State& state) {
io::LineReader lr(*src, TAKE_OWNERSHIP); io::LineReader lr(*src, TAKE_OWNERSHIP);
string_view line; string_view line;
QList ql(-2, state.range(0)); QList ql(-2, state.range(0));
ql.set_compr_method(state.range(1) == 0 ? QList::LZF : QList::LZ4);
QList::stats.compression_attempts = 0;
CHECK_EQ(QList::stats.compressed_bytes, 0u);
CHECK_EQ(QList::stats.raw_compressed_bytes, 0u);
size_t line_len = 0;
while (lr.Next(&line)) { while (lr.Next(&line)) {
ql.Push(line, QList::TAIL); ql.Push(line, QList::TAIL);
line_len += line.size();
}
if (ql.compress_param() > 0) {
CHECK_GT(QList::stats.compression_attempts, 0u);
CHECK_GT(QList::stats.compressed_bytes, 0u);
CHECK_GT(QList::stats.raw_compressed_bytes, QList::stats.compressed_bytes);
} }
LOG(INFO) << "MallocUsed " << ql.compress_param() << ": " << ql.MallocUsed(true) << ", " LOG(INFO) << "MallocUsed " << ql.compress_param() << ": " << ql.MallocUsed(true) << ", "
<< ql.MallocUsed(false); << ql.MallocUsed(false);
size_t exp_count = ql.Size();
while (state.KeepRunning()) { while (state.KeepRunning()) {
ql.Iterate([](const QList::Entry& e) { return true; }, 0, -1); unsigned actual_count = 0, actual_len = 0;
ql.Iterate(
[&](const QList::Entry& e) {
actual_len += e.view().size();
++actual_count;
return true;
},
0, -1);
CHECK_EQ(exp_count, actual_count);
CHECK_EQ(line_len, actual_len);
} }
} }
BENCHMARK(BM_QListUncompress)->Arg(0)->Arg(1)->Arg(4); BENCHMARK(BM_QListUncompress)->ArgsProduct({{1, 4, 0}, {0, 1}});
} // namespace dfly } // namespace dfly