mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-10 18:05:44 +02:00
chore: reorganize compact object mask bits (#5077)
Specifically get rid of MaskEnum and replace it with explicit bits aliasing the mask. Reorganize the encoding bits to be able to store 4 states that include huffman encoding. Solves the first part of #4880. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
parent
05d99769e1
commit
54328fd00e
3 changed files with 81 additions and 98 deletions
|
@ -804,14 +804,13 @@ size_t CompactObj::Size() const {
|
|||
LOG(DFATAL) << "Should not reach " << int(taglen_);
|
||||
}
|
||||
}
|
||||
uint8_t encoded = (mask_ & kEncMask);
|
||||
return encoded ? DecodedLen(raw_size) : raw_size;
|
||||
return mask_bits_.encoding ? DecodedLen(raw_size) : raw_size;
|
||||
}
|
||||
|
||||
uint64_t CompactObj::HashCode() const {
|
||||
DCHECK(taglen_ != JSON_TAG) << "JSON type cannot be used for keys!";
|
||||
|
||||
uint8_t encoded = (mask_ & kEncMask);
|
||||
uint8_t encoded = mask_bits_.encoding;
|
||||
if (IsInline()) {
|
||||
if (encoded) {
|
||||
char buf[kInlineLen * 2];
|
||||
|
@ -887,7 +886,8 @@ void CompactObj::SetInt(int64_t val) {
|
|||
DCHECK(!IsExternal());
|
||||
|
||||
if (INT_TAG != taglen_) {
|
||||
SetMeta(INT_TAG, mask_ & ~kEncMask);
|
||||
SetMeta(INT_TAG, mask_);
|
||||
mask_bits_.encoding = NONE_ENC;
|
||||
}
|
||||
|
||||
u_.ival = val;
|
||||
|
@ -970,8 +970,9 @@ SBF* CompactObj::GetSBF() const {
|
|||
}
|
||||
|
||||
void CompactObj::SetString(std::string_view str) {
|
||||
uint8_t mask = mask_ & ~kEncMask;
|
||||
CHECK(!IsExternal());
|
||||
mask_bits_.encoding = NONE_ENC;
|
||||
|
||||
// Trying auto-detection heuristics first.
|
||||
if (str.size() <= 20) {
|
||||
long long ival;
|
||||
|
@ -979,14 +980,14 @@ void CompactObj::SetString(std::string_view str) {
|
|||
|
||||
// We use redis string2ll to be compatible with Redis.
|
||||
if (string2ll(str.data(), str.size(), &ival)) {
|
||||
SetMeta(INT_TAG, mask);
|
||||
SetMeta(INT_TAG, mask_);
|
||||
u_.ival = ival;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (str.size() <= kInlineLen) {
|
||||
SetMeta(str.size(), mask);
|
||||
SetMeta(str.size(), mask_);
|
||||
if (!str.empty())
|
||||
memcpy(u_.inline_str, str.data(), str.size());
|
||||
return;
|
||||
|
@ -997,8 +998,9 @@ void CompactObj::SetString(std::string_view str) {
|
|||
}
|
||||
|
||||
void CompactObj::ReserveString(size_t size) {
|
||||
uint8_t mask = mask_ & ~kEncMask;
|
||||
SetMeta(ROBJ_TAG, mask);
|
||||
mask_bits_.encoding = NONE_ENC;
|
||||
SetMeta(ROBJ_TAG, mask_);
|
||||
|
||||
u_.r_obj.ReserveString(size, tl.local_mr);
|
||||
}
|
||||
|
||||
|
@ -1006,16 +1008,16 @@ void CompactObj::AppendString(std::string_view str) {
|
|||
u_.r_obj.AppendString(str, tl.local_mr);
|
||||
}
|
||||
|
||||
// TODO: simplify this code by using the GetString(char*) variant.
|
||||
string_view CompactObj::GetSlice(string* scratch) const {
|
||||
CHECK(!IsExternal());
|
||||
uint8_t is_encoded = mask_ & kEncMask;
|
||||
|
||||
if (IsInline()) {
|
||||
if (is_encoded) {
|
||||
if (mask_bits_.encoding) {
|
||||
size_t decoded_len = taglen_ + 2;
|
||||
|
||||
// must be this because we either shortened 17 or 18.
|
||||
DCHECK_EQ(is_encoded, ASCII2_ENC_BIT);
|
||||
DCHECK_EQ(mask_bits_.encoding, ASCII2_ENC);
|
||||
DCHECK_EQ(decoded_len, ascii_len(taglen_));
|
||||
|
||||
scratch->resize(decoded_len);
|
||||
|
@ -1033,7 +1035,7 @@ string_view CompactObj::GetSlice(string* scratch) const {
|
|||
return *scratch;
|
||||
}
|
||||
|
||||
if (is_encoded) {
|
||||
if (mask_bits_.encoding) {
|
||||
if (taglen_ == ROBJ_TAG) {
|
||||
CHECK_EQ(OBJ_STRING, u_.r_obj.type());
|
||||
DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());
|
||||
|
@ -1120,14 +1122,13 @@ void __attribute__((noinline)) CompactObj::GetString(string* res) const {
|
|||
|
||||
void CompactObj::GetString(char* dest) const {
|
||||
CHECK(!IsExternal());
|
||||
uint8_t is_encoded = mask_ & kEncMask;
|
||||
|
||||
if (IsInline()) {
|
||||
if (is_encoded) {
|
||||
if (mask_bits_.encoding) {
|
||||
size_t decoded_len = taglen_ + 2;
|
||||
|
||||
// must be this because we either shortened 17 or 18.
|
||||
DCHECK_EQ(is_encoded, ASCII2_ENC_BIT);
|
||||
DCHECK_EQ(mask_bits_.encoding, ASCII2_ENC);
|
||||
DCHECK_EQ(decoded_len, ascii_len(taglen_));
|
||||
|
||||
detail::ascii_unpack(to_byte(u_.inline_str), decoded_len, dest);
|
||||
|
@ -1144,7 +1145,7 @@ void CompactObj::GetString(char* dest) const {
|
|||
return;
|
||||
}
|
||||
|
||||
if (is_encoded) {
|
||||
if (mask_bits_.encoding) {
|
||||
if (taglen_ == ROBJ_TAG) {
|
||||
CHECK_EQ(OBJ_STRING, u_.r_obj.type());
|
||||
DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());
|
||||
|
@ -1220,7 +1221,9 @@ auto CompactObj::GetCool() const -> CoolItem {
|
|||
|
||||
void CompactObj::ImportExternal(const CompactObj& src) {
|
||||
DCHECK(src.IsExternal());
|
||||
SetMeta(EXTERNAL_TAG, src.mask_ & kEncMask);
|
||||
uint8_t encoding = src.mask_bits_.encoding;
|
||||
SetMeta(EXTERNAL_TAG, 0);
|
||||
mask_bits_.encoding = encoding;
|
||||
u_.ext_ptr = src.u_.ext_ptr;
|
||||
}
|
||||
|
||||
|
@ -1238,13 +1241,11 @@ void CompactObj::Materialize(std::string_view blob, bool is_raw) {
|
|||
DCHECK_GT(blob.size(), kInlineLen);
|
||||
|
||||
if (is_raw) {
|
||||
uint8_t mask = mask_;
|
||||
|
||||
if (kUseSmallStrings && SmallString::CanAllocate(blob.size())) {
|
||||
SetMeta(SMALL_TAG, mask);
|
||||
SetMeta(SMALL_TAG, mask_);
|
||||
tl.small_str_bytes += u_.small_str.Assign(blob);
|
||||
} else {
|
||||
SetMeta(ROBJ_TAG, mask);
|
||||
SetMeta(ROBJ_TAG, mask_);
|
||||
u_.r_obj.SetString(blob, tl.local_mr);
|
||||
}
|
||||
} else {
|
||||
|
@ -1317,8 +1318,8 @@ size_t CompactObj::MallocUsed(bool slow) const {
|
|||
bool CompactObj::operator==(const CompactObj& o) const {
|
||||
DCHECK(taglen_ != JSON_TAG && o.taglen_ != JSON_TAG) << "cannot use JSON type to check equal";
|
||||
|
||||
uint8_t m1 = mask_ & kEncMask;
|
||||
uint8_t m2 = o.mask_ & kEncMask;
|
||||
uint8_t m1 = mask_bits_.encoding;
|
||||
uint8_t m2 = o.mask_bits_.encoding;
|
||||
if (m1 != m2)
|
||||
return false;
|
||||
|
||||
|
@ -1433,8 +1434,8 @@ bool CompactObj::CmpEncoded(string_view sv) const {
|
|||
|
||||
void CompactObj::EncodeString(string_view str) {
|
||||
DCHECK_GT(str.size(), kInlineLen);
|
||||
DCHECK_EQ(NONE_ENC, mask_bits_.encoding);
|
||||
|
||||
uint8_t mask = mask_ & ~kEncMask;
|
||||
string_view encoded = str;
|
||||
bool is_ascii = kUseAsciiEncoding && detail::validate_ascii_fast(str.data(), str.size());
|
||||
|
||||
|
@ -1443,11 +1444,10 @@ void CompactObj::EncodeString(string_view str) {
|
|||
size_t rev_len = ascii_len(encode_len);
|
||||
|
||||
if (rev_len == str.size()) {
|
||||
mask |= ASCII2_ENC_BIT; // str hits its highest bound.
|
||||
mask_bits_.encoding = ASCII2_ENC; // str hits its highest bound.
|
||||
} else {
|
||||
CHECK_EQ(str.size(), rev_len - 1) << "Bad ascii encoding for len " << str.size();
|
||||
|
||||
mask |= ASCII1_ENC_BIT;
|
||||
mask_bits_.encoding = ASCII1_ENC; // str is shorter than its highest bound.
|
||||
}
|
||||
|
||||
tl.tmp_buf.resize(encode_len);
|
||||
|
@ -1455,7 +1455,7 @@ void CompactObj::EncodeString(string_view str) {
|
|||
encoded = string_view{reinterpret_cast<char*>(tl.tmp_buf.data()), encode_len};
|
||||
|
||||
if (encoded.size() <= kInlineLen) {
|
||||
SetMeta(encoded.size(), mask);
|
||||
SetMeta(encoded.size(), mask_);
|
||||
detail::ascii_pack(str.data(), str.size(), reinterpret_cast<uint8_t*>(u_.inline_str));
|
||||
|
||||
return;
|
||||
|
@ -1464,20 +1464,19 @@ void CompactObj::EncodeString(string_view str) {
|
|||
|
||||
if (kUseSmallStrings && SmallString::CanAllocate(encoded.size())) {
|
||||
if (taglen_ == 0) {
|
||||
SetMeta(SMALL_TAG, mask);
|
||||
SetMeta(SMALL_TAG, mask_);
|
||||
tl.small_str_bytes += u_.small_str.Assign(encoded);
|
||||
return;
|
||||
}
|
||||
|
||||
if (taglen_ == SMALL_TAG && encoded.size() <= u_.small_str.size()) {
|
||||
mask_ = mask;
|
||||
tl.small_str_bytes -= u_.small_str.MallocUsed();
|
||||
tl.small_str_bytes += u_.small_str.Assign(encoded);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
SetMeta(ROBJ_TAG, mask);
|
||||
SetMeta(ROBJ_TAG, mask_);
|
||||
u_.r_obj.SetString(encoded, tl.local_mr);
|
||||
}
|
||||
|
||||
|
@ -1501,7 +1500,8 @@ StringOrView CompactObj::GetRawString() const {
|
|||
}
|
||||
|
||||
size_t CompactObj::DecodedLen(size_t sz) const {
|
||||
return ascii_len(sz) - ((mask_ & ASCII1_ENC_BIT) ? 1 : 0);
|
||||
unsigned delta = (mask_bits_.encoding == ASCII1_ENC ? 1 : 0);
|
||||
return ascii_len(sz) - delta;
|
||||
}
|
||||
|
||||
MemoryResource* CompactObj::memory_resource() {
|
||||
|
|
|
@ -125,36 +125,13 @@ class CompactObj {
|
|||
SBF_TAG = 22,
|
||||
};
|
||||
|
||||
enum MaskBit {
|
||||
REF_BIT = 1,
|
||||
EXPIRE_BIT = 2, // Mark objects that have expiry timestamp assigned.
|
||||
FLAG_BIT = 4, // Used to mark keys that have memcache flags assigned.
|
||||
|
||||
// ascii encoding is not an injective function. it compresses 8 bytes to 7 but also 7 to 7.
|
||||
// therefore, in order to know the original length we introduce 2 flags that
|
||||
// correct the length upon decoding. ASCII1_ENC_BIT rounds down the decoded length,
|
||||
// while ASCII2_ENC_BIT rounds it up. See DecodedLen implementation for more info.
|
||||
ASCII1_ENC_BIT = 8,
|
||||
ASCII2_ENC_BIT = 0x10,
|
||||
|
||||
// IO_PENDING is set when the tiered storage has issued an i/o request to save the value. It is
|
||||
// cleared when the io request finishes or is cancelled.
|
||||
IO_PENDING = 0x20,
|
||||
|
||||
// Applied only on keys that should be deleted asynchronously.
|
||||
// (it can be the same value as IO_PENDING) that is applied only on values.
|
||||
KEY_ASYNC_DELETE = 0x20,
|
||||
STICKY = 0x40,
|
||||
|
||||
// TOUCHED is used to determine which items are hot/cold.
|
||||
// by checking if the item was touched from the last time we
|
||||
// reached this item while traversing the database to set items as cold.
|
||||
// https://junchengyang.com/publication/nsdi24-SIEVE.pdf
|
||||
TOUCHED = 0x80,
|
||||
enum Encoding : uint8_t {
|
||||
NONE_ENC = 0,
|
||||
ASCII1_ENC = 1,
|
||||
ASCII2_ENC = 2,
|
||||
HUFFMAN_ENC = 3, // TBD
|
||||
};
|
||||
|
||||
static constexpr uint8_t kEncMask = ASCII1_ENC_BIT | ASCII2_ENC_BIT;
|
||||
|
||||
public:
|
||||
using PrefixArray = std::vector<std::string_view>;
|
||||
using MemoryResource = detail::RobjWrapper::MemoryResource;
|
||||
|
@ -185,13 +162,14 @@ class CompactObj {
|
|||
CompactObj res;
|
||||
memcpy(&res.u_, &u_, sizeof(u_));
|
||||
res.taglen_ = taglen_;
|
||||
res.mask_ = mask_ | REF_BIT;
|
||||
res.mask_ = mask_;
|
||||
res.mask_bits_.ref = 1;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
bool IsRef() const {
|
||||
return mask_ & REF_BIT;
|
||||
return mask_bits_.ref;
|
||||
}
|
||||
|
||||
std::string_view GetSlice(std::string* scratch) const;
|
||||
|
@ -222,73 +200,53 @@ class CompactObj {
|
|||
}
|
||||
|
||||
bool HasExpire() const {
|
||||
return mask_ & EXPIRE_BIT;
|
||||
return mask_bits_.expire;
|
||||
}
|
||||
|
||||
void SetExpire(bool e) {
|
||||
if (e) {
|
||||
mask_ |= EXPIRE_BIT;
|
||||
} else {
|
||||
mask_ &= ~EXPIRE_BIT;
|
||||
}
|
||||
mask_bits_.expire = e;
|
||||
}
|
||||
|
||||
bool HasFlag() const {
|
||||
return mask_ & FLAG_BIT;
|
||||
return mask_bits_.mc_flag;
|
||||
}
|
||||
|
||||
void SetFlag(bool e) {
|
||||
if (e) {
|
||||
mask_ |= FLAG_BIT;
|
||||
} else {
|
||||
mask_ &= ~FLAG_BIT;
|
||||
}
|
||||
mask_bits_.mc_flag = e;
|
||||
}
|
||||
|
||||
bool WasTouched() const {
|
||||
return mask_ & TOUCHED;
|
||||
return mask_bits_.touched;
|
||||
}
|
||||
|
||||
void SetTouched(bool e) {
|
||||
if (e) {
|
||||
mask_ |= TOUCHED;
|
||||
} else {
|
||||
mask_ &= ~TOUCHED;
|
||||
}
|
||||
mask_bits_.touched = e;
|
||||
}
|
||||
|
||||
bool DefragIfNeeded(float ratio);
|
||||
|
||||
void SetAsyncDelete() {
|
||||
mask_ |= KEY_ASYNC_DELETE;
|
||||
mask_bits_.io_pending = 1; // io_pending flag is used for async delete for keys.
|
||||
}
|
||||
|
||||
bool IsAsyncDelete() const {
|
||||
return mask_ & KEY_ASYNC_DELETE;
|
||||
return mask_bits_.io_pending;
|
||||
}
|
||||
|
||||
bool HasStashPending() const {
|
||||
return mask_ & IO_PENDING;
|
||||
return mask_bits_.io_pending;
|
||||
}
|
||||
|
||||
void SetStashPending(bool b) {
|
||||
if (b) {
|
||||
mask_ |= IO_PENDING;
|
||||
} else {
|
||||
mask_ &= ~IO_PENDING;
|
||||
}
|
||||
mask_bits_.io_pending = b;
|
||||
}
|
||||
|
||||
bool IsSticky() const {
|
||||
return mask_ & STICKY;
|
||||
return mask_bits_.sticky;
|
||||
}
|
||||
|
||||
void SetSticky(bool s) {
|
||||
if (s) {
|
||||
mask_ |= STICKY;
|
||||
} else {
|
||||
mask_ &= ~STICKY;
|
||||
}
|
||||
void SetSticky(bool e) {
|
||||
mask_bits_.sticky = e;
|
||||
}
|
||||
|
||||
unsigned Encoding() const;
|
||||
|
@ -525,14 +483,38 @@ class CompactObj {
|
|||
//
|
||||
static_assert(sizeof(u_) == 16);
|
||||
|
||||
uint8_t mask_ = 0;
|
||||
union {
|
||||
uint8_t mask_ = 0;
|
||||
struct {
|
||||
uint8_t ref : 1;
|
||||
uint8_t expire : 1; // Mark objects that have expiry timestamp assigned.
|
||||
uint8_t mc_flag : 1; // Marks keys that have memcache flags assigned.
|
||||
|
||||
// ascii encoding is not an injective function. it compresses 8 bytes to 7 but also 7 to 7.
|
||||
// therefore, in order to know the original length we introduce 2 encoding values that
|
||||
// correct the length upon decoding. ASCII1_ENC rounds down the decoded length,
|
||||
// while ASCII2_ENC rounds it up. See DecodedLen implementation for more info.
|
||||
uint8_t encoding : 2;
|
||||
|
||||
// IO_PENDING is set when the tiered storage has issued an i/o request to save the value.
|
||||
// It is cleared when the io request finishes or is cancelled.
|
||||
uint8_t io_pending : 1; // also serves as async-delete for keys.
|
||||
uint8_t sticky : 1;
|
||||
|
||||
// TOUCHED is used to determine which items are hot/cold.
|
||||
// by checking if the item was touched from the last time we
|
||||
// reached this item while traversing the database to set items as cold.
|
||||
// https://junchengyang.com/publication/nsdi24-SIEVE.pdf
|
||||
uint8_t touched : 1; // used to mark keys that were accessed.
|
||||
} mask_bits_;
|
||||
};
|
||||
|
||||
// We currently use 5 bits for tags; the remaining 3 bits are reserved for extending the mask.
|
||||
uint8_t taglen_ = 0;
|
||||
};
|
||||
|
||||
inline bool CompactObj::operator==(std::string_view sv) const {
|
||||
if (mask_ & kEncMask)
|
||||
if (mask_bits_.encoding)
|
||||
return CmpEncoded(sv);
|
||||
|
||||
if (IsInline()) {
|
||||
|
|
|
@ -607,6 +607,7 @@ TEST_F(CompactObjectTest, RawInterface) {
|
|||
|
||||
str.assign(50, char(200)); // non ascii
|
||||
cobj_.SetString(str);
|
||||
ASSERT_EQ(str, cobj_.GetSlice(&tmp));
|
||||
|
||||
{
|
||||
auto raw_blob = cobj_.GetRawString();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue