chore: refactor compact_object and introduce materialize method (#3300)

This commit is contained in:
Roman Gershman 2024-07-10 13:57:59 +03:00 committed by GitHub
parent 21620ef46f
commit 038d081fd0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 62 additions and 46 deletions

View file

@ -761,52 +761,7 @@ void CompactObj::SetString(std::string_view str) {
}
}
DCHECK_GT(str.size(), kInlineLen);
string_view encoded = str;
bool is_ascii = kUseAsciiEncoding && detail::validate_ascii_fast(str.data(), str.size());
if (is_ascii) {
size_t encode_len = binpacked_len(str.size());
size_t rev_len = ascii_len(encode_len);
if (rev_len == str.size()) {
mask |= ASCII2_ENC_BIT; // str hits its highest bound.
} else {
CHECK_EQ(str.size(), rev_len - 1) << "Bad ascii encoding for len " << str.size();
mask |= ASCII1_ENC_BIT;
}
tl.tmp_buf.resize(encode_len);
detail::ascii_pack_simd2(str.data(), str.size(), tl.tmp_buf.data());
encoded = string_view{reinterpret_cast<char*>(tl.tmp_buf.data()), encode_len};
if (encoded.size() <= kInlineLen) {
SetMeta(encoded.size(), mask);
detail::ascii_pack(str.data(), str.size(), reinterpret_cast<uint8_t*>(u_.inline_str));
return;
}
}
if (kUseSmallStrings && SmallString::CanAllocate(encoded.size())) {
if (taglen_ == 0) {
SetMeta(SMALL_TAG, mask);
tl.small_str_bytes += u_.small_str.Assign(encoded);
return;
}
if (taglen_ == SMALL_TAG && encoded.size() <= u_.small_str.size()) {
mask_ = mask;
tl.small_str_bytes -= u_.small_str.MallocUsed();
tl.small_str_bytes += u_.small_str.Assign(encoded);
return;
}
}
SetMeta(ROBJ_TAG, mask);
u_.r_obj.SetString(encoded, tl.local_mr);
EncodeString(str);
}
string_view CompactObj::GetSlice(string* scratch) const {
@ -1000,6 +955,13 @@ std::pair<size_t, size_t> CompactObj::GetExternalSlice() const {
return pair<size_t, size_t>(offset, size_t(u_.ext_ptr.size));
}
// Re-encodes this object from `str` by delegating to EncodeString().
// Preconditions (both are fatal CHECKs, so they fire in every build type):
//   * IsExternal() must be true for this object;
//   * `str` must be longer than 20 bytes.
// NOTE(review): presumably `str` is the value loaded back for an externally
// stored object - confirm against the callers.
void CompactObj::Materialize(std::string_view str) {
CHECK(IsExternal());
// NOTE(review): EncodeString() itself only DCHECKs size > kInlineLen; the reason
// for the stricter literal 20 is not visible from this function - confirm.
CHECK_GT(str.size(), 20u);
EncodeString(str);
}
void CompactObj::Reset() {
if (HasAllocated()) {
Free();
@ -1174,6 +1136,56 @@ bool CompactObj::CmpEncoded(string_view sv) const {
return false;
}
// Stores `str` in this object, choosing a representation in this order:
//   1. If ASCII encoding is enabled and `str` validates as ASCII, it is bit-packed.
//      When the packed form fits into kInlineLen bytes it is written straight
//      into u_.inline_str and the function returns.
//   2. Otherwise, if small strings are enabled and SmallString can hold the
//      (possibly packed) bytes, they go into u_.small_str - either into a fresh
//      one (taglen_ == 0) or by reusing an existing one that is large enough.
//   3. Otherwise the bytes are stored through u_.r_obj.
//
// The encoding bits of mask_ are recomputed; all other mask bits are preserved.
// `str` must be longer than kInlineLen (DCHECK-ed).
void CompactObj::EncodeString(string_view str) {
  DCHECK_GT(str.size(), kInlineLen);

  // Drop the previous encoding bits; they are set again below if `str` is ASCII.
  uint8_t mask = mask_ & ~kEncMask;
  string_view encoded = str;

  const bool is_ascii = kUseAsciiEncoding && detail::validate_ascii_fast(str.data(), str.size());
  if (is_ascii) {
    const size_t encode_len = binpacked_len(str.size());
    const size_t rev_len = ascii_len(encode_len);

    // Unpacking encode_len bytes yields either exactly str.size() characters or one
    // more; the encoding bit records which, so that DecodedLen() can undo it.
    if (rev_len == str.size()) {
      mask |= ASCII2_ENC_BIT;  // str hits its highest bound.
    } else {
      CHECK_EQ(str.size(), rev_len - 1) << "Bad ascii encoding for len " << str.size();
      mask |= ASCII1_ENC_BIT;
    }

    // Inline case: pack directly into the inline buffer. Deciding this before
    // touching tl.tmp_buf avoids packing the same string twice, since the
    // scratch copy would never be read on this path.
    if (encode_len <= kInlineLen) {
      SetMeta(encode_len, mask);
      detail::ascii_pack(str.data(), str.size(), reinterpret_cast<uint8_t*>(u_.inline_str));
      return;
    }

    // Too large for inline storage: pack into tl.tmp_buf and store that below.
    tl.tmp_buf.resize(encode_len);
    detail::ascii_pack_simd2(str.data(), str.size(), tl.tmp_buf.data());
    encoded = string_view{reinterpret_cast<char*>(tl.tmp_buf.data()), encode_len};
  }

  if (kUseSmallStrings && SmallString::CanAllocate(encoded.size())) {
    if (taglen_ == 0) {
      SetMeta(SMALL_TAG, mask);
      tl.small_str_bytes += u_.small_str.Assign(encoded);
      return;
    }

    // Reuse the existing small string when the new payload is not larger than it.
    if (taglen_ == SMALL_TAG && encoded.size() <= u_.small_str.size()) {
      mask_ = mask;
      // Keep the byte counter consistent: remove the old usage, add the new one.
      tl.small_str_bytes -= u_.small_str.MallocUsed();
      tl.small_str_bytes += u_.small_str.Assign(encoded);
      return;
    }
  }

  SetMeta(ROBJ_TAG, mask);
  u_.r_obj.SetString(encoded, tl.local_mr);
}
// Returns the decoded length of an ASCII-packed payload of `sz` bytes:
// ascii_len(sz), minus one when the ASCII1_ENC_BIT flag is set in mask_.
size_t CompactObj::DecodedLen(size_t sz) const {
  const int adjust = (mask_ & ASCII1_ENC_BIT) ? 1 : 0;
  return ascii_len(sz) - adjust;
}