chore: Refactor AddMany (#3869)

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2024-10-05 19:27:48 +03:00 committed by GitHub
parent 4dbed3f8dd
commit 612c75c67b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 44 additions and 37 deletions

View file

@ -51,39 +51,28 @@ bool StringSet::Add(string_view src, uint32_t ttl_sec) {
return true;
}
// NOTE(review): this span is a rendered diff hunk. It shows two signatures (AddMany and
// AddBatch), two declarations of `count`, and loops indexing both `data` and `span`, so
// removed and added lines appear interleaved without +/- markers; it does not compile
// as a single function. Presumably AddMany is the removed name and AddBatch the added
// one -- confirm against the repository before editing.
unsigned StringSet::AddMany(absl::Span<std::string_view> span, uint32_t ttl_sec) {
unsigned StringSet::AddBatch(absl::Span<std::string_view> span, uint32_t ttl_sec) {
// One hash slot per item; the array holds at most kMaxBatchLen entries.
uint64_t hash[kMaxBatchLen];
string_view* data = span.data();
// has_ttl is false exactly when ttl_sec equals UINT32_MAX.
bool has_ttl = ttl_sec != UINT32_MAX;
size_t count = span.size();
unsigned count = span.size();
unsigned res = 0;
if (BucketCount() < count) {
Reserve(count);
}
while (count >= kMaxBatchLen) {
for (unsigned i = 0; i < kMaxBatchLen; ++i) {
hash[i] = CompactObj::HashCode(data[i]);
Prefetch(hash[i]);
}
// Checks that the number of items fits into hash[] (kMaxBatchLen slots).
DCHECK_LE(count, kMaxBatchLen);
for (unsigned i = 0; i < kMaxBatchLen; ++i) {
void* prev = FindInternal(data + i, hash[i], 1);
if (prev == nullptr) {
++res;
sds field = MakeSetSds(data[i], ttl_sec);
AddUnique(field, has_ttl, hash[i]);
}
}
count -= kMaxBatchLen;
data += kMaxBatchLen;
res += kMaxBatchLen;
// First pass: hash every item and hand the hash to Prefetch
// (presumably to warm the target bucket before the lookups below -- confirm).
for (size_t i = 0; i < count; i++) {
hash[i] = CompactObj::HashCode(span[i]);
Prefetch(hash[i]);
}
// Second pass: items that FindInternal does not locate are turned into an sds via
// MakeSetSds and inserted with AddUnique; res counts those insertions.
for (unsigned i = 0; i < count; ++i) {
res += Add(data[i], ttl_sec);
void* prev = FindInternal(&span[i], hash[i], 1);
if (prev == nullptr) {
++res;
sds field = MakeSetSds(span[i], ttl_sec);
AddUnique(field, has_ttl, hash[i]);
}
}
return res;
}

View file

@ -29,7 +29,7 @@ class StringSet : public DenseSet {
// Returns true if elem was added.
bool Add(std::string_view s1, uint32_t ttl_sec = UINT32_MAX);
unsigned AddMany(absl::Span<std::string_view> span, uint32_t ttl_sec);
// Adds all elements of span by copying them into std::string_view batches of at most
// kMaxBatchLen entries and passing each batch to AddBatch. T must be assignable to
// std::string_view. Returns the sum of the values returned by AddBatch.
template <typename T> unsigned AddMany(absl::Span<T> span, uint32_t ttl_sec);
bool Erase(std::string_view str) {
return EraseInternal(&str, 1);
@ -104,6 +104,8 @@ class StringSet : public DenseSet {
protected:
uint64_t Hash(const void* ptr, uint32_t cookie) const override;
// Batch helper used by AddMany. The definition DCHECKs that span.size() does not exceed
// kMaxBatchLen. Returns a count of inserted items (see the definition for details).
unsigned AddBatch(absl::Span<std::string_view> span, uint32_t ttl_sec);
bool ObjEqual(const void* left, const void* right, uint32_t right_cookie) const override;
size_t ObjectAllocSize(const void* s1) const override;
@ -114,4 +116,28 @@ class StringSet : public DenseSet {
sds MakeSetSds(std::string_view src, uint32_t ttl_sec) const;
};
// Adds every element of `span` to the set. Elements are copied into a fixed-size
// array of std::string_view and handed to AddBatch at most kMaxBatchLen at a time,
// so T only needs to be assignable to std::string_view.
// Returns the sum of the counts reported by AddBatch.
template <typename T> unsigned StringSet::AddMany(absl::Span<T> span, uint32_t ttl_sec) {
  std::string_view batch[kMaxBatchLen];
  unsigned added = 0;

  // Reserve once up front when there are fewer buckets than incoming elements.
  const size_t total = span.size();
  if (total > BucketCount())
    Reserve(total);

  while (!span.empty()) {
    // Every chunk but the last holds exactly kMaxBatchLen elements;
    // the last one holds whatever remains.
    const size_t len = span.size() < kMaxBatchLen ? span.size() : kMaxBatchLen;
    for (size_t idx = 0; idx != len; ++idx)
      batch[idx] = span[idx];
    span.remove_prefix(len);
    added += AddBatch(absl::MakeSpan(batch, len), ttl_sec);
  }

  return added;
}
} // end namespace dfly

View file

@ -562,19 +562,11 @@ void BM_AddMany(benchmark::State& state) {
strs.push_back(str);
}
ss.Reserve(elems);
array<string_view, 32> str_views;
while (state.KeepRunning()) {
unsigned offset = 0;
while (offset < elems) {
unsigned len = min(elems - offset, 32u);
for (size_t i = 0; i < len; ++i) {
str_views[i] = strs[offset + i];
}
offset += len;
ss.AddMany({str_views.data(), len}, UINT32_MAX);
}
ss.AddMany(absl::MakeSpan(strs), UINT32_MAX);
state.PauseTiming();
CHECK_EQ(ss.UpperBoundSize(), elems);
ss.Clear();
ss.Reserve(elems);
state.ResumeTiming();

View file

@ -102,13 +102,13 @@ struct StringSetWrapper {
for (string_view member : EntriesRange(entries)) {
members[len++] = member;
if (len == StringSet::kMaxBatchLen) {
res += ss->AddMany({members, StringSet::kMaxBatchLen}, ttl_sec);
res += ss->AddMany(absl::MakeSpan(members, StringSet::kMaxBatchLen), ttl_sec);
len = 0;
}
}
if (len) {
res += ss->AddMany({members, len}, ttl_sec);
res += ss->AddMany(absl::MakeSpan(members, len), ttl_sec);
}
return res;