mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-11 18:35:46 +02:00
chore: Implement AddMany method (#3866)
* chore: Implement AddMany method

1. Fix a performance bug in Find2 that made redundant comparisons.
2. Provide a method to StringSet that adds several items in a batch.
3. Use AddMany inside set_family.

Before:
```
BM_Add       4253939 ns      4253713 ns          991
```

After:
```
BM_Add       3482177 ns      3482050 ns         1206
BM_AddMany   3101622 ns      3101507 ns         1360
```

Signed-off-by: Roman Gershman <roman@dragonflydb.io>

* chore: fixes

---------

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
parent
a86fcf80be
commit
bd972b6384
6 changed files with 106 additions and 9 deletions
|
@ -549,6 +549,11 @@ void DenseSet::AddUnique(void* obj, bool has_ttl, uint64_t hashcode) {
|
||||||
++size_;
|
++size_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DenseSet::Prefetch(uint64_t hash) {
|
||||||
|
uint32_t bid = BucketId(hash);
|
||||||
|
PREFETCH_READ(&entries_[bid]);
|
||||||
|
}
|
||||||
|
|
||||||
auto DenseSet::Find2(const void* ptr, uint32_t bid, uint32_t cookie)
|
auto DenseSet::Find2(const void* ptr, uint32_t bid, uint32_t cookie)
|
||||||
-> tuple<size_t, DensePtr*, DensePtr*> {
|
-> tuple<size_t, DensePtr*, DensePtr*> {
|
||||||
DCHECK_LT(bid, entries_.size());
|
DCHECK_LT(bid, entries_.size());
|
||||||
|
@ -563,19 +568,23 @@ auto DenseSet::Find2(const void* ptr, uint32_t bid, uint32_t cookie)
|
||||||
// first look for displaced nodes since this is quicker than iterating a potential long chain
|
// first look for displaced nodes since this is quicker than iterating a potential long chain
|
||||||
if (bid > 0) {
|
if (bid > 0) {
|
||||||
curr = &entries_[bid - 1];
|
curr = &entries_[bid - 1];
|
||||||
ExpireIfNeeded(nullptr, curr);
|
if (curr->IsDisplaced() && curr->GetDisplacedDirection() == -1) {
|
||||||
|
ExpireIfNeeded(nullptr, curr);
|
||||||
|
|
||||||
if (Equal(*curr, ptr, cookie)) {
|
if (Equal(*curr, ptr, cookie)) {
|
||||||
return {bid - 1, nullptr, curr};
|
return {bid - 1, nullptr, curr};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bid + 1 < entries_.size()) {
|
if (bid + 1 < entries_.size()) {
|
||||||
curr = &entries_[bid + 1];
|
curr = &entries_[bid + 1];
|
||||||
ExpireIfNeeded(nullptr, curr);
|
if (curr->IsDisplaced() && curr->GetDisplacedDirection() == 1) {
|
||||||
|
ExpireIfNeeded(nullptr, curr);
|
||||||
|
|
||||||
if (Equal(*curr, ptr, cookie)) {
|
if (Equal(*curr, ptr, cookie)) {
|
||||||
return {bid + 1, nullptr, curr};
|
return {bid + 1, nullptr, curr};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -208,6 +208,7 @@ class DenseSet {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using MemoryResource = PMR_NS::memory_resource;
|
using MemoryResource = PMR_NS::memory_resource;
|
||||||
|
// Number of elements handled per batch by batched insertion (StringSet::AddMany sizes its
// on-stack hash buffer with it and callers chunk their input by it).
static constexpr uint32_t kMaxBatchLen = 32;
|
||||||
|
|
||||||
explicit DenseSet(MemoryResource* mr = PMR_NS::get_default_resource());
|
explicit DenseSet(MemoryResource* mr = PMR_NS::get_default_resource());
|
||||||
virtual ~DenseSet();
|
virtual ~DenseSet();
|
||||||
|
@ -317,6 +318,8 @@ class DenseSet {
|
||||||
// Assumes that the object does not exist in the set.
|
// Assumes that the object does not exist in the set.
|
||||||
void AddUnique(void* obj, bool has_ttl, uint64_t hashcode);
|
void AddUnique(void* obj, bool has_ttl, uint64_t hashcode);
|
||||||
|
|
||||||
|
// Issues a read-prefetch hint for the entry slot of the bucket that `hash` maps to.
void Prefetch(uint64_t hash);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DenseSet(const DenseSet&) = delete;
|
DenseSet(const DenseSet&) = delete;
|
||||||
DenseSet& operator=(DenseSet&) = delete;
|
DenseSet& operator=(DenseSet&) = delete;
|
||||||
|
|
|
@ -51,6 +51,42 @@ bool StringSet::Add(string_view src, uint32_t ttl_sec) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Inserts the elements of `span` into the set and returns how many of them were newly
// added; elements that are already present are skipped and not counted.
//
// `ttl_sec` is passed through to every new entry; UINT32_MAX means the entries carry no TTL.
//
// Full batches of kMaxBatchLen elements are processed in two passes: first every hash is
// computed and its bucket prefetched, then the lookups and insertions are performed. The
// remaining tail (fewer than kMaxBatchLen elements) goes through the regular Add() path.
unsigned StringSet::AddMany(absl::Span<std::string_view> span, uint32_t ttl_sec) {
  uint64_t hash[kMaxBatchLen];
  string_view* data = span.data();
  const bool has_ttl = ttl_sec != UINT32_MAX;
  size_t count = span.size();
  unsigned res = 0;

  // Reserve up front when the table has fewer buckets than incoming elements.
  if (BucketCount() < count) {
    Reserve(count);
  }

  while (count >= kMaxBatchLen) {
    // Pass 1: hash the whole batch and prefetch the target buckets.
    for (unsigned i = 0; i < kMaxBatchLen; ++i) {
      hash[i] = CompactObj::HashCode(data[i]);
      Prefetch(hash[i]);
    }

    // Pass 2: insert only the elements that are not in the set yet. Lookups and insertions
    // are interleaved, so a value repeated inside the batch is found on its second occurrence.
    for (unsigned i = 0; i < kMaxBatchLen; ++i) {
      void* prev = FindInternal(data + i, hash[i], 1);
      if (prev == nullptr) {
        ++res;
        sds field = MakeSetSds(data[i], ttl_sec);
        AddUnique(field, has_ttl, hash[i]);
      }
    }

    // `res` has already been incremented once per inserted element above. It must not be
    // bumped by kMaxBatchLen as well, otherwise every full batch over-reports by kMaxBatchLen.
    count -= kMaxBatchLen;
    data += kMaxBatchLen;
  }

  // Tail: fewer than kMaxBatchLen elements left.
  for (unsigned i = 0; i < count; ++i) {
    res += Add(data[i], ttl_sec);
  }

  return res;
}
|
||||||
|
|
||||||
std::optional<std::string> StringSet::Pop() {
|
std::optional<std::string> StringSet::Pop() {
|
||||||
sds str = (sds)PopInternal();
|
sds str = (sds)PopInternal();
|
||||||
|
|
||||||
|
|
|
@ -4,10 +4,11 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <absl/types/span.h>
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <string>
|
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
|
||||||
#include "core/dense_set.h"
|
#include "core/dense_set.h"
|
||||||
|
@ -28,6 +29,8 @@ class StringSet : public DenseSet {
|
||||||
// Returns true if elem was added.
|
// Returns true if elem was added.
|
||||||
bool Add(std::string_view s1, uint32_t ttl_sec = UINT32_MAX);
|
bool Add(std::string_view s1, uint32_t ttl_sec = UINT32_MAX);
|
||||||
|
|
||||||
|
// Batch variant of Add(): inserts the elements of `span` that are not yet in the set,
// applying `ttl_sec` to the new entries.
// NOTE(review): the return value is meant to be the number of added elements - confirm
// that the definition counts full batches of kMaxBatchLen correctly.
unsigned AddMany(absl::Span<std::string_view> span, uint32_t ttl_sec);
|
||||||
|
|
||||||
bool Erase(std::string_view str) {
|
bool Erase(std::string_view str) {
|
||||||
return EraseInternal(&str, 1);
|
return EraseInternal(&str, 1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -552,4 +552,34 @@ void BM_Add(benchmark::State& state) {
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_Add);
|
BENCHMARK(BM_Add);
|
||||||
|
|
||||||
|
void BM_AddMany(benchmark::State& state) {
|
||||||
|
vector<string> strs;
|
||||||
|
mt19937 generator(0);
|
||||||
|
StringSet ss;
|
||||||
|
unsigned elems = 100000;
|
||||||
|
for (size_t i = 0; i < elems; ++i) {
|
||||||
|
string str = random_string(generator, 16);
|
||||||
|
strs.push_back(str);
|
||||||
|
}
|
||||||
|
ss.Reserve(elems);
|
||||||
|
array<string_view, 32> str_views;
|
||||||
|
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
unsigned offset = 0;
|
||||||
|
while (offset < elems) {
|
||||||
|
unsigned len = min(elems - offset, 32u);
|
||||||
|
for (size_t i = 0; i < len; ++i) {
|
||||||
|
str_views[i] = strs[offset + i];
|
||||||
|
}
|
||||||
|
offset += len;
|
||||||
|
ss.AddMany({str_views.data(), len}, UINT32_MAX);
|
||||||
|
}
|
||||||
|
state.PauseTiming();
|
||||||
|
ss.Clear();
|
||||||
|
ss.Reserve(elems);
|
||||||
|
state.ResumeTiming();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_AddMany);
|
||||||
|
|
||||||
} // namespace dfly
|
} // namespace dfly
|
||||||
|
|
|
@ -93,8 +93,24 @@ struct StringSetWrapper {
|
||||||
|
|
||||||
unsigned Add(const NewEntries& entries, uint32_t ttl_sec) const {
|
unsigned Add(const NewEntries& entries, uint32_t ttl_sec) const {
|
||||||
unsigned res = 0;
|
unsigned res = 0;
|
||||||
for (string_view member : EntriesRange(entries))
|
string_view members[StringSet::kMaxBatchLen];
|
||||||
res += ss->Add(member, ttl_sec);
|
size_t entries_len = std::visit([](const auto& e) { return e.size(); }, entries);
|
||||||
|
unsigned len = 0;
|
||||||
|
if (ss->BucketCount() < entries_len) {
|
||||||
|
ss->Reserve(entries_len);
|
||||||
|
}
|
||||||
|
for (string_view member : EntriesRange(entries)) {
|
||||||
|
members[len++] = member;
|
||||||
|
if (len == StringSet::kMaxBatchLen) {
|
||||||
|
res += ss->AddMany({members, StringSet::kMaxBatchLen}, ttl_sec);
|
||||||
|
len = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (len) {
|
||||||
|
res += ss->AddMany({members, len}, ttl_sec);
|
||||||
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue