chore: simplify CloneBatch code (#3862)

Remove awkward fetch_tail case and streamline the code.
Fix invalid prefetch addresses. Performance improved slightly.

Before:
`BM_Fill/elements:32000     874677 ns       874647 ns         4774`

After:
`BM_Fill/elements:32000     831786 ns       831761 ns         5111`

Also added a benchmark for the Clear() operation.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2024-10-04 12:08:41 +03:00 committed by GitHub
parent 1958e09a9a
commit 819f6e125d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 56 additions and 43 deletions

View file

@ -214,6 +214,14 @@ void DenseSet::CloneBatch(unsigned len, CloneItem* items, DenseSet* other) const
// We handle a batch of items to minimize data dependencies when accessing memory for a single
// item. We prefetch the memory for entire batch before actually reading data from any of the
// elements.
// Copies a single object from this set into `other`:
//   1. other->ObjectClone() produces the copy (has_ttl is forwarded, last argument is false),
//   2. the ORIGINAL object is hashed with this set's Hash() (second argument 0),
//   3. the copy is inserted into `other` via AddUnique() together with has_ttl and that hash.
// `this` is captured only for the Hash() call; the destination set is passed explicitly.
auto clone = [this](void* obj, bool has_ttl, DenseSet* other) {
// The majority of the CPU is spent in this block.
void* new_obj = other->ObjectClone(obj, has_ttl, false);
uint64_t hash = this->Hash(obj, 0);
other->AddUnique(new_obj, has_ttl, hash);
};
while (len) {
unsigned dest_id = 0;
// we walk "len" linked lists in parallel, and prefetch their next, obj pointers
@ -221,34 +229,23 @@ void DenseSet::CloneBatch(unsigned len, CloneItem* items, DenseSet* other) const
for (unsigned i = 0; i < len; ++i) {
auto& src = items[i];
if (src.obj) {
// The majority of the CPU is spent in this block.
void* new_obj = other->ObjectClone(src.obj, src.has_ttl, false);
uint64_t hash = Hash(src.obj, 0);
other->AddUnique(new_obj, src.has_ttl, hash);
clone(src.obj, src.has_ttl, other);
src.obj = nullptr;
}
const DenseLinkKey* link = src.link;
if (link) {
src.link = nullptr;
if (src.ptr.IsEmpty()) {
continue;
}
if (src.ptr.IsObject()) {
clone(src.ptr.Raw(), src.has_ttl, other);
} else {
auto& dest = items[dest_id++];
DCHECK(!link->next.IsEmpty());
if (src.fetch_tail) {
// switch to the final state.
DCHECK(link->next.IsObject());
dest.obj = link->next.Raw();
src.fetch_tail = false;
} else {
dest.obj = link->Raw();
if (link->next.IsObject()) {
// next state - pre-terminal, fetch the final object.
dest.fetch_tail = true;
dest.link = link;
} else {
dest.link = link->next.AsLink();
PREFETCH_READ(dest.link);
}
}
DenseLinkKey* link = src.ptr.AsLink();
dest.obj = link->Raw();
dest.has_ttl = link->HasTtl();
dest.ptr = link->next;
PREFETCH_READ(dest.ptr.Raw());
PREFETCH_READ(dest.obj);
}
}
@ -334,26 +331,27 @@ void DenseSet::Fill(DenseSet* other) const {
unsigned len = 0;
for (auto it = entries_.begin(); it != entries_.end(); ++it) {
const DensePtr* ptr = &(*it);
DensePtr ptr = *it;
if (!ptr->IsEmpty()) {
arr[len].has_ttl = ptr->HasTtl();
if (ptr.IsEmpty())
continue;
if (ptr->IsObject()) {
arr[len].link = nullptr;
arr[len].obj = ptr->Raw();
PREFETCH_READ(arr[len].obj);
} else {
arr[len].link = ptr->AsLink();
arr[len].obj = nullptr;
PREFETCH_READ(arr[len].link);
}
auto& item = arr[len++];
item.has_ttl = ptr.HasTtl();
++len;
if (len == kArrLen) {
CloneBatch(kArrLen, arr, other);
len = 0;
}
if (ptr.IsObject()) {
item.ptr.Reset();
item.obj = ptr.Raw();
PREFETCH_READ(item.obj);
} else {
item.ptr = ptr;
item.obj = nullptr;
PREFETCH_READ(item.ptr.Raw());
}
if (len == kArrLen) {
CloneBatch(kArrLen, arr, other);
len = 0;
}
}
CloneBatch(len, arr, other);

View file

@ -331,10 +331,9 @@ class DenseSet {
bool Equal(DensePtr dptr, const void* ptr, uint32_t cookie) const;
// Per-slot cursor used by CloneBatch(); Fill() prepares an array of these from entries_
// and hands it to CloneBatch() in batches.
struct CloneItem {
// Chain link consulted by the fetch_tail-based traversal in CloneBatch().
// NOTE(review): the ptr-based traversal does not read this field - confirm it is still needed.
const DenseLinkKey* link = nullptr;
// Remainder of the chain still to be visited. CloneBatch() skips the slot when it is empty,
// clones it directly when it is an object, and otherwise follows it as a link.
DensePtr ptr;
// Object to clone on the next CloneBatch() pass; reset to nullptr once it has been cloned.
void* obj = nullptr;
// TTL flag forwarded to the clone step for `obj`.
bool has_ttl = false;
// When set, the next pass takes link->next as the final object of the chain.
// NOTE(review): only the link-based traversal reads this flag - confirm it is still needed.
bool fetch_tail = false;
};
void CloneBatch(unsigned len, CloneItem* items, DenseSet* other) const;

View file

@ -515,4 +515,20 @@ void BM_Fill(benchmark::State& state) {
}
BENCHMARK(BM_Fill)->ArgName("elements")->Arg(32000);
// Measures StringSet::Clear(). Every benchmark iteration first inserts `elements` random
// 16-character strings with the timer paused, so only the Clear() call itself is timed.
void BM_Clear(benchmark::State& state) {
  unsigned num_elements = state.range(0);
  mt19937 rng(0);
  StringSet set;

  while (state.KeepRunning()) {
    // Untimed setup: perform exactly `num_elements` Add() calls.
    state.PauseTiming();
    for (size_t remaining = num_elements; remaining > 0; --remaining) {
      string key = random_string(rng, 16);
      set.Add(key);
    }
    state.ResumeTiming();

    // Timed region.
    set.Clear();
  }
}
BENCHMARK(BM_Clear)->ArgName("elements")->Arg(32000);
} // namespace dfly