chore: refactoring around glob matching (#4540)

1. Make GlobMatcher non-copyable. It's something I will need in the next PR.
2. Move low-level benchmarking testing code into dfly_core_test.

No functional changes.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2025-01-31 09:54:37 +02:00 committed by GitHub
parent 89db7ebf9b
commit 9d303f8abe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 132 additions and 105 deletions

View file

@ -297,7 +297,7 @@ OpResult<ScanOpts> ScanOpts::TryFrom(CmdArgList args) {
} else if (opt == "MATCH") {
string_view pattern = ArgS(args, i + 1);
if (pattern != "*")
scan_opts.matcher.emplace(pattern, true);
scan_opts.matcher.reset(new GlobMatcher{pattern, true});
} else if (opt == "TYPE") {
auto obj_type = ObjTypeFromString(ArgS(args, i + 1));
if (!obj_type) {

View file

@ -304,7 +304,7 @@ class Context : protected Cancellation {
};
struct ScanOpts {
std::optional<GlobMatcher> matcher;
std::unique_ptr<GlobMatcher> matcher;
size_t limit = 10;
std::optional<CompactObjType> type_filter;
unsigned bucket_id = UINT_MAX;

View file

@ -8,10 +8,8 @@ extern "C" {
}
#include <absl/strings/ascii.h>
#include <absl/strings/charconv.h>
#include <absl/strings/str_join.h>
#include <absl/strings/strip.h>
#include <fast_float/fast_float.h>
#include <gmock/gmock.h>
#include <reflex/matcher.h>
@ -832,97 +830,8 @@ TEST_F(DflyEngineTest, ReplicaofRejectOnLoad) {
ASSERT_THAT(res, ErrArg("LOADING Dragonfly is loading the dataset in memory"));
}
using benchmark::DoNotOptimize;
// TODO: test transactions with a single shard, since then all transactions become local.
// Consider adding a parameter to the dragonfly engine that controls the number of shards
// independently of the number of CPUs. Also test BLPOP under multi for the single- and
// multi-argument cases.
// Parse Double benchmarks
// Benchmarks fast_float::from_chars on a fixed batch of 100 decimal strings.
// The inputs are produced once, before timing starts, by std::to_string applied
// to doubles drawn from uniform_real_distribution<double>(0, 1e5).
static void BM_ParseFastFloat(benchmark::State& state) {
  std::vector<std::string> args(100);
  std::random_device rd;
  for (auto& arg : args) {
    arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
  }

  double res = 0;
  while (state.KeepRunning()) {
    for (const auto& arg : args) {
      fast_float::from_chars(arg.data(), arg.data() + arg.size(), res);
      // Keep the parsed value observable; otherwise the optimizer is free to
      // discard the (inlined) parse and the benchmark would measure nothing.
      benchmark::DoNotOptimize(res);
    }
  }
}
BENCHMARK(BM_ParseFastFloat);
// Benchmarks absl::from_chars on a fixed batch of 100 decimal strings.
// The inputs are produced once, before timing starts, by std::to_string applied
// to doubles drawn from uniform_real_distribution<double>(0, 1e5).
static void BM_ParseDoubleAbsl(benchmark::State& state) {
  std::vector<std::string> args(100);
  std::random_device rd;
  for (auto& arg : args) {
    arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
  }

  double res = 0;
  while (state.KeepRunning()) {
    for (const auto& arg : args) {
      absl::from_chars(arg.data(), arg.data() + arg.size(), res);
      // Keep the parsed value observable so the parse cannot be treated as dead
      // code, and so this benchmark stays comparable with BM_ParseFastFloat.
      benchmark::DoNotOptimize(res);
    }
  }
}
BENCHMARK(BM_ParseDoubleAbsl);
// Times ScanOpts::Matches for the pattern "*foobar*" against a string obtained
// from GetRandomHex(eng, state.range(0)). Registered with arguments 1000 and 10000.
static void BM_MatchPattern(benchmark::State& state) {
  absl::InsecureBitGen bitgen;
  string haystack = GetRandomHex(bitgen, state.range(0));

  ScanOpts opts;
  opts.matcher.emplace("*foobar*", true);

  while (state.KeepRunning()) {
    // Feed the result to DoNotOptimize so the call is not elided.
    DoNotOptimize(opts.Matches(haystack));
  }
}
BENCHMARK(BM_MatchPattern)->Arg(1000)->Arg(10000);
// Baseline for the matcher benchmarks: times string::find("foobar") on a string
// obtained from GetRandomHex(eng, state.range(0)). Registered with arguments
// 1000 and 10000.
static void BM_MatchFindSubstr(benchmark::State& state) {
  absl::InsecureBitGen bitgen;
  string haystack = GetRandomHex(bitgen, state.range(0));

  while (state.KeepRunning()) {
    // Feed the result to DoNotOptimize so the search is not elided.
    DoNotOptimize(haystack.find("foobar"));
  }
}
BENCHMARK(BM_MatchFindSubstr)->Arg(1000)->Arg(10000);
// Times reflex::Matcher::find() for the pattern "foobar" on a string obtained
// from GetRandomHex(eng, state.range(0)). Registered with arguments 1000 and 10000.
static void BM_MatchReflexFind(benchmark::State& state) {
  absl::InsecureBitGen bitgen;
  string haystack = GetRandomHex(bitgen, state.range(0));

  reflex::Matcher re("foobar");

  // Sanity checks before timing: find() must report a hit when "foobar" is
  // present and return 0 when it is not.
  re.input("xxxxxxfoobaryyyyyyyy");
  CHECK_GT(re.find(), 0u);
  re.input("xxxxxxfoobayyyyyyyy");
  CHECK_EQ(0u, re.find());

  while (state.KeepRunning()) {
    // The input is set again on every iteration before searching.
    re.input(haystack);
    DoNotOptimize(re.find());
  }
}
BENCHMARK(BM_MatchReflexFind)->Arg(1000)->Arg(10000);
// Times reflex::Matcher::matches() for the pattern ".*foobar.*" on a string
// obtained from GetRandomHex(eng, state.range(0)). Registered with arguments
// 1000 and 10000.
static void BM_MatchReflexMatch(benchmark::State& state) {
  absl::InsecureBitGen eng;
  string random_val = GetRandomHex(eng, state.range(0));
  reflex::Matcher matcher(".*foobar.*");

  // Sanity checks before timing: matches() must be non-zero when "foobar" is
  // present and 0 when it is not.
  matcher.input("xxxxxxfoobaryyyyyyyy");
  CHECK_GT(matcher.matches(), 0u);
  matcher.input("xxxxxxfoobayyyyyyyy");
  CHECK_EQ(0u, matcher.matches());

  // No input() call is needed here: the loop sets the input itself before
  // every matches() call.
  while (state.KeepRunning()) {
    matcher.input(random_val);
    DoNotOptimize(matcher.matches());
  }
}
BENCHMARK(BM_MatchReflexMatch)->Arg(1000)->Arg(10000);
} // namespace dfly

View file

@ -1179,7 +1179,7 @@ void GenericFamily::Keys(CmdArgList args, const CommandContext& cmd_cntx) {
ScanOpts scan_opts;
if (pattern != "*") {
scan_opts.matcher.emplace(pattern, true);
scan_opts.matcher.reset(new GlobMatcher{pattern, true});
}
scan_opts.limit = 512;
@ -1745,7 +1745,7 @@ void GenericFamily::Scan(CmdArgList args, const CommandContext& cmd_cntx) {
return builder->SendError(ops.status());
}
ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();
StringVec keys;
cursor = ScanGeneric(cursor, scan_op, &keys, cmd_cntx.conn_cntx);

View file

@ -1027,7 +1027,7 @@ void HSetFamily::HScan(CmdArgList args, const CommandContext& cmd_cntx) {
return cmd_cntx.rb->SendError(ops.status());
}
ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();
auto cb = [&](Transaction* t, EngineShard* shard) {
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);

View file

@ -9,7 +9,7 @@
extern "C" {
#include "redis/intset.h"
#include "redis/redis_aux.h"
#include "redis/util.h"
#include "redis/util.h" // for string2ll
}
#include "base/flags.h"
@ -1413,7 +1413,7 @@ void SScan(CmdArgList args, const CommandContext& cmd_cntx) {
return cmd_cntx.rb->SendError(ops.status());
}
ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();
auto cb = [&](Transaction* t, EngineShard* shard) {
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);

View file

@ -2602,7 +2602,7 @@ void ZSetFamily::ZScan(CmdArgList args, const CommandContext& cmd_cntx) {
DVLOG(1) << "Scan invalid args - return " << ops << " to the user";
return rb->SendError(ops.status());
}
ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();
auto cb = [&](Transaction* t, EngineShard* shard) {
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);