chore: refactoring around glob matching (#4540)

1. Make GlobMatcher non-copyable. It's something I will need in the next PR.
2. Move low-level benchmarking testing code into dfly_core_test.

No functional changes.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2025-01-31 09:54:37 +02:00 committed by GitHub
parent 89db7ebf9b
commit 9d303f8abe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 132 additions and 105 deletions

View file

@ -15,7 +15,7 @@ cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua
add_executable(dash_bench dash_bench.cc)
cxx_link(dash_bench dfly_core redis_test_lib)
cxx_test(dfly_core_test dfly_core LABELS DFLY)
cxx_test(dfly_core_test dfly_core TRDP::fast_float LABELS DFLY)
cxx_test(compact_object_test dfly_core LABELS DFLY)
cxx_test(extent_tree_test dfly_core LABELS DFLY)
cxx_test(dash_test dfly_core file redis_test_lib DATA testdata/ids.txt LABELS DFLY)

View file

@ -2,7 +2,15 @@
// See LICENSE for licensing terms.
//
#include <absl/strings/charconv.h>
#include <absl/strings/numbers.h>
#include <fast_float/fast_float.h>
#include <reflex/matcher.h>
#include <random>
#include "base/gtest.h"
#include "base/logging.h"
#include "core/glob_matcher.h"
#include "core/intent_lock.h"
#include "core/tx_queue.h"
@ -11,6 +19,28 @@ namespace dfly {
using namespace std;
std::random_device rd;
static string GetRandomHex(size_t len) {
std::string res(len, '\0');
size_t indx = 0;
for (; indx < len; indx += 16) { // 2 chars per byte
absl::numbers_internal::FastHexToBufferZeroPad16(rd(), res.data() + indx);
}
if (indx < len) {
char buf[24];
absl::numbers_internal::FastHexToBufferZeroPad16(rd(), buf);
for (unsigned j = 0; indx < len; indx++, j++) {
res[indx] = buf[j];
}
}
return res;
}
class TxQueueTest : public ::testing::Test {
protected:
TxQueueTest() {
@ -78,6 +108,13 @@ class StringMatchTest : public ::testing::Test {
};
TEST_F(StringMatchTest, Basic) {
EXPECT_EQ(MatchLen("", "", 0), 1);
EXPECT_EQ(MatchLen("*", "", 0), 0);
EXPECT_EQ(MatchLen("*", "", 1), 0);
EXPECT_EQ(MatchLen("\\\\", "\\", 0), 1);
EXPECT_EQ(MatchLen("h\\\\llo", "h\\llo", 0), 1);
// ExactMatch
EXPECT_EQ(MatchLen("hello", "hello", 0), 1);
EXPECT_EQ(MatchLen("hello", "world", 0), 0);
@ -85,19 +122,18 @@ TEST_F(StringMatchTest, Basic) {
// Wildcards
EXPECT_EQ(MatchLen("*", "hello", 0), 1);
EXPECT_EQ(MatchLen("h*", "hello", 0), 1);
EXPECT_EQ(MatchLen("h*", "abc", 0), 0);
EXPECT_EQ(MatchLen("h*o", "hello", 0), 1);
EXPECT_EQ(MatchLen("hel*o*", "hello*", 0), 1);
EXPECT_EQ(MatchLen("h\\*llo", "h*llo", 0), 1);
// Single character wildcard
EXPECT_EQ(MatchLen("h[aeiou]llo", "hello", 0), 1);
EXPECT_EQ(MatchLen("h[aeiou]llo", "hallo", 0), 1);
EXPECT_EQ(MatchLen("h[^aeiou]llo", "hallo", 0), 0);
EXPECT_EQ(MatchLen("h[a-z]llo", "hello", 0), 1);
EXPECT_EQ(MatchLen("h[A-Z]llo", "Hello", 1), 1);
EXPECT_EQ(MatchLen("h\\*llo", "h*llo", 0), 1);
EXPECT_EQ(MatchLen("h\\\\llo", "h\\llo", 0), 1);
EXPECT_EQ(MatchLen("h[A-Z]llo", "HeLLO", 1), 1);
EXPECT_EQ(MatchLen("[[]", "[", 0), 1);
// ?
EXPECT_EQ(MatchLen("h?llo", "hello", 0), 1);
@ -105,6 +141,85 @@ TEST_F(StringMatchTest, Basic) {
EXPECT_EQ(MatchLen("h??llo", "hallo", 0), 0);
EXPECT_EQ(MatchLen("h\\?llo", "hallo", 0), 0);
EXPECT_EQ(MatchLen("h\\?llo", "h?llo", 0), 1);
// special regex chars
EXPECT_EQ(MatchLen("h\\[^|", "h[^|", 0), 1);
EXPECT_EQ(MatchLen("[^", "[^", 0), 0);
EXPECT_EQ(MatchLen("[$?^]a", "?a", 0), 1);
}
using benchmark::DoNotOptimize;
// Parse Double benchmarks
static void BM_ParseFastFloat(benchmark::State& state) {
std::vector<std::string> args(100);
std::random_device rd;
for (auto& arg : args) {
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
}
double res;
while (state.KeepRunning()) {
for (const auto& arg : args) {
fast_float::from_chars(arg.data(), arg.data() + arg.size(), res);
}
}
}
BENCHMARK(BM_ParseFastFloat);
static void BM_ParseDoubleAbsl(benchmark::State& state) {
std::vector<std::string> args(100);
for (auto& arg : args) {
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
}
double res;
while (state.KeepRunning()) {
for (const auto& arg : args) {
absl::from_chars(arg.data(), arg.data() + arg.size(), res);
}
}
}
BENCHMARK(BM_ParseDoubleAbsl);
static void BM_MatchGlob(benchmark::State& state) {
string random_val = GetRandomHex(state.range(0));
GlobMatcher matcher("*foobar*", true);
while (state.KeepRunning()) {
DoNotOptimize(matcher.Matches(random_val));
}
}
BENCHMARK(BM_MatchGlob)->Arg(1000)->Arg(10000);
static void BM_MatchFindSubstr(benchmark::State& state) {
string random_val = GetRandomHex(state.range(0));
while (state.KeepRunning()) {
DoNotOptimize(random_val.find("foobar"));
}
}
BENCHMARK(BM_MatchFindSubstr)->Arg(1000)->Arg(10000);
static void BM_MatchReflexFind(benchmark::State& state) {
string random_val = GetRandomHex(state.range(0));
reflex::Matcher matcher("foobar");
while (state.KeepRunning()) {
matcher.input(random_val);
DoNotOptimize(matcher.find());
}
}
BENCHMARK(BM_MatchReflexFind)->Arg(1000)->Arg(10000);
static void BM_MatchReflexFindStar(benchmark::State& state) {
string random_val = GetRandomHex(state.range(0));
reflex::Matcher matcher(".*foobar");
while (state.KeepRunning()) {
matcher.input(random_val);
DoNotOptimize(matcher.find());
}
}
BENCHMARK(BM_MatchReflexFindStar)->Arg(1000)->Arg(10000);
} // namespace dfly

View file

@ -8,6 +8,9 @@
namespace dfly {
class GlobMatcher {
GlobMatcher(const GlobMatcher&) = delete;
GlobMatcher& operator=(const GlobMatcher&) = delete;
public:
explicit GlobMatcher(std::string_view pattern, bool case_sensitive);

View file

@ -297,7 +297,7 @@ OpResult<ScanOpts> ScanOpts::TryFrom(CmdArgList args) {
} else if (opt == "MATCH") {
string_view pattern = ArgS(args, i + 1);
if (pattern != "*")
scan_opts.matcher.emplace(pattern, true);
scan_opts.matcher.reset(new GlobMatcher{pattern, true});
} else if (opt == "TYPE") {
auto obj_type = ObjTypeFromString(ArgS(args, i + 1));
if (!obj_type) {

View file

@ -304,7 +304,7 @@ class Context : protected Cancellation {
};
struct ScanOpts {
std::optional<GlobMatcher> matcher;
std::unique_ptr<GlobMatcher> matcher;
size_t limit = 10;
std::optional<CompactObjType> type_filter;
unsigned bucket_id = UINT_MAX;

View file

@ -8,10 +8,8 @@ extern "C" {
}
#include <absl/strings/ascii.h>
#include <absl/strings/charconv.h>
#include <absl/strings/str_join.h>
#include <absl/strings/strip.h>
#include <fast_float/fast_float.h>
#include <gmock/gmock.h>
#include <reflex/matcher.h>
@ -832,97 +830,8 @@ TEST_F(DflyEngineTest, ReplicaofRejectOnLoad) {
ASSERT_THAT(res, ErrArg("LOADING Dragonfly is loading the dataset in memory"));
}
using benchmark::DoNotOptimize;
// TODO: to test transactions with a single shard since then all transactions become local.
// To consider having a parameter in dragonfly engine controlling number of shards
// unconditionally from number of cpus. TO TEST BLPOP under multi for single/multi argument case.
// Parse Double benchmarks
static void BM_ParseFastFloat(benchmark::State& state) {
std::vector<std::string> args(100);
std::random_device rd;
for (auto& arg : args) {
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
}
double res;
while (state.KeepRunning()) {
for (const auto& arg : args) {
fast_float::from_chars(arg.data(), arg.data() + arg.size(), res);
}
}
}
BENCHMARK(BM_ParseFastFloat);
static void BM_ParseDoubleAbsl(benchmark::State& state) {
std::vector<std::string> args(100);
std::random_device rd;
for (auto& arg : args) {
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
}
double res;
while (state.KeepRunning()) {
for (const auto& arg : args) {
absl::from_chars(arg.data(), arg.data() + arg.size(), res);
}
}
}
BENCHMARK(BM_ParseDoubleAbsl);
static void BM_MatchPattern(benchmark::State& state) {
absl::InsecureBitGen eng;
string random_val = GetRandomHex(eng, state.range(0));
ScanOpts scan_opts;
scan_opts.matcher.emplace("*foobar*", true);
while (state.KeepRunning()) {
DoNotOptimize(scan_opts.Matches(random_val));
}
}
BENCHMARK(BM_MatchPattern)->Arg(1000)->Arg(10000);
static void BM_MatchFindSubstr(benchmark::State& state) {
absl::InsecureBitGen eng;
string random_val = GetRandomHex(eng, state.range(0));
while (state.KeepRunning()) {
DoNotOptimize(random_val.find("foobar"));
}
}
BENCHMARK(BM_MatchFindSubstr)->Arg(1000)->Arg(10000);
static void BM_MatchReflexFind(benchmark::State& state) {
absl::InsecureBitGen eng;
string random_val = GetRandomHex(eng, state.range(0));
reflex::Matcher matcher("foobar");
matcher.input("xxxxxxfoobaryyyyyyyy");
CHECK_GT(matcher.find(), 0u);
matcher.input("xxxxxxfoobayyyyyyyy");
CHECK_EQ(0u, matcher.find());
while (state.KeepRunning()) {
matcher.input(random_val);
DoNotOptimize(matcher.find());
}
}
BENCHMARK(BM_MatchReflexFind)->Arg(1000)->Arg(10000);
static void BM_MatchReflexMatch(benchmark::State& state) {
absl::InsecureBitGen eng;
string random_val = GetRandomHex(eng, state.range(0));
reflex::Matcher matcher(".*foobar.*");
matcher.input("xxxxxxfoobaryyyyyyyy");
CHECK_GT(matcher.matches(), 0u);
matcher.input("xxxxxxfoobayyyyyyyy");
CHECK_EQ(0u, matcher.matches());
matcher.input(random_val);
while (state.KeepRunning()) {
matcher.input(random_val);
DoNotOptimize(matcher.matches());
}
}
BENCHMARK(BM_MatchReflexMatch)->Arg(1000)->Arg(10000);
} // namespace dfly

View file

@ -1179,7 +1179,7 @@ void GenericFamily::Keys(CmdArgList args, const CommandContext& cmd_cntx) {
ScanOpts scan_opts;
if (pattern != "*") {
scan_opts.matcher.emplace(pattern, true);
scan_opts.matcher.reset(new GlobMatcher{pattern, true});
}
scan_opts.limit = 512;
@ -1745,7 +1745,7 @@ void GenericFamily::Scan(CmdArgList args, const CommandContext& cmd_cntx) {
return builder->SendError(ops.status());
}
ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();
StringVec keys;
cursor = ScanGeneric(cursor, scan_op, &keys, cmd_cntx.conn_cntx);

View file

@ -1027,7 +1027,7 @@ void HSetFamily::HScan(CmdArgList args, const CommandContext& cmd_cntx) {
return cmd_cntx.rb->SendError(ops.status());
}
ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();
auto cb = [&](Transaction* t, EngineShard* shard) {
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);

View file

@ -9,7 +9,7 @@
extern "C" {
#include "redis/intset.h"
#include "redis/redis_aux.h"
#include "redis/util.h"
#include "redis/util.h" // for string2ll
}
#include "base/flags.h"
@ -1413,7 +1413,7 @@ void SScan(CmdArgList args, const CommandContext& cmd_cntx) {
return cmd_cntx.rb->SendError(ops.status());
}
ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();
auto cb = [&](Transaction* t, EngineShard* shard) {
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);

View file

@ -2602,7 +2602,7 @@ void ZSetFamily::ZScan(CmdArgList args, const CommandContext& cmd_cntx) {
DVLOG(1) << "Scan invalid args - return " << ops << " to the user";
return rb->SendError(ops.status());
}
ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();
auto cb = [&](Transaction* t, EngineShard* shard) {
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);