mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-10 09:55:45 +02:00
chore: refactoring around glob matching (#4540)
1. Make GlobMatcher non-copyable. It's something I will need in the next PR. 2. Move low-level benchmarking testing code into dfly_core_test. No functional changes. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
parent
89db7ebf9b
commit
9d303f8abe
10 changed files with 132 additions and 105 deletions
|
@ -15,7 +15,7 @@ cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua
|
|||
add_executable(dash_bench dash_bench.cc)
|
||||
cxx_link(dash_bench dfly_core redis_test_lib)
|
||||
|
||||
cxx_test(dfly_core_test dfly_core LABELS DFLY)
|
||||
cxx_test(dfly_core_test dfly_core TRDP::fast_float LABELS DFLY)
|
||||
cxx_test(compact_object_test dfly_core LABELS DFLY)
|
||||
cxx_test(extent_tree_test dfly_core LABELS DFLY)
|
||||
cxx_test(dash_test dfly_core file redis_test_lib DATA testdata/ids.txt LABELS DFLY)
|
||||
|
|
|
@ -2,7 +2,15 @@
|
|||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#include <absl/strings/charconv.h>
|
||||
#include <absl/strings/numbers.h>
|
||||
#include <fast_float/fast_float.h>
|
||||
#include <reflex/matcher.h>
|
||||
|
||||
#include <random>
|
||||
|
||||
#include "base/gtest.h"
|
||||
#include "base/logging.h"
|
||||
#include "core/glob_matcher.h"
|
||||
#include "core/intent_lock.h"
|
||||
#include "core/tx_queue.h"
|
||||
|
@ -11,6 +19,28 @@ namespace dfly {
|
|||
|
||||
using namespace std;
|
||||
|
||||
std::random_device rd;
|
||||
|
||||
static string GetRandomHex(size_t len) {
|
||||
std::string res(len, '\0');
|
||||
size_t indx = 0;
|
||||
|
||||
for (; indx < len; indx += 16) { // 2 chars per byte
|
||||
absl::numbers_internal::FastHexToBufferZeroPad16(rd(), res.data() + indx);
|
||||
}
|
||||
|
||||
if (indx < len) {
|
||||
char buf[24];
|
||||
absl::numbers_internal::FastHexToBufferZeroPad16(rd(), buf);
|
||||
|
||||
for (unsigned j = 0; indx < len; indx++, j++) {
|
||||
res[indx] = buf[j];
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
class TxQueueTest : public ::testing::Test {
|
||||
protected:
|
||||
TxQueueTest() {
|
||||
|
@ -78,6 +108,13 @@ class StringMatchTest : public ::testing::Test {
|
|||
};
|
||||
|
||||
TEST_F(StringMatchTest, Basic) {
|
||||
EXPECT_EQ(MatchLen("", "", 0), 1);
|
||||
|
||||
EXPECT_EQ(MatchLen("*", "", 0), 0);
|
||||
EXPECT_EQ(MatchLen("*", "", 1), 0);
|
||||
EXPECT_EQ(MatchLen("\\\\", "\\", 0), 1);
|
||||
EXPECT_EQ(MatchLen("h\\\\llo", "h\\llo", 0), 1);
|
||||
|
||||
// ExactMatch
|
||||
EXPECT_EQ(MatchLen("hello", "hello", 0), 1);
|
||||
EXPECT_EQ(MatchLen("hello", "world", 0), 0);
|
||||
|
@ -85,19 +122,18 @@ TEST_F(StringMatchTest, Basic) {
|
|||
// Wildcards
|
||||
EXPECT_EQ(MatchLen("*", "hello", 0), 1);
|
||||
EXPECT_EQ(MatchLen("h*", "hello", 0), 1);
|
||||
EXPECT_EQ(MatchLen("h*", "abc", 0), 0);
|
||||
EXPECT_EQ(MatchLen("h*o", "hello", 0), 1);
|
||||
EXPECT_EQ(MatchLen("hel*o*", "hello*", 0), 1);
|
||||
EXPECT_EQ(MatchLen("h\\*llo", "h*llo", 0), 1);
|
||||
|
||||
// Single character wildcard
|
||||
EXPECT_EQ(MatchLen("h[aeiou]llo", "hello", 0), 1);
|
||||
EXPECT_EQ(MatchLen("h[aeiou]llo", "hallo", 0), 1);
|
||||
EXPECT_EQ(MatchLen("h[^aeiou]llo", "hallo", 0), 0);
|
||||
EXPECT_EQ(MatchLen("h[a-z]llo", "hello", 0), 1);
|
||||
|
||||
EXPECT_EQ(MatchLen("h[A-Z]llo", "Hello", 1), 1);
|
||||
|
||||
EXPECT_EQ(MatchLen("h\\*llo", "h*llo", 0), 1);
|
||||
EXPECT_EQ(MatchLen("h\\\\llo", "h\\llo", 0), 1);
|
||||
EXPECT_EQ(MatchLen("h[A-Z]llo", "HeLLO", 1), 1);
|
||||
EXPECT_EQ(MatchLen("[[]", "[", 0), 1);
|
||||
|
||||
// ?
|
||||
EXPECT_EQ(MatchLen("h?llo", "hello", 0), 1);
|
||||
|
@ -105,6 +141,85 @@ TEST_F(StringMatchTest, Basic) {
|
|||
EXPECT_EQ(MatchLen("h??llo", "hallo", 0), 0);
|
||||
EXPECT_EQ(MatchLen("h\\?llo", "hallo", 0), 0);
|
||||
EXPECT_EQ(MatchLen("h\\?llo", "h?llo", 0), 1);
|
||||
|
||||
// special regex chars
|
||||
EXPECT_EQ(MatchLen("h\\[^|", "h[^|", 0), 1);
|
||||
EXPECT_EQ(MatchLen("[^", "[^", 0), 0);
|
||||
EXPECT_EQ(MatchLen("[$?^]a", "?a", 0), 1);
|
||||
}
|
||||
|
||||
using benchmark::DoNotOptimize;
|
||||
|
||||
// Parse Double benchmarks
|
||||
static void BM_ParseFastFloat(benchmark::State& state) {
|
||||
std::vector<std::string> args(100);
|
||||
std::random_device rd;
|
||||
|
||||
for (auto& arg : args) {
|
||||
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
|
||||
}
|
||||
double res;
|
||||
while (state.KeepRunning()) {
|
||||
for (const auto& arg : args) {
|
||||
fast_float::from_chars(arg.data(), arg.data() + arg.size(), res);
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_ParseFastFloat);
|
||||
|
||||
static void BM_ParseDoubleAbsl(benchmark::State& state) {
|
||||
std::vector<std::string> args(100);
|
||||
|
||||
for (auto& arg : args) {
|
||||
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
|
||||
}
|
||||
|
||||
double res;
|
||||
while (state.KeepRunning()) {
|
||||
for (const auto& arg : args) {
|
||||
absl::from_chars(arg.data(), arg.data() + arg.size(), res);
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_ParseDoubleAbsl);
|
||||
|
||||
static void BM_MatchGlob(benchmark::State& state) {
|
||||
string random_val = GetRandomHex(state.range(0));
|
||||
GlobMatcher matcher("*foobar*", true);
|
||||
while (state.KeepRunning()) {
|
||||
DoNotOptimize(matcher.Matches(random_val));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_MatchGlob)->Arg(1000)->Arg(10000);
|
||||
|
||||
static void BM_MatchFindSubstr(benchmark::State& state) {
|
||||
string random_val = GetRandomHex(state.range(0));
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
DoNotOptimize(random_val.find("foobar"));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_MatchFindSubstr)->Arg(1000)->Arg(10000);
|
||||
|
||||
static void BM_MatchReflexFind(benchmark::State& state) {
|
||||
string random_val = GetRandomHex(state.range(0));
|
||||
reflex::Matcher matcher("foobar");
|
||||
while (state.KeepRunning()) {
|
||||
matcher.input(random_val);
|
||||
DoNotOptimize(matcher.find());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_MatchReflexFind)->Arg(1000)->Arg(10000);
|
||||
|
||||
static void BM_MatchReflexFindStar(benchmark::State& state) {
|
||||
string random_val = GetRandomHex(state.range(0));
|
||||
reflex::Matcher matcher(".*foobar");
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
matcher.input(random_val);
|
||||
DoNotOptimize(matcher.find());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_MatchReflexFindStar)->Arg(1000)->Arg(10000);
|
||||
|
||||
} // namespace dfly
|
||||
|
|
|
@ -8,6 +8,9 @@
|
|||
namespace dfly {
|
||||
|
||||
class GlobMatcher {
|
||||
GlobMatcher(const GlobMatcher&) = delete;
|
||||
GlobMatcher& operator=(const GlobMatcher&) = delete;
|
||||
|
||||
public:
|
||||
explicit GlobMatcher(std::string_view pattern, bool case_sensitive);
|
||||
|
||||
|
|
|
@ -297,7 +297,7 @@ OpResult<ScanOpts> ScanOpts::TryFrom(CmdArgList args) {
|
|||
} else if (opt == "MATCH") {
|
||||
string_view pattern = ArgS(args, i + 1);
|
||||
if (pattern != "*")
|
||||
scan_opts.matcher.emplace(pattern, true);
|
||||
scan_opts.matcher.reset(new GlobMatcher{pattern, true});
|
||||
} else if (opt == "TYPE") {
|
||||
auto obj_type = ObjTypeFromString(ArgS(args, i + 1));
|
||||
if (!obj_type) {
|
||||
|
|
|
@ -304,7 +304,7 @@ class Context : protected Cancellation {
|
|||
};
|
||||
|
||||
struct ScanOpts {
|
||||
std::optional<GlobMatcher> matcher;
|
||||
std::unique_ptr<GlobMatcher> matcher;
|
||||
size_t limit = 10;
|
||||
std::optional<CompactObjType> type_filter;
|
||||
unsigned bucket_id = UINT_MAX;
|
||||
|
|
|
@ -8,10 +8,8 @@ extern "C" {
|
|||
}
|
||||
|
||||
#include <absl/strings/ascii.h>
|
||||
#include <absl/strings/charconv.h>
|
||||
#include <absl/strings/str_join.h>
|
||||
#include <absl/strings/strip.h>
|
||||
#include <fast_float/fast_float.h>
|
||||
#include <gmock/gmock.h>
|
||||
#include <reflex/matcher.h>
|
||||
|
||||
|
@ -832,97 +830,8 @@ TEST_F(DflyEngineTest, ReplicaofRejectOnLoad) {
|
|||
ASSERT_THAT(res, ErrArg("LOADING Dragonfly is loading the dataset in memory"));
|
||||
}
|
||||
|
||||
using benchmark::DoNotOptimize;
|
||||
|
||||
// TODO: to test transactions with a single shard since then all transactions become local.
|
||||
// To consider having a parameter in dragonfly engine controlling number of shards
|
||||
// unconditionally from number of cpus. TO TEST BLPOP under multi for single/multi argument case.
|
||||
|
||||
// Parse Double benchmarks
|
||||
static void BM_ParseFastFloat(benchmark::State& state) {
|
||||
std::vector<std::string> args(100);
|
||||
std::random_device rd;
|
||||
|
||||
for (auto& arg : args) {
|
||||
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
|
||||
}
|
||||
double res;
|
||||
while (state.KeepRunning()) {
|
||||
for (const auto& arg : args) {
|
||||
fast_float::from_chars(arg.data(), arg.data() + arg.size(), res);
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_ParseFastFloat);
|
||||
|
||||
static void BM_ParseDoubleAbsl(benchmark::State& state) {
|
||||
std::vector<std::string> args(100);
|
||||
std::random_device rd;
|
||||
for (auto& arg : args) {
|
||||
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
|
||||
}
|
||||
|
||||
double res;
|
||||
while (state.KeepRunning()) {
|
||||
for (const auto& arg : args) {
|
||||
absl::from_chars(arg.data(), arg.data() + arg.size(), res);
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_ParseDoubleAbsl);
|
||||
|
||||
static void BM_MatchPattern(benchmark::State& state) {
|
||||
absl::InsecureBitGen eng;
|
||||
string random_val = GetRandomHex(eng, state.range(0));
|
||||
ScanOpts scan_opts;
|
||||
scan_opts.matcher.emplace("*foobar*", true);
|
||||
while (state.KeepRunning()) {
|
||||
DoNotOptimize(scan_opts.Matches(random_val));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_MatchPattern)->Arg(1000)->Arg(10000);
|
||||
|
||||
static void BM_MatchFindSubstr(benchmark::State& state) {
|
||||
absl::InsecureBitGen eng;
|
||||
string random_val = GetRandomHex(eng, state.range(0));
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
DoNotOptimize(random_val.find("foobar"));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_MatchFindSubstr)->Arg(1000)->Arg(10000);
|
||||
|
||||
static void BM_MatchReflexFind(benchmark::State& state) {
|
||||
absl::InsecureBitGen eng;
|
||||
string random_val = GetRandomHex(eng, state.range(0));
|
||||
reflex::Matcher matcher("foobar");
|
||||
matcher.input("xxxxxxfoobaryyyyyyyy");
|
||||
CHECK_GT(matcher.find(), 0u);
|
||||
matcher.input("xxxxxxfoobayyyyyyyy");
|
||||
CHECK_EQ(0u, matcher.find());
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
matcher.input(random_val);
|
||||
DoNotOptimize(matcher.find());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_MatchReflexFind)->Arg(1000)->Arg(10000);
|
||||
|
||||
static void BM_MatchReflexMatch(benchmark::State& state) {
|
||||
absl::InsecureBitGen eng;
|
||||
string random_val = GetRandomHex(eng, state.range(0));
|
||||
reflex::Matcher matcher(".*foobar.*");
|
||||
matcher.input("xxxxxxfoobaryyyyyyyy");
|
||||
CHECK_GT(matcher.matches(), 0u);
|
||||
matcher.input("xxxxxxfoobayyyyyyyy");
|
||||
CHECK_EQ(0u, matcher.matches());
|
||||
|
||||
matcher.input(random_val);
|
||||
while (state.KeepRunning()) {
|
||||
matcher.input(random_val);
|
||||
DoNotOptimize(matcher.matches());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_MatchReflexMatch)->Arg(1000)->Arg(10000);
|
||||
|
||||
} // namespace dfly
|
||||
|
|
|
@ -1179,7 +1179,7 @@ void GenericFamily::Keys(CmdArgList args, const CommandContext& cmd_cntx) {
|
|||
|
||||
ScanOpts scan_opts;
|
||||
if (pattern != "*") {
|
||||
scan_opts.matcher.emplace(pattern, true);
|
||||
scan_opts.matcher.reset(new GlobMatcher{pattern, true});
|
||||
}
|
||||
|
||||
scan_opts.limit = 512;
|
||||
|
@ -1745,7 +1745,7 @@ void GenericFamily::Scan(CmdArgList args, const CommandContext& cmd_cntx) {
|
|||
return builder->SendError(ops.status());
|
||||
}
|
||||
|
||||
ScanOpts scan_op = ops.value();
|
||||
const ScanOpts& scan_op = ops.value();
|
||||
|
||||
StringVec keys;
|
||||
cursor = ScanGeneric(cursor, scan_op, &keys, cmd_cntx.conn_cntx);
|
||||
|
|
|
@ -1027,7 +1027,7 @@ void HSetFamily::HScan(CmdArgList args, const CommandContext& cmd_cntx) {
|
|||
return cmd_cntx.rb->SendError(ops.status());
|
||||
}
|
||||
|
||||
ScanOpts scan_op = ops.value();
|
||||
const ScanOpts& scan_op = ops.value();
|
||||
|
||||
auto cb = [&](Transaction* t, EngineShard* shard) {
|
||||
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
extern "C" {
|
||||
#include "redis/intset.h"
|
||||
#include "redis/redis_aux.h"
|
||||
#include "redis/util.h"
|
||||
#include "redis/util.h" // for string2ll
|
||||
}
|
||||
|
||||
#include "base/flags.h"
|
||||
|
@ -1413,7 +1413,7 @@ void SScan(CmdArgList args, const CommandContext& cmd_cntx) {
|
|||
return cmd_cntx.rb->SendError(ops.status());
|
||||
}
|
||||
|
||||
ScanOpts scan_op = ops.value();
|
||||
const ScanOpts& scan_op = ops.value();
|
||||
|
||||
auto cb = [&](Transaction* t, EngineShard* shard) {
|
||||
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);
|
||||
|
|
|
@ -2602,7 +2602,7 @@ void ZSetFamily::ZScan(CmdArgList args, const CommandContext& cmd_cntx) {
|
|||
DVLOG(1) << "Scan invalid args - return " << ops << " to the user";
|
||||
return rb->SendError(ops.status());
|
||||
}
|
||||
ScanOpts scan_op = ops.value();
|
||||
const ScanOpts& scan_op = ops.value();
|
||||
|
||||
auto cb = [&](Transaction* t, EngineShard* shard) {
|
||||
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue