From dc853fe4bdf0b56f23b6102f13c1e8ef7a7447e3 Mon Sep 17 00:00:00 2001 From: Vladislav Date: Sun, 7 May 2023 01:02:08 +0300 Subject: [PATCH] Support fields in search mvp (#1184) Field support for search mvp --- src/core/search/CMakeLists.txt | 2 +- src/core/search/ast_expr.cc | 33 ++++++- src/core/search/ast_expr.h | 45 +++++++-- src/core/search/base.cc | 12 +++ src/core/search/base.h | 43 +++++++++ src/core/search/parser.y | 37 ++++---- src/core/search/search_parser_test.cc | 129 ++++++++++++++++++++++++-- 7 files changed, 260 insertions(+), 41 deletions(-) create mode 100644 src/core/search/base.cc create mode 100644 src/core/search/base.h diff --git a/src/core/search/CMakeLists.txt b/src/core/search/CMakeLists.txt index e06d908df..50be4e583 100644 --- a/src/core/search/CMakeLists.txt +++ b/src/core/search/CMakeLists.txt @@ -3,6 +3,6 @@ gen_bison(parser) cur_gen_dir(gen_dir) -add_library(query_parser ast_expr.cc query_driver.cc ${gen_dir}/parser.cc ${gen_dir}/lexer.cc) +add_library(query_parser base.cc ast_expr.cc query_driver.cc ${gen_dir}/parser.cc ${gen_dir}/lexer.cc) target_link_libraries(query_parser base absl::strings TRDP::reflex) cxx_test(search_parser_test query_parser LABELS DFLY) diff --git a/src/core/search/ast_expr.cc b/src/core/search/ast_expr.cc index 7f8e43e7e..7941e6cbe 100644 --- a/src/core/search/ast_expr.cc +++ b/src/core/search/ast_expr.cc @@ -4,6 +4,8 @@ #include "core/search/ast_expr.h" +#include + #include #include @@ -15,15 +17,17 @@ AstTermNode::AstTermNode(std::string term) : term_{move(term)}, pattern_{"\\b" + term_ + "\\b", std::regex::icase} { } -bool AstTermNode::Check(string_view input) const { - return regex_search(input.begin(), input.begin() + input.size(), pattern_); +bool AstTermNode::Check(SearchInput input) const { + return input.Check([this](string_view str) { + return regex_search(str.begin(), str.begin() + str.size(), pattern_); + }); } string AstTermNode::Debug() const { return "term{" + term_ + "}"; } -bool AstNegateNode::Check(string_view input) const { +bool AstNegateNode::Check(SearchInput input) const { return !node_->Check(input); } @@ -31,7 +35,7 @@ string AstNegateNode::Debug() const { return "not{" + node_->Debug() + "}"; } -bool AstLogicalNode::Check(string_view input) const { +bool AstLogicalNode::Check(SearchInput input) const { return op_ == kOr ? (l_->Check(input) || r_->Check(input)) : (l_->Check(input) && r_->Check(input)); } @@ -41,4 +45,25 @@ string AstLogicalNode::Debug() const { return op + "{" + l_->Debug() + "," + r_->Debug() + "}"; } +bool AstFieldNode::Check(SearchInput input) const { + return node_->Check(SearchInput{input, field_}); +} + +string AstFieldNode::Debug() const { + return "field:" + field_ + "{" + node_->Debug() + "}"; +} + +bool AstRangeNode::Check(SearchInput input) const { + return input.Check([this](string_view str) { + int64_t v; + if (!absl::SimpleAtoi(str, &v)) + return false; + return l_ <= v && v <= r_; + }); +} + +string AstRangeNode::Debug() const { + return "range{" + to_string(l_) + " " + to_string(r_) + "}"; +} + } // namespace dfly::search diff --git a/src/core/search/ast_expr.h b/src/core/search/ast_expr.h index 905ac9998..40c67ba04 100644 --- a/src/core/search/ast_expr.h +++ b/src/core/search/ast_expr.h @@ -11,6 +11,8 @@ #include #include +#include "core/search/base.h" + namespace dfly { namespace search { @@ -21,7 +23,7 @@ class AstNode { virtual ~AstNode() = default; // Check if this input is matched by the node. - virtual bool Check(std::string_view input) const = 0; + virtual bool Check(SearchInput) const = 0; // Debug print node. virtual std::string Debug() const = 0; @@ -38,7 +40,7 @@ template AstExpr MakeExpr(Ts&&... ts) { class AstTermNode : public AstNode { public: AstTermNode(std::string term); - virtual bool Check(std::string_view input) const; + virtual bool Check(SearchInput) const; virtual std::string Debug() const; private: @@ -46,19 +48,19 @@ class AstTermNode : public AstNode { std::regex pattern_; }; -// Ast negation node, matches only if its sub node didn't match. +// Ast negation node, matches only if its subtree didn't match. class AstNegateNode : public AstNode { public: AstNegateNode(NodePtr node) : node_{node} { } - virtual bool Check(std::string_view input) const; - virtual std::string Debug() const; + bool Check(SearchInput) const override; + std::string Debug() const override; private: NodePtr node_; }; -// Ast logical operation node, matches only if sub nodes match +// Ast logical operation node, matches only if subtrees match // in respect to logical operation (and/or). class AstLogicalNode : public AstNode { public: @@ -69,14 +71,41 @@ class AstLogicalNode : public AstNode { AstLogicalNode(NodePtr l, NodePtr r, Op op) : l_{l}, r_{r}, op_{op} { } - virtual bool Check(std::string_view input) const; - virtual std::string Debug() const; + bool Check(SearchInput) const override; + std::string Debug() const override; private: NodePtr l_, r_; Op op_; }; +// Ast field node, selects a field from the input for its subtree. +class AstFieldNode : public AstNode { + public: + AstFieldNode(std::string field, NodePtr node) : field_{field.substr(1)}, node_{node} { + } + + bool Check(SearchInput) const override; + std::string Debug() const override; + + private: + std::string field_; + NodePtr node_; +}; + +// Ast range node, checks if input is inside int range +class AstRangeNode : public AstNode { + public: + AstRangeNode(int64_t l, int64_t r) : l_{l}, r_{r} { + } + + bool Check(SearchInput) const override; + std::string Debug() const override; + + private: + int64_t l_, r_; +}; + } // namespace search } // namespace dfly diff --git a/src/core/search/base.cc b/src/core/search/base.cc new file mode 100644 index 000000000..1a90fdc58 --- /dev/null +++ b/src/core/search/base.cc @@ -0,0 +1,12 @@ +// Copyright 2023, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#include "core/search/base.h" + +#include +#include + +using namespace std; + +namespace dfly::search {} // namespace dfly::search diff --git a/src/core/search/base.h b/src/core/search/base.h new file mode 100644 index 000000000..03c192d9f --- /dev/null +++ b/src/core/search/base.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "core/core_types.h" +#include "core/string_map.h" + +namespace dfly::search { + +// Interface for accessing hashset values with different data structures underneath. +struct HSetAccessor { + // Callback that's supplied with field values. + using FieldConsumer = std::function; + + virtual bool Check(FieldConsumer f, std::string_view active_field) const = 0; +}; + +// Wrapper around hashset accessor and optional active field. +struct SearchInput { + SearchInput(const HSetAccessor* hset, std::string_view active_field = {}) + : hset_{hset}, active_field_{active_field} { + } + + SearchInput(const SearchInput& base, std::string_view active_field) + : hset_{base.hset_}, active_field_{active_field} { + } + + bool Check(HSetAccessor::FieldConsumer f) { + return hset_->Check(move(f), active_field_); + } + + private: + const HSetAccessor* hset_; + std::string_view active_field_; +}; + +} // namespace dfly::search diff --git a/src/core/search/parser.y b/src/core/search/parser.y index 610d28b66..f85ccd9f0 100644 --- a/src/core/search/parser.y +++ b/src/core/search/parser.y @@ -63,7 +63,7 @@ using namespace std; %precedence LPAREN RPAREN %token INT64 "int64" -%nterm final_query filter search_expr field_filter field_cond range_value term_list opt_neg_term +%nterm final_query filter search_expr field_cond field_cond_expr %printer { yyo << $$; } <*>; @@ -76,26 +76,25 @@ filter: search_expr { $$ = $1; } search_expr: - LPAREN search_expr RPAREN { $$ = $2; } - | search_expr search_expr %prec AND_OP { $$ = MakeExpr($1, $2, AstLogicalNode::kAnd); }; - | search_expr OR_OP search_expr { $$ = MakeExpr($1, $3, AstLogicalNode::kOr); } - | NOT_OP search_expr { $$ = MakeExpr($2); }; - | TERM { $$ = MakeExpr($1); } - | field_filter; + LPAREN search_expr RPAREN { $$ = $2; } + | search_expr search_expr %prec AND_OP { $$ = MakeExpr($1, $2, AstLogicalNode::kAnd); } + | search_expr OR_OP search_expr { $$ = MakeExpr($1, $3, AstLogicalNode::kOr); } + | NOT_OP search_expr { $$ = MakeExpr($2); } + | TERM { $$ = MakeExpr($1); } + | FIELD COLON field_cond { $$ = MakeExpr($1, $3); } -field_filter: - FIELD COLON field_cond { $$ = AstExpr{}; } - -field_cond: term_list | range_value - range_value: LBRACKET INT64 INT64 RBRACKET { $$ = AstExpr{}; } - -term_list: - opt_neg_term | - LPAREN term_list opt_neg_term RPAREN { }; - -opt_neg_term: - TERM { } | NOT_OP TERM { $$ = AstExpr{}; }; +field_cond: + TERM { $$ = MakeExpr($1); } + | NOT_OP field_cond { $$ = MakeExpr($2); } + | LPAREN field_cond_expr RPAREN { $$ = $2; } + | LBRACKET INT64 INT64 RBRACKET { $$ = MakeExpr($2, $3); } +field_cond_expr: + LPAREN field_cond_expr RPAREN { $$ = $2; } + | field_cond_expr field_cond_expr %prec AND_OP { $$ = MakeExpr($1, $2, AstLogicalNode::kAnd); } + | field_cond_expr OR_OP field_cond_expr { $$ = MakeExpr($1, $3, AstLogicalNode::kOr); } + | NOT_OP field_cond_expr { $$ = MakeExpr($2); }; + | TERM { $$ = MakeExpr($1); } %% void diff --git a/src/core/search/search_parser_test.cc b/src/core/search/search_parser_test.cc index 6da751ff3..9fadc67f6 100644 --- a/src/core/search/search_parser_test.cc +++ b/src/core/search/search_parser_test.cc @@ -4,6 +4,7 @@ #include "base/gtest.h" #include "base/logging.h" +#include "core/search/base.h" #include "core/search/query_driver.h" namespace dfly { @@ -38,7 +39,7 @@ class SearchParserTest : public ::testing::Test { expr_ = query_driver_.Get(); } - bool Check(string_view input) const { + bool Check(SearchInput input) const { return expr_->Check(input); } @@ -51,6 +52,35 @@ class SearchParserTest : public ::testing::Test { QueryDriver query_driver_; }; +class MockedHSetAccessor : public HSetAccessor { + public: + using Map = std::unordered_map; + + MockedHSetAccessor() = default; + MockedHSetAccessor(std::string test_field) : hset_{{"field", test_field}} { + } + + bool Check(HSetAccessor::FieldConsumer f, string_view active_field) const override { + if (!active_field.empty()) { + auto it = hset_.find(string{active_field}); + return f(it != hset_.end() ? it->second : ""); + } else { + for (const auto& [k, v] : hset_) { + if (f(v)) + return true; + } + return false; + } + } + + void Set(Map hset) { + hset_ = hset; + } + + private: + Map hset_{}; +}; + // tokens are not assignable, so we can not reuse them. This macros reduce the boilerplate. #define NEXT_EQ(tok_enum, type, val) \ { \ @@ -75,16 +105,20 @@ class SearchParserTest : public ::testing::Test { ASSERT_TRUE(caught); \ } -#define CHECK_ALL(...) \ - { \ - for (auto input : {__VA_ARGS__}) \ - EXPECT_TRUE(Check(input)) << input << " failed on " << DebugExpr(); \ +#define CHECK_ALL(...) \ + { \ + for (auto str : {__VA_ARGS__}) { \ + MockedHSetAccessor hset{str}; \ + EXPECT_TRUE(Check(SearchInput{&hset})) << str << " failed on " << DebugExpr(); \ + } \ } -#define CHECK_NONE(...) \ - { \ - for (auto input : {__VA_ARGS__}) \ - EXPECT_FALSE(Check(input)) << input << " failed on " << DebugExpr(); \ +#define CHECK_NONE(...) \ + { \ + for (auto str : {__VA_ARGS__}) { \ + MockedHSetAccessor hset{str}; \ + EXPECT_FALSE(Check(SearchInput{&hset})) << str << " failed on " << DebugExpr(); \ + } \ } TEST_F(SearchParserTest, Scanner) { @@ -201,6 +235,83 @@ TEST_F(SearchParserTest, CheckParenthesisPriority) { CHECK_NONE("wrong", "foo bar baz", "foo rab zab", "foo bar what", "foo rab foo"); } +TEST_F(SearchParserTest, MatchField) { + ParseExpr("@f1:foo @f2:bar @f3:baz"); + + MockedHSetAccessor hset{}; + SearchInput input{&hset}; + + hset.Set({{"f1", "foo"}, {"f2", "bar"}, {"f3", "baz"}}); + EXPECT_TRUE(Check(input)); + + hset.Set({{"f1", "foo"}, {"f2", "bar"}, {"f3", "last is wrong"}}); + EXPECT_FALSE(Check(input)); + + hset.Set({{"f1", "its"}, {"f2", "totally"}, {"f3", "wrong"}}); + EXPECT_FALSE(Check(input)); + + hset.Set({{"f1", "im foo but its only me and"}, {"f2", "bar"}}); + EXPECT_FALSE(Check(input)); + + hset.Set({}); + EXPECT_FALSE(Check(input)); +} + +TEST_F(SearchParserTest, MatchRange) { + ParseExpr("@f1:[1 10] @f2:[50 100]"); + + MockedHSetAccessor hset{}; + SearchInput input{&hset}; + + hset.Set({{"f1", "5"}, {"f2", "50"}}); + EXPECT_TRUE(Check(input)); + + hset.Set({{"f1", "1"}, {"f2", "100"}}); + EXPECT_TRUE(Check(input)); + + hset.Set({{"f1", "10"}, {"f2", "50"}}); + EXPECT_TRUE(Check(input)); + + hset.Set({{"f1", "11"}, {"f2", "49"}}); + EXPECT_FALSE(Check(input)); + + hset.Set({{"f1", "0"}, {"f2", "101"}}); + EXPECT_FALSE(Check(input)); +} + +TEST_F(SearchParserTest, CheckExprInField) { + ParseExpr("@f1:(a|b) @f2:(c d) @f3:-e"); + + MockedHSetAccessor hset{}; + SearchInput input{&hset}; + + hset.Set({{"f1", "a"}, {"f2", "c and d"}, {"f3", "right"}}); + EXPECT_TRUE(Check(input)); + + hset.Set({{"f1", "b"}, {"f2", "d and c"}, {"f3", "ok"}}); + EXPECT_TRUE(Check(input)); + + hset.Set({{"f1", "none"}, {"f2", "only d"}, {"f3", "ok"}}); + EXPECT_FALSE(Check(input)); + + hset.Set({{"f1", "b"}, {"f2", "d and c"}, {"f3", "it has an e"}}); + EXPECT_FALSE(Check(input)) << DebugExpr(); + + ParseExpr({"@f1:(a (b | c) -(d | e)) @f2:-(a|b)"}); + + hset.Set({{"f1", "a b w"}, {"f2", "c"}}); + EXPECT_TRUE(Check(input)); + + hset.Set({{"f1", "a b d"}, {"f2", "c"}}); + EXPECT_FALSE(Check(input)); + + hset.Set({{"f1", "a b w"}, {"f2", "a"}}); + EXPECT_FALSE(Check(input)); + + hset.Set({{"f1", "a w"}, {"f2", "c"}}); + EXPECT_FALSE(Check(input)); +} + } // namespace search } // namespace dfly