diff --git a/src/core/search/ast_expr.cc b/src/core/search/ast_expr.cc index 6fd9894fc..30fdfcac1 100644 --- a/src/core/search/ast_expr.cc +++ b/src/core/search/ast_expr.cc @@ -16,13 +16,6 @@ using namespace std; namespace dfly::search { -AstTermNode::AstTermNode(string term) : term{std::move(term)} { -} - -AstPrefixNode::AstPrefixNode(string prefix) : prefix{std::move(prefix)} { - this->prefix.pop_back(); -} - AstRangeNode::AstRangeNode(double lo, bool lo_excl, double hi, bool hi_excl) : lo{lo_excl ? nextafter(lo, hi) : lo}, hi{hi_excl ? nextafter(hi, lo) : hi} { } diff --git a/src/core/search/ast_expr.h b/src/core/search/ast_expr.h index 6f1980982..be911d696 100644 --- a/src/core/search/ast_expr.h +++ b/src/core/search/ast_expr.h @@ -12,6 +12,7 @@ #include #include "core/search/base.h" +#include "core/search/tag_types.h" namespace dfly { @@ -25,18 +26,17 @@ struct AstStarNode {}; // Matches all documents where this field has a non-null value struct AstStarFieldNode {}; -// Matches terms in text fields -struct AstTermNode { - explicit AstTermNode(std::string term); +template struct AstAffixNode { + explicit AstAffixNode(std::string affix) : affix{std::move(affix)} { + } - std::string term; + std::string affix; }; -struct AstPrefixNode { - explicit AstPrefixNode(std::string prefix); - - std::string prefix; -}; +using AstTermNode = AstAffixNode; +using AstPrefixNode = AstAffixNode; +using AstSuffixNode = AstAffixNode; +using AstInfixNode = AstAffixNode; // Matches numeric range struct AstRangeNode { @@ -73,7 +73,7 @@ struct AstFieldNode { // Stores a list of tags for a tag query struct AstTagsNode { - using TagValue = std::variant; + using TagValue = std::variant; struct TagValueProxy : public AstTagsNode::TagValue { // bison needs it to be default constructible @@ -83,6 +83,10 @@ struct AstTagsNode { } TagValueProxy(AstTermNode tv) : AstTagsNode::TagValue(std::move(tv)) { } + TagValueProxy(AstSuffixNode tv) : AstTagsNode::TagValue(std::move(tv)) { + } + TagValueProxy(AstInfixNode tv) : AstTagsNode::TagValue(std::move(tv)) { + } }; AstTagsNode(TagValue); @@ -111,9 +115,10 @@ struct AstKnnNode { std::optional ef_runtime; }; -using NodeVariants = std::variant; +using NodeVariants = + std::variant; struct AstNode : public NodeVariants { using variant::variant; diff --git a/src/core/search/lexer.lex b/src/core/search/lexer.lex index 9c33e54ae..c8a91fbbe 100644 --- a/src/core/search/lexer.lex +++ b/src/core/search/lexer.lex @@ -1,6 +1,7 @@ %top{ // Our lexer need to know about Parser::symbol_type #include "core/search/parser.hh" + #include "core/search/tag_types.h" // Include TagType enum } %{ @@ -25,18 +26,19 @@ // A number symbol corresponding to the value in S. using dfly::search::Parser; using namespace std; + using dfly::search::TagType; Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc); - Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::location_type& loc); + Parser::symbol_type make_Tag(string_view src, TagType type, const Parser::location_type& loc); %} -dq \" -sq \' -esc_chars ['"\?\\abfnrtv] -esc_seq \\{esc_chars} -term_char \w -tag_val_char {term_char}|\\[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ] -asterisk_char \* +dq \" +sq \' +esc_chars ['"\?\\abfnrtv] +esc_seq \\{esc_chars} +term_ch \w +tag_val_ch {term_ch}|\\[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ] +astrsk_ch \* %{ @@ -67,21 +69,25 @@ asterisk_char \* "AS" return Parser::make_AS (loc()); "EF_RUNTIME" return Parser::make_EF_RUNTIME (loc()); -[0-9]{1,9} return Parser::make_UINT32(str(), loc()); -[+-]?(([0-9]*[.])?[0-9]+|inf) return Parser::make_DOUBLE(str(), loc()); +[0-9]{1,9} return Parser::make_UINT32(str(), loc()); +[+-]?(([0-9]*[.])?[0-9]+|inf) return Parser::make_DOUBLE(str(), loc()); -{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc()); -{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc()); +{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc()); +{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc()); -"$"{term_char}+ return ParseParam(str(), loc()); -"@"{term_char}+ return Parser::make_FIELD(str(), loc()); -{term_char}+{asterisk_char} return Parser::make_PREFIX(str(), loc()); +"$"{term_ch}+ return ParseParam(str(), loc()); +"@"{term_ch}+ return Parser::make_FIELD(str(), loc()); +{term_ch}+{astrsk_ch} return Parser::make_PREFIX(string{matched_view(0, 1)}, loc()); +{astrsk_ch}{term_ch}+ return Parser::make_SUFFIX(string{matched_view(1, 0)}, loc()); +{astrsk_ch}{term_ch}+{astrsk_ch} return Parser::make_INFIX(string{matched_view(1, 1)}, loc()); -{term_char}+ return Parser::make_TERM(str(), loc()); -{tag_val_char}+{asterisk_char} return make_TagVal(str(), true, loc()); -{tag_val_char}+ return make_TagVal(str(), false, loc()); +{term_ch}+ return Parser::make_TERM(str(), loc()); +{tag_val_ch}+{astrsk_ch} return make_Tag(str(), TagType::PREFIX, loc()); +{astrsk_ch}{tag_val_ch}+ return make_Tag(str(), TagType::SUFFIX, loc()); +{astrsk_ch}{tag_val_ch}+{astrsk_ch} return make_Tag(str(), TagType::INFIX, loc()); +{tag_val_ch}+ return make_Tag(str(), TagType::REGULAR, loc()); -<> return Parser::make_YYEOF(loc()); +<> return Parser::make_YYEOF(loc()); %% Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc) { @@ -92,14 +98,20 @@ Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& return Parser::make_TERM(res, loc); } -Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::location_type& loc) { +Parser::symbol_type make_Tag(string_view src, TagType type, const Parser::location_type& loc) { string res; res.reserve(src.size()); - bool escaped = false; - size_t len = is_prefix ? src.size() - 1 : src.size(); // Exclude the '*' at the end for prefix + // Determine processing boundaries + size_t start = (type == TagType::SUFFIX || type == TagType::INFIX) ? 1 : 0; + size_t end = src.size(); + if (type == TagType::PREFIX || type == TagType::INFIX) { + end--; // Skip the last '*' character + } - for (size_t i = 0; i < len; ++i) { + // Handle escaping + bool escaped = false; + for (size_t i = start; i < end; ++i) { if (escaped) { escaped = false; } else if (src[i] == '\\') { @@ -109,11 +121,16 @@ Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::l res.push_back(src[i]); } - // Add '*' back for prefix - if (is_prefix) { - res.push_back('*'); - return Parser::make_PREFIX(res, loc); + // Return the appropriate token type + switch (type) { + case TagType::PREFIX: + return Parser::make_PREFIX(res, loc); + case TagType::SUFFIX: + return Parser::make_SUFFIX(res, loc); + case TagType::INFIX: + return Parser::make_INFIX(res, loc); + case TagType::REGULAR: + default: + return Parser::make_TAG_VAL(res, loc); } - - return Parser::make_TAG_VAL(res, loc); } diff --git a/src/core/search/parser.y b/src/core/search/parser.y index 9da2ddb13..8bd2f796f 100644 --- a/src/core/search/parser.y +++ b/src/core/search/parser.y @@ -67,7 +67,7 @@ double toDouble(string_view src); // Needed 0 at the end to satisfy bison 3.5.1 %token YYEOF 0 -%token TERM "term" TAG_VAL "tag_val" PARAM "param" FIELD "field" PREFIX "prefix" +%token TERM "term" TAG_VAL "tag_val" PARAM "param" FIELD "field" PREFIX "prefix" SUFFIX "suffix" INFIX "infix" %precedence TERM TAG_VAL %left OR_OP @@ -134,24 +134,26 @@ search_or_expr: | search_expr OR_OP search_unary_expr { $$ = AstLogicalNode(std::move($1), std::move($3), AstLogicalNode::OR); } search_unary_expr: - LPAREN search_expr RPAREN { $$ = std::move($2); } + LPAREN search_expr RPAREN { $$ = std::move($2); } | NOT_OP search_unary_expr { $$ = AstNegateNode(std::move($2)); } - | TERM { $$ = AstTermNode(std::move($1)); } + | TERM { $$ = AstTermNode(std::move($1)); } | PREFIX { $$ = AstPrefixNode(std::move($1)); } - | UINT32 { $$ = AstTermNode(std::move($1)); } + | SUFFIX { $$ = AstSuffixNode(std::move($1)); } + | INFIX { $$ = AstInfixNode(std::move($1)); } + | UINT32 { $$ = AstTermNode(std::move($1)); } | FIELD COLON field_cond { $$ = AstFieldNode(std::move($1), std::move($3)); } field_cond: - TERM { $$ = AstTermNode(std::move($1)); } - | UINT32 { $$ = AstTermNode(std::move($1)); } - | STAR { $$ = AstStarFieldNode(); } + TERM { $$ = AstTermNode(std::move($1)); } + | UINT32 { $$ = AstTermNode(std::move($1)); } + | STAR { $$ = AstStarFieldNode(); } | NOT_OP field_cond { $$ = AstNegateNode(std::move($2)); } | LPAREN field_cond_expr RPAREN { $$ = std::move($2); } | LBRACKET numeric_filter_expr RBRACKET { $$ = std::move($2); } | LCURLBR tag_list RCURLBR { $$ = std::move($2); } numeric_filter_expr: - opt_lparen generic_number opt_lparen generic_number { $$ = AstRangeNode($2, $1, $4, $3); } + opt_lparen generic_number opt_lparen generic_number { $$ = AstRangeNode($2, $1, $4, $3); } | opt_lparen generic_number COMMA opt_lparen generic_number { $$ = AstRangeNode($2, $1, $5, $4); } generic_number: @@ -163,9 +165,9 @@ opt_lparen: | LPAREN { $$ = true; } field_cond_expr: - field_unary_expr { $$ = std::move($1); } - | field_and_expr { $$ = std::move($1); } - | field_or_expr { $$ = std::move($1); } + field_unary_expr { $$ = std::move($1); } + | field_and_expr { $$ = std::move($1); } + | field_or_expr { $$ = std::move($1); } field_and_expr: field_unary_expr field_unary_expr %prec AND_OP { $$ = AstLogicalNode(std::move($1), std::move($2), AstLogicalNode::AND); } @@ -176,21 +178,23 @@ field_or_expr: | field_cond_expr OR_OP field_and_expr { $$ = AstLogicalNode(std::move($1), std::move($3), AstLogicalNode::OR); } field_unary_expr: - LPAREN field_cond_expr RPAREN { $$ = std::move($2); } - | NOT_OP field_unary_expr { $$ = AstNegateNode(std::move($2)); } - | TERM { $$ = AstTermNode(std::move($1)); } - | UINT32 { $$ = AstTermNode(std::move($1)); } + LPAREN field_cond_expr RPAREN { $$ = std::move($2); } + | NOT_OP field_unary_expr { $$ = AstNegateNode(std::move($2)); } + | TERM { $$ = AstTermNode(std::move($1)); } + | UINT32 { $$ = AstTermNode(std::move($1)); } tag_list: - tag_list_element { $$ = AstTagsNode(std::move($1)); } + tag_list_element { $$ = AstTagsNode(std::move($1)); } | tag_list OR_OP tag_list_element { $$ = AstTagsNode(std::move($1), std::move($3)); } tag_list_element: - TERM { $$ = AstTermNode(std::move($1)); } + TERM { $$ = AstTermNode(std::move($1)); } | PREFIX { $$ = AstPrefixNode(std::move($1)); } - | UINT32 { $$ = AstTermNode(std::move($1)); } - | DOUBLE { $$ = AstTermNode(std::move($1)); } - | TAG_VAL { $$ = AstTermNode(std::move($1)); } + | SUFFIX { $$ = AstSuffixNode(std::move($1)); } + | INFIX { $$ = AstInfixNode(std::move($1)); } + | UINT32 { $$ = AstTermNode(std::move($1)); } + | DOUBLE { $$ = AstTermNode(std::move($1)); } + | TAG_VAL { $$ = AstTermNode(std::move($1)); } %% diff --git a/src/core/search/search.cc b/src/core/search/search.cc index 4d50d71fd..b4534a858 100644 --- a/src/core/search/search.cc +++ b/src/core/search/search.cc @@ -118,10 +118,16 @@ struct ProfileBuilder { string GetNodeInfo(const AstNode& node) { struct NodeFormatter { void operator()(std::string* out, const AstPrefixNode& node) const { - out->append(node.prefix); + out->append(node.affix); + } + void operator()(std::string* out, const AstSuffixNode& node) const { + out->append(node.affix); + } + void operator()(std::string* out, const AstInfixNode& node) const { + out->append(node.affix); } void operator()(std::string* out, const AstTermNode& node) const { - out->append(node.term); + out->append(node.affix); } void operator()(std::string* out, const AstTagsNode::TagValue& value) const { visit([this, out](const auto& n) { this->operator()(out, n); }, value); @@ -129,8 +135,10 @@ struct ProfileBuilder { }; Overloaded node_info{ [](monostate) -> string { return ""s; }, - [](const AstTermNode& n) { return absl::StrCat("Term{", n.term, "}"); }, - [](const AstPrefixNode& n) { return absl::StrCat("Prefix{", n.prefix, "}"); }, + [](const AstTermNode& n) { return absl::StrCat("Term{", n.affix, "}"); }, + [](const AstPrefixNode& n) { return absl::StrCat("Prefix{", n.affix, "}"); }, + [](const AstSuffixNode& n) { return absl::StrCat("Suffix{", n.affix, "}"); }, + [](const AstInfixNode& n) { return absl::StrCat("Infix{", n.affix, "}"); }, [](const AstRangeNode& n) { return absl::StrCat("Range{", n.lo, "<>", n.hi, "}"); }, [](const AstLogicalNode& n) { auto op = n.op == AstLogicalNode::AND ? "and" : "or"; @@ -268,6 +276,20 @@ struct BasicSearch { return result; } + template + IndexResult CollectSuffixMatches(BaseStringIndex* index, std::string_view suffix) { + // TODO: Implement full text search for suffix + error_ = "Not implemented"; + return IndexResult{}; + } + + template + IndexResult CollectInfixMatches(BaseStringIndex* index, std::string_view infix) { + // TODO: Implement full text search for infix + error_ = "Not implemented"; + return IndexResult{}; + } + IndexResult Search(monostate, string_view) { return vector{}; } @@ -279,7 +301,7 @@ struct BasicSearch { // "term": access field's text index or unify results from all text indices if no field is set IndexResult Search(const AstTermNode& node, string_view active_field) { - std::string term = node.term; + std::string term = node.affix; bool strip_whitespace = true; if (auto synonyms = indices_->GetSynonyms(); synonyms) { @@ -341,11 +363,23 @@ struct BasicSearch { } auto mapping = [&node, this](TextIndex* index) { - return CollectPrefixMatches(index, node.prefix); + return CollectPrefixMatches(index, node.affix); }; return UnifyResults(GetSubResults(indices, mapping), LogicOp::OR); } + IndexResult Search(const AstSuffixNode& node, string_view active_field) { + // TODO: Implement full text search for suffix + error_ = "Not implemented"; + return IndexResult{}; + } + + IndexResult Search(const AstInfixNode& node, string_view active_field) { + // TODO: Implement full text search for infix + error_ = "Not implemented"; + return IndexResult{}; + } + // [range]: access field's numeric index IndexResult Search(const AstRangeNode& node, string_view active_field) { DCHECK(!active_field.empty()); @@ -388,10 +422,16 @@ struct BasicSearch { return IndexResult{}; Overloaded ov{[tag_index](const AstTermNode& term) -> IndexResult { - return tag_index->Matching(term.term); + return tag_index->Matching(term.affix); }, [tag_index, this](const AstPrefixNode& prefix) { - return CollectPrefixMatches(tag_index, prefix.prefix); + return CollectPrefixMatches(tag_index, prefix.affix); + }, + [tag_index, this](const AstSuffixNode& suffix) { + return CollectSuffixMatches(tag_index, suffix.affix); + }, + [tag_index, this](const AstInfixNode& infix) { + return CollectInfixMatches(tag_index, infix.affix); }}; auto mapping = [ov](const auto& tag) { return visit(ov, tag); }; return UnifyResults(GetSubResults(node.tags, mapping), LogicOp::OR); diff --git a/src/core/search/search_parser_test.cc b/src/core/search/search_parser_test.cc index 73049e599..8043a25a2 100644 --- a/src/core/search/search_parser_test.cc +++ b/src/core/search/search_parser_test.cc @@ -155,7 +155,7 @@ TEST_F(SearchParserTest, Scanner) { // Prefix simple SetInput("pre*"); - NEXT_EQ(TOK_PREFIX, string, "pre*"); + NEXT_EQ(TOK_PREFIX, string, "pre"); // TODO: uncomment when we support escaped terms // Prefix escaped (redis doesn't support quoted prefix matches) @@ -167,7 +167,7 @@ TEST_F(SearchParserTest, Scanner) { NEXT_EQ(TOK_FIELD, string, "@color"); NEXT_TOK(TOK_COLON); NEXT_TOK(TOK_LCURLBR); - NEXT_EQ(TOK_PREFIX, string, "prefix*"); + NEXT_EQ(TOK_PREFIX, string, "prefix"); NEXT_TOK(TOK_RCURLBR); // Prefix escaped star @@ -196,28 +196,28 @@ TEST_F(SearchParserTest, EscapedTagPrefixes) { NEXT_EQ(TOK_FIELD, string, "@name"); NEXT_TOK(TOK_COLON); NEXT_TOK(TOK_LCURLBR); - NEXT_EQ(TOK_PREFIX, string, "escape-err*"); + NEXT_EQ(TOK_PREFIX, string, "escape-err"); NEXT_TOK(TOK_RCURLBR); SetInput("@name:{escape\\+pre*}"); NEXT_EQ(TOK_FIELD, string, "@name"); NEXT_TOK(TOK_COLON); NEXT_TOK(TOK_LCURLBR); - NEXT_EQ(TOK_PREFIX, string, "escape+pre*"); + NEXT_EQ(TOK_PREFIX, string, "escape+pre"); NEXT_TOK(TOK_RCURLBR); SetInput("@name:{escape\\.pre*}"); NEXT_EQ(TOK_FIELD, string, "@name"); NEXT_TOK(TOK_COLON); NEXT_TOK(TOK_LCURLBR); - NEXT_EQ(TOK_PREFIX, string, "escape.pre*"); + NEXT_EQ(TOK_PREFIX, string, "escape.pre"); NEXT_TOK(TOK_RCURLBR); SetInput("@name:{complex\\-escape\\+with\\.many\\*chars*}"); NEXT_EQ(TOK_FIELD, string, "@name"); NEXT_TOK(TOK_COLON); NEXT_TOK(TOK_LCURLBR); - NEXT_EQ(TOK_PREFIX, string, "complex-escape+with.many*chars*"); + NEXT_EQ(TOK_PREFIX, string, "complex-escape+with.many*chars"); NEXT_TOK(TOK_RCURLBR); } @@ -237,9 +237,8 @@ TEST_F(SearchParserTest, Parse) { EXPECT_EQ(1, Parse(" @foo:@bar ")); EXPECT_EQ(1, Parse(" @foo: ")); - // We don't support suffix/any other position for now - EXPECT_EQ(1, Parse("*pre")); - EXPECT_EQ(1, Parse("*pre*")); + EXPECT_EQ(0, Parse("*suffix")); + EXPECT_EQ(0, Parse("*infix")); EXPECT_EQ(1, Parse("pre***")); } diff --git a/src/core/search/search_test.cc b/src/core/search/search_test.cc index 05f571978..f9fa987df 100644 --- a/src/core/search/search_test.cc +++ b/src/core/search/search_test.cc @@ -872,6 +872,32 @@ TEST_F(SearchTest, InvalidVectorParameter) { ASSERT_FALSE(algo.Init("*=>[KNN 2 @v $b]", &query_params)); } +TEST_F(SearchTest, NotImplementedSearchTypes) { + auto schema = MakeSimpleSchema({{"title", SchemaField::TEXT}}); + FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr}; + + SearchAlgorithm algo{}; + QueryParams params; + + // Add a document for testing + MockedDocument doc{Map{{"title", "text for search"}}}; + indices.Add(0, doc); + + // Test suffix search (words ending with "search") + algo.Init("*search", ¶ms); + auto suffix_result = algo.Search(&indices); + EXPECT_TRUE(suffix_result.ids.empty()) << "Suffix search should return empty result"; + EXPECT_THAT(suffix_result.error, testing::HasSubstr("Not implemented")) + << "Suffix search should return a not implemented error"; + + // Test infix search (words containing "for") + algo.Init("*for*", ¶ms); + auto infix_result = algo.Search(&indices); + EXPECT_TRUE(infix_result.ids.empty()) << "Infix search should return empty result"; + EXPECT_THAT(infix_result.error, testing::HasSubstr("Not implemented")) + << "Infix search should return a not implemented error"; +} + } // namespace search } // namespace dfly diff --git a/src/core/search/tag_types.h b/src/core/search/tag_types.h new file mode 100644 index 000000000..42166ca9b --- /dev/null +++ b/src/core/search/tag_types.h @@ -0,0 +1,13 @@ +// Copyright 2023, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#pragma once + +namespace dfly { +namespace search { + +enum class TagType { PREFIX, SUFFIX, INFIX, REGULAR }; + +} // namespace search +} // namespace dfly