From ca0cdf8c40ff8780cfddfcfe3fd9d86a8fa71d3c Mon Sep 17 00:00:00 2001 From: Volodymyr Yavdoshenko Date: Fri, 4 Apr 2025 12:38:22 +0300 Subject: [PATCH] fix: '@text:prefix*' matching for fields. (#4868) fixed: https://github.com/dragonflydb/dragonfly/issues/4691 --- src/core/search/lexer.lex | 32 +++++++++++++++---------- src/core/search/search_parser_test.cc | 32 +++++++++++++++++++++++++ src/server/search/search_family_test.cc | 10 ++++++++ 3 files changed, 62 insertions(+), 12 deletions(-) diff --git a/src/core/search/lexer.lex b/src/core/search/lexer.lex index 7c154e0e5..9c33e54ae 100644 --- a/src/core/search/lexer.lex +++ b/src/core/search/lexer.lex @@ -27,16 +27,16 @@ using namespace std; Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc); - Parser::symbol_type make_TagVal(string_view src, const Parser::location_type& loc); + Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::location_type& loc); %} -blank [ \t\r] dq \" sq \' esc_chars ['"\?\\abfnrtv] esc_seq \\{esc_chars} -term_char [_]|\w +term_char \w tag_val_char {term_char}|\\[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ] +asterisk_char \* %{ @@ -70,15 +70,16 @@ tag_val_char {term_char}|\\[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ] [0-9]{1,9} return Parser::make_UINT32(str(), loc()); [+-]?(([0-9]*[.])?[0-9]+|inf) return Parser::make_DOUBLE(str(), loc()); -{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc()); -{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc()); +{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc()); +{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc()); -"$"{term_char}+ return ParseParam(str(), loc()); -"@"{term_char}+ return Parser::make_FIELD(str(), loc()); -{term_char}+"*" return Parser::make_PREFIX(str(), loc()); +"$"{term_char}+ return ParseParam(str(), loc()); +"@"{term_char}+ return Parser::make_FIELD(str(), loc()); +{term_char}+{asterisk_char} return Parser::make_PREFIX(str(), loc()); -{term_char}+ return Parser::make_TERM(str(), loc()); -{tag_val_char}+ return make_TagVal(str(), loc()); +{term_char}+ return Parser::make_TERM(str(), loc()); +{tag_val_char}+{asterisk_char} return make_TagVal(str(), true, loc()); +{tag_val_char}+ return make_TagVal(str(), false, loc()); <> return Parser::make_YYEOF(loc()); %% @@ -91,12 +92,14 @@ Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& return Parser::make_TERM(res, loc); } -Parser::symbol_type make_TagVal(string_view src, const Parser::location_type& loc) { +Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::location_type& loc) { string res; res.reserve(src.size()); bool escaped = false; - for (size_t i = 0; i < src.size(); ++i) { + size_t len = is_prefix ? src.size() - 1 : src.size(); // Exclude the '*' at the end for prefix + + for (size_t i = 0; i < len; ++i) { if (escaped) { escaped = false; } else if (src[i] == '\\') { @@ -104,7 +107,12 @@ Parser::symbol_type make_TagVal(string_view src, const Parser::location_type& lo continue; } res.push_back(src[i]); + } + // Add '*' back for prefix + if (is_prefix) { + res.push_back('*'); + return Parser::make_PREFIX(res, loc); } return Parser::make_TAG_VAL(res, loc); diff --git a/src/core/search/search_parser_test.cc b/src/core/search/search_parser_test.cc index 93baef934..73049e599 100644 --- a/src/core/search/search_parser_test.cc +++ b/src/core/search/search_parser_test.cc @@ -191,6 +191,36 @@ TEST_F(SearchParserTest, Scanner) { NEXT_EQ(TOK_DOUBLE, string, "33.3"); } +TEST_F(SearchParserTest, EscapedTagPrefixes) { + SetInput("@name:{escape\\-err*}"); + NEXT_EQ(TOK_FIELD, string, "@name"); + NEXT_TOK(TOK_COLON); + NEXT_TOK(TOK_LCURLBR); + NEXT_EQ(TOK_PREFIX, string, "escape-err*"); + NEXT_TOK(TOK_RCURLBR); + + SetInput("@name:{escape\\+pre*}"); + NEXT_EQ(TOK_FIELD, string, "@name"); + NEXT_TOK(TOK_COLON); + NEXT_TOK(TOK_LCURLBR); + NEXT_EQ(TOK_PREFIX, string, "escape+pre*"); + NEXT_TOK(TOK_RCURLBR); + + SetInput("@name:{escape\\.pre*}"); + NEXT_EQ(TOK_FIELD, string, "@name"); + NEXT_TOK(TOK_COLON); + NEXT_TOK(TOK_LCURLBR); + NEXT_EQ(TOK_PREFIX, string, "escape.pre*"); + NEXT_TOK(TOK_RCURLBR); + + SetInput("@name:{complex\\-escape\\+with\\.many\\*chars*}"); + NEXT_EQ(TOK_FIELD, string, "@name"); + NEXT_TOK(TOK_COLON); + NEXT_TOK(TOK_LCURLBR); + NEXT_EQ(TOK_PREFIX, string, "complex-escape+with.many*chars*"); + NEXT_TOK(TOK_RCURLBR); +} + TEST_F(SearchParserTest, Parse) { EXPECT_EQ(0, Parse(" foo bar (baz) ")); EXPECT_EQ(0, Parse(" -(foo) @foo:bar @ss:[1 2]")); @@ -200,6 +230,8 @@ TEST_F(SearchParserTest, Parse) { EXPECT_EQ(0, Parse("@foo:{1|2.0|4|3.0}")); EXPECT_EQ(0, Parse("@foo:{1|hello|3.0|world|4}")); + EXPECT_EQ(0, Parse("@name:{escape\\-err*}")); + EXPECT_EQ(1, Parse(" -(foo ")); EXPECT_EQ(1, Parse(" foo:bar ")); EXPECT_EQ(1, Parse(" @foo:@bar ")); diff --git a/src/server/search/search_family_test.cc b/src/server/search/search_family_test.cc index a8b98025c..b91782987 100644 --- a/src/server/search/search_family_test.cc +++ b/src/server/search/search_family_test.cc @@ -545,6 +545,16 @@ TEST_F(SearchFamilyTest, TagNumbers) { EXPECT_THAT(Run({"ft.search", "i1", "@number:{1|hello|2}"}), AreDocIds("d:1", "d:2")); } +TEST_F(SearchFamilyTest, TagEscapeCharacters) { + EXPECT_EQ(Run({"ft.create", "item_idx", "ON", "JSON", "PREFIX", "1", "p", "SCHEMA", "$.name", + "AS", "name", "TAG"}), + "OK"); + EXPECT_EQ(Run({"json.set", "p:1", "$", "{\"name\":\"escape-error\"}"}), "OK"); + + auto resp = Run({"ft.search", "item_idx", "@name:{escape\\-err*}"}); + EXPECT_THAT(resp, AreDocIds("p:1")); +} + TEST_F(SearchFamilyTest, Numbers) { for (unsigned i = 0; i <= 10; i++) { for (unsigned j = 0; j <= 10; j++) {