From e46dd24bb7273a5ef25e8d12b54a9d53979d7121 Mon Sep 17 00:00:00 2001 From: Vladislav Date: Thu, 2 Nov 2023 23:43:11 +0300 Subject: [PATCH] chore(search): Safe json validation, serialize fix (#2100) * chore(search): Safe json validation, serialize fix Signed-off-by: Vladislav Oleshko --------- Signed-off-by: Vladislav Oleshko --- src/core/search/lexer.lex | 7 +++--- src/core/search/search_parser_test.cc | 18 +++++++++++--- src/server/search/doc_accessors.cc | 27 +++++++++++++++++---- src/server/search/doc_accessors.h | 6 ++--- src/server/search/search_family.cc | 8 +++---- src/server/search/search_family_test.cc | 31 +++++++++++++++++++------ 6 files changed, 72 insertions(+), 25 deletions(-) diff --git a/src/core/search/lexer.lex b/src/core/search/lexer.lex index 629a83109..76e002c00 100644 --- a/src/core/search/lexer.lex +++ b/src/core/search/lexer.lex @@ -33,9 +33,9 @@ blank [ \t\r] dq \" +sq \' esc_chars ['"\?\\abfnrtv] esc_seq \\{esc_chars} -str_char ([^"]|{esc_seq}) term_char [_]|\w @@ -65,10 +65,11 @@ term_char [_]|\w "KNN" return Parser::make_KNN (loc()); "AS" return Parser::make_AS (loc()); -[0-9]+ return make_UINT32(matched_view(), loc()); +[0-9]{1,9} return make_UINT32(matched_view(), loc()); [+-]?(([0-9]*[.])?[0-9]+|inf) return make_DOUBLE(matched_view(), loc()); -{dq}{str_char}*{dq} return make_StringLit(matched_view(1, 1), loc()); +{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc()); +{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc()); "$"{term_char}+ return ParseParam(str(), loc()); "@"{term_char}+ return Parser::make_FIELD(str(), loc()); diff --git a/src/core/search/search_parser_test.cc b/src/core/search/search_parser_test.cc index a8d7fa539..c2a7252e9 100644 --- a/src/core/search/search_parser_test.cc +++ b/src/core/search/search_parser_test.cc @@ -103,9 +103,6 @@ TEST_F(SearchParserTest, Scanner) { absl::SimpleAtod("33.3", &d); SetInput("33.3"); NEXT_EQ(TOK_DOUBLE, double, d); - - SetInput("18446744073709551616"); - NEXT_ERROR(); } TEST_F(SearchParserTest, Parse) { @@ -130,4 +127,19 @@ TEST_F(SearchParserTest, ParseParams) { NEXT_EQ(TOK_UINT32, uint32_t, 10); } +TEST_F(SearchParserTest, Quotes) { + SetInput(" \"fir st\" 'sec@o@nd' \":third:\" 'four\\\"th' "); + NEXT_EQ(TOK_TERM, string, "fir st"); + NEXT_EQ(TOK_TERM, string, "sec@o@nd"); + NEXT_EQ(TOK_TERM, string, ":third:"); + NEXT_EQ(TOK_TERM, string, "four\"th"); +} + +TEST_F(SearchParserTest, Numeric) { + SetInput("11 123123123123 '22'"); + NEXT_EQ(TOK_UINT32, uint32_t, 11); + NEXT_EQ(TOK_DOUBLE, double, 123123123123.0); + NEXT_EQ(TOK_TERM, string, "22"); +} + } // namespace dfly::search diff --git a/src/server/search/doc_accessors.cc b/src/server/search/doc_accessors.cc index bdd6314f8..b75dfa650 100644 --- a/src/server/search/doc_accessors.cc +++ b/src/server/search/doc_accessors.cc @@ -110,7 +110,11 @@ struct JsonAccessor::JsonPathContainer : public jsoncons::jsonpath::jsonpath_exp }; BaseAccessor::StringList JsonAccessor::GetStrings(string_view active_field) const { - auto path_res = GetPath(active_field)->evaluate(json_); + auto* path = GetPath(active_field); + if (!path) + return {}; + + auto path_res = path->evaluate(json_); DCHECK(path_res.is_array()); // json path always returns arrays if (path_res.empty()) @@ -121,6 +125,8 @@ BaseAccessor::StringList JsonAccessor::GetStrings(string_view active_field) cons return {buf_}; } + buf_.clear(); + // First, grow buffer and compute string sizes vector sizes; for (auto element : path_res.array_range()) { @@ -141,7 +147,11 @@ BaseAccessor::StringList JsonAccessor::GetStrings(string_view active_field) cons } BaseAccessor::VectorInfo JsonAccessor::GetVector(string_view active_field) const { - auto res = GetPath(active_field)->evaluate(json_); + auto* path = GetPath(active_field); + if (!path) + return {}; + + auto res = path->evaluate(json_); DCHECK(res.is_array()); if (res.empty()) return {nullptr, 0}; @@ -163,7 +173,10 @@ JsonAccessor::JsonPathContainer* JsonAccessor::GetPath(std::string_view field) c error_code ec; auto path_expr = jsoncons::jsonpath::make_expression(field, ec); - DCHECK(!ec) << "missing validation on ft.create step"; + if (ec) { + LOG(WARNING) << "Invalid Json path: " << field << ' ' << ec.message(); + return nullptr; + } JsonPathContainer path_container{move(path_expr)}; auto ptr = make_unique(move(path_container)); @@ -180,8 +193,12 @@ SearchDocData JsonAccessor::Serialize(const search::Schema& schema) const { SearchDocData JsonAccessor::Serialize(const search::Schema& schema, const SearchParams::FieldReturnList& fields) const { SearchDocData out{}; - for (const auto& [ident, name] : fields) - out[name] = GetPath(ident)->evaluate(json_).to_string(); + for (const auto& [ident, name] : fields) { + if (auto* path = GetPath(ident); path) { + if (auto res = path->evaluate(json_); !res.empty()) + out[name] = res[0].to_string(); + } + } return out; } diff --git a/src/server/search/doc_accessors.h b/src/server/search/doc_accessors.h index 37f8bf682..6f2c69083 100644 --- a/src/server/search/doc_accessors.h +++ b/src/server/search/doc_accessors.h @@ -28,8 +28,8 @@ struct BaseAccessor : public search::DocumentAccessor { virtual SearchDocData Serialize(const search::Schema& schema) const = 0; // Serialize selected fields - SearchDocData Serialize(const search::Schema& schema, - const SearchParams::FieldReturnList& fields) const; + virtual SearchDocData Serialize(const search::Schema& schema, + const SearchParams::FieldReturnList& fields) const; }; // Accessor for hashes stored with listpack @@ -74,7 +74,7 @@ struct JsonAccessor : public BaseAccessor { // The JsonAccessor works with structured types and not plain strings, so an overload is needed SearchDocData Serialize(const search::Schema& schema, - const SearchParams::FieldReturnList& fields) const; + const SearchParams::FieldReturnList& fields) const override; static void RemoveFieldFromCache(std::string_view field); diff --git a/src/server/search/search_family.cc b/src/server/search/search_family.cc index ac0478ce1..d682a79fd 100644 --- a/src/server/search/search_family.cc +++ b/src/server/search/search_family.cc @@ -366,9 +366,9 @@ void SearchFamily::FtDropIndex(CmdArgList args, ConnectionContext* cntx) { }); DCHECK(num_deleted == 0u || num_deleted == shard_set->size()); - if (num_deleted == shard_set->size()) - return (*cntx)->SendOk(); - (*cntx)->SendError("Unknown Index name"); + if (num_deleted == 0u) + return (*cntx)->SendError("-Unknown Index name"); + return (*cntx)->SendOk(); } void SearchFamily::FtInfo(CmdArgList args, ConnectionContext* cntx) { @@ -389,7 +389,7 @@ void SearchFamily::FtInfo(CmdArgList args, ConnectionContext* cntx) { DCHECK(num_notfound == 0u || num_notfound == shard_set->size()); if (num_notfound > 0u) - return (*cntx)->SendError("Unknown index name"); + return (*cntx)->SendError("Unknown Index name"); DCHECK(infos.front().base_index.schema.fields.size() == infos.back().base_index.schema.fields.size()); diff --git a/src/server/search/search_family_test.cc b/src/server/search/search_family_test.cc index b4f6a2afb..a1c02ecf4 100644 --- a/src/server/search/search_family_test.cc +++ b/src/server/search/search_family_test.cc @@ -211,7 +211,7 @@ TEST_F(SearchFamilyTest, JsonArrayValues) { {"game": "Pacman", "score": 15}, {"game": "Mario", "score": 7} ], - "areas": "US-central" + "areas": ["US-central"] } )"; string_view D3 = R"( @@ -229,10 +229,17 @@ TEST_F(SearchFamilyTest, JsonArrayValues) { Run({"json.set", "k2", ".", D2}); Run({"json.set", "k3", ".", D3}); - auto resp = Run({"ft.create", "i1", "on", "json", "schema", "$.name", "text", "$.plays[*].game", - "as", "games", "tag", "$.plays[*].score", "as", "scores", "numeric", - "$.areas[*]", "as", "areas", "tag"}); - EXPECT_EQ(resp, "OK"); + Run({"ft.create", "i1", + "on", "json", + "schema", "$.name", + "as", "name", + "text", "$.plays[*].game", + "as", "games", + "tag", "$.plays[*].score", + "as", "scores", + "numeric", "$.areas[*]", + "as", "areas", + "tag"}); EXPECT_THAT(Run({"ft.search", "i1", "*"}), AreDocIds("k1", "k2", "k3")); @@ -248,8 +255,18 @@ TEST_F(SearchFamilyTest, JsonArrayValues) { EXPECT_THAT(Run({"ft.search", "i1", "@scores:[(15 20]"}), AreDocIds("k3")); // Find platers by areas - EXPECT_THAT(Run({"ft.search", "i1", "@areas:{\"EU-central\"}"}), AreDocIds("k1", "k3")); - EXPECT_THAT(Run({"ft.search", "i1", "@areas:{\"US-central\"}"}), AreDocIds("k2")); + EXPECT_THAT(Run({"ft.search", "i1", "@areas:{'EU-central'}"}), AreDocIds("k1", "k3")); + EXPECT_THAT(Run({"ft.search", "i1", "@areas:{'US-central'}"}), AreDocIds("k2")); + + // Test complicated RETURN expression + auto res = Run( + {"ft.search", "i1", "@name:bob", "return", "1", "max($.plays[*].score)", "as", "max-score"}); + EXPECT_THAT(res.GetVec()[2], RespArray(ElementsAre("max-score", "15"))); + + // Test invalid json path expression omits that field + res = Run({"ft.search", "i1", "@name:alex", "return", "1", "::??INVALID??::", "as", "retval"}); + EXPECT_EQ(res.GetVec()[1], "k1"); + EXPECT_THAT(res.GetVec()[2], RespArray(ElementsAre())); } TEST_F(SearchFamilyTest, Tags) {