Mirror of https://github.com/dragonflydb/dragonfly.git (synced 2025-05-10 18:05:44 +02:00)
chore(search): Safe json validation, serialize fix (#2100)
* chore(search): Safe json validation, serialize fix
Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>
---------
Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>
parent 146f46e77a
commit e46dd24bb7

6 changed files with 72 additions and 25 deletions
@@ -33,9 +33,9 @@
 
 blank [ \t\r]
 dq \"
+sq \'
 esc_chars ['"\?\\abfnrtv]
 esc_seq \\{esc_chars}
-str_char ([^"]|{esc_seq})
 term_char [_]|\w
 
 
@@ -65,10 +65,11 @@ term_char [_]|\w
 "KNN" return Parser::make_KNN (loc());
 "AS" return Parser::make_AS (loc());
 
-[0-9]+ return make_UINT32(matched_view(), loc());
+[0-9]{1,9} return make_UINT32(matched_view(), loc());
 [+-]?(([0-9]*[.])?[0-9]+|inf) return make_DOUBLE(matched_view(), loc());
 
-{dq}{str_char}*{dq} return make_StringLit(matched_view(1, 1), loc());
+{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc());
+{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc());
 
 "$"{term_char}+ return ParseParam(str(), loc());
 "@"{term_char}+ return Parser::make_FIELD(str(), loc());
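
Why the {1,9} cap is enough: the largest nine-digit value, 999999999, is below UINT32_MAX (4294967295), so a token matched by the new UINT32 rule always fits in uint32_t with no overflow check, while longer digit runs fall through to the DOUBLE rule under flex's longest-match rule. A minimal standalone sketch of that reasoning; ParseCappedUint is a hypothetical helper, not the parser's make_UINT32:

    #include <cstdint>
    #include <iostream>
    #include <string_view>

    // Hypothetical helper: accumulate at most nine decimal digits into a uint32_t.
    // Nine digits never exceed 999999999 < 4294967295, so no range check is needed.
    uint32_t ParseCappedUint(std::string_view digits) {
      uint32_t value = 0;
      for (char c : digits) value = value * 10 + static_cast<uint32_t>(c - '0');
      return value;
    }

    int main() {
      std::cout << ParseCappedUint("999999999") << "\n";  // 999999999, the worst case
      std::cout << (999999999u < UINT32_MAX) << "\n";     // 1: always representable
      // A 20-digit run such as "18446744073709551616" is now matched by the longer
      // DOUBLE pattern instead of producing a scanner error.
      return 0;
    }
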
@@ -103,9 +103,6 @@ TEST_F(SearchParserTest, Scanner) {
   absl::SimpleAtod("33.3", &d);
   SetInput("33.3");
   NEXT_EQ(TOK_DOUBLE, double, d);
-
-  SetInput("18446744073709551616");
-  NEXT_ERROR();
 }
 
 TEST_F(SearchParserTest, Parse) {
@@ -130,4 +127,19 @@ TEST_F(SearchParserTest, ParseParams) {
   NEXT_EQ(TOK_UINT32, uint32_t, 10);
 }
 
+TEST_F(SearchParserTest, Quotes) {
+  SetInput(" \"fir st\" 'sec@o@nd' \":third:\" 'four\\\"th' ");
+  NEXT_EQ(TOK_TERM, string, "fir st");
+  NEXT_EQ(TOK_TERM, string, "sec@o@nd");
+  NEXT_EQ(TOK_TERM, string, ":third:");
+  NEXT_EQ(TOK_TERM, string, "four\"th");
+}
+
+TEST_F(SearchParserTest, Numeric) {
+  SetInput("11 123123123123 '22'");
+  NEXT_EQ(TOK_UINT32, uint32_t, 11);
+  NEXT_EQ(TOK_DOUBLE, double, 123123123123.0);
+  NEXT_EQ(TOK_TERM, string, "22");
+}
+
 }  // namespace dfly::search
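
The Quotes test expects the surrounding quotes stripped and the escaped quote resolved, so 'four\"th' comes back as four"th. A rough standalone illustration of that stripping with a hypothetical Unquote helper; the real work happens in make_StringLit/matched_view, which may also translate \n, \t and similar escapes, whereas this sketch keeps the escaped character verbatim:

    #include <iostream>
    #include <string>
    #include <string_view>

    // Hypothetical helper: drop the outer quotes and collapse backslash escapes,
    // keeping the escaped character itself.
    std::string Unquote(std::string_view tok) {
      std::string out;
      for (size_t i = 1; i + 1 < tok.size(); ++i) {
        if (tok[i] == '\\' && i + 2 < tok.size()) {
          out += tok[i + 1];
          ++i;
        } else {
          out += tok[i];
        }
      }
      return out;
    }

    int main() {
      std::cout << Unquote(R"("fir st")") << "\n";    // fir st
      std::cout << Unquote(R"('four\"th')") << "\n";  // four"th
      return 0;
    }
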
@@ -110,7 +110,11 @@ struct JsonAccessor::JsonPathContainer : public jsoncons::jsonpath::jsonpath_exp
 };
 
 BaseAccessor::StringList JsonAccessor::GetStrings(string_view active_field) const {
-  auto path_res = GetPath(active_field)->evaluate(json_);
+  auto* path = GetPath(active_field);
+  if (!path)
+    return {};
+
+  auto path_res = path->evaluate(json_);
   DCHECK(path_res.is_array());  // json path always returns arrays
 
   if (path_res.empty())
@@ -121,6 +125,8 @@ BaseAccessor::StringList JsonAccessor::GetStrings(string_view active_field) cons
     return {buf_};
   }
 
+  buf_.clear();
+
   // First, grow buffer and compute string sizes
   vector<size_t> sizes;
   for (auto element : path_res.array_range()) {
@@ -141,7 +147,11 @@ BaseAccessor::StringList JsonAccessor::GetStrings(string_view active_field) cons
 }
 
 BaseAccessor::VectorInfo JsonAccessor::GetVector(string_view active_field) const {
-  auto res = GetPath(active_field)->evaluate(json_);
+  auto* path = GetPath(active_field);
+  if (!path)
+    return {};
+
+  auto res = path->evaluate(json_);
   DCHECK(res.is_array());
   if (res.empty())
     return {nullptr, 0};
@@ -163,7 +173,10 @@ JsonAccessor::JsonPathContainer* JsonAccessor::GetPath(std::string_view field) c
 
   error_code ec;
   auto path_expr = jsoncons::jsonpath::make_expression<JsonType>(field, ec);
-  DCHECK(!ec) << "missing validation on ft.create step";
+  if (ec) {
+    LOG(WARNING) << "Invalid Json path: " << field << ' ' << ec.message();
+    return nullptr;
+  }
 
   JsonPathContainer path_container{move(path_expr)};
   auto ptr = make_unique<JsonPathContainer>(move(path_container));
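
GetPath now reports a malformed path through the error_code overload of jsoncons' make_expression and caches a null entry instead of tripping a DCHECK. A minimal standalone sketch of that overload, using plain jsoncons::json rather than Dragonfly's JsonType alias:

    #include <iostream>
    #include <system_error>
    #include <jsoncons/json.hpp>
    #include <jsoncons_ext/jsonpath/jsonpath.hpp>

    int main() {
      std::error_code ec;
      // Same overload the accessor uses: no exception on a bad path, ec is set instead.
      auto expr = jsoncons::jsonpath::make_expression<jsoncons::json>("::??INVALID??::", ec);
      if (ec) {
        std::cout << "invalid json path: " << ec.message() << "\n";
        return 0;
      }
      auto doc = jsoncons::json::parse(R"({"name": "alex"})");
      std::cout << expr.evaluate(doc).to_string() << "\n";
      return 0;
    }
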
@@ -180,8 +193,12 @@ SearchDocData JsonAccessor::Serialize(const search::Schema& schema) const {
 SearchDocData JsonAccessor::Serialize(const search::Schema& schema,
                                       const SearchParams::FieldReturnList& fields) const {
   SearchDocData out{};
-  for (const auto& [ident, name] : fields)
-    out[name] = GetPath(ident)->evaluate(json_).to_string();
+  for (const auto& [ident, name] : fields) {
+    if (auto* path = GetPath(ident); path) {
+      if (auto res = path->evaluate(json_); !res.empty())
+        out[name] = res[0].to_string();
+    }
+  }
   return out;
 }
 
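The serialize fix stores the first element of the evaluation result instead of stringifying the whole result array (jsonpath evaluation always yields an array), and silently omits fields whose path is missing or matches nothing. A small sketch of the difference, again with plain jsoncons::json instead of Dragonfly's JsonType alias:

    #include <iostream>
    #include <jsoncons/json.hpp>
    #include <jsoncons_ext/jsonpath/jsonpath.hpp>

    int main() {
      auto doc = jsoncons::json::parse(R"({"plays": [{"score": 15}, {"score": 7}]})");
      auto expr = jsoncons::jsonpath::make_expression<jsoncons::json>("$.plays[*].score");
      auto res = expr.evaluate(doc);              // evaluation result is an array of matches
      std::cout << res.to_string() << "\n";       // roughly [15,7]: what the old code stored
      if (!res.empty())
        std::cout << res[0].to_string() << "\n";  // 15: the first element, as the fixed code stores
      return 0;
    }
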
@@ -28,8 +28,8 @@ struct BaseAccessor : public search::DocumentAccessor {
   virtual SearchDocData Serialize(const search::Schema& schema) const = 0;
 
   // Serialize selected fields
-  SearchDocData Serialize(const search::Schema& schema,
-                          const SearchParams::FieldReturnList& fields) const;
+  virtual SearchDocData Serialize(const search::Schema& schema,
+                                  const SearchParams::FieldReturnList& fields) const;
 };
 
 // Accessor for hashes stored with listpack
@@ -74,7 +74,7 @@ struct JsonAccessor : public BaseAccessor {
 
   // The JsonAccessor works with structured types and not plain strings, so an overload is needed
   SearchDocData Serialize(const search::Schema& schema,
-                          const SearchParams::FieldReturnList& fields) const;
+                          const SearchParams::FieldReturnList& fields) const override;
 
   static void RemoveFieldFromCache(std::string_view field);
 
@@ -366,9 +366,9 @@ void SearchFamily::FtDropIndex(CmdArgList args, ConnectionContext* cntx) {
   });
 
   DCHECK(num_deleted == 0u || num_deleted == shard_set->size());
-  if (num_deleted == shard_set->size())
-    return (*cntx)->SendOk();
-  (*cntx)->SendError("Unknown Index name");
+  if (num_deleted == 0u)
+    return (*cntx)->SendError("-Unknown Index name");
+  return (*cntx)->SendOk();
 }
 
 void SearchFamily::FtInfo(CmdArgList args, ConnectionContext* cntx) {
@@ -389,7 +389,7 @@ void SearchFamily::FtInfo(CmdArgList args, ConnectionContext* cntx) {
   DCHECK(num_notfound == 0u || num_notfound == shard_set->size());
 
   if (num_notfound > 0u)
-    return (*cntx)->SendError("Unknown index name");
+    return (*cntx)->SendError("Unknown Index name");
 
   DCHECK(infos.front().base_index.schema.fields.size() ==
          infos.back().base_index.schema.fields.size());
@@ -211,7 +211,7 @@ TEST_F(SearchFamilyTest, JsonArrayValues) {
       {"game": "Pacman", "score": 15},
       {"game": "Mario", "score": 7}
     ],
-    "areas": "US-central"
+    "areas": ["US-central"]
   }
 )";
   string_view D3 = R"(
@@ -229,10 +229,17 @@ TEST_F(SearchFamilyTest, JsonArrayValues) {
   Run({"json.set", "k2", ".", D2});
   Run({"json.set", "k3", ".", D3});
 
-  auto resp = Run({"ft.create", "i1", "on", "json", "schema", "$.name", "text", "$.plays[*].game",
-                   "as", "games", "tag", "$.plays[*].score", "as", "scores", "numeric",
-                   "$.areas[*]", "as", "areas", "tag"});
-  EXPECT_EQ(resp, "OK");
+  Run({"ft.create", "i1",
+       "on", "json",
+       "schema", "$.name",
+       "as", "name",
+       "text", "$.plays[*].game",
+       "as", "games",
+       "tag", "$.plays[*].score",
+       "as", "scores",
+       "numeric", "$.areas[*]",
+       "as", "areas",
+       "tag"});
 
   EXPECT_THAT(Run({"ft.search", "i1", "*"}), AreDocIds("k1", "k2", "k3"));
 
@@ -248,8 +255,18 @@ TEST_F(SearchFamilyTest, JsonArrayValues) {
   EXPECT_THAT(Run({"ft.search", "i1", "@scores:[(15 20]"}), AreDocIds("k3"));
 
   // Find platers by areas
-  EXPECT_THAT(Run({"ft.search", "i1", "@areas:{\"EU-central\"}"}), AreDocIds("k1", "k3"));
-  EXPECT_THAT(Run({"ft.search", "i1", "@areas:{\"US-central\"}"}), AreDocIds("k2"));
+  EXPECT_THAT(Run({"ft.search", "i1", "@areas:{'EU-central'}"}), AreDocIds("k1", "k3"));
+  EXPECT_THAT(Run({"ft.search", "i1", "@areas:{'US-central'}"}), AreDocIds("k2"));
+
+  // Test complicated RETURN expression
+  auto res = Run(
+      {"ft.search", "i1", "@name:bob", "return", "1", "max($.plays[*].score)", "as", "max-score"});
+  EXPECT_THAT(res.GetVec()[2], RespArray(ElementsAre("max-score", "15")));
+
+  // Test invalid json path expression omits that field
+  res = Run({"ft.search", "i1", "@name:alex", "return", "1", "::??INVALID??::", "as", "retval"});
+  EXPECT_EQ(res.GetVec()[1], "k1");
+  EXPECT_THAT(res.GetVec()[2], RespArray(ElementsAre()));
 }
 
 TEST_F(SearchFamilyTest, Tags) {