mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-11 18:35:46 +02:00
fix(search_family): Support boolean and nullable types in indexes (#4314)
* fix(search_family): Support boolean and nullable types in indexes fixes dragonflydb#4107, dragonflydb#4129 Signed-off-by: Stepan Bagritsevich <stefan@dragonflydb.io> * refactor: address comments Signed-off-by: Stepan Bagritsevich <stefan@dragonflydb.io> --------- Signed-off-by: Stepan Bagritsevich <stefan@dragonflydb.io>
This commit is contained in:
parent
01f24da2b6
commit
3c7e31240f
9 changed files with 191 additions and 76 deletions
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "server/search/doc_accessors.h"
|
||||
|
||||
#include <absl/functional/any_invocable.h>
|
||||
#include <absl/strings/str_cat.h>
|
||||
#include <absl/strings/str_join.h>
|
||||
|
||||
|
@ -72,10 +73,31 @@ FieldValue ExtractSortableValue(const search::Schema& schema, string_view key, s
|
|||
|
||||
FieldValue ExtractSortableValueFromJson(const search::Schema& schema, string_view key,
|
||||
const JsonType& json) {
|
||||
if (json.is_null()) {
|
||||
return std::monostate{};
|
||||
}
|
||||
auto json_as_string = json.to_string();
|
||||
return ExtractSortableValue(schema, key, json_as_string);
|
||||
}
|
||||
|
||||
/* Returns true if json elements were successfully processed. */
|
||||
template <typename Callback>
|
||||
bool ProcessJsonElements(const std::vector<JsonType>& json_elements, Callback&& cb) {
|
||||
auto process = [&cb](const auto& json_range) -> bool {
|
||||
for (const auto& json : json_range) {
|
||||
if (!json.is_null() && !cb(json)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
if (!json_elements[0].is_array()) {
|
||||
return process(json_elements);
|
||||
}
|
||||
return json_elements.size() == 1 && process(json_elements[0].array_range());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
SearchDocData BaseAccessor::Serialize(const search::Schema& schema,
|
||||
|
@ -127,6 +149,10 @@ std::optional<BaseAccessor::NumsList> BaseAccessor::GetNumbers(
|
|||
return nums_list;
|
||||
}
|
||||
|
||||
std::optional<BaseAccessor::StringList> BaseAccessor::GetTags(std::string_view active_field) const {
|
||||
return GetStrings(active_field);
|
||||
}
|
||||
|
||||
std::optional<BaseAccessor::StringList> ListPackAccessor::GetStrings(
|
||||
string_view active_field) const {
|
||||
auto strsv = container_utils::LpFind(lp_, active_field, intbuf_[0].data());
|
||||
|
@ -192,8 +218,17 @@ struct JsonAccessor::JsonPathContainer {
|
|||
variant<json::Path, jsoncons::jsonpath::jsonpath_expression<JsonType>> val;
|
||||
};
|
||||
|
||||
std::optional<BaseAccessor::StringList> JsonAccessor::GetStrings(string_view active_field) const {
|
||||
auto* path = GetPath(active_field);
|
||||
std::optional<BaseAccessor::StringList> JsonAccessor::GetStrings(std::string_view field) const {
|
||||
return GetStrings(field, false);
|
||||
}
|
||||
|
||||
std::optional<BaseAccessor::StringList> JsonAccessor::GetTags(std::string_view active_field) const {
|
||||
return GetStrings(active_field, true);
|
||||
}
|
||||
|
||||
std::optional<BaseAccessor::StringList> JsonAccessor::GetStrings(std::string_view field,
|
||||
bool accept_boolean_values) const {
|
||||
auto* path = GetPath(field);
|
||||
if (!path)
|
||||
return search::EmptyAccessResult<StringList>();
|
||||
|
||||
|
@ -201,8 +236,18 @@ std::optional<BaseAccessor::StringList> JsonAccessor::GetStrings(string_view act
|
|||
if (path_res.empty())
|
||||
return search::EmptyAccessResult<StringList>();
|
||||
|
||||
auto is_convertible_to_string = [](bool accept_boolean_values) -> bool (*)(const JsonType& json) {
|
||||
if (accept_boolean_values) {
|
||||
return [](const JsonType& json) -> bool { return json.is_string() || json.is_bool(); };
|
||||
} else {
|
||||
return [](const JsonType& json) -> bool { return json.is_string(); };
|
||||
}
|
||||
}(accept_boolean_values);
|
||||
|
||||
if (path_res.size() == 1 && !path_res[0].is_array()) {
|
||||
if (!path_res[0].is_string())
|
||||
if (path_res[0].is_null())
|
||||
return StringList{};
|
||||
if (!is_convertible_to_string(path_res[0]))
|
||||
return std::nullopt;
|
||||
|
||||
buf_ = path_res[0].as_string();
|
||||
|
@ -213,33 +258,21 @@ std::optional<BaseAccessor::StringList> JsonAccessor::GetStrings(string_view act
|
|||
|
||||
// First, grow buffer and compute string sizes
|
||||
vector<size_t> sizes;
|
||||
sizes.reserve(path_res.size());
|
||||
|
||||
// Returns true if json element is convertiable to string
|
||||
auto add_json_element_to_buf = [&](const JsonType& json) -> bool {
|
||||
if (!is_convertible_to_string(json))
|
||||
return false;
|
||||
|
||||
auto add_json_to_buf = [&](const JsonType& json) {
|
||||
size_t start = buf_.size();
|
||||
buf_ += json.as_string();
|
||||
sizes.push_back(buf_.size() - start);
|
||||
return true;
|
||||
};
|
||||
|
||||
if (!path_res[0].is_array()) {
|
||||
sizes.reserve(path_res.size());
|
||||
for (const auto& element : path_res) {
|
||||
if (!element.is_string())
|
||||
return std::nullopt;
|
||||
|
||||
add_json_to_buf(element);
|
||||
}
|
||||
} else {
|
||||
if (path_res.size() > 1) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
sizes.reserve(path_res[0].size());
|
||||
for (const auto& element : path_res[0].array_range()) {
|
||||
if (!element.is_string())
|
||||
return std::nullopt;
|
||||
|
||||
add_json_to_buf(element);
|
||||
}
|
||||
if (!ProcessJsonElements(path_res, std::move(add_json_element_to_buf))) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Reposition start pointers to the most recent allocation of buf
|
||||
|
@ -260,7 +293,7 @@ std::optional<BaseAccessor::VectorInfo> JsonAccessor::GetVector(string_view acti
|
|||
return VectorInfo{};
|
||||
|
||||
auto res = path->Evaluate(json_);
|
||||
if (res.empty())
|
||||
if (res.empty() || res[0].is_null())
|
||||
return VectorInfo{};
|
||||
|
||||
if (!res[0].is_array())
|
||||
|
@ -290,24 +323,18 @@ std::optional<BaseAccessor::NumsList> JsonAccessor::GetNumbers(string_view activ
|
|||
return search::EmptyAccessResult<NumsList>();
|
||||
|
||||
NumsList nums_list;
|
||||
if (!path_res[0].is_array()) {
|
||||
nums_list.reserve(path_res.size());
|
||||
for (const auto& element : path_res) {
|
||||
if (!element.is_number())
|
||||
return std::nullopt;
|
||||
nums_list.push_back(element.as<double>());
|
||||
}
|
||||
} else {
|
||||
if (path_res.size() > 1) {
|
||||
return std::nullopt;
|
||||
}
|
||||
nums_list.reserve(path_res.size());
|
||||
|
||||
nums_list.reserve(path_res[0].size());
|
||||
for (const auto& element : path_res[0].array_range()) {
|
||||
if (!element.is_number())
|
||||
return std::nullopt;
|
||||
nums_list.push_back(element.as<double>());
|
||||
}
|
||||
// Returns true if json element is convertiable to number
|
||||
auto add_json_element = [&](const JsonType& json) -> bool {
|
||||
if (!json.is_number())
|
||||
return false;
|
||||
nums_list.push_back(json.as<double>());
|
||||
return true;
|
||||
};
|
||||
|
||||
if (!ProcessJsonElements(path_res, std::move(add_json_element))) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return nums_list;
|
||||
}
|
||||
|
|
|
@ -40,8 +40,9 @@ struct BaseAccessor : public search::DocumentAccessor {
|
|||
virtual SearchDocData SerializeDocument(const search::Schema& schema) const;
|
||||
|
||||
// Default implementation uses GetStrings
|
||||
virtual std::optional<VectorInfo> GetVector(std::string_view active_field) const;
|
||||
virtual std::optional<NumsList> GetNumbers(std::string_view active_field) const;
|
||||
virtual std::optional<VectorInfo> GetVector(std::string_view active_field) const override;
|
||||
virtual std::optional<NumsList> GetNumbers(std::string_view active_field) const override;
|
||||
virtual std::optional<StringList> GetTags(std::string_view active_field) const override;
|
||||
};
|
||||
|
||||
// Accessor for hashes stored with listpack
|
||||
|
@ -81,6 +82,7 @@ struct JsonAccessor : public BaseAccessor {
|
|||
std::optional<StringList> GetStrings(std::string_view field) const override;
|
||||
std::optional<VectorInfo> GetVector(std::string_view field) const override;
|
||||
std::optional<NumsList> GetNumbers(std::string_view active_field) const override;
|
||||
std::optional<StringList> GetTags(std::string_view active_field) const override;
|
||||
|
||||
// The JsonAccessor works with structured types and not plain strings, so an overload is needed
|
||||
SearchDocData Serialize(const search::Schema& schema,
|
||||
|
@ -91,6 +93,9 @@ struct JsonAccessor : public BaseAccessor {
|
|||
static void RemoveFieldFromCache(std::string_view field);
|
||||
|
||||
private:
|
||||
/* If accept_boolean_values is true, then json boolean values are converted to strings */
|
||||
std::optional<StringList> GetStrings(std::string_view field, bool accept_boolean_values) const;
|
||||
|
||||
/// Parses `field` into a JSON path. Caches the results internally.
|
||||
JsonPathContainer* GetPath(std::string_view field) const;
|
||||
|
||||
|
|
|
@ -1298,9 +1298,15 @@ TEST_F(SearchFamilyTest, WrongFieldTypeHardJson) {
|
|||
Run({"JSON.SET", "j1", ".", R"({"data":1,"name":"doc_with_int"})"});
|
||||
Run({"JSON.SET", "j2", ".", R"({"data":"1","name":"doc_with_int_as_string"})"});
|
||||
Run({"JSON.SET", "j3", ".", R"({"data":"string","name":"doc_with_string"})"});
|
||||
Run({"JSON.SET", "j4", ".", R"({"name":"no_data"})"});
|
||||
Run({"JSON.SET", "j5", ".", R"({"data":[5,4,3],"name":"doc_with_vector"})"});
|
||||
Run({"JSON.SET", "j6", ".", R"({"data":"[5,4,3]","name":"doc_with_vector_as_string"})"});
|
||||
Run({"JSON.SET", "j4", ".",
|
||||
R"({"data":["first", "second", "third"],"name":"doc_with_strings"})"});
|
||||
Run({"JSON.SET", "j5", ".", R"({"name":"no_data"})"});
|
||||
Run({"JSON.SET", "j6", ".", R"({"data":[5,4,3],"name":"doc_with_vector"})"});
|
||||
Run({"JSON.SET", "j7", ".", R"({"data":"[5,4,3]","name":"doc_with_vector_as_string"})"});
|
||||
Run({"JSON.SET", "j8", ".", R"({"data":null,"name":"doc_with_null"})"});
|
||||
Run({"JSON.SET", "j9", ".", R"({"data":[null, null, null],"name":"doc_with_nulls"})"});
|
||||
Run({"JSON.SET", "j10", ".", R"({"data":true,"name":"doc_with_boolean"})"});
|
||||
Run({"JSON.SET", "j11", ".", R"({"data":[true, false, true],"name":"doc_with_booleans"})"});
|
||||
|
||||
auto resp = Run({"FT.CREATE", "i1", "ON", "JSON", "SCHEMA", "$.data", "AS", "data", "NUMERIC"});
|
||||
EXPECT_EQ(resp, "OK");
|
||||
|
@ -1328,25 +1334,25 @@ TEST_F(SearchFamilyTest, WrongFieldTypeHardJson) {
|
|||
EXPECT_EQ(resp, "OK");
|
||||
|
||||
resp = Run({"FT.SEARCH", "i1", "*"});
|
||||
EXPECT_THAT(resp, AreDocIds("j1", "j4", "j5"));
|
||||
EXPECT_THAT(resp, AreDocIds("j1", "j5", "j6", "j8", "j9"));
|
||||
|
||||
resp = Run({"FT.SEARCH", "i2", "*"});
|
||||
EXPECT_THAT(resp, AreDocIds("j1", "j4", "j5"));
|
||||
EXPECT_THAT(resp, AreDocIds("j1", "j5", "j6", "j8", "j9"));
|
||||
|
||||
resp = Run({"FT.SEARCH", "i3", "*"});
|
||||
EXPECT_THAT(resp, AreDocIds("j2", "j3", "j6", "j4"));
|
||||
EXPECT_THAT(resp, AreDocIds("j2", "j3", "j4", "j5", "j7", "j8", "j9", "j10", "j11"));
|
||||
|
||||
resp = Run({"FT.SEARCH", "i4", "*"});
|
||||
EXPECT_THAT(resp, AreDocIds("j2", "j3", "j6", "j4"));
|
||||
EXPECT_THAT(resp, AreDocIds("j2", "j3", "j4", "j5", "j7", "j8", "j9", "j10", "j11"));
|
||||
|
||||
resp = Run({"FT.SEARCH", "i5", "*"});
|
||||
EXPECT_THAT(resp, AreDocIds("j4", "j2", "j3", "j6"));
|
||||
EXPECT_THAT(resp, AreDocIds("j2", "j3", "j4", "j5", "j7", "j8", "j9"));
|
||||
|
||||
resp = Run({"FT.SEARCH", "i6", "*"});
|
||||
EXPECT_THAT(resp, AreDocIds("j4", "j2", "j3", "j6"));
|
||||
EXPECT_THAT(resp, AreDocIds("j2", "j3", "j4", "j5", "j7", "j8", "j9"));
|
||||
|
||||
resp = Run({"FT.SEARCH", "i7", "*"});
|
||||
EXPECT_THAT(resp, AreDocIds("j4", "j5"));
|
||||
EXPECT_THAT(resp, AreDocIds("j5", "j6", "j8"));
|
||||
}
|
||||
|
||||
TEST_F(SearchFamilyTest, WrongFieldTypeHardHash) {
|
||||
|
@ -1417,6 +1423,12 @@ TEST_F(SearchFamilyTest, WrongVectorFieldType) {
|
|||
Run({"JSON.SET", "j6", ".", R"({"name":"doc_with_no_field"})"});
|
||||
Run({"JSON.SET", "j7", ".",
|
||||
R"({"vector_field": [999999999999999999999999999999999999999, -999999999999999999999999999999999999999, 500000000000000000000000000000000000000], "name": "doc_with_out_of_range_values"})"});
|
||||
Run({"JSON.SET", "j8", ".", R"({"vector_field":null, "name": "doc_with_null"})"});
|
||||
Run({"JSON.SET", "j9", ".", R"({"vector_field":[null, null, null], "name": "doc_with_nulls"})"});
|
||||
Run({"JSON.SET", "j10", ".", R"({"vector_field":true, "name": "doc_with_boolean"})"});
|
||||
Run({"JSON.SET", "j11", ".",
|
||||
R"({"vector_field":[true, false, true], "name": "doc_with_booleans"})"});
|
||||
Run({"JSON.SET", "j12", ".", R"({"vector_field":1, "name": "doc_with_int"})"});
|
||||
|
||||
auto resp =
|
||||
Run({"FT.CREATE", "index", "ON", "JSON", "SCHEMA", "$.vector_field", "AS", "vector_field",
|
||||
|
@ -1424,7 +1436,7 @@ TEST_F(SearchFamilyTest, WrongVectorFieldType) {
|
|||
EXPECT_EQ(resp, "OK");
|
||||
|
||||
resp = Run({"FT.SEARCH", "index", "*"});
|
||||
EXPECT_THAT(resp, AreDocIds("j6", "j7", "j1", "j4"));
|
||||
EXPECT_THAT(resp, AreDocIds("j6", "j7", "j1", "j4", "j8"));
|
||||
}
|
||||
|
||||
#ifndef SANITIZERS
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue