chore: preparations for adding flat json support (#2752)

1. More test fixes to support both types
2. Adjust Aggregation functions to work with both types
3. jsonpath_test builds for both types and passes for JsonType;
   the tests are skipped for flexbuffers

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2024-03-21 14:21:37 +02:00 committed by GitHub
parent 9207dec983
commit 9db825013a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 348 additions and 60 deletions

View file

@ -21,7 +21,18 @@ auto FlatDfsItem::Init(const PathSegment& segment) -> AdvanceResult {
}
auto FlatDfsItem::Advance(const PathSegment& segment) -> AdvanceResult {
return AdvanceResult{};
if (state_ == kInit) {
return Init(segment);
}
if (!ShouldIterateAll(segment.type()))
return Exhausted();
++state_;
auto vec = obj().AsVector();
if (state_ >= vec.size())
return Exhausted();
return Next(vec[state_]);
}
FlatDfs FlatDfs::Traverse(absl::Span<const PathSegment> path, const flexbuffers::Reference root,
@ -29,11 +40,86 @@ FlatDfs FlatDfs::Traverse(absl::Span<const PathSegment> path, const flexbuffers:
DCHECK(!path.empty());
FlatDfs dfs;
if (path.size() == 1) {
dfs.PerformStep(path[0], root, callback);
return dfs;
}
using ConstItem = FlatDfsItem;
vector<ConstItem> stack;
stack.emplace_back(root);
do {
unsigned segment_index = stack.back().segment_idx();
const auto& path_segment = path[segment_index];
// init or advance the current object
ConstItem::AdvanceResult res = stack.back().Advance(path_segment);
if (res && !res->first.IsNull()) {
const flexbuffers::Reference next = res->first;
DVLOG(2) << "Handling now " << next.GetType() << " " << next.ToString();
// We descend only if next is an object or an array.
if (IsRecursive(next.GetType())) {
unsigned next_seg_id = res->second;
if (next_seg_id + 1 < path.size()) {
stack.emplace_back(next, next_seg_id);
} else {
// terminal step
// TODO: to take into account MatchStatus
// for `json.set foo $.a[10]` or for `json.set foo $.*.b`
dfs.PerformStep(path[next_seg_id], next, callback);
}
}
} else {
stack.pop_back();
}
} while (!stack.empty());
return dfs;
}
// Applies the terminal path `segment` to `node` and reports every match through `callback`.
//
//  - IDENTIFIER: `node` must be a map. The value stored under the identifier is reported
//    together with the identifier; a null lookup result is silently skipped.
//  - INDEX: `node` must be a vector and the index must be inside its bounds. The element is
//    reported without a key.
//  - DESCENT / WILDCARD: every direct child of `node` is reported. Children of a map carry
//    their key, children of a vector do not.
//
// Returns MISMATCH when the node type does not fit the segment, OUT_OF_BOUNDS for an index
// past the end of the vector, and an empty (successful) result otherwise.
auto FlatDfs::PerformStep(const PathSegment& segment, const flexbuffers::Reference node,
                          const PathFlatCallback& callback) -> nonstd::expected<void, MatchStatus> {
  switch (segment.type()) {
    case SegmentType::IDENTIFIER: {
      if (!node.IsMap())
        return make_unexpected(MISMATCH);
      auto map = node.AsMap();
      flexbuffers::Reference value = map[segment.identifier().c_str()];
      if (!value.IsNull()) {
        DoCall(callback, string_view{segment.identifier()}, value);
      }
    } break;
    case SegmentType::INDEX: {
      if (!node.IsVector())
        return make_unexpected(MISMATCH);
      auto vec = node.AsVector();
      if (segment.index() >= vec.size()) {
        return make_unexpected(OUT_OF_BOUNDS);
      }
      DoCall(callback, nullopt, vec[segment.index()]);
    } break;

    case SegmentType::DESCENT:
    case SegmentType::WILDCARD: {
      auto vec = node.AsVector();       // always succeeds
      auto keys = node.AsMap().Keys();  // always succeeds
      string str;  // backing storage for opt_key, reused across iterations.
      for (size_t i = 0; i < vec.size(); ++i) {
        flexbuffers::Reference key = keys[i];
        optional<string_view> opt_key;
        // Map keys are exposed by flexbuffers with the FBT_KEY type, for which IsString()
        // is false. IsKey() must be accepted, otherwise object members lose their key.
        if (key.IsKey() || key.IsString()) {
          str = key.ToString();
          opt_key = str;
        }
        DoCall(callback, opt_key, vec[i]);
      }
    } break;
    default:
      LOG(DFATAL) << "Unknown segment " << SegmentName(segment.type());
  }
  return {};
}

View file

@ -17,13 +17,8 @@ namespace dfly::json {
namespace {
class SingleValueImpl : public AggFunction {
JsonType GetResultImpl() const final {
return visit(Overloaded{
[](monostate) { return JsonType::null(); },
[](double d) { return JsonType(d); },
[](int64_t i) { return JsonType(i); },
},
val_);
Result GetResultImpl() const final {
return val_;
}
protected:
@ -35,7 +30,15 @@ class SingleValueImpl : public AggFunction {
}
}
variant<monostate, double, int64_t> val_;
// Seeds val_ from a flexbuffer value: floats are stored as double,
// everything else is read through AsInt64() and stored as int64_t.
void Init(const flexbuffers::Reference src) {
  if (!src.IsFloat()) {
    val_.emplace<int64_t>(src.AsInt64());
    return;
  }
  val_.emplace<double>(src.AsDouble());
}
Result val_;
};
class MaxImpl : public SingleValueImpl {
@ -58,6 +61,25 @@ class MaxImpl : public SingleValueImpl {
return true;
}
// Folds a flexbuffer value into the running maximum.
// Returns false (leaving val_ untouched) when src is not numeric.
// An integer accumulator is promoted to double as soon as a float value is seen.
bool ApplyImpl(flexbuffers::Reference src) final {
  if (!src.IsNumeric())
    return false;

  if (std::holds_alternative<monostate>(val_)) {
    Init(src);  // first value seen.
  } else if (std::holds_alternative<double>(val_)) {
    const double current = std::get<double>(val_);
    val_ = max(current, src.AsDouble());
  } else {
    const int64_t current = std::get<int64_t>(val_);
    if (src.IsFloat())
      val_ = max(double(current), src.AsDouble());
    else
      val_ = max(current, src.AsInt64());
  }
  return true;
}
};
class MinImpl : public SingleValueImpl {
@ -81,6 +103,25 @@ class MinImpl : public SingleValueImpl {
return true;
}
// Folds a flexbuffer value into the running minimum.
// Returns false (leaving val_ untouched) when src is not numeric.
// An integer accumulator is promoted to double as soon as a float value is seen.
bool ApplyImpl(flexbuffers::Reference src) final {
  if (!src.IsNumeric())
    return false;

  if (std::holds_alternative<monostate>(val_)) {
    Init(src);  // first value seen.
  } else if (std::holds_alternative<double>(val_)) {
    const double current = std::get<double>(val_);
    val_ = min(current, src.AsDouble());
  } else {
    const int64_t current = std::get<int64_t>(val_);
    if (src.IsFloat())
      val_ = min(double(current), src.AsDouble());
    else
      val_ = min(current, src.AsInt64());
  }
  return true;
}
};
class AvgImpl : public AggFunction {
@ -95,9 +136,19 @@ class AvgImpl : public AggFunction {
return true;
}
JsonType GetResultImpl() const final {
// Adds a flexbuffer value to the running sum and bumps the sample count.
// Non-numeric values are rejected with false and leave the accumulator untouched.
bool ApplyImpl(flexbuffers::Reference src) final {
  const bool numeric = src.IsNumeric();
  if (numeric) {
    sum_ += src.AsDouble();
    ++count_;
  }
  return numeric;
}
Result GetResultImpl() const final {
DCHECK_GT(count_, 0u); // AggFunction guarantees that
return JsonType(sum_ / count_);
return Result(double(sum_ / count_));
}
double sum_ = 0;

View file

@ -9,11 +9,18 @@
#include "core/json/driver.h"
#include "core/json/lexer_impl.h"
namespace flexbuffers {

// Equality used by the tests: two references compare equal when their
// ToString() renderings are identical.
bool operator==(const Reference left, const Reference right) {
  const auto left_text = left.ToString();
  const auto right_text = right.ToString();
  return left_text == right_text;
}

}  // namespace flexbuffers
namespace dfly::json {
using namespace std;
using testing::ElementsAre;
using FlatJson = flexbuffers::Reference;
MATCHER_P(SegType, value, "") {
return ExplainMatchResult(testing::Property(&PathSegment::type, value), arg, result_listener);
@ -38,6 +45,17 @@ template <> JsonType ValidJson<JsonType>(string_view str) {
return *res;
}
// Parses `str` as JSON into a flexbuffer and returns a reference to its root.
// CHECK-fails if the text can not be parsed.
//
// The returned reference points into a function-local static builder that is cleared on
// every call, so it is valid only until the next ValidJson<FlatJson>() call.
template <> FlatJson ValidJson<FlatJson>(string_view str) {
  static flexbuffers::Builder fbb;
  flatbuffers::Parser parser;

  fbb.Clear();

  // ParseFlexBuffer() takes a C string, while a string_view is not guaranteed to be
  // null terminated - parse a terminated copy instead of str.data().
  const string source(str);
  CHECK(parser.ParseFlexBuffer(source.c_str(), nullptr, &fbb));
  fbb.Finish();
  const auto& buffer = fbb.GetBuffer();
  return flexbuffers::GetRoot(buffer);
}
bool is_int(const JsonType& val) {
return val.is<int>();
}
@ -46,6 +64,30 @@ int to_int(const JsonType& val) {
return val.as<int>();
}
// Small overload set that lets the typed tests inspect a value the same way
// regardless of whether it is a JsonType or a FlatJson reference.

// True if the JsonType value is an object.
bool is_object(const JsonType& val) {
  return val.is_object();
}

// True if the JsonType value is an array.
bool is_array(const JsonType& val) {
  return val.is_array();
}

// True if the flexbuffer value is an integer.
// Returns bool, matching the JsonType overload and the other predicates.
bool is_int(FlatJson ref) {
  return ref.IsInt();
}

// Reads the flexbuffer value as a 32-bit integer.
int to_int(FlatJson ref) {
  return ref.AsInt32();
}

// True if the flexbuffer value is a map.
bool is_object(FlatJson ref) {
  return ref.IsMap();
}

// True if the flexbuffer value is a vector.
bool is_array(FlatJson ref) {
  return ref.IsVector();
}
class ScannerTest : public ::testing::Test {
protected:
ScannerTest() {
@ -79,7 +121,7 @@ template <typename JSON> class JsonPathTest : public ScannerTest {
return Parser(&driver_)();
}
};
using MyTypes = ::testing::Types<JsonType>;
using MyTypes = ::testing::Types<JsonType, FlatJson>;
TYPED_TEST_SUITE(JsonPathTest, MyTypes);
#define NEXT_TOK(tok_enum) \
@ -145,19 +187,26 @@ TYPED_TEST(JsonPathTest, Parser) {
}
TYPED_TEST(JsonPathTest, Root) {
if constexpr (std::is_same_v<TypeParam, FlatJson>) {
return; // TODO
}
TypeParam json = ValidJson<TypeParam>(R"({"foo" : 1, "bar": "str" })");
ASSERT_EQ(0, this->Parse("$"));
Path path = this->driver_.TakePath();
int called = 0;
EvaluatePath(path, json, [&](optional<string_view>, const TypeParam& val) {
++called;
ASSERT_TRUE(val.is_object());
ASSERT_TRUE(is_object(val));
ASSERT_EQ(json, val);
});
ASSERT_EQ(1, called);
}
TYPED_TEST(JsonPathTest, Functions) {
if constexpr (std::is_same_v<TypeParam, FlatJson>) {
return; // TODO
}
ASSERT_EQ(0, this->Parse("max($.plays[*].score)"));
Path path = this->driver_.TakePath();
ASSERT_EQ(4, path.size());
@ -176,6 +225,10 @@ TYPED_TEST(JsonPathTest, Functions) {
}
TYPED_TEST(JsonPathTest, Descent) {
if constexpr (std::is_same_v<TypeParam, FlatJson>) {
return; // TODO
}
EXPECT_EQ(0, this->Parse("$..foo"));
Path path = this->driver_.TakePath();
ASSERT_EQ(2, path.size());
@ -194,6 +247,10 @@ TYPED_TEST(JsonPathTest, Descent) {
}
TYPED_TEST(JsonPathTest, Path) {
if constexpr (std::is_same_v<TypeParam, FlatJson>) {
return; // TODO
}
Path path;
TypeParam json = ValidJson<TypeParam>(R"({"v11":{ "f" : 1, "a2": [0]}, "v12": {"f": 2, "a2": [1]},
"v13": 3
@ -237,6 +294,10 @@ TYPED_TEST(JsonPathTest, Path) {
}
TYPED_TEST(JsonPathTest, EvalDescent) {
if constexpr (std::is_same_v<TypeParam, FlatJson>) {
return; // TODO
}
TypeParam json = ValidJson<TypeParam>(R"(
{"v11":{ "f" : 1, "a2": [0]}, "v12": {"f": 2, "v21": {"f": 3, "a2": [1]}},
"v13": { "a2" : { "b" : {"f" : 4}}}
@ -250,9 +311,9 @@ TYPED_TEST(JsonPathTest, EvalDescent) {
path.emplace_back(SegmentType::IDENTIFIER, "a2");
EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {
EXPECT_EQ("a2", key);
if (val.is_array()) {
if (is_array(val)) {
++called_arr;
} else if (val.is_object()) {
} else if (is_object(val)) {
++called_obj;
} else {
FAIL() << "Unexpected type";
@ -277,16 +338,25 @@ TYPED_TEST(JsonPathTest, EvalDescent) {
path.pop_back();
path.emplace_back(SegmentType::IDENTIFIER, "a");
using jsoncons::json_type;
vector<json_type> arr;
vector<char> arr;
auto gettype = [](const TypeParam& p) {
if (is_array(p))
return 'a';
return is_object(p) ? 'o' : 'u';
};
EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {
arr.push_back(val.type());
arr.push_back(gettype(val));
ASSERT_EQ("a", key);
});
ASSERT_THAT(arr, ElementsAre(json_type::array_value, json_type::object_value));
ASSERT_THAT(arr, ElementsAre('a', 'o'));
}
TYPED_TEST(JsonPathTest, Wildcard) {
if constexpr (std::is_same_v<TypeParam, FlatJson>) {
return; // TODO
}
ASSERT_EQ(0, this->Parse("$[*]"));
Path path = this->driver_.TakePath();
ASSERT_EQ(1, path.size());
@ -294,51 +364,55 @@ TYPED_TEST(JsonPathTest, Wildcard) {
TypeParam json = ValidJson<TypeParam>(R"([1, 2, 3])");
vector<int> arr;
EvaluatePath(path, json, [&](optional<string_view> key, const JsonType& val) {
EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {
ASSERT_FALSE(key);
arr.push_back(val.as<int>());
arr.push_back(to_int(val));
});
ASSERT_THAT(arr, ElementsAre(1, 2, 3));
}
TYPED_TEST(JsonPathTest, Mutate) {
JsonType json = ValidJson<TypeParam>(R"([1, 2, 3, 5, 6])");
ASSERT_EQ(0, this->Parse("$[*]"));
Path path = this->driver_.TakePath();
MutateCallback cb = [&](optional<string_view>, JsonType* val) {
int intval = val->as<int>();
*val = intval + 1;
return false;
};
MutatePath(path, cb, &json);
vector<int> arr;
for (auto& el : json.array_range()) {
arr.push_back(el.as<int>());
// Currently this code compiles only for JsonType.
if constexpr (std::is_same_v<TypeParam, JsonType>) {
TypeParam json = ValidJson<TypeParam>(R"([1, 2, 3, 5, 6])");
MutateCallback cb = [&](optional<string_view>, JsonType* val) {
int intval = val->as<int>();
*val = intval + 1;
return false;
};
MutatePath(path, cb, &json);
vector<int> arr;
for (JsonType& el : json.array_range()) {
arr.push_back(to_int(el));
}
ASSERT_THAT(arr, ElementsAre(2, 3, 4, 6, 7));
json = ValidJson<TypeParam>(R"(
{"a":[7], "inner": {"a": {"bool": true, "c": 42}}}
)");
ASSERT_EQ(0, this->Parse("$..a.*"));
path = this->driver_.TakePath();
MutatePath(
path,
[&](optional<string_view> key, JsonType* val) {
if (val->is_int64() && !key) { // array element
*val = 42;
return false;
}
if (val->is_bool()) {
*val = false;
return false;
}
return true;
},
&json);
ASSERT_EQ(R"({"a":[42],"inner":{"a":{"bool":false}}})", json.to_string());
}
ASSERT_THAT(arr, ElementsAre(2, 3, 4, 6, 7));
json = ValidJson<TypeParam>(R"(
{"a":[7], "inner": {"a": {"bool": true, "c": 42}}}
)");
ASSERT_EQ(0, this->Parse("$..a.*"));
path = this->driver_.TakePath();
MutatePath(
path,
[&](optional<string_view> key, JsonType* val) {
if (val->is_int64() && !key) { // array element
*val = 42;
return false;
}
if (val->is_bool()) {
*val = false;
return false;
}
return true;
},
&json);
ASSERT_EQ(R"({"a":[42],"inner":{"a":{"bool":false}}})", json.to_string());
}
} // namespace dfly::json

View file

@ -8,6 +8,7 @@
#include <absl/types/span.h>
#include "base/logging.h"
#include "core/json/detail/flat_dfs.h"
#include "core/json/detail/jsoncons_dfs.h"
#include "core/json/jsonpath_grammar.hh"
#include "src/core/json/driver.h"
@ -19,6 +20,7 @@ using nonstd::make_unexpected;
namespace dfly::json {
using detail::Dfs;
using detail::FlatDfs;
namespace {
@ -55,7 +57,14 @@ void PathSegment::Evaluate(const JsonType& json) const {
func->Apply(json);
}
JsonType PathSegment::GetResult() const {
// Feeds a flexbuffer value to the aggregation function held by this segment.
// Only valid for FUNCTION segments that hold a non-null function.
void PathSegment::Evaluate(flexbuffers::Reference json) const {
  CHECK(type() == SegmentType::FUNCTION);

  const auto& holder = std::get<shared_ptr<AggFunction>>(value_);
  AggFunction* func = holder.get();
  CHECK(func);

  func->Apply(json);
}
AggFunction::Result PathSegment::GetResult() const {
CHECK(type() == SegmentType::FUNCTION);
const auto& func = std::get<shared_ptr<AggFunction>>(value_).get();
CHECK(func);
@ -85,7 +94,17 @@ void EvaluatePath(const Path& path, const JsonType& json, PathCallback callback)
} else {
Dfs::Traverse(path_tail, json, [&](auto, const JsonType& val) { func_segment.Evaluate(val); });
}
callback(nullopt, func_segment.GetResult());
AggFunction::Result res = func_segment.GetResult();
JsonType val = visit( // Transform the result to JsonType.
Overloaded{
[](monostate) { return JsonType::null(); },
[&](double d) { return JsonType(d); },
[&](int64_t i) { return JsonType(i); },
},
res);
callback(nullopt, val);
}
nonstd::expected<json::Path, string> ParsePath(string_view path) {
@ -116,4 +135,51 @@ unsigned MutatePath(const Path& path, MutateCallback callback, JsonType* json) {
return dfs.matches();
}
// Flat json path evaluation.
//
// Evaluates `path` against the flexbuffer `json` and reports the matches through `callback`:
//  - an empty path reports the root itself;
//  - a path that does not start with a FUNCTION segment is traversed as is;
//  - for `func($.somepath)` the rest of the path is traversed, every match is fed to the
//    function, and the aggregated result is reported once, without a key.
void EvaluatePath(const Path& path, flexbuffers::Reference json, PathFlatCallback callback) {
  if (path.empty()) {  // root node
    callback(nullopt, json);
    return;
  }

  if (path.front().type() != SegmentType::FUNCTION) {
    FlatDfs::Traverse(path, json, std::move(callback));
    return;
  }

  // Handling the case of `func($.somepath)`
  // We pass our own callback to gather all the results and then call the function.
  absl::Span<const PathSegment> path_tail(path.data() + 1, path.size() - 1);
  const PathSegment& func_segment = path.front();

  if (path_tail.empty()) {
    LOG(DFATAL) << "Invalid path";  // parser should not allow this.
  } else {
    FlatDfs::Traverse(path_tail, json,
                      [&](auto, flexbuffers::Reference val) { func_segment.Evaluate(val); });
  }

  AggFunction::Result res = func_segment.GetResult();

  // `val` below refers to bytes held by fbb, so the builder must stay alive
  // until the callback has returned.
  flexbuffers::Builder fbb;
  flexbuffers::Reference val = visit(  // Transform the result to a flexbuffer reference.
      Overloaded{
          [](monostate) { return flexbuffers::Reference{}; },
          [&](double d) {
            fbb.Double(d);
            fbb.Finish();
            return flexbuffers::GetRoot(fbb.GetBuffer());
          },
          [&](int64_t i) {
            fbb.Int(i);
            fbb.Finish();
            return flexbuffers::GetRoot(fbb.GetBuffer());
          },
      },
      res);

  callback(nullopt, val);
}
} // namespace dfly::json

View file

@ -28,6 +28,7 @@ const char* SegmentName(SegmentType type);
class AggFunction {
public:
using Result = std::variant<std::monostate, double, int64_t>;
virtual ~AggFunction() {
}
@ -36,14 +37,20 @@ class AggFunction {
valid_ = ApplyImpl(src);
}
// Feeds a flexbuffer value to the aggregation.
// Once ApplyImpl has reported a failure (valid_ == 0) further values are ignored.
void Apply(flexbuffers::Reference src) {
  if (valid_ == 0)
    return;
  valid_ = ApplyImpl(src);
}
// returns null if Apply was not called or ApplyImpl failed.
JsonType GetResult() const {
return valid_ == 1 ? GetResultImpl() : JsonType::null();
Result GetResult() const {
return valid_ == 1 ? GetResultImpl() : Result{};
}
protected:
virtual bool ApplyImpl(const JsonType& src) = 0;
virtual JsonType GetResultImpl() const = 0;
virtual bool ApplyImpl(flexbuffers::Reference src) = 0;
virtual Result GetResultImpl() const = 0;
int valid_ = -1;
};
@ -77,7 +84,8 @@ class PathSegment {
}
void Evaluate(const JsonType& json) const;
JsonType GetResult() const;
void Evaluate(flexbuffers::Reference json) const;
AggFunction::Result GetResult() const;
private:
SegmentType type_;
@ -99,6 +107,9 @@ using MutateCallback = absl::FunctionRef<bool(std::optional<std::string_view>, J
void EvaluatePath(const Path& path, const JsonType& json, PathCallback callback);
// Same as above but for flexbuffers.
void EvaluatePath(const Path& path, flexbuffers::Reference json, PathFlatCallback callback);
// returns number of matches found with the given path.
unsigned MutatePath(const Path& path, MutateCallback callback, JsonType* json);
nonstd::expected<Path, std::string> ParsePath(std::string_view path);