chore: provide plumbing for jsonpath error propagation (#2567)

* chore: provide plumbing for jsonpath error propagation

Also update the RE/flex library to the latest version.
Finally, introduce a very basic parser test.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>

* feat: construct Path object

Parse jsonpath and partially fill it with path segments.
---------

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2024-02-12 13:24:31 +02:00 committed by GitHub
parent 9802f2c489
commit 6cd2f05a22
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 163 additions and 13 deletions

View file

@ -63,7 +63,7 @@ add_third_party(
# RE/flex: fetched as a tagged release archive, configured through its own
# configure script with AVX disabled, and installed under
# ${THIRD_PARTY_LIB_DIR}/reflex. The project's C and C++ compilers are passed
# to configure through CC/CXX.
add_third_party(
reflex
URL https://github.com/Genivia/RE-flex/archive/refs/tags/v3.3.2.tar.gz
URL https://github.com/Genivia/RE-flex/archive/refs/tags/v3.5.1.tar.gz
CONFIGURE_COMMAND <SOURCE_DIR>/configure --disable-avx --prefix=${THIRD_PARTY_LIB_DIR}/reflex
CXX=${CMAKE_CXX_COMPILER} CC=${CMAKE_C_COMPILER}
)

View file

@ -4,19 +4,27 @@
#include "src/core/json/driver.h"
#include "base/logging.h"
#include "src/core/json/lexer_impl.h"
using namespace std;
namespace dfly::json {
// Constructs the driver together with its own, freshly created Lexer.
Driver::Driver() : lexer_(std::make_unique<Lexer>()) {
Driver::Driver() : lexer_(make_unique<Lexer>()) {
}
// Intentionally empty. NOTE(review): presumably defined here rather than in the
// header because this file includes lexer_impl.h, so Lexer is a complete type
// when the unique_ptr member is destroyed - confirm.
Driver::~Driver() {
}
// Installs `str` as the text to scan: the string is moved into cur_str_, the
// stored copy is passed to the lexer through in(), and any path segments
// collected so far are dropped.
void Driver::SetInput(std::string str) {
void Driver::SetInput(string str) {
// Store the text in a member before handing it to the lexer.
// NOTE(review): presumably the lexer keeps reading from cur_str_ rather than
// copying it, which is why the driver has to own the string - confirm.
cur_str_ = std::move(str);
lexer_->in(cur_str_);
path_.Clear();
}
void Driver::ResetScanner() {
lexer_ = make_unique<Lexer>();
}
} // namespace dfly::json

View file

@ -6,24 +6,85 @@
#include <memory>
#include <string>
#include <vector>
namespace dfly {
namespace json {
class Lexer;
class location; // from jsonpath_grammar.hh
// Kind of a single step in a parsed JSONPath expression.
enum class SegmentType {
  IDENTIFIER = 1,  // $.identifier
  INDEX = 2,       // $.array[0]
  WILDCARD = 3,    // $.array[*] or $.*
};

// One step of a JSONPath: its kind plus the name attached to it, if any.
class PathSegment {
 public:
  // Creates a segment of the given kind. `identifier` defaults to an empty
  // string for callers that have no name to attach.
  PathSegment(SegmentType type, std::string identifier = std::string())
      : type_(type), identifier_(std::move(identifier)) {
  }

  // Kind of this segment.
  SegmentType type() const {
    return type_;
  }

  // Name stored in this segment; empty when none was supplied.
  const std::string& identifier() const {
    return identifier_;
  }

 private:
  SegmentType type_;
  std::string identifier_;

  // No constructor sets this field yet. It is zero-initialized so that the
  // implicit copy/move operations (used whenever a segment is stored in a
  // container) never read an indeterminate value.
  int index_ = 0;
};
class Path {
public:
void AddSegment(PathSegment segment) {
segments_.push_back(std::move(segment));
}
size_t size() const {
return segments_.size();
}
const PathSegment& operator[](size_t i) const {
return segments_[i];
}
void Clear() {
segments_.clear();
}
private:
std::vector<PathSegment> segments_;
};
// Parsing context shared by the lexer and the generated parser: owns the Lexer
// and the input string, and accumulates the Path built while parsing.
// Abstract: subclasses provide Error() to decide how errors are reported.
class Driver {
public:
Driver();
~Driver();
virtual ~Driver();
// Non-owning pointer to the lexer; ownership stays with the driver.
Lexer* lexer() {
return lexer_.get();
}
// Sets the text to be scanned and clears the accumulated path.
void SetInput(std::string str);
// Replaces the lexer with a newly constructed one.
void ResetScanner();
// Called by the parser's error function with the location and message of the
// error.
virtual void Error(const location& l, const std::string& msg) = 0;
// Appends an IDENTIFIER segment named `identifier` to the path being built.
void AddIdentifier(const std::string& identifier) {
path_.AddSegment(PathSegment(SegmentType::IDENTIFIER, identifier));
}
// Moves the accumulated path out to the caller. path_ is left in a moved-from
// state afterwards; SetInput() clears it again before the next parse.
Path TakePath() {
return std::move(path_);
}
private:
Path path_;  // segments collected so far
std::string cur_str_;  // text passed to the lexer by SetInput()
std::unique_ptr<Lexer> lexer_;
};

View file

@ -55,21 +55,23 @@ using namespace std;
%token <std::string> UNQ_STR "unquoted string"
%token <unsigned> UINT "integer"
%nterm <std::string> identifier
%%
// Based on the following specification:
// https://danielaparker.github.io/JsonCons.Net/articles/JsonPath/Specification.html
// A path starts with the ROOT token and may continue with a relative location.
jsonpath: ROOT
| ROOT relative_location
// The action after ROOT is intentionally empty: the root adds no segment.
jsonpath: ROOT { /* skip adding root */ } opt_relative_location
// Optional continuation: either empty or a single relative_location.
opt_relative_location:
| relative_location
// A continuation is either DOT followed by a relative_path, or a bracket
// expression enclosed in LBRACKET/RBRACKET.
relative_location: DOT relative_path
| LBRACKET bracket_expr RBRACKET
relative_path: identifier opt_relative_location
// The mid-rule action hands the identifier to the driver (AddIdentifier) before
// the remainder of the path is parsed.
relative_path: identifier { driver->AddIdentifier($1); } opt_relative_location
// NOTE(review): the WILDCARD alternative has no action, so no segment is
// recorded for it here.
| WILDCARD opt_relative_location
opt_relative_location:
| relative_location
identifier: UNQ_STR
// | single_quoted_string | double_quoted_string
@ -84,5 +86,5 @@ index_expr: UINT
// Error function of the generated parser: `l` is the location of the error and
// `m` its message.
void dfly::json::Parser::error(const location_type& l, const string& m)
{
cerr << l << ": " << m << '\n';
// Forward the error to the driver, whose Error() override decides how it is
// reported.
driver->Error(l, m);
}

View file

@ -17,6 +17,8 @@
// our derived class from AbstractLexer
%o class="Lexer"
/* nodefault removes default echo rule */
%o nodefault batch
%option unicode
@ -51,7 +53,8 @@
return Parser::make_UINT(val, loc());
}
\w[\w_\-]* return Parser::make_UNQ_STR(str(), loc());
<<EOF>> printf("EOF%s\n", matcher().text());
<<EOF>> return Parser::make_YYEOF(loc());
. throw Parser::syntax_error(loc(), UnknownTokenMsg());
%%
// Function definitions

View file

@ -2,6 +2,8 @@
// See LICENSE for licensing terms.
//
#include <gmock/gmock.h>
#include "base/gtest.h"
#include "base/logging.h"
#include "core/json/driver.h"
@ -11,6 +13,37 @@ namespace dfly::json {
using namespace std;
using testing::ElementsAre;
// gmock matcher: succeeds when the PathSegment under test has type() equal to
// `value`. Delegates to testing::Property so that match explanations are
// written to result_listener.
MATCHER_P(SegType, value, "") {
return ExplainMatchResult(testing::Property(&PathSegment::type, value), arg, result_listener);
}
// Writes a SegmentType to `os` as " segment(<NAME>)". A value that matches
// none of the enumerators is written with an empty name.
void PrintTo(SegmentType st, std::ostream* os) {
  const char* name = nullptr;
  switch (st) {
    case SegmentType::IDENTIFIER:
      name = "IDENTIFIER";
      break;
    case SegmentType::INDEX:
      name = "INDEX";
      break;
    case SegmentType::WILDCARD:
      name = "WILDCARD";
      break;
  }

  *os << " segment(";
  if (name != nullptr) {
    *os << name;
  }
  *os << ")";
}
// Concrete Driver for the tests: parse errors are only written to the INFO log.
class TestDriver : public Driver {
public:
// Logs the error location and message.
void Error(const location& l, const std::string& msg) final {
LOG(INFO) << "Error at " << l << ": " << msg;
}
};
class JsonPathTest : public ::testing::Test {
protected:
JsonPathTest() {
@ -22,10 +55,24 @@ class JsonPathTest : public ::testing::Test {
}
// Returns the next token produced by the driver's lexer.
Parser::symbol_type Lex() {
return driver_.lexer()->Lex();
// A syntax_error thrown by the lexer is logged and turned into a YYEOF token
// located at the position carried by the exception.
try {
return driver_.lexer()->Lex();
} catch (const Parser::syntax_error& e) {
LOG(INFO) << "Caught exception: " << e.what();
// with later bison versions we can return make_YYerror
return Parser::make_YYEOF(e.location);
}
}
Driver driver_;
int Parse(const std::string& str) {
driver_.ResetScanner();
driver_.SetInput(str);
return Parser(&driver_)();
}
TestDriver driver_;
};
#define NEXT_TOK(tok_enum) \
@ -53,6 +100,25 @@ TEST_F(JsonPathTest, Scanner) {
NEXT_TOK(RBRACKET);
NEXT_TOK(DOT);
NEXT_TOK(WILDCARD);
SetInput("|");
NEXT_TOK(YYEOF);
}
// Checks that malformed inputs are rejected and that a dotted path produces one
// IDENTIFIER segment per name, in order.
TEST_F(JsonPathTest, Parser) {
  // These inputs must make the parser return a non-zero status.
  EXPECT_NE(0, Parse("foo"));
  EXPECT_NE(0, Parse("$foo"));
  EXPECT_NE(0, Parse("$|foo"));

  EXPECT_EQ(0, Parse("$.foo.bar"));
  Path path = driver_.TakePath();

  // TODO: to improve the UX with gmock/c++ magic.
  // Unsigned literal: size() returns size_t, so this avoids a signed/unsigned
  // comparison inside the assertion.
  ASSERT_EQ(2u, path.size());
  EXPECT_THAT(path[0], SegType(SegmentType::IDENTIFIER));
  EXPECT_THAT(path[1], SegType(SegmentType::IDENTIFIER));
  EXPECT_EQ("foo", path[0].identifier());
  EXPECT_EQ("bar", path[1].identifier());
}
} // namespace dfly::json

View file

@ -4,6 +4,10 @@
#include "src/core/json/lexer_impl.h"
#include <absl/strings/str_cat.h>
using namespace std;
namespace dfly::json {
Lexer::Lexer() {
@ -12,4 +16,8 @@ Lexer::Lexer() {
// Empty destructor body: no explicit cleanup is performed here.
Lexer::~Lexer() {
}
// Builds the "Unknown token '<text>'" diagnostic, where <text> is whatever
// text() currently returns.
string Lexer::UnknownTokenMsg() const {
  string message = absl::StrCat("Unknown token '", text(), "'");
  return message;
}
} // namespace dfly::json

View file

@ -26,6 +26,8 @@ class Lexer : public AbstractLexer {
// Short alias used by the scanner rules: returns the result of location() as a
// dfly::json::location.
// NOTE(review): location() is presumably provided by the AbstractLexer base - confirm.
dfly::json::location loc() {
return location();
}
// Returns the error message used when the scanner reports an unknown token.
std::string UnknownTokenMsg() const;
};
} // namespace json