mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-11 02:15:45 +02:00
feat: Add skeleton code for parser/lexer generation (#1126)
Add initial test for the scanner. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
parent
7f56a435c4
commit
51c5bb7273
9 changed files with 318 additions and 0 deletions
|
@ -9,6 +9,53 @@ add_third_party(
|
||||||
<SOURCE_DIR>/luaconf.h ${THIRD_PARTY_LIB_DIR}/lua/include
|
<SOURCE_DIR>/luaconf.h ${THIRD_PARTY_LIB_DIR}/lua/include
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# cur_gen_dir(<out_var>)
#
# Computes the directory under ROOT_GEN_DIR that mirrors the current source
# directory (relative to PROJECT_SOURCE_DIR), creates it at configure time and
# stores its path in <out_var> in the caller's scope.
function(cur_gen_dir out_dir)
  # Path of the current directory relative to the project root.
  file(RELATIVE_PATH rel_to_root "${PROJECT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}")
  set(mirrored_dir ${ROOT_GEN_DIR}/${rel_to_root})

  file(MAKE_DIRECTORY ${mirrored_dir})
  set(${out_dir} ${mirrored_dir} PARENT_SCOPE)
endfunction()
|
||||||
|
|
||||||
|
# Root of the tree that receives generated sources; cur_gen_dir() mirrors the
# source layout underneath it.
set(ROOT_GEN_DIR "${CMAKE_SOURCE_DIR}/genfiles")
file(MAKE_DIRECTORY "${ROOT_GEN_DIR}")

# Puts the generated "src" subtree on the include path so generated headers
# can be included by their path relative to it.
include_directories("${ROOT_GEN_DIR}/src")
|
||||||
|
|
||||||
|
# gen_bison(<name>)
#
# Adds a build rule that runs bison on <name>.y from the current source
# directory and writes the C++ parser <name>.cc and its header <name>.hh into
# the directory returned by cur_gen_dir(). The rule only runs when a target
# lists one of the generated files as a source.
function(gen_bison name)
  get_filename_component(_in ${name}.y ABSOLUTE)
  cur_gen_dir(gen_dir)
  set(gen_files ${gen_dir}/${name}.cc ${gen_dir}/${name}.hh)

  # `cmake -E make_directory` is the portable equivalent of `mkdir -p`.
  add_custom_command(
    OUTPUT ${gen_files}
    COMMAND ${CMAKE_COMMAND} -E make_directory ${gen_dir}
    COMMAND bison --language=c++ -o ${gen_dir}/${name}.cc ${name}.y
    DEPENDS ${_in}
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    COMMENT "Generating parser from ${name}.y" VERBATIM)

  # Mark the files the rule really writes (under gen_dir), not same-named
  # paths relative to the source directory.
  set_source_files_properties(${gen_files} PROPERTIES GENERATED TRUE)
endfunction()
|
||||||
|
|
||||||
|
# gen_flex(<name>)
#
# Adds a build rule that runs flex in C++ mode on <name>.lex from the current
# source directory and writes the scanner <name>.cc into the directory
# returned by cur_gen_dir(). The rule only runs when a target lists the
# generated file as a source.
function(gen_flex name)
  get_filename_component(_in ${name}.lex ABSOLUTE)
  cur_gen_dir(gen_dir)
  set(full_path_cc ${gen_dir}/${name}.cc)

  # `cmake -E make_directory` is the portable equivalent of `mkdir -p`.
  # `remove -f` states explicitly that a missing <name>.ih is not an error.
  # NOTE(review): flex is not asked to write <name>.ih here; this cleanup looks
  # like a leftover - confirm before dropping it.
  add_custom_command(
    OUTPUT ${full_path_cc}
    COMMAND ${CMAKE_COMMAND} -E make_directory ${gen_dir}
    COMMAND ${CMAKE_COMMAND} -E remove -f ${gen_dir}/${name}.ih
    COMMAND flex -o ${gen_dir}/${name}.cc --c++ ${_in}
    DEPENDS ${_in}
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    COMMENT "Generating lexer from ${name}.lex" VERBATIM)

  # Only the scanner source is produced by the rule above.
  set_source_files_properties(${full_path_cc} PROPERTIES GENERATED TRUE)
endfunction()
|
||||||
|
|
||||||
add_third_party(
|
add_third_party(
|
||||||
dconv
|
dconv
|
||||||
URL https://github.com/google/double-conversion/archive/refs/tags/v3.2.0.tar.gz
|
URL https://github.com/google/double-conversion/archive/refs/tags/v3.2.0.tar.gz
|
||||||
|
|
|
@ -19,3 +19,5 @@ cxx_test(json_test dfly_core TRDP::jsoncons LABELS DFLY)
|
||||||
cxx_test(simple_lru_counter_test dfly_core LABELS DFLY)
|
cxx_test(simple_lru_counter_test dfly_core LABELS DFLY)
|
||||||
cxx_test(string_set_test dfly_core LABELS DFLY)
|
cxx_test(string_set_test dfly_core LABELS DFLY)
|
||||||
cxx_test(string_map_test dfly_core LABELS DFLY)
|
cxx_test(string_map_test dfly_core LABELS DFLY)
|
||||||
|
|
||||||
|
add_subdirectory(search)
|
||||||
|
|
10
src/core/search/CMakeLists.txt
Normal file
10
src/core/search/CMakeLists.txt
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
# apt install libfl-dev
|
||||||
|
|
||||||
|
# Build rules that turn lexer.lex and parser.y into C++ sources.
gen_flex(lexer)
gen_bison(parser)

# Directory in which the rules above place their output.
cur_gen_dir(gen_dir)

add_library(query_parser
  query_driver.cc
  ${gen_dir}/parser.cc
  ${gen_dir}/lexer.cc
)
target_link_libraries(query_parser glog)

cxx_test(search_parser_test query_parser)
|
60
src/core/search/lexer.lex
Normal file
60
src/core/search/lexer.lex
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
%{
|
||||||
|
#include <climits>
|
||||||
|
#include "core/search/query_driver.h"
|
||||||
|
|
||||||
|
// Define main lexer function. QueryDriver is the shared state between scanner and parser
|
||||||
|
#undef YY_DECL
|
||||||
|
#define YY_DECL auto dfly::search::Scanner::ParserLex(QueryDriver& driver) -> Parser::symbol_type
|
||||||
|
%}
|
||||||
|
|
||||||
|
%option noyywrap nounput noinput batch debug
/* Must name the class that derives from yyFlexLexer with its full namespace. */
%option yyclass="dfly::search::Scanner"
%option c++
|
||||||
|
|
||||||
|
/* Declarations before lexer implementation. */
|
||||||
|
%{
|
||||||
|
// A number symbol corresponding to the value in S.
|
||||||
|
using dfly::search::Parser;
|
||||||
|
|
||||||
|
Parser::symbol_type make_NUMBER (const std::string &s, const Parser::location_type& loc);
|
||||||
|
%}
|
||||||
|
|
||||||
|
int [0-9]+
|
||||||
|
blank [ \t\r]
|
||||||
|
|
||||||
|
%{
|
||||||
|
// Code run each time a pattern is matched.
|
||||||
|
# define YY_USER_ACTION loc.columns (yyleng);
|
||||||
|
%}
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
%{
|
||||||
|
// A handy shortcut to the location held by the driver.
|
||||||
|
auto& loc = driver.location;
|
||||||
|
// Code run each time yylex is called.
|
||||||
|
loc.step ();
|
||||||
|
%}
|
||||||
|
|
||||||
|
{blank}+   loc.step ();  /* skip horizontal whitespace */

\n         loc.lines (yyleng); loc.step ();  /* advance the line counter */

"("        return Parser::make_LPAREN (loc);
")"        return Parser::make_RPAREN (loc);

{int}      return make_NUMBER (yytext, loc);
[^ \t\r\n()]+ return Parser::make_TERM (yytext, loc);  /* stops at blanks, newlines and parentheses so those rules still fire */

<<EOF>>    return Parser::make_YYEOF(loc);
|
||||||
|
%%
|
||||||
|
|
||||||
|
// Converts the decimal string `s` into a NUMBER token located at `loc`.
// Throws Parser::syntax_error when the value does not fit into an int.
Parser::symbol_type
make_NUMBER (const std::string &s, const Parser::location_type& loc)
{
  errno = 0;
  const long parsed = strtol (s.c_str(), NULL, 10);
  // strtol reports overflow of `long` through errno.
  const bool overflowed = (errno == ERANGE);
  if (overflowed || parsed < INT_MIN || parsed > INT_MAX)
    throw Parser::syntax_error (loc, "integer is out of range: " + s);
  return Parser::make_NUMBER (static_cast<int>(parsed), loc);
}
|
75
src/core/search/parser.y
Normal file
75
src/core/search/parser.y
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
%skeleton "lalr1.cc" // -*- C++ -*-
|
||||||
|
%require "3.5.1" // That's what's present on ubuntu 20.04.
|
||||||
|
|
||||||
|
%defines // %header starts from 3.8.1
|
||||||
|
|
||||||
|
%define api.namespace {dfly::search}
|
||||||
|
|
||||||
|
%define api.token.raw
|
||||||
|
%define api.token.constructor
|
||||||
|
%define api.value.type variant
|
||||||
|
%define api.parser.class {Parser}
|
||||||
|
%define parse.assert
|
||||||
|
|
||||||
|
// Added to header file before parser declaration.
|
||||||
|
%code requires {
|
||||||
|
namespace dfly {
|
||||||
|
namespace search {
|
||||||
|
class QueryDriver;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Added to cc file
|
||||||
|
%code {
|
||||||
|
#include "core/search/query_driver.h"
|
||||||
|
|
||||||
|
#define yylex driver.scanner()->ParserLex
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extra argument passed to both the parser and yylex.
|
||||||
|
%param { QueryDriver& driver }
|
||||||
|
|
||||||
|
%locations
|
||||||
|
|
||||||
|
%define parse.trace
|
||||||
|
%define parse.error verbose // detailed
|
||||||
|
%define parse.lac full
|
||||||
|
%define api.token.prefix {TOK_}
|
||||||
|
|
||||||
|
%token
|
||||||
|
LPAREN "("
|
||||||
|
RPAREN ")"
|
||||||
|
;
|
||||||
|
|
||||||
|
%token YYEOF
|
||||||
|
%token <std::string> TERM "term"
|
||||||
|
%token <int> NUMBER "number"
|
||||||
|
%nterm <int> bool_expr
|
||||||
|
|
||||||
|
%printer { yyo << $$; } <*>;
|
||||||
|
|
||||||
|
%%
|
||||||
|
%start input;
|
||||||
|
|
||||||
|
input:
  %empty
  | bool_expr { std::cout << $1 << std::endl; }
  ;

// One or more TERM tokens; each action prints the leading term.
bool_expr:
  TERM { std::cout << $1 << std::endl; }
  | TERM bool_expr { std::cout << $1 << std::endl; }
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
void
|
||||||
|
dfly::search::Parser::error(const location_type& l, const std::string& m)
|
||||||
|
{
|
||||||
|
std::cerr << l << ": " << m << '\n';
|
||||||
|
}
|
18
src/core/search/query_driver.cc
Normal file
18
src/core/search/query_driver.cc
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
// Copyright 2023, Roman Gershman. All rights reserved.
|
||||||
|
// See LICENSE for licensing terms.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "core/search/query_driver.h"
|
||||||
|
|
||||||
|
namespace dfly {
|
||||||
|
namespace search {
|
||||||
|
|
||||||
|
// Creates the driver together with the scanner it owns.
QueryDriver::QueryDriver() : scanner_(new Scanner()) {
}

// Defined out of line, next to the constructor.
QueryDriver::~QueryDriver() = default;
|
||||||
|
|
||||||
|
} // namespace search
|
||||||
|
|
||||||
|
} // namespace dfly
|
32
src/core/search/query_driver.h
Normal file
32
src/core/search/query_driver.h
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
// Copyright 2023, Roman Gershman. All rights reserved.
|
||||||
|
// See LICENSE for licensing terms.
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "core/search/parser.hh"
|
||||||
|
#include "core/search/scanner.h"
|
||||||
|
|
||||||
|
namespace dfly {
|
||||||
|
|
||||||
|
namespace search {
|
||||||
|
|
||||||
|
class QueryDriver {
|
||||||
|
public:
|
||||||
|
QueryDriver();
|
||||||
|
~QueryDriver();
|
||||||
|
|
||||||
|
Scanner* scanner() {
|
||||||
|
return scanner_.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
Parser::location_type location;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unique_ptr<Scanner> scanner_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace search
|
||||||
|
} // namespace dfly
|
31
src/core/search/scanner.h
Normal file
31
src/core/search/scanner.h
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
// Copyright 2023, Roman Gershman. All rights reserved.
|
||||||
|
// See LICENSE for licensing terms.
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if !defined(yyFlexLexerOnce)
|
||||||
|
#include <FlexLexer.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "core/search/parser.hh"
|
||||||
|
|
||||||
|
namespace dfly {
|
||||||
|
namespace search {
|
||||||
|
|
||||||
|
class QueryDriver;
|
||||||
|
|
||||||
|
class Scanner : public yyFlexLexer {
|
||||||
|
public:
|
||||||
|
Scanner() {
|
||||||
|
}
|
||||||
|
|
||||||
|
Parser::symbol_type ParserLex(QueryDriver& drv);
|
||||||
|
|
||||||
|
std::string matched() {
|
||||||
|
return yytext;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace search
|
||||||
|
} // namespace dfly
|
43
src/core/search/search_parser_test.cc
Normal file
43
src/core/search/search_parser_test.cc
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
// Copyright 2023, Roman Gershman. All rights reserved.
|
||||||
|
// See LICENSE for licensing terms.
|
||||||
|
//
|
||||||
|
#include "base/gtest.h"
|
||||||
|
#include "core/search/query_driver.h"
|
||||||
|
|
||||||
|
namespace dfly {
|
||||||
|
namespace search {
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
class SearchParserTest : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
SearchParserTest() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetInput(const std::string& str) {
|
||||||
|
istr_.str(str);
|
||||||
|
query_driver_.scanner()->switch_streams(&istr_);
|
||||||
|
}
|
||||||
|
|
||||||
|
Parser::symbol_type Lex() {
|
||||||
|
return query_driver_.scanner()->ParserLex(query_driver_);
|
||||||
|
}
|
||||||
|
|
||||||
|
QueryDriver query_driver_;
|
||||||
|
|
||||||
|
std::istringstream istr_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// The first token scanned from "ab cd" must be the TERM "ab".
TEST_F(SearchParserTest, Scanner) {
  SetInput("ab cd");

  Parser::symbol_type first = Lex();

  // Bison 3.5.1 has no symbol_type::name(), so compare the token kind
  // instead of its printable name.
  EXPECT_EQ(first.type_get(), Parser::token::TOK_TERM);
  EXPECT_EQ("ab", first.value.as<string>());
}
|
||||||
|
|
||||||
|
} // namespace search
|
||||||
|
|
||||||
|
} // namespace dfly
|
Loading…
Add table
Add a link
Reference in a new issue