feat: Add skeleton code for parser/lexer generation (#1126)

Add initial test for the scanner.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2023-04-24 08:58:20 +02:00 committed by GitHub
parent 7f56a435c4
commit 51c5bb7273
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 318 additions and 0 deletions

View file

@ -9,6 +9,53 @@ add_third_party(
<SOURCE_DIR>/luaconf.h ${THIRD_PARTY_LIB_DIR}/lua/include
)
# cur_gen_dir(<out_var>)
#
# Maps the current source directory onto the matching directory under
# ROOT_GEN_DIR (same path relative to PROJECT_SOURCE_DIR), creates that
# directory at configure time and returns its path in <out_var> in the
# caller's scope.
function(cur_gen_dir out_dir)
  file(RELATIVE_PATH _subdir "${PROJECT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}")
  set(_gen_path "${ROOT_GEN_DIR}/${_subdir}")
  file(MAKE_DIRECTORY "${_gen_path}")
  set(${out_dir} "${_gen_path}" PARENT_SCOPE)
endfunction()
# Root of the tree that receives generated sources (bison/flex output).
set(ROOT_GEN_DIR "${CMAKE_SOURCE_DIR}/genfiles")
file(MAKE_DIRECTORY "${ROOT_GEN_DIR}")

# Generated headers are included by their path relative to src/,
# e.g. "core/search/parser.hh".
include_directories("${ROOT_GEN_DIR}/src")
# gen_bison(<name>)
#
# Adds a build rule that runs bison on <name>.y from the current source
# directory and writes a C++ parser to <gen_dir>/<name>.cc together with its
# header <gen_dir>/<name>.hh, where <gen_dir> is the directory returned by
# cur_gen_dir(). The rule only runs when a target lists one of the outputs
# among its sources.
function(gen_bison name)
  get_filename_component(_in "${name}.y" ABSOLUTE)
  cur_gen_dir(gen_dir)

  set(_gen_cc "${gen_dir}/${name}.cc")
  set(_gen_hh "${gen_dir}/${name}.hh")

  add_custom_command(
    OUTPUT "${_gen_cc}" "${_gen_hh}"
    # Portable replacement for `mkdir -p`.
    COMMAND ${CMAKE_COMMAND} -E make_directory "${gen_dir}"
    # The grammar is passed by its relative name (resolved through
    # WORKING_DIRECTORY) so that the #line directives bison emits do not
    # contain machine-specific absolute paths.
    COMMAND bison --language=c++ -o "${_gen_cc}" "${name}.y"
    DEPENDS "${_in}"
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
    COMMENT "Generating parser from ${name}.y"
    VERBATIM)

  # Mark the files bison actually writes; the property must name the paths
  # inside gen_dir, not paths relative to the source directory.
  set_source_files_properties("${_gen_cc}" "${_gen_hh}" PROPERTIES GENERATED TRUE)
endfunction()
# gen_flex(<name>)
#
# Adds a build rule that runs flex in C++ mode on <name>.lex from the current
# source directory and writes the scanner to <gen_dir>/<name>.cc, where
# <gen_dir> is the directory returned by cur_gen_dir(). The rule only runs
# when a target lists the output among its sources.
function(gen_flex name)
  get_filename_component(_in "${name}.lex" ABSOLUTE)
  cur_gen_dir(gen_dir)

  set(_gen_cc "${gen_dir}/${name}.cc")

  add_custom_command(
    OUTPUT "${_gen_cc}"
    # Portable replacement for `mkdir -p`.
    COMMAND ${CMAKE_COMMAND} -E make_directory "${gen_dir}"
    COMMAND flex -o "${_gen_cc}" --c++ "${_in}"
    DEPENDS "${_in}"
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
    COMMENT "Generating lexer from ${name}.lex"
    VERBATIM)

  # flex writes a single file here; only that file is marked as generated.
  set_source_files_properties("${_gen_cc}" PROPERTIES GENERATED TRUE)
endfunction()
add_third_party(
dconv
URL https://github.com/google/double-conversion/archive/refs/tags/v3.2.0.tar.gz

View file

@ -19,3 +19,5 @@ cxx_test(json_test dfly_core TRDP::jsoncons LABELS DFLY)
# Unit tests for the core containers; each one links against dfly_core and is labelled DFLY.
cxx_test(simple_lru_counter_test dfly_core LABELS DFLY)
cxx_test(string_set_test dfly_core LABELS DFLY)
cxx_test(string_map_test dfly_core LABELS DFLY)
# Descend into search/, which holds the query lexer/parser sources.
add_subdirectory(search)

View file

@ -0,0 +1,10 @@
# Build prerequisite on Debian/Ubuntu: apt install libfl-dev

# Register the rules that produce lexer.cc and parser.cc/parser.hh in the
# generated-files directory of this folder.
gen_flex(lexer)
gen_bison(parser)

cur_gen_dir(gen_dir)

# Library made of the hand-written driver plus the generated scanner/parser.
add_library(query_parser
  query_driver.cc
  ${gen_dir}/parser.cc
  ${gen_dir}/lexer.cc
)
# PUBLIC keeps the transitive linking the keyword-less form provided.
target_link_libraries(query_parser PUBLIC glog)

cxx_test(search_parser_test query_parser)

60
src/core/search/lexer.lex Normal file
View file

@ -0,0 +1,60 @@
%{
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <string>

#include "core/search/query_driver.h"

// Define main lexer function. QueryDriver is the shared state between scanner and parser
#undef YY_DECL
#define YY_DECL auto dfly::search::Scanner::ParserLex(QueryDriver& driver) -> Parser::symbol_type
%}

%option noyywrap nounput noinput batch debug
%option yyclass="dfly::search::Scanner"
%option c++

/* Declarations before lexer implementation. */
%{
// Converts the matched digit string S into a NUMBER symbol located at LOC.
// Throws Parser::syntax_error when the value does not fit into an int.
using dfly::search::Parser;
Parser::symbol_type make_NUMBER (const std::string &s, const Parser::location_type& loc);
%}

int       [0-9]+
blank     [ \t\r]
/* A term character is anything except blanks, newline and parentheses, so that
   those keep being recognised by their own rules even when adjacent to a term. */
term_char [^ \t\r\n()]

%{
// Code run each time a pattern is matched.
# define YY_USER_ACTION loc.columns (yyleng);
%}

%%

%{
  // A handy shortcut to the location held by the driver.
  auto& loc = driver.location;
  // Code run each time yylex is called.
  loc.step ();
%}

{blank}+      loc.step ();
\n            loc.lines (yyleng); loc.step ();
"("           return Parser::make_LPAREN (loc);
")"           return Parser::make_RPAREN (loc);
{int}         return make_NUMBER (yytext, loc);
{term_char}+  return Parser::make_TERM (yytext, loc);
<<EOF>>       return Parser::make_YYEOF(loc);

%%

Parser::symbol_type
make_NUMBER (const std::string &s, const Parser::location_type& loc)
{
  // strtol reports overflow of long through errno, so reset it first.
  errno = 0;
  long n = strtol (s.c_str(), nullptr, 10);
  // Reject values that overflowed long or do not fit into int.
  if (! (INT_MIN <= n && n <= INT_MAX && errno != ERANGE))
    throw Parser::syntax_error (loc, "integer is out of range: " + s);
  return Parser::make_NUMBER (static_cast<int> (n), loc);
}

75
src/core/search/parser.y Normal file
View file

@ -0,0 +1,75 @@
// Bison grammar skeleton for the search query parser.
// Generates the C++ class dfly::search::Parser; tokens are supplied by
// Scanner::ParserLex (see the yylex redirection below).
%skeleton "lalr1.cc" // -*- C++ -*-
%require "3.5.1" // That's what's present on ubuntu 20.04.
%defines // %header starts from 3.8.1
%define api.namespace {dfly::search}
%define api.token.raw
// The scanner builds complete symbols through the generated make_<TOKEN> functions.
%define api.token.constructor
%define api.value.type variant
%define api.parser.class {Parser}
%define parse.assert
// Added to header file before parser declaration.
%code requires {
namespace dfly {
namespace search {
class QueryDriver;
}
}
}
// Added to cc file
%code {
#include "core/search/query_driver.h"
// Route the parser's yylex calls to the scanner owned by the driver.
#define yylex driver.scanner()->ParserLex
}
// Only for parser
%param { QueryDriver& driver }
%locations
%define parse.trace
%define parse.error verbose // detailed
%define parse.lac full
// Token enumerators are generated with this prefix, e.g. TOK_TERM.
%define api.token.prefix {TOK_}
%token
LPAREN "("
RPAREN ")"
;
%token YYEOF
%token <std::string> TERM "term"
%token <int> NUMBER "number"
%nterm <int> bool_expr
%printer { yyo << $$; } <*>;
%%
%start input;
// A query is either empty or a single bool_expr, whose value is printed to stdout.
input:
%empty
| bool_expr
{
std::cout << $1 << std::endl;
}
;
// One or more TERM tokens (right-recursive). Each action only prints the
// leading TERM to stdout; no action assigns the rule's own semantic value yet.
bool_expr: TERM {
std::cout << $1 << std::endl;
} | TERM bool_expr {
std::cout << $1 << std::endl;
}
%%
// Reports a parse error on stderr as "<location>: <message>".
void
dfly::search::Parser::error(const location_type& l, const std::string& m)
{
std::cerr << l << ": " << m << '\n';
}

View file

@ -0,0 +1,18 @@
// Copyright 2023, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//
#include "core/search/query_driver.h"
namespace dfly {
namespace search {

// Builds the driver together with the Scanner instance it owns.
QueryDriver::QueryDriver() : scanner_(std::make_unique<Scanner>()) {
}

// Defined out of line, in a translation unit where Scanner is a complete type.
QueryDriver::~QueryDriver() = default;

}  // namespace search
}  // namespace dfly

View file

@ -0,0 +1,32 @@
// Copyright 2023, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#include <memory>
#include "core/search/parser.hh"
#include "core/search/scanner.h"
namespace dfly {
namespace search {
// State shared between the scanner and the generated parser: it owns the
// Scanner and holds the current source location.
class QueryDriver {
public:
QueryDriver();
~QueryDriver();
// Returns a non-owning pointer to the scanner held by this driver.
Scanner* scanner() {
return scanner_.get();
}
// Current location in the input; advanced by the scanner while it matches tokens.
Parser::location_type location;
private:
// Scanner instance created in the constructor.
std::unique_ptr<Scanner> scanner_;
};
} // namespace search
} // namespace dfly

31
src/core/search/scanner.h Normal file
View file

@ -0,0 +1,31 @@
// Copyright 2023, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#if !defined(yyFlexLexerOnce)
#include <FlexLexer.h>
#endif
#include "core/search/parser.hh"
namespace dfly {
namespace search {

class QueryDriver;

// Flex-based scanner that hands Bison symbols to the generated Parser.
class Scanner : public yyFlexLexer {
 public:
  Scanner() = default;

  // Scans the next token and returns it as a complete parser symbol.
  // The body is generated by flex from lexer.lex (via YY_DECL).
  Parser::symbol_type ParserLex(QueryDriver& drv);

  // Returns a copy of the text of the most recently matched token.
  std::string matched() {
    return std::string(yytext);
  }
};

}  // namespace search
}  // namespace dfly

View file

@ -0,0 +1,43 @@
// Copyright 2023, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//
#include "base/gtest.h"
#include "core/search/query_driver.h"
namespace dfly {
namespace search {

using namespace std;

// Fixture that feeds a string to the scanner owned by a QueryDriver and
// pulls tokens from it one at a time.
class SearchParserTest : public ::testing::Test {
 protected:
  SearchParserTest() {
  }

  // Replaces the scanner input with `str`. May be called more than once.
  void SetInput(const std::string& str) {
    // str() does not reset the stream state, so clear the eof/fail bits left
    // behind once a previous input has been fully consumed.
    istr_.clear();
    istr_.str(str);
    query_driver_.scanner()->switch_streams(&istr_);
  }

  // Scans and returns the next token from the current input.
  Parser::symbol_type Lex() {
    return query_driver_.scanner()->ParserLex(query_driver_);
  }

  QueryDriver query_driver_;
  std::istringstream istr_;
};

TEST_F(SearchParserTest, Scanner) {
  SetInput("ab cd");
  Parser::symbol_type tok = Lex();

  // 3.5.1 does not have name() method.
  // EXPECT_STREQ("term", tok.name());
  EXPECT_EQ(tok.type_get(), Parser::token::TOK_TERM);
  EXPECT_EQ("ab", tok.value.as<string>());
}

}  // namespace search
}  // namespace dfly