diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 33d7fd57d..a7c0dd4eb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -9,6 +9,64 @@ add_third_party(
           /luaconf.h ${THIRD_PARTY_LIB_DIR}/lua/include
 )
 
+# Stores in the caller's variable `out_dir` the directory under ROOT_GEN_DIR that mirrors the
+# current source directory (taken relative to PROJECT_SOURCE_DIR) and creates that directory
+# at configure time.
+function(cur_gen_dir out_dir)
+  file(RELATIVE_PATH _rel_folder "${PROJECT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}")
+
+  set(_tmp_dir ${ROOT_GEN_DIR}/${_rel_folder})
+  set(${out_dir} ${_tmp_dir} PARENT_SCOPE)
+  file(MAKE_DIRECTORY ${_tmp_dir})
+endfunction()
+
+# Root directory for generated sources; its src/ subdirectory is added to the include path.
+set(ROOT_GEN_DIR ${CMAKE_SOURCE_DIR}/genfiles)
+file(MAKE_DIRECTORY ${ROOT_GEN_DIR})
+include_directories(${ROOT_GEN_DIR}/src)
+
+# Adds a build rule that runs bison on ${name}.y (in the current source directory) and
+# produces ${name}.cc and ${name}.hh in the directory returned by cur_gen_dir().
+function(gen_bison name)
+  get_filename_component(_in ${name}.y ABSOLUTE)
+  cur_gen_dir(gen_dir)
+  set(full_path_cc ${gen_dir}/${name}.cc ${gen_dir}/${name}.hh)
+
+  # The output directory is created with `cmake -E make_directory` rather than a shell
+  # `mkdir -p`, which is not available on every platform.
+  add_custom_command(
+    OUTPUT ${full_path_cc}
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${gen_dir}
+    COMMAND bison --language=c++ -o ${gen_dir}/${name}.cc ${name}.y
+    DEPENDS ${_in}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMENT "Generating parser from ${name}.y" VERBATIM)
+  # Flag exactly the files listed as OUTPUT above.
+  set_source_files_properties(${full_path_cc} PROPERTIES GENERATED TRUE)
+endfunction()
+
+# Adds a build rule that runs flex in C++ mode on ${name}.lex and produces ${name}.cc in the
+# directory returned by cur_gen_dir().
+function(gen_flex name)
+  get_filename_component(_in ${name}.lex ABSOLUTE)
+  cur_gen_dir(gen_dir)
+
+  set(full_path_cc ${gen_dir}/${name}.cc)
+  # NOTE(review): nothing in this rule produces ${name}.ih; the removal below looks like a
+  # leftover and is kept only to preserve behavior - confirm and drop.
+  add_custom_command(
+    OUTPUT ${full_path_cc}
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${gen_dir}
+    COMMAND ${CMAKE_COMMAND} -E remove ${gen_dir}/${name}.ih
+    COMMAND flex -o ${gen_dir}/${name}.cc --c++ ${_in}
+    DEPENDS ${_in}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMENT "Generating lexer from ${name}.lex" VERBATIM)
+
+  # Flag exactly the file listed as OUTPUT above.
+  set_source_files_properties(${full_path_cc} PROPERTIES GENERATED TRUE)
+endfunction()
+
 add_third_party(
   dconv
   URL https://github.com/google/double-conversion/archive/refs/tags/v3.2.0.tar.gz
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 
50d95479f..24c16cf92 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -19,3 +19,6 @@ cxx_test(json_test dfly_core TRDP::jsoncons LABELS DFLY)
 cxx_test(simple_lru_counter_test dfly_core LABELS DFLY)
 cxx_test(string_set_test dfly_core LABELS DFLY)
 cxx_test(string_map_test dfly_core LABELS DFLY)
+
+# Query parser library built from flex/bison sources.
+add_subdirectory(search)
diff --git a/src/core/search/CMakeLists.txt b/src/core/search/CMakeLists.txt
new file mode 100644
index 000000000..1b0b17640
--- /dev/null
+++ b/src/core/search/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Prerequisite (Debian/Ubuntu): apt install libfl-dev
+
+# Rules that generate lexer.cc from lexer.lex and parser.cc/parser.hh from parser.y.
+gen_flex(lexer)
+gen_bison(parser)
+
+# Directory the generated sources above are written to.
+cur_gen_dir(gen_dir)
+
+add_library(query_parser query_driver.cc ${gen_dir}/parser.cc ${gen_dir}/lexer.cc)
+target_link_libraries(query_parser glog)
+cxx_test(search_parser_test query_parser)
diff --git a/src/core/search/lexer.lex b/src/core/search/lexer.lex
new file mode 100644
index 000000000..4d8e88be6
--- /dev/null
+++ b/src/core/search/lexer.lex
@@ -0,0 +1,63 @@
+%{
+  #include <cerrno>
+  #include <climits>
+  #include <cstdlib>
+  #include <string>
+  #include "core/search/query_driver.h"
+
+  // Define main lexer function. QueryDriver is the shared state between scanner and parser
+  #undef YY_DECL
+  #define YY_DECL auto dfly::search::Scanner::ParserLex(QueryDriver& driver) -> Parser::symbol_type
+%}
+
+%option noyywrap nounput noinput batch debug
+%option yyclass="dfly::search::Scanner"
+%option c++
+
+/* Declarations before lexer implementation. */
+%{
+  // A number symbol corresponding to the value in S.
+  using dfly::search::Parser;
+
+  Parser::symbol_type make_NUMBER (const std::string &s, const Parser::location_type& loc);
+%}
+
+int   [0-9]+
+blank [ \t\r]
+
+%{
+  // Code run each time a pattern is matched.
+  # define YY_USER_ACTION loc.columns (yyleng);
+%}
+
+%%
+
+%{
+  // A handy shortcut to the location held by the driver.
+  auto& loc = driver.location;
+  // Code run each time yylex is called.
+  loc.step ();
+%}
+
+{blank}+   loc.step ();
+
+\n         loc.lines (yyleng); loc.step ();
+
+"("        return Parser::make_LPAREN (loc);
+")"        return Parser::make_RPAREN (loc);
+  /* A term stops at blanks, newlines and parentheses so the rules above can still match. */
+{int}      return make_NUMBER (yytext, loc);
+[^ \t\r\n()]+ return Parser::make_TERM (yytext, loc);
+
+<<EOF>>    return Parser::make_YYEOF(loc);
+%%
+// Converts the matched digits to a NUMBER token; throws Parser::syntax_error if out of int range.
+Parser::symbol_type
+make_NUMBER (const std::string &s, const Parser::location_type& loc)
+{
+  errno = 0;  // strtol signals overflow only through errno, so clear it before the call.
+  const long n = strtol (s.c_str(), nullptr, 10);
+  if (errno == ERANGE || n < INT_MIN || n > INT_MAX)
+    throw Parser::syntax_error (loc, "integer is out of range: " + s);
+  return Parser::make_NUMBER (static_cast<int>(n), loc);
+}
diff --git a/src/core/search/parser.y b/src/core/search/parser.y
new file mode 100644
index 000000000..8f3e04908
--- /dev/null
+++ b/src/core/search/parser.y
@@ -0,0 +1,75 @@
+%skeleton "lalr1.cc" // -*- C++ -*-
+%require "3.5.1" // That's what's present on ubuntu 20.04.
+
+%defines // %header starts from 3.8.1
+
+%define api.namespace {dfly::search}
+
+%define api.token.raw
+%define api.token.constructor
+%define api.value.type variant
+%define api.parser.class {Parser}
+%define parse.assert
+
+// Added to header file before parser declaration.
+%code requires {
+  namespace dfly {
+  namespace search {
+    class QueryDriver;
+  }
+  }
+}
+
+// Added to cc file
+%code {
+#include "core/search/query_driver.h"
+
+#define yylex driver.scanner()->ParserLex
+}
+
+// Only for parser
+%param { QueryDriver& driver }
+
+%locations
+
+%define parse.trace
+%define parse.error verbose // detailed
+%define parse.lac full
+%define api.token.prefix {TOK_}
+
+%token
+  LPAREN "("
+  RPAREN ")"
+;
+// Bison 3.5.1 has no predefined YYEOF, so bind it explicitly to token number 0 (end of input).
+%token YYEOF 0
+%token <std::string> TERM "term"
+%token <int> NUMBER "number"
+%nterm bool_expr  // NOTE(review): actions stream $1 of bool_expr, so it needs a <type> tag - confirm the intended type.
+
+%printer { yyo << $$; } <*>;
+
+%%
+%start input;
+
+input:
+  %empty
+  | bool_expr
+  {
+    std::cout << $1 << std::endl;
+  }
+  ;
+
+bool_expr: TERM {
+  std::cout << $1 << std::endl;
+} | TERM bool_expr {
+  std::cout << $1 << std::endl;
+}
+
+%%
+
+void
+dfly::search::Parser::error(const location_type& l, const std::string& m)
+{
+  std::cerr << l << ": " << m << '\n';
+}
diff --git a/src/core/search/query_driver.cc b/src/core/search/query_driver.cc
new file mode 100644
index 000000000..ea080c9b6
--- /dev/null
+++ b/src/core/search/query_driver.cc
@@ -0,0 +1,19 @@
+// Copyright 2023, Roman Gershman. All rights reserved.
+// See LICENSE for licensing terms.
+//
+
+#include "core/search/query_driver.h"
+
+namespace dfly {
+namespace search {
+
+// Creates the driver together with the Scanner instance it owns.
+QueryDriver::QueryDriver() : scanner_(std::make_unique<Scanner>()) {
+}
+
+// Defined out of line; the owned scanner is released by the unique_ptr member.
+QueryDriver::~QueryDriver() = default;
+
+}  // namespace search
+
+}  // namespace dfly
diff --git a/src/core/search/query_driver.h b/src/core/search/query_driver.h
new file mode 100644
index 000000000..ff3f7ee83
--- /dev/null
+++ b/src/core/search/query_driver.h
@@ -0,0 +1,32 @@
+// Copyright 2023, Roman Gershman. All rights reserved.
+// See LICENSE for licensing terms.
+//
+
+#pragma once
+
+#include <memory>
+
+#include "core/search/parser.hh"
+#include "core/search/scanner.h"
+
+namespace dfly {
+
+namespace search {
+// State shared by the scanner and the parser: owns the Scanner and carries the current location.
+class QueryDriver {
+ public:
+  QueryDriver();
+  ~QueryDriver();
+  // Returns a non-owning pointer to the scanner held by this driver.
+  Scanner* scanner() {
+    return scanner_.get();
+  }
+  // Location object that the lexer accesses as driver.location.
+  Parser::location_type location;
+
+ private:
+  std::unique_ptr<Scanner> scanner_;
+};
+
+}  // namespace search
+}  // namespace dfly
diff --git a/src/core/search/scanner.h b/src/core/search/scanner.h
new file mode 100644
index 000000000..f6cbfc723
--- /dev/null
+++ b/src/core/search/scanner.h
@@ -0,0 +1,38 @@
+// Copyright 2023, Roman Gershman. All rights reserved.
+// See LICENSE for licensing terms.
+//
+
+#pragma once
+
+// Include the flex C++ base class header unless yyFlexLexerOnce is already defined.
+#if !defined(yyFlexLexerOnce)
+#include <FlexLexer.h>
+#endif
+
+#include <string>
+
+#include "core/search/parser.hh"
+
+namespace dfly {
+namespace search {
+
+class QueryDriver;
+
+// Scanner built on flex's yyFlexLexer that hands tokens to the bison-generated Parser.
+class Scanner : public yyFlexLexer {
+ public:
+  Scanner() {
+  }
+
+  // Returns the next token as a Parser symbol. The body is generated from lexer.lex,
+  // which maps YY_DECL onto this member function.
+  Parser::symbol_type ParserLex(QueryDriver& drv);
+
+  // Returns a copy of the text held in yytext.
+  std::string matched() const {
+    return yytext;
+  }
+};
+
+}  // namespace search
+}  // namespace dfly
diff --git a/src/core/search/search_parser_test.cc b/src/core/search/search_parser_test.cc
new file mode 100644
index 000000000..828154a77
--- /dev/null
+++ b/src/core/search/search_parser_test.cc
@@ -0,0 +1,43 @@
+// Copyright 2023, Roman Gershman. All rights reserved.
+// See LICENSE for licensing terms.
+//
+#include "base/gtest.h"
+#include "core/search/query_driver.h"
+
+namespace dfly {
+namespace search {
+
+// Fixture that owns a QueryDriver and feeds its scanner from an in-memory string stream.
+// Lex() returns the next token produced by the scanner.
+class SearchParserTest : public ::testing::Test {
+ protected:
+  SearchParserTest() {
+  }
+
+  void SetInput(const std::string& str) {
+    istr_.str(str);
+    query_driver_.scanner()->switch_streams(&istr_);
+  }
+
+  Parser::symbol_type Lex() {
+    return query_driver_.scanner()->ParserLex(query_driver_);
+  }
+
+  // Declared before the driver so the stream outlives the scanner that points at it.
+  std::istringstream istr_;
+  QueryDriver query_driver_;
+};
+
+TEST_F(SearchParserTest, Scanner) {
+  SetInput("ab cd");
+  Parser::symbol_type tok = Lex();
+
+  // Bison 3.5.1 does not have name() method.
+  // EXPECT_STREQ("term", tok.name());
+  EXPECT_EQ(tok.type_get(), Parser::token::TOK_TERM);
+  EXPECT_EQ("ab", tok.value.as<std::string>());
+}
+
+}  // namespace search
+
+}  // namespace dfly