mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-10 18:05:44 +02:00
chore: basic syntax parsing added for suffix and infix search
This commit is contained in:
parent
561c0a4733
commit
01e72683b6
6 changed files with 139 additions and 60 deletions
|
@ -20,7 +20,12 @@ AstTermNode::AstTermNode(string term) : term{std::move(term)} {
|
|||
}
|
||||
|
||||
AstPrefixNode::AstPrefixNode(string prefix) : prefix{std::move(prefix)} {
|
||||
this->prefix.pop_back();
|
||||
}
|
||||
|
||||
// Moves the given string into the `suffix` member; no further processing is done here.
AstSuffixNode::AstSuffixNode(string suffix) : suffix{std::move(suffix)} {
|
||||
}
|
||||
|
||||
// Moves the given string into the `infix` member; no further processing is done here.
AstInfixNode::AstInfixNode(string infix) : infix{std::move(infix)} {
|
||||
}
|
||||
|
||||
AstRangeNode::AstRangeNode(double lo, bool lo_excl, double hi, bool hi_excl)
|
||||
|
|
|
@ -38,6 +38,18 @@ struct AstPrefixNode {
|
|||
std::string prefix;
|
||||
};
|
||||
|
||||
// Holds the text of a suffix query
struct AstSuffixNode {
|
||||
explicit AstSuffixNode(std::string suffix);
|
||||
|
||||
// Text carried by the node, stored exactly as passed to the constructor
std::string suffix;
|
||||
};
|
||||
|
||||
// Holds the text of an infix query
struct AstInfixNode {
|
||||
explicit AstInfixNode(std::string infix);
|
||||
|
||||
// Text carried by the node, stored exactly as passed to the constructor
std::string infix;
|
||||
};
|
||||
|
||||
// Matches numeric range
|
||||
struct AstRangeNode {
|
||||
AstRangeNode(double lo, bool lo_excl, double hi, bool hi_excl);
|
||||
|
@ -73,7 +85,7 @@ struct AstFieldNode {
|
|||
|
||||
// Stores a list of tags for a tag query
|
||||
struct AstTagsNode {
|
||||
using TagValue = std::variant<AstTermNode, AstPrefixNode>;
|
||||
using TagValue = std::variant<AstTermNode, AstPrefixNode, AstSuffixNode, AstInfixNode>;
|
||||
|
||||
struct TagValueProxy
|
||||
: public AstTagsNode::TagValue { // bison needs it to be default constructible
|
||||
|
@ -83,6 +95,10 @@ struct AstTagsNode {
|
|||
}
|
||||
TagValueProxy(AstTermNode tv) : AstTagsNode::TagValue(std::move(tv)) {
|
||||
}
|
||||
TagValueProxy(AstSuffixNode tv) : AstTagsNode::TagValue(std::move(tv)) {
|
||||
}
|
||||
TagValueProxy(AstInfixNode tv) : AstTagsNode::TagValue(std::move(tv)) {
|
||||
}
|
||||
};
|
||||
|
||||
AstTagsNode(TagValue);
|
||||
|
@ -111,9 +127,10 @@ struct AstKnnNode {
|
|||
std::optional<float> ef_runtime;
|
||||
};
|
||||
|
||||
using NodeVariants = std::variant<std::monostate, AstStarNode, AstStarFieldNode, AstTermNode,
|
||||
AstPrefixNode, AstRangeNode, AstNegateNode, AstLogicalNode,
|
||||
AstFieldNode, AstTagsNode, AstKnnNode>;
|
||||
using NodeVariants =
|
||||
std::variant<std::monostate, AstStarNode, AstStarFieldNode, AstTermNode, AstPrefixNode,
|
||||
AstSuffixNode, AstInfixNode, AstRangeNode, AstNegateNode, AstLogicalNode,
|
||||
AstFieldNode, AstTagsNode, AstKnnNode>;
|
||||
|
||||
struct AstNode : public NodeVariants {
|
||||
using variant::variant;
|
||||
|
|
|
@ -26,17 +26,19 @@
|
|||
using dfly::search::Parser;
|
||||
using namespace std;
|
||||
|
||||
// Selects which token make_Tag emits: PREFIX, SUFFIX or INFIX map to the token of the
// same name, REGULAR maps to TAG_VAL. It also decides whether a leading and/or trailing
// '*' is excluded from the processed text.
enum class TagType { PREFIX, SUFFIX, INFIX, REGULAR };
|
||||
|
||||
Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc);
|
||||
Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::location_type& loc);
|
||||
Parser::symbol_type make_Tag(string_view src, TagType type, const Parser::location_type& loc);
|
||||
%}
|
||||
|
||||
dq \"
|
||||
sq \'
|
||||
esc_chars ['"\?\\abfnrtv]
|
||||
esc_seq \\{esc_chars}
|
||||
term_char \w
|
||||
tag_val_char {term_char}|\\[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]
|
||||
asterisk_char \*
|
||||
dq \"
|
||||
sq \'
|
||||
esc_chars ['"\?\\abfnrtv]
|
||||
esc_seq \\{esc_chars}
|
||||
term_ch \w
|
||||
tag_val_ch {term_ch}|\\[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]
|
||||
astrsk_ch \*
|
||||
|
||||
|
||||
%{
|
||||
|
@ -67,21 +69,25 @@ asterisk_char \*
|
|||
"AS" return Parser::make_AS (loc());
|
||||
"EF_RUNTIME" return Parser::make_EF_RUNTIME (loc());
|
||||
|
||||
[0-9]{1,9} return Parser::make_UINT32(str(), loc());
|
||||
[+-]?(([0-9]*[.])?[0-9]+|inf) return Parser::make_DOUBLE(str(), loc());
|
||||
[0-9]{1,9} return Parser::make_UINT32(str(), loc());
|
||||
[+-]?(([0-9]*[.])?[0-9]+|inf) return Parser::make_DOUBLE(str(), loc());
|
||||
|
||||
{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc());
|
||||
{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc());
|
||||
{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc());
|
||||
{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc());
|
||||
|
||||
"$"{term_char}+ return ParseParam(str(), loc());
|
||||
"@"{term_char}+ return Parser::make_FIELD(str(), loc());
|
||||
{term_char}+{asterisk_char} return Parser::make_PREFIX(str(), loc());
|
||||
"$"{term_ch}+ return ParseParam(str(), loc());
|
||||
"@"{term_ch}+ return Parser::make_FIELD(str(), loc());
|
||||
{term_ch}+{astrsk_ch} return Parser::make_PREFIX(string{matched_view(0, 1)}, loc());
|
||||
{astrsk_ch}{term_ch}+ return Parser::make_SUFFIX(string{matched_view(1, 0)}, loc());
|
||||
{astrsk_ch}{term_ch}+{astrsk_ch} return Parser::make_INFIX(string{matched_view(1, 1)}, loc());
|
||||
|
||||
{term_char}+ return Parser::make_TERM(str(), loc());
|
||||
{tag_val_char}+{asterisk_char} return make_TagVal(str(), true, loc());
|
||||
{tag_val_char}+ return make_TagVal(str(), false, loc());
|
||||
{term_ch}+ return Parser::make_TERM(str(), loc());
|
||||
{tag_val_ch}+{astrsk_ch} return make_Tag(str(), TagType::PREFIX, loc());
|
||||
{astrsk_ch}{tag_val_ch}+ return make_Tag(str(), TagType::SUFFIX, loc());
|
||||
{astrsk_ch}{tag_val_ch}+{astrsk_ch} return make_Tag(str(), TagType::INFIX, loc());
|
||||
{tag_val_ch}+ return make_Tag(str(), TagType::REGULAR, loc());
|
||||
|
||||
<<EOF>> return Parser::make_YYEOF(loc());
|
||||
<<EOF>> return Parser::make_YYEOF(loc());
|
||||
%%
|
||||
|
||||
Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc) {
|
||||
|
@ -92,14 +98,20 @@ Parser::symbol_type make_StringLit(string_view src, const Parser::location_type&
|
|||
return Parser::make_TERM(res, loc);
|
||||
}
|
||||
|
||||
Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::location_type& loc) {
|
||||
Parser::symbol_type make_Tag(string_view src, TagType type, const Parser::location_type& loc) {
|
||||
string res;
|
||||
res.reserve(src.size());
|
||||
|
||||
bool escaped = false;
|
||||
size_t len = is_prefix ? src.size() - 1 : src.size(); // Exclude the '*' at the end for prefix
|
||||
// Determine processing boundaries
|
||||
size_t start = (type == TagType::SUFFIX || type == TagType::INFIX) ? 1 : 0;
|
||||
size_t end = src.size();
|
||||
if (type == TagType::PREFIX || type == TagType::INFIX) {
|
||||
end--; // Skip the last '*' character
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
// Handle escaping
|
||||
bool escaped = false;
|
||||
for (size_t i = start; i < end; ++i) {
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
} else if (src[i] == '\\') {
|
||||
|
@ -109,11 +121,16 @@ Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::l
|
|||
res.push_back(src[i]);
|
||||
}
|
||||
|
||||
// Add '*' back for prefix
|
||||
if (is_prefix) {
|
||||
res.push_back('*');
|
||||
return Parser::make_PREFIX(res, loc);
|
||||
// Return the appropriate token type
|
||||
switch (type) {
|
||||
case TagType::PREFIX:
|
||||
return Parser::make_PREFIX(res, loc);
|
||||
case TagType::SUFFIX:
|
||||
return Parser::make_SUFFIX(res, loc);
|
||||
case TagType::INFIX:
|
||||
return Parser::make_INFIX(res, loc);
|
||||
case TagType::REGULAR:
|
||||
default:
|
||||
return Parser::make_TAG_VAL(res, loc);
|
||||
}
|
||||
|
||||
return Parser::make_TAG_VAL(res, loc);
|
||||
}
|
||||
|
|
|
@ -67,7 +67,7 @@ double toDouble(string_view src);
|
|||
|
||||
// Needed 0 at the end to satisfy bison 3.5.1
|
||||
%token YYEOF 0
|
||||
%token <std::string> TERM "term" TAG_VAL "tag_val" PARAM "param" FIELD "field" PREFIX "prefix"
|
||||
%token <std::string> TERM "term" TAG_VAL "tag_val" PARAM "param" FIELD "field" PREFIX "prefix" SUFFIX "suffix" INFIX "infix"
|
||||
|
||||
%precedence TERM TAG_VAL
|
||||
%left OR_OP
|
||||
|
@ -134,24 +134,26 @@ search_or_expr:
|
|||
| search_expr OR_OP search_unary_expr { $$ = AstLogicalNode(std::move($1), std::move($3), AstLogicalNode::OR); }
|
||||
|
||||
search_unary_expr:
|
||||
LPAREN search_expr RPAREN { $$ = std::move($2); }
|
||||
LPAREN search_expr RPAREN { $$ = std::move($2); }
|
||||
| NOT_OP search_unary_expr { $$ = AstNegateNode(std::move($2)); }
|
||||
| TERM { $$ = AstTermNode(std::move($1)); }
|
||||
| TERM { $$ = AstTermNode(std::move($1)); }
|
||||
| PREFIX { $$ = AstPrefixNode(std::move($1)); }
|
||||
| UINT32 { $$ = AstTermNode(std::move($1)); }
|
||||
| SUFFIX { $$ = AstSuffixNode(std::move($1)); }
|
||||
| INFIX { $$ = AstInfixNode(std::move($1)); }
|
||||
| UINT32 { $$ = AstTermNode(std::move($1)); }
|
||||
| FIELD COLON field_cond { $$ = AstFieldNode(std::move($1), std::move($3)); }
|
||||
|
||||
field_cond:
|
||||
TERM { $$ = AstTermNode(std::move($1)); }
|
||||
| UINT32 { $$ = AstTermNode(std::move($1)); }
|
||||
| STAR { $$ = AstStarFieldNode(); }
|
||||
TERM { $$ = AstTermNode(std::move($1)); }
|
||||
| UINT32 { $$ = AstTermNode(std::move($1)); }
|
||||
| STAR { $$ = AstStarFieldNode(); }
|
||||
| NOT_OP field_cond { $$ = AstNegateNode(std::move($2)); }
|
||||
| LPAREN field_cond_expr RPAREN { $$ = std::move($2); }
|
||||
| LBRACKET numeric_filter_expr RBRACKET { $$ = std::move($2); }
|
||||
| LCURLBR tag_list RCURLBR { $$ = std::move($2); }
|
||||
|
||||
numeric_filter_expr:
|
||||
opt_lparen generic_number opt_lparen generic_number { $$ = AstRangeNode($2, $1, $4, $3); }
|
||||
opt_lparen generic_number opt_lparen generic_number { $$ = AstRangeNode($2, $1, $4, $3); }
|
||||
| opt_lparen generic_number COMMA opt_lparen generic_number { $$ = AstRangeNode($2, $1, $5, $4); }
|
||||
|
||||
generic_number:
|
||||
|
@ -163,9 +165,9 @@ opt_lparen:
|
|||
| LPAREN { $$ = true; }
|
||||
|
||||
field_cond_expr:
|
||||
field_unary_expr { $$ = std::move($1); }
|
||||
| field_and_expr { $$ = std::move($1); }
|
||||
| field_or_expr { $$ = std::move($1); }
|
||||
field_unary_expr { $$ = std::move($1); }
|
||||
| field_and_expr { $$ = std::move($1); }
|
||||
| field_or_expr { $$ = std::move($1); }
|
||||
|
||||
field_and_expr:
|
||||
field_unary_expr field_unary_expr %prec AND_OP { $$ = AstLogicalNode(std::move($1), std::move($2), AstLogicalNode::AND); }
|
||||
|
@ -176,21 +178,23 @@ field_or_expr:
|
|||
| field_cond_expr OR_OP field_and_expr { $$ = AstLogicalNode(std::move($1), std::move($3), AstLogicalNode::OR); }
|
||||
|
||||
field_unary_expr:
|
||||
LPAREN field_cond_expr RPAREN { $$ = std::move($2); }
|
||||
| NOT_OP field_unary_expr { $$ = AstNegateNode(std::move($2)); }
|
||||
| TERM { $$ = AstTermNode(std::move($1)); }
|
||||
| UINT32 { $$ = AstTermNode(std::move($1)); }
|
||||
LPAREN field_cond_expr RPAREN { $$ = std::move($2); }
|
||||
| NOT_OP field_unary_expr { $$ = AstNegateNode(std::move($2)); }
|
||||
| TERM { $$ = AstTermNode(std::move($1)); }
|
||||
| UINT32 { $$ = AstTermNode(std::move($1)); }
|
||||
|
||||
tag_list:
|
||||
tag_list_element { $$ = AstTagsNode(std::move($1)); }
|
||||
tag_list_element { $$ = AstTagsNode(std::move($1)); }
|
||||
| tag_list OR_OP tag_list_element { $$ = AstTagsNode(std::move($1), std::move($3)); }
|
||||
|
||||
tag_list_element:
|
||||
TERM { $$ = AstTermNode(std::move($1)); }
|
||||
TERM { $$ = AstTermNode(std::move($1)); }
|
||||
| PREFIX { $$ = AstPrefixNode(std::move($1)); }
|
||||
| UINT32 { $$ = AstTermNode(std::move($1)); }
|
||||
| DOUBLE { $$ = AstTermNode(std::move($1)); }
|
||||
| TAG_VAL { $$ = AstTermNode(std::move($1)); }
|
||||
| SUFFIX { $$ = AstSuffixNode(std::move($1)); }
|
||||
| INFIX { $$ = AstInfixNode(std::move($1)); }
|
||||
| UINT32 { $$ = AstTermNode(std::move($1)); }
|
||||
| DOUBLE { $$ = AstTermNode(std::move($1)); }
|
||||
| TAG_VAL { $$ = AstTermNode(std::move($1)); }
|
||||
|
||||
|
||||
%%
|
||||
|
|
|
@ -120,6 +120,12 @@ struct ProfileBuilder {
|
|||
void operator()(std::string* out, const AstPrefixNode& node) const {
|
||||
out->append(node.prefix);
|
||||
}
|
||||
// Appends the suffix text of `node` to `*out`.
void operator()(std::string* out, const AstSuffixNode& node) const {
|
||||
out->append(node.suffix);
|
||||
}
|
||||
// Appends the infix text of `node` to `*out`.
void operator()(std::string* out, const AstInfixNode& node) const {
|
||||
out->append(node.infix);
|
||||
}
|
||||
void operator()(std::string* out, const AstTermNode& node) const {
|
||||
out->append(node.term);
|
||||
}
|
||||
|
@ -131,6 +137,8 @@ struct ProfileBuilder {
|
|||
[](monostate) -> string { return ""s; },
|
||||
[](const AstTermNode& n) { return absl::StrCat("Term{", n.term, "}"); },
|
||||
[](const AstPrefixNode& n) { return absl::StrCat("Prefix{", n.prefix, "}"); },
|
||||
[](const AstSuffixNode& n) { return absl::StrCat("Suffix{", n.suffix, "}"); },
|
||||
[](const AstInfixNode& n) { return absl::StrCat("Infix{", n.infix, "}"); },
|
||||
[](const AstRangeNode& n) { return absl::StrCat("Range{", n.lo, "<>", n.hi, "}"); },
|
||||
[](const AstLogicalNode& n) {
|
||||
auto op = n.op == AstLogicalNode::AND ? "and" : "or";
|
||||
|
@ -268,6 +276,18 @@ struct BasicSearch {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Placeholder for suffix matching against a string index.
// Currently neither `index` nor `suffix` is used: the function always returns a
// default-constructed IndexResult.
template <typename C>
|
||||
IndexResult CollectSuffixMatches(BaseStringIndex<C>* index, std::string_view suffix) {
|
||||
// TODO: Implement full text search for suffix
|
||||
return IndexResult{};
|
||||
}
|
||||
|
||||
// Placeholder for infix matching against a string index.
// Currently neither `index` nor `infix` is used: the function always returns a
// default-constructed IndexResult.
template <typename C>
|
||||
IndexResult CollectInfixMatches(BaseStringIndex<C>* index, std::string_view infix) {
|
||||
// TODO: Implement full text search for infix
|
||||
return IndexResult{};
|
||||
}
|
||||
|
||||
IndexResult Search(monostate, string_view) {
|
||||
return vector<DocId>{};
|
||||
}
|
||||
|
@ -346,6 +366,16 @@ struct BasicSearch {
|
|||
return UnifyResults(GetSubResults(indices, mapping), LogicOp::OR);
|
||||
}
|
||||
|
||||
// [*suffix]: not implemented yet. Ignores `node` and `active_field` and returns a
// default-constructed IndexResult.
IndexResult Search(const AstSuffixNode& node, string_view active_field) {
|
||||
// TODO: Implement full text search for suffix
|
||||
return IndexResult{};
|
||||
}
|
||||
|
||||
// [*infix*]: not implemented yet. Ignores `node` and `active_field` and returns a
// default-constructed IndexResult.
IndexResult Search(const AstInfixNode& node, string_view active_field) {
|
||||
// TODO: Implement full text search for infix
|
||||
return IndexResult{};
|
||||
}
|
||||
|
||||
// [range]: access field's numeric index
|
||||
IndexResult Search(const AstRangeNode& node, string_view active_field) {
|
||||
DCHECK(!active_field.empty());
|
||||
|
@ -392,6 +422,12 @@ struct BasicSearch {
|
|||
},
|
||||
[tag_index, this](const AstPrefixNode& prefix) {
|
||||
return CollectPrefixMatches(tag_index, prefix.prefix);
|
||||
},
|
||||
[tag_index, this](const AstSuffixNode& suffix) {
|
||||
return CollectSuffixMatches(tag_index, suffix.suffix);
|
||||
},
|
||||
[tag_index, this](const AstInfixNode& infix) {
|
||||
return CollectInfixMatches(tag_index, infix.infix);
|
||||
}};
|
||||
auto mapping = [ov](const auto& tag) { return visit(ov, tag); };
|
||||
return UnifyResults(GetSubResults(node.tags, mapping), LogicOp::OR);
|
||||
|
|
|
@ -155,7 +155,7 @@ TEST_F(SearchParserTest, Scanner) {
|
|||
|
||||
// Prefix simple
|
||||
SetInput("pre*");
|
||||
NEXT_EQ(TOK_PREFIX, string, "pre*");
|
||||
NEXT_EQ(TOK_PREFIX, string, "pre");
|
||||
|
||||
// TODO: uncomment when we support escaped terms
|
||||
// Prefix escaped (redis doesn't support quoted prefix matches)
|
||||
|
@ -167,7 +167,7 @@ TEST_F(SearchParserTest, Scanner) {
|
|||
NEXT_EQ(TOK_FIELD, string, "@color");
|
||||
NEXT_TOK(TOK_COLON);
|
||||
NEXT_TOK(TOK_LCURLBR);
|
||||
NEXT_EQ(TOK_PREFIX, string, "prefix*");
|
||||
NEXT_EQ(TOK_PREFIX, string, "prefix");
|
||||
NEXT_TOK(TOK_RCURLBR);
|
||||
|
||||
// Prefix escaped star
|
||||
|
@ -196,28 +196,28 @@ TEST_F(SearchParserTest, EscapedTagPrefixes) {
|
|||
NEXT_EQ(TOK_FIELD, string, "@name");
|
||||
NEXT_TOK(TOK_COLON);
|
||||
NEXT_TOK(TOK_LCURLBR);
|
||||
NEXT_EQ(TOK_PREFIX, string, "escape-err*");
|
||||
NEXT_EQ(TOK_PREFIX, string, "escape-err");
|
||||
NEXT_TOK(TOK_RCURLBR);
|
||||
|
||||
SetInput("@name:{escape\\+pre*}");
|
||||
NEXT_EQ(TOK_FIELD, string, "@name");
|
||||
NEXT_TOK(TOK_COLON);
|
||||
NEXT_TOK(TOK_LCURLBR);
|
||||
NEXT_EQ(TOK_PREFIX, string, "escape+pre*");
|
||||
NEXT_EQ(TOK_PREFIX, string, "escape+pre");
|
||||
NEXT_TOK(TOK_RCURLBR);
|
||||
|
||||
SetInput("@name:{escape\\.pre*}");
|
||||
NEXT_EQ(TOK_FIELD, string, "@name");
|
||||
NEXT_TOK(TOK_COLON);
|
||||
NEXT_TOK(TOK_LCURLBR);
|
||||
NEXT_EQ(TOK_PREFIX, string, "escape.pre*");
|
||||
NEXT_EQ(TOK_PREFIX, string, "escape.pre");
|
||||
NEXT_TOK(TOK_RCURLBR);
|
||||
|
||||
SetInput("@name:{complex\\-escape\\+with\\.many\\*chars*}");
|
||||
NEXT_EQ(TOK_FIELD, string, "@name");
|
||||
NEXT_TOK(TOK_COLON);
|
||||
NEXT_TOK(TOK_LCURLBR);
|
||||
NEXT_EQ(TOK_PREFIX, string, "complex-escape+with.many*chars*");
|
||||
NEXT_EQ(TOK_PREFIX, string, "complex-escape+with.many*chars");
|
||||
NEXT_TOK(TOK_RCURLBR);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue