This commit is contained in:
Volodymyr Yavdoshenko 2025-05-09 15:58:08 +00:00 committed by GitHub
commit 20088040eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 183 additions and 86 deletions

View file

@ -16,13 +16,6 @@ using namespace std;
namespace dfly::search {
AstTermNode::AstTermNode(string term) : term{std::move(term)} {
}
AstPrefixNode::AstPrefixNode(string prefix) : prefix{std::move(prefix)} {
this->prefix.pop_back();
}
AstRangeNode::AstRangeNode(double lo, bool lo_excl, double hi, bool hi_excl)
: lo{lo_excl ? nextafter(lo, hi) : lo}, hi{hi_excl ? nextafter(hi, lo) : hi} {
}

View file

@ -12,6 +12,7 @@
#include <vector>
#include "core/search/base.h"
#include "core/search/tag_types.h"
namespace dfly {
@ -25,18 +26,17 @@ struct AstStarNode {};
// Matches all documents where this field has a non-null value
struct AstStarFieldNode {};
// Matches terms in text fields
struct AstTermNode {
explicit AstTermNode(std::string term);
template <TagType T> struct AstAffixNode {
explicit AstAffixNode(std::string affix) : affix{std::move(affix)} {
}
std::string term;
std::string affix;
};
struct AstPrefixNode {
explicit AstPrefixNode(std::string prefix);
std::string prefix;
};
using AstTermNode = AstAffixNode<TagType::REGULAR>;
using AstPrefixNode = AstAffixNode<TagType::PREFIX>;
using AstSuffixNode = AstAffixNode<TagType::SUFFIX>;
using AstInfixNode = AstAffixNode<TagType::INFIX>;
// Matches numeric range
struct AstRangeNode {
@ -73,7 +73,7 @@ struct AstFieldNode {
// Stores a list of tags for a tag query
struct AstTagsNode {
using TagValue = std::variant<AstTermNode, AstPrefixNode>;
using TagValue = std::variant<AstTermNode, AstPrefixNode, AstSuffixNode, AstInfixNode>;
struct TagValueProxy
: public AstTagsNode::TagValue { // bison needs it to be default constructible
@ -83,6 +83,10 @@ struct AstTagsNode {
}
TagValueProxy(AstTermNode tv) : AstTagsNode::TagValue(std::move(tv)) {
}
TagValueProxy(AstSuffixNode tv) : AstTagsNode::TagValue(std::move(tv)) {
}
TagValueProxy(AstInfixNode tv) : AstTagsNode::TagValue(std::move(tv)) {
}
};
AstTagsNode(TagValue);
@ -111,9 +115,10 @@ struct AstKnnNode {
std::optional<float> ef_runtime;
};
using NodeVariants = std::variant<std::monostate, AstStarNode, AstStarFieldNode, AstTermNode,
AstPrefixNode, AstRangeNode, AstNegateNode, AstLogicalNode,
AstFieldNode, AstTagsNode, AstKnnNode>;
using NodeVariants =
std::variant<std::monostate, AstStarNode, AstStarFieldNode, AstTermNode, AstPrefixNode,
AstSuffixNode, AstInfixNode, AstRangeNode, AstNegateNode, AstLogicalNode,
AstFieldNode, AstTagsNode, AstKnnNode>;
struct AstNode : public NodeVariants {
using variant::variant;

View file

@ -1,6 +1,7 @@
%top{
// Our lexer need to know about Parser::symbol_type
#include "core/search/parser.hh"
#include "core/search/tag_types.h" // Include TagType enum
}
%{
@ -25,18 +26,19 @@
// A number symbol corresponding to the value in S.
using dfly::search::Parser;
using namespace std;
using dfly::search::TagType;
Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc);
Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::location_type& loc);
Parser::symbol_type make_Tag(string_view src, TagType type, const Parser::location_type& loc);
%}
dq \"
sq \'
esc_chars ['"\?\\abfnrtv]
esc_seq \\{esc_chars}
term_char \w
tag_val_char {term_char}|\\[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]
asterisk_char \*
dq \"
sq \'
esc_chars ['"\?\\abfnrtv]
esc_seq \\{esc_chars}
term_ch \w
tag_val_ch {term_ch}|\\[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]
astrsk_ch \*
%{
@ -67,21 +69,25 @@ asterisk_char \*
"AS" return Parser::make_AS (loc());
"EF_RUNTIME" return Parser::make_EF_RUNTIME (loc());
[0-9]{1,9} return Parser::make_UINT32(str(), loc());
[+-]?(([0-9]*[.])?[0-9]+|inf) return Parser::make_DOUBLE(str(), loc());
[0-9]{1,9} return Parser::make_UINT32(str(), loc());
[+-]?(([0-9]*[.])?[0-9]+|inf) return Parser::make_DOUBLE(str(), loc());
{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc());
{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc());
{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc());
{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc());
"$"{term_char}+ return ParseParam(str(), loc());
"@"{term_char}+ return Parser::make_FIELD(str(), loc());
{term_char}+{asterisk_char} return Parser::make_PREFIX(str(), loc());
"$"{term_ch}+ return ParseParam(str(), loc());
"@"{term_ch}+ return Parser::make_FIELD(str(), loc());
{term_ch}+{astrsk_ch} return Parser::make_PREFIX(string{matched_view(0, 1)}, loc());
{astrsk_ch}{term_ch}+ return Parser::make_SUFFIX(string{matched_view(1, 0)}, loc());
{astrsk_ch}{term_ch}+{astrsk_ch} return Parser::make_INFIX(string{matched_view(1, 1)}, loc());
{term_char}+ return Parser::make_TERM(str(), loc());
{tag_val_char}+{asterisk_char} return make_TagVal(str(), true, loc());
{tag_val_char}+ return make_TagVal(str(), false, loc());
{term_ch}+ return Parser::make_TERM(str(), loc());
{tag_val_ch}+{astrsk_ch} return make_Tag(str(), TagType::PREFIX, loc());
{astrsk_ch}{tag_val_ch}+ return make_Tag(str(), TagType::SUFFIX, loc());
{astrsk_ch}{tag_val_ch}+{astrsk_ch} return make_Tag(str(), TagType::INFIX, loc());
{tag_val_ch}+ return make_Tag(str(), TagType::REGULAR, loc());
<<EOF>> return Parser::make_YYEOF(loc());
<<EOF>> return Parser::make_YYEOF(loc());
%%
Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc) {
@ -92,14 +98,20 @@ Parser::symbol_type make_StringLit(string_view src, const Parser::location_type&
return Parser::make_TERM(res, loc);
}
Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::location_type& loc) {
Parser::symbol_type make_Tag(string_view src, TagType type, const Parser::location_type& loc) {
string res;
res.reserve(src.size());
bool escaped = false;
size_t len = is_prefix ? src.size() - 1 : src.size(); // Exclude the '*' at the end for prefix
// Determine processing boundaries
size_t start = (type == TagType::SUFFIX || type == TagType::INFIX) ? 1 : 0;
size_t end = src.size();
if (type == TagType::PREFIX || type == TagType::INFIX) {
end--; // Skip the last '*' character
}
for (size_t i = 0; i < len; ++i) {
// Handle escaping
bool escaped = false;
for (size_t i = start; i < end; ++i) {
if (escaped) {
escaped = false;
} else if (src[i] == '\\') {
@ -109,11 +121,16 @@ Parser::symbol_type make_TagVal(string_view src, bool is_prefix, const Parser::l
res.push_back(src[i]);
}
// Add '*' back for prefix
if (is_prefix) {
res.push_back('*');
return Parser::make_PREFIX(res, loc);
// Return the appropriate token type
switch (type) {
case TagType::PREFIX:
return Parser::make_PREFIX(res, loc);
case TagType::SUFFIX:
return Parser::make_SUFFIX(res, loc);
case TagType::INFIX:
return Parser::make_INFIX(res, loc);
case TagType::REGULAR:
default:
return Parser::make_TAG_VAL(res, loc);
}
return Parser::make_TAG_VAL(res, loc);
}

View file

@ -67,7 +67,7 @@ double toDouble(string_view src);
// Needed 0 at the end to satisfy bison 3.5.1
%token YYEOF 0
%token <std::string> TERM "term" TAG_VAL "tag_val" PARAM "param" FIELD "field" PREFIX "prefix"
%token <std::string> TERM "term" TAG_VAL "tag_val" PARAM "param" FIELD "field" PREFIX "prefix" SUFFIX "suffix" INFIX "infix"
%precedence TERM TAG_VAL
%left OR_OP
@ -134,24 +134,26 @@ search_or_expr:
| search_expr OR_OP search_unary_expr { $$ = AstLogicalNode(std::move($1), std::move($3), AstLogicalNode::OR); }
search_unary_expr:
LPAREN search_expr RPAREN { $$ = std::move($2); }
LPAREN search_expr RPAREN { $$ = std::move($2); }
| NOT_OP search_unary_expr { $$ = AstNegateNode(std::move($2)); }
| TERM { $$ = AstTermNode(std::move($1)); }
| TERM { $$ = AstTermNode(std::move($1)); }
| PREFIX { $$ = AstPrefixNode(std::move($1)); }
| UINT32 { $$ = AstTermNode(std::move($1)); }
| SUFFIX { $$ = AstSuffixNode(std::move($1)); }
| INFIX { $$ = AstInfixNode(std::move($1)); }
| UINT32 { $$ = AstTermNode(std::move($1)); }
| FIELD COLON field_cond { $$ = AstFieldNode(std::move($1), std::move($3)); }
field_cond:
TERM { $$ = AstTermNode(std::move($1)); }
| UINT32 { $$ = AstTermNode(std::move($1)); }
| STAR { $$ = AstStarFieldNode(); }
TERM { $$ = AstTermNode(std::move($1)); }
| UINT32 { $$ = AstTermNode(std::move($1)); }
| STAR { $$ = AstStarFieldNode(); }
| NOT_OP field_cond { $$ = AstNegateNode(std::move($2)); }
| LPAREN field_cond_expr RPAREN { $$ = std::move($2); }
| LBRACKET numeric_filter_expr RBRACKET { $$ = std::move($2); }
| LCURLBR tag_list RCURLBR { $$ = std::move($2); }
numeric_filter_expr:
opt_lparen generic_number opt_lparen generic_number { $$ = AstRangeNode($2, $1, $4, $3); }
opt_lparen generic_number opt_lparen generic_number { $$ = AstRangeNode($2, $1, $4, $3); }
| opt_lparen generic_number COMMA opt_lparen generic_number { $$ = AstRangeNode($2, $1, $5, $4); }
generic_number:
@ -163,9 +165,9 @@ opt_lparen:
| LPAREN { $$ = true; }
field_cond_expr:
field_unary_expr { $$ = std::move($1); }
| field_and_expr { $$ = std::move($1); }
| field_or_expr { $$ = std::move($1); }
field_unary_expr { $$ = std::move($1); }
| field_and_expr { $$ = std::move($1); }
| field_or_expr { $$ = std::move($1); }
field_and_expr:
field_unary_expr field_unary_expr %prec AND_OP { $$ = AstLogicalNode(std::move($1), std::move($2), AstLogicalNode::AND); }
@ -176,21 +178,23 @@ field_or_expr:
| field_cond_expr OR_OP field_and_expr { $$ = AstLogicalNode(std::move($1), std::move($3), AstLogicalNode::OR); }
field_unary_expr:
LPAREN field_cond_expr RPAREN { $$ = std::move($2); }
| NOT_OP field_unary_expr { $$ = AstNegateNode(std::move($2)); }
| TERM { $$ = AstTermNode(std::move($1)); }
| UINT32 { $$ = AstTermNode(std::move($1)); }
LPAREN field_cond_expr RPAREN { $$ = std::move($2); }
| NOT_OP field_unary_expr { $$ = AstNegateNode(std::move($2)); }
| TERM { $$ = AstTermNode(std::move($1)); }
| UINT32 { $$ = AstTermNode(std::move($1)); }
tag_list:
tag_list_element { $$ = AstTagsNode(std::move($1)); }
tag_list_element { $$ = AstTagsNode(std::move($1)); }
| tag_list OR_OP tag_list_element { $$ = AstTagsNode(std::move($1), std::move($3)); }
tag_list_element:
TERM { $$ = AstTermNode(std::move($1)); }
TERM { $$ = AstTermNode(std::move($1)); }
| PREFIX { $$ = AstPrefixNode(std::move($1)); }
| UINT32 { $$ = AstTermNode(std::move($1)); }
| DOUBLE { $$ = AstTermNode(std::move($1)); }
| TAG_VAL { $$ = AstTermNode(std::move($1)); }
| SUFFIX { $$ = AstSuffixNode(std::move($1)); }
| INFIX { $$ = AstInfixNode(std::move($1)); }
| UINT32 { $$ = AstTermNode(std::move($1)); }
| DOUBLE { $$ = AstTermNode(std::move($1)); }
| TAG_VAL { $$ = AstTermNode(std::move($1)); }
%%

View file

@ -118,10 +118,16 @@ struct ProfileBuilder {
string GetNodeInfo(const AstNode& node) {
struct NodeFormatter {
void operator()(std::string* out, const AstPrefixNode& node) const {
out->append(node.prefix);
out->append(node.affix);
}
void operator()(std::string* out, const AstSuffixNode& node) const {
out->append(node.affix);
}
void operator()(std::string* out, const AstInfixNode& node) const {
out->append(node.affix);
}
void operator()(std::string* out, const AstTermNode& node) const {
out->append(node.term);
out->append(node.affix);
}
void operator()(std::string* out, const AstTagsNode::TagValue& value) const {
visit([this, out](const auto& n) { this->operator()(out, n); }, value);
@ -129,8 +135,10 @@ struct ProfileBuilder {
};
Overloaded node_info{
[](monostate) -> string { return ""s; },
[](const AstTermNode& n) { return absl::StrCat("Term{", n.term, "}"); },
[](const AstPrefixNode& n) { return absl::StrCat("Prefix{", n.prefix, "}"); },
[](const AstTermNode& n) { return absl::StrCat("Term{", n.affix, "}"); },
[](const AstPrefixNode& n) { return absl::StrCat("Prefix{", n.affix, "}"); },
[](const AstSuffixNode& n) { return absl::StrCat("Suffix{", n.affix, "}"); },
[](const AstInfixNode& n) { return absl::StrCat("Infix{", n.affix, "}"); },
[](const AstRangeNode& n) { return absl::StrCat("Range{", n.lo, "<>", n.hi, "}"); },
[](const AstLogicalNode& n) {
auto op = n.op == AstLogicalNode::AND ? "and" : "or";
@ -268,6 +276,20 @@ struct BasicSearch {
return result;
}
template <typename C>
IndexResult CollectSuffixMatches(BaseStringIndex<C>* index, std::string_view suffix) {
// TODO: Implement full text search for suffix
error_ = "Not implemented";
return IndexResult{};
}
template <typename C>
IndexResult CollectInfixMatches(BaseStringIndex<C>* index, std::string_view infix) {
// TODO: Implement full text search for infix
error_ = "Not implemented";
return IndexResult{};
}
IndexResult Search(monostate, string_view) {
return vector<DocId>{};
}
@ -279,7 +301,7 @@ struct BasicSearch {
// "term": access field's text index or unify results from all text indices if no field is set
IndexResult Search(const AstTermNode& node, string_view active_field) {
std::string term = node.term;
std::string term = node.affix;
bool strip_whitespace = true;
if (auto synonyms = indices_->GetSynonyms(); synonyms) {
@ -341,11 +363,23 @@ struct BasicSearch {
}
auto mapping = [&node, this](TextIndex* index) {
return CollectPrefixMatches(index, node.prefix);
return CollectPrefixMatches(index, node.affix);
};
return UnifyResults(GetSubResults(indices, mapping), LogicOp::OR);
}
IndexResult Search(const AstSuffixNode& node, string_view active_field) {
// TODO: Implement full text search for suffix
error_ = "Not implemented";
return IndexResult{};
}
IndexResult Search(const AstInfixNode& node, string_view active_field) {
// TODO: Implement full text search for infix
error_ = "Not implemented";
return IndexResult{};
}
// [range]: access field's numeric index
IndexResult Search(const AstRangeNode& node, string_view active_field) {
DCHECK(!active_field.empty());
@ -388,10 +422,16 @@ struct BasicSearch {
return IndexResult{};
Overloaded ov{[tag_index](const AstTermNode& term) -> IndexResult {
return tag_index->Matching(term.term);
return tag_index->Matching(term.affix);
},
[tag_index, this](const AstPrefixNode& prefix) {
return CollectPrefixMatches(tag_index, prefix.prefix);
return CollectPrefixMatches(tag_index, prefix.affix);
},
[tag_index, this](const AstSuffixNode& suffix) {
return CollectSuffixMatches(tag_index, suffix.affix);
},
[tag_index, this](const AstInfixNode& infix) {
return CollectInfixMatches(tag_index, infix.affix);
}};
auto mapping = [ov](const auto& tag) { return visit(ov, tag); };
return UnifyResults(GetSubResults(node.tags, mapping), LogicOp::OR);

View file

@ -155,7 +155,7 @@ TEST_F(SearchParserTest, Scanner) {
// Prefix simple
SetInput("pre*");
NEXT_EQ(TOK_PREFIX, string, "pre*");
NEXT_EQ(TOK_PREFIX, string, "pre");
// TODO: uncomment when we support escaped terms
// Prefix escaped (redis doesn't support quoted prefix matches)
@ -167,7 +167,7 @@ TEST_F(SearchParserTest, Scanner) {
NEXT_EQ(TOK_FIELD, string, "@color");
NEXT_TOK(TOK_COLON);
NEXT_TOK(TOK_LCURLBR);
NEXT_EQ(TOK_PREFIX, string, "prefix*");
NEXT_EQ(TOK_PREFIX, string, "prefix");
NEXT_TOK(TOK_RCURLBR);
// Prefix escaped star
@ -196,28 +196,28 @@ TEST_F(SearchParserTest, EscapedTagPrefixes) {
NEXT_EQ(TOK_FIELD, string, "@name");
NEXT_TOK(TOK_COLON);
NEXT_TOK(TOK_LCURLBR);
NEXT_EQ(TOK_PREFIX, string, "escape-err*");
NEXT_EQ(TOK_PREFIX, string, "escape-err");
NEXT_TOK(TOK_RCURLBR);
SetInput("@name:{escape\\+pre*}");
NEXT_EQ(TOK_FIELD, string, "@name");
NEXT_TOK(TOK_COLON);
NEXT_TOK(TOK_LCURLBR);
NEXT_EQ(TOK_PREFIX, string, "escape+pre*");
NEXT_EQ(TOK_PREFIX, string, "escape+pre");
NEXT_TOK(TOK_RCURLBR);
SetInput("@name:{escape\\.pre*}");
NEXT_EQ(TOK_FIELD, string, "@name");
NEXT_TOK(TOK_COLON);
NEXT_TOK(TOK_LCURLBR);
NEXT_EQ(TOK_PREFIX, string, "escape.pre*");
NEXT_EQ(TOK_PREFIX, string, "escape.pre");
NEXT_TOK(TOK_RCURLBR);
SetInput("@name:{complex\\-escape\\+with\\.many\\*chars*}");
NEXT_EQ(TOK_FIELD, string, "@name");
NEXT_TOK(TOK_COLON);
NEXT_TOK(TOK_LCURLBR);
NEXT_EQ(TOK_PREFIX, string, "complex-escape+with.many*chars*");
NEXT_EQ(TOK_PREFIX, string, "complex-escape+with.many*chars");
NEXT_TOK(TOK_RCURLBR);
}
@ -237,9 +237,8 @@ TEST_F(SearchParserTest, Parse) {
EXPECT_EQ(1, Parse(" @foo:@bar "));
EXPECT_EQ(1, Parse(" @foo: "));
// We don't support suffix/any other position for now
EXPECT_EQ(1, Parse("*pre"));
EXPECT_EQ(1, Parse("*pre*"));
EXPECT_EQ(0, Parse("*suffix"));
EXPECT_EQ(0, Parse("*infix"));
EXPECT_EQ(1, Parse("pre***"));
}

View file

@ -872,6 +872,32 @@ TEST_F(SearchTest, InvalidVectorParameter) {
ASSERT_FALSE(algo.Init("*=>[KNN 2 @v $b]", &query_params));
}
TEST_F(SearchTest, NotImplementedSearchTypes) {
auto schema = MakeSimpleSchema({{"title", SchemaField::TEXT}});
FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};
SearchAlgorithm algo{};
QueryParams params;
// Add a document for testing
MockedDocument doc{Map{{"title", "text for search"}}};
indices.Add(0, doc);
// Test suffix search (words ending with "search")
algo.Init("*search", &params);
auto suffix_result = algo.Search(&indices);
EXPECT_TRUE(suffix_result.ids.empty()) << "Suffix search should return empty result";
EXPECT_THAT(suffix_result.error, testing::HasSubstr("Not implemented"))
<< "Suffix search should return a not implemented error";
// Test infix search (words containing "for")
algo.Init("*for*", &params);
auto infix_result = algo.Search(&indices);
EXPECT_TRUE(infix_result.ids.empty()) << "Infix search should return empty result";
EXPECT_THAT(infix_result.error, testing::HasSubstr("Not implemented"))
<< "Infix search should return a not implemented error";
}
} // namespace search
} // namespace dfly

View file

@ -0,0 +1,13 @@
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
namespace dfly {
namespace search {
enum class TagType { PREFIX, SUFFIX, INFIX, REGULAR };
} // namespace search
} // namespace dfly