feat: implement FT.TAGVALS (#3493)

* feat: implement FT.TAGVALS

Fixes #3491

---------

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2024-08-11 22:53:25 +03:00 committed by GitHub
parent 766b0ec86f
commit 29a13893c2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 86 additions and 2 deletions

View file

@ -149,6 +149,15 @@ void BaseStringIndex<C>::Remove(DocId id, DocumentAccessor* doc, string_view fie
}
}
template <typename C> vector<string> BaseStringIndex<C>::GetTerms() const {
vector<string> res;
res.reserve(entries_.size());
for (const auto& [term, _] : entries_) {
res.push_back(string{term});
}
return res;
}
template struct BaseStringIndex<CompressedSortedSet>;
template struct BaseStringIndex<SortedVector>;

View file

@ -51,6 +51,9 @@ template <typename C> struct BaseStringIndex : public BaseIndex {
// Pointer is valid as long as index is not mutated. Nullptr if not found
const Container* Matching(std::string_view str) const;
// Returns all the terms that appear as keys in the reverse index.
std::vector<std::string> GetTerms() const;
protected:
Container* GetOrCreate(std::string_view word);

View file

@ -39,5 +39,6 @@ extern const char kInvalidDumpValueErr[];
extern const char kSyntaxErrType[];
extern const char kScriptErrType[];
extern const char kConfigErrType[];
extern const char kSearchErrType[];
} // namespace facade

View file

@ -101,6 +101,7 @@ const char kInvalidDumpValueErr[] = "DUMP payload version or checksum are wrong"
const char kSyntaxErrType[] = "syntax_error";
const char kScriptErrType[] = "script_error";
const char kConfigErrType[] = "config_error";
const char kSearchErrType[] = "search_error";
const char* RespExpr::TypeName(Type t) {
switch (t) {

View file

@ -10,6 +10,7 @@
#include "base/logging.h"
#include "core/overloaded.h"
#include "core/search/indices.h"
#include "server/engine_shard_set.h"
#include "server/search/doc_accessors.h"
#include "server/server_state.h"
@ -17,6 +18,8 @@
namespace dfly {
using namespace std;
using facade::ErrorReply;
using nonstd::make_unexpected;
namespace {
@ -269,6 +272,20 @@ DocIndexInfo ShardDocIndex::GetInfo() const {
return {*base_, key_index_.Size()};
}
io::Result<StringVec, ErrorReply> ShardDocIndex::GetTagVals(string_view field) const {
search::BaseIndex* base_index = indices_.GetIndex(field);
if (base_index == nullptr) {
return make_unexpected(ErrorReply{"-No such field"});
}
search::TagIndex* tag_index = dynamic_cast<search::TagIndex*>(base_index);
if (tag_index == nullptr) {
return make_unexpected(ErrorReply{"-Not a tag field"});
}
return tag_index->GetTerms();
}
ShardDocIndices::ShardDocIndices() : local_mr_{ServerState::tlocal()->data_heap()} {
}

View file

@ -138,6 +138,8 @@ class ShardDocIndex {
DocIndexInfo GetInfo() const;
io::Result<StringVec, facade::ErrorReply> GetTagVals(std::string_view field) const;
private:
// Clears internal data. Traverses all matching documents and assigns ids.
void Rebuild(const OpArgs& op_args, PMR_NS::memory_resource* mr);

View file

@ -798,6 +798,41 @@ void SearchFamily::FtProfile(CmdArgList args, ConnectionContext* cntx) {
}
}
void SearchFamily::FtTagVals(CmdArgList args, ConnectionContext* cntx) {
string_view index_name = ArgS(args, 0);
string_view field_name = ArgS(args, 1);
VLOG(1) << "FtTagVals: " << index_name << " " << field_name;
vector<io::Result<StringVec, ErrorReply>> shard_results(shard_set->size(), StringVec{});
cntx->transaction->ScheduleSingleHop([&](Transaction* t, EngineShard* es) {
if (auto* index = es->search_indices()->GetIndex(index_name); index)
shard_results[es->shard_id()] = index->GetTagVals(field_name);
else
shard_results[es->shard_id()] = nonstd::make_unexpected(ErrorReply("-Unknown Index name"));
return OpStatus::OK;
});
absl::flat_hash_set<string> result_set;
// Check first if either shard had errors. Also merge the results into a single set.
for (auto& res : shard_results) {
if (res) {
result_set.insert(make_move_iterator(res->begin()), make_move_iterator(res->end()));
} else {
res.error().kind = facade::kSearchErrType;
return cntx->SendError(res.error());
}
}
shard_results.clear();
vector<string> vec(result_set.begin(), result_set.end());
auto* rb = static_cast<RedisReplyBuilder*>(cntx->reply_builder());
rb->SendStringArr(vec, RedisReplyBuilder::SET);
}
void SearchFamily::FtAggregate(CmdArgList args, ConnectionContext* cntx) {
const auto params = ParseAggregatorParamsOrReply(args, cntx);
if (!params)
@ -871,7 +906,8 @@ void SearchFamily::Register(CommandRegistry* registry) {
<< CI{"FT._LIST", kReadOnlyMask, 1, 0, 0, acl::FT_SEARCH}.HFUNC(FtList)
<< CI{"FT.SEARCH", kReadOnlyMask, -3, 0, 0, acl::FT_SEARCH}.HFUNC(FtSearch)
<< CI{"FT.AGGREGATE", kReadOnlyMask, -3, 0, 0, acl::FT_SEARCH}.HFUNC(FtAggregate)
<< CI{"FT.PROFILE", kReadOnlyMask, -4, 0, 0, acl::FT_SEARCH}.HFUNC(FtProfile);
<< CI{"FT.PROFILE", kReadOnlyMask, -4, 0, 0, acl::FT_SEARCH}.HFUNC(FtProfile)
<< CI{"FT.TAGVALS", kReadOnlyMask, 3, 0, 0, acl::FT_SEARCH}.HFUNC(FtTagVals);
}
} // namespace dfly

View file

@ -22,6 +22,7 @@ class SearchFamily {
static void FtSearch(CmdArgList args, ConnectionContext* cntx);
static void FtProfile(CmdArgList args, ConnectionContext* cntx);
static void FtAggregate(CmdArgList args, ConnectionContext* cntx);
static void FtTagVals(CmdArgList args, ConnectionContext* cntx);
public:
static void Register(CommandRegistry* registry);

View file

@ -310,7 +310,13 @@ TEST_F(SearchFamilyTest, Tags) {
Run({"hset", "d:5", "color", "green"});
Run({"hset", "d:6", "color", "blue"});
EXPECT_EQ(Run({"ft.create", "i1", "on", "hash", "schema", "color", "tag"}), "OK");
EXPECT_EQ(Run({"ft.create", "i1", "on", "hash", "schema", "color", "tag", "dummy", "numeric"}),
"OK");
EXPECT_THAT(Run({"ft.tagvals", "i2", "color"}), ErrArg("Unknown Index name"));
EXPECT_THAT(Run({"ft.tagvals", "i1", "foo"}), ErrArg("No such field"));
EXPECT_THAT(Run({"ft.tagvals", "i1", "dummy"}), ErrArg("Not a tag field"));
auto resp = Run({"ft.tagvals", "i1", "color"});
ASSERT_THAT(resp, IsUnordArray("red", "blue", "green"));
// Tags don't participate in full text search
EXPECT_THAT(Run({"ft.search", "i1", "red"}), kNoResults);
@ -323,6 +329,14 @@ TEST_F(SearchFamilyTest, Tags) {
AreDocIds("d:1", "d:2", "d:3", "d:4", "d:5"));
EXPECT_THAT(Run({"ft.search", "i1", "@color:{blue | green}"}),
AreDocIds("d:1", "d:2", "d:3", "d:5", "d:6"));
EXPECT_EQ(Run({"ft.create", "i2", "on", "hash", "schema", "c1", "as", "c2", "tag"}), "OK");
// TODO: there is a discrepancy here between redis stack and Dragonfly,
// we accept the original field when it has alias, while redis stack does not.
//
// EXPECT_THAT(Run({"ft.tagvals", "i2", "c1"}), ErrArg("No such field"));
EXPECT_THAT(Run({"ft.tagvals", "i2", "c2"}), ArrLen(0));
}
TEST_F(SearchFamilyTest, TagOptions) {