feat(search): Tag field options (separator, case sensitivity) (#3144)

Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>
This commit is contained in:
Vladislav 2024-06-07 22:34:06 +03:00 committed by GitHub
parent 0edcbc80ef
commit 1053633a6a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 103 additions and 23 deletions

View file

@ -111,6 +111,11 @@ string DocIndexInfo::BuildRestoreCommand() const {
absl::StrAppend(out, " ", params.use_hnsw ? "HNSW" : "FLAT", " 6 ", "DIM ", params.dim,
" DISTANCE_METRIC ", sim, " INITIAL_CAP ", params.capacity);
},
// TAG fields: always emit the configured separator character, and append the
// CASESENSITIVE flag only when case-sensitive matching is enabled.
// NOTE(review): the separator is written verbatim; if it is a whitespace character the
// generated command may not round-trip - confirm against the consumer of this string.
[out = &out](const search::SchemaField::TagParams& params) {
absl::StrAppend(out, " ", "SEPARATOR", " ", string{params.separator});
if (params.case_sensitive)
absl::StrAppend(out, " ", "CASESENSITIVE");
},
};
visit(info, finfo.special_params);
}

View file

@ -80,6 +80,32 @@ search::SchemaField::VectorParams ParseVectorParams(CmdArgParser* parser) {
return params;
}
// Parses the optional arguments of a TAG field: [SEPARATOR <char>] [CASESENSITIVE].
// Both keywords are checked with IgnoreCase() and are accepted in any order; the loop
// stops at the first token that matches neither option.
// Returns the collected parameters; options that were not given keep the values of a
// value-initialized TagParams.
search::SchemaField::TagParams ParseTagParams(CmdArgParser* parser) {
  search::SchemaField::TagParams params{};
  while (parser->HasNext()) {
    if (parser->Check("SEPARATOR").IgnoreCase().ExpectTail(1)) {
      string_view separator = parser->Next();
      // Only the first character is used as the separator. An empty argument (e.g.
      // SEPARATOR "") leaves the default untouched: calling front() on an empty
      // string_view is undefined behavior.
      if (!separator.empty())
        params.separator = separator.front();
      continue;
    }

    if (parser->Check("CASESENSITIVE").IgnoreCase()) {
      params.case_sensitive = true;
      continue;
    }

    // Not a tag option - leave the rest of the arguments to the caller.
    break;
  }
  return params;
}
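// GCC's -Wmaybe-uninitialized breaks on the ParamsVariant initialization below, so the
// warning is suppressed (non-clang compilers only) until the matching diagnostic pop.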
#ifndef __clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
optional<search::Schema> ParseSchemaOrReply(DocIndex::DataType type, CmdArgParser parser,
ConnectionContext* cntx) {
search::Schema schema;
@ -108,15 +134,18 @@ optional<search::Schema> ParseSchemaOrReply(DocIndex::DataType type, CmdArgParse
return nullopt;
}
// Tag fields include: [separator char] [casesensitive]
// Vector fields include: {algorithm} num_args args...
search::SchemaField::ParamsVariant params = std::monostate{};
if (*type == search::SchemaField::VECTOR) {
search::SchemaField::ParamsVariant params(monostate{});
if (*type == search::SchemaField::TAG) {
params = ParseTagParams(&parser);
} else if (*type == search::SchemaField::VECTOR) {
auto vector_params = ParseVectorParams(&parser);
if (!parser.HasError() && vector_params.dim == 0) {
cntx->SendError("Knn vector dimension cannot be zero");
return nullopt;
}
params = std::move(vector_params);
params = vector_params;
}
// Flags: check for SORTABLE and NOINDEX
@ -154,6 +183,10 @@ optional<search::Schema> ParseSchemaOrReply(DocIndex::DataType type, CmdArgParse
return schema;
}
#ifndef __clang__
#pragma GCC diagnostic pop
#endif
search::QueryParams ParseQueryParams(CmdArgParser* parser) {
search::QueryParams params;
size_t num_args = parser->Next<size_t>();

View file

@ -307,6 +307,21 @@ TEST_F(SearchFamilyTest, Tags) {
AreDocIds("d:1", "d:2", "d:3", "d:5", "d:6"));
}
// Checks the TAG field options SEPARATOR and CASESENSITIVE given at index creation.
TEST_F(SearchFamilyTest, TagOptions) {
// Each document stores several colors in one field, delimited by '/', with mixed
// letter case, stray spaces and repeated separators.
Run({"hset", "d:1", "color", " red/ green // bLUe "});
Run({"hset", "d:2", "color", "blue /// GReeN "});
Run({"hset", "d:3", "color", "grEEn // yellow //"});
Run({"hset", "d:4", "color", " /blue/green/ "});
// Index "color" as a case-sensitive tag field split on '/'. The option keywords are
// passed in lowercase on purpose.
EXPECT_EQ(Run({"ft.create", "i1", "on", "hash", "schema", "color", "tag", "casesensitive",
"separator", "/"}),
"OK");
// Lookups must match the stored case exactly: "grEEn" (d:3) and "bLUe" (d:1) are not
// returned for the lowercase queries, while " green " in d:1 still matches despite the
// surrounding spaces.
EXPECT_THAT(Run({"ft.search", "i1", "@color:{green}"}), AreDocIds("d:1", "d:4"));
EXPECT_THAT(Run({"ft.search", "i1", "@color:{GReeN}"}), AreDocIds("d:2"));
EXPECT_THAT(Run({"ft.search", "i1", "@color:{blue}"}), AreDocIds("d:2", "d:4"));
}
TEST_F(SearchFamilyTest, Numbers) {
for (unsigned i = 0; i <= 10; i++) {
for (unsigned j = 0; j <= 10; j++) {