mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-10 18:05:44 +02:00
feat: track update and delete for search (#1325)
Automatically update search indices --------- Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>
This commit is contained in:
parent
2a5fd79856
commit
f97dbad6f9
15 changed files with 223 additions and 28 deletions
|
@ -22,6 +22,7 @@ using DocId = uint32_t;
|
|||
struct BaseIndex {
|
||||
virtual ~BaseIndex() = default;
|
||||
virtual void Add(DocId doc, std::string_view value) = 0;
|
||||
virtual void Remove(DocId doc, std::string_view value) = 0;
|
||||
};
|
||||
|
||||
} // namespace dfly::search
|
||||
|
|
|
@ -21,7 +21,7 @@ using namespace std;
|
|||
namespace {
|
||||
|
||||
// Get all words from text as matched by regex word boundaries
|
||||
vector<string> GetWords(string_view text) {
|
||||
absl::flat_hash_set<string> Tokenize(string_view text) {
|
||||
std::regex rx{"\\b.*?\\b", std::regex_constants::icase};
|
||||
std::cregex_iterator begin{text.data(), text.data() + text.size(), rx}, end{};
|
||||
|
||||
|
@ -31,8 +31,19 @@ vector<string> GetWords(string_view text) {
|
|||
absl::AsciiStrToLower(&word);
|
||||
words.insert(move(word));
|
||||
}
|
||||
return words;
|
||||
}
|
||||
|
||||
return vector<string>{make_move_iterator(words.begin()), make_move_iterator(words.end())};
|
||||
// Split taglist, remove duplicates and convert all to lowercase
|
||||
absl::flat_hash_set<string> NormalizeTags(string_view taglist) {
|
||||
string tmp;
|
||||
absl::flat_hash_set<string> tags;
|
||||
for (string_view tag : absl::StrSplit(taglist, ',')) {
|
||||
tmp = absl::StripAsciiWhitespace(tag);
|
||||
absl::AsciiStrToLower(&tmp);
|
||||
tags.insert(move(tmp));
|
||||
}
|
||||
return tags;
|
||||
}
|
||||
|
||||
}; // namespace
|
||||
|
@ -43,9 +54,15 @@ void NumericIndex::Add(DocId doc, string_view value) {
|
|||
entries_.emplace(num, doc);
|
||||
}
|
||||
|
||||
void NumericIndex::Remove(DocId doc, string_view value) {
|
||||
int64_t num;
|
||||
if (absl::SimpleAtoi(value, &num))
|
||||
entries_.erase({num, doc});
|
||||
}
|
||||
|
||||
vector<DocId> NumericIndex::Range(int64_t l, int64_t r) const {
|
||||
auto it_l = entries_.lower_bound(l);
|
||||
auto it_r = entries_.lower_bound(r + 1);
|
||||
auto it_l = entries_.lower_bound({l, 0});
|
||||
auto it_r = entries_.lower_bound({r + 1, 0});
|
||||
|
||||
vector<DocId> out;
|
||||
for (auto it = it_l; it != it_r; ++it)
|
||||
|
@ -61,18 +78,35 @@ const vector<DocId>* BaseStringIndex::Matching(string_view str) const {
|
|||
}
|
||||
|
||||
void TextIndex::Add(DocId doc, string_view value) {
|
||||
for (const auto& word : GetWords(value)) {
|
||||
for (const auto& word : Tokenize(value)) {
|
||||
auto& list = entries_[word];
|
||||
list.insert(upper_bound(list.begin(), list.end(), doc), doc);
|
||||
}
|
||||
}
|
||||
|
||||
void TextIndex::Remove(DocId doc, string_view value) {
|
||||
for (const auto& word : Tokenize(value)) {
|
||||
auto& list = entries_[word];
|
||||
auto it = lower_bound(list.begin(), list.end(), doc);
|
||||
if (it != list.end() && *it == doc)
|
||||
list.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
void TagIndex::Add(DocId doc, string_view value) {
|
||||
auto tags = absl::StrSplit(value, ',');
|
||||
for (string_view tag : tags) {
|
||||
auto& list = entries_[absl::StripAsciiWhitespace(tag)];
|
||||
for (auto& tag : NormalizeTags(value)) {
|
||||
auto& list = entries_[tag];
|
||||
list.insert(upper_bound(list.begin(), list.end(), doc), doc);
|
||||
}
|
||||
}
|
||||
|
||||
void TagIndex::Remove(DocId doc, string_view value) {
|
||||
for (auto& tag : NormalizeTags(value)) {
|
||||
auto& list = entries_[tag];
|
||||
auto it = lower_bound(list.begin(), list.end(), doc);
|
||||
if (it != list.end() && *it == doc)
|
||||
list.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace dfly::search
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#include <absl/container/btree_map.h>
|
||||
#include <absl/container/btree_set.h>
|
||||
#include <absl/container/flat_hash_map.h>
|
||||
|
||||
#include <map>
|
||||
|
@ -17,11 +17,12 @@ namespace dfly::search {
|
|||
// Range bounds are queried in logarithmic time, iteration is constant.
|
||||
struct NumericIndex : public BaseIndex {
|
||||
void Add(DocId doc, std::string_view value) override;
|
||||
void Remove(DocId doc, std::string_view value) override;
|
||||
|
||||
std::vector<DocId> Range(int64_t l, int64_t r) const;
|
||||
|
||||
private:
|
||||
absl::btree_multimap<int64_t, DocId> entries_;
|
||||
absl::btree_set<std::pair<int64_t, DocId>> entries_;
|
||||
};
|
||||
|
||||
// Base index for string based indices.
|
||||
|
@ -37,12 +38,14 @@ struct BaseStringIndex : public BaseIndex {
|
|||
// Hashmap based lookup per word.
|
||||
struct TextIndex : public BaseStringIndex {
|
||||
void Add(DocId doc, std::string_view value) override;
|
||||
void Remove(DocId doc, std::string_view value) override;
|
||||
};
|
||||
|
||||
// Index for text fields.
|
||||
// Hashmap based lookup per word.
|
||||
struct TagIndex : public BaseStringIndex {
|
||||
void Add(DocId doc, std::string_view value) override;
|
||||
void Remove(DocId doc, std::string_view value) override;
|
||||
};
|
||||
|
||||
} // namespace dfly::search
|
||||
|
|
|
@ -221,8 +221,16 @@ void FieldIndices::Add(DocId doc, DocumentAccessor* access) {
|
|||
for (auto& [field, index] : indices_) {
|
||||
index->Add(doc, access->Get(field));
|
||||
}
|
||||
all_ids_.push_back(doc);
|
||||
sort(all_ids_.begin(), all_ids_.end());
|
||||
all_ids_.insert(upper_bound(all_ids_.begin(), all_ids_.end(), doc), doc);
|
||||
}
|
||||
|
||||
void FieldIndices::Remove(DocId doc, DocumentAccessor* access) {
|
||||
for (auto& [field, index] : indices_) {
|
||||
index->Remove(doc, access->Get(field));
|
||||
}
|
||||
auto it = lower_bound(all_ids_.begin(), all_ids_.end(), doc);
|
||||
CHECK(it != all_ids_.end() && *it == doc);
|
||||
all_ids_.erase(it);
|
||||
}
|
||||
|
||||
BaseIndex* FieldIndices::GetIndex(string_view field) const {
|
||||
|
|
|
@ -38,6 +38,7 @@ class FieldIndices {
|
|||
FieldIndices(Schema schema);
|
||||
|
||||
void Add(DocId doc, DocumentAccessor* access);
|
||||
void Remove(DocId doc, DocumentAccessor* access);
|
||||
|
||||
BaseIndex* GetIndex(std::string_view field) const;
|
||||
std::vector<TextIndex*> GetAllTextIndices() const;
|
||||
|
|
|
@ -496,6 +496,14 @@ bool DbSlice::Del(DbIndex db_ind, PrimeIterator it) {
|
|||
CHECK_EQ(1u, db->mcflag.Erase(it->first));
|
||||
}
|
||||
|
||||
auto obj_type = it->second.ObjType();
|
||||
if (doc_del_cb_ && (obj_type == OBJ_JSON || obj_type == OBJ_HASH)) {
|
||||
string tmp;
|
||||
string_view key = it->first.GetSlice(&tmp);
|
||||
DbContext cntx{db_ind, GetCurrentTimeMs()};
|
||||
doc_del_cb_(key, cntx, it->second);
|
||||
}
|
||||
|
||||
PerformDeletion(it, shard_owner(), db.get());
|
||||
|
||||
return true;
|
||||
|
@ -814,6 +822,7 @@ void DbSlice::PreUpdate(DbIndex db_ind, PrimeIterator it) {
|
|||
for (const auto& ccb : change_cb_) {
|
||||
ccb.second(db_ind, ChangeReq{it});
|
||||
}
|
||||
|
||||
size_t value_heap_size = it->second.MallocUsed();
|
||||
auto* stats = MutableStats(db_ind);
|
||||
stats->obj_memory_usage -= value_heap_size;
|
||||
|
@ -1130,4 +1139,8 @@ void DbSlice::InvalidateSlotWatches(const SlotSet& slot_ids) {
|
|||
}
|
||||
}
|
||||
|
||||
void DbSlice::SetDocDeletionCallback(DocDeletionCallback ddcb) {
|
||||
doc_del_cb_ = move(ddcb);
|
||||
}
|
||||
|
||||
} // namespace dfly
|
||||
|
|
|
@ -84,6 +84,10 @@ class DbSlice {
|
|||
}
|
||||
};
|
||||
|
||||
// Called before deleting an element to notify the search indices.
|
||||
using DocDeletionCallback =
|
||||
std::function<void(std::string_view, const Context&, const PrimeValue& pv)>;
|
||||
|
||||
struct ExpireParams {
|
||||
int64_t value = INT64_MIN; // undefined
|
||||
|
||||
|
@ -312,6 +316,8 @@ class DbSlice {
|
|||
// Unregisted all watched key entries for connection.
|
||||
void UnregisterConnectionWatches(ConnectionState::ExecInfo* exec_info);
|
||||
|
||||
void SetDocDeletionCallback(DocDeletionCallback ddcb);
|
||||
|
||||
private:
|
||||
std::pair<PrimeIterator, bool> AddOrUpdateInternal(const Context& cntx, std::string_view key,
|
||||
PrimeValue obj, uint64_t expire_at_ms,
|
||||
|
@ -354,6 +360,9 @@ class DbSlice {
|
|||
|
||||
// ordered from the smallest to largest version.
|
||||
std::vector<std::pair<uint64_t, ChangeCallback>> change_cb_;
|
||||
|
||||
// Registered by shard indices on when first document index is created.
|
||||
DocDeletionCallback doc_del_cb_;
|
||||
};
|
||||
|
||||
} // namespace dfly
|
||||
|
|
|
@ -18,6 +18,7 @@ extern "C" {
|
|||
#include "server/conn_context.h"
|
||||
#include "server/container_utils.h"
|
||||
#include "server/engine_shard_set.h"
|
||||
#include "server/search/doc_index.h"
|
||||
#include "server/transaction.h"
|
||||
|
||||
using namespace std;
|
||||
|
@ -171,6 +172,7 @@ OpStatus OpIncrBy(const OpArgs& op_args, string_view key, string_view field, Inc
|
|||
if (pv.ObjType() != OBJ_HASH)
|
||||
return OpStatus::WRONG_TYPE;
|
||||
|
||||
op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, it->second);
|
||||
db_slice.PreUpdate(op_args.db_cntx.db_index, it);
|
||||
|
||||
if (pv.Encoding() == kEncodingListPack) {
|
||||
|
@ -248,6 +250,7 @@ OpStatus OpIncrBy(const OpArgs& op_args, string_view key, string_view field, Inc
|
|||
}
|
||||
|
||||
db_slice.PostUpdate(op_args.db_cntx.db_index, it, key);
|
||||
op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, pv);
|
||||
|
||||
return OpStatus::OK;
|
||||
}
|
||||
|
@ -331,7 +334,9 @@ OpResult<uint32_t> OpDel(const OpArgs& op_args, string_view key, CmdArgList valu
|
|||
if (!it_res)
|
||||
return it_res.status();
|
||||
|
||||
op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, (*it_res)->second);
|
||||
db_slice.PreUpdate(op_args.db_cntx.db_index, *it_res);
|
||||
|
||||
PrimeValue& pv = (*it_res)->second;
|
||||
unsigned deleted = 0;
|
||||
bool key_remove = false;
|
||||
|
@ -370,6 +375,10 @@ OpResult<uint32_t> OpDel(const OpArgs& op_args, string_view key, CmdArgList valu
|
|||
}
|
||||
|
||||
db_slice.PostUpdate(op_args.db_cntx.db_index, *it_res, key);
|
||||
|
||||
if (!key_remove)
|
||||
op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, pv);
|
||||
|
||||
if (key_remove) {
|
||||
if (enc == kEncodingListPack) {
|
||||
stats->listpack_blob_cnt--;
|
||||
|
@ -623,6 +632,7 @@ OpResult<uint32_t> OpSet(const OpArgs& op_args, string_view key, CmdArgList valu
|
|||
if (pv.ObjType() != OBJ_HASH)
|
||||
return OpStatus::WRONG_TYPE;
|
||||
|
||||
op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, it->second);
|
||||
db_slice.PreUpdate(op_args.db_cntx.db_index, it);
|
||||
}
|
||||
|
||||
|
@ -663,7 +673,9 @@ OpResult<uint32_t> OpSet(const OpArgs& op_args, string_view key, CmdArgList valu
|
|||
created += unsigned(added);
|
||||
}
|
||||
}
|
||||
|
||||
db_slice.PostUpdate(op_args.db_cntx.db_index, it, key);
|
||||
op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, pv);
|
||||
|
||||
return created;
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ extern "C" {
|
|||
#include "server/command_registry.h"
|
||||
#include "server/error.h"
|
||||
#include "server/journal/journal.h"
|
||||
#include "server/search/doc_index.h"
|
||||
#include "server/tiered_storage.h"
|
||||
#include "server/transaction.h"
|
||||
|
||||
|
@ -51,9 +52,14 @@ void SetJson(const OpArgs& op_args, string_view key, JsonType&& value) {
|
|||
auto& db_slice = op_args.shard->db_slice();
|
||||
DbIndex db_index = op_args.db_cntx.db_index;
|
||||
auto [it_output, added] = db_slice.AddOrFind(op_args.db_cntx, key);
|
||||
|
||||
op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, it_output->second);
|
||||
db_slice.PreUpdate(db_index, it_output);
|
||||
|
||||
it_output->second.SetJson(std::move(value));
|
||||
|
||||
db_slice.PostUpdate(db_index, it_output, key);
|
||||
op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, it_output->second);
|
||||
}
|
||||
|
||||
string JsonTypeToName(const JsonType& val) {
|
||||
|
@ -139,6 +145,8 @@ OpStatus UpdateEntry(const OpArgs& op_args, std::string_view key, std::string_vi
|
|||
DCHECK(json_val) << "should have a valid JSON object for key '" << key << "' the type for it is '"
|
||||
<< entry_it->second.ObjType() << "'";
|
||||
JsonType& json_entry = *json_val;
|
||||
|
||||
op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, entry_it->second);
|
||||
db_slice.PreUpdate(db_index, entry_it);
|
||||
|
||||
// Run the update operation on this entry
|
||||
|
@ -152,6 +160,7 @@ OpStatus UpdateEntry(const OpArgs& op_args, std::string_view key, std::string_vi
|
|||
OpStatus res = verify_op(json_entry);
|
||||
if (res == OpStatus::OK) {
|
||||
db_slice.PostUpdate(db_index, entry_it, key);
|
||||
op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, entry_it->second);
|
||||
}
|
||||
|
||||
return res;
|
||||
|
|
|
@ -74,7 +74,7 @@ SearchDocData JsonAccessor::Serialize() const {
|
|||
return out;
|
||||
}
|
||||
|
||||
unique_ptr<BaseAccessor> GetAccessor(const OpArgs& op_args, const PrimeValue& pv) {
|
||||
unique_ptr<BaseAccessor> GetAccessor(const DbContext& db_cntx, const PrimeValue& pv) {
|
||||
DCHECK(pv.ObjType() == OBJ_HASH || pv.ObjType() == OBJ_JSON);
|
||||
|
||||
if (pv.ObjType() == OBJ_JSON) {
|
||||
|
@ -86,7 +86,7 @@ unique_ptr<BaseAccessor> GetAccessor(const OpArgs& op_args, const PrimeValue& pv
|
|||
auto ptr = reinterpret_cast<ListPackAccessor::LpPtr>(pv.RObjPtr());
|
||||
return make_unique<ListPackAccessor>(ptr);
|
||||
} else {
|
||||
auto* sm = container_utils::GetStringMap(pv, op_args.db_cntx);
|
||||
auto* sm = container_utils::GetStringMap(pv, db_cntx);
|
||||
return make_unique<StringMapAccessor>(sm);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -65,6 +65,6 @@ struct JsonAccessor : public BaseAccessor {
|
|||
};
|
||||
|
||||
// Get accessor for value
|
||||
std::unique_ptr<BaseAccessor> GetAccessor(const OpArgs& op_args, const PrimeValue& pv);
|
||||
std::unique_ptr<BaseAccessor> GetAccessor(const DbContext& db_cntx, const PrimeValue& pv);
|
||||
|
||||
} // namespace dfly
|
||||
|
|
|
@ -34,7 +34,7 @@ void TraverseAllMatching(const DocIndex& index, const OpArgs& op_args, F&& f) {
|
|||
if (key.rfind(index.prefix, 0) != 0)
|
||||
return;
|
||||
|
||||
auto accessor = GetAccessor(op_args, pv);
|
||||
auto accessor = GetAccessor(op_args.db_cntx, pv);
|
||||
f(key, accessor.get());
|
||||
};
|
||||
|
||||
|
@ -64,13 +64,15 @@ ShardDocIndex::DocId ShardDocIndex::DocKeyIndex::Add(string_view key) {
|
|||
return id;
|
||||
}
|
||||
|
||||
void ShardDocIndex::DocKeyIndex::Delete(string_view key) {
|
||||
ShardDocIndex::DocId ShardDocIndex::DocKeyIndex::Remove(string_view key) {
|
||||
DCHECK_GT(ids_.count(key), 0u);
|
||||
|
||||
DocId id = ids_.find(key)->second;
|
||||
keys_[id] = "";
|
||||
ids_.erase(key);
|
||||
free_ids_.push_back(id);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
string_view ShardDocIndex::DocKeyIndex::Get(DocId id) const {
|
||||
|
@ -84,6 +86,10 @@ uint8_t DocIndex::GetObjCode() const {
|
|||
return type == JSON ? OBJ_JSON : OBJ_HASH;
|
||||
}
|
||||
|
||||
bool DocIndex::Matches(string_view key, unsigned obj_code) const {
|
||||
return obj_code == GetObjCode() && key.rfind(prefix, 0) == 0;
|
||||
}
|
||||
|
||||
ShardDocIndex::ShardDocIndex(shared_ptr<DocIndex> index)
|
||||
: base_{index}, indices_{index->schema}, key_index_{} {
|
||||
}
|
||||
|
@ -93,6 +99,21 @@ void ShardDocIndex::Init(const OpArgs& op_args) {
|
|||
TraverseAllMatching(*base_, op_args, cb);
|
||||
}
|
||||
|
||||
void ShardDocIndex::AddDoc(string_view key, const DbContext& db_cntx, const PrimeValue& pv) {
|
||||
auto accessor = GetAccessor(db_cntx, pv);
|
||||
indices_.Add(key_index_.Add(key), accessor.get());
|
||||
}
|
||||
|
||||
void ShardDocIndex::RemoveDoc(string_view key, const DbContext& db_cntx, const PrimeValue& pv) {
|
||||
auto accessor = GetAccessor(db_cntx, pv);
|
||||
DocId id = key_index_.Remove(key);
|
||||
indices_.Remove(id, accessor.get());
|
||||
}
|
||||
|
||||
bool ShardDocIndex::Matches(string_view key, unsigned obj_code) const {
|
||||
return base_->Matches(key, obj_code);
|
||||
}
|
||||
|
||||
SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& params,
|
||||
search::SearchAlgorithm* search_algo) const {
|
||||
auto& db_slice = op_args.shard->db_slice();
|
||||
|
@ -104,7 +125,7 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
|
|||
auto key = key_index_.Get(doc);
|
||||
auto it = db_slice.Find(op_args.db_cntx, key, base_->GetObjCode());
|
||||
CHECK(it) << "Expected key: " << key << " to exist";
|
||||
auto doc_access = GetAccessor(op_args, (*it)->second);
|
||||
auto doc_access = GetAccessor(op_args.db_cntx, (*it)->second);
|
||||
out.emplace_back(key, doc_access->Serialize());
|
||||
|
||||
// Scoring is not implemented yet, so we take just the first documents
|
||||
|
@ -115,16 +136,35 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
|
|||
return SearchResult{std::move(out), doc_ids.size()};
|
||||
}
|
||||
|
||||
ShardDocIndex* ShardDocIndices::Get(string_view name) const {
|
||||
ShardDocIndex* ShardDocIndices::GetIndex(string_view name) {
|
||||
auto it = indices_.find(name);
|
||||
return it != indices_.end() ? it->second.get() : nullptr;
|
||||
}
|
||||
|
||||
void ShardDocIndices::Init(const OpArgs& op_args, std::string_view name,
|
||||
shared_ptr<DocIndex> index_ptr) {
|
||||
void ShardDocIndices::InitIndex(const OpArgs& op_args, std::string_view name,
|
||||
shared_ptr<DocIndex> index_ptr) {
|
||||
auto shard_index = make_unique<ShardDocIndex>(index_ptr);
|
||||
auto [it, _] = indices_.emplace(name, move(shard_index));
|
||||
it->second->Init(op_args);
|
||||
|
||||
op_args.shard->db_slice().SetDocDeletionCallback(
|
||||
[this](string_view key, const DbContext& cntx, const PrimeValue& pv) {
|
||||
RemoveDoc(key, cntx, pv);
|
||||
});
|
||||
}
|
||||
|
||||
void ShardDocIndices::AddDoc(string_view key, const DbContext& db_cntx, const PrimeValue& pv) {
|
||||
for (auto& [_, index] : indices_) {
|
||||
if (index->Matches(key, pv.ObjType()))
|
||||
index->AddDoc(key, db_cntx, pv);
|
||||
}
|
||||
}
|
||||
|
||||
void ShardDocIndices::RemoveDoc(string_view key, const DbContext& db_cntx, const PrimeValue& pv) {
|
||||
for (auto& [_, index] : indices_) {
|
||||
if (index->Matches(key, pv.ObjType()))
|
||||
index->RemoveDoc(key, db_cntx, pv);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace dfly
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include "core/search/search.h"
|
||||
#include "server/common.h"
|
||||
#include "server/table.h"
|
||||
|
||||
namespace dfly {
|
||||
|
||||
|
@ -38,6 +39,9 @@ struct DocIndex {
|
|||
// Get numeric OBJ_ code
|
||||
uint8_t GetObjCode() const;
|
||||
|
||||
// Return true if the following document (key, obj_code) is tracked by this index.
|
||||
bool Matches(std::string_view key, unsigned obj_code) const;
|
||||
|
||||
search::Schema schema;
|
||||
std::string prefix{};
|
||||
DataType type{HASH};
|
||||
|
@ -50,7 +54,7 @@ class ShardDocIndex {
|
|||
// DocKeyIndex manages mapping document keys to ids and vice versa through a simple interface.
|
||||
struct DocKeyIndex {
|
||||
DocId Add(std::string_view key);
|
||||
void Delete(std::string_view key);
|
||||
DocId Remove(std::string_view key);
|
||||
std::string_view Get(DocId id) const;
|
||||
|
||||
private:
|
||||
|
@ -70,6 +74,12 @@ class ShardDocIndex {
|
|||
// Initialize index. Traverses all matching documents and assigns ids.
|
||||
void Init(const OpArgs& op_args);
|
||||
|
||||
// Return whether base index matches
|
||||
bool Matches(std::string_view key, unsigned obj_code) const;
|
||||
|
||||
void AddDoc(std::string_view key, const DbContext& db_cntx, const PrimeValue& pv);
|
||||
void RemoveDoc(std::string_view key, const DbContext& db_cntx, const PrimeValue& pv);
|
||||
|
||||
private:
|
||||
std::shared_ptr<const DocIndex> base_;
|
||||
search::FieldIndices indices_;
|
||||
|
@ -79,8 +89,13 @@ class ShardDocIndex {
|
|||
// Stores shard doc indices by name on a specific shard.
|
||||
class ShardDocIndices {
|
||||
public:
|
||||
ShardDocIndex* Get(std::string_view name) const;
|
||||
void Init(const OpArgs& op_args, std::string_view name, std::shared_ptr<DocIndex> index);
|
||||
// Get sharded document index by its name
|
||||
ShardDocIndex* GetIndex(std::string_view name);
|
||||
// Init index: create shard local state for given index with given name
|
||||
void InitIndex(const OpArgs& op_args, std::string_view name, std::shared_ptr<DocIndex> index);
|
||||
|
||||
void AddDoc(std::string_view key, const DbContext& db_cnt, const PrimeValue& pv);
|
||||
void RemoveDoc(std::string_view key, const DbContext& db_cnt, const PrimeValue& pv);
|
||||
|
||||
private:
|
||||
absl::flat_hash_map<std::string, std::unique_ptr<ShardDocIndex>> indices_;
|
||||
|
|
|
@ -146,7 +146,7 @@ void SearchFamily::FtCreate(CmdArgList args, ConnectionContext* cntx) {
|
|||
|
||||
auto idx_ptr = make_shared<DocIndex>(move(index));
|
||||
cntx->transaction->ScheduleSingleHop([idx_name, idx_ptr](auto* tx, auto* es) {
|
||||
es->search_indices()->Init(tx->GetOpArgs(es), idx_name, idx_ptr);
|
||||
es->search_indices()->InitIndex(tx->GetOpArgs(es), idx_name, idx_ptr);
|
||||
return OpStatus::OK;
|
||||
});
|
||||
|
||||
|
@ -170,7 +170,7 @@ void SearchFamily::FtSearch(CmdArgList args, ConnectionContext* cntx) {
|
|||
vector<SearchResult> docs(shard_set->size());
|
||||
|
||||
cntx->transaction->ScheduleSingleHop([&](Transaction* t, EngineShard* es) {
|
||||
if (auto* index = es->search_indices()->Get(index_name); index)
|
||||
if (auto* index = es->search_indices()->GetIndex(index_name); index)
|
||||
docs[es->shard_id()] = index->Search(t->GetOpArgs(es), *params, &search_algo);
|
||||
else
|
||||
index_not_found.store(true, memory_order_relaxed);
|
||||
|
|
|
@ -72,8 +72,7 @@ TEST_F(SearchFamilyTest, Simple) {
|
|||
Run({"hset", "d:2", "foo", "bar", "k", "v"});
|
||||
Run({"hset", "d:3", "foo", "bad", "k", "v"});
|
||||
|
||||
EXPECT_EQ(Run({"ft.create", "i1", "ON", "HASH", "PREFIX", "1", "d:", "SCHEMA", "foo", "TEXT", "k",
|
||||
"TEXT"}),
|
||||
EXPECT_EQ(Run({"ft.create", "i1", "PREFIX", "1", "d:", "SCHEMA", "foo", "TEXT", "k", "TEXT"}),
|
||||
"OK");
|
||||
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "@foo:bar"}), AreDocIds("d:2"));
|
||||
|
@ -209,4 +208,55 @@ TEST_F(SearchFamilyTest, TestLimit) {
|
|||
EXPECT_THAT(resp, ArrLen(3 * 2 + 1));
|
||||
}
|
||||
|
||||
TEST_F(SearchFamilyTest, SimpleUpdates) {
|
||||
EXPECT_EQ(Run({"ft.create", "i1", "schema", "title", "text", "visits", "numeric"}), "OK");
|
||||
|
||||
Run({"hset", "d:1", "title", "Dragonfly article", "visits", "100"});
|
||||
Run({"hset", "d:2", "title", "Butterfly observations", "visits", "50"});
|
||||
Run({"hset", "d:3", "title", "Bumblebee studies", "visits", "30"});
|
||||
|
||||
// Check values above were added to the index
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "article | observations | studies"}),
|
||||
AreDocIds("d:1", "d:2", "d:3"));
|
||||
|
||||
// Update title - text value
|
||||
{
|
||||
Run({"hset", "d:2", "title", "Butterfly studies"});
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "observations"}), kNoResults);
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "studies"}), AreDocIds("d:2", "d:3"));
|
||||
|
||||
Run({"hset", "d:1", "title", "Upcoming Dragonfly presentation"});
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "article"}), kNoResults);
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "upcoming presentation"}), AreDocIds("d:1"));
|
||||
|
||||
Run({"hset", "d:3", "title", "Secret bumblebee research"});
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "studies"}), AreDocIds("d:2"));
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "secret research"}), AreDocIds("d:3"));
|
||||
}
|
||||
|
||||
// Update visits - numeric value
|
||||
{
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "@visits:[50 1000]"}), AreDocIds("d:1", "d:2"));
|
||||
|
||||
Run({"hset", "d:3", "visits", "75"});
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "@visits:[0 49]"}), kNoResults);
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "@visits:[50 1000]"}), AreDocIds("d:1", "d:2", "d:3"));
|
||||
|
||||
Run({"hset", "d:1", "visits", "125"});
|
||||
Run({"hset", "d:2", "visits", "150"});
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "@visits:[100 1000]"}), AreDocIds("d:1", "d:2"));
|
||||
|
||||
Run({"hset", "d:3", "visits", "175"});
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "@visits:[0 100]"}), kNoResults);
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "@visits:[150 1000]"}), AreDocIds("d:2", "d:3"));
|
||||
}
|
||||
|
||||
// Delete documents
|
||||
{
|
||||
Run({"del", "d:2", "d:3"});
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "dragonfly"}), AreDocIds("d:1"));
|
||||
EXPECT_THAT(Run({"ft.search", "i1", "butterfly | bumblebee"}), kNoResults);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace dfly
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue