chore: get rid of lock keys (#2894)

* chore: get rid of lock keys

1. Introduce LockTag, a type representing the part of the key that is used for locking.
2. Hash keys once in each transaction.
3. Expose swap_memory_bytes metric.

---------

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2024-04-21 11:34:42 +03:00 committed by GitHub
parent 9b9c32c91d
commit 2ff7ff9841
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 136 additions and 133 deletions

View file

@ -69,8 +69,7 @@ struct LockTagOptions {
struct KeyLockArgs { struct KeyLockArgs {
DbIndex db_index = 0; DbIndex db_index = 0;
ArgSlice args; absl::Span<const LockFp> fps;
unsigned key_step = 1;
}; };
// Describes key indices. // Describes key indices.
@ -118,7 +117,7 @@ struct OpArgs {
} }
}; };
// A strong type for a lock tag. Helps to disambiguide between keys and the parts of the // A strong type for a lock tag. Helps to disambiguate between keys and the parts of the
// keys that are used for locking. // keys that are used for locking.
class LockTag { class LockTag {
std::string_view str_; std::string_view str_;

View file

@ -989,70 +989,56 @@ size_t DbSlice::DbSize(DbIndex db_ind) const {
} }
bool DbSlice::Acquire(IntentLock::Mode mode, const KeyLockArgs& lock_args) { bool DbSlice::Acquire(IntentLock::Mode mode, const KeyLockArgs& lock_args) {
if (lock_args.args.empty()) { // Can be empty for NO_KEY_TRANSACTIONAL commands. if (lock_args.fps.empty()) { // Can be empty for NO_KEY_TRANSACTIONAL commands.
return true; return true;
} }
DCHECK_GT(lock_args.key_step, 0u);
auto& lt = db_arr_[lock_args.db_index]->trans_locks; auto& lt = db_arr_[lock_args.db_index]->trans_locks;
bool lock_acquired = true; bool lock_acquired = true;
if (lock_args.args.size() == 1) { if (lock_args.fps.size() == 1) {
LockTag tag(lock_args.args.front()); lock_acquired = lt.Acquire(lock_args.fps.front(), mode);
lock_acquired = lt.Acquire(tag, mode); uniq_fps_ = {lock_args.fps.front()}; // needed only for tests.
uniq_keys_ = {string_view(tag)}; // needed only for tests.
} else { } else {
uniq_keys_.clear(); uniq_fps_.clear();
for (size_t i = 0; i < lock_args.args.size(); i += lock_args.key_step) { for (LockFp fp : lock_args.fps) {
LockTag tag(lock_args.args[i]); if (uniq_fps_.insert(fp).second) {
if (uniq_keys_.insert(string_view(tag)).second) { lock_acquired &= lt.Acquire(fp, mode);
lock_acquired &= lt.Acquire(tag, mode);
} }
} }
} }
DVLOG(2) << "Acquire " << IntentLock::ModeName(mode) << " for " << lock_args.args[0] DVLOG(2) << "Acquire " << IntentLock::ModeName(mode) << " for " << lock_args.fps[0]
<< " has_acquired: " << lock_acquired; << " has_acquired: " << lock_acquired;
return lock_acquired; return lock_acquired;
} }
void DbSlice::ReleaseNormalized(IntentLock::Mode mode, DbIndex db_index, LockTag tag) {
DVLOG(2) << "Release " << IntentLock::ModeName(mode) << " "
<< " for " << string_view(tag);
auto& lt = db_arr_[db_index]->trans_locks;
lt.Release(tag, mode);
}
void DbSlice::Release(IntentLock::Mode mode, const KeyLockArgs& lock_args) { void DbSlice::Release(IntentLock::Mode mode, const KeyLockArgs& lock_args) {
if (lock_args.args.empty()) { // Can be empty for NO_KEY_TRANSACTIONAL commands. if (lock_args.fps.empty()) { // Can be empty for NO_KEY_TRANSACTIONAL commands.
return; return;
} }
DVLOG(2) << "Release " << IntentLock::ModeName(mode) << " for " << lock_args.args[0]; DVLOG(2) << "Release " << IntentLock::ModeName(mode) << " for " << lock_args.fps[0];
if (lock_args.args.size() == 1) { auto& lt = db_arr_[lock_args.db_index]->trans_locks;
string_view key = lock_args.args.front(); if (lock_args.fps.size() == 1) {
ReleaseNormalized(mode, lock_args.db_index, LockTag{key}); uint64_t fp = lock_args.fps.front();
lt.Release(fp, mode);
} else { } else {
auto& lt = db_arr_[lock_args.db_index]->trans_locks; uniq_fps_.clear();
uniq_keys_.clear(); for (LockFp fp : lock_args.fps) {
for (size_t i = 0; i < lock_args.args.size(); i += lock_args.key_step) { if (uniq_fps_.insert(fp).second) {
LockTag tag(lock_args.args[i]); lt.Release(fp, mode);
if (uniq_keys_.insert(string_view(tag)).second) {
lt.Release(tag, mode);
} }
} }
} }
uniq_keys_.clear(); uniq_fps_.clear();
} }
bool DbSlice::CheckLock(IntentLock::Mode mode, DbIndex dbid, string_view key) const { bool DbSlice::CheckLock(IntentLock::Mode mode, DbIndex dbid, uint64_t fp) const {
const auto& lt = db_arr_[dbid]->trans_locks; const auto& lt = db_arr_[dbid]->trans_locks;
LockTag tag(key); auto lock = lt.Find(fp);
auto lock = lt.Find(tag);
if (lock) { if (lock) {
return lock->Check(mode); return lock->Check(mode);
} }

View file

@ -362,11 +362,13 @@ class DbSlice {
void OnCbFinish(); void OnCbFinish();
bool Acquire(IntentLock::Mode m, const KeyLockArgs& lock_args); bool Acquire(IntentLock::Mode m, const KeyLockArgs& lock_args);
void Release(IntentLock::Mode m, const KeyLockArgs& lock_args); void Release(IntentLock::Mode m, const KeyLockArgs& lock_args);
// Returns true if the key can be locked under m. Does not lock. // Returns true if the key can be locked under m. Does not lock.
bool CheckLock(IntentLock::Mode m, DbIndex dbid, std::string_view key) const; bool CheckLock(IntentLock::Mode mode, DbIndex dbid, uint64_t fp) const;
bool CheckLock(IntentLock::Mode mode, DbIndex dbid, std::string_view key) const {
return CheckLock(mode, dbid, LockTag(key).Fingerprint());
}
size_t db_array_size() const { size_t db_array_size() const {
return db_arr_.size(); return db_arr_.size();
@ -448,8 +450,8 @@ class DbSlice {
} }
// Test hook to inspect last locked keys. // Test hook to inspect last locked keys.
absl::flat_hash_set<std::string_view> TEST_GetLastLockedKeys() const { const auto& TEST_GetLastLockedFps() const {
return uniq_keys_; return uniq_fps_;
} }
void RegisterWatchedKey(DbIndex db_indx, std::string_view key, void RegisterWatchedKey(DbIndex db_indx, std::string_view key,
@ -477,9 +479,6 @@ class DbSlice {
void PerformDeletion(Iterator del_it, DbTable* table); void PerformDeletion(Iterator del_it, DbTable* table);
void PerformDeletion(PrimeIterator del_it, DbTable* table); void PerformDeletion(PrimeIterator del_it, DbTable* table);
// Releases a single tag.
void ReleaseNormalized(IntentLock::Mode m, DbIndex db_index, LockTag tag);
private: private:
void PreUpdate(DbIndex db_ind, Iterator it); void PreUpdate(DbIndex db_ind, Iterator it);
void PostUpdate(DbIndex db_ind, Iterator it, std::string_view key, size_t orig_size); void PostUpdate(DbIndex db_ind, Iterator it, std::string_view key, size_t orig_size);
@ -552,7 +551,7 @@ class DbSlice {
DbTableArray db_arr_; DbTableArray db_arr_;
// Used in temporary computations in Acquire/Release. // Used in temporary computations in Acquire/Release.
mutable absl::flat_hash_set<std::string_view> uniq_keys_; mutable absl::flat_hash_set<uint64_t> uniq_fps_;
// ordered from the smallest to largest version. // ordered from the smallest to largest version.
std::vector<std::pair<uint64_t, ChangeCallback>> change_cb_; std::vector<std::pair<uint64_t, ChangeCallback>> change_cb_;

View file

@ -174,18 +174,18 @@ class RoundRobinSharder {
}; };
bool HasContendedLocks(ShardId shard_id, Transaction* trx, const DbTable* table) { bool HasContendedLocks(ShardId shard_id, Transaction* trx, const DbTable* table) {
auto is_contended = [table](LockTag tag) { return table->trans_locks.Find(tag)->IsContended(); }; auto is_contended = [table](LockFp fp) { return table->trans_locks.Find(fp)->IsContended(); };
if (trx->IsMulti()) { if (trx->IsMulti()) {
auto keys = trx->GetMultiKeys(); auto fps = trx->GetMultiFps();
for (string_view key : keys) { for (const auto& [sid, fp] : fps) {
if (Shard(key, shard_set->size()) == shard_id && is_contended(LockTag{key})) if (sid == shard_id && is_contended(fp))
return true; return true;
} }
} else { } else {
KeyLockArgs lock_args = trx->GetLockArgs(shard_id); KeyLockArgs lock_args = trx->GetLockArgs(shard_id);
for (size_t i = 0; i < lock_args.args.size(); i += lock_args.key_step) { for (size_t i = 0; i < lock_args.fps.size(); ++i) {
if (is_contended(LockTag{lock_args.args[i]})) if (is_contended(lock_args.fps[i]))
return true; return true;
} }
} }

View file

@ -1810,7 +1810,7 @@ optional<bool> StartMultiEval(DbIndex dbid, CmdArgList keys, ScriptMgr::ScriptPa
trans->StartMultiGlobal(dbid); trans->StartMultiGlobal(dbid);
return true; return true;
case Transaction::LOCK_AHEAD: case Transaction::LOCK_AHEAD:
trans->StartMultiLockedAhead(dbid, CmdArgVec{keys.begin(), keys.end()}); trans->StartMultiLockedAhead(dbid, keys);
return true; return true;
case Transaction::NON_ATOMIC: case Transaction::NON_ATOMIC:
trans->StartMultiNonAtomic(); trans->StartMultiNonAtomic();
@ -2087,9 +2087,10 @@ void StartMultiExec(DbIndex dbid, Transaction* trans, ConnectionState::ExecInfo*
case Transaction::GLOBAL: case Transaction::GLOBAL:
trans->StartMultiGlobal(dbid); trans->StartMultiGlobal(dbid);
break; break;
case Transaction::LOCK_AHEAD: case Transaction::LOCK_AHEAD: {
trans->StartMultiLockedAhead(dbid, CollectAllKeys(exec_info)); auto vec = CollectAllKeys(exec_info);
break; trans->StartMultiLockedAhead(dbid, absl::MakeSpan(vec));
} break;
case Transaction::NON_ATOMIC: case Transaction::NON_ATOMIC:
trans->StartMultiNonAtomic(); trans->StartMultiNonAtomic();
break; break;

View file

@ -1088,6 +1088,8 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
if (sdata_res.has_value()) { if (sdata_res.has_value()) {
size_t rss = sdata_res->vm_rss + sdata_res->hugetlb_pages; size_t rss = sdata_res->vm_rss + sdata_res->hugetlb_pages;
AppendMetricWithoutLabels("used_memory_rss_bytes", "", rss, MetricType::GAUGE, &resp->body()); AppendMetricWithoutLabels("used_memory_rss_bytes", "", rss, MetricType::GAUGE, &resp->body());
AppendMetricWithoutLabels("swap_memory_bytes", "", sdata_res->vm_swap, MetricType::GAUGE,
&resp->body());
} else { } else {
LOG_FIRST_N(ERROR, 10) << "Error fetching /proc/self/status stats. error " LOG_FIRST_N(ERROR, 10) << "Error fetching /proc/self/status stats. error "
<< sdata_res.error().message(); << sdata_res.error().message();

View file

@ -65,16 +65,15 @@ std::optional<const IntentLock> LockTable::Find(LockTag tag) const {
return std::nullopt; return std::nullopt;
} }
bool LockTable::Acquire(LockTag tag, IntentLock::Mode mode) { std::optional<const IntentLock> LockTable::Find(uint64_t fp) const {
LockFp fp = tag.Fingerprint(); if (auto it = locks_.find(fp); it != locks_.end())
auto [it, inserted] = locks_.try_emplace(fp); return it->second;
return it->second.Acquire(mode); return std::nullopt;
} }
void LockTable::Release(LockTag tag, IntentLock::Mode mode) { void LockTable::Release(uint64_t fp, IntentLock::Mode mode) {
LockFp fp = tag.Fingerprint();
auto it = locks_.find(fp); auto it = locks_.find(fp);
DCHECK(it != locks_.end()) << string_view(tag); DCHECK(it != locks_.end()) << fp;
it->second.Release(mode); it->second.Release(mode);
if (it->second.IsFree()) if (it->second.IsFree())

View file

@ -87,8 +87,11 @@ class LockTable {
std::optional<const IntentLock> Find(LockTag tag) const; std::optional<const IntentLock> Find(LockTag tag) const;
std::optional<const IntentLock> Find(LockFp fp) const; std::optional<const IntentLock> Find(LockFp fp) const;
bool Acquire(LockTag tag, IntentLock::Mode mode); bool Acquire(LockFp fp, IntentLock::Mode mode) {
void Release(LockTag tag, IntentLock::Mode mode); return locks_[fp].Acquire(mode);
}
void Release(LockFp fp, IntentLock::Mode mode);
auto begin() const { auto begin() const {
return locks_.cbegin(); return locks_.cbegin();

View file

@ -628,9 +628,9 @@ vector<string> BaseFamilyTest::StrArray(const RespExpr& expr) {
return res; return res;
} }
absl::flat_hash_set<string> BaseFamilyTest::GetLastUsedKeys() { vector<LockFp> BaseFamilyTest::GetLastFps() {
fb2::Mutex mu; fb2::Mutex mu;
absl::flat_hash_set<string> result; vector<LockFp> result;
auto add_keys = [&](ProactorBase* proactor) { auto add_keys = [&](ProactorBase* proactor) {
EngineShard* shard = EngineShard::tlocal(); EngineShard* shard = EngineShard::tlocal();
@ -639,8 +639,8 @@ absl::flat_hash_set<string> BaseFamilyTest::GetLastUsedKeys() {
} }
lock_guard lk(mu); lock_guard lk(mu);
for (string_view key : shard->db_slice().TEST_GetLastLockedKeys()) { for (auto fp : shard->db_slice().TEST_GetLastLockedFps()) {
result.insert(string(key)); result.push_back(fp);
} }
}; };
shard_set->pool()->AwaitFiberOnAll(add_keys); shard_set->pool()->AwaitFiberOnAll(add_keys);
@ -677,13 +677,15 @@ fb2::Fiber BaseFamilyTest::ExpectConditionWithSuspension(const std::function<boo
} }
util::fb2::Fiber BaseFamilyTest::ExpectUsedKeys(const std::vector<std::string_view>& keys) { util::fb2::Fiber BaseFamilyTest::ExpectUsedKeys(const std::vector<std::string_view>& keys) {
absl::flat_hash_set<string> own_keys; vector<LockFp> key_fps;
for (const auto& k : keys) { for (const auto& k : keys) {
own_keys.insert(string(k)); key_fps.push_back(LockTag(k).Fingerprint());
} }
sort(key_fps.begin(), key_fps.end());
auto cb = [=] { auto cb = [=] {
auto last_keys = GetLastUsedKeys(); auto last_fps = GetLastFps();
return last_keys == own_keys; sort(last_fps.begin(), last_fps.end());
return last_fps == key_fps;
}; };
return ExpectConditionWithSuspension(std::move(cb)); return ExpectConditionWithSuspension(std::move(cb));

View file

@ -144,7 +144,7 @@ class BaseFamilyTest : public ::testing::Test {
const facade::Connection::InvalidationMessage& GetInvalidationMessage(std::string_view conn_id, const facade::Connection::InvalidationMessage& GetInvalidationMessage(std::string_view conn_id,
size_t index) const; size_t index) const;
static absl::flat_hash_set<std::string> GetLastUsedKeys(); static std::vector<LockFp> GetLastFps();
static void ExpectConditionWithinTimeout(const std::function<bool()>& condition, static void ExpectConditionWithinTimeout(const std::function<bool()>& condition,
absl::Duration timeout = absl::Seconds(10)); absl::Duration timeout = absl::Seconds(10));
util::fb2::Fiber ExpectConditionWithSuspension(const std::function<bool()>& condition); util::fb2::Fiber ExpectConditionWithSuspension(const std::function<bool()>& condition);

View file

@ -83,9 +83,8 @@ uint16_t trans_id(const Transaction* ptr) {
} }
bool CheckLocks(const DbSlice& db_slice, IntentLock::Mode mode, const KeyLockArgs& lock_args) { bool CheckLocks(const DbSlice& db_slice, IntentLock::Mode mode, const KeyLockArgs& lock_args) {
for (size_t i = 0; i < lock_args.args.size(); i += lock_args.key_step) { for (LockFp fp : lock_args.fps) {
string_view s = lock_args.args[i]; if (!db_slice.CheckLock(mode, lock_args.db_index, fp))
if (!db_slice.CheckLock(mode, lock_args.db_index, s))
return false; return false;
} }
return true; return true;
@ -206,8 +205,9 @@ void Transaction::BuildShardIndex(const KeyIndex& key_index, std::vector<PerShar
string_view key = ArgS(args, i); string_view key = ArgS(args, i);
unique_slot_checker_.Add(key); unique_slot_checker_.Add(key);
uint32_t sid = Shard(key, shard_data_.size()); uint32_t sid = Shard(key, shard_data_.size());
add(sid, i); shard_index[sid].key_step = key_index.step;
add(sid, i);
DCHECK_LE(key_index.step, 2u); DCHECK_LE(key_index.step, 2u);
if (key_index.step == 2) { // Handle value associated with preceding key. if (key_index.step == 2) { // Handle value associated with preceding key.
add(sid, ++i); add(sid, ++i);
@ -218,6 +218,9 @@ void Transaction::BuildShardIndex(const KeyIndex& key_index, std::vector<PerShar
void Transaction::InitShardData(absl::Span<const PerShardCache> shard_index, size_t num_args, void Transaction::InitShardData(absl::Span<const PerShardCache> shard_index, size_t num_args,
bool rev_mapping) { bool rev_mapping) {
kv_args_.reserve(num_args); kv_args_.reserve(num_args);
DCHECK(kv_fp_.empty());
kv_fp_.reserve(num_args);
if (rev_mapping) if (rev_mapping)
reverse_index_.reserve(num_args); reverse_index_.reserve(num_args);
@ -229,6 +232,8 @@ void Transaction::InitShardData(absl::Span<const PerShardCache> shard_index, siz
sd.arg_count = si.args.size(); sd.arg_count = si.args.size();
sd.arg_start = kv_args_.size(); sd.arg_start = kv_args_.size();
sd.fp_start = kv_fp_.size();
sd.fp_count = 0;
// Multi transactions can re-initialize on different shards, so clear ACTIVE flag. // Multi transactions can re-initialize on different shards, so clear ACTIVE flag.
DCHECK_EQ(sd.local_mask & ACTIVE, 0); DCHECK_EQ(sd.local_mask & ACTIVE, 0);
@ -242,7 +247,12 @@ void Transaction::InitShardData(absl::Span<const PerShardCache> shard_index, siz
unique_shard_id_ = i; unique_shard_id_ = i;
for (size_t j = 0; j < si.args.size(); ++j) { for (size_t j = 0; j < si.args.size(); ++j) {
kv_args_.push_back(si.args[j]); string_view arg = si.args[j];
kv_args_.push_back(arg);
if (si.key_step == 1 || j % si.key_step == 0) {
kv_fp_.push_back(LockTag(arg).Fingerprint());
sd.fp_count++;
}
if (rev_mapping) if (rev_mapping)
reverse_index_.push_back(si.original_index[j]); reverse_index_.push_back(si.original_index[j]);
} }
@ -251,38 +261,34 @@ void Transaction::InitShardData(absl::Span<const PerShardCache> shard_index, siz
DCHECK_EQ(kv_args_.size(), num_args); DCHECK_EQ(kv_args_.size(), num_args);
} }
void Transaction::LaunderKeyStorage(CmdArgVec* keys) { void Transaction::PrepareMultiFps(CmdArgList keys) {
DCHECK_EQ(multi_->mode, LOCK_AHEAD); DCHECK_EQ(multi_->mode, LOCK_AHEAD);
DCHECK_GT(keys->size(), 0u); DCHECK_GT(keys.size(), 0u);
auto& m_keys = multi_->frozen_keys; auto& tag_fps = multi_->tag_fps;
auto& m_keys_set = multi_->frozen_keys_set;
// Reserve enough space, so pointers from frozen_keys_set are not invalidated tag_fps.reserve(keys.size());
m_keys.reserve(keys->size()); for (MutableSlice key : keys) {
string_view sv = facade::ToSV(key);
for (MutableSlice key : *keys) { ShardId sid = Shard(sv, shard_set->size());
string_view key_s = string_view(LockTag{facade::ToSV(key)}); tag_fps.emplace(sid, LockTag(sv).Fingerprint());
// Insert copied string view, not original. This is why "try insert" is not allowed
if (!m_keys_set.contains(key_s))
m_keys_set.insert(m_keys.emplace_back(key_s));
} }
// Copy mutable pointers into keys
keys->clear();
for (string& key : m_keys)
keys->emplace_back(key.data(), key.size());
} }
void Transaction::StoreKeysInArgs(const KeyIndex& key_index) { void Transaction::StoreKeysInArgs(const KeyIndex& key_index) {
DCHECK(!key_index.bonus); DCHECK(!key_index.bonus);
DCHECK(key_index.step == 1u || key_index.step == 2u); DCHECK(key_index.step == 1u || key_index.step == 2u);
DCHECK(kv_fp_.empty());
// even for a single key we may have multiple arguments per key (MSET). // even for a single key we may have multiple arguments per key (MSET).
for (unsigned j = key_index.start; j < key_index.end; j++) { for (unsigned j = key_index.start; j < key_index.end; j++) {
kv_args_.push_back(ArgS(full_args_, j)); string_view arg = ArgS(full_args_, j);
if (key_index.step == 2) kv_args_.push_back(arg);
kv_fp_.push_back(LockTag(arg).Fingerprint());
if (key_index.step == 2) {
kv_args_.push_back(ArgS(full_args_, ++j)); kv_args_.push_back(ArgS(full_args_, ++j));
}
} }
if (key_index.has_reverse_mapping) { if (key_index.has_reverse_mapping) {
@ -339,7 +345,7 @@ void Transaction::InitByKeys(const KeyIndex& key_index) {
// Initialize shard data based on distributed arguments. // Initialize shard data based on distributed arguments.
InitShardData(shard_index, key_index.num_args(), key_index.has_reverse_mapping); InitShardData(shard_index, key_index.num_args(), key_index.has_reverse_mapping);
DCHECK(!multi_ || multi_->mode != LOCK_AHEAD || !multi_->frozen_keys.empty()); DCHECK(!multi_ || multi_->mode != LOCK_AHEAD || !multi_->tag_fps.empty());
DVLOG(1) << "InitByArgs " << DebugId() << " " << kv_args_.front(); DVLOG(1) << "InitByArgs " << DebugId() << " " << kv_args_.front();
@ -394,6 +400,7 @@ OpStatus Transaction::InitByArgs(DbIndex index, CmdArgList args) {
DCHECK_EQ(unique_shard_cnt_, 0u); DCHECK_EQ(unique_shard_cnt_, 0u);
DCHECK(kv_args_.empty()); DCHECK(kv_args_.empty());
DCHECK(kv_fp_.empty());
OpResult<KeyIndex> key_index = DetermineKeys(cid_, args); OpResult<KeyIndex> key_index = DetermineKeys(cid_, args);
if (!key_index) if (!key_index)
@ -442,7 +449,7 @@ void Transaction::StartMultiGlobal(DbIndex dbid) {
ScheduleInternal(); ScheduleInternal();
} }
void Transaction::StartMultiLockedAhead(DbIndex dbid, CmdArgVec keys, bool skip_scheduling) { void Transaction::StartMultiLockedAhead(DbIndex dbid, CmdArgList keys, bool skip_scheduling) {
DVLOG(1) << "StartMultiLockedAhead on " << keys.size() << " keys"; DVLOG(1) << "StartMultiLockedAhead on " << keys.size() << " keys";
DCHECK(multi_); DCHECK(multi_);
@ -451,9 +458,9 @@ void Transaction::StartMultiLockedAhead(DbIndex dbid, CmdArgVec keys, bool skip_
multi_->mode = LOCK_AHEAD; multi_->mode = LOCK_AHEAD;
multi_->lock_mode = LockMode(); multi_->lock_mode = LockMode();
LaunderKeyStorage(&keys); // Filter uniques and normalize PrepareMultiFps(keys);
InitBase(dbid, absl::MakeSpan(keys)); InitBase(dbid, keys);
InitByKeys(KeyIndex::Range(0, keys.size())); InitByKeys(KeyIndex::Range(0, keys.size()));
if (!skip_scheduling) if (!skip_scheduling)
@ -482,6 +489,7 @@ void Transaction::MultiSwitchCmd(const CommandId* cid) {
unique_shard_cnt_ = 0; unique_shard_cnt_ = 0;
kv_args_.clear(); kv_args_.clear();
kv_fp_.clear();
reverse_index_.clear(); reverse_index_.clear();
cid_ = cid; cid_ = cid;
@ -632,7 +640,6 @@ bool Transaction::RunInShard(EngineShard* shard, bool txq_ooo) {
// of the queue and notify the next one. // of the queue and notify the next one.
if (auto* bcontroller = shard->blocking_controller(); bcontroller) { if (auto* bcontroller = shard->blocking_controller(); bcontroller) {
if (awaked_prerun || was_suspended) { if (awaked_prerun || was_suspended) {
CHECK_EQ(largs.key_step, 1u);
bcontroller->FinalizeWatched(GetShardArgs(idx), this); bcontroller->FinalizeWatched(GetShardArgs(idx), this);
} }
@ -796,12 +803,9 @@ void Transaction::UnlockMulti() {
if ((coordinator_state_ & COORD_SCHED) == 0 || (coordinator_state_ & COORD_CONCLUDING) > 0) if ((coordinator_state_ & COORD_SCHED) == 0 || (coordinator_state_ & COORD_CONCLUDING) > 0)
return; return;
multi_->frozen_keys_set.clear(); vector<vector<LockFp>> sharded_keys(shard_set->size());
for (const auto& [sid, fp] : multi_->tag_fps) {
auto sharded_keys = make_shared<vector<vector<string_view>>>(shard_set->size()); sharded_keys[sid].emplace_back(fp);
for (string& key : multi_->frozen_keys) {
ShardId sid = Shard(key, sharded_keys->size());
(*sharded_keys)[sid].emplace_back(key);
} }
multi_->shard_journal_cnt = ServerState::tlocal()->journal() ? CalcMultiNumOfShardJournals() : 0; multi_->shard_journal_cnt = ServerState::tlocal()->journal() ? CalcMultiNumOfShardJournals() : 0;
@ -810,8 +814,9 @@ void Transaction::UnlockMulti() {
DCHECK_EQ(shard_data_.size(), shard_set->size()); DCHECK_EQ(shard_data_.size(), shard_set->size());
for (ShardId i = 0; i < shard_data_.size(); ++i) { for (ShardId i = 0; i < shard_data_.size(); ++i) {
shard_set->Add(i, [this, sharded_keys, i]() { vector<LockFp> fps = std::move(sharded_keys[i]);
this->UnlockMultiShardCb((*sharded_keys)[i], EngineShard::tlocal()); shard_set->Add(i, [this, fps = std::move(fps)]() {
this->UnlockMultiShardCb(fps, EngineShard::tlocal());
intrusive_ptr_release(this); intrusive_ptr_release(this);
}); });
} }
@ -935,9 +940,9 @@ void Transaction::Refurbish() {
cb_ptr_ = nullptr; cb_ptr_ = nullptr;
} }
const absl::flat_hash_set<std::string_view>& Transaction::GetMultiKeys() const { const absl::flat_hash_set<std::pair<ShardId, LockFp>>& Transaction::GetMultiFps() const {
DCHECK(multi_); DCHECK(multi_);
return multi_->frozen_keys_set; return multi_->tag_fps;
} }
void Transaction::FIX_ConcludeJournalExec() { void Transaction::FIX_ConcludeJournalExec() {
@ -1011,10 +1016,14 @@ optional<SlotId> Transaction::GetUniqueSlotId() const {
KeyLockArgs Transaction::GetLockArgs(ShardId sid) const { KeyLockArgs Transaction::GetLockArgs(ShardId sid) const {
KeyLockArgs res; KeyLockArgs res;
res.db_index = db_index_; res.db_index = db_index_;
res.key_step = cid_->opt_mask() & CO::INTERLEAVED_KEYS ? 2 : 1;
res.args = GetShardArgs(sid);
DCHECK(!res.args.empty() || (cid_->opt_mask() & CO::NO_KEY_TRANSACTIONAL));
if (unique_shard_cnt_ == 1) {
res.fps = {kv_fp_.data(), kv_fp_.size()};
} else {
const auto& sd = shard_data_[sid];
DCHECK_LE(sd.fp_start + sd.fp_count, kv_fp_.size());
res.fps = {kv_fp_.data() + sd.fp_start, sd.fp_count};
}
return res; return res;
} }
@ -1159,7 +1168,7 @@ bool Transaction::CancelShardCb(EngineShard* shard) {
} else { } else {
auto lock_args = GetLockArgs(shard->shard_id()); auto lock_args = GetLockArgs(shard->shard_id());
DCHECK(sd.local_mask & KEYLOCK_ACQUIRED); DCHECK(sd.local_mask & KEYLOCK_ACQUIRED);
DCHECK(!lock_args.args.empty()); DCHECK(!lock_args.fps.empty());
shard->db_slice().Release(LockMode(), lock_args); shard->db_slice().Release(LockMode(), lock_args);
sd.local_mask &= ~KEYLOCK_ACQUIRED; sd.local_mask &= ~KEYLOCK_ACQUIRED;
} }
@ -1296,8 +1305,7 @@ void Transaction::MultiReportJournalOnShard(EngineShard* shard) const {
} }
} }
void Transaction::UnlockMultiShardCb(absl::Span<const std::string_view> sharded_keys, void Transaction::UnlockMultiShardCb(absl::Span<const LockFp> fps, EngineShard* shard) {
EngineShard* shard) {
DCHECK(multi_ && multi_->lock_mode); DCHECK(multi_ && multi_->lock_mode);
MultiReportJournalOnShard(shard); MultiReportJournalOnShard(shard);
@ -1305,9 +1313,7 @@ void Transaction::UnlockMultiShardCb(absl::Span<const std::string_view> sharded_
if (multi_->mode == GLOBAL) { if (multi_->mode == GLOBAL) {
shard->shard_lock()->Release(IntentLock::EXCLUSIVE); shard->shard_lock()->Release(IntentLock::EXCLUSIVE);
} else { } else {
for (const auto& key : sharded_keys) { shard->db_slice().Release(*multi_->lock_mode, KeyLockArgs{db_index_, fps});
shard->db_slice().ReleaseNormalized(*multi_->lock_mode, db_index_, LockTag{key});
}
} }
ShardId sid = shard->shard_id(); ShardId sid = shard->shard_id();

View file

@ -223,7 +223,7 @@ class Transaction {
void StartMultiGlobal(DbIndex dbid); void StartMultiGlobal(DbIndex dbid);
// Start multi in LOCK_AHEAD mode with given keys. // Start multi in LOCK_AHEAD mode with given keys.
void StartMultiLockedAhead(DbIndex dbid, CmdArgVec keys, bool skip_scheduling = false); void StartMultiLockedAhead(DbIndex dbid, CmdArgList keys, bool skip_scheduling = false);
// Start multi in NON_ATOMIC mode. // Start multi in NON_ATOMIC mode.
void StartMultiNonAtomic(); void StartMultiNonAtomic();
@ -339,7 +339,7 @@ class Transaction {
void Refurbish(); void Refurbish();
// Get keys multi transaction was initialized with, normalized and unique // Get keys multi transaction was initialized with, normalized and unique
const absl::flat_hash_set<std::string_view>& GetMultiKeys() const; const absl::flat_hash_set<std::pair<ShardId, LockFp>>& GetMultiFps() const;
// Send journal EXEC opcode after a series of MULTI commands on the currently active shard // Send journal EXEC opcode after a series of MULTI commands on the currently active shard
void FIX_ConcludeJournalExec(); void FIX_ConcludeJournalExec();
@ -389,6 +389,10 @@ class Transaction {
uint32_t arg_start = 0; // Subspan in kv_args_ with local arguments. uint32_t arg_start = 0; // Subspan in kv_args_ with local arguments.
uint32_t arg_count = 0; uint32_t arg_count = 0;
// span into kv_fp_
uint32_t fp_start = 0;
uint32_t fp_count = 0;
// Position in the tx queue. OOO or cancelled schedules remove themselves by this index. // Position in the tx queue. OOO or cancelled schedules remove themselves by this index.
TxQueue::Iterator pq_pos = TxQueue::kEnd; TxQueue::Iterator pq_pos = TxQueue::kEnd;
@ -401,7 +405,7 @@ class Transaction {
} stats; } stats;
// Prevent "false sharing" between cache lines: occupy a full cache line (64 bytes) // Prevent "false sharing" between cache lines: occupy a full cache line (64 bytes)
char pad[64 - 5 * sizeof(uint32_t) - sizeof(Stats)]; char pad[64 - 7 * sizeof(uint32_t) - sizeof(Stats)];
}; };
static_assert(sizeof(PerShardData) == 64); // cacheline static_assert(sizeof(PerShardData) == 64); // cacheline
@ -412,9 +416,8 @@ class Transaction {
MultiMode mode; MultiMode mode;
std::optional<IntentLock::Mode> lock_mode; std::optional<IntentLock::Mode> lock_mode;
// Unique normalized keys used for scheduling the multi transaction. // Unique normalized fingerprints used for scheduling the multi transaction.
std::vector<std::string> frozen_keys; absl::flat_hash_set<std::pair<ShardId, LockFp>> tag_fps;
absl::flat_hash_set<std::string_view> frozen_keys_set; // point to frozen_keys
// Set if the multi command is concluding to avoid ambiguity with COORD_CONCLUDING // Set if the multi command is concluding to avoid ambiguity with COORD_CONCLUDING
bool concluding = false; bool concluding = false;
@ -439,6 +442,7 @@ class Transaction {
struct PerShardCache { struct PerShardCache {
std::vector<std::string_view> args; std::vector<std::string_view> args;
std::vector<uint32_t> original_index; std::vector<uint32_t> original_index;
unsigned key_step = 1;
void Clear() { void Clear() {
args.clear(); args.clear();
@ -487,9 +491,8 @@ class Transaction {
// Store all key index keys in args_. Used only for single shard initialization. // Store all key index keys in args_. Used only for single shard initialization.
void StoreKeysInArgs(const KeyIndex& key_index); void StoreKeysInArgs(const KeyIndex& key_index);
// Multi transactions unlock asynchronously, so they need to keep a copy of all they keys. // Multi transactions unlock asynchronously, so they need to keep fingerprints of keys.
// "Launder" keys by filtering uniques and replacing pointers with same lifetime as transaction. void PrepareMultiFps(CmdArgList keys);
void LaunderKeyStorage(CmdArgVec* keys);
void ScheduleInternal(); void ScheduleInternal();
@ -526,7 +529,7 @@ class Transaction {
// If journaling is enabled, report final exec opcode to finish the chain of commands. // If journaling is enabled, report final exec opcode to finish the chain of commands.
void MultiReportJournalOnShard(EngineShard* shard) const; void MultiReportJournalOnShard(EngineShard* shard) const;
void UnlockMultiShardCb(absl::Span<const std::string_view> sharded_keys, EngineShard* shard); void UnlockMultiShardCb(absl::Span<const LockFp> fps, EngineShard* shard);
// In a multi-command transaction, we determine the number of shard journals that we wrote entries // In a multi-command transaction, we determine the number of shard journals that we wrote entries
// to by updating the shard_journal_write vector during command execution. The total number of // to by updating the shard_journal_write vector during command execution. The total number of
@ -588,6 +591,9 @@ class Transaction {
// to what key. // to what key.
absl::InlinedVector<std::string_view, 4> kv_args_; absl::InlinedVector<std::string_view, 4> kv_args_;
// Fingerprints of keys, precomputed once during the transaction initialization.
absl::InlinedVector<LockFp, 4> kv_fp_;
// Stores the full undivided command. // Stores the full undivided command.
CmdArgList full_args_; CmdArgList full_args_;