From d608ec9c62b8278e7bf59dda948e67686a5b50ca Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Sun, 28 Jan 2024 12:19:15 +0200 Subject: [PATCH] chore: Introduce LockKey for LockTable (#2463) This should reduce allocations in a common case (not multi). In addition, rename Transaction::args_ to kv_args_. Signed-off-by: Vladislav Oleshko Co-authored-by: Vladislav --- src/server/db_slice.cc | 45 +++++++++++------------------- src/server/db_slice.h | 3 +- src/server/engine_shard_set.cc | 16 +++++------ src/server/table.cc | 38 ++++++++++++++++++++++++++ src/server/table.h | 42 +++++++++++++++++++++++++++- src/server/test_utils.cc | 2 +- src/server/transaction.cc | 50 +++++++++++++++++----------------- src/server/transaction.h | 11 ++++---- 8 files changed, 134 insertions(+), 73 deletions(-) diff --git a/src/server/db_slice.cc b/src/server/db_slice.cc index e0db3e33e..24dfabbf9 100644 --- a/src/server/db_slice.cc +++ b/src/server/db_slice.cc @@ -191,7 +191,7 @@ unsigned PrimeEvictionPolicy::Evict(const PrimeTable::HotspotBuckets& eb, PrimeT string tmp; string_view key = last_slot_it->first.GetSlice(&tmp); // do not evict locked keys - if (lt.find(KeyLockArgs::GetLockKey(key)) != lt.end()) + if (lt.Find(KeyLockArgs::GetLockKey(key)).has_value()) return 0; // log the evicted keys to journal. @@ -732,7 +732,7 @@ void DbSlice::FlushDbIndexes(const std::vector& indexes) { flush_db_arr[index] = std::move(db); CreateDb(index); - db_arr_[index]->trans_locks.swap(flush_db_arr[index]->trans_locks); + std::swap(db_arr_[index]->trans_locks, flush_db_arr[index]->trans_locks); if (TieredStorage* tiered = shard_owner()->tiered_storage(); tiered) { tiered->CancelAllIos(index); } @@ -939,7 +939,7 @@ size_t DbSlice::DbSize(DbIndex db_ind) const { } bool DbSlice::Acquire(IntentLock::Mode mode, const KeyLockArgs& lock_args) { - if (lock_args.args.empty()) { + if (lock_args.args.empty()) { // Can be empty for NO_KEY_TRANSACTIONAL commands. return true; } DCHECK_GT(lock_args.key_step, 0u); @@ -949,7 +949,7 @@ bool DbSlice::Acquire(IntentLock::Mode mode, const KeyLockArgs& lock_args) { if (lock_args.args.size() == 1) { string_view key = KeyLockArgs::GetLockKey(lock_args.args.front()); - lock_acquired = lt[key].Acquire(mode); + lock_acquired = lt.Acquire(key, mode); uniq_keys_ = {key}; // needed only for tests. } else { uniq_keys_.clear(); @@ -957,8 +957,7 @@ bool DbSlice::Acquire(IntentLock::Mode mode, const KeyLockArgs& lock_args) { for (size_t i = 0; i < lock_args.args.size(); i += lock_args.key_step) { string_view s = KeyLockArgs::GetLockKey(lock_args.args[i]); if (uniq_keys_.insert(s).second) { - bool res = lt[s].Acquire(mode); - lock_acquired &= res; + lock_acquired &= lt.Acquire(s, mode); } } } @@ -969,41 +968,31 @@ bool DbSlice::Acquire(IntentLock::Mode mode, const KeyLockArgs& lock_args) { return lock_acquired; } -void DbSlice::ReleaseNormalized(IntentLock::Mode mode, DbIndex db_index, std::string_view key, - unsigned count) { +void DbSlice::ReleaseNormalized(IntentLock::Mode mode, DbIndex db_index, std::string_view key) { DCHECK_EQ(key, KeyLockArgs::GetLockKey(key)); - DVLOG(1) << "Release " << IntentLock::ModeName(mode) << " " << count << " for " << key; + DVLOG(1) << "Release " << IntentLock::ModeName(mode) << " " + << " for " << key; auto& lt = db_arr_[db_index]->trans_locks; - auto it = lt.find(KeyLockArgs::GetLockKey(key)); - CHECK(it != lt.end()) << key; - it->second.Release(mode, count); - if (it->second.IsFree()) { - lt.erase(it); - } + lt.Release(KeyLockArgs::GetLockKey(key), mode); } void DbSlice::Release(IntentLock::Mode mode, const KeyLockArgs& lock_args) { - if (lock_args.args.empty()) { + if (lock_args.args.empty()) { // Can be empty for NO_KEY_TRANSACTIONAL commands. return; } DVLOG(2) << "Release " << IntentLock::ModeName(mode) << " for " << lock_args.args[0]; if (lock_args.args.size() == 1) { string_view key = KeyLockArgs::GetLockKey(lock_args.args.front()); - ReleaseNormalized(mode, lock_args.db_index, key, 1); + ReleaseNormalized(mode, lock_args.db_index, key); } else { auto& lt = db_arr_[lock_args.db_index]->trans_locks; uniq_keys_.clear(); for (size_t i = 0; i < lock_args.args.size(); i += lock_args.key_step) { - auto s = KeyLockArgs::GetLockKey(lock_args.args[i]); + string_view s = KeyLockArgs::GetLockKey(lock_args.args[i]); if (uniq_keys_.insert(s).second) { - auto it = lt.find(s); - CHECK(it != lt.end()); - it->second.Release(mode); - if (it->second.IsFree()) { - lt.erase(it); - } + lt.Release(s, mode); } } } @@ -1021,11 +1010,9 @@ bool DbSlice::CheckLock(IntentLock::Mode mode, DbIndex dbid, string_view key) co bool DbSlice::CheckLock(IntentLock::Mode mode, const KeyLockArgs& lock_args) const { const auto& lt = db_arr_[lock_args.db_index]->trans_locks; for (size_t i = 0; i < lock_args.args.size(); i += lock_args.key_step) { - auto s = KeyLockArgs::GetLockKey(lock_args.args[i]); - auto it = lt.find(s); - if (it != lt.end() && !it->second.Check(mode)) { + string_view s = KeyLockArgs::GetLockKey(lock_args.args[i]); + if (auto lock = lt.Find(s); lock && !lock->Check(mode)) return false; - } } return true; } @@ -1250,7 +1237,7 @@ void DbSlice::FreeMemWithEvictionStep(DbIndex db_ind, size_t increase_goal_bytes // check if the key is locked by looking up transaction table. auto& lt = db_table->trans_locks; string_view key = evict_it->first.GetSlice(&tmp); - if (lt.find(KeyLockArgs::GetLockKey(key)) != lt.end()) + if (lt.Find(KeyLockArgs::GetLockKey(key)).has_value()) continue; if (auto journal = owner_->journal(); journal) { diff --git a/src/server/db_slice.h b/src/server/db_slice.h index c24a74d0c..6e034ab69 100644 --- a/src/server/db_slice.h +++ b/src/server/db_slice.h @@ -405,8 +405,7 @@ class DbSlice { void PerformDeletion(PrimeIterator del_it, DbTable* table); // Releases a single key. `key` must have been normalized by GetLockKey(). - void ReleaseNormalized(IntentLock::Mode m, DbIndex db_index, std::string_view key, - unsigned count); + void ReleaseNormalized(IntentLock::Mode m, DbIndex db_index, std::string_view key); private: void PreUpdate(DbIndex db_ind, PrimeIterator it); diff --git a/src/server/engine_shard_set.cc b/src/server/engine_shard_set.cc index 03c291c3e..8a12b4fe3 100644 --- a/src/server/engine_shard_set.cc +++ b/src/server/engine_shard_set.cc @@ -167,9 +167,7 @@ class RoundRobinSharder { bool HasContendedLocks(unsigned shard_id, Transaction* trx, const DbTable* table) { auto is_contended = [table](string_view key) { - auto it = table->trans_locks.find(key); - DCHECK(it != table->trans_locks.end()); - return it->second.IsContended(); + return table->trans_locks.Find(key)->IsContended(); }; if (trx->IsMulti()) { @@ -771,13 +769,13 @@ auto EngineShard::AnalyzeTxQueue() const -> TxQueueInfo { if (table == nullptr) continue; - info.total_locks += table->trans_locks.size(); - for (const auto& k_v : table->trans_locks) { - if (k_v.second.IsContended()) { + info.total_locks += table->trans_locks.Size(); + for (const auto& [key, lock] : table->trans_locks) { + if (lock.IsContended()) { info.contended_locks++; - if (k_v.second.ContentionScore() > info.max_contention_score) { - info.max_contention_score = k_v.second.ContentionScore(); - info.max_contention_lock_name = k_v.first; + if (lock.ContentionScore() > info.max_contention_score) { + info.max_contention_score = lock.ContentionScore(); + info.max_contention_lock_name = string_view{key}; } } } diff --git a/src/server/table.cc b/src/server/table.cc index 214f9462e..53cbcb16b 100644 --- a/src/server/table.cc +++ b/src/server/table.cc @@ -56,6 +56,44 @@ SlotStats& SlotStats::operator+=(const SlotStats& o) { return *this; } +void LockTable::Key::MakeOwned() const { + if (std::holds_alternative(val_)) + val_ = std::string{std::get(val_)}; +} + +size_t LockTable::Size() const { + return locks_.size(); +} + +std::optional LockTable::Find(std::string_view key) const { + DCHECK_EQ(KeyLockArgs::GetLockKey(key), key); + + if (auto it = locks_.find(Key{key}); it != locks_.end()) + return it->second; + return std::nullopt; +} + +bool LockTable::Acquire(std::string_view key, IntentLock::Mode mode) { + DCHECK_EQ(KeyLockArgs::GetLockKey(key), key); + + auto [it, inserted] = locks_.try_emplace(Key{key}); + if (!inserted) // If more than one transaction refers to a key + it->first.MakeOwned(); // we must fall back to using a self-contained string + + return it->second.Acquire(mode); +} + +void LockTable::Release(std::string_view key, IntentLock::Mode mode) { + DCHECK_EQ(KeyLockArgs::GetLockKey(key), key); + + auto it = locks_.find(Key{key}); + CHECK(it != locks_.end()) << key; + + it->second.Release(mode); + if (it->second.IsFree()) + locks_.erase(it); +} + DbTable::DbTable(PMR_NS::memory_resource* mr, DbIndex db_index) : prime(kInitSegmentLog, detail::PrimeTablePolicy{}, mr), expire(0, detail::ExpireTablePolicy{}, mr), diff --git a/src/server/table.h b/src/server/table.h index 22cddf582..948920211 100644 --- a/src/server/table.h +++ b/src/server/table.h @@ -79,7 +79,47 @@ struct DbTableStats { DbTableStats& operator+=(const DbTableStats& o); }; -using LockTable = absl::flat_hash_map; +// Table for recording locks that uses string_views where possible. LockTable falls back to +// strings for locks that are used by multiple transactions. Keys used with the lock table +// should be normalized with GetLockKey +class LockTable { + public: + size_t Size() const; + std::optional Find(std::string_view key) const; + + bool Acquire(std::string_view key, IntentLock::Mode mode); + void Release(std::string_view key, IntentLock::Mode mode); + + auto begin() const { + return locks_.cbegin(); + } + + auto end() const { + return locks_.cbegin(); + } + + private: + struct Key { + operator std::string_view() const { + return visit([](const auto& s) -> std::string_view { return s; }, val_); + } + + bool operator==(const Key& o) const { + return *this == std::string_view(o); + } + + friend std::ostream& operator<<(std::ostream& o, const Key& key) { + return o << std::string_view(key); + } + + // If the key is backed by a string_view, replace it with a string with the same value + void MakeOwned() const; + + mutable std::variant val_; + }; + + absl::flat_hash_map locks_; +}; // A single Db table that represents a table that can be chosen with "SELECT" command. struct DbTable : boost::intrusive_ref_counter { diff --git a/src/server/test_utils.cc b/src/server/test_utils.cc index 657a526fc..70f66f348 100644 --- a/src/server/test_utils.cc +++ b/src/server/test_utils.cc @@ -311,7 +311,7 @@ unsigned BaseFamilyTest::NumLocked() { if (db == nullptr) { continue; } - count += db->trans_locks.size(); + count += db->trans_locks.Size(); } }); return count; diff --git a/src/server/transaction.cc b/src/server/transaction.cc index 269e16804..5db16d012 100644 --- a/src/server/transaction.cc +++ b/src/server/transaction.cc @@ -170,18 +170,18 @@ void Transaction::BuildShardIndex(const KeyIndex& key_index, bool rev_mapping, void Transaction::InitShardData(absl::Span shard_index, size_t num_args, bool rev_mapping) { - args_.reserve(num_args); + kv_args_.reserve(num_args); if (rev_mapping) reverse_index_.reserve(num_args); - // Store the concatenated per-shard arguments from the shard index inside args_ - // and make each shard data point to its own sub-span inside args_. + // Store the concatenated per-shard arguments from the shard index inside kv_args_ + // and make each shard data point to its own sub-span inside kv_args_. for (size_t i = 0; i < shard_data_.size(); ++i) { auto& sd = shard_data_[i]; - auto& si = shard_index[i]; + const auto& si = shard_index[i]; sd.arg_count = si.args.size(); - sd.arg_start = args_.size(); + sd.arg_start = kv_args_.size(); // Multi transactions can re-initialize on different shards, so clear ACTIVE flag. DCHECK_EQ(sd.local_mask & ACTIVE, 0); @@ -195,13 +195,13 @@ void Transaction::InitShardData(absl::Span shard_index, siz unique_shard_id_ = i; for (size_t j = 0; j < si.args.size(); ++j) { - args_.push_back(si.args[j]); + kv_args_.push_back(si.args[j]); if (rev_mapping) reverse_index_.push_back(si.original_index[j]); } } - CHECK_EQ(args_.size(), num_args); + DCHECK_EQ(kv_args_.size(), num_args); } void Transaction::LaunderKeyStorage(CmdArgVec* keys) { @@ -233,13 +233,13 @@ void Transaction::StoreKeysInArgs(KeyIndex key_index, bool rev_mapping) { // even for a single key we may have multiple arguments per key (MSET). for (unsigned j = key_index.start; j < key_index.end; j++) { - args_.push_back(ArgS(full_args_, j)); + kv_args_.push_back(ArgS(full_args_, j)); if (key_index.step == 2) - args_.push_back(ArgS(full_args_, ++j)); + kv_args_.push_back(ArgS(full_args_, ++j)); } if (rev_mapping) { - reverse_index_.resize(args_.size()); + reverse_index_.resize(kv_args_.size()); for (unsigned j = 0; j < reverse_index_.size(); ++j) { reverse_index_[j] = j + key_index.start; } @@ -287,9 +287,9 @@ void Transaction::InitByKeys(const KeyIndex& key_index) { unique_shard_cnt_ = 1; if (is_stub) // stub transactions don't migrate - DCHECK_EQ(unique_shard_id_, Shard(args_.front(), shard_set->size())); + DCHECK_EQ(unique_shard_id_, Shard(kv_args_.front(), shard_set->size())); else - unique_shard_id_ = Shard(args_.front(), shard_set->size()); + unique_shard_id_ = Shard(kv_args_.front(), shard_set->size()); // Multi transactions that execute commands on their own (not stubs) can't shrink the backing // array, as it still might be read by leftover callbacks. @@ -314,7 +314,7 @@ void Transaction::InitByKeys(const KeyIndex& key_index) { DCHECK(!multi_ || multi_->mode != LOCK_AHEAD || !multi_->frozen_keys.empty()); - DVLOG(1) << "InitByArgs " << DebugId() << " " << args_.front(); + DVLOG(1) << "InitByArgs " << DebugId() << " " << kv_args_.front(); // Compress shard data, if we occupy only one shard. if (unique_shard_cnt_ == 1) { @@ -333,8 +333,8 @@ void Transaction::InitByKeys(const KeyIndex& key_index) { // Validation. Check reverse mapping was built correctly. if (needs_reverse_mapping) { - for (size_t i = 0; i < args_.size(); ++i) { - DCHECK_EQ(args_[i], ArgS(full_args_, reverse_index_[i])) << full_args_; + for (size_t i = 0; i < kv_args_.size(); ++i) { + DCHECK_EQ(kv_args_[i], ArgS(full_args_, reverse_index_[i])) << full_args_; } } @@ -366,7 +366,7 @@ OpStatus Transaction::InitByArgs(DbIndex index, CmdArgList args) { } DCHECK_EQ(unique_shard_cnt_, 0u); - DCHECK(args_.empty()); + DCHECK(kv_args_.empty()); OpResult key_index = DetermineKeys(cid_, args); if (!key_index) @@ -448,7 +448,7 @@ void Transaction::MultiSwitchCmd(const CommandId* cid) { unique_shard_id_ = 0; unique_shard_cnt_ = 0; - args_.clear(); + kv_args_.clear(); reverse_index_.clear(); cid_ = cid; @@ -805,10 +805,10 @@ void Transaction::UnlockMulti() { multi_->frozen_keys_set.clear(); - auto sharded_keys = make_shared>(shard_set->size()); + auto sharded_keys = make_shared>>(shard_set->size()); for (string& key : multi_->frozen_keys) { ShardId sid = Shard(key, sharded_keys->size()); - (*sharded_keys)[sid].emplace_back(std::move(key)); + (*sharded_keys)[sid].emplace_back(key); } unsigned shard_journals_cnt = @@ -1199,11 +1199,11 @@ ArgSlice Transaction::GetShardArgs(ShardId sid) const { // We can read unique_shard_cnt_ only because ShardArgsInShard is called after IsArmedInShard // barrier. if (unique_shard_cnt_ == 1) { - return args_; + return kv_args_; } const auto& sd = shard_data_[sid]; - return ArgSlice{args_.data() + sd.arg_start, sd.arg_count}; + return ArgSlice{kv_args_.data() + sd.arg_start, sd.arg_count}; } // from local index back to original arg index skipping the command. @@ -1319,8 +1319,8 @@ OpStatus Transaction::RunSquashedMultiCb(RunnableType cb) { return result; } -void Transaction::UnlockMultiShardCb(const KeyList& sharded_keys, EngineShard* shard, - uint32_t shard_journals_cnt) { +void Transaction::UnlockMultiShardCb(absl::Span sharded_keys, + EngineShard* shard, uint32_t shard_journals_cnt) { DCHECK(multi_ && multi_->lock_mode); auto journal = shard->journal(); @@ -1334,7 +1334,7 @@ void Transaction::UnlockMultiShardCb(const KeyList& sharded_keys, EngineShard* s shard->shard_lock()->Release(IntentLock::EXCLUSIVE); } else { for (const auto& key : sharded_keys) { - shard->db_slice().ReleaseNormalized(*multi_->lock_mode, db_index_, key, 1); + shard->db_slice().ReleaseNormalized(*multi_->lock_mode, db_index_, key); } } @@ -1476,7 +1476,7 @@ void Transaction::LogAutoJournalOnShard(EngineShard* shard, RunnableResult resul journal::Entry::Payload entry_payload; string_view cmd{cid_->name()}; - if (unique_shard_cnt_ == 1 || args_.empty()) { + if (unique_shard_cnt_ == 1 || kv_args_.empty()) { entry_payload = make_pair(cmd, full_args_); } else { entry_payload = make_pair(cmd, GetShardArgs(shard->shard_id())); diff --git a/src/server/transaction.h b/src/server/transaction.h index 128a4ac80..88fa8787e 100644 --- a/src/server/transaction.h +++ b/src/server/transaction.h @@ -362,9 +362,6 @@ class Transaction { unsigned cnt[2] = {0, 0}; }; - // owned std::string because callbacks its used in run fully async and can outlive the entries. - using KeyList = std::vector; - struct alignas(64) PerShardData { PerShardData(PerShardData&&) noexcept { } @@ -489,7 +486,7 @@ class Transaction { // Run callback inline as part of multi stub. OpStatus RunSquashedMultiCb(RunnableType cb); - void UnlockMultiShardCb(const KeyList& sharded_keys, EngineShard* shard, + void UnlockMultiShardCb(absl::Span sharded_keys, EngineShard* shard, uint32_t shard_journals_cnt); // In a multi-command transaction, we determine the number of shard journals that we wrote entries @@ -563,8 +560,10 @@ class Transaction { // Never access directly with index, always use SidToId. absl::InlinedVector shard_data_; // length = shard_count - // Stores arguments of the transaction (i.e. keys + values) partitioned by shards. - absl::InlinedVector args_; + // Stores keys/values of the transaction partitioned by shards. + // We need values as well since we reorder keys, and we need to know what value corresponds + // to what key. + absl::InlinedVector kv_args_; // Stores the full undivided command. CmdArgList full_args_;