fix(transaction): Add special barrier for blocking tx (#2512)
Refactor the blocking transaction code. Introduce BatonBarrier for managing atomic, exclusive wakeup notifications that conflict with neither expiration nor cancellation.

Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>
This commit is contained in:
parent 7e875bdafe
commit 40d08a3c67
2 changed files with 106 additions and 70 deletions
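The central idea described above is that the wakeup, cancellation, and timeout paths all race to claim a single barrier, and only the first claimant may modify the transaction's wakeup state. Below is a minimal standalone sketch of that claim semantics; every name in it is invented for illustration and none of it is part of this diff.

#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  std::atomic<bool> claimed{false};  // analogous role to BatonBarrier's claimed flag
  std::atomic<int> winners{0};

  // Each waker path calls exchange(); only the caller that observes `false` has claimed
  // the barrier and may proceed to publish the wakeup.
  auto try_wake = [&](const char* who) {
    if (!claimed.exchange(true, std::memory_order_relaxed)) {
      winners.fetch_add(1, std::memory_order_relaxed);
      std::printf("%s claimed the wakeup\n", who);
    }
  };

  std::vector<std::thread> threads;
  threads.emplace_back(try_wake, "notifier");
  threads.emplace_back(try_wake, "canceller");
  threads.emplace_back(try_wake, "timeout path");
  for (auto& t : threads)
    t.join();

  std::printf("winners: %d\n", winners.load());  // always exactly 1
  return 0;
}

In the diff below, this exchange() lives in BatonBarrier::TryClaim(), and the competing callers are NotifySuspended(), CancelBlocking() and the timeout branch of Wait().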
@@ -82,6 +82,14 @@ void RecordTxScheduleFastStats(const Transaction* tx, bool was_ooo, bool was_inl
   ss->stats.tx_width_freq_arr[0]++;
 }
 
+std::ostream& operator<<(std::ostream& os, Transaction::time_point tp) {
+  using namespace chrono;
+  if (tp == Transaction::time_point::max())
+    return os << "inf";
+  size_t ms = duration_cast<milliseconds>(tp - Transaction::time_point::clock::now()).count();
+  return os << ms << "ms";
+}
+
 }  // namespace
 
 IntentLock::Mode Transaction::LockMode() const {
@@ -116,6 +124,41 @@ uint32_t Transaction::PhasedBarrier::DEBUG_Count() const {
   return count_.load(memory_order_relaxed);
 }
 
+bool Transaction::BatonBarrier::IsClaimed() const {
+  return claimed_.load(memory_order_relaxed);
+}
+
+bool Transaction::BatonBarrier::TryClaim() {
+  return !claimed_.exchange(true, memory_order_relaxed);  // false means first means success
+}
+
+void Transaction::BatonBarrier::Close() {
+  DCHECK(claimed_.load(memory_order_relaxed));
+  closed_.store(true, memory_order_relaxed);
+  ec_.notify();  // release
+}
+
+cv_status Transaction::BatonBarrier::Wait(time_point tp) {
+  auto cb = [this] { return closed_.load(memory_order_acquire); };
+
+  if (tp != time_point::max()) {
+    // Wait until timepoint and return immediately if we finished without a timeout
+    if (ec_.await_until(cb, tp) == cv_status::no_timeout)
+      return cv_status::no_timeout;
+
+    // We timed out and claimed the barrier, so no one will be able to claim it anymore
+    if (TryClaim()) {
+      closed_.store(true, memory_order_relaxed);  // Purely formal
+      return cv_status::timeout;
+    }
+
+    // fallthrough: otherwise a modification is in progress, wait for it below
+  }
+
+  ec_.await(cb);
+  return cv_status::no_timeout;
+}
+
 /**
  * @brief Construct a new Transaction:: Transaction object
  *
@@ -1205,42 +1248,24 @@ size_t Transaction::ReverseArgIndex(ShardId shard_id, size_t arg_index) const {
 
 OpStatus Transaction::WaitOnWatch(const time_point& tp, WaitKeysProvider wkeys_provider,
                                   KeyReadyChecker krc) {
   DVLOG(2) << "WaitOnWatch " << DebugId();
   using namespace chrono;
+
+  DCHECK(!blocking_barrier_.IsClaimed());  // Blocking barrier can't be re-used
 
   // Register keys on active shards blocking controllers and mark shard state as suspended.
   auto cb = [&](Transaction* t, EngineShard* shard) {
     auto keys = wkeys_provider(t, shard);
     return t->WatchInShard(keys, shard, krc);
   };
   Execute(std::move(cb), true);
 
-  coordinator_state_ |= COORD_BLOCKED;
-
-  auto wake_cb = [this] {
-    return (coordinator_state_ & COORD_CANCELLED) ||
-           wakeup_requested_.load(memory_order_relaxed) > 0;
-  };
-
   auto* stats = ServerState::tl_connection_stats();
   ++stats->num_blocked_clients;
+  DVLOG(1) << "WaitOnWatch wait for " << tp << " " << DebugId();
 
-  if (DCHECK_IS_ON()) {
-    int64_t ms = -1;
-    if (tp != time_point::max())
-      ms = duration_cast<milliseconds>(tp - time_point::clock::now()).count();
-    DVLOG(1) << "WaitOnWatch TimeWait for " << ms << " ms " << DebugId();
-  }
-
-  cv_status status = cv_status::no_timeout;
-  if (tp == time_point::max()) {
-    blocking_ec_.await(std::move(wake_cb));
-  } else {
-    status = blocking_ec_.await_until(std::move(wake_cb), tp);
-  }
+  // Wait for the blocking barrier to be closed.
+  // Note: It might return immediately if another thread already notified us.
+  cv_status status = blocking_barrier_.Wait(tp);
 
   DVLOG(1) << "WaitOnWatch done " << int(status) << " " << DebugId();
 
   --stats->num_blocked_clients;
 
   OpStatus result = OpStatus::OK;
@@ -1250,38 +1275,32 @@ OpStatus Transaction::WaitOnWatch(const time_point& tp, WaitKeysProvider wkeys_p
     result = local_result_;
   }
 
   // If we don't follow up with an "action" hop, we must clean up manually on all shards.
   if (result != OpStatus::OK)
     ExpireBlocking(wkeys_provider);
 
-  coordinator_state_ &= ~COORD_BLOCKED;
   return result;
 }
 
 // Runs only in the shard thread.
 OpStatus Transaction::WatchInShard(ArgSlice keys, EngineShard* shard, KeyReadyChecker krc) {
-  ShardId idx = SidToId(shard->shard_id());
+  auto& sd = shard_data_[SidToId(shard->shard_id())];
 
-  auto& sd = shard_data_[idx];
   CHECK_EQ(0, sd.local_mask & SUSPENDED_Q);
-
-  auto* bc = shard->EnsureBlockingController();
-  bc->AddWatched(keys, std::move(krc), this);
-
   sd.local_mask |= SUSPENDED_Q;
   sd.local_mask &= ~OUT_OF_ORDER;
-  DVLOG(2) << "AddWatched " << DebugId() << " local_mask:" << sd.local_mask
-           << ", first_key:" << keys.front();
+
+  shard->EnsureBlockingController()->AddWatched(keys, std::move(krc), this);
+  DVLOG(2) << "WatchInShard " << DebugId() << ", first_key:" << keys.front();
 
   return OpStatus::OK;
 }
 
 void Transaction::ExpireShardCb(ArgSlice wkeys, EngineShard* shard) {
   // Blocking transactions don't release keys when suspending, release them now.
   auto lock_args = GetLockArgs(shard->shard_id());
   shard->db_slice().Release(LockMode(), lock_args);
 
-  unsigned sd_idx = SidToId(shard->shard_id());
-  auto& sd = shard_data_[sd_idx];
+  auto& sd = shard_data_[SidToId(shard->shard_id())];
   sd.local_mask |= EXPIRED_Q;
   sd.local_mask &= ~KEYLOCK_ACQUIRED;
 
   shard->blocking_controller()->FinalizeWatched(wkeys, this);
@@ -1361,40 +1380,32 @@ bool Transaction::IsGlobal() const {
 // Returns true if the transacton has changed its state from suspended to awakened,
 // false, otherwise.
 bool Transaction::NotifySuspended(TxId committed_txid, ShardId sid, string_view key) {
-  unsigned idx = SidToId(sid);
-  auto& sd = shard_data_[idx];
-  unsigned local_mask = sd.local_mask;
-
-  if (local_mask & Transaction::EXPIRED_Q) {
-    return false;
-  }
-
   // Wake a transaction only once on the first notify.
   // We don't care about preserving the strict order with multiple operations running on blocking
   // keys in parallel, because the internal order is not observable from outside either way.
-  if (wakeup_requested_.fetch_add(1, memory_order_relaxed) > 0)
+  if (!blocking_barrier_.TryClaim())
     return false;
 
-  DVLOG(1) << "NotifySuspended " << DebugId() << ", local_mask:" << local_mask << " by commited_id "
-           << committed_txid;
+  auto& sd = shard_data_[SidToId(sid)];
 
-  // local_mask could be awaked (i.e. not suspended) if the transaction has been
-  // awakened by another key or awakened by the same key multiple times.
-  if (local_mask & SUSPENDED_Q) {
-    DCHECK_EQ(0u, local_mask & AWAKED_Q);
+  DVLOG(1) << "NotifySuspended " << DebugId() << ", local_mask:" << sd.local_mask
+           << " by commited_id " << committed_txid;
 
-    sd.local_mask &= ~SUSPENDED_Q;
-    sd.local_mask |= AWAKED_Q;
+  // We're the first and only to wake this transaction, expect the shard to be suspended
+  CHECK(sd.local_mask & SUSPENDED_Q);
+  CHECK_EQ(sd.local_mask & AWAKED_Q, 0);
 
-    // Find index of awakened key.
-    auto args = GetShardArgs(sid);
-    auto it =
-        find_if(args.begin(), args.end(), [key](auto arg) { return facade::ToSV(arg) == key; });
-    DCHECK(it != args.end());
-    sd.wake_key_pos = it - args.begin();
-  }
+  // Find index of awakened key
+  auto args = GetShardArgs(sid);
+  auto it = find_if(args.begin(), args.end(), [key](auto arg) { return facade::ToSV(arg) == key; });
+  CHECK(it != args.end());
 
-  blocking_ec_.notify();
+  // Change state to awaked and store index of awakened key
+  sd.local_mask &= ~SUSPENDED_Q;
+  sd.local_mask |= AWAKED_Q;
+  sd.wake_key_pos = it - args.begin();
+
+  blocking_barrier_.Close();
   return true;
 }
 
@@ -1474,7 +1485,10 @@ void Transaction::FinishLogJournalOnShard(EngineShard* shard, uint32_t shard_cnt
 }
 
 void Transaction::CancelBlocking(std::function<OpStatus(ArgSlice)> status_cb) {
-  if ((coordinator_state_ & COORD_BLOCKED) == 0)
+  // We're on the owning thread of this transaction, so we can safely access it's data below.
+  // We still need to claim the blocking barrier, but as this function is often called blindly, we
+  // want to check first if it makes sense to even proceed.
+  if (blocking_barrier_.IsClaimed())
     return;
 
   OpStatus status = OpStatus::CANCELLED;
@@ -1490,9 +1504,13 @@ void Transaction::CancelBlocking(std::function<OpStatus(ArgSlice)> status_cb) {
   if (status == OpStatus::OK)
     return;
 
+  // Check if someone else is about to wake us up
+  if (!blocking_barrier_.TryClaim())
+    return;
+
   coordinator_state_ |= COORD_CANCELLED;
   local_result_ = status;
-  blocking_ec_.notify();
+  blocking_barrier_.Close();
 }
 
 OpResult<KeyIndex> DetermineKeys(const CommandId* cid, CmdArgList args) {