opt(server): Short-circuit ExecuteAsync(). (#1601)

* feat(server): Short-circuit ExecuteAsync().

* Do not leak (hopefully :)

* Add documentation about coordinator_index_

* Use ServerState::tlocal()->thread_index()
This commit is contained in:
Shahar Mike 2023-07-31 12:12:59 +03:00 committed by GitHub
parent 4c85d5825d
commit fba0800081
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 6 deletions

View file

@ -136,7 +136,7 @@ void Transaction::InitShardData(absl::Span<const PerShardCache> shard_index, siz
sd.arg_count = si.args.size();
sd.arg_start = args_.size();
// Multi transactions can re-intitialize on different shards, so clear ACTIVE flag.
// Multi transactions can re-initialize on different shards, so clear ACTIVE flag.
if (multi_)
sd.local_mask &= ~ACTIVE;
@ -212,7 +212,7 @@ void Transaction::StoreKeysInArgs(KeyIndex key_index, bool rev_mapping) {
/**
*
* There are 4 options that we consider here:
* a. T spans a single shard and its not multi.
* a. T spans a single shard and it's not multi.
* unique_shard_id_ is predefined before the schedule() is called.
* In that case only a single thread will be scheduled and it will use shard_data[0] just because
* shard_data.size() = 1. Coordinator thread can access any data because there is a
@ -828,7 +828,7 @@ void Transaction::ExecuteAsync() {
// We do not necessarily Execute this transaction in 'cb' below. It well may be that it will be
// executed by the engine shard once it has been armed and coordinator thread will finish the
// transaction before engine shard thread stops accessing it. Therefore, we increase reference
// by number of callbacks accessesing 'this' to allow callbacks to execute shard->Execute(this);
// by number of callbacks accessing 'this' to allow callbacks to execute shard->Execute(this);
// safely.
use_count_.fetch_add(unique_shard_cnt_, memory_order_relaxed);
@ -844,6 +844,15 @@ void Transaction::ExecuteAsync() {
// IsArmedInShard in other threads.
run_count_.store(unique_shard_cnt_, memory_order_release);
// Execute inline when we can. We can't use coordinator_index_ because we may offload this
// function to run in a different thread.
if (unique_shard_cnt_ == 1 && ServerState::tlocal()->thread_index() == unique_shard_id_) {
DVLOG(1) << "Short-circuit ExecuteAsync " << DebugId();
EngineShard::tlocal()->PollExecution("exec_cb", this);
intrusive_ptr_release(this); // against use_count_.fetch_add above.
return;
}
// We verify seq lock has the same generation number. See below for more info.
auto cb = [seq, this] {
EngineShard* shard = EngineShard::tlocal();
@ -856,7 +865,7 @@ void Transaction::ExecuteAsync() {
DVLOG(3) << "PollExecCb " << DebugId() << " sid(" << shard->shard_id() << ") "
<< run_count_.load(memory_order_relaxed);
// We also make sure that for mult-operation transactions like Multi/Eval
// We also make sure that for multi-operation transactions like Multi/Eval
// this callback runs on a correct operation. We want to avoid a situation
// where the first operation is executed and the second operation is armed and
// now this callback from the previous operation finally runs and calls PollExecution.

View file

@ -131,7 +131,7 @@ class Transaction {
// UNUSED = 1 << 1,
OUT_OF_ORDER = 1 << 2, // Whether it can run as out of order
KEYLOCK_ACQUIRED = 1 << 3, // Whether its key locks are acquired
SUSPENDED_Q = 1 << 4, // Whether is suspened (by WatchInShard())
SUSPENDED_Q = 1 << 4, // Whether is suspended (by WatchInShard())
AWAKED_Q = 1 << 5, // Whether it was awakened (by NotifySuspended())
EXPIRED_Q = 1 << 6, // Whether it timed out and should be dropped
UNLOCK_MULTI = 1 << 7, // Whether this shard executed UnlockMultiShardCb
@ -333,7 +333,7 @@ class Transaction {
// this is the only variable that is accessed by both shard and coordinator threads.
std::atomic_bool is_armed{false};
// We pad with some memory so that atomic loads won't cause false sharing betweem threads.
// We pad with some memory so that atomic loads won't cause false sharing between threads.
char pad[46]; // to make sure PerShardData is 64 bytes and takes full cacheline.
uint32_t arg_start = 0; // Indices into args_ array.