mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2025-05-10 18:05:44 +02:00
chore(transaction): Add debug stats for fail printing (#2600)
* chore(transaction): Add debug stats for per shard data --------- Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>
This commit is contained in:
parent
4d4fed6fec
commit
1b51e82e55
2 changed files with 41 additions and 5 deletions
|
@ -111,7 +111,7 @@ void Transaction::PhasedBarrier::Dec(Transaction* keep_alive) {
|
|||
::boost::intrusive_ptr guard(keep_alive);
|
||||
|
||||
uint32_t before = count_.fetch_sub(1);
|
||||
CHECK_GE(before, 1u);
|
||||
CHECK_GE(before, 1u) << keep_alive->DEBUG_PrintFailState(EngineShard::tlocal()->shard_id());
|
||||
if (before == 1)
|
||||
ec_.notify();
|
||||
}
|
||||
|
@ -209,6 +209,7 @@ void Transaction::InitBase(DbIndex dbid, CmdArgList args) {
|
|||
db_index_ = dbid;
|
||||
full_args_ = args;
|
||||
local_result_ = OpStatus::OK;
|
||||
stats_.coordinator_index = ProactorBase::me() ? ProactorBase::me()->GetPoolIndex() : kInvalidSid;
|
||||
}
|
||||
|
||||
void Transaction::InitGlobal() {
|
||||
|
@ -600,10 +601,12 @@ bool Transaction::RunInShard(EngineShard* shard, bool txq_ooo) {
|
|||
unsigned idx = SidToId(shard->shard_id());
|
||||
auto& sd = shard_data_[idx];
|
||||
|
||||
CHECK(sd.local_mask & ARMED);
|
||||
CHECK(sd.local_mask & ARMED) << DEBUG_PrintFailState(shard->shard_id());
|
||||
sd.local_mask &= ~ARMED;
|
||||
|
||||
CHECK_GT(run_barrier_.DEBUG_Count(), 0u);
|
||||
sd.stats.total_runs++;
|
||||
|
||||
DCHECK_GT(run_barrier_.DEBUG_Count(), 0u);
|
||||
VLOG(2) << "RunInShard: " << DebugId() << " sid:" << shard->shard_id() << " " << sd.local_mask;
|
||||
|
||||
bool was_suspended = sd.local_mask & SUSPENDED_Q;
|
||||
|
@ -743,6 +746,8 @@ void Transaction::ScheduleInternal() {
|
|||
|
||||
// Loop until successfully scheduled in all shards.
|
||||
while (true) {
|
||||
stats_.schedule_attempts++;
|
||||
|
||||
txid_ = op_seq.fetch_add(1, memory_order_relaxed);
|
||||
InitTxTime();
|
||||
|
||||
|
@ -1021,6 +1026,21 @@ void Transaction::FIX_ConcludeJournalExec() {
|
|||
MultiReportJournalOnShard(EngineShard::tlocal());
|
||||
}
|
||||
|
||||
string Transaction::DEBUG_PrintFailState(ShardId sid) const {
|
||||
auto res = StrCat(
|
||||
"usc: ", unique_shard_cnt_, ", name:", GetCId()->name(),
|
||||
", usecnt:", use_count_.load(memory_order_relaxed), ", runcnt: ", run_barrier_.DEBUG_Count(),
|
||||
", coordstate: ", coordinator_state_, ", coord native thread: ", stats_.coordinator_index,
|
||||
", schedule attempts: ", stats_.schedule_attempts, ", report from sid: ", sid, "\n");
|
||||
std::atomic_thread_fence(memory_order_acquire);
|
||||
for (unsigned i = 0; i < shard_data_.size(); ++i) {
|
||||
const auto& sd = shard_data_[i];
|
||||
absl::StrAppend(&res, "- shard: ", i, " local_mask:", sd.local_mask,
|
||||
" total_runs: ", sd.stats.total_runs, "\n");
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void Transaction::EnableShard(ShardId sid) {
|
||||
unique_shard_cnt_ = 1;
|
||||
unique_shard_id_ = sid;
|
||||
|
@ -1048,9 +1068,11 @@ Transaction::RunnableResult Transaction::RunQuickie(EngineShard* shard) {
|
|||
DVLOG(1) << "RunQuickSingle " << DebugId() << " " << shard->shard_id();
|
||||
DCHECK(cb_ptr_) << DebugId() << " " << shard->shard_id();
|
||||
|
||||
CHECK(sd.local_mask & ARMED);
|
||||
CHECK(sd.local_mask & ARMED) << DEBUG_PrintFailState(shard->shard_id());
|
||||
sd.local_mask &= ~ARMED;
|
||||
|
||||
sd.stats.total_runs++;
|
||||
|
||||
// Calling the callback in somewhat safe way
|
||||
RunnableResult result;
|
||||
try {
|
||||
|
|
|
@ -354,6 +354,9 @@ class Transaction {
|
|||
// Send journal EXEC opcode after a series of MULTI commands on the currently active shard
|
||||
void FIX_ConcludeJournalExec();
|
||||
|
||||
// Print in-dept failure state for debugging.
|
||||
std::string DEBUG_PrintFailState(ShardId sid) const;
|
||||
|
||||
private:
|
||||
// Holds number of locks for each IntentLock::Mode: shared and exlusive.
|
||||
struct LockCnt {
|
||||
|
@ -382,8 +385,13 @@ class Transaction {
|
|||
// Index of key relative to args in shard that the shard was woken up after blocking wait.
|
||||
uint16_t wake_key_pos = UINT16_MAX;
|
||||
|
||||
// Irrational stats purely for debugging purposes.
|
||||
struct Stats {
|
||||
unsigned total_runs = 0; // total number of runs
|
||||
} stats;
|
||||
|
||||
// Prevent "false sharing" between cache lines: occupy a full cache line (64 bytes)
|
||||
char pad[64 - 4 * sizeof(uint32_t)];
|
||||
char pad[64 - 4 * sizeof(uint32_t) - sizeof(Stats)];
|
||||
};
|
||||
|
||||
static_assert(sizeof(PerShardData) == 64); // cacheline
|
||||
|
@ -626,6 +634,12 @@ class Transaction {
|
|||
OpStatus local_result_ = OpStatus::OK;
|
||||
absl::base_internal::SpinLock local_result_mu_;
|
||||
|
||||
// Stats purely for debugging purposes
|
||||
struct Stats {
|
||||
size_t schedule_attempts = 0;
|
||||
ShardId coordinator_index = 0;
|
||||
} stats_;
|
||||
|
||||
private:
|
||||
struct TLTmpSpace {
|
||||
std::vector<PerShardCache>& GetShardIndex(unsigned size);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue