chore: reduce pipelining latency by reusing existing shard fibers (#3494)

* chore: reduce pipelining latency by reusing existing shard fibers. To prove the benefits, run `./dfly_bench --pipeline=50 -n 20000 --ratio 0:1 --qps=0 --key_maximum=1`. Before: the average pipelining latency was 10ms. After: the average pipelining latency is 5ms. Average latency is computed as pipelined_latency_usec / total_pipelined_squashed_commands. Also improved the counting of squashed commands so that only commands that were actually squashed are counted. --------- Signed-off-by: Roman Gershman <roman@dragonflydb.io>
2025-05-10 18:05:44 +02:00 · 2024-08-14 14:45:54 +03:00 · 2024-08-14 14:45:54 +03:00 · 93f6773297
commit 93f6773297
parent a2e63f144c
9 changed files with 51 additions and 24 deletions
--- a/src/server/server_state.cc
+++ b/src/server/server_state.cc
@ -27,25 +27,29 @@ ServerState::Stats::Stats(unsigned num_shards) : tx_width_freq_arr(num_shards) {
 }

 ServerState::Stats& ServerState::Stats::Add(const ServerState::Stats& other) {
-  static_assert(sizeof(Stats) == 16 * 8, "Stats size mismatch");
+  static_assert(sizeof(Stats) == 17 * 8, "Stats size mismatch");

-  this->eval_io_coordination_cnt += other.eval_io_coordination_cnt;
-  this->eval_shardlocal_coordination_cnt += other.eval_shardlocal_coordination_cnt;
-  this->eval_squashed_flushes += other.eval_squashed_flushes;
+#define ADD(x) this->x += (other.x)

-  this->tx_global_cnt += other.tx_global_cnt;
-  this->tx_normal_cnt += other.tx_normal_cnt;
-  this->tx_inline_runs += other.tx_inline_runs;
-  this->tx_schedule_cancel_cnt += other.tx_schedule_cancel_cnt;
+  ADD(eval_io_coordination_cnt);

-  this->multi_squash_executions += other.multi_squash_executions;
-  this->multi_squash_exec_hop_usec += other.multi_squash_exec_hop_usec;
-  this->multi_squash_exec_reply_usec += other.multi_squash_exec_reply_usec;
+  ADD(eval_shardlocal_coordination_cnt);
+  ADD(eval_squashed_flushes);

-  this->blocked_on_interpreter += other.blocked_on_interpreter;
-  this->rdb_save_usec += other.rdb_save_usec;
-  this->rdb_save_count += other.rdb_save_count;
-  this->oom_error_cmd_cnt += other.oom_error_cmd_cnt;
+  ADD(tx_global_cnt);
+  ADD(tx_normal_cnt);
+  ADD(tx_inline_runs);
+  ADD(tx_schedule_cancel_cnt);
+
+  ADD(multi_squash_executions);
+  ADD(multi_squash_exec_hop_usec);
+  ADD(multi_squash_exec_reply_usec);
+  ADD(squashed_commands);
+
+  ADD(blocked_on_interpreter);
+  ADD(rdb_save_usec);
+  ADD(rdb_save_count);
+  ADD(oom_error_cmd_cnt);

  if (this->tx_width_freq_arr.size() > 0) {
    DCHECK_EQ(this->tx_width_freq_arr.size(), other.tx_width_freq_arr.size());
@ -54,6 +58,7 @@ ServerState::Stats& ServerState::Stats::Add(const ServerState::Stats& other) {
    this->tx_width_freq_arr = other.tx_width_freq_arr;
  }
  return *this;
+#undef ADD
 }

 void MonitorsRepo::Add(facade::Connection* connection) {