From 95cd9dfb4c68c06a60e321334f1611aa5c3d2743 Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Mon, 23 Dec 2024 10:13:45 +0200 Subject: [PATCH] chore: update helio and improve our stack overflow resiliency (#4349) 1. Run CI/Regression tests with HELIO_STACK_CHECK=4096. This will crash if a fiber stack usage goes below this limit. 2. Increase shard queue stack size to 64KB 3. Increase fiber stack size to 40KB on Debug builds. 4. Updated helio has some changes around the TLS socket code. In addition we add a helper script to generate self-signed certificates helpful for local development work. Signed-off-by: Roman Gershman --- .github/workflows/ci.yml | 1 + .github/workflows/regression-tests.yml | 2 +- helio | 2 +- src/facade/dragonfly_connection.cc | 2 +- src/server/dfly_main.cc | 5 ++ src/server/snapshot.cc | 16 +++-- src/server/transaction.cc | 2 +- tools/local/gen-test-certs.sh | 87 ++++++++++++++++++++++++++ 8 files changed, 108 insertions(+), 9 deletions(-) create mode 100755 tools/local/gen-test-certs.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d721f3576..7ce07d51b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -111,6 +111,7 @@ jobs: -DCMAKE_CXX_COMPILER="${{matrix.compiler.cxx}}" \ -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache \ -DCMAKE_CXX_FLAGS="${{matrix.cxx_flags}} -no-pie" -DWITH_AWS:BOOL=OFF \ + -DHELIO_STACK_CHECK:STRING=4096 \ -L cd ${GITHUB_WORKSPACE}/build && pwd du -hcs _deps/ diff --git a/.github/workflows/regression-tests.yml b/.github/workflows/regression-tests.yml index 4b6b3b8f8..993b95d0d 100644 --- a/.github/workflows/regression-tests.yml +++ b/.github/workflows/regression-tests.yml @@ -36,7 +36,7 @@ jobs: # -no-pie to disable address randomization so we could symbolize stacktraces cmake -B ${GITHUB_WORKSPACE}/build -DCMAKE_BUILD_TYPE=${{matrix.build-type}} -GNinja \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DPRINT_STACKTRACES_ON_SIGNAL=ON \ - -DCMAKE_CXX_FLAGS=-no-pie + -DCMAKE_CXX_FLAGS=-no-pie -DHELIO_STACK_CHECK:STRING=4096 cd ${GITHUB_WORKSPACE}/build && ninja dragonfly pwd diff --git a/helio b/helio index 32e8c4ec8..b508ad516 160000 --- a/helio +++ b/helio @@ -1 +1 @@ -Subproject commit 32e8c4ec830e8d4224d8a13a11c1607f357da80f +Subproject commit b508ad51624c4470292f132de945449ddf2da998 diff --git a/src/facade/dragonfly_connection.cc b/src/facade/dragonfly_connection.cc index 19b880e3f..39ef714e5 100644 --- a/src/facade/dragonfly_connection.cc +++ b/src/facade/dragonfly_connection.cc @@ -685,7 +685,7 @@ void Connection::HandleRequests() { FiberSocketBase::AcceptResult aresult = socket_->Accept(); if (!aresult) { - LOG(WARNING) << "Error handshaking " << aresult.error().message(); + LOG(INFO) << "Error handshaking " << aresult.error().message(); return; } VLOG(1) << "TLS handshake succeeded"; diff --git a/src/server/dfly_main.cc b/src/server/dfly_main.cc index daac86bd4..a41231d7f 100644 --- a/src/server/dfly_main.cc +++ b/src/server/dfly_main.cc @@ -96,7 +96,12 @@ namespace { // Default stack size for fibers. We decrease it by 16 bytes because some allocators // need additional 8-16 bytes for their internal structures, thus over reserving additional // memory pages if using round sizes. +#ifdef NDEBUG constexpr size_t kFiberDefaultStackSize = 32_KB - 16; +#else +// Increase stack size for debug builds. +constexpr size_t kFiberDefaultStackSize = 40_KB - 16; +#endif using util::http::TlsClient; diff --git a/src/server/snapshot.cc b/src/server/snapshot.cc index 7b7d748cc..eb208e00f 100644 --- a/src/server/snapshot.cc +++ b/src/server/snapshot.cc @@ -4,7 +4,6 @@ #include "server/snapshot.h" -#include #include #include @@ -64,13 +63,18 @@ bool SliceSnapshot::IsSnaphotInProgress() { void SliceSnapshot::Start(bool stream_journal, SnapshotFlush allow_flush) { DCHECK(!snapshot_fb_.IsJoinable()); - auto db_cb = absl::bind_front(&SliceSnapshot::OnDbChange, this); + auto db_cb = [this](DbIndex db_index, const DbSlice::ChangeReq& req) { + OnDbChange(db_index, req); + }; + snapshot_version_ = db_slice_->RegisterOnChange(std::move(db_cb)); if (stream_journal) { auto* journal = db_slice_->shard_owner()->journal(); DCHECK(journal); - auto journal_cb = absl::bind_front(&SliceSnapshot::OnJournalEntry, this); + auto journal_cb = [this](const journal::JournalItem& item, bool await) { + OnJournalEntry(item, await); + }; journal_cb_id_ = journal->RegisterOnChange(std::move(journal_cb)); } @@ -168,7 +172,7 @@ void SliceSnapshot::IterateBucketsFb(bool send_full_sync_cut) { } PrimeTable::Cursor next = - pt->TraverseBuckets(cursor, absl::bind_front(&SliceSnapshot::BucketSaveCb, this)); + pt->TraverseBuckets(cursor, [this](auto it) { return BucketSaveCb(it); }); cursor = next; PushSerialized(false); @@ -229,7 +233,9 @@ void SliceSnapshot::SwitchIncrementalFb(LSN lsn) { FiberAtomicGuard fg; serializer_->SendFullSyncCut(); } - auto journal_cb = absl::bind_front(&SliceSnapshot::OnJournalEntry, this); + auto journal_cb = [this](const journal::JournalItem& item, bool await) { + OnJournalEntry(item, await); + }; journal_cb_id_ = journal->RegisterOnChange(std::move(journal_cb)); PushSerialized(true); } else { diff --git a/src/server/transaction.cc b/src/server/transaction.cc index ca7939cba..9abf8b346 100644 --- a/src/server/transaction.cc +++ b/src/server/transaction.cc @@ -667,7 +667,6 @@ void Transaction::RunCallback(EngineShard* shard) { DCHECK_EQ(shard, EngineShard::tlocal()); RunnableResult result; - auto& db_slice = GetDbSlice(shard->shard_id()); try { result = (*cb_ptr_)(this, shard); @@ -691,6 +690,7 @@ void Transaction::RunCallback(EngineShard* shard) { LOG(FATAL) << "Unexpected exception " << e.what(); } + auto& db_slice = GetDbSlice(shard->shard_id()); db_slice.OnCbFinish(); // Handle result flags to alter behaviour. diff --git a/tools/local/gen-test-certs.sh b/tools/local/gen-test-certs.sh new file mode 100755 index 000000000..37b1c3118 --- /dev/null +++ b/tools/local/gen-test-certs.sh @@ -0,0 +1,87 @@ +#!/bin/bash +set -e + +SCRIPT_DIR=$(dirname "$0") +ROOT_DIR=$(readlink -f "$SCRIPT_DIR/../..") +GEN_DIR=$ROOT_DIR/genfiles/tls + + +# genfiles/tls/ca.{crt,key} Self signed CA certificate. +# genfiles/tls/dragonfly.{crt,key} A certificate with no key usage/policy restrictions. +# genfiles/tls/client.{crt,key} A certificate restricted for SSL client usage. +# genfiles/tls/server.{crt,key} A certificate restricted for SSL server usage. + +: ' +To run dragonfly use: +dragonfly --tls --tls_key_file ../genfiles/tls/server.key --tls_cert_file ../genfiles/tls/server.crt -requirepass pass + +Or with CA (does not require password): +dragonfly --tls --tls_key_file ../genfiles/tls/server.key --tls_cert_file ../genfiles/tls/server.crt \ +--tls_ca_cert_file ../genfiles/tls/ca.crt + +To connect with client (without ca): +openssl s_client -state -crlf -connect 127.0.0.1:6379 + +With CA: +openssl s_client -state -crlf -CAfile ../genfiles/tls/ca.crt -cert ../genfiles/tls/client.crt -key ../genfiles/tls/client.key -connect 127.0.0.1:6379 + +Similarly, to connect with redis-cli (no CA): +redis-cli --tls --insecure -a pass + +With CA: +redis-cli --tls --cacert ../genfiles/tls/ca.crt --cert ../genfiles/tls/client.crt --key ../genfiles/tls/client.key + +memtier (without CA): +memtier_benchmark --tls --key ../genfiles/tls/client.key --cert ../genfiles/tls/client.crt -a pass + +memtier (with CA): +memtier_benchmark --tls --key ../genfiles/tls/client.key --cert ../genfiles/tls/client.crt --cacert ../genfiles/tls/ca.crt +' + +generate_cert() { + local name=$1 + local cn="$2" + local opts="$3" + + local keyfile=$GEN_DIR/${name}.key + local certfile=$GEN_DIR/${name}.crt + + [ -f $keyfile ] || openssl genpkey -algorithm ED25519 -out $keyfile + openssl req -new -sha256 \ + -subj "/O=Dragonfly Test/CN=$cn" \ + -key $keyfile | \ + openssl x509 \ + -req -sha256 \ + -CA $GEN_DIR/ca.crt \ + -CAkey $GEN_DIR/ca.key \ + -CAserial $GEN_DIR/ca.txt \ + -CAcreateserial \ + -days 365 \ + $opts \ + -out $certfile +} + +mkdir -p $GEN_DIR +[ -f $GEN_DIR/ca.key ] || openssl genpkey -algorithm ED25519 -out $GEN_DIR/ca.key + +# -x509: self-signed certificate, -nodes: no password +openssl req \ + -x509 -new -nodes -sha256 \ + -key $GEN_DIR/ca.key \ + -days 3650 \ + -subj '/O=Dragonfly Test/CN=Certificate Authority' \ + -out $GEN_DIR/ca.crt + +cat > $GEN_DIR/openssl.cnf <<_END_ +[ server_cert ] +keyUsage = digitalSignature, keyEncipherment +nsCertType = server + +[ client_cert ] +keyUsage = digitalSignature, keyEncipherment +nsCertType = client +_END_ + +generate_cert server "Server-only" "-extfile $GEN_DIR/openssl.cnf -extensions server_cert" +generate_cert client "Client-only" "-extfile $GEN_DIR/openssl.cnf -extensions client_cert" +generate_cert dragonfly "Generic-cert"