feat(test): Improve benchmark workflow (#3330)

Signed-off-by: adi_holden <adi@dragonflydb.io>
2025-05-10 18:05:44 +02:00 · 2024-07-17 14:34:48 +03:00 · 2024-07-17 14:34:48 +03:00 · e3eb8518fd
commit e3eb8518fd
parent dc193931c9
3 changed files with 54 additions and 5 deletions
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@ -22,7 +22,7 @@ jobs:
                  app.kubernetes.io/created-by: "dragonfly-operator"
                name: "dragonfly-sample"
              spec:
-                image: "ghcr.io/dragonflydb/dragonfly-weekly:latest"
+                image: "ghcr.io/dragonflydb/dragonfly:latest"
                args: ["--cache_mode"]
                replicas: 2
                resources:
@ -89,7 +89,27 @@ jobs:
          kubectl wait -n ${{ steps.setup.outputs.namespace }} pods --selector app=dragonfly-sample --for condition=Ready --timeout=120s
          kubectl describe -n ${{ steps.setup.outputs.namespace }} pod dragonfly-sample-0

-      - name: Run Benchmark
+      - name: Run Memtier Benchmark
+        shell: bash
+        run: |
+          kubectl apply -n ${{ steps.setup.outputs.namespace }} -f tools/benchmark/k8s-benchmark-job.yaml
+
+      - name: Version upgrade
+        shell: bash
+        run: |
+          # benchmark is running, wait for 30 seconds before version upgrade
+          sleep 30
+          kubectl patch dragonfly dragonfly-sample -n ${{ steps.setup.outputs.namespace }}  --type merge -p '{"spec":{"image":"ghcr.io/dragonflydb/dragonfly-weekly:latest"}}'
+
+      - name: Wait for Memtier Benchmark fail
+        shell: bash
+        run: |
+          # The Memtier benchmark run is expected to fail at some point because the old master shuts down during the version upgrade
+          kubectl wait --for=condition=failed --timeout=120s -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark 2>/dev/null
+          kubectl logs -n ${{ steps.setup.outputs.namespace }} -f jobs/memtier-benchmark
+          kubectl delete -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark
+
+      - name: Run Memtier Benchmark again
        shell: bash
        run: |
          kubectl apply -n ${{ steps.setup.outputs.namespace }} -f tools/benchmark/k8s-benchmark-job.yaml
@ -116,7 +136,6 @@ jobs:
      - name: Server checks
        run: |
          nohup kubectl port-forward -n ${{ steps.setup.outputs.namespace }} service/dragonfly-sample 6379:6379 &
-
          pip install -r tools/requirements.txt
          python3 tools/benchmark/post_run_checks.py

@ -138,6 +157,15 @@ jobs:
          command: |
            kubectl logs -n ${{ steps.setup.outputs.namespace }} dragonfly-sample-1

+      - name: Describe dragonflydb object
+        uses: nick-fields/retry@v3
+        if: always()
+        with:
+          timeout_minutes: 1
+          max_attempts: 3
+          command: |
+            kubectl describe dragonflies.dragonflydb.io -n ${{ steps.setup.outputs.namespace }} dragonfly-sample
+
      - name: Scale down to zero
        if: always()
        run: |
@ -152,3 +180,16 @@ jobs:
          set -x
          kubectl delete namespace ${{ steps.setup.outputs.namespace }}
          kubectl delete namespace dragonfly-operator-system
+
+      - name: Send notification on failure
+        if: failure() && github.ref == 'refs/heads/main'
+        shell: bash
+        run: |
+          job_link="${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
+          message="Benchmark tests failed.\\n Job Link: ${job_link}\\n"
+
+          curl -s \
+            -X POST \
+            -H 'Content-Type: application/json' \
+            '${{ secrets.GSPACES_BOT_DF_BUILD }}' \
+            -d '{"text": "'"${message}"'"}'
--- a/tools/benchmark/k8s-benchmark-job.yaml
+++ b/tools/benchmark/k8s-benchmark-job.yaml
@ -11,7 +11,7 @@ spec:
        - name: memtier
          image: redislabs/memtier_benchmark:latest
          args:
-            - memtier_benchmark --pipeline=30 --key-maximum=10000 -c 10 -t 2 --requests=500000 --expiry-range=10-100 --reconnect-interval=10000 --distinct-client-seed --hide-histogram -s dragonfly-sample
+            - memtier_benchmark --pipeline=30 --key-maximum=100000 -c 10 -t 2 --test-time=600 --reconnect-interval=10000 --distinct-client-seed --hide-histogram -s dragonfly-sample
          command:
            - sh # This is important! without it memtier cannot DIG the dragonfly SVC domain
            - -c
--- a/tools/benchmark/post_run_checks.py
+++ b/tools/benchmark/post_run_checks.py
@ -7,6 +7,10 @@ def main():
    max_unaccounted = 200 * 1024 * 1024  # 200mb

    client = redis.Redis(decode_responses=True)
+    info = client.info("server")
+    # Check that the version upgrade finished, from the last released version to the latest weekly docker build
+    assert info["dragonfly_version"] == "df-HEAD-HASH-NOTFOUND"
+
    info = client.info("memory")
    print(f'Used memory {info["used_memory"]}, rss {info["used_memory_rss"]}')
    assert info["used_memory_rss"] - info["used_memory"] < max_unaccounted
@ -26,7 +30,11 @@ def main():
        time.sleep(1)
        replication_state = client.info("replication")["slave0"]

-    assert replication_state["lag"] == 0, f"Lag is bad, expected 0, got {replication_state['lag']}"
+    if replication_state["lag"] != 0:
+        print(f"Lag is bad, expected 0, got {replication_state['lag']}")
+        info = client.info("all")
+        print(f"Info all output: {info}")
+        assert False


 if __name__ == "__main__":