feat(test): Improve benchmark workflow (#3330)

Signed-off-by: adi_holden <adi@dragonflydb.io>
This commit is contained in:
adiholden 2024-07-17 14:34:48 +03:00 committed by GitHub
parent dc193931c9
commit e3eb8518fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 54 additions and 5 deletions

View file

@ -22,7 +22,7 @@ jobs:
app.kubernetes.io/created-by: "dragonfly-operator"
name: "dragonfly-sample"
spec:
image: "ghcr.io/dragonflydb/dragonfly-weekly:latest"
image: "ghcr.io/dragonflydb/dragonfly:latest"
args: ["--cache_mode"]
replicas: 2
resources:
@ -89,7 +89,27 @@ jobs:
kubectl wait -n ${{ steps.setup.outputs.namespace }} pods --selector app=dragonfly-sample --for condition=Ready --timeout=120s
kubectl describe -n ${{ steps.setup.outputs.namespace }} pod dragonfly-sample-0
- name: Run Benchmark
- name: Run Memtier Benchmark
shell: bash
run: |
kubectl apply -n ${{ steps.setup.outputs.namespace }} -f tools/benchmark/k8s-benchmark-job.yaml
- name: Version upgrade
shell: bash
run: |
# benchmark is running, wait for 30 seconds before version upgrade
sleep 30
kubectl patch dragonfly dragonfly-sample -n ${{ steps.setup.outputs.namespace }} --type merge -p '{"spec":{"image":"ghcr.io/dragonflydb/dragonfly-weekly:latest"}}'
- name: Wait for Memtier Benchmark fail
shell: bash
run: |
# The Memtier benchmark run is expected to fail at some point because the old master shuts down during the version upgrade
kubectl wait --for=condition=failed --timeout=120s -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark 2>/dev/null
kubectl logs -n ${{ steps.setup.outputs.namespace }} -f jobs/memtier-benchmark
kubectl delete -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark
- name: Run Memtier Benchmark again
shell: bash
run: |
kubectl apply -n ${{ steps.setup.outputs.namespace }} -f tools/benchmark/k8s-benchmark-job.yaml
@ -116,7 +136,6 @@ jobs:
- name: Server checks
run: |
nohup kubectl port-forward -n ${{ steps.setup.outputs.namespace }} service/dragonfly-sample 6379:6379 &
pip install -r tools/requirements.txt
python3 tools/benchmark/post_run_checks.py
@ -138,6 +157,15 @@ jobs:
command: |
kubectl logs -n ${{ steps.setup.outputs.namespace }} dragonfly-sample-1
- name: Describe dragonflydb object
uses: nick-fields/retry@v3
if: always()
with:
timeout_minutes: 1
max_attempts: 3
command: |
kubectl describe dragonflies.dragonflydb.io -n ${{ steps.setup.outputs.namespace }} dragonfly-sample
- name: Scale down to zero
if: always()
run: |
@ -152,3 +180,16 @@ jobs:
set -x
kubectl delete namespace ${{ steps.setup.outputs.namespace }}
kubectl delete namespace dragonfly-operator-system
- name: Send notification on failure
if: failure() && github.ref == 'refs/heads/main'
shell: bash
run: |
job_link="${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
message="Benchmark tests failed.\\n Job Link: ${job_link}\\n"
curl -s \
-X POST \
-H 'Content-Type: application/json' \
'${{ secrets.GSPACES_BOT_DF_BUILD }}' \
-d '{"text": "'"${message}"'"}'

View file

@ -11,7 +11,7 @@ spec:
- name: memtier
image: redislabs/memtier_benchmark:latest
args:
- memtier_benchmark --pipeline=30 --key-maximum=10000 -c 10 -t 2 --requests=500000 --expiry-range=10-100 --reconnect-interval=10000 --distinct-client-seed --hide-histogram -s dragonfly-sample
- memtier_benchmark --pipeline=30 --key-maximum=100000 -c 10 -t 2 --test-time=600 --reconnect-interval=10000 --distinct-client-seed --hide-histogram -s dragonfly-sample
command:
- sh # This is important! without it memtier cannot DIG the dragonfly SVC domain
- -c

View file

@ -7,6 +7,10 @@ def main():
max_unaccounted = 200 * 1024 * 1024 # 200mb
client = redis.Redis(decode_responses=True)
info = client.info("server")
# Check that the version upgrade from the last released version to the latest weekly docker build has finished
assert info["dragonfly_version"] == "df-HEAD-HASH-NOTFOUND"
info = client.info("memory")
print(f'Used memory {info["used_memory"]}, rss {info["used_memory_rss"]}')
assert info["used_memory_rss"] - info["used_memory"] < max_unaccounted
@ -26,7 +30,11 @@ def main():
time.sleep(1)
replication_state = client.info("replication")["slave0"]
assert replication_state["lag"] == 0, f"Lag is bad, expected 0, got {replication_state['lag']}"
if replication_state["lag"] != 0:
print(f"Lag is bad, expected 0, got {replication_state['lag']}")
info = client.info("all")
print(f"Info all output: {info}")
assert False
if __name__ == "__main__":