bool

2025-05-11 18:36:41 +02:00 · 2024-07-03 17:22:13 -07:00 · 2024-07-03 17:22:13 -07:00 · 55cd3ddcca
commit 55cd3ddcca
parent 66fe77f084
8 changed files with 82 additions and 83 deletions
--- a/llm/server.go
+++ b/llm/server.go
@@ -221,7 +221,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--memory-f32")
 	}

-	flashAttnEnabled := envconfig.FlashAttention
+	flashAttnEnabled := envconfig.FlashAttention()

 	for _, g := range gpus {
 		// only cuda (compute capability 7+) and metal support flash attention