This commit is contained in:
Michael Yang 2024-07-03 17:22:13 -07:00
parent 66fe77f084
commit 55cd3ddcca
8 changed files with 82 additions and 83 deletions

View file

@@ -221,7 +221,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--memory-f32")
}
-flashAttnEnabled := envconfig.FlashAttention
+flashAttnEnabled := envconfig.FlashAttention()
for _, g := range gpus {
// only cuda (compute capability 7+) and metal support flash attention