mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 18:36:41 +02:00
bool
This commit is contained in:
parent
66fe77f084
commit
55cd3ddcca
8 changed files with 82 additions and 83 deletions
|
@ -221,7 +221,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
|||
params = append(params, "--memory-f32")
|
||||
}
|
||||
|
||||
flashAttnEnabled := envconfig.FlashAttention
|
||||
flashAttnEnabled := envconfig.FlashAttention()
|
||||
|
||||
for _, g := range gpus {
|
||||
// only cuda (compute capability 7+) and metal support flash attention
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue