mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 02:16:36 +02:00
gpu: add 512MiB to darwin minimum, metal doesn't have partial offloading overhead (#4068)
This commit is contained in:
parent
b9f74ff3d6
commit
f0c454ab57
2 changed files with 12 additions and 1 deletions
|
@@ -88,6 +88,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
|||
graphFullOffload *= uint64(len(gpus))
|
||||
graphPartialOffload *= uint64(len(gpus))
|
||||
|
||||
// on metal there's no partial offload overhead
|
||||
if gpus[0].Library == "metal" {
|
||||
graphPartialOffload = graphFullOffload
|
||||
}
|
||||
|
||||
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
|
||||
memoryRequiredTotal := memoryMinimum + graphFullOffload
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue