gpu: add 512MiB to darwin minimum, metal doesn't have partial offloading overhead (#4068)

This commit is contained in:
Jeffrey Morgan 2024-05-01 11:46:03 -04:00 committed by GitHub
parent b9f74ff3d6
commit f0c454ab57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 12 additions and 1 deletions

View file

@@ -88,6 +88,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
graphFullOffload *= uint64(len(gpus))
graphPartialOffload *= uint64(len(gpus))
// on metal there's no partial offload overhead
if gpus[0].Library == "metal" {
graphPartialOffload = graphFullOffload
}
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload