gpu: add 512MiB to darwin minimum, metal doesn't have partial offloading overhead (#4068)

2025-05-11 02:16:36 +02:00 · 2024-05-01 11:46:03 -04:00 · 2024-05-01 11:46:03 -04:00 · f0c454ab57
commit f0c454ab57
parent b9f74ff3d6
2 changed files with 12 additions and 1 deletion
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -88,6 +88,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 	graphFullOffload *= uint64(len(gpus))
 	graphPartialOffload *= uint64(len(gpus))

+	// on metal there's no partial offload overhead
+	if gpus[0].Library == "metal" {
+		graphPartialOffload = graphFullOffload
+	}
+
 	// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
 	memoryRequiredTotal := memoryMinimum + graphFullOffload