Implement Linux NUMA detection

If the system has multiple NUMA nodes, enable NUMA support in llama.cpp.
If we detect numactl in the PATH, use that; otherwise use the basic "distribute" mode.
This commit is contained in:
Daniel Hiltgen 2024-08-05 12:56:20 -07:00
parent 39f2bc6bfc
commit f457d63400
3 changed files with 29 additions and 4 deletions

View file

@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--mlock")
}
if opts.UseNUMA {
params = append(params, "--numa")
if gpu.IsNUMA() {
numaMode := "distribute"
if runtime.GOOS == "linux" {
if _, err := exec.LookPath("numactl"); err == nil {
numaMode = "numactl"
}
}
params = append(params, "--numa", numaMode)
}
params = append(params, "--parallel", strconv.Itoa(numParallel))