Allow setting max vram for workarounds

Until we get all the memory calculations correct, this can provide an escape valve for users to work around out-of-memory crashes.
2025-05-11 18:36:41 +02:00 · 2024-03-06 16:53:51 -08:00 · 2024-03-06 16:53:51 -08:00 · be330174dd
commit be330174dd
parent ce9f7c4674
2 changed files with 24 additions and 0 deletions
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -242,6 +242,15 @@ func getCPUMem() (memInfo, error) {
 }

 func CheckVRAM() (int64, error) {
+	userLimit := os.Getenv("OLLAMA_MAX_VRAM")
+	if userLimit != "" {
+		avail, err := strconv.ParseInt(userLimit, 10, 64)
+		if err != nil {
+			return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
+		}
+		slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
+		return avail, nil
+	}
 	gpuInfo := GetGPUInfo()
 	if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
 		// leave 10% or 1024MiB of VRAM free per GPU to handle unaccounted for overhead