Allow setting max vram for workarounds

Until we get all the memory calculations correct, this can provide
an escape valve for users to work around out-of-memory crashes.
This commit is contained in:
Daniel Hiltgen 2024-03-06 16:53:51 -08:00
parent ce9f7c4674
commit be330174dd
2 changed files with 24 additions and 0 deletions

View file

@ -242,6 +242,15 @@ func getCPUMem() (memInfo, error) {
}
func CheckVRAM() (int64, error) {
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
if userLimit != "" {
avail, err := strconv.ParseInt(userLimit, 10, 64)
if err != nil {
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
}
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
return avail, nil
}
gpuInfo := GetGPUInfo()
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
// leave 10% or 1024MiB of VRAM free per GPU to handle unaccounted for overhead