mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 02:16:36 +02:00
Request and model concurrency
This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS.
This commit is contained in:
parent
ee448deaba
commit
34b9db5afc
30 changed files with 2572 additions and 1387 deletions
|
@ -9,52 +9,41 @@ package gpu
|
|||
*/
|
||||
import "C"
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
|
||||
func CheckVRAM() (uint64, error) {
|
||||
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
|
||||
if userLimit != "" {
|
||||
avail, err := strconv.ParseInt(userLimit, 10, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
|
||||
}
|
||||
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
|
||||
return uint64(avail), nil
|
||||
}
|
||||
|
||||
func GetGPUInfo() GpuInfoList {
|
||||
mem, _ := GetCPUMem()
|
||||
if runtime.GOARCH == "amd64" {
|
||||
// gpu not supported, this may not be metal
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return uint64(C.getRecommendedMaxVRAM()), nil
|
||||
}
|
||||
|
||||
func GetGPUInfo() GpuInfo {
|
||||
mem, _ := getCPUMem()
|
||||
if runtime.GOARCH == "amd64" {
|
||||
return GpuInfo{
|
||||
Library: "cpu",
|
||||
Variant: GetCPUVariant(),
|
||||
memInfo: mem,
|
||||
return []GpuInfo{
|
||||
{
|
||||
Library: "cpu",
|
||||
Variant: GetCPUVariant(),
|
||||
memInfo: mem,
|
||||
},
|
||||
}
|
||||
}
|
||||
return GpuInfo{
|
||||
info := GpuInfo{
|
||||
Library: "metal",
|
||||
memInfo: mem,
|
||||
ID: "0",
|
||||
}
|
||||
info.TotalMemory = uint64(C.getRecommendedMaxVRAM())
|
||||
|
||||
// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
|
||||
info.FreeMemory = info.TotalMemory
|
||||
|
||||
info.MinimumMemory = 0
|
||||
return []GpuInfo{info}
|
||||
}
|
||||
|
||||
func getCPUMem() (memInfo, error) {
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
return memInfo{
|
||||
TotalMemory: uint64(C.getPhysicalMemory()),
|
||||
FreeMemory: 0,
|
||||
DeviceCount: 1,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
|
||||
// No-op on darwin
|
||||
return "", ""
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue