mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 18:36:41 +02:00
Switch use_mmap to a pointer type
This uses nil as undefined for a cleaner implementation.
This commit is contained in:
parent
3518aaef33
commit
97c9e11768
3 changed files with 63 additions and 93 deletions
|
@ -208,7 +208,8 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
|||
if g.Library == "metal" &&
|
||||
uint64(opts.NumGPU) > 0 &&
|
||||
uint64(opts.NumGPU) < ggml.KV().BlockCount()+1 {
|
||||
opts.UseMMap = api.TriStateFalse
|
||||
opts.UseMMap = new(bool)
|
||||
*opts.UseMMap = false
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -219,10 +220,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
|||
// Windows CUDA should not use mmap for best performance
|
||||
// Linux with a model larger than free space, mmap leads to thrashing
|
||||
// For CPU loads we want the memory to be allocated, not FS cache
|
||||
if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == api.TriStateUndefined) ||
|
||||
(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == api.TriStateUndefined) ||
|
||||
(gpus[0].Library == "cpu" && opts.UseMMap == api.TriStateUndefined) ||
|
||||
opts.UseMMap == api.TriStateFalse {
|
||||
if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) ||
|
||||
(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == nil) ||
|
||||
(gpus[0].Library == "cpu" && opts.UseMMap == nil) ||
|
||||
(opts.UseMMap != nil && !*opts.UseMMap) {
|
||||
params = append(params, "--no-mmap")
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue