Merge pull request #10468 from ollama/drifkin/num-parallel-1

This commit is contained in:
Devon Rifkin 2025-04-29 10:21:36 -07:00 committed by GitHub
commit db428adbb8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -58,7 +58,7 @@ var defaultModelsPerGPU = 3
// Default automatic value for parallel setting
// Model will still need to fit in VRAM. If this setting won't fit
// we'll back off down to 1 to try to get it to fit
-var defaultParallel = 4
+var defaultParallel = 2
var ErrMaxQueue = errors.New("server busy, please try again. maximum pending requests exceeded")