diff --git a/server/sched.go b/server/sched.go index f3978796c..883540cea 100644 --- a/server/sched.go +++ b/server/sched.go @@ -58,7 +58,7 @@ var defaultModelsPerGPU = 3 // Default automatic value for parallel setting // Model will still need to fit in VRAM. If this setting won't fit // we'll back off down to 1 to try to get it to fit -var defaultParallel = 4 +var defaultParallel = 2 var ErrMaxQueue = errors.New("server busy, please try again. maximum pending requests exceeded")