Mirror of https://github.com/ollama/ollama.git (synced 2025-05-11 02:16:36 +02:00)

Revert "increase default context length to 4096 (#10364)"
This reverts commit 424f648632.

parent 5cfc1c39f3
commit dd93e1af85

7 changed files with 12 additions and 49 deletions
@@ -58,7 +58,7 @@ var defaultModelsPerGPU = 3
 // Default automatic value for parallel setting
 // Model will still need to fit in VRAM. If this setting won't fit
 // we'll back off down to 1 to try to get it to fit
-var defaultParallel = 2
+var defaultParallel = 4

 var ErrMaxQueue = errors.New("server busy, please try again. maximum pending requests exceeded")

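The comment above defaultParallel describes a fallback: if the chosen parallel setting will not fit in VRAM, the scheduler backs off toward 1. A minimal standalone sketch of that idea, using hypothetical names (fitParallel, perSlotVRAM) rather than the scheduler's actual fitting code:

package main

import "fmt"

// Mirrors the default restored by this revert.
const defaultParallel = 4

// fitParallel is a hypothetical illustration of the comment above: start at
// the default parallel setting and back off toward 1 until the estimated
// per-slot VRAM cost fits in what is free.
func fitParallel(freeVRAM, perSlotVRAM uint64) int {
	parallel := defaultParallel
	for parallel > 1 && uint64(parallel)*perSlotVRAM > freeVRAM {
		parallel--
	}
	return parallel
}

func main() {
	// 6 GiB free, 2 GiB per parallel slot: backs off from 4 to 3.
	fmt.Println(fitParallel(6<<30, 2<<30))
}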
@@ -81,6 +81,10 @@ func InitScheduler(ctx context.Context) *Scheduler {

 // context must be canceled to decrement ref count and release the runner
 func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration *api.Duration) (chan *runnerRef, chan error) {
+	if opts.NumCtx < 4 {
+		opts.NumCtx = 4
+	}
+
 	req := &LlmRequest{
 		ctx:             c,
 		model:           model,
@@ -110,11 +114,6 @@ func (s *Scheduler) Run(ctx context.Context) {
 	}()
 }

-const (
-	defaultContextLength  = 4096
-	smallGpuContextLength = 2048
-)
-
 func (s *Scheduler) processPending(ctx context.Context) {
 	for {
 		select {
@@ -167,17 +166,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
 						gpus = s.getGpuFn()
 					}

-					if pending.origNumCtx == -1 {
-						if len(gpus) == 1 && gpus[0].Library != "cpu" && gpus[0].TotalMemory <= 4096*1024*1024 {
-							slog.Info("GPU is small, limiting default context window", "num_ctx", smallGpuContextLength)
-							pending.opts.NumCtx = smallGpuContextLength
-							pending.origNumCtx = smallGpuContextLength
-						} else {
-							pending.opts.NumCtx = defaultContextLength
-							pending.origNumCtx = defaultContextLength
-						}
-					}
-
 					if envconfig.MaxRunners() <= 0 {
 						// No user specified MaxRunners, so figure out what automatic setting to use
 						// If all GPUs have reliable free memory reporting, defaultModelsPerGPU * the number of GPUs
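For reference, the block removed above chose the default context window from GPU size: a single non-CPU GPU with at most 4 GiB (4096*1024*1024 bytes) of total memory got 2048, anything else got 4096. A small, hypothetical restatement of that removed heuristic (the helper name and plain parameters are illustrative, not the scheduler's API):

package main

import "fmt"

const (
	defaultContextLength  = 4096
	smallGpuContextLength = 2048
)

// defaultNumCtx restates the removed heuristic as a standalone function:
// a single non-CPU GPU with <= 4 GiB of total memory gets the smaller default
// context window; every other configuration gets 4096.
func defaultNumCtx(gpuCount int, library string, totalMemory uint64) int {
	if gpuCount == 1 && library != "cpu" && totalMemory <= 4096*1024*1024 {
		return smallGpuContextLength
	}
	return defaultContextLength
}

func main() {
	fmt.Println(defaultNumCtx(1, "cuda", 4<<30))  // 2048: single 4 GiB GPU
	fmt.Println(defaultNumCtx(2, "cuda", 24<<30)) // 4096: multi-GPU
	fmt.Println(defaultNumCtx(1, "cpu", 0))       // 4096: CPU only
}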