Mirror of https://github.com/ollama/ollama.git
llm: make load time stall duration configurable via OLLAMA_LOAD_TIMEOUT
With the new very large parameter models, some users are willing to wait for a very long time for models to load.
Parent: b05c9e83d9
Commit: 6719097649
4 changed files with 60 additions and 7 deletions
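Only the llmServer hunks are shown on this page; the envconfig change that actually backs the new variable lives in one of the other changed files. As a rough sketch only (the parsing rules and defaults below are assumptions for illustration, not the committed code), an accessor like envconfig.LoadTimeout() could read OLLAMA_LOAD_TIMEOUT, accept either a Go duration string or a plain number of seconds, keep the previous 5-minute default, and treat a non-positive value as "wait indefinitely":

// Sketch only: a plausible envconfig.LoadTimeout() accessor. The parsing and
// default behavior here are assumptions, not the committed implementation.
package envconfig

import (
	"math"
	"os"
	"strconv"
	"time"
)

// LoadTimeout returns how long WaitUntilRunning may go without load progress
// before giving up. OLLAMA_LOAD_TIMEOUT overrides the old 5-minute default.
func LoadTimeout() time.Duration {
	timeout := 5 * time.Minute // previous hard-coded stallDuration
	if raw := os.Getenv("OLLAMA_LOAD_TIMEOUT"); raw != "" {
		if d, err := time.ParseDuration(raw); err == nil {
			timeout = d // e.g. "45m", "2h"
		} else if secs, err := strconv.ParseInt(raw, 10, 64); err == nil {
			timeout = time.Duration(secs) * time.Second // e.g. "600"
		}
	}
	if timeout <= 0 {
		// Treat zero or negative values as "wait indefinitely".
		return time.Duration(math.MaxInt64)
	}
	return timeout
}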
@@ -584,8 +584,7 @@ func (s *llmServer) Ping(ctx context.Context) error {
 
 func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 	start := time.Now()
-	stallDuration := 5 * time.Minute            // If no progress happens
-	finalLoadDuration := 5 * time.Minute        // After we hit 100%, give the runner more time to come online
+	stallDuration := envconfig.LoadTimeout()    // If no progress happens
 	stallTimer := time.Now().Add(stallDuration) // give up if we stall
 
 	slog.Info("waiting for llama runner to start responding")
@@ -637,7 +636,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 			stallTimer = time.Now().Add(stallDuration)
 		} else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 {
 			slog.Debug("model load completed, waiting for server to become available", "status", status.ToString())
-			stallTimer = time.Now().Add(finalLoadDuration)
+			stallTimer = time.Now().Add(stallDuration)
 			fullyLoaded = true
 		}
 		time.Sleep(time.Millisecond * 250)
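With the timeout configurable, users loading very large models can raise it when starting the server, for example OLLAMA_LOAD_TIMEOUT=45m ollama serve (assuming the variable accepts a Go-style duration string, as in the sketch above; the exact accepted formats come from the envconfig change not shown on this page). Note that the second hunk also drops the separate finalLoadDuration grace period: once load progress reaches 100%, the same stallDuration is reused while waiting for the runner to come online.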