ollamarunner: Fix memory leak when processing images

The context (and therefore associated input tensors) was not being
properly closed when images were being processed. We were trying to
close them but in reality we were closing over an empty list, preventing
anything from actually being freed.

Fixes #10434
This commit is contained in:
Jesse Gross 2025-05-01 11:34:02 -07:00 committed by Jesse Gross
parent 938e8447e8
commit 8e8f2c6d67

View file

@@ -34,14 +34,10 @@ import (
 	_ "github.com/ollama/ollama/model/models"
 )
 
-type contextList struct {
-	list []ml.Context
-}
-
 type Sequence struct {
 	// ctxs are used for allocating tensors that last the lifetime of the sequence, such as
 	// multimodal embeddings
-	ctxs *contextList
+	ctxs []ml.Context
 
 	// batch index
 	iBatch int
@@ -177,8 +173,10 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
 // inputs processes the prompt and images into a list of inputs
 // by splitting the prompt on [img-<n>] tags, tokenizing text and
 // decoding images
-func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *contextList, error) {
+func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, []ml.Context, error) {
 	var inputs []input.Input
+	var ctxs []ml.Context
 
 	var parts []string
 	var matches [][]string
@@ -192,13 +190,6 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *
 		parts = []string{prompt}
 	}
 
-	var contexts contextList
-	runtime.AddCleanup(&contexts, func(ctxs []ml.Context) {
-		for _, ctx := range ctxs {
-			ctx.Close()
-		}
-	}, contexts.list)
-
 	postTokenize := false
 	for i, part := range parts {
 		// text - tokenize
@@ -228,7 +219,8 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *
 			}
 
 			ctx := s.model.Backend().NewContext()
-			contexts.list = append(contexts.list, ctx)
+			runtime.SetFinalizer(ctx, func(c ml.Context) { c.Close() })
+			ctxs = append(ctxs, ctx)
 			imageEmbeddings, err := multimodalProcessor.EncodeMultimodal(ctx, images[imageIndex].Data)
 			if err != nil {
 				return nil, nil, err
@@ -251,7 +243,7 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *
 		}
 	}
 
-	return inputs, &contexts, nil
+	return inputs, ctxs, nil
 }
 
 type Server struct {