Allow models to force a new batch

This is useful for a few things:
 - Work around bugs, such as having 2 images in one batch
 - Keep the image in a single batch for fully connected attention
 - Improve performance by not evaluating embeddings multiple times
2025-05-11 10:26:53 +02:00 · 2025-03-10 20:03:29 -07:00 · 2025-03-10 20:03:29 -07:00 · 06007c0a18
commit 06007c0a18
parent a8e83a7654
4 changed files with 10 additions and 14 deletions
--- a/model/input/input.go
+++ b/model/input/input.go
@@ -15,6 +15,12 @@ type Input struct {
 	// stored in Multimodal, used for caching and comparing
 	// equality.
 	MultimodalHash uint64
+
+	// BatchBreak forces a new batch to be started with this
+	// input. For example, this can be used to align images
+	// with batches. Note that batches may be divided in additional
+	// locations as well.
+	BatchBreak bool
 }

 // MultimodalIndex is a multimodal element (such as an image)
--- a/model/models/gemma3/model.go
+++ b/model/models/gemma3/model.go
@@ -112,8 +112,8 @@ func (m *Model) PostTokenize(ctx ml.Context, inputs []input.Input) ([]input.Inpu
 			result = append(result, inp)
 		} else {
 			imageInputs := []input.Input{
-				{Token: 108},    // "\n\n"
-				{Token: 255999}, // "<start_of_image>""
+				{Token: 108},                      // "\n\n"
+				{Token: 255999, BatchBreak: true}, // "<start_of_image>""
 			}
 			result = append(result, imageInputs...)