Allow models to force a new batch

This is useful for a few things:
- Work around bugs, such as having 2 images in one batch
- Keep the image in a single batch for fully connected attention
- Improve performance by not evaluating embeddings multiple times
2025-05-11 18:36:41 +02:00 · 2025-03-10 20:03:29 -07:00 · 2025-03-10 20:03:29 -07:00 · 06007c0a18
commit 06007c0a18
parent a8e83a7654
4 changed files with 10 additions and 14 deletions
--- a/server/prompt.go
+++ b/server/prompt.go
@@ -26,7 +26,6 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 	var system []api.Message

 	isMllama := checkMllamaModelFamily(m)
-	isGemma3 := checkGemma3ModelFamily(m)

 	var imageNumTokens int
 	// TODO: Ideally we would compute this from the projector metadata but some pieces are implementation dependent
@@ -41,7 +40,7 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 	n := len(msgs) - 1
 	// in reverse, find all messages that fit into context window
 	for i := n; i >= 0; i-- {
-		if (isMllama || isGemma3) && len(msgs[i].Images) > 1 {
+		if isMllama && len(msgs[i].Images) > 1 {
 			return "", nil, errTooManyImages
 		}

@@ -158,12 +157,3 @@ func checkMllamaModelFamily(m *Model) bool {
 	}
 	return false
 }
-
-func checkGemma3ModelFamily(m *Model) bool {
-	for _, arch := range m.Config.ModelFamilies {
-		if arch == "gemma3" {
-			return true
-		}
-	}
-	return false
-}