WIP thinking API support

- Allows specifying whether thinking mode should be on or not - Templates get passed a new option so, e.g., qwen3's template can put `/think` or `/no_think` in the system prompt depending on the value of the setting - Add parsing for thinking blocks in both streaming/non-streaming mode - Update the CLI to make use of these changes TODO: - [ ] Don't parse thinking blocks when the user doesn't explicitly set the option, to maintain backwards compatibility - [ ] Warning on CLI when using a non-thinking/older version of a model (with an old template) - [ ] Wire up capabilities fully - [x] Unify parsing for streaming/non-streaming - [ ] Update templates - [ ] Update python/js libraries - [ ] How to handle differences in models wrt defaults and whether or not the thinking ability can even be controlled. If not specified by the user, should there be a default or should the template be able to check if it was explicitly set?
2025-05-12 19:07:06 +02:00 · 2025-05-07 16:15:46 -07:00 · 2025-05-07 16:15:46 -07:00 · 77f4594e80
commit 77f4594e80
parent a7835c6716
14 changed files with 513 additions and 12 deletions
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@ -143,6 +143,24 @@ func TestGenerateChat(t *testing.T) {
 		}
 	})

+	t.Run("missing thinking capability", func(t *testing.T) {
+		w := createRequest(t, s.ChatHandler, api.ChatRequest{
+			Model: "test",
+			Messages: []api.Message{
+				{Role: "user", Content: "Hello!"},
+			},
+			Thinking: true,
+		})
+
+		if w.Code != http.StatusBadRequest {
+			t.Errorf("expected status 400, got %d", w.Code)
+		}
+
+		if diff := cmp.Diff(w.Body.String(), `{"error":"registry.ollama.ai/library/test:latest does not support thinking"}`); diff != "" {
+			t.Errorf("mismatch (-got +want):\n%s", diff)
+		}
+	})
+
 	t.Run("missing model", func(t *testing.T) {
 		w := createRequest(t, s.ChatHandler, api.ChatRequest{})
 		if w.Code != http.StatusBadRequest {