ollama/cmd/warn_thinking_test.go
Devon Rifkin 77f4594e80 WIP thinking API support
- Allows specifying whether thinking mode should be on or not
- Templates get passed a new option so, e.g., qwen3's template can put
  `/think` or `/no_think` in the system prompt depending on the value of
  the setting
- Add parsing for thinking blocks in both streaming/non-streaming mode
- Update the CLI to make use of these changes

TODO:

- [ ] Don't parse thinking blocks when the user doesn't explicitly set
      the option, to maintain backwards compatibility
- [ ] Warning on CLI when using a non-thinking/older version of a model
      (with an old template)
- [ ] Wire up capabilities fully
- [x] Unify parsing for streaming/non-streaming
- [ ] Update templates
- [ ] Update python/js libraries
- [ ] How to handle differences in models wrt defaults and whether or
      not the thinking ability can even be controlled. If not specified
      by the user, should there be a default or should the template be
      able to check if it was explicitly set?
2025-05-07 16:15:46 -07:00

64 lines
1.6 KiB
Go

package cmd
import (
"context"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/types/model"
)
// TestWarnMissingThinking checks that warnMissingThinking prints a warning to
// stderr when the /api/show response does not list the thinking capability,
// and prints no warning when it does.
//
// Each case runs as its own subtest against a fake /api/show endpoint, so the
// test server and the OLLAMA_HOST override are torn down per case instead of
// piling up until the whole test function returns.
func TestWarnMissingThinking(t *testing.T) {
	cases := []struct {
		name         string
		capabilities []model.Capability
		expectWarn   bool
	}{
		{name: "thinking supported", capabilities: []model.Capability{model.CapabilityThinking}, expectWarn: false},
		{name: "thinking not supported", capabilities: []model.Capability{}, expectWarn: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				// The handler runs on the server's goroutine, where FailNow
				// (and therefore Fatalf) must not be called. Record the
				// failure with Errorf and answer with an error status.
				if r.URL.Path != "/api/show" || r.Method != http.MethodPost {
					t.Errorf("unexpected request to %s %s", r.URL.Path, r.Method)
					http.Error(w, "unexpected request", http.StatusNotFound)
					return
				}
				var req api.ShowRequest
				if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
					t.Errorf("decode request: %v", err)
					http.Error(w, "bad request", http.StatusBadRequest)
					return
				}
				resp := api.ShowResponse{Capabilities: tc.capabilities}
				if err := json.NewEncoder(w).Encode(resp); err != nil {
					t.Errorf("encode response: %v", err)
				}
			}))
			// Scoped to the subtest, so the server is closed after each case.
			defer srv.Close()

			t.Setenv("OLLAMA_HOST", srv.URL)
			client, err := api.ClientFromEnvironment()
			if err != nil {
				t.Fatal(err)
			}

			r, w, err := os.Pipe()
			if err != nil {
				t.Fatalf("create stderr pipe: %v", err)
			}
			defer r.Close()

			// Redirect stderr into the pipe for the duration of the call. The
			// restore and the close of the write end are deferred so they run
			// even if warnMissingThinking panics. The output is only read
			// after the call returns, so it has to fit in the pipe buffer.
			oldStderr := os.Stderr
			os.Stderr = w
			func() {
				defer func() {
					os.Stderr = oldStderr
					w.Close()
				}()
				warnMissingThinking(context.Background(), client, "m")
			}()

			out, err := io.ReadAll(r)
			if err != nil {
				t.Fatalf("read captured stderr: %v", err)
			}

			warned := strings.Contains(string(out), "warning:")
			if tc.expectWarn && !warned {
				t.Errorf("expected warning, got none")
			}
			if !tc.expectWarn && warned {
				t.Errorf("did not expect warning, got: %s", string(out))
			}
		})
	}
}