Integration test improvements (#9654)

Add some new test coverage for various model architectures,
and switch from orca-mini to the small llama model.
Daniel Hiltgen 2025-04-16 14:25:55 -07:00 committed by GitHub
parent 56dc316a57
commit ed4e139314
9 changed files with 709 additions and 67 deletions

@@ -24,9 +24,14 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/app/lifecycle"
 	"github.com/ollama/ollama/format"
+	"github.com/stretchr/testify/require"
 )
+const (
+	smol = "llama3.2:1b"
+)
 func Init() {
 	lifecycle.InitLogging()
 }
@@ -140,7 +145,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
 	showCtx, cancel := context.WithDeadlineCause(
 		ctx,
-		time.Now().Add(10*time.Second),
+		time.Now().Add(20*time.Second),
 		fmt.Errorf("show for existing model %s took too long", modelName),
 	)
 	defer cancel()
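
For context on the pattern above: context.WithDeadlineCause attaches a descriptive error that callers can later retrieve with context.Cause once the deadline fires, instead of the generic deadline message. A minimal standalone sketch, not taken from this diff (the 50ms deadline and model name are only for illustration):

package main

import (
	"context"
	"fmt"
	"time"
)

func main() {
	// The third argument becomes the "cause" reported after the deadline expires.
	ctx, cancel := context.WithDeadlineCause(
		context.Background(),
		time.Now().Add(50*time.Millisecond),
		fmt.Errorf("show for existing model %s took too long", "llama3.2:1b"),
	)
	defer cancel()

	<-ctx.Done()
	fmt.Println("err:  ", ctx.Err())          // context deadline exceeded
	fmt.Println("cause:", context.Cause(ctx)) // show for existing model llama3.2:1b took too long
}
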
@@ -157,7 +162,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
 	}
 	slog.Info("model missing", "model", modelName)
-	stallDuration := 30 * time.Second // This includes checksum verification, which can take a while on larger models
+	stallDuration := 60 * time.Second // This includes checksum verification, which can take a while on larger models, and slower systems
 	stallTimer := time.NewTimer(stallDuration)
 	fn := func(resp api.ProgressResponse) error {
 		// fmt.Print(".")
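
The stall timer drives a watchdog around the pull, and the surrounding code is truncated in this hunk, so the following is a hedged sketch of the pattern it supports: each progress report resets the timer, and a select aborts the pull if the timer ever fires. The helper name and exact structure are illustrative, not the committed implementation.

package integration

import (
	"context"
	"fmt"
	"time"

	"github.com/ollama/ollama/api"
)

// pullWithStallDetection is a hypothetical helper illustrating the pattern:
// every progress report pushes the stall deadline out; if no progress arrives
// within stallDuration, the watchdog cancels the pull and reports a stall.
func pullWithStallDetection(ctx context.Context, client *api.Client, modelName string) error {
	stallDuration := 60 * time.Second // allow for checksum verification on slow systems
	stallTimer := time.NewTimer(stallDuration)

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	fn := func(resp api.ProgressResponse) error {
		// Any progress counts as activity; Reset returns false if the timer
		// already fired, i.e. the download stalled before this update arrived.
		if !stallTimer.Reset(stallDuration) {
			return fmt.Errorf("stall detected while pulling %s", modelName)
		}
		return nil
	}

	done := make(chan error, 1)
	go func() {
		done <- client.Pull(ctx, &api.PullRequest{Model: modelName}, fn)
	}()

	select {
	case <-stallTimer.C:
		cancel()
		return fmt.Errorf("pull of %s made no progress for %v", modelName, stallDuration)
	case err := <-done:
		return err
	}
}
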
@@ -283,11 +288,11 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
 }
 // Generate a set of requests
-// By default each request uses orca-mini as the model
+// By default each request uses llama3.2 as the model
 func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 	return []api.GenerateRequest{
 		{
-			Model: "orca-mini",
+			Model: smol,
 			Prompt: "why is the ocean blue?",
 			Stream: &stream,
 			KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -296,7 +301,7 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 				"temperature": 0.0,
 			},
 		}, {
-			Model: "orca-mini",
+			Model: smol,
 			Prompt: "why is the color of dirt brown?",
 			Stream: &stream,
 			KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -305,7 +310,7 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 				"temperature": 0.0,
 			},
 		}, {
-			Model: "orca-mini",
+			Model: smol,
 			Prompt: "what is the origin of the us thanksgiving holiday?",
 			Stream: &stream,
 			KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -314,7 +319,7 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 				"temperature": 0.0,
 			},
 		}, {
-			Model: "orca-mini",
+			Model: smol,
 			Prompt: "what is the origin of independence day?",
 			Stream: &stream,
 			KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -323,7 +328,7 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 				"temperature": 0.0,
 			},
 		}, {
-			Model: "orca-mini",
+			Model: smol,
 			Prompt: "what is the composition of air?",
 			Stream: &stream,
 			KeepAlive: &api.Duration{Duration: 10 * time.Second},
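
A hedged sketch of how a test can consume these canned requests together with PullIfMissing: the test name, client setup, and keyword check below are illustrative rather than taken from this diff, and the sketch assumes the standard context, strings, testing, and time imports plus the api and require packages imported above.

// Hypothetical test sketch: pull the shared model once, then stream each
// canned request and require that at least one expected keyword appears.
func TestGenerateRequestsSketch(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	client, err := api.ClientFromEnvironment()
	require.NoError(t, err)

	reqs, expected := GenerateRequests()
	require.NoError(t, PullIfMissing(ctx, client, reqs[0].Model))

	for i, req := range reqs {
		var sb strings.Builder
		err := client.Generate(ctx, &req, func(resp api.GenerateResponse) error {
			sb.WriteString(resp.Response)
			return nil
		})
		require.NoError(t, err)

		got := strings.ToLower(sb.String())
		found := false
		for _, keyword := range expected[i] {
			if strings.Contains(got, keyword) {
				found = true
				break
			}
		}
		require.True(t, found, "response %q contained none of %v", got, expected[i])
	}
}
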
@@ -341,3 +346,15 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 		{"nitrogen", "oxygen", "carbon", "dioxide"},
 	}
 }
+func skipUnderMinVRAM(t *testing.T, gb uint64) {
+	// TODO use info API in the future
+	if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
+		maxVram, err := strconv.ParseUint(s, 10, 64)
+		require.NoError(t, err)
+		// Don't hammer on small VRAM cards...
+		if maxVram < gb*format.GibiByte {
+			t.Skip("skipping with small VRAM to avoid timeouts")
+		}
+	}
+}
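
Note that OLLAMA_MAX_VRAM is parsed as a raw byte count and compared against gb GiB, and the guard is a no-op when the variable is unset. A short usage sketch; the test name, model, and 6 GiB threshold are hypothetical, chosen only to show the call:

// Hypothetical usage: skip early on small-VRAM machines before pulling a
// model that needs roughly 6 GiB to run comfortably.
func TestMediumModelSketch(t *testing.T) {
	skipUnderMinVRAM(t, 6) // only takes effect when OLLAMA_MAX_VRAM is set (in bytes)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()

	client, err := api.ClientFromEnvironment()
	require.NoError(t, err)
	require.NoError(t, PullIfMissing(ctx, client, "llama3.1:8b")) // illustrative model choice
}
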