mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 10:26:53 +02:00
Move quantization to new backend (#10363)
* Move quantization logic to GGML via new backend This moves the model aware logic to Go code and calls GGMLs quantization code for model creation. * Remove "add model quantizations" This is no longer needed now that quantization is implemented in Go+GGML code directly.
This commit is contained in:
parent
95e744beeb
commit
424810450f
39 changed files with 1854 additions and 440 deletions
130
integration/quantization_test.go
Normal file
130
integration/quantization_test.go
Normal file
|
@ -0,0 +1,130 @@
|
|||
//go:build integration && models
|
||||
|
||||
package integration
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestQuantization(t *testing.T) {
|
||||
sourceModels := []string{
|
||||
"qwen2.5:0.5b-instruct-fp16",
|
||||
}
|
||||
quantizations := []string{
|
||||
"Q8_0",
|
||||
"Q4_K_S",
|
||||
"Q4_K_M",
|
||||
"Q4_K",
|
||||
}
|
||||
softTimeout, hardTimeout := getTimeouts(t)
|
||||
started := time.Now()
|
||||
slog.Info("Setting timeouts", "soft", softTimeout, "hard", hardTimeout)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
for _, base := range sourceModels {
|
||||
if err := PullIfMissing(ctx, client, base); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
for _, quant := range quantizations {
|
||||
newName := fmt.Sprintf("%s__%s", base, quant)
|
||||
t.Run(newName, func(t *testing.T) {
|
||||
if time.Now().Sub(started) > softTimeout {
|
||||
t.Skip("skipping remaining tests to avoid excessive runtime")
|
||||
}
|
||||
req := &api.CreateRequest{
|
||||
Model: newName,
|
||||
Quantization: quant,
|
||||
From: base,
|
||||
}
|
||||
fn := func(resp api.ProgressResponse) error {
|
||||
// fmt.Print(".")
|
||||
return nil
|
||||
}
|
||||
t.Logf("quantizing: %s -> %s", base, quant)
|
||||
if err := client.Create(ctx, req, fn); err != nil {
|
||||
t.Fatalf("create failed %s", err)
|
||||
}
|
||||
defer func() {
|
||||
req := &api.DeleteRequest{
|
||||
Model: newName,
|
||||
}
|
||||
t.Logf("deleting: %s -> %s", base, quant)
|
||||
if err := client.Delete(ctx, req); err != nil {
|
||||
t.Logf("failed to clean up %s: %s", req.Model, err)
|
||||
}
|
||||
}()
|
||||
// Check metadata on the model
|
||||
resp, err := client.Show(ctx, &api.ShowRequest{Name: newName})
|
||||
if err != nil {
|
||||
t.Fatalf("unable to show model: %s", err)
|
||||
}
|
||||
if !strings.Contains(resp.Details.QuantizationLevel, quant) {
|
||||
t.Fatalf("unexpected quantization for %s:\ngot: %s", newName, resp.Details.QuantizationLevel)
|
||||
}
|
||||
|
||||
stream := true
|
||||
genReq := api.GenerateRequest{
|
||||
Model: newName,
|
||||
Prompt: "why is the sky blue?",
|
||||
KeepAlive: &api.Duration{Duration: 3 * time.Second},
|
||||
Options: map[string]any{
|
||||
"seed": 42,
|
||||
"temperature": 0.0,
|
||||
},
|
||||
Stream: &stream,
|
||||
}
|
||||
t.Logf("verifying: %s -> %s", base, quant)
|
||||
|
||||
// Some smaller quantizations can cause models to have poor quality
|
||||
// or get stuck in repetition loops, so we stop as soon as we have any matches
|
||||
anyResp := []string{"rayleigh", "scattering", "day", "sun", "moon", "color", "nitrogen", "oxygen"}
|
||||
reqCtx, reqCancel := context.WithCancel(ctx)
|
||||
atLeastOne := false
|
||||
var buf bytes.Buffer
|
||||
genfn := func(response api.GenerateResponse) error {
|
||||
buf.Write([]byte(response.Response))
|
||||
fullResp := strings.ToLower(buf.String())
|
||||
for _, resp := range anyResp {
|
||||
if strings.Contains(fullResp, resp) {
|
||||
atLeastOne = true
|
||||
t.Log(fullResp)
|
||||
reqCancel()
|
||||
break
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
done := make(chan int)
|
||||
var genErr error
|
||||
go func() {
|
||||
genErr = client.Generate(reqCtx, &genReq, genfn)
|
||||
done <- 0
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
if genErr != nil && !atLeastOne {
|
||||
t.Fatalf("failed with %s request prompt %s ", genReq.Model, genReq.Prompt)
|
||||
}
|
||||
case <-ctx.Done():
|
||||
t.Error("outer test context done while waiting for generate")
|
||||
}
|
||||
|
||||
t.Logf("passed")
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue