next ollama runner (#7913)

feat: add new Ollama engine using ggml through cgo

This change introduces a new way to run pretrained models. It introduces three high-level interfaces and a bunch of smaller helper interfaces to facilitate this.

- `model.Model` defines the interface for a model architecture. Models such as `llama` and `mllama`, which are provided as examples, can implement the model's forward propagation in the `Forward` method. This method will be called to generate completions. This interface can be found in `model/model.go`.
- `ml.Backend` defines the interface for a backend tensor library, in this case `ggml`. Among other things, a Backend is responsible for loading a pretrained model into hardware (GPU, CPU, etc.) and providing an interface for Models to access loaded tensors. This interface can be found in `ml/backend.go`.
- `ml.Tensor` defines the interface for a tensor and tensor operations.

This is the first implementation of the new engine. Follow-up PRs will implement more features:

- non-greedy sampling (#8410)
- integration with Ollama and KV caching (#8301)
- more model support (#9080), with more coming soon

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2025-05-11 18:36:41 +02:00 · 2025-02-14 00:31:21 +00:00 · 2025-02-14 00:31:21 +00:00 · 58245413f4
commit 58245413f4
parent 8cf16063a5
57 changed files with 475427 additions and 494 deletions
--- a/model/model_test.go
+++ b/model/model_test.go
@ -0,0 +1,136 @@
+package model
+
+import (
+	"reflect"
+	"slices"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/ml/backend/ggml"
+	"github.com/ollama/ollama/ml/nn"
+)
+
+func TestParseTags(t *testing.T) {
+	cases := []struct {
+		value string
+		want  Tag
+	}{
+		{
+			value: "output",
+			want: Tag{
+				Name: "output",
+			},
+		},
+		{
+			value: "output,alt:token_embd",
+			want: Tag{
+				Name: "output",
+				Alternate: []string{
+					"token_embd",
+				},
+			},
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.value, func(t *testing.T) {
+			got := ParseTags(tt.value)
+			if diff := cmp.Diff(tt.want, got); diff != "" {
+				t.Errorf("ParseTags() returned unexpected values (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+// fakeBackend is a test double that embeds *ggml.Backend and supplies its own
+// Get method. The tests in this file construct it with only names set.
+type fakeBackend struct {
+	*ggml.Backend
+	// names lists the tensor names for which Get returns a tensor.
+	names []string
+}
+
+// fakeTensor is a test double that embeds *ggml.Tensor and carries the name
+// it was created with, so expected and actual tensors can be compared by Name.
+type fakeTensor struct {
+	*ggml.Tensor
+	// Name is the tensor name passed to fakeBackend.Get.
+	Name string
+}
+
+// Get returns a fakeTensor carrying name when name is listed in the backend's
+// names, and nil otherwise.
+func (b *fakeBackend) Get(name string) ml.Tensor {
+	if !slices.Contains(b.names, name) {
+		return nil
+	}
+
+	return &fakeTensor{Name: name}
+}
+
+// TestPopulateFields runs populateFields against a fake backend and compares
+// the resulting model, including its two-element layer array, with the
+// expected tensors named after each field's gguf tag.
+func TestPopulateFields(t *testing.T) {
+	type fakeLayer struct {
+		Query  *nn.Linear `gguf:"attn_q"`
+		Key    *nn.Linear `gguf:"attn_k"`
+		Value  *nn.Linear `gguf:"attn_v"`
+		Output *nn.Linear `gguf:"attn_o"`
+	}
+
+	type fakeModel struct {
+		Input      *nn.Embedding `gguf:"input"`
+		OutputNorm *nn.RMSNorm   `gguf:"output_norm"`
+		Output     *nn.Linear    `gguf:"output"`
+		Layers     [2]fakeLayer  `gguf:"blk"`
+	}
+
+	// No attn_o tensor is listed here, and the expected layers below leave
+	// Output unset.
+	backend := &fakeBackend{
+		names: []string{
+			"input.weight",
+			"blk.0.attn_q.weight",
+			"blk.0.attn_k.weight",
+			"blk.0.attn_v.weight",
+			"blk.1.attn_q.weight",
+			"blk.1.attn_k.weight",
+			"blk.1.attn_v.weight",
+			"output_norm.weight",
+			"output.weight",
+		},
+	}
+
+	want := fakeModel{
+		Input:      &nn.Embedding{Weight: &fakeTensor{Name: "input.weight"}},
+		OutputNorm: &nn.RMSNorm{Weight: &fakeTensor{Name: "output_norm.weight"}},
+		Output:     &nn.Linear{Weight: &fakeTensor{Name: "output.weight"}},
+		Layers: [2]fakeLayer{
+			{
+				Query: &nn.Linear{Weight: &fakeTensor{Name: "blk.0.attn_q.weight"}},
+				Key:   &nn.Linear{Weight: &fakeTensor{Name: "blk.0.attn_k.weight"}},
+				Value: &nn.Linear{Weight: &fakeTensor{Name: "blk.0.attn_v.weight"}},
+			},
+			{
+				Query: &nn.Linear{Weight: &fakeTensor{Name: "blk.1.attn_q.weight"}},
+				Key:   &nn.Linear{Weight: &fakeTensor{Name: "blk.1.attn_k.weight"}},
+				Value: &nn.Linear{Weight: &fakeTensor{Name: "blk.1.attn_v.weight"}},
+			},
+		},
+	}
+
+	// populateFields receives a reflect.Value of the model pointer; the value
+	// it returns is stored back into the model through that pointer.
+	got := fakeModel{}
+	ptr := reflect.ValueOf(&got)
+	populated := populateFields(backend, ptr)
+	ptr.Elem().Set(populated)
+
+	diff := cmp.Diff(want, got)
+	if diff != "" {
+		t.Errorf("populateFields() set incorrect values (-want +got):\n%s", diff)
+	}
+}
+
+// TestPopulateFieldsAlternateName checks that a field tagged
+// "output,alt:input" is populated from "input.weight" when the backend only
+// provides that tensor.
+func TestPopulateFieldsAlternateName(t *testing.T) {
+	type fakeModel struct {
+		Input  *nn.Embedding `gguf:"input"`
+		Output *nn.Linear    `gguf:"output,alt:input"`
+	}
+
+	// The backend has only the input tensor; no "output.weight" is listed.
+	backend := &fakeBackend{names: []string{"input.weight"}}
+
+	want := fakeModel{
+		Input:  &nn.Embedding{Weight: &fakeTensor{Name: "input.weight"}},
+		Output: &nn.Linear{Weight: &fakeTensor{Name: "input.weight"}},
+	}
+
+	var got fakeModel
+	ptr := reflect.ValueOf(&got)
+	populated := populateFields(backend, ptr)
+	ptr.Elem().Set(populated)
+
+	diff := cmp.Diff(want, got)
+	if diff != "" {
+		t.Errorf("populateFields() set incorrect values (-want +got):\n%s", diff)
+	}
+}