From 34ae8077d1ea741e1a4b934e53636d26459d4f16 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 21 Apr 2025 14:40:28 -0700 Subject: [PATCH] wip: write tensors in parallel --- convert/convert.go | 22 +++++++++--------- fs/ggml/gguf.go | 57 ++++++++++++++++++++++++---------------------- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/convert/convert.go b/convert/convert.go index 26bc72cc2..773a17d68 100644 --- a/convert/convert.go +++ b/convert/convert.go @@ -4,9 +4,9 @@ import ( "encoding/json" "errors" "fmt" - "io" "io/fs" "log/slog" + "os" "strings" "github.com/ollama/ollama/fs/ggml" @@ -84,12 +84,12 @@ func (ModelParameters) specialTokenTypes() []string { } } -func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error { - return ggml.WriteGGUF(ws, kv, ts) +func (ModelParameters) writeFile(f *os.File, kv ggml.KV, ts []ggml.Tensor) error { + return ggml.WriteGGUF(f, kv, ts) } -func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error { - return ggml.WriteGGUF(ws, kv, ts) +func (AdapterParameters) writeFile(f *os.File, kv ggml.KV, ts []ggml.Tensor) error { + return ggml.WriteGGUF(f, kv, ts) } type ModelConverter interface { @@ -104,7 +104,7 @@ type ModelConverter interface { // specialTokenTypes returns any special token types the model uses specialTokenTypes() []string // writeFile writes the model to the provided io.WriteSeeker - writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error + writeFile(*os.File, ggml.KV, []ggml.Tensor) error } type moreParser interface { @@ -120,10 +120,10 @@ type AdapterConverter interface { // See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details Replacements() []string - writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error + writeFile(*os.File, ggml.KV, []ggml.Tensor) error } -func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error { +func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ggml.KV) error { bts, err := fs.ReadFile(fsys, "adapter_config.json") if err != nil { return err @@ -158,14 +158,14 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error { return err } - return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts)) + return conv.writeFile(f, conv.KV(baseKV), conv.Tensors(ts)) } // Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations // and files it finds in the input path. // Supported input model formats include safetensors. // Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model. -func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error { +func ConvertModel(fsys fs.FS, f *os.File) error { bts, err := fs.ReadFile(fsys, "config.json") if err != nil { return err @@ -248,5 +248,5 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error { return err } - return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts)) + return conv.writeFile(f, conv.KV(t), conv.Tensors(ts)) } diff --git a/fs/ggml/gguf.go b/fs/ggml/gguf.go index 0c6a8eb2c..4b0a80277 100644 --- a/fs/ggml/gguf.go +++ b/fs/ggml/gguf.go @@ -9,8 +9,12 @@ import ( "io" "log/slog" "maps" + "os" + "runtime" "slices" "strings" + + "golang.org/x/sync/errgroup" ) type containerGGUF struct { @@ -502,22 +506,22 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error { return binary.Write(w, binary.LittleEndian, s) } -func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error { +func WriteGGUF(f *os.File, kv KV, ts []Tensor) error { alignment := kv.Uint("general.alignment", 32) - if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil { + if err := binary.Write(f, binary.LittleEndian, []byte("GGUF")); err != nil { return err } - if err := binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil { + if err := binary.Write(f, binary.LittleEndian, uint32(3)); err != nil { return err } - if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil { + if err := binary.Write(f, binary.LittleEndian, uint64(len(ts))); err != nil { return err } - if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil { + if err := binary.Write(f, binary.LittleEndian, uint64(len(kv))); err != nil { return err } @@ -525,7 +529,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error { slices.Sort(keys) for _, key := range keys { - if err := ggufWriteKV(ws, key, kv[key]); err != nil { + if err := ggufWriteKV(f, key, kv[key]); err != nil { return err } } @@ -541,21 +545,34 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error { }) var s uint64 - for _, t := range ts { - t.Offset = s + uint64(ggufPadding(int64(s), int64(alignment))) - if err := ggufWriteTensorInfo(ws, t); err != nil { + for i := range ts { + ts[i].Offset = s + uint64(ggufPadding(int64(s), int64(alignment))) + if err := ggufWriteTensorInfo(f, ts[i]); err != nil { return err } - s += t.Size() + s += ts[i].Size() } + offset, err := f.Seek(0, io.SeekCurrent) + if err != nil { + return err + } + + offset += ggufPadding(offset, int64(alignment)) + slog.Debug("gguf", "offset", offset, "size", s, "alignment", alignment) + + var g errgroup.Group + g.SetLimit(runtime.GOMAXPROCS(0)) for _, t := range ts { - if err := ggufWriteTensor(ws, t, int64(alignment)); err != nil { + t := t + w := io.NewOffsetWriter(f, offset+int64(t.Offset)) + g.Go(func() error { + _, err := t.WriteTo(w) return err - } + }) } - return nil + return g.Wait() } func ggufWriteKV(ws io.WriteSeeker, k string, v any) error { @@ -640,20 +657,6 @@ func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error { return binary.Write(ws, binary.LittleEndian, t.Offset) } -func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error { - offset, err := ws.Seek(0, io.SeekCurrent) - if err != nil { - return err - } - - if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil { - return err - } - - _, err = t.WriteTo(ws) - return err -} - func ggufPadding(offset, align int64) int64 { return (align - offset%align) % align }