mirror of
https://github.com/ollama/ollama.git
synced 2025-05-10 18:06:33 +02:00
wip: write tensors in parallel
This commit is contained in:
parent
b0f28d178a
commit
34ae8077d1
2 changed files with 41 additions and 38 deletions
|
@ -4,9 +4,9 @@ import (
|
|||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
|
@ -84,12 +84,12 @@ func (ModelParameters) specialTokenTypes() []string {
|
|||
}
|
||||
}
|
||||
|
||||
func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
|
||||
return ggml.WriteGGUF(ws, kv, ts)
|
||||
func (ModelParameters) writeFile(f *os.File, kv ggml.KV, ts []ggml.Tensor) error {
|
||||
return ggml.WriteGGUF(f, kv, ts)
|
||||
}
|
||||
|
||||
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
|
||||
return ggml.WriteGGUF(ws, kv, ts)
|
||||
func (AdapterParameters) writeFile(f *os.File, kv ggml.KV, ts []ggml.Tensor) error {
|
||||
return ggml.WriteGGUF(f, kv, ts)
|
||||
}
|
||||
|
||||
type ModelConverter interface {
|
||||
|
@ -104,7 +104,7 @@ type ModelConverter interface {
|
|||
// specialTokenTypes returns any special token types the model uses
|
||||
specialTokenTypes() []string
|
||||
// writeFile writes the model to the provided io.WriteSeeker
|
||||
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
|
||||
writeFile(*os.File, ggml.KV, []ggml.Tensor) error
|
||||
}
|
||||
|
||||
type moreParser interface {
|
||||
|
@ -120,10 +120,10 @@ type AdapterConverter interface {
|
|||
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
||||
Replacements() []string
|
||||
|
||||
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
|
||||
writeFile(*os.File, ggml.KV, []ggml.Tensor) error
|
||||
}
|
||||
|
||||
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
|
||||
func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ggml.KV) error {
|
||||
bts, err := fs.ReadFile(fsys, "adapter_config.json")
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -158,14 +158,14 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
|
|||
return err
|
||||
}
|
||||
|
||||
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
|
||||
return conv.writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
|
||||
}
|
||||
|
||||
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
|
||||
// and files it finds in the input path.
|
||||
// Supported input model formats include safetensors.
|
||||
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
|
||||
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
|
||||
func ConvertModel(fsys fs.FS, f *os.File) error {
|
||||
bts, err := fs.ReadFile(fsys, "config.json")
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -248,5 +248,5 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
|
|||
return err
|
||||
}
|
||||
|
||||
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
|
||||
return conv.writeFile(f, conv.KV(t), conv.Tensors(ts))
|
||||
}
|
||||
|
|
|
@ -9,8 +9,12 @@ import (
|
|||
"io"
|
||||
"log/slog"
|
||||
"maps"
|
||||
"os"
|
||||
"runtime"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
type containerGGUF struct {
|
||||
|
@ -502,22 +506,22 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
|
|||
return binary.Write(w, binary.LittleEndian, s)
|
||||
}
|
||||
|
||||
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
||||
func WriteGGUF(f *os.File, kv KV, ts []Tensor) error {
|
||||
alignment := kv.Uint("general.alignment", 32)
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
|
||||
if err := binary.Write(f, binary.LittleEndian, []byte("GGUF")); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil {
|
||||
if err := binary.Write(f, binary.LittleEndian, uint32(3)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil {
|
||||
if err := binary.Write(f, binary.LittleEndian, uint64(len(ts))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil {
|
||||
if err := binary.Write(f, binary.LittleEndian, uint64(len(kv))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -525,7 +529,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
|||
slices.Sort(keys)
|
||||
|
||||
for _, key := range keys {
|
||||
if err := ggufWriteKV(ws, key, kv[key]); err != nil {
|
||||
if err := ggufWriteKV(f, key, kv[key]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -541,21 +545,34 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
|||
})
|
||||
|
||||
var s uint64
|
||||
for _, t := range ts {
|
||||
t.Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
|
||||
if err := ggufWriteTensorInfo(ws, t); err != nil {
|
||||
for i := range ts {
|
||||
ts[i].Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
|
||||
if err := ggufWriteTensorInfo(f, ts[i]); err != nil {
|
||||
return err
|
||||
}
|
||||
s += t.Size()
|
||||
s += ts[i].Size()
|
||||
}
|
||||
|
||||
offset, err := f.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
offset += ggufPadding(offset, int64(alignment))
|
||||
slog.Debug("gguf", "offset", offset, "size", s, "alignment", alignment)
|
||||
|
||||
var g errgroup.Group
|
||||
g.SetLimit(runtime.GOMAXPROCS(0))
|
||||
for _, t := range ts {
|
||||
if err := ggufWriteTensor(ws, t, int64(alignment)); err != nil {
|
||||
t := t
|
||||
w := io.NewOffsetWriter(f, offset+int64(t.Offset))
|
||||
g.Go(func() error {
|
||||
_, err := t.WriteTo(w)
|
||||
return err
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
return g.Wait()
|
||||
}
|
||||
|
||||
func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
|
||||
|
@ -640,20 +657,6 @@ func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error {
|
|||
return binary.Write(ws, binary.LittleEndian, t.Offset)
|
||||
}
|
||||
|
||||
func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error {
|
||||
offset, err := ws.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = t.WriteTo(ws)
|
||||
return err
|
||||
}
|
||||
|
||||
func ggufPadding(offset, align int64) int64 {
|
||||
return (align - offset%align) % align
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue