mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 02:16:36 +02:00
feat: add new Ollama engine using ggml through cgo This change introduces a new way to run pretrained models. It introduces three high-level interfaces and a bunch of smaller helper interfaces to facilitate this. - `model.Model` defines the interface for a model architecture. Models such as `llama` and `mllama`, which are provided as examples, can implement the model's forward propagation in the `Forward` method. This method will be called to generate completions. This interface can be found in `model/model.go` - `ml.Backend` defines the interface for a backend tensor library, in this case `ggml`. Among other things, a Backend is responsible for loading a pretrained model into hardware (GPU, CPU, etc) and providing an interface for Models to access loaded tensors. This interface can be found in `ml/backend.go` - `ml.Tensor` defines the interface for a tensor and tensor operations. This is the first implementation of the new engine. Follow-up PRs will implement more features: - non-greedy sampling (#8410) - integration with Ollama and KV caching (#8301) - more model support (#9080) with more coming soon Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
34 lines
590 B
Go
34 lines
590 B
Go
package bufioutil
|
|
|
|
import (
|
|
"bufio"
|
|
"io"
|
|
)
|
|
|
|
// BufferedSeeker layers a bufio.Reader over an io.ReadSeeker so that many
// small sequential reads are buffered while Seek support is retained.
type BufferedSeeker struct {
	rs io.ReadSeeker // underlying source; Seek calls are forwarded here
	br *bufio.Reader // buffers reads from rs; invalidated (Reset) on every Seek
}
|
|
|
|
func NewBufferedSeeker(rs io.ReadSeeker, size int) *BufferedSeeker {
|
|
return &BufferedSeeker{
|
|
rs: rs,
|
|
br: bufio.NewReaderSize(rs, size),
|
|
}
|
|
}
|
|
|
|
func (b *BufferedSeeker) Read(p []byte) (int, error) {
|
|
return b.br.Read(p)
|
|
}
|
|
|
|
func (b *BufferedSeeker) Seek(offset int64, whence int) (int64, error) {
|
|
if whence == io.SeekCurrent {
|
|
offset -= int64(b.br.Buffered())
|
|
}
|
|
n, err := b.rs.Seek(offset, whence)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
b.br.Reset(b.rs)
|
|
return n, nil
|
|
}
|