ollama/server/model_loader.go
ParthSareen 11acb85ff3 WIP
2024-12-17 16:54:47 -08:00

package server

import (
	"fmt"
	"sync"

	"github.com/ollama/ollama/llama"
	"github.com/ollama/ollama/types/model"
)
type loadedModel struct {
	model     llama.Model
	modelPath string
}

type modelLoader struct {
	// cache stores loaded models keyed by their full path and a rendering of
	// their load params.
	cache sync.Map // map[string]*loadedModel
}
// LoadModel resolves a model name, loads the model weights from disk, and
// caches the result so repeated requests for the same model and params reuse
// the already loaded model.
func (ml *modelLoader) LoadModel(name string, params llama.ModelParams) (*loadedModel, error) {
	modelName := model.ParseName(name)
	if !modelName.IsValid() {
		return nil, fmt.Errorf("invalid model name: %s", modelName)
	}

	m, err := GetModel(modelName.String())
	if err != nil {
		return nil, fmt.Errorf("model not found: %s: %w", modelName, err)
	}

	// Build the cache key from the model path and a string rendering of the
	// load params, so the same file loaded with different params is cached
	// separately.
	cacheKey := fmt.Sprintf("%s-%+v", m.ModelPath, params)
	if cached, ok := ml.cache.Load(cacheKey); ok {
		return cached.(*loadedModel), nil
	}

	// Evict any existing model before loading a new one.
	ml.evictExistingModel()

	llamaModel, err := llama.LoadModelFromFile(m.ModelPath, params)
	if err != nil {
		return nil, fmt.Errorf("failed to load model: %w", err)
	}

	loaded := &loadedModel{
		model:     *llamaModel,
		modelPath: m.ModelPath,
	}
	ml.cache.Store(cacheKey, loaded)

	return loaded, nil
}
// evictExistingModel removes any currently loaded model from the cache and
// frees its memory. Only a single model is kept in the cache at a time.
// TODO: Add proper cache eviction policy (LRU/size/TTL based)
func (ml *modelLoader) evictExistingModel() {
	ml.cache.Range(func(key, _ any) bool {
		// LoadAndDelete guarantees each entry is freed at most once, even if
		// eviction races with another caller.
		if cached, ok := ml.cache.LoadAndDelete(key); ok {
			llama.FreeModel(&cached.(*loadedModel).model)
		}
		return true
	})
}
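
// Usage sketch (illustrative, not part of this WIP commit): a single
// modelLoader would be shared by the request handlers, which call LoadModel
// per request. The function name and the zero-value llama.ModelParams below
// are placeholder assumptions, not code from the original change.
func exampleLoadModel(ml *modelLoader, name string) error {
	loaded, err := ml.LoadModel(name, llama.ModelParams{})
	if err != nil {
		return err
	}

	// loaded.model is the in-memory llama.Model ready for use;
	// loaded.modelPath records the file it was loaded from.
	_ = loaded
	return nil
}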