mirror of https://github.com/ollama/ollama.git (synced 2025-05-11 02:16:36 +02:00)
commit 05cd82ef94, parent 7d6eb0d4c3
33 changed files with 94 additions and 94 deletions
@@ -13,8 +13,8 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/app/lifecycle"
+	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/gpu"
 	"github.com/ollama/ollama/llm"
 )
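This first hunk is purely the import swap: the old github.com/ollama/ollama/gpu package is dropped and github.com/ollama/ollama/discover takes its place. Below is an illustrative sketch (not part of the commit) of what a call site looks like after the rename; it uses only identifiers that appear in this diff and assumes it is compiled inside the ollama module.

// Illustrative sketch only: a call site after the gpu -> discover rename.
package example

import "github.com/ollama/ollama/discover"

func emptyGPUs() discover.GpuInfoList {
	// Previously: gpu.GpuInfoList{} with "github.com/ollama/ollama/gpu" imported.
	return discover.GpuInfoList{}
}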
@@ -47,10 +47,10 @@ func TestLoad(t *testing.T) {
 		sessionDuration: &api.Duration{Duration: 2 * time.Second},
 	}
 	// Fail to load model first
-	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
+	s.newServerFn = func(gpus discover.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
 		return nil, errors.New("something failed to load model blah")
 	}
-	gpus := gpu.GpuInfoList{}
+	gpus := discover.GpuInfoList{}
 	s.load(req, ggml, gpus, 0)
 	require.Empty(t, req.successCh)
 	require.Len(t, req.errCh, 1)
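The test above works because the scheduler keeps its server constructor in a swappable function field (s.newServerFn), so a test can force every load to fail without touching a real model. A self-contained sketch of that pattern follows, using simplified stand-in types rather than the real scheduler, llm, and discover types.

// Simplified stand-ins: sched, llamaServer and gpuList are hypothetical types
// used only to illustrate the function-field injection seen in TestLoad.
package example

import "errors"

type gpuList []struct{ Library string }

type llamaServer interface{ Close() }

type sched struct {
	newServerFn func(gpus gpuList, model string) (llamaServer, error)
}

// forceLoadFailure swaps in a constructor that always fails, mirroring the
// "Fail to load model first" step in the test above.
func forceLoadFailure(s *sched) {
	s.newServerFn = func(gpus gpuList, model string) (llamaServer, error) {
		return nil, errors.New("something failed to load model blah")
	}
}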
@@ -61,7 +61,7 @@ func TestLoad(t *testing.T) {
 	require.Contains(t, err.Error(), "this model may be incompatible")

 	server := &mockLlm{estimatedVRAM: 10, estimatedVRAMByGPU: map[string]uint64{}}
-	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
+	s.newServerFn = func(gpus discover.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
 		return server, nil
 	}
 	s.load(req, ggml, gpus, 0)
@@ -102,7 +102,7 @@ type reqBundle struct {
 	ggml *llm.GGML
 }

-func (scenario *reqBundle) newServer(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
+func (scenario *reqBundle) newServer(gpus discover.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
 	return scenario.srv, nil
 }

@@ -151,18 +151,18 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
 	return b
 }

-func getGpuFn() gpu.GpuInfoList {
-	g := gpu.GpuInfo{Library: "metal"}
+func getGpuFn() discover.GpuInfoList {
+	g := discover.GpuInfo{Library: "metal"}
 	g.TotalMemory = 24 * format.GigaByte
 	g.FreeMemory = 12 * format.GigaByte
-	return []gpu.GpuInfo{g}
+	return []discover.GpuInfo{g}
 }

-func getCpuFn() gpu.GpuInfoList {
-	g := gpu.GpuInfo{Library: "cpu"}
+func getCpuFn() discover.GpuInfoList {
+	g := discover.GpuInfo{Library: "cpu"}
 	g.TotalMemory = 32 * format.GigaByte
 	g.FreeMemory = 26 * format.GigaByte
-	return []gpu.GpuInfo{g}
+	return []discover.GpuInfo{g}
 }

 func TestRequestsSameModelSameRequest(t *testing.T) {
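getGpuFn and getCpuFn fake the hardware-discovery step so the scheduler tests can run without real GPUs; only the package name changes here, the fixed memory figures stay the same. The sketch below re-creates the same fake in a self-contained form, with a local stand-in for discover.GpuInfo and a local gigabyte constant standing in for format.GigaByte (assumed decimal; the real constant may differ).

// Stand-in types: gpuInfo models only the fields these helpers set.
package example

const gigaByte uint64 = 1000 * 1000 * 1000 // assumption: decimal stand-in for format.GigaByte

type gpuInfo struct {
	Library     string
	TotalMemory uint64
	FreeMemory  uint64
}

// fakeMetalGPU mirrors getGpuFn: one "metal" device, 24 GB total, 12 GB free.
func fakeMetalGPU() []gpuInfo {
	return []gpuInfo{{Library: "metal", TotalMemory: 24 * gigaByte, FreeMemory: 12 * gigaByte}}
}

// fakeCPU mirrors getCpuFn: a CPU-only entry, 32 GB total, 26 GB free.
func fakeCPU() []gpuInfo {
	return []gpuInfo{{Library: "cpu", TotalMemory: 32 * gigaByte, FreeMemory: 26 * gigaByte}}
}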
@@ -420,9 +420,9 @@ func TestExpireRunner(t *testing.T) {
 	}

 	var ggml *llm.GGML
-	gpus := gpu.GpuInfoList{}
+	gpus := discover.GpuInfoList{}
 	server := &mockLlm{estimatedVRAM: 10, estimatedVRAMByGPU: map[string]uint64{}}
-	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
+	s.newServerFn = func(gpus discover.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
 		return server, nil
 	}
 	s.load(req, ggml, gpus, 0)
@@ -460,11 +460,11 @@ func TestPrematureExpired(t *testing.T) {
 	// Same model, same request
 	scenario1a := newScenarioRequest(t, ctx, "ollama-model-1a", 10, nil)
 	s := InitScheduler(ctx)
-	s.getGpuFn = func() gpu.GpuInfoList {
-		g := gpu.GpuInfo{Library: "metal"}
+	s.getGpuFn = func() discover.GpuInfoList {
+		g := discover.GpuInfo{Library: "metal"}
 		g.TotalMemory = 24 * format.GigaByte
 		g.FreeMemory = 12 * format.GigaByte
-		return []gpu.GpuInfo{g}
+		return []discover.GpuInfo{g}
 	}
 	s.newServerFn = scenario1a.newServer
 	successCh1a, errCh1a := s.GetRunner(scenario1a.ctx, scenario1a.req.model, scenario1a.req.opts, scenario1a.req.sessionDuration)
@@ -530,7 +530,7 @@ func TestUseLoadedRunner(t *testing.T) {
 func TestUpdateFreeSpace(t *testing.T) {
 	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
 	defer done()
-	gpus := gpu.GpuInfoList{
+	gpus := discover.GpuInfoList{
 		{
 			Library: "a",
 			ID: "1",
@@ -563,7 +563,7 @@ func TestUpdateFreeSpace(t *testing.T) {
 func TestFilterGPUsWithoutLoadingModels(t *testing.T) {
 	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
 	defer done()
-	gpus := gpu.GpuInfoList{
+	gpus := discover.GpuInfoList{
 		{
 			Library: "cuda",
 			ID: "0",
@@ -573,7 +573,7 @@ func TestFilterGPUsWithoutLoadingModels(t *testing.T) {
 			ID: "1",
 		},
 	}
-	r1 := &runnerRef{gpus: gpu.GpuInfoList{gpus[0]}, loading: true}
+	r1 := &runnerRef{gpus: discover.GpuInfoList{gpus[0]}, loading: true}

 	s := InitScheduler(ctx)
 	s.loadedMu.Lock()
@@ -584,12 +584,12 @@ func TestFilterGPUsWithoutLoadingModels(t *testing.T) {
 	require.Len(t, tmp, 1)
 	require.Equal(t, "1", tmp[0].ID)

-	r1.gpus = gpu.GpuInfoList{gpus[1]}
+	r1.gpus = discover.GpuInfoList{gpus[1]}
 	tmp = s.filterGPUsWithoutLoadingModels(gpus)
 	require.Len(t, tmp, 1)
 	require.Equal(t, "0", tmp[0].ID)

-	r1.gpus = gpu.GpuInfoList{}
+	r1.gpus = discover.GpuInfoList{}
 	tmp = s.filterGPUsWithoutLoadingModels(gpus)
 	require.Len(t, tmp, 2)
 }
@@ -715,9 +715,9 @@ func TestHomogeneousGPUs(t *testing.T) {
 	defer done()
 	s := InitScheduler(ctx)

-	s.getGpuFn = func() gpu.GpuInfoList {
+	s.getGpuFn = func() discover.GpuInfoList {
 		// Set memory values to require the model to be spread
-		gpus := []gpu.GpuInfo{
+		gpus := []discover.GpuInfo{
 			{Library: "cuda"},
 			{Library: "rocm"},
 		}
@@ -729,7 +729,7 @@ func TestHomogeneousGPUs(t *testing.T) {
 	}
 	s.getCpuFn = getCpuFn
 	a := newScenarioRequest(t, ctx, "ollama-model-1", 10, &api.Duration{Duration: 5 * time.Millisecond})
-	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
+	s.newServerFn = func(gpus discover.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
 		require.Len(t, gpus, 1)
 		return a.newServer(gpus, model, ggml, adapters, projectors, opts, numParallel)
 	}
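In this last hunk the injected constructor both asserts and delegates: it checks that the scheduler handed it exactly one GPU entry (rather than mixing the cuda and rocm devices) and then calls the scenario's own newServer. A simplified, self-contained sketch of that wrap-and-assert shape follows; all types here are local stand-ins, not the real ollama types.

// Hypothetical stand-ins illustrating the wrap-and-assert stub used in
// TestHomogeneousGPUs.
package example

import (
	"testing"

	"github.com/stretchr/testify/require"
)

type stubGPU struct{ Library string }

type stubServer struct{}

// realNewServer stands in for the scenario's own constructor (a.newServer).
func realNewServer(gpus []stubGPU, model string) (*stubServer, error) {
	return &stubServer{}, nil
}

// wrapNewServer returns a constructor that asserts on the GPU list before
// delegating, the same shape as the s.newServerFn override above.
func wrapNewServer(t *testing.T) func(gpus []stubGPU, model string) (*stubServer, error) {
	return func(gpus []stubGPU, model string) (*stubServer, error) {
		require.Len(t, gpus, 1)
		return realNewServer(gpus, model)
	}
}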