mirror of
https://github.com/ollama/ollama.git
synced 2025-05-17 06:55:09 +02:00
The sliding window cache trims entries that are outside the window for the latest token. This works when we are extending the cache, such as when the conversation continues. However, if we have a partial overlap in conversation (including the BOS tokens), then we resume from a past point in the conversation and the needed tokens are no longer stored in memory. This verifies that the new window overlaps with the old one before reusing the cache. Co-authored-by: Jesse Gross <jesse@ollama.com>
519 lines
13 KiB
Go
519 lines
13 KiB
Go
package ollamarunner
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"image"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/ollama/ollama/ml"
|
|
"github.com/ollama/ollama/model/input"
|
|
)
|
|
|
|
func TestCountCommon(t *testing.T) {
|
|
imgA := image.NewRGBA(image.Rect(0, 0, 100, 100))
|
|
imgB := image.NewRGBA(image.Rect(0, 0, 50, 50))
|
|
imgC := image.NewRGBA(image.Rect(50, 50, 100, 100))
|
|
|
|
tests := []struct {
|
|
name string
|
|
t1 []input.Input
|
|
t2 []input.Input
|
|
expected int32
|
|
}{
|
|
{
|
|
name: "Equal",
|
|
t1: []input.Input{{Token: 1}, {Token: 2}, {Token: 3}},
|
|
t2: []input.Input{{Token: 1}, {Token: 2}, {Token: 3}},
|
|
expected: 3,
|
|
},
|
|
{
|
|
name: "Prefix",
|
|
t1: []input.Input{{Token: 1}},
|
|
t2: []input.Input{{Token: 1}, {Token: 2}, {Token: 3}},
|
|
expected: 1,
|
|
},
|
|
{
|
|
name: "Image Prefix",
|
|
t1: []input.Input{{Multimodal: imgA, MultimodalHash: 1}},
|
|
t2: []input.Input{{Multimodal: imgA, MultimodalHash: 1}, {Multimodal: imgB, MultimodalHash: 2}, {Multimodal: imgC, MultimodalHash: 3}},
|
|
expected: 1,
|
|
},
|
|
{
|
|
name: "Mixed",
|
|
t1: []input.Input{{Token: 1}, {Multimodal: imgA, MultimodalHash: 1}},
|
|
t2: []input.Input{{Token: 1}, {Multimodal: imgA, MultimodalHash: 1}, {Token: 5}},
|
|
expected: 2,
|
|
},
|
|
{
|
|
name: "Mixed, Same Length",
|
|
t1: []input.Input{{Token: 1}, {Multimodal: imgA, MultimodalHash: 1}},
|
|
t2: []input.Input{{Token: 1}, {Multimodal: imgB, MultimodalHash: 2}},
|
|
expected: 1,
|
|
},
|
|
{
|
|
name: "Empty",
|
|
t1: []input.Input{},
|
|
t2: []input.Input{{Token: 1}, {Token: 2}, {Token: 3}},
|
|
expected: 0,
|
|
},
|
|
{
|
|
name: "Both Empty",
|
|
t1: []input.Input{},
|
|
t2: []input.Input{},
|
|
expected: 0,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
result := countCommonPrefix(tt.t1, tt.t2)
|
|
if result != tt.expected {
|
|
t.Errorf("countCommonPrefix(%v, %v): have %v; want %v", tt.t1, tt.t2, result, tt.expected)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestFindCacheSlot(t *testing.T) {
|
|
type expected struct {
|
|
result int
|
|
len int32
|
|
}
|
|
|
|
tests := []struct {
|
|
name string
|
|
cache InputCache
|
|
prompt []input.Input
|
|
longest expected
|
|
best expected
|
|
}{
|
|
{
|
|
name: "Empty",
|
|
cache: InputCache{slots: []InputCacheSlot{
|
|
{
|
|
Id: 0,
|
|
Inputs: []input.Input{},
|
|
InUse: false,
|
|
lastUsed: time.Time{},
|
|
},
|
|
{
|
|
Id: 1,
|
|
Inputs: []input.Input{},
|
|
InUse: false,
|
|
lastUsed: time.Time{},
|
|
},
|
|
}},
|
|
prompt: []input.Input{{Token: 1}},
|
|
longest: expected{result: 0, len: 0},
|
|
best: expected{result: 0, len: 0},
|
|
},
|
|
{
|
|
name: "Extend",
|
|
cache: InputCache{slots: []InputCacheSlot{
|
|
{
|
|
Id: 0,
|
|
Inputs: []input.Input{{Token: 1}},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-time.Second),
|
|
},
|
|
{
|
|
Id: 1,
|
|
Inputs: []input.Input{{Token: 1}, {Token: 2}},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-2 * time.Second),
|
|
},
|
|
}},
|
|
prompt: []input.Input{{Token: 1}, {Token: 2}},
|
|
longest: expected{result: 1, len: 2},
|
|
best: expected{result: 1, len: 2},
|
|
},
|
|
{
|
|
name: "New",
|
|
cache: InputCache{slots: []InputCacheSlot{
|
|
{
|
|
Id: 0,
|
|
Inputs: []input.Input{{Token: 1}, {Token: 2}},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-time.Second),
|
|
},
|
|
{
|
|
Id: 1,
|
|
Inputs: []input.Input{},
|
|
InUse: false,
|
|
lastUsed: time.Time{},
|
|
},
|
|
}},
|
|
prompt: []input.Input{{Token: 2}},
|
|
longest: expected{result: 0, len: 0},
|
|
best: expected{result: 1, len: 0},
|
|
},
|
|
{
|
|
name: "Fork",
|
|
cache: InputCache{
|
|
slots: []InputCacheSlot{
|
|
{
|
|
Id: 0,
|
|
Inputs: []input.Input{{Token: 1}, {Token: 2}},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-time.Second),
|
|
},
|
|
{
|
|
Id: 1,
|
|
Inputs: []input.Input{},
|
|
InUse: false,
|
|
lastUsed: time.Time{},
|
|
},
|
|
},
|
|
},
|
|
prompt: []input.Input{{Token: 1}},
|
|
longest: expected{result: 0, len: 1},
|
|
best: expected{result: 1, len: 1},
|
|
},
|
|
{
|
|
name: "Evict",
|
|
cache: InputCache{slots: []InputCacheSlot{
|
|
{
|
|
Id: 0,
|
|
Inputs: []input.Input{{Token: 1}},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-time.Second),
|
|
},
|
|
{
|
|
Id: 1,
|
|
Inputs: []input.Input{{Token: 1}, {Token: 2}},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-2 * time.Second),
|
|
},
|
|
}},
|
|
prompt: []input.Input{{Token: 2}, {Token: 3}},
|
|
longest: expected{result: 0, len: 0},
|
|
best: expected{result: 1, len: 0},
|
|
},
|
|
{
|
|
name: "In use",
|
|
cache: InputCache{slots: []InputCacheSlot{
|
|
{
|
|
Id: 0,
|
|
Inputs: []input.Input{{Token: 1}, {Token: 2}},
|
|
InUse: true,
|
|
lastUsed: time.Now().Add(-time.Second),
|
|
},
|
|
{
|
|
Id: 1,
|
|
Inputs: []input.Input{{Token: 1}},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-2 * time.Second),
|
|
},
|
|
}},
|
|
prompt: []input.Input{{Token: 1}, {Token: 2}},
|
|
longest: expected{result: 1, len: 1},
|
|
best: expected{result: 1, len: 2},
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run("Longest-"+tt.name, func(t *testing.T) {
|
|
result, resultLen, err := tt.cache.findLongestCacheSlot(tt.prompt)
|
|
if err != nil {
|
|
t.Errorf("findLongestCacheSlot: err %v", err)
|
|
} else if result.Id != tt.longest.result || resultLen != tt.longest.len {
|
|
t.Errorf("findLongestCacheSlot: slot have %v, want %v len have %v, want %v",
|
|
result.Id, tt.longest.result, resultLen, tt.longest.len)
|
|
}
|
|
})
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run("Best-"+tt.name, func(t *testing.T) {
|
|
result, resultLen, err := tt.cache.findBestCacheSlot(tt.prompt)
|
|
if err != nil {
|
|
t.Errorf("findBestCacheSlot: err %v", err)
|
|
} else if result.Id != tt.best.result || resultLen != tt.best.len {
|
|
t.Errorf("findBestCacheSlot: slot have %v, want %v len have %v, want %v",
|
|
result.Id, tt.best.result, resultLen, tt.best.len)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestShiftDiscard(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
numCtx int32
|
|
numKeep int32
|
|
inputLen int32
|
|
expected int32
|
|
}{
|
|
{
|
|
name: "Shift",
|
|
numCtx: 2048,
|
|
numKeep: 5,
|
|
inputLen: 2048,
|
|
expected: 1021,
|
|
},
|
|
{
|
|
name: "Max Keep",
|
|
numCtx: 2048,
|
|
numKeep: 2047,
|
|
inputLen: 2048,
|
|
expected: 1,
|
|
},
|
|
{
|
|
name: "No Keep",
|
|
numCtx: 2048,
|
|
numKeep: 0,
|
|
inputLen: 2048,
|
|
expected: 1024,
|
|
},
|
|
{
|
|
name: "Truncate",
|
|
numCtx: 2048,
|
|
numKeep: 5,
|
|
inputLen: 5000,
|
|
expected: 3973,
|
|
},
|
|
{
|
|
name: "Truncate Keep",
|
|
numCtx: 2048,
|
|
numKeep: 2047,
|
|
inputLen: 5000,
|
|
expected: 2953,
|
|
},
|
|
{
|
|
name: "No Op",
|
|
numCtx: 2048,
|
|
numKeep: 5,
|
|
inputLen: 512,
|
|
expected: 0,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
c := InputCache{numCtx: tt.numCtx}
|
|
result := c.ShiftDiscard(tt.inputLen, tt.numKeep)
|
|
if result != tt.expected {
|
|
t.Errorf("shiftDiscard(ctx: %v, keep: %v input: %v): have %v; want %v", tt.numCtx, tt.numKeep, tt.inputLen, result, tt.expected)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestLoadCacheSlot(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
cache InputCache
|
|
prompt []input.Input
|
|
wantErr bool
|
|
expectedSlotId int
|
|
expectedPrompt int // expected length of remaining prompt
|
|
}{
|
|
{
|
|
name: "Basic cache hit - single user",
|
|
cache: InputCache{
|
|
multiUserCache: false,
|
|
slots: []InputCacheSlot{
|
|
{
|
|
Id: 0,
|
|
Inputs: []input.Input{{Token: 1}, {Token: 2}},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-time.Second),
|
|
},
|
|
{
|
|
Id: 1,
|
|
Inputs: []input.Input{},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-2 * time.Second),
|
|
},
|
|
},
|
|
},
|
|
prompt: []input.Input{{Token: 1}, {Token: 2}, {Token: 3}},
|
|
wantErr: false,
|
|
expectedSlotId: 0,
|
|
expectedPrompt: 1, // Only token 3 remains
|
|
},
|
|
{
|
|
name: "Basic cache hit - multi user",
|
|
cache: InputCache{
|
|
multiUserCache: true,
|
|
slots: []InputCacheSlot{
|
|
{
|
|
Id: 0,
|
|
Inputs: []input.Input{{Token: 1}, {Token: 2}},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-time.Second),
|
|
},
|
|
{
|
|
Id: 1,
|
|
Inputs: []input.Input{},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-2 * time.Second),
|
|
},
|
|
},
|
|
},
|
|
prompt: []input.Input{{Token: 1}, {Token: 2}, {Token: 3}},
|
|
wantErr: false,
|
|
expectedSlotId: 0,
|
|
expectedPrompt: 1, // Only token 3 remains
|
|
},
|
|
{
|
|
name: "Exact match - leave one input",
|
|
cache: InputCache{
|
|
multiUserCache: false,
|
|
slots: []InputCacheSlot{
|
|
{
|
|
Id: 0,
|
|
Inputs: []input.Input{{Token: 1}, {Token: 2}},
|
|
InUse: false,
|
|
lastUsed: time.Now().Add(-time.Second),
|
|
},
|
|
},
|
|
},
|
|
prompt: []input.Input{{Token: 1}, {Token: 2}},
|
|
wantErr: false,
|
|
expectedSlotId: 0,
|
|
expectedPrompt: 1, // Should leave 1 token for sampling
|
|
},
|
|
{
|
|
name: "No available slots",
|
|
cache: InputCache{
|
|
multiUserCache: false,
|
|
slots: []InputCacheSlot{
|
|
{
|
|
Id: 0,
|
|
Inputs: []input.Input{{Token: 1}, {Token: 2}},
|
|
InUse: true,
|
|
lastUsed: time.Now().Add(-time.Second),
|
|
},
|
|
},
|
|
},
|
|
prompt: []input.Input{{Token: 1}, {Token: 2}, {Token: 3}},
|
|
wantErr: true,
|
|
expectedSlotId: -1,
|
|
expectedPrompt: -1,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
slot, remainingPrompt, err := tt.cache.LoadCacheSlot(tt.prompt)
|
|
|
|
// Check error state
|
|
if (err != nil) != tt.wantErr {
|
|
t.Errorf("LoadCacheSlot() error = %v, wantErr %v", err, tt.wantErr)
|
|
return
|
|
}
|
|
|
|
if tt.wantErr {
|
|
return // Skip further checks if we expected an error
|
|
}
|
|
|
|
// Verify slot ID
|
|
if slot.Id != tt.expectedSlotId {
|
|
t.Errorf("LoadCacheSlot() slot ID = %v, expected %v", slot.Id, tt.expectedSlotId)
|
|
}
|
|
|
|
// Verify slot is now marked in use
|
|
if !slot.InUse {
|
|
t.Errorf("LoadCacheSlot() slot not marked InUse")
|
|
}
|
|
|
|
// Verify remaining prompt length
|
|
if len(remainingPrompt) != tt.expectedPrompt {
|
|
t.Errorf("LoadCacheSlot() remaining prompt length = %v, expected %v",
|
|
len(remainingPrompt), tt.expectedPrompt)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// Mock implementation of the Cache interface
|
|
type mockCache struct {
|
|
shouldFail bool
|
|
}
|
|
|
|
// Implement only the methods needed for the test
|
|
func (m *mockCache) Remove(seq int, beginIndex, endIndex int32) error {
|
|
if m.shouldFail {
|
|
return fmt.Errorf("mock cache removal error")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Stub implementations for other interface methods
|
|
func (m *mockCache) SetLayer(layer int) {}
|
|
func (m *mockCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) { return nil, nil, nil }
|
|
func (m *mockCache) Put(ctx ml.Context, key, value ml.Tensor) {}
|
|
func (m *mockCache) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity, maxBatch int) {}
|
|
func (m *mockCache) Close() {}
|
|
func (m *mockCache) StartForward(ctx ml.Context, batch input.Batch) error { return nil }
|
|
func (m *mockCache) CopyPrefix(srcSeq, dstSeq int, len int32) {}
|
|
func (m *mockCache) SetConfig(ml.CacheConfig) {}
|
|
func (m *mockCache) CanResume(seq int, pos int32) bool { return true }
|
|
|
|
func TestShiftCacheSlot(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
numCtx int32
|
|
inputs []input.Input
|
|
numKeep int32
|
|
cacheErr bool
|
|
wantErr any
|
|
wantInputsLen int
|
|
}{
|
|
{
|
|
name: "Normal shift",
|
|
numCtx: 10,
|
|
inputs: []input.Input{{Token: 1}, {Token: 2}, {Token: 3}, {Token: 4}, {Token: 5}, {Token: 6}, {Token: 7}, {Token: 8}, {Token: 9}, {Token: 10}},
|
|
numKeep: 2,
|
|
cacheErr: false, // No error
|
|
wantErr: nil,
|
|
wantInputsLen: 6, // After discarding 4 tokens
|
|
},
|
|
{
|
|
name: "Cache removal fails",
|
|
numCtx: 10,
|
|
inputs: []input.Input{{Token: 1}, {Token: 2}, {Token: 3}, {Token: 4}, {Token: 5}, {Token: 6}, {Token: 7}, {Token: 8}, {Token: 9}, {Token: 10}},
|
|
numKeep: 2,
|
|
cacheErr: true,
|
|
wantErr: &ErrReprocessInputs{},
|
|
wantInputsLen: 0, // Original inputs should be cleared
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
mock := &mockCache{shouldFail: tt.cacheErr}
|
|
c := InputCache{
|
|
numCtx: tt.numCtx,
|
|
cache: mock,
|
|
}
|
|
slot := &InputCacheSlot{
|
|
Id: 123,
|
|
Inputs: make([]input.Input, len(tt.inputs)),
|
|
}
|
|
copy(slot.Inputs, tt.inputs)
|
|
|
|
err := c.ShiftCacheSlot(slot, tt.numKeep)
|
|
|
|
if tt.wantErr != nil {
|
|
if err == nil {
|
|
t.Errorf("Expected error but got nil")
|
|
return
|
|
}
|
|
|
|
if !errors.As(err, &tt.wantErr) {
|
|
t.Errorf("Expected error of type %T but got %T: %v", tt.wantErr, err, err)
|
|
}
|
|
} else if err != nil {
|
|
t.Errorf("Unexpected error: %v", err)
|
|
}
|
|
|
|
if len(slot.Inputs) != tt.wantInputsLen {
|
|
t.Errorf("Slot inputs length after operation: got %v, want %v", len(slot.Inputs), tt.wantInputsLen)
|
|
}
|
|
})
|
|
}
|
|
}
|