Mirror of https://github.com/ollama/ollama.git (synced 2025-05-11 18:36:41 +02:00)
llama: Improve error handling
Check for NULL return values from llama.cpp in more places and convert them into Go errors, which should make debugging easier in the future rather than having hidden surprises in our data structures.
parent a103dae01e
commit 312d9de1d1

4 changed files with 98 additions and 40 deletions
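Every hunk below applies the same pattern: check the pointer coming back from a cgo call against nil at the call site and turn a NULL into a Go error immediately. A minimal self-contained sketch of the idea, with a hypothetical C function standing in for the llama.cpp entry points (nothing here is code from this commit):

package main

/*
#include <stdlib.h>

// Hypothetical stand-in for a C entry point that returns NULL on failure.
static void *load_model(int should_fail) {
	if (should_fail) {
		return NULL;
	}
	return malloc(16);
}
*/
import "C"

import (
	"errors"
	"fmt"
	"unsafe"
)

// loadModel converts a NULL return from C into a Go error at the call site,
// instead of storing a nil pointer in a struct and crashing much later.
func loadModel(shouldFail bool) (unsafe.Pointer, error) {
	var flag C.int
	if shouldFail {
		flag = 1
	}
	p := C.load_model(flag)
	if p == nil {
		return nil, errors.New("unable to load model")
	}
	return p, nil
}

func main() {
	if _, err := loadModel(true); err != nil {
		fmt.Println("got expected error:", err)
	}
	if p, err := loadModel(false); err == nil {
		C.free(p) // release the C allocation on the success path
	}
}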
@@ -88,6 +88,7 @@ import (
 	"fmt"
 	"runtime"
 	"runtime/cgo"
+	"slices"
 	"strings"
 	"unsafe"
 )
@@ -260,7 +261,7 @@ func LoadModelFromFile(modelPath string, params ModelParams) (*Model, error) {
 	}
 
 	m := Model{c: C.llama_load_model_from_file(C.CString(modelPath), cparams)}
-	if m.c == (*C.struct_llama_model)(C.NULL) {
+	if m.c == nil {
 		return nil, fmt.Errorf("unable to load model: %s", modelPath)
 	}
 
@@ -276,7 +277,7 @@ func NewContextWithModel(model *Model, params ContextParams) (*Context, error) {
 		c:          C.llama_new_context_with_model(model.c, params.c),
 		numThreads: int(params.c.n_threads),
 	}
-	if c.c == (*C.struct_llama_context)(C.NULL) {
+	if c.c == nil {
 		return nil, errors.New("unable to create llama context")
 	}
 
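Both rewrites above lean on the fact that cgo pointers compare directly against Go's untyped nil, so the explicit (*C.struct_llama_model)(C.NULL) and (*C.struct_llama_context)(C.NULL) casts were pure noise.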
@@ -300,6 +301,9 @@ func (m *Model) ApplyLoraFromFile(context *Context, loraPath string, scale float
 	defer C.free(unsafe.Pointer(cLoraPath))
 
 	loraAdapter := C.llama_lora_adapter_init(m.c, cLoraPath)
+	if loraAdapter == nil {
+		return errors.New("unable to load lora")
+	}
 
 	err := -1
 	if loraAdapter != nil {
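With the early return in place, the pre-existing err := -1 / if loraAdapter != nil path visible as trailing context can only ever take its non-nil branch; the old guard is now redundant but harmless.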
@@ -322,13 +326,25 @@ type Batch struct {
 // Creates a new batch for either word tokens or image embeddings (if embedSize is non-zero).
 // Batches cannot contain both types at the same time. batchSize is the maximum number of entries
 // that can be added per sequence
-func NewBatch(batchSize int, maxSeq int, embedSize int) *Batch {
-	return &Batch{
+func NewBatch(batchSize int, maxSeq int, embedSize int) (*Batch, error) {
+	b := Batch{
 		c:         C.llama_batch_init(C.int(batchSize*maxSeq), C.int(embedSize), C.int(maxSeq)),
 		batchSize: batchSize,
 		maxSeq:    maxSeq,
 		embedSize: embedSize,
 	}
+
+	// Check to see if any of the allocations in llama_batch_init() failed
+	nilPointer := (embedSize == 0 && b.c.token == nil) || (embedSize != 0 && b.c.embd == nil) ||
+		b.c.pos == nil || b.c.n_seq_id == nil || b.c.seq_id == nil || b.c.logits == nil ||
+		slices.Contains(unsafe.Slice(b.c.seq_id, b.allocSize()), nil)
+
+	if nilPointer {
+		C.llama_batch_free(b.c)
+		return nil, fmt.Errorf("unable to allocate batch (batchSize=%v maxSeq=%v embedSize=%v)", batchSize, maxSeq, embedSize)
+	}
+
+	return &b, nil
 }
 
 func (b *Batch) Size() int {
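Since NewBatch now returns an error, every call site has to change shape as well. A hypothetical caller, assuming the package is imported as llama and that Batch has a Free method (neither is shown in this diff):

	// Sketch of an updated call site; the argument values are illustrative only.
	batch, err := llama.NewBatch(512, 1, 0)
	if err != nil {
		return fmt.Errorf("allocating batch: %w", err)
	}
	defer batch.Free()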
@@ -484,6 +500,9 @@ func NewClipContext(llamaContext *Context, modelPath string) (*ClipContext, erro
 	mp := C.CString(modelPath)
 	defer C.free(unsafe.Pointer(mp))
 	c := C.clip_model_load(mp, 1)
+	if c == nil {
+		return nil, fmt.Errorf("unable to load clip model: %v", modelPath)
+	}
 
 	projEmbedSize := int(C.clip_n_mmproj_embd(c))
 	modelEmbedSize := llamaContext.Model().NEmbd()
@@ -498,8 +517,11 @@ func (c *ClipContext) Free() {
 	C.clip_free(c.c)
 }
 
-func (c *ClipContext) NewEmbed(llamaContext *Context, data []byte) [][]float32 {
+func (c *ClipContext) NewEmbed(llamaContext *Context, data []byte) ([][]float32, error) {
 	l := C.llava_image_embed_make_with_bytes(c.c, C.int(llamaContext.numThreads), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(len(data)))
+	if l == nil {
+		return nil, errors.New("unable to make llava embedding from image")
+	}
 
 	numTokens := int(l.n_image_pos)
 	numEmbed := llamaContext.Model().NEmbd()
@@ -516,7 +538,7 @@ func (c *ClipContext) NewEmbed(llamaContext *Context, data []byte) [][]float32 {
 
 	C.llava_image_embed_free(l)
 
-	return embed
+	return embed, nil
 }
 
 type MllamaContext struct {
@@ -527,6 +549,9 @@ func NewMllamaContext(llamaContext *Context, modelPath string) (*MllamaContext,
 	mp := C.CString(modelPath)
 	defer C.free(unsafe.Pointer(mp))
 	c := C.mllama_model_load(mp, 1)
+	if c == nil {
+		return nil, fmt.Errorf("unable to load mllama model: %v", modelPath)
+	}
 
 	projEmbedSize := int(C.mllama_n_embd(c))
 	modelEmbedSize := llamaContext.Model().NEmbd()
@@ -541,19 +566,25 @@ func (m *MllamaContext) Free() {
 	C.mllama_free(m.c)
 }
 
-func (m *MllamaContext) NewEmbed(llamaContext *Context, data []byte, aspectRatioId int) [][]float32 {
+func (m *MllamaContext) NewEmbed(llamaContext *Context, data []byte, aspectRatioId int) ([][]float32, error) {
 	img := C.mllama_image_init()
 	defer C.mllama_image_free(img)
 
-	C.mllama_image_load_from_data(unsafe.Pointer(&data[0]), C.int(len(data)), 560, 560, 3, 4, C.int(aspectRatioId), img)
+	ok := bool(C.mllama_image_load_from_data(unsafe.Pointer(&data[0]), C.int(len(data)), 560, 560, 3, 4, C.int(aspectRatioId), img))
+	if !ok {
+		return nil, errors.New("unable to load mllama image data")
+	}
 
 	rows := make([]float32, m.EmbedSize(llamaContext))
-	C.mllama_image_encode(m.c, C.int(llamaContext.numThreads), img, (*C.float)(unsafe.Pointer(&rows[0])))
+	ok = bool(C.mllama_image_encode(m.c, C.int(llamaContext.numThreads), img, (*C.float)(unsafe.Pointer(&rows[0]))))
+	if !ok {
+		return nil, errors.New("unable to make mllama embedding from image")
+	}
 
 	embed := make([][]float32, 1)
 	embed[0] = rows
 
-	return embed
+	return embed, nil
 }
 
 func (m *MllamaContext) EmbedSize(llamaContext *Context) int {
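Both mllama calls evidently report success as a C bool: the diff converts the results through bool() and checks them, so a failed image load and a failed encode now surface as distinct errors instead of silently yielding a zeroed or partial embedding.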
@@ -592,7 +623,7 @@ type SamplingParams struct {
 	Grammar string
 }
 
-func NewSamplingContext(model *Model, params SamplingParams) *SamplingContext {
+func NewSamplingContext(model *Model, params SamplingParams) (*SamplingContext, error) {
 	var cparams C.struct_gpt_sampler_cparams
 	cparams.top_k = C.int32_t(params.TopK)
 	cparams.top_p = C.float(params.TopP)
@@ -615,9 +646,13 @@ func NewSamplingContext(model *Model, params SamplingParams) *SamplingContext {
 
 	cparams.grammar = grammar
 	context := &SamplingContext{c: C.gpt_sampler_cinit(model.c, &cparams)}
+	if context.c == nil {
+		return nil, errors.New("unable to create sampling context")
+	}
+
 	runtime.SetFinalizer(context, func(s *SamplingContext) { C.gpt_sampler_cfree(s.c) })
 
-	return context
+	return context, nil
 }
 
 func (s *SamplingContext) Reset() {
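The ordering matters here: runtime.SetFinalizer is reached only after the nil check, so the gpt_sampler_cfree finalizer can never fire against a sampling context that failed to allocate.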
(Diffs for the remaining changed files did not load in this capture.)