Move quantization to new backend (#10363)

* Move quantization logic to GGML via new backend

This moves the model-aware logic to Go code and calls GGML's quantization code for model creation.

* Remove "add model quantizations"

This is no longer needed now that quantization is implemented in Go+GGML code directly.
This commit is contained in:
Daniel Hiltgen 2025-05-06 11:20:48 -07:00 committed by GitHub
parent 95e744beeb
commit 424810450f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
39 changed files with 1854 additions and 440 deletions

View file

@ -460,24 +460,6 @@ func (m *Model) NEmbd() int {
return int(C.llama_model_n_embd(m.c))
}
// Quantize runs llama_model_quantize with infile as the input path and
// outfile as the output path. It starts from the library's default
// quantization parameters and overrides two of them: ftype is set to the
// caller-supplied value and nthread is set to -1.
//
// It returns nil when the C call reports 0, and otherwise an error that
// carries the non-zero return code.
func Quantize(infile, outfile string, ftype uint32) error {
	// C.CString allocates on the C heap; each copy is released on return.
	src := C.CString(infile)
	defer C.free(unsafe.Pointer(src))

	dst := C.CString(outfile)
	defer C.free(unsafe.Pointer(dst))

	opts := C.llama_model_quantize_default_params()
	opts.ftype = ftype
	// NOTE(review): -1 presumably lets the library choose the thread count;
	// confirm against llama.h.
	opts.nthread = -1

	rc := C.llama_model_quantize(src, dst, &opts)
	if rc == 0 {
		return nil
	}
	return fmt.Errorf("llama_model_quantize: %d", rc)
}
// vision processing
type ClipContext struct {
c *C.struct_clip_ctx