mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 02:16:36 +02:00
sample: temporarily use grammars for constrained generation in new engine (#9586)
This commit is contained in:
parent
a1cda80bcb
commit
e093db92c4
10 changed files with 301 additions and 213 deletions
|
@ -245,6 +245,20 @@ func LoadModelFromFile(modelPath string, params ModelParams) (*Model, error) {
|
|||
return &m, nil
|
||||
}
|
||||
|
||||
func LoadVocabFromFile(path string) (*Vocab, error) {
|
||||
mp := C.CString(path)
|
||||
defer C.free(unsafe.Pointer(mp))
|
||||
v := Vocab{c: C.llama_load_vocab_from_file(mp)}
|
||||
if v.c == nil {
|
||||
return nil, fmt.Errorf("unable to load vocab: %s", path)
|
||||
}
|
||||
return &v, nil
|
||||
}
|
||||
|
||||
func FreeVocab(vocab *Vocab) {
|
||||
C.llama_free_vocab(vocab.c)
|
||||
}
|
||||
|
||||
func FreeModel(model *Model) {
|
||||
C.llama_model_free(model.c)
|
||||
}
|
||||
|
@ -293,6 +307,10 @@ func (m *Model) ApplyLoraFromFile(context *Context, loraPath string, scale float
|
|||
return nil
|
||||
}
|
||||
|
||||
type Vocab struct {
|
||||
c *C.struct_llama_vocab
|
||||
}
|
||||
|
||||
func (m *Model) Vocab() *C.struct_llama_vocab {
|
||||
return C.llama_model_get_vocab(m.c)
|
||||
}
|
||||
|
@ -669,3 +687,53 @@ func SchemaToGrammar(schema []byte) []byte {
|
|||
}
|
||||
return buf[:n]
|
||||
}
|
||||
|
||||
type Sampler struct {
|
||||
c *C.struct_llama_sampler
|
||||
}
|
||||
|
||||
func NewGrammarSampler(vocab *Vocab, grammar string) *Sampler {
|
||||
cGrammar := C.CString(grammar)
|
||||
cRoot := C.CString("root")
|
||||
defer C.free(unsafe.Pointer(cGrammar))
|
||||
defer C.free(unsafe.Pointer(cRoot))
|
||||
|
||||
sampler := &Sampler{c: C.llama_sampler_init_grammar(vocab.c, cGrammar, cRoot)}
|
||||
|
||||
return sampler
|
||||
}
|
||||
|
||||
func (s *Sampler) Accept(token int32) {
|
||||
C.llama_sampler_accept(s.c, C.llama_token(token))
|
||||
}
|
||||
|
||||
type TokenData struct {
|
||||
Id int32
|
||||
Logit float32
|
||||
}
|
||||
|
||||
func (s *Sampler) Apply(tokens []TokenData) {
|
||||
tds := make([]C.struct_llama_token_data, len(tokens))
|
||||
for i, token := range tokens {
|
||||
tds[i] = C.struct_llama_token_data{
|
||||
id: C.int32_t(token.Id),
|
||||
logit: C.float(token.Logit),
|
||||
p: C.float(0.0),
|
||||
}
|
||||
}
|
||||
tda := &C.llama_token_data_array{
|
||||
data: (*C.struct_llama_token_data)(unsafe.Pointer(&tds[0])),
|
||||
size: C.size_t(len(tokens)),
|
||||
selected: C.int64_t(-1),
|
||||
sorted: C.bool(false),
|
||||
}
|
||||
|
||||
var pinner runtime.Pinner
|
||||
pinner.Pin(&tds[0])
|
||||
defer pinner.Unpin()
|
||||
|
||||
C.llama_sampler_apply(s.c, tda)
|
||||
for i := range tokens {
|
||||
tokens[i].Logit = float32(tds[i].logit)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue