llama: remove model loading for grammar (#10096)

This commit is contained in:
Parth Sareen 2025-04-24 11:51:19 -07:00 committed by GitHub
parent 40b10eee6d
commit a53d744b01
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 521 additions and 107 deletions

View file

@ -26,6 +26,9 @@ type Model struct {
// Compile-time assertion that *Model satisfies model.MultimodalProcessor.
var _ model.MultimodalProcessor = (*Model)(nil)
// Compile-time assertion that *Model satisfies model.TextProcessor.
var _ model.TextProcessor = (*Model)(nil)
func New(c fs.Config) (model.Model, error) {
textModel, err := NewTextModel(c)
if err != nil {

View file

@ -32,6 +32,7 @@ type TextProcessor interface {
Encode(s string, addSpecial bool) ([]int32, error)
Decode([]int32) (string, error)
Is(int32, Special) bool
Vocabulary() *Vocabulary
}
type Vocabulary struct {
@ -117,6 +118,8 @@ type BytePairEncoding struct {
vocab *Vocabulary
}
// Compile-time assertion that *BytePairEncoding satisfies TextProcessor.
var _ TextProcessor = (*BytePairEncoding)(nil)
func NewBytePairEncoding(pre string, vocab *Vocabulary) BytePairEncoding {
return BytePairEncoding{
pre: regexp2.MustCompile(pre, regexp2.Unicode|regexp2.RE2),
@ -124,6 +127,10 @@ func NewBytePairEncoding(pre string, vocab *Vocabulary) BytePairEncoding {
}
}
// Vocabulary returns the *Vocabulary stored in the encoder's vocab field.
// The pointer is returned as-is (no copy is made), so the caller refers to
// the same Vocabulary value the encoder holds.
func (bpe BytePairEncoding) Vocabulary() *Vocabulary {
return bpe.vocab
}
// Is forwards id and special to the Is method of the encoder's vocabulary
// and returns that result unchanged.
// NOTE(review): bpe.vocab is dereferenced without a nil check — confirm that
// every constructor path sets it.
func (bpe BytePairEncoding) Is(id int32, special Special) bool {
return bpe.vocab.Is(id, special)
}

View file

@ -17,6 +17,10 @@ type SentencePieceModel struct {
// Compile-time assertion that *SentencePieceModel satisfies TextProcessor.
var _ TextProcessor = (*SentencePieceModel)(nil)
// Vocabulary returns the *Vocabulary stored in the model's vocab field.
// The pointer is returned as-is (no copy is made), so the caller refers to
// the same Vocabulary value the model holds.
func (spm SentencePieceModel) Vocabulary() *Vocabulary {
return spm.vocab
}
func NewSentencePieceModel(vocab *Vocabulary) SentencePieceModel {
slog.Debug("Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])