mirror of https://github.com/ollama/ollama.git
synced 2025-05-11 18:36:41 +02:00

fixes after rebase

This commit is contained in:
parent db5da138c9
commit b2a051375f

4 changed files with 60 additions and 13 deletions
@@ -31,8 +31,28 @@ func (sa *TextAttention) Forward(ctx ml.Context, hiddenStates, positions, attent
 	value = value.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
 
 	if useRope {
-		query = query.RoPE(ctx, positions, sa.RopeFactors, uint32(opts.ropeDim), uint32(0), opts.ropeBase, opts.ropeScale)
-		key = key.RoPE(ctx, positions, sa.RopeFactors, uint32(opts.ropeDim), uint32(0), opts.ropeBase, opts.ropeScale)
+		query = query.RoPE(
+			ctx,
+			positions,
+			sa.RopeFactors,
+			ml.RoPEConfig{
+				Dim:   uint32(opts.ropeDim),
+				Type:  ml.RopeTypeNormal,
+				Base:  opts.ropeBase,
+				Scale: opts.ropeScale,
+			},
+		)
+		key = key.RoPE(
+			ctx,
+			positions,
+			sa.RopeFactors,
+			ml.RoPEConfig{
+				Dim:   uint32(opts.ropeDim),
+				Type:  ml.RopeTypeNormal,
+				Base:  opts.ropeBase,
+				Scale: opts.ropeScale,
+			},
+		)
 	}
 
 	if opts.useQKNorm {
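Note on the change above: the positional RoPE arguments (dim, rope type, base, scale) are folded into a single ml.RoPEConfig value. A minimal sketch of a config type consistent with these call sites, inferred from the diff only; the field types and any rope variants beyond RopeTypeNormal are assumptions, not the actual ml package source:

// Sketch only, not the actual ml package source; field types and the set of
// rope variants are inferred from the call sites above.
package ml

type RopeType uint32

const RopeTypeNormal RopeType = 0

// RoPEConfig bundles the rotary position embedding parameters that were
// previously passed to Tensor.RoPE as positional arguments.
type RoPEConfig struct {
	Dim   uint32   // number of dimensions to rotate (opts.ropeDim)
	Type  RopeType // rotation variant (RopeTypeNormal here)
	Base  float32  // frequency base (rope.freq_base)
	Scale float32  // frequency scale (rope.freq_scale)
}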
@@ -255,5 +275,15 @@ func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor
 }
 
 func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
-	return key.RoPE(ctx, shift, m.Layers[layer].Attention.RopeFactors, uint32(0), uint32(m.ropeDim), m.ropeBase, m.ropeScale), nil
+	return key.RoPE(
+		ctx,
+		shift,
+		m.Layers[layer].Attention.RopeFactors,
+		ml.RoPEConfig{
+			Dim:   uint32(m.TextOptions.ropeDim),
+			Type:  ml.RopeTypeNormal,
+			Base:  m.TextOptions.ropeBase,
+			Scale: m.TextOptions.ropeScale,
+		},
+	), nil
 }
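Shift re-applies RoPE to keys that already sit in the KV cache so they stay consistent after their positions move. A sketch of the usual wiring, assuming ollama's kvcache package; the constructor call below is an assumption that would live in the model's own package, it is not part of this diff:

// Sketch under assumptions: how a text model typically registers Shift so the
// causal KV cache can re-rotate cached keys when it slides their positions.
import "github.com/ollama/ollama/kvcache"

func newTextCache(m *TextModel) kvcache.Cache {
	// NewCausalCache expects func(ml.Context, int, ml.Tensor, ml.Tensor) (ml.Tensor, error),
	// which matches the signature of (*TextModel).Shift above.
	return kvcache.NewCausalCache(m.Shift)
}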
@@ -240,7 +240,7 @@ func newTextModel(c fs.Config) *TextModel {
 		numHeads:             int(c.Uint("attention.head_count")),
 		numKVHeads:           int(c.Uint("attention.head_count_kv")),
 		eps:                  c.Float("attention.layer_norm_rms_epsilon"),
-		crossAttentionLayers: c.Uints("attention.cross_attention_layers"),
+		crossAttentionLayers: c.Ints("attention.cross_attention_layers"),
 		ropeConfig: ml.RoPEConfig{
 			Base:  c.Float("rope.freq_base"),
 			Scale: c.Float("rope.freq_scale", 1),
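attention.cross_attention_layers is now read with c.Ints rather than c.Uints, so the layer list is a signed slice. Assuming c.Ints yields []int32, a per-layer lookup can compare directly against an int32 layer index; the helper below is illustrative only:

// Sketch only; package name is a placeholder.
package sketch

import "slices"

// isCrossAttentionLayer reports whether layer index i appears in the signed
// list produced by c.Ints("attention.cross_attention_layers").
func isCrossAttentionLayer(crossAttentionLayers []int32, i int) bool {
	return slices.Contains(crossAttentionLayers, int32(i))
}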
@@ -35,7 +35,7 @@ func NewTextModel(c fs.Config) *TextModel {
 		c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
 		&model.Vocabulary{
 			Values: c.Strings("tokenizer.ggml.tokens"),
-			Types:  c.Uints("tokenizer.ggml.token_type"),
+			Types:  c.Ints("tokenizer.ggml.token_type"),
 			Merges: c.Strings("tokenizer.ggml.merges"),
 			BOS:    int32(c.Uint("tokenizer.ggml.bos_token_id")),
 			AddBOS: c.Bool("tokenizer.ggml.add_bos_token", false),
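The tokenizer gets the same signed/unsigned fix: tokenizer.ggml.token_type values now land in model.Vocabulary.Types as signed integers. Assuming the common GGUF/llama.cpp convention that type 3 marks control tokens, a classification helper could look like this; the constant and helper are illustrative, not part of the diff:

// Sketch only; package name and constant are placeholders.
package sketch

// tokenTypeControl follows the common GGUF/llama.cpp token_type convention
// (3 = control); it is not defined anywhere in this diff.
const tokenTypeControl int32 = 3

// isControlToken classifies a token id against the signed Types table now
// produced by c.Ints("tokenizer.ggml.token_type").
func isControlToken(types []int32, id int32) bool {
	return id >= 0 && int(id) < len(types) && types[id] == tokenTypeControl
}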
@@ -514,17 +514,34 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
 		} else if err != nil {
 			return nil, err
 		}
-	}
-
-	// Fallback to creating layer from file copy (either NewLayerFromLayer failed, or digest empty/n != stat.Size())
-	if layer.Digest == "" {
-		layer, err = NewLayer(io.NewSectionReader(blob, 0, n), mediatype)
-		if err != nil {
-			return nil, err
+		mediatype := "application/vnd.ollama.image.model"
+		if f.KV().Kind() == "adapter" {
+			mediatype = "application/vnd.ollama.image.adapter"
+		} else if _, ok := f.KV()[fmt.Sprintf("%s.vision.block_count", f.KV().Architecture())]; ok || f.KV().Kind() == "projector" {
+			mediatype = "application/vnd.ollama.image.projector"
+		}
 		}
 
-	layers = append(layers, &layerGGML{layer, f})
+		var layer Layer
+		if digest != "" && n == stat.Size() && offset == 0 {
+			layer, err = NewLayerFromLayer(digest, mediatype, blob.Name())
+			if err != nil {
+				slog.Debug("could not create new layer from layer", "error", err)
+				return nil, err
+			}
+		}
+
+		// Fallback to creating layer from file copy (either NewLayerFromLayer failed, or digest empty/n != stat.Size())
+		if layer.Digest == "" {
+			layer, err = NewLayer(io.NewSectionReader(blob, offset, n), mediatype)
+			if err != nil {
+				return nil, err
+			}
+		}
+
+		layers = append(layers, &layerGGML{layer, f})
+		offset = n
 	}
 
 	return detectChatTemplate(layers)
 }
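The ggufLayers rework restores per-payload bookkeeping: each decoded GGUF section is wrapped with io.NewSectionReader at the current offset, and offset = n advances past it, so a blob holding several payloads yields several layers. A sketch of just that offset-walking pattern, with a stand-in decode callback; everything except the section-reader arithmetic is an assumption:

// Sketch only; package name and the decode callback are placeholders. decode
// must return the absolute offset just past the payload it consumed, which is
// the role n plays in the diff (offset = n).
package sketch

import (
	"fmt"
	"io"
	"os"
)

func carveSections(blob *os.File, decode func(io.Reader) (int64, error)) error {
	stat, err := blob.Stat()
	if err != nil {
		return err
	}
	var offset int64
	for offset < stat.Size() {
		end, err := decode(io.NewSectionReader(blob, offset, stat.Size()-offset))
		if err != nil {
			return err
		}
		if end <= offset {
			return fmt.Errorf("decoder did not advance past offset %d", offset)
		}
		// One self-contained payload sits in [offset, end); the diff hands the
		// matching window to NewLayer before advancing.
		fmt.Printf("payload: bytes %d to %d\n", offset, end)
		offset = end
	}
	return nil
}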