ml/backend/ggml: create tensor on specific backend

some tensors should be created on specific backends to reduce number of
copies and improve performance
This commit is contained in:
Michael Yang 2025-02-25 16:06:32 -08:00
parent 764e199d67
commit 7bae7fa5ce
6 changed files with 129 additions and 60 deletions

View file

@ -138,17 +138,17 @@ func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs, outputs ml.Ten
}
func (m *Model) Forward(ctx ml.Context, opts model.Options) (ml.Tensor, error) {
inputs, err := ctx.FromIntSlice(opts.Inputs, len(opts.Inputs))
inputs, err := ctx.Input().FromIntSlice(opts.Inputs, len(opts.Inputs))
if err != nil {
return nil, err
}
positions, err := ctx.FromIntSlice(opts.Positions, len(opts.Positions))
positions, err := ctx.Input().FromIntSlice(opts.Positions, len(opts.Positions))
if err != nil {
return nil, err
}
outputs, err := ctx.FromIntSlice(opts.Outputs, len(opts.Outputs))
outputs, err := ctx.Output().FromIntSlice(opts.Outputs, len(opts.Outputs))
if err != nil {
return nil, err
}

View file

@ -72,7 +72,7 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, er
return nil, err
}
pixelValues, err := ctx.FromFloatSlice(f32s,
pixelValues, err := ctx.Input().FromFloatSlice(f32s,
m.ImageProcessor.imageSize,
m.ImageProcessor.imageSize,
m.ImageProcessor.numChannels,
@ -82,7 +82,7 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, er
return nil, err
}
aspectRatio, err := ctx.FromIntSlice([]int32{int32(aspectRatioID)}, 1)
aspectRatio, err := ctx.Input().FromIntSlice([]int32{int32(aspectRatioID)}, 1)
if err != nil {
return nil, err
}
@ -92,7 +92,7 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, er
positions[i] = int32(i)
}
positionIDs, err := ctx.FromIntSlice(positions, len(positions))
positionIDs, err := ctx.Input().FromIntSlice(positions, len(positions))
if err != nil {
return nil, err
}
@ -136,17 +136,17 @@ func (m *Model) Forward(ctx ml.Context, opts model.Options) (ml.Tensor, error) {
crossAttentionStates = opts.Multimodal[0].Multimodal.(ml.Tensor)
}
inputs, err := ctx.FromIntSlice(opts.Inputs, len(opts.Inputs))
inputs, err := ctx.Input().FromIntSlice(opts.Inputs, len(opts.Inputs))
if err != nil {
return nil, err
}
positions, err := ctx.FromIntSlice(opts.Positions, len(opts.Positions))
positions, err := ctx.Input().FromIntSlice(opts.Positions, len(opts.Positions))
if err != nil {
return nil, err
}
outputs, err := ctx.FromIntSlice(opts.Outputs, len(opts.Outputs))
outputs, err := ctx.Output().FromIntSlice(opts.Outputs, len(opts.Outputs))
if err != nil {
return nil, err
}