mirror of
https://github.com/ollama/ollama.git
synced 2025-05-10 18:06:33 +02:00
Rather than directly giving the input data to the models, we can pass a tensor instead. In the short term, this saves some duplicated code. Longer term, we will want to overlap setting up the next batch with processing of the current one. In this case, we will only have the shape of the tensor, but it will not be loaded with data at the time of graph generation. By passing only a tensor to models now, we set up this possibility and prevent them from relying on data that they won't have in the future. Although the same could be done for Positions and Outputs, in some cases we either need the raw input data or don't use them at all. Therefore, for now we leave them as they are and allow models to convert them to tensors as needed.
58 lines
1.8 KiB
Go
58 lines
1.8 KiB
Go
package input
|
|
|
|
import "github.com/ollama/ollama/ml"
|
|
|
|
// Input represents one token in the input stream.
type Input struct {
	// Token is a single element of text.
	Token int32

	// Multimodal is opaque data representing a non-text element such as
	// an image (or part of one, if the image can be processed in pieces).
	// It may appear together with Token or on its own.
	Multimodal any

	// MultimodalHash is a unique representation of the data stored in
	// Multimodal, used for caching and for comparing equality.
	MultimodalHash uint64

	// SameBatch forces the following number of tokens to be processed in
	// a single batch, breaking and extending batches as needed. Useful
	// for things like images that must be processed in one shot.
	SameBatch int
}
|
|
|
|
// MultimodalIndex is a multimodal element (such as an image) together
// with an index into the slice of Inputs with the corresponding token.
// Note that the index is not the same as the position - to find that,
// use the index with the Positions slice.
type MultimodalIndex struct {
	// Index is the index into the Inputs of the corresponding token.
	Index int

	// Multimodal is the multimodal element itself, such as an image.
	Multimodal any
}
|
|
|
|
// Batch contains the inputs for a model forward pass
|
|
type Batch struct {
|
|
// Inputs is the input tokens, including placeholders for multimodal inputs.
|
|
Inputs ml.Tensor
|
|
|
|
// Multimodal is a set of multimodal embeddings previously created by
|
|
// EncodeMultimodal, along with an index into Inputs. Unused for text-only
|
|
// models or for batches without multimodal elements.
|
|
Multimodal []MultimodalIndex
|
|
|
|
// Positions is the position for each Input, relative to its sequence. Equal
|
|
// in length to Inputs.
|
|
Positions []int32
|
|
|
|
// Sequences is the sequence for each Input. Equal in length to Inputs.
|
|
Sequences []int
|
|
|
|
// Outputs are the set of indicies into Inputs for which output data should
|
|
// be returned.
|
|
Outputs []int32
|
|
}
|