Mirror of https://github.com/ollama/ollama.git
OpenAI: Add Suffix to v1/completions (#5611)
* add suffix
* remove todo
* remove TODO
* add to test
* rm outdated prompt tokens info md
* fix test
* fix test
This commit is contained in:

parent: 499e87c9ba
commit: 0d41623b52

3 changed files with 7 additions and 6 deletions
openai/openai.go

```diff
@@ -111,6 +111,7 @@ type CompletionRequest struct {
 	Stream           bool     `json:"stream"`
 	Temperature      *float32 `json:"temperature"`
 	TopP             float32  `json:"top_p"`
+	Suffix           string   `json:"suffix"`
 }
 
 type Completion struct {
```
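The new `Suffix` field carries the text that should come after the generated completion, which is how OpenAI-style clients request fill-in-the-middle (infill) completions. A minimal client-side sketch of such a request, assuming Ollama's default listen address and an illustrative infill-capable model; the trimmed-down request struct below is local to this example, not the compat layer's full `CompletionRequest`:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// completionRequest mirrors only the fields this sketch needs; the
// server-side CompletionRequest in openai.go has many more.
type completionRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Suffix string `json:"suffix"`
	Stream bool   `json:"stream"`
}

func main() {
	body, err := json.Marshal(completionRequest{
		Model:  "codellama:7b-code", // illustrative; any infill-capable model
		Prompt: "func add(a, b int) int {",
		Suffix: "}",
	})
	if err != nil {
		panic(err)
	}
	// POST to the OpenAI-compatible endpoint this diff touches;
	// localhost:11434 is Ollama's default listen address.
	resp, err := http.Post("http://localhost:11434/v1/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}
```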
```diff
@@ -188,7 +189,6 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
 			}(r.DoneReason),
 		}},
 		Usage: Usage{
-			// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
 			PromptTokens:     r.PromptEvalCount,
 			CompletionTokens: r.EvalCount,
 			TotalTokens:      r.PromptEvalCount + r.EvalCount,
```
```diff
@@ -234,7 +234,6 @@ func toCompletion(id string, r api.GenerateResponse) Completion {
 			}(r.DoneReason),
 		}},
 		Usage: Usage{
-			// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
 			PromptTokens:     r.PromptEvalCount,
 			CompletionTokens: r.EvalCount,
 			TotalTokens:      r.PromptEvalCount + r.EvalCount,
```
```diff
@@ -475,6 +474,7 @@ func fromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
 		Prompt:  r.Prompt,
 		Options: options,
 		Stream:  &r.Stream,
+		Suffix:  r.Suffix,
 	}, nil
 }
 
```
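Because `fromCompleteRequest` now forwards the field, a suffix request on the compat route reduces to the equivalent call against the native API. A hedged sketch of that equivalent call using the `github.com/ollama/ollama/api` client; the model name is illustrative:

```go
package main

import (
	"context"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	// Reads OLLAMA_HOST or falls back to the default local server.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}
	stream := false
	req := &api.GenerateRequest{
		Model:  "codellama:7b-code", // illustrative; any infill-capable model
		Prompt: "func add(a, b int) int {",
		Suffix: "}", // same field the compat layer now populates
		Stream: &stream,
	}
	// The callback receives the generated infill text.
	err = client.Generate(context.Background(), req, func(r api.GenerateResponse) error {
		fmt.Print(r.Response)
		return nil
	})
	if err != nil {
		panic(err)
	}
}
```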