mirror of https://github.com/ollama/ollama.git (synced 2025-05-10 18:06:33 +02:00)

docs: update readmes for structured outputs (#7962)

parent aed1419c64 · commit f6e87fd628
3 changed files with 149 additions and 2 deletions

docs/api.md
@@ -45,7 +45,7 @@ Generate a response for a given prompt with a provided model. This is a streamin

 Advanced parameters (optional):

-- `format`: the format to return a response in. Currently the only accepted value is `json`
+- `format`: the format to return a response in. Format can be `json` or a JSON schema
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `system`: system message (overrides what is defined in the `Modelfile`)
 - `template`: the prompt template to use (overrides what is defined in the `Modelfile`)
@@ -54,6 +54,10 @@ Advanced parameters (optional):
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
 - `context` (deprecated): the context parameter returned from a previous request to `/generate`; this can be used to keep a short conversational memory

+#### Structured outputs
+
+Structured outputs are supported by providing a JSON schema in the `format` parameter. The model will generate a response that matches the schema. See the [structured outputs](#request-structured-outputs) example below.
+
 #### JSON mode

 Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
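
For quick reference, a minimal JSON-mode request can look like the sketch below (the model tag and prompt are illustrative; any pulled model works):

```shell
curl http://localhost:11434/api/generate -d '{
  "model": "llama3.1:8b",
  "prompt": "What color is the sky at different times of the day? Respond using JSON",
  "format": "json",
  "stream": false
}'
```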
@@ -185,6 +189,52 @@ curl http://localhost:11434/api/generate -d '{
 }
 ```

+#### Request (Structured outputs)
+
+##### Request
+
+```shell
+curl -X POST http://localhost:11434/api/generate -H "Content-Type: application/json" -d '{
+  "model": "llama3.1:8b",
+  "prompt": "Ollama is 22 years old and is busy saving the world. Respond using JSON",
+  "stream": false,
+  "format": {
+    "type": "object",
+    "properties": {
+      "age": {
+        "type": "integer"
+      },
+      "available": {
+        "type": "boolean"
+      }
+    },
+    "required": [
+      "age",
+      "available"
+    ]
+  }
+}'
+```
+
+##### Response
+
+```json
+{
+  "model": "llama3.1:8b",
+  "created_at": "2024-12-06T00:48:09.983619Z",
+  "response": "{\n \"age\": 22,\n \"available\": true\n}",
+  "done": true,
+  "done_reason": "stop",
+  "context": [1, 2, 3],
+  "total_duration": 1075509083,
+  "load_duration": 567678166,
+  "prompt_eval_count": 28,
+  "prompt_eval_duration": 236000000,
+  "eval_count": 16,
+  "eval_duration": 269000000
+}
+```
+
 #### Request (JSON mode)

 > [!IMPORTANT]
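Note that the `response` field comes back as a JSON-encoded string rather than a nested object, so clients still parse it themselves. As a sketch, assuming `jq` is installed and the request body above is saved to a file named `request.json`:

```shell
# Pull the JSON out of the "response" string and pretty-print it
curl -s http://localhost:11434/api/generate -d @request.json | jq -r '.response' | jq .
```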
@@ -456,11 +506,15 @@ The `message` object has the following fields:

 Advanced parameters (optional):

-- `format`: the format to return a response in. Currently the only accepted value is `json`
+- `format`: the format to return a response in. Format can be `json` or a JSON schema.
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)

+### Structured outputs
+
+Structured outputs are supported by providing a JSON schema in the `format` parameter. The model will generate a response that matches the schema. See the [Chat request (Structured outputs)](#chat-request-structured-outputs) example below.
+
 ### Examples

 #### Chat Request (Streaming)
@@ -551,6 +605,54 @@ curl http://localhost:11434/api/chat -d '{
 }
 ```

+#### Chat request (Structured outputs)
+
+##### Request
+
+```shell
+curl -X POST http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
+  "model": "llama3.1",
+  "messages": [{"role": "user", "content": "Ollama is 22 years old and busy saving the world. Return a JSON object with the age and availability."}],
+  "stream": false,
+  "format": {
+    "type": "object",
+    "properties": {
+      "age": {
+        "type": "integer"
+      },
+      "available": {
+        "type": "boolean"
+      }
+    },
+    "required": [
+      "age",
+      "available"
+    ]
+  },
+  "options": {
+    "temperature": 0
+  }
+}'
+```
+
+##### Response
+
+```json
+{
+  "model": "llama3.1",
+  "created_at": "2024-12-06T00:46:58.265747Z",
+  "message": { "role": "assistant", "content": "{\"age\": 22, \"available\": false}" },
+  "done_reason": "stop",
+  "done": true,
+  "total_duration": 2254970291,
+  "load_duration": 574751416,
+  "prompt_eval_count": 34,
+  "prompt_eval_duration": 1502000000,
+  "eval_count": 12,
+  "eval_duration": 175000000
+}
+```
+
 #### Chat request (With History)

 Send a chat message with a conversation history. You can use this same approach to start the conversation using multi-shot or chain-of-thought prompting.
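
A history-carrying request simply includes the earlier turns in the `messages` array, along the lines of this sketch (turn contents are illustrative):

```shell
curl http://localhost:11434/api/chat -d '{
  "model": "llama3.1",
  "messages": [
    { "role": "user", "content": "why is the sky blue?" },
    { "role": "assistant", "content": "Because of Rayleigh scattering." },
    { "role": "user", "content": "How is that different from Mie scattering?" }
  ]
}'
```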

docs/openai.md
@@ -59,6 +59,40 @@ embeddings = client.embeddings.create(
     input=["why is the sky blue?", "why is the grass green?"],
 )
 ```
+
+#### Structured outputs
+
+```py
+from pydantic import BaseModel
+from openai import OpenAI
+
+client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
+
+# Define the schema for the response
+class FriendInfo(BaseModel):
+    name: str
+    age: int
+    is_available: bool
+
+class FriendList(BaseModel):
+    friends: list[FriendInfo]
+
+try:
+    completion = client.beta.chat.completions.parse(
+        temperature=0,
+        model="llama3.1:8b",
+        messages=[
+            {"role": "user", "content": "I have two friends. The first is Ollama 22 years old busy saving the world, and the second is Alonso 23 years old and wants to hang out. Return a list of friends in JSON format"}
+        ],
+        response_format=FriendList,
+    )
+
+    friends_response = completion.choices[0].message
+    if friends_response.parsed:
+        print(friends_response.parsed)
+    elif friends_response.refusal:
+        print(friends_response.refusal)
+except Exception as e:
+    print(f"Error: {e}")
+```
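
On a successful call, `friends_response.parsed` holds a validated `FriendList` instance built from the model's JSON, while `friends_response.refusal` carries the refusal text if the model declines; validation errors raised by `parse` land in the `except` branch.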

 ### OpenAI JavaScript library

examples/README.md
@@ -1,3 +1,14 @@
 # Examples

 This directory contains different examples of using Ollama.
+
+## Python examples
+Ollama Python examples are at [ollama-python/examples](https://github.com/ollama/ollama-python/tree/main/examples).
+
+## JavaScript examples
+Ollama JavaScript examples are at [ollama-js/examples](https://github.com/ollama/ollama-js/tree/main/examples).
+
+## OpenAI compatibility examples
+Ollama OpenAI compatibility examples are at [ollama/examples/openai](../docs/openai.md).