diff --git a/README.md b/README.md
index ea76eedfb..8547445ae 100644
--- a/README.md
+++ b/README.md
@@ -357,7 +357,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt) (Chrome Extension to manage open-source models supported by Ollama, create custom models, and chat with models from a user-friendly UI)
 - [VT](https://github.com/vinhnx/vt.ai) (A minimal multimodal AI chat app, with dynamic conversation routing. Supports local models via Ollama)
 - [Nosia](https://github.com/nosia-ai/nosia) (Easy to install and use RAG platform based on Ollama)
-- [Witsy](https://github.com/nbonamy/witsy) (An AI Desktop application avaiable for Mac/Windows/Linux)
+- [Witsy](https://github.com/nbonamy/witsy) (An AI Desktop application available for Mac/Windows/Linux)
 - [Abbey](https://github.com/US-Artificial-Intelligence/abbey) (A configurable AI interface server with notebooks, document storage, and YouTube support)
 - [Minima](https://github.com/dmayboroda/minima) (RAG with on-premises or fully local workflow)
diff --git a/app/tray/wintray/eventloop.go b/app/tray/wintray/eventloop.go
index 157828a36..35608a49e 100644
--- a/app/tray/wintray/eventloop.go
+++ b/app/tray/wintray/eventloop.go
@@ -98,7 +98,7 @@ func (t *winTray) wndProc(hWnd windows.Handle, message uint32, wParam, lParam ui
 		}
 		err = t.wcex.unregister()
 		if err != nil {
-			slog.Error(fmt.Sprintf("failed to uregister windo %s", err))
+			slog.Error(fmt.Sprintf("failed to unregister window %s", err))
 		}
 	case WM_DESTROY:
 		// same as WM_ENDSESSION, but throws 0 exit code after all
diff --git a/convert/sentencepiece/sentencepiece_model.pb.go b/convert/sentencepiece/sentencepiece_model.pb.go
index 5c8db9bce..6bf668911 100644
--- a/convert/sentencepiece/sentencepiece_model.pb.go
+++ b/convert/sentencepiece/sentencepiece_model.pb.go
@@ -331,7 +331,7 @@ type TrainerSpec struct {
 	// Reserved special meta tokens.
 	// * -1 is not used.
 	// * unk_id must not be -1.
-	// Id must starts with 0 and be contigous.
+	// Id must start with 0 and be contiguous.
 	UnkId *int32 `protobuf:"varint,40,opt,name=unk_id,json=unkId,def=0" json:"unk_id,omitempty"` //
 	BosId *int32 `protobuf:"varint,41,opt,name=bos_id,json=bosId,def=1" json:"bos_id,omitempty"` //
 	EosId *int32 `protobuf:"varint,42,opt,name=eos_id,json=eosId,def=2" json:"eos_id,omitempty"` //
diff --git a/convert/sentencepiece_model.proto b/convert/sentencepiece_model.proto
index 5dc02d6cf..370887a4a 100644
--- a/convert/sentencepiece_model.proto
+++ b/convert/sentencepiece_model.proto
@@ -213,7 +213,7 @@ message TrainerSpec {
 	// Reserved special meta tokens.
 	// * -1 is not used.
 	// * unk_id must not be -1.
-	// Id must starts with 0 and be contigous.
+	// Id must start with 0 and be contiguous.
 	optional int32 unk_id = 40 [default = 0];  //
 	optional int32 bos_id = 41 [default = 1];  //
 	optional int32 eos_id = 42 [default = 2];  //
diff --git a/discover/gpu_windows.go b/discover/gpu_windows.go
index e7665a63c..2dc2f0746 100644
--- a/discover/gpu_windows.go
+++ b/discover/gpu_windows.go
@@ -209,7 +209,7 @@ func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
 		}
 	}
 
-	// Sumarize the results
+	// Summarize the results
 	for i, pkg := range packages {
 		slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
 	}
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index 24e8e962f..28f4350aa 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -80,7 +80,7 @@ If you are using a container to run Ollama, make sure you've set up the containe
 Sometimes the Ollama can have difficulties initializing the GPU. When you check the server logs, this can show up as various error codes, such as "3" (not initialized), "46" (device unavailable), "100" (no device), "999" (unknown), or others. The following troubleshooting techniques may help resolve the problem
 
-- If you are using a container, is the container runtime working? Try `docker run --gpus all ubuntu nvidia-smi` - if this doesn't work, Ollama wont be able to see your NVIDIA GPU.
+- If you are using a container, is the container runtime working? Try `docker run --gpus all ubuntu nvidia-smi` - if this doesn't work, Ollama won't be able to see your NVIDIA GPU.
 - Is the uvm driver loaded? `sudo nvidia-modprobe -u`
 - Try reloading the nvidia_uvm driver - `sudo rmmod nvidia_uvm` then `sudo modprobe nvidia_uvm`
 - Try rebooting
diff --git a/examples/python-grounded-factuality-rag-check/README.md b/examples/python-grounded-factuality-rag-check/README.md
index cd72071cb..868b16230 100644
--- a/examples/python-grounded-factuality-rag-check/README.md
+++ b/examples/python-grounded-factuality-rag-check/README.md
@@ -1,6 +1,6 @@
 # RAG Hallucination Checker using Bespoke-Minicheck
 
-This example allows the user to ask questions related to a document, which can be specified via an article url. Relevant chunks are retreived from the document and given to `llama3.2` as context to answer the question. Then each sentence in the answer is checked against the retrieved chunks using `bespoke-minicheck` to ensure that the answer does not contain hallucinations.
+This example allows the user to ask questions related to a document, which can be specified via an article url. Relevant chunks are retrieved from the document and given to `llama3.2` as context to answer the question. Then each sentence in the answer is checked against the retrieved chunks using `bespoke-minicheck` to ensure that the answer does not contain hallucinations.
 
 ## Running the Example
diff --git a/examples/python-grounded-factuality-rag-check/main.py b/examples/python-grounded-factuality-rag-check/main.py
index eab0b670e..dd18f3efa 100644
--- a/examples/python-grounded-factuality-rag-check/main.py
+++ b/examples/python-grounded-factuality-rag-check/main.py
@@ -115,7 +115,7 @@ if __name__ == "__main__":
 
     print(f"\nRetrieved chunks: \n{sourcetext}\n")
 
-    # Give the retreived chunks and question to the chat model
+    # Give the retrieved chunks and question to the chat model
     system_prompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"
 
     ollama_response = ollama.generate(
diff --git a/integration/concurrency_test.go b/integration/concurrency_test.go
index 165231cd0..78e3b5ab6 100644
--- a/integration/concurrency_test.go
+++ b/integration/concurrency_test.go
@@ -207,7 +207,7 @@ func TestMultiModelStress(t *testing.T) {
 		chosenModels = mediumModels
 	// default:
 	// 	slog.Info("selecting large models")
-	// 	chosenModels = largModels
+	// 	chosenModels = largeModels
 	}
 
 	req, resp := GenerateRequests()
@@ -232,7 +232,7 @@ func TestMultiModelStress(t *testing.T) {
 	var wg sync.WaitGroup
 	consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
 	for i := 0; i < len(req); i++ {
-		// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
+		// Always get at least 2 models, but don't overshoot VRAM too much or we'll take too long
 		if i > 1 && consumed > maxVram {
 			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
 			break
diff --git a/integration/max_queue_test.go b/integration/max_queue_test.go
index a27664303..1878d0dac 100644
--- a/integration/max_queue_test.go
+++ b/integration/max_queue_test.go
@@ -20,7 +20,7 @@ import (
 
 func TestMaxQueue(t *testing.T) {
 	if os.Getenv("OLLAMA_TEST_EXISTING") != "" {
-		t.Skip("Max Queue test requires spawing a local server so we can adjust the queue size")
+		t.Skip("Max Queue test requires spawning a local server so we can adjust the queue size")
 		return
 	}
 
@@ -67,7 +67,7 @@ func TestMaxQueue(t *testing.T) {
 	busyCount := 0
 	resetByPeerCount := 0
 	canceledCount := 0
-	succesCount := 0
+	successCount := 0
 	counterMu := sync.Mutex{}
 	var embedwg sync.WaitGroup
 	for i := 0; i < threadCount; i++ {
@@ -88,7 +88,7 @@ func TestMaxQueue(t *testing.T) {
 			defer counterMu.Unlock()
 			switch {
 			case genErr == nil:
-				succesCount++
+				successCount++
 				require.Greater(t, len(resp.Embedding), 5) // somewhat arbitrary, but sufficient to be reasonable
 			case errors.Is(genErr, context.Canceled):
 				canceledCount++
@@ -107,7 +107,7 @@ func TestMaxQueue(t *testing.T) {
 	slog.Info("generate done, waiting for embeds")
 	embedwg.Wait()
 
-	slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
+	slog.Info("embeds completed", "success", successCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
 	require.Equal(t, resetByPeerCount, 0, "Connections reset by peer, have you updated your fd and socket limits?")
 	require.True(t, busyCount > 0, "no requests hit busy error but some should have")
 	require.True(t, canceledCount == 0, "no requests should have been canceled due to timeout")
diff --git a/llm/memory_test.go b/llm/memory_test.go
index c4209ded5..04abaabee 100644
--- a/llm/memory_test.go
+++ b/llm/memory_test.go
@@ -71,7 +71,7 @@ func TestEstimateGPULayers(t *testing.T) {
 	projectorSize := uint64(0)
 	memoryLayerOutput := uint64(4)
 
-	// Dual CUDA scenario with assymetry
+	// Dual CUDA scenario with asymmetry
 	gpuMinimumMemory := uint64(2048)
 	gpus = []discover.GpuInfo{
 		{
diff --git a/parser/parser_test.go b/parser/parser_test.go
index deadafd08..b5614c2ed 100644
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@@ -568,7 +568,7 @@ PARAMETER param1 value1
 PARAMETER param2 value2
 TEMPLATE template1
 MESSAGE system """
-You are a store greeter. Always responsed with "Hello!".
+You are a store greeter. Always respond with "Hello!".
 """
 MESSAGE user Hey there!
 MESSAGE assistant Hello, I want to parse all the things!
@@ -586,7 +586,7 @@ PARAMETER param1 value1
 PARAMETER param2 value2
 TEMPLATE template1
 MESSAGE system """
-You are a store greeter. Always responsed with "Hello!".
+You are a store greeter. Always respond with "Hello!".
 """
 MESSAGE user Hey there!
 MESSAGE assistant Hello, I want to parse all the things!
diff --git a/server/sched.go b/server/sched.go
index 1e34e4b9e..0da84182e 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -54,7 +54,7 @@ type Scheduler struct {
 var defaultModelsPerGPU = 3
 
 // Default automatic value for parallel setting
-// Model will still need to fit in VRAM. If this setting wont fit
+// Model will still need to fit in VRAM. If this setting won't fit
 // we'll back off down to 1 to try to get it to fit
 var defaultParallel = 4
 
@@ -501,7 +501,7 @@ func (s *Scheduler) updateFreeSpace(allGpus discover.GpuInfoList) {
 		} else if (allGpus[i].TotalMemory - p) < allGpus[i].FreeMemory { // predicted free is smaller than reported free, use it
 			// TODO maybe we should just always trust our numbers, since cuda's free memory reporting is laggy
 			// and we might unload models we didn't actually need to. The risk is if some other GPU intensive app is loaded
-			// after we start our first runner, then we'll never acount for that, so picking the smallest free value seems prudent.
+			// after we start our first runner, then we'll never account for that, so picking the smallest free value seems prudent.
 			allGpus[i].FreeMemory = allGpus[i].TotalMemory - p
 		}
 		slog.Info("updated VRAM based on existing loaded models", "gpu", allGpus[i].ID, "library", allGpus[i].Library, "total", format.HumanBytes2(allGpus[i].TotalMemory), "available", format.HumanBytes2(allGpus[i].FreeMemory))
@@ -683,7 +683,7 @@ func (a ByDuration) Less(i, j int) bool {
 // pickBestFullFitByLibrary will try to find the optimal placement of the model in the available GPUs where the model fully fits
 // The list of GPUs returned will always be the same brand (library)
 // If the model can not be fit fully within the available GPU(s) nil is returned
-// If numParallel is <= 0, this will attempt try to optimize parallism based on available VRAM, and adjust
+// If numParallel is <= 0, this will attempt to optimize parallelism based on available VRAM, and adjust
 // opts.NumCtx accordingly
 func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus discover.GpuInfoList, numParallel *int) discover.GpuInfoList {
 	var estimatedVRAM uint64
diff --git a/server/sched_test.go b/server/sched_test.go
index c999eee05..72baf15a2 100644
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -325,7 +325,7 @@ func TestRequestsMultipleLoadedModels(t *testing.T) {
 	require.Len(t, s.loaded, 3)
 	s.loadedMu.Unlock()
 
-	// Try to load a model that wont fit
+	// Try to load a model that won't fit
 	s.newServerFn = d.newServer
 	slog.Info("d")
 	s.loadedMu.Lock()
@@ -394,7 +394,7 @@ func TestGetRunner(t *testing.T) {
 	c.req.model.ModelPath = "bad path"
 	slog.Info("c")
 	successCh1c, errCh1c := s.GetRunner(c.ctx, c.req.model, c.req.opts, c.req.sessionDuration)
-	// Starts in pending channel, then should be quickly processsed to return an error
+	// Starts in pending channel, then should be quickly processed to return an error
 	time.Sleep(50 * time.Millisecond) // Long enough for the "a" model to expire and unload
 	require.Empty(t, successCh1c)
 	s.loadedMu.Lock()
diff --git a/server/testdata/tools/command-r-plus.out b/server/testdata/tools/command-r-plus.out
index 425af75ab..8193d40c9 100644
--- a/server/testdata/tools/command-r-plus.out
+++ b/server/testdata/tools/command-r-plus.out
@@ -6,7 +6,7 @@ The instructions in this section override those in the task description and styl
 You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.
 
 # User Preamble
-You are a knowledgable assistant. You can answer questions and perform tasks.
+You are a knowledgeable assistant. You can answer questions and perform tasks.
 
 ## Available Tools
 Here is a list of tools that you have available to you:
@@ -16,7 +16,7 @@ Here is a list of tools that you have available to you:
 def get_current_weather(format: string, location: string, ) -> List[Dict]:
     """Get the current weather
     Args:
-        format (string): The temperature unit to use. Infer this from the users location.
+        format (string): The temperature unit to use. Infer this from the user's location.
         location (string): The city and state, e.g. San Francisco, CA
     """
     pass
diff --git a/server/testdata/tools/firefunction.out b/server/testdata/tools/firefunction.out
index be50175ef..144f5e428 100644
--- a/server/testdata/tools/firefunction.out
+++ b/server/testdata/tools/firefunction.out
@@ -1,5 +1,5 @@
 <|start_header_id|>system<|end_header_id|>
-You are a knowledgable assistant. You can answer questions and perform tasks.
+You are a knowledgeable assistant. You can answer questions and perform tasks.
 In addition to plain text responses, you can chose to call one or more of the provided functions.
 
 Use the following rule to decide when to call a function:
@@ -14,4 +14,4 @@ If you decide to call functions:
 * make sure you pick the right functions that match the user intent
 
 Available functions as JSON spec:
-[{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the users location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}]<|eot_id|><|start_header_id|><|end_header_id|>You are a knowledgable assistant. You can answer questions and perform tasks.<|eot_id|><|start_header_id|>user<|end_header_id|>What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|> functools[{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}]<|eot_id|><|start_header_id|>tool<|end_header_id|>22<|eot_id|><|start_header_id|>assistant<|end_header_id|>The current temperature in Paris, France is 22 degrees Celsius.<|eot_id|><|start_header_id|>user<|end_header_id|>What's the weather like today in San Francisco and Toronto?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
\ No newline at end of file
+[{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}]<|eot_id|><|start_header_id|><|end_header_id|>You are a knowledgeable assistant. You can answer questions and perform tasks.<|eot_id|><|start_header_id|>user<|end_header_id|>What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|> functools[{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}]<|eot_id|><|start_header_id|>tool<|end_header_id|>22<|eot_id|><|start_header_id|>assistant<|end_header_id|>The current temperature in Paris, France is 22 degrees Celsius.<|eot_id|><|start_header_id|>user<|end_header_id|>What's the weather like today in San Francisco and Toronto?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
\ No newline at end of file
diff --git a/server/testdata/tools/llama3-groq-tool-use.out b/server/testdata/tools/llama3-groq-tool-use.out
index 75a495582..912ad11ca 100644
--- a/server/testdata/tools/llama3-groq-tool-use.out
+++ b/server/testdata/tools/llama3-groq-tool-use.out
@@ -1,12 +1,12 @@
 <|start_header_id|>system<|end_header_id|>
 
-You are a knowledgable assistant. You can answer questions and perform tasks. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+You are a knowledgeable assistant. You can answer questions and perform tasks. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
 <tool_call>
 {"name": <function-name>,"arguments": <args-dict>}
 </tool_call>
 
 Here are the available tools:
-<tools> {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the users location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}} </tools><|eot_id|><|start_header_id|>user<|end_header_id|>
+<tools> {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}} </tools><|eot_id|><|start_header_id|>user<|end_header_id|>
 
 What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 
diff --git a/server/testdata/tools/messages.json b/server/testdata/tools/messages.json
index 1a3d1f56c..42de4711c 100644
--- a/server/testdata/tools/messages.json
+++ b/server/testdata/tools/messages.json
@@ -1,7 +1,7 @@
 [
   {
     "role": "system",
-    "content": "You are a knowledgable assistant. You can answer questions and perform tasks."
+    "content": "You are a knowledgeable assistant. You can answer questions and perform tasks."
   },
   {
     "role": "user",
diff --git a/server/testdata/tools/mistral.out b/server/testdata/tools/mistral.out
index 31d8cdd62..6956e3920 100644
--- a/server/testdata/tools/mistral.out
+++ b/server/testdata/tools/mistral.out
@@ -1,3 +1,3 @@
-[INST] What's the weather like today in Paris?[/INST][TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}][TOOL_RESULTS] {"content": 22}[/TOOL_RESULTS] The current temperature in Paris, France is 22 degrees Celsius.[AVAILABLE_TOOLS] [{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the users location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}][/AVAILABLE_TOOLS][INST] You are a knowledgable assistant. You can answer questions and perform tasks.
+[INST] What's the weather like today in Paris?[/INST][TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}][TOOL_RESULTS] {"content": 22}[/TOOL_RESULTS] The current temperature in Paris, France is 22 degrees Celsius.[AVAILABLE_TOOLS] [{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}][/AVAILABLE_TOOLS][INST] You are a knowledgeable assistant. You can answer questions and perform tasks.
 
 What's the weather like today in San Francisco and Toronto?[/INST]
\ No newline at end of file
diff --git a/server/testdata/tools/nemotron.out b/server/testdata/tools/nemotron.out
index 2166b2026..486889ca1 100644
--- a/server/testdata/tools/nemotron.out
+++ b/server/testdata/tools/nemotron.out
@@ -1,8 +1,8 @@
 System
-You are a knowledgable assistant. You can answer questions and perform tasks.
+You are a knowledgeable assistant. You can answer questions and perform tasks.
 
 
- {"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the users location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
+ {"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
 
 
 User
diff --git a/server/testdata/tools/tools.json b/server/testdata/tools/tools.json
index 17260bf83..edde4ae0b 100644
--- a/server/testdata/tools/tools.json
+++ b/server/testdata/tools/tools.json
@@ -17,7 +17,7 @@
           "celsius",
           "fahrenheit"
         ],
-        "description": "The temperature unit to use. Infer this from the users location."
+        "description": "The temperature unit to use. Infer this from the user's location."
         }
       },
       "required": [
diff --git a/server/testdata/tools/xlam.out b/server/testdata/tools/xlam.out
index a4a9952fc..5d8065327 100644
--- a/server/testdata/tools/xlam.out
+++ b/server/testdata/tools/xlam.out
@@ -1,4 +1,4 @@
-You are a knowledgable assistant. You can answer questions and perform tasks.
+You are a knowledgeable assistant. You can answer questions and perform tasks.
 ### Instruction:
 What's the weather like today in Paris?
 ### Response:
@@ -16,7 +16,7 @@ If the given question lacks the parameters required by the function, also point
 [END OF TASK INSTRUCTION]
 
 [BEGIN OF AVAILABLE TOOLS]
-[{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the users location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}]
+[{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}]
 [END OF AVAILABLE TOOLS]
 
 [BEGIN OF FORMAT INSTRUCTION]