llama: update llama.cpp vendor code to commit d7cfe1ff (#9356)
This commit is contained in:
parent 2db96c18e7
commit d7d7e99662
149 changed files with 18215 additions and 11009 deletions
llama/llama.cpp/src/llama-context.cpp (vendored): 5 changed lines
@@ -1,5 +1,8 @@
 #include "llama-context.h"
 
+#include "llama-impl.h"
+#include "llama-mmap.h"
+
 #include <cassert>
 #include <cmath>
 #include <cstring>
@@ -513,7 +516,7 @@ size_t llama_output_reserve(struct llama_context & lctx, size_t n_outputs) {
 
     auto * buft = ggml_backend_cpu_buffer_type();
     // try to use the host buffer of the device where the output tensor is allocated for faster transfer to system memory
-    auto * output_dev = lctx.model.dev_output.dev;
+    auto * output_dev = lctx.model.dev_output();
     auto * output_dev_host_buft = output_dev ? ggml_backend_dev_host_buffer_type(output_dev) : nullptr;
     if (output_dev_host_buft) {
         buft = output_dev_host_buft;
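For context, the buffer selection touched by this hunk can be read in isolation roughly as follows. This is a minimal sketch written against the public ggml-backend API as I understand it; pick_output_buft is a hypothetical helper name and is not part of the vendored code.

// Sketch of the output-buffer-type selection shown in the hunk above.
// pick_output_buft() is a hypothetical helper for illustration only.
#include "ggml-backend.h"

static ggml_backend_buffer_type_t pick_output_buft(ggml_backend_dev_t output_dev) {
    // default to a plain CPU buffer for the output tensors
    ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();

    // prefer the host (pinned) buffer type of the device that holds the output
    // tensor, so copying results back to system memory is faster
    ggml_backend_buffer_type_t host_buft =
        output_dev ? ggml_backend_dev_host_buffer_type(output_dev) : nullptr;
    if (host_buft) {
        buft = host_buft;
    }
    return buft;
}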
|
Loading…
Add table
Add a link
Reference in a new issue