llama: update llama.cpp vendor code to commit d7cfe1ff (#9356)
This commit is contained in:
parent 2db96c18e7
commit d7d7e99662
149 changed files with 18215 additions and 11009 deletions
llama/llama.cpp/src/llama-context.cpp (vendored): 5 changed lines
@@ -1,5 +1,8 @@
 #include "llama-context.h"
 
+#include "llama-impl.h"
+#include "llama-mmap.h"
+
 #include <cassert>
 #include <cmath>
 #include <cstring>
@@ -513,7 +516,7 @@ size_t llama_output_reserve(struct llama_context & lctx, size_t n_outputs) {
 
     auto * buft = ggml_backend_cpu_buffer_type();
     // try to use the host buffer of the device where the output tensor is allocated for faster transfer to system memory
-    auto * output_dev = lctx.model.dev_output.dev;
+    auto * output_dev = lctx.model.dev_output();
     auto * output_dev_host_buft = output_dev ? ggml_backend_dev_host_buffer_type(output_dev) : nullptr;
     if (output_dev_host_buft) {
         buft = output_dev_host_buft;
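For context, the buffer selection touched by this hunk can be read in isolation roughly as follows. This is a minimal sketch written against the public ggml-backend API as I understand it; pick_output_buft is a hypothetical helper name and is not part of the vendored code.

// Sketch of the output-buffer-type selection shown in the hunk above.
// pick_output_buft() is a hypothetical helper for illustration only.
#include "ggml-backend.h"

static ggml_backend_buffer_type_t pick_output_buft(ggml_backend_dev_t output_dev) {
    // default to a plain CPU buffer for the output tensors
    ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();

    // prefer the host (pinned) buffer type of the device that holds the output
    // tensor, so copying results back to system memory is faster
    ggml_backend_buffer_type_t host_buft =
        output_dev ? ggml_backend_dev_host_buffer_type(output_dev) : nullptr;
    if (host_buft) {
        buft = host_buft;
    }
    return buft;
}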
|
Loading…
Add table
Add a link
Reference in a new issue