llama: update llama.cpp vendor code to commit d7cfe1ff (#9356)

This commit is contained in:
Jeffrey Morgan 2025-02-26 20:34:44 -08:00 committed by GitHub
parent 2db96c18e7
commit d7d7e99662
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
149 changed files with 18215 additions and 11009 deletions

View file

@ -1,5 +1,8 @@
#include "llama-context.h"
#include "llama-impl.h"
#include "llama-mmap.h"
#include <cassert>
#include <cmath>
#include <cstring>
@ -513,7 +516,7 @@ size_t llama_output_reserve(struct llama_context & lctx, size_t n_outputs) {
auto * buft = ggml_backend_cpu_buffer_type();
// try to use the host buffer of the device where the output tensor is allocated for faster transfer to system memory
auto * output_dev = lctx.model.dev_output.dev;
auto * output_dev = lctx.model.dev_output();
auto * output_dev_host_buft = output_dev ? ggml_backend_dev_host_buffer_type(output_dev) : nullptr;
if (output_dev_host_buft) {
buft = output_dev_host_buft;