ollama/llama/patches/0001-cuda.patch
2024-12-20 16:56:03 -08:00

55 lines
2.1 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: jmorganca <jmorganca@gmail.com>
Date: Thu, 6 Jun 2024 23:55:47 -0700
Subject: [PATCH] cuda
---
ggml/src/ggml-backend.cpp | 2 +-
ggml/src/ggml-cuda/ggml-cuda.cu | 1 +
ggml/src/ggml-metal/ggml-metal.m | 1 +
3 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index fdb4b986..731e4078 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -106,7 +106,6 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
if (buffer->iface.free_buffer != NULL) {
buffer->iface.free_buffer(buffer);
}
- delete buffer;
}
size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
@@ -1862,6 +1861,7 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
ggml_aligned_free(buffer->context, buffer->size);
+ free(buffer);
}
static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index d6e4bfdd..a2fcfe5d 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -424,6 +424,7 @@ struct ggml_backend_cuda_buffer_context {
static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) {
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
delete ctx;
+ delete buffer;
}
static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer) {
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
index 093ae900..a0cf4ba4 100644
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -4035,6 +4035,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
}
free(ctx);
+ free(buffer);
}
static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) {