mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 10:26:53 +02:00
When ggml_backend_buffer_free() is called, the device memory is released but not all backends consistently release the actual ggml_backend_buffer_t in system RAM, causing a memory leak. Bug #10040
210 lines
9 KiB
Diff
210 lines
9 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: jmorganca <jmorganca@gmail.com>
|
|
Date: Thu, 6 Jun 2024 23:55:47 -0700
|
|
Subject: [PATCH] ggml-backend: malloc and free using the same compiler
|
|
|
|
On Windows, the CUDA backend must be compiled with MSVC but generic
|
|
portions compiled with CGo use either GCC or Clang. Since
|
|
ggml_backend_buffer_t spans these two components, it can be allocated
|
|
and freed using different compilers. Specifically, it is malloced by
|
|
MSVC and freed by Clang, which can cause problems.
|
|
|
|
This moves freeing of the buffers into the backends to avoid the
|
|
problem.
|
|
---
|
|
ggml/src/ggml-backend.cpp | 9 +++++++--
|
|
ggml/src/ggml-cann/ggml-cann.cpp | 2 ++
|
|
ggml/src/ggml-cuda/ggml-cuda.cu | 3 +++
|
|
ggml/src/ggml-kompute/ggml-kompute.cpp | 1 +
|
|
ggml/src/ggml-metal/ggml-metal.m | 1 +
|
|
ggml/src/ggml-opencl/ggml-opencl.cpp | 1 +
|
|
ggml/src/ggml-rpc/ggml-rpc.cpp | 1 +
|
|
ggml/src/ggml-sycl/ggml-sycl.cpp | 3 +++
|
|
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 2 ++
|
|
9 files changed, 21 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
|
|
index dba7be33..65e150d6 100644
|
|
--- a/ggml/src/ggml-backend.cpp
|
|
+++ b/ggml/src/ggml-backend.cpp
|
|
@@ -106,7 +106,6 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
|
|
if (buffer->iface.free_buffer != NULL) {
|
|
buffer->iface.free_buffer(buffer);
|
|
}
|
|
- delete buffer;
|
|
}
|
|
|
|
size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
|
|
@@ -542,6 +541,7 @@ static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer)
|
|
|
|
free(ctx->buffers);
|
|
free(ctx);
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
|
@@ -1865,6 +1865,11 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
|
|
static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_aligned_free(buffer->context, buffer->size);
|
|
+ delete buffer;
|
|
+}
|
|
+
|
|
+static void ggml_backend_cpu_ptr_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
|
@@ -1912,7 +1917,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
|
|
};
|
|
|
|
static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
|
|
- /* .free_buffer = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
|
|
+ /* .free_buffer = */ ggml_backend_cpu_ptr_buffer_free_buffer, // ptr is not owned by the buffer but need to free the buffer itself
|
|
/* .get_base = */ ggml_backend_cpu_buffer_get_base,
|
|
/* .init_tensor = */ NULL, // no initialization required
|
|
/* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
|
|
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
|
|
index d410c024..a207ab1e 100644
|
|
--- a/ggml/src/ggml-cann/ggml-cann.cpp
|
|
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
|
|
@@ -530,6 +530,7 @@ static void ggml_backend_cann_buffer_free_buffer(
|
|
ggml_backend_cann_buffer_context* ctx =
|
|
(ggml_backend_cann_buffer_context*)buffer->context;
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
/**
|
|
@@ -1198,6 +1199,7 @@ static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buf
|
|
*/
|
|
static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
|
|
ACL_CHECK(aclrtFreeHost(buffer->context));
|
|
+ delete buffer;
|
|
}
|
|
|
|
/**
|
|
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
|
index ebb2ccae..dfff21a2 100644
|
|
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
|
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
|
@@ -529,6 +529,7 @@ struct ggml_backend_cuda_buffer_context {
|
|
static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer) {
|
|
@@ -783,6 +784,7 @@ struct ggml_backend_cuda_split_buffer_context {
|
|
static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
@@ -1055,6 +1057,7 @@ static const char * ggml_backend_cuda_host_buffer_type_name(ggml_backend_buffer_
|
|
|
|
static void ggml_backend_cuda_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
CUDA_CHECK(cudaFreeHost(buffer->context));
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_cuda_host_malloc(size_t size) {
|
|
diff --git a/ggml/src/ggml-kompute/ggml-kompute.cpp b/ggml/src/ggml-kompute/ggml-kompute.cpp
|
|
index 50579227..2799a0a5 100644
|
|
--- a/ggml/src/ggml-kompute/ggml-kompute.cpp
|
|
+++ b/ggml/src/ggml-kompute/ggml-kompute.cpp
|
|
@@ -1911,6 +1911,7 @@ static void ggml_backend_kompute_buffer_free_buffer(ggml_backend_buffer_t buffer
|
|
ggml_vk_free_memory(*memory);
|
|
}
|
|
delete memory;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_kompute_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
|
|
index c550142a..fd9a4e77 100644
|
|
--- a/ggml/src/ggml-metal/ggml-metal.m
|
|
+++ b/ggml/src/ggml-metal/ggml-metal.m
|
|
@@ -4350,6 +4350,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
|
|
}
|
|
|
|
free(ctx);
|
|
+ free(buffer);
|
|
}
|
|
|
|
static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
|
|
index f5906246..062e93b8 100644
|
|
--- a/ggml/src/ggml-opencl/ggml-opencl.cpp
|
|
+++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
|
|
@@ -1203,6 +1203,7 @@ static void * const cl_ptr_base = (void *)(uintptr_t) 0x1000;
|
|
static void ggml_backend_opencl_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp
|
|
index 97873acc..893ee0b9 100644
|
|
--- a/ggml/src/ggml-rpc/ggml-rpc.cpp
|
|
+++ b/ggml/src/ggml-rpc/ggml-rpc.cpp
|
|
@@ -419,6 +419,7 @@ static void ggml_backend_rpc_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
bool status = send_rpc_cmd(ctx->sock, RPC_CMD_FREE_BUFFER, &request, sizeof(request), nullptr, 0);
|
|
GGML_ASSERT(status);
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_rpc_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
|
|
index 792e0569..5e233e8b 100644
|
|
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
|
|
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
|
|
@@ -311,6 +311,7 @@ ggml_backend_sycl_buffer_free_buffer(ggml_backend_buffer_t buffer) try {
|
|
ggml_sycl_set_device(ctx->device);
|
|
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
catch (sycl::exception const &exc) {
|
|
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
|
@@ -720,6 +721,7 @@ struct ggml_backend_sycl_split_buffer_context {
|
|
static void ggml_backend_sycl_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
@@ -1053,6 +1055,7 @@ static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_
|
|
|
|
static void ggml_backend_sycl_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_sycl_host_free(buffer->context);
|
|
+ delete buffer;
|
|
}
|
|
|
|
static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
index abe3e790..1dad714b 100644
|
|
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
@@ -7914,6 +7914,7 @@ static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
|
ggml_vk_destroy_buffer(ctx->dev_buffer);
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
@@ -8056,6 +8057,7 @@ static const char * ggml_backend_vk_host_buffer_name(ggml_backend_buffer_t buffe
|
|
static void ggml_backend_vk_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
VK_LOG_MEMORY("ggml_backend_vk_host_buffer_free_buffer()");
|
|
ggml_vk_host_free(vk_instance.devices[0], buffer->context);
|
|
+ delete buffer;
|
|
}
|
|
|
|
static ggml_backend_buffer_t ggml_backend_vk_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|