mirror of
https://github.com/ollama/ollama.git
synced 2025-05-10 18:06:33 +02:00
210 lines
9 KiB
Diff
210 lines
9 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: jmorganca <jmorganca@gmail.com>
|
|
Date: Thu, 6 Jun 2024 23:55:47 -0700
|
|
Subject: [PATCH] ggml-backend: malloc and free using the same compiler
|
|
|
|
On Windows, the CUDA backend must be compiled with MSVC but generic
|
|
portions compiled with CGo use either GCC or Clang. Since
|
|
ggml_backend_buffer_t spans these two components, it can be allocated
|
|
and freed using different compilers. Specifically, it is malloced by
|
|
MSVC and freed by Clang, which can corrupt the heap or crash when the two runtimes use different allocators.
|
|
|
|
This moves freeing of the buffers into the backends to avoid the
|
|
problem.
|
|
---
|
|
ggml/src/ggml-backend.cpp | 9 +++++++--
|
|
ggml/src/ggml-cann/ggml-cann.cpp | 2 ++
|
|
ggml/src/ggml-cuda/ggml-cuda.cu | 3 +++
|
|
ggml/src/ggml-kompute/ggml-kompute.cpp | 1 +
|
|
ggml/src/ggml-metal/ggml-metal.m | 1 +
|
|
ggml/src/ggml-opencl/ggml-opencl.cpp | 1 +
|
|
ggml/src/ggml-rpc/ggml-rpc.cpp | 1 +
|
|
ggml/src/ggml-sycl/ggml-sycl.cpp | 3 +++
|
|
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 2 ++
|
|
9 files changed, 21 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
|
|
index 273075f4..dd11f304 100644
|
|
--- a/ggml/src/ggml-backend.cpp
|
|
+++ b/ggml/src/ggml-backend.cpp
|
|
@@ -107,7 +107,6 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
|
|
if (buffer->iface.free_buffer != NULL) {
|
|
buffer->iface.free_buffer(buffer);
|
|
}
|
|
- delete buffer;
|
|
}
|
|
|
|
size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
|
|
@@ -544,6 +543,7 @@ static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer)
|
|
|
|
free(ctx->buffers);
|
|
free(ctx);
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
|
@@ -1867,6 +1867,11 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
|
|
static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_aligned_free(buffer->context, buffer->size);
|
|
+ delete buffer;
|
|
+}
|
|
+
|
|
+static void ggml_backend_cpu_ptr_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
|
@@ -1914,7 +1919,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
|
|
};
|
|
|
|
static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
|
|
- /* .free_buffer = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
|
|
+ /* .free_buffer = */ ggml_backend_cpu_ptr_buffer_free_buffer, // ptr is not owned by the buffer, but the buffer object itself still needs to be freed
|
|
/* .get_base = */ ggml_backend_cpu_buffer_get_base,
|
|
/* .init_tensor = */ NULL, // no initialization required
|
|
/* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
|
|
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
|
|
index e2617b06..242e50a7 100644
|
|
--- a/ggml/src/ggml-cann/ggml-cann.cpp
|
|
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
|
|
@@ -800,6 +800,7 @@ static void ggml_backend_cann_buffer_free_buffer(
|
|
ggml_backend_cann_buffer_context* ctx =
|
|
(ggml_backend_cann_buffer_context*)buffer->context;
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
/**
|
|
@@ -1472,6 +1473,7 @@ static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buf
|
|
*/
|
|
static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
|
|
ACL_CHECK(aclrtFreeHost(buffer->context));
|
|
+ delete buffer;
|
|
}
|
|
|
|
/**
|
|
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
|
index 9fb2134f..04ce764e 100644
|
|
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
|
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
|
@@ -534,6 +534,7 @@ struct ggml_backend_cuda_buffer_context {
|
|
static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer) {
|
|
@@ -789,6 +790,7 @@ struct ggml_backend_cuda_split_buffer_context {
|
|
static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
@@ -1062,6 +1064,7 @@ static const char * ggml_backend_cuda_host_buffer_type_name(ggml_backend_buffer_
|
|
|
|
static void ggml_backend_cuda_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
CUDA_CHECK(cudaFreeHost(buffer->context));
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_cuda_host_malloc(size_t size) {
|
|
diff --git a/ggml/src/ggml-kompute/ggml-kompute.cpp b/ggml/src/ggml-kompute/ggml-kompute.cpp
|
|
index 50579227..2799a0a5 100644
|
|
--- a/ggml/src/ggml-kompute/ggml-kompute.cpp
|
|
+++ b/ggml/src/ggml-kompute/ggml-kompute.cpp
|
|
@@ -1911,6 +1911,7 @@ static void ggml_backend_kompute_buffer_free_buffer(ggml_backend_buffer_t buffer
|
|
ggml_vk_free_memory(*memory);
|
|
}
|
|
delete memory;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_kompute_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
|
|
index d92392ed..425524d0 100644
|
|
--- a/ggml/src/ggml-metal/ggml-metal.m
|
|
+++ b/ggml/src/ggml-metal/ggml-metal.m
|
|
@@ -5077,6 +5077,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
|
|
}
|
|
|
|
free(ctx);
|
|
+ free(buffer);
|
|
}
|
|
|
|
static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
|
|
index 05a2f4e6..392cc18d 100644
|
|
--- a/ggml/src/ggml-opencl/ggml-opencl.cpp
|
|
+++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
|
|
@@ -1940,6 +1940,7 @@ struct ggml_backend_opencl_buffer_context {
|
|
static void ggml_backend_opencl_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp
|
|
index 140a775f..e33c4ba0 100644
|
|
--- a/ggml/src/ggml-rpc/ggml-rpc.cpp
|
|
+++ b/ggml/src/ggml-rpc/ggml-rpc.cpp
|
|
@@ -477,6 +477,7 @@ static void ggml_backend_rpc_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
bool status = send_rpc_cmd(ctx->sock, RPC_CMD_FREE_BUFFER, &request, sizeof(request), nullptr, 0);
|
|
GGML_ASSERT(status);
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_rpc_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
|
|
index 66b6f2cc..e3e6deae 100644
|
|
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
|
|
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
|
|
@@ -317,6 +317,7 @@ ggml_backend_sycl_buffer_free_buffer(ggml_backend_buffer_t buffer) try {
|
|
ggml_sycl_set_device(ctx->device);
|
|
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
catch (sycl::exception const &exc) {
|
|
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
|
@@ -762,6 +763,7 @@ struct ggml_backend_sycl_split_buffer_context {
|
|
static void ggml_backend_sycl_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
@@ -1096,6 +1098,7 @@ static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_
|
|
|
|
static void ggml_backend_sycl_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_sycl_host_free(buffer->context);
|
|
+ delete buffer;
|
|
}
|
|
|
|
static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
index c0bdb9e1..03d03064 100644
|
|
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
@@ -8660,6 +8660,7 @@ static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
|
ggml_vk_destroy_buffer(ctx->dev_buffer);
|
|
delete ctx;
|
|
+ delete buffer;
|
|
}
|
|
|
|
static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
@@ -8803,6 +8804,7 @@ static const char * ggml_backend_vk_host_buffer_name(ggml_backend_buffer_t buffe
|
|
static void ggml_backend_vk_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
VK_LOG_MEMORY("ggml_backend_vk_host_buffer_free_buffer()");
|
|
ggml_vk_host_free(vk_instance.devices[0], buffer->context);
|
|
+ delete buffer;
|
|
}
|
|
|
|
static ggml_backend_buffer_t ggml_backend_vk_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|