mirror of
https://github.com/ollama/ollama.git
synced 2025-05-10 18:06:33 +02:00
* Move quantization logic to GGML via new backend This moves the model-aware logic to Go code and calls GGML's quantization code for model creation. * Remove "add model quantizations" This is no longer needed now that quantization is implemented in Go+GGML code directly.
38 lines
1.6 KiB
Diff
38 lines
1.6 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Jesse Gross <jesse@kernel.org>
|
|
Date: Thu, 1 May 2025 13:46:10 -0700
|
|
Subject: [PATCH] ggml: Don't assert fail when tensor data changes (#13222)
|
|
|
|
The following scenario will cause an assertion failure in the graph
|
|
allocator:
|
|
- Build and allocate a graph containing a tensor with a non-NULL data
|
|
pointer
|
|
- Build and allocate a new graph where that data is NULL
|
|
|
|
Result:
|
|
ggml-alloc.c:819: GGML_ASSERT(talloc->buffer_id >= 0) failed
|
|
|
|
This happens during revalidation because we think that memory should
|
|
have been previously allocated based on the current graph, but in
|
|
reality the previous graph was different. In this situation, we
|
|
should do a full reallocation pass.
|
|
---
|
|
ggml/src/ggml-alloc.c | 5 ++++-
|
|
1 file changed, 4 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
|
|
index a3d3f690..5fd379f6 100644
|
|
--- a/ggml/src/ggml-alloc.c
|
|
+++ b/ggml/src/ggml-alloc.c
|
|
@@ -816,7 +816,10 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
|
|
static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
|
|
size_t node_size = 0;
|
|
if (!node->data && !node->view_src) {
|
|
- GGML_ASSERT(talloc->buffer_id >= 0); // prevent segfault when misusing the API
|
|
+ // If we previously had data but don't now then reallocate
|
|
+ if (talloc->buffer_id < 0) {
|
|
+ return false;
|
|
+ }
|
|
node_size = ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
|
|
}
|
|
return talloc->size_max >= node_size;
|