diff --git a/llama/patches/0018-ggml-Don-t-assert-fail-when-tensor-data-changes-1322.patch b/llama/patches/0018-ggml-Don-t-assert-fail-when-tensor-data-changes-1322.patch
new file mode 100644
index 000000000..b3424c9ef
--- /dev/null
+++ b/llama/patches/0018-ggml-Don-t-assert-fail-when-tensor-data-changes-1322.patch
@@ -0,0 +1,38 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jesse Gross <jesse@ollama.com>
+Date: Thu, 1 May 2025 13:46:10 -0700
+Subject: [PATCH] ggml: Don't assert fail when tensor data changes (#13222)
+
+The following scenario will cause an assertion failure in the graph
+allocator:
+ - Build and allocate a graph containing a tensor with a non-NULL data
+   pointer
+ - Build and allocate a new graph where that data is NULL
+
+Result:
+ggml-alloc.c:819: GGML_ASSERT(talloc->buffer_id >= 0) failed
+
+This happens during revalidation because we think that memory should
+have been previously allocated based on the current graph but in
+reality the previous graph was different. In this situation, we
+should do a full reallocation pass.
+---
+ ggml/src/ggml-alloc.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
+index a3d3f690..5fd379f6 100644
+--- a/ggml/src/ggml-alloc.c
++++ b/ggml/src/ggml-alloc.c
+@@ -816,7 +816,10 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
+ static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
+     size_t node_size = 0;
+     if (!node->data && !node->view_src) {
+-        GGML_ASSERT(talloc->buffer_id >= 0); // prevent segfault when misusing the API
++        // If we previously had data but don't now then reallocate
++        if (talloc->buffer_id < 0) {
++            return false;
++        }
+         node_size = ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
+     }
+     return talloc->size_max >= node_size;
diff --git a/ml/backend/ggml/ggml/src/ggml-alloc.c b/ml/backend/ggml/ggml/src/ggml-alloc.c
index a3d3f6901..5fd379f6a 100644
--- a/ml/backend/ggml/ggml/src/ggml-alloc.c
+++ b/ml/backend/ggml/ggml/src/ggml-alloc.c
@@ -816,7 +816,10 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
 static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
     size_t node_size = 0;
     if (!node->data && !node->view_src) {
-        GGML_ASSERT(talloc->buffer_id >= 0); // prevent segfault when misusing the API
+        // If we previously had data but don't now then reallocate
+        if (talloc->buffer_id < 0) {
+            return false;
+        }
         node_size = ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
     }
     return talloc->size_max >= node_size;
diff --git a/runner/ollamarunner/runner.go b/runner/ollamarunner/runner.go
index 9597f7cdd..3e0bb34ec 100644
--- a/runner/ollamarunner/runner.go
+++ b/runner/ollamarunner/runner.go
@@ -715,9 +715,7 @@ func (m *multiLPath) String() string {
 	return strings.Join(*m, ", ")
 }
 
-// TODO(jessegross): This is causing tensor allocation failures with large batches when not offloaded
-// to the GPU
-/*func (s *Server) reserveWorstCaseGraph() error {
+func (s *Server) reserveWorstCaseGraph() error {
 	ctx := s.model.Backend().NewContext()
 	defer ctx.Close()
 
@@ -760,7 +758,7 @@ func (m *multiLPath) String() string {
 	}
 
 	return nil
-}*/
+}
 
 func (s *Server) loadModel(
 	ctx context.Context,
@@ -797,10 +795,10 @@ func (s *Server) loadModel(
 	s.seqs = make([]*Sequence, s.parallel)
 	s.seqsSem = semaphore.NewWeighted(int64(s.parallel))
 
-	/*err = s.reserveWorstCaseGraph()
+	err = s.reserveWorstCaseGraph()
 	if err != nil {
 		panic(err)
-	}*/
+	}
 
 	s.status = llm.ServerStatusReady
 	s.ready.Done()
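
For context (not part of the change itself): a minimal C sketch of the failing sequence described in the commit message, assuming the CPU backend. The helper build() and the constants here are illustrative; header names such as ggml-cpu.h vary between ggml versions.

// Repro sketch: allocate a graph whose tensors already have data, then a
// topologically identical graph whose tensors have NULL data.
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-cpu.h"

// Builds a one-node graph (scale) over a single F32 tensor.
static struct ggml_cgraph * build(struct ggml_context * ctx) {
    struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, ggml_scale(ctx, t, 2.0f));
    return gf;
}

int main(void) {
    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());

    // Graph 1: no_alloc = false, so tensors carry non-NULL data pointers and
    // the allocator records them as pre-allocated (buffer_id == -1).
    struct ggml_init_params p1 = { 1024 * 1024, NULL, false };
    struct ggml_context * ctx1 = ggml_init(p1);
    ggml_gallocr_alloc_graph(galloc, build(ctx1));

    // Graph 2: same topology, but no_alloc = true, so data is NULL. During
    // revalidation the recorded buffer_id is still -1; before the patch this
    // tripped GGML_ASSERT(talloc->buffer_id >= 0), after it the allocator
    // reports that a full reallocation pass is needed and recovers.
    struct ggml_init_params p2 = { 1024 * 1024, NULL, true };
    struct ggml_context * ctx2 = ggml_init(p2);
    ggml_gallocr_alloc_graph(galloc, build(ctx2));

    ggml_free(ctx2);
    ggml_free(ctx1);
    ggml_gallocr_free(galloc);
    return 0;
}

This is also why the runner.go change is safe to re-enable: reserveWorstCaseGraph() pre-reserves memory against a worst-case graph, and later real graphs may differ in exactly the way the repro shows, which the patched ggml_gallocr_node_needs_realloc now handles by signaling a reallocation instead of aborting.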