mirror of https://github.com/ollama/ollama.git (synced 2025-05-11 10:26:53 +02:00)

llama: use dynamic backend loading for mllama and clip (#8835)

parent c852b8e021
commit cd3fbf1c49

3 changed files with 36 additions and 82 deletions
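The change is the same in both vision loaders: the per-backend #ifdef ladder is replaced by a single call into ggml's backend registry. A minimal sketch of the pattern, separate from the diffs below (init_vision_backend is an illustrative name, not a function in this repo; ggml_backend_init_best and ggml_backend_name are the real registry API):

#include <cstdio>
#include "ggml-backend.h"

// Pick the highest-priority backend the registry knows about at runtime
// (a GPU backend if one is available, otherwise CPU), instead of choosing
// one at compile time with #ifdef blocks.
static ggml_backend_t init_vision_backend() {
    ggml_backend_t backend = ggml_backend_init_best();
    if (backend == nullptr) {
        fprintf(stderr, "failed to initialize backend\n");
        return nullptr; // caller is responsible for cleanup
    }
    fprintf(stderr, "using %s backend\n", ggml_backend_name(backend));
    return backend;
}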
llama/llama.cpp/examples/llava/clip.cpp (vendored, 36 changes)
@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
     }
 
-#ifdef GGML_USE_CUDA
-    new_clip->backend = ggml_backend_cuda_init(0);
-    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_METAL
-    new_clip->backend = ggml_backend_metal_init();
-    LOG_INF("%s: CLIP using Metal backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_CANN
-    new_clip->backend = ggml_backend_cann_init(0);
-    LOG_INF("%s: CLIP using CANN backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_clip->backend = ggml_backend_vk_init(0);
-    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_SYCL
-    new_clip->backend = ggml_backend_sycl_init(0);
-    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-#endif
-
-    if (!new_clip->backend) {
-        new_clip->backend = ggml_backend_cpu_init();
-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG_ERR("%s: failed to initialize backend\n", __func__);
+        clip_free(new_clip);
+        gguf_free(ctx);
+        return nullptr;
     }
+    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_clip->backend = backend;
 
     // model size and capabilities
     {
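Note this particular failure path is new: where the old code always fell back to ggml_backend_cpu_init(), clip_model_load can now return nullptr after freeing the partially built context, so callers must check the result. A hedged caller-side sketch (the model path is a placeholder):

// Loading the CLIP projector; "mmproj.gguf" is a placeholder path.
struct clip_ctx * clip = clip_model_load("mmproj.gguf", /*verbosity=*/1);
if (clip == nullptr) {
    fprintf(stderr, "failed to load CLIP model\n");
    return 1;
}
// ... encode images ...
clip_free(clip);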
llama/mllama.cpp (vendored, 31 changes)
@@ -558,30 +558,15 @@ struct mllama_ctx *mllama_model_load(const char *fname, const int verbosity = 1)
 
     mllama_ctx *new_mllama = new mllama_ctx{};
 
-#ifdef GGML_USE_CUDA
-    new_mllama->backend = ggml_backend_cuda_init(0);
-    LOG("vision using CUDA backend");
-#endif
-
-#ifdef GGML_USE_METAL
-    new_mllama->backend = ggml_backend_metal_init();
-    LOG("vision using Metal backend");
-#endif
-
-#ifdef GGML_USE_CANN
-    new_mllama->backend = ggml_backend_cann_init(0);
-    LOG("vision using CANN backend");
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_mllama->backend = ggml_backend_vk_init(0);
-    LOG("vision using Vulkan backend");
-#endif
-
-    if (!new_mllama->backend) {
-        new_mllama->backend = ggml_backend_cpu_init();
-        LOG("vision using CPU backend");
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG("%s: failed to initialize backend\n", __func__);
+        mllama_free(new_mllama);
+        gguf_free(ctx);
+        return nullptr;
     }
+    LOG("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_mllama->backend = backend;
 
     // load tensors
     {
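"Dynamic backend loading" here refers to ggml's backend registry: backends are built as shared libraries, discovered and loaded at runtime, and ggml_backend_init_best() then selects among whatever was loaded. A standalone sketch of that flow, assuming a ggml build with dynamic backends (not code from this repo):

#include <cstdio>
#include "ggml-backend.h"

int main() {
    ggml_backend_load_all();   // scan for backend shared libraries at runtime
    ggml_backend_t backend = ggml_backend_init_best();
    if (backend == nullptr) {
        fprintf(stderr, "no usable backend\n");
        return 1;
    }
    printf("selected: %s\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}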
(vendored patch file for examples/llava/clip.cpp; filename not shown in this view)

@@ -1,14 +1,14 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: jmorganca <jmorganca@gmail.com>
 Date: Sat, 4 Jan 2025 22:52:48 -0800
-Subject: [PATCH] re-enable gpu for clip
+Subject: [PATCH] use dynamic backend loading for clip
 
 ---
- examples/llava/clip.cpp | 86 ++++++++++++++++++++---------------------
- 1 file changed, 43 insertions(+), 43 deletions(-)
+ examples/llava/clip.cpp | 74 +++++++++++++++--------------------------
+ 1 file changed, 27 insertions(+), 47 deletions(-)
 
 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index b3c1829f..718052e1 100644
+index b3c1829f..86b91d5c 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
@@ -56,7 +56,7 @@ index b3c1829f..718052e1 100644
  
  #define STB_IMAGE_IMPLEMENTATION
  #include "stb_image.h"
-@@ -1235,30 +1235,30 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
+@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
          }
      }
  
@@ -84,30 +84,19 @@ index b3c1829f..718052e1 100644
 -//    new_clip->backend = ggml_backend_sycl_init(0);
 -//    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
 -//#endif
-+#ifdef GGML_USE_CUDA
-+    new_clip->backend = ggml_backend_cuda_init(0);
-+    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_METAL
-+    new_clip->backend = ggml_backend_metal_init();
-+    LOG_INF("%s: CLIP using Metal backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_CANN
-+    new_clip->backend = ggml_backend_cann_init(0);
-+    LOG_INF("%s: CLIP using CANN backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_VULKAN
-+    new_clip->backend = ggml_backend_vk_init(0);
-+    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_SYCL
-+    new_clip->backend = ggml_backend_sycl_init(0);
-+    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-+#endif
- 
-     if (!new_clip->backend) {
-         new_clip->backend = ggml_backend_cpu_init();
+-
+-    if (!new_clip->backend) {
+-        new_clip->backend = ggml_backend_cpu_init();
+-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
++    ggml_backend_t backend = ggml_backend_init_best();
++    if (backend == nullptr) {
++        LOG_ERR("%s: failed to initialize backend\n", __func__);
++        clip_free(new_clip);
++        gguf_free(ctx);
++        return nullptr;
+     }
++    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
++    new_clip->backend = backend;
+ 
+     // model size and capabilities
+     {