llama: use dynamic backend loading for mllama and clip (#8835)

This commit is contained in:
Jeffrey Morgan 2025-02-05 09:46:56 -08:00 committed by GitHub
parent c852b8e021
commit cd3fbf1c49
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 36 additions and 82 deletions

View file

@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
     }

-#ifdef GGML_USE_CUDA
-    new_clip->backend = ggml_backend_cuda_init(0);
-    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_METAL
-    new_clip->backend = ggml_backend_metal_init();
-    LOG_INF("%s: CLIP using Metal backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_CANN
-    new_clip->backend = ggml_backend_cann_init(0);
-    LOG_INF("%s: CLIP using CANN backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_clip->backend = ggml_backend_vk_init(0);
-    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_SYCL
-    new_clip->backend = ggml_backend_sycl_init(0);
-    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-#endif
-
-    if (!new_clip->backend) {
-        new_clip->backend = ggml_backend_cpu_init();
-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
-    }
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG_ERR("%s: failed to initialize backend\n", __func__);
+        clip_free(new_clip);
+        gguf_free(ctx);
+        return nullptr;
+    }
+    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_clip->backend = backend;

    // model size and capabilities
    {

llama/mllama.cpp (vendored) — 31 changed lines
View file

@@ -558,30 +558,15 @@ struct mllama_ctx *mllama_model_load(const char *fname, const int verbosity = 1)
    mllama_ctx *new_mllama = new mllama_ctx{};

-#ifdef GGML_USE_CUDA
-    new_mllama->backend = ggml_backend_cuda_init(0);
-    LOG("vision using CUDA backend");
-#endif
-
-#ifdef GGML_USE_METAL
-    new_mllama->backend = ggml_backend_metal_init();
-    LOG("vision using Metal backend");
-#endif
-
-#ifdef GGML_USE_CANN
-    new_mllama->backend = ggml_backend_cann_init(0);
-    LOG("vision using CANN backend");
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_mllama->backend = ggml_backend_vk_init(0);
-    LOG("vision using Vulkan backend");
-#endif
-
-    if (!new_mllama->backend) {
-        new_mllama->backend = ggml_backend_cpu_init();
-        LOG("vision using CPU backend");
-    }
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG("%s: failed to initialize backend\n", __func__);
+        mllama_free(new_mllama);
+        gguf_free(ctx);
+        return nullptr;
+    }
+    LOG("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_mllama->backend = backend;

    // load tensors
    {

View file

@@ -1,14 +1,14 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: jmorganca <jmorganca@gmail.com>
 Date: Sat, 4 Jan 2025 22:52:48 -0800
-Subject: [PATCH] re-enable gpu for clip
+Subject: [PATCH] use dynamic backend loading for clip

 ---
- examples/llava/clip.cpp | 86 ++++++++++++++++++++---------------------
- 1 file changed, 43 insertions(+), 43 deletions(-)
+ examples/llava/clip.cpp | 74 +++++++++++++++--------------------------
+ 1 file changed, 27 insertions(+), 47 deletions(-)

 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index b3c1829f..718052e1 100644
+index b3c1829f..86b91d5c 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
@@ -8,25 +8,25 @@
@@ -56,7 +56,7 @@
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"

-@@ -1235,30 +1235,30 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
+@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
          }
      }
@@ -84,30 +84,19 @@
 -//#ifdef GGML_USE_SYCL
 -//    new_clip->backend = ggml_backend_sycl_init(0);
 -//    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
 -//#endif
-+#ifdef GGML_USE_CUDA
-+    new_clip->backend = ggml_backend_cuda_init(0);
-+    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_METAL
-+    new_clip->backend = ggml_backend_metal_init();
-+    LOG_INF("%s: CLIP using Metal backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_CANN
-+    new_clip->backend = ggml_backend_cann_init(0);
-+    LOG_INF("%s: CLIP using CANN backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_VULKAN
-+    new_clip->backend = ggml_backend_vk_init(0);
-+    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_SYCL
-+    new_clip->backend = ggml_backend_sycl_init(0);
-+    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-+#endif
-     if (!new_clip->backend) {
-         new_clip->backend = ggml_backend_cpu_init();
-         LOG_INF("%s: CLIP using CPU backend\n", __func__);
+-
+-    if (!new_clip->backend) {
+-        new_clip->backend = ggml_backend_cpu_init();
+-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
+-    }
++    ggml_backend_t backend = ggml_backend_init_best();
++    if (backend == nullptr) {
++        LOG_ERR("%s: failed to initialize backend\n", __func__);
++        clip_free(new_clip);
++        gguf_free(ctx);
++        return nullptr;
+     }
++    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
++    new_clip->backend = backend;

     // model size and capabilities
     {