diff --git a/llama/llama.cpp/examples/llava/clip.cpp b/llama/llama.cpp/examples/llava/clip.cpp
index 718052e16..86b91d5cb 100644
--- a/llama/llama.cpp/examples/llava/clip.cpp
+++ b/llama/llama.cpp/examples/llava/clip.cpp
@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
     }
 
-#ifdef GGML_USE_CUDA
-    new_clip->backend = ggml_backend_cuda_init(0);
-    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_METAL
-    new_clip->backend = ggml_backend_metal_init();
-    LOG_INF("%s: CLIP using Metal backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_CANN
-    new_clip->backend = ggml_backend_cann_init(0);
-    LOG_INF("%s: CLIP using CANN backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_clip->backend = ggml_backend_vk_init(0);
-    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_SYCL
-    new_clip->backend = ggml_backend_sycl_init(0);
-    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-#endif
-
-    if (!new_clip->backend) {
-        new_clip->backend = ggml_backend_cpu_init();
-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG_ERR("%s: failed to initialize backend\n", __func__);
+        clip_free(new_clip);
+        gguf_free(ctx);
+        return nullptr;
     }
+    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_clip->backend = backend;
 
     // model size and capabilities
     {
diff --git a/llama/mllama.cpp b/llama/mllama.cpp
index df5bd6a98..4e84c60ae 100644
--- a/llama/mllama.cpp
+++ b/llama/mllama.cpp
@@ -558,30 +558,15 @@ struct mllama_ctx *mllama_model_load(const char *fname, const int verbosity = 1)
 
     mllama_ctx *new_mllama = new mllama_ctx{};
 
-#ifdef GGML_USE_CUDA
-    new_mllama->backend = ggml_backend_cuda_init(0);
-    LOG("vision using CUDA backend");
-#endif
-
-#ifdef GGML_USE_METAL
-    new_mllama->backend = ggml_backend_metal_init();
-    LOG("vision using Metal backend");
-#endif
-
-#ifdef GGML_USE_CANN
-    new_mllama->backend = ggml_backend_cann_init(0);
-    LOG("vision using CANN backend");
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_mllama->backend = ggml_backend_vk_init(0);
-    LOG("vision using Vulkan backend");
-#endif
-
-    if (!new_mllama->backend) {
-        new_mllama->backend = ggml_backend_cpu_init();
-        LOG("vision using CPU backend");
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG("%s: failed to initialize backend\n", __func__);
+        mllama_free(new_mllama);
+        gguf_free(ctx);
+        return nullptr;
     }
+    LOG("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_mllama->backend = backend;
 
     // load tensors
     {
diff --git a/llama/patches/0013-re-enable-gpu-for-clip.patch b/llama/patches/0013-use-dynamic-backend-loading-for-clip.patch
similarity index 64%
rename from llama/patches/0013-re-enable-gpu-for-clip.patch
rename to llama/patches/0013-use-dynamic-backend-loading-for-clip.patch
index a38d08841..e283a857f 100644
--- a/llama/patches/0013-re-enable-gpu-for-clip.patch
+++ b/llama/patches/0013-use-dynamic-backend-loading-for-clip.patch
@@ -1,14 +1,14 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: jmorganca <jmorganca@gmail.com>
 Date: Sat, 4 Jan 2025 22:52:48 -0800
-Subject: [PATCH] re-enable gpu for clip
+Subject: [PATCH] use dynamic backend loading for clip
 
 ---
- examples/llava/clip.cpp | 86 ++++++++++++++++++++---------------------
- 1 file changed, 43 insertions(+), 43 deletions(-)
+ examples/llava/clip.cpp | 74 +++++++++++++++--------------------
+ 1 file changed, 27 insertions(+), 47 deletions(-)
 
 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index b3c1829f..718052e1 100644
+index b3c1829f..86b91d5c 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
 @@ -8,25 +8,25 @@
@@ -56,7 +56,7 @@ index b3c1829f..718052e1 100644
  
  #define STB_IMAGE_IMPLEMENTATION
  #include "stb_image.h"
-@@ -1235,30 +1235,30 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
+@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
           }
       }
@@ -84,30 +84,19 @@ index b3c1829f..718052e1 100644
 -//    new_clip->backend = ggml_backend_sycl_init(0);
 -//    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
 -//#endif
-+#ifdef GGML_USE_CUDA
-+    new_clip->backend = ggml_backend_cuda_init(0);
-+    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_METAL
-+    new_clip->backend = ggml_backend_metal_init();
-+    LOG_INF("%s: CLIP using Metal backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_CANN
-+    new_clip->backend = ggml_backend_cann_init(0);
-+    LOG_INF("%s: CLIP using CANN backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_VULKAN
-+    new_clip->backend = ggml_backend_vk_init(0);
-+    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_SYCL
-+    new_clip->backend = ggml_backend_sycl_init(0);
-+    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-+#endif
++-
++-    if (!new_clip->backend) {
++-        new_clip->backend = ggml_backend_cpu_init();
++-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
+++    ggml_backend_t backend = ggml_backend_init_best();
+++    if (backend == nullptr) {
+++        LOG_ERR("%s: failed to initialize backend\n", __func__);
+++        clip_free(new_clip);
+++        gguf_free(ctx);
+++        return nullptr;
++     }
+++    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+++    new_clip->backend = backend;
  
-     if (!new_clip->backend) {
-         new_clip->backend = ggml_backend_cpu_init();
+     // model size and capabilities
+     {
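For reference, a minimal standalone sketch of the runtime backend-selection pattern this diff adopts. It assumes ggml's backend registry API from `ggml-backend.h` (`ggml_backend_init_best`, `ggml_backend_name`, `ggml_backend_free`); the `main` harness is illustrative and not part of the change:

```cpp
// Sketch: pick the best available ggml backend at runtime instead of
// compiling in per-backend #ifdef blocks (GGML_USE_CUDA, GGML_USE_METAL, ...).
#include "ggml-backend.h"
#include <cstdio>

int main() {
    // ggml_backend_init_best() walks the registered backends (CUDA, Metal,
    // Vulkan, etc.) and returns the highest-priority one that initializes,
    // falling back to the CPU backend when no accelerator is usable.
    ggml_backend_t backend = ggml_backend_init_best();
    if (backend == nullptr) {
        fprintf(stderr, "failed to initialize a backend\n");
        return 1;
    }
    printf("using %s backend\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}
```

Because the choice moves from compile time to runtime, a single binary can ship with every backend built in (or loaded dynamically) and select whichever one actually works on the host, which is what lets the hard-coded `#ifdef` chains in `clip.cpp` and `mllama.cpp` be deleted.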