ml/backend/ggml: use default CUDA compression mode (#10314)

This commit is contained in:
Jeffrey Morgan 2025-04-16 22:54:20 -04:00 committed by GitHub
parent dc264be6ff
commit 09bb2e30f6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -24,7 +24,7 @@ set(GGML_LLAMAFILE ON)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128) set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_CUDA_GRAPHS ON) set(GGML_CUDA_GRAPHS ON)
set(GGML_CUDA_FA ON) set(GGML_CUDA_FA ON)
set(GGML_CUDA_COMPRESSION_MODE none) set(GGML_CUDA_COMPRESSION_MODE default)
if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64") if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+")) OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))