From fa393554b927f154145488c852297a2330cb5f13 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 6 May 2025 17:33:19 -0700 Subject: [PATCH] remove cuda v11 (#10569) This reduces the size of our Windows installer payloads by ~256M by dropping support for nvidia drivers older than Feb 2023. Hardware support is unchanged. Linux default bundle sizes are reduced by ~600M to 1G. --- .github/workflows/release.yaml | 6 ------ .github/workflows/test.yaml | 6 +++--- CMakePresets.json | 13 ------------- Dockerfile | 17 +---------------- discover/cuda_common.go | 3 +++ discover/path.go | 2 +- docs/gpu.md | 2 +- docs/troubleshooting.md | 2 +- llm/server.go | 2 +- scripts/build_windows.ps1 | 14 -------------- scripts/env.sh | 2 -- 11 files changed, 11 insertions(+), 58 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 12f361408..ec4fc0b0f 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -103,11 +103,6 @@ jobs: arch: [amd64] preset: ['CPU'] include: - - os: windows - arch: amd64 - preset: 'CUDA 11' - install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe - cuda-version: '11.3' - os: windows arch: amd64 preset: 'CUDA 12' @@ -324,7 +319,6 @@ jobs: case "$COMPONENT" in bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; - lib/ollama/cuda_v11) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;; lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 27e229fcf..2e7093391 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -46,7 +46,7 @@ jobs: include: - preset: CPU - preset: CUDA - container: nvidia/cuda:11.8.0-devel-ubuntu22.04 + container: nvidia/cuda:12.8.1-devel-ubuntu22.04 flags: '-DCMAKE_CUDA_ARCHITECTURES=87' - preset: ROCm container: rocm/dev-ubuntu-22.04:6.1.2 @@ -78,7 +78,7 @@ jobs: include: - preset: CPU - preset: CUDA - install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe + install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe flags: '-DCMAKE_CUDA_ARCHITECTURES=80' - preset: ROCm install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe @@ -102,7 +102,7 @@ jobs: $ErrorActionPreference = "Stop" if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') { Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe" - Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait + Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait } $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path diff --git a/CMakePresets.json b/CMakePresets.json index 0b70d8ba3..2f29e041e 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -17,14 +17,6 @@ "name": "CUDA", "inherits": [ "Default" ] }, - { - "name": "CUDA 11", - "inherits": [ "CUDA" ], - "cacheVariables": { - "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;70;75;80;86", - "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets" - } - }, { "name": "CUDA 12", "inherits": [ "CUDA" ], @@ -78,11 +70,6 @@ "configurePreset": "CUDA", "targets": [ "ggml-cuda" ] }, - { - "name": "CUDA 11", - "inherits": [ "CUDA" ], - "configurePreset": "CUDA 11" - }, { "name": "CUDA 12", "inherits": [ "CUDA" ], diff --git a/Dockerfile b/Dockerfile index 4c6619e77..1196dc535 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,14 +7,10 @@ ARG JETPACK5VERSION=r35.4.1 ARG JETPACK6VERSION=r36.4.0 ARG CMAKEVERSION=3.31.2 -# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64 RUN yum install -y yum-utils \ - && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \ - && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \ - && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \ + && dnf install -y ccache \ && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo -ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH FROM --platform=linux/arm64 almalinux:8 AS base-arm64 # install epel-release for ccache @@ -38,15 +34,6 @@ RUN --mount=type=cache,target=/root/.ccache \ && cmake --build --parallel --preset 'CPU' \ && cmake --install build --component CPU --strip --parallel 8 -FROM base AS cuda-11 -ARG CUDA11VERSION=11.3 -RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-} -ENV PATH=/usr/local/cuda-11/bin:$PATH -RUN --mount=type=cache,target=/root/.ccache \ - cmake --preset 'CUDA 11' \ - && cmake --build --parallel --preset 'CUDA 11' \ - && cmake --install build --component CUDA --strip --parallel 8 - FROM base AS cuda-12 ARG CUDA12VERSION=12.8 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-} @@ -98,11 +85,9 @@ RUN --mount=type=cache,target=/root/.cache/go-build \ go build -trimpath -buildmode=pie -o /bin/ollama . FROM --platform=linux/amd64 scratch AS amd64 -COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11 COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12 FROM --platform=linux/arm64 scratch AS arm64 -COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11 COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12 COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5 COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6 diff --git a/discover/cuda_common.go b/discover/cuda_common.go index 048295297..f46c7cfa5 100644 --- a/discover/cuda_common.go +++ b/discover/cuda_common.go @@ -3,6 +3,7 @@ package discover import ( + "fmt" "log/slog" "os" "regexp" @@ -59,6 +60,8 @@ func cudaVariant(gpuInfo CudaGPUInfo) string { // driver 12.0 has problems with the cuda v12 library, so run v11 on those older drivers if gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) { + // The detected driver is older than Feb 2023 + slog.Warn("old CUDA driver detected - please upgrade to a newer driver", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor)) return "v11" } return "v12" diff --git a/discover/path.go b/discover/path.go index 8a20d8c21..68e63009a 100644 --- a/discover/path.go +++ b/discover/path.go @@ -12,7 +12,7 @@ import ( // '../lib/ollama' on Linux and the executable's directory on macOS // note: distribution builds, additional GPU-specific libraries are // found in subdirectories of the returned path, such as -// 'cuda_v11', 'cuda_v12', 'rocm', etc. +// 'cuda_v12', 'rocm', etc. var LibOllamaPath string = func() string { exe, err := os.Executable() if err != nil { diff --git a/docs/gpu.md b/docs/gpu.md index b54c66ab6..61ff6e458 100644 --- a/docs/gpu.md +++ b/docs/gpu.md @@ -1,6 +1,6 @@ # GPU ## Nvidia -Ollama supports Nvidia GPUs with compute capability 5.0+. +Ollama supports Nvidia GPUs with compute capability 5.0+ and driver version 531 and newer. Check your compute compatibility to see if your card is supported: [https://developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index ba5487fef..995b33aca 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -43,7 +43,7 @@ Ollama includes multiple LLM libraries compiled for different GPUs and CPU vecto In the server log, you will see a message that looks something like this (varies from release to release): ``` -Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5] +Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5] ``` **Experimental LLM Library Override** diff --git a/llm/server.go b/llm/server.go index d7c466a9a..f2f04c18a 100644 --- a/llm/server.go +++ b/llm/server.go @@ -286,7 +286,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a params = append(params, "--mmproj", projectors[0]) } - // iterate through compatible GPU libraries such as 'cuda_v12', 'cuda_v11', 'rocm', etc. + // iterate through compatible GPU libraries such as 'cuda_v12', 'rocm', etc. // adding each library's respective path to the LD_LIBRARY_PATH, until finally running // without any LD_LIBRARY_PATH flags for { diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index e4c0b3d93..eaac2c600 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -27,7 +27,6 @@ function checkEnv() { $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0] } # Locate CUDA versions - # Note: this assumes every version found will be built $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue') if ($cudaList.length -eq 0) { $d=(get-command -ea 'silentlycontinue' nvcc).path @@ -94,19 +93,6 @@ function buildOllama() { $hashEnv = @{} Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value } - if ("$script:CUDA_DIRS".Contains("v11")) { - $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }} - $env:CUDAToolkit_ROOT=$hashEnv[$v11] - write-host "Building CUDA v11 backend libraries" - # Note: cuda v11 requires msvc 2019 so force the older generator - # to avoid 2022 (or newer) from being used as the default - & cmake --fresh --preset "CUDA 11" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR - if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} - & cmake --build --preset "CUDA 11" --config Release --parallel $script:JOBS - if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} - & cmake --install build --component "CUDA" --strip - if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} - } if ("$script:CUDA_DIRS".Contains("v12")) { $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }} $env:CUDAToolkit_ROOT=$hashEnv[$v12] diff --git a/scripts/env.sh b/scripts/env.sh index c5e6f530a..65a970bdc 100644 --- a/scripts/env.sh +++ b/scripts/env.sh @@ -10,9 +10,7 @@ OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION \ --build-arg=GOFLAGS \ --build-arg=OLLAMA_CUSTOM_CPU_DEFS \ --build-arg=OLLAMA_SKIP_CUDA_GENERATE \ - --build-arg=OLLAMA_SKIP_CUDA_11_GENERATE \ --build-arg=OLLAMA_SKIP_CUDA_12_GENERATE \ - --build-arg=CUDA_V11_ARCHITECTURES \ --build-arg=CUDA_V12_ARCHITECTURES \ --build-arg=OLLAMA_SKIP_ROCM_GENERATE \ --build-arg=OLLAMA_FAST_BUILD \