mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 18:36:41 +02:00
remove cuda v11 (#10569)
This reduces the size of our Windows installer payloads by ~256M by dropping support for NVIDIA drivers older than Feb 2023. Hardware support is unchanged. Linux default bundle sizes are reduced by ~600M to 1G.
This commit is contained in:
parent
307e3b3e1d
commit
fa393554b9
11 changed files with 11 additions and 58 deletions
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
|
@ -103,11 +103,6 @@ jobs:
|
||||||
arch: [amd64]
|
arch: [amd64]
|
||||||
preset: ['CPU']
|
preset: ['CPU']
|
||||||
include:
|
include:
|
||||||
- os: windows
|
|
||||||
arch: amd64
|
|
||||||
preset: 'CUDA 11'
|
|
||||||
install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
|
|
||||||
cuda-version: '11.3'
|
|
||||||
- os: windows
|
- os: windows
|
||||||
arch: amd64
|
arch: amd64
|
||||||
preset: 'CUDA 12'
|
preset: 'CUDA 12'
|
||||||
|
@ -324,7 +319,6 @@ jobs:
|
||||||
case "$COMPONENT" in
|
case "$COMPONENT" in
|
||||||
bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||||
lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||||
lib/ollama/cuda_v11) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
|
||||||
lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||||
lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
|
lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
|
||||||
lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
|
lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
|
||||||
|
|
6
.github/workflows/test.yaml
vendored
6
.github/workflows/test.yaml
vendored
|
@ -46,7 +46,7 @@ jobs:
|
||||||
include:
|
include:
|
||||||
- preset: CPU
|
- preset: CPU
|
||||||
- preset: CUDA
|
- preset: CUDA
|
||||||
container: nvidia/cuda:11.8.0-devel-ubuntu22.04
|
container: nvidia/cuda:12.8.1-devel-ubuntu22.04
|
||||||
flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
|
flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
|
||||||
- preset: ROCm
|
- preset: ROCm
|
||||||
container: rocm/dev-ubuntu-22.04:6.1.2
|
container: rocm/dev-ubuntu-22.04:6.1.2
|
||||||
|
@ -78,7 +78,7 @@ jobs:
|
||||||
include:
|
include:
|
||||||
- preset: CPU
|
- preset: CPU
|
||||||
- preset: CUDA
|
- preset: CUDA
|
||||||
install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
|
install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
|
||||||
flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
|
flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
|
||||||
- preset: ROCm
|
- preset: ROCm
|
||||||
install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
|
install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
|
||||||
|
@ -102,7 +102,7 @@ jobs:
|
||||||
$ErrorActionPreference = "Stop"
|
$ErrorActionPreference = "Stop"
|
||||||
if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
|
if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
|
||||||
Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
|
Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
|
||||||
Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait
|
Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait
|
||||||
}
|
}
|
||||||
|
|
||||||
$cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path
|
$cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path
|
||||||
|
|
|
@ -17,14 +17,6 @@
|
||||||
"name": "CUDA",
|
"name": "CUDA",
|
||||||
"inherits": [ "Default" ]
|
"inherits": [ "Default" ]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "CUDA 11",
|
|
||||||
"inherits": [ "CUDA" ],
|
|
||||||
"cacheVariables": {
|
|
||||||
"CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;70;75;80;86",
|
|
||||||
"CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "CUDA 12",
|
"name": "CUDA 12",
|
||||||
"inherits": [ "CUDA" ],
|
"inherits": [ "CUDA" ],
|
||||||
|
@ -78,11 +70,6 @@
|
||||||
"configurePreset": "CUDA",
|
"configurePreset": "CUDA",
|
||||||
"targets": [ "ggml-cuda" ]
|
"targets": [ "ggml-cuda" ]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "CUDA 11",
|
|
||||||
"inherits": [ "CUDA" ],
|
|
||||||
"configurePreset": "CUDA 11"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "CUDA 12",
|
"name": "CUDA 12",
|
||||||
"inherits": [ "CUDA" ],
|
"inherits": [ "CUDA" ],
|
||||||
|
|
17
Dockerfile
17
Dockerfile
|
@ -7,14 +7,10 @@ ARG JETPACK5VERSION=r35.4.1
|
||||||
ARG JETPACK6VERSION=r36.4.0
|
ARG JETPACK6VERSION=r36.4.0
|
||||||
ARG CMAKEVERSION=3.31.2
|
ARG CMAKEVERSION=3.31.2
|
||||||
|
|
||||||
# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
|
|
||||||
FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
|
FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
|
||||||
RUN yum install -y yum-utils \
|
RUN yum install -y yum-utils \
|
||||||
&& yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
|
&& dnf install -y ccache \
|
||||||
&& rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
|
|
||||||
&& dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
|
|
||||||
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
|
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
|
||||||
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
|
||||||
|
|
||||||
FROM --platform=linux/arm64 almalinux:8 AS base-arm64
|
FROM --platform=linux/arm64 almalinux:8 AS base-arm64
|
||||||
# install epel-release for ccache
|
# install epel-release for ccache
|
||||||
|
@ -38,15 +34,6 @@ RUN --mount=type=cache,target=/root/.ccache \
|
||||||
&& cmake --build --parallel --preset 'CPU' \
|
&& cmake --build --parallel --preset 'CPU' \
|
||||||
&& cmake --install build --component CPU --strip --parallel 8
|
&& cmake --install build --component CPU --strip --parallel 8
|
||||||
|
|
||||||
FROM base AS cuda-11
|
|
||||||
ARG CUDA11VERSION=11.3
|
|
||||||
RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
|
|
||||||
ENV PATH=/usr/local/cuda-11/bin:$PATH
|
|
||||||
RUN --mount=type=cache,target=/root/.ccache \
|
|
||||||
cmake --preset 'CUDA 11' \
|
|
||||||
&& cmake --build --parallel --preset 'CUDA 11' \
|
|
||||||
&& cmake --install build --component CUDA --strip --parallel 8
|
|
||||||
|
|
||||||
FROM base AS cuda-12
|
FROM base AS cuda-12
|
||||||
ARG CUDA12VERSION=12.8
|
ARG CUDA12VERSION=12.8
|
||||||
RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
|
RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
|
||||||
|
@ -98,11 +85,9 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
|
||||||
go build -trimpath -buildmode=pie -o /bin/ollama .
|
go build -trimpath -buildmode=pie -o /bin/ollama .
|
||||||
|
|
||||||
FROM --platform=linux/amd64 scratch AS amd64
|
FROM --platform=linux/amd64 scratch AS amd64
|
||||||
COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
|
|
||||||
COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
|
COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
|
||||||
|
|
||||||
FROM --platform=linux/arm64 scratch AS arm64
|
FROM --platform=linux/arm64 scratch AS arm64
|
||||||
COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
|
|
||||||
COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
|
COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
|
||||||
COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5
|
COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5
|
||||||
COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6
|
COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
package discover
|
package discover
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
@ -59,6 +60,8 @@ func cudaVariant(gpuInfo CudaGPUInfo) string {
|
||||||
|
|
||||||
// driver 12.0 has problems with the cuda v12 library, so run v11 on those older drivers
|
// driver 12.0 has problems with the cuda v12 library, so run v11 on those older drivers
|
||||||
if gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) {
|
if gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) {
|
||||||
|
// The detected driver is older than Feb 2023
|
||||||
|
slog.Warn("old CUDA driver detected - please upgrade to a newer driver", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor))
|
||||||
return "v11"
|
return "v11"
|
||||||
}
|
}
|
||||||
return "v12"
|
return "v12"
|
||||||
|
|
|
@ -12,7 +12,7 @@ import (
|
||||||
// '../lib/ollama' on Linux and the executable's directory on macOS
|
// '../lib/ollama' on Linux and the executable's directory on macOS
|
||||||
// note: distribution builds, additional GPU-specific libraries are
|
// note: distribution builds, additional GPU-specific libraries are
|
||||||
// found in subdirectories of the returned path, such as
|
// found in subdirectories of the returned path, such as
|
||||||
// 'cuda_v11', 'cuda_v12', 'rocm', etc.
|
// 'cuda_v12', 'rocm', etc.
|
||||||
var LibOllamaPath string = func() string {
|
var LibOllamaPath string = func() string {
|
||||||
exe, err := os.Executable()
|
exe, err := os.Executable()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# GPU
|
# GPU
|
||||||
## Nvidia
|
## Nvidia
|
||||||
Ollama supports Nvidia GPUs with compute capability 5.0+.
|
Ollama supports Nvidia GPUs with compute capability 5.0+ and driver version 531 and newer.
|
||||||
|
|
||||||
Check your compute compatibility to see if your card is supported:
|
Check your compute compatibility to see if your card is supported:
|
||||||
[https://developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus)
|
[https://developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
|
@ -43,7 +43,7 @@ Ollama includes multiple LLM libraries compiled for different GPUs and CPU vecto
|
||||||
In the server log, you will see a message that looks something like this (varies from release to release):
|
In the server log, you will see a message that looks something like this (varies from release to release):
|
||||||
|
|
||||||
```
|
```
|
||||||
Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5]
|
Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5]
|
||||||
```
|
```
|
||||||
|
|
||||||
**Experimental LLM Library Override**
|
**Experimental LLM Library Override**
|
||||||
|
|
|
@ -286,7 +286,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
|
||||||
params = append(params, "--mmproj", projectors[0])
|
params = append(params, "--mmproj", projectors[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
// iterate through compatible GPU libraries such as 'cuda_v12', 'cuda_v11', 'rocm', etc.
|
// iterate through compatible GPU libraries such as 'cuda_v12', 'rocm', etc.
|
||||||
// adding each library's respective path to the LD_LIBRARY_PATH, until finally running
|
// adding each library's respective path to the LD_LIBRARY_PATH, until finally running
|
||||||
// without any LD_LIBRARY_PATH flags
|
// without any LD_LIBRARY_PATH flags
|
||||||
for {
|
for {
|
||||||
|
|
|
@ -27,7 +27,6 @@ function checkEnv() {
|
||||||
$env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
|
$env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
|
||||||
}
|
}
|
||||||
# Locate CUDA versions
|
# Locate CUDA versions
|
||||||
# Note: this assumes every version found will be built
|
|
||||||
$cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
|
$cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
|
||||||
if ($cudaList.length -eq 0) {
|
if ($cudaList.length -eq 0) {
|
||||||
$d=(get-command -ea 'silentlycontinue' nvcc).path
|
$d=(get-command -ea 'silentlycontinue' nvcc).path
|
||||||
|
@ -94,19 +93,6 @@ function buildOllama() {
|
||||||
|
|
||||||
$hashEnv = @{}
|
$hashEnv = @{}
|
||||||
Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
|
Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
|
||||||
if ("$script:CUDA_DIRS".Contains("v11")) {
|
|
||||||
$hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
|
|
||||||
$env:CUDAToolkit_ROOT=$hashEnv[$v11]
|
|
||||||
write-host "Building CUDA v11 backend libraries"
|
|
||||||
# Note: cuda v11 requires msvc 2019 so force the older generator
|
|
||||||
# to avoid 2022 (or newer) from being used as the default
|
|
||||||
& cmake --fresh --preset "CUDA 11" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR
|
|
||||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
|
||||||
& cmake --build --preset "CUDA 11" --config Release --parallel $script:JOBS
|
|
||||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
|
||||||
& cmake --install build --component "CUDA" --strip
|
|
||||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
|
||||||
}
|
|
||||||
if ("$script:CUDA_DIRS".Contains("v12")) {
|
if ("$script:CUDA_DIRS".Contains("v12")) {
|
||||||
$hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }}
|
$hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }}
|
||||||
$env:CUDAToolkit_ROOT=$hashEnv[$v12]
|
$env:CUDAToolkit_ROOT=$hashEnv[$v12]
|
||||||
|
|
|
@ -10,9 +10,7 @@ OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION \
|
||||||
--build-arg=GOFLAGS \
|
--build-arg=GOFLAGS \
|
||||||
--build-arg=OLLAMA_CUSTOM_CPU_DEFS \
|
--build-arg=OLLAMA_CUSTOM_CPU_DEFS \
|
||||||
--build-arg=OLLAMA_SKIP_CUDA_GENERATE \
|
--build-arg=OLLAMA_SKIP_CUDA_GENERATE \
|
||||||
--build-arg=OLLAMA_SKIP_CUDA_11_GENERATE \
|
|
||||||
--build-arg=OLLAMA_SKIP_CUDA_12_GENERATE \
|
--build-arg=OLLAMA_SKIP_CUDA_12_GENERATE \
|
||||||
--build-arg=CUDA_V11_ARCHITECTURES \
|
|
||||||
--build-arg=CUDA_V12_ARCHITECTURES \
|
--build-arg=CUDA_V12_ARCHITECTURES \
|
||||||
--build-arg=OLLAMA_SKIP_ROCM_GENERATE \
|
--build-arg=OLLAMA_SKIP_ROCM_GENERATE \
|
||||||
--build-arg=OLLAMA_FAST_BUILD \
|
--build-arg=OLLAMA_FAST_BUILD \
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue