mirror of
https://github.com/ollama/ollama.git
synced 2025-05-11 18:36:41 +02:00
Better nvidia GPU discovery logging
Refine the way we log GPU discovery to improve the non-debug output, and report more actionable log messages when possible to help users troubleshoot on their own.
This commit is contained in:
parent
e5352297d9
commit
ef757da2c9
4 changed files with 51 additions and 23 deletions
23
gpu/gpu.go
23
gpu/gpu.go
|
@ -202,7 +202,7 @@ func GetGPUInfo() GpuInfoList {
|
|||
}()
|
||||
|
||||
if !bootstrapped {
|
||||
slog.Debug("Detecting GPUs")
|
||||
slog.Info("looking for compatible GPUs")
|
||||
needRefresh = false
|
||||
cpuCapability = GetCPUCapability()
|
||||
var memInfo C.mem_info_t
|
||||
|
@ -320,6 +320,9 @@ func GetGPUInfo() GpuInfoList {
|
|||
|
||||
rocmGPUs = AMDGetGPUInfo()
|
||||
bootstrapped = true
|
||||
if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
|
||||
slog.Info("no compatible GPUs were discovered")
|
||||
}
|
||||
}
|
||||
|
||||
// For detected GPUs, load library if not loaded
|
||||
|
@ -514,7 +517,23 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
|
|||
defer C.free(unsafe.Pointer(lib))
|
||||
C.nvcuda_init(lib, &resp)
|
||||
if resp.err != nil {
|
||||
slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
|
||||
// Decide what log level based on the type of error message to help users understand why
|
||||
msg := C.GoString(resp.err)
|
||||
switch resp.cudaErr {
|
||||
case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
|
||||
slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
|
||||
case C.CUDA_ERROR_NO_DEVICE:
|
||||
slog.Info("no nvidia devices detected", "library", libPath)
|
||||
case C.CUDA_ERROR_UNKNOWN:
|
||||
slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
|
||||
slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
|
||||
default:
|
||||
if strings.Contains(msg, "wrong ELF class") {
|
||||
slog.Debug("skipping 32bit library", "library", libPath)
|
||||
} else {
|
||||
slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
|
||||
}
|
||||
}
|
||||
C.free(unsafe.Pointer(resp.err))
|
||||
} else {
|
||||
return int(resp.num_devices), &resp.ch, libPath
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue