Report more information about GPUs in verbose mode

This adds additional calls to both CUDA and ROCm management libraries to
discover additional attributes about the GPU(s) detected in the system, and
wires up runtime verbosity selection.  When users hit problems with GPUs we can
ask them to run with `OLLAMA_DEBUG=1 ollama serve` and share the results.
This commit is contained in:
Daniel Hiltgen 2024-01-22 16:03:32 -08:00
parent 0759d8996e
commit 987c16b2f7
6 changed files with 171 additions and 30 deletions

View file

@ -259,6 +259,7 @@ func FindGPULibs(baseLibName string, patterns []string) []string {
func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t {
var resp C.cuda_init_resp_t
resp.ch.verbose = getVerboseState()
for _, libPath := range cudaLibPaths {
lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib))
@ -275,6 +276,7 @@ func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t {
func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t {
var resp C.rocm_init_resp_t
resp.rh.verbose = getVerboseState()
for _, libPath := range rocmLibPaths {
lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib))
@ -288,3 +290,10 @@ func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t {
}
return nil
}
func getVerboseState() C.uint16_t {
if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
return C.uint16_t(1)
}
return C.uint16_t(0)
}