More logging for GPU management

Fix an ordering glitch in the dlerr/dlclose calls and add more logging to
help root-cause some crashes users are hitting. This also renames the
function pointers to use the underlying function names instead
of simplified names, for readability.
This commit is contained in:
Daniel Hiltgen 2024-01-24 10:32:00 -08:00
parent f63dc2db5c
commit 013fd07139
5 changed files with 61 additions and 44 deletions

View file

@@ -40,11 +40,13 @@ var CudaLinuxGlobs = []string{
"/usr/lib/wsl/lib/libnvidia-ml.so*",
"/usr/lib/wsl/drivers/*/libnvidia-ml.so*",
"/opt/cuda/lib64/libnvidia-ml.so*",
"/opt/cuda/targets/x86_64-linux/lib/stubs/libnvidia-ml.so*",
"/usr/lib*/libnvidia-ml.so*",
"/usr/local/lib*/libnvidia-ml.so*",
"/usr/lib/aarch64-linux-gnu/nvidia/current/libnvidia-ml.so*",
"/usr/lib/aarch64-linux-gnu/libnvidia-ml.so*",
// TODO: are these stubs ever valid?
"/opt/cuda/targets/x86_64-linux/lib/stubs/libnvidia-ml.so*",
}
var CudaWindowsGlobs = []string{