##***************************************************************************** # AUTHOR: # Michael Hinton # # SYNOPSIS: # X_AC_NVML # # DESCRIPTION: # Determine if NVIDIA's NVML API library exists (comes with CUDA) ##***************************************************************************** # TODO: Check for the "CUDA_DEVICE_ORDER=PCI_BUS_ID" environmental var # If that is not set, emit a warning and point to the documentation # saying that this needs to be set for CUDA device numbers to match Slurm/NVML # device numbers, and that after setting, a reboot is required? # TODO: Check to make sure that nvidia driver is at least r384.40, or else there # is weirdness with PCI bus id lookups. See https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g9dc7be8cb41b6c77552c0fa0c36557c4 AC_DEFUN([X_AC_NVML], [ AC_ARG_WITH( [nvml], AS_HELP_STRING(--without-nvml, Do not build NVIDIA NVML-related code), [] ) if [test "x$with_nvml" = xno]; then AC_MSG_WARN([support for nvml disabled]) else # /usr/local/cuda/include is the main location. Others are just in case nvml_includes="-I/usr/local/cuda/include -I/usr/cuda/include" # Check for NVML header and library in the default locations AC_MSG_RESULT([]) cppflags_save="$CPPFLAGS" CPPFLAGS="$nvml_includes $CPPFLAGS" AC_CHECK_HEADER([nvml.h], [ac_nvml_h=yes], [ac_nvml_h=no]) AC_CHECK_LIB([nvidia-ml], [nvmlInit], [ac_nvml=yes], [ac_nvml=no]) CPPFLAGS="$cppflags_save" if test "$ac_nvml" = "yes" && test "$ac_nvml_h" = "yes"; then NVML_LIBS="-lnvidia-ml" NVML_CPPFLAGS="$nvml_includes" AC_DEFINE(HAVE_NVML, 1, [Define to 1 if NVML library found]) else AC_MSG_WARN([unable to locate libnvidia-ml.so and/or nvml.h]) fi AC_SUBST(NVML_LIBS) AC_SUBST(NVML_CPPFLAGS) fi AM_CONDITIONAL(BUILD_NVML, test "$ac_nvml" = "yes" && test "$ac_nvml_h" = "yes") ])