diff --git a/go.mod b/go.mod index 61d968b65..2ffbf50b7 100644 --- a/go.mod +++ b/go.mod @@ -4,8 +4,8 @@ go 1.22.2 require ( github.com/NVIDIA/go-gpuallocator v0.5.0 - github.com/NVIDIA/go-nvlib v0.7.2 - github.com/NVIDIA/go-nvml v0.12.4-1 + github.com/NVIDIA/go-nvlib v0.7.3 + github.com/NVIDIA/go-nvml v0.12.9-0 github.com/NVIDIA/nvidia-container-toolkit v1.17.8 github.com/fsnotify/fsnotify v1.7.0 github.com/google/renameio v1.0.1 diff --git a/go.sum b/go.sum index 7a1761ca2..58a17b3cb 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,9 @@ github.com/NVIDIA/go-gpuallocator v0.5.0 h1:166ICvPv2dU9oZ2J3kJ4y3XdbGCi6LhXgFZJtrqeu3A= github.com/NVIDIA/go-gpuallocator v0.5.0/go.mod h1:zos5bTIN01hpQioOyu9oRKglrznImMQvm0bZllMmckw= -github.com/NVIDIA/go-nvlib v0.7.2 h1:7sy/NVUa4sM9FLKwH6CjBfHSWrJUmv8emVyxLTzjfOA= -github.com/NVIDIA/go-nvlib v0.7.2/go.mod h1:2Kh2kYSP5IJ8EKf0/SYDzHiQKb9EJkwOf2LQzu6pXzY= -github.com/NVIDIA/go-nvml v0.12.4-1 h1:WKUvqshhWSNTfm47ETRhv0A0zJyr1ncCuHiXwoTrBEc= -github.com/NVIDIA/go-nvml v0.12.4-1/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ= +github.com/NVIDIA/go-nvlib v0.7.3 h1:kXc8PkWUlrwedSpM4fR8xT/DAq1NKy8HqhpgteFcGAw= +github.com/NVIDIA/go-nvlib v0.7.3/go.mod h1:i95Je7GinMy/+BDs++DAdbPmT2TubjNP8i8joC7DD7I= +github.com/NVIDIA/go-nvml v0.12.9-0 h1:e344UK8ZkeMeeLkdQtRhmXRxNf+u532LDZPGMtkdus0= +github.com/NVIDIA/go-nvml v0.12.9-0/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/NVIDIA/nvidia-container-toolkit v1.17.8 h1:ndE23TKvQBicsZT88mzZudygn6JNOe6+UsIgqk6gGvw= github.com/NVIDIA/nvidia-container-toolkit v1.17.8/go.mod h1:khOgMW80+g8eX/1zPlO4demLShHht9I0YEm8ngcPgwk= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/device/device.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/device/device.go index ea3332fd8..f00d1ac65 100644 --- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/device/device.go +++ 
b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/device/device.go @@ -89,6 +89,10 @@ func (d *device) GetArchitectureAsString() (string, error) { return "Ada Lovelace", nil case nvml.DEVICE_ARCH_HOPPER: return "Hopper", nil + case nvml.DEVICE_ARCH_BLACKWELL: + return "Blackwell", nil + case nvml.DEVICE_ARCH_T23X: + return "Orin", nil case nvml.DEVICE_ARCH_UNKNOWN: return "Unknown", nil } diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go index 1ccb5016b..09e82fc6b 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go @@ -52,6 +52,10 @@ const ( MAX_PHYSICAL_BRIDGE = 128 // MAX_THERMAL_SENSORS_PER_GPU as defined in nvml/nvml.h MAX_THERMAL_SENSORS_PER_GPU = 3 + // DEVICE_UUID_ASCII_LEN as defined in nvml/nvml.h + DEVICE_UUID_ASCII_LEN = 41 + // DEVICE_UUID_BINARY_LEN as defined in nvml/nvml.h + DEVICE_UUID_BINARY_LEN = 16 // FlagDefault as defined in nvml/nvml.h FlagDefault = 0 // FlagForce as defined in nvml/nvml.h @@ -62,54 +66,8 @@ const ( DOUBLE_BIT_ECC = 0 // MAX_GPU_PERF_PSTATES as defined in nvml/nvml.h MAX_GPU_PERF_PSTATES = 16 - // GRID_LICENSE_EXPIRY_NOT_AVAILABLE as defined in nvml/nvml.h - GRID_LICENSE_EXPIRY_NOT_AVAILABLE = 0 - // GRID_LICENSE_EXPIRY_INVALID as defined in nvml/nvml.h - GRID_LICENSE_EXPIRY_INVALID = 1 - // GRID_LICENSE_EXPIRY_VALID as defined in nvml/nvml.h - GRID_LICENSE_EXPIRY_VALID = 2 - // GRID_LICENSE_EXPIRY_NOT_APPLICABLE as defined in nvml/nvml.h - GRID_LICENSE_EXPIRY_NOT_APPLICABLE = 3 - // GRID_LICENSE_EXPIRY_PERMANENT as defined in nvml/nvml.h - GRID_LICENSE_EXPIRY_PERMANENT = 4 - // GRID_LICENSE_BUFFER_SIZE as defined in nvml/nvml.h - GRID_LICENSE_BUFFER_SIZE = 128 - // VGPU_NAME_BUFFER_SIZE as defined in nvml/nvml.h - VGPU_NAME_BUFFER_SIZE = 64 - // GRID_LICENSE_FEATURE_MAX_COUNT as defined in nvml/nvml.h - GRID_LICENSE_FEATURE_MAX_COUNT = 3 - // INVALID_VGPU_PLACEMENT_ID as defined in nvml/nvml.h - 
INVALID_VGPU_PLACEMENT_ID = 65535 - // VGPU_SCHEDULER_POLICY_UNKNOWN as defined in nvml/nvml.h - VGPU_SCHEDULER_POLICY_UNKNOWN = 0 - // VGPU_SCHEDULER_POLICY_BEST_EFFORT as defined in nvml/nvml.h - VGPU_SCHEDULER_POLICY_BEST_EFFORT = 1 - // VGPU_SCHEDULER_POLICY_EQUAL_SHARE as defined in nvml/nvml.h - VGPU_SCHEDULER_POLICY_EQUAL_SHARE = 2 - // VGPU_SCHEDULER_POLICY_FIXED_SHARE as defined in nvml/nvml.h - VGPU_SCHEDULER_POLICY_FIXED_SHARE = 3 - // SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT as defined in nvml/nvml.h - SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT = 3 - // SCHEDULER_SW_MAX_LOG_ENTRIES as defined in nvml/nvml.h - SCHEDULER_SW_MAX_LOG_ENTRIES = 200 - // VGPU_SCHEDULER_ARR_DEFAULT as defined in nvml/nvml.h - VGPU_SCHEDULER_ARR_DEFAULT = 0 - // VGPU_SCHEDULER_ARR_DISABLE as defined in nvml/nvml.h - VGPU_SCHEDULER_ARR_DISABLE = 1 - // VGPU_SCHEDULER_ARR_ENABLE as defined in nvml/nvml.h - VGPU_SCHEDULER_ARR_ENABLE = 2 - // GRID_LICENSE_STATE_UNKNOWN as defined in nvml/nvml.h - GRID_LICENSE_STATE_UNKNOWN = 0 - // GRID_LICENSE_STATE_UNINITIALIZED as defined in nvml/nvml.h - GRID_LICENSE_STATE_UNINITIALIZED = 1 - // GRID_LICENSE_STATE_UNLICENSED_UNRESTRICTED as defined in nvml/nvml.h - GRID_LICENSE_STATE_UNLICENSED_UNRESTRICTED = 2 - // GRID_LICENSE_STATE_UNLICENSED_RESTRICTED as defined in nvml/nvml.h - GRID_LICENSE_STATE_UNLICENSED_RESTRICTED = 3 - // GRID_LICENSE_STATE_UNLICENSED as defined in nvml/nvml.h - GRID_LICENSE_STATE_UNLICENSED = 4 - // GRID_LICENSE_STATE_LICENSED as defined in nvml/nvml.h - GRID_LICENSE_STATE_LICENSED = 5 + // PERF_MODES_BUFFER_SIZE as defined in nvml/nvml.h + PERF_MODES_BUFFER_SIZE = 2048 // GSP_FIRMWARE_VERSION_BUF_SIZE as defined in nvml/nvml.h GSP_FIRMWARE_VERSION_BUF_SIZE = 64 // DEVICE_ARCH_KEPLER as defined in nvml/nvml.h @@ -128,6 +86,10 @@ const ( DEVICE_ARCH_ADA = 8 // DEVICE_ARCH_HOPPER as defined in nvml/nvml.h DEVICE_ARCH_HOPPER = 9 + // DEVICE_ARCH_BLACKWELL as defined in nvml/nvml.h + DEVICE_ARCH_BLACKWELL = 10 + // 
DEVICE_ARCH_T23X as defined in nvml/nvml.h + DEVICE_ARCH_T23X = 11 // DEVICE_ARCH_UNKNOWN as defined in nvml/nvml.h DEVICE_ARCH_UNKNOWN = 4294967295 // BUS_TYPE_UNKNOWN as defined in nvml/nvml.h @@ -170,6 +132,82 @@ const ( ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED = 1 // MAX_GPU_UTILIZATIONS as defined in nvml/nvml.h MAX_GPU_UTILIZATIONS = 8 + // PCIE_ATOMICS_CAP_FETCHADD32 as defined in nvml/nvml.h + PCIE_ATOMICS_CAP_FETCHADD32 = 1 + // PCIE_ATOMICS_CAP_FETCHADD64 as defined in nvml/nvml.h + PCIE_ATOMICS_CAP_FETCHADD64 = 2 + // PCIE_ATOMICS_CAP_SWAP32 as defined in nvml/nvml.h + PCIE_ATOMICS_CAP_SWAP32 = 4 + // PCIE_ATOMICS_CAP_SWAP64 as defined in nvml/nvml.h + PCIE_ATOMICS_CAP_SWAP64 = 8 + // PCIE_ATOMICS_CAP_CAS32 as defined in nvml/nvml.h + PCIE_ATOMICS_CAP_CAS32 = 16 + // PCIE_ATOMICS_CAP_CAS64 as defined in nvml/nvml.h + PCIE_ATOMICS_CAP_CAS64 = 32 + // PCIE_ATOMICS_CAP_CAS128 as defined in nvml/nvml.h + PCIE_ATOMICS_CAP_CAS128 = 64 + // PCIE_ATOMICS_OPS_MAX as defined in nvml/nvml.h + PCIE_ATOMICS_OPS_MAX = 7 + // POWER_SCOPE_GPU as defined in nvml/nvml.h + POWER_SCOPE_GPU = 0 + // POWER_SCOPE_MODULE as defined in nvml/nvml.h + POWER_SCOPE_MODULE = 1 + // POWER_SCOPE_MEMORY as defined in nvml/nvml.h + POWER_SCOPE_MEMORY = 2 + // GRID_LICENSE_EXPIRY_NOT_AVAILABLE as defined in nvml/nvml.h + GRID_LICENSE_EXPIRY_NOT_AVAILABLE = 0 + // GRID_LICENSE_EXPIRY_INVALID as defined in nvml/nvml.h + GRID_LICENSE_EXPIRY_INVALID = 1 + // GRID_LICENSE_EXPIRY_VALID as defined in nvml/nvml.h + GRID_LICENSE_EXPIRY_VALID = 2 + // GRID_LICENSE_EXPIRY_NOT_APPLICABLE as defined in nvml/nvml.h + GRID_LICENSE_EXPIRY_NOT_APPLICABLE = 3 + // GRID_LICENSE_EXPIRY_PERMANENT as defined in nvml/nvml.h + GRID_LICENSE_EXPIRY_PERMANENT = 4 + // GRID_LICENSE_BUFFER_SIZE as defined in nvml/nvml.h + GRID_LICENSE_BUFFER_SIZE = 128 + // VGPU_NAME_BUFFER_SIZE as defined in nvml/nvml.h + VGPU_NAME_BUFFER_SIZE = 64 + // GRID_LICENSE_FEATURE_MAX_COUNT as defined in nvml/nvml.h + 
GRID_LICENSE_FEATURE_MAX_COUNT = 3 + // INVALID_VGPU_PLACEMENT_ID as defined in nvml/nvml.h + INVALID_VGPU_PLACEMENT_ID = 65535 + // VGPU_PGPU_HETEROGENEOUS_MODE as defined in nvml/nvml.h + VGPU_PGPU_HETEROGENEOUS_MODE = 0 + // VGPU_PGPU_HOMOGENEOUS_MODE as defined in nvml/nvml.h + VGPU_PGPU_HOMOGENEOUS_MODE = 1 + // VGPU_SCHEDULER_POLICY_UNKNOWN as defined in nvml/nvml.h + VGPU_SCHEDULER_POLICY_UNKNOWN = 0 + // VGPU_SCHEDULER_POLICY_BEST_EFFORT as defined in nvml/nvml.h + VGPU_SCHEDULER_POLICY_BEST_EFFORT = 1 + // VGPU_SCHEDULER_POLICY_EQUAL_SHARE as defined in nvml/nvml.h + VGPU_SCHEDULER_POLICY_EQUAL_SHARE = 2 + // VGPU_SCHEDULER_POLICY_FIXED_SHARE as defined in nvml/nvml.h + VGPU_SCHEDULER_POLICY_FIXED_SHARE = 3 + // SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT as defined in nvml/nvml.h + SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT = 3 + // SCHEDULER_SW_MAX_LOG_ENTRIES as defined in nvml/nvml.h + SCHEDULER_SW_MAX_LOG_ENTRIES = 200 + // VGPU_SCHEDULER_ARR_DEFAULT as defined in nvml/nvml.h + VGPU_SCHEDULER_ARR_DEFAULT = 0 + // VGPU_SCHEDULER_ARR_DISABLE as defined in nvml/nvml.h + VGPU_SCHEDULER_ARR_DISABLE = 1 + // VGPU_SCHEDULER_ARR_ENABLE as defined in nvml/nvml.h + VGPU_SCHEDULER_ARR_ENABLE = 2 + // VGPU_SCHEDULER_ENGINE_TYPE_GRAPHICS as defined in nvml/nvml.h + VGPU_SCHEDULER_ENGINE_TYPE_GRAPHICS = 1 + // GRID_LICENSE_STATE_UNKNOWN as defined in nvml/nvml.h + GRID_LICENSE_STATE_UNKNOWN = 0 + // GRID_LICENSE_STATE_UNINITIALIZED as defined in nvml/nvml.h + GRID_LICENSE_STATE_UNINITIALIZED = 1 + // GRID_LICENSE_STATE_UNLICENSED_UNRESTRICTED as defined in nvml/nvml.h + GRID_LICENSE_STATE_UNLICENSED_UNRESTRICTED = 2 + // GRID_LICENSE_STATE_UNLICENSED_RESTRICTED as defined in nvml/nvml.h + GRID_LICENSE_STATE_UNLICENSED_RESTRICTED = 3 + // GRID_LICENSE_STATE_UNLICENSED as defined in nvml/nvml.h + GRID_LICENSE_STATE_UNLICENSED = 4 + // GRID_LICENSE_STATE_LICENSED as defined in nvml/nvml.h + GRID_LICENSE_STATE_LICENSED = 5 // FI_DEV_ECC_CURRENT as defined in nvml/nvml.h 
FI_DEV_ECC_CURRENT = 1 // FI_DEV_ECC_PENDING as defined in nvml/nvml.h @@ -562,10 +600,188 @@ const ( FI_DEV_TEMPERATURE_MEM_MAX_TLIMIT = 195 // FI_DEV_TEMPERATURE_GPU_MAX_TLIMIT as defined in nvml/nvml.h FI_DEV_TEMPERATURE_GPU_MAX_TLIMIT = 196 + // FI_DEV_PCIE_COUNT_TX_BYTES as defined in nvml/nvml.h + FI_DEV_PCIE_COUNT_TX_BYTES = 197 + // FI_DEV_PCIE_COUNT_RX_BYTES as defined in nvml/nvml.h + FI_DEV_PCIE_COUNT_RX_BYTES = 198 // FI_DEV_IS_MIG_MODE_INDEPENDENT_MIG_QUERY_CAPABLE as defined in nvml/nvml.h FI_DEV_IS_MIG_MODE_INDEPENDENT_MIG_QUERY_CAPABLE = 199 + // FI_DEV_NVLINK_GET_POWER_THRESHOLD_MAX as defined in nvml/nvml.h + FI_DEV_NVLINK_GET_POWER_THRESHOLD_MAX = 200 + // FI_DEV_NVLINK_COUNT_XMIT_PACKETS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_XMIT_PACKETS = 201 + // FI_DEV_NVLINK_COUNT_XMIT_BYTES as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_XMIT_BYTES = 202 + // FI_DEV_NVLINK_COUNT_RCV_PACKETS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_RCV_PACKETS = 203 + // FI_DEV_NVLINK_COUNT_RCV_BYTES as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_RCV_BYTES = 204 + // FI_DEV_NVLINK_COUNT_VL15_DROPPED as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_VL15_DROPPED = 205 + // FI_DEV_NVLINK_COUNT_MALFORMED_PACKET_ERRORS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_MALFORMED_PACKET_ERRORS = 206 + // FI_DEV_NVLINK_COUNT_BUFFER_OVERRUN_ERRORS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_BUFFER_OVERRUN_ERRORS = 207 + // FI_DEV_NVLINK_COUNT_RCV_ERRORS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_RCV_ERRORS = 208 + // FI_DEV_NVLINK_COUNT_RCV_REMOTE_ERRORS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_RCV_REMOTE_ERRORS = 209 + // FI_DEV_NVLINK_COUNT_RCV_GENERAL_ERRORS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_RCV_GENERAL_ERRORS = 210 + // FI_DEV_NVLINK_COUNT_LOCAL_LINK_INTEGRITY_ERRORS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_LOCAL_LINK_INTEGRITY_ERRORS = 211 + // FI_DEV_NVLINK_COUNT_XMIT_DISCARDS as defined in nvml/nvml.h + 
FI_DEV_NVLINK_COUNT_XMIT_DISCARDS = 212 + // FI_DEV_NVLINK_COUNT_LINK_RECOVERY_SUCCESSFUL_EVENTS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_LINK_RECOVERY_SUCCESSFUL_EVENTS = 213 + // FI_DEV_NVLINK_COUNT_LINK_RECOVERY_FAILED_EVENTS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_LINK_RECOVERY_FAILED_EVENTS = 214 + // FI_DEV_NVLINK_COUNT_LINK_RECOVERY_EVENTS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_LINK_RECOVERY_EVENTS = 215 + // FI_DEV_NVLINK_COUNT_RAW_BER_LANE0 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_RAW_BER_LANE0 = 216 + // FI_DEV_NVLINK_COUNT_RAW_BER_LANE1 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_RAW_BER_LANE1 = 217 + // FI_DEV_NVLINK_COUNT_RAW_BER as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_RAW_BER = 218 + // FI_DEV_NVLINK_COUNT_EFFECTIVE_ERRORS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_EFFECTIVE_ERRORS = 219 + // FI_DEV_NVLINK_COUNT_EFFECTIVE_BER as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_EFFECTIVE_BER = 220 + // FI_DEV_NVLINK_COUNT_SYMBOL_ERRORS as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_SYMBOL_ERRORS = 221 + // FI_DEV_NVLINK_COUNT_SYMBOL_BER as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_SYMBOL_BER = 222 + // FI_DEV_NVLINK_GET_POWER_THRESHOLD_MIN as defined in nvml/nvml.h + FI_DEV_NVLINK_GET_POWER_THRESHOLD_MIN = 223 + // FI_DEV_NVLINK_GET_POWER_THRESHOLD_UNITS as defined in nvml/nvml.h + FI_DEV_NVLINK_GET_POWER_THRESHOLD_UNITS = 224 + // FI_DEV_NVLINK_GET_POWER_THRESHOLD_SUPPORTED as defined in nvml/nvml.h + FI_DEV_NVLINK_GET_POWER_THRESHOLD_SUPPORTED = 225 + // FI_DEV_RESET_STATUS as defined in nvml/nvml.h + FI_DEV_RESET_STATUS = 226 + // FI_DEV_DRAIN_AND_RESET_STATUS as defined in nvml/nvml.h + FI_DEV_DRAIN_AND_RESET_STATUS = 227 + // FI_DEV_PCIE_OUTBOUND_ATOMICS_MASK as defined in nvml/nvml.h + FI_DEV_PCIE_OUTBOUND_ATOMICS_MASK = 228 + // FI_DEV_PCIE_INBOUND_ATOMICS_MASK as defined in nvml/nvml.h + FI_DEV_PCIE_INBOUND_ATOMICS_MASK = 229 + // FI_DEV_GET_GPU_RECOVERY_ACTION as defined in nvml/nvml.h + 
FI_DEV_GET_GPU_RECOVERY_ACTION = 230 + // FI_DEV_C2C_LINK_ERROR_INTR as defined in nvml/nvml.h + FI_DEV_C2C_LINK_ERROR_INTR = 231 + // FI_DEV_C2C_LINK_ERROR_REPLAY as defined in nvml/nvml.h + FI_DEV_C2C_LINK_ERROR_REPLAY = 232 + // FI_DEV_C2C_LINK_ERROR_REPLAY_B2B as defined in nvml/nvml.h + FI_DEV_C2C_LINK_ERROR_REPLAY_B2B = 233 + // FI_DEV_C2C_LINK_POWER_STATE as defined in nvml/nvml.h + FI_DEV_C2C_LINK_POWER_STATE = 234 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_0 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_0 = 235 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_1 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_1 = 236 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_2 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_2 = 237 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_3 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_3 = 238 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_4 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_4 = 239 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_5 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_5 = 240 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_6 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_6 = 241 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_7 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_7 = 242 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_8 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_8 = 243 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_9 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_9 = 244 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_10 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_10 = 245 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_11 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_11 = 246 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_12 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_12 = 247 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_13 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_13 = 248 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_14 as defined in nvml/nvml.h 
+ FI_DEV_NVLINK_COUNT_FEC_HISTORY_14 = 249 + // FI_DEV_NVLINK_COUNT_FEC_HISTORY_15 as defined in nvml/nvml.h + FI_DEV_NVLINK_COUNT_FEC_HISTORY_15 = 250 + // FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP as defined in nvml/nvml.h + FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP = 74 + // FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST as defined in nvml/nvml.h + FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST = 76 + // FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN as defined in nvml/nvml.h + FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN = 251 + // FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN as defined in nvml/nvml.h + FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN = 252 + // FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN as defined in nvml/nvml.h + FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN = 253 + // FI_DEV_POWER_SYNC_BALANCING_FREQ as defined in nvml/nvml.h + FI_DEV_POWER_SYNC_BALANCING_FREQ = 254 + // FI_DEV_POWER_SYNC_BALANCING_AF as defined in nvml/nvml.h + FI_DEV_POWER_SYNC_BALANCING_AF = 255 + // FI_PWR_SMOOTHING_ENABLED as defined in nvml/nvml.h + FI_PWR_SMOOTHING_ENABLED = 256 + // FI_PWR_SMOOTHING_PRIV_LVL as defined in nvml/nvml.h + FI_PWR_SMOOTHING_PRIV_LVL = 257 + // FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED as defined in nvml/nvml.h + FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED = 258 + // FI_PWR_SMOOTHING_APPLIED_TMP_CEIL as defined in nvml/nvml.h + FI_PWR_SMOOTHING_APPLIED_TMP_CEIL = 259 + // FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR as defined in nvml/nvml.h + FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR = 260 + // FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING as defined in nvml/nvml.h + FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING = 261 + // FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING as defined in nvml/nvml.h + FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING = 262 + // FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING as defined in nvml/nvml.h + FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING = 263 + // FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES as defined in nvml/nvml.h + 
FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES = 264 + // FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR as defined in nvml/nvml.h + FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR = 265 + // FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE as defined in nvml/nvml.h + FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE = 266 + // FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE as defined in nvml/nvml.h + FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE = 267 + // FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL as defined in nvml/nvml.h + FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL = 268 + // FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE as defined in nvml/nvml.h + FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE = 269 + // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR as defined in nvml/nvml.h + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR = 270 + // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE as defined in nvml/nvml.h + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE = 271 + // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE as defined in nvml/nvml.h + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE = 272 + // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL as defined in nvml/nvml.h + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL = 273 // FI_MAX as defined in nvml/nvml.h - FI_MAX = 200 + FI_MAX = 274 + // NVLINK_LOW_POWER_THRESHOLD_UNIT_100US as defined in nvml/nvml.h + NVLINK_LOW_POWER_THRESHOLD_UNIT_100US = 0 + // NVLINK_LOW_POWER_THRESHOLD_UNIT_50US as defined in nvml/nvml.h + NVLINK_LOW_POWER_THRESHOLD_UNIT_50US = 1 + // NVLINK_POWER_STATE_HIGH_SPEED as defined in nvml/nvml.h + NVLINK_POWER_STATE_HIGH_SPEED = 0 + // NVLINK_POWER_STATE_LOW as defined in nvml/nvml.h + NVLINK_POWER_STATE_LOW = 1 + // NVLINK_LOW_POWER_THRESHOLD_MIN as defined in nvml/nvml.h + NVLINK_LOW_POWER_THRESHOLD_MIN = 1 + // NVLINK_LOW_POWER_THRESHOLD_MAX as defined in nvml/nvml.h + NVLINK_LOW_POWER_THRESHOLD_MAX = 8191 + // NVLINK_LOW_POWER_THRESHOLD_RESET as defined in nvml/nvml.h + NVLINK_LOW_POWER_THRESHOLD_RESET = 4294967295 + // 
NVLINK_LOW_POWER_THRESHOLD_DEFAULT as defined in nvml/nvml.h + NVLINK_LOW_POWER_THRESHOLD_DEFAULT = 4294967295 + // C2C_POWER_STATE_FULL_POWER as defined in nvml/nvml.h + C2C_POWER_STATE_FULL_POWER = 0 + // C2C_POWER_STATE_LOW_POWER as defined in nvml/nvml.h + C2C_POWER_STATE_LOW_POWER = 1 + // EventTypeNone as defined in nvml/nvml.h + EventTypeNone = 0 // EventTypeSingleBitEccError as defined in nvml/nvml.h EventTypeSingleBitEccError = 1 // EventTypeDoubleBitEccError as defined in nvml/nvml.h @@ -580,10 +796,28 @@ const ( EventTypePowerSourceChange = 128 // EventMigConfigChange as defined in nvml/nvml.h EventMigConfigChange = 256 - // EventTypeNone as defined in nvml/nvml.h - EventTypeNone = 0 + // EventTypeSingleBitEccErrorStorm as defined in nvml/nvml.h + EventTypeSingleBitEccErrorStorm = 512 + // EventTypeDramRetirementEvent as defined in nvml/nvml.h + EventTypeDramRetirementEvent = 1024 + // EventTypeDramRetirementFailure as defined in nvml/nvml.h + EventTypeDramRetirementFailure = 2048 + // EventTypeNonFatalPoisonError as defined in nvml/nvml.h + EventTypeNonFatalPoisonError = 4096 + // EventTypeFatalPoisonError as defined in nvml/nvml.h + EventTypeFatalPoisonError = 8192 + // EventTypeGpuUnavailableError as defined in nvml/nvml.h + EventTypeGpuUnavailableError = 16384 + // EventTypeGpuRecoveryAction as defined in nvml/nvml.h + EventTypeGpuRecoveryAction = 32768 // EventTypeAll as defined in nvml/nvml.h - EventTypeAll = 415 + EventTypeAll = 65439 + // SystemEventTypeGpuDriverUnbind as defined in nvml/nvml.h + SystemEventTypeGpuDriverUnbind = 1 + // SystemEventTypeGpuDriverBind as defined in nvml/nvml.h + SystemEventTypeGpuDriverBind = 2 + // SystemEventTypeCount as defined in nvml/nvml.h + SystemEventTypeCount = 2 // ClocksEventReasonGpuIdle as defined in nvml/nvml.h ClocksEventReasonGpuIdle = 1 // ClocksEventReasonApplicationsClocksSetting as defined in nvml/nvml.h @@ -640,6 +874,10 @@ const ( CC_SYSTEM_CPU_CAPS_AMD_SEV = 1 // CC_SYSTEM_CPU_CAPS_INTEL_TDX as 
defined in nvml/nvml.h CC_SYSTEM_CPU_CAPS_INTEL_TDX = 2 + // CC_SYSTEM_CPU_CAPS_AMD_SEV_SNP as defined in nvml/nvml.h + CC_SYSTEM_CPU_CAPS_AMD_SEV_SNP = 3 + // CC_SYSTEM_CPU_CAPS_AMD_SNP_VTOM as defined in nvml/nvml.h + CC_SYSTEM_CPU_CAPS_AMD_SNP_VTOM = 4 // CC_SYSTEM_GPUS_CC_NOT_CAPABLE as defined in nvml/nvml.h CC_SYSTEM_GPUS_CC_NOT_CAPABLE = 0 // CC_SYSTEM_GPUS_CC_CAPABLE as defined in nvml/nvml.h @@ -683,7 +921,7 @@ const ( // CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MIN as defined in nvml/nvml.h CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MIN = 50 // CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MAX as defined in nvml/nvml.h - CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MAX = 75 + CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MAX = 65 // GPU_FABRIC_UUID_LEN as defined in nvml/nvml.h GPU_FABRIC_UUID_LEN = 16 // GPU_FABRIC_STATE_NOT_SUPPORTED as defined in nvml/nvml.h @@ -703,13 +941,37 @@ const ( // GPU_FABRIC_HEALTH_MASK_SHIFT_DEGRADED_BW as defined in nvml/nvml.h GPU_FABRIC_HEALTH_MASK_SHIFT_DEGRADED_BW = 0 // GPU_FABRIC_HEALTH_MASK_WIDTH_DEGRADED_BW as defined in nvml/nvml.h - GPU_FABRIC_HEALTH_MASK_WIDTH_DEGRADED_BW = 17 - // POWER_SCOPE_GPU as defined in nvml/nvml.h - POWER_SCOPE_GPU = 0 - // POWER_SCOPE_MODULE as defined in nvml/nvml.h - POWER_SCOPE_MODULE = 1 - // POWER_SCOPE_MEMORY as defined in nvml/nvml.h - POWER_SCOPE_MEMORY = 2 + GPU_FABRIC_HEALTH_MASK_WIDTH_DEGRADED_BW = 3 + // GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_NOT_SUPPORTED as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_NOT_SUPPORTED = 0 + // GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_TRUE as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_TRUE = 1 + // GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_FALSE as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_FALSE = 2 + // GPU_FABRIC_HEALTH_MASK_SHIFT_ROUTE_RECOVERY as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_SHIFT_ROUTE_RECOVERY = 2 + // GPU_FABRIC_HEALTH_MASK_WIDTH_ROUTE_RECOVERY as defined in 
nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_WIDTH_ROUTE_RECOVERY = 3 + // GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_NOT_SUPPORTED as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_NOT_SUPPORTED = 0 + // GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_TRUE as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_TRUE = 1 + // GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_FALSE as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_FALSE = 2 + // GPU_FABRIC_HEALTH_MASK_SHIFT_ROUTE_UNHEALTHY as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_SHIFT_ROUTE_UNHEALTHY = 4 + // GPU_FABRIC_HEALTH_MASK_WIDTH_ROUTE_UNHEALTHY as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_WIDTH_ROUTE_UNHEALTHY = 3 + // GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_NOT_SUPPORTED as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_NOT_SUPPORTED = 0 + // GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_TRUE as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_TRUE = 1 + // GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_FALSE as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_FALSE = 2 + // GPU_FABRIC_HEALTH_MASK_SHIFT_ACCESS_TIMEOUT_RECOVERY as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_SHIFT_ACCESS_TIMEOUT_RECOVERY = 6 + // GPU_FABRIC_HEALTH_MASK_WIDTH_ACCESS_TIMEOUT_RECOVERY as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_WIDTH_ACCESS_TIMEOUT_RECOVERY = 3 // INIT_FLAG_NO_GPUS as defined in nvml/nvml.h INIT_FLAG_NO_GPUS = 1 // INIT_FLAG_NO_ATTACH as defined in nvml/nvml.h @@ -738,6 +1000,22 @@ const ( AFFINITY_SCOPE_NODE = 0 // AFFINITY_SCOPE_SOCKET as defined in nvml/nvml.h AFFINITY_SCOPE_SOCKET = 1 + // NVLINK_BER_MANTISSA_SHIFT as defined in nvml/nvml.h + NVLINK_BER_MANTISSA_SHIFT = 8 + // NVLINK_BER_MANTISSA_WIDTH as defined in nvml/nvml.h + NVLINK_BER_MANTISSA_WIDTH = 15 + // NVLINK_BER_EXP_SHIFT as defined in nvml/nvml.h + NVLINK_BER_EXP_SHIFT = 0 + // NVLINK_BER_EXP_WIDTH as defined in nvml/nvml.h 
+ NVLINK_BER_EXP_WIDTH = 255 + // NVLINK_STATE_INACTIVE as defined in nvml/nvml.h + NVLINK_STATE_INACTIVE = 0 + // NVLINK_STATE_ACTIVE as defined in nvml/nvml.h + NVLINK_STATE_ACTIVE = 1 + // NVLINK_STATE_SLEEP as defined in nvml/nvml.h + NVLINK_STATE_SLEEP = 2 + // NVLINK_TOTAL_SUPPORTED_BW_MODES as defined in nvml/nvml.h + NVLINK_TOTAL_SUPPORTED_BW_MODES = 23 // DEVICE_MIG_DISABLE as defined in nvml/nvml.h DEVICE_MIG_DISABLE = 0 // DEVICE_MIG_ENABLE as defined in nvml/nvml.h @@ -762,10 +1040,30 @@ const ( GPU_INSTANCE_PROFILE_2_SLICE_REV1 = 8 // GPU_INSTANCE_PROFILE_1_SLICE_REV2 as defined in nvml/nvml.h GPU_INSTANCE_PROFILE_1_SLICE_REV2 = 9 + // GPU_INSTANCE_PROFILE_1_SLICE_GFX as defined in nvml/nvml.h + GPU_INSTANCE_PROFILE_1_SLICE_GFX = 10 + // GPU_INSTANCE_PROFILE_2_SLICE_GFX as defined in nvml/nvml.h + GPU_INSTANCE_PROFILE_2_SLICE_GFX = 11 + // GPU_INSTANCE_PROFILE_4_SLICE_GFX as defined in nvml/nvml.h + GPU_INSTANCE_PROFILE_4_SLICE_GFX = 12 + // GPU_INSTANCE_PROFILE_1_SLICE_NO_ME as defined in nvml/nvml.h + GPU_INSTANCE_PROFILE_1_SLICE_NO_ME = 13 + // GPU_INSTANCE_PROFILE_2_SLICE_NO_ME as defined in nvml/nvml.h + GPU_INSTANCE_PROFILE_2_SLICE_NO_ME = 14 + // GPU_INSTANCE_PROFILE_1_SLICE_ALL_ME as defined in nvml/nvml.h + GPU_INSTANCE_PROFILE_1_SLICE_ALL_ME = 15 + // GPU_INSTANCE_PROFILE_2_SLICE_ALL_ME as defined in nvml/nvml.h + GPU_INSTANCE_PROFILE_2_SLICE_ALL_ME = 16 // GPU_INSTANCE_PROFILE_COUNT as defined in nvml/nvml.h - GPU_INSTANCE_PROFILE_COUNT = 10 + GPU_INSTANCE_PROFILE_COUNT = 17 + // GPU_INSTANCE_PROFILE_CAPS_P2P as defined in nvml/nvml.h + GPU_INSTANCE_PROFILE_CAPS_P2P = 1 // GPU_INTSTANCE_PROFILE_CAPS_P2P as defined in nvml/nvml.h GPU_INTSTANCE_PROFILE_CAPS_P2P = 1 + // GPU_INSTANCE_PROFILE_CAPS_GFX as defined in nvml/nvml.h + GPU_INSTANCE_PROFILE_CAPS_GFX = 2 + // COMPUTE_INSTANCE_PROFILE_CAPS_GFX as defined in nvml/nvml.h + COMPUTE_INSTANCE_PROFILE_CAPS_GFX = 1 // COMPUTE_INSTANCE_PROFILE_1_SLICE as defined in nvml/nvml.h 
COMPUTE_INSTANCE_PROFILE_1_SLICE = 0 // COMPUTE_INSTANCE_PROFILE_2_SLICE as defined in nvml/nvml.h @@ -792,16 +1090,24 @@ const ( GPM_METRICS_GET_VERSION = 1 // GPM_SUPPORT_VERSION as defined in nvml/nvml.h GPM_SUPPORT_VERSION = 1 - // NVLINK_POWER_STATE_HIGH_SPEED as defined in nvml/nvml.h - NVLINK_POWER_STATE_HIGH_SPEED = 0 - // NVLINK_POWER_STATE_LOW as defined in nvml/nvml.h - NVLINK_POWER_STATE_LOW = 1 - // NVLINK_LOW_POWER_THRESHOLD_MIN as defined in nvml/nvml.h - NVLINK_LOW_POWER_THRESHOLD_MIN = 1 - // NVLINK_LOW_POWER_THRESHOLD_MAX as defined in nvml/nvml.h - NVLINK_LOW_POWER_THRESHOLD_MAX = 8191 - // NVLINK_LOW_POWER_THRESHOLD_RESET as defined in nvml/nvml.h - NVLINK_LOW_POWER_THRESHOLD_RESET = 4294967295 + // DEV_CAP_EGM as defined in nvml/nvml.h + DEV_CAP_EGM = 1 + // WORKLOAD_POWER_MAX_PROFILES as defined in nvml/nvml.h + WORKLOAD_POWER_MAX_PROFILES = 255 + // POWER_SMOOTHING_MAX_NUM_PROFILES as defined in nvml/nvml.h + POWER_SMOOTHING_MAX_NUM_PROFILES = 5 + // POWER_SMOOTHING_NUM_PROFILE_PARAMS as defined in nvml/nvml.h + POWER_SMOOTHING_NUM_PROFILE_PARAMS = 4 + // POWER_SMOOTHING_ADMIN_OVERRIDE_NOT_SET as defined in nvml/nvml.h + POWER_SMOOTHING_ADMIN_OVERRIDE_NOT_SET = 4294967295 + // POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR as defined in nvml/nvml.h + POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR = 0 + // POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE as defined in nvml/nvml.h + POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE = 1 + // POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE as defined in nvml/nvml.h + POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE = 2 + // POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS as defined in nvml/nvml.h + POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS = 3 ) // BridgeChipType as declared in nvml/nvml.h @@ -960,7 +1266,8 @@ const ( VALUE_TYPE_UNSIGNED_LONG_LONG ValueType = 3 VALUE_TYPE_SIGNED_LONG_LONG ValueType = 4 VALUE_TYPE_SIGNED_INT ValueType = 5 - VALUE_TYPE_COUNT ValueType = 6 + VALUE_TYPE_UNSIGNED_SHORT 
ValueType = 6 + VALUE_TYPE_COUNT ValueType = 7 ) // PerfPolicyType as declared in nvml/nvml.h @@ -979,6 +1286,29 @@ const ( PERF_POLICY_COUNT PerfPolicyType = 12 ) +// CoolerControl as declared in nvml/nvml.h +type CoolerControl int32 + +// CoolerControl enumeration from nvml/nvml.h +const ( + THERMAL_COOLER_SIGNAL_NONE CoolerControl = iota + THERMAL_COOLER_SIGNAL_TOGGLE CoolerControl = 1 + THERMAL_COOLER_SIGNAL_VARIABLE CoolerControl = 2 + THERMAL_COOLER_SIGNAL_COUNT CoolerControl = 3 +) + +// CoolerTarget as declared in nvml/nvml.h +type CoolerTarget int32 + +// CoolerTarget enumeration from nvml/nvml.h +const ( + THERMAL_COOLER_TARGET_NONE CoolerTarget = 1 + THERMAL_COOLER_TARGET_GPU CoolerTarget = 2 + THERMAL_COOLER_TARGET_MEMORY CoolerTarget = 4 + THERMAL_COOLER_TARGET_POWER_SUPPLY CoolerTarget = 8 + THERMAL_COOLER_TARGET_GPU_RELATED CoolerTarget = 14 +) + // EnableState as declared in nvml/nvml.h type EnableState int32 @@ -1026,7 +1356,8 @@ const ( TEMPERATURE_THRESHOLD_ACOUSTIC_MIN TemperatureThresholds = 4 TEMPERATURE_THRESHOLD_ACOUSTIC_CURR TemperatureThresholds = 5 TEMPERATURE_THRESHOLD_ACOUSTIC_MAX TemperatureThresholds = 6 - TEMPERATURE_THRESHOLD_COUNT TemperatureThresholds = 7 + TEMPERATURE_THRESHOLD_GPS_CURR TemperatureThresholds = 7 + TEMPERATURE_THRESHOLD_COUNT TemperatureThresholds = 8 ) // TemperatureSensors as declared in nvml/nvml.h @@ -1060,6 +1391,21 @@ const ( MEMORY_ERROR_TYPE_COUNT MemoryErrorType = 2 ) +// NvlinkVersion as declared in nvml/nvml.h +type NvlinkVersion int32 + +// NvlinkVersion enumeration from nvml/nvml.h +const ( + NVLINK_VERSION_INVALID NvlinkVersion = iota + NVLINK_VERSION_1_0 NvlinkVersion = 1 + NVLINK_VERSION_2_0 NvlinkVersion = 2 + NVLINK_VERSION_2_2 NvlinkVersion = 3 + NVLINK_VERSION_3_0 NvlinkVersion = 4 + NVLINK_VERSION_3_1 NvlinkVersion = 5 + NVLINK_VERSION_4_0 NvlinkVersion = 6 + NVLINK_VERSION_5_0 NvlinkVersion = 7 +) + // EccCounterType as declared in nvml/nvml.h type EccCounterType int32 @@ -1101,6 +1447,7 @@ 
type DriverModel int32 const ( DRIVER_WDDM DriverModel = iota DRIVER_WDM DriverModel = 1 + DRIVER_MCDM DriverModel = 2 ) // Pstates as declared in nvml/nvml.h @@ -1145,7 +1492,8 @@ const ( INFOROM_OEM InforomObject = iota INFOROM_ECC InforomObject = 1 INFOROM_POWER InforomObject = 2 - INFOROM_COUNT InforomObject = 3 + INFOROM_DEN InforomObject = 3 + INFOROM_COUNT InforomObject = 4 ) // Return as declared in nvml/nvml.h @@ -1223,6 +1571,17 @@ const ( RESTRICTED_API_COUNT RestrictedAPI = 2 ) +// GpuUtilizationDomainId as declared in nvml/nvml.h +type GpuUtilizationDomainId int32 + +// GpuUtilizationDomainId enumeration from nvml/nvml.h +const ( + GPU_UTILIZATION_DOMAIN_GPU GpuUtilizationDomainId = iota + GPU_UTILIZATION_DOMAIN_FB GpuUtilizationDomainId = 1 + GPU_UTILIZATION_DOMAIN_VID GpuUtilizationDomainId = 2 + GPU_UTILIZATION_DOMAIN_BUS GpuUtilizationDomainId = 3 +) + // GpuVirtualizationMode as declared in nvml/nvml.h type GpuVirtualizationMode int32 @@ -1281,7 +1640,8 @@ type VgpuDriverCapability int32 // VgpuDriverCapability enumeration from nvml/nvml.h const ( VGPU_DRIVER_CAP_HETEROGENEOUS_MULTI_VGPU VgpuDriverCapability = iota - VGPU_DRIVER_CAP_COUNT VgpuDriverCapability = 1 + VGPU_DRIVER_CAP_WARM_UPDATE VgpuDriverCapability = 1 + VGPU_DRIVER_CAP_COUNT VgpuDriverCapability = 2 ) // DeviceVgpuCapability as declared in nvml/nvml.h @@ -1297,18 +1657,23 @@ const ( DEVICE_VGPU_CAP_DEVICE_STREAMING DeviceVgpuCapability = 5 DEVICE_VGPU_CAP_MINI_QUARTER_GPU DeviceVgpuCapability = 6 DEVICE_VGPU_CAP_COMPUTE_MEDIA_ENGINE_GPU DeviceVgpuCapability = 7 - DEVICE_VGPU_CAP_COUNT DeviceVgpuCapability = 8 + DEVICE_VGPU_CAP_WARM_UPDATE DeviceVgpuCapability = 8 + DEVICE_VGPU_CAP_HOMOGENEOUS_PLACEMENTS DeviceVgpuCapability = 9 + DEVICE_VGPU_CAP_MIG_TIMESLICING_SUPPORTED DeviceVgpuCapability = 10 + DEVICE_VGPU_CAP_MIG_TIMESLICING_ENABLED DeviceVgpuCapability = 11 + DEVICE_VGPU_CAP_COUNT DeviceVgpuCapability = 12 ) -// GpuUtilizationDomainId as declared in nvml/nvml.h -type 
GpuUtilizationDomainId int32 +// DeviceGpuRecoveryAction as declared in nvml/nvml.h +type DeviceGpuRecoveryAction int32 -// GpuUtilizationDomainId enumeration from nvml/nvml.h +// DeviceGpuRecoveryAction enumeration from nvml/nvml.h const ( - GPU_UTILIZATION_DOMAIN_GPU GpuUtilizationDomainId = iota - GPU_UTILIZATION_DOMAIN_FB GpuUtilizationDomainId = 1 - GPU_UTILIZATION_DOMAIN_VID GpuUtilizationDomainId = 2 - GPU_UTILIZATION_DOMAIN_BUS GpuUtilizationDomainId = 3 + GPU_RECOVERY_ACTION_NONE DeviceGpuRecoveryAction = iota + GPU_RECOVERY_ACTION_GPU_RESET DeviceGpuRecoveryAction = 1 + GPU_RECOVERY_ACTION_NODE_REBOOT DeviceGpuRecoveryAction = 2 + GPU_RECOVERY_ACTION_DRAIN_P2P DeviceGpuRecoveryAction = 3 + GPU_RECOVERY_ACTION_DRAIN_AND_RESET DeviceGpuRecoveryAction = 4 ) // FanState as declared in nvml/nvml.h @@ -1447,6 +1812,16 @@ const ( THERMAL_CONTROLLER_UNKNOWN ThermalController = -1 ) +// UUIDType as declared in nvml/nvml.h +type UUIDType int32 + +// UUIDType enumeration from nvml/nvml.h +const ( + UUID_TYPE_NONE UUIDType = iota + UUID_TYPE_ASCII UUIDType = 1 + UUID_TYPE_BINARY UUIDType = 2 +) + // GridLicenseFeatureCode as declared in nvml/nvml.h type GridLicenseFeatureCode int32 @@ -1465,74 +1840,208 @@ type GpmMetricId int32 // GpmMetricId enumeration from nvml/nvml.h const ( - GPM_METRIC_GRAPHICS_UTIL GpmMetricId = 1 - GPM_METRIC_SM_UTIL GpmMetricId = 2 - GPM_METRIC_SM_OCCUPANCY GpmMetricId = 3 - GPM_METRIC_INTEGER_UTIL GpmMetricId = 4 - GPM_METRIC_ANY_TENSOR_UTIL GpmMetricId = 5 - GPM_METRIC_DFMA_TENSOR_UTIL GpmMetricId = 6 - GPM_METRIC_HMMA_TENSOR_UTIL GpmMetricId = 7 - GPM_METRIC_IMMA_TENSOR_UTIL GpmMetricId = 9 - GPM_METRIC_DRAM_BW_UTIL GpmMetricId = 10 - GPM_METRIC_FP64_UTIL GpmMetricId = 11 - GPM_METRIC_FP32_UTIL GpmMetricId = 12 - GPM_METRIC_FP16_UTIL GpmMetricId = 13 - GPM_METRIC_PCIE_TX_PER_SEC GpmMetricId = 20 - GPM_METRIC_PCIE_RX_PER_SEC GpmMetricId = 21 - GPM_METRIC_NVDEC_0_UTIL GpmMetricId = 30 - GPM_METRIC_NVDEC_1_UTIL GpmMetricId = 31 - 
GPM_METRIC_NVDEC_2_UTIL GpmMetricId = 32 - GPM_METRIC_NVDEC_3_UTIL GpmMetricId = 33 - GPM_METRIC_NVDEC_4_UTIL GpmMetricId = 34 - GPM_METRIC_NVDEC_5_UTIL GpmMetricId = 35 - GPM_METRIC_NVDEC_6_UTIL GpmMetricId = 36 - GPM_METRIC_NVDEC_7_UTIL GpmMetricId = 37 - GPM_METRIC_NVJPG_0_UTIL GpmMetricId = 40 - GPM_METRIC_NVJPG_1_UTIL GpmMetricId = 41 - GPM_METRIC_NVJPG_2_UTIL GpmMetricId = 42 - GPM_METRIC_NVJPG_3_UTIL GpmMetricId = 43 - GPM_METRIC_NVJPG_4_UTIL GpmMetricId = 44 - GPM_METRIC_NVJPG_5_UTIL GpmMetricId = 45 - GPM_METRIC_NVJPG_6_UTIL GpmMetricId = 46 - GPM_METRIC_NVJPG_7_UTIL GpmMetricId = 47 - GPM_METRIC_NVOFA_0_UTIL GpmMetricId = 50 - GPM_METRIC_NVLINK_TOTAL_RX_PER_SEC GpmMetricId = 60 - GPM_METRIC_NVLINK_TOTAL_TX_PER_SEC GpmMetricId = 61 - GPM_METRIC_NVLINK_L0_RX_PER_SEC GpmMetricId = 62 - GPM_METRIC_NVLINK_L0_TX_PER_SEC GpmMetricId = 63 - GPM_METRIC_NVLINK_L1_RX_PER_SEC GpmMetricId = 64 - GPM_METRIC_NVLINK_L1_TX_PER_SEC GpmMetricId = 65 - GPM_METRIC_NVLINK_L2_RX_PER_SEC GpmMetricId = 66 - GPM_METRIC_NVLINK_L2_TX_PER_SEC GpmMetricId = 67 - GPM_METRIC_NVLINK_L3_RX_PER_SEC GpmMetricId = 68 - GPM_METRIC_NVLINK_L3_TX_PER_SEC GpmMetricId = 69 - GPM_METRIC_NVLINK_L4_RX_PER_SEC GpmMetricId = 70 - GPM_METRIC_NVLINK_L4_TX_PER_SEC GpmMetricId = 71 - GPM_METRIC_NVLINK_L5_RX_PER_SEC GpmMetricId = 72 - GPM_METRIC_NVLINK_L5_TX_PER_SEC GpmMetricId = 73 - GPM_METRIC_NVLINK_L6_RX_PER_SEC GpmMetricId = 74 - GPM_METRIC_NVLINK_L6_TX_PER_SEC GpmMetricId = 75 - GPM_METRIC_NVLINK_L7_RX_PER_SEC GpmMetricId = 76 - GPM_METRIC_NVLINK_L7_TX_PER_SEC GpmMetricId = 77 - GPM_METRIC_NVLINK_L8_RX_PER_SEC GpmMetricId = 78 - GPM_METRIC_NVLINK_L8_TX_PER_SEC GpmMetricId = 79 - GPM_METRIC_NVLINK_L9_RX_PER_SEC GpmMetricId = 80 - GPM_METRIC_NVLINK_L9_TX_PER_SEC GpmMetricId = 81 - GPM_METRIC_NVLINK_L10_RX_PER_SEC GpmMetricId = 82 - GPM_METRIC_NVLINK_L10_TX_PER_SEC GpmMetricId = 83 - GPM_METRIC_NVLINK_L11_RX_PER_SEC GpmMetricId = 84 - GPM_METRIC_NVLINK_L11_TX_PER_SEC GpmMetricId = 85 - 
GPM_METRIC_NVLINK_L12_RX_PER_SEC GpmMetricId = 86 - GPM_METRIC_NVLINK_L12_TX_PER_SEC GpmMetricId = 87 - GPM_METRIC_NVLINK_L13_RX_PER_SEC GpmMetricId = 88 - GPM_METRIC_NVLINK_L13_TX_PER_SEC GpmMetricId = 89 - GPM_METRIC_NVLINK_L14_RX_PER_SEC GpmMetricId = 90 - GPM_METRIC_NVLINK_L14_TX_PER_SEC GpmMetricId = 91 - GPM_METRIC_NVLINK_L15_RX_PER_SEC GpmMetricId = 92 - GPM_METRIC_NVLINK_L15_TX_PER_SEC GpmMetricId = 93 - GPM_METRIC_NVLINK_L16_RX_PER_SEC GpmMetricId = 94 - GPM_METRIC_NVLINK_L16_TX_PER_SEC GpmMetricId = 95 - GPM_METRIC_NVLINK_L17_RX_PER_SEC GpmMetricId = 96 - GPM_METRIC_NVLINK_L17_TX_PER_SEC GpmMetricId = 97 - GPM_METRIC_MAX GpmMetricId = 98 + GPM_METRIC_GRAPHICS_UTIL GpmMetricId = 1 + GPM_METRIC_SM_UTIL GpmMetricId = 2 + GPM_METRIC_SM_OCCUPANCY GpmMetricId = 3 + GPM_METRIC_INTEGER_UTIL GpmMetricId = 4 + GPM_METRIC_ANY_TENSOR_UTIL GpmMetricId = 5 + GPM_METRIC_DFMA_TENSOR_UTIL GpmMetricId = 6 + GPM_METRIC_HMMA_TENSOR_UTIL GpmMetricId = 7 + GPM_METRIC_IMMA_TENSOR_UTIL GpmMetricId = 9 + GPM_METRIC_DRAM_BW_UTIL GpmMetricId = 10 + GPM_METRIC_FP64_UTIL GpmMetricId = 11 + GPM_METRIC_FP32_UTIL GpmMetricId = 12 + GPM_METRIC_FP16_UTIL GpmMetricId = 13 + GPM_METRIC_PCIE_TX_PER_SEC GpmMetricId = 20 + GPM_METRIC_PCIE_RX_PER_SEC GpmMetricId = 21 + GPM_METRIC_NVDEC_0_UTIL GpmMetricId = 30 + GPM_METRIC_NVDEC_1_UTIL GpmMetricId = 31 + GPM_METRIC_NVDEC_2_UTIL GpmMetricId = 32 + GPM_METRIC_NVDEC_3_UTIL GpmMetricId = 33 + GPM_METRIC_NVDEC_4_UTIL GpmMetricId = 34 + GPM_METRIC_NVDEC_5_UTIL GpmMetricId = 35 + GPM_METRIC_NVDEC_6_UTIL GpmMetricId = 36 + GPM_METRIC_NVDEC_7_UTIL GpmMetricId = 37 + GPM_METRIC_NVJPG_0_UTIL GpmMetricId = 40 + GPM_METRIC_NVJPG_1_UTIL GpmMetricId = 41 + GPM_METRIC_NVJPG_2_UTIL GpmMetricId = 42 + GPM_METRIC_NVJPG_3_UTIL GpmMetricId = 43 + GPM_METRIC_NVJPG_4_UTIL GpmMetricId = 44 + GPM_METRIC_NVJPG_5_UTIL GpmMetricId = 45 + GPM_METRIC_NVJPG_6_UTIL GpmMetricId = 46 + GPM_METRIC_NVJPG_7_UTIL GpmMetricId = 47 + GPM_METRIC_NVOFA_0_UTIL GpmMetricId = 50 + 
GPM_METRIC_NVOFA_1_UTIL GpmMetricId = 51 + GPM_METRIC_NVLINK_TOTAL_RX_PER_SEC GpmMetricId = 60 + GPM_METRIC_NVLINK_TOTAL_TX_PER_SEC GpmMetricId = 61 + GPM_METRIC_NVLINK_L0_RX_PER_SEC GpmMetricId = 62 + GPM_METRIC_NVLINK_L0_TX_PER_SEC GpmMetricId = 63 + GPM_METRIC_NVLINK_L1_RX_PER_SEC GpmMetricId = 64 + GPM_METRIC_NVLINK_L1_TX_PER_SEC GpmMetricId = 65 + GPM_METRIC_NVLINK_L2_RX_PER_SEC GpmMetricId = 66 + GPM_METRIC_NVLINK_L2_TX_PER_SEC GpmMetricId = 67 + GPM_METRIC_NVLINK_L3_RX_PER_SEC GpmMetricId = 68 + GPM_METRIC_NVLINK_L3_TX_PER_SEC GpmMetricId = 69 + GPM_METRIC_NVLINK_L4_RX_PER_SEC GpmMetricId = 70 + GPM_METRIC_NVLINK_L4_TX_PER_SEC GpmMetricId = 71 + GPM_METRIC_NVLINK_L5_RX_PER_SEC GpmMetricId = 72 + GPM_METRIC_NVLINK_L5_TX_PER_SEC GpmMetricId = 73 + GPM_METRIC_NVLINK_L6_RX_PER_SEC GpmMetricId = 74 + GPM_METRIC_NVLINK_L6_TX_PER_SEC GpmMetricId = 75 + GPM_METRIC_NVLINK_L7_RX_PER_SEC GpmMetricId = 76 + GPM_METRIC_NVLINK_L7_TX_PER_SEC GpmMetricId = 77 + GPM_METRIC_NVLINK_L8_RX_PER_SEC GpmMetricId = 78 + GPM_METRIC_NVLINK_L8_TX_PER_SEC GpmMetricId = 79 + GPM_METRIC_NVLINK_L9_RX_PER_SEC GpmMetricId = 80 + GPM_METRIC_NVLINK_L9_TX_PER_SEC GpmMetricId = 81 + GPM_METRIC_NVLINK_L10_RX_PER_SEC GpmMetricId = 82 + GPM_METRIC_NVLINK_L10_TX_PER_SEC GpmMetricId = 83 + GPM_METRIC_NVLINK_L11_RX_PER_SEC GpmMetricId = 84 + GPM_METRIC_NVLINK_L11_TX_PER_SEC GpmMetricId = 85 + GPM_METRIC_NVLINK_L12_RX_PER_SEC GpmMetricId = 86 + GPM_METRIC_NVLINK_L12_TX_PER_SEC GpmMetricId = 87 + GPM_METRIC_NVLINK_L13_RX_PER_SEC GpmMetricId = 88 + GPM_METRIC_NVLINK_L13_TX_PER_SEC GpmMetricId = 89 + GPM_METRIC_NVLINK_L14_RX_PER_SEC GpmMetricId = 90 + GPM_METRIC_NVLINK_L14_TX_PER_SEC GpmMetricId = 91 + GPM_METRIC_NVLINK_L15_RX_PER_SEC GpmMetricId = 92 + GPM_METRIC_NVLINK_L15_TX_PER_SEC GpmMetricId = 93 + GPM_METRIC_NVLINK_L16_RX_PER_SEC GpmMetricId = 94 + GPM_METRIC_NVLINK_L16_TX_PER_SEC GpmMetricId = 95 + GPM_METRIC_NVLINK_L17_RX_PER_SEC GpmMetricId = 96 + GPM_METRIC_NVLINK_L17_TX_PER_SEC GpmMetricId = 
97 + GPM_METRIC_C2C_TOTAL_TX_PER_SEC GpmMetricId = 100 + GPM_METRIC_C2C_TOTAL_RX_PER_SEC GpmMetricId = 101 + GPM_METRIC_C2C_DATA_TX_PER_SEC GpmMetricId = 102 + GPM_METRIC_C2C_DATA_RX_PER_SEC GpmMetricId = 103 + GPM_METRIC_C2C_LINK0_TOTAL_TX_PER_SEC GpmMetricId = 104 + GPM_METRIC_C2C_LINK0_TOTAL_RX_PER_SEC GpmMetricId = 105 + GPM_METRIC_C2C_LINK0_DATA_TX_PER_SEC GpmMetricId = 106 + GPM_METRIC_C2C_LINK0_DATA_RX_PER_SEC GpmMetricId = 107 + GPM_METRIC_C2C_LINK1_TOTAL_TX_PER_SEC GpmMetricId = 108 + GPM_METRIC_C2C_LINK1_TOTAL_RX_PER_SEC GpmMetricId = 109 + GPM_METRIC_C2C_LINK1_DATA_TX_PER_SEC GpmMetricId = 110 + GPM_METRIC_C2C_LINK1_DATA_RX_PER_SEC GpmMetricId = 111 + GPM_METRIC_C2C_LINK2_TOTAL_TX_PER_SEC GpmMetricId = 112 + GPM_METRIC_C2C_LINK2_TOTAL_RX_PER_SEC GpmMetricId = 113 + GPM_METRIC_C2C_LINK2_DATA_TX_PER_SEC GpmMetricId = 114 + GPM_METRIC_C2C_LINK2_DATA_RX_PER_SEC GpmMetricId = 115 + GPM_METRIC_C2C_LINK3_TOTAL_TX_PER_SEC GpmMetricId = 116 + GPM_METRIC_C2C_LINK3_TOTAL_RX_PER_SEC GpmMetricId = 117 + GPM_METRIC_C2C_LINK3_DATA_TX_PER_SEC GpmMetricId = 118 + GPM_METRIC_C2C_LINK3_DATA_RX_PER_SEC GpmMetricId = 119 + GPM_METRIC_C2C_LINK4_TOTAL_TX_PER_SEC GpmMetricId = 120 + GPM_METRIC_C2C_LINK4_TOTAL_RX_PER_SEC GpmMetricId = 121 + GPM_METRIC_C2C_LINK4_DATA_TX_PER_SEC GpmMetricId = 122 + GPM_METRIC_C2C_LINK4_DATA_RX_PER_SEC GpmMetricId = 123 + GPM_METRIC_C2C_LINK5_TOTAL_TX_PER_SEC GpmMetricId = 124 + GPM_METRIC_C2C_LINK5_TOTAL_RX_PER_SEC GpmMetricId = 125 + GPM_METRIC_C2C_LINK5_DATA_TX_PER_SEC GpmMetricId = 126 + GPM_METRIC_C2C_LINK5_DATA_RX_PER_SEC GpmMetricId = 127 + GPM_METRIC_C2C_LINK6_TOTAL_TX_PER_SEC GpmMetricId = 128 + GPM_METRIC_C2C_LINK6_TOTAL_RX_PER_SEC GpmMetricId = 129 + GPM_METRIC_C2C_LINK6_DATA_TX_PER_SEC GpmMetricId = 130 + GPM_METRIC_C2C_LINK6_DATA_RX_PER_SEC GpmMetricId = 131 + GPM_METRIC_C2C_LINK7_TOTAL_TX_PER_SEC GpmMetricId = 132 + GPM_METRIC_C2C_LINK7_TOTAL_RX_PER_SEC GpmMetricId = 133 + GPM_METRIC_C2C_LINK7_DATA_TX_PER_SEC GpmMetricId = 134 + 
GPM_METRIC_C2C_LINK7_DATA_RX_PER_SEC GpmMetricId = 135 + GPM_METRIC_C2C_LINK8_TOTAL_TX_PER_SEC GpmMetricId = 136 + GPM_METRIC_C2C_LINK8_TOTAL_RX_PER_SEC GpmMetricId = 137 + GPM_METRIC_C2C_LINK8_DATA_TX_PER_SEC GpmMetricId = 138 + GPM_METRIC_C2C_LINK8_DATA_RX_PER_SEC GpmMetricId = 139 + GPM_METRIC_C2C_LINK9_TOTAL_TX_PER_SEC GpmMetricId = 140 + GPM_METRIC_C2C_LINK9_TOTAL_RX_PER_SEC GpmMetricId = 141 + GPM_METRIC_C2C_LINK9_DATA_TX_PER_SEC GpmMetricId = 142 + GPM_METRIC_C2C_LINK9_DATA_RX_PER_SEC GpmMetricId = 143 + GPM_METRIC_C2C_LINK10_TOTAL_TX_PER_SEC GpmMetricId = 144 + GPM_METRIC_C2C_LINK10_TOTAL_RX_PER_SEC GpmMetricId = 145 + GPM_METRIC_C2C_LINK10_DATA_TX_PER_SEC GpmMetricId = 146 + GPM_METRIC_C2C_LINK10_DATA_RX_PER_SEC GpmMetricId = 147 + GPM_METRIC_C2C_LINK11_TOTAL_TX_PER_SEC GpmMetricId = 148 + GPM_METRIC_C2C_LINK11_TOTAL_RX_PER_SEC GpmMetricId = 149 + GPM_METRIC_C2C_LINK11_DATA_TX_PER_SEC GpmMetricId = 150 + GPM_METRIC_C2C_LINK11_DATA_RX_PER_SEC GpmMetricId = 151 + GPM_METRIC_C2C_LINK12_TOTAL_TX_PER_SEC GpmMetricId = 152 + GPM_METRIC_C2C_LINK12_TOTAL_RX_PER_SEC GpmMetricId = 153 + GPM_METRIC_C2C_LINK12_DATA_TX_PER_SEC GpmMetricId = 154 + GPM_METRIC_C2C_LINK12_DATA_RX_PER_SEC GpmMetricId = 155 + GPM_METRIC_C2C_LINK13_TOTAL_TX_PER_SEC GpmMetricId = 156 + GPM_METRIC_C2C_LINK13_TOTAL_RX_PER_SEC GpmMetricId = 157 + GPM_METRIC_C2C_LINK13_DATA_TX_PER_SEC GpmMetricId = 158 + GPM_METRIC_C2C_LINK13_DATA_RX_PER_SEC GpmMetricId = 159 + GPM_METRIC_HOSTMEM_CACHE_HIT GpmMetricId = 160 + GPM_METRIC_HOSTMEM_CACHE_MISS GpmMetricId = 161 + GPM_METRIC_PEERMEM_CACHE_HIT GpmMetricId = 162 + GPM_METRIC_PEERMEM_CACHE_MISS GpmMetricId = 163 + GPM_METRIC_DRAM_CACHE_HIT GpmMetricId = 164 + GPM_METRIC_DRAM_CACHE_MISS GpmMetricId = 165 + GPM_METRIC_NVENC_0_UTIL GpmMetricId = 166 + GPM_METRIC_NVENC_1_UTIL GpmMetricId = 167 + GPM_METRIC_NVENC_2_UTIL GpmMetricId = 168 + GPM_METRIC_NVENC_3_UTIL GpmMetricId = 169 + GPM_METRIC_GR0_CTXSW_CYCLES_ELAPSED GpmMetricId = 170 + 
GPM_METRIC_GR0_CTXSW_CYCLES_ACTIVE GpmMetricId = 171 + GPM_METRIC_GR0_CTXSW_REQUESTS GpmMetricId = 172 + GPM_METRIC_GR0_CTXSW_CYCLES_PER_REQ GpmMetricId = 173 + GPM_METRIC_GR0_CTXSW_ACTIVE_PCT GpmMetricId = 174 + GPM_METRIC_GR1_CTXSW_CYCLES_ELAPSED GpmMetricId = 175 + GPM_METRIC_GR1_CTXSW_CYCLES_ACTIVE GpmMetricId = 176 + GPM_METRIC_GR1_CTXSW_REQUESTS GpmMetricId = 177 + GPM_METRIC_GR1_CTXSW_CYCLES_PER_REQ GpmMetricId = 178 + GPM_METRIC_GR1_CTXSW_ACTIVE_PCT GpmMetricId = 179 + GPM_METRIC_GR2_CTXSW_CYCLES_ELAPSED GpmMetricId = 180 + GPM_METRIC_GR2_CTXSW_CYCLES_ACTIVE GpmMetricId = 181 + GPM_METRIC_GR2_CTXSW_REQUESTS GpmMetricId = 182 + GPM_METRIC_GR2_CTXSW_CYCLES_PER_REQ GpmMetricId = 183 + GPM_METRIC_GR2_CTXSW_ACTIVE_PCT GpmMetricId = 184 + GPM_METRIC_GR3_CTXSW_CYCLES_ELAPSED GpmMetricId = 185 + GPM_METRIC_GR3_CTXSW_CYCLES_ACTIVE GpmMetricId = 186 + GPM_METRIC_GR3_CTXSW_REQUESTS GpmMetricId = 187 + GPM_METRIC_GR3_CTXSW_CYCLES_PER_REQ GpmMetricId = 188 + GPM_METRIC_GR3_CTXSW_ACTIVE_PCT GpmMetricId = 189 + GPM_METRIC_GR4_CTXSW_CYCLES_ELAPSED GpmMetricId = 190 + GPM_METRIC_GR4_CTXSW_CYCLES_ACTIVE GpmMetricId = 191 + GPM_METRIC_GR4_CTXSW_REQUESTS GpmMetricId = 192 + GPM_METRIC_GR4_CTXSW_CYCLES_PER_REQ GpmMetricId = 193 + GPM_METRIC_GR4_CTXSW_ACTIVE_PCT GpmMetricId = 194 + GPM_METRIC_GR5_CTXSW_CYCLES_ELAPSED GpmMetricId = 195 + GPM_METRIC_GR5_CTXSW_CYCLES_ACTIVE GpmMetricId = 196 + GPM_METRIC_GR5_CTXSW_REQUESTS GpmMetricId = 197 + GPM_METRIC_GR5_CTXSW_CYCLES_PER_REQ GpmMetricId = 198 + GPM_METRIC_GR5_CTXSW_ACTIVE_PCT GpmMetricId = 199 + GPM_METRIC_GR6_CTXSW_CYCLES_ELAPSED GpmMetricId = 200 + GPM_METRIC_GR6_CTXSW_CYCLES_ACTIVE GpmMetricId = 201 + GPM_METRIC_GR6_CTXSW_REQUESTS GpmMetricId = 202 + GPM_METRIC_GR6_CTXSW_CYCLES_PER_REQ GpmMetricId = 203 + GPM_METRIC_GR6_CTXSW_ACTIVE_PCT GpmMetricId = 204 + GPM_METRIC_GR7_CTXSW_CYCLES_ELAPSED GpmMetricId = 205 + GPM_METRIC_GR7_CTXSW_CYCLES_ACTIVE GpmMetricId = 206 + GPM_METRIC_GR7_CTXSW_REQUESTS GpmMetricId = 207 + 
GPM_METRIC_GR7_CTXSW_CYCLES_PER_REQ GpmMetricId = 208 + GPM_METRIC_GR7_CTXSW_ACTIVE_PCT GpmMetricId = 209 + GPM_METRIC_MAX GpmMetricId = 210 +) + +// PowerProfileType as declared in nvml/nvml.h +type PowerProfileType int32 + +// PowerProfileType enumeration from nvml/nvml.h +const ( + POWER_PROFILE_MAX_P PowerProfileType = iota + POWER_PROFILE_MAX_Q PowerProfileType = 1 + POWER_PROFILE_COMPUTE PowerProfileType = 2 + POWER_PROFILE_MEMORY_BOUND PowerProfileType = 3 + POWER_PROFILE_NETWORK PowerProfileType = 4 + POWER_PROFILE_BALANCED PowerProfileType = 5 + POWER_PROFILE_LLM_INFERENCE PowerProfileType = 6 + POWER_PROFILE_LLM_TRAINING PowerProfileType = 7 + POWER_PROFILE_RBM PowerProfileType = 8 + POWER_PROFILE_DCPCIE PowerProfileType = 9 + POWER_PROFILE_HMMA_SPARSE PowerProfileType = 10 + POWER_PROFILE_HMMA_DENSE PowerProfileType = 11 + POWER_PROFILE_SYNC_BALANCED PowerProfileType = 12 + POWER_PROFILE_HPC PowerProfileType = 13 + POWER_PROFILE_MIG PowerProfileType = 14 + POWER_PROFILE_MAX PowerProfileType = 15 ) diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go index de0ab88c0..4784cd433 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go @@ -68,16 +68,6 @@ type GpuInstanceInfo struct { Placement GpuInstancePlacement } -func (g GpuInstanceInfo) convert() nvmlGpuInstanceInfo { - out := nvmlGpuInstanceInfo{ - Device: g.Device.(nvmlDevice), - Id: g.Id, - ProfileId: g.ProfileId, - Placement: g.Placement, - } - return out -} - func (g nvmlGpuInstanceInfo) convert() GpuInstanceInfo { out := GpuInstanceInfo{ Device: g.Device, @@ -97,17 +87,6 @@ type ComputeInstanceInfo struct { Placement ComputeInstancePlacement } -func (c ComputeInstanceInfo) convert() nvmlComputeInstanceInfo { - out := nvmlComputeInstanceInfo{ - Device: c.Device.(nvmlDevice), - GpuInstance: c.GpuInstance.(nvmlGpuInstance), - Id: c.Id, - ProfileId: c.ProfileId, - 
Placement: c.Placement, - } - return out -} - func (c nvmlComputeInstanceInfo) convert() ComputeInstanceInfo { out := ComputeInstanceInfo{ Device: c.Device, @@ -147,6 +126,13 @@ func (l *library) DeviceGetHandleByUUID(uuid string) (Device, Return) { return device, ret } +// nvml.DeviceGetHandleByUUIDV() +func (l *library) DeviceGetHandleByUUIDV(uuid *UUID) (Device, Return) { + var device nvmlDevice + ret := nvmlDeviceGetHandleByUUIDV(uuid, &device) + return device, ret +} + // nvml.DeviceGetHandleByPciBusId() func (l *library) DeviceGetHandleByPciBusId(pciBusId string) (Device, Return) { var device nvmlDevice @@ -2101,6 +2087,13 @@ func (handler GpuInstanceProfileInfoHandler) V2() (GpuInstanceProfileInfo_v2, Re return info, ret } +func (handler GpuInstanceProfileInfoHandler) V3() (GpuInstanceProfileInfo_v3, Return) { + var info GpuInstanceProfileInfo_v3 + info.Version = STRUCT_VERSION(info, 3) + ret := nvmlDeviceGetGpuInstanceProfileInfoV(handler.device, uint32(handler.profile), (*GpuInstanceProfileInfo_v2)(unsafe.Pointer(&info))) + return info, ret +} + func (l *library) DeviceGetGpuInstanceProfileInfoV(device Device, profile int) GpuInstanceProfileInfoHandler { return device.GetGpuInstanceProfileInfoV(profile) } @@ -2191,7 +2184,7 @@ func (device nvmlDevice) GetGpuInstances(info *GpuInstanceProfileInfo) ([]GpuIns if info == nil { return nil, ERROR_INVALID_ARGUMENT } - var count uint32 = info.InstanceCount + var count = info.InstanceCount gpuInstances := make([]nvmlGpuInstance, count) ret := nvmlDeviceGetGpuInstances(device, info.Id, &gpuInstances[0], &count) return convertSlice[nvmlGpuInstance, GpuInstance](gpuInstances[:count]), ret @@ -2248,6 +2241,13 @@ func (handler ComputeInstanceProfileInfoHandler) V2() (ComputeInstanceProfileInf return info, ret } +func (handler ComputeInstanceProfileInfoHandler) V3() (ComputeInstanceProfileInfo_v3, Return) { + var info ComputeInstanceProfileInfo_v3 + info.Version = STRUCT_VERSION(info, 3) + ret := 
nvmlGpuInstanceGetComputeInstanceProfileInfoV(handler.gpuInstance, uint32(handler.profile), uint32(handler.engProfile), (*ComputeInstanceProfileInfo_v2)(unsafe.Pointer(&info))) + return info, ret +} + func (l *library) GpuInstanceGetComputeInstanceProfileInfoV(gpuInstance GpuInstance, profile int, engProfile int) ComputeInstanceProfileInfoHandler { return gpuInstance.GetComputeInstanceProfileInfoV(profile, engProfile) } @@ -2302,7 +2302,7 @@ func (gpuInstance nvmlGpuInstance) GetComputeInstances(info *ComputeInstanceProf if info == nil { return nil, ERROR_INVALID_ARGUMENT } - var count uint32 = info.InstanceCount + var count = info.InstanceCount computeInstances := make([]nvmlComputeInstance, count) ret := nvmlGpuInstanceGetComputeInstances(gpuInstance, info.Id, &computeInstances[0], &count) return convertSlice[nvmlComputeInstance, ComputeInstance](computeInstances[:count]), ret @@ -3062,3 +3062,353 @@ func (device nvmlDevice) GetSramEccErrorStatus() (EccSramErrorStatus, Return) { ret := nvmlDeviceGetSramEccErrorStatus(device, &status) return status, ret } + +// nvml.DeviceGetClockOffsets() +func (l *library) DeviceGetClockOffsets(device Device) (ClockOffset, Return) { + return device.GetClockOffsets() +} + +func (device nvmlDevice) GetClockOffsets() (ClockOffset, Return) { + var info ClockOffset + info.Version = STRUCT_VERSION(info, 1) + ret := nvmlDeviceGetClockOffsets(device, &info) + return info, ret +} + +// nvml.DeviceSetClockOffsets() +func (l *library) DeviceSetClockOffsets(device Device, info ClockOffset) Return { + return device.SetClockOffsets(info) +} + +func (device nvmlDevice) SetClockOffsets(info ClockOffset) Return { + return nvmlDeviceSetClockOffsets(device, &info) +} + +// nvml.DeviceGetDriverModel_v2() +func (l *library) DeviceGetDriverModel_v2(device Device) (DriverModel, DriverModel, Return) { + return device.GetDriverModel_v2() +} + +func (device nvmlDevice) GetDriverModel_v2() (DriverModel, DriverModel, Return) { + var current, pending 
DriverModel + ret := nvmlDeviceGetDriverModel_v2(device, &current, &pending) + return current, pending, ret +} + +// nvml.DeviceGetCapabilities() +func (l *library) DeviceGetCapabilities(device Device) (DeviceCapabilities, Return) { + return device.GetCapabilities() +} + +func (device nvmlDevice) GetCapabilities() (DeviceCapabilities, Return) { + var caps DeviceCapabilities + caps.Version = STRUCT_VERSION(caps, 1) + ret := nvmlDeviceGetCapabilities(device, &caps) + return caps, ret +} + +// nvml.DeviceGetFanSpeedRPM() +func (l *library) DeviceGetFanSpeedRPM(device Device) (FanSpeedInfo, Return) { + return device.GetFanSpeedRPM() +} + +func (device nvmlDevice) GetFanSpeedRPM() (FanSpeedInfo, Return) { + var fanSpeed FanSpeedInfo + fanSpeed.Version = STRUCT_VERSION(fanSpeed, 1) + ret := nvmlDeviceGetFanSpeedRPM(device, &fanSpeed) + return fanSpeed, ret +} + +// nvml.DeviceGetCoolerInfo() +func (l *library) DeviceGetCoolerInfo(device Device) (CoolerInfo, Return) { + return device.GetCoolerInfo() +} + +func (device nvmlDevice) GetCoolerInfo() (CoolerInfo, Return) { + var coolerInfo CoolerInfo + coolerInfo.Version = STRUCT_VERSION(coolerInfo, 1) + ret := nvmlDeviceGetCoolerInfo(device, &coolerInfo) + return coolerInfo, ret +} + +// nvml.DeviceGetTemperatureV() +type TemperatureHandler struct { + device nvmlDevice +} + +func (handler TemperatureHandler) V1() (Temperature, Return) { + var temperature Temperature + temperature.Version = STRUCT_VERSION(temperature, 1) + ret := nvmlDeviceGetTemperatureV(handler.device, &temperature) + return temperature, ret +} + +func (l *library) DeviceGetTemperatureV(device Device) TemperatureHandler { + return device.GetTemperatureV() +} + +func (device nvmlDevice) GetTemperatureV() TemperatureHandler { + return TemperatureHandler{device} +} + +// nvml.DeviceGetMarginTemperature() +func (l *library) DeviceGetMarginTemperature(device Device) (MarginTemperature, Return) { + return device.GetMarginTemperature() +} + +func (device nvmlDevice)
GetMarginTemperature() (MarginTemperature, Return) { + var marginTemp MarginTemperature + marginTemp.Version = STRUCT_VERSION(marginTemp, 1) + ret := nvmlDeviceGetMarginTemperature(device, &marginTemp) + return marginTemp, ret +} + +// nvml.DeviceGetPerformanceModes() +func (l *library) DeviceGetPerformanceModes(device Device) (DevicePerfModes, Return) { + return device.GetPerformanceModes() +} + +func (device nvmlDevice) GetPerformanceModes() (DevicePerfModes, Return) { + var perfModes DevicePerfModes + perfModes.Version = STRUCT_VERSION(perfModes, 1) + ret := nvmlDeviceGetPerformanceModes(device, &perfModes) + return perfModes, ret +} + +// nvml.DeviceGetCurrentClockFreqs() +func (l *library) DeviceGetCurrentClockFreqs(device Device) (DeviceCurrentClockFreqs, Return) { + return device.GetCurrentClockFreqs() +} + +func (device nvmlDevice) GetCurrentClockFreqs() (DeviceCurrentClockFreqs, Return) { + var currentClockFreqs DeviceCurrentClockFreqs + currentClockFreqs.Version = STRUCT_VERSION(currentClockFreqs, 1) + ret := nvmlDeviceGetCurrentClockFreqs(device, &currentClockFreqs) + return currentClockFreqs, ret +} + +// nvml.DeviceGetDramEncryptionMode() +func (l *library) DeviceGetDramEncryptionMode(device Device) (DramEncryptionInfo, DramEncryptionInfo, Return) { + return device.GetDramEncryptionMode() +} + +func (device nvmlDevice) GetDramEncryptionMode() (DramEncryptionInfo, DramEncryptionInfo, Return) { + var current, pending DramEncryptionInfo + current.Version = STRUCT_VERSION(current, 1) + pending.Version = STRUCT_VERSION(pending, 1) + ret := nvmlDeviceGetDramEncryptionMode(device, &current, &pending) + return current, pending, ret +} + +// nvml.DeviceSetDramEncryptionMode() +func (l *library) DeviceSetDramEncryptionMode(device Device, dramEncryption *DramEncryptionInfo) Return { + return device.SetDramEncryptionMode(dramEncryption) +} + +func (device nvmlDevice) SetDramEncryptionMode(dramEncryption *DramEncryptionInfo) Return { + return
nvmlDeviceSetDramEncryptionMode(device, dramEncryption) +} + +// nvml.DeviceGetPlatformInfo() +func (l *library) DeviceGetPlatformInfo(device Device) (PlatformInfo, Return) { + return device.GetPlatformInfo() +} + +func (device nvmlDevice) GetPlatformInfo() (PlatformInfo, Return) { + var platformInfo PlatformInfo + platformInfo.Version = STRUCT_VERSION(platformInfo, 1) + ret := nvmlDeviceGetPlatformInfo(device, &platformInfo) + return platformInfo, ret +} + +// nvml.DeviceGetNvlinkSupportedBwModes() +func (l *library) DeviceGetNvlinkSupportedBwModes(device Device) (NvlinkSupportedBwModes, Return) { + return device.GetNvlinkSupportedBwModes() +} + +func (device nvmlDevice) GetNvlinkSupportedBwModes() (NvlinkSupportedBwModes, Return) { + var supportedBwMode NvlinkSupportedBwModes + supportedBwMode.Version = STRUCT_VERSION(supportedBwMode, 1) + ret := nvmlDeviceGetNvlinkSupportedBwModes(device, &supportedBwMode) + return supportedBwMode, ret +} + +// nvml.DeviceGetNvlinkBwMode() +func (l *library) DeviceGetNvlinkBwMode(device Device) (NvlinkGetBwMode, Return) { + return device.GetNvlinkBwMode() +} + +func (device nvmlDevice) GetNvlinkBwMode() (NvlinkGetBwMode, Return) { + var getBwMode NvlinkGetBwMode + getBwMode.Version = STRUCT_VERSION(getBwMode, 1) + ret := nvmlDeviceGetNvlinkBwMode(device, &getBwMode) + return getBwMode, ret +} + +// nvml.DeviceSetNvlinkBwMode() +func (l *library) DeviceSetNvlinkBwMode(device Device, setBwMode *NvlinkSetBwMode) Return { + return device.SetNvlinkBwMode(setBwMode) +} + +func (device nvmlDevice) SetNvlinkBwMode(setBwMode *NvlinkSetBwMode) Return { + return nvmlDeviceSetNvlinkBwMode(device, setBwMode) +} + +// nvml.DeviceWorkloadPowerProfileGetProfilesInfo() +func (l *library) DeviceWorkloadPowerProfileGetProfilesInfo(device Device) (WorkloadPowerProfileProfilesInfo, Return) { + return device.WorkloadPowerProfileGetProfilesInfo() +} + +func (device nvmlDevice) WorkloadPowerProfileGetProfilesInfo() (WorkloadPowerProfileProfilesInfo, 
Return) { + var profilesInfo WorkloadPowerProfileProfilesInfo + profilesInfo.Version = STRUCT_VERSION(profilesInfo, 1) + ret := nvmlDeviceWorkloadPowerProfileGetProfilesInfo(device, &profilesInfo) + return profilesInfo, ret +} + +// nvml.DeviceWorkloadPowerProfileGetCurrentProfiles() +func (l *library) DeviceWorkloadPowerProfileGetCurrentProfiles(device Device) (WorkloadPowerProfileCurrentProfiles, Return) { + return device.WorkloadPowerProfileGetCurrentProfiles() +} + +func (device nvmlDevice) WorkloadPowerProfileGetCurrentProfiles() (WorkloadPowerProfileCurrentProfiles, Return) { + var currentProfiles WorkloadPowerProfileCurrentProfiles + currentProfiles.Version = STRUCT_VERSION(currentProfiles, 1) + ret := nvmlDeviceWorkloadPowerProfileGetCurrentProfiles(device, &currentProfiles) + return currentProfiles, ret +} + +// nvml.DeviceWorkloadPowerProfileSetRequestedProfiles() +func (l *library) DeviceWorkloadPowerProfileSetRequestedProfiles(device Device, requestedProfiles *WorkloadPowerProfileRequestedProfiles) Return { + return device.WorkloadPowerProfileSetRequestedProfiles(requestedProfiles) +} + +func (device nvmlDevice) WorkloadPowerProfileSetRequestedProfiles(requestedProfiles *WorkloadPowerProfileRequestedProfiles) Return { + return nvmlDeviceWorkloadPowerProfileSetRequestedProfiles(device, requestedProfiles) +} + +// nvml.DeviceWorkloadPowerProfileClearRequestedProfiles() +func (l *library) DeviceWorkloadPowerProfileClearRequestedProfiles(device Device, requestedProfiles *WorkloadPowerProfileRequestedProfiles) Return { + return device.WorkloadPowerProfileClearRequestedProfiles(requestedProfiles) +} + +func (device nvmlDevice) WorkloadPowerProfileClearRequestedProfiles(requestedProfiles *WorkloadPowerProfileRequestedProfiles) Return { + return nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(device, requestedProfiles) +} + +// nvml.DevicePowerSmoothingActivatePresetProfile() +func (l *library) DevicePowerSmoothingActivatePresetProfile(device Device, profile
*PowerSmoothingProfile) Return { + return device.PowerSmoothingActivatePresetProfile(profile) +} + +func (device nvmlDevice) PowerSmoothingActivatePresetProfile(profile *PowerSmoothingProfile) Return { + return nvmlDevicePowerSmoothingActivatePresetProfile(device, profile) +} + +// nvml.DevicePowerSmoothingUpdatePresetProfileParam() +func (l *library) DevicePowerSmoothingUpdatePresetProfileParam(device Device, profile *PowerSmoothingProfile) Return { + return device.PowerSmoothingUpdatePresetProfileParam(profile) +} + +func (device nvmlDevice) PowerSmoothingUpdatePresetProfileParam(profile *PowerSmoothingProfile) Return { + return nvmlDevicePowerSmoothingUpdatePresetProfileParam(device, profile) +} + +// nvml.DevicePowerSmoothingSetState() +func (l *library) DevicePowerSmoothingSetState(device Device, state *PowerSmoothingState) Return { + return device.PowerSmoothingSetState(state) +} + +func (device nvmlDevice) PowerSmoothingSetState(state *PowerSmoothingState) Return { + return nvmlDevicePowerSmoothingSetState(device, state) +} + +// nvml.GpuInstanceGetCreatableVgpus() +func (l *library) GpuInstanceGetCreatableVgpus(gpuInstance GpuInstance) (VgpuTypeIdInfo, Return) { + return gpuInstance.GetCreatableVgpus() +} + +func (gpuInstance nvmlGpuInstance) GetCreatableVgpus() (VgpuTypeIdInfo, Return) { + var vgpuTypeIdInfo VgpuTypeIdInfo + vgpuTypeIdInfo.Version = STRUCT_VERSION(vgpuTypeIdInfo, 1) + ret := nvmlGpuInstanceGetCreatableVgpus(gpuInstance, &vgpuTypeIdInfo) + return vgpuTypeIdInfo, ret +} + +// nvml.GpuInstanceGetActiveVgpus() +func (l *library) GpuInstanceGetActiveVgpus(gpuInstance GpuInstance) (ActiveVgpuInstanceInfo, Return) { + return gpuInstance.GetActiveVgpus() +} + +func (gpuInstance nvmlGpuInstance) GetActiveVgpus() (ActiveVgpuInstanceInfo, Return) { + var activeVgpuInstanceInfo ActiveVgpuInstanceInfo + activeVgpuInstanceInfo.Version = STRUCT_VERSION(activeVgpuInstanceInfo, 1) + ret := nvmlGpuInstanceGetActiveVgpus(gpuInstance, &activeVgpuInstanceInfo) 
+ return activeVgpuInstanceInfo, ret +} + +// nvml.GpuInstanceSetVgpuSchedulerState() +func (l *library) GpuInstanceSetVgpuSchedulerState(gpuInstance GpuInstance, scheduler *VgpuSchedulerState) Return { + return gpuInstance.SetVgpuSchedulerState(scheduler) +} + +func (gpuInstance nvmlGpuInstance) SetVgpuSchedulerState(scheduler *VgpuSchedulerState) Return { + return nvmlGpuInstanceSetVgpuSchedulerState(gpuInstance, scheduler) +} + +// nvml.GpuInstanceGetVgpuSchedulerState() +func (l *library) GpuInstanceGetVgpuSchedulerState(gpuInstance GpuInstance) (VgpuSchedulerStateInfo, Return) { + return gpuInstance.GetVgpuSchedulerState() +} + +func (gpuInstance nvmlGpuInstance) GetVgpuSchedulerState() (VgpuSchedulerStateInfo, Return) { + var schedulerStateInfo VgpuSchedulerStateInfo + schedulerStateInfo.Version = STRUCT_VERSION(schedulerStateInfo, 1) + ret := nvmlGpuInstanceGetVgpuSchedulerState(gpuInstance, &schedulerStateInfo) + return schedulerStateInfo, ret +} + +// nvml.GpuInstanceGetVgpuSchedulerLog() +func (l *library) GpuInstanceGetVgpuSchedulerLog(gpuInstance GpuInstance) (VgpuSchedulerLogInfo, Return) { + return gpuInstance.GetVgpuSchedulerLog() +} + +func (gpuInstance nvmlGpuInstance) GetVgpuSchedulerLog() (VgpuSchedulerLogInfo, Return) { + var schedulerLogInfo VgpuSchedulerLogInfo + schedulerLogInfo.Version = STRUCT_VERSION(schedulerLogInfo, 1) + ret := nvmlGpuInstanceGetVgpuSchedulerLog(gpuInstance, &schedulerLogInfo) + return schedulerLogInfo, ret +} + +// nvml.GpuInstanceGetVgpuTypeCreatablePlacements() +func (l *library) GpuInstanceGetVgpuTypeCreatablePlacements(gpuInstance GpuInstance) (VgpuCreatablePlacementInfo, Return) { + return gpuInstance.GetVgpuTypeCreatablePlacements() +} + +func (gpuInstance nvmlGpuInstance) GetVgpuTypeCreatablePlacements() (VgpuCreatablePlacementInfo, Return) { + var creatablePlacementInfo VgpuCreatablePlacementInfo + creatablePlacementInfo.Version = STRUCT_VERSION(creatablePlacementInfo, 1) + ret := 
nvmlGpuInstanceGetVgpuTypeCreatablePlacements(gpuInstance, &creatablePlacementInfo) + return creatablePlacementInfo, ret +} + +// nvml.GpuInstanceGetVgpuHeterogeneousMode() +func (l *library) GpuInstanceGetVgpuHeterogeneousMode(gpuInstance GpuInstance) (VgpuHeterogeneousMode, Return) { + return gpuInstance.GetVgpuHeterogeneousMode() +} + +func (gpuInstance nvmlGpuInstance) GetVgpuHeterogeneousMode() (VgpuHeterogeneousMode, Return) { + var heterogeneousMode VgpuHeterogeneousMode + heterogeneousMode.Version = STRUCT_VERSION(heterogeneousMode, 1) + ret := nvmlGpuInstanceGetVgpuHeterogeneousMode(gpuInstance, &heterogeneousMode) + return heterogeneousMode, ret +} + +// nvml.GpuInstanceSetVgpuHeterogeneousMode() +func (l *library) GpuInstanceSetVgpuHeterogeneousMode(gpuInstance GpuInstance, heterogeneousMode *VgpuHeterogeneousMode) Return { + return gpuInstance.SetVgpuHeterogeneousMode(heterogeneousMode) +} + +func (gpuInstance nvmlGpuInstance) SetVgpuHeterogeneousMode(heterogeneousMode *VgpuHeterogeneousMode) Return { + return nvmlGpuInstanceSetVgpuHeterogeneousMode(gpuInstance, heterogeneousMode) +} diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/event_set.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/event_set.go index 933b4dead..b772d57fc 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/event_set.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/event_set.go @@ -23,17 +23,6 @@ type EventData struct { ComputeInstanceId uint32 } -func (e EventData) convert() nvmlEventData { - out := nvmlEventData{ - Device: e.Device.(nvmlDevice), - EventType: e.EventType, - EventData: e.EventData, - GpuInstanceId: e.GpuInstanceId, - ComputeInstanceId: e.ComputeInstanceId, - } - return out -} - func (e nvmlEventData) convert() EventData { out := EventData{ Device: e.Device, @@ -71,3 +60,23 @@ func (l *library) EventSetFree(set EventSet) Return { func (set nvmlEventSet) Free() Return { return nvmlEventSetFree(set) } + +// nvml.SystemEventSetCreate() +func (l *library) 
SystemEventSetCreate(request *SystemEventSetCreateRequest) Return { + return nvmlSystemEventSetCreate(request) +} + +// nvml.SystemEventSetFree() +func (l *library) SystemEventSetFree(request *SystemEventSetFreeRequest) Return { + return nvmlSystemEventSetFree(request) +} + +// nvml.SystemRegisterEvents() +func (l *library) SystemRegisterEvents(request *SystemRegisterEventRequest) Return { + return nvmlSystemRegisterEvents(request) +} + +// nvml.SystemEventSetWait() +func (l *library) SystemEventSetWait(request *SystemEventSetWaitRequest) Return { + return nvmlSystemEventSetWait(request) +} diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/gpm.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/gpm.go index 5a71c0ff3..563bc5939 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/gpm.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/gpm.go @@ -20,7 +20,7 @@ type GpmMetricsGetType struct { NumMetrics uint32 Sample1 GpmSample Sample2 GpmSample - Metrics [98]GpmMetric + Metrics [210]GpmMetric } func (g *GpmMetricsGetType) convert() *nvmlGpmMetricsGetType { @@ -30,9 +30,8 @@ func (g *GpmMetricsGetType) convert() *nvmlGpmMetricsGetType { Sample1: g.Sample1.(nvmlGpmSample), Sample2: g.Sample2.(nvmlGpmSample), } - for i := range g.Metrics { - out.Metrics[i] = g.Metrics[i] - } + copy(out.Metrics[:], g.Metrics[:]) + return out } @@ -43,9 +42,8 @@ func (g *nvmlGpmMetricsGetType) convert() *GpmMetricsGetType { Sample1: g.Sample1, Sample2: g.Sample2, } - for i := range g.Metrics { - out.Metrics[i] = g.Metrics[i] - } + copy(out.Metrics[:], g.Metrics[:]) + return out } diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/lib.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/lib.go index bc4c3de5e..5a7e6882f 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/lib.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/lib.go @@ -163,6 +163,7 @@ var GetBlacklistDeviceCount = GetExcludedDeviceCount var GetBlacklistDeviceInfoByIndex = GetExcludedDeviceInfoByIndex var 
nvmlDeviceGetGpuInstancePossiblePlacements = nvmlDeviceGetGpuInstancePossiblePlacements_v1 var nvmlVgpuInstanceGetLicenseInfo = nvmlVgpuInstanceGetLicenseInfo_v1 +var nvmlDeviceGetDriverModel = nvmlDeviceGetDriverModel_v1 // BlacklistDeviceInfo was replaced by ExcludedDeviceInfo type BlacklistDeviceInfo = ExcludedDeviceInfo @@ -288,4 +289,8 @@ func (l *library) updateVersionedSymbols() { if err == nil { nvmlVgpuInstanceGetLicenseInfo = nvmlVgpuInstanceGetLicenseInfo_v2 } + err = l.dl.Lookup("nvmlDeviceGetDriverModel_v2") + if err == nil { + nvmlDeviceGetDriverModel = nvmlDeviceGetDriverModel_v2 + } } diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.go index 6ba290c5f..95d67d6da 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.go @@ -121,6 +121,15 @@ func nvmlSystemGetTopologyGpuSet(CpuNumber uint32, Count *uint32, DeviceArray *n return __v } +// nvmlSystemGetDriverBranch function as declared in nvml/nvml.h +func nvmlSystemGetDriverBranch(BranchInfo *SystemDriverBranchInfo, Length uint32) Return { + cBranchInfo, _ := (*C.nvmlSystemDriverBranchInfo_t)(unsafe.Pointer(BranchInfo)), cgoAllocsUnknown + cLength, _ := (C.uint)(Length), cgoAllocsUnknown + __ret := C.nvmlSystemGetDriverBranch(cBranchInfo, cLength) + __v := (Return)(__ret) + return __v +} + // nvmlUnitGetCount function as declared in nvml/nvml.h func nvmlUnitGetCount(UnitCount *uint32) Return { cUnitCount, _ := (*C.uint)(unsafe.Pointer(UnitCount)), cgoAllocsUnknown @@ -238,6 +247,15 @@ func nvmlDeviceGetHandleByUUID(Uuid string, nvmlDevice *nvmlDevice) Return { return __v } +// nvmlDeviceGetHandleByUUIDV function as declared in nvml/nvml.h +func nvmlDeviceGetHandleByUUIDV(Uuid *UUID, nvmlDevice *nvmlDevice) Return { + cUuid, _ := (*C.nvmlUUID_t)(unsafe.Pointer(Uuid)), cgoAllocsUnknown + cnvmlDevice, _ := (*C.nvmlDevice_t)(unsafe.Pointer(nvmlDevice)), cgoAllocsUnknown + __ret := 
C.nvmlDeviceGetHandleByUUIDV(cUuid, cnvmlDevice) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceGetHandleByPciBusId_v2 function as declared in nvml/nvml.h func nvmlDeviceGetHandleByPciBusId_v2(PciBusId string, nvmlDevice *nvmlDevice) Return { cPciBusId, _ := unpackPCharString(PciBusId) @@ -698,6 +716,15 @@ func nvmlDeviceGetFanSpeed_v2(nvmlDevice nvmlDevice, Fan uint32, Speed *uint32) return __v } +// nvmlDeviceGetFanSpeedRPM function as declared in nvml/nvml.h +func nvmlDeviceGetFanSpeedRPM(nvmlDevice nvmlDevice, FanSpeed *FanSpeedInfo) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cFanSpeed, _ := (*C.nvmlFanSpeedInfo_t)(unsafe.Pointer(FanSpeed)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetFanSpeedRPM(cnvmlDevice, cFanSpeed) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceGetTargetFanSpeed function as declared in nvml/nvml.h func nvmlDeviceGetTargetFanSpeed(nvmlDevice nvmlDevice, Fan uint32, TargetSpeed *uint32) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -747,6 +774,24 @@ func nvmlDeviceGetTemperature(nvmlDevice nvmlDevice, SensorType TemperatureSenso return __v } +// nvmlDeviceGetCoolerInfo function as declared in nvml/nvml.h +func nvmlDeviceGetCoolerInfo(nvmlDevice nvmlDevice, CoolerInfo *CoolerInfo) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cCoolerInfo, _ := (*C.nvmlCoolerInfo_t)(unsafe.Pointer(CoolerInfo)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetCoolerInfo(cnvmlDevice, cCoolerInfo) + __v := (Return)(__ret) + return __v +} + +// nvmlDeviceGetTemperatureV function as declared in nvml/nvml.h +func nvmlDeviceGetTemperatureV(nvmlDevice nvmlDevice, Temperature *Temperature) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cTemperature, _ := (*C.nvmlTemperature_t)(unsafe.Pointer(Temperature)), cgoAllocsUnknown + __ret := 
C.nvmlDeviceGetTemperatureV(cnvmlDevice, cTemperature) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceGetTemperatureThreshold function as declared in nvml/nvml.h func nvmlDeviceGetTemperatureThreshold(nvmlDevice nvmlDevice, ThresholdType TemperatureThresholds, Temp *uint32) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -757,6 +802,15 @@ func nvmlDeviceGetTemperatureThreshold(nvmlDevice nvmlDevice, ThresholdType Temp return __v } +// nvmlDeviceGetMarginTemperature function as declared in nvml/nvml.h +func nvmlDeviceGetMarginTemperature(nvmlDevice nvmlDevice, MarginTempInfo *MarginTemperature) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cMarginTempInfo, _ := (*C.nvmlMarginTemperature_t)(unsafe.Pointer(MarginTempInfo)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetMarginTemperature(cnvmlDevice, cMarginTempInfo) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceGetThermalSettings function as declared in nvml/nvml.h func nvmlDeviceGetThermalSettings(nvmlDevice nvmlDevice, SensorIndex uint32, PThermalSettings *GpuThermalSettings) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -881,6 +935,42 @@ func nvmlDeviceGetMemClkMinMaxVfOffset(nvmlDevice nvmlDevice, MinOffset *int32, return __v } +// nvmlDeviceGetClockOffsets function as declared in nvml/nvml.h +func nvmlDeviceGetClockOffsets(nvmlDevice nvmlDevice, Info *ClockOffset) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cInfo, _ := (*C.nvmlClockOffset_t)(unsafe.Pointer(Info)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetClockOffsets(cnvmlDevice, cInfo) + __v := (Return)(__ret) + return __v +} + +// nvmlDeviceSetClockOffsets function as declared in nvml/nvml.h +func nvmlDeviceSetClockOffsets(nvmlDevice nvmlDevice, Info *ClockOffset) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), 
cgoAllocsUnknown + cInfo, _ := (*C.nvmlClockOffset_t)(unsafe.Pointer(Info)), cgoAllocsUnknown + __ret := C.nvmlDeviceSetClockOffsets(cnvmlDevice, cInfo) + __v := (Return)(__ret) + return __v +} + +// nvmlDeviceGetPerformanceModes function as declared in nvml/nvml.h +func nvmlDeviceGetPerformanceModes(nvmlDevice nvmlDevice, PerfModes *DevicePerfModes) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cPerfModes, _ := (*C.nvmlDevicePerfModes_t)(unsafe.Pointer(PerfModes)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetPerformanceModes(cnvmlDevice, cPerfModes) + __v := (Return)(__ret) + return __v +} + +// nvmlDeviceGetCurrentClockFreqs function as declared in nvml/nvml.h +func nvmlDeviceGetCurrentClockFreqs(nvmlDevice nvmlDevice, CurrentClockFreqs *DeviceCurrentClockFreqs) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cCurrentClockFreqs, _ := (*C.nvmlDeviceCurrentClockFreqs_t)(unsafe.Pointer(CurrentClockFreqs)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetCurrentClockFreqs(cnvmlDevice, cCurrentClockFreqs) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceGetPowerManagementMode function as declared in nvml/nvml.h func nvmlDeviceGetPowerManagementMode(nvmlDevice nvmlDevice, Mode *EnableState) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -992,6 +1082,25 @@ func nvmlDeviceGetCudaComputeCapability(nvmlDevice nvmlDevice, Major *int32, Min return __v } +// nvmlDeviceGetDramEncryptionMode function as declared in nvml/nvml.h +func nvmlDeviceGetDramEncryptionMode(nvmlDevice nvmlDevice, Current *DramEncryptionInfo, Pending *DramEncryptionInfo) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cCurrent, _ := (*C.nvmlDramEncryptionInfo_t)(unsafe.Pointer(Current)), cgoAllocsUnknown + cPending, _ := (*C.nvmlDramEncryptionInfo_t)(unsafe.Pointer(Pending)), cgoAllocsUnknown + __ret := 
C.nvmlDeviceGetDramEncryptionMode(cnvmlDevice, cCurrent, cPending) + __v := (Return)(__ret) + return __v +} + +// nvmlDeviceSetDramEncryptionMode function as declared in nvml/nvml.h +func nvmlDeviceSetDramEncryptionMode(nvmlDevice nvmlDevice, DramEncryption *DramEncryptionInfo) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cDramEncryption, _ := (*C.nvmlDramEncryptionInfo_t)(unsafe.Pointer(DramEncryption)), cgoAllocsUnknown + __ret := C.nvmlDeviceSetDramEncryptionMode(cnvmlDevice, cDramEncryption) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceGetEccMode function as declared in nvml/nvml.h func nvmlDeviceGetEccMode(nvmlDevice nvmlDevice, Current *EnableState, Pending *EnableState) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -1162,12 +1271,12 @@ func nvmlDeviceGetFBCSessions(nvmlDevice nvmlDevice, SessionCount *uint32, Sessi return __v } -// nvmlDeviceGetDriverModel function as declared in nvml/nvml.h -func nvmlDeviceGetDriverModel(nvmlDevice nvmlDevice, Current *DriverModel, Pending *DriverModel) Return { +// nvmlDeviceGetDriverModel_v2 function as declared in nvml/nvml.h +func nvmlDeviceGetDriverModel_v2(nvmlDevice nvmlDevice, Current *DriverModel, Pending *DriverModel) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown cCurrent, _ := (*C.nvmlDriverModel_t)(unsafe.Pointer(Current)), cgoAllocsUnknown cPending, _ := (*C.nvmlDriverModel_t)(unsafe.Pointer(Pending)), cgoAllocsUnknown - __ret := C.nvmlDeviceGetDriverModel(cnvmlDevice, cCurrent, cPending) + __ret := C.nvmlDeviceGetDriverModel_v2(cnvmlDevice, cCurrent, cPending) __v := (Return)(__ret) return __v } @@ -1440,6 +1549,31 @@ func nvmlSystemGetConfComputeKeyRotationThresholdInfo(PKeyRotationThrInfo *ConfC return __v } +// nvmlDeviceSetConfComputeUnprotectedMemSize function as declared in nvml/nvml.h +func nvmlDeviceSetConfComputeUnprotectedMemSize(nvmlDevice 
nvmlDevice, SizeKiB uint64) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cSizeKiB, _ := (C.ulonglong)(SizeKiB), cgoAllocsUnknown + __ret := C.nvmlDeviceSetConfComputeUnprotectedMemSize(cnvmlDevice, cSizeKiB) + __v := (Return)(__ret) + return __v +} + +// nvmlSystemSetConfComputeGpusReadyState function as declared in nvml/nvml.h +func nvmlSystemSetConfComputeGpusReadyState(IsAcceptingWork uint32) Return { + cIsAcceptingWork, _ := (C.uint)(IsAcceptingWork), cgoAllocsUnknown + __ret := C.nvmlSystemSetConfComputeGpusReadyState(cIsAcceptingWork) + __v := (Return)(__ret) + return __v +} + +// nvmlSystemSetConfComputeKeyRotationThresholdInfo function as declared in nvml/nvml.h +func nvmlSystemSetConfComputeKeyRotationThresholdInfo(PKeyRotationThrInfo *ConfComputeSetKeyRotationThresholdInfo) Return { + cPKeyRotationThrInfo, _ := (*C.nvmlConfComputeSetKeyRotationThresholdInfo_t)(unsafe.Pointer(PKeyRotationThrInfo)), cgoAllocsUnknown + __ret := C.nvmlSystemSetConfComputeKeyRotationThresholdInfo(cPKeyRotationThrInfo) + __v := (Return)(__ret) + return __v +} + // nvmlSystemGetConfComputeSettings function as declared in nvml/nvml.h func nvmlSystemGetConfComputeSettings(Settings *SystemConfComputeSettings) Return { cSettings, _ := (*C.nvmlSystemConfComputeSettings_t)(unsafe.Pointer(Settings)), cgoAllocsUnknown @@ -1467,6 +1601,15 @@ func nvmlDeviceGetGspFirmwareMode(nvmlDevice nvmlDevice, IsEnabled *uint32, Defa return __v } +// nvmlDeviceGetSramEccErrorStatus function as declared in nvml/nvml.h +func nvmlDeviceGetSramEccErrorStatus(nvmlDevice nvmlDevice, Status *EccSramErrorStatus) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cStatus, _ := (*C.nvmlEccSramErrorStatus_t)(unsafe.Pointer(Status)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetSramEccErrorStatus(cnvmlDevice, cStatus) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceGetAccountingMode function as declared in 
nvml/nvml.h func nvmlDeviceGetAccountingMode(nvmlDevice nvmlDevice, Mode *EnableState) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -1596,6 +1739,15 @@ func nvmlDeviceGetProcessesUtilizationInfo(nvmlDevice nvmlDevice, ProcesesUtilIn return __v } +// nvmlDeviceGetPlatformInfo function as declared in nvml/nvml.h +func nvmlDeviceGetPlatformInfo(nvmlDevice nvmlDevice, PlatformInfo *PlatformInfo) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cPlatformInfo, _ := (*C.nvmlPlatformInfo_t)(unsafe.Pointer(PlatformInfo)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetPlatformInfo(cnvmlDevice, cPlatformInfo) + __v := (Return)(__ret) + return __v +} + // nvmlUnitSetLedState function as declared in nvml/nvml.h func nvmlUnitSetLedState(nvmlUnit nvmlUnit, Color LedColor) Return { cnvmlUnit, _ := *(*C.nvmlUnit_t)(unsafe.Pointer(&nvmlUnit)), cgoAllocsUnknown @@ -1809,31 +1961,6 @@ func nvmlDeviceSetMemClkVfOffset(nvmlDevice nvmlDevice, Offset int32) Return { return __v } -// nvmlDeviceSetConfComputeUnprotectedMemSize function as declared in nvml/nvml.h -func nvmlDeviceSetConfComputeUnprotectedMemSize(nvmlDevice nvmlDevice, SizeKiB uint64) Return { - cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown - cSizeKiB, _ := (C.ulonglong)(SizeKiB), cgoAllocsUnknown - __ret := C.nvmlDeviceSetConfComputeUnprotectedMemSize(cnvmlDevice, cSizeKiB) - __v := (Return)(__ret) - return __v -} - -// nvmlSystemSetConfComputeGpusReadyState function as declared in nvml/nvml.h -func nvmlSystemSetConfComputeGpusReadyState(IsAcceptingWork uint32) Return { - cIsAcceptingWork, _ := (C.uint)(IsAcceptingWork), cgoAllocsUnknown - __ret := C.nvmlSystemSetConfComputeGpusReadyState(cIsAcceptingWork) - __v := (Return)(__ret) - return __v -} - -// nvmlSystemSetConfComputeKeyRotationThresholdInfo function as declared in nvml/nvml.h -func 
nvmlSystemSetConfComputeKeyRotationThresholdInfo(PKeyRotationThrInfo *ConfComputeSetKeyRotationThresholdInfo) Return { - cPKeyRotationThrInfo, _ := (*C.nvmlConfComputeSetKeyRotationThresholdInfo_t)(unsafe.Pointer(PKeyRotationThrInfo)), cgoAllocsUnknown - __ret := C.nvmlSystemSetConfComputeKeyRotationThresholdInfo(cPKeyRotationThrInfo) - __v := (Return)(__ret) - return __v -} - // nvmlDeviceSetAccountingMode function as declared in nvml/nvml.h func nvmlDeviceSetAccountingMode(nvmlDevice nvmlDevice, Mode EnableState) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -1851,6 +1978,15 @@ func nvmlDeviceClearAccountingPids(nvmlDevice nvmlDevice) Return { return __v } +// nvmlDeviceSetPowerManagementLimit_v2 function as declared in nvml/nvml.h +func nvmlDeviceSetPowerManagementLimit_v2(nvmlDevice nvmlDevice, PowerValue *PowerValue_v2) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cPowerValue, _ := (*C.nvmlPowerValue_v2_t)(unsafe.Pointer(PowerValue)), cgoAllocsUnknown + __ret := C.nvmlDeviceSetPowerManagementLimit_v2(cnvmlDevice, cPowerValue) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceGetNvLinkState function as declared in nvml/nvml.h func nvmlDeviceGetNvLinkState(nvmlDevice nvmlDevice, Link uint32, IsActive *EnableState) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -1978,6 +2114,58 @@ func nvmlDeviceGetNvLinkRemoteDeviceType(nvmlDevice nvmlDevice, Link uint32, PNv return __v } +// nvmlDeviceSetNvLinkDeviceLowPowerThreshold function as declared in nvml/nvml.h +func nvmlDeviceSetNvLinkDeviceLowPowerThreshold(nvmlDevice nvmlDevice, Info *NvLinkPowerThres) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cInfo, _ := (*C.nvmlNvLinkPowerThres_t)(unsafe.Pointer(Info)), cgoAllocsUnknown + __ret := C.nvmlDeviceSetNvLinkDeviceLowPowerThreshold(cnvmlDevice, cInfo) + __v := 
(Return)(__ret) + return __v +} + +// nvmlSystemSetNvlinkBwMode function as declared in nvml/nvml.h +func nvmlSystemSetNvlinkBwMode(NvlinkBwMode uint32) Return { + cNvlinkBwMode, _ := (C.uint)(NvlinkBwMode), cgoAllocsUnknown + __ret := C.nvmlSystemSetNvlinkBwMode(cNvlinkBwMode) + __v := (Return)(__ret) + return __v +} + +// nvmlSystemGetNvlinkBwMode function as declared in nvml/nvml.h +func nvmlSystemGetNvlinkBwMode(NvlinkBwMode *uint32) Return { + cNvlinkBwMode, _ := (*C.uint)(unsafe.Pointer(NvlinkBwMode)), cgoAllocsUnknown + __ret := C.nvmlSystemGetNvlinkBwMode(cNvlinkBwMode) + __v := (Return)(__ret) + return __v +} + +// nvmlDeviceGetNvlinkSupportedBwModes function as declared in nvml/nvml.h +func nvmlDeviceGetNvlinkSupportedBwModes(nvmlDevice nvmlDevice, SupportedBwMode *NvlinkSupportedBwModes) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cSupportedBwMode, _ := (*C.nvmlNvlinkSupportedBwModes_t)(unsafe.Pointer(SupportedBwMode)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetNvlinkSupportedBwModes(cnvmlDevice, cSupportedBwMode) + __v := (Return)(__ret) + return __v +} + +// nvmlDeviceGetNvlinkBwMode function as declared in nvml/nvml.h +func nvmlDeviceGetNvlinkBwMode(nvmlDevice nvmlDevice, GetBwMode *NvlinkGetBwMode) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cGetBwMode, _ := (*C.nvmlNvlinkGetBwMode_t)(unsafe.Pointer(GetBwMode)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetNvlinkBwMode(cnvmlDevice, cGetBwMode) + __v := (Return)(__ret) + return __v +} + +// nvmlDeviceSetNvlinkBwMode function as declared in nvml/nvml.h +func nvmlDeviceSetNvlinkBwMode(nvmlDevice nvmlDevice, SetBwMode *NvlinkSetBwMode) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cSetBwMode, _ := (*C.nvmlNvlinkSetBwMode_t)(unsafe.Pointer(SetBwMode)), cgoAllocsUnknown + __ret := C.nvmlDeviceSetNvlinkBwMode(cnvmlDevice, cSetBwMode) + __v := 
(Return)(__ret) + return __v +} + // nvmlEventSetCreate function as declared in nvml/nvml.h func nvmlEventSetCreate(Set *nvmlEventSet) Return { cSet, _ := (*C.nvmlEventSet_t)(unsafe.Pointer(Set)), cgoAllocsUnknown @@ -2023,6 +2211,38 @@ func nvmlEventSetFree(Set nvmlEventSet) Return { return __v } +// nvmlSystemEventSetCreate function as declared in nvml/nvml.h +func nvmlSystemEventSetCreate(Request *SystemEventSetCreateRequest) Return { + cRequest, _ := (*C.nvmlSystemEventSetCreateRequest_t)(unsafe.Pointer(Request)), cgoAllocsUnknown + __ret := C.nvmlSystemEventSetCreate(cRequest) + __v := (Return)(__ret) + return __v +} + +// nvmlSystemEventSetFree function as declared in nvml/nvml.h +func nvmlSystemEventSetFree(Request *SystemEventSetFreeRequest) Return { + cRequest, _ := (*C.nvmlSystemEventSetFreeRequest_t)(unsafe.Pointer(Request)), cgoAllocsUnknown + __ret := C.nvmlSystemEventSetFree(cRequest) + __v := (Return)(__ret) + return __v +} + +// nvmlSystemRegisterEvents function as declared in nvml/nvml.h +func nvmlSystemRegisterEvents(Request *SystemRegisterEventRequest) Return { + cRequest, _ := (*C.nvmlSystemRegisterEventRequest_t)(unsafe.Pointer(Request)), cgoAllocsUnknown + __ret := C.nvmlSystemRegisterEvents(cRequest) + __v := (Return)(__ret) + return __v +} + +// nvmlSystemEventSetWait function as declared in nvml/nvml.h +func nvmlSystemEventSetWait(Request *SystemEventSetWaitRequest) Return { + cRequest, _ := (*C.nvmlSystemEventSetWaitRequest_t)(unsafe.Pointer(Request)), cgoAllocsUnknown + __ret := C.nvmlSystemEventSetWait(cRequest) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceModifyDrainState function as declared in nvml/nvml.h func nvmlDeviceModifyDrainState(PciInfo *PciInfo, NewState EnableState) Return { cPciInfo, _ := (*C.nvmlPciInfo_t)(unsafe.Pointer(PciInfo)), cgoAllocsUnknown @@ -2171,6 +2391,15 @@ func nvmlVgpuTypeGetFbReservation(nvmlVgpuTypeId nvmlVgpuTypeId, FbReservation * return __v } +// nvmlVgpuInstanceGetRuntimeStateSize function 
as declared in nvml/nvml.h +func nvmlVgpuInstanceGetRuntimeStateSize(nvmlVgpuInstance nvmlVgpuInstance, PState *VgpuRuntimeState) Return { + cnvmlVgpuInstance, _ := (C.nvmlVgpuInstance_t)(nvmlVgpuInstance), cgoAllocsUnknown + cPState, _ := (*C.nvmlVgpuRuntimeState_t)(unsafe.Pointer(PState)), cgoAllocsUnknown + __ret := C.nvmlVgpuInstanceGetRuntimeStateSize(cnvmlVgpuInstance, cPState) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceSetVgpuCapabilities function as declared in nvml/nvml.h func nvmlDeviceSetVgpuCapabilities(nvmlDevice nvmlDevice, Capability DeviceVgpuCapability, State EnableState) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -2335,6 +2564,15 @@ func nvmlVgpuTypeGetMaxInstancesPerVm(nvmlVgpuTypeId nvmlVgpuTypeId, VgpuInstanc return __v } +// nvmlVgpuTypeGetBAR1Info function as declared in nvml/nvml.h +func nvmlVgpuTypeGetBAR1Info(nvmlVgpuTypeId nvmlVgpuTypeId, Bar1Info *VgpuTypeBar1Info) Return { + cnvmlVgpuTypeId, _ := (C.nvmlVgpuTypeId_t)(nvmlVgpuTypeId), cgoAllocsUnknown + cBar1Info, _ := (*C.nvmlVgpuTypeBar1Info_t)(unsafe.Pointer(Bar1Info)), cgoAllocsUnknown + __ret := C.nvmlVgpuTypeGetBAR1Info(cnvmlVgpuTypeId, cBar1Info) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceGetActiveVgpus function as declared in nvml/nvml.h func nvmlDeviceGetActiveVgpus(nvmlDevice nvmlDevice, VgpuCount *uint32, VgpuInstances *nvmlVgpuInstance) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -2518,6 +2756,86 @@ func nvmlVgpuInstanceGetMdevUUID(nvmlVgpuInstance nvmlVgpuInstance, MdevUuid *by return __v } +// nvmlGpuInstanceGetCreatableVgpus function as declared in nvml/nvml.h +func nvmlGpuInstanceGetCreatableVgpus(nvmlGpuInstance nvmlGpuInstance, PVgpus *VgpuTypeIdInfo) Return { + cnvmlGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&nvmlGpuInstance)), cgoAllocsUnknown + cPVgpus, _ := (*C.nvmlVgpuTypeIdInfo_t)(unsafe.Pointer(PVgpus)), cgoAllocsUnknown 
+ __ret := C.nvmlGpuInstanceGetCreatableVgpus(cnvmlGpuInstance, cPVgpus) + __v := (Return)(__ret) + return __v +} + +// nvmlVgpuTypeGetMaxInstancesPerGpuInstance function as declared in nvml/nvml.h +func nvmlVgpuTypeGetMaxInstancesPerGpuInstance(PMaxInstance *VgpuTypeMaxInstance) Return { + cPMaxInstance, _ := (*C.nvmlVgpuTypeMaxInstance_t)(unsafe.Pointer(PMaxInstance)), cgoAllocsUnknown + __ret := C.nvmlVgpuTypeGetMaxInstancesPerGpuInstance(cPMaxInstance) + __v := (Return)(__ret) + return __v +} + +// nvmlGpuInstanceGetActiveVgpus function as declared in nvml/nvml.h +func nvmlGpuInstanceGetActiveVgpus(nvmlGpuInstance nvmlGpuInstance, PVgpuInstanceInfo *ActiveVgpuInstanceInfo) Return { + cnvmlGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&nvmlGpuInstance)), cgoAllocsUnknown + cPVgpuInstanceInfo, _ := (*C.nvmlActiveVgpuInstanceInfo_t)(unsafe.Pointer(PVgpuInstanceInfo)), cgoAllocsUnknown + __ret := C.nvmlGpuInstanceGetActiveVgpus(cnvmlGpuInstance, cPVgpuInstanceInfo) + __v := (Return)(__ret) + return __v +} + +// nvmlGpuInstanceSetVgpuSchedulerState function as declared in nvml/nvml.h +func nvmlGpuInstanceSetVgpuSchedulerState(nvmlGpuInstance nvmlGpuInstance, PScheduler *VgpuSchedulerState) Return { + cnvmlGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&nvmlGpuInstance)), cgoAllocsUnknown + cPScheduler, _ := (*C.nvmlVgpuSchedulerState_t)(unsafe.Pointer(PScheduler)), cgoAllocsUnknown + __ret := C.nvmlGpuInstanceSetVgpuSchedulerState(cnvmlGpuInstance, cPScheduler) + __v := (Return)(__ret) + return __v +} + +// nvmlGpuInstanceGetVgpuSchedulerState function as declared in nvml/nvml.h +func nvmlGpuInstanceGetVgpuSchedulerState(nvmlGpuInstance nvmlGpuInstance, PSchedulerStateInfo *VgpuSchedulerStateInfo) Return { + cnvmlGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&nvmlGpuInstance)), cgoAllocsUnknown + cPSchedulerStateInfo, _ := (*C.nvmlVgpuSchedulerStateInfo_t)(unsafe.Pointer(PSchedulerStateInfo)), cgoAllocsUnknown + __ret := 
C.nvmlGpuInstanceGetVgpuSchedulerState(cnvmlGpuInstance, cPSchedulerStateInfo) + __v := (Return)(__ret) + return __v +} + +// nvmlGpuInstanceGetVgpuSchedulerLog function as declared in nvml/nvml.h +func nvmlGpuInstanceGetVgpuSchedulerLog(nvmlGpuInstance nvmlGpuInstance, PSchedulerLogInfo *VgpuSchedulerLogInfo) Return { + cnvmlGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&nvmlGpuInstance)), cgoAllocsUnknown + cPSchedulerLogInfo, _ := (*C.nvmlVgpuSchedulerLogInfo_t)(unsafe.Pointer(PSchedulerLogInfo)), cgoAllocsUnknown + __ret := C.nvmlGpuInstanceGetVgpuSchedulerLog(cnvmlGpuInstance, cPSchedulerLogInfo) + __v := (Return)(__ret) + return __v +} + +// nvmlGpuInstanceGetVgpuTypeCreatablePlacements function as declared in nvml/nvml.h +func nvmlGpuInstanceGetVgpuTypeCreatablePlacements(nvmlGpuInstance nvmlGpuInstance, PCreatablePlacementInfo *VgpuCreatablePlacementInfo) Return { + cnvmlGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&nvmlGpuInstance)), cgoAllocsUnknown + cPCreatablePlacementInfo, _ := (*C.nvmlVgpuCreatablePlacementInfo_t)(unsafe.Pointer(PCreatablePlacementInfo)), cgoAllocsUnknown + __ret := C.nvmlGpuInstanceGetVgpuTypeCreatablePlacements(cnvmlGpuInstance, cPCreatablePlacementInfo) + __v := (Return)(__ret) + return __v +} + +// nvmlGpuInstanceGetVgpuHeterogeneousMode function as declared in nvml/nvml.h +func nvmlGpuInstanceGetVgpuHeterogeneousMode(nvmlGpuInstance nvmlGpuInstance, PHeterogeneousMode *VgpuHeterogeneousMode) Return { + cnvmlGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&nvmlGpuInstance)), cgoAllocsUnknown + cPHeterogeneousMode, _ := (*C.nvmlVgpuHeterogeneousMode_t)(unsafe.Pointer(PHeterogeneousMode)), cgoAllocsUnknown + __ret := C.nvmlGpuInstanceGetVgpuHeterogeneousMode(cnvmlGpuInstance, cPHeterogeneousMode) + __v := (Return)(__ret) + return __v +} + +// nvmlGpuInstanceSetVgpuHeterogeneousMode function as declared in nvml/nvml.h +func nvmlGpuInstanceSetVgpuHeterogeneousMode(nvmlGpuInstance nvmlGpuInstance, 
PHeterogeneousMode *VgpuHeterogeneousMode) Return { + cnvmlGpuInstance, _ := *(*C.nvmlGpuInstance_t)(unsafe.Pointer(&nvmlGpuInstance)), cgoAllocsUnknown + cPHeterogeneousMode, _ := (*C.nvmlVgpuHeterogeneousMode_t)(unsafe.Pointer(PHeterogeneousMode)), cgoAllocsUnknown + __ret := C.nvmlGpuInstanceSetVgpuHeterogeneousMode(cnvmlGpuInstance, cPHeterogeneousMode) + __v := (Return)(__ret) + return __v +} + // nvmlVgpuInstanceGetMetadata function as declared in nvml/nvml.h func nvmlVgpuInstanceGetMetadata(nvmlVgpuInstance nvmlVgpuInstance, nvmlVgpuMetadata *nvmlVgpuMetadata, BufferSize *uint32) Return { cnvmlVgpuInstance, _ := (C.nvmlVgpuInstance_t)(nvmlVgpuInstance), cgoAllocsUnknown @@ -3062,45 +3380,74 @@ func nvmlGpmSetStreamingEnabled(nvmlDevice nvmlDevice, State uint32) Return { return __v } -// nvmlDeviceSetNvLinkDeviceLowPowerThreshold function as declared in nvml/nvml.h -func nvmlDeviceSetNvLinkDeviceLowPowerThreshold(nvmlDevice nvmlDevice, Info *NvLinkPowerThres) Return { +// nvmlDeviceGetCapabilities function as declared in nvml/nvml.h +func nvmlDeviceGetCapabilities(nvmlDevice nvmlDevice, Caps *DeviceCapabilities) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown - cInfo, _ := (*C.nvmlNvLinkPowerThres_t)(unsafe.Pointer(Info)), cgoAllocsUnknown - __ret := C.nvmlDeviceSetNvLinkDeviceLowPowerThreshold(cnvmlDevice, cInfo) + cCaps, _ := (*C.nvmlDeviceCapabilities_t)(unsafe.Pointer(Caps)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetCapabilities(cnvmlDevice, cCaps) __v := (Return)(__ret) return __v } -// nvmlSystemSetNvlinkBwMode function as declared in nvml/nvml.h -func nvmlSystemSetNvlinkBwMode(NvlinkBwMode uint32) Return { - cNvlinkBwMode, _ := (C.uint)(NvlinkBwMode), cgoAllocsUnknown - __ret := C.nvmlSystemSetNvlinkBwMode(cNvlinkBwMode) +// nvmlDeviceWorkloadPowerProfileGetProfilesInfo function as declared in nvml/nvml.h +func nvmlDeviceWorkloadPowerProfileGetProfilesInfo(nvmlDevice nvmlDevice, ProfilesInfo 
*WorkloadPowerProfileProfilesInfo) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cProfilesInfo, _ := (*C.nvmlWorkloadPowerProfileProfilesInfo_t)(unsafe.Pointer(ProfilesInfo)), cgoAllocsUnknown + __ret := C.nvmlDeviceWorkloadPowerProfileGetProfilesInfo(cnvmlDevice, cProfilesInfo) __v := (Return)(__ret) return __v } -// nvmlSystemGetNvlinkBwMode function as declared in nvml/nvml.h -func nvmlSystemGetNvlinkBwMode(NvlinkBwMode *uint32) Return { - cNvlinkBwMode, _ := (*C.uint)(unsafe.Pointer(NvlinkBwMode)), cgoAllocsUnknown - __ret := C.nvmlSystemGetNvlinkBwMode(cNvlinkBwMode) +// nvmlDeviceWorkloadPowerProfileGetCurrentProfiles function as declared in nvml/nvml.h +func nvmlDeviceWorkloadPowerProfileGetCurrentProfiles(nvmlDevice nvmlDevice, CurrentProfiles *WorkloadPowerProfileCurrentProfiles) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cCurrentProfiles, _ := (*C.nvmlWorkloadPowerProfileCurrentProfiles_t)(unsafe.Pointer(CurrentProfiles)), cgoAllocsUnknown + __ret := C.nvmlDeviceWorkloadPowerProfileGetCurrentProfiles(cnvmlDevice, cCurrentProfiles) __v := (Return)(__ret) return __v } -// nvmlDeviceSetPowerManagementLimit_v2 function as declared in nvml/nvml.h -func nvmlDeviceSetPowerManagementLimit_v2(nvmlDevice nvmlDevice, PowerValue *PowerValue_v2) Return { +// nvmlDeviceWorkloadPowerProfileSetRequestedProfiles function as declared in nvml/nvml.h +func nvmlDeviceWorkloadPowerProfileSetRequestedProfiles(nvmlDevice nvmlDevice, RequestedProfiles *WorkloadPowerProfileRequestedProfiles) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown - cPowerValue, _ := (*C.nvmlPowerValue_v2_t)(unsafe.Pointer(PowerValue)), cgoAllocsUnknown - __ret := C.nvmlDeviceSetPowerManagementLimit_v2(cnvmlDevice, cPowerValue) + cRequestedProfiles, _ := (*C.nvmlWorkloadPowerProfileRequestedProfiles_t)(unsafe.Pointer(RequestedProfiles)), cgoAllocsUnknown + 
__ret := C.nvmlDeviceWorkloadPowerProfileSetRequestedProfiles(cnvmlDevice, cRequestedProfiles) __v := (Return)(__ret) return __v } -// nvmlDeviceGetSramEccErrorStatus function as declared in nvml/nvml.h -func nvmlDeviceGetSramEccErrorStatus(nvmlDevice nvmlDevice, Status *EccSramErrorStatus) Return { +// nvmlDeviceWorkloadPowerProfileClearRequestedProfiles function as declared in nvml/nvml.h +func nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(nvmlDevice nvmlDevice, RequestedProfiles *WorkloadPowerProfileRequestedProfiles) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown - cStatus, _ := (*C.nvmlEccSramErrorStatus_t)(unsafe.Pointer(Status)), cgoAllocsUnknown - __ret := C.nvmlDeviceGetSramEccErrorStatus(cnvmlDevice, cStatus) + cRequestedProfiles, _ := (*C.nvmlWorkloadPowerProfileRequestedProfiles_t)(unsafe.Pointer(RequestedProfiles)), cgoAllocsUnknown + __ret := C.nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(cnvmlDevice, cRequestedProfiles) + __v := (Return)(__ret) + return __v +} + +// nvmlDevicePowerSmoothingActivatePresetProfile function as declared in nvml/nvml.h +func nvmlDevicePowerSmoothingActivatePresetProfile(nvmlDevice nvmlDevice, Profile *PowerSmoothingProfile) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cProfile, _ := (*C.nvmlPowerSmoothingProfile_t)(unsafe.Pointer(Profile)), cgoAllocsUnknown + __ret := C.nvmlDevicePowerSmoothingActivatePresetProfile(cnvmlDevice, cProfile) + __v := (Return)(__ret) + return __v +} + +// nvmlDevicePowerSmoothingUpdatePresetProfileParam function as declared in nvml/nvml.h +func nvmlDevicePowerSmoothingUpdatePresetProfileParam(nvmlDevice nvmlDevice, Profile *PowerSmoothingProfile) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cProfile, _ := (*C.nvmlPowerSmoothingProfile_t)(unsafe.Pointer(Profile)), cgoAllocsUnknown + __ret := 
C.nvmlDevicePowerSmoothingUpdatePresetProfileParam(cnvmlDevice, cProfile) + __v := (Return)(__ret) + return __v +} + +// nvmlDevicePowerSmoothingSetState function as declared in nvml/nvml.h +func nvmlDevicePowerSmoothingSetState(nvmlDevice nvmlDevice, State *PowerSmoothingState) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cState, _ := (*C.nvmlPowerSmoothingState_t)(unsafe.Pointer(State)), cgoAllocsUnknown + __ret := C.nvmlDevicePowerSmoothingSetState(cnvmlDevice, cState) __v := (Return)(__ret) return __v } @@ -3308,3 +3655,13 @@ func nvmlVgpuInstanceGetLicenseInfo_v1(nvmlVgpuInstance nvmlVgpuInstance, Licens __v := (Return)(__ret) return __v } + +// nvmlDeviceGetDriverModel_v1 function as declared in nvml/nvml.h +func nvmlDeviceGetDriverModel_v1(nvmlDevice nvmlDevice, Current *DriverModel, Pending *DriverModel) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cCurrent, _ := (*C.nvmlDriverModel_t)(unsafe.Pointer(Current)), cgoAllocsUnknown + cPending, _ := (*C.nvmlDriverModel_t)(unsafe.Pointer(Pending)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetDriverModel(cnvmlDevice, cCurrent, cPending) + __v := (Return)(__ret) + return __v +} diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h index 1e4eb12dc..28a654756 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h @@ -1,7 +1,7 @@ -/*** NVML VERSION: 12.4.127 ***/ -/*** From https://api.anaconda.org/download/nvidia/cuda-nvml-dev/12.4.127/linux-64/cuda-nvml-dev-12.4.127-0.tar.bz2 ***/ +/*** NVML VERSION: 12.9.40 ***/ +/*** From https://gitlab.com/nvidia/headers/cuda-individual/nvml_dev/-/raw/v12.9.40/nvml.h ***/ /* - * Copyright 1993-2023 NVIDIA Corporation. All rights reserved. + * Copyright 1993-2025 NVIDIA Corporation. All rights reserved. 
* * NOTICE TO USER: * @@ -94,6 +94,8 @@ extern "C" { #define DECLDIR #endif + #define NVML_MCDM_SUPPORT + /** * NVML API versioning support */ @@ -124,6 +126,7 @@ extern "C" { #define nvmlGetBlacklistDeviceInfoByIndex nvmlGetExcludedDeviceInfoByIndex #define nvmlDeviceGetGpuInstancePossiblePlacements nvmlDeviceGetGpuInstancePossiblePlacements_v2 #define nvmlVgpuInstanceGetLicenseInfo nvmlVgpuInstanceGetLicenseInfo_v2 + #define nvmlDeviceGetDriverModel nvmlDeviceGetDriverModel_v2 #endif // #ifndef NVML_NO_UNVERSIONED_FUNC_DEFS #define NVML_STRUCT_VERSION(data, ver) (unsigned int)(sizeof(nvml ## data ## _v ## ver ## _t) | \ @@ -148,6 +151,11 @@ typedef struct struct nvmlDevice_st* handle; } nvmlDevice_t; +typedef struct +{ + struct nvmlGpuInstance_st* handle; +} nvmlGpuInstance_t; + /** * Buffer size guaranteed to be large enough for pci bus id */ @@ -587,12 +595,12 @@ typedef enum nvmlValueType_enum NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3, NVML_VALUE_TYPE_SIGNED_LONG_LONG = 4, NVML_VALUE_TYPE_SIGNED_INT = 5, + NVML_VALUE_TYPE_UNSIGNED_SHORT = 6, // Keep this last NVML_VALUE_TYPE_COUNT }nvmlValueType_t; - /** * Union to represent different types of Value */ @@ -604,6 +612,7 @@ typedef union nvmlValue_st unsigned long ulVal; //!< If the value is unsigned long unsigned long long ullVal; //!< If the value is unsigned long long signed long long sllVal; //!< If the value is signed long long + unsigned short usVal; //!< If the value is unsigned short }nvmlValue_t; /** @@ -645,6 +654,9 @@ typedef struct nvmlViolationTime_st #define NVML_MAX_THERMAL_SENSORS_PER_GPU 3 +/** + * Represents the thermal sensor targets + */ typedef enum { NVML_THERMAL_TARGET_NONE = 0, @@ -660,6 +672,9 @@ typedef enum NVML_THERMAL_TARGET_UNKNOWN = -1, } nvmlThermalTarget_t; +/** + * Represents the thermal sensor controllers + */ typedef enum { NVML_THERMAL_CONTROLLER_NONE = 0, @@ -691,6 +706,9 @@ typedef struct { nvmlThermalTarget_t target; } nvmlGpuThermalSettingsSensor_t; +/** + * Struct to hold 
the thermal sensor settings + */ typedef struct { unsigned int count; @@ -698,6 +716,84 @@ typedef struct } nvmlGpuThermalSettings_t; +/** + * Cooler control type + */ +typedef enum nvmlCoolerControl_enum +{ + NVML_THERMAL_COOLER_SIGNAL_NONE = 0, //!< This cooler has no control signal. + NVML_THERMAL_COOLER_SIGNAL_TOGGLE = 1, //!< This cooler can only be toggled either ON or OFF (eg a switch). + NVML_THERMAL_COOLER_SIGNAL_VARIABLE = 2, //!< This cooler's level can be adjusted from some minimum to some maximum (eg a knob). + + // Keep this last + NVML_THERMAL_COOLER_SIGNAL_COUNT +} nvmlCoolerControl_t; + +/** + * Cooler's target + */ +typedef enum nvmlCoolerTarget_enum +{ + NVML_THERMAL_COOLER_TARGET_NONE = 1 << 0, //!< This cooler cools nothing. + NVML_THERMAL_COOLER_TARGET_GPU = 1 << 1, //!< This cooler can cool the GPU. + NVML_THERMAL_COOLER_TARGET_MEMORY = 1 << 2, //!< This cooler can cool the memory. + NVML_THERMAL_COOLER_TARGET_POWER_SUPPLY = 1 << 3, //!< This cooler can cool the power supply. + NVML_THERMAL_COOLER_TARGET_GPU_RELATED = (NVML_THERMAL_COOLER_TARGET_GPU | NVML_THERMAL_COOLER_TARGET_MEMORY | NVML_THERMAL_COOLER_TARGET_POWER_SUPPLY) //!< This cooler cools all of the components related to its target gpu. 
GPU_RELATED = GPU | MEMORY | POWER_SUPPLY +} nvmlCoolerTarget_t; + +typedef struct +{ + unsigned int version; //!< the API version number + unsigned int index; //!< the cooler index + nvmlCoolerControl_t signalType; //!< OUT: the cooler's control signal characteristics + nvmlCoolerTarget_t target; //!< OUT: the target that cooler cools +} nvmlCoolerInfo_v1_t; +typedef nvmlCoolerInfo_v1_t nvmlCoolerInfo_t; + +#define nvmlCoolerInfo_v1 NVML_STRUCT_VERSION(CoolerInfo, 1) + +/** + * UUID length in ASCII format + */ +#define NVML_DEVICE_UUID_ASCII_LEN 41 + +/** + * UUID length in binary format + */ +#define NVML_DEVICE_UUID_BINARY_LEN 16 + +/** + * Enum to represent different UUID types + */ +typedef enum +{ + NVML_UUID_TYPE_NONE = 0, //!< Undefined type + NVML_UUID_TYPE_ASCII = 1, //!< ASCII format type + NVML_UUID_TYPE_BINARY = 2, //!< Binary format type +} nvmlUUIDType_t; + +/** + * Union to represent different UUID values + */ +typedef union +{ + char str[NVML_DEVICE_UUID_ASCII_LEN]; //!< ASCII format value + unsigned char bytes[NVML_DEVICE_UUID_BINARY_LEN]; //!< Binary format value +} nvmlUUIDValue_t; + +/** + * Struct to represent NVML UUID information + */ +typedef struct +{ + unsigned int version; //!< API version number + unsigned int type; //!< One of \p nvmlUUIDType_t + nvmlUUIDValue_t value; //!< One of \p nvmlUUIDValue_t, to be set based on the UUID format +} nvmlUUID_v1_t; +typedef nvmlUUID_v1_t nvmlUUID_t; + +#define nvmlUUID_v1 NVML_STRUCT_VERSION(UUID, 1) + /** @} */ /***************************************************************************************************/ @@ -720,6 +816,18 @@ typedef enum nvmlEnableState_enum //! Generic flag used to force some behavior. See description of particular functions for details. 
#define nvmlFlagForce 0x01 +/** + * DRAM Encryption Info + */ +typedef struct +{ + unsigned int version; //!< IN - the API version number + nvmlEnableState_t encryptionState; //!< IN/OUT - DRAM Encryption state +} nvmlDramEncryptionInfo_v1_t; +typedef nvmlDramEncryptionInfo_v1_t nvmlDramEncryptionInfo_t; + +#define nvmlDramEncryptionInfo_v1 NVML_STRUCT_VERSION(DramEncryptionInfo, 1) + /** * * The Brand of the GPU * */ @@ -767,6 +875,8 @@ typedef enum nvmlTemperatureThresholds_enum // acoustic threshold. NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MAX = 6, // Maximum GPU temperature that can be // set as acoustic threshold. + NVML_TEMPERATURE_THRESHOLD_GPS_CURR = 7, // Current temperature that is set as + // gps threshold. // Keep this last NVML_TEMPERATURE_THRESHOLD_COUNT } nvmlTemperatureThresholds_t; @@ -782,6 +892,19 @@ typedef enum nvmlTemperatureSensors_enum NVML_TEMPERATURE_COUNT } nvmlTemperatureSensors_t; +/** + * Margin temperature values + */ +typedef struct +{ + unsigned int version; //!< The version number of this struct + int marginTemperature; //!< The margin temperature value +} nvmlMarginTemperature_v1_t; + +typedef nvmlMarginTemperature_v1_t nvmlMarginTemperature_t; + +#define nvmlMarginTemperature_v1 NVML_STRUCT_VERSION(MarginTemperature, 1) + /** * Compute mode. 
* @@ -803,7 +926,7 @@ typedef enum nvmlComputeMode_enum /** * Max Clock Monitors available */ -#define MAX_CLK_DOMAINS 32 +#define MAX_CLK_DOMAINS 32 /** * Clock Monitor error types @@ -881,12 +1004,26 @@ typedef enum nvmlMemoryErrorType_enum */ NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1, - // Keep this last NVML_MEMORY_ERROR_TYPE_COUNT //!< Count of memory error types } nvmlMemoryErrorType_t; +/** + * Represents Nvlink Version + */ +typedef enum nvmlNvlinkVersion_enum +{ + NVML_NVLINK_VERSION_INVALID = 0, + NVML_NVLINK_VERSION_1_0 = 1, + NVML_NVLINK_VERSION_2_0 = 2, + NVML_NVLINK_VERSION_2_2 = 3, + NVML_NVLINK_VERSION_3_0 = 4, + NVML_NVLINK_VERSION_3_1 = 5, + NVML_NVLINK_VERSION_4_0 = 6, + NVML_NVLINK_VERSION_5_0 = 7, +}nvmlNvlinkVersion_t; + /** * ECC counter types. * @@ -943,8 +1080,9 @@ typedef enum nvmlClockId_enum typedef enum nvmlDriverModel_enum { - NVML_DRIVER_WDDM = 0, //!< WDDM driver model -- GPU treated as a display device - NVML_DRIVER_WDM = 1 //!< WDM (TCC) model (recommended) -- GPU treated as a generic device + NVML_DRIVER_WDDM = 0, //!< WDDM driver model -- GPU treated as a display device + NVML_DRIVER_WDM = 1, //!< WDM (TCC) model (deprecated) -- GPU treated as a generic compute device + NVML_DRIVER_MCDM = 2 //!< MCDM driver model -- GPU treated as a Microsoft compute device } nvmlDriverModel_t; #define NVML_MAX_GPU_PERF_PSTATES 16 @@ -973,6 +1111,62 @@ typedef enum nvmlPStates_enum NVML_PSTATE_UNKNOWN = 32 //!< Unknown performance state } nvmlPstates_t; +/** + * Clock offset info. + */ +typedef struct +{ + unsigned int version; //!< The version number of this struct + nvmlClockType_t type; + nvmlPstates_t pstate; + int clockOffsetMHz; + int minClockOffsetMHz; + int maxClockOffsetMHz; +} nvmlClockOffset_v1_t; + +typedef nvmlClockOffset_v1_t nvmlClockOffset_t; + +#define nvmlClockOffset_v1 NVML_STRUCT_VERSION(ClockOffset, 1) + +/** + * Fan speed info. 
+ */ +typedef struct +{ + unsigned int version; //!< the API version number + unsigned int fan; //!< the fan index + unsigned int speed; //!< OUT: the fan speed in RPM +} nvmlFanSpeedInfo_v1_t; +typedef nvmlFanSpeedInfo_v1_t nvmlFanSpeedInfo_t; + +#define nvmlFanSpeedInfo_v1 NVML_STRUCT_VERSION(FanSpeedInfo, 1) + +#define NVML_PERF_MODES_BUFFER_SIZE 2048 + +/** + * Device performance modes string + */ +typedef struct +{ + unsigned int version; //!< the API version number + char str[NVML_PERF_MODES_BUFFER_SIZE]; //!< OUT: the performance modes string. +} nvmlDevicePerfModes_v1_t; +typedef nvmlDevicePerfModes_v1_t nvmlDevicePerfModes_t; + +#define nvmlDevicePerfModes_v1 NVML_STRUCT_VERSION(DevicePerfModes, 1) + +/** + * Device current clocks string + */ +typedef struct +{ + unsigned int version; //!< the API version number + char str[NVML_PERF_MODES_BUFFER_SIZE]; //!< OUT: the current clock frequency string. +} nvmlDeviceCurrentClockFreqs_v1_t; +typedef nvmlDeviceCurrentClockFreqs_v1_t nvmlDeviceCurrentClockFreqs_t; + +#define nvmlDeviceCurrentClockFreqs_v1 NVML_STRUCT_VERSION(DeviceCurrentClockFreqs, 1) + /** * GPU Operation Mode * @@ -999,7 +1193,7 @@ typedef enum nvmlInforomObject_enum NVML_INFOROM_OEM = 0, //!< An object defined by OEM NVML_INFOROM_ECC = 1, //!< The ECC object determining the level of ECC support NVML_INFOROM_POWER = 2, //!< The power management object - + NVML_INFOROM_DEN = 3, //!< DRAM Encryption object // Keep this last NVML_INFOROM_COUNT //!< This counts the number of infoROM objects the driver knows about } nvmlInforomObject_t; @@ -1086,10 +1280,244 @@ typedef enum nvmlRestrictedAPI_enum NVML_RESTRICTED_API_COUNT } nvmlRestrictedAPI_t; +/** + * Structure to store utilization value and process Id + */ +typedef struct nvmlProcessUtilizationSample_st +{ + unsigned int pid; //!< PID of process + unsigned long long timeStamp; //!< CPU Timestamp in microseconds + unsigned int smUtil; //!< SM (3D/Compute) Util Value + unsigned int memUtil; //!< 
Frame Buffer Memory Util Value + unsigned int encUtil; //!< Encoder Util Value + unsigned int decUtil; //!< Decoder Util Value +} nvmlProcessUtilizationSample_t; + +/** + * Structure to store utilization value and process Id -- version 1 + */ +typedef struct +{ + unsigned long long timeStamp; //!< CPU Timestamp in microseconds + unsigned int pid; //!< PID of process + unsigned int smUtil; //!< SM (3D/Compute) Util Value + unsigned int memUtil; //!< Frame Buffer Memory Util Value + unsigned int encUtil; //!< Encoder Util Value + unsigned int decUtil; //!< Decoder Util Value + unsigned int jpgUtil; //!< Jpeg Util Value + unsigned int ofaUtil; //!< Ofa Util Value +} nvmlProcessUtilizationInfo_v1_t; + +/** + * Structure to store utilization and process ID for each running process -- version 1 + */ +typedef struct +{ + unsigned int version; //!< The version number of this struct + unsigned int processSamplesCount; //!< Caller-supplied array size, and returns number of processes running + unsigned long long lastSeenTimeStamp; //!< Return only samples with timestamp greater than lastSeenTimeStamp + nvmlProcessUtilizationInfo_v1_t *procUtilArray; //!< The array (allocated by caller) of the utilization of GPU SM, framebuffer, video encoder, video decoder, JPEG, and OFA +} nvmlProcessesUtilizationInfo_v1_t; +typedef nvmlProcessesUtilizationInfo_v1_t nvmlProcessesUtilizationInfo_t; +#define nvmlProcessesUtilizationInfo_v1 NVML_STRUCT_VERSION(ProcessesUtilizationInfo, 1) + +/** + * Structure to store SRAM uncorrectable error counters + */ +typedef struct +{ + unsigned int version; //!< the API version number + unsigned long long aggregateUncParity; //!< aggregate uncorrectable parity error count + unsigned long long aggregateUncSecDed; //!< aggregate uncorrectable SEC-DED error count + unsigned long long aggregateCor; //!< aggregate correctable error count + unsigned long long volatileUncParity; //!< volatile uncorrectable parity error count + unsigned long long 
volatileUncSecDed; //!< volatile uncorrectable SEC-DED error count + unsigned long long volatileCor; //!< volatile correctable error count + unsigned long long aggregateUncBucketL2; //!< aggregate uncorrectable error count for L2 cache bucket + unsigned long long aggregateUncBucketSm; //!< aggregate uncorrectable error count for SM bucket + unsigned long long aggregateUncBucketPcie; //!< aggregate uncorrectable error count for PCIE bucket + unsigned long long aggregateUncBucketMcu; //!< aggregate uncorrectable error count for Microcontroller bucket + unsigned long long aggregateUncBucketOther; //!< aggregate uncorrectable error count for Other bucket + unsigned int bThresholdExceeded; //!< if the error threshold of field diag is exceeded +} nvmlEccSramErrorStatus_v1_t; + +typedef nvmlEccSramErrorStatus_v1_t nvmlEccSramErrorStatus_t; +#define nvmlEccSramErrorStatus_v1 NVML_STRUCT_VERSION(EccSramErrorStatus, 1) + +/** + * Structure to store platform information + * + * @deprecated The nvmlPlatformInfo_v1_t will be deprecated in the subsequent releases. + * Use nvmlPlatformInfo_v2_t + */ +typedef struct +{ + unsigned int version; //!< the API version number + unsigned char ibGuid[16]; //!< Infiniband GUID reported by platform (for Blackwell, ibGuid is 8 bytes so indices 8-15 are zero) + unsigned char rackGuid[16]; //!< GUID of the rack containing this GPU (for Blackwell rackGuid is 13 bytes so indices 13-15 are zero) + unsigned char chassisPhysicalSlotNumber; //!< The slot number in the rack containing this GPU (includes switches) + unsigned char computeSlotIndex; //!< The index within the compute slots in the rack containing this GPU (does not include switches) + unsigned char nodeIndex; //!< Index of the node within the slot containing this GPU + unsigned char peerType; //!< Platform indicated NVLink-peer type (e.g. 
switch present or not) + unsigned char moduleId; //!< ID of this GPU within the node +} nvmlPlatformInfo_v1_t; +#define nvmlPlatformInfo_v1 NVML_STRUCT_VERSION(PlatformInfo, 1) + +/** + * Structure to store platform information (v2) + */ +typedef struct +{ + unsigned int version; //!< the API version number + unsigned char ibGuid[16]; //!< Infiniband GUID reported by platform (for Blackwell, ibGuid is 8 bytes so indices 8-15 are zero) + unsigned char chassisSerialNumber[16]; //!< Serial number of the chassis containing this GPU (for Blackwell it is 13 bytes so indices 13-15 are zero) + unsigned char slotNumber; //!< The slot number in the chassis containing this GPU (includes switches) + unsigned char trayIndex; //!< The tray index within the compute slots in the chassis containing this GPU (does not include switches) + unsigned char hostId; //!< Index of the node within the slot containing this GPU + unsigned char peerType; //!< Platform indicated NVLink-peer type (e.g. switch present or not) + unsigned char moduleId; //!< ID of this GPU within the node +} nvmlPlatformInfo_v2_t; + +typedef nvmlPlatformInfo_v2_t nvmlPlatformInfo_t; +#define nvmlPlatformInfo_v2 NVML_STRUCT_VERSION(PlatformInfo, 2) + +/** + * GSP firmware + */ +#define NVML_GSP_FIRMWARE_VERSION_BUF_SIZE 0x40 + +/** + * Simplified chip architecture + */ +#define NVML_DEVICE_ARCH_KEPLER 2 // Devices based on the NVIDIA Kepler architecture +#define NVML_DEVICE_ARCH_MAXWELL 3 // Devices based on the NVIDIA Maxwell architecture +#define NVML_DEVICE_ARCH_PASCAL 4 // Devices based on the NVIDIA Pascal architecture +#define NVML_DEVICE_ARCH_VOLTA 5 // Devices based on the NVIDIA Volta architecture +#define NVML_DEVICE_ARCH_TURING 6 // Devices based on the NVIDIA Turing architecture +#define NVML_DEVICE_ARCH_AMPERE 7 // Devices based on the NVIDIA Ampere architecture +#define NVML_DEVICE_ARCH_ADA 8 // Devices based on the NVIDIA Ada architecture +#define NVML_DEVICE_ARCH_HOPPER 9 // Devices based on the 
NVIDIA Hopper architecture + +#define NVML_DEVICE_ARCH_BLACKWELL 10 // Devices based on the NVIDIA Blackwell architecture + +#define NVML_DEVICE_ARCH_T23X 11 // Devices based on NVIDIA Orin architecture + +#define NVML_DEVICE_ARCH_UNKNOWN 0xffffffff // Anything else, presumably something newer + +typedef unsigned int nvmlDeviceArchitecture_t; + +/** + * PCI bus types + */ +#define NVML_BUS_TYPE_UNKNOWN 0 +#define NVML_BUS_TYPE_PCI 1 +#define NVML_BUS_TYPE_PCIE 2 +#define NVML_BUS_TYPE_FPCI 3 +#define NVML_BUS_TYPE_AGP 4 + +typedef unsigned int nvmlBusType_t; + +/** + * Device Power Modes + */ + +/** + * Device Fan control policy + */ +#define NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW 0 +#define NVML_FAN_POLICY_MANUAL 1 + +typedef unsigned int nvmlFanControlPolicy_t; + +/** + * Device Power Source + */ +#define NVML_POWER_SOURCE_AC 0x00000000 +#define NVML_POWER_SOURCE_BATTERY 0x00000001 +#define NVML_POWER_SOURCE_UNDERSIZED 0x00000002 + +typedef unsigned int nvmlPowerSource_t; + +/** + * Device PCIE link Max Speed + */ +#define NVML_PCIE_LINK_MAX_SPEED_INVALID 0x00000000 +#define NVML_PCIE_LINK_MAX_SPEED_2500MBPS 0x00000001 +#define NVML_PCIE_LINK_MAX_SPEED_5000MBPS 0x00000002 +#define NVML_PCIE_LINK_MAX_SPEED_8000MBPS 0x00000003 +#define NVML_PCIE_LINK_MAX_SPEED_16000MBPS 0x00000004 +#define NVML_PCIE_LINK_MAX_SPEED_32000MBPS 0x00000005 +#define NVML_PCIE_LINK_MAX_SPEED_64000MBPS 0x00000006 + +/** + * Adaptive clocking status + */ +#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED 0x00000000 +#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED 0x00000001 + +#define NVML_MAX_GPU_UTILIZATIONS 8 + +/** + * Represents the GPU utilization domains + */ +typedef enum nvmlGpuUtilizationDomainId_t +{ + NVML_GPU_UTILIZATION_DOMAIN_GPU = 0, //!< Graphics engine domain + NVML_GPU_UTILIZATION_DOMAIN_FB = 1, //!< Frame buffer domain + NVML_GPU_UTILIZATION_DOMAIN_VID = 2, //!< Video engine domain + NVML_GPU_UTILIZATION_DOMAIN_BUS = 3, //!< Bus interface domain +} 
nvmlGpuUtilizationDomainId_t; + +typedef struct { + unsigned int bIsPresent; + unsigned int percentage; + unsigned int incThreshold; + unsigned int decThreshold; +} nvmlGpuDynamicPstatesInfoUtilization_t; + +typedef struct nvmlGpuDynamicPstatesInfo_st +{ + unsigned int flags; //!< Reserved for future use + nvmlGpuDynamicPstatesInfoUtilization_t utilization[NVML_MAX_GPU_UTILIZATIONS]; +} nvmlGpuDynamicPstatesInfo_t; + +/* + * PCIe outbound/inbound atomic operations capability + */ +#define NVML_PCIE_ATOMICS_CAP_FETCHADD32 0x01 +#define NVML_PCIE_ATOMICS_CAP_FETCHADD64 0x02 +#define NVML_PCIE_ATOMICS_CAP_SWAP32 0x04 +#define NVML_PCIE_ATOMICS_CAP_SWAP64 0x08 +#define NVML_PCIE_ATOMICS_CAP_CAS32 0x10 +#define NVML_PCIE_ATOMICS_CAP_CAS64 0x20 +#define NVML_PCIE_ATOMICS_CAP_CAS128 0x40 +#define NVML_PCIE_ATOMICS_OPS_MAX 7 + +/** + * Device Scope - This is useful to retrieve the telemetry at GPU and module (e.g. GPU + CPU) level + */ +#define NVML_POWER_SCOPE_GPU 0U //!< Targets only GPU +#define NVML_POWER_SCOPE_MODULE 1U //!< Targets the whole module +#define NVML_POWER_SCOPE_MEMORY 2U //!< Targets the GPU Memory + +typedef unsigned char nvmlPowerScopeType_t; + +/** + * Contains the power management limit + */ +typedef struct +{ + unsigned int version; //!< Structure format version (must be 1) + nvmlPowerScopeType_t powerScope; //!< [in] Device type: GPU or Total Module + unsigned int powerValueMw; //!< [out] Power value to retrieve or set in milliwatts +} nvmlPowerValue_v2_t; + +#define nvmlPowerValue_v2 NVML_STRUCT_VERSION(PowerValue, 2) + /** @} */ /***************************************************************************************************/ -/** @addtogroup virtualGPU +/** @addtogroup virtualGPU vGPU Enums, Constants, Structs * @{ */ /***************************************************************************************************/ @@ -1176,6 +1604,7 @@ typedef enum nvmlVgpuCapability_enum typedef enum nvmlVgpuDriverCapability_enum { 
NVML_VGPU_DRIVER_CAP_HETEROGENEOUS_MULTI_VGPU = 0, //!< Supports mixing of different vGPU profiles within one guest VM + NVML_VGPU_DRIVER_CAP_WARM_UPDATE = 1, //!< Supports FSR and warm update of vGPU host driver without terminating the running guest VM // Keep this last NVML_VGPU_DRIVER_CAP_COUNT } nvmlVgpuDriverCapability_t; @@ -1185,14 +1614,18 @@ typedef enum nvmlVgpuDriverCapability_enum */ typedef enum nvmlDeviceVgpuCapability_enum { - NVML_DEVICE_VGPU_CAP_FRACTIONAL_MULTI_VGPU = 0, //!< Query if the fractional vGPU profiles on this GPU can be used in multi-vGPU configurations - NVML_DEVICE_VGPU_CAP_HETEROGENEOUS_TIMESLICE_PROFILES = 1, //!< Query if the GPU support concurrent execution of timesliced vGPU profiles of differing types - NVML_DEVICE_VGPU_CAP_HETEROGENEOUS_TIMESLICE_SIZES = 2, //!< Query if the GPU support concurrent execution of timesliced vGPU profiles of differing framebuffer sizes + NVML_DEVICE_VGPU_CAP_FRACTIONAL_MULTI_VGPU = 0, //!< Query whether the fractional vGPU profiles on this GPU can be used in multi-vGPU configurations + NVML_DEVICE_VGPU_CAP_HETEROGENEOUS_TIMESLICE_PROFILES = 1, //!< Query whether the GPU support concurrent execution of timesliced vGPU profiles of differing types + NVML_DEVICE_VGPU_CAP_HETEROGENEOUS_TIMESLICE_SIZES = 2, //!< Query whether the GPU support concurrent execution of timesliced vGPU profiles of differing framebuffer sizes NVML_DEVICE_VGPU_CAP_READ_DEVICE_BUFFER_BW = 3, //!< Query the GPU's read_device_buffer expected bandwidth capacity in megabytes per second NVML_DEVICE_VGPU_CAP_WRITE_DEVICE_BUFFER_BW = 4, //!< Query the GPU's write_device_buffer expected bandwidth capacity in megabytes per second - NVML_DEVICE_VGPU_CAP_DEVICE_STREAMING = 5, //!< Query if vGPU profiles on the GPU supports migration data streaming + NVML_DEVICE_VGPU_CAP_DEVICE_STREAMING = 5, //!< Query whether the vGPU profiles on the GPU supports migration data streaming NVML_DEVICE_VGPU_CAP_MINI_QUARTER_GPU = 6, //!< Set/Get support for 
mini-quarter vGPU profiles NVML_DEVICE_VGPU_CAP_COMPUTE_MEDIA_ENGINE_GPU = 7, //!< Set/Get support for compute media engine vGPU profiles + NVML_DEVICE_VGPU_CAP_WARM_UPDATE = 8, //!< Query whether the GPU supports FSR and warm update + NVML_DEVICE_VGPU_CAP_HOMOGENEOUS_PLACEMENTS = 9, //!< Query whether the GPU supports reporting of placements of timesliced vGPU profiles with identical framebuffer sizes + NVML_DEVICE_VGPU_CAP_MIG_TIMESLICING_SUPPORTED = 10, //!< Query whether the GPU supports timesliced vGPU on MIG + NVML_DEVICE_VGPU_CAP_MIG_TIMESLICING_ENABLED = 11, //!< Set/Get MIG timesliced mode reporting, without impacting the underlying functionality // Keep this last NVML_DEVICE_VGPU_CAP_COUNT } nvmlDeviceVgpuCapability_t; @@ -1235,6 +1668,12 @@ typedef enum nvmlDeviceVgpuCapability_enum #define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 #define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 +/** + * Macros to indicate the vGPU mode of the GPU. + */ +#define NVML_VGPU_PGPU_HETEROGENEOUS_MODE 0 +#define NVML_VGPU_PGPU_HOMOGENEOUS_MODE 1 + /** @} */ /***************************************************************************************************/ @@ -1279,9 +1718,33 @@ typedef struct unsigned int count; //!< Count of placement IDs fetched unsigned int *placementIds; //!< Placement IDs for the vGPU type } nvmlVgpuPlacementList_v1_t; -typedef nvmlVgpuPlacementList_v1_t nvmlVgpuPlacementList_t; #define nvmlVgpuPlacementList_v1 NVML_STRUCT_VERSION(VgpuPlacementList, 1) +/** + * Structure to store the list of vGPU placements -- version 2 + */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned int placementSize; //!< OUT: The number of slots occupied by the vGPU type + unsigned int count; //!< IN/OUT: Count of the placement IDs + unsigned int *placementIds; //!< IN/OUT: Placement IDs for the vGPU type + unsigned int mode; //!< IN: The vGPU mode. 
Either NVML_VGPU_PGPU_HETEROGENEOUS_MODE or NVML_VGPU_PGPU_HOMOGENEOUS_MODE +} nvmlVgpuPlacementList_v2_t; +typedef nvmlVgpuPlacementList_v2_t nvmlVgpuPlacementList_t; +#define nvmlVgpuPlacementList_v2 NVML_STRUCT_VERSION(VgpuPlacementList, 2) + +/** + * Structure to store BAR1 size information of vGPU type -- Version 1 + */ +typedef struct +{ + unsigned int version; //!< The version number of this struct + unsigned long long bar1Size; //!< BAR1 size in megabytes +} nvmlVgpuTypeBar1Info_v1_t; +typedef nvmlVgpuTypeBar1Info_v1_t nvmlVgpuTypeBar1Info_t; +#define nvmlVgpuTypeBar1Info_v1 NVML_STRUCT_VERSION(VgpuTypeBar1Info, 1) + /** * Structure to store Utilization Value and vgpuInstance */ @@ -1369,6 +1832,17 @@ typedef struct typedef nvmlVgpuProcessesUtilizationInfo_v1_t nvmlVgpuProcessesUtilizationInfo_t; #define nvmlVgpuProcessesUtilizationInfo_v1 NVML_STRUCT_VERSION(VgpuProcessesUtilizationInfo, 1) +/** + * Structure to store the information of vGPU runtime state -- version 1 + */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned long long size; //!< OUT: The runtime state size of the vGPU instance +} nvmlVgpuRuntimeState_v1_t; +typedef nvmlVgpuRuntimeState_v1_t nvmlVgpuRuntimeState_t; +#define nvmlVgpuRuntimeState_v1 NVML_STRUCT_VERSION(VgpuRuntimeState, 1) + /** * vGPU scheduler policies */ @@ -1385,6 +1859,11 @@ typedef nvmlVgpuProcessesUtilizationInfo_v1_t nvmlVgpuProcessesUtilizationInfo_t #define NVML_VGPU_SCHEDULER_ARR_DISABLE 1 #define NVML_VGPU_SCHEDULER_ARR_ENABLE 2 +/** + * vGPU scheduler engine types + */ +#define NVML_VGPU_SCHEDULER_ENGINE_TYPE_GRAPHICS 1 + typedef struct { unsigned int avgFactor; unsigned int timeslice; @@ -1517,47 +1996,6 @@ typedef struct nvmlVgpuLicenseInfo_st unsigned int currentState; //!< Current license state } nvmlVgpuLicenseInfo_t; -/** - * Structure to store utilization value and process Id - */ -typedef struct nvmlProcessUtilizationSample_st -{ - unsigned int pid; //!< PID of 
process - unsigned long long timeStamp; //!< CPU Timestamp in microseconds - unsigned int smUtil; //!< SM (3D/Compute) Util Value - unsigned int memUtil; //!< Frame Buffer Memory Util Value - unsigned int encUtil; //!< Encoder Util Value - unsigned int decUtil; //!< Decoder Util Value -} nvmlProcessUtilizationSample_t; - -/** - * Structure to store utilization value and process Id -- version 1 - */ -typedef struct -{ - unsigned long long timeStamp; //!< CPU Timestamp in microseconds - unsigned int pid; //!< PID of process - unsigned int smUtil; //!< SM (3D/Compute) Util Value - unsigned int memUtil; //!< Frame Buffer Memory Util Value - unsigned int encUtil; //!< Encoder Util Value - unsigned int decUtil; //!< Decoder Util Value - unsigned int jpgUtil; //!< Jpeg Util Value - unsigned int ofaUtil; //!< Ofa Util Value -} nvmlProcessUtilizationInfo_v1_t; - -/** - * Structure to store utilization and process ID for each running process -- version 1 - */ -typedef struct -{ - unsigned int version; //!< The version number of this struct - unsigned int processSamplesCount; //!< Caller-supplied array size, and returns number of processes running - unsigned long long lastSeenTimeStamp; //!< Return only samples with timestamp greater than lastSeenTimeStamp - nvmlProcessUtilizationInfo_v1_t *procUtilArray; //!< The array (allocated by caller) of the utilization of GPU SM, framebuffer, video encoder, video decoder, JPEG, and OFA -} nvmlProcessesUtilizationInfo_v1_t; -typedef nvmlProcessesUtilizationInfo_v1_t nvmlProcessesUtilizationInfo_t; -#define nvmlProcessesUtilizationInfo_v1 NVML_STRUCT_VERSION(ProcessesUtilizationInfo, 1) - /** * Structure to store license expiry date and time values */ @@ -1596,119 +2034,109 @@ typedef struct nvmlGridLicensableFeatures_st } nvmlGridLicensableFeatures_t; /** - * Structure to store SRAM uncorrectable error counters + * Enum describing the GPU Recovery Action */ -typedef struct -{ - unsigned int version; //!< the API version number - 
unsigned long long aggregateUncParity; //!< aggregate uncorrectable parity error count - unsigned long long aggregateUncSecDed; //!< aggregate uncorrectable SEC-DED error count - unsigned long long aggregateCor; //!< aggregate correctable error count - unsigned long long volatileUncParity; //!< volatile uncorrectable parity error count - unsigned long long volatileUncSecDed; //!< volatile uncorrectable SEC-DED error count - unsigned long long volatileCor; //!< volatile correctable error count - unsigned long long aggregateUncBucketL2; //!< aggregate uncorrectable error count for L2 cache bucket - unsigned long long aggregateUncBucketSm; //!< aggregate uncorrectable error count for SM bucket - unsigned long long aggregateUncBucketPcie; //!< aggregate uncorrectable error count for PCIE bucket - unsigned long long aggregateUncBucketMcu; //!< aggregate uncorrectable error count for Microcontroller bucket - unsigned long long aggregateUncBucketOther; //!< aggregate uncorrectable error count for Other bucket - unsigned int bThresholdExceeded; //!< if the error threshold of field diag is exceeded -} nvmlEccSramErrorStatus_v1_t; - -typedef nvmlEccSramErrorStatus_v1_t nvmlEccSramErrorStatus_t; -#define nvmlEccSramErrorStatus_v1 NVML_STRUCT_VERSION(EccSramErrorStatus, 1) - -/** - * GSP firmware - */ -#define NVML_GSP_FIRMWARE_VERSION_BUF_SIZE 0x40 +typedef enum nvmlDeviceGpuRecoveryAction_s { + NVML_GPU_RECOVERY_ACTION_NONE = 0, + NVML_GPU_RECOVERY_ACTION_GPU_RESET = 1, + NVML_GPU_RECOVERY_ACTION_NODE_REBOOT = 2, + NVML_GPU_RECOVERY_ACTION_DRAIN_P2P = 3, + NVML_GPU_RECOVERY_ACTION_DRAIN_AND_RESET = 4, +} nvmlDeviceGpuRecoveryAction_t; /** - * Simplified chip architecture - */ -#define NVML_DEVICE_ARCH_KEPLER 2 // Devices based on the NVIDIA Kepler architecture -#define NVML_DEVICE_ARCH_MAXWELL 3 // Devices based on the NVIDIA Maxwell architecture -#define NVML_DEVICE_ARCH_PASCAL 4 // Devices based on the NVIDIA Pascal architecture -#define NVML_DEVICE_ARCH_VOLTA 5 // Devices 
based on the NVIDIA Volta architecture -#define NVML_DEVICE_ARCH_TURING 6 // Devices based on the NVIDIA Turing architecture -#define NVML_DEVICE_ARCH_AMPERE 7 // Devices based on the NVIDIA Ampere architecture -#define NVML_DEVICE_ARCH_ADA 8 // Devices based on the NVIDIA Ada architecture -#define NVML_DEVICE_ARCH_HOPPER 9 // Devices based on the NVIDIA Hopper architecture - -#define NVML_DEVICE_ARCH_UNKNOWN 0xffffffff // Anything else, presumably something newer - -typedef unsigned int nvmlDeviceArchitecture_t; - -/** - * PCI bus types - */ -#define NVML_BUS_TYPE_UNKNOWN 0 -#define NVML_BUS_TYPE_PCI 1 -#define NVML_BUS_TYPE_PCIE 2 -#define NVML_BUS_TYPE_FPCI 3 -#define NVML_BUS_TYPE_AGP 4 - -typedef unsigned int nvmlBusType_t; - -/** - * Device Power Modes + * Structure to store the vGPU type IDs -- version 1 */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned int vgpuCount; //!< IN/OUT: Number of vGPU types + nvmlVgpuTypeId_t *vgpuTypeIds; //!< OUT: List of vGPU type IDs +} nvmlVgpuTypeIdInfo_v1_t; +typedef nvmlVgpuTypeIdInfo_v1_t nvmlVgpuTypeIdInfo_t; +#define nvmlVgpuTypeIdInfo_v1 NVML_STRUCT_VERSION(VgpuTypeIdInfo, 1) /** - * Device Fan control policy + * Structure to store the maximum number of possible vGPU type IDs -- version 1 */ -#define NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW 0 -#define NVML_FAN_POLICY_MANUAL 1 - -typedef unsigned int nvmlFanControlPolicy_t; +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + nvmlVgpuTypeId_t vgpuTypeId; //!< IN: Handle to vGPU type + unsigned int maxInstancePerGI; //!< OUT: Maximum number of vGPU instances per GPU instance +} nvmlVgpuTypeMaxInstance_v1_t; +typedef nvmlVgpuTypeMaxInstance_v1_t nvmlVgpuTypeMaxInstance_t; +#define nvmlVgpuTypeMaxInstance_v1 NVML_STRUCT_VERSION(VgpuTypeMaxInstance, 1) /** - * Device Power Source + * Structure to store active vGPU instance information -- Version 1 */ -#define NVML_POWER_SOURCE_AC 0x00000000 
-#define NVML_POWER_SOURCE_BATTERY 0x00000001 -#define NVML_POWER_SOURCE_UNDERSIZED 0x00000002 - -typedef unsigned int nvmlPowerSource_t; +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned int vgpuCount; //!< IN/OUT: Count of the active vGPU instances + nvmlVgpuInstance_t *vgpuInstances; //!< IN/OUT: list of active vGPU instances +} nvmlActiveVgpuInstanceInfo_v1_t; +typedef nvmlActiveVgpuInstanceInfo_v1_t nvmlActiveVgpuInstanceInfo_t; +#define nvmlActiveVgpuInstanceInfo_v1 NVML_STRUCT_VERSION(ActiveVgpuInstanceInfo, 1) -/* - * Device PCIE link Max Speed +/** + * Structure to set vGPU scheduler state information -- version 1 */ -#define NVML_PCIE_LINK_MAX_SPEED_INVALID 0x00000000 -#define NVML_PCIE_LINK_MAX_SPEED_2500MBPS 0x00000001 -#define NVML_PCIE_LINK_MAX_SPEED_5000MBPS 0x00000002 -#define NVML_PCIE_LINK_MAX_SPEED_8000MBPS 0x00000003 -#define NVML_PCIE_LINK_MAX_SPEED_16000MBPS 0x00000004 -#define NVML_PCIE_LINK_MAX_SPEED_32000MBPS 0x00000005 -#define NVML_PCIE_LINK_MAX_SPEED_64000MBPS 0x00000006 +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned int engineId; //!< IN: One of NVML_VGPU_SCHEDULER_ENGINE_TYPE_*. 
+ unsigned int schedulerPolicy; //!< IN: Scheduler policy + unsigned int enableARRMode; //!< IN: Adaptive Round Robin scheduler + nvmlVgpuSchedulerSetParams_t schedulerParams; //!< IN: vGPU Scheduler Parameters +} nvmlVgpuSchedulerState_v1_t; +typedef nvmlVgpuSchedulerState_v1_t nvmlVgpuSchedulerState_t; +#define nvmlVgpuSchedulerState_v1 NVML_STRUCT_VERSION(VgpuSchedulerState, 1) -/* - * Adaptive clocking status +/** + * Structure to store vGPU scheduler state information -- Version 1 */ -#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED 0x00000000 -#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED 0x00000001 - -#define NVML_MAX_GPU_UTILIZATIONS 8 -typedef enum nvmlGpuUtilizationDomainId_t +typedef struct { - NVML_GPU_UTILIZATION_DOMAIN_GPU = 0, //!< Graphics engine domain - NVML_GPU_UTILIZATION_DOMAIN_FB = 1, //!< Frame buffer domain - NVML_GPU_UTILIZATION_DOMAIN_VID = 2, //!< Video engine domain - NVML_GPU_UTILIZATION_DOMAIN_BUS = 3, //!< Bus interface domain -} nvmlGpuUtilizationDomainId_t; + unsigned int version; //!< IN: The version number of this struct + unsigned int engineId; //!< IN: Engine whose software scheduler state info is fetched. One of NVML_VGPU_SCHEDULER_ENGINE_TYPE_*. + unsigned int schedulerPolicy; //!< OUT: Scheduler policy + unsigned int arrMode; //!< OUT: Adaptive Round Robin scheduler mode. One of the NVML_VGPU_SCHEDULER_ARR_*. 
+ nvmlVgpuSchedulerParams_t schedulerParams; //!< OUT: vGPU Scheduler Parameters +} nvmlVgpuSchedulerStateInfo_v1_t; +typedef nvmlVgpuSchedulerStateInfo_v1_t nvmlVgpuSchedulerStateInfo_t; +#define nvmlVgpuSchedulerStateInfo_v1 NVML_STRUCT_VERSION(VgpuSchedulerStateInfo, 1) -typedef struct { - unsigned int bIsPresent; - unsigned int percentage; - unsigned int incThreshold; - unsigned int decThreshold; -} nvmlGpuDynamicPstatesInfoUtilization_t; +/** + * Structure to store vGPU scheduler log information -- Version 1 + */ +typedef struct +{ + unsigned int version; //!< IN: The version number of this struct + unsigned int engineId; //!< IN: Engine whose software runlist log entries are fetched. One of One of NVML_VGPU_SCHEDULER_ENGINE_TYPE_*. + unsigned int schedulerPolicy; //!< OUT: Scheduler policy + unsigned int arrMode; //!< OUT: Adaptive Round Robin scheduler mode. One of the NVML_VGPU_SCHEDULER_ARR_*. + nvmlVgpuSchedulerParams_t schedulerParams; //!< OUT: vGPU Scheduler Parameters + unsigned int entriesCount; //!< OUT: Count of log entries fetched + nvmlVgpuSchedulerLogEntry_t logEntries[NVML_SCHEDULER_SW_MAX_LOG_ENTRIES]; //!< OUT: Structure to store the state and logs of a software runlist +} nvmlVgpuSchedulerLogInfo_v1_t; +typedef nvmlVgpuSchedulerLogInfo_v1_t nvmlVgpuSchedulerLogInfo_t; +#define nvmlVgpuSchedulerLogInfo_v1 NVML_STRUCT_VERSION(VgpuSchedulerLogInfo, 1) -typedef struct nvmlGpuDynamicPstatesInfo_st +/** + * Structure to store creatable vGPU placement information -- version 1 + */ +typedef struct { - unsigned int flags; //!< Reserved for future use - nvmlGpuDynamicPstatesInfoUtilization_t utilization[NVML_MAX_GPU_UTILIZATIONS]; -} nvmlGpuDynamicPstatesInfo_t; + unsigned int version; //!< IN: The version number of this struct + nvmlVgpuTypeId_t vgpuTypeId; //!< IN: Handle to vGPU type + unsigned int count; //!< IN/OUT: Count of the placement IDs + unsigned int *placementIds; //!< IN/OUT: Placement IDs for the vGPU type + unsigned int placementSize; 
//!< OUT: The number of slots occupied by the vGPU type +} nvmlVgpuCreatablePlacementInfo_v1_t; +typedef nvmlVgpuCreatablePlacementInfo_v1_t nvmlVgpuCreatablePlacementInfo_t; +#define nvmlVgpuCreatablePlacementInfo_v1 NVML_STRUCT_VERSION(VgpuCreatablePlacementInfo, 1) /** @} */ /** @} */ @@ -1760,7 +2188,11 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_RETIRED_DBE 30 //!< Number of retired pages because of double bit errors #define NVML_FI_DEV_RETIRED_PENDING 31 //!< If any pages are pending retirement. 1=yes. 0=no. -/* NvLink Flit Error Counters */ +/** + * NVLink Flit Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L0 32 //!< NVLink flow control CRC Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L1 33 //!< NVLink flow control CRC Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L2 34 //!< NVLink flow control CRC Error Counter for Lane 2 @@ -1769,7 +2201,11 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L5 37 //!< NVLink flow control CRC Error Counter for Lane 5 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL 38 //!< NVLink flow control CRC Error Counter total for all Lanes -/* NvLink CRC Data Error Counters */ +/** + * NVLink CRC Data Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. 
+ */ #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L0 39 //!< NVLink data CRC Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L1 40 //!< NVLink data CRC Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L2 41 //!< NVLink data CRC Error Counter for Lane 2 @@ -1778,7 +2214,11 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L5 44 //!< NVLink data CRC Error Counter for Lane 5 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL 45 //!< NvLink data CRC Error Counter total for all Lanes -/* NvLink Replay Error Counters */ +/** + * NVLink Replay Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L0 46 //!< NVLink Replay Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L1 47 //!< NVLink Replay Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L2 48 //!< NVLink Replay Error Counter for Lane 2 @@ -1787,7 +2227,11 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L5 51 //!< NVLink Replay Error Counter for Lane 5 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL 52 //!< NVLink Replay Error Counter total for all Lanes -/* NvLink Recovery Error Counters */ +/** + * NVLink Recovery Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. 
+ */ #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L0 53 //!< NVLink Recovery Error Counter for Lane 0 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L1 54 //!< NVLink Recovery Error Counter for Lane 1 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L2 55 //!< NVLink Recovery Error Counter for Lane 2 @@ -1838,7 +2282,11 @@ typedef struct nvmlGpuDynamicPstatesInfo_st /* Energy Counter */ #define NVML_FI_DEV_TOTAL_ENERGY_CONSUMPTION 83 //!< Total energy consumption for the GPU in mJ since the driver was last reloaded -/* NVLink Speed */ +/** + * NVLink Speed + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L0 84 //!< NVLink Speed in MBps for Link 0 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L1 85 //!< NVLink Speed in MBps for Link 1 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L2 86 //!< NVLink Speed in MBps for Link 2 @@ -1855,7 +2303,11 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_PCIE_REPLAY_COUNTER 94 //!< PCIe replay counter #define NVML_FI_DEV_PCIE_REPLAY_ROLLOVER_COUNTER 95 //!< PCIe replay rollover counter -/* NvLink Flit Error Counters */ +/** + * NVLink Flit Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. 
+ */ #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L6 96 //!< NVLink flow control CRC Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L7 97 //!< NVLink flow control CRC Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L8 98 //!< NVLink flow control CRC Error Counter for Lane 8 @@ -1863,7 +2315,11 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L10 100 //!< NVLink flow control CRC Error Counter for Lane 10 #define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L11 101 //!< NVLink flow control CRC Error Counter for Lane 11 -/* NvLink CRC Data Error Counters */ +/** + * NVLink CRC Data Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L6 102 //!< NVLink data CRC Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L7 103 //!< NVLink data CRC Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L8 104 //!< NVLink data CRC Error Counter for Lane 8 @@ -1871,7 +2327,11 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L10 106 //!< NVLink data CRC Error Counter for Lane 10 #define NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L11 107 //!< NVLink data CRC Error Counter for Lane 11 -/* NvLink Replay Error Counters */ +/** + * NVLink Replay Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. 
+ */ #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L6 108 //!< NVLink Replay Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L7 109 //!< NVLink Replay Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L8 110 //!< NVLink Replay Error Counter for Lane 8 @@ -1879,7 +2339,11 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L10 112 //!< NVLink Replay Error Counter for Lane 10 #define NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L11 113 //!< NVLink Replay Error Counter for Lane 11 -/* NvLink Recovery Error Counters */ +/** + * NVLink Recovery Error Counters + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L6 114 //!< NVLink Recovery Error Counter for Lane 6 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L7 115 //!< NVLink Recovery Error Counter for Lane 7 #define NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L8 116 //!< NVLink Recovery Error Counter for Lane 8 @@ -1911,7 +2375,11 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L10 130 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 10 #define NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L11 131 //!< NVLink Bandwidth Counter for Counter Set 1, Lane 11 -/* NVLink Speed */ +/** + * NVLink Speed + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. 
+ */ #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L6 132 //!< NVLink Speed in MBps for Link 6 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L7 133 //!< NVLink Speed in MBps for Link 7 #define NVML_FI_DEV_NVLINK_SPEED_MBPS_L8 134 //!< NVLink Speed in MBps for Link 8 @@ -1968,15 +2436,45 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L11 159 //!< NVLink data ECC Error Counter for Link 11 #define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_TOTAL 160 //!< NVLink data ECC Error Counter total for all Links +/** + * NVLink Error Replay + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_ERROR_DL_REPLAY 161 //!< NVLink Replay Error Counter + //!< This is unsupported for Blackwell+. + //!< Please use NVML_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_* +/** + * NVLink Recovery Error Counter + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_ERROR_DL_RECOVERY 162 //!< NVLink Recovery Error Counter + //!< This is unsupported for Blackwell+ + //!< Please use NVML_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_* + +/** + * NVLink Recovery Error CRC Counter + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_ERROR_DL_CRC 163 //!< NVLink CRC Error Counter + //!< This is unsupported for Blackwell+ + //!< Please use NVML_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_* + +/** + * NVLink Speed, State and Version field id 164, 165, and 166 + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ #define NVML_FI_DEV_NVLINK_GET_SPEED 164 //!< NVLink Speed in MBps #define NVML_FI_DEV_NVLINK_GET_STATE 165 //!< NVLink State - Active,Inactive #define NVML_FI_DEV_NVLINK_GET_VERSION 166 //!< NVLink Version #define NVML_FI_DEV_NVLINK_GET_POWER_STATE 167 //!< NVLink Power state. 
0=HIGH_SPEED 1=LOW_SPEED -#define NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD 168 //!< NVLink length of idle period (in units of 100us) before transitioning links to sleep state +#define NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD 168 //!< NVLink length of idle period (units can be found from + //!< NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD_UNITS) before + //!< transitioning links to sleep state #define NVML_FI_DEV_PCIE_L0_TO_RECOVERY_COUNTER 169 //!< Device PEX error recovery counter @@ -1984,19 +2482,19 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_C2C_LINK_GET_STATUS 171 //!< C2C Link Status 0=INACTIVE 1=ACTIVE #define NVML_FI_DEV_C2C_LINK_GET_MAX_BW 172 //!< C2C Link Speed in MBps for active links -#define NVML_FI_DEV_PCIE_COUNT_CORRECTABLE_ERRORS 173 -#define NVML_FI_DEV_PCIE_COUNT_NAKS_RECEIVED 174 -#define NVML_FI_DEV_PCIE_COUNT_RECEIVER_ERROR 175 -#define NVML_FI_DEV_PCIE_COUNT_BAD_TLP 176 -#define NVML_FI_DEV_PCIE_COUNT_NAKS_SENT 177 -#define NVML_FI_DEV_PCIE_COUNT_BAD_DLLP 178 -#define NVML_FI_DEV_PCIE_COUNT_NON_FATAL_ERROR 179 -#define NVML_FI_DEV_PCIE_COUNT_FATAL_ERROR 180 -#define NVML_FI_DEV_PCIE_COUNT_UNSUPPORTED_REQ 181 -#define NVML_FI_DEV_PCIE_COUNT_LCRC_ERROR 182 -#define NVML_FI_DEV_PCIE_COUNT_LANE_ERROR 183 +#define NVML_FI_DEV_PCIE_COUNT_CORRECTABLE_ERRORS 173 //!< PCIe Correctable Errors Counter +#define NVML_FI_DEV_PCIE_COUNT_NAKS_RECEIVED 174 //!< PCIe NAK Receive Counter +#define NVML_FI_DEV_PCIE_COUNT_RECEIVER_ERROR 175 //!< PCIe Receiver Error Counter +#define NVML_FI_DEV_PCIE_COUNT_BAD_TLP 176 //!< PCIe Bad TLP Counter +#define NVML_FI_DEV_PCIE_COUNT_NAKS_SENT 177 //!< PCIe NAK Send Counter +#define NVML_FI_DEV_PCIE_COUNT_BAD_DLLP 178 //!< PCIe Bad DLLP Counter +#define NVML_FI_DEV_PCIE_COUNT_NON_FATAL_ERROR 179 //!< PCIe Non Fatal Error Counter +#define NVML_FI_DEV_PCIE_COUNT_FATAL_ERROR 180 //!< PCIe Fatal Error Counter +#define NVML_FI_DEV_PCIE_COUNT_UNSUPPORTED_REQ 181 //!< PCIe Unsupported Request Counter +#define 
NVML_FI_DEV_PCIE_COUNT_LCRC_ERROR 182 //!< PCIe LCRC Error Counter +#define NVML_FI_DEV_PCIE_COUNT_LANE_ERROR 183 //!< PCIe Per Lane Error Counter. -#define NVML_FI_DEV_IS_RESETLESS_MIG_SUPPORTED 184 +#define NVML_FI_DEV_IS_RESETLESS_MIG_SUPPORTED 184 //!< Device's Restless MIG Capability /** * Retrieves power usage for this GPU in milliwatts. @@ -2027,9 +2525,165 @@ typedef struct nvmlGpuDynamicPstatesInfo_st #define NVML_FI_DEV_TEMPERATURE_MEM_MAX_TLIMIT 195 //!< T.Limit temperature after which GPU may begin SW slowdown due to memory temperature #define NVML_FI_DEV_TEMPERATURE_GPU_MAX_TLIMIT 196 //!< T.Limit temperature after which GPU may be throttled below base clock +#define NVML_FI_DEV_PCIE_COUNT_TX_BYTES 197 //!< PCIe transmit bytes. Value can be wrapped. +#define NVML_FI_DEV_PCIE_COUNT_RX_BYTES 198 //!< PCIe receive bytes. Value can be wrapped. + #define NVML_FI_DEV_IS_MIG_MODE_INDEPENDENT_MIG_QUERY_CAPABLE 199 //!< MIG mode independent, MIG query capable device. 1=yes. 0=no. -#define NVML_FI_MAX 200 //!< One greater than the largest field ID defined above +#define NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD_MAX 200 //!< Max Nvlink Power Threshold. See NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD + +/** + * NVLink counter field id 201-225 + * + * Link ID needs to be specified in the scopeId field in nvmlFieldValue_t. + */ +#define NVML_FI_DEV_NVLINK_COUNT_XMIT_PACKETS 201 //!type is invalid + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the provided version is invalid/unsupported + * - \ref NVML_ERROR_NOT_FOUND if \a uuid does not match a valid device on the system + * - \ref NVML_ERROR_GPU_IS_LOST if any GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetHandleByUUIDV(const nvmlUUID_t *uuid, nvmlDevice_t *device); + /** * Acquire the handle for a particular device, based on its PCI bus id. 
* @@ -3414,6 +4250,11 @@ nvmlReturn_t DECLDIR nvmlDeviceGetHandleByUUID(const char *uuid, nvmlDevice_t *d * instead of NVML_ERROR_NO_PERMISSION. * * @param pciBusId The PCI bus id of the target GPU + * Accept the following formats (all numbers in hexadecimal): + * domain:bus:device.function in format %x:%x:%x.%x + * domain:bus:device in format %x:%x:%x + * bus:device.function in format %x:%x.%x + * * @param device Reference in which to return the device handle * * @return @@ -3532,21 +4373,21 @@ nvmlReturn_t DECLDIR nvmlDeviceGetIndex(nvmlDevice_t device, unsigned int *index */ nvmlReturn_t DECLDIR nvmlDeviceGetSerial(nvmlDevice_t device, char *serial, unsigned int length); -/* -* Get a unique identifier for the device module on the baseboard -* -* This API retrieves a unique identifier for each GPU module that exists on a given baseboard. -* For non-baseboard products, this ID would always be 0. -* -* @param device The identifier of the target device -* @param moduleId Unique identifier for the GPU module -* -* @return -* - \ref NVML_SUCCESS if \a moduleId has been successfully retrieved -* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized -* - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a moduleId is invalid -* - \ref NVML_ERROR_UNKNOWN on any unexpected error -*/ +/** + * Get a unique identifier for the device module on the baseboard + * + * This API retrieves a unique identifier for each GPU module that exists on a given baseboard. + * For non-baseboard products, this ID would always be 0. 
+ * + * @param device The identifier of the target device + * @param moduleId Unique identifier for the GPU module + * + * @return + * - \ref NVML_SUCCESS if \a moduleId has been successfully retrieved + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a moduleId is invalid + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ nvmlReturn_t DECLDIR nvmlDeviceGetModuleId(nvmlDevice_t device, unsigned int *moduleId); /** @@ -4032,7 +4873,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetPersistenceMode(nvmlDevice_t device, nvmlEnabl * * For all products. * - * See \ref nvmlPciInfoExt_t for details on the available PCI info. + * See \ref nvmlPciInfoExt_v1_t for details on the available PCI info. * * @param device The identifier of the target device * @param pci Reference in which to return the PCI info @@ -4471,6 +5312,30 @@ nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed(nvmlDevice_t device, unsigned int *sp */ nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed_v2(nvmlDevice_t device, unsigned int fan, unsigned int * speed); +/** + * Retrieves the intended operating speed in rotations per minute (RPM) of the device's specified fan. + * + * For Maxwell &tm; or newer fully supported devices. + * + * For all discrete products with dedicated fans. + * + * Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, the + * output will not match the actual fan speed. 
+ * + * @param device The identifier of the target device + * @param fanSpeed Structure specifying the index of the target fan (input) and + * retrieved fan speed value (output) + * + * @return + * - \ref NVML_SUCCESS If everything worked + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid, \a fan is not an acceptable + * index, or \a speed is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the provided version is invalid/unsupported + * - \ref NVML_ERROR_NOT_SUPPORTED If the \a device does not support this feature + */ +nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeedRPM(nvmlDevice_t device, nvmlFanSpeedInfo_t *fanSpeed); + /** * Retrieves the intended target speed of the device's specified fan. * @@ -4559,25 +5424,67 @@ nvmlReturn_t DECLDIR nvmlDeviceGetFanControlPolicy_v2(nvmlDevice_t device, unsig nvmlReturn_t DECLDIR nvmlDeviceGetNumFans(nvmlDevice_t device, unsigned int *numFans); /** - * Retrieves the current temperature readings for the device, in degrees C. + * @deprecated Use \ref nvmlDeviceGetTemperatureV instead + */ +nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t device, nvmlTemperatureSensors_t sensorType, unsigned int *temp); + +/** + * Retrieves the cooler's information. + * Returns a cooler's control signal characteristics. The possible types are restricted, Variable and Toggle. + * See \ref nvmlCoolerControl_t for details on available signal types. + * Returns objects that cooler cools. Targets may be GPU, Memory, Power Supply or All of these. + * See \ref nvmlCoolerTarget_t for details on available targets. * - * For all products. + * For Maxwell &tm; or newer fully supported devices. * - * See \ref nvmlTemperatureSensors_t for details on available temperature sensors. + * For all discrete products with dedicated fans. 
* - * @param device The identifier of the target device - * @param sensorType Flag that indicates which sensor reading to retrieve - * @param temp Reference in which to return the temperature reading + * @param[in] device The identifier of the target device + * @param[out] coolerInfo Structure specifying the cooler's control signal characteristics (out) + * and the target that cooler cools (out) * * @return - * - \ref NVML_SUCCESS if \a temp has been set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a sensorType is invalid or \a temp is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have the specified sensor - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_SUCCESS If everything worked + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid, \a signalType or \a target is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the provided version is invalid/unsupported + * - \ref NVML_ERROR_NOT_SUPPORTED If the \a device does not support this feature */ -nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t device, nvmlTemperatureSensors_t sensorType, unsigned int *temp); +nvmlReturn_t DECLDIR nvmlDeviceGetCoolerInfo(nvmlDevice_t device, nvmlCoolerInfo_t *coolerInfo); + +/** + * Structure used to encapsulate temperature info + */ +typedef struct +{ + unsigned int version; + nvmlTemperatureSensors_t sensorType; + int temperature; +} nvmlTemperature_v1_t; + +typedef nvmlTemperature_v1_t nvmlTemperature_t; + +#define nvmlTemperature_v1 NVML_STRUCT_VERSION(Temperature, 1) + +/** + * Retrieves the current temperature readings (in degrees C) for the given device. + * + * For all products. 
+ * + * @param[in] device Target device identifier. + * @param[in,out] temperature Structure specifying the sensor type (input) and retrieved + * temperature value (output). + * + * @return + * - \ref NVML_SUCCESS if \a temp has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a sensorType is invalid or \a temp is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have the specified sensor + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetTemperatureV(nvmlDevice_t device, nvmlTemperature_t *temperature); + /** * Retrieves the temperature threshold for the GPU with the specified threshold type in degrees C. @@ -4607,6 +5514,22 @@ nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t device, nvmlTemperatu */ nvmlReturn_t DECLDIR nvmlDeviceGetTemperatureThreshold(nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, unsigned int *temp); +/** + * Retrieves the thermal margin temperature (distance to nearest slowdown threshold). 
+ * + * @param[in] device The identifier of the target device + * @param[in,out] marginTempInfo Versioned structure in which to return the temperature reading + * + * @returns + * - \ref NVML_SUCCESS if the margin temperature was retrieved successfully + * - \ref NVML_ERROR_NOT_SUPPORTED if request is not supported on the current platform + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a temperature is NULL + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the right versioned structure is not used + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetMarginTemperature(nvmlDevice_t device, nvmlMarginTemperature_t *marginTempInfo); + /** * Used to execute a list of thermal system instructions. * @@ -4831,6 +5754,159 @@ nvmlReturn_t DECLDIR nvmlDeviceGetGpcClkMinMaxVfOffset(nvmlDevice_t device, nvmlReturn_t DECLDIR nvmlDeviceGetMemClkMinMaxVfOffset(nvmlDevice_t device, int *minOffset, int *maxOffset); +/** + * Retrieve min, max and current clock offset of some clock domain for a given PState + * + * For Maxwell &tm; or newer fully supported devices. + * + * Note: \ref nvmlDeviceGetGpcClkVfOffset, \ref nvmlDeviceGetMemClkVfOffset, \ref nvmlDeviceGetGpcClkMinMaxVfOffset and + * \ref nvmlDeviceGetMemClkMinMaxVfOffset will be deprecated in a future release. + Use \ref nvmlDeviceGetClockOffsets instead. 
+ * + * @param device The identifier of the target device + * @param info Structure specifying the clock type (input) and the pstate (input) + * retrieved clock offset value (output), min clock offset (output) + * and max clock offset (output) + * + * @return + * - \ref NVML_SUCCESS If everything worked + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a type or \a pstate are invalid or both + * \a minClockOffsetMHz and \a maxClockOffsetMHz are NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the provided version is invalid/unsupported + * - \ref NVML_ERROR_NOT_SUPPORTED If the device does not support this feature + */ +nvmlReturn_t DECLDIR nvmlDeviceGetClockOffsets(nvmlDevice_t device, nvmlClockOffset_t *info); + +/** + * Control current clock offset of some clock domain for a given PState + * + * For Maxwell &tm; or newer fully supported devices. + * + * Requires privileged user. + * + * @param device The identifier of the target device + * @param info Structure specifying the clock type (input), the pstate (input) + * and clock offset value (input) + * + * @return + * - \ref NVML_SUCCESS If everything worked + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_NO_PERMISSION If the user doesn't have permission to perform this operation + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device, \a type or \a pstate are invalid or both + * \a clockOffsetMHz is out of allowed range. 
+ * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the provided version is invalid/unsupported + * - \ref NVML_ERROR_NOT_SUPPORTED If the device does not support this feature + */ +nvmlReturn_t DECLDIR nvmlDeviceSetClockOffsets(nvmlDevice_t device, nvmlClockOffset_t *info); + +/** + * Retrieves a performance mode string with all the + * performance modes defined for this device along with their associated + * GPU Clock and Memory Clock values. + * Not all tokens will be reported on all GPUs, and additional tokens + * may be added in the future. + * For backwards compatibility we still provide nvclock and memclock; + * those are the same as nvclockmin and memclockmin. + * + * Note: These clock values take into account the offset + * set by clients through \ref nvmlDeviceSetClockOffsets. + * + * Maximum available Pstate (P15) shows the minimum performance level (0) and vice versa. + * + * Each performance mode is returned as a comma-separated list of + * "token=value" pairs. Each set of performance mode tokens is separated + * by a ";". 
Valid tokens: + * + * Token Value + * "perf" unsigned int - the Performance level + * "nvclock" unsigned int - the GPU clocks (in MHz) for the perf level + * "nvclockmin" unsigned int - the GPU clocks min (in MHz) for the perf level + * "nvclockmax" unsigned int - the GPU clocks max (in MHz) for the perf level + * "nvclockeditable" unsigned int - if the GPU clock domain is editable for the perf level + * "memclock" unsigned int - the memory clocks (in MHz) for the perf level + * "memclockmin" unsigned int - the memory clocks min (in MHz) for the perf level + * "memclockmax" unsigned int - the memory clocks max (in MHz) for the perf level + * "memclockeditable" unsigned int - if the memory clock domain is editable for the perf level + * "memtransferrate" unsigned int - the memory transfer rate (in MHz) for the perf level + * "memtransferratemin" unsigned int - the memory transfer rate min (in MHz) for the perf level + * "memtransferratemax" unsigned int - the memory transfer rate max (in MHz) for the perf level + * "memtransferrateeditable" unsigned int - if the memory transfer rate is editable for the perf level + * + * Example: + * + * perf=0, nvclock=324, nvclockmin=324, nvclockmax=324, nvclockeditable=0, + * memclock=324, memclockmin=324, memclockmax=324, memclockeditable=0, + * memtransferrate=648, memtransferratemin=648, memtransferratemax=648, + * memtransferrateeditable=0 ; + * perf=1, nvclock=324, nvclockmin=324, nvclockmax=640, nvclockeditable=0, + * memclock=810, memclockmin=810, memclockmax=810, memclockeditable=0, + * memtransferrate=1620, memtransferrate=1620, memtransferrate=1620, + * memtransferrateeditable=0 ; + * + * + * @param device The identifier of the target device + * @param perfModes Reference in which to return the performance level string + * + * @return + * - \ref NVML_SUCCESS if \a perfModes has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a 
device is invalid, or \a name is NULL + * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceModes(nvmlDevice_t device, nvmlDevicePerfModes_t *perfModes); + +/** + * Retrieves a string with the associated current GPU Clock and Memory Clock values. + * + * Not all tokens will be reported on all GPUs, and additional tokens + * may be added in the future. + * + * Note: These clock values take into account the offset + * set by clients through /ref nvmlDeviceSetClockOffsets. + * + * Clock values are returned as a comma-separated list of + * "token=value" pairs. + * Valid tokens: + * + * Token Value + * "perf" unsigned int - the Performance level + * "nvclock" unsigned int - the GPU clocks (in MHz) for the perf level + * "nvclockmin" unsigned int - the GPU clocks min (in MHz) for the perf level + * "nvclockmax" unsigned int - the GPU clocks max (in MHz) for the perf level + * "nvclockeditable" unsigned int - if the GPU clock domain is editable for the perf level + * "memclock" unsigned int - the memory clocks (in MHz) for the perf level + * "memclockmin" unsigned int - the memory clocks min (in MHz) for the perf level + * "memclockmax" unsigned int - the memory clocks max (in MHz) for the perf level + * "memclockeditable" unsigned int - if the memory clock domain is editable for the perf level + * "memtransferrate" unsigned int - the memory transfer rate (in MHz) for the perf level + * "memtransferratemin" unsigned int - the memory transfer rate min (in MHz) for the perf level + * "memtransferratemax" unsigned int - the memory transfer rate max (in MHz) for the perf level + * "memtransferrateeditable" unsigned int - if the memory transfer rate is editable for the perf level + * + * Example: + * + * nvclock=324, nvclockmin=324, nvclockmax=324, 
nvclockeditable=0, + * memclock=324, memclockmin=324, memclockmax=324, memclockeditable=0, + * memtransferrate=648, memtransferratemin=648, memtransferratemax=648, + * memtransferrateeditable=0 ; + * + * + * @param device The identifier of the target device + * @param currentClockFreqs Reference in which to return the performance level string + * + * @return + * - \ref NVML_SUCCESS if \a currentClockFreqs has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a name is NULL + * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetCurrentClockFreqs(nvmlDevice_t device, nvmlDeviceCurrentClockFreqs_t *currentClockFreqs); + /** * This API has been deprecated. * @@ -5055,6 +6131,10 @@ nvmlReturn_t DECLDIR nvmlDeviceGetGpuOperationMode(nvmlDevice_t device, nvmlGpuO * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo(nvmlDevice_t device, nvmlMemory_t *memory); + +/** + * nvmlDeviceGetMemoryInfo_v2 accounts separately for reserved memory and includes it in the used memory amount. + */ nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo_v2(nvmlDevice_t device, nvmlMemory_v2_t *memory); /** @@ -5103,6 +6183,66 @@ nvmlReturn_t DECLDIR nvmlDeviceGetComputeMode(nvmlDevice_t device, nvmlComputeMo */ nvmlReturn_t DECLDIR nvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int *major, int *minor); +/** + * Retrieves the current and pending DRAM Encryption modes for the device. + * + * %BLACKWELL_OR_NEWER% + * Only applicable to devices that support DRAM Encryption + * Requires \a NVML_INFOROM_DEN version 1.0 or higher. + * + * Changing DRAM Encryption modes requires a reboot. 
The "pending" DRAM Encryption mode refers to the target mode following + * the next reboot. + * + * See \ref nvmlEnableState_t for details on allowed modes. + * + * @param device The identifier of the target device + * @param current Reference in which to return the current DRAM Encryption mode + * @param pending Reference in which to return the pending DRAM Encryption mode + * + * @return + * - \ref NVML_SUCCESS if \a current and \a pending have been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or either \a current or \a pending is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the argument version is not supported + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlDeviceSetDramEncryptionMode() + */ +nvmlReturn_t DECLDIR nvmlDeviceGetDramEncryptionMode(nvmlDevice_t device, nvmlDramEncryptionInfo_t *current, nvmlDramEncryptionInfo_t *pending); + +/** + * Set the DRAM Encryption mode for the device. + * + * For Kepler &tm; or newer fully supported devices. + * Only applicable to devices that support DRAM Encryption. + * Requires \a NVML_INFOROM_DEN version 1.0 or higher. + * Requires root/admin permissions. + * + * The DRAM Encryption mode determines whether the GPU enables its DRAM Encryption support. + * + * This operation takes effect after the next reboot. + * + * See \ref nvmlEnableState_t for details on available modes. 
+ * + * @param device The identifier of the target device + * @param dramEncryption The target DRAM Encryption mode + * + * @return + * - \ref NVML_SUCCESS if the DRAM Encryption mode was set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a DRAM Encryption is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the argument version is not supported + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlDeviceGetDramEncryptionMode() + */ +nvmlReturn_t DECLDIR nvmlDeviceSetDramEncryptionMode(nvmlDevice_t device, const nvmlDramEncryptionInfo_t *dramEncryption); + /** * Retrieves the current and pending ECC modes for the device. * @@ -5538,11 +6678,11 @@ nvmlReturn_t DECLDIR nvmlDeviceGetFBCSessions(nvmlDevice_t device, unsigned int /** * Retrieves the current and pending driver model for the device. * - * For Fermi &tm; or newer fully supported devices. + * For Kepler &tm; or newer fully supported devices. * For windows only. * - * On Windows platforms the device driver can run in either WDDM or WDM (TCC) mode. If a display is attached - * to the device it must run in WDDM mode. TCC mode is preferred if a display is not attached. + * On Windows platforms the device driver can run in either WDDM, MCDM or WDM (TCC) modes. If a display is attached + * to the device it must run in WDDM mode. MCDM mode is preferred if a display is not attached. TCC mode is deprecated. * * See \ref nvmlDriverModel_t for details on available driver models. 
* @@ -5558,9 +6698,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetFBCSessions(nvmlDevice_t device, unsigned int * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible * - \ref NVML_ERROR_UNKNOWN on any unexpected error * - * @see nvmlDeviceSetDriverModel() + * @see nvmlDeviceSetDriverModel_v2() */ -nvmlReturn_t DECLDIR nvmlDeviceGetDriverModel(nvmlDevice_t device, nvmlDriverModel_t *current, nvmlDriverModel_t *pending); +nvmlReturn_t DECLDIR nvmlDeviceGetDriverModel_v2(nvmlDevice_t device, nvmlDriverModel_t *current, nvmlDriverModel_t *pending); /** * Get VBIOS version of the device. @@ -5691,7 +6831,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetComputeRunningProcesses_v3(nvmlDevice_t device nvmlReturn_t DECLDIR nvmlDeviceGetGraphicsRunningProcesses_v3(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos); /** - * Get information about processes with a MPS compute context on a device + * Get information about processes with a Multi-Process Service (MPS) compute context on a device * * For Volta &tm; or newer fully supported devices. * @@ -5736,17 +6876,17 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMPSComputeRunningProcesses_v3(nvmlDevice_t dev /** * Get information about running processes on a device for input context * - * %HOPPER_OR_NEWER% + * For Hopper &tm; or newer fully supported devices. * * This function returns information only about running processes (e.g. CUDA application which have * active context). * - * To determine the size of the @ref plist->procArray array to allocate, call the function with - * @ref plist->numProcArrayEntries set to zero and @ref plist->procArray set to NULL. The return + * To determine the size of the \a plist->procArray array to allocate, call the function with + * \a plist->numProcArrayEntries set to zero and \a plist->procArray set to NULL. 
The return * code will be either NVML_ERROR_INSUFFICIENT_SIZE (if there are valid processes of type - * @ref plist->mode to report on, in which case the @ref plist->numProcArrayEntries field will + * \a plist->mode to report on, in which case the \a plist->numProcArrayEntries field will * indicate the required number of entries in the array) or NVML_SUCCESS (if no processes of type - * @ref plist->mode exist). + * \a plist->mode exist). * * The usedGpuMemory field returned is all of the memory used by the application. * The usedGpuCcProtectedMemory field returned is all of the protected memory used by the application. @@ -5763,10 +6903,10 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMPSComputeRunningProcesses_v3(nvmlDevice_t dev * * @param device The device handle or MIG device handle * @param plist Reference in which to process detail list - * @param plist->version The api version - * @param plist->mode The process mode - * @param plist->procArray Reference in which to return the process information - * @param plist->numProcArrayEntries Proc array size of returned entries + * \a plist->version The api version + * \a plist->mode The process mode + * \a plist->procArray Reference in which to return the process information + * \a plist->numProcArrayEntries Proc array size of returned entries * * @return * - \ref NVML_SUCCESS if \a plist->numprocArrayEntries and \a plist->procArray have been populated @@ -5951,7 +7091,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetIrqNum(nvmlDevice_t device, unsigned int *irqN * @param numCores The number of cores for the specified device * * @return - * - \ref NVML_SUCCESS if Gpu core count is successfully retrieved + * - \ref NVML_SUCCESS if GPU core count is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a numCores is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device @@ -5999,7 +7139,7 
@@ nvmlReturn_t DECLDIR nvmlDeviceGetMemoryBusWidth(nvmlDevice_t device, unsigned i * @param maxSpeed The devices's PCIE Max Link speed in MBPS * * @return - * - \ref NVML_SUCCESS if Pcie Max Link Speed is successfully retrieved + * - \ref NVML_SUCCESS if PCIe Max Link Speed is successfully retrieved * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a maxSpeed is NULL * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device @@ -6028,8 +7168,8 @@ nvmlReturn_t DECLDIR nvmlDeviceGetPcieSpeed(nvmlDevice_t device, unsigned int *p * * @param device The identifier of the target device * @param adaptiveClockStatus The current adaptive clocking status, either - * @ref NVML_ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED - * or @ref NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED + * NVML_ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED + * or NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED * * @return * - \ref NVML_SUCCESS if the current adaptive clocking status is successfully retrieved @@ -6050,7 +7190,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetAdaptiveClockInfoStatus(nvmlDevice_t device, u * return * - \ref NVML_SUCCESS if the bus \a type is successfully retreived * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \device is invalid or \type is NULL + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a type is NULL * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetBusType(nvmlDevice_t device, nvmlBusType_t *type); @@ -6061,7 +7201,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetBusType(nvmlDevice_t device, nvmlBusType_t *ty * * Get fabric information associated with the device. * - * %HOPPER_OR_NEWER% + * For Hopper &tm; or newer fully supported devices. 
* * On Hopper + NVSwitch systems, GPU is registered with the NVIDIA Fabric Manager * Upon successful registration, the GPU is added to the NVLink fabric to enable @@ -6091,7 +7231,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetGpuFabricInfo(nvmlDevice_t device, nvmlGpuFabr * nvmlReturn_t result = nvmlDeviceGetGpuFabricInfoV(device,&fabricInfo); * \endcode * -* %HOPPER_OR_NEWER% +* For Hopper &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param gpuFabricInfo Information about GPU fabric state @@ -6189,7 +7329,7 @@ nvmlReturn_t DECLDIR nvmlSystemGetConfComputeGpusReadyState(unsigned int *isAcce nvmlReturn_t DECLDIR nvmlDeviceGetConfComputeProtectedMemoryUsage(nvmlDevice_t device, nvmlMemory_t *memory); /** - * Get Conf Computing Gpu certificate details. + * Get Conf Computing GPU certificate details. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux, Windows TCC. @@ -6208,7 +7348,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetConfComputeGpuCertificate(nvmlDevice_t device, nvmlConfComputeGpuCertificate_t *gpuCert); /** - * Get Conf Computing Gpu attestation report. + * Get Conf Computing GPU attestation report. * * For Ampere &tm; or newer fully supported devices. * Supported on Linux, Windows TCC. @@ -6223,42 +7363,100 @@ nvmlReturn_t DECLDIR nvmlDeviceGetConfComputeGpuCertificate(nvmlDevice_t device, * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceGetConfComputeGpuAttestationReport(nvmlDevice_t device, - nvmlConfComputeGpuAttestationReport_t *gpuAtstReport); +nvmlReturn_t DECLDIR nvmlDeviceGetConfComputeGpuAttestationReport(nvmlDevice_t device, + nvmlConfComputeGpuAttestationReport_t *gpuAtstReport); +/** + * Get Conf Computing key rotation threshold detail. + * + * For Hopper &tm; or newer fully supported devices. + * Supported on Linux, Windows TCC. 
+ * + * @param pKeyRotationThrInfo Reference in which to return the key rotation threshold data + * + * @return + * - \ref NVML_SUCCESS if \a gpu key rotation threshold info has been populated + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memory is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlSystemGetConfComputeKeyRotationThresholdInfo( + nvmlConfComputeGetKeyRotationThresholdInfo_t *pKeyRotationThrInfo); + +/** + * Set Conf Computing Unprotected Memory Size. + * + * For Ampere &tm; or newer fully supported devices. + * Supported on Linux, Windows TCC. + * + * @param device Device Handle + * @param sizeKiB Unprotected Memory size to be set in KiB + * + * @return + * - \ref NVML_SUCCESS if \a sizeKiB successfully set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device + */ +nvmlReturn_t DECLDIR nvmlDeviceSetConfComputeUnprotectedMemSize(nvmlDevice_t device, unsigned long long sizeKiB); + +/** + * Set Conf Computing GPUs ready state. + * + * For Ampere &tm; or newer fully supported devices. + * Supported on Linux, Windows TCC. 
+ * + * @param isAcceptingWork GPU accepting new work, NVML_CC_ACCEPTING_CLIENT_REQUESTS_TRUE or + * NVML_CC_ACCEPTING_CLIENT_REQUESTS_FALSE + * + * @return + * - \ref NVML_SUCCESS if \a current GPUs ready state is successfully set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a isAcceptingWork is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device + */ +nvmlReturn_t DECLDIR nvmlSystemSetConfComputeGpusReadyState(unsigned int isAcceptingWork); + /** - * Get Conf Computing key rotation threshold detail. + * Set Conf Computing key rotation threshold. * - * %HOPPER_OR_NEWER% + * For Hopper &tm; or newer fully supported devices. * Supported on Linux, Windows TCC. * - * @param pKeyRotationThrInfo Reference in which to return the key rotation threshold data + * This function is to set the confidential compute key rotation threshold parameters. + * \a pKeyRotationThrInfo->maxAttackerAdvantage should be in the range from + * NVML_CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MIN to NVML_CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MAX. + * Default value is 60. 
+ * + * @param pKeyRotationThrInfo Reference to the key rotation threshold data * * @return - * - \ref NVML_SUCCESS if \a gpu key rotation threshold info has been populated + * - \ref NVML_SUCCESS if \a key rotation threshold max attacker advantage has been set * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memory is NULL + * - \ref NVML_ERROR_INVALID_STATE if confidential compute GPU ready state is enabled * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlSystemGetConfComputeKeyRotationThresholdInfo( - nvmlConfComputeGetKeyRotationThresholdInfo_t *pKeyRotationThrInfo); +nvmlReturn_t DECLDIR nvmlSystemSetConfComputeKeyRotationThresholdInfo( + nvmlConfComputeSetKeyRotationThresholdInfo_t *pKeyRotationThrInfo); /** * Get Conf Computing System Settings. * - * %HOPPER_OR_NEWER% + * For Hopper &tm; or newer fully supported devices. * Supported on Linux, Windows TCC. 
* * @param settings System CC settings * * @return - * - \ref NVML_SUCCESS if the query is success - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a counters is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the provided version is invalid/unsupported - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_SUCCESS If the query is success + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid or \a counters is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED If the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST If the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the provided version is invalid/unsupported + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlSystemGetConfComputeSettings(nvmlSystemConfComputeSettings_t *settings); @@ -6292,10 +7490,34 @@ nvmlReturn_t DECLDIR nvmlDeviceGetGspFirmwareVersion(nvmlDevice_t device, char * * @return * - \ref NVML_SUCCESS if GSP firmware mode is sucessfully retrieved * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or any of \a isEnabled or \a defaultMode is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if GSP firmware is not enabled for GPU * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetGspFirmwareMode(nvmlDevice_t device, unsigned int *isEnabled, unsigned int *defaultMode); +/** + * Get SRAM ECC error status of this device. + * + * For Ampere &tm; or newer fully supported devices. + * Requires root/admin permissions. 
+ * + * See \ref nvmlEccSramErrorStatus_v1_t for more information on the struct. + * + * @param device The identifier of the target device + * @param status Returns SRAM ECC error status + * + * @return + * - \ref NVML_SUCCESS If \a status has been populated + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid or \a status is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED If the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST If the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a nvmlEccSramErrorStatus_t is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetSramEccErrorStatus(nvmlDevice_t device, + nvmlEccSramErrorStatus_t *status); + /** * @} */ @@ -6369,8 +7591,8 @@ nvmlReturn_t DECLDIR nvmlDeviceGetAccountingStats(nvmlDevice_t device, unsigned * * For Kepler &tm; or newer fully supported devices. * - * To just query the number of processes ready to be queried, call this function with *count = 0 and - * pids=NULL. The return code will be NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if list is empty. + * To query the number of processes under Accounting Mode, call this function with *count = 0 and pids=NULL. + * The return code will be NVML_ERROR_INSUFFICIENT_SIZE with an updated count value indicating the number of processes. * * For more details see \ref nvmlDeviceGetAccountingStats. * @@ -6428,7 +7650,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetAccountingBufferSize(nvmlDevice_t device, unsi /** * Returns the list of retired pages by source, including pages that are pending retirement * The address information provided from this API is the hardware address of the page that was retired. 
Note - * that this does not match the virtual address used in CUDA, but will match the address information in XID 63 + * that this does not match the virtual address used in CUDA, but will match the address information in Xid 63 * * For Kepler &tm; or newer fully supported devices. * @@ -6456,7 +7678,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetRetiredPages(nvmlDevice_t device, nvmlPageReti /** * Returns the list of retired pages by source, including pages that are pending retirement * The address information provided from this API is the hardware address of the page that was retired. Note - * that this does not match the virtual address used in CUDA, but will match the address information in XID 63 + * that this does not match the virtual address used in CUDA, but will match the address information in Xid 63 * * \note nvmlDeviceGetRetiredPages_v2 adds an additional timestamps parameter to return the time of each page's * retirement. @@ -6657,20 +7879,40 @@ nvmlReturn_t DECLDIR nvmlDeviceGetProcessUtilization(nvmlDevice_t device, nvmlPr * @param procesesUtilInfo Pointer to the caller-provided structure of nvmlProcessesUtilizationInfo_t. * @return - * - \ref NVML_SUCCESS if \a procesesUtilInfo->procUtilArray has been populated - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a procesesUtilInfo is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature - * - \ref NVML_ERROR_NOT_FOUND if sample entries are not found - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_VERSION_MISMATCH if the version of \a procesesUtilInfo is invalid - * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a procesesUtilInfo->procUtilArray is NULL, or the buffer size of procesesUtilInfo->procUtilArray is too small. 
- * The caller should check the minimul array size from the returned procesesUtilInfo->processSamplesCount, and call - * the function again with a buffer no smaller than procesesUtilInfo->processSamplesCount * sizeof(nvmlProcessUtilizationInfo_t) - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_SUCCESS If \a procesesUtilInfo->procUtilArray has been populated + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid, or \a procesesUtilInfo is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED If the device does not support this feature + * - \ref NVML_ERROR_NOT_FOUND If sample entries are not found + * - \ref NVML_ERROR_GPU_IS_LOST If the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a procesesUtilInfo is invalid + * - \ref NVML_ERROR_INSUFFICIENT_SIZE If \a procesesUtilInfo->procUtilArray is NULL, or the buffer size of procesesUtilInfo->procUtilArray is too small. + * The caller should check the minimum array size from the returned procesesUtilInfo->processSamplesCount, and call + * the function again with a buffer no smaller than procesesUtilInfo->processSamplesCount * sizeof(nvmlProcessUtilizationInfo_t) + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetProcessesUtilizationInfo(nvmlDevice_t device, nvmlProcessesUtilizationInfo_t *procesesUtilInfo); +/** + * Get platform information of this device. + * + * %BLACKWELL_OR_NEWER% + * + * See \ref nvmlPlatformInfo_v2_t for more information on the struct. + * + * @param device The identifier of the target device + * @param platformInfo Pointer to the caller-provided structure of nvmlPlatformInfo_t. 
+ * + * @return + * - \ref NVML_SUCCESS If \a platformInfo has been retrieved + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid or \a platformInfo is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED If the device does not support this feature + * - \ref NVML_ERROR_MEMORY if system memory is insufficient + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a nvmlPlatformInfo_t is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetPlatformInfo(nvmlDevice_t device, nvmlPlatformInfo_t *platformInfo); + /** @} */ /***************************************************************************************************/ @@ -7327,6 +8569,9 @@ nvmlReturn_t DECLDIR nvmlDeviceSetAPIRestriction(nvmlDevice_t device, nvmlRestri nvmlReturn_t DECLDIR nvmlDeviceSetFanSpeed_v2(nvmlDevice_t device, unsigned int fan, unsigned int speed); /** + * Deprecated: Will be deprecated in a future release. Use \ref nvmlDeviceSetClockOffsets instead. It works + * on Maxwell onwards GPU architectures. + * * Set the GPCCLK VF offset value * @param[in] device The identifier of the target device * @param[in] offset The GPCCLK VF offset value to set @@ -7342,6 +8587,9 @@ nvmlReturn_t DECLDIR nvmlDeviceSetFanSpeed_v2(nvmlDevice_t device, unsigned int nvmlReturn_t DECLDIR nvmlDeviceSetGpcClkVfOffset(nvmlDevice_t device, int offset); /** + * Deprecated: Will be deprecated in a future release. Use \ref nvmlDeviceSetClockOffsets instead. It works + * on Maxwell onwards GPU architectures. + * * Set the MemClk (Memory Clock) VF offset value. It requires elevated privileges. * @param[in] device The identifier of the target device * @param[in] offset The MemClk VF offset value to set @@ -7356,64 +8604,6 @@ nvmlReturn_t DECLDIR nvmlDeviceSetGpcClkVfOffset(nvmlDevice_t device, int offset */ nvmlReturn_t DECLDIR nvmlDeviceSetMemClkVfOffset(nvmlDevice_t device, int offset); -/** - * Set Conf Computing Unprotected Memory Size. 
- * - * For Ampere &tm; or newer fully supported devices. - * Supported on Linux, Windows TCC. - * - * @param device Device Handle - * @param sizeKiB Unprotected Memory size to be set in KiB - * - * @return - * - \ref NVML_SUCCESS if \a sizeKiB successfully set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device - */ -nvmlReturn_t DECLDIR nvmlDeviceSetConfComputeUnprotectedMemSize(nvmlDevice_t device, unsigned long long sizeKiB); - -/** - * Set Conf Computing GPUs ready state. - * - * For Ampere &tm; or newer fully supported devices. - * Supported on Linux, Windows TCC. - * - * @param isAcceptingWork GPU accepting new work, NVML_CC_ACCEPTING_CLIENT_REQUESTS_TRUE or - * NVML_CC_ACCEPTING_CLIENT_REQUESTS_FALSE - * - * return - * - \ref NVML_SUCCESS if \a current GPUs ready state is successfully set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a isAcceptingWork is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device - */ -nvmlReturn_t DECLDIR nvmlSystemSetConfComputeGpusReadyState(unsigned int isAcceptingWork); - -/** - * Set Conf Computing key rotation threshold. - * - * %HOPPER_OR_NEWER% - * Supported on Linux, Windows TCC. - * - * This function is to set the confidential compute key rotation threshold parameters. - * @ref pKeyRotationThrInfo->maxAttackerAdvantage should be in the range from - * NVML_CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MIN to NVML_CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MAX. - * Default value is 60. 
- * - * @param pKeyRotationThrInfo Reference to the key rotation threshold data - * - * @return - * - \ref NVML_SUCCESS if \a key rotation threashold max attacker advantage has been set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memory is NULL - * - \ref NVML_ERROR_INVALID_STATE if confidential compute GPU ready state is enabled - * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device - * - \ref NVML_ERROR_UNKNOWN on any unexpected error - */ -nvmlReturn_t DECLDIR nvmlSystemSetConfComputeKeyRotationThresholdInfo( - nvmlConfComputeSetKeyRotationThresholdInfo_t *pKeyRotationThrInfo); - /** * @} */ @@ -7476,6 +8666,99 @@ nvmlReturn_t DECLDIR nvmlDeviceSetAccountingMode(nvmlDevice_t device, nvmlEnable */ nvmlReturn_t DECLDIR nvmlDeviceClearAccountingPids(nvmlDevice_t device); +/** + * Set new power limit of this device. + * + * For Kepler &tm; or newer fully supported devices. + * Requires root/admin permissions. + * + * See \ref nvmlDeviceGetPowerManagementLimitConstraints to check the allowed ranges of values. + * + * See \ref nvmlPowerValue_v2_t for more information on the struct. + * + * \note Limit is not persistent across reboots or driver unloads. + * Enable persistent mode to prevent driver from unloading when no application is using the device. + * + * This API replaces nvmlDeviceSetPowerManagementLimit. It can be used as a drop-in replacement for the older version. 
+ * + * @param device The identifier of the target device + * @param powerValue Power management limit in milliwatts to set + * + * @return + * - \ref NVML_SUCCESS if \a limit has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a powerValue is NULL or contains invalid values + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see NVML_FI_DEV_POWER_AVERAGE + * @see NVML_FI_DEV_POWER_INSTANT + * @see NVML_FI_DEV_POWER_MIN_LIMIT + * @see NVML_FI_DEV_POWER_MAX_LIMIT + * @see NVML_FI_DEV_POWER_CURRENT_LIMIT + */ +nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit_v2(nvmlDevice_t device, nvmlPowerValue_v2_t *powerValue); + +/***************************************************************************************************/ +/** @defgroup NVML NVLink + * @{ + */ +/***************************************************************************************************/ + +#define NVML_NVLINK_BER_MANTISSA_SHIFT 8 +#define NVML_NVLINK_BER_MANTISSA_WIDTH 0xf + +#define NVML_NVLINK_BER_EXP_SHIFT 0 +#define NVML_NVLINK_BER_EXP_WIDTH 0xff + +/** + * Nvlink Error counter BER can be obtained using the below macros + * Ex - NVML_NVLINK_ERROR_COUNTER_BER_GET(var, BER_MANTISSA) + */ +#define NVML_NVLINK_ERROR_COUNTER_BER_GET(var, type) \ + (((var) >> NVML_NVLINK_##type##_SHIFT) & \ + (NVML_NVLINK_##type##_WIDTH)) \ + +/* + * NVML_FI_DEV_NVLINK_GET_STATE state enums + */ +#define NVML_NVLINK_STATE_INACTIVE 0x0 +#define NVML_NVLINK_STATE_ACTIVE 0x1 +#define NVML_NVLINK_STATE_SLEEP 0x2 + +#define NVML_NVLINK_TOTAL_SUPPORTED_BW_MODES 23 + +typedef struct +{ + unsigned int version; + unsigned char bwModes[NVML_NVLINK_TOTAL_SUPPORTED_BW_MODES]; + unsigned char totalBwModes; +} 
nvmlNvlinkSupportedBwModes_v1_t; +typedef nvmlNvlinkSupportedBwModes_v1_t nvmlNvlinkSupportedBwModes_t; +#define nvmlNvlinkSupportedBwModes_v1 NVML_STRUCT_VERSION(NvlinkSupportedBwModes, 1) + +typedef struct +{ + unsigned int version; + unsigned int bIsBest; + unsigned char bwMode; +} nvmlNvlinkGetBwMode_v1_t; +typedef nvmlNvlinkGetBwMode_v1_t nvmlNvlinkGetBwMode_t; +#define nvmlNvlinkGetBwMode_v1 NVML_STRUCT_VERSION(NvlinkGetBwMode, 1) + +typedef struct +{ + unsigned int version; + unsigned int bSetBest; + unsigned char bwMode; +} nvmlNvlinkSetBwMode_v1_t; +typedef nvmlNvlinkSetBwMode_v1_t nvmlNvlinkSetBwMode_t; +#define nvmlNvlinkSetBwMode_v1 NVML_STRUCT_VERSION(NvlinkSetBwMode, 1) + +/** @} */ // @defgroup NVML NVLink + + /** @} */ /***************************************************************************************************/ @@ -7512,7 +8795,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried - * @param version Requested NvLink version + * @param version Requested NvLink version from nvmlNvlinkVersion_t * * @return * - \ref NVML_SUCCESS if \a version has been set @@ -7698,48 +8981,144 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationCounter(nvmlDevice_t device, * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceFreezeNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, - unsigned int counter, nvmlEnableState_t freeze); +nvmlReturn_t DECLDIR nvmlDeviceFreezeNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, + unsigned int counter, nvmlEnableState_t freeze); + +/** + * Deprecated: Resetting NVLINK utilization counters is no longer supported. 
+ * + * Reset the NVLINK utilization counters + * Both the receive and transmit counters are operated on by this function + * + * For Pascal &tm; or newer fully supported devices. + * + * @param device The identifier of the target device + * @param link Specifies the NvLink link to be reset + * @param counter Specifies the counter that should be reset (0 or 1) + * + * @return + * - \ref NVML_SUCCESS if counters were successfully reset + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a link, or \a counter is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceResetNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, unsigned int counter); + +/** +* Get the NVLink device type of the remote device connected over the given link. +* +* @param device The device handle of the target GPU +* @param link The NVLink link index on the target GPU +* @param pNvLinkDeviceType Pointer in which the output remote device type is returned +* +* @return +* - \ref NVML_SUCCESS if \a pNvLinkDeviceType has been set +* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized +* - \ref NVML_ERROR_NOT_SUPPORTED if NVLink is not supported +* - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid, or +* \a pNvLinkDeviceType is NULL +* - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is +* otherwise inaccessible +* - \ref NVML_ERROR_UNKNOWN on any unexpected error +*/ +nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkRemoteDeviceType(nvmlDevice_t device, unsigned int link, nvmlIntNvLinkDeviceType_t *pNvLinkDeviceType); + +/** + * Set NvLink Low Power Threshold for device. + * + * For Hopper &tm; or newer fully supported devices. 
+ * + * @param device The identifier of the target device + * @param info Reference to \a nvmlNvLinkPowerThres_t struct + * input parameters + * + * @return + * - \ref NVML_SUCCESS if the \a Threshold is successfully set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a Threshold is not within range + * - \ref NVML_ERROR_NOT_READY if an internal driver setting prevents the threshold from being used + * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device + * + **/ +nvmlReturn_t DECLDIR nvmlDeviceSetNvLinkDeviceLowPowerThreshold(nvmlDevice_t device, nvmlNvLinkPowerThres_t *info); + +/** + * Set the global nvlink bandwidth mode + * + * @param nvlinkBwMode nvlink bandwidth mode + * @return + * - \ref NVML_SUCCESS on success + * - \ref NVML_ERROR_INVALID_ARGUMENT if an invalid argument is provided + * - \ref NVML_ERROR_IN_USE if P2P object exists + * - \ref NVML_ERROR_NOT_SUPPORTED if GPU is not Hopper or newer architecture. + * - \ref NVML_ERROR_NO_PERMISSION if not root user + */ +nvmlReturn_t DECLDIR nvmlSystemSetNvlinkBwMode(unsigned int nvlinkBwMode); + +/** + * Get the global nvlink bandwidth mode + * + * @param nvlinkBwMode reference of nvlink bandwidth mode + * @return + * - \ref NVML_SUCCESS on success + * - \ref NVML_ERROR_INVALID_ARGUMENT if an invalid pointer is provided + * - \ref NVML_ERROR_NOT_SUPPORTED if GPU is not Hopper or newer architecture. + * - \ref NVML_ERROR_NO_PERMISSION if not root user + */ +nvmlReturn_t DECLDIR nvmlSystemGetNvlinkBwMode(unsigned int *nvlinkBwMode); /** - * Deprecated: Resetting NVLINK utilization counters is no longer supported. + * Get the supported NvLink Reduced Bandwidth Modes of the device * - * Reset the NVLINK utilization counters - * Both the receive and transmit counters are operated on by this function + * %BLACKWELL_OR_NEWER% * - * For Pascal &tm; or newer fully supported devices.
+ * @param device The identifier of the target device + * @param supportedBwMode Reference to \a nvmlNvlinkSupportedBwModes_t * - * @param device The identifier of the target device - * @param link Specifies the NvLink link to be reset - * @param counter Specifies the counter that should be reset (0 or 1) + * @return + * - \ref NVML_SUCCESS if the query was successful + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid or supportedBwMode is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by the device + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the version specified is not supported + **/ +nvmlReturn_t DECLDIR nvmlDeviceGetNvlinkSupportedBwModes(nvmlDevice_t device, + nvmlNvlinkSupportedBwModes_t *supportedBwMode); + +/** + * Get the NvLink Reduced Bandwidth Mode for the device + * + * %BLACKWELL_OR_NEWER% + * + * @param device The identifier of the target device + * @param getBwMode Reference to \a nvmlNvlinkGetBwMode_t * * @return - * - \ref NVML_SUCCESS if counters were successfully reset - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a link, or \a counter is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature - * - \ref NVML_ERROR_UNKNOWN on any unexpected error - */ -nvmlReturn_t DECLDIR nvmlDeviceResetNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, unsigned int counter); + * - \ref NVML_SUCCESS if the query was successful + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid or getBwMode is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by the device + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the version specified is not supported + **/ +nvmlReturn_t DECLDIR nvmlDeviceGetNvlinkBwMode(nvmlDevice_t device, + nvmlNvlinkGetBwMode_t *getBwMode); /** -* Get the NVLink device type of the remote device connected over the given link. 
-* -* @param device The device handle of the target GPU -* @param link The NVLink link index on the target GPU -* @param pNvLinkDeviceType Pointer in which the output remote device type is returned -* -* @return -* - \ref NVML_SUCCESS if \a pNvLinkDeviceType has been set -* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized -* - \ref NVML_ERROR_NOT_SUPPORTED if NVLink is not supported -* - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid, or -* \a pNvLinkDeviceType is NULL -* - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is -* otherwise inaccessible -* - \ref NVML_ERROR_UNKNOWN on any unexpected error -*/ -nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkRemoteDeviceType(nvmlDevice_t device, unsigned int link, nvmlIntNvLinkDeviceType_t *pNvLinkDeviceType); + * Set the NvLink Reduced Bandwidth Mode for the device + * + * %BLACKWELL_OR_NEWER% + * + * @param device The identifier of the target device + * @param setBwMode Reference to \a nvmlNvlinkSetBwMode_t + * + * @return + * - \ref NVML_SUCCESS if the Bandwidth mode was successfully set + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid or setBwMode is NULL + * - \ref NVML_ERROR_NO_PERMISSION if user does not have permission to change Bandwidth mode + * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by the device + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the version specified is not supported + **/ +nvmlReturn_t DECLDIR nvmlDeviceSetNvlinkBwMode(nvmlDevice_t device, + nvmlNvlinkSetBwMode_t *setBwMode); /** @} */ @@ -7772,13 +9151,11 @@ nvmlReturn_t DECLDIR nvmlEventSetCreate(nvmlEventSet_t *set); * Starts recording of events on a specified devices and add the events to specified \ref nvmlEventSet_t * * For Fermi &tm; or newer fully supported devices. 
- * Ecc events are available only on ECC enabled devices (see \ref nvmlDeviceGetTotalEccErrors) + * ECC events are available only on ECC-enabled devices (see \ref nvmlDeviceGetTotalEccErrors) * Power capping events are available only on Power Management enabled devices (see \ref nvmlDeviceGetPowerManagementMode) * * For Linux only. * - * \b IMPORTANT: Operations on \a set are not thread safe - * * This call starts recording of events on specific device. * All events that occurred before this call are not recorded. * Checking if some event occurred can be done with \ref nvmlEventSetWait_v2 @@ -7838,11 +9215,11 @@ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedEventTypes(nvmlDevice_t device, unsig * but not longer than specified timeout. This function in certain conditions can return before * specified timeout passes (e.g. when interrupt arrives) * - * On Windows, in case of xid error, the function returns the most recent xid error type seen by the system. - * If there are multiple xid errors generated before nvmlEventSetWait is invoked then the last seen xid error - * type is returned for all xid error events. + * On Windows, in case of Xid error, the function returns the most recent Xid error type seen by the system. + * If there are multiple Xid errors generated before nvmlEventSetWait is invoked then the last seen Xid error + * type is returned for all Xid error events. * - * On Linux, every xid error event would return the associated event data and other information if applicable. + * On Linux, every Xid error event would return the associated event data and other information if applicable. * * In MIG mode, if device handle is provided, the API reports all the events for the available instances, * only if the caller has appropriate privileges. 
In absence of required privileges, only the events which @@ -7883,6 +9260,98 @@ nvmlReturn_t DECLDIR nvmlEventSetWait_v2(nvmlEventSet_t set, nvmlEventData_t * d */ nvmlReturn_t DECLDIR nvmlEventSetFree(nvmlEventSet_t set); +/** + * Create an empty set of system events. + * Event set should be freed by \ref nvmlSystemEventSetFree + * + * For Fermi &tm; or newer fully supported devices. + * @param request Reference to nvmlSystemEventSetCreateRequest_t + * + * @return + * - \ref NVML_SUCCESS if the event has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if request is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH for unsupported version + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlSystemEventSetFree + */ +nvmlReturn_t DECLDIR nvmlSystemEventSetCreate(nvmlSystemEventSetCreateRequest_t *request); + +/** + * Releases system event set + * + * For Fermi &tm; or newer fully supported devices. + * + * @param request Reference to nvmlSystemEventSetFreeRequest_t + * + * @return + * - \ref NVML_SUCCESS if the event has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if request is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH for unsupported version + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlDeviceRegisterEvents + */ +nvmlReturn_t DECLDIR nvmlSystemEventSetFree(nvmlSystemEventSetFreeRequest_t *request); + +/** + * Starts recording of events on system and add the events to specified \ref nvmlSystemEventSet_t + * + * For Linux only. + * + * This call starts recording of events on specific device. + * All events that occurred before this call are not recorded. + * Checking if some event occurred can be done with \ref nvmlSystemEventSetWait + * + * If function reports NVML_ERROR_UNKNOWN, event set is in undefined state and should be freed.
+ * If function reports NVML_ERROR_NOT_SUPPORTED, event set can still be used. None of the requested eventTypes + * are registered in that case. + * + * @param request Reference to the struct nvmlSystemRegisterEventRequest_t + * + * @return + * - \ref NVML_SUCCESS if the event has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if request is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH for unsupported version + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlSystemEventType + * @see nvmlSystemEventSetWait + * @see nvmlEventSetFree + */ +nvmlReturn_t DECLDIR nvmlSystemRegisterEvents(nvmlSystemRegisterEventRequest_t *request); + +/** + * Waits on system events and delivers events + * + * For Fermi &tm; or newer fully supported devices. + * + * If some events are ready to be delivered at the time of the call, function returns immediately. + * If there are no events ready to be delivered, function sleeps till event arrives + * but not longer than specified timeout. This function in certain conditions can return before + * specified timeout passes (e.g. when interrupt arrives) + * + * if the return request->numEvent equals to request->dataSize, there might be outstanding + * event, it is recommended to call nvmlSystemEventSetWait again to query all the events. 
+ * + * @param request Reference to nvmlSystemEventSetWaitRequest_t + * + * @return + * - \ref NVML_SUCCESS if the event has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if request is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH for unsupported version + * - \ref NVML_ERROR_TIMEOUT if no event notification after timeoutms + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlSystemEventType + * @see nvmlSystemRegisterEvents + */ +nvmlReturn_t DECLDIR nvmlSystemEventSetWait(nvmlSystemEventSetWaitRequest_t *request); + /** @} */ /***************************************************************************************************/ @@ -8044,13 +9513,6 @@ nvmlReturn_t DECLDIR nvmlDeviceClearFieldValues(nvmlDevice_t device, int valuesC /** @} */ -/***************************************************************************************************/ -/** @defgroup vGPU Enums, Constants and Structs - * @{ - */ -/** @} */ -/***************************************************************************************************/ - /***************************************************************************************************/ /** @defgroup nvmlVirtualGpuQueries vGPU APIs * This chapter describes operations that are associated with NVIDIA vGPU Software products.
@@ -8125,12 +9587,12 @@ nvmlReturn_t DECLDIR nvmlDeviceSetVirtualizationMode(nvmlDevice_t device, nvmlGp * @param pHeterogeneousMode Pointer to the caller-provided structure of nvmlVgpuHeterogeneousMode_t * * @return - * - \ref NVML_SUCCESS Upon success - * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid or \a pHeterogeneousMode is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED If \a device doesn't support this feature - * - \ref NVML_ERROR_VERSION_MISMATCH If the version of \a pHeterogeneousMode is invalid - * - \ref NVML_ERROR_UNKNOWN On any unexpected error + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid or \a pHeterogeneousMode is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device doesn't support this feature + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pHeterogeneousMode is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuHeterogeneousMode(nvmlDevice_t device, nvmlVgpuHeterogeneousMode_t *pHeterogeneousMode); @@ -8142,6 +9604,8 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuHeterogeneousMode(nvmlDevice_t device, nvm * API would return an appropriate error code upon unsuccessful activation. For example, the heterogeneous mode * set will fail with error \ref NVML_ERROR_IN_USE if any vGPU instance is active on the device. The caller of this API * is expected to shutdown the vGPU VMs and retry setting the \a mode. + * On KVM platform, setting heterogeneous mode is allowed, if no MDEV device is created on the device, else will fail + * with same error \ref NVML_ERROR_IN_USE. * On successful return, the function updates the vGPU heterogeneous mode with the user provided \a pHeterogeneousMode->mode. 
* \a pHeterogeneousMode->version is the version number of the structure nvmlVgpuHeterogeneousMode_t, the caller should * set the correct version number to set the vGPU heterogeneous mode. @@ -8150,14 +9614,14 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuHeterogeneousMode(nvmlDevice_t device, nvm * @param pHeterogeneousMode Pointer to the caller-provided structure of nvmlVgpuHeterogeneousMode_t * * @return - * - \ref NVML_SUCCESS Upon success - * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device or \a pHeterogeneousMode is NULL or \a pHeterogeneousMode->mode is invalid - * - \ref NVML_ERROR_IN_USE If the \a device is in use - * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation - * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device doesn't support this feature - * - \ref NVML_ERROR_VERSION_MISMATCH If the version of \a pHeterogeneousMode is invalid - * - \ref NVML_ERROR_UNKNOWN On any unexpected error + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device or \a pHeterogeneousMode is NULL or \a pHeterogeneousMode->mode is invalid + * - \ref NVML_ERROR_IN_USE If the \a device is in use + * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device doesn't support this feature + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pHeterogeneousMode is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetVgpuHeterogeneousMode(nvmlDevice_t device, const nvmlVgpuHeterogeneousMode_t *pHeterogeneousMode); @@ -8173,35 +9637,41 @@ nvmlReturn_t DECLDIR nvmlDeviceSetVgpuHeterogeneousMode(nvmlDevice_t device, con * @param pPlacement Pointer to vGPU placement ID structure \a 
nvmlVgpuPlacementId_t * * @return - * - \ref NVML_SUCCESS If information is successfully retrieved - * - \ref NVML_ERROR_NOT_FOUND If \a vgpuInstance does not match a valid active vGPU instance - * - \ref NVML_ERROR_INVALID_ARGUMENT If \a vgpuInstance is invalid or \a pPlacement is NULL - * - \ref NVML_ERROR_VERSION_MISMATCH If the version of \a pPlacement is invalid - * - \ref NVML_ERROR_UNKNOWN On any unexpected error + * - \ref NVML_SUCCESS If information is successfully retrieved + * - \ref NVML_ERROR_NOT_FOUND If \a vgpuInstance does not match a valid active vGPU instance + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a vgpuInstance is invalid or \a pPlacement is NULL + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pPlacement is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetPlacementId(nvmlVgpuInstance_t vgpuInstance, nvmlVgpuPlacementId_t *pPlacement); /** * Query the supported vGPU placement ID of the vGPU type. * - * An array of supported vGPU placement IDs for the vGPU type ID indicated by \a vgpuTypeId is returned in the - * caller-supplied buffer of \a pPlacementList->placementIds. Memory needed for the placementIds array should be - * allocated based on maximum instances of a vGPU type which can be queried via \ref nvmlVgpuTypeGetMaxInstances(). + * The function returns an array of supported vGPU placement IDs for the specified vGPU type ID in the buffer provided + * by the caller at \a pPlacementList->placementIds. The required memory for the placementIds array must be allocated + * based on the maximum number of vGPU type instances, which is retrievable through \ref nvmlVgpuTypeGetMaxInstances(). + * If the provided count by the caller is insufficient, the function will return NVML_ERROR_INSUFFICIENT_SIZE along with + * the number of required entries in \a pPlacementList->count. 
The caller should then reallocate a buffer with the size + * of pPlacementList->count * sizeof(pPlacementList->placementIds) and invoke the function again. * - * This function will return supported placement IDs even if GPU is not in vGPU heterogeneous mode. + * To obtain a list of homogeneous placement IDs, the caller needs to set \a pPlacementList->mode to NVML_VGPU_PGPU_HOMOGENEOUS_MODE. + * For heterogeneous placement IDs, \a pPlacementList->mode should be set to NVML_VGPU_PGPU_HETEROGENEOUS_MODE. + * By default, a list of heterogeneous placement IDs is returned. * * @param device Identifier of the target device * @param vgpuTypeId Handle to vGPU type. The vGPU type ID * @param pPlacementList Pointer to the vGPU placement structure \a nvmlVgpuPlacementList_t * * @return - * - \ref NVML_SUCCESS Upon success - * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device or \a vgpuTypeId is invalid or \a pPlacementList is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED If \a device or \a vgpuTypeId isn't supported - * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation - * - \ref NVML_ERROR_VERSION_MISMATCH If the version of \a pPlacementList is invalid - * - \ref NVML_ERROR_UNKNOWN On any unexpected error + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device or \a vgpuTypeId is invalid or \a pPlacementList is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED If \a device or \a vgpuTypeId isn't supported + * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pPlacementList is invalid + * - \ref NVML_ERROR_INSUFFICIENT_SIZE If the buffer is small, element count is returned in \a pPlacementList->count + * - \ref NVML_ERROR_UNKNOWN On any unexpected 
error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuTypeSupportedPlacements(nvmlDevice_t device, nvmlVgpuTypeId_t vgpuTypeId, nvmlVgpuPlacementList_t *pPlacementList); @@ -8211,23 +9681,25 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuTypeSupportedPlacements(nvmlDevice_t devic * An array of creatable vGPU placement IDs for the vGPU type ID indicated by \a vgpuTypeId is returned in the * caller-supplied buffer of \a pPlacementList->placementIds. Memory needed for the placementIds array should be * allocated based on maximum instances of a vGPU type which can be queried via \ref nvmlVgpuTypeGetMaxInstances(). + * If the provided count by the caller is insufficient, the function will return NVML_ERROR_INSUFFICIENT_SIZE along with + * the number of required entries in \a pPlacementList->count. The caller should then reallocate a buffer with the size + * of pPlacementList->count * sizeof(pPlacementList->placementIds) and invoke the function again. + * * The creatable vGPU placement IDs may differ over time, as there may be restrictions on what type of vGPU the * vGPU instance is running. * - * The function will return \ref NVML_ERROR_NOT_SUPPORTED if the \a device is not in vGPU heterogeneous mode. - * * @param device The identifier of the target device * @param vgpuTypeId Handle to vGPU type. 
The vGPU type ID * @param pPlacementList Pointer to the list of vGPU placement structure \a nvmlVgpuPlacementList_t * * @return - * - \ref NVML_SUCCESS Upon success - * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device or \a vgpuTypeId is invalid or \a pPlacementList is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED If \a device or \a vgpuTypeId isn't supported - * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation - * - \ref NVML_ERROR_VERSION_MISMATCH If the version of \a pPlacementList is invalid - * - \ref NVML_ERROR_UNKNOWN On any unexpected error + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device or \a vgpuTypeId is invalid or \a pPlacementList is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device or \a vgpuTypeId isn't supported + * - \ref NVML_ERROR_NO_PERMISSION If user doesn't have permission to perform the operation + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pPlacementList is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuTypeCreatablePlacements(nvmlDevice_t device, nvmlVgpuTypeId_t vgpuTypeId, nvmlVgpuPlacementList_t *pPlacementList); @@ -8257,6 +9729,27 @@ nvmlReturn_t DECLDIR nvmlVgpuTypeGetGspHeapSize(nvmlVgpuTypeId_t vgpuTypeId, uns */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetFbReservation(nvmlVgpuTypeId_t vgpuTypeId, unsigned long long *fbReservation); +/** + * Retrieve the currently used runtime state size of the vGPU instance + * + * This size represents the maximum in-memory data size utilized by a vGPU instance during standard operation. + * This measurement is exclusive of frame buffer (FB) data size assigned to the vGPU instance. + * + * For Maxwell &tm; or newer fully supported devices. 
+ * + * @param vgpuInstance Identifier of the target vGPU instance + * @param pState Pointer to the vGPU runtime state's structure \a nvmlVgpuRuntimeState_t + * + * @return + * - \ref NVML_SUCCESS If information is successfully retrieved + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a vgpuInstance is invalid, or \a pState is NULL + * - \ref NVML_ERROR_NOT_FOUND If \a vgpuInstance does not match a valid active vGPU instance on the system + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pState is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlVgpuInstanceGetRuntimeStateSize(nvmlVgpuInstance_t vgpuInstance, nvmlVgpuRuntimeState_t *pState); + /** * Set the desirable vGPU capability of a device * @@ -8598,6 +10091,22 @@ nvmlReturn_t DECLDIR nvmlVgpuTypeGetMaxInstances(nvmlDevice_t device, nvmlVgpuTy */ nvmlReturn_t DECLDIR nvmlVgpuTypeGetMaxInstancesPerVm(nvmlVgpuTypeId_t vgpuTypeId, unsigned int *vgpuInstanceCountPerVm); +/** + * Retrieve the BAR1 info for given vGPU type. + * + * For Maxwell &tm; or newer fully supported devices. + * + * @param vgpuTypeId Handle to vGPU type + * @param bar1Info Pointer to the vGPU type BAR1 information structure \a nvmlVgpuTypeBar1Info_t + * + * @return + * - \ref NVML_SUCCESS successful completion + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a bar1Info is NULL + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlVgpuTypeGetBAR1Info(nvmlVgpuTypeId_t vgpuTypeId, nvmlVgpuTypeBar1Info_t *bar1Info); + /** * Retrieve the active vGPU instances on a device. 
* @@ -8970,48 +10479,279 @@ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetGpuInstanceId(nvmlVgpuInstance_t vgpuIns nvmlReturn_t DECLDIR nvmlVgpuInstanceGetGpuPciId(nvmlVgpuInstance_t vgpuInstance, char *vgpuPciId, unsigned int *length); /** -* Retrieve the requested capability for a given vGPU type. Refer to the \a nvmlVgpuCapability_t structure -* for the specific capabilities that can be queried. The return value in \a capResult should be treated as -* a boolean, with a non-zero value indicating that the capability is supported. -* -* For Maxwell &tm; or newer fully supported devices. -* -* @param vgpuTypeId Handle to vGPU type -* @param capability Specifies the \a nvmlVgpuCapability_t to be queried -* @param capResult A boolean for the queried capability indicating that feature is supported -* -* @return -* - \ref NVML_SUCCESS successful completion -* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized -* - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a capability is invalid, or \a capResult is NULL -* - \ref NVML_ERROR_UNKNOWN on any unexpected error -*/ -nvmlReturn_t DECLDIR nvmlVgpuTypeGetCapabilities(nvmlVgpuTypeId_t vgpuTypeId, nvmlVgpuCapability_t capability, unsigned int *capResult); +* Retrieve the requested capability for a given vGPU type. Refer to the \a nvmlVgpuCapability_t structure +* for the specific capabilities that can be queried. The return value in \a capResult should be treated as +* a boolean, with a non-zero value indicating that the capability is supported. +* +* For Maxwell &tm; or newer fully supported devices. 
+* +* @param vgpuTypeId Handle to vGPU type +* @param capability Specifies the \a nvmlVgpuCapability_t to be queried +* @param capResult A boolean for the queried capability indicating that feature is supported +* +* @return +* - \ref NVML_SUCCESS successful completion +* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized +* - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuTypeId is invalid, or \a capability is invalid, or \a capResult is NULL +* - \ref NVML_ERROR_UNKNOWN on any unexpected error +*/ +nvmlReturn_t DECLDIR nvmlVgpuTypeGetCapabilities(nvmlVgpuTypeId_t vgpuTypeId, nvmlVgpuCapability_t capability, unsigned int *capResult); + +/** + * Retrieve the MDEV UUID of a vGPU instance. + * + * The MDEV UUID is a globally unique identifier of the mdev device assigned to the VM, and is returned as a 5-part hexadecimal string, + * not exceeding 80 characters in length (including the NULL terminator). + * MDEV UUID is displayed only on KVM platform. + * See \ref nvmlConstants::NVML_DEVICE_UUID_BUFFER_SIZE. + * + * For Maxwell &tm; or newer fully supported devices. 
+ * + * @param vgpuInstance Identifier of the target vGPU instance + * @param mdevUuid Pointer to caller-supplied buffer to hold MDEV UUID + * @param size Size of buffer in bytes + * + * @return + * - \ref NVML_SUCCESS successful completion + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_NOT_SUPPORTED on any hypervisor other than KVM + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a mdevUuid is NULL + * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system + * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a size is too small + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlVgpuInstanceGetMdevUUID(nvmlVgpuInstance_t vgpuInstance, char *mdevUuid, unsigned int size); + +/** + * Query the currently creatable vGPU types on a specific GPU Instance. + * + * The function returns an array of vGPU types that can be created for a specified GPU instance. This array is stored + * in a caller-supplied buffer, with the buffer's element count passed through \a pVgpus->vgpuCount. The number of + * vGPU types written to the buffer is indicated by \a pVgpus->vgpuCount. If the buffer is too small to hold the vGPU + * type array, the function returns NVML_ERROR_INSUFFICIENT_SIZE and updates \a pVgpus->vgpuCount with the required + * element count. + * + * To determine the creatable vGPUs for a GPU Instance, invoke this function with \a pVgpus->vgpuCount set to 0 and + * \a pVgpus->vgpuTypeIds as NULL. This will result in NVML_ERROR_INSUFFICIENT_SIZE being returned, along with the + * count value in \a pVgpus->vgpuCount. + * + * The creatable vGPU types may differ over time, as there may be restrictions on what type of vGPUs can concurrently + * run on the device. 
+ * + * @param gpuInstance The GPU instance handle + * @param pVgpus Pointer to the caller-provided structure of nvmlVgpuTypeIdInfo_t + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pVgpus is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_INSUFFICIENT_SIZE If \a pVgpus->vgpuTypeIds buffer is small + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pVgpus is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetCreatableVgpus(nvmlGpuInstance_t gpuInstance, nvmlVgpuTypeIdInfo_t *pVgpus); + +/** + * Retrieve the maximum number of vGPU instances per GPU instance for given vGPU type + * + * @param pMaxInstance Pointer to the caller-provided structure of nvmlVgpuTypeMaxInstance_t + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pMaxInstance is NULL or \a pMaxInstance->vgpuTypeId is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU or non-MIG vGPU type + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pMaxInstance is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlVgpuTypeGetMaxInstancesPerGpuInstance(nvmlVgpuTypeMaxInstance_t *pMaxInstance); + +/** + * Retrieve the active vGPU instances within a GPU instance. + * + * An array of active vGPU instances is returned in the caller-supplied buffer pointed + * at by \a pVgpuInstanceInfo->vgpuInstances. 
The array element count is passed in + * \a pVgpuInstanceInfo->vgpuCount, and \a pVgpuInstanceInfo->vgpuCount is used to return + * the number of vGPU instances written to the buffer. + * + * If the supplied buffer is not large enough to accommodate the vGPU instance array, + * the function returns NVML_ERROR_INSUFFICIENT_SIZE, with the element count of + * nvmlVgpuInstance_t array required in \a pVgpuInstanceInfo->vgpuCount. To query the + * number of active vGPU instances, call this function with pVgpuInstanceInfo->vgpuCount = 0 + * and pVgpuInstanceInfo->vgpuTypeIds = NULL. The code will return NVML_ERROR_INSUFFICIENT_SIZE, + * or NVML_SUCCESS if no vGPU Types are active. + * + * @param gpuInstance The GPU instance handle + * @param pVgpuInstanceInfo Pointer to the vGPU instance information structure \a nvmlActiveVgpuInstanceInfo_t + * + * @return + * - \ref NVML_SUCCESS Successful completion + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pVgpuInstanceInfo is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_INSUFFICIENT_SIZE \a pVgpuInstanceInfo->vgpuTypeIds buffer is too small, + * array element count is returned in \a pVgpuInstanceInfo->vgpuCount + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pVgpuInstanceInfo is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetActiveVgpus(nvmlGpuInstance_t gpuInstance, nvmlActiveVgpuInstanceInfo_t *pVgpuInstanceInfo); + +/** + * Set vGPU scheduler state for the given GPU instance + * + * %GB20X_OR_NEWER% + * + * Scheduler state and params will be allowed to set only when no VM is running within the GPU instance. + * In \a nvmlVgpuSchedulerState_t, IFF enableARRMode is enabled then provide the avgFactor and frequency + * as input. 
If enableARRMode is disabled then provide timeslice as input. + * + * The scheduler state change won't persist across module load/unload and GPU Instance creation/deletion. + * + * @param gpuInstance The GPU instance handle + * @param pScheduler Pointer to the caller-provided structure of nvmlVgpuSchedulerState_t + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pScheduler is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_RESET_REQUIRED If setting the state failed with fatal error, reboot is required + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU or if any vGPU instance exists + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pScheduler is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceSetVgpuSchedulerState(nvmlGpuInstance_t gpuInstance, nvmlVgpuSchedulerState_t *pScheduler); + +/** + * Returns the vGPU scheduler state for the given GPU instance. + * The information returned in \a nvmlVgpuSchedulerStateInfo_t is not relevant if the BEST EFFORT policy is set. 
+ * + * %GB20X_OR_NEWER% + * + * @param gpuInstance The GPU instance handle + * @param pSchedulerStateInfo Reference in which \a pSchedulerStateInfo is returned + * + * @return + * - \ref NVML_SUCCESS vGPU scheduler state is successfully obtained + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pSchedulerStateInfo is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pSchedulerStateInfo is invalid + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuSchedulerState(nvmlGpuInstance_t gpuInstance, nvmlVgpuSchedulerStateInfo_t *pSchedulerStateInfo); + +/** + * Returns the vGPU scheduler logs for the given GPU instance. + * \a pSchedulerLogInfo points to a caller-allocated structure to contain the logs. The number of elements returned will + * never exceed \a NVML_SCHEDULER_SW_MAX_LOG_ENTRIES. + * + * To get the entire logs, call the function atleast 5 times a second. 
+ * + * %GB20X_OR_NEWER% + * + * @param gpuInstance The GPU instance handle + * @param pSchedulerLogInfo Reference in which \a pSchedulerLogInfo is written + * + * @return + * - \ref NVML_SUCCESS vGPU scheduler logs are successfully obtained + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pSchedulerLogInfo is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pSchedulerLogInfo is invalid + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuSchedulerLog(nvmlGpuInstance_t gpuInstance, nvmlVgpuSchedulerLogInfo_t *pSchedulerLogInfo); + +/** + * Query the creatable vGPU placement ID of the vGPU type within a GPU instance. + * + * %GB20X_OR_NEWER% + * + * An array of creatable vGPU placement IDs for the vGPU type ID indicated by \a pCreatablePlacementInfo->vgpuTypeId + * is returned in the caller-supplied buffer of \a pCreatablePlacementInfo->placementIds. Memory needed for the + * placementIds array should be allocated based on maximum instances of a vGPU type per GPU instance which can be + * queried via \ref nvmlVgpuTypeGetMaxInstancesPerGpuInstance(). + * If the provided count by the caller is insufficient, the function will return NVML_ERROR_INSUFFICIENT_SIZE along with + * the number of required entries in \a pCreatablePlacementInfo->count. The caller should then reallocate a buffer with the size + * of pCreatablePlacementInfo->count * sizeof(pCreatablePlacementInfo->placementIds) and invoke the function again. + * The creatable vGPU placement IDs may differ over time, as there may be restrictions on what type of vGPU the + * vGPU instance is running. 
+ * + * @param gpuInstance The GPU instance handle + * @param pCreatablePlacementInfo Pointer to the list of vGPU creatable placement structure \a nvmlVgpuCreatablePlacementInfo_t + * + * @return + * - \ref NVML_SUCCESS Successful completion + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pCreatablePlacementInfo is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_INSUFFICIENT_SIZE If the buffer is small, element count is returned in \a pCreatablePlacementInfo->count + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pCreatablePlacementInfo is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU or vGPU heterogeneous mode is not enabled + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuTypeCreatablePlacements(nvmlGpuInstance_t gpuInstance, nvmlVgpuCreatablePlacementInfo_t *pCreatablePlacementInfo); + +/** + * Get the vGPU heterogeneous mode for the GPU instance. + * + * When in heterogeneous mode, a vGPU can concurrently host timesliced vGPUs with differing framebuffer sizes. + * + * On successful return, the function returns \a pHeterogeneousMode->mode with the current vGPU heterogeneous mode. + * \a pHeterogeneousMode->version is the version number of the structure nvmlVgpuHeterogeneousMode_t, the caller should + * set the correct version number to retrieve the vGPU heterogeneous mode. + * \a pHeterogeneousMode->mode can either be \ref NVML_FEATURE_ENABLED or \ref NVML_FEATURE_DISABLED. 
+ * + * %GB20X_OR_NEWER% + * + * @param gpuInstance The GPU instance handle + * @param pHeterogeneousMode Pointer to the caller-provided structure of nvmlVgpuHeterogeneousMode_t + * + * @return + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, or \a pHeterogeneousMode is NULL + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU or not in MIG mode + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pHeterogeneousMode is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuHeterogeneousMode(nvmlGpuInstance_t gpuInstance, nvmlVgpuHeterogeneousMode_t *pHeterogeneousMode); /** - * Retrieve the MDEV UUID of a vGPU instance. + * Enable or disable vGPU heterogeneous mode for the GPU instance. * - * The MDEV UUID is a globally unique identifier of the mdev device assigned to the VM, and is returned as a 5-part hexadecimal string, - * not exceeding 80 characters in length (including the NULL terminator). - * MDEV UUID is displayed only on KVM platform. - * See \ref nvmlConstants::NVML_DEVICE_UUID_BUFFER_SIZE. + * When in heterogeneous mode, a vGPU can concurrently host timesliced vGPUs with differing framebuffer sizes. * - * For Maxwell &tm; or newer fully supported devices. + * API would return an appropriate error code upon unsuccessful activation. For example, the heterogeneous mode + * set will fail with error \ref NVML_ERROR_IN_USE if any vGPU instance is active within the GPU instance. + * The caller of this API is expected to shutdown the vGPU VMs and retry setting the \a mode. + * On successful return, the function updates the vGPU heterogeneous mode with the user provided \a pHeterogeneousMode->mode. 
+ * \a pHeterogeneousMode->version is the version number of the structure nvmlVgpuHeterogeneousMode_t, the caller should + * set the correct version number to set the vGPU heterogeneous mode. * - * @param vgpuInstance Identifier of the target vGPU instance - * @param mdevUuid Pointer to caller-supplied buffer to hold MDEV UUID - * @param size Size of buffer in bytes + * %GB20X_OR_NEWER% + * + * @param gpuInstance The GPU instance handle + * @param pHeterogeneousMode Pointer to the caller-provided structure of nvmlVgpuHeterogeneousMode_t * * @return - * - \ref NVML_SUCCESS successful completion - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_NOT_SUPPORTED on any hypervisor other than KVM - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a mdevUuid is NULL - * - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system - * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a size is too small - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_SUCCESS Upon success + * - \ref NVML_ERROR_UNINITIALIZED If library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a gpuInstance is NULL or invalid, + * or \a pHeterogeneousMode is NULL or \a pHeterogeneousMode->mode is invalid + * or GPU Instance Id is invalid + * - \ref NVML_ERROR_IN_USE If the \a gpuInstance is in use + * - \ref NVML_ERROR_NOT_SUPPORTED If not on a vGPU host or an unsupported GPU + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a pHeterogeneousMode is invalid + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ -nvmlReturn_t DECLDIR nvmlVgpuInstanceGetMdevUUID(nvmlVgpuInstance_t vgpuInstance, char *mdevUuid, unsigned int size); +nvmlReturn_t DECLDIR nvmlGpuInstanceSetVgpuHeterogeneousMode(nvmlGpuInstance_t gpuInstance, const nvmlVgpuHeterogeneousMode_t *pHeterogeneousMode); /** @} */ @@ -9166,8 +10906,8 @@ nvmlReturn_t 
DECLDIR nvmlDeviceGetVgpuMetadata(nvmlDevice_t device, nvmlVgpuPgpu * * @return * - \ref NVML_SUCCESS vGPU metadata structure was successfully returned - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuMetadata or \a pgpuMetadata or \a bufferSize are NULL - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a vgpuMetadata or \a pgpuMetadata or \a bufferSize are NULL + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlGetVgpuCompatibility(nvmlVgpuMetadata_t *vgpuMetadata, nvmlVgpuPgpuMetadata_t *pgpuMetadata, nvmlVgpuPgpuCompatibility_t *compatibilityInfo); @@ -9185,9 +10925,9 @@ nvmlReturn_t DECLDIR nvmlGetVgpuCompatibility(nvmlVgpuMetadata_t *vgpuMetadata, * @return * - \ref NVML_SUCCESS GPU metadata structure was successfully returned * - \ref NVML_ERROR_INSUFFICIENT_SIZE \a pgpuMetadata buffer is too small, required size is returned in \a bufferSize - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a bufferSize is NULL or \a device is invalid; if \a pgpuMetadata is NULL and the value of \a bufferSize is not 0. - * - \ref NVML_ERROR_NOT_SUPPORTED if vGPU is not supported by the system - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a bufferSize is NULL or \a device is invalid; if \a pgpuMetadata is NULL and the value of \a bufferSize is not 0. 
+ * - \ref NVML_ERROR_NOT_SUPPORTED If vGPU is not supported by the system + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetPgpuMetadataString(nvmlDevice_t device, char *pgpuMetadata, unsigned int *bufferSize); @@ -9205,9 +10945,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetPgpuMetadataString(nvmlDevice_t device, char * * * @return * - \ref NVML_SUCCESS vGPU scheduler logs were successfully obtained - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pSchedulerLog is NULL or \a device is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported in current state or \a device not in vGPU host mode - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pSchedulerLog is NULL or \a device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device not in vGPU host mode + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerLog(nvmlDevice_t device, nvmlVgpuSchedulerLog_t *pSchedulerLog); @@ -9222,9 +10962,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerLog(nvmlDevice_t device, nvmlVgpu * * @return * - \ref NVML_SUCCESS vGPU scheduler state is successfully obtained - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pSchedulerState is NULL or \a device is invalid - * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported in current state or \a device not in vGPU host mode - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pSchedulerState is NULL or \a device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device not in vGPU host mode + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerState(nvmlDevice_t device, nvmlVgpuSchedulerGetState_t *pSchedulerState); @@ -9245,9 +10985,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerState(nvmlDevice_t device, nvmlVg * * @return * - \ref NVML_SUCCESS vGPU 
scheduler capabilities were successfully obtained - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pCapabilities is NULL or \a device is invalid + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pCapabilities is NULL or \a device is invalid * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported in current state or \a device not in vGPU host mode - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerCapabilities(nvmlDevice_t device, nvmlVgpuSchedulerCapabilities_t *pCapabilities); @@ -9267,12 +11007,12 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuSchedulerCapabilities(nvmlDevice_t device, * * @return * - \ref NVML_SUCCESS vGPU scheduler state has been successfully set - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pSchedulerState is NULL or \a device is invalid - * - \ref NVML_ERROR_RESET_REQUIRED if setting \a pSchedulerState failed with fatal error, + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a pSchedulerState is NULL or \a device is invalid + * - \ref NVML_ERROR_RESET_REQUIRED If setting \a pSchedulerState failed with fatal error, * reboot is required to overcome from this error. 
- * - \ref NVML_ERROR_NOT_SUPPORTED The API is not supported in current state or \a device not in vGPU host mode + * - \ref NVML_ERROR_NOT_SUPPORTED If MIG is enabled or \a device not in vGPU host mode * or if any vGPU instance currently exists on the \a device - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetVgpuSchedulerState(nvmlDevice_t device, nvmlVgpuSchedulerSetState_t *pSchedulerState); @@ -9424,17 +11164,17 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuUtilization(nvmlDevice_t device, unsigned * @param vgpuUtilInfo Pointer to the caller-provided structure of nvmlVgpuInstancesUtilizationInfo_t * @return - * - \ref NVML_SUCCESS if utilization samples are successfully retrieved - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a vgpuUtilInfo is NULL, or \a vgpuUtilInfo->vgpuInstanceCount is 0 - * - \ref NVML_ERROR_NOT_SUPPORTED if vGPU is not supported by the device - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_VERSION_MISMATCH if the version of \a vgpuUtilInfo is invalid - * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a vgpuUtilInfo->vgpuUtilArray is NULL, or the buffer size of vgpuUtilInfo->vgpuInstanceCount is too small. 
- * The caller should check the current vGPU instance count from the returned vgpuUtilInfo->vgpuInstanceCount, and call - * the function again with a buffer of size vgpuUtilInfo->vgpuInstanceCount * sizeof(nvmlVgpuInstanceUtilizationInfo_t) - * - \ref NVML_ERROR_NOT_FOUND if sample entries are not found - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_SUCCESS If utilization samples are successfully retrieved + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid, \a vgpuUtilInfo is NULL, or \a vgpuUtilInfo->vgpuInstanceCount is 0 + * - \ref NVML_ERROR_NOT_SUPPORTED If vGPU is not supported by the device + * - \ref NVML_ERROR_GPU_IS_LOST If the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a vgpuUtilInfo is invalid + * - \ref NVML_ERROR_INSUFFICIENT_SIZE If \a vgpuUtilInfo->vgpuUtilArray is NULL, or the buffer size of vgpuUtilInfo->vgpuInstanceCount is too small. 
+ * The caller should check the current vGPU instance count from the returned vgpuUtilInfo->vgpuInstanceCount, and call + * the function again with a buffer of size vgpuUtilInfo->vgpuInstanceCount * sizeof(nvmlVgpuInstanceUtilizationInfo_t) + * - \ref NVML_ERROR_NOT_FOUND If sample entries are not found + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuInstancesUtilizationInfo(nvmlDevice_t device, nvmlVgpuInstancesUtilizationInfo_t *vgpuUtilInfo); @@ -9515,19 +11255,19 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuProcessUtilization(nvmlDevice_t device, un * @param vgpuProcUtilInfo Pointer to the caller-provided structure of nvmlVgpuProcessesUtilizationInfo_t * @return - * - \ref NVML_SUCCESS if utilization samples are successfully retrieved - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a vgpuProcUtilInfo is null - * - \ref NVML_ERROR_VERSION_MISMATCH if the version of \a vgpuProcUtilInfo is invalid - * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a vgpuProcUtilInfo->vgpuProcUtilArray is null, or supplied \a vgpuProcUtilInfo->vgpuProcessCount - * is too small to return samples for all processes on vGPU instances currently executing on the device. 
- * The caller should check the current processes count from the returned \a vgpuProcUtilInfo->vgpuProcessCount, - * and call the function again with a buffer of size - * vgpuProcUtilInfo->vgpuProcessCount * sizeof(nvmlVgpuProcessUtilizationSample_t) - * - \ref NVML_ERROR_NOT_SUPPORTED if vGPU is not supported by the device - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_NOT_FOUND if sample entries are not found - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_SUCCESS If utilization samples are successfully retrieved + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid, or \a vgpuProcUtilInfo is null + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the version of \a vgpuProcUtilInfo is invalid + * - \ref NVML_ERROR_INSUFFICIENT_SIZE If \a vgpuProcUtilInfo->vgpuProcUtilArray is null, or supplied \a vgpuProcUtilInfo->vgpuProcessCount + * is too small to return samples for all processes on vGPU instances currently executing on the device. 
+ * The caller should check the current processes count from the returned \a vgpuProcUtilInfo->vgpuProcessCount, + * and call the function again with a buffer of size + * vgpuProcUtilInfo->vgpuProcessCount * sizeof(nvmlVgpuProcessUtilizationSample_t) + * - \ref NVML_ERROR_NOT_SUPPORTED If vGPU is not supported by the device + * - \ref NVML_ERROR_GPU_IS_LOST If the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_NOT_FOUND If sample entries are not found + * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuProcessesUtilizationInfo(nvmlDevice_t device, nvmlVgpuProcessesUtilizationInfo_t *vgpuProcUtilInfo); @@ -9737,10 +11477,31 @@ nvmlReturn_t DECLDIR nvmlGetExcludedDeviceInfoByIndex(unsigned int index, nvmlEx #define NVML_GPU_INSTANCE_PROFILE_7_SLICE 0x4 #define NVML_GPU_INSTANCE_PROFILE_8_SLICE 0x5 #define NVML_GPU_INSTANCE_PROFILE_6_SLICE 0x6 +// 1_SLICE profile with at least one (if supported at all) of Decoder, Encoder, JPEG, OFA engines. #define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV1 0x7 +// 2_SLICE profile with at least one (if supported at all) of Decoder, Encoder, JPEG, OFA engines. #define NVML_GPU_INSTANCE_PROFILE_2_SLICE_REV1 0x8 +// 1_SLICE profile with twice the amount of memory resources. #define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV2 0x9 -#define NVML_GPU_INSTANCE_PROFILE_COUNT 0xA +// 1_SLICE gfx capable profile +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_GFX 0x0A +// 2_SLICE gfx capable profile +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_GFX 0x0B +// 4_SLICE gfx capable profile +#define NVML_GPU_INSTANCE_PROFILE_4_SLICE_GFX 0x0C +// 1_SLICE profile with none of Decode, Encoder, JPEG, OFA engines. +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_NO_ME 0x0D +// 2_SLICE profile with none of Decode, Encoder, JPEG, OFA engines. +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_NO_ME 0x0E +// 1_SLICE profile with all of GPU Decode, Encoder, JPEG, OFA engines. 
+// Allocation of instance of this profile prevents allocation of +// all but _NO_ME profiles. +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_ALL_ME 0x0F +// 2_SLICE profile with all of GPU Decode, Encoder, JPEG, OFA engines. +// Allocation of instance of this profile prevents allocation of +// all but _NO_ME profiles. +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_ALL_ME 0x10 +#define NVML_GPU_INSTANCE_PROFILE_COUNT 0x11 /** * MIG GPU instance profile capability. @@ -9748,7 +11509,9 @@ nvmlReturn_t DECLDIR nvmlGetExcludedDeviceInfoByIndex(unsigned int index, nvmlEx * Bit field values representing MIG profile capabilities * \ref nvmlGpuInstanceProfileInfo_v3_t.capabilities */ -#define NVML_GPU_INTSTANCE_PROFILE_CAPS_P2P 0x1 +#define NVML_GPU_INSTANCE_PROFILE_CAPS_P2P 0x1 +#define NVML_GPU_INTSTANCE_PROFILE_CAPS_P2P 0x1 //!< Deprecated, do not use +#define NVML_GPU_INSTANCE_PROFILE_CAPS_GFX 0x2 /** * MIG compute instance profile capability. @@ -9756,7 +11519,7 @@ nvmlReturn_t DECLDIR nvmlGetExcludedDeviceInfoByIndex(unsigned int index, nvmlEx * Bit field values representing MIG profile capabilities * \ref nvmlComputeInstanceProfileInfo_v3_t.capabilities */ -/* No capabilities for compute profiles currently exposed */ +#define NVML_COMPUTE_INSTANCE_PROFILE_CAPS_GFX 0x1 typedef struct nvmlGpuInstancePlacement_st { @@ -9848,11 +11611,6 @@ typedef struct nvmlGpuInstanceInfo_st nvmlGpuInstancePlacement_t placement; //!< Placement for this instance } nvmlGpuInstanceInfo_t; -typedef struct -{ - struct nvmlGpuInstance_st* handle; -} nvmlGpuInstance_t; - /** * Compute instance profiles. * @@ -10025,6 +11783,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMigMode(nvmlDevice_t device, unsigned int *cur * * Information provided by this API is immutable throughout the lifetime of a MIG mode. 
* + * @note This API can be used to enumerate all MIG profiles supported by NVML in a forward compatible + * way by invoking it on \a profile values starting from 0, until the API returns \ref NVML_ERROR_INVALID_ARGUMENT. + * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * @@ -10264,6 +12025,9 @@ nvmlReturn_t DECLDIR nvmlGpuInstanceGetInfo(nvmlGpuInstance_t gpuInstance, nvmlG * * Information provided by this API is immutable throughout the lifetime of a MIG mode. * + * @note This API can be used to enumerate all MIG profiles supported by NVML in a forward compatible + * way by invoking it on \a profile values starting from 0, until the API returns \ref NVML_ERROR_INVALID_ARGUMENT. + * * For Ampere &tm; or newer fully supported devices. * Supported on Linux only. * @@ -10657,76 +12421,188 @@ nvmlReturn_t DECLDIR nvmlDeviceGetDeviceHandleFromMigDeviceHandle(nvmlDevice_t m */ typedef enum { - NVML_GPM_METRIC_GRAPHICS_UTIL = 1, //!< Percentage of time any compute/graphics app was active on the GPU. 0.0 - 100.0 - NVML_GPM_METRIC_SM_UTIL = 2, //!< Percentage of SMs that were busy. 0.0 - 100.0 - NVML_GPM_METRIC_SM_OCCUPANCY = 3, //!< Percentage of warps that were active vs theoretical maximum. 0.0 - 100.0 - NVML_GPM_METRIC_INTEGER_UTIL = 4, //!< Percentage of time the GPU's SMs were doing integer operations. 0.0 - 100.0 - NVML_GPM_METRIC_ANY_TENSOR_UTIL = 5, //!< Percentage of time the GPU's SMs were doing ANY tensor operations. 0.0 - 100.0 - NVML_GPM_METRIC_DFMA_TENSOR_UTIL = 6, //!< Percentage of time the GPU's SMs were doing DFMA tensor operations. 0.0 - 100.0 - NVML_GPM_METRIC_HMMA_TENSOR_UTIL = 7, //!< Percentage of time the GPU's SMs were doing HMMA tensor operations. 0.0 - 100.0 - NVML_GPM_METRIC_IMMA_TENSOR_UTIL = 9, //!< Percentage of time the GPU's SMs were doing IMMA tensor operations. 0.0 - 100.0 - NVML_GPM_METRIC_DRAM_BW_UTIL = 10, //!< Percentage of DRAM bw used vs theoretical maximum. 
0.0 - 100.0 */ - NVML_GPM_METRIC_FP64_UTIL = 11, //!< Percentage of time the GPU's SMs were doing non-tensor FP64 math. 0.0 - 100.0 - NVML_GPM_METRIC_FP32_UTIL = 12, //!< Percentage of time the GPU's SMs were doing non-tensor FP32 math. 0.0 - 100.0 - NVML_GPM_METRIC_FP16_UTIL = 13, //!< Percentage of time the GPU's SMs were doing non-tensor FP16 math. 0.0 - 100.0 - NVML_GPM_METRIC_PCIE_TX_PER_SEC = 20, //!< PCIe traffic from this GPU in MiB/sec - NVML_GPM_METRIC_PCIE_RX_PER_SEC = 21, //!< PCIe traffic to this GPU in MiB/sec - NVML_GPM_METRIC_NVDEC_0_UTIL = 30, //!< Percent utilization of NVDEC 0. 0.0 - 100.0 - NVML_GPM_METRIC_NVDEC_1_UTIL = 31, //!< Percent utilization of NVDEC 1. 0.0 - 100.0 - NVML_GPM_METRIC_NVDEC_2_UTIL = 32, //!< Percent utilization of NVDEC 2. 0.0 - 100.0 - NVML_GPM_METRIC_NVDEC_3_UTIL = 33, //!< Percent utilization of NVDEC 3. 0.0 - 100.0 - NVML_GPM_METRIC_NVDEC_4_UTIL = 34, //!< Percent utilization of NVDEC 4. 0.0 - 100.0 - NVML_GPM_METRIC_NVDEC_5_UTIL = 35, //!< Percent utilization of NVDEC 5. 0.0 - 100.0 - NVML_GPM_METRIC_NVDEC_6_UTIL = 36, //!< Percent utilization of NVDEC 6. 0.0 - 100.0 - NVML_GPM_METRIC_NVDEC_7_UTIL = 37, //!< Percent utilization of NVDEC 7. 0.0 - 100.0 - NVML_GPM_METRIC_NVJPG_0_UTIL = 40, //!< Percent utilization of NVJPG 0. 0.0 - 100.0 - NVML_GPM_METRIC_NVJPG_1_UTIL = 41, //!< Percent utilization of NVJPG 1. 0.0 - 100.0 - NVML_GPM_METRIC_NVJPG_2_UTIL = 42, //!< Percent utilization of NVJPG 2. 0.0 - 100.0 - NVML_GPM_METRIC_NVJPG_3_UTIL = 43, //!< Percent utilization of NVJPG 3. 0.0 - 100.0 - NVML_GPM_METRIC_NVJPG_4_UTIL = 44, //!< Percent utilization of NVJPG 4. 0.0 - 100.0 - NVML_GPM_METRIC_NVJPG_5_UTIL = 45, //!< Percent utilization of NVJPG 5. 0.0 - 100.0 - NVML_GPM_METRIC_NVJPG_6_UTIL = 46, //!< Percent utilization of NVJPG 6. 0.0 - 100.0 - NVML_GPM_METRIC_NVJPG_7_UTIL = 47, //!< Percent utilization of NVJPG 7. 0.0 - 100.0 - NVML_GPM_METRIC_NVOFA_0_UTIL = 50, //!< Percent utilization of NVOFA 0. 
0.0 - 100.0 - NVML_GPM_METRIC_NVLINK_TOTAL_RX_PER_SEC = 60, //!< NvLink read bandwidth for all links in MiB/sec - NVML_GPM_METRIC_NVLINK_TOTAL_TX_PER_SEC = 61, //!< NvLink write bandwidth for all links in MiB/sec - NVML_GPM_METRIC_NVLINK_L0_RX_PER_SEC = 62, //!< NvLink read bandwidth for link 0 in MiB/sec - NVML_GPM_METRIC_NVLINK_L0_TX_PER_SEC = 63, //!< NvLink write bandwidth for link 0 in MiB/sec - NVML_GPM_METRIC_NVLINK_L1_RX_PER_SEC = 64, //!< NvLink read bandwidth for link 1 in MiB/sec - NVML_GPM_METRIC_NVLINK_L1_TX_PER_SEC = 65, //!< NvLink write bandwidth for link 1 in MiB/sec - NVML_GPM_METRIC_NVLINK_L2_RX_PER_SEC = 66, //!< NvLink read bandwidth for link 2 in MiB/sec - NVML_GPM_METRIC_NVLINK_L2_TX_PER_SEC = 67, //!< NvLink write bandwidth for link 2 in MiB/sec - NVML_GPM_METRIC_NVLINK_L3_RX_PER_SEC = 68, //!< NvLink read bandwidth for link 3 in MiB/sec - NVML_GPM_METRIC_NVLINK_L3_TX_PER_SEC = 69, //!< NvLink write bandwidth for link 3 in MiB/sec - NVML_GPM_METRIC_NVLINK_L4_RX_PER_SEC = 70, //!< NvLink read bandwidth for link 4 in MiB/sec - NVML_GPM_METRIC_NVLINK_L4_TX_PER_SEC = 71, //!< NvLink write bandwidth for link 4 in MiB/sec - NVML_GPM_METRIC_NVLINK_L5_RX_PER_SEC = 72, //!< NvLink read bandwidth for link 5 in MiB/sec - NVML_GPM_METRIC_NVLINK_L5_TX_PER_SEC = 73, //!< NvLink write bandwidth for link 5 in MiB/sec - NVML_GPM_METRIC_NVLINK_L6_RX_PER_SEC = 74, //!< NvLink read bandwidth for link 6 in MiB/sec - NVML_GPM_METRIC_NVLINK_L6_TX_PER_SEC = 75, //!< NvLink write bandwidth for link 6 in MiB/sec - NVML_GPM_METRIC_NVLINK_L7_RX_PER_SEC = 76, //!< NvLink read bandwidth for link 7 in MiB/sec - NVML_GPM_METRIC_NVLINK_L7_TX_PER_SEC = 77, //!< NvLink write bandwidth for link 7 in MiB/sec - NVML_GPM_METRIC_NVLINK_L8_RX_PER_SEC = 78, //!< NvLink read bandwidth for link 8 in MiB/sec - NVML_GPM_METRIC_NVLINK_L8_TX_PER_SEC = 79, //!< NvLink write bandwidth for link 8 in MiB/sec - NVML_GPM_METRIC_NVLINK_L9_RX_PER_SEC = 80, //!< NvLink read bandwidth for link 9 in 
MiB/sec - NVML_GPM_METRIC_NVLINK_L9_TX_PER_SEC = 81, //!< NvLink write bandwidth for link 9 in MiB/sec - NVML_GPM_METRIC_NVLINK_L10_RX_PER_SEC = 82, //!< NvLink read bandwidth for link 10 in MiB/sec - NVML_GPM_METRIC_NVLINK_L10_TX_PER_SEC = 83, //!< NvLink write bandwidth for link 10 in MiB/sec - NVML_GPM_METRIC_NVLINK_L11_RX_PER_SEC = 84, //!< NvLink read bandwidth for link 11 in MiB/sec - NVML_GPM_METRIC_NVLINK_L11_TX_PER_SEC = 85, //!< NvLink write bandwidth for link 11 in MiB/sec - NVML_GPM_METRIC_NVLINK_L12_RX_PER_SEC = 86, //!< NvLink read bandwidth for link 12 in MiB/sec - NVML_GPM_METRIC_NVLINK_L12_TX_PER_SEC = 87, //!< NvLink write bandwidth for link 12 in MiB/sec - NVML_GPM_METRIC_NVLINK_L13_RX_PER_SEC = 88, //!< NvLink read bandwidth for link 13 in MiB/sec - NVML_GPM_METRIC_NVLINK_L13_TX_PER_SEC = 89, //!< NvLink write bandwidth for link 13 in MiB/sec - NVML_GPM_METRIC_NVLINK_L14_RX_PER_SEC = 90, //!< NvLink read bandwidth for link 14 in MiB/sec - NVML_GPM_METRIC_NVLINK_L14_TX_PER_SEC = 91, //!< NvLink write bandwidth for link 14 in MiB/sec - NVML_GPM_METRIC_NVLINK_L15_RX_PER_SEC = 92, //!< NvLink read bandwidth for link 15 in MiB/sec - NVML_GPM_METRIC_NVLINK_L15_TX_PER_SEC = 93, //!< NvLink write bandwidth for link 15 in MiB/sec - NVML_GPM_METRIC_NVLINK_L16_RX_PER_SEC = 94, //!< NvLink read bandwidth for link 16 in MiB/sec - NVML_GPM_METRIC_NVLINK_L16_TX_PER_SEC = 95, //!< NvLink write bandwidth for link 16 in MiB/sec - NVML_GPM_METRIC_NVLINK_L17_RX_PER_SEC = 96, //!< NvLink read bandwidth for link 17 in MiB/sec - NVML_GPM_METRIC_NVLINK_L17_TX_PER_SEC = 97, //!< NvLink write bandwidth for link 17 in MiB/sec - NVML_GPM_METRIC_MAX = 98, //!< Maximum value above +1. Note that changing this should also change NVML_GPM_METRICS_GET_VERSION due to struct size change + NVML_GPM_METRIC_GRAPHICS_UTIL = 1, //!< Percentage of time any compute/graphics app was active on the GPU. 0.0 - 100.0 + NVML_GPM_METRIC_SM_UTIL = 2, //!< Percentage of SMs that were busy. 
0.0 - 100.0 + NVML_GPM_METRIC_SM_OCCUPANCY = 3, //!< Percentage of warps that were active vs theoretical maximum. 0.0 - 100.0 + NVML_GPM_METRIC_INTEGER_UTIL = 4, //!< Percentage of time the GPU's SMs were doing integer operations. 0.0 - 100.0 + NVML_GPM_METRIC_ANY_TENSOR_UTIL = 5, //!< Percentage of time the GPU's SMs were doing ANY tensor operations. 0.0 - 100.0 + NVML_GPM_METRIC_DFMA_TENSOR_UTIL = 6, //!< Percentage of time the GPU's SMs were doing DFMA tensor operations. 0.0 - 100.0 + NVML_GPM_METRIC_HMMA_TENSOR_UTIL = 7, //!< Percentage of time the GPU's SMs were doing HMMA tensor operations. 0.0 - 100.0 + NVML_GPM_METRIC_IMMA_TENSOR_UTIL = 9, //!< Percentage of time the GPU's SMs were doing IMMA tensor operations. 0.0 - 100.0 + NVML_GPM_METRIC_DRAM_BW_UTIL = 10, //!< Percentage of DRAM bw used vs theoretical maximum. 0.0 - 100.0 */ + NVML_GPM_METRIC_FP64_UTIL = 11, //!< Percentage of time the GPU's SMs were doing non-tensor FP64 math. 0.0 - 100.0 + NVML_GPM_METRIC_FP32_UTIL = 12, //!< Percentage of time the GPU's SMs were doing non-tensor FP32 math. 0.0 - 100.0 + NVML_GPM_METRIC_FP16_UTIL = 13, //!< Percentage of time the GPU's SMs were doing non-tensor FP16 math. 0.0 - 100.0 + NVML_GPM_METRIC_PCIE_TX_PER_SEC = 20, //!< PCIe traffic from this GPU in MiB/sec + NVML_GPM_METRIC_PCIE_RX_PER_SEC = 21, //!< PCIe traffic to this GPU in MiB/sec + NVML_GPM_METRIC_NVDEC_0_UTIL = 30, //!< Percent utilization of NVDEC 0. 0.0 - 100.0 + NVML_GPM_METRIC_NVDEC_1_UTIL = 31, //!< Percent utilization of NVDEC 1. 0.0 - 100.0 + NVML_GPM_METRIC_NVDEC_2_UTIL = 32, //!< Percent utilization of NVDEC 2. 0.0 - 100.0 + NVML_GPM_METRIC_NVDEC_3_UTIL = 33, //!< Percent utilization of NVDEC 3. 0.0 - 100.0 + NVML_GPM_METRIC_NVDEC_4_UTIL = 34, //!< Percent utilization of NVDEC 4. 0.0 - 100.0 + NVML_GPM_METRIC_NVDEC_5_UTIL = 35, //!< Percent utilization of NVDEC 5. 0.0 - 100.0 + NVML_GPM_METRIC_NVDEC_6_UTIL = 36, //!< Percent utilization of NVDEC 6. 
0.0 - 100.0 + NVML_GPM_METRIC_NVDEC_7_UTIL = 37, //!< Percent utilization of NVDEC 7. 0.0 - 100.0 + NVML_GPM_METRIC_NVJPG_0_UTIL = 40, //!< Percent utilization of NVJPG 0. 0.0 - 100.0 + NVML_GPM_METRIC_NVJPG_1_UTIL = 41, //!< Percent utilization of NVJPG 1. 0.0 - 100.0 + NVML_GPM_METRIC_NVJPG_2_UTIL = 42, //!< Percent utilization of NVJPG 2. 0.0 - 100.0 + NVML_GPM_METRIC_NVJPG_3_UTIL = 43, //!< Percent utilization of NVJPG 3. 0.0 - 100.0 + NVML_GPM_METRIC_NVJPG_4_UTIL = 44, //!< Percent utilization of NVJPG 4. 0.0 - 100.0 + NVML_GPM_METRIC_NVJPG_5_UTIL = 45, //!< Percent utilization of NVJPG 5. 0.0 - 100.0 + NVML_GPM_METRIC_NVJPG_6_UTIL = 46, //!< Percent utilization of NVJPG 6. 0.0 - 100.0 + NVML_GPM_METRIC_NVJPG_7_UTIL = 47, //!< Percent utilization of NVJPG 7. 0.0 - 100.0 + NVML_GPM_METRIC_NVOFA_0_UTIL = 50, //!< Percent utilization of NVOFA 0. 0.0 - 100.0 + NVML_GPM_METRIC_NVOFA_1_UTIL = 51, //!< Percent utilization of NVOFA 1. 0.0 - 100.0 + NVML_GPM_METRIC_NVLINK_TOTAL_RX_PER_SEC = 60, //!< NvLink read bandwidth for all links in MiB/sec + NVML_GPM_METRIC_NVLINK_TOTAL_TX_PER_SEC = 61, //!< NvLink write bandwidth for all links in MiB/sec + NVML_GPM_METRIC_NVLINK_L0_RX_PER_SEC = 62, //!< NvLink read bandwidth for link 0 in MiB/sec + NVML_GPM_METRIC_NVLINK_L0_TX_PER_SEC = 63, //!< NvLink write bandwidth for link 0 in MiB/sec + NVML_GPM_METRIC_NVLINK_L1_RX_PER_SEC = 64, //!< NvLink read bandwidth for link 1 in MiB/sec + NVML_GPM_METRIC_NVLINK_L1_TX_PER_SEC = 65, //!< NvLink write bandwidth for link 1 in MiB/sec + NVML_GPM_METRIC_NVLINK_L2_RX_PER_SEC = 66, //!< NvLink read bandwidth for link 2 in MiB/sec + NVML_GPM_METRIC_NVLINK_L2_TX_PER_SEC = 67, //!< NvLink write bandwidth for link 2 in MiB/sec + NVML_GPM_METRIC_NVLINK_L3_RX_PER_SEC = 68, //!< NvLink read bandwidth for link 3 in MiB/sec + NVML_GPM_METRIC_NVLINK_L3_TX_PER_SEC = 69, //!< NvLink write bandwidth for link 3 in MiB/sec + NVML_GPM_METRIC_NVLINK_L4_RX_PER_SEC = 70, //!< NvLink read bandwidth for link 4 
in MiB/sec + NVML_GPM_METRIC_NVLINK_L4_TX_PER_SEC = 71, //!< NvLink write bandwidth for link 4 in MiB/sec + NVML_GPM_METRIC_NVLINK_L5_RX_PER_SEC = 72, //!< NvLink read bandwidth for link 5 in MiB/sec + NVML_GPM_METRIC_NVLINK_L5_TX_PER_SEC = 73, //!< NvLink write bandwidth for link 5 in MiB/sec + NVML_GPM_METRIC_NVLINK_L6_RX_PER_SEC = 74, //!< NvLink read bandwidth for link 6 in MiB/sec + NVML_GPM_METRIC_NVLINK_L6_TX_PER_SEC = 75, //!< NvLink write bandwidth for link 6 in MiB/sec + NVML_GPM_METRIC_NVLINK_L7_RX_PER_SEC = 76, //!< NvLink read bandwidth for link 7 in MiB/sec + NVML_GPM_METRIC_NVLINK_L7_TX_PER_SEC = 77, //!< NvLink write bandwidth for link 7 in MiB/sec + NVML_GPM_METRIC_NVLINK_L8_RX_PER_SEC = 78, //!< NvLink read bandwidth for link 8 in MiB/sec + NVML_GPM_METRIC_NVLINK_L8_TX_PER_SEC = 79, //!< NvLink write bandwidth for link 8 in MiB/sec + NVML_GPM_METRIC_NVLINK_L9_RX_PER_SEC = 80, //!< NvLink read bandwidth for link 9 in MiB/sec + NVML_GPM_METRIC_NVLINK_L9_TX_PER_SEC = 81, //!< NvLink write bandwidth for link 9 in MiB/sec + NVML_GPM_METRIC_NVLINK_L10_RX_PER_SEC = 82, //!< NvLink read bandwidth for link 10 in MiB/sec + NVML_GPM_METRIC_NVLINK_L10_TX_PER_SEC = 83, //!< NvLink write bandwidth for link 10 in MiB/sec + NVML_GPM_METRIC_NVLINK_L11_RX_PER_SEC = 84, //!< NvLink read bandwidth for link 11 in MiB/sec + NVML_GPM_METRIC_NVLINK_L11_TX_PER_SEC = 85, //!< NvLink write bandwidth for link 11 in MiB/sec + NVML_GPM_METRIC_NVLINK_L12_RX_PER_SEC = 86, //!< NvLink read bandwidth for link 12 in MiB/sec + NVML_GPM_METRIC_NVLINK_L12_TX_PER_SEC = 87, //!< NvLink write bandwidth for link 12 in MiB/sec + NVML_GPM_METRIC_NVLINK_L13_RX_PER_SEC = 88, //!< NvLink read bandwidth for link 13 in MiB/sec + NVML_GPM_METRIC_NVLINK_L13_TX_PER_SEC = 89, //!< NvLink write bandwidth for link 13 in MiB/sec + NVML_GPM_METRIC_NVLINK_L14_RX_PER_SEC = 90, //!< NvLink read bandwidth for link 14 in MiB/sec + NVML_GPM_METRIC_NVLINK_L14_TX_PER_SEC = 91, //!< NvLink write bandwidth for 
link 14 in MiB/sec + NVML_GPM_METRIC_NVLINK_L15_RX_PER_SEC = 92, //!< NvLink read bandwidth for link 15 in MiB/sec + NVML_GPM_METRIC_NVLINK_L15_TX_PER_SEC = 93, //!< NvLink write bandwidth for link 15 in MiB/sec + NVML_GPM_METRIC_NVLINK_L16_RX_PER_SEC = 94, //!< NvLink read bandwidth for link 16 in MiB/sec + NVML_GPM_METRIC_NVLINK_L16_TX_PER_SEC = 95, //!< NvLink write bandwidth for link 16 in MiB/sec + NVML_GPM_METRIC_NVLINK_L17_RX_PER_SEC = 96, //!< NvLink read bandwidth for link 17 in MiB/sec + NVML_GPM_METRIC_NVLINK_L17_TX_PER_SEC = 97, //!< NvLink write bandwidth for link 17 in MiB/sec + //Put new metrics for BLACKWELL here... + NVML_GPM_METRIC_C2C_TOTAL_TX_PER_SEC = 100, + NVML_GPM_METRIC_C2C_TOTAL_RX_PER_SEC = 101, + NVML_GPM_METRIC_C2C_DATA_TX_PER_SEC = 102, + NVML_GPM_METRIC_C2C_DATA_RX_PER_SEC = 103, + NVML_GPM_METRIC_C2C_LINK0_TOTAL_TX_PER_SEC = 104, + NVML_GPM_METRIC_C2C_LINK0_TOTAL_RX_PER_SEC = 105, + NVML_GPM_METRIC_C2C_LINK0_DATA_TX_PER_SEC = 106, + NVML_GPM_METRIC_C2C_LINK0_DATA_RX_PER_SEC = 107, + NVML_GPM_METRIC_C2C_LINK1_TOTAL_TX_PER_SEC = 108, + NVML_GPM_METRIC_C2C_LINK1_TOTAL_RX_PER_SEC = 109, + NVML_GPM_METRIC_C2C_LINK1_DATA_TX_PER_SEC = 110, + NVML_GPM_METRIC_C2C_LINK1_DATA_RX_PER_SEC = 111, + NVML_GPM_METRIC_C2C_LINK2_TOTAL_TX_PER_SEC = 112, + NVML_GPM_METRIC_C2C_LINK2_TOTAL_RX_PER_SEC = 113, + NVML_GPM_METRIC_C2C_LINK2_DATA_TX_PER_SEC = 114, + NVML_GPM_METRIC_C2C_LINK2_DATA_RX_PER_SEC = 115, + NVML_GPM_METRIC_C2C_LINK3_TOTAL_TX_PER_SEC = 116, + NVML_GPM_METRIC_C2C_LINK3_TOTAL_RX_PER_SEC = 117, + NVML_GPM_METRIC_C2C_LINK3_DATA_TX_PER_SEC = 118, + NVML_GPM_METRIC_C2C_LINK3_DATA_RX_PER_SEC = 119, + NVML_GPM_METRIC_C2C_LINK4_TOTAL_TX_PER_SEC = 120, + NVML_GPM_METRIC_C2C_LINK4_TOTAL_RX_PER_SEC = 121, + NVML_GPM_METRIC_C2C_LINK4_DATA_TX_PER_SEC = 122, + NVML_GPM_METRIC_C2C_LINK4_DATA_RX_PER_SEC = 123, + NVML_GPM_METRIC_C2C_LINK5_TOTAL_TX_PER_SEC = 124, + NVML_GPM_METRIC_C2C_LINK5_TOTAL_RX_PER_SEC = 125, + NVML_GPM_METRIC_C2C_LINK5_DATA_TX_PER_SEC 
= 126, + NVML_GPM_METRIC_C2C_LINK5_DATA_RX_PER_SEC = 127, + NVML_GPM_METRIC_C2C_LINK6_TOTAL_TX_PER_SEC = 128, + NVML_GPM_METRIC_C2C_LINK6_TOTAL_RX_PER_SEC = 129, + NVML_GPM_METRIC_C2C_LINK6_DATA_TX_PER_SEC = 130, + NVML_GPM_METRIC_C2C_LINK6_DATA_RX_PER_SEC = 131, + NVML_GPM_METRIC_C2C_LINK7_TOTAL_TX_PER_SEC = 132, + NVML_GPM_METRIC_C2C_LINK7_TOTAL_RX_PER_SEC = 133, + NVML_GPM_METRIC_C2C_LINK7_DATA_TX_PER_SEC = 134, + NVML_GPM_METRIC_C2C_LINK7_DATA_RX_PER_SEC = 135, + NVML_GPM_METRIC_C2C_LINK8_TOTAL_TX_PER_SEC = 136, + NVML_GPM_METRIC_C2C_LINK8_TOTAL_RX_PER_SEC = 137, + NVML_GPM_METRIC_C2C_LINK8_DATA_TX_PER_SEC = 138, + NVML_GPM_METRIC_C2C_LINK8_DATA_RX_PER_SEC = 139, + NVML_GPM_METRIC_C2C_LINK9_TOTAL_TX_PER_SEC = 140, + NVML_GPM_METRIC_C2C_LINK9_TOTAL_RX_PER_SEC = 141, + NVML_GPM_METRIC_C2C_LINK9_DATA_TX_PER_SEC = 142, + NVML_GPM_METRIC_C2C_LINK9_DATA_RX_PER_SEC = 143, + NVML_GPM_METRIC_C2C_LINK10_TOTAL_TX_PER_SEC = 144, + NVML_GPM_METRIC_C2C_LINK10_TOTAL_RX_PER_SEC = 145, + NVML_GPM_METRIC_C2C_LINK10_DATA_TX_PER_SEC = 146, + NVML_GPM_METRIC_C2C_LINK10_DATA_RX_PER_SEC = 147, + NVML_GPM_METRIC_C2C_LINK11_TOTAL_TX_PER_SEC = 148, + NVML_GPM_METRIC_C2C_LINK11_TOTAL_RX_PER_SEC = 149, + NVML_GPM_METRIC_C2C_LINK11_DATA_TX_PER_SEC = 150, + NVML_GPM_METRIC_C2C_LINK11_DATA_RX_PER_SEC = 151, + NVML_GPM_METRIC_C2C_LINK12_TOTAL_TX_PER_SEC = 152, + NVML_GPM_METRIC_C2C_LINK12_TOTAL_RX_PER_SEC = 153, + NVML_GPM_METRIC_C2C_LINK12_DATA_TX_PER_SEC = 154, + NVML_GPM_METRIC_C2C_LINK12_DATA_RX_PER_SEC = 155, + NVML_GPM_METRIC_C2C_LINK13_TOTAL_TX_PER_SEC = 156, + NVML_GPM_METRIC_C2C_LINK13_TOTAL_RX_PER_SEC = 157, + NVML_GPM_METRIC_C2C_LINK13_DATA_TX_PER_SEC = 158, + NVML_GPM_METRIC_C2C_LINK13_DATA_RX_PER_SEC = 159, + NVML_GPM_METRIC_HOSTMEM_CACHE_HIT = 160, + NVML_GPM_METRIC_HOSTMEM_CACHE_MISS = 161, + NVML_GPM_METRIC_PEERMEM_CACHE_HIT = 162, + NVML_GPM_METRIC_PEERMEM_CACHE_MISS = 163, + NVML_GPM_METRIC_DRAM_CACHE_HIT = 164, + NVML_GPM_METRIC_DRAM_CACHE_MISS = 165, + 
NVML_GPM_METRIC_NVENC_0_UTIL = 166, + NVML_GPM_METRIC_NVENC_1_UTIL = 167, + NVML_GPM_METRIC_NVENC_2_UTIL = 168, + NVML_GPM_METRIC_NVENC_3_UTIL = 169, + NVML_GPM_METRIC_GR0_CTXSW_CYCLES_ELAPSED = 170, + NVML_GPM_METRIC_GR0_CTXSW_CYCLES_ACTIVE = 171, + NVML_GPM_METRIC_GR0_CTXSW_REQUESTS = 172, + NVML_GPM_METRIC_GR0_CTXSW_CYCLES_PER_REQ = 173, + NVML_GPM_METRIC_GR0_CTXSW_ACTIVE_PCT = 174, + NVML_GPM_METRIC_GR1_CTXSW_CYCLES_ELAPSED = 175, + NVML_GPM_METRIC_GR1_CTXSW_CYCLES_ACTIVE = 176, + NVML_GPM_METRIC_GR1_CTXSW_REQUESTS = 177, + NVML_GPM_METRIC_GR1_CTXSW_CYCLES_PER_REQ = 178, + NVML_GPM_METRIC_GR1_CTXSW_ACTIVE_PCT = 179, + NVML_GPM_METRIC_GR2_CTXSW_CYCLES_ELAPSED = 180, + NVML_GPM_METRIC_GR2_CTXSW_CYCLES_ACTIVE = 181, + NVML_GPM_METRIC_GR2_CTXSW_REQUESTS = 182, + NVML_GPM_METRIC_GR2_CTXSW_CYCLES_PER_REQ = 183, + NVML_GPM_METRIC_GR2_CTXSW_ACTIVE_PCT = 184, + NVML_GPM_METRIC_GR3_CTXSW_CYCLES_ELAPSED = 185, + NVML_GPM_METRIC_GR3_CTXSW_CYCLES_ACTIVE = 186, + NVML_GPM_METRIC_GR3_CTXSW_REQUESTS = 187, + NVML_GPM_METRIC_GR3_CTXSW_CYCLES_PER_REQ = 188, + NVML_GPM_METRIC_GR3_CTXSW_ACTIVE_PCT = 189, + NVML_GPM_METRIC_GR4_CTXSW_CYCLES_ELAPSED = 190, + NVML_GPM_METRIC_GR4_CTXSW_CYCLES_ACTIVE = 191, + NVML_GPM_METRIC_GR4_CTXSW_REQUESTS = 192, + NVML_GPM_METRIC_GR4_CTXSW_CYCLES_PER_REQ = 193, + NVML_GPM_METRIC_GR4_CTXSW_ACTIVE_PCT = 194, + NVML_GPM_METRIC_GR5_CTXSW_CYCLES_ELAPSED = 195, + NVML_GPM_METRIC_GR5_CTXSW_CYCLES_ACTIVE = 196, + NVML_GPM_METRIC_GR5_CTXSW_REQUESTS = 197, + NVML_GPM_METRIC_GR5_CTXSW_CYCLES_PER_REQ = 198, + NVML_GPM_METRIC_GR5_CTXSW_ACTIVE_PCT = 199, + NVML_GPM_METRIC_GR6_CTXSW_CYCLES_ELAPSED = 200, + NVML_GPM_METRIC_GR6_CTXSW_CYCLES_ACTIVE = 201, + NVML_GPM_METRIC_GR6_CTXSW_REQUESTS = 202, + NVML_GPM_METRIC_GR6_CTXSW_CYCLES_PER_REQ = 203, + NVML_GPM_METRIC_GR6_CTXSW_ACTIVE_PCT = 204, + NVML_GPM_METRIC_GR7_CTXSW_CYCLES_ELAPSED = 205, + NVML_GPM_METRIC_GR7_CTXSW_CYCLES_ACTIVE = 206, + NVML_GPM_METRIC_GR7_CTXSW_REQUESTS = 207, + 
NVML_GPM_METRIC_GR7_CTXSW_CYCLES_PER_REQ = 208, + NVML_GPM_METRIC_GR7_CTXSW_ACTIVE_PCT = 209, + NVML_GPM_METRIC_MAX = 210, //!< Maximum value above +1. Note that changing this should also change NVML_GPM_METRICS_GET_VERSION due to struct size change } nvmlGpmMetricId_t; /** @} */ // @defgroup nvmlGpmEnums @@ -10757,7 +12633,7 @@ typedef struct { */ typedef struct { - unsigned int metricId; //!< IN: NVML_GPM_METRIC_? #define of which metric to retrieve + unsigned int metricId; //!< IN: NVML_GPM_METRIC_? define of which metric to retrieve nvmlReturn_t nvmlReturn; //!< OUT: Status of this metric. If this is nonzero, then value is not valid double value; //!< OUT: Value of this metric. Is only valid if nvmlReturn is 0 (NVML_SUCCESS) nvmlGpmMetricMetricInfo_t metricInfo; //!< OUT: Metric name and unit. Those can be NULL if not defined @@ -10801,6 +12677,14 @@ typedef struct * * For Hopper &tm; or newer fully supported devices. * + * To retrieve metrics, the user must first allocate the two sample buffers at \a metricsGet->sample1 + * and \a metricsGet->sample2 by calling \a nvmlGpmSampleAlloc(). Next, the user should fill in the ID of each metric + * in \a metricsGet->metrics[i].metricId and specify the total number of metrics to retrieve in \a metricsGet->numMetrics, + * The version should be set to NVML_GPM_METRICS_GET_VERSION in \a metricsGet->version. The user then calls the + * \a nvmlGpmSampleGet() API twice to obtain 2 samples of counters. \note that the interval between these + * two \a nvmlGpmSampleGet() calls should be greater than 100ms due to the internal sample refresh rate. + * Finally, the user calls \a nvmlGpmMetricsGet to retrieve the metrics, which will be stored at \a metricsGet->metrics + * * @param metricsGet IN/OUT: populated \a nvmlGpmMetricsGet_t struct * * @return @@ -10846,6 +12730,9 @@ nvmlReturn_t DECLDIR nvmlGpmSampleAlloc(nvmlGpmSample_t *gpmSample); * * For Hopper &tm; or newer fully supported devices. 
* + * @note The interval between two \a nvmlGpmSampleGet() calls should be greater than 100ms due to + * the internal sample refresh rate. + * * @param device Device to get samples for * @param gpmSample Buffer to read samples into * @@ -10863,6 +12750,9 @@ nvmlReturn_t DECLDIR nvmlGpmSampleGet(nvmlDevice_t device, nvmlGpmSample_t gpmSa * * For Hopper &tm; or newer fully supported devices. * + * @note The interval between two \a nvmlGpmMigSampleGet() calls should be greater than 100ms due to + * the internal sample refresh rate. + * * @param device Device to get samples for * @param gpuInstanceId MIG GPU Instance ID * @param gpmSample Buffer to read samples into @@ -10876,6 +12766,8 @@ nvmlReturn_t DECLDIR nvmlGpmMigSampleGet(nvmlDevice_t device, unsigned int gpuIn /** * Indicate whether the supplied device supports GPM * + * For Hopper &tm; or newer fully supported devices. + * * @param device NVML device to query for * @param gpmSupport Structure to indicate GPM support \a nvmlGpmSupport_t. Indicates * GPM support per system for the supplied device @@ -10890,7 +12782,7 @@ nvmlReturn_t DECLDIR nvmlGpmQueryDeviceSupport(nvmlDevice_t device, nvmlGpmSuppo /** * Get GPM stream state. * - * %HOPPER_OR_NEWER% + * For Hopper &tm; or newer fully supported devices. * Supported on Linux, Windows TCC. * * @param device The identifier of the target device @@ -10908,7 +12800,7 @@ nvmlReturn_t DECLDIR nvmlGpmQueryIfStreamingEnabled(nvmlDevice_t device, unsigne /** * Set GPM stream state. * - * %HOPPER_OR_NEWER% + * For Hopper &tm; or newer fully supported devices. * Supported on Linux, Windows TCC. 
* * @param device The identifier of the target device @@ -10926,118 +12818,348 @@ nvmlReturn_t DECLDIR nvmlGpmSetStreamingEnabled(nvmlDevice_t device, unsigned in /** @} */ // @defgroup nvmlGpmFunctions /** @} */ // @defgroup GPM -#define NVML_NVLINK_POWER_STATE_HIGH_SPEED 0x0 -#define NVML_NVLINK_POWER_STATE_LOW 0x1 - -#define NVML_NVLINK_LOW_POWER_THRESHOLD_MIN 0x1 -#define NVML_NVLINK_LOW_POWER_THRESHOLD_MAX 0x1FFF -#define NVML_NVLINK_LOW_POWER_THRESHOLD_RESET 0xFFFFFFFF - -/* Structure containing Low Power parameters */ -typedef struct nvmlNvLinkPowerThres_st +#define NVML_DEV_CAP_EGM (1 << 0) // Extended GPU memory +/** + * Device capabilities + */ +typedef struct { - unsigned int lowPwrThreshold; //!< Low power threshold (in units of 100us) -} nvmlNvLinkPowerThres_t; + unsigned int version; //!< the API version number + unsigned int capMask; //!< OUT: Bit mask of capabilities. +} nvmlDeviceCapabilities_v1_t; +typedef nvmlDeviceCapabilities_v1_t nvmlDeviceCapabilities_t; +#define nvmlDeviceCapabilities_v1 NVML_STRUCT_VERSION(DeviceCapabilities, 1) /** - * Set NvLink Low Power Threshold for device. + * Get device capabilities * - * %HOPPER_OR_NEWER% + * See \ref nvmlDeviceCapabilities_v1_t for more information on the struct. 
* * @param device The identifier of the target device - * @param info Reference to \a nvmlNvLinkPowerThres_t struct - * input parameters + * @param caps Returns GPU's capabilities * * @return - * - \ref NVML_SUCCESS if the \a Threshold is successfully set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a Threshold is not within range - * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device - * - **/ -nvmlReturn_t DECLDIR nvmlDeviceSetNvLinkDeviceLowPowerThreshold(nvmlDevice_t device, nvmlNvLinkPowerThres_t *info); + * - \ref NVML_SUCCESS If the query is success + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid or \a counters is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED If the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST If the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the provided version is invalid/unsupported + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetCapabilities(nvmlDevice_t device, + nvmlDeviceCapabilities_t *caps); + +/* + * Generic bitmask to hold 255 bits, represented by 8 elements of 32 bits + */ +#define NVML_255_MASK_BITS_PER_ELEM 32 +#define NVML_255_MASK_NUM_ELEMS 8 +#define NVML_255_MASK_BIT_SET(index, nvmlMask) \ + nvmlMask.mask[index / NVML_255_MASK_BITS_PER_ELEM] |= (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + +#define NVML_255_MASK_BIT_GET(index, nvmlMask) \ + nvmlMask.mask[index / NVML_255_MASK_BITS_PER_ELEM] & (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + +#define NVML_255_MASK_BIT_SET_PTR(index, nvmlMask) \ + nvmlMask->mask[index / NVML_255_MASK_BITS_PER_ELEM] |= (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + +#define NVML_255_MASK_BIT_GET_PTR(index, nvmlMask) \ + 
nvmlMask->mask[index / NVML_255_MASK_BITS_PER_ELEM] & (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + +typedef struct +{ + unsigned int mask[NVML_255_MASK_NUM_ELEMS]; //profileId is used and + * the rest of the structure is ignored. * * @return - * - \ref NVML_SUCCESS if \a limit has been set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a powerValue is NULL or contains invalid values - * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * - \ref NVML_SUCCESS if the Desired Profile was successfully set + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid or structure was NULL + * - \ref NVML_ERROR_NO_PERMISSION if user does not have permission to change the profile number + * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by the device * - * @see NVML_FI_DEV_POWER_AVERAGE - * @see NVML_FI_DEV_POWER_INSTANT - * @see NVML_FI_DEV_POWER_MIN_LIMIT - * @see NVML_FI_DEV_POWER_MAX_LIMIT - * @see NVML_FI_DEV_POWER_CURRENT_LIMIT - */ -nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit_v2(nvmlDevice_t device, nvmlPowerValue_v2_t *powerValue); + **/ +nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingActivatePresetProfile(nvmlDevice_t device, + nvmlPowerSmoothingProfile_t *profile); /** - * Get SRAM ECC error status of this device. + * Update the value of a specific profile parameter contained within \ref nvmlPowerSmoothingProfile_v1_t. + * Requires root/admin permissions. * - * For Ampere &tm; or newer fully supported devices. 
+ * %BLACKWELL_OR_NEWER% + * + * NVML_POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR expects a value as a percentage from 00.00-100.00% + * NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE expects a value in W/s + * NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE expects a value in W/s + * NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS expects a value in ms + * + * @param device The identifier of the target device + * @param profile Reference to \ref nvmlPowerSmoothingProfile_v1_t struct + * + * @return + * - \ref NVML_SUCCESS if the Active Profile was successfully set + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid or profile parameter/value was invalid + * - \ref NVML_ERROR_NO_PERMISSION if user does not have permission to change any profile parameters + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the structure version is not supported + * + **/ +nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingUpdatePresetProfileParam(nvmlDevice_t device, + nvmlPowerSmoothingProfile_t *profile); +/** + * Enable or disable the Power Smoothing Feature. * Requires root/admin permissions. * - * See \ref nvmlEccSramErrorStatus_v1_t for more information on the struct. 
+ * %BLACKWELL_OR_NEWER% * - * @param device The identifier of the target device - * @param status Returns SRAM ECC error status + * See \ref nvmlEnableState_t for details on allowed states + * + * @param device The identifier of the target device + * @param state Reference to \ref nvmlPowerSmoothingState_v1_t * * @return - * - \ref NVML_SUCCESS if \a limit has been set - * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a counters is NULL - * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature - * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible - * - \ref NVML_ERROR_VERSION_MISMATCH if the version of \a nvmlEccSramErrorStatus_t is invalid - * - \ref NVML_ERROR_UNKNOWN on any unexpected error - */ -nvmlReturn_t DECLDIR nvmlDeviceGetSramEccErrorStatus(nvmlDevice_t device, - nvmlEccSramErrorStatus_t *status); + * - \ref NVML_SUCCESS if the feature state was successfully set + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid or state is NULL + * - \ref NVML_ERROR_NO_PERMISSION if user does not have permission to change feature state + * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by the device + * + **/ +nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingSetState(nvmlDevice_t device, + nvmlPowerSmoothingState_t *state); +/** @} */ // @defgroup + /** * NVML API versioning support */ @@ -11065,6 +13187,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMPSComputeRunningProcesses(nvmlDevice_t device nvmlReturn_t DECLDIR nvmlDeviceGetMPSComputeRunningProcesses_v2(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_v2_t *infos); nvmlReturn_t DECLDIR nvmlDeviceGetGpuInstancePossiblePlacements(nvmlDevice_t device, unsigned int profileId, nvmlGpuInstancePlacement_t *placements, unsigned int *count); nvmlReturn_t DECLDIR nvmlVgpuInstanceGetLicenseInfo(nvmlVgpuInstance_t 
vgpuInstance, nvmlVgpuLicenseInfo_t *licenseInfo); +nvmlReturn_t DECLDIR nvmlDeviceGetDriverModel(nvmlDevice_t device, nvmlDriverModel_t *current, nvmlDriverModel_t *pending); #endif // #ifdef NVML_NO_UNVERSIONED_FUNC_DEFS #if defined(NVML_NO_UNVERSIONED_FUNC_DEFS) @@ -11090,6 +13213,7 @@ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetLicenseInfo(nvmlVgpuInstance_t vgpuInsta #undef nvmlGetBlacklistDeviceInfoByIndex #undef nvmlDeviceGetGpuInstancePossiblePlacements #undef nvmlVgpuInstanceGetLicenseInfo +#undef nvmlDeviceGetDriverModel #undef nvmlDeviceSetPowerManagementLimit #endif diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/system.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/system.go index a1cc7db4e..f82471024 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/system.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/system.go @@ -88,31 +88,31 @@ func (l *library) SystemGetConfComputeCapabilities() (ConfComputeSystemCaps, Ret } // nvml.SystemGetConfComputeState() -func SystemGetConfComputeState() (ConfComputeSystemState, Return) { +func (l *library) SystemGetConfComputeState() (ConfComputeSystemState, Return) { var state ConfComputeSystemState ret := nvmlSystemGetConfComputeState(&state) return state, ret } // nvml.SystemGetConfComputeGpusReadyState() -func SystemGetConfComputeGpusReadyState() (uint32, Return) { +func (l *library) SystemGetConfComputeGpusReadyState() (uint32, Return) { var isAcceptingWork uint32 ret := nvmlSystemGetConfComputeGpusReadyState(&isAcceptingWork) return isAcceptingWork, ret } // nvml.SystemSetConfComputeGpusReadyState() -func SystemSetConfComputeGpusReadyState(isAcceptingWork uint32) Return { +func (l *library) SystemSetConfComputeGpusReadyState(isAcceptingWork uint32) Return { return nvmlSystemSetConfComputeGpusReadyState(isAcceptingWork) } // nvml.SystemSetNvlinkBwMode() -func SystemSetNvlinkBwMode(nvlinkBwMode uint32) Return { +func (l *library) SystemSetNvlinkBwMode(nvlinkBwMode uint32) Return { return 
nvmlSystemSetNvlinkBwMode(nvlinkBwMode) } // nvml.SystemGetNvlinkBwMode() -func SystemGetNvlinkBwMode() (uint32, Return) { +func (l *library) SystemGetNvlinkBwMode() (uint32, Return) { var nvlinkBwMode uint32 ret := nvmlSystemGetNvlinkBwMode(&nvlinkBwMode) return nvlinkBwMode, ret @@ -138,3 +138,11 @@ func (l *library) SystemGetConfComputeSettings() (SystemConfComputeSettings, Ret func (l *library) SystemSetConfComputeKeyRotationThresholdInfo(keyRotationThresholdInfo ConfComputeSetKeyRotationThresholdInfo) Return { return nvmlSystemSetConfComputeKeyRotationThresholdInfo(&keyRotationThresholdInfo) } + +// nvml.SystemGetDriverBranch() +func (l *library) SystemGetDriverBranch() (SystemDriverBranchInfo, Return) { + var branchInfo SystemDriverBranchInfo + branchInfo.Version = STRUCT_VERSION(branchInfo, 1) + ret := nvmlSystemGetDriverBranch(&branchInfo, SYSTEM_DRIVER_VERSION_BUFFER_SIZE) + return branchInfo, ret +} diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/types_gen.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/types_gen.go index 6ee33a6ab..3be17966c 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/types_gen.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/types_gen.go @@ -9,6 +9,10 @@ type nvmlDevice struct { Handle *_Ctype_struct_nvmlDevice_st } +type nvmlGpuInstance struct { + Handle *_Ctype_struct_nvmlGpuInstance_st +} + type PciInfoExt_v1 struct { Version uint32 Domain uint32 @@ -182,6 +186,58 @@ type GpuThermalSettings struct { Sensor [3]GpuThermalSettingsSensor } +type CoolerInfo_v1 struct { + Version uint32 + Index uint32 + SignalType uint32 + Target uint32 +} + +type CoolerInfo struct { + Version uint32 + Index uint32 + SignalType uint32 + Target uint32 +} + +const sizeofUUIDValue = unsafe.Sizeof([41]byte{}) + +type UUIDValue [sizeofUUIDValue]byte + +type UUID_v1 struct { + Version uint32 + Type uint32 + Value [41]byte + Pad_cgo_0 [3]byte +} + +type UUID struct { + Version uint32 + Type uint32 + Value [41]byte + Pad_cgo_0 [3]byte +} + +type 
DramEncryptionInfo_v1 struct { + Version uint32 + EncryptionState uint32 +} + +type DramEncryptionInfo struct { + Version uint32 + EncryptionState uint32 +} + +type MarginTemperature_v1 struct { + Version uint32 + MarginTemperature int32 +} + +type MarginTemperature struct { + Version uint32 + MarginTemperature int32 +} + type ClkMonFaultInfo struct { ClkApiDomain uint32 ClkDomainFaultMask uint32 @@ -193,6 +249,189 @@ type ClkMonStatus struct { ClkMonList [32]ClkMonFaultInfo } +type ClockOffset_v1 struct { + Version uint32 + Type uint32 + Pstate uint32 + ClockOffsetMHz int32 + MinClockOffsetMHz int32 + MaxClockOffsetMHz int32 +} + +type ClockOffset struct { + Version uint32 + Type uint32 + Pstate uint32 + ClockOffsetMHz int32 + MinClockOffsetMHz int32 + MaxClockOffsetMHz int32 +} + +type FanSpeedInfo_v1 struct { + Version uint32 + Fan uint32 + Speed uint32 +} + +type FanSpeedInfo struct { + Version uint32 + Fan uint32 + Speed uint32 +} + +type DevicePerfModes_v1 struct { + Version uint32 + Str [2048]int8 +} + +type DevicePerfModes struct { + Version uint32 + Str [2048]int8 +} + +type DeviceCurrentClockFreqs_v1 struct { + Version uint32 + Str [2048]int8 +} + +type DeviceCurrentClockFreqs struct { + Version uint32 + Str [2048]int8 +} + +type ProcessUtilizationSample struct { + Pid uint32 + TimeStamp uint64 + SmUtil uint32 + MemUtil uint32 + EncUtil uint32 + DecUtil uint32 +} + +type ProcessUtilizationInfo_v1 struct { + TimeStamp uint64 + Pid uint32 + SmUtil uint32 + MemUtil uint32 + EncUtil uint32 + DecUtil uint32 + JpgUtil uint32 + OfaUtil uint32 + Pad_cgo_0 [4]byte +} + +type ProcessesUtilizationInfo_v1 struct { + Version uint32 + ProcessSamplesCount uint32 + LastSeenTimeStamp uint64 + ProcUtilArray *ProcessUtilizationInfo_v1 +} + +type ProcessesUtilizationInfo struct { + Version uint32 + ProcessSamplesCount uint32 + LastSeenTimeStamp uint64 + ProcUtilArray *ProcessUtilizationInfo_v1 +} + +type EccSramErrorStatus_v1 struct { + Version uint32 + AggregateUncParity 
uint64 + AggregateUncSecDed uint64 + AggregateCor uint64 + VolatileUncParity uint64 + VolatileUncSecDed uint64 + VolatileCor uint64 + AggregateUncBucketL2 uint64 + AggregateUncBucketSm uint64 + AggregateUncBucketPcie uint64 + AggregateUncBucketMcu uint64 + AggregateUncBucketOther uint64 + BThresholdExceeded uint32 + Pad_cgo_0 [4]byte +} + +type EccSramErrorStatus struct { + Version uint32 + AggregateUncParity uint64 + AggregateUncSecDed uint64 + AggregateCor uint64 + VolatileUncParity uint64 + VolatileUncSecDed uint64 + VolatileCor uint64 + AggregateUncBucketL2 uint64 + AggregateUncBucketSm uint64 + AggregateUncBucketPcie uint64 + AggregateUncBucketMcu uint64 + AggregateUncBucketOther uint64 + BThresholdExceeded uint32 + Pad_cgo_0 [4]byte +} + +type PlatformInfo_v1 struct { + Version uint32 + IbGuid [16]uint8 + RackGuid [16]uint8 + ChassisPhysicalSlotNumber uint8 + ComputeSlotIndex uint8 + NodeIndex uint8 + PeerType uint8 + ModuleId uint8 + Pad_cgo_0 [3]byte +} + +type PlatformInfo_v2 struct { + Version uint32 + IbGuid [16]uint8 + ChassisSerialNumber [16]uint8 + SlotNumber uint8 + TrayIndex uint8 + HostId uint8 + PeerType uint8 + ModuleId uint8 + Pad_cgo_0 [3]byte +} + +type PlatformInfo struct { + Version uint32 + IbGuid [16]uint8 + ChassisSerialNumber [16]uint8 + SlotNumber uint8 + TrayIndex uint8 + HostId uint8 + PeerType uint8 + ModuleId uint8 + Pad_cgo_0 [3]byte +} + +type DeviceArchitecture uint32 + +type BusType uint32 + +type FanControlPolicy uint32 + +type PowerSource uint32 + +type GpuDynamicPstatesInfoUtilization struct { + BIsPresent uint32 + Percentage uint32 + IncThreshold uint32 + DecThreshold uint32 +} + +type GpuDynamicPstatesInfo struct { + Flags uint32 + Utilization [8]GpuDynamicPstatesInfoUtilization +} + +type PowerScopeType byte + +type PowerValue_v2 struct { + Version uint32 + PowerScope uint8 + PowerValueMw uint32 +} + type nvmlVgpuTypeId uint32 type nvmlVgpuInstance uint32 @@ -224,11 +463,32 @@ type VgpuPlacementList_v1 struct { 
PlacementIds *uint32 } +type VgpuPlacementList_v2 struct { + Version uint32 + PlacementSize uint32 + Count uint32 + PlacementIds *uint32 + Mode uint32 + Pad_cgo_0 [4]byte +} + type VgpuPlacementList struct { Version uint32 PlacementSize uint32 Count uint32 PlacementIds *uint32 + Mode uint32 + Pad_cgo_0 [4]byte +} + +type VgpuTypeBar1Info_v1 struct { + Version uint32 + Bar1Size uint64 +} + +type VgpuTypeBar1Info struct { + Version uint32 + Bar1Size uint64 } type VgpuInstanceUtilizationSample struct { @@ -306,6 +566,16 @@ type VgpuProcessesUtilizationInfo struct { VgpuProcUtilArray *VgpuProcessUtilizationInfo_v1 } +type VgpuRuntimeState_v1 struct { + Version uint32 + Size uint64 +} + +type VgpuRuntimeState struct { + Version uint32 + Size uint64 +} + type VgpuSchedulerParamsVgpuSchedDataWithARR struct { AvgFactor uint32 Timeslice uint32 @@ -390,41 +660,6 @@ type VgpuLicenseInfo struct { CurrentState uint32 } -type ProcessUtilizationSample struct { - Pid uint32 - TimeStamp uint64 - SmUtil uint32 - MemUtil uint32 - EncUtil uint32 - DecUtil uint32 -} - -type ProcessUtilizationInfo_v1 struct { - TimeStamp uint64 - Pid uint32 - SmUtil uint32 - MemUtil uint32 - EncUtil uint32 - DecUtil uint32 - JpgUtil uint32 - OfaUtil uint32 - Pad_cgo_0 [4]byte -} - -type ProcessesUtilizationInfo_v1 struct { - Version uint32 - ProcessSamplesCount uint32 - LastSeenTimeStamp uint64 - ProcUtilArray *ProcessUtilizationInfo_v1 -} - -type ProcessesUtilizationInfo struct { - Version uint32 - ProcessSamplesCount uint32 - LastSeenTimeStamp uint64 - ProcUtilArray *ProcessUtilizationInfo_v1 -} - type GridLicenseExpiry struct { Year uint32 Month uint16 @@ -451,58 +686,114 @@ type GridLicensableFeatures struct { GridLicensableFeatures [3]GridLicensableFeature } -type EccSramErrorStatus_v1 struct { - Version uint32 - AggregateUncParity uint64 - AggregateUncSecDed uint64 - AggregateCor uint64 - VolatileUncParity uint64 - VolatileUncSecDed uint64 - VolatileCor uint64 - AggregateUncBucketL2 uint64 - 
AggregateUncBucketSm uint64 - AggregateUncBucketPcie uint64 - AggregateUncBucketMcu uint64 - AggregateUncBucketOther uint64 - BThresholdExceeded uint32 - Pad_cgo_0 [4]byte +type VgpuTypeIdInfo_v1 struct { + Version uint32 + VgpuCount uint32 + VgpuTypeIds *uint32 } -type EccSramErrorStatus struct { - Version uint32 - AggregateUncParity uint64 - AggregateUncSecDed uint64 - AggregateCor uint64 - VolatileUncParity uint64 - VolatileUncSecDed uint64 - VolatileCor uint64 - AggregateUncBucketL2 uint64 - AggregateUncBucketSm uint64 - AggregateUncBucketPcie uint64 - AggregateUncBucketMcu uint64 - AggregateUncBucketOther uint64 - BThresholdExceeded uint32 - Pad_cgo_0 [4]byte +type VgpuTypeIdInfo struct { + Version uint32 + VgpuCount uint32 + VgpuTypeIds *uint32 } -type DeviceArchitecture uint32 +type VgpuTypeMaxInstance_v1 struct { + Version uint32 + VgpuTypeId uint32 + MaxInstancePerGI uint32 +} -type BusType uint32 +type VgpuTypeMaxInstance struct { + Version uint32 + VgpuTypeId uint32 + MaxInstancePerGI uint32 +} -type FanControlPolicy uint32 +type ActiveVgpuInstanceInfo_v1 struct { + Version uint32 + VgpuCount uint32 + VgpuInstances *uint32 +} -type PowerSource uint32 +type ActiveVgpuInstanceInfo struct { + Version uint32 + VgpuCount uint32 + VgpuInstances *uint32 +} -type GpuDynamicPstatesInfoUtilization struct { - BIsPresent uint32 - Percentage uint32 - IncThreshold uint32 - DecThreshold uint32 +type VgpuSchedulerState_v1 struct { + Version uint32 + EngineId uint32 + SchedulerPolicy uint32 + EnableARRMode uint32 + SchedulerParams [8]byte } -type GpuDynamicPstatesInfo struct { - Flags uint32 - Utilization [8]GpuDynamicPstatesInfoUtilization +type VgpuSchedulerState struct { + Version uint32 + EngineId uint32 + SchedulerPolicy uint32 + EnableARRMode uint32 + SchedulerParams [8]byte +} + +type VgpuSchedulerStateInfo_v1 struct { + Version uint32 + EngineId uint32 + SchedulerPolicy uint32 + ArrMode uint32 + SchedulerParams [8]byte +} + +type VgpuSchedulerStateInfo struct { + 
Version uint32 + EngineId uint32 + SchedulerPolicy uint32 + ArrMode uint32 + SchedulerParams [8]byte +} + +type VgpuSchedulerLogInfo_v1 struct { + Version uint32 + EngineId uint32 + SchedulerPolicy uint32 + ArrMode uint32 + SchedulerParams [8]byte + EntriesCount uint32 + LogEntries [200]VgpuSchedulerLogEntry +} + +type VgpuSchedulerLogInfo struct { + Version uint32 + EngineId uint32 + SchedulerPolicy uint32 + ArrMode uint32 + SchedulerParams [8]byte + EntriesCount uint32 + LogEntries [200]VgpuSchedulerLogEntry +} + +type VgpuCreatablePlacementInfo_v1 struct { + Version uint32 + VgpuTypeId uint32 + Count uint32 + PlacementIds *uint32 + PlacementSize uint32 + Pad_cgo_0 [4]byte +} + +type VgpuCreatablePlacementInfo struct { + Version uint32 + VgpuTypeId uint32 + Count uint32 + PlacementIds *uint32 + PlacementSize uint32 + Pad_cgo_0 [4]byte +} + +type NvLinkPowerThres struct { + LowPwrThreshold uint32 } type FieldValue struct { @@ -565,6 +856,66 @@ type nvmlEventData struct { ComputeInstanceId uint32 } +type SystemEventSet struct { + Handle *_Ctype_struct_nvmlSystemEventSet_st +} + +type SystemEventSetCreateRequest_v1 struct { + Version uint32 + Set SystemEventSet +} + +type SystemEventSetCreateRequest struct { + Version uint32 + Set SystemEventSet +} + +type SystemEventSetFreeRequest_v1 struct { + Version uint32 + Set SystemEventSet +} + +type SystemEventSetFreeRequest struct { + Version uint32 + Set SystemEventSet +} + +type SystemRegisterEventRequest_v1 struct { + Version uint32 + EventTypes uint64 + Set SystemEventSet +} + +type SystemRegisterEventRequest struct { + Version uint32 + EventTypes uint64 + Set SystemEventSet +} + +type SystemEventData_v1 struct { + EventType uint64 + GpuId uint32 + Pad_cgo_0 [4]byte +} + +type SystemEventSetWaitRequest_v1 struct { + Version uint32 + Timeoutms uint32 + Set SystemEventSet + Data *SystemEventData_v1 + DataSize uint32 + NumEvent uint32 +} + +type SystemEventSetWaitRequest struct { + Version uint32 + Timeoutms uint32 + Set 
SystemEventSet + Data *SystemEventData_v1 + DataSize uint32 + NumEvent uint32 +} + type AccountingStats struct { GpuUtilization uint32 MemoryUtilization uint32 @@ -703,16 +1054,70 @@ type GpuFabricInfoV struct { HealthMask uint32 } -type PowerScopeType byte +type SystemDriverBranchInfo_v1 struct { + Version uint32 + Branch [80]int8 +} -type PowerValue_v2 struct { - Version uint32 - PowerScope uint8 - PowerValueMw uint32 +type SystemDriverBranchInfo struct { + Version uint32 + Branch [80]int8 } type AffinityScope uint32 +type Temperature_v1 struct { + Version uint32 + SensorType uint32 + Temperature int32 +} + +type Temperature struct { + Version uint32 + SensorType uint32 + Temperature int32 +} + +type NvlinkSupportedBwModes_v1 struct { + Version uint32 + BwModes [23]uint8 + TotalBwModes uint8 +} + +type NvlinkSupportedBwModes struct { + Version uint32 + BwModes [23]uint8 + TotalBwModes uint8 +} + +type NvlinkGetBwMode_v1 struct { + Version uint32 + BIsBest uint32 + BwMode uint8 + Pad_cgo_0 [3]byte +} + +type NvlinkGetBwMode struct { + Version uint32 + BIsBest uint32 + BwMode uint8 + Pad_cgo_0 [3]byte +} + +type NvlinkSetBwMode_v1 struct { + Version uint32 + BSetBest uint32 + BwMode uint8 + Pad_cgo_0 [3]byte +} + +type NvlinkSetBwMode struct { + Version uint32 + BSetBest uint32 + BwMode uint8 + Pad_cgo_0 [3]byte +} + type VgpuVersion struct { MinVersion uint32 MaxVersion uint32 @@ -811,10 +1216,6 @@ type nvmlGpuInstanceInfo struct { Placement GpuInstancePlacement } -type nvmlGpuInstance struct { - Handle *_Ctype_struct_nvmlGpuInstance_st -} - type ComputeInstancePlacement struct { Start uint32 Size uint32 @@ -895,7 +1296,7 @@ type nvmlGpmMetricsGetType struct { NumMetrics uint32 Sample1 nvmlGpmSample Sample2 nvmlGpmSample - Metrics [98]GpmMetric + Metrics [210]GpmMetric } type GpmSupport struct { @@ -903,6 +1304,90 @@ type GpmSupport struct { IsSupportedDevice uint32 } -type NvLinkPowerThres struct { - LowPwrThreshold uint32 +type DeviceCapabilities_v1 struct { + 
Version uint32 + CapMask uint32 +} + +type DeviceCapabilities struct { + Version uint32 + CapMask uint32 +} + +type Mask255 struct { + Mask [8]uint32 +} + +type WorkloadPowerProfileInfo_v1 struct { + Version uint32 + ProfileId uint32 + Priority uint32 + ConflictingMask Mask255 +} + +type WorkloadPowerProfileInfo struct { + Version uint32 + ProfileId uint32 + Priority uint32 + ConflictingMask Mask255 +} + +type WorkloadPowerProfileProfilesInfo_v1 struct { + Version uint32 + PerfProfilesMask Mask255 + PerfProfile [255]WorkloadPowerProfileInfo +} + +type WorkloadPowerProfileProfilesInfo struct { + Version uint32 + PerfProfilesMask Mask255 + PerfProfile [255]WorkloadPowerProfileInfo +} + +type WorkloadPowerProfileCurrentProfiles_v1 struct { + Version uint32 + PerfProfilesMask Mask255 + RequestedProfilesMask Mask255 + EnforcedProfilesMask Mask255 +} + +type WorkloadPowerProfileCurrentProfiles struct { + Version uint32 + PerfProfilesMask Mask255 + RequestedProfilesMask Mask255 + EnforcedProfilesMask Mask255 +} + +type WorkloadPowerProfileRequestedProfiles_v1 struct { + Version uint32 + RequestedProfilesMask Mask255 +} + +type WorkloadPowerProfileRequestedProfiles struct { + Version uint32 + RequestedProfilesMask Mask255 +} + +type PowerSmoothingProfile_v1 struct { + Version uint32 + ProfileId uint32 + ParamId uint32 + Value float64 +} + +type PowerSmoothingProfile struct { + Version uint32 + ProfileId uint32 + ParamId uint32 + Value float64 +} + +type PowerSmoothingState_v1 struct { + Version uint32 + State uint32 +} + +type PowerSmoothingState struct { + Version uint32 + State uint32 } diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/vgpu.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/vgpu.go index da4952422..b1e0fa7c6 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/vgpu.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/vgpu.go @@ -478,3 +478,32 @@ func (l *library) GetVgpuDriverCapabilities(capability VgpuDriverCapability) (bo ret := 
nvmlGetVgpuDriverCapabilities(capability, &capResult) return (capResult != 0), ret } + +// nvml.VgpuTypeGetBAR1Info() +func (l *library) VgpuTypeGetBAR1Info(vgpuTypeId VgpuTypeId) (VgpuTypeBar1Info, Return) { + return vgpuTypeId.GetBAR1Info() +} + +func (vgpuTypeId nvmlVgpuTypeId) GetBAR1Info() (VgpuTypeBar1Info, Return) { + var bar1Info VgpuTypeBar1Info + bar1Info.Version = STRUCT_VERSION(bar1Info, 1) + ret := nvmlVgpuTypeGetBAR1Info(vgpuTypeId, &bar1Info) + return bar1Info, ret +} + +// nvml.VgpuInstanceGetRuntimeStateSize() +func (l *library) VgpuInstanceGetRuntimeStateSize(vgpuInstance VgpuInstance) (VgpuRuntimeState, Return) { + return vgpuInstance.GetRuntimeStateSize() +} + +func (vgpuInstance nvmlVgpuInstance) GetRuntimeStateSize() (VgpuRuntimeState, Return) { + var pState VgpuRuntimeState + pState.Version = STRUCT_VERSION(pState, 1) + ret := nvmlVgpuInstanceGetRuntimeStateSize(vgpuInstance, &pState) + return pState, ret +} + +// nvml.VgpuTypeGetMaxInstancesPerGpuInstance() +func (l *library) VgpuTypeGetMaxInstancesPerGpuInstance(maxInstance *VgpuTypeMaxInstance) Return { + return nvmlVgpuTypeGetMaxInstancesPerGpuInstance(maxInstance) +} diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/zz_generated.api.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/zz_generated.api.go index c1ecb2d0e..bfe4d0790 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/zz_generated.api.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/zz_generated.api.go @@ -20,327 +20,372 @@ package nvml // The variables below represent package level methods from the library type. 
var ( - ComputeInstanceDestroy = libnvml.ComputeInstanceDestroy - ComputeInstanceGetInfo = libnvml.ComputeInstanceGetInfo - DeviceClearAccountingPids = libnvml.DeviceClearAccountingPids - DeviceClearCpuAffinity = libnvml.DeviceClearCpuAffinity - DeviceClearEccErrorCounts = libnvml.DeviceClearEccErrorCounts - DeviceClearFieldValues = libnvml.DeviceClearFieldValues - DeviceCreateGpuInstance = libnvml.DeviceCreateGpuInstance - DeviceCreateGpuInstanceWithPlacement = libnvml.DeviceCreateGpuInstanceWithPlacement - DeviceDiscoverGpus = libnvml.DeviceDiscoverGpus - DeviceFreezeNvLinkUtilizationCounter = libnvml.DeviceFreezeNvLinkUtilizationCounter - DeviceGetAPIRestriction = libnvml.DeviceGetAPIRestriction - DeviceGetAccountingBufferSize = libnvml.DeviceGetAccountingBufferSize - DeviceGetAccountingMode = libnvml.DeviceGetAccountingMode - DeviceGetAccountingPids = libnvml.DeviceGetAccountingPids - DeviceGetAccountingStats = libnvml.DeviceGetAccountingStats - DeviceGetActiveVgpus = libnvml.DeviceGetActiveVgpus - DeviceGetAdaptiveClockInfoStatus = libnvml.DeviceGetAdaptiveClockInfoStatus - DeviceGetApplicationsClock = libnvml.DeviceGetApplicationsClock - DeviceGetArchitecture = libnvml.DeviceGetArchitecture - DeviceGetAttributes = libnvml.DeviceGetAttributes - DeviceGetAutoBoostedClocksEnabled = libnvml.DeviceGetAutoBoostedClocksEnabled - DeviceGetBAR1MemoryInfo = libnvml.DeviceGetBAR1MemoryInfo - DeviceGetBoardId = libnvml.DeviceGetBoardId - DeviceGetBoardPartNumber = libnvml.DeviceGetBoardPartNumber - DeviceGetBrand = libnvml.DeviceGetBrand - DeviceGetBridgeChipInfo = libnvml.DeviceGetBridgeChipInfo - DeviceGetBusType = libnvml.DeviceGetBusType - DeviceGetC2cModeInfoV = libnvml.DeviceGetC2cModeInfoV - DeviceGetClkMonStatus = libnvml.DeviceGetClkMonStatus - DeviceGetClock = libnvml.DeviceGetClock - DeviceGetClockInfo = libnvml.DeviceGetClockInfo - DeviceGetComputeInstanceId = libnvml.DeviceGetComputeInstanceId - DeviceGetComputeMode = libnvml.DeviceGetComputeMode - 
DeviceGetComputeRunningProcesses = libnvml.DeviceGetComputeRunningProcesses - DeviceGetConfComputeGpuAttestationReport = libnvml.DeviceGetConfComputeGpuAttestationReport - DeviceGetConfComputeGpuCertificate = libnvml.DeviceGetConfComputeGpuCertificate - DeviceGetConfComputeMemSizeInfo = libnvml.DeviceGetConfComputeMemSizeInfo - DeviceGetConfComputeProtectedMemoryUsage = libnvml.DeviceGetConfComputeProtectedMemoryUsage - DeviceGetCount = libnvml.DeviceGetCount - DeviceGetCpuAffinity = libnvml.DeviceGetCpuAffinity - DeviceGetCpuAffinityWithinScope = libnvml.DeviceGetCpuAffinityWithinScope - DeviceGetCreatableVgpus = libnvml.DeviceGetCreatableVgpus - DeviceGetCudaComputeCapability = libnvml.DeviceGetCudaComputeCapability - DeviceGetCurrPcieLinkGeneration = libnvml.DeviceGetCurrPcieLinkGeneration - DeviceGetCurrPcieLinkWidth = libnvml.DeviceGetCurrPcieLinkWidth - DeviceGetCurrentClocksEventReasons = libnvml.DeviceGetCurrentClocksEventReasons - DeviceGetCurrentClocksThrottleReasons = libnvml.DeviceGetCurrentClocksThrottleReasons - DeviceGetDecoderUtilization = libnvml.DeviceGetDecoderUtilization - DeviceGetDefaultApplicationsClock = libnvml.DeviceGetDefaultApplicationsClock - DeviceGetDefaultEccMode = libnvml.DeviceGetDefaultEccMode - DeviceGetDetailedEccErrors = libnvml.DeviceGetDetailedEccErrors - DeviceGetDeviceHandleFromMigDeviceHandle = libnvml.DeviceGetDeviceHandleFromMigDeviceHandle - DeviceGetDisplayActive = libnvml.DeviceGetDisplayActive - DeviceGetDisplayMode = libnvml.DeviceGetDisplayMode - DeviceGetDriverModel = libnvml.DeviceGetDriverModel - DeviceGetDynamicPstatesInfo = libnvml.DeviceGetDynamicPstatesInfo - DeviceGetEccMode = libnvml.DeviceGetEccMode - DeviceGetEncoderCapacity = libnvml.DeviceGetEncoderCapacity - DeviceGetEncoderSessions = libnvml.DeviceGetEncoderSessions - DeviceGetEncoderStats = libnvml.DeviceGetEncoderStats - DeviceGetEncoderUtilization = libnvml.DeviceGetEncoderUtilization - DeviceGetEnforcedPowerLimit = 
libnvml.DeviceGetEnforcedPowerLimit - DeviceGetFBCSessions = libnvml.DeviceGetFBCSessions - DeviceGetFBCStats = libnvml.DeviceGetFBCStats - DeviceGetFanControlPolicy_v2 = libnvml.DeviceGetFanControlPolicy_v2 - DeviceGetFanSpeed = libnvml.DeviceGetFanSpeed - DeviceGetFanSpeed_v2 = libnvml.DeviceGetFanSpeed_v2 - DeviceGetFieldValues = libnvml.DeviceGetFieldValues - DeviceGetGpcClkMinMaxVfOffset = libnvml.DeviceGetGpcClkMinMaxVfOffset - DeviceGetGpcClkVfOffset = libnvml.DeviceGetGpcClkVfOffset - DeviceGetGpuFabricInfo = libnvml.DeviceGetGpuFabricInfo - DeviceGetGpuFabricInfoV = libnvml.DeviceGetGpuFabricInfoV - DeviceGetGpuInstanceById = libnvml.DeviceGetGpuInstanceById - DeviceGetGpuInstanceId = libnvml.DeviceGetGpuInstanceId - DeviceGetGpuInstancePossiblePlacements = libnvml.DeviceGetGpuInstancePossiblePlacements - DeviceGetGpuInstanceProfileInfo = libnvml.DeviceGetGpuInstanceProfileInfo - DeviceGetGpuInstanceProfileInfoV = libnvml.DeviceGetGpuInstanceProfileInfoV - DeviceGetGpuInstanceRemainingCapacity = libnvml.DeviceGetGpuInstanceRemainingCapacity - DeviceGetGpuInstances = libnvml.DeviceGetGpuInstances - DeviceGetGpuMaxPcieLinkGeneration = libnvml.DeviceGetGpuMaxPcieLinkGeneration - DeviceGetGpuOperationMode = libnvml.DeviceGetGpuOperationMode - DeviceGetGraphicsRunningProcesses = libnvml.DeviceGetGraphicsRunningProcesses - DeviceGetGridLicensableFeatures = libnvml.DeviceGetGridLicensableFeatures - DeviceGetGspFirmwareMode = libnvml.DeviceGetGspFirmwareMode - DeviceGetGspFirmwareVersion = libnvml.DeviceGetGspFirmwareVersion - DeviceGetHandleByIndex = libnvml.DeviceGetHandleByIndex - DeviceGetHandleByPciBusId = libnvml.DeviceGetHandleByPciBusId - DeviceGetHandleBySerial = libnvml.DeviceGetHandleBySerial - DeviceGetHandleByUUID = libnvml.DeviceGetHandleByUUID - DeviceGetHostVgpuMode = libnvml.DeviceGetHostVgpuMode - DeviceGetIndex = libnvml.DeviceGetIndex - DeviceGetInforomConfigurationChecksum = libnvml.DeviceGetInforomConfigurationChecksum - 
DeviceGetInforomImageVersion = libnvml.DeviceGetInforomImageVersion - DeviceGetInforomVersion = libnvml.DeviceGetInforomVersion - DeviceGetIrqNum = libnvml.DeviceGetIrqNum - DeviceGetJpgUtilization = libnvml.DeviceGetJpgUtilization - DeviceGetLastBBXFlushTime = libnvml.DeviceGetLastBBXFlushTime - DeviceGetMPSComputeRunningProcesses = libnvml.DeviceGetMPSComputeRunningProcesses - DeviceGetMaxClockInfo = libnvml.DeviceGetMaxClockInfo - DeviceGetMaxCustomerBoostClock = libnvml.DeviceGetMaxCustomerBoostClock - DeviceGetMaxMigDeviceCount = libnvml.DeviceGetMaxMigDeviceCount - DeviceGetMaxPcieLinkGeneration = libnvml.DeviceGetMaxPcieLinkGeneration - DeviceGetMaxPcieLinkWidth = libnvml.DeviceGetMaxPcieLinkWidth - DeviceGetMemClkMinMaxVfOffset = libnvml.DeviceGetMemClkMinMaxVfOffset - DeviceGetMemClkVfOffset = libnvml.DeviceGetMemClkVfOffset - DeviceGetMemoryAffinity = libnvml.DeviceGetMemoryAffinity - DeviceGetMemoryBusWidth = libnvml.DeviceGetMemoryBusWidth - DeviceGetMemoryErrorCounter = libnvml.DeviceGetMemoryErrorCounter - DeviceGetMemoryInfo = libnvml.DeviceGetMemoryInfo - DeviceGetMemoryInfo_v2 = libnvml.DeviceGetMemoryInfo_v2 - DeviceGetMigDeviceHandleByIndex = libnvml.DeviceGetMigDeviceHandleByIndex - DeviceGetMigMode = libnvml.DeviceGetMigMode - DeviceGetMinMaxClockOfPState = libnvml.DeviceGetMinMaxClockOfPState - DeviceGetMinMaxFanSpeed = libnvml.DeviceGetMinMaxFanSpeed - DeviceGetMinorNumber = libnvml.DeviceGetMinorNumber - DeviceGetModuleId = libnvml.DeviceGetModuleId - DeviceGetMultiGpuBoard = libnvml.DeviceGetMultiGpuBoard - DeviceGetName = libnvml.DeviceGetName - DeviceGetNumFans = libnvml.DeviceGetNumFans - DeviceGetNumGpuCores = libnvml.DeviceGetNumGpuCores - DeviceGetNumaNodeId = libnvml.DeviceGetNumaNodeId - DeviceGetNvLinkCapability = libnvml.DeviceGetNvLinkCapability - DeviceGetNvLinkErrorCounter = libnvml.DeviceGetNvLinkErrorCounter - DeviceGetNvLinkRemoteDeviceType = libnvml.DeviceGetNvLinkRemoteDeviceType - DeviceGetNvLinkRemotePciInfo = 
libnvml.DeviceGetNvLinkRemotePciInfo - DeviceGetNvLinkState = libnvml.DeviceGetNvLinkState - DeviceGetNvLinkUtilizationControl = libnvml.DeviceGetNvLinkUtilizationControl - DeviceGetNvLinkUtilizationCounter = libnvml.DeviceGetNvLinkUtilizationCounter - DeviceGetNvLinkVersion = libnvml.DeviceGetNvLinkVersion - DeviceGetOfaUtilization = libnvml.DeviceGetOfaUtilization - DeviceGetP2PStatus = libnvml.DeviceGetP2PStatus - DeviceGetPciInfo = libnvml.DeviceGetPciInfo - DeviceGetPciInfoExt = libnvml.DeviceGetPciInfoExt - DeviceGetPcieLinkMaxSpeed = libnvml.DeviceGetPcieLinkMaxSpeed - DeviceGetPcieReplayCounter = libnvml.DeviceGetPcieReplayCounter - DeviceGetPcieSpeed = libnvml.DeviceGetPcieSpeed - DeviceGetPcieThroughput = libnvml.DeviceGetPcieThroughput - DeviceGetPerformanceState = libnvml.DeviceGetPerformanceState - DeviceGetPersistenceMode = libnvml.DeviceGetPersistenceMode - DeviceGetPgpuMetadataString = libnvml.DeviceGetPgpuMetadataString - DeviceGetPowerManagementDefaultLimit = libnvml.DeviceGetPowerManagementDefaultLimit - DeviceGetPowerManagementLimit = libnvml.DeviceGetPowerManagementLimit - DeviceGetPowerManagementLimitConstraints = libnvml.DeviceGetPowerManagementLimitConstraints - DeviceGetPowerManagementMode = libnvml.DeviceGetPowerManagementMode - DeviceGetPowerSource = libnvml.DeviceGetPowerSource - DeviceGetPowerState = libnvml.DeviceGetPowerState - DeviceGetPowerUsage = libnvml.DeviceGetPowerUsage - DeviceGetProcessUtilization = libnvml.DeviceGetProcessUtilization - DeviceGetProcessesUtilizationInfo = libnvml.DeviceGetProcessesUtilizationInfo - DeviceGetRemappedRows = libnvml.DeviceGetRemappedRows - DeviceGetRetiredPages = libnvml.DeviceGetRetiredPages - DeviceGetRetiredPagesPendingStatus = libnvml.DeviceGetRetiredPagesPendingStatus - DeviceGetRetiredPages_v2 = libnvml.DeviceGetRetiredPages_v2 - DeviceGetRowRemapperHistogram = libnvml.DeviceGetRowRemapperHistogram - DeviceGetRunningProcessDetailList = libnvml.DeviceGetRunningProcessDetailList - 
DeviceGetSamples = libnvml.DeviceGetSamples - DeviceGetSerial = libnvml.DeviceGetSerial - DeviceGetSramEccErrorStatus = libnvml.DeviceGetSramEccErrorStatus - DeviceGetSupportedClocksEventReasons = libnvml.DeviceGetSupportedClocksEventReasons - DeviceGetSupportedClocksThrottleReasons = libnvml.DeviceGetSupportedClocksThrottleReasons - DeviceGetSupportedEventTypes = libnvml.DeviceGetSupportedEventTypes - DeviceGetSupportedGraphicsClocks = libnvml.DeviceGetSupportedGraphicsClocks - DeviceGetSupportedMemoryClocks = libnvml.DeviceGetSupportedMemoryClocks - DeviceGetSupportedPerformanceStates = libnvml.DeviceGetSupportedPerformanceStates - DeviceGetSupportedVgpus = libnvml.DeviceGetSupportedVgpus - DeviceGetTargetFanSpeed = libnvml.DeviceGetTargetFanSpeed - DeviceGetTemperature = libnvml.DeviceGetTemperature - DeviceGetTemperatureThreshold = libnvml.DeviceGetTemperatureThreshold - DeviceGetThermalSettings = libnvml.DeviceGetThermalSettings - DeviceGetTopologyCommonAncestor = libnvml.DeviceGetTopologyCommonAncestor - DeviceGetTopologyNearestGpus = libnvml.DeviceGetTopologyNearestGpus - DeviceGetTotalEccErrors = libnvml.DeviceGetTotalEccErrors - DeviceGetTotalEnergyConsumption = libnvml.DeviceGetTotalEnergyConsumption - DeviceGetUUID = libnvml.DeviceGetUUID - DeviceGetUtilizationRates = libnvml.DeviceGetUtilizationRates - DeviceGetVbiosVersion = libnvml.DeviceGetVbiosVersion - DeviceGetVgpuCapabilities = libnvml.DeviceGetVgpuCapabilities - DeviceGetVgpuHeterogeneousMode = libnvml.DeviceGetVgpuHeterogeneousMode - DeviceGetVgpuInstancesUtilizationInfo = libnvml.DeviceGetVgpuInstancesUtilizationInfo - DeviceGetVgpuMetadata = libnvml.DeviceGetVgpuMetadata - DeviceGetVgpuProcessUtilization = libnvml.DeviceGetVgpuProcessUtilization - DeviceGetVgpuProcessesUtilizationInfo = libnvml.DeviceGetVgpuProcessesUtilizationInfo - DeviceGetVgpuSchedulerCapabilities = libnvml.DeviceGetVgpuSchedulerCapabilities - DeviceGetVgpuSchedulerLog = libnvml.DeviceGetVgpuSchedulerLog - 
DeviceGetVgpuSchedulerState = libnvml.DeviceGetVgpuSchedulerState - DeviceGetVgpuTypeCreatablePlacements = libnvml.DeviceGetVgpuTypeCreatablePlacements - DeviceGetVgpuTypeSupportedPlacements = libnvml.DeviceGetVgpuTypeSupportedPlacements - DeviceGetVgpuUtilization = libnvml.DeviceGetVgpuUtilization - DeviceGetViolationStatus = libnvml.DeviceGetViolationStatus - DeviceGetVirtualizationMode = libnvml.DeviceGetVirtualizationMode - DeviceIsMigDeviceHandle = libnvml.DeviceIsMigDeviceHandle - DeviceModifyDrainState = libnvml.DeviceModifyDrainState - DeviceOnSameBoard = libnvml.DeviceOnSameBoard - DeviceQueryDrainState = libnvml.DeviceQueryDrainState - DeviceRegisterEvents = libnvml.DeviceRegisterEvents - DeviceRemoveGpu = libnvml.DeviceRemoveGpu - DeviceRemoveGpu_v2 = libnvml.DeviceRemoveGpu_v2 - DeviceResetApplicationsClocks = libnvml.DeviceResetApplicationsClocks - DeviceResetGpuLockedClocks = libnvml.DeviceResetGpuLockedClocks - DeviceResetMemoryLockedClocks = libnvml.DeviceResetMemoryLockedClocks - DeviceResetNvLinkErrorCounters = libnvml.DeviceResetNvLinkErrorCounters - DeviceResetNvLinkUtilizationCounter = libnvml.DeviceResetNvLinkUtilizationCounter - DeviceSetAPIRestriction = libnvml.DeviceSetAPIRestriction - DeviceSetAccountingMode = libnvml.DeviceSetAccountingMode - DeviceSetApplicationsClocks = libnvml.DeviceSetApplicationsClocks - DeviceSetAutoBoostedClocksEnabled = libnvml.DeviceSetAutoBoostedClocksEnabled - DeviceSetComputeMode = libnvml.DeviceSetComputeMode - DeviceSetConfComputeUnprotectedMemSize = libnvml.DeviceSetConfComputeUnprotectedMemSize - DeviceSetCpuAffinity = libnvml.DeviceSetCpuAffinity - DeviceSetDefaultAutoBoostedClocksEnabled = libnvml.DeviceSetDefaultAutoBoostedClocksEnabled - DeviceSetDefaultFanSpeed_v2 = libnvml.DeviceSetDefaultFanSpeed_v2 - DeviceSetDriverModel = libnvml.DeviceSetDriverModel - DeviceSetEccMode = libnvml.DeviceSetEccMode - DeviceSetFanControlPolicy = libnvml.DeviceSetFanControlPolicy - DeviceSetFanSpeed_v2 = 
libnvml.DeviceSetFanSpeed_v2 - DeviceSetGpcClkVfOffset = libnvml.DeviceSetGpcClkVfOffset - DeviceSetGpuLockedClocks = libnvml.DeviceSetGpuLockedClocks - DeviceSetGpuOperationMode = libnvml.DeviceSetGpuOperationMode - DeviceSetMemClkVfOffset = libnvml.DeviceSetMemClkVfOffset - DeviceSetMemoryLockedClocks = libnvml.DeviceSetMemoryLockedClocks - DeviceSetMigMode = libnvml.DeviceSetMigMode - DeviceSetNvLinkDeviceLowPowerThreshold = libnvml.DeviceSetNvLinkDeviceLowPowerThreshold - DeviceSetNvLinkUtilizationControl = libnvml.DeviceSetNvLinkUtilizationControl - DeviceSetPersistenceMode = libnvml.DeviceSetPersistenceMode - DeviceSetPowerManagementLimit = libnvml.DeviceSetPowerManagementLimit - DeviceSetPowerManagementLimit_v2 = libnvml.DeviceSetPowerManagementLimit_v2 - DeviceSetTemperatureThreshold = libnvml.DeviceSetTemperatureThreshold - DeviceSetVgpuCapabilities = libnvml.DeviceSetVgpuCapabilities - DeviceSetVgpuHeterogeneousMode = libnvml.DeviceSetVgpuHeterogeneousMode - DeviceSetVgpuSchedulerState = libnvml.DeviceSetVgpuSchedulerState - DeviceSetVirtualizationMode = libnvml.DeviceSetVirtualizationMode - DeviceValidateInforom = libnvml.DeviceValidateInforom - ErrorString = libnvml.ErrorString - EventSetCreate = libnvml.EventSetCreate - EventSetFree = libnvml.EventSetFree - EventSetWait = libnvml.EventSetWait - Extensions = libnvml.Extensions - GetExcludedDeviceCount = libnvml.GetExcludedDeviceCount - GetExcludedDeviceInfoByIndex = libnvml.GetExcludedDeviceInfoByIndex - GetVgpuCompatibility = libnvml.GetVgpuCompatibility - GetVgpuDriverCapabilities = libnvml.GetVgpuDriverCapabilities - GetVgpuVersion = libnvml.GetVgpuVersion - GpmMetricsGet = libnvml.GpmMetricsGet - GpmMetricsGetV = libnvml.GpmMetricsGetV - GpmMigSampleGet = libnvml.GpmMigSampleGet - GpmQueryDeviceSupport = libnvml.GpmQueryDeviceSupport - GpmQueryDeviceSupportV = libnvml.GpmQueryDeviceSupportV - GpmQueryIfStreamingEnabled = libnvml.GpmQueryIfStreamingEnabled - GpmSampleAlloc = libnvml.GpmSampleAlloc - 
GpmSampleFree = libnvml.GpmSampleFree - GpmSampleGet = libnvml.GpmSampleGet - GpmSetStreamingEnabled = libnvml.GpmSetStreamingEnabled - GpuInstanceCreateComputeInstance = libnvml.GpuInstanceCreateComputeInstance - GpuInstanceCreateComputeInstanceWithPlacement = libnvml.GpuInstanceCreateComputeInstanceWithPlacement - GpuInstanceDestroy = libnvml.GpuInstanceDestroy - GpuInstanceGetComputeInstanceById = libnvml.GpuInstanceGetComputeInstanceById - GpuInstanceGetComputeInstancePossiblePlacements = libnvml.GpuInstanceGetComputeInstancePossiblePlacements - GpuInstanceGetComputeInstanceProfileInfo = libnvml.GpuInstanceGetComputeInstanceProfileInfo - GpuInstanceGetComputeInstanceProfileInfoV = libnvml.GpuInstanceGetComputeInstanceProfileInfoV - GpuInstanceGetComputeInstanceRemainingCapacity = libnvml.GpuInstanceGetComputeInstanceRemainingCapacity - GpuInstanceGetComputeInstances = libnvml.GpuInstanceGetComputeInstances - GpuInstanceGetInfo = libnvml.GpuInstanceGetInfo - Init = libnvml.Init - InitWithFlags = libnvml.InitWithFlags - SetVgpuVersion = libnvml.SetVgpuVersion - Shutdown = libnvml.Shutdown - SystemGetConfComputeCapabilities = libnvml.SystemGetConfComputeCapabilities - SystemGetConfComputeKeyRotationThresholdInfo = libnvml.SystemGetConfComputeKeyRotationThresholdInfo - SystemGetConfComputeSettings = libnvml.SystemGetConfComputeSettings - SystemGetCudaDriverVersion = libnvml.SystemGetCudaDriverVersion - SystemGetCudaDriverVersion_v2 = libnvml.SystemGetCudaDriverVersion_v2 - SystemGetDriverVersion = libnvml.SystemGetDriverVersion - SystemGetHicVersion = libnvml.SystemGetHicVersion - SystemGetNVMLVersion = libnvml.SystemGetNVMLVersion - SystemGetProcessName = libnvml.SystemGetProcessName - SystemGetTopologyGpuSet = libnvml.SystemGetTopologyGpuSet - SystemSetConfComputeKeyRotationThresholdInfo = libnvml.SystemSetConfComputeKeyRotationThresholdInfo - UnitGetCount = libnvml.UnitGetCount - UnitGetDevices = libnvml.UnitGetDevices - UnitGetFanSpeedInfo = 
libnvml.UnitGetFanSpeedInfo - UnitGetHandleByIndex = libnvml.UnitGetHandleByIndex - UnitGetLedState = libnvml.UnitGetLedState - UnitGetPsuInfo = libnvml.UnitGetPsuInfo - UnitGetTemperature = libnvml.UnitGetTemperature - UnitGetUnitInfo = libnvml.UnitGetUnitInfo - UnitSetLedState = libnvml.UnitSetLedState - VgpuInstanceClearAccountingPids = libnvml.VgpuInstanceClearAccountingPids - VgpuInstanceGetAccountingMode = libnvml.VgpuInstanceGetAccountingMode - VgpuInstanceGetAccountingPids = libnvml.VgpuInstanceGetAccountingPids - VgpuInstanceGetAccountingStats = libnvml.VgpuInstanceGetAccountingStats - VgpuInstanceGetEccMode = libnvml.VgpuInstanceGetEccMode - VgpuInstanceGetEncoderCapacity = libnvml.VgpuInstanceGetEncoderCapacity - VgpuInstanceGetEncoderSessions = libnvml.VgpuInstanceGetEncoderSessions - VgpuInstanceGetEncoderStats = libnvml.VgpuInstanceGetEncoderStats - VgpuInstanceGetFBCSessions = libnvml.VgpuInstanceGetFBCSessions - VgpuInstanceGetFBCStats = libnvml.VgpuInstanceGetFBCStats - VgpuInstanceGetFbUsage = libnvml.VgpuInstanceGetFbUsage - VgpuInstanceGetFrameRateLimit = libnvml.VgpuInstanceGetFrameRateLimit - VgpuInstanceGetGpuInstanceId = libnvml.VgpuInstanceGetGpuInstanceId - VgpuInstanceGetGpuPciId = libnvml.VgpuInstanceGetGpuPciId - VgpuInstanceGetLicenseInfo = libnvml.VgpuInstanceGetLicenseInfo - VgpuInstanceGetLicenseStatus = libnvml.VgpuInstanceGetLicenseStatus - VgpuInstanceGetMdevUUID = libnvml.VgpuInstanceGetMdevUUID - VgpuInstanceGetMetadata = libnvml.VgpuInstanceGetMetadata - VgpuInstanceGetType = libnvml.VgpuInstanceGetType - VgpuInstanceGetUUID = libnvml.VgpuInstanceGetUUID - VgpuInstanceGetVmDriverVersion = libnvml.VgpuInstanceGetVmDriverVersion - VgpuInstanceGetVmID = libnvml.VgpuInstanceGetVmID - VgpuInstanceSetEncoderCapacity = libnvml.VgpuInstanceSetEncoderCapacity - VgpuTypeGetCapabilities = libnvml.VgpuTypeGetCapabilities - VgpuTypeGetClass = libnvml.VgpuTypeGetClass - VgpuTypeGetDeviceID = libnvml.VgpuTypeGetDeviceID - 
VgpuTypeGetFrameRateLimit = libnvml.VgpuTypeGetFrameRateLimit - VgpuTypeGetFramebufferSize = libnvml.VgpuTypeGetFramebufferSize - VgpuTypeGetGpuInstanceProfileId = libnvml.VgpuTypeGetGpuInstanceProfileId - VgpuTypeGetLicense = libnvml.VgpuTypeGetLicense - VgpuTypeGetMaxInstances = libnvml.VgpuTypeGetMaxInstances - VgpuTypeGetMaxInstancesPerVm = libnvml.VgpuTypeGetMaxInstancesPerVm - VgpuTypeGetName = libnvml.VgpuTypeGetName - VgpuTypeGetNumDisplayHeads = libnvml.VgpuTypeGetNumDisplayHeads - VgpuTypeGetResolution = libnvml.VgpuTypeGetResolution + ComputeInstanceDestroy = libnvml.ComputeInstanceDestroy + ComputeInstanceGetInfo = libnvml.ComputeInstanceGetInfo + DeviceClearAccountingPids = libnvml.DeviceClearAccountingPids + DeviceClearCpuAffinity = libnvml.DeviceClearCpuAffinity + DeviceClearEccErrorCounts = libnvml.DeviceClearEccErrorCounts + DeviceClearFieldValues = libnvml.DeviceClearFieldValues + DeviceCreateGpuInstance = libnvml.DeviceCreateGpuInstance + DeviceCreateGpuInstanceWithPlacement = libnvml.DeviceCreateGpuInstanceWithPlacement + DeviceDiscoverGpus = libnvml.DeviceDiscoverGpus + DeviceFreezeNvLinkUtilizationCounter = libnvml.DeviceFreezeNvLinkUtilizationCounter + DeviceGetAPIRestriction = libnvml.DeviceGetAPIRestriction + DeviceGetAccountingBufferSize = libnvml.DeviceGetAccountingBufferSize + DeviceGetAccountingMode = libnvml.DeviceGetAccountingMode + DeviceGetAccountingPids = libnvml.DeviceGetAccountingPids + DeviceGetAccountingStats = libnvml.DeviceGetAccountingStats + DeviceGetActiveVgpus = libnvml.DeviceGetActiveVgpus + DeviceGetAdaptiveClockInfoStatus = libnvml.DeviceGetAdaptiveClockInfoStatus + DeviceGetApplicationsClock = libnvml.DeviceGetApplicationsClock + DeviceGetArchitecture = libnvml.DeviceGetArchitecture + DeviceGetAttributes = libnvml.DeviceGetAttributes + DeviceGetAutoBoostedClocksEnabled = libnvml.DeviceGetAutoBoostedClocksEnabled + DeviceGetBAR1MemoryInfo = libnvml.DeviceGetBAR1MemoryInfo + DeviceGetBoardId = libnvml.DeviceGetBoardId + 
DeviceGetBoardPartNumber = libnvml.DeviceGetBoardPartNumber + DeviceGetBrand = libnvml.DeviceGetBrand + DeviceGetBridgeChipInfo = libnvml.DeviceGetBridgeChipInfo + DeviceGetBusType = libnvml.DeviceGetBusType + DeviceGetC2cModeInfoV = libnvml.DeviceGetC2cModeInfoV + DeviceGetCapabilities = libnvml.DeviceGetCapabilities + DeviceGetClkMonStatus = libnvml.DeviceGetClkMonStatus + DeviceGetClock = libnvml.DeviceGetClock + DeviceGetClockInfo = libnvml.DeviceGetClockInfo + DeviceGetClockOffsets = libnvml.DeviceGetClockOffsets + DeviceGetComputeInstanceId = libnvml.DeviceGetComputeInstanceId + DeviceGetComputeMode = libnvml.DeviceGetComputeMode + DeviceGetComputeRunningProcesses = libnvml.DeviceGetComputeRunningProcesses + DeviceGetConfComputeGpuAttestationReport = libnvml.DeviceGetConfComputeGpuAttestationReport + DeviceGetConfComputeGpuCertificate = libnvml.DeviceGetConfComputeGpuCertificate + DeviceGetConfComputeMemSizeInfo = libnvml.DeviceGetConfComputeMemSizeInfo + DeviceGetConfComputeProtectedMemoryUsage = libnvml.DeviceGetConfComputeProtectedMemoryUsage + DeviceGetCoolerInfo = libnvml.DeviceGetCoolerInfo + DeviceGetCount = libnvml.DeviceGetCount + DeviceGetCpuAffinity = libnvml.DeviceGetCpuAffinity + DeviceGetCpuAffinityWithinScope = libnvml.DeviceGetCpuAffinityWithinScope + DeviceGetCreatableVgpus = libnvml.DeviceGetCreatableVgpus + DeviceGetCudaComputeCapability = libnvml.DeviceGetCudaComputeCapability + DeviceGetCurrPcieLinkGeneration = libnvml.DeviceGetCurrPcieLinkGeneration + DeviceGetCurrPcieLinkWidth = libnvml.DeviceGetCurrPcieLinkWidth + DeviceGetCurrentClockFreqs = libnvml.DeviceGetCurrentClockFreqs + DeviceGetCurrentClocksEventReasons = libnvml.DeviceGetCurrentClocksEventReasons + DeviceGetCurrentClocksThrottleReasons = libnvml.DeviceGetCurrentClocksThrottleReasons + DeviceGetDecoderUtilization = libnvml.DeviceGetDecoderUtilization + DeviceGetDefaultApplicationsClock = libnvml.DeviceGetDefaultApplicationsClock + DeviceGetDefaultEccMode = 
libnvml.DeviceGetDefaultEccMode + DeviceGetDetailedEccErrors = libnvml.DeviceGetDetailedEccErrors + DeviceGetDeviceHandleFromMigDeviceHandle = libnvml.DeviceGetDeviceHandleFromMigDeviceHandle + DeviceGetDisplayActive = libnvml.DeviceGetDisplayActive + DeviceGetDisplayMode = libnvml.DeviceGetDisplayMode + DeviceGetDramEncryptionMode = libnvml.DeviceGetDramEncryptionMode + DeviceGetDriverModel = libnvml.DeviceGetDriverModel + DeviceGetDriverModel_v2 = libnvml.DeviceGetDriverModel_v2 + DeviceGetDynamicPstatesInfo = libnvml.DeviceGetDynamicPstatesInfo + DeviceGetEccMode = libnvml.DeviceGetEccMode + DeviceGetEncoderCapacity = libnvml.DeviceGetEncoderCapacity + DeviceGetEncoderSessions = libnvml.DeviceGetEncoderSessions + DeviceGetEncoderStats = libnvml.DeviceGetEncoderStats + DeviceGetEncoderUtilization = libnvml.DeviceGetEncoderUtilization + DeviceGetEnforcedPowerLimit = libnvml.DeviceGetEnforcedPowerLimit + DeviceGetFBCSessions = libnvml.DeviceGetFBCSessions + DeviceGetFBCStats = libnvml.DeviceGetFBCStats + DeviceGetFanControlPolicy_v2 = libnvml.DeviceGetFanControlPolicy_v2 + DeviceGetFanSpeed = libnvml.DeviceGetFanSpeed + DeviceGetFanSpeedRPM = libnvml.DeviceGetFanSpeedRPM + DeviceGetFanSpeed_v2 = libnvml.DeviceGetFanSpeed_v2 + DeviceGetFieldValues = libnvml.DeviceGetFieldValues + DeviceGetGpcClkMinMaxVfOffset = libnvml.DeviceGetGpcClkMinMaxVfOffset + DeviceGetGpcClkVfOffset = libnvml.DeviceGetGpcClkVfOffset + DeviceGetGpuFabricInfo = libnvml.DeviceGetGpuFabricInfo + DeviceGetGpuFabricInfoV = libnvml.DeviceGetGpuFabricInfoV + DeviceGetGpuInstanceById = libnvml.DeviceGetGpuInstanceById + DeviceGetGpuInstanceId = libnvml.DeviceGetGpuInstanceId + DeviceGetGpuInstancePossiblePlacements = libnvml.DeviceGetGpuInstancePossiblePlacements + DeviceGetGpuInstanceProfileInfo = libnvml.DeviceGetGpuInstanceProfileInfo + DeviceGetGpuInstanceProfileInfoV = libnvml.DeviceGetGpuInstanceProfileInfoV + DeviceGetGpuInstanceRemainingCapacity = libnvml.DeviceGetGpuInstanceRemainingCapacity 
+ DeviceGetGpuInstances = libnvml.DeviceGetGpuInstances + DeviceGetGpuMaxPcieLinkGeneration = libnvml.DeviceGetGpuMaxPcieLinkGeneration + DeviceGetGpuOperationMode = libnvml.DeviceGetGpuOperationMode + DeviceGetGraphicsRunningProcesses = libnvml.DeviceGetGraphicsRunningProcesses + DeviceGetGridLicensableFeatures = libnvml.DeviceGetGridLicensableFeatures + DeviceGetGspFirmwareMode = libnvml.DeviceGetGspFirmwareMode + DeviceGetGspFirmwareVersion = libnvml.DeviceGetGspFirmwareVersion + DeviceGetHandleByIndex = libnvml.DeviceGetHandleByIndex + DeviceGetHandleByPciBusId = libnvml.DeviceGetHandleByPciBusId + DeviceGetHandleBySerial = libnvml.DeviceGetHandleBySerial + DeviceGetHandleByUUID = libnvml.DeviceGetHandleByUUID + DeviceGetHandleByUUIDV = libnvml.DeviceGetHandleByUUIDV + DeviceGetHostVgpuMode = libnvml.DeviceGetHostVgpuMode + DeviceGetIndex = libnvml.DeviceGetIndex + DeviceGetInforomConfigurationChecksum = libnvml.DeviceGetInforomConfigurationChecksum + DeviceGetInforomImageVersion = libnvml.DeviceGetInforomImageVersion + DeviceGetInforomVersion = libnvml.DeviceGetInforomVersion + DeviceGetIrqNum = libnvml.DeviceGetIrqNum + DeviceGetJpgUtilization = libnvml.DeviceGetJpgUtilization + DeviceGetLastBBXFlushTime = libnvml.DeviceGetLastBBXFlushTime + DeviceGetMPSComputeRunningProcesses = libnvml.DeviceGetMPSComputeRunningProcesses + DeviceGetMarginTemperature = libnvml.DeviceGetMarginTemperature + DeviceGetMaxClockInfo = libnvml.DeviceGetMaxClockInfo + DeviceGetMaxCustomerBoostClock = libnvml.DeviceGetMaxCustomerBoostClock + DeviceGetMaxMigDeviceCount = libnvml.DeviceGetMaxMigDeviceCount + DeviceGetMaxPcieLinkGeneration = libnvml.DeviceGetMaxPcieLinkGeneration + DeviceGetMaxPcieLinkWidth = libnvml.DeviceGetMaxPcieLinkWidth + DeviceGetMemClkMinMaxVfOffset = libnvml.DeviceGetMemClkMinMaxVfOffset + DeviceGetMemClkVfOffset = libnvml.DeviceGetMemClkVfOffset + DeviceGetMemoryAffinity = libnvml.DeviceGetMemoryAffinity + DeviceGetMemoryBusWidth = 
libnvml.DeviceGetMemoryBusWidth + DeviceGetMemoryErrorCounter = libnvml.DeviceGetMemoryErrorCounter + DeviceGetMemoryInfo = libnvml.DeviceGetMemoryInfo + DeviceGetMemoryInfo_v2 = libnvml.DeviceGetMemoryInfo_v2 + DeviceGetMigDeviceHandleByIndex = libnvml.DeviceGetMigDeviceHandleByIndex + DeviceGetMigMode = libnvml.DeviceGetMigMode + DeviceGetMinMaxClockOfPState = libnvml.DeviceGetMinMaxClockOfPState + DeviceGetMinMaxFanSpeed = libnvml.DeviceGetMinMaxFanSpeed + DeviceGetMinorNumber = libnvml.DeviceGetMinorNumber + DeviceGetModuleId = libnvml.DeviceGetModuleId + DeviceGetMultiGpuBoard = libnvml.DeviceGetMultiGpuBoard + DeviceGetName = libnvml.DeviceGetName + DeviceGetNumFans = libnvml.DeviceGetNumFans + DeviceGetNumGpuCores = libnvml.DeviceGetNumGpuCores + DeviceGetNumaNodeId = libnvml.DeviceGetNumaNodeId + DeviceGetNvLinkCapability = libnvml.DeviceGetNvLinkCapability + DeviceGetNvLinkErrorCounter = libnvml.DeviceGetNvLinkErrorCounter + DeviceGetNvLinkRemoteDeviceType = libnvml.DeviceGetNvLinkRemoteDeviceType + DeviceGetNvLinkRemotePciInfo = libnvml.DeviceGetNvLinkRemotePciInfo + DeviceGetNvLinkState = libnvml.DeviceGetNvLinkState + DeviceGetNvLinkUtilizationControl = libnvml.DeviceGetNvLinkUtilizationControl + DeviceGetNvLinkUtilizationCounter = libnvml.DeviceGetNvLinkUtilizationCounter + DeviceGetNvLinkVersion = libnvml.DeviceGetNvLinkVersion + DeviceGetNvlinkBwMode = libnvml.DeviceGetNvlinkBwMode + DeviceGetNvlinkSupportedBwModes = libnvml.DeviceGetNvlinkSupportedBwModes + DeviceGetOfaUtilization = libnvml.DeviceGetOfaUtilization + DeviceGetP2PStatus = libnvml.DeviceGetP2PStatus + DeviceGetPciInfo = libnvml.DeviceGetPciInfo + DeviceGetPciInfoExt = libnvml.DeviceGetPciInfoExt + DeviceGetPcieLinkMaxSpeed = libnvml.DeviceGetPcieLinkMaxSpeed + DeviceGetPcieReplayCounter = libnvml.DeviceGetPcieReplayCounter + DeviceGetPcieSpeed = libnvml.DeviceGetPcieSpeed + DeviceGetPcieThroughput = libnvml.DeviceGetPcieThroughput + DeviceGetPerformanceModes = 
libnvml.DeviceGetPerformanceModes + DeviceGetPerformanceState = libnvml.DeviceGetPerformanceState + DeviceGetPersistenceMode = libnvml.DeviceGetPersistenceMode + DeviceGetPgpuMetadataString = libnvml.DeviceGetPgpuMetadataString + DeviceGetPlatformInfo = libnvml.DeviceGetPlatformInfo + DeviceGetPowerManagementDefaultLimit = libnvml.DeviceGetPowerManagementDefaultLimit + DeviceGetPowerManagementLimit = libnvml.DeviceGetPowerManagementLimit + DeviceGetPowerManagementLimitConstraints = libnvml.DeviceGetPowerManagementLimitConstraints + DeviceGetPowerManagementMode = libnvml.DeviceGetPowerManagementMode + DeviceGetPowerSource = libnvml.DeviceGetPowerSource + DeviceGetPowerState = libnvml.DeviceGetPowerState + DeviceGetPowerUsage = libnvml.DeviceGetPowerUsage + DeviceGetProcessUtilization = libnvml.DeviceGetProcessUtilization + DeviceGetProcessesUtilizationInfo = libnvml.DeviceGetProcessesUtilizationInfo + DeviceGetRemappedRows = libnvml.DeviceGetRemappedRows + DeviceGetRetiredPages = libnvml.DeviceGetRetiredPages + DeviceGetRetiredPagesPendingStatus = libnvml.DeviceGetRetiredPagesPendingStatus + DeviceGetRetiredPages_v2 = libnvml.DeviceGetRetiredPages_v2 + DeviceGetRowRemapperHistogram = libnvml.DeviceGetRowRemapperHistogram + DeviceGetRunningProcessDetailList = libnvml.DeviceGetRunningProcessDetailList + DeviceGetSamples = libnvml.DeviceGetSamples + DeviceGetSerial = libnvml.DeviceGetSerial + DeviceGetSramEccErrorStatus = libnvml.DeviceGetSramEccErrorStatus + DeviceGetSupportedClocksEventReasons = libnvml.DeviceGetSupportedClocksEventReasons + DeviceGetSupportedClocksThrottleReasons = libnvml.DeviceGetSupportedClocksThrottleReasons + DeviceGetSupportedEventTypes = libnvml.DeviceGetSupportedEventTypes + DeviceGetSupportedGraphicsClocks = libnvml.DeviceGetSupportedGraphicsClocks + DeviceGetSupportedMemoryClocks = libnvml.DeviceGetSupportedMemoryClocks + DeviceGetSupportedPerformanceStates = libnvml.DeviceGetSupportedPerformanceStates + DeviceGetSupportedVgpus = 
libnvml.DeviceGetSupportedVgpus + DeviceGetTargetFanSpeed = libnvml.DeviceGetTargetFanSpeed + DeviceGetTemperature = libnvml.DeviceGetTemperature + DeviceGetTemperatureThreshold = libnvml.DeviceGetTemperatureThreshold + DeviceGetTemperatureV = libnvml.DeviceGetTemperatureV + DeviceGetThermalSettings = libnvml.DeviceGetThermalSettings + DeviceGetTopologyCommonAncestor = libnvml.DeviceGetTopologyCommonAncestor + DeviceGetTopologyNearestGpus = libnvml.DeviceGetTopologyNearestGpus + DeviceGetTotalEccErrors = libnvml.DeviceGetTotalEccErrors + DeviceGetTotalEnergyConsumption = libnvml.DeviceGetTotalEnergyConsumption + DeviceGetUUID = libnvml.DeviceGetUUID + DeviceGetUtilizationRates = libnvml.DeviceGetUtilizationRates + DeviceGetVbiosVersion = libnvml.DeviceGetVbiosVersion + DeviceGetVgpuCapabilities = libnvml.DeviceGetVgpuCapabilities + DeviceGetVgpuHeterogeneousMode = libnvml.DeviceGetVgpuHeterogeneousMode + DeviceGetVgpuInstancesUtilizationInfo = libnvml.DeviceGetVgpuInstancesUtilizationInfo + DeviceGetVgpuMetadata = libnvml.DeviceGetVgpuMetadata + DeviceGetVgpuProcessUtilization = libnvml.DeviceGetVgpuProcessUtilization + DeviceGetVgpuProcessesUtilizationInfo = libnvml.DeviceGetVgpuProcessesUtilizationInfo + DeviceGetVgpuSchedulerCapabilities = libnvml.DeviceGetVgpuSchedulerCapabilities + DeviceGetVgpuSchedulerLog = libnvml.DeviceGetVgpuSchedulerLog + DeviceGetVgpuSchedulerState = libnvml.DeviceGetVgpuSchedulerState + DeviceGetVgpuTypeCreatablePlacements = libnvml.DeviceGetVgpuTypeCreatablePlacements + DeviceGetVgpuTypeSupportedPlacements = libnvml.DeviceGetVgpuTypeSupportedPlacements + DeviceGetVgpuUtilization = libnvml.DeviceGetVgpuUtilization + DeviceGetViolationStatus = libnvml.DeviceGetViolationStatus + DeviceGetVirtualizationMode = libnvml.DeviceGetVirtualizationMode + DeviceIsMigDeviceHandle = libnvml.DeviceIsMigDeviceHandle + DeviceModifyDrainState = libnvml.DeviceModifyDrainState + DeviceOnSameBoard = libnvml.DeviceOnSameBoard + 
DevicePowerSmoothingActivatePresetProfile = libnvml.DevicePowerSmoothingActivatePresetProfile + DevicePowerSmoothingSetState = libnvml.DevicePowerSmoothingSetState + DevicePowerSmoothingUpdatePresetProfileParam = libnvml.DevicePowerSmoothingUpdatePresetProfileParam + DeviceQueryDrainState = libnvml.DeviceQueryDrainState + DeviceRegisterEvents = libnvml.DeviceRegisterEvents + DeviceRemoveGpu = libnvml.DeviceRemoveGpu + DeviceRemoveGpu_v2 = libnvml.DeviceRemoveGpu_v2 + DeviceResetApplicationsClocks = libnvml.DeviceResetApplicationsClocks + DeviceResetGpuLockedClocks = libnvml.DeviceResetGpuLockedClocks + DeviceResetMemoryLockedClocks = libnvml.DeviceResetMemoryLockedClocks + DeviceResetNvLinkErrorCounters = libnvml.DeviceResetNvLinkErrorCounters + DeviceResetNvLinkUtilizationCounter = libnvml.DeviceResetNvLinkUtilizationCounter + DeviceSetAPIRestriction = libnvml.DeviceSetAPIRestriction + DeviceSetAccountingMode = libnvml.DeviceSetAccountingMode + DeviceSetApplicationsClocks = libnvml.DeviceSetApplicationsClocks + DeviceSetAutoBoostedClocksEnabled = libnvml.DeviceSetAutoBoostedClocksEnabled + DeviceSetClockOffsets = libnvml.DeviceSetClockOffsets + DeviceSetComputeMode = libnvml.DeviceSetComputeMode + DeviceSetConfComputeUnprotectedMemSize = libnvml.DeviceSetConfComputeUnprotectedMemSize + DeviceSetCpuAffinity = libnvml.DeviceSetCpuAffinity + DeviceSetDefaultAutoBoostedClocksEnabled = libnvml.DeviceSetDefaultAutoBoostedClocksEnabled + DeviceSetDefaultFanSpeed_v2 = libnvml.DeviceSetDefaultFanSpeed_v2 + DeviceSetDramEncryptionMode = libnvml.DeviceSetDramEncryptionMode + DeviceSetDriverModel = libnvml.DeviceSetDriverModel + DeviceSetEccMode = libnvml.DeviceSetEccMode + DeviceSetFanControlPolicy = libnvml.DeviceSetFanControlPolicy + DeviceSetFanSpeed_v2 = libnvml.DeviceSetFanSpeed_v2 + DeviceSetGpcClkVfOffset = libnvml.DeviceSetGpcClkVfOffset + DeviceSetGpuLockedClocks = libnvml.DeviceSetGpuLockedClocks + DeviceSetGpuOperationMode = libnvml.DeviceSetGpuOperationMode + 
DeviceSetMemClkVfOffset = libnvml.DeviceSetMemClkVfOffset + DeviceSetMemoryLockedClocks = libnvml.DeviceSetMemoryLockedClocks + DeviceSetMigMode = libnvml.DeviceSetMigMode + DeviceSetNvLinkDeviceLowPowerThreshold = libnvml.DeviceSetNvLinkDeviceLowPowerThreshold + DeviceSetNvLinkUtilizationControl = libnvml.DeviceSetNvLinkUtilizationControl + DeviceSetNvlinkBwMode = libnvml.DeviceSetNvlinkBwMode + DeviceSetPersistenceMode = libnvml.DeviceSetPersistenceMode + DeviceSetPowerManagementLimit = libnvml.DeviceSetPowerManagementLimit + DeviceSetPowerManagementLimit_v2 = libnvml.DeviceSetPowerManagementLimit_v2 + DeviceSetTemperatureThreshold = libnvml.DeviceSetTemperatureThreshold + DeviceSetVgpuCapabilities = libnvml.DeviceSetVgpuCapabilities + DeviceSetVgpuHeterogeneousMode = libnvml.DeviceSetVgpuHeterogeneousMode + DeviceSetVgpuSchedulerState = libnvml.DeviceSetVgpuSchedulerState + DeviceSetVirtualizationMode = libnvml.DeviceSetVirtualizationMode + DeviceValidateInforom = libnvml.DeviceValidateInforom + DeviceWorkloadPowerProfileClearRequestedProfiles = libnvml.DeviceWorkloadPowerProfileClearRequestedProfiles + DeviceWorkloadPowerProfileGetCurrentProfiles = libnvml.DeviceWorkloadPowerProfileGetCurrentProfiles + DeviceWorkloadPowerProfileGetProfilesInfo = libnvml.DeviceWorkloadPowerProfileGetProfilesInfo + DeviceWorkloadPowerProfileSetRequestedProfiles = libnvml.DeviceWorkloadPowerProfileSetRequestedProfiles + ErrorString = libnvml.ErrorString + EventSetCreate = libnvml.EventSetCreate + EventSetFree = libnvml.EventSetFree + EventSetWait = libnvml.EventSetWait + Extensions = libnvml.Extensions + GetExcludedDeviceCount = libnvml.GetExcludedDeviceCount + GetExcludedDeviceInfoByIndex = libnvml.GetExcludedDeviceInfoByIndex + GetVgpuCompatibility = libnvml.GetVgpuCompatibility + GetVgpuDriverCapabilities = libnvml.GetVgpuDriverCapabilities + GetVgpuVersion = libnvml.GetVgpuVersion + GpmMetricsGet = libnvml.GpmMetricsGet + GpmMetricsGetV = libnvml.GpmMetricsGetV + 
GpmMigSampleGet = libnvml.GpmMigSampleGet + GpmQueryDeviceSupport = libnvml.GpmQueryDeviceSupport + GpmQueryDeviceSupportV = libnvml.GpmQueryDeviceSupportV + GpmQueryIfStreamingEnabled = libnvml.GpmQueryIfStreamingEnabled + GpmSampleAlloc = libnvml.GpmSampleAlloc + GpmSampleFree = libnvml.GpmSampleFree + GpmSampleGet = libnvml.GpmSampleGet + GpmSetStreamingEnabled = libnvml.GpmSetStreamingEnabled + GpuInstanceCreateComputeInstance = libnvml.GpuInstanceCreateComputeInstance + GpuInstanceCreateComputeInstanceWithPlacement = libnvml.GpuInstanceCreateComputeInstanceWithPlacement + GpuInstanceDestroy = libnvml.GpuInstanceDestroy + GpuInstanceGetActiveVgpus = libnvml.GpuInstanceGetActiveVgpus + GpuInstanceGetComputeInstanceById = libnvml.GpuInstanceGetComputeInstanceById + GpuInstanceGetComputeInstancePossiblePlacements = libnvml.GpuInstanceGetComputeInstancePossiblePlacements + GpuInstanceGetComputeInstanceProfileInfo = libnvml.GpuInstanceGetComputeInstanceProfileInfo + GpuInstanceGetComputeInstanceProfileInfoV = libnvml.GpuInstanceGetComputeInstanceProfileInfoV + GpuInstanceGetComputeInstanceRemainingCapacity = libnvml.GpuInstanceGetComputeInstanceRemainingCapacity + GpuInstanceGetComputeInstances = libnvml.GpuInstanceGetComputeInstances + GpuInstanceGetCreatableVgpus = libnvml.GpuInstanceGetCreatableVgpus + GpuInstanceGetInfo = libnvml.GpuInstanceGetInfo + GpuInstanceGetVgpuHeterogeneousMode = libnvml.GpuInstanceGetVgpuHeterogeneousMode + GpuInstanceGetVgpuSchedulerLog = libnvml.GpuInstanceGetVgpuSchedulerLog + GpuInstanceGetVgpuSchedulerState = libnvml.GpuInstanceGetVgpuSchedulerState + GpuInstanceGetVgpuTypeCreatablePlacements = libnvml.GpuInstanceGetVgpuTypeCreatablePlacements + GpuInstanceSetVgpuHeterogeneousMode = libnvml.GpuInstanceSetVgpuHeterogeneousMode + GpuInstanceSetVgpuSchedulerState = libnvml.GpuInstanceSetVgpuSchedulerState + Init = libnvml.Init + InitWithFlags = libnvml.InitWithFlags + SetVgpuVersion = libnvml.SetVgpuVersion + Shutdown = 
libnvml.Shutdown + SystemEventSetCreate = libnvml.SystemEventSetCreate + SystemEventSetFree = libnvml.SystemEventSetFree + SystemEventSetWait = libnvml.SystemEventSetWait + SystemGetConfComputeCapabilities = libnvml.SystemGetConfComputeCapabilities + SystemGetConfComputeGpusReadyState = libnvml.SystemGetConfComputeGpusReadyState + SystemGetConfComputeKeyRotationThresholdInfo = libnvml.SystemGetConfComputeKeyRotationThresholdInfo + SystemGetConfComputeSettings = libnvml.SystemGetConfComputeSettings + SystemGetConfComputeState = libnvml.SystemGetConfComputeState + SystemGetCudaDriverVersion = libnvml.SystemGetCudaDriverVersion + SystemGetCudaDriverVersion_v2 = libnvml.SystemGetCudaDriverVersion_v2 + SystemGetDriverBranch = libnvml.SystemGetDriverBranch + SystemGetDriverVersion = libnvml.SystemGetDriverVersion + SystemGetHicVersion = libnvml.SystemGetHicVersion + SystemGetNVMLVersion = libnvml.SystemGetNVMLVersion + SystemGetNvlinkBwMode = libnvml.SystemGetNvlinkBwMode + SystemGetProcessName = libnvml.SystemGetProcessName + SystemGetTopologyGpuSet = libnvml.SystemGetTopologyGpuSet + SystemRegisterEvents = libnvml.SystemRegisterEvents + SystemSetConfComputeGpusReadyState = libnvml.SystemSetConfComputeGpusReadyState + SystemSetConfComputeKeyRotationThresholdInfo = libnvml.SystemSetConfComputeKeyRotationThresholdInfo + SystemSetNvlinkBwMode = libnvml.SystemSetNvlinkBwMode + UnitGetCount = libnvml.UnitGetCount + UnitGetDevices = libnvml.UnitGetDevices + UnitGetFanSpeedInfo = libnvml.UnitGetFanSpeedInfo + UnitGetHandleByIndex = libnvml.UnitGetHandleByIndex + UnitGetLedState = libnvml.UnitGetLedState + UnitGetPsuInfo = libnvml.UnitGetPsuInfo + UnitGetTemperature = libnvml.UnitGetTemperature + UnitGetUnitInfo = libnvml.UnitGetUnitInfo + UnitSetLedState = libnvml.UnitSetLedState + VgpuInstanceClearAccountingPids = libnvml.VgpuInstanceClearAccountingPids + VgpuInstanceGetAccountingMode = libnvml.VgpuInstanceGetAccountingMode + VgpuInstanceGetAccountingPids = 
libnvml.VgpuInstanceGetAccountingPids + VgpuInstanceGetAccountingStats = libnvml.VgpuInstanceGetAccountingStats + VgpuInstanceGetEccMode = libnvml.VgpuInstanceGetEccMode + VgpuInstanceGetEncoderCapacity = libnvml.VgpuInstanceGetEncoderCapacity + VgpuInstanceGetEncoderSessions = libnvml.VgpuInstanceGetEncoderSessions + VgpuInstanceGetEncoderStats = libnvml.VgpuInstanceGetEncoderStats + VgpuInstanceGetFBCSessions = libnvml.VgpuInstanceGetFBCSessions + VgpuInstanceGetFBCStats = libnvml.VgpuInstanceGetFBCStats + VgpuInstanceGetFbUsage = libnvml.VgpuInstanceGetFbUsage + VgpuInstanceGetFrameRateLimit = libnvml.VgpuInstanceGetFrameRateLimit + VgpuInstanceGetGpuInstanceId = libnvml.VgpuInstanceGetGpuInstanceId + VgpuInstanceGetGpuPciId = libnvml.VgpuInstanceGetGpuPciId + VgpuInstanceGetLicenseInfo = libnvml.VgpuInstanceGetLicenseInfo + VgpuInstanceGetLicenseStatus = libnvml.VgpuInstanceGetLicenseStatus + VgpuInstanceGetMdevUUID = libnvml.VgpuInstanceGetMdevUUID + VgpuInstanceGetMetadata = libnvml.VgpuInstanceGetMetadata + VgpuInstanceGetRuntimeStateSize = libnvml.VgpuInstanceGetRuntimeStateSize + VgpuInstanceGetType = libnvml.VgpuInstanceGetType + VgpuInstanceGetUUID = libnvml.VgpuInstanceGetUUID + VgpuInstanceGetVmDriverVersion = libnvml.VgpuInstanceGetVmDriverVersion + VgpuInstanceGetVmID = libnvml.VgpuInstanceGetVmID + VgpuInstanceSetEncoderCapacity = libnvml.VgpuInstanceSetEncoderCapacity + VgpuTypeGetBAR1Info = libnvml.VgpuTypeGetBAR1Info + VgpuTypeGetCapabilities = libnvml.VgpuTypeGetCapabilities + VgpuTypeGetClass = libnvml.VgpuTypeGetClass + VgpuTypeGetDeviceID = libnvml.VgpuTypeGetDeviceID + VgpuTypeGetFrameRateLimit = libnvml.VgpuTypeGetFrameRateLimit + VgpuTypeGetFramebufferSize = libnvml.VgpuTypeGetFramebufferSize + VgpuTypeGetGpuInstanceProfileId = libnvml.VgpuTypeGetGpuInstanceProfileId + VgpuTypeGetLicense = libnvml.VgpuTypeGetLicense + VgpuTypeGetMaxInstances = libnvml.VgpuTypeGetMaxInstances + VgpuTypeGetMaxInstancesPerGpuInstance = 
libnvml.VgpuTypeGetMaxInstancesPerGpuInstance + VgpuTypeGetMaxInstancesPerVm = libnvml.VgpuTypeGetMaxInstancesPerVm + VgpuTypeGetName = libnvml.VgpuTypeGetName + VgpuTypeGetNumDisplayHeads = libnvml.VgpuTypeGetNumDisplayHeads + VgpuTypeGetResolution = libnvml.VgpuTypeGetResolution ) // Interface represents the interface for the library type. @@ -375,9 +420,11 @@ type Interface interface { DeviceGetBridgeChipInfo(Device) (BridgeChipHierarchy, Return) DeviceGetBusType(Device) (BusType, Return) DeviceGetC2cModeInfoV(Device) C2cModeInfoHandler + DeviceGetCapabilities(Device) (DeviceCapabilities, Return) DeviceGetClkMonStatus(Device) (ClkMonStatus, Return) DeviceGetClock(Device, ClockType, ClockId) (uint32, Return) DeviceGetClockInfo(Device, ClockType) (uint32, Return) + DeviceGetClockOffsets(Device) (ClockOffset, Return) DeviceGetComputeInstanceId(Device) (int, Return) DeviceGetComputeMode(Device) (ComputeMode, Return) DeviceGetComputeRunningProcesses(Device) ([]ProcessInfo, Return) @@ -385,6 +432,7 @@ type Interface interface { DeviceGetConfComputeGpuCertificate(Device) (ConfComputeGpuCertificate, Return) DeviceGetConfComputeMemSizeInfo(Device) (ConfComputeMemSizeInfo, Return) DeviceGetConfComputeProtectedMemoryUsage(Device) (Memory, Return) + DeviceGetCoolerInfo(Device) (CoolerInfo, Return) DeviceGetCount() (int, Return) DeviceGetCpuAffinity(Device, int) ([]uint, Return) DeviceGetCpuAffinityWithinScope(Device, int, AffinityScope) ([]uint, Return) @@ -392,6 +440,7 @@ type Interface interface { DeviceGetCudaComputeCapability(Device) (int, int, Return) DeviceGetCurrPcieLinkGeneration(Device) (int, Return) DeviceGetCurrPcieLinkWidth(Device) (int, Return) + DeviceGetCurrentClockFreqs(Device) (DeviceCurrentClockFreqs, Return) DeviceGetCurrentClocksEventReasons(Device) (uint64, Return) DeviceGetCurrentClocksThrottleReasons(Device) (uint64, Return) DeviceGetDecoderUtilization(Device) (uint32, uint32, Return) @@ -401,7 +450,9 @@ type Interface interface { 
DeviceGetDeviceHandleFromMigDeviceHandle(Device) (Device, Return) DeviceGetDisplayActive(Device) (EnableState, Return) DeviceGetDisplayMode(Device) (EnableState, Return) + DeviceGetDramEncryptionMode(Device) (DramEncryptionInfo, DramEncryptionInfo, Return) DeviceGetDriverModel(Device) (DriverModel, DriverModel, Return) + DeviceGetDriverModel_v2(Device) (DriverModel, DriverModel, Return) DeviceGetDynamicPstatesInfo(Device) (GpuDynamicPstatesInfo, Return) DeviceGetEccMode(Device) (EnableState, EnableState, Return) DeviceGetEncoderCapacity(Device, EncoderType) (int, Return) @@ -413,6 +464,7 @@ type Interface interface { DeviceGetFBCStats(Device) (FBCStats, Return) DeviceGetFanControlPolicy_v2(Device, int) (FanControlPolicy, Return) DeviceGetFanSpeed(Device) (uint32, Return) + DeviceGetFanSpeedRPM(Device) (FanSpeedInfo, Return) DeviceGetFanSpeed_v2(Device, int) (uint32, Return) DeviceGetFieldValues(Device, []FieldValue) Return DeviceGetGpcClkMinMaxVfOffset(Device) (int, int, Return) @@ -436,6 +488,7 @@ type Interface interface { DeviceGetHandleByPciBusId(string) (Device, Return) DeviceGetHandleBySerial(string) (Device, Return) DeviceGetHandleByUUID(string) (Device, Return) + DeviceGetHandleByUUIDV(*UUID) (Device, Return) DeviceGetHostVgpuMode(Device) (HostVgpuMode, Return) DeviceGetIndex(Device) (int, Return) DeviceGetInforomConfigurationChecksum(Device) (uint32, Return) @@ -445,6 +498,7 @@ type Interface interface { DeviceGetJpgUtilization(Device) (uint32, uint32, Return) DeviceGetLastBBXFlushTime(Device) (uint64, uint, Return) DeviceGetMPSComputeRunningProcesses(Device) ([]ProcessInfo, Return) + DeviceGetMarginTemperature(Device) (MarginTemperature, Return) DeviceGetMaxClockInfo(Device, ClockType) (uint32, Return) DeviceGetMaxCustomerBoostClock(Device, ClockType) (uint32, Return) DeviceGetMaxMigDeviceCount(Device) (int, Return) @@ -476,6 +530,8 @@ type Interface interface { DeviceGetNvLinkUtilizationControl(Device, int, int) (NvLinkUtilizationControl, Return) 
DeviceGetNvLinkUtilizationCounter(Device, int, int) (uint64, uint64, Return) DeviceGetNvLinkVersion(Device, int) (uint32, Return) + DeviceGetNvlinkBwMode(Device) (NvlinkGetBwMode, Return) + DeviceGetNvlinkSupportedBwModes(Device) (NvlinkSupportedBwModes, Return) DeviceGetOfaUtilization(Device) (uint32, uint32, Return) DeviceGetP2PStatus(Device, Device, GpuP2PCapsIndex) (GpuP2PStatus, Return) DeviceGetPciInfo(Device) (PciInfo, Return) @@ -484,9 +540,11 @@ type Interface interface { DeviceGetPcieReplayCounter(Device) (int, Return) DeviceGetPcieSpeed(Device) (int, Return) DeviceGetPcieThroughput(Device, PcieUtilCounter) (uint32, Return) + DeviceGetPerformanceModes(Device) (DevicePerfModes, Return) DeviceGetPerformanceState(Device) (Pstates, Return) DeviceGetPersistenceMode(Device) (EnableState, Return) DeviceGetPgpuMetadataString(Device) (string, Return) + DeviceGetPlatformInfo(Device) (PlatformInfo, Return) DeviceGetPowerManagementDefaultLimit(Device) (uint32, Return) DeviceGetPowerManagementLimit(Device) (uint32, Return) DeviceGetPowerManagementLimitConstraints(Device) (uint32, uint32, Return) @@ -515,6 +573,7 @@ type Interface interface { DeviceGetTargetFanSpeed(Device, int) (int, Return) DeviceGetTemperature(Device, TemperatureSensors) (uint32, Return) DeviceGetTemperatureThreshold(Device, TemperatureThresholds) (uint32, Return) + DeviceGetTemperatureV(Device) TemperatureHandler DeviceGetThermalSettings(Device, uint32) (GpuThermalSettings, Return) DeviceGetTopologyCommonAncestor(Device, Device) (GpuTopologyLevel, Return) DeviceGetTopologyNearestGpus(Device, GpuTopologyLevel) ([]Device, Return) @@ -540,6 +599,9 @@ type Interface interface { DeviceIsMigDeviceHandle(Device) (bool, Return) DeviceModifyDrainState(*PciInfo, EnableState) Return DeviceOnSameBoard(Device, Device) (int, Return) + DevicePowerSmoothingActivatePresetProfile(Device, *PowerSmoothingProfile) Return + DevicePowerSmoothingSetState(Device, *PowerSmoothingState) Return + 
DevicePowerSmoothingUpdatePresetProfileParam(Device, *PowerSmoothingProfile) Return DeviceQueryDrainState(*PciInfo) (EnableState, Return) DeviceRegisterEvents(Device, uint64, EventSet) Return DeviceRemoveGpu(*PciInfo) Return @@ -553,11 +615,13 @@ type Interface interface { DeviceSetAccountingMode(Device, EnableState) Return DeviceSetApplicationsClocks(Device, uint32, uint32) Return DeviceSetAutoBoostedClocksEnabled(Device, EnableState) Return + DeviceSetClockOffsets(Device, ClockOffset) Return DeviceSetComputeMode(Device, ComputeMode) Return DeviceSetConfComputeUnprotectedMemSize(Device, uint64) Return DeviceSetCpuAffinity(Device) Return DeviceSetDefaultAutoBoostedClocksEnabled(Device, EnableState, uint32) Return DeviceSetDefaultFanSpeed_v2(Device, int) Return + DeviceSetDramEncryptionMode(Device, *DramEncryptionInfo) Return DeviceSetDriverModel(Device, DriverModel, uint32) Return DeviceSetEccMode(Device, EnableState) Return DeviceSetFanControlPolicy(Device, int, FanControlPolicy) Return @@ -570,6 +634,7 @@ type Interface interface { DeviceSetMigMode(Device, int) (Return, Return) DeviceSetNvLinkDeviceLowPowerThreshold(Device, *NvLinkPowerThres) Return DeviceSetNvLinkUtilizationControl(Device, int, int, *NvLinkUtilizationControl, bool) Return + DeviceSetNvlinkBwMode(Device, *NvlinkSetBwMode) Return DeviceSetPersistenceMode(Device, EnableState) Return DeviceSetPowerManagementLimit(Device, uint32) Return DeviceSetPowerManagementLimit_v2(Device, *PowerValue_v2) Return @@ -579,6 +644,10 @@ type Interface interface { DeviceSetVgpuSchedulerState(Device, *VgpuSchedulerSetState) Return DeviceSetVirtualizationMode(Device, GpuVirtualizationMode) Return DeviceValidateInforom(Device) Return + DeviceWorkloadPowerProfileClearRequestedProfiles(Device, *WorkloadPowerProfileRequestedProfiles) Return + DeviceWorkloadPowerProfileGetCurrentProfiles(Device) (WorkloadPowerProfileCurrentProfiles, Return) + DeviceWorkloadPowerProfileGetProfilesInfo(Device) 
(WorkloadPowerProfileProfilesInfo, Return) + DeviceWorkloadPowerProfileSetRequestedProfiles(Device, *WorkloadPowerProfileRequestedProfiles) Return ErrorString(Return) string EventSetCreate() (EventSet, Return) EventSetFree(EventSet) Return @@ -602,28 +671,46 @@ type Interface interface { GpuInstanceCreateComputeInstance(GpuInstance, *ComputeInstanceProfileInfo) (ComputeInstance, Return) GpuInstanceCreateComputeInstanceWithPlacement(GpuInstance, *ComputeInstanceProfileInfo, *ComputeInstancePlacement) (ComputeInstance, Return) GpuInstanceDestroy(GpuInstance) Return + GpuInstanceGetActiveVgpus(GpuInstance) (ActiveVgpuInstanceInfo, Return) GpuInstanceGetComputeInstanceById(GpuInstance, int) (ComputeInstance, Return) GpuInstanceGetComputeInstancePossiblePlacements(GpuInstance, *ComputeInstanceProfileInfo) ([]ComputeInstancePlacement, Return) GpuInstanceGetComputeInstanceProfileInfo(GpuInstance, int, int) (ComputeInstanceProfileInfo, Return) GpuInstanceGetComputeInstanceProfileInfoV(GpuInstance, int, int) ComputeInstanceProfileInfoHandler GpuInstanceGetComputeInstanceRemainingCapacity(GpuInstance, *ComputeInstanceProfileInfo) (int, Return) GpuInstanceGetComputeInstances(GpuInstance, *ComputeInstanceProfileInfo) ([]ComputeInstance, Return) + GpuInstanceGetCreatableVgpus(GpuInstance) (VgpuTypeIdInfo, Return) GpuInstanceGetInfo(GpuInstance) (GpuInstanceInfo, Return) + GpuInstanceGetVgpuHeterogeneousMode(GpuInstance) (VgpuHeterogeneousMode, Return) + GpuInstanceGetVgpuSchedulerLog(GpuInstance) (VgpuSchedulerLogInfo, Return) + GpuInstanceGetVgpuSchedulerState(GpuInstance) (VgpuSchedulerStateInfo, Return) + GpuInstanceGetVgpuTypeCreatablePlacements(GpuInstance) (VgpuCreatablePlacementInfo, Return) + GpuInstanceSetVgpuHeterogeneousMode(GpuInstance, *VgpuHeterogeneousMode) Return + GpuInstanceSetVgpuSchedulerState(GpuInstance, *VgpuSchedulerState) Return Init() Return InitWithFlags(uint32) Return SetVgpuVersion(*VgpuVersion) Return Shutdown() Return + 
SystemEventSetCreate(*SystemEventSetCreateRequest) Return + SystemEventSetFree(*SystemEventSetFreeRequest) Return + SystemEventSetWait(*SystemEventSetWaitRequest) Return SystemGetConfComputeCapabilities() (ConfComputeSystemCaps, Return) + SystemGetConfComputeGpusReadyState() (uint32, Return) SystemGetConfComputeKeyRotationThresholdInfo() (ConfComputeGetKeyRotationThresholdInfo, Return) SystemGetConfComputeSettings() (SystemConfComputeSettings, Return) + SystemGetConfComputeState() (ConfComputeSystemState, Return) SystemGetCudaDriverVersion() (int, Return) SystemGetCudaDriverVersion_v2() (int, Return) + SystemGetDriverBranch() (SystemDriverBranchInfo, Return) SystemGetDriverVersion() (string, Return) SystemGetHicVersion() ([]HwbcEntry, Return) SystemGetNVMLVersion() (string, Return) + SystemGetNvlinkBwMode() (uint32, Return) SystemGetProcessName(int) (string, Return) SystemGetTopologyGpuSet(int) ([]Device, Return) + SystemRegisterEvents(*SystemRegisterEventRequest) Return + SystemSetConfComputeGpusReadyState(uint32) Return SystemSetConfComputeKeyRotationThresholdInfo(ConfComputeSetKeyRotationThresholdInfo) Return + SystemSetNvlinkBwMode(uint32) Return UnitGetCount() (int, Return) UnitGetDevices(Unit) ([]Device, Return) UnitGetFanSpeedInfo(Unit) (UnitFanSpeeds, Return) @@ -651,11 +738,13 @@ type Interface interface { VgpuInstanceGetLicenseStatus(VgpuInstance) (int, Return) VgpuInstanceGetMdevUUID(VgpuInstance) (string, Return) VgpuInstanceGetMetadata(VgpuInstance) (VgpuMetadata, Return) + VgpuInstanceGetRuntimeStateSize(VgpuInstance) (VgpuRuntimeState, Return) VgpuInstanceGetType(VgpuInstance) (VgpuTypeId, Return) VgpuInstanceGetUUID(VgpuInstance) (string, Return) VgpuInstanceGetVmDriverVersion(VgpuInstance) (string, Return) VgpuInstanceGetVmID(VgpuInstance) (string, VgpuVmIdType, Return) VgpuInstanceSetEncoderCapacity(VgpuInstance, int) Return + VgpuTypeGetBAR1Info(VgpuTypeId) (VgpuTypeBar1Info, Return) VgpuTypeGetCapabilities(VgpuTypeId, VgpuCapability) (bool, 
Return) VgpuTypeGetClass(VgpuTypeId) (string, Return) VgpuTypeGetDeviceID(VgpuTypeId) (uint64, uint64, Return) @@ -664,6 +753,7 @@ type Interface interface { VgpuTypeGetGpuInstanceProfileId(VgpuTypeId) (uint32, Return) VgpuTypeGetLicense(VgpuTypeId) (string, Return) VgpuTypeGetMaxInstances(Device, VgpuTypeId) (int, Return) + VgpuTypeGetMaxInstancesPerGpuInstance(*VgpuTypeMaxInstance) Return VgpuTypeGetMaxInstancesPerVm(VgpuTypeId) (int, Return) VgpuTypeGetName(VgpuTypeId) (string, Return) VgpuTypeGetNumDisplayHeads(VgpuTypeId) (int, Return) @@ -699,9 +789,11 @@ type Device interface { GetBridgeChipInfo() (BridgeChipHierarchy, Return) GetBusType() (BusType, Return) GetC2cModeInfoV() C2cModeInfoHandler + GetCapabilities() (DeviceCapabilities, Return) GetClkMonStatus() (ClkMonStatus, Return) GetClock(ClockType, ClockId) (uint32, Return) GetClockInfo(ClockType) (uint32, Return) + GetClockOffsets() (ClockOffset, Return) GetComputeInstanceId() (int, Return) GetComputeMode() (ComputeMode, Return) GetComputeRunningProcesses() ([]ProcessInfo, Return) @@ -709,12 +801,14 @@ type Device interface { GetConfComputeGpuCertificate() (ConfComputeGpuCertificate, Return) GetConfComputeMemSizeInfo() (ConfComputeMemSizeInfo, Return) GetConfComputeProtectedMemoryUsage() (Memory, Return) + GetCoolerInfo() (CoolerInfo, Return) GetCpuAffinity(int) ([]uint, Return) GetCpuAffinityWithinScope(int, AffinityScope) ([]uint, Return) GetCreatableVgpus() ([]VgpuTypeId, Return) GetCudaComputeCapability() (int, int, Return) GetCurrPcieLinkGeneration() (int, Return) GetCurrPcieLinkWidth() (int, Return) + GetCurrentClockFreqs() (DeviceCurrentClockFreqs, Return) GetCurrentClocksEventReasons() (uint64, Return) GetCurrentClocksThrottleReasons() (uint64, Return) GetDecoderUtilization() (uint32, uint32, Return) @@ -724,7 +818,9 @@ type Device interface { GetDeviceHandleFromMigDeviceHandle() (Device, Return) GetDisplayActive() (EnableState, Return) GetDisplayMode() (EnableState, Return) + 
GetDramEncryptionMode() (DramEncryptionInfo, DramEncryptionInfo, Return) GetDriverModel() (DriverModel, DriverModel, Return) + GetDriverModel_v2() (DriverModel, DriverModel, Return) GetDynamicPstatesInfo() (GpuDynamicPstatesInfo, Return) GetEccMode() (EnableState, EnableState, Return) GetEncoderCapacity(EncoderType) (int, Return) @@ -736,6 +832,7 @@ type Device interface { GetFBCStats() (FBCStats, Return) GetFanControlPolicy_v2(int) (FanControlPolicy, Return) GetFanSpeed() (uint32, Return) + GetFanSpeedRPM() (FanSpeedInfo, Return) GetFanSpeed_v2(int) (uint32, Return) GetFieldValues([]FieldValue) Return GetGpcClkMinMaxVfOffset() (int, int, Return) @@ -764,6 +861,7 @@ type Device interface { GetJpgUtilization() (uint32, uint32, Return) GetLastBBXFlushTime() (uint64, uint, Return) GetMPSComputeRunningProcesses() ([]ProcessInfo, Return) + GetMarginTemperature() (MarginTemperature, Return) GetMaxClockInfo(ClockType) (uint32, Return) GetMaxCustomerBoostClock(ClockType) (uint32, Return) GetMaxMigDeviceCount() (int, Return) @@ -795,6 +893,8 @@ type Device interface { GetNvLinkUtilizationControl(int, int) (NvLinkUtilizationControl, Return) GetNvLinkUtilizationCounter(int, int) (uint64, uint64, Return) GetNvLinkVersion(int) (uint32, Return) + GetNvlinkBwMode() (NvlinkGetBwMode, Return) + GetNvlinkSupportedBwModes() (NvlinkSupportedBwModes, Return) GetOfaUtilization() (uint32, uint32, Return) GetP2PStatus(Device, GpuP2PCapsIndex) (GpuP2PStatus, Return) GetPciInfo() (PciInfo, Return) @@ -803,9 +903,11 @@ type Device interface { GetPcieReplayCounter() (int, Return) GetPcieSpeed() (int, Return) GetPcieThroughput(PcieUtilCounter) (uint32, Return) + GetPerformanceModes() (DevicePerfModes, Return) GetPerformanceState() (Pstates, Return) GetPersistenceMode() (EnableState, Return) GetPgpuMetadataString() (string, Return) + GetPlatformInfo() (PlatformInfo, Return) GetPowerManagementDefaultLimit() (uint32, Return) GetPowerManagementLimit() (uint32, Return) 
GetPowerManagementLimitConstraints() (uint32, uint32, Return) @@ -834,6 +936,7 @@ type Device interface { GetTargetFanSpeed(int) (int, Return) GetTemperature(TemperatureSensors) (uint32, Return) GetTemperatureThreshold(TemperatureThresholds) (uint32, Return) + GetTemperatureV() TemperatureHandler GetThermalSettings(uint32) (GpuThermalSettings, Return) GetTopologyCommonAncestor(Device) (GpuTopologyLevel, Return) GetTopologyNearestGpus(GpuTopologyLevel) ([]Device, Return) @@ -864,6 +967,9 @@ type Device interface { GpmSetStreamingEnabled(uint32) Return IsMigDeviceHandle() (bool, Return) OnSameBoard(Device) (int, Return) + PowerSmoothingActivatePresetProfile(*PowerSmoothingProfile) Return + PowerSmoothingSetState(*PowerSmoothingState) Return + PowerSmoothingUpdatePresetProfileParam(*PowerSmoothingProfile) Return RegisterEvents(uint64, EventSet) Return ResetApplicationsClocks() Return ResetGpuLockedClocks() Return @@ -874,11 +980,13 @@ type Device interface { SetAccountingMode(EnableState) Return SetApplicationsClocks(uint32, uint32) Return SetAutoBoostedClocksEnabled(EnableState) Return + SetClockOffsets(ClockOffset) Return SetComputeMode(ComputeMode) Return SetConfComputeUnprotectedMemSize(uint64) Return SetCpuAffinity() Return SetDefaultAutoBoostedClocksEnabled(EnableState, uint32) Return SetDefaultFanSpeed_v2(int) Return + SetDramEncryptionMode(*DramEncryptionInfo) Return SetDriverModel(DriverModel, uint32) Return SetEccMode(EnableState) Return SetFanControlPolicy(int, FanControlPolicy) Return @@ -891,6 +999,7 @@ type Device interface { SetMigMode(int) (Return, Return) SetNvLinkDeviceLowPowerThreshold(*NvLinkPowerThres) Return SetNvLinkUtilizationControl(int, int, *NvLinkUtilizationControl, bool) Return + SetNvlinkBwMode(*NvlinkSetBwMode) Return SetPersistenceMode(EnableState) Return SetPowerManagementLimit(uint32) Return SetPowerManagementLimit_v2(*PowerValue_v2) Return @@ -901,6 +1010,10 @@ type Device interface { SetVirtualizationMode(GpuVirtualizationMode) 
Return ValidateInforom() Return VgpuTypeGetMaxInstances(VgpuTypeId) (int, Return) + WorkloadPowerProfileClearRequestedProfiles(*WorkloadPowerProfileRequestedProfiles) Return + WorkloadPowerProfileGetCurrentProfiles() (WorkloadPowerProfileCurrentProfiles, Return) + WorkloadPowerProfileGetProfilesInfo() (WorkloadPowerProfileProfilesInfo, Return) + WorkloadPowerProfileSetRequestedProfiles(*WorkloadPowerProfileRequestedProfiles) Return } // GpuInstance represents the interface for the nvmlGpuInstance type. @@ -910,13 +1023,21 @@ type GpuInstance interface { CreateComputeInstance(*ComputeInstanceProfileInfo) (ComputeInstance, Return) CreateComputeInstanceWithPlacement(*ComputeInstanceProfileInfo, *ComputeInstancePlacement) (ComputeInstance, Return) Destroy() Return + GetActiveVgpus() (ActiveVgpuInstanceInfo, Return) GetComputeInstanceById(int) (ComputeInstance, Return) GetComputeInstancePossiblePlacements(*ComputeInstanceProfileInfo) ([]ComputeInstancePlacement, Return) GetComputeInstanceProfileInfo(int, int) (ComputeInstanceProfileInfo, Return) GetComputeInstanceProfileInfoV(int, int) ComputeInstanceProfileInfoHandler GetComputeInstanceRemainingCapacity(*ComputeInstanceProfileInfo) (int, Return) GetComputeInstances(*ComputeInstanceProfileInfo) ([]ComputeInstance, Return) + GetCreatableVgpus() (VgpuTypeIdInfo, Return) GetInfo() (GpuInstanceInfo, Return) + GetVgpuHeterogeneousMode() (VgpuHeterogeneousMode, Return) + GetVgpuSchedulerLog() (VgpuSchedulerLogInfo, Return) + GetVgpuSchedulerState() (VgpuSchedulerStateInfo, Return) + GetVgpuTypeCreatablePlacements() (VgpuCreatablePlacementInfo, Return) + SetVgpuHeterogeneousMode(*VgpuHeterogeneousMode) Return + SetVgpuSchedulerState(*VgpuSchedulerState) Return } // ComputeInstance represents the interface for the nvmlComputeInstance type. 
@@ -979,6 +1100,7 @@ type VgpuInstance interface { GetLicenseStatus() (int, Return) GetMdevUUID() (string, Return) GetMetadata() (VgpuMetadata, Return) + GetRuntimeStateSize() (VgpuRuntimeState, Return) GetType() (VgpuTypeId, Return) GetUUID() (string, Return) GetVmDriverVersion() (string, Return) @@ -990,6 +1112,7 @@ type VgpuInstance interface { // //go:generate moq -out mock/vgputypeid.go -pkg mock . VgpuTypeId:VgpuTypeId type VgpuTypeId interface { + GetBAR1Info() (VgpuTypeBar1Info, Return) GetCapabilities(VgpuCapability) (bool, Return) GetClass() (string, Return) GetCreatablePlacements(Device) (VgpuPlacementList, Return) diff --git a/vendor/modules.txt b/vendor/modules.txt index 3f955a31c..19faec0c6 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -2,7 +2,7 @@ ## explicit; go 1.20 github.com/NVIDIA/go-gpuallocator/gpuallocator github.com/NVIDIA/go-gpuallocator/internal/links -# github.com/NVIDIA/go-nvlib v0.7.2 +# github.com/NVIDIA/go-nvlib v0.7.3 ## explicit; go 1.20 github.com/NVIDIA/go-nvlib/pkg/nvlib/device github.com/NVIDIA/go-nvlib/pkg/nvlib/info @@ -10,7 +10,7 @@ github.com/NVIDIA/go-nvlib/pkg/nvpci github.com/NVIDIA/go-nvlib/pkg/nvpci/bytes github.com/NVIDIA/go-nvlib/pkg/nvpci/mmio github.com/NVIDIA/go-nvlib/pkg/pciids -# github.com/NVIDIA/go-nvml v0.12.4-1 +# github.com/NVIDIA/go-nvml v0.12.9-0 ## explicit; go 1.20 github.com/NVIDIA/go-nvml/pkg/dl github.com/NVIDIA/go-nvml/pkg/nvml