diff --git a/Makefile b/Makefile index a6d54aa..df61577 100644 --- a/Makefile +++ b/Makefile @@ -16,9 +16,9 @@ DOCKER ?= docker MKDIR ?= mkdir REGISTRY ?= nvidia -DCGM_VERSION := 1.7.2 +DCGM_VERSION := 2.0.10 GOLANG_VERSION := 1.14.2 -VERSION := 2.0.0-rc.7 +VERSION := 2.1.0-rc.1 FULL_VERSION := $(DCGM_VERSION)-$(VERSION) .PHONY: all binary install check-format diff --git a/README.md b/README.md index 34d7498..0a83eec 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,13 @@ This Github repository contains Golang bindings for the following two libraries: - [NVIDIA Management Library (NVML)](https://docs.nvidia.com/deploy/nvml-api/nvml-api-reference.html#nvml-api-reference) is a C-based API for monitoring and managing NVIDIA GPU devices. -- [NVIDIA Data Center GPU Manager (DCGM)](https://developer.nvidia.com/data-center-gpu-manager-dcgm) is a set of tools for managing and monitoring NVIDIA GPUs in cluster environments. It's a low overhead tool suite that performs a variety of functions on each host system including active health monitoring, diagnostics, system validation, policies, power and clock management, group configuration and accounting. +- [NVIDIA Data Center GPU Manager (DCGM)](https://developer.nvidia.com/dcgm) is a set of tools for managing and monitoring NVIDIA GPUs in cluster environments. It's a low overhead tool suite that performs a variety of functions on each host system including active health monitoring, diagnostics, system validation, policies, power and clock management, group configuration and accounting. You will also find samples for both of these bindings in this repository. ## DCGM exporter -This Github repository also contains the DCGM exporter software. It exposes GPU metrics exporter for [Prometheus](https://prometheus.io/) leveraging [NVIDIA Data Center GPU Manager (DCGM)](https://developer.nvidia.com/data-center-gpu-manager-dcgm). +This Github repository also contains the DCGM exporter software. 
It exposes GPU metrics exporter for [Prometheus](https://prometheus.io/) leveraging [NVIDIA Data Center GPU Manager (DCGM)](https://developer.nvidia.com/dcgm). Find the installation and run instructions [here](https://github.com/NVIDIA/gpu-monitoring-tools/blob/master/exporters/prometheus-dcgm/README.md). @@ -60,48 +60,9 @@ DCGM_FI_DEV_MEM_CLOCK{gpu="0", UUID="GPU-604ac76c-d9cf-fef3-62e9-d92044ab6e52",c DCGM_FI_DEV_MEMORY_TEMP{gpu="0", UUID="GPU-604ac76c-d9cf-fef3-62e9-d92044ab6e52",container="",namespace="",pod=""} 9223372036854775794 ... -# If you are using the Prometheus operator -# Note on exporters here: -# https://github.com/coreos/prometheus-operator/blob/release-0.38/Documentation/user-guides/running-exporters.md - -$ helm repo add stable https://kubernetes-charts.storage.googleapis.com -$ helm install stable/prometheus-operator --generate-name \ - --set "prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false" -$ kubectl create -f \ - https://raw.githubusercontent.com/NVIDIA/gpu-monitoring-tools/2.0.0-rc.12/service-monitor.yaml - -# Note might take ~1-2 minutes for prometheus to pickup the metrics and display them -# You can also check in the WebUI the servce-discovery tab (in the Status category) -$ NAME=$(kubectl get svc -l app=prometheus-operator-prometheus -o jsonpath='{.items[0].metadata.name}') -$ kubectl port-forward $NAME 9090:9090 & -$ curl -sL http://127.0.01:9090/api/v1/query?query=DCGM_FI_DEV_MEMORY_TEMP" -{ - status: "success", - data: { - resultType: "vector", - result: [ - { - metric: { - UUID: "GPU-604ac76c-d9cf-fef3-62e9-d92044ab6e52", - __name__: "DCGM_FI_DEV_MEMORY_TEMP", - __container__: "", - __pod__: "", - __namespace__: "", - ... - pod: "dcgm-exporter-fn7fm", - service: "dcgm-exporter" - }, - value: [ - 1588399049.227, - "9223372036854776000" - ] - }, - ... 
- ] - } -} ``` - +To integrate `dcgm-exporter` with Prometheus and Grafana, see the full instructions in the [user guide](https://docs.nvidia.com/datacenter/cloud-native/kubernetes/dcgme2e.html#gpu-telemetry). +`dcgm-exporter` is deployed as part of the GPU Operator. To get started with integrating with Prometheus, check the Operator [user guide](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/getting-started.html#gpu-telemetry). ### Building From source and Running on Bare Metal diff --git a/bindings/go/dcgm/admin.go b/bindings/go/dcgm/admin.go index 30bcacd..066f8b0 100644 --- a/bindings/go/dcgm/admin.go +++ b/bindings/go/dcgm/admin.go @@ -57,7 +57,7 @@ var ( func initDcgm(m mode, args ...string) (err error) { const ( - dcgmLib = "libdcgm.so.1" + dcgmLib = "libdcgm.so" ) lib := C.CString(dcgmLib) defer freeCString(lib) diff --git a/bindings/go/dcgm/dcgm_agent.h b/bindings/go/dcgm/dcgm_agent.h index e06dc58..3ade17e 100644 --- a/bindings/go/dcgm/dcgm_agent.h +++ b/bindings/go/dcgm/dcgm_agent.h @@ -10,22 +10,22 @@ */ #ifndef DCGM_AGENT_H -#define DCGM_AGENT_H +#define DCGM_AGENT_H -#ifdef __cplusplus +#include "dcgm_structs.h" + +#ifdef __cplusplus extern "C" { #endif -#include "dcgm_structs.h" - #define DECLDIR /***************************************************************************************************/ /** @defgroup DCGMAPI_Admin Administrative - * + * * This chapter describes the administration interfaces for DCGM. - * It is the user's responsibility to call \ref dcgmInit() before calling any other methods, - * and \ref dcgmShutdown() once DCGM is no longer being used. The APIs in Administrative module + * It is the user's responsibility to call \ref dcgmInit() before calling any other methods, + * and \ref dcgmShutdown() once DCGM is no longer being used. 
The APIs in Administrative module * can be broken down into following categories: * @{ */ @@ -33,17 +33,17 @@ extern "C" { /***************************************************************************************************/ /** @defgroup DCGMAPI_Admin_InitShut Init and Shutdown - * + * * Describes APIs to Initialize and Shutdown the DCGM Engine. * @{ */ /***************************************************************************************************/ - + /** * This method is used to initialize DCGM within this process. This must be called before * dcgmStartEmbedded() or dcgmConnect() - * - * * @return + * + * * @return * - \ref DCGM_ST_OK if DCGM has been properly initialized * - \ref DCGM_ST_INIT_ERROR if there was an error initializing the library */ @@ -52,8 +52,8 @@ dcgmReturn_t DECLDIR dcgmInit(void); /** * This method is used to shut down DCGM. Any embedded host engines or remote connections will automatically * be shut down as well. - * - * @return + * + * @return * - \ref DCGM_ST_OK if DCGM has been properly shut down * - \ref DCGM_ST_UNINITIALIZED if the library was not shut down properly */ @@ -68,8 +68,8 @@ dcgmReturn_t DECLDIR dcgmShutdown(void); * \ref dcgmUpdateAllFields which tells DCGM to wake up and perform data collection and * operations needed for policy management. * - * @param opMode IN : Collect data automatically or manually when asked by the user. - * @param pDcgmHandle OUT : DCGM Handle to use for API calls + * @param opMode IN: Collect data automatically or manually when asked by the user. + * @param pDcgmHandle OUT: DCGM Handle to use for API calls * * @return * - \ref DCGM_ST_OK if DCGM was started successfully within our process @@ -78,6 +78,24 @@ dcgmReturn_t DECLDIR dcgmShutdown(void); */ dcgmReturn_t DECLDIR dcgmStartEmbedded(dcgmOperationMode_t opMode, dcgmHandle_t *pDcgmHandle); +/** + * Start an embedded host engine agent within this process. + * + * The agent is loaded as a shared library. 
This mode is provided to avoid any + * extra jitter associated with an additional autonomous agent needs to be managed. In + * this mode, the user has to periodically call APIs such as \ref dcgmPolicyTrigger and + * \ref dcgmUpdateAllFields which tells DCGM to wake up and perform data collection and + * operations needed for policy management. + * + * @param params IN/OUT: See \ref dcgmStartEmbeddedV2Params_v1 for details. + * + * @return + * - \ref DCGM_ST_OK if DCGM was started successfully within our process + * - \ref DCGM_ST_UNINITIALIZED if DCGM has not been initialized with \ref dcgmInit yet + * + */ +dcgmReturn_t DECLDIR dcgmStartEmbedded_v2(dcgmStartEmbeddedV2Params_v1 *params); + /** * Stop the embedded host engine within this process that was started with dcgmStartEmbedded * @@ -98,20 +116,19 @@ dcgmReturn_t DECLDIR dcgmStopEmbedded(dcgmHandle_t pDcgmHandle); * * NOTE: dcgmConnect_v2 provides additional connection options. * - * @param ipAddress IN : Valid IP address for the remote host engine to connect to. - * If ipAddress is specified as x.x.x.x it will attempt to connect to the default - * port specified by DCGM_HE_PORT_NUMBER - * If ipAddress is specified as x.x.x.x:yyyy it will attempt to connect to the - * port specified by yyyy - * @param pDcgmHandle OUT : DCGM Handle of the remote host engine + * @param ipAddress IN: Valid IP address for the remote host engine to connect to. + * If ipAddress is specified as x.x.x.x it will attempt to connect to the default + * port specified by DCGM_HE_PORT_NUMBER + * If ipAddress is specified as x.x.x.x:yyyy it will attempt to connect to the + * port specified by yyyy + * @param pDcgmHandle OUT: DCGM Handle of the remote host engine * * @return * - \ref DCGM_ST_OK if we successfully connected to the remote host engine * - \ref DCGM_ST_CONNECTION_NOT_VALID if the remote host engine could not be reached * - \ref DCGM_ST_UNINITIALIZED if DCGM has not been initialized with \ref dcgmInit. 
* - \ref DCGM_ST_BADPARAM if pDcgmHandle is NULL or ipAddress is invalid - * - \ref DCGM_ST_INIT_ERROR if DCGM encountered an error while initializing the remote - * client library + * - \ref DCGM_ST_INIT_ERROR if DCGM encountered an error while initializing the remote client library * - \ref DCGM_ST_UNINITIALIZED if DCGM has not been initialized with \ref dcgmInit */ dcgmReturn_t DECLDIR dcgmConnect(char *ipAddress, dcgmHandle_t *pDcgmHandle); @@ -120,24 +137,23 @@ dcgmReturn_t DECLDIR dcgmConnect(char *ipAddress, dcgmHandle_t *pDcgmHandle); * This method is used to connect to a stand-alone host engine process. Remote host engines are started * by running the nv-hostengine command. * - * @param ipAddress IN : Valid IP address for the remote host engine to connect to. - * If ipAddress is specified as x.x.x.x it will attempt to connect to the default - * port specified by DCGM_HE_PORT_NUMBER - * If ipAddress is specified as x.x.x.x:yyyy it will attempt to connect to the - * port specified by yyyy - * @param connectParams IN : Additional connection parameters. See \ref dcgmConnectV2Params_t for details. - * @param pDcgmHandle OUT : DCGM Handle of the remote host engine + * @param ipAddress IN: Valid IP address for the remote host engine to connect to. + * If ipAddress is specified as x.x.x.x it will attempt to connect to the default port + * specified by DCGM_HE_PORT_NUMBER. + * If ipAddress is specified as x.x.x.x:yyyy it will attempt to connect to the port + * specified by yyyy + * @param connectParams IN: Additional connection parameters. See \ref dcgmConnectV2Params_t for details. + * @param pDcgmHandle OUT: DCGM Handle of the remote host engine * * @return * - \ref DCGM_ST_OK if we successfully connected to the remote host engine * - \ref DCGM_ST_CONNECTION_NOT_VALID if the remote host engine could not be reached * - \ref DCGM_ST_UNINITIALIZED if DCGM has not been initialized with \ref dcgmInit. 
 * - \ref DCGM_ST_BADPARAM if pDcgmHandle is NULL or ipAddress is invalid - * - \ref DCGM_ST_INIT_ERROR if DCGM encountered an error while initializing the remote - * client library + * - \ref DCGM_ST_INIT_ERROR if DCGM encountered an error while initializing the remote client library * - \ref DCGM_ST_UNINITIALIZED if DCGM has not been initialized with \ref dcgmInit */ - dcgmReturn_t DECLDIR dcgmConnect_v2(char *ipAddress, dcgmConnectV2Params_t *connectParams, dcgmHandle_t *pDcgmHandle); +dcgmReturn_t DECLDIR dcgmConnect_v2(char *ipAddress, dcgmConnectV2Params_t *connectParams, dcgmHandle_t *pDcgmHandle); /** * This method is used to disconnect from a stand-alone host engine process. @@ -166,14 +182,44 @@ dcgmReturn_t DECLDIR dcgmDisconnect(dcgmHandle_t pDcgmHandle); /** * This method is used to return information about the build environment where DCGM was built. * - * @param pVersionInfo OUT : Build environment information + * @param pVersionInfo OUT: Build environment information * * @return * - \ref DCGM_ST_OK if build information is sucessfully obtained * - \ref DCGM_ST_BADPARAM if pVersionInfo is null * - \ref DCGM_ST_VER_MISMATCH if the expected and provided versions of dcgmVersionInfo_t do not match */ -dcgmReturn_t DECLDIR dcgmVersionInfo(dcgmVersionInfo_t* pVersionInfo); +dcgmReturn_t DECLDIR dcgmVersionInfo(dcgmVersionInfo_t *pVersionInfo); + + +/** + * This method is used to set the logging severity on HostEngine for the specified logger + * + * @param pDcgmHandle IN: DCGM Handle + * @param logging IN: dcgmSettingsSetLoggingSeverity_t struct containing the target logger and severity + * + * @return + * - \ref DCGM_ST_OK Severity successfully set + * - \ref DCGM_ST_BADPARAM Bad logger/severity string + * - \ref DCGM_ST_VER_MISMATCH if the expected and provided versions of dcgmSettingsSetLoggingSeverity_t + * do not match + */ +dcgmReturn_t DECLDIR dcgmHostengineSetLoggingSeverity(dcgmHandle_t pDcgmHandle, + dcgmSettingsSetLoggingSeverity_t *logging); + +/** 
+ * This function is used to return whether or not the host engine considers itself healthy + * + * @param[in] pDcgmHandle - the handle to DCGM + * @param[out] heHealth - struct describing the health of the hostengine. if heHealth.hostengineHealth is 0, + * then the hostengine is healthy. Non-zero indicates not healthy with error codes + * determining the cause. + * + * @return + * - \ref DCGM_ST_OK Able to gauge health + * - \ref DCGM_ST_BADPARAM isHealthy is not a valid pointer + */ +dcgmReturn_t DECLDIR dcgmHostengineIsHealthy(dcgmHandle_t pDcgmHandle, dcgmHostengineHealth_t *heHealth); /** @} */ // Closing DCGMAPI_Admin_Info @@ -185,7 +231,7 @@ dcgmReturn_t DECLDIR dcgmVersionInfo(dcgmVersionInfo_t* pVersionInfo); * @{ * This chapter describes the APIs used to identify set of GPUs on the node, grouping functions to * provide mechanism to operate on a group of GPUs, and status management APIs in - * order to get individual statuses for each operation. The APIs in System module can be + * order to get individual statuses for each operation. The APIs in System module can be * broken down into following categories: */ /***************************************************************************************************/ @@ -198,22 +244,24 @@ dcgmReturn_t DECLDIR dcgmVersionInfo(dcgmVersionInfo_t* pVersionInfo); /***************************************************************************************************/ /** - * This method is used to get identifiers corresponding to all the devices on the system. The - * identifier represents DCGM GPU Id corresponding to each GPU on the system and is immutable during + * This method is used to get identifiers corresponding to all the devices on the system. The + * identifier represents DCGM GPU Id corresponding to each GPU on the system and is immutable during * the lifespan of the engine. The list should be queried again if the engine is restarted. 
- * + * * The GPUs returned from this function include gpuIds of GPUs that are not supported by DCGM. * To only get gpuIds of GPUs that are supported by DCGM, use dcgmGetAllSupportedDevices(). * - * @param pDcgmHandle IN : DCGM Handle - * @param gpuIdList OUT : Array reference to fill GPU Ids present on the system. - * @param count OUT : Number of GPUs returned in \a gpuIdList. + * @param pDcgmHandle IN: DCGM Handle + * @param gpuIdList OUT: Array reference to fill GPU Ids present on the system. + * @param count OUT: Number of GPUs returned in \a gpuIdList. * - * @return + * @return * - \ref DCGM_ST_OK if the call was successful. * - \ref DCGM_ST_BADPARAM if \a gpuIdList or \a count were not valid. */ -dcgmReturn_t DECLDIR dcgmGetAllDevices(dcgmHandle_t pDcgmHandle, unsigned int gpuIdList[DCGM_MAX_NUM_DEVICES], int *count); +dcgmReturn_t DECLDIR dcgmGetAllDevices(dcgmHandle_t pDcgmHandle, + unsigned int gpuIdList[DCGM_MAX_NUM_DEVICES], + int *count); /** * This method is used to get identifiers corresponding to all the DCGM-supported devices on the system. The @@ -224,169 +272,191 @@ dcgmReturn_t DECLDIR dcgmGetAllDevices(dcgmHandle_t pDcgmHandle, unsigned int gp * To get gpuIds of all GPUs in the system, use dcgmGetAllDevices(). * * - * @param pDcgmHandle IN : DCGM Handle - * @param gpuIdList OUT : Array reference to fill GPU Ids present on the system. - * @param count OUT : Number of GPUs returned in \a gpuIdList. + * @param pDcgmHandle IN: DCGM Handle + * @param gpuIdList OUT: Array reference to fill GPU Ids present on the system. + * @param count OUT: Number of GPUs returned in \a gpuIdList. * * @return * - \ref DCGM_ST_OK if the call was successful. * - \ref DCGM_ST_BADPARAM if \a gpuIdList or \a count were not valid. 
*/ -dcgmReturn_t DECLDIR dcgmGetAllSupportedDevices(dcgmHandle_t pDcgmHandle, unsigned int gpuIdList[DCGM_MAX_NUM_DEVICES], int *count); +dcgmReturn_t DECLDIR dcgmGetAllSupportedDevices(dcgmHandle_t pDcgmHandle, + unsigned int gpuIdList[DCGM_MAX_NUM_DEVICES], + int *count); /** - * Gets device attributes corresponding to the \a gpuId. If operation is not successful for any of - * the requested fields then the field is populated with one of DCGM_BLANK_VALUES defined in + * Gets device attributes corresponding to the \a gpuId. If operation is not successful for any of + * the requested fields then the field is populated with one of DCGM_BLANK_VALUES defined in * dcgm_structs.h. - * - * @param pDcgmHandle IN : DCGM Handle - * @param gpuId IN : GPU Id corresponding to which the attributes - * should be fetched - * @param pDcgmAttr IN/OUT : Device attributes corresponding to \a gpuId.
- * pDcgmAttr->version should be set to - * \ref dcgmDeviceAttributes_version before this - * call. + * + * @param pDcgmHandle IN: DCGM Handle + * @param gpuId IN: GPU Id corresponding to which the attributes should be fetched + * @param pDcgmAttr IN/OUT: Device attributes corresponding to \a gpuId.
pDcgmAttr->version should be set to + * \ref dcgmDeviceAttributes_version before this call. * * @return * - \ref DCGM_ST_OK if the call was successful. * - \ref DCGM_ST_VER_MISMATCH if pDcgmAttr->version is not set or is invalid. */ -dcgmReturn_t DECLDIR dcgmGetDeviceAttributes(dcgmHandle_t pDcgmHandle, unsigned int gpuId, dcgmDeviceAttributes_t *pDcgmAttr); +dcgmReturn_t DECLDIR dcgmGetDeviceAttributes(dcgmHandle_t pDcgmHandle, + unsigned int gpuId, + dcgmDeviceAttributes_t *pDcgmAttr); /** * Gets the list of entities that exist for a given entity group. This API can be used in place of - * \ref dcgmGetAllDevices. - * + * \ref dcgmGetAllDevices. + * * @param dcgmHandle IN: DCGM Handle * @param entityGroup IN: Entity group to list entities of * @param entities OUT: Array of entities for entityGroup - * @param numEntities IN/OUT: Upon calling, this should be the number of entities that entityList[] - * can hold. Upon return, this will contain the number of entities actually - * saved to entityList. - * @param flags IN: Flags to modify the behavior of this request. + * @param numEntities IN/OUT: Upon calling, this should be the number of entities that entityList[] can hold. Upon + * return, this will contain the number of entities actually saved to entityList. + * @param flags IN: Flags to modify the behavior of this request. * See DCGM_GEGE_FLAG_* #defines in dcgm_structs.h - * + * * @return * - \ref DCGM_ST_OK if the call was successful. - * - \ref DCGM_ST_INSUFFICIENT_SIZE if numEntities was not large enough to hold the number of - * entities in the entityGroup. numEntities will contain - * the capacity needed to complete this request successfully. + * - \ref DCGM_ST_INSUFFICIENT_SIZE if numEntities was not large enough to hold the number of entities in the + * entityGroup. numEntities will contain the capacity needed to complete this + * request successfully. * - \ref DCGM_ST_NOT_SUPPORTED if the given entityGroup does not support enumeration. 
* - \ref DCGM_ST_BADPARAM if any parameter is invalid */ -dcgmReturn_t DECLDIR dcgmGetEntityGroupEntities(dcgmHandle_t dcgmHandle, dcgm_field_entity_group_t entityGroup, - dcgm_field_eid_t *entities, int *numEntities, unsigned int flags); +dcgmReturn_t DECLDIR dcgmGetEntityGroupEntities(dcgmHandle_t dcgmHandle, + dcgm_field_entity_group_t entityGroup, + dcgm_field_eid_t *entities, + int *numEntities, + unsigned int flags); + +/** + * Gets the hierarchy of GPUs, GPU Instances, and Compute Instances by populating a list of each entity with + * a reference to their parent + * + * @param dcgmHandle IN: DCGM Handle + * @param entities OUT: array of entities in the hierarchy + * @param numEntities IN/OUT: Upon calling, this should be the capacity of entities. + * Upon return, this will contain the number of entities actually saved to entities. + * + * @return + * - \ref DCGM_ST_OK if the call was successful. + * - \ref DCGM_ST_VER_MISMATCH if the struct version is incorrect + * - \ref DCGM_ST_BADPARAM if any parameter is invalid + */ +dcgmReturn_t DECLDIR dcgmGetGpuInstanceHierarchy(dcgmHandle_t dcgmHandle, dcgmMigHierarchy_v1 *hierarchy); /** * Get the NvLink link status for every NvLink in this system. This includes the NvLinks of both GPUs and * NvSwitches. Note that only NvSwitches and GPUs that are visible to the current environment will be * returned in this structure. - * + * * @param dcgmHandle IN: DCGM Handle * @param linkStatus OUT: Structure in which to store NvLink link statuses. .version should be set to - * dcgmNvLinkStatus_version1 before calling this. - * + * dcgmNvLinkStatus_version1 before calling this. + * * @return * - \ref DCGM_ST_OK if the call was successful. * - \ref DCGM_ST_NOT_SUPPORTED if the given entityGroup does not support enumeration. 
* - \ref DCGM_ST_BADPARAM if any parameter is invalid */ -dcgmReturn_t DECLDIR dcgmGetNvLinkLinkStatus(dcgmHandle_t dcgmHandle, dcgmNvLinkStatus_v1 *linkStatus); +dcgmReturn_t DECLDIR dcgmGetNvLinkLinkStatus(dcgmHandle_t dcgmHandle, dcgmNvLinkStatus_v2 *linkStatus); /** @} */ /***************************************************************************************************/ /** @defgroup DCGM_GROUPING Grouping - * The following APIs are used for group management. The user can create a group of entities and + * The following APIs are used for group management. The user can create a group of entities and * perform an operation on a group of entities. If grouping is not needed and the user wishes - * to run commands on all GPUs seen by DCGM then the user can use DCGM_GROUP_ALL_GPUS or + * to run commands on all GPUs seen by DCGM then the user can use DCGM_GROUP_ALL_GPUS or * DCGM_GROUP_ALL_NVSWITCHES in place of group IDs when needed. * @{ */ /***************************************************************************************************/ /** - * Used to create a entity group handle which can store one or more entity Ids as an opaque handle + * Used to create a entity group handle which can store one or more entity Ids as an opaque handle * returned in \a pDcgmGrpId. Instead of executing an operation separately for each entity, the - * DCGM group enables the user to execute same operation on all the entities present in the group as a + * DCGM group enables the user to execute same operation on all the entities present in the group as a * single API call. - * - * To create the group with all the entities present on the system, the \a type field should be - * specified as \a DCGM_GROUP_DEFAULT or \a DCGM_GROUP_ALL_NVSWITCHES. To create an empty group, - * the \a type field should be specified as \a DCGM_GROUP_EMPTY. 
The empty group can be updated - * with the desired set of entities using the APIs \ref dcgmGroupAddDevice, \ref dcgmGroupAddEntity, + * + * To create the group with all the entities present on the system, the \a type field should be + * specified as \a DCGM_GROUP_DEFAULT or \a DCGM_GROUP_ALL_NVSWITCHES. To create an empty group, + * the \a type field should be specified as \a DCGM_GROUP_EMPTY. The empty group can be updated + * with the desired set of entities using the APIs \ref dcgmGroupAddDevice, \ref dcgmGroupAddEntity, * \ref dcgmGroupRemoveDevice, and \ref dcgmGroupRemoveEntity. - * - * @param pDcgmHandle IN : DCGM Handle - * @param type IN : Type of Entity Group to be formed - * @param groupName IN : Desired name of the GPU group specified as NULL terminated C string - * @param pDcgmGrpId OUT : Reference to group ID - * @return - * - \ref DCGM_ST_OK if the group has been created - * - \ref DCGM_ST_BADPARAM if any of \a type, \a groupName, \a length or \a pDcgmGrpId - * is invalid - * - \ref DCGM_ST_MAX_LIMIT if number of groups on the system has reached the max limit - * \a DCGM_MAX_NUM_GROUPS - * - \ref DCGM_ST_INIT_ERROR if the library has not been successfully initialized + * + * @param pDcgmHandle IN: DCGM Handle + * @param type IN: Type of Entity Group to be formed + * @param groupName IN: Desired name of the GPU group specified as NULL terminated C string + * @param pDcgmGrpId OUT: Reference to group ID + * + * @return + * - \ref DCGM_ST_OK if the group has been created + * - \ref DCGM_ST_BADPARAM if any of \a type, \a groupName, \a length or \a pDcgmGrpId is invalid + * - \ref DCGM_ST_MAX_LIMIT if number of groups on the system has reached the max limit \a DCGM_MAX_NUM_GROUPS + * - \ref DCGM_ST_INIT_ERROR if the library has not been successfully initialized */ -dcgmReturn_t DECLDIR dcgmGroupCreate(dcgmHandle_t pDcgmHandle, dcgmGroupType_t type, char *groupName, - dcgmGpuGrp_t *pDcgmGrpId); +dcgmReturn_t DECLDIR dcgmGroupCreate(dcgmHandle_t 
pDcgmHandle, + dcgmGroupType_t type, + char *groupName, + dcgmGpuGrp_t *pDcgmGrpId); /** - * Used to destroy a group represented by \a groupId. + * Used to destroy a group represented by \a groupId. * Since DCGM group is a logical grouping of entities, the properties applied on the group stay intact * for the individual entities even after the group is destroyed. * - * @param pDcgmHandle IN : DCGM Handle - * @param groupId IN : Group ID + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID * * @return * - \ref DCGM_ST_OK if the group has been destroyed * - \ref DCGM_ST_BADPARAM if \a groupId is invalid * - \ref DCGM_ST_INIT_ERROR if the library has not been successfully initialized - * - \ref DCGM_ST_NOT_CONFIGURED if entry corresponding to the group does not exists + * - \ref DCGM_ST_NOT_CONFIGURED if entry corresponding to the group does not exist */ dcgmReturn_t DECLDIR dcgmGroupDestroy(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId); /** * Used to add specified GPU Id to the group represented by \a groupId. 
- * - * @param pDcgmHandle IN : DCGM Handle - * @param groupId IN : Group Id to which device should be added - * @param gpuId IN : DCGM GPU Id - * @return - * - \ref DCGM_ST_OK if the GPU Id has been successfully added - * to the group + * + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group Id to which device should be added + * @param gpuId IN: DCGM GPU Id + * + * @return + * - \ref DCGM_ST_OK if the GPU Id has been successfully added to the group * - \ref DCGM_ST_INIT_ERROR if the library has not been successfully initialized - * - \ref DCGM_ST_NOT_CONFIGURED if entry corresponding to the group (\a groupId) does not exists + * - \ref DCGM_ST_NOT_CONFIGURED if entry corresponding to the group (\a groupId) does not exist * - \ref DCGM_ST_BADPARAM if \a gpuId is invalid or already part of the specified group */ dcgmReturn_t dcgmGroupAddDevice(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, unsigned int gpuId); /** * Used to add specified entity to the group represented by \a groupId. 
- * - * @param pDcgmHandle IN : DCGM Handle - * @param groupId IN : Group Id to which device should be added - * @param entityGroupId IN : Entity group that entityId belongs to - * @param entityId IN : DCGM entityId - * @return - * - \ref DCGM_ST_OK if the entity has been successfully added - * to the group + * + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group Id to which device should be added + * @param entityGroupId IN: Entity group that entityId belongs to + * @param entityId IN: DCGM entityId + * + * @return + * - \ref DCGM_ST_OK if the entity has been successfully added to the group * - \ref DCGM_ST_INIT_ERROR if the library has not been successfully initialized - * - \ref DCGM_ST_NOT_CONFIGURED if entry corresponding to the group (\a groupId) does not exists + * - \ref DCGM_ST_NOT_CONFIGURED if entry corresponding to the group (\a groupId) does not exist * - \ref DCGM_ST_BADPARAM if \a entityId is invalid or already part of the specified group */ -dcgmReturn_t dcgmGroupAddEntity(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, - dcgm_field_entity_group_t entityGroupId, +dcgmReturn_t dcgmGroupAddEntity(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgm_field_entity_group_t entityGroupId, dcgm_field_eid_t entityId); /** * Used to remove specified GPU Id from the group represented by \a groupId. 
- * @param pDcgmHandle IN : DCGM Handle - * @param groupId IN : Group ID from which device should be removed - * @param gpuId IN : DCGM GPU Id - * @return + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID from which device should be removed + * @param gpuId IN: DCGM GPU Id + * + * @return * - \ref DCGM_ST_OK if the GPU Id has been successfully removed from the group * - \ref DCGM_ST_INIT_ERROR if the library has not been successfully initialized * - \ref DCGM_ST_NOT_CONFIGURED if entry corresponding to the group (\a groupId) does not exists @@ -396,30 +466,31 @@ dcgmReturn_t dcgmGroupRemoveDevice(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupI /** * Used to remove specified entity from the group represented by \a groupId. - * @param pDcgmHandle IN : DCGM Handle - * @param groupId IN : Group ID from which device should be removed - * @param entityGroupId IN : Entity group that entityId belongs to - * @param entityId IN : DCGM entityId + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID from which device should be removed + * @param entityGroupId IN: Entity group that entityId belongs to + * @param entityId IN: DCGM entityId * - * @return + * @return * - \ref DCGM_ST_OK if the entity has been successfully removed from the group * - \ref DCGM_ST_INIT_ERROR if the library has not been successfully initialized * - \ref DCGM_ST_NOT_CONFIGURED if entry corresponding to the group (\a groupId) does not exists * - \ref DCGM_ST_BADPARAM if \a entityId is invalid or not part of the specified group */ -dcgmReturn_t dcgmGroupRemoveEntity(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, - dcgm_field_entity_group_t entityGroupId, +dcgmReturn_t dcgmGroupRemoveEntity(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgm_field_entity_group_t entityGroupId, dcgm_field_eid_t entityId); - /** - * Used to get information corresponding to the group represented by \a groupId. 
The information - * returned in \a pDcgmGroupInfo consists of group name, and the list of entities present in the + * Used to get information corresponding to the group represented by \a groupId. The information + * returned in \a pDcgmGroupInfo consists of group name, and the list of entities present in the * group. - * - * @param pDcgmHandle IN : DCGM Handle - * @param groupId IN : Group ID for which information to be fetched - * @param pDcgmGroupInfo OUT : Group Information + * + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID for which information to be fetched + * @param pDcgmGroupInfo OUT: Group Information + * * @return * - \ref DCGM_ST_OK if the group info is successfully received. * - \ref DCGM_ST_BADPARAM if any of \a groupId or \a pDcgmGroupInfo is invalid. @@ -431,12 +502,13 @@ dcgmReturn_t dcgmGroupGetInfo(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dc /** * Used to get the Ids of all groups of entities. The information returned is a list of group ids - * in \a groupIdList as well as a count of how many ids there are in \a count. Please allocate enough + * in \a groupIdList as well as a count of how many ids there are in \a count. Please allocate enough * memory for \a groupIdList. Memory of size MAX_NUM_GROUPS should be allocated for \a groupIdList. 
* - * @param pDcgmHandle IN : DCGM Handle - * @param groupIdList OUT : List of Group Ids - * @param count OUT : The number of Group ids in the list + * @param pDcgmHandle IN: DCGM Handle + * @param groupIdList OUT: List of Group Ids + * @param count OUT: The number of Group ids in the list + * * @return * - \ref DCGM_ST_OK if the ids of the groups were successfully retrieved * - \ref DCGM_ST_BADPARAM if either of the \a groupIdList or \a count is null @@ -457,11 +529,11 @@ dcgmReturn_t dcgmGroupGetAllIds(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupIdLi * Used to create a group of fields and return the handle in dcgmFieldGroupId * * @param dcgmHandle IN: DCGM handle - * @param numFieldIds IN: Number of field IDs that are being provided in fieldIds[]. Must be - * between 1 and DCGM_MAX_FIELD_IDS_PER_FIELD_GROUP. + * @param numFieldIds IN: Number of field IDs that are being provided in fieldIds[]. Must be between 1 and + * DCGM_MAX_FIELD_IDS_PER_FIELD_GROUP. * @param fieldIds IN: Field IDs to be added to the newly-created field group - * @param fieldGroupName IN: Unique name for this group of fields. This must not be the same - * as any existing field groups. + * @param fieldGroupName IN: Unique name for this group of fields. This must not be the same as any existing field + * groups. 
* @param dcgmFieldGroupId OUT: Handle to the newly-created field group * * @return @@ -471,8 +543,11 @@ dcgmReturn_t dcgmGroupGetAllIds(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupIdLi * - \ref DCGM_ST_MAX_LIMIT if too many field groups already exist * */ -dcgmReturn_t dcgmFieldGroupCreate(dcgmHandle_t dcgmHandle, int numFieldIds, unsigned short *fieldIds, - char *fieldGroupName, dcgmFieldGrp_t *dcgmFieldGroupId); +dcgmReturn_t dcgmFieldGroupCreate(dcgmHandle_t dcgmHandle, + int numFieldIds, + unsigned short *fieldIds, + char *fieldGroupName, + dcgmFieldGrp_t *dcgmFieldGroupId); /** * Used to remove a field group that was created with \ref dcgmFieldGroupCreate @@ -493,11 +568,9 @@ dcgmReturn_t dcgmFieldGroupDestroy(dcgmHandle_t dcgmHandle, dcgmFieldGrp_t dcgmF * Used to get information about a field group that was created with \ref dcgmFieldGroupCreate. * * @param dcgmHandle IN: DCGM handle - * @param fieldGroupInfo IN/OUT: Info about all of the field groups that - * exist.
.version should be set to - * \ref dcgmFieldGroupInfo_version before this - * call
.fieldGroupId should contain the - * fieldGroupId you are interested in querying + * @param fieldGroupInfo IN/OUT: Info about all of the field groups that exist.
+ * .version should be set to \ref dcgmFieldGroupInfo_version before this call
+ * .fieldGroupId should contain the fieldGroupId you are interested in querying * information for. * * @return @@ -509,16 +582,12 @@ dcgmReturn_t dcgmFieldGroupDestroy(dcgmHandle_t dcgmHandle, dcgmFieldGrp_t dcgmF */ dcgmReturn_t dcgmFieldGroupGetInfo(dcgmHandle_t dcgmHandle, dcgmFieldGroupInfo_t *fieldGroupInfo); - /** * Used to get information about all field groups in the system. * * @param dcgmHandle IN: DCGM handle - * @param allGroupInfo IN/OUT: Info about all of the field groups that - * exist.
- * .version should be set to - * \ref dcgmAllFieldGroup_version before - * this call. + * @param allGroupInfo IN/OUT: Info about all of the field groups that exist.
+ * .version should be set to \ref dcgmAllFieldGroup_version before this call. * * @return * - \ref DCGM_ST_OK if the field group info was successfully returned @@ -532,7 +601,6 @@ dcgmReturn_t dcgmFieldGroupGetAll(dcgmHandle_t dcgmHandle, dcgmAllFieldGroup_t * /** @} */ - /***************************************************************************************************/ /** @defgroup DCGMAPI_ST Status handling * The following APIs are used to manage statuses for multiple operations on one or more GPUs. @@ -541,66 +609,76 @@ dcgmReturn_t dcgmFieldGroupGetAll(dcgmHandle_t dcgmHandle, dcgmAllFieldGroup_t * /***************************************************************************************************/ /** - * Creates reference to DCGM status handler which can be used to get the statuses for multiple + * Creates reference to DCGM status handler which can be used to get the statuses for multiple * operations on one or more devices. - * - * The multiple statuses are useful when the operations are performed at group level. The status + * + * The multiple statuses are useful when the operations are performed at group level. The status * handle provides a mechanism to access error attributes for the failed operations. - * - * The number of errors stored behind the opaque handle can be accessed using the the API - * \ref dcgmStatusGetCount. The errors are accessed from the opaque handle \a statusHandle - * using the API \ref dcgmStatusPopError. The user can invoke \ref dcgmStatusPopError + * + * The number of errors stored behind the opaque handle can be accessed using the the API + * \ref dcgmStatusGetCount. The errors are accessed from the opaque handle \a statusHandle + * using the API \ref dcgmStatusPopError. The user can invoke \ref dcgmStatusPopError * for the number of errors or until all the errors are fetched. 
- * - * When the status handle is not required any further then it should be deleted using the API + * + * When the status handle is not required any further then it should be deleted using the API * \ref dcgmStatusDestroy. - * @param statusHandle OUT : Reference to handle for list of statuses - * @return + * @param statusHandle OUT: Reference to handle for list of statuses + * + * @return * - \ref DCGM_ST_OK if the status handle is successfully created * - \ref DCGM_ST_BADPARAM if \a statusHandle is invalid + * */ dcgmReturn_t dcgmStatusCreate(dcgmStatus_t *statusHandle); /** * Used to destroy status handle created using \ref dcgmStatusCreate. - * @param statusHandle IN : Handle to list of statuses + * @param statusHandle IN: Handle to list of statuses + * * @return * - \ref DCGM_ST_OK if the status handle is successfully created * - \ref DCGM_ST_BADPARAM if \a statusHandle is invalid + * */ dcgmReturn_t dcgmStatusDestroy(dcgmStatus_t statusHandle); /** * Used to get count of error entries stored inside the opaque handle \a statusHandle. - * @param statusHandle IN : Handle to list of statuses - * @param count OUT : Number of error entries present in the list of statuses - * @return + * @param statusHandle IN: Handle to list of statuses + * @param count OUT: Number of error entries present in the list of statuses + * + * @return * - \ref DCGM_ST_OK if the error count is successfully received * - \ref DCGM_ST_BADPARAM if any of \a statusHandle or \a count is invalid + * */ dcgmReturn_t dcgmStatusGetCount(dcgmStatus_t statusHandle, unsigned int *count); /** - * Used to iterate through the list of errors maintained behind \a statusHandle. The method pops the - * first error from the list of DCGM statuses. In order to iterate through all the errors, the user + * Used to iterate through the list of errors maintained behind \a statusHandle. The method pops the + * first error from the list of DCGM statuses. 
In order to iterate through all the errors, the user * can invoke this API for the number of errors or until all the errors are fetched. - * @param statusHandle IN : Handle to list of statuses - * @param pDcgmErrorInfo OUT : First error from the list of statuses + * @param statusHandle IN: Handle to list of statuses + * @param pDcgmErrorInfo OUT: First error from the list of statuses + * * @return * - \ref DCGM_ST_OK if the error entry is successfully fetched * - \ref DCGM_ST_BADPARAM if any of \a statusHandle or \a pDcgmErrorInfo is invalid * - \ref DCGM_ST_NO_DATA if the status handle list is empty + * */ dcgmReturn_t dcgmStatusPopError(dcgmStatus_t statusHandle, dcgmErrorInfo_t *pDcgmErrorInfo); /** * Used to clear all the errors in the status handle created by the API - * \ref dcgmStatusCreate. After one set of operation, the \a statusHandle + * \ref dcgmStatusCreate. After one set of operation, the \a statusHandle * can be cleared and reused for the next set of operation. - * @param statusHandle IN : Handle to list of statuses - * @return + * @param statusHandle IN: Handle to list of statuses + * + * @return * - \ref DCGM_ST_OK if the errors are successfully cleared * - \ref DCGM_ST_BADPARAM if \a statusHandle is invalid + * */ dcgmReturn_t dcgmStatusClear(dcgmStatus_t statusHandle); @@ -618,7 +696,6 @@ dcgmReturn_t dcgmStatusClear(dcgmStatus_t statusHandle); */ /***************************************************************************************************/ - /***************************************************************************************************/ /** @defgroup DCGMAPI_DC_Setup Setup and management * Describes APIs to Get/Set configuration on the group of GPUs. @@ -627,100 +704,98 @@ dcgmReturn_t dcgmStatusClear(dcgmStatus_t statusHandle); /***************************************************************************************************/ /** - * Used to set configuration for the group of one or more GPUs identified by \a groupId. 
- * - * The configuration settings specified in \a pDeviceConfig are applied to all the GPUs in the - * group. Since DCGM group is a logical grouping of GPUs, the configuration settings stays intact - * for the individual GPUs even after the group is destroyed. - * - * If the user wishes to ignore the configuration of one or more properties in the input - * \a pDeviceConfig then the property should be specified as one of \a DCGM_INT32_BLANK, - * \a DCGM_INT64_BLANK, \a DCGM_FP64_BLANK or \a DCGM_STR_BLANK based on the data type of the - * property to be ignored. - * - * If any of the properties fail to be configured for any of the GPUs in the group then the API - * returns an error. The status handle \a statusHandle should be further evaluated to access error - * attributes for the failed operations. Please refer to status management APIs at \ref DCGMAPI_ST - * to access the error attributes. - * - * To find out valid supported clock values that can be passed to dcgmConfigSet, look at the device - * attributes of a GPU in the group using the API dcgmGetDeviceAttributes. - - * @param pDcgmHandle IN : DCGM Handle - * - * @param groupId IN : Group ID representing collection of one or more GPUs. Look - * at \ref dcgmGroupCreate for details on creating the - * group. - * @param pDeviceConfig IN : Pointer to memory to hold desired configuration to be - * applied for all the GPU in the group represented by - * \a groupId. The caller must populate the version field of - * \a pDeviceConfig. - * @param statusHandle IN/OUT : Resulting error status for multiple operations. Pass it as - * NULL if the detailed error information is not needed. - * Look at \ref dcgmStatusCreate for details on creating - * status handle. - - * @return - * - \ref DCGM_ST_OK if the configuration has been successfully set. - * - \ref DCGM_ST_BADPARAM if any of \a groupId or \a pDeviceConfig is invalid. - * - \ref DCGM_ST_VER_MISMATCH if \a pDeviceConfig has the incorrect version. 
- * - \ref DCGM_ST_GENERIC_ERROR if an unknown error has occurred. - */ -dcgmReturn_t DECLDIR dcgmConfigSet(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmConfig_t *pDeviceConfig, - dcgmStatus_t statusHandle); - -/** - * Used to get configuration for all the GPUs present in the group. - * - * This API can get the most recent target or desired configuration set by \ref dcgmConfigSet. - * Set type as \a DCGM_CONFIG_TARGET_STATE to get target configuration. The target configuration - * properties are maintained by DCGM and are automatically enforced after a GPU reset or - * reinitialization is completed. - * - * The method can also be used to get the actual configuration state for the GPUs in the group. - * Set type as \a DCGM_CONFIG_CURRENT_STATE to get the actually configuration state. Ideally, the - * actual configuration state will be exact same as the target configuration state. - * - * If any of the property in the target configuration is unknown then the property value in the - * output is populated as one of DCGM_INT32_BLANK, DCGM_INT64_BLANK, DCGM_FP64_BLANK or - * DCGM_STR_BLANK based on the data type of the property. - * - * If any of the property in the current configuration state is not supported then the property - * value in the output is populated as one of DCGM_INT32_NOT_SUPPORTED, DCGM_INT64_NOT_SUPPORTED, - * DCGM_FP64_NOT_SUPPORTED or DCGM_STR_NOT_SUPPORTED based on the data type of the property. - * - * If any of the properties can't be fetched for any of the GPUs in the group then the API returns - * an error. The status handle \a statusHandle should be further evaluated to access error - * attributes for the failed operations. Please refer to status management APIs at \ref DCGMAPI_ST - * to access the error attributes. - * - * @param pDcgmHandle IN : DCGM Handle - * @param groupId IN : Group ID representing collection of one or more GPUs. Look - * at \ref dcgmGroupCreate for details on creating the - * group. 
- * @param type IN : Type of configuration values to be fetched. - * @param count IN : The number of entries that \a deviceConfigList array can - * store. - * @param deviceConfigList OUT : Pointer to memory to hold requested configuration - * corresponding to all the GPUs in the group (\a groupId). The - * size of the memory must be greater than or equal to hold - * output information for the number of GPUs present in the - * group (\a groupId). - * @param statusHandle IN/OUT : Resulting error status for multiple operations. Pass it as - * NULL if the detailed error information is not needed. - * Look at \ref dcgmStatusCreate for details on creating - * status handle. - - * @return - * - \ref DCGM_ST_OK if the configuration has been successfully fetched. - * - \ref DCGM_ST_BADPARAM if any of \a groupId, \a type, \a count, - * or \a deviceConfigList is invalid. - * - \ref DCGM_ST_NOT_CONFIGURED if the target configuration is not already set. - * - \ref DCGM_ST_VER_MISMATCH if \a deviceConfigList has the incorrect version. - * - \ref DCGM_ST_GENERIC_ERROR if an unknown error has occurred. - */ -dcgmReturn_t DECLDIR dcgmConfigGet(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmConfigType_t type, int count, - dcgmConfig_t deviceConfigList[], dcgmStatus_t statusHandle); +* Used to set configuration for the group of one or more GPUs identified by \a groupId. +* +* The configuration settings specified in \a pDeviceConfig are applied to all the GPUs in the +* group. Since DCGM group is a logical grouping of GPUs, the configuration settings stays intact +* for the individual GPUs even after the group is destroyed. +* +* If the user wishes to ignore the configuration of one or more properties in the input +* \a pDeviceConfig then the property should be specified as one of \a DCGM_INT32_BLANK, +* \a DCGM_INT64_BLANK, \a DCGM_FP64_BLANK or \a DCGM_STR_BLANK based on the data type of the +* property to be ignored. 
+* +* If any of the properties fail to be configured for any of the GPUs in the group then the API +* returns an error. The status handle \a statusHandle should be further evaluated to access error +* attributes for the failed operations. Please refer to status management APIs at \ref DCGMAPI_ST +* to access the error attributes. +* +* To find out valid supported clock values that can be passed to dcgmConfigSet, look at the device +* attributes of a GPU in the group using the API dcgmGetDeviceAttributes. + +* @param pDcgmHandle IN: DCGM Handle +* @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate +* for details on creating the group. +* @param pDeviceConfig IN: Pointer to memory to hold desired configuration to be applied for all the GPU in the +* group represented by \a groupId. +* The caller must populate the version field of \a pDeviceConfig. +* @param statusHandle IN/OUT: Resulting error status for multiple operations. Pass it as NULL if the detailed +* error information is not needed. +* Look at \ref dcgmStatusCreate for details on creating status handle. + +* @return +* - \ref DCGM_ST_OK if the configuration has been successfully set. +* - \ref DCGM_ST_BADPARAM if any of \a groupId or \a pDeviceConfig is invalid. +* - \ref DCGM_ST_VER_MISMATCH if \a pDeviceConfig has the incorrect version. +* - \ref DCGM_ST_GENERIC_ERROR if an unknown error has occurred. +* +*/ +dcgmReturn_t DECLDIR dcgmConfigSet(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmConfig_t *pDeviceConfig, + dcgmStatus_t statusHandle); + +/** +* Used to get configuration for all the GPUs present in the group. +* +* This API can get the most recent target or desired configuration set by \ref dcgmConfigSet. +* Set type as \a DCGM_CONFIG_TARGET_STATE to get target configuration. The target configuration +* properties are maintained by DCGM and are automatically enforced after a GPU reset or +* reinitialization is completed. 
+* +* The method can also be used to get the actual configuration state for the GPUs in the group. +* Set type as \a DCGM_CONFIG_CURRENT_STATE to get the actually configuration state. Ideally, the +* actual configuration state will be exact same as the target configuration state. +* +* If any of the property in the target configuration is unknown then the property value in the +* output is populated as one of DCGM_INT32_BLANK, DCGM_INT64_BLANK, DCGM_FP64_BLANK or +* DCGM_STR_BLANK based on the data type of the property. +* +* If any of the property in the current configuration state is not supported then the property +* value in the output is populated as one of DCGM_INT32_NOT_SUPPORTED, DCGM_INT64_NOT_SUPPORTED, +* DCGM_FP64_NOT_SUPPORTED or DCGM_STR_NOT_SUPPORTED based on the data type of the property. +* +* If any of the properties can't be fetched for any of the GPUs in the group then the API returns +* an error. The status handle \a statusHandle should be further evaluated to access error +* attributes for the failed operations. Please refer to status management APIs at \ref DCGMAPI_ST +* to access the error attributes. +* +* @param pDcgmHandle IN: DCGM Handle +* @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate +* for details on creating the group. +* @param type IN: Type of configuration values to be fetched. +* @param count IN: The number of entries that \a deviceConfigList array can store. +* @param deviceConfigList OUT: Pointer to memory to hold requested configuration corresponding to all the GPUs in +* the group (\a groupId). The size of the memory must be greater than or equal to hold +* output information for the number of GPUs present in the group (\a groupId). +* @param statusHandle IN/OUT: Resulting error status for multiple operations. Pass it as NULL if the detailed +* error information is not needed. +* Look at \ref dcgmStatusCreate for details on creating status handle. 
+ +* @return +* - \ref DCGM_ST_OK if the configuration has been successfully fetched. +* - \ref DCGM_ST_BADPARAM if any of \a groupId, \a type, \a count, or \a deviceConfigList is invalid. +* - \ref DCGM_ST_NOT_CONFIGURED if the target configuration is not already set. +* - \ref DCGM_ST_VER_MISMATCH if \a deviceConfigList has the incorrect version. +* - \ref DCGM_ST_GENERIC_ERROR if an unknown error has occurred. +* +*/ +dcgmReturn_t DECLDIR dcgmConfigGet(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmConfigType_t type, + int count, + dcgmConfig_t deviceConfigList[], + dcgmStatus_t statusHandle); /** @} */ // Closing for DCGMAPI_DC_Setup @@ -732,47 +807,45 @@ dcgmReturn_t DECLDIR dcgmConfigGet(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupI */ /***************************************************************************************************/ - /** * Used to enforce previously set configuration for all the GPUs present in the group. - * + * * This API provides a mechanism to the users to manually enforce the configuration at any point of - * time. The configuration can only be enforced if it's already configured using the API \ref + * time. The configuration can only be enforced if it's already configured using the API \ref * dcgmConfigSet. - * - * If any of the properties can't be enforced for any of the GPUs in the group then the API returns - * an error. The status handle \a statusHandle should be further evaluated to access error - * attributes for the failed operations. Please refer to status management APIs at \ref DCGMAPI_ST + * + * If any of the properties can't be enforced for any of the GPUs in the group then the API returns + * an error. The status handle \a statusHandle should be further evaluated to access error + * attributes for the failed operations. Please refer to status management APIs at \ref DCGMAPI_ST * to access the error attributes. 
- * - * @param pDcgmHandle IN : DCGM Handle - * - * @param groupId IN : Group ID representing collection of one or more GPUs. Look at - * \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. - * @param statusHandle IN/OUT : Resulting error status for multiple operations. Pass it as - * NULL if the detailed error information is not needed. - * Look at \ref dcgmStatusCreate for details on creating - * status handle. + * + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate + * for details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. + * @param statusHandle IN/OUT: Resulting error status for multiple operations. Pass it as NULL if the detailed + * error information is not needed. Look at \ref dcgmStatusCreate for details on + * creating status handle. + * * @return * - \ref DCGM_ST_OK if the configuration has been successfully enforced. * - \ref DCGM_ST_BADPARAM if \a groupId is invalid. * - \ref DCGM_ST_NOT_CONFIGURED if the target configuration is not already set. * - \ref DCGM_ST_GENERIC_ERROR if an unknown error has occurred. 
+ * */ dcgmReturn_t DECLDIR dcgmConfigEnforce(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmStatus_t statusHandle); /** @} */ // Closing for DCGMAPI_DC_MI /** @} */ // Closing for DCGMAPI_DC - + /***************************************************************************************************/ /** @defgroup DCGMAPI_FI Field APIs - * + * * These APIs are responsible for watching, unwatching, and updating specific fields as defined * by DCGM_FI_* - * + * * @{ */ /***************************************************************************************************/ @@ -784,11 +857,10 @@ dcgmReturn_t DECLDIR dcgmConfigEnforce(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t gr * To force a field update cycle, call dcgmUpdateAllFields(1). * * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more entities. Look at - * \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs or \a DCGM_GROUP_ALL_NVSWITCHES to - * to perform the operation on all NvSwitches. + * @param groupId IN: Group ID representing collection of one or more entities. Look at \ref dcgmGroupCreate + * for details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs or + * \a DCGM_GROUP_ALL_NVSWITCHES to perform the operation on all NvSwitches. + * @param fieldGroupId IN: Fields to watch. 
 * @param updateFreq IN: How often to update this field in usec * @param maxKeepAge IN: How long to keep data for this field in seconds @@ -797,183 +869,223 @@ dcgmReturn_t DECLDIR dcgmConfigEnforce(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t gr * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid + * */ -dcgmReturn_t dcgmWatchFields(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmFieldGrp_t fieldGroupId, - long long updateFreq, double maxKeepAge, int maxKeepSamples); +dcgmReturn_t dcgmWatchFields(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmFieldGrp_t fieldGroupId, + long long updateFreq, + double maxKeepAge, + int maxKeepSamples); /** * Request that DCGM stop recording updates for a given field collection. * * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more entities. Look at - * \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs or \a DCGM_GROUP_ALL_NVSWITCHES to - * to perform the operation on all NvSwitches. + * @param groupId IN: Group ID representing collection of one or more entities. Look at \ref dcgmGroupCreate + * for details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs or + * \a DCGM_GROUP_ALL_NVSWITCHES to perform the operation on all NvSwitches. + * @param fieldGroupId IN: Fields to unwatch. 
* * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid + * */ - dcgmReturn_t dcgmUnwatchFields(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmFieldGrp_t fieldGroupId); +dcgmReturn_t dcgmUnwatchFields(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmFieldGrp_t fieldGroupId); /** * Request updates for all field values that have updated since a given timestamp - * - * This version only works with GPU entities. Use \ref dcgmGetValuesSince_v2 for entity groups + * + * This version only works with GPU entities. Use \ref dcgmGetValuesSince_v2 for entity groups * containing NvSwitches. * * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more GPUs. Look at - * \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate for + * details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. * @param fieldGroupId IN: Fields to return data for - * @param sinceTimestamp IN: Timestamp to request values since in usec since 1970. This will - * be returned in nextSinceTimestamp for subsequent calls - * 0 = request all data + * @param sinceTimestamp IN: Timestamp to request values since in usec since 1970. This will be returned in + * nextSinceTimestamp for subsequent calls 0 = request all data * @param nextSinceTimestamp OUT: Timestamp to use for sinceTimestamp on next call to this function - * @param enumCB IN: Callback to invoke for every field value update. Note that - * multiple updates can be returned in each invocation + * @param enumCB IN: Callback to invoke for every field value update. 
Note that multiple updates can be + * returned in each invocation * @param userData IN: User data pointer to pass to the userData field of enumCB. * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_NOT_SUPPORTED if one of the entities was from a non-GPU type * - \ref DCGM_ST_BADPARAM if a parameter is invalid + * */ - -dcgmReturn_t dcgmGetValuesSince(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmFieldGrp_t fieldGroupId, - long long sinceTimestamp, long long *nextSinceTimestamp, - dcgmFieldValueEnumeration_f enumCB, void *userData); +dcgmReturn_t dcgmGetValuesSince(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmFieldGrp_t fieldGroupId, + long long sinceTimestamp, + long long *nextSinceTimestamp, + dcgmFieldValueEnumeration_f enumCB, + void *userData); /** * Request updates for all field values that have updated since a given timestamp - * + * * This version works with non-GPU entities like NvSwitches * * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more entities. Look at - * \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs or \a DCGM_GROUP_ALL_NVSWITCHES to - * perform the operation on all NvSwitches. + * @param groupId IN: Group ID representing collection of one or more entities. Look at \ref dcgmGroupCreate + * for details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs or + * \a DCGM_GROUP_ALL_NVSWITCHES to perform the operation on all NvSwitches. * @param fieldGroupId IN: Fields to return data for - * @param sinceTimestamp IN: Timestamp to request values since in usec since 1970. This will - * be returned in nextSinceTimestamp for subsequent calls - * 0 = request all data + * @param sinceTimestamp IN: Timestamp to request values since in usec since 1970. 
This will be returned in + * nextSinceTimestamp for subsequent calls 0 = request all data * @param nextSinceTimestamp OUT: Timestamp to use for sinceTimestamp on next call to this function - * @param enumCB IN: Callback to invoke for every field value update. Note that - * multiple updates can be returned in each invocation + * @param enumCB IN: Callback to invoke for every field value update. Note that multiple updates can be + * returned in each invocation * @param userData IN: User data pointer to pass to the userData field of enumCB. * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid + * */ - -dcgmReturn_t dcgmGetValuesSince_v2(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmFieldGrp_t fieldGroupId, - long long sinceTimestamp, long long *nextSinceTimestamp, - dcgmFieldValueEntityEnumeration_f enumCB, void *userData); +dcgmReturn_t dcgmGetValuesSince_v2(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmFieldGrp_t fieldGroupId, + long long sinceTimestamp, + long long *nextSinceTimestamp, + dcgmFieldValueEntityEnumeration_f enumCB, + void *userData); /** * Request latest cached field value for a field value collection - * - * This version only works with GPU entities. Use \ref dcgmGetLatestValues_v2 for entity groups + * + * This version only works with GPU entities. Use \ref dcgmGetLatestValues_v2 for entity groups * containing NvSwitches. * * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more GPUs. Look at - * \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate for + * details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. 
* @param fieldGroupId IN: Fields to return data for. - * @param enumCB IN: Callback to invoke for every field value update. Note that - * multiple updates can be returned in each invocation + * @param enumCB IN: Callback to invoke for every field value update. Note that multiple updates can be + * returned in each invocation * @param userData IN: User data pointer to pass to the userData field of enumCB. * + * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_NOT_SUPPORTED if one of the entities was from a non-GPU type * - \ref DCGM_ST_BADPARAM if a parameter is invalid + * */ -dcgmReturn_t dcgmGetLatestValues(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmFieldGrp_t fieldGroupId, - dcgmFieldValueEnumeration_f enumCB, void *userData); +dcgmReturn_t dcgmGetLatestValues(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmFieldGrp_t fieldGroupId, + dcgmFieldValueEnumeration_f enumCB, + void *userData); /** * Request latest cached field value for a field value collection - * + * * This version works with non-GPU entities like NvSwitches * * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more entities. Look at - * \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs or \a DCGM_GROUP_ALL_NVSWITCHES to - * perform the operation on all NvSwitches. + * @param groupId IN: Group ID representing collection of one or more entities. Look at \ref dcgmGroupCreate + * for details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs or + * \a DCGM_GROUP_ALL_NVSWITCHES to perform the operation on all NvSwitches. * @param fieldGroupId IN: Fields to return data for. - * @param enumCB IN: Callback to invoke for every field value update. 
Note that - * multiple updates can be returned in each invocation + * @param enumCB IN: Callback to invoke for every field value update. Note that multiple updates can be + * returned in each invocation * @param userData IN: User data pointer to pass to the userData field of enumCB. * + * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_NOT_SUPPORTED if one of the entities was from a non-GPU type * - \ref DCGM_ST_BADPARAM if a parameter is invalid + * */ -dcgmReturn_t dcgmGetLatestValues_v2(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmFieldGrp_t fieldGroupId, - dcgmFieldValueEntityEnumeration_f enumCB, void *userData); +dcgmReturn_t dcgmGetLatestValues_v2(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmFieldGrp_t fieldGroupId, + dcgmFieldValueEntityEnumeration_f enumCB, + void *userData); /** * Request latest cached field value for a GPU * - * @param pDcgmHandle IN: DCGM Handle - * @param gpuId IN: Gpu ID representing the GPU for which the fields are being requested. - * @param fields IN: Field IDs to return data for. See the definitions in dcgm_fields.h that - * start with DCGM_FI_. - * @param count IN: Number of field IDs in fields[] array. - * @param values OUT: Latest field values for the fields in fields[]. + * @param pDcgmHandle IN: DCGM Handle + * @param gpuId IN: Gpu ID representing the GPU for which the fields are being requested. + * @param fields IN: Field IDs to return data for. See the definitions in dcgm_fields.h that start with DCGM_FI_. + * @param count IN: Number of field IDs in fields[] array. + * @param values OUT: Latest field values for the fields in fields[]. 
+ * */ -dcgmReturn_t dcgmGetLatestValuesForFields(dcgmHandle_t pDcgmHandle, int gpuId, unsigned short fields[], - unsigned int count, dcgmFieldValue_v1 values[]); +dcgmReturn_t dcgmGetLatestValuesForFields(dcgmHandle_t pDcgmHandle, + int gpuId, + unsigned short fields[], + unsigned int count, + dcgmFieldValue_v1 values[]); /** * Request latest cached field value for a group of fields for a specific entity * - * @param pDcgmHandle IN: DCGM Handle - * @param entityGroup IN: entity_group_t (e.g. switch) - * @param entityId IN: entity ID representing the rntity for which the fields are being requested. - * @param fields IN: Field IDs to return data for. See the definitions in dcgm_fields.h that - * start with DCGM_FI_. - * @param count IN: Number of field IDs in fields[] array. - * @param values OUT: Latest field values for the fields in fields[]. - */ -dcgmReturn_t dcgmEntityGetLatestValues(dcgmHandle_t pDcgmHandle, dcgm_field_entity_group_t entityGroup, - int entityId, unsigned short fields[], unsigned int count, + * @param pDcgmHandle IN: DCGM Handle + * @param entityGroup IN: entity_group_t (e.g. switch) + * @param entityId IN: entity ID representing the entity for which the fields are being requested. + * @param fields IN: Field IDs to return data for. See the definitions in dcgm_fields.h that start with DCGM_FI_. + * @param count IN: Number of field IDs in fields[] array. + * @param values OUT: Latest field values for the fields in fields[]. + * + */ +dcgmReturn_t dcgmEntityGetLatestValues(dcgmHandle_t pDcgmHandle, + dcgm_field_entity_group_t entityGroup, + int entityId, + unsigned short fields[], + unsigned int count, dcgmFieldValue_v1 values[]); /** * Request the latest cached or live field value for a list of fields for a group of entities * * Note: The returned entities are not guaranteed to be in any order. Reordering can occur internally - * in order to optimize calls to the NVIDIA driver. 
- * - * @param pDcgmHandle IN: DCGM Handle - * @param entities IN: List of entities to get values for - * @param entityCount IN: Number of entries in entities[] - * @param fields IN: Field IDs to return data for. See the definitions in dcgm_fields.h that - * start with DCGM_FI_. - * @param fieldCount IN: Number of field IDs in fields[] array. - * @param flags IN: Optional flags that affect how this request is processed. Pass - * \ref DCGM_FV_FLAG_LIVE_DATA here to retrieve a live driver value rather - * than a cached value. See that flag's documentation for caveats. - * @param values OUT: Latest field values for the fields requested. This must be able to hold - * entityCount * fieldCount field value records. - */ -dcgmReturn_t dcgmEntitiesGetLatestValues(dcgmHandle_t pDcgmHandle, dcgmGroupEntityPair_t entities[], - unsigned int entityCount, unsigned short fields[], - unsigned int fieldCount, unsigned int flags, + * in order to optimize calls to the NVIDIA driver. + * + * @param pDcgmHandle IN: DCGM Handle + * @param entities IN: List of entities to get values for + * @param entityCount IN: Number of entries in entities[] + * @param fields IN: Field IDs to return data for. See the definitions in dcgm_fields.h that start with DCGM_FI_. + * @param fieldCount IN: Number of field IDs in fields[] array. + * @param flags IN: Optional flags that affect how this request is processed. Pass \ref DCGM_FV_FLAG_LIVE_DATA + * here to retrieve a live driver value rather than a cached value. See that flag's + * documentation for caveats. + * @param values OUT: Latest field values for the fields requested. This must be able to hold entityCount * + * fieldCount field value records. 
+ * + */ +dcgmReturn_t dcgmEntitiesGetLatestValues(dcgmHandle_t pDcgmHandle, + dcgmGroupEntityPair_t entities[], + unsigned int entityCount, + unsigned short fields[], + unsigned int fieldCount, + unsigned int flags, dcgmFieldValue_v2 values[]); +/*************************************************************************/ +/** + * Get a summary of the values for a field id over a period of time. + * + * @param pDcgmHandle IN: DCGM Handle + * @param request IN/OUT: a pointer to the struct detailing the request and containing the response + * + * @return + * - \ref DCGM_ST_OK if the call was successful + * - \ref DCGM_ST_FIELD_UNSUPPORTED_BY_API if the field is not int64 or double type + * + */ +dcgmReturn_t DECLDIR dcgmGetFieldSummary(dcgmHandle_t pDcgmHandle, dcgmFieldSummaryRequest_t *request); + /** @} */ /***************************************************************************************************/ @@ -984,19 +1096,20 @@ dcgmReturn_t dcgmEntitiesGetLatestValues(dcgmHandle_t pDcgmHandle, dcgmGroupEnti /** * This method is used to tell the DCGM module to update all the fields being watched. - * + * * Note: If the if the operation mode was set to manual mode (DCGM_OPERATION_MODE_MANUAL) during * initialization (\ref dcgmInit), this method must be caused periodically to allow field value watches * the opportunity to gather samples. - * + * * @param pDcgmHandle IN: DCGM Handle - * @param waitForUpdate IN: Whether or not to wait for the update loop to - * complete before returning to the caller 1=wait. 0=do not wait. + * @param waitForUpdate IN: Whether or not to wait for the update loop to complete before returning to the + * caller 1=wait. 0=do not wait. 
* - * @return + * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if \a waitForUpdate is invalid * - \ref DCGM_ST_GENERIC_ERROR if an unspecified DCGM error occurs + * */ dcgmReturn_t dcgmUpdateAllFields(dcgmHandle_t pDcgmHandle, int waitForUpdate); @@ -1005,7 +1118,7 @@ dcgmReturn_t dcgmUpdateAllFields(dcgmHandle_t pDcgmHandle, int waitForUpdate); /***************************************************************************************************/ /** @defgroup DCGMAPI_PROCESS_STATS Process Statistics - * Describes APIs to investigate statistics such as accounting, performance and errors during the + * Describes APIs to investigate statistics such as accounting, performance and errors during the * lifetime of a GPU process * @{ */ @@ -1018,10 +1131,9 @@ dcgmReturn_t dcgmUpdateAllFields(dcgmHandle_t pDcgmHandle, int waitForUpdate); * To force a field update cycle, call dcgmUpdateAllFields(1). * * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more GPUs. Look at - * \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate for + * details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. * @param updateFreq IN: How often to update this field in usec * @param maxKeepAge IN: How long to keep data for this field in seconds * @param maxKeepSamples IN: Maximum number of samples to keep. 
0=no limit @@ -1029,14 +1141,16 @@ dcgmReturn_t dcgmUpdateAllFields(dcgmHandle_t pDcgmHandle, int waitForUpdate); * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid - * - \ref DCGM_ST_REQUIRES_ROOT if the host engine is being run as non-root, and - * accounting mode could not be enabled (requires root). - * Run "nvidia-smi -am 1" as root on the node before starting - * DCGM to fix this. + * - \ref DCGM_ST_REQUIRES_ROOT if the host engine is being run as non-root, and accounting mode could not + * be enabled (requires root). Run "nvidia-smi -am 1" as root on the node + * before starting DCGM to fix this. + * */ - -dcgmReturn_t dcgmWatchPidFields(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, - long long updateFreq, double maxKeepAge, int maxKeepSamples); +dcgmReturn_t dcgmWatchPidFields(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + long long updateFreq, + double maxKeepAge, + int maxKeepSamples); /** * @@ -1048,13 +1162,12 @@ dcgmReturn_t dcgmWatchPidFields(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, * * @param pDcgmHandle IN: DCGM Handle * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate - * for details on creating the group. Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. - * @param pidInfo IN/OUT: Structure to return information about pid in. - * pidInfo->pid must be set to the pid in question. + * for details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. + * @param pidInfo IN/OUT: Structure to return information about pid in. pidInfo->pid must be set to the pid in question. * pidInfo->version should be set to dcgmPidInfo_version. 
* - * @return + * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_NO_DATA if the PID did not run on any GPU * @@ -1065,9 +1178,9 @@ dcgmReturn_t dcgmGetPidInfo(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgm /***************************************************************************************************/ /** @defgroup DCGMAPI_JOB_STATS Job Statistics - * The client can invoke DCGM APIs to start and stop collecting the stats at the process boundaries - * (during prologue and epilogue). This will enable DCGM to monitor all the PIDs while the job is - * in progress, and provide a summary of active processes and resource usage during the window of + * The client can invoke DCGM APIs to start and stop collecting the stats at the process boundaries + * (during prologue and epilogue). This will enable DCGM to monitor all the PIDs while the job is + * in progress, and provide a summary of active processes and resource usage during the window of * interest. * @{ */ @@ -1080,10 +1193,9 @@ dcgmReturn_t dcgmGetPidInfo(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgm * To force a field update cycle, call dcgmUpdateAllFields(1). * * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more GPUs. Look at - * \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate for + * details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. * @param updateFreq IN: How often to update this field in usec * @param maxKeepAge IN: How long to keep data for this field in seconds * @param maxKeepSamples IN: Maximum number of samples to keep. 
0=no limit @@ -1095,59 +1207,65 @@ dcgmReturn_t dcgmGetPidInfo(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgm * accounting mode could not be enabled (requires root). * Run "nvidia-smi -am 1" as root on the node before starting * DCGM to fix this. + * */ - -dcgmReturn_t dcgmWatchJobFields(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, - long long updateFreq, double maxKeepAge, int maxKeepSamples); - +dcgmReturn_t dcgmWatchJobFields(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + long long updateFreq, + double maxKeepAge, + int maxKeepSamples); /** * This API is used by the client to notify DCGM about the job to be started. Should be invoked as * part of job prologue - * - * @param pDcgmHandle IN : DCGM Handle - * @param groupId IN : Group ID representing collection of one or more GPUs. Look at - * \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. - * @param jobId IN : User provided string to represent the job - * @return + * + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate for + * details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. + * @param jobId IN: User provided string to represent the job + * + * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid * - \ref DCGM_ST_DUPLICATE_KEY if the specified \a jobId is already in use + * */ dcgmReturn_t dcgmJobStartStats(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, char jobId[64]); /** - * This API is used by the clients to notify DCGM to stop collecting stats for the job represented + * This API is used by the clients to notify DCGM to stop collecting stats for the job represented * by job id. Should be invoked as part of job epilogue. 
* The job Id remains available to view the stats at any point but cannot be used to start a new job. * You must call dcgmWatchJobFields() before this call to enable watching of job - * - * @param pDcgmHandle IN : DCGM Handle - * @param jobId IN : User provided string to represent the job + * + * @param pDcgmHandle IN: DCGM Handle + * @param jobId IN: User provided string to represent the job + * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid * - \ref DCGM_ST_NO_DATA if \a jobId is not a valid job identifier. + * */ dcgmReturn_t dcgmJobStopStats(dcgmHandle_t pDcgmHandle, char jobId[64]); /** - * Get stats for the job identified by DCGM generated job id. The stats can be retrieved at any + * Get stats for the job identified by DCGM generated job id. The stats can be retrieved at any * point when the job is in process. * If you want to reuse this jobId, call \ref dcgmJobRemove after this call. - * - * @param pDcgmHandle IN : DCGM Handle - * @param jobId IN : User provided string to represent the job - * @param pJobInfo IN/OUT : Structure to return information about the - * job.
.version should be set to - * \ref dcgmJobInfo_version before this call. + * + * @param pDcgmHandle IN: DCGM Handle + * @param jobId IN: User provided string to represent the job + * @param pJobInfo IN/OUT: Structure to return information about the job.
.version should be set to + * \ref dcgmJobInfo_version before this call. + * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid * - \ref DCGM_ST_NO_DATA if \a jobId is not a valid job identifier. * - \ref DCGM_ST_VER_MISMATCH if .version is not set or is invalid. + * */ dcgmReturn_t dcgmJobGetStats(dcgmHandle_t pDcgmHandle, char jobId[64], dcgmJobInfo_t *pJobInfo); @@ -1156,13 +1274,14 @@ dcgmReturn_t dcgmJobGetStats(dcgmHandle_t pDcgmHandle, char jobId[64], dcgmJobIn * be able to call dcgmJobGetStats() on this jobId. However, you will be able to reuse jobId after * this call. * - * @param pDcgmHandle IN : DCGM Handle - * @param jobId IN : User provided string to represent the job + * @param pDcgmHandle IN: DCGM Handle + * @param jobId IN: User provided string to represent the job * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid * - \ref DCGM_ST_NO_DATA if \a jobId is not a valid job identifier. + * */ dcgmReturn_t dcgmJobRemove(dcgmHandle_t pDcgmHandle, char jobId[64]); @@ -1171,7 +1290,7 @@ dcgmReturn_t dcgmJobRemove(dcgmHandle_t pDcgmHandle, char jobId[64]); * be able to call dcgmJobGetStats() any jobs until you call dcgmJobStartStats again. * You will be able to reuse any previously-used jobIds after this call. * - * @param pDcgmHandle IN : DCGM Handle + * @param pDcgmHandle IN: DCGM Handle * * @return * - \ref DCGM_ST_OK if the call was successful @@ -1185,7 +1304,7 @@ dcgmReturn_t dcgmJobRemoveAll(dcgmHandle_t pDcgmHandle); /** @defgroup DCGMAPI_HM Health Monitor * * This chapter describes the methods that handle the GPU health monitor. 
- * + * * @{ */ /***************************************************************************************************/ @@ -1193,40 +1312,53 @@ dcgmReturn_t dcgmJobRemoveAll(dcgmHandle_t pDcgmHandle); /** * Enable the DCGM health check system for the given systems defined in \ref dcgmHealthSystems_t * - * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more entities. Look - * at \ref dcgmGroupCreate for details on creating the group. - * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs or \a DCGM_GROUP_ALL_NVSWITCHES - * to perform operation on all the NvSwitches. - * @param systems IN: An enum representing systems that should be enabled for health - * checks logically OR'd together. Refer to \ref dcgmHealthSystems_t - * for details. + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID representing collection of one or more entities. Look at \ref dcgmGroupCreate + * for details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs or + * \a DCGM_GROUP_ALL_NVSWITCHES to perform operation on all the NvSwitches. + * @param systems IN: An enum representing systems that should be enabled for health checks logically OR'd + * together. Refer to \ref dcgmHealthSystems_t for details. * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid + * */ - dcgmReturn_t dcgmHealthSet(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmHealthSystems_t systems); /** - * Retrieve the current state of the DCGM health check system + * Enable the DCGM health check system for the given systems defined in \ref dcgmHealthSystems_t + * + * Since DCGM 2.0 * * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more entities. Look - * at \ref dcgmGroupCreate for details on creating the group. 
- * Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs or \a DCGM_GROUP_ALL_NVSWITCHES - * to perform operation on all the NvSwitches. - * @param systems OUT: An integer representing the enabled systems for the given group - * Refer to \ref dcgmHealthSystems_t for details. + * @param healthSet IN: Parameters to use when setting health watches. See + * \ref dcgmHealthSetParams_v2 for the description of each parameter. * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid */ - + +dcgmReturn_t dcgmHealthSet_v2(dcgmHandle_t pDcgmHandle, dcgmHealthSetParams_v2 *params); + +/** + * Retrieve the current state of the DCGM health check system + * + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID representing collection of one or more entities. Look at \ref dcgmGroupCreate + * for details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs or + * \a DCGM_GROUP_ALL_NVSWITCHES to perform operation on all the NvSwitches. + * @param systems OUT: An integer representing the enabled systems for the given group Refer to + * \ref dcgmHealthSystems_t for details. + * + * @return + * - \ref DCGM_ST_OK if the call was successful + * - \ref DCGM_ST_BADPARAM if a parameter is invalid + * + */ dcgmReturn_t dcgmHealthGet(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmHealthSystems_t *systems); @@ -1236,7 +1368,7 @@ dcgmReturn_t dcgmHealthGet(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmH * about all of the enabled watches within a group is created but no error results are * provided. On subsequent calls, any error information will be returned. * - * + * * @param pDcgmHandle IN: DCGM Handle * @param groupId IN: Group ID representing a collection of one or more entities. 
* Refer to \ref dcgmGroupCreate for details on creating a group @@ -1247,8 +1379,8 @@ dcgmReturn_t dcgmHealthGet(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmH * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid * - \ref DCGM_ST_VER_MISMATCH if results->version is not dcgmHealthResponse_version + * */ - dcgmReturn_t dcgmHealthCheck(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmHealthResponse_t *results); /** @} */ @@ -1258,14 +1390,14 @@ dcgmReturn_t dcgmHealthCheck(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcg * * This chapter describes the methods that handle system policy management and violation settings. * The APIs in Policies module can be broken down into following categories: - * + * * @{ */ /***************************************************************************************************/ /***************************************************************************************************/ /** @defgroup DCGMAPI_PO_Setup Setup and Management - * Describes APIs for setting up policies and registering callbacks to receive notification in + * Describes APIs for setting up policies and registering callbacks to receive notification in * case specific policy condition has been violated. * @{ */ @@ -1275,98 +1407,99 @@ dcgmReturn_t dcgmHealthCheck(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcg * Set the current violation policy inside the policy manager. Given the conditions within the * \ref dcgmPolicy_t structure, if a violation has occurred, subsequent action(s) may be performed to * either report or contain the failure. - * - * This API is only supported on Tesla GPUs and will return DCGM_ST_NOT_SUPPORTED if any non-Tesla GPUs - * are part of the GPU group specified in groupId. - * - * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate - * for details on creating the group. 
Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. - * @param policy IN: A reference to \ref dcgmPolicy_t that will be applied to all - * GPUs in the group. - * @param statusHandle IN/OUT: Resulting status for the operation. Pass it as NULL if - * the detailed error information is not needed. - * Refer to \ref dcgmStatusCreate for details on - * creating a status handle. - * @return + * + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate for + * details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. + * @param policy IN: A reference to \ref dcgmPolicy_t that will be applied to all GPUs in the group. + * @param statusHandle IN/OUT: Resulting status for the operation. Pass it as NULL if the detailed error information + * is not needed. Refer to \ref dcgmStatusCreate for details on creating a status handle. + * + * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if \a groupId or \a policy is invalid - * - \ref DCGM_ST_NOT_SUPPORTED if any non-Tesla GPUs are part of the GPU group specified in groupId + * - \ref DCGM_ST_NOT_SUPPORTED if any unsupported GPUs are part of the GPU group specified in groupId * - DCGM_ST_* a different error has occurred and is stored in \a statusHandle. * Refer to \ref dcgmReturn_t + * */ -dcgmReturn_t dcgmPolicySet(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmPolicy_t *policy, dcgmStatus_t statusHandle); +dcgmReturn_t dcgmPolicySet(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmPolicy_t *policy, + dcgmStatus_t statusHandle); /** - * Get the current violation policy inside the policy manager. Given a groupId, a number of + * Get the current violation policy inside the policy manager. Given a groupId, a number of * policy structures are retrieved. 
* - * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate - * for details on creating the group. Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. - * @param count IN: The size of the policy array. This is the maximum number of policies - * that will be retrieved and ultimately should correspond to the number - * of GPUs specified in the group. - * @param policy OUT: A reference to \ref dcgmPolicy_t that will used as storage for the - * current policies applied to each GPU in the group. - * @param statusHandle IN/OUT: Resulting status for the operation. Pass it as NULL if - * the detailed error information for the operation is not - * needed. Refer to \ref dcgmStatusCreate - * for details on creating a status handle. - * - * @return + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate for + * details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. + * @param count IN: The size of the policy array. This is the maximum number of policies that will be + * retrieved and ultimately should correspond to the number of GPUs specified in the + * group. + * @param policy OUT: A reference to \ref dcgmPolicy_t that will be used as storage for the current policies + * applied to each GPU in the group. + * @param statusHandle IN/OUT: Resulting status for the operation. Pass it as NULL if the detailed error information + * for the operation is not needed. Refer to \ref dcgmStatusCreate for details on + * creating a status handle. + * + * @return + * - \ref DCGM_ST_OK if the call was successful + * - \ref DCGM_ST_BADPARAM if \a groupId or \a policy is invalid + * - DCGM_ST_* a different error has occurred and is stored in \a statusHandle. 
* Refer to \ref dcgmReturn_t + * */ -dcgmReturn_t dcgmPolicyGet(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, int count, - dcgmPolicy_t *policy, dcgmStatus_t statusHandle); +dcgmReturn_t dcgmPolicyGet(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + int count, + dcgmPolicy_t *policy, + dcgmStatus_t statusHandle); /** - * Register a function to be called when a specific policy condition (see \ref dcgmPolicyCondition_t) has been - * violated. This callback(s) will be called automatically when in DCGM_OPERATION_MODE_AUTO mode and only after + * Register a function to be called when a specific policy condition (see \ref dcgmPolicyCondition_t) has been + * violated. This callback(s) will be called automatically when in DCGM_OPERATION_MODE_AUTO mode and only after * dcgmPolicyTrigger when in DCGM_OPERATION_MODE_MANUAL mode. All callbacks are made within a separate thread. * - * This API is only supported on Tesla GPUs and will return DCGM_ST_NOT_SUPPORTED if any non-Tesla GPUs - * are part of the GPU group specified in groupId. + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate for + * details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. + * @param condition IN: The set of conditions specified as an OR'd list (see \ref dcgmPolicyCondition_t) for + * which to register a callback function + * @param beginCallback IN: A reference to a function that should be called should a violation occur. + * This function will be called prior to any actions specified by the policy are taken. + * @param finishCallback IN: A reference to a function that should be called should a violation occur. + * This function will be called after any action specified by the policy are completed. * - * @param pDcgmHandle IN: DCGM Handle - * - * @param groupId IN: Group ID representing collection of one or more GPUs. 
Look at \ref dcgmGroupCreate - * for details on creating the group. Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. - * @param condition IN: The set of conditions specified as an OR'd list (see \ref dcgmPolicyCondition_t) - * for which to register a callback function - * @param beginCallback IN: A reference to a function that should be called should a violation occur. This - * function will be called prior to any actions specified by the policy are taken. - * @param finishCallback IN: A reference to a function that should be called should a violation occur. This - * function will be called after any action specified by the policy are completed. - * - * @return + * @return * - \ref DCGM_ST_OK if the call was successful - * - \ref DCGM_ST_BADPARAM if \a groupId, \a condition, is invalid, \a beginCallback, or \a finishCallback is NULL - * - \ref DCGM_ST_NOT_SUPPORTED if any non-Tesla GPUs are part of the GPU group specified in groupId + * - \ref DCGM_ST_BADPARAM if \a groupId, \a condition, is invalid, \a beginCallback, or + * \a finishCallback is NULL + * - \ref DCGM_ST_NOT_SUPPORTED if any unsupported GPUs are part of the GPU group specified in groupId * */ -dcgmReturn_t dcgmPolicyRegister(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmPolicyCondition_t condition, - fpRecvUpdates beginCallback, fpRecvUpdates finishCallback); +dcgmReturn_t dcgmPolicyRegister(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmPolicyCondition_t condition, + fpRecvUpdates beginCallback, + fpRecvUpdates finishCallback); /** * Unregister a function to be called for a specific policy condition (see \ref dcgmPolicyCondition_t). * This function will unregister all callbacks for a given condition and handle. - * - * @param pDcgmHandle IN: DCGM Handle * - * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate - * for details on creating the group. 
Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. - * @param condition IN: The set of conditions specified as an OR'd list (see \ref dcgmPolicyCondition_t) - * for which to unregister a callback function + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate for + * details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. + * @param condition IN: The set of conditions specified as an OR'd list (see \ref dcgmPolicyCondition_t) for + * which to unregister a callback function * - * @return + * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if \a groupId, \a condition, is invalid or \a callback is NULL * @@ -1377,7 +1510,7 @@ dcgmReturn_t dcgmPolicyUnregister(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId /***************************************************************************************************/ /** @defgroup DCGMAPI_PO_MI Manual Invocation - * Describes APIs which can be used to perform direct actions (e.g. Perform GPU Reset, Run Health + * Describes APIs which can be used to perform direct actions (e.g. Perform GPU Reset, Run Health * Diagnostics) on a group of GPUs. * @{ */ @@ -1387,62 +1520,60 @@ dcgmReturn_t dcgmPolicyUnregister(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId * Inform the action manager to perform a manual validation of a group of GPUs on the system * * *************************************** DEPRECATED *************************************** - * - * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate - * for details on creating the group. Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS - * to perform operation on all the GPUs. 
- * @param validate IN: The validation to perform after the action. - * @param response OUT: Result of the validation process. Refer to \ref dcgmDiagResponse_t for details. - * - * - * @return + * + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate for + * details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. + * @param validate IN: The validation to perform after the action. + * @param response OUT: Result of the validation process. Refer to \ref dcgmDiagResponse_t for details. + * + * @return * - \ref DCGM_ST_OK if the call was successful - * - \ref DCGM_ST_NOT_SUPPORTED if running the specified \a validate is not supported. This is usually due to the - * Tesla recommended driver not being installed on the system. + * - \ref DCGM_ST_NOT_SUPPORTED if running the specified \a validate is not supported. This is usually due + * to the Tesla recommended driver not being installed on the system. * - \ref DCGM_ST_BADPARAM if \a groupId, \a validate, or \a statusHandle is invalid * - \ref DCGM_ST_GENERIC_ERROR an internal error has occurred - * - \ref DCGM_ST_GROUP_INCOMPATIBLE if \a groupId refers to a group of non-homogeneous GPUs. This is currently not allowed. + * - \ref DCGM_ST_GROUP_INCOMPATIBLE if \a groupId refers to a group of non-homogeneous GPUs. This is currently + * not allowed. 
+ * */ -dcgmReturn_t dcgmActionValidate(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmPolicyValidation_t validate, +dcgmReturn_t dcgmActionValidate(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmPolicyValidation_t validate, dcgmDiagResponse_t *response); /** * Inform the action manager to perform a manual validation of a group of GPUs on the system - * - * @param pDcgmHandle IN: DCGM Handle - * @param drd IN: Contains the group id, test names, test parameters, struct version, and - * the validation that should be performed. Look at \ref dcgmGroupCreate - * for details on creating the group. Alternatively, pass in the group id - * as \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. - * @param response OUT: Result of the validation process. Refer to \ref dcgmDiagResponse_t for details. - * - * - * @return + * + * @param pDcgmHandle IN: DCGM Handle + * @param drd IN: Contains the group id, test names, test parameters, struct version, and the validation + * that should be performed. Look at \ref dcgmGroupCreate for details on creating the + * group. Alternatively, pass in the group id as \a DCGM_GROUP_ALL_GPUS to perform + * operation on all the GPUs. + * @param response OUT: Result of the validation process. Refer to \ref dcgmDiagResponse_t for details. + * + * @return * - \ref DCGM_ST_OK if the call was successful - * - \ref DCGM_ST_NOT_SUPPORTED if running the specified \a validate is not supported. This is usually due to the - * Tesla recommended driver not being installed on the system. + * - \ref DCGM_ST_NOT_SUPPORTED if running the specified \a validate is not supported. This is usually + * due to the Tesla recommended driver not being installed on the system. * - \ref DCGM_ST_BADPARAM if \a groupId, \a validate, or \a statusHandle is invalid * - \ref DCGM_ST_GENERIC_ERROR an internal error has occurred - * - \ref DCGM_ST_GROUP_INCOMPATIBLE if \a groupId refers to a group of non-homogeneous GPUs. This is currently not allowed. 
+ * - \ref DCGM_ST_GROUP_INCOMPATIBLE if \a groupId refers to a group of non-homogeneous GPUs. This is + * currently not allowed. */ -dcgmReturn_t dcgmActionValidate_v2(dcgmHandle_t pDcgmHandle, dcgmRunDiag_t *drd, dcgmDiagResponse_t *response); +dcgmReturn_t dcgmActionValidate_v2(dcgmHandle_t pDcgmHandle, dcgmRunDiag_v6 *drd, dcgmDiagResponse_t *response); /** * Run a diagnostic on a group of GPUs * - * @param pDcgmHandle IN: DCGM Handle - * @param groupId IN: Group ID representing collection of one - * or more GPUs. Look at \ref dcgmGroupCreate - * for details on creating the group. - * Alternatively, pass in the group id as - * \a DCGM_GROUP_ALL_GPUS to perform - * operation on all the GPUs. - * @param diagLevel IN: Diagnostic level to run - * @param diagResponse IN/OUT: Result of running the DCGM diagnostic.
- * .version should be set to - * \ref dcgmDiagResponse_version before this - * call. + * @param pDcgmHandle IN: DCGM Handle + * @param groupId IN: Group ID representing collection of one or more GPUs. Look at \ref dcgmGroupCreate + * for details on creating the group. Alternatively, pass in the group id as + * \a DCGM_GROUP_ALL_GPUS to perform operation on all the GPUs. + * @param diagLevel IN: Diagnostic level to run + * @param diagResponse IN/OUT: Result of running the DCGM diagnostic.
+ * .version should be set to \ref dcgmDiagResponse_version before this call. * * @return * - \ref DCGM_ST_OK if the call was successful @@ -1450,10 +1581,14 @@ dcgmReturn_t dcgmActionValidate_v2(dcgmHandle_t pDcgmHandle, dcgmRunDiag_t *drd, * Tesla recommended driver not being installed on the system. * - \ref DCGM_ST_BADPARAM if a provided parameter is invalid or missing * - \ref DCGM_ST_GENERIC_ERROR an internal error has occurred - * - \ref DCGM_ST_GROUP_INCOMPATIBLE if \a groupId refers to a group of non-homogeneous GPUs. This is currently not allowed. + * - \ref DCGM_ST_GROUP_INCOMPATIBLE if \a groupId refers to a group of non-homogeneous GPUs. This is + * currently not allowed. * - \ref DCGM_ST_VER_MISMATCH if .version is not set or is invalid. + * */ -dcgmReturn_t dcgmRunDiagnostic(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmDiagnosticLevel_t diagLevel, +dcgmReturn_t dcgmRunDiagnostic(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmDiagnosticLevel_t diagLevel, dcgmDiagResponse_t *diagResponse); /** @} */ // Closing for DCGMAPI_PO_MI @@ -1469,14 +1604,14 @@ dcgmReturn_t dcgmRunDiagnostic(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, d /** * Inform the policy manager loop to perform an iteration and trigger the callbacks of any * registered functions. Callback functions will be called from a separate thread as the calling function. - * - * Note: The GPU monitoring and management agent must call this method periodically if the operation - * mode is set to manual mode (DCGM_OPERATION_MODE_MANUAL) during initialization + * + * Note: The GPU monitoring and management agent must call this method periodically if the operation + * mode is set to manual mode (DCGM_OPERATION_MODE_MANUAL) during initialization * (\ref dcgmInit). - * + * * @param pDcgmHandle IN: DCGM Handle - * - * @return + * + * @return * - \ref DCGM_ST_OK If the call was successful * - DCGM_ST_GENERIC_ERROR The policy manager was unable to perform another iteration. 
*/ @@ -1495,29 +1630,34 @@ dcgmReturn_t dcgmPolicyTrigger(dcgmHandle_t pDcgmHandle); * * @param pDcgmHandle IN: DCGM Handle * @param gpuId IN: GPU Id corresponding to which topology information should be fetched - * @param pDcgmDeviceTopology IN/OUT: Topology information corresponding to \a gpuId. pDcgmDeviceTopology->version - * must be set to dcgmDeviceTopology_version before this call. + * @param pDcgmDeviceTopology IN/OUT: Topology information corresponding to \a gpuId. pDcgmDeviceTopology->version must + * be set to dcgmDeviceTopology_version before this call. * @return * - \ref DCGM_ST_OK if the call was successful. * - \ref DCGM_ST_BADPARAM if \a gpuId or \a pDcgmDeviceTopology were not valid. * - \ref DCGM_ST_VER_MISMATCH if pDcgmDeviceTopology->version was not set to dcgmDeviceTopology_version. + * */ - -dcgmReturn_t DECLDIR dcgmGetDeviceTopology(dcgmHandle_t pDcgmHandle, unsigned int gpuId, dcgmDeviceTopology_t *pDcgmDeviceTopology); +dcgmReturn_t DECLDIR dcgmGetDeviceTopology(dcgmHandle_t pDcgmHandle, + unsigned int gpuId, + dcgmDeviceTopology_t *pDcgmDeviceTopology); /** * Gets group topology corresponding to the \a groupId. * * @param pDcgmHandle IN: DCGM Handle * @param groupId IN: GroupId corresponding to which topology information should be fetched - * @param pDcgmGroupTopology IN/OUT: Topology information corresponding to \a groupId. pDcgmgroupTopology->version - * must be set to dcgmGroupTopology_version. + * @param pDcgmGroupTopology IN/OUT: Topology information corresponding to \a groupId. pDcgmgroupTopology->version must + * be set to dcgmGroupTopology_version. * @return * - \ref DCGM_ST_OK if the call was successful. * - \ref DCGM_ST_BADPARAM if \a groupId or \a pDcgmGroupTopology were not valid. * - \ref DCGM_ST_VER_MISMATCH if pDcgmgroupTopology->version was not set to dcgmGroupTopology_version. 
+ * */ -dcgmReturn_t DECLDIR dcgmGetGroupTopology(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t groupId, dcgmGroupTopology_t *pDcgmGroupTopology); +dcgmReturn_t DECLDIR dcgmGetGroupTopology(dcgmHandle_t pDcgmHandle, + dcgmGpuGrp_t groupId, + dcgmGroupTopology_t *pDcgmGroupTopology); /** @} */ // Closing for DCGMAPI_Topo @@ -1529,14 +1669,16 @@ dcgmReturn_t DECLDIR dcgmGetGroupTopology(dcgmHandle_t pDcgmHandle, dcgmGpuGrp_t /***************************************************************************************************/ /** - * Toggle the state of introspection metadata gathering in DCGM. Metadata gathering will increase the memory usage of DCGM - * so that it can store the metadata it gathers. + * Toggle the state of introspection metadata gathering in DCGM. Metadata gathering will increase the memory usage + * of DCGM so that it can store the metadata it gathers. * * @param pDcgmHandle IN: DCGM Handle * @param enabledState IN: The state to set gathering of introspection data to + * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM enabledState is an invalid state for metadata gathering + * */ dcgmReturn_t DECLDIR dcgmIntrospectToggleState(dcgmHandle_t pDcgmHandle, dcgmIntrospectState_t enabledState); @@ -1545,19 +1687,19 @@ dcgmReturn_t DECLDIR dcgmIntrospectToggleState(dcgmHandle_t pDcgmHandle, dcgmInt * Get the current amount of memory used to store the given field collection. * * @param pDcgmHandle IN: DCGM Handle - * @param context IN: see \ref dcgmIntrospectContext_t. This identifies the level - * of fields to do introspection for (ex: all fields, field groups) - * context->version must be set to dcgmIntrospectContext_version prior - * to this call. - * @param memoryInfo IN/OUT: see \ref dcgmIntrospectFullMemory_t. memoryInfo->version must be set - * to dcgmIntrospectFullMemory_version prior to this call. 
- * @param waitIfNoData IN: if no metadata has been gathered, should this call block until data - * has been gathered (1), or should this call just return DCGM_ST_NO_DATA (0). + * @param context IN: see \ref dcgmIntrospectContext_t. This identifies the level of fields to do + * introspection for (ex: all fields, field groups) context->version must be + * set to dcgmIntrospectContext_version prior to this call. + * @param memoryInfo IN/OUT: see \ref dcgmIntrospectFullMemory_t. memoryInfo->version must be set to + * dcgmIntrospectFullMemory_version prior to this call. + * @param waitIfNoData IN: if no metadata has been gathered, should this call block until data has been + * gathered (1), or should this call just return DCGM_ST_NO_DATA (0). * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_NOT_CONFIGURED if metadata gathering state is \a DCGM_INTROSPECT_STATE_DISABLED * - \ref DCGM_ST_NO_DATA if \a waitIfNoData is false and metadata has not been gathered yet * - \ref DCGM_ST_VER_MISMATCH if context->version or memoryInfo->version is 0 or invalid. + * */ dcgmReturn_t DECLDIR dcgmIntrospectGetFieldsMemoryUsage(dcgmHandle_t pDcgmHandle, dcgmIntrospectContext_t *context, @@ -1571,17 +1713,17 @@ dcgmReturn_t DECLDIR dcgmIntrospectGetFieldsMemoryUsage(dcgmHandle_t pDcgmHandle * This measurement represents both the resident set size (what is currently in RAM) and * the swapped memory that belongs to the process. * - * @param pDcgmHandle IN: DCGM Handle - * @param memoryInfo IN/OUT: see \ref dcgmIntrospectMemory_t. memoryInfo->version must be set to - * dcgmIntrospectMemory_version prior to this call. - * @param waitIfNoData IN: if no metadata is gathered wait till this occurs (!0) - * or return DCGM_ST_NO_DATA (0) + * @param pDcgmHandle IN: DCGM Handle + * @param memoryInfo IN/OUT: see \ref dcgmIntrospectMemory_t. memoryInfo->version must be set to + * dcgmIntrospectMemory_version prior to this call. 
+ * @param waitIfNoData IN: if no metadata is gathered wait till this occurs (!0) or return DCGM_ST_NO_DATA (0) * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_NOT_CONFIGURED if metadata gathering state is \a DCGM_INTROSPECT_STATE_DISABLED * - \ref DCGM_ST_NO_DATA if \a waitIfNoData is false and metadata has not been gathered yet * - \ref DCGM_ST_VER_MISMATCH if memoryInfo->version is 0 or invalid. + * */ dcgmReturn_t DECLDIR dcgmIntrospectGetHostengineMemoryUsage(dcgmHandle_t pDcgmHandle, dcgmIntrospectMemory_t *memoryInfo, @@ -1592,20 +1734,20 @@ dcgmReturn_t DECLDIR dcgmIntrospectGetHostengineMemoryUsage(dcgmHandle_t pDcgmHa * Get introspection info relating to execution time needed to update the fields * identified by \a context. * - * @param pDcgmHandle IN: DCGM Handle - * @param context IN: see \ref dcgmIntrospectContext_t. This identifies the level - * of fields to do introspection for (ex: all fields, field group ) - * context->version must be set to dcgmIntrospectContext_version prior - * to this call. - * @param execTime IN/OUT: see \ref dcgmIntrospectFullFieldsExecTime_t. execTime->version must be - * set to dcgmIntrospectFullFieldsExecTime_version prior to this call. - * @param waitIfNoData IN: if no metadata is gathered, wait until data has been gathered (1) - * or return DCGM_ST_NO_DATA (0) + * @param pDcgmHandle IN: DCGM Handle + * @param context IN: see \ref dcgmIntrospectContext_t. This identifies the level of fields to do + * introspection for (ex: all fields, field group ) context->version must be set to + * dcgmIntrospectContext_version prior to this call. + * @param execTime IN/OUT: see \ref dcgmIntrospectFullFieldsExecTime_t. execTime->version must be set to + * dcgmIntrospectFullFieldsExecTime_version prior to this call. 
+ * @param waitIfNoData IN: if no metadata is gathered, wait until data has been gathered (1) or return + * DCGM_ST_NO_DATA (0) * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_NOT_CONFIGURED if metadata gathering state is \a DCGM_INTROSPECT_STATE_DISABLED * - \ref DCGM_ST_NO_DATA if \a waitIfNoData is false and metadata has not been gathered yet * - \ref DCGM_ST_VER_MISMATCH if context->version or execTime->version is 0 or invalid. + * */ dcgmReturn_t DECLDIR dcgmIntrospectGetFieldsExecTime(dcgmHandle_t pDcgmHandle, dcgmIntrospectContext_t *context, @@ -1613,20 +1755,20 @@ dcgmReturn_t DECLDIR dcgmIntrospectGetFieldsExecTime(dcgmHandle_t pDcgmHandle, int waitIfNoData); /*************************************************************************/ -/* +/** * Retrieve the CPU utilization of the DCGM hostengine process. * - * @param pDcgmHandle IN: DCGM Handle - * @param cpuUtil IN/OUT: see \ref dcgmIntrospectCpuUtil_t. cpuUtil->version must be set to - * dcgmIntrospectCpuUtil_version prior to this call. - * @param waitIfNoData IN: if no metadata is gathered wait till this occurs (!0) - * or return DCGM_ST_NO_DATA (0) + * @param pDcgmHandle IN: DCGM Handle + * @param cpuUtil IN/OUT: see \ref dcgmIntrospectCpuUtil_t. cpuUtil->version must be set to + * dcgmIntrospectCpuUtil_version prior to this call. + * @param waitIfNoData IN: if no metadata is gathered wait till this occurs (!0) or return DCGM_ST_NO_DATA (0) * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_NOT_CONFIGURED if metadata gathering state is \a DCGM_INTROSPECT_STATE_DISABLED * - \ref DCGM_ST_NO_DATA if \a waitIfNoData is false and metadata has not been gathered yet * - \ref DCGM_ST_VER_MISMATCH if cpuUtil->version or execTime->version is 0 or invalid. 
+ * */ dcgmReturn_t DECLDIR dcgmIntrospectGetHostengineCpuUtilization(dcgmHandle_t pDcgmHandle, dcgmIntrospectCpuUtil_t *cpuUtil, @@ -1638,13 +1780,13 @@ dcgmReturn_t DECLDIR dcgmIntrospectGetHostengineCpuUtilization(dcgmHandle_t pDcg * all DCGM introspection data. This is normally performed automatically on an * interval of 1 second. * - * @param pDcgmHandle IN: DCGM Handle - * @param waitForUpdate IN: Whether or not to wait for the update loop to - * complete before returning to the caller + * @param pDcgmHandle IN: DCGM Handle + * @param waitForUpdate IN: Whether or not to wait for the update loop to complete before returning to the caller * * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if \a waitForUpdate is invalid + * */ dcgmReturn_t DECLDIR dcgmIntrospectUpdateAll(dcgmHandle_t pDcgmHandle, int waitForUpdate); @@ -1659,35 +1801,30 @@ dcgmReturn_t DECLDIR dcgmIntrospectUpdateAll(dcgmHandle_t pDcgmHandle, int waitF /*************************************************************************/ /** - * Get the best group of gpus from the specified bitmask according to topological proximity: cpuAffinity, NUMA + * Get the best group of gpus from the specified bitmask according to topological proximity: cpuAffinity, NUMA * node, and NVLink. * - * @param pDcgmHandle IN: DCGM Handle - * @param inputGpuIds IN: a bitmask of which GPUs DCGM should consider. If some of the GPUs on the system - * are already in use, they shouldn't be included in the bitmask. 0 means that all - * of the GPUs in the system should be considered. - * @param numGpus IN: the number of GPUs that are desired from inputGpuIds. If this number is greater - * than the number of healthy GPUs in inputGpuIds, then less than numGpus gpus will - * be specified in outputGpuIds. - * @param outputGpuIds OUT: a bitmask of numGpus or fewer GPUs from inputGpuIds that represent the best - * placement available from inputGpuIds. 
- * @param hintFlags IN: a bitmask of DCGM_TOPO_HINT_F_ #defines of hints that should be taken into - * account when assigning outputGpuIds. + * @param pDcgmHandle IN: DCGM Handle + * @param inputGpuIds IN: a bitmask of which GPUs DCGM should consider. If some of the GPUs on the system are + * already in use, they shouldn't be included in the bitmask. 0 means that all of the GPUs + * in the system should be considered. + * @param numGpus IN: the number of GPUs that are desired from inputGpuIds. If this number is greater than + * the number of healthy GPUs in inputGpuIds, then less than numGpus gpus will be + * specified in outputGpuIds. + * @param outputGpuIds OUT: a bitmask of numGpus or fewer GPUs from inputGpuIds that represent the best placement + * available from inputGpuIds. + * @param hintFlags IN: a bitmask of DCGM_TOPO_HINT_F_ #defines of hints that should be taken into account when + * assigning outputGpuIds. * * @return * - \ref DCGM_ST_OK if the call was successful - */ -dcgmReturn_t DECLDIR dcgmSelectGpusByTopology(dcgmHandle_t pDcgmHandle, uint64_t inputGpuIds, uint32_t numGpus, - uint64_t *outputGpuIds, uint64_t hintFlags); - -/*************************************************************************/ -/** - * Get a summary of the values for a field id over a period of time. 
* - * @param pDcgmHandle IN: DCGM Handle - * @param request IN / OUT: a pointer to the struct detailing the request and containing the response */ -dcgmReturn_t DECLDIR dcgmGetFieldSummary(dcgmHandle_t pDcgmHandle, dcgmFieldSummaryRequest_t *request); +dcgmReturn_t DECLDIR dcgmSelectGpusByTopology(dcgmHandle_t pDcgmHandle, + uint64_t inputGpuIds, + uint32_t numGpus, + uint64_t *outputGpuIds, + uint64_t hintFlags); /** @} */ // Closing for DCGMAPI_TOPOLOGY @@ -1706,15 +1843,15 @@ dcgmReturn_t DECLDIR dcgmGetFieldSummary(dcgmHandle_t pDcgmHandle, dcgmFieldSumm * You can also pass --blacklist-modules to the nv-hostengine binary to make sure modules * get blacklisted immediately after the host engine starts up. * - * @param pDcgmHandle IN: DCGM Handle - * @param moduleId IN: ID of the module to blacklist. Use \ref dcgmModuleGetStatuses to get a - * list of valid module IDs. + * @param pDcgmHandle IN: DCGM Handle + * @param moduleId IN: ID of the module to blacklist. Use \ref dcgmModuleGetStatuses to get a list of valid + * module IDs. * * @return - * - \ref DCGM_ST_OK if the module has been blacklisted. - * - \ref DCGM_ST_IN_USE if the module has already been loaded and cannot be blacklisted. - * - \ref DCGM_ST_BADPARAM if a parameter is missing or bad. - * + * - \ref DCGM_ST_OK if the module has been blacklisted. + * - \ref DCGM_ST_IN_USE if the module has already been loaded and cannot be blacklisted. + * - \ref DCGM_ST_BADPARAM if a parameter is missing or bad. + * */ dcgmReturn_t DECLDIR dcgmModuleBlacklist(dcgmHandle_t pDcgmHandle, dcgmModuleId_t moduleId); @@ -1722,13 +1859,14 @@ dcgmReturn_t DECLDIR dcgmModuleBlacklist(dcgmHandle_t pDcgmHandle, dcgmModuleId_ /** * Get the status of all of the DCGM modules. * - * @param pDcgmHandle IN: DCGM Handle - * @param moduleStatuses OUT: Module statuses. .version should be set to dcgmModuleStatuses_version upon calling. + * @param pDcgmHandle IN: DCGM Handle + * @param moduleStatuses OUT: Module statuses.
+ * .version should be set to dcgmModuleStatuses_version upon calling. * * @return - * - \ref DCGM_ST_OK if the request succeeds. - * - \ref DCGM_ST_BADPARAM if a parameter is missing or bad. - * + * - \ref DCGM_ST_OK if the request succeeds. + * - \ref DCGM_ST_BADPARAM if a parameter is missing or bad. + * */ dcgmReturn_t DECLDIR dcgmModuleGetStatuses(dcgmHandle_t pDcgmHandle, dcgmModuleGetStatuses_t *moduleStatuses); @@ -1744,78 +1882,134 @@ dcgmReturn_t DECLDIR dcgmModuleGetStatuses(dcgmHandle_t pDcgmHandle, dcgmModuleG /*************************************************************************/ /** * Get all of the profiling metric groups for a given GPU group. - * + * * Profiling metrics are watched in groups of fields that are all watched together. For instance, if you want * to watch DCGM_FI_PROF_GR_ENGINE_ACTIVITY, this might also be in the same group as DCGM_FI_PROF_SM_EFFICIENCY. * Watching this group would result in DCGM storing values for both of these metrics. - * + * * Some groups cannot be watched concurrently as others as they utilize the same hardware resource. For instance, * you may not be able to watch DCGM_FI_PROF_TENSOR_OP_UTIL at the same time as DCGM_FI_PROF_GR_ENGINE_ACTIVITY * on your hardware. At the same time, you may be able to watch DCGM_FI_PROF_TENSOR_OP_UTIL at the same time as * DCGM_FI_PROF_NVLINK_TX_DATA. - * + * * Metrics that can be watched concurrently will have different .majorId fields in their dcgmProfMetricGroupInfo_t - * + * * See \ref dcgmGroupCreate for details on creating a GPU group * See \ref dcgmProfWatchFields to actually watch a metric group * - * @param pDcgmHandle IN: DCGM Handle - * @param metricGroups IN/OUT: Metric groups supported for metricGroups->groupId. - * metricGroups->version should be set to dcgmProfGetMetricGroups_version upon calling. + * @param pDcgmHandle IN: DCGM Handle + * @param metricGroups IN/OUT: Metric groups supported for metricGroups->groupId.
+ * metricGroups->version should be set to dcgmProfGetMetricGroups_version upon calling. * * @return - * - \ref DCGM_ST_OK if the request succeeds. - * - \ref DCGM_ST_BADPARAM if a parameter is missing or bad. - * - \ref DCGM_ST_GROUP_INCOMPATIBLE if metricGroups->groupId's GPUs are not identical GPUs. - * - \ref DCGM_ST_NOT_SUPPORTED if profiling metrics are not supported for the given GPU group. - * + * - \ref DCGM_ST_OK if the request succeeds. + * - \ref DCGM_ST_BADPARAM if a parameter is missing or bad. + * - \ref DCGM_ST_GROUP_INCOMPATIBLE if metricGroups->groupId's GPUs are not identical GPUs. + * - \ref DCGM_ST_NOT_SUPPORTED if profiling metrics are not supported for the given GPU group. + * */ -dcgmReturn_t DECLDIR dcgmProfGetSupportedMetricGroups(dcgmHandle_t pDcgmHandle, dcgmProfGetMetricGroups_t *metricGroups); +dcgmReturn_t DECLDIR dcgmProfGetSupportedMetricGroups(dcgmHandle_t pDcgmHandle, + dcgmProfGetMetricGroups_t *metricGroups); /** * Request that DCGM start recording updates for a given list of profiling field IDs. - * + * * Once metrics have been watched by this API, any of the normal DCGM field-value retrieval APIs can be used on * the underlying fieldIds of this metric group. See \ref dcgmGetLatestValues_v2, \ref dcgmGetLatestValuesForFields, * \ref dcgmEntityGetLatestValues, and \ref dcgmEntitiesGetLatestValues. * - * @param pDcgmHandle IN: DCGM Handle - * @param watchFields IN: Details of which metric groups to watch for which GPUs. See \ref dcgmProfWatchFields_v1 - * for details of what should be put in each struct member. - * watchFields->version should be set to dcgmProfWatchFields_version upon calling. + * @param pDcgmHandle IN: DCGM Handle + * @param watchFields IN: Details of which metric groups to watch for which GPUs. See \ref dcgmProfWatchFields_v1 + * for details of what should be put in each struct member. watchFields->version should be + * set to dcgmProfWatchFields_version upon calling. 
 *
 * @return
- * - \ref DCGM_ST_OK if the call was successful
- * - \ref DCGM_ST_BADPARAM if a parameter is invalid
- * - \ref DCGM_ST_NOT_SUPPORTED if profiling metric group metricGroupTag is not supported
- * for the given GPU group.
- * - \ref DCGM_ST_GROUP_INCOMPATIBLE if groupId's GPUs are not identical GPUs. Profiling metrics are only
- * support for homogenous groups of GPUs.
- * - \ref DCGM_ST_PROFILING_MULTI_PASS if any of the metric groups could not be watched concurrently due to
- * requiring the hardware to gather them with multiple passes
+ * - \ref DCGM_ST_OK if the call was successful
+ * - \ref DCGM_ST_BADPARAM if a parameter is invalid
+ * - \ref DCGM_ST_NOT_SUPPORTED if profiling metric group metricGroupTag is not supported for the given
+ * GPU group.
+ * - \ref DCGM_ST_GROUP_INCOMPATIBLE if groupId's GPUs are not identical GPUs. Profiling metrics are only
+ * supported for homogeneous groups of GPUs.
+ * - \ref DCGM_ST_PROFILING_MULTI_PASS if any of the metric groups could not be watched concurrently due to
+ * requiring the hardware to gather them with multiple passes
+ *
 */
- dcgmReturn_t dcgmProfWatchFields(dcgmHandle_t pDcgmHandle, dcgmProfWatchFields_t *watchFields);

/**
 * Request that DCGM stop recording updates for all profiling field IDs for all GPUs
 *
- * @param pDcgmHandle IN: DCGM Handle
- * @param unwatchFields IN: Details of which metric groups to unwatch for which GPUs. See \ref dcgmProfUnwatchFields_v1
- * for details of what should be put in each struct member.
- * unwatchFields->version should be set to dcgmProfUnwatchFields_version upon calling.
+ * @param pDcgmHandle IN: DCGM Handle
+ * @param unwatchFields IN: Details of which metric groups to unwatch for which GPUs. See \ref
+ * dcgmProfUnwatchFields_v1 for details of what should be put in each struct member.
+ * unwatchFields->version should be set to dcgmProfUnwatchFields_version upon calling.
* * @return * - \ref DCGM_ST_OK if the call was successful * - \ref DCGM_ST_BADPARAM if a parameter is invalid + * */ - dcgmReturn_t dcgmProfUnwatchFields(dcgmHandle_t pDcgmHandle, dcgmProfUnwatchFields_t *unwatchFields); +dcgmReturn_t dcgmProfUnwatchFields(dcgmHandle_t pDcgmHandle, dcgmProfUnwatchFields_t *unwatchFields); +/** + * Pause profiling activities in DCGM. This should be used when you are monitoring profiling fields + * from DCGM but want to be able to still run developer tools like nvprof, nsight systems, and nsight compute. + * Profiling fields start with DCGM_PROF_ and are in the field ID range 1001-1012. + * + * Call this API before you launch one of those tools and dcgmProfResume() after the tool has completed. + * + * DCGM will save BLANK values while profiling is paused. + * + * Calling this while profiling activities are already paused is fine and will be treated as a no-op. + * + * @param pDcgmHandle IN: DCGM Handle + * + * @return + * - \ref DCGM_ST_OK If the call was successful. + * - \ref DCGM_ST_BADPARAM if a parameter is invalid. + * + */ +dcgmReturn_t dcgmProfPause(dcgmHandle_t pDcgmHandle); + +/** + * Resume profiling activities in DCGM that were previously paused with dcgmProfPause(). + * + * Call this API after you have completed running other NVIDIA developer tools to reenable DCGM + * profiling metrics. + * + * DCGM will save BLANK values while profiling is paused. + * + * Calling this while profiling activities have already been resumed is fine and will be treated as a no-op. + * + * @param pDcgmHandle IN: DCGM Handle + * + * @return + * - \ref DCGM_ST_OK If the call was successful. + * - \ref DCGM_ST_BADPARAM if a parameter is invalid. + * + */ +dcgmReturn_t dcgmProfResume(dcgmHandle_t pDcgmHandle); /** @} */ // Closing for DCGMAPI_PROFILING -#ifdef __cplusplus +/** + * Adds fake GPU instances and or compute instances for testing purposes. 
The entity IDs specified for + * the GPU instances and compute instances are only guaranteed to be used by DCGM if MIG mode is not active. + * + * NOTE: this API will not work on a real system reading actual values from NVML, and it may even cause + * the real instances to malfunction. This API is for testing purposes only. + * + * @param pDcgmHandle IN: DCGM Handle + * @param hierarchy + * + * @return + * - \ref DCGM_ST_OK + * + */ +dcgmReturn_t dcgmAddFakeInstances(dcgmHandle_t pDcgmHandle, dcgmMigHierarchy_v1 *hierarchy); + +#ifdef __cplusplus } #endif -#endif /* DCGM_AGENT_H */ +#endif /* DCGM_AGENT_H */ diff --git a/bindings/go/dcgm/dcgm_errors.h b/bindings/go/dcgm/dcgm_errors.h index fbdfd5d..619bdb7 100644 --- a/bindings/go/dcgm/dcgm_errors.h +++ b/bindings/go/dcgm/dcgm_errors.h @@ -7,100 +7,104 @@ */ typedef enum dcgmError_enum { - DCGM_FR_OK = 0, //!< No error - DCGM_FR_UNKNOWN = 1, //!< Unknown error code - DCGM_FR_UNRECOGNIZED = 2, //!< Unrecognized error code - DCGM_FR_PCI_REPLAY_RATE = 3, //!< Unacceptable rate of PCI errors - DCGM_FR_VOLATILE_DBE_DETECTED = 4, //!< Uncorrectable volatile double bit error - DCGM_FR_VOLATILE_SBE_DETECTED = 5, //!< Unacceptable rate of volatile single bit errors - DCGM_FR_PENDING_PAGE_RETIREMENTS = 6, //!< Pending page retirements detected - DCGM_FR_RETIRED_PAGES_LIMIT = 7, //!< Unacceptable total page retirements detected - DCGM_FR_RETIRED_PAGES_DBE_LIMIT = 8, //!< Unacceptable total page retirements due to uncorrectable errors - DCGM_FR_CORRUPT_INFOROM = 9, //!< Corrupt inforom found - DCGM_FR_CLOCK_THROTTLE_THERMAL = 10, //!< Clocks being throttled due to overheating - DCGM_FR_POWER_UNREADABLE = 11, //!< Cannot get a reading for power from NVML - DCGM_FR_CLOCK_THROTTLE_POWER = 12, //!< Clock being throttled due to power restrictions - DCGM_FR_NVLINK_ERROR_THRESHOLD = 13, //!< Unacceptable rate of NVLink errors - DCGM_FR_NVLINK_DOWN = 14, //!< NVLink is down - DCGM_FR_NVSWITCH_FATAL_ERROR = 15, //!< Fatal errors on the 
NVSwitch - DCGM_FR_NVSWITCH_NON_FATAL_ERROR = 16, //!< Non-fatal errors on the NVSwitch - DCGM_FR_NVSWITCH_DOWN = 17, //!< NVSwitch is down - DCGM_FR_NO_ACCESS_TO_FILE = 18, //!< Cannot access a file - DCGM_FR_NVML_API = 19, //!< Error occurred on an NVML API - DCGM_FR_DEVICE_COUNT_MISMATCH = 20, //!< Disagreement in GPU count between /dev and NVML - DCGM_FR_BAD_PARAMETER = 21, //!< Bad parameter passed to API - DCGM_FR_CANNOT_OPEN_LIB = 22, //!< Cannot open a library that must be accessed - DCGM_FR_BLACKLISTED_DRIVER = 23, //!< A blacklisted driver (nouveau) is active - DCGM_FR_NVML_LIB_BAD = 24, //!< The NVML library is missing expected functions - DCGM_FR_GRAPHICS_PROCESSES = 25, //!< Graphics processes are active on this GPU - DCGM_FR_HOSTENGINE_CONN = 26, //!< Unstable connection to nv-hostengine (daemonized DCGM) - DCGM_FR_FIELD_QUERY = 27, //!< Error querying a field from DCGM - DCGM_FR_BAD_CUDA_ENV = 28, //!< The environment has variables that hurt CUDA - DCGM_FR_PERSISTENCE_MODE = 29, //!< Persistence mode is disabled - DCGM_FR_LOW_BANDWIDTH = 30, //!< The bandwidth is unacceptably low - DCGM_FR_HIGH_LATENCY = 31, //!< Latency is too high - DCGM_FR_CANNOT_GET_FIELD_TAG = 32, //!< Cannot find a tag for a field - DCGM_FR_FIELD_VIOLATION = 33, //!< The value for the specified error field is above 0 - DCGM_FR_FIELD_THRESHOLD = 34, //!< The value for the specified field is above the threshold - DCGM_FR_FIELD_VIOLATION_DBL = 35, //!< The value for the specified error field is above 0 - DCGM_FR_FIELD_THRESHOLD_DBL = 36, //!< The value for the specified field is above the threshold - DCGM_FR_UNSUPPORTED_FIELD_TYPE = 37, //!< Field type cannot be supported - DCGM_FR_FIELD_THRESHOLD_TS = 38, //!< The value for the specified field is above the threshold - DCGM_FR_FIELD_THRESHOLD_TS_DBL = 39, //!< The value for the specified field is above the threshold - DCGM_FR_THERMAL_VIOLATIONS = 40, //!< Thermal violations detected - DCGM_FR_THERMAL_VIOLATIONS_TS = 41, //!< 
Thermal violations detected with a timestamp - DCGM_FR_TEMP_VIOLATION = 42, //!< Temperature is too high - DCGM_FR_THROTTLING_VIOLATION = 43, //!< Non-benign clock throttling is occurring - DCGM_FR_INTERNAL = 44, //!< An internal error was detected - DCGM_FR_PCIE_GENERATION = 45, //!< PCIe generation is too low - DCGM_FR_PCIE_WIDTH = 46, //!< PCIe width is too low - DCGM_FR_ABORTED = 47, //!< Test was aborted by a user signal - DCGM_FR_TEST_DISABLED = 48, //!< This test is disabled for this GPU - DCGM_FR_CANNOT_GET_STAT = 49, //!< Cannot get telemetry for a needed value - DCGM_FR_STRESS_LEVEL = 50, //!< Stress level is too low (bad performance) - DCGM_FR_CUDA_API = 51, //!< Error calling the specified CUDA API - DCGM_FR_FAULTY_MEMORY = 52, //!< Faulty memory detected on this GPU - DCGM_FR_CANNOT_SET_WATCHES = 53, //!< Unable to set field watches in DCGM - DCGM_FR_CUDA_UNBOUND = 54, //!< CUDA context is no longer bound - DCGM_FR_ECC_DISABLED = 55, //!< ECC memory is disabled right now - DCGM_FR_MEMORY_ALLOC = 56, //!< Cannot allocate memory on the GPU - DCGM_FR_CUDA_DBE = 57, //!< CUDA detected unrecovable double-bit error - DCGM_FR_MEMORY_MISMATCH = 58, //!< Memory error detected - DCGM_FR_CUDA_DEVICE = 59, //!< No CUDA device discoverable for existing GPU - DCGM_FR_ECC_UNSUPPORTED = 60, //!< ECC memory is unsupported by this SKU - DCGM_FR_ECC_PENDING = 61, //!< ECC memory is in a pending state - DCGM_FR_MEMORY_BANDWIDTH = 62, //!< Memory bandwidth is too low - DCGM_FR_TARGET_POWER = 63, //!< Cannot hit the target power draw - DCGM_FR_API_FAIL = 64, //!< The specified API call failed - DCGM_FR_API_FAIL_GPU = 65, //!< The specified API call failed for the specified GPU - DCGM_FR_CUDA_CONTEXT = 66, //!< Cannot create a CUDA context on this GPU - DCGM_FR_DCGM_API = 67, //!< DCGM API failure - DCGM_FR_CONCURRENT_GPUS = 68, //!< Need multiple GPUs to run this test - DCGM_FR_TOO_MANY_ERRORS = 69, //!< More errors than fit in the return struct - 
DCGM_FR_NVLINK_CRC_ERROR_THRESHOLD = 70, //!< More than 100 CRC errors are happening per second - DCGM_FR_NVLINK_ERROR_CRITICAL = 71, //!< NVLink error for a field that should always be 0 - DCGM_FR_ENFORCED_POWER_LIMIT = 72, //!< The enforced power limit is too low to hit the target - DCGM_FR_MEMORY_ALLOC_HOST = 73, //!< Cannot allocate memory on the host - DCGM_FR_GPU_OP_MODE = 74, //!< Bad GPU operating mode for running plugin - DCGM_FR_NO_MEMORY_CLOCKS = 75, //!< No memory clocks with the needed MHz were found - DCGM_FR_NO_GRAPHICS_CLOCKS = 76, //!< No graphics clocks with the needed MHz were found - DCGM_FR_HAD_TO_RESTORE_STATE = 77, //!< Note that we had to restore a GPU's state - DCGM_FR_ERROR_SENTINEL = 78, //!< MUST BE THE LAST ERROR CODE + DCGM_FR_OK = 0, //!< No error + DCGM_FR_UNKNOWN = 1, //!< Unknown error code + DCGM_FR_UNRECOGNIZED = 2, //!< Unrecognized error code + DCGM_FR_PCI_REPLAY_RATE = 3, //!< Unacceptable rate of PCI errors + DCGM_FR_VOLATILE_DBE_DETECTED = 4, //!< Uncorrectable volatile double bit error + DCGM_FR_VOLATILE_SBE_DETECTED = 5, //!< Unacceptable rate of volatile single bit errors + DCGM_FR_PENDING_PAGE_RETIREMENTS = 6, //!< Pending page retirements detected + DCGM_FR_RETIRED_PAGES_LIMIT = 7, //!< Unacceptable total page retirements detected + DCGM_FR_RETIRED_PAGES_DBE_LIMIT = 8, //!< Unacceptable total page retirements due to uncorrectable errors + DCGM_FR_CORRUPT_INFOROM = 9, //!< Corrupt inforom found + DCGM_FR_CLOCK_THROTTLE_THERMAL = 10, //!< Clocks being throttled due to overheating + DCGM_FR_POWER_UNREADABLE = 11, //!< Cannot get a reading for power from NVML + DCGM_FR_CLOCK_THROTTLE_POWER = 12, //!< Clock being throttled due to power restrictions + DCGM_FR_NVLINK_ERROR_THRESHOLD = 13, //!< Unacceptable rate of NVLink errors + DCGM_FR_NVLINK_DOWN = 14, //!< NVLink is down + DCGM_FR_NVSWITCH_FATAL_ERROR = 15, //!< Fatal errors on the NVSwitch + DCGM_FR_NVSWITCH_NON_FATAL_ERROR = 16, //!< Non-fatal errors on the NVSwitch + 
DCGM_FR_NVSWITCH_DOWN = 17, //!< NVSwitch is down + DCGM_FR_NO_ACCESS_TO_FILE = 18, //!< Cannot access a file + DCGM_FR_NVML_API = 19, //!< Error occurred on an NVML API + DCGM_FR_DEVICE_COUNT_MISMATCH = 20, //!< Disagreement in GPU count between /dev and NVML + DCGM_FR_BAD_PARAMETER = 21, //!< Bad parameter passed to API + DCGM_FR_CANNOT_OPEN_LIB = 22, //!< Cannot open a library that must be accessed + DCGM_FR_BLACKLISTED_DRIVER = 23, //!< A blacklisted driver (nouveau) is active + DCGM_FR_NVML_LIB_BAD = 24, //!< The NVML library is missing expected functions + DCGM_FR_GRAPHICS_PROCESSES = 25, //!< Graphics processes are active on this GPU + DCGM_FR_HOSTENGINE_CONN = 26, //!< Unstable connection to nv-hostengine (daemonized DCGM) + DCGM_FR_FIELD_QUERY = 27, //!< Error querying a field from DCGM + DCGM_FR_BAD_CUDA_ENV = 28, //!< The environment has variables that hurt CUDA + DCGM_FR_PERSISTENCE_MODE = 29, //!< Persistence mode is disabled + DCGM_FR_LOW_BANDWIDTH = 30, //!< The bandwidth is unacceptably low + DCGM_FR_HIGH_LATENCY = 31, //!< Latency is too high + DCGM_FR_CANNOT_GET_FIELD_TAG = 32, //!< Cannot find a tag for a field + DCGM_FR_FIELD_VIOLATION = 33, //!< The value for the specified error field is above 0 + DCGM_FR_FIELD_THRESHOLD = 34, //!< The value for the specified field is above the threshold + DCGM_FR_FIELD_VIOLATION_DBL = 35, //!< The value for the specified error field is above 0 + DCGM_FR_FIELD_THRESHOLD_DBL = 36, //!< The value for the specified field is above the threshold + DCGM_FR_UNSUPPORTED_FIELD_TYPE = 37, //!< Field type cannot be supported + DCGM_FR_FIELD_THRESHOLD_TS = 38, //!< The value for the specified field is above the threshold + DCGM_FR_FIELD_THRESHOLD_TS_DBL = 39, //!< The value for the specified field is above the threshold + DCGM_FR_THERMAL_VIOLATIONS = 40, //!< Thermal violations detected + DCGM_FR_THERMAL_VIOLATIONS_TS = 41, //!< Thermal violations detected with a timestamp + DCGM_FR_TEMP_VIOLATION = 42, //!< Temperature is 
too high + DCGM_FR_THROTTLING_VIOLATION = 43, //!< Non-benign clock throttling is occurring + DCGM_FR_INTERNAL = 44, //!< An internal error was detected + DCGM_FR_PCIE_GENERATION = 45, //!< PCIe generation is too low + DCGM_FR_PCIE_WIDTH = 46, //!< PCIe width is too low + DCGM_FR_ABORTED = 47, //!< Test was aborted by a user signal + DCGM_FR_TEST_DISABLED = 48, //!< This test is disabled for this GPU + DCGM_FR_CANNOT_GET_STAT = 49, //!< Cannot get telemetry for a needed value + DCGM_FR_STRESS_LEVEL = 50, //!< Stress level is too low (bad performance) + DCGM_FR_CUDA_API = 51, //!< Error calling the specified CUDA API + DCGM_FR_FAULTY_MEMORY = 52, //!< Faulty memory detected on this GPU + DCGM_FR_CANNOT_SET_WATCHES = 53, //!< Unable to set field watches in DCGM + DCGM_FR_CUDA_UNBOUND = 54, //!< CUDA context is no longer bound + DCGM_FR_ECC_DISABLED = 55, //!< ECC memory is disabled right now + DCGM_FR_MEMORY_ALLOC = 56, //!< Cannot allocate memory on the GPU + DCGM_FR_CUDA_DBE = 57, //!< CUDA detected unrecovable double-bit error + DCGM_FR_MEMORY_MISMATCH = 58, //!< Memory error detected + DCGM_FR_CUDA_DEVICE = 59, //!< No CUDA device discoverable for existing GPU + DCGM_FR_ECC_UNSUPPORTED = 60, //!< ECC memory is unsupported by this SKU + DCGM_FR_ECC_PENDING = 61, //!< ECC memory is in a pending state + DCGM_FR_MEMORY_BANDWIDTH = 62, //!< Memory bandwidth is too low + DCGM_FR_TARGET_POWER = 63, //!< Cannot hit the target power draw + DCGM_FR_API_FAIL = 64, //!< The specified API call failed + DCGM_FR_API_FAIL_GPU = 65, //!< The specified API call failed for the specified GPU + DCGM_FR_CUDA_CONTEXT = 66, //!< Cannot create a CUDA context on this GPU + DCGM_FR_DCGM_API = 67, //!< DCGM API failure + DCGM_FR_CONCURRENT_GPUS = 68, //!< Need multiple GPUs to run this test + DCGM_FR_TOO_MANY_ERRORS = 69, //!< More errors than fit in the return struct + DCGM_FR_NVLINK_CRC_ERROR_THRESHOLD = 70, //!< More than 100 CRC errors are happening per second + 
DCGM_FR_NVLINK_ERROR_CRITICAL = 71, //!< NVLink error for a field that should always be 0 + DCGM_FR_ENFORCED_POWER_LIMIT = 72, //!< The enforced power limit is too low to hit the target + DCGM_FR_MEMORY_ALLOC_HOST = 73, //!< Cannot allocate memory on the host + DCGM_FR_GPU_OP_MODE = 74, //!< Bad GPU operating mode for running plugin + DCGM_FR_NO_MEMORY_CLOCKS = 75, //!< No memory clocks with the needed MHz were found + DCGM_FR_NO_GRAPHICS_CLOCKS = 76, //!< No graphics clocks with the needed MHz were found + DCGM_FR_HAD_TO_RESTORE_STATE = 77, //!< Note that we had to restore a GPU's state + DCGM_FR_L1TAG_UNSUPPORTED = 78, //!< L1TAG test is unsupported by this SKU + DCGM_FR_L1TAG_MISCOMPARE = 79, //!< L1TAG test failed on a miscompare + DCGM_FR_ROW_REMAP_FAILURE = 80, //!< Row remapping failed (Ampere or newer GPUs) + DCGM_FR_UNCONTAINED_ERROR = 81, //!< Uncontained error - XID 95 + DCGM_FR_ERROR_SENTINEL = 82, //!< MUST BE THE LAST ERROR CODE } dcgmError_t; typedef enum dcgmErrorSeverity_enum { - DCGM_ERROR_MONITOR = 0, // Can perform workload, but needs to be monitored. - DCGM_ERROR_ISOLATE = 1, // Cannot perform workload. GPU should be isolated. - DCGM_ERROR_UNKNOWN = 2, // This error code is not recognized + DCGM_ERROR_MONITOR = 0, //!< Can perform workload, but needs to be monitored. + DCGM_ERROR_ISOLATE = 1, //!< Cannot perform workload. GPU should be isolated. + DCGM_ERROR_UNKNOWN = 2, //!< This error code is not recognized } dcgmErrorSeverity_t; typedef struct { - dcgmError_t errorId; - const char *msgFormat; - const char *suggestion; - int severity; + dcgmError_t errorId; + const char *msgFormat; + const char *suggestion; + int severity; } dcgm_error_meta_t; extern dcgm_error_meta_t dcgmErrorMeta[]; @@ -108,286 +112,336 @@ extern dcgm_error_meta_t dcgmErrorMeta[]; /* Standard message for running a field diagnostic */ #define TRIAGE_RUN_FIELD_DIAG_MSG "Run a field diagnostic on the GPU." 
-#define DEBUG_COOLING_MSG "Verify that the cooling on this machine is functional, including external, "\ - "thermal material interface, fans, and any other components." +#define DEBUG_COOLING_MSG \ + "Verify that the cooling on this machine is functional, including external, " \ + "thermal material interface, fans, and any other components." /* * Messages for the error codes. All messages must be defined in the ERROR_CODE_MSG format * where is the actual message. */ -#define DCGM_FR_OK_MSG "The operation completed successfully." -#define DCGM_FR_UNKNOWN_MSG "Unknown error." -#define DCGM_FR_UNRECOGNIZED_MSG "Unrecognized error code." +#define DCGM_FR_OK_MSG "The operation completed successfully." +#define DCGM_FR_UNKNOWN_MSG "Unknown error." +#define DCGM_FR_UNRECOGNIZED_MSG "Unrecognized error code." // replay limit, gpu id, replay errors detected -#define DCGM_FR_PCI_REPLAY_RATE_MSG "Detected more than %u PCIe replays per minute for GPU %u : %d" +#define DCGM_FR_PCI_REPLAY_RATE_MSG "Detected more than %u PCIe replays per minute for GPU %u : %d" // dbes deteced, gpu id -#define DCGM_FR_VOLATILE_DBE_DETECTED_MSG "Detected %d volatile double-bit ECC error(s) in GPU %u." +#define DCGM_FR_VOLATILE_DBE_DETECTED_MSG "Detected %d volatile double-bit ECC error(s) in GPU %u." // sbe limit, gpu id, sbes detected -#define DCGM_FR_VOLATILE_SBE_DETECTED_MSG "More than %u single-bit ECC error(s) detected in GPU %u Volatile SBEs: %lld" +#define DCGM_FR_VOLATILE_SBE_DETECTED_MSG "More than %u single-bit ECC error(s) detected in GPU %u Volatile SBEs: %lld" // gpu id -#define DCGM_FR_PENDING_PAGE_RETIREMENTS_MSG "A pending retired page has been detected in GPU %u." +#define DCGM_FR_PENDING_PAGE_RETIREMENTS_MSG "A pending retired page has been detected in GPU %u." // retired pages detected, gpud id -#define DCGM_FR_RETIRED_PAGES_LIMIT_MSG "%u or more retired pages have been detected in GPU %u. 
" +#define DCGM_FR_RETIRED_PAGES_LIMIT_MSG "%u or more retired pages have been detected in GPU %u. " // retired pages due to dbes detected, gpu id -#define DCGM_FR_RETIRED_PAGES_DBE_LIMIT_MSG "An excess of %u retired pages due to DBEs have been detected and" \ - " more than one page has been retired due to DBEs in the past" \ - " week in GPU %u." +#define DCGM_FR_RETIRED_PAGES_DBE_LIMIT_MSG \ + "An excess of %u retired pages due to DBEs have been detected and" \ + " more than one page has been retired due to DBEs in the past" \ + " week in GPU %u." // gpu id -#define DCGM_FR_CORRUPT_INFOROM_MSG "A corrupt InfoROM has been detected in GPU %u." +#define DCGM_FR_CORRUPT_INFOROM_MSG "A corrupt InfoROM has been detected in GPU %u." // gpu id -#define DCGM_FR_CLOCK_THROTTLE_THERMAL_MSG "Detected clock throttling due to thermal violation in GPU %u." +#define DCGM_FR_CLOCK_THROTTLE_THERMAL_MSG "Detected clock throttling due to thermal violation in GPU %u." // gpu id -#define DCGM_FR_POWER_UNREADABLE_MSG "Cannot reliably read the power usage for GPU %u." +#define DCGM_FR_POWER_UNREADABLE_MSG "Cannot reliably read the power usage for GPU %u." // gpu id -#define DCGM_FR_CLOCK_THROTTLE_POWER_MSG "Detected clock throttling due to power violation in GPU %u." +#define DCGM_FR_CLOCK_THROTTLE_POWER_MSG "Detected clock throttling due to power violation in GPU %u." 
// nvlink errors detected, nvlink id, error threshold -#define DCGM_FR_NVLINK_ERROR_THRESHOLD_MSG "Detected %ld %s NvLink errors on GPU %u's NVLink which exceeds "\ - "threshold of %u" +#define DCGM_FR_NVLINK_ERROR_THRESHOLD_MSG \ + "Detected %ld %s NvLink errors on GPU %u's NVLink which exceeds " \ + "threshold of %u" // gpu id, nvlink id -#define DCGM_FR_NVLINK_DOWN_MSG "GPU %u's NvLink link %d is currently down" +#define DCGM_FR_NVLINK_DOWN_MSG "GPU %u's NvLink link %d is currently down" // nvswitch id, nvlink id -#define DCGM_FR_NVSWITCH_FATAL_ERROR_MSG "Detected fatal errors on NvSwitch %u link %u" +#define DCGM_FR_NVSWITCH_FATAL_ERROR_MSG "Detected fatal errors on NvSwitch %u link %u" // nvswitch id, nvlink id -#define DCGM_FR_NVSWITCH_NON_FATAL_ERROR_MSG "Detected nonfatal errors on NvSwitch %u link %u" +#define DCGM_FR_NVSWITCH_NON_FATAL_ERROR_MSG "Detected nonfatal errors on NvSwitch %u link %u" // nvswitch id, nvlink port -#define DCGM_FR_NVSWITCH_DOWN_MSG "NvSwitch physical ID %u's NvLink port %d is currently down." +#define DCGM_FR_NVSWITCH_DOWN_MSG "NvSwitch physical ID %u's NvLink port %d is currently down." // file path, error detail -#define DCGM_FR_NO_ACCESS_TO_FILE_MSG "File %s could not be accessed directly: %s" +#define DCGM_FR_NO_ACCESS_TO_FILE_MSG "File %s could not be accessed directly: %s" // purpose for communicating with NVML, NVML error as string, NVML error -#define DCGM_FR_NVML_API_MSG "Error calling NVML API %s: %s" -#define DCGM_FR_DEVICE_COUNT_MISMATCH_MSG "The number of devices NVML returns is different than the number "\ - "of devices in /dev." +#define DCGM_FR_NVML_API_MSG "Error calling NVML API %s: %s" +#define DCGM_FR_DEVICE_COUNT_MISMATCH_MSG \ + "The number of devices NVML returns is different than the number " \ + "of devices in /dev." 
// function name -#define DCGM_FR_BAD_PARAMETER_MSG "Bad parameter to function %s cannot be processed" +#define DCGM_FR_BAD_PARAMETER_MSG "Bad parameter to function %s cannot be processed" // library name, error returned from dlopen -#define DCGM_FR_CANNOT_OPEN_LIB_MSG "Cannot open library %s: '%s'" +#define DCGM_FR_CANNOT_OPEN_LIB_MSG "Cannot open library %s: '%s'" // the name of the blacklisted driver -#define DCGM_FR_BLACKLISTED_DRIVER_MSG "Found blacklisted driver: %s" +#define DCGM_FR_BLACKLISTED_DRIVER_MSG "Found blacklisted driver: %s" // the name of the function that wasn't found -#define DCGM_FR_NVML_LIB_BAD_MSG "Cannot get pointer to %s from libnvidia-ml.so" -#define DCGM_FR_GRAPHICS_PROCESSES_MSG "NVVS has detected graphics processes running on at least one "\ - "GPU. This may cause some tests to fail." +#define DCGM_FR_NVML_LIB_BAD_MSG "Cannot get pointer to %s from libnvidia-ml.so" +#define DCGM_FR_GRAPHICS_PROCESSES_MSG \ + "NVVS has detected graphics processes running on at least one " \ + "GPU. This may cause some tests to fail." // error message from the API call -#define DCGM_FR_HOSTENGINE_CONN_MSG "Could not connect to the host engine: '%s'" +#define DCGM_FR_HOSTENGINE_CONN_MSG "Could not connect to the host engine: '%s'" // field name, gpu id -#define DCGM_FR_FIELD_QUERY_MSG "Could not query field %s for GPU %u" +#define DCGM_FR_FIELD_QUERY_MSG "Could not query field %s for GPU %u" // environment variable name -#define DCGM_FR_BAD_CUDA_ENV_MSG "Found CUDA performance-limiting environment variable '%s'." +#define DCGM_FR_BAD_CUDA_ENV_MSG "Found CUDA performance-limiting environment variable '%s'." // gpu id -#define DCGM_FR_PERSISTENCE_MODE_MSG "Persistence mode for GPU %u is currently disabled. The DCGM "\ - "diagnostic requires peristence mode to be enabled." +#define DCGM_FR_PERSISTENCE_MODE_MSG \ + "Persistence mode for GPU %u is currently disabled. The DCGM " \ + "diagnostic requires peristence mode to be enabled." 
// gpu id, direction (d2h, e.g.), measured bandwidth, expected bandwidth -#define DCGM_FR_LOW_BANDWIDTH_MSG "Bandwidth of GPU %u in direction %s of %.2f did not exceed "\ - "minimum required bandwidth of %.2f." +#define DCGM_FR_LOW_BANDWIDTH_MSG \ + "Bandwidth of GPU %u in direction %s of %.2f did not exceed " \ + "minimum required bandwidth of %.2f." // gpu id, direction (d2h, e.g.), measured latency, expected latency -#define DCGM_FR_HIGH_LATENCY_MSG "Latency type %s of GPU %u value %.2f exceeded maximum allowed "\ - "latency of %.2f." +#define DCGM_FR_HIGH_LATENCY_MSG \ + "Latency type %s of GPU %u value %.2f exceeded maximum allowed " \ + "latency of %.2f." // field id -#define DCGM_FR_CANNOT_GET_FIELD_TAG_MSG "Unable to get field information for field id %hu" +#define DCGM_FR_CANNOT_GET_FIELD_TAG_MSG "Unable to get field information for field id %hu" // field value, field name, gpu id (this message is for fields that should always have a 0 value) -#define DCGM_FR_FIELD_VIOLATION_MSG "Detected %ld %s for GPU %u" +#define DCGM_FR_FIELD_VIOLATION_MSG "Detected %ld %s for GPU %u" // field value, field name, gpu id, allowable threshold -#define DCGM_FR_FIELD_THRESHOLD_MSG "Detected %ld %s for GPU %u which is above the threshold %ld" +#define DCGM_FR_FIELD_THRESHOLD_MSG "Detected %ld %s for GPU %u which is above the threshold %ld" // field value, field name, gpu id (same as DCGM_FR_FIELD_VIOLATION, but it's a double) -#define DCGM_FR_FIELD_VIOLATION_DBL_MSG "Detected %.1f %s for GPU %u" +#define DCGM_FR_FIELD_VIOLATION_DBL_MSG "Detected %.1f %s for GPU %u" // field value, field name, gpu id, allowable threshold (same as DCGM_FR_FIELD_THRESHOLD, but it's a double) -#define DCGM_FR_FIELD_THRESHOLD_DBL_MSG "Detected %.1f %s for GPU %u which is above the threshold %.1f" +#define DCGM_FR_FIELD_THRESHOLD_DBL_MSG "Detected %.1f %s for GPU %u which is above the threshold %.1f" // field name -#define DCGM_FR_UNSUPPORTED_FIELD_TYPE_MSG "Field %s is not supported by this API 
because it is neither an "\ - "int64 nor a double type." +#define DCGM_FR_UNSUPPORTED_FIELD_TYPE_MSG \ + "Field %s is not supported by this API because it is neither an " \ + "int64 nor a double type." // field name, allowable threshold, observed value, seconds -#define DCGM_FR_FIELD_THRESHOLD_TS_MSG "%s met or exceeded the threshold of %lu per second: %lu at "\ - "%.1f seconds into the test." +#define DCGM_FR_FIELD_THRESHOLD_TS_MSG \ + "%s met or exceeded the threshold of %lu per second: %lu at " \ + "%.1f seconds into the test." // field name, allowable threshold, observed value, seconds (same as DCGM_FR_FIELD_THRESHOLD, but it's a double) -#define DCGM_FR_FIELD_THRESHOLD_TS_DBL_MSG "%s met or exceeded the threshold of %.1f per second: %.1f at "\ - "%.1f seconds into the test." +#define DCGM_FR_FIELD_THRESHOLD_TS_DBL_MSG \ + "%s met or exceeded the threshold of %.1f per second: %.1f at " \ + "%.1f seconds into the test." // total seconds of violation, gpu id -#define DCGM_FR_THERMAL_VIOLATIONS_MSG "There were thermal violations totaling %lu seconds for GPU %u" +#define DCGM_FR_THERMAL_VIOLATIONS_MSG "There were thermal violations totaling %lu seconds for GPU %u" // total seconds of violations, first instance, gpu id -#define DCGM_FR_THERMAL_VIOLATIONS_TS_MSG "Thermal violations totaling %lu samples started at %.1f seconds "\ - "into the test for GPU %u" +#define DCGM_FR_THERMAL_VIOLATIONS_TS_MSG \ + "Thermal violations totaling %lu samples started at %.1f seconds " \ + "into the test for GPU %u" // observed temperature, gpu id, max allowed temperature -#define DCGM_FR_TEMP_VIOLATION_MSG "Temperature %lld of GPU %u exceeded user-specified maximum "\ - "allowed temperature %lld" +#define DCGM_FR_TEMP_VIOLATION_MSG \ + "Temperature %lld of GPU %u exceeded user-specified maximum " \ + "allowed temperature %lld" // gpu id, seconds into test, details about throttling -#define DCGM_FR_THROTTLING_VIOLATION_MSG "Clocks are being throttled for GPU %u because of clock "\ - 
"throttling starting %.1f seconds into the test. %s" +#define DCGM_FR_THROTTLING_VIOLATION_MSG \ + "Clocks are being throttled for GPU %u because of clock " \ + "throttling starting %.1f seconds into the test. %s" // details about error -#define DCGM_FR_INTERNAL_MSG "There was an internal error during the test: '%s'" +#define DCGM_FR_INTERNAL_MSG "There was an internal error during the test: '%s'" // gpu id, PCIe generation, minimum allowed, parameter to control -#define DCGM_FR_PCIE_GENERATION_MSG "GPU %u is running at PCI link generation %d, which is below "\ - "the minimum allowed link generation of %d (parameter '%s')" +#define DCGM_FR_PCIE_GENERATION_MSG \ + "GPU %u is running at PCI link generation %d, which is below " \ + "the minimum allowed link generation of %d (parameter '%s')" // gpu id, PCIe width, minimum allowed, parameter to control -#define DCGM_FR_PCIE_WIDTH_MSG "GPU %u is running at PCI link width %dX, which is below the "\ - "minimum allowed link generation of %d (parameter '%s')" -#define DCGM_FR_ABORTED_MSG "Test was aborted early due to user signal" +#define DCGM_FR_PCIE_WIDTH_MSG \ + "GPU %u is running at PCI link width %dX, which is below the " \ + "minimum allowed link generation of %d (parameter '%s')" +#define DCGM_FR_ABORTED_MSG "Test was aborted early due to user signal" // Test name -#define DCGM_FR_TEST_DISABLED_MSG "The %s test is skipped for this GPU." +#define DCGM_FR_TEST_DISABLED_MSG "The %s test is skipped for this GPU." 
// stat name, gpu id -#define DCGM_FR_CANNOT_GET_STAT_MSG "Unable to generate / collect stat %s for GPU %u" +#define DCGM_FR_CANNOT_GET_STAT_MSG "Unable to generate / collect stat %s for GPU %u" // observed value, minimum allowed, gpu id -#define DCGM_FR_STRESS_LEVEL_MSG "Max stress level of %.1f did not reach desired stress level of "\ - "%.1f for GPU %u" +#define DCGM_FR_STRESS_LEVEL_MSG \ + "Max stress level of %.1f did not reach desired stress level of " \ + "%.1f for GPU %u" // CUDA API name -#define DCGM_FR_CUDA_API_MSG "Error using CUDA API %s" +#define DCGM_FR_CUDA_API_MSG "Error using CUDA API %s" // count, gpu id -#define DCGM_FR_FAULTY_MEMORY_MSG "Found %d faulty memory elements on GPU %u" +#define DCGM_FR_FAULTY_MEMORY_MSG "Found %d faulty memory elements on GPU %u" // error detail -#define DCGM_FR_CANNOT_SET_WATCHES_MSG "Unable to add field watches to DCGM: %s" +#define DCGM_FR_CANNOT_SET_WATCHES_MSG "Unable to add field watches to DCGM: %s" // gpu id -#define DCGM_FR_CUDA_UNBOUND_MSG "Cuda GPU %d is no longer bound to a CUDA context...Aborting" +#define DCGM_FR_CUDA_UNBOUND_MSG "Cuda GPU %d is no longer bound to a CUDA context...Aborting" // Test name, gpu id -#define DCGM_FR_ECC_DISABLED_MSG "Skipping test %s because ECC is not enabled on GPU %u" +#define DCGM_FR_ECC_DISABLED_MSG "Skipping test %s because ECC is not enabled on GPU %u" // percentage of memory we tried to allocate, gpu id -#define DCGM_FR_MEMORY_ALLOC_MSG "Couldn't allocate at least %.1f%% of GPU memory on GPU %u" +#define DCGM_FR_MEMORY_ALLOC_MSG "Couldn't allocate at least %.1f%% of GPU memory on GPU %u" // gpu id -#define DCGM_FR_CUDA_DBE_MSG "CUDA APIs have indicated that a double-bit ECC error has "\ - "occured on GPU %u." +#define DCGM_FR_CUDA_DBE_MSG \ + "CUDA APIs have indicated that a double-bit ECC error has " \ + "occured on GPU %u." // gpu id -#define DCGM_FR_MEMORY_MISMATCH_MSG "A memory mismatch was detected on GPU %u, but no error was "\ - "reported by CUDA or NVML." 
+#define DCGM_FR_MEMORY_MISMATCH_MSG \ + "A memory mismatch was detected on GPU %u, but no error was " \ + "reported by CUDA or NVML." // gpu id, error detail -#define DCGM_FR_CUDA_DEVICE_MSG "Unable to find a corresponding CUDA device for GPU %u: '%s'" -#define DCGM_FR_ECC_UNSUPPORTED_MSG "This card does not support ECC Memory. Skipping test." +#define DCGM_FR_CUDA_DEVICE_MSG "Unable to find a corresponding CUDA device for GPU %u: '%s'" +#define DCGM_FR_ECC_UNSUPPORTED_MSG "ECC Memory is not turned on or is unsupported. Skipping test." // gpu id -#define DCGM_FR_ECC_PENDING_MSG "ECC memory for GPU %u is in a pending state." +#define DCGM_FR_ECC_PENDING_MSG "ECC memory for GPU %u is in a pending state." // gpu id, observed bandwidth, required, test name -#define DCGM_FR_MEMORY_BANDWIDTH_MSG "GPU %u only achieved a memory bandwidth of %.2f GB/s, failing "\ - "to meet %.2f GB/s for test %d" +#define DCGM_FR_MEMORY_BANDWIDTH_MSG \ + "GPU %u only achieved a memory bandwidth of %.2f GB/s, failing " \ + "to meet %.2f GB/s for test %d" // power draw observed, field tag, minimum power draw required, gpu id -#define DCGM_FR_TARGET_POWER_MSG "Max power of %.1f did not reach desired power minimum %s of "\ - "%.1f for GPU %u" +#define DCGM_FR_TARGET_POWER_MSG \ + "Max power of %.1f did not reach desired power minimum %s of " \ + "%.1f for GPU %u" // API name, error detail -#define DCGM_FR_API_FAIL_MSG "API call %s failed: '%s'" +#define DCGM_FR_API_FAIL_MSG "API call %s failed: '%s'" // API name, gpu id, error detail -#define DCGM_FR_API_FAIL_GPU_MSG "API call %s failed for GPU %u: '%s'" +#define DCGM_FR_API_FAIL_GPU_MSG "API call %s failed for GPU %u: '%s'" // gpu id, error detail -#define DCGM_FR_CUDA_CONTEXT_MSG "GPU %u failed to create a CUDA context: %s" +#define DCGM_FR_CUDA_CONTEXT_MSG "GPU %u failed to create a CUDA context: %s" // DCGM API name -#define DCGM_FR_DCGM_API_MSG "Error using DCGM API %s" -#define DCGM_FR_CONCURRENT_GPUS_MSG "Unable to run concurrent pair 
bandwidth test without 2 or more "\ - "gpus. Skipping" -#define DCGM_FR_TOO_MANY_ERRORS_MSG "This API can only return up to four errors per system. "\ - "Additional errors were found for this system that couldn't be "\ - "communicated." -#define DCGM_FR_NVLINK_CRC_ERROR_THRESHOLD_MSG "%.1f %s NvLink errors found occuring per second on GPU %u, "\ - "exceeding the limit of 100 per second." -#define DCGM_FR_NVLINK_ERROR_CRITICAL_MSG "Detected %ld %s NvLink errors on GPU %u's NVLink (should be 0)" -#define DCGM_FR_ENFORCED_POWER_LIMIT_MSG "Enforced power limit on GPU %u set to %.1f, which is too low to "\ - "attempt to achieve target power %.1f" -#define DCGM_FR_MEMORY_ALLOC_HOST_MSG "Cannot allocate %zu bytes on the host" -#define DCGM_FR_GPU_OP_MODE_MSG "Skipping plugin due to a GPU being in GPU Operating Mode: LOW_DP." -#define DCGM_FR_NO_MEMORY_CLOCKS_MSG "No memory clocks <= %u MHZ were found in %u supported memory clocks." -#define DCGM_FR_NO_GRAPHICS_CLOCKS_MSG "No graphics clocks <= %u MHZ were found in %u supported graphics clocks for memory clock %u MHZ." -#define DCGM_FR_HAD_TO_RESTORE_STATE_MSG "Had to restore GPU state on NVML GPU(s): %s" - +#define DCGM_FR_DCGM_API_MSG "Error using DCGM API %s" +#define DCGM_FR_CONCURRENT_GPUS_MSG \ + "Unable to run concurrent pair bandwidth test without 2 or more " \ + "gpus. Skipping" +#define DCGM_FR_TOO_MANY_ERRORS_MSG \ + "This API can only return up to four errors per system. " \ + "Additional errors were found for this system that couldn't be " \ + "communicated." +#define DCGM_FR_NVLINK_CRC_ERROR_THRESHOLD_MSG \ + "%.1f %s NvLink errors found occuring per second on GPU %u, " \ + "exceeding the limit of 100 per second." 
+#define DCGM_FR_NVLINK_ERROR_CRITICAL_MSG "Detected %ld %s NvLink errors on GPU %u's NVLink (should be 0)" +#define DCGM_FR_ENFORCED_POWER_LIMIT_MSG \ + "Enforced power limit on GPU %u set to %.1f, which is too low to " \ + "attempt to achieve target power %.1f" +#define DCGM_FR_MEMORY_ALLOC_HOST_MSG "Cannot allocate %zu bytes on the host" +#define DCGM_FR_GPU_OP_MODE_MSG "Skipping plugin due to a GPU being in GPU Operating Mode: LOW_DP." +#define DCGM_FR_NO_MEMORY_CLOCKS_MSG "No memory clocks <= %u MHZ were found in %u supported memory clocks." +#define DCGM_FR_NO_GRAPHICS_CLOCKS_MSG \ + "No graphics clocks <= %u MHZ were found in %u supported graphics clocks for memory clock %u MHZ." +#define DCGM_FR_HAD_TO_RESTORE_STATE_MSG "Had to restore GPU state on NVML GPU(s): %s" +#define DCGM_FR_L1TAG_UNSUPPORTED_MSG "This card does not support the L1 cache test. Skipping test." +#define DCGM_FR_L1TAG_MISCOMPARE_MSG "Detected a miscompare failure in the L1 cache." +#define DCGM_FR_ROW_REMAP_FAILURE_MSG "Row remapping failed." +#define DCGM_FR_UNCONTAINED_ERROR_MSG "GPU had an uncontained error (XID 95)" /* * Suggestions for next steps for the corresponding error message */ -#define DCGM_FR_OK_NEXT "N/A" -#define DCGM_FR_UNKNOWN_NEXT "" -#define DCGM_FR_UNRECOGNIZED_NEXT "" -#define DCGM_FR_PCI_REPLAY_RATE_NEXT "Reconnect PCIe card. Run system side PCIE diagnostic utilities "\ - "to verify hops off the GPU board. If issue is on the board, run "\ - "the field diagnostic." -#define DCGM_FR_VOLATILE_DBE_DETECTED_NEXT "Drain the GPU and reset it or reboot the node." -#define DCGM_FR_VOLATILE_SBE_DETECTED_NEXT "Monitor - this GPU can still perform workload." -#define DCGM_FR_PENDING_PAGE_RETIREMENTS_NEXT "If volatile double bit errors exist, drain the GPU and reset it "\ - "or reboot the node. Otherwise, monitor - GPU can still perform "\ - "workload." 
-#define DCGM_FR_RETIRED_PAGES_LIMIT_NEXT TRIAGE_RUN_FIELD_DIAG_MSG -#define DCGM_FR_RETIRED_PAGES_DBE_LIMIT_NEXT TRIAGE_RUN_FIELD_DIAG_MSG -#define DCGM_FR_CORRUPT_INFOROM_NEXT "Flash the InfoROM to clear this corruption." -#define DCGM_FR_CLOCK_THROTTLE_THERMAL_NEXT DEBUG_COOLING_MSG -#define DCGM_FR_POWER_UNREADABLE_NEXT "" -#define DCGM_FR_CLOCK_THROTTLE_POWER_NEXT "Monitor the power conditions. This GPU can still perform workload." -#define DCGM_FR_NVLINK_ERROR_THRESHOLD_NEXT "Monitor the NVLink. It can still perform workload." -#define DCGM_FR_NVLINK_DOWN_NEXT TRIAGE_RUN_FIELD_DIAG_MSG -#define DCGM_FR_NVSWITCH_FATAL_ERROR_NEXT TRIAGE_RUN_FIELD_DIAG_MSG -#define DCGM_FR_NVSWITCH_NON_FATAL_ERROR_NEXT "Monitor the NVSwitch. It can still perform workload." -#define DCGM_FR_NVSWITCH_DOWN_NEXT "" -#define DCGM_FR_NO_ACCESS_TO_FILE_NEXT "Check relevant permissions, access, and existence of the file." -#define DCGM_FR_NVML_API_NEXT "Check the error condition and ensure that appropriate libraries "\ - "are present and accessible." -#define DCGM_FR_DEVICE_COUNT_MISMATCH_NEXT "Check for the presence of cgroups, operating system blocks, and "\ - "or unsupported / older cards" -#define DCGM_FR_BAD_PARAMETER_NEXT "" -#define DCGM_FR_CANNOT_OPEN_LIB_NEXT "Check for the existence of the library and set LD_LIBRARY_PATH "\ - "if needed." -#define DCGM_FR_BLACKLISTED_DRIVER_NEXT "Please load the appropriate driver." -#define DCGM_FR_NVML_LIB_BAD_NEXT "Make sure that the required version of libnvidia-ml.so "\ - "is present and accessible on the system." -#define DCGM_FR_GRAPHICS_PROCESSES_NEXT "Stop the graphics processes or run this diagnostic on a server "\ - "that is not being used for display purposes." -#define DCGM_FR_HOSTENGINE_CONN_NEXT "If hostengine is run separately, please ensure that it is up "\ - "and responsive." -#define DCGM_FR_FIELD_QUERY_NEXT "" -#define DCGM_FR_BAD_CUDA_ENV_NEXT "Please unset this environment variable to address test failures." 
-#define DCGM_FR_PERSISTENCE_MODE_NEXT "Enable persistence mode by running \"nvidia-smi -i -pm "\ - "1 \" as root." -#define DCGM_FR_LOW_BANDWIDTH_NEXT "Verify that your minimum bandwidth setting is appropriate for "\ - "the topology of each GPU. If so, and errors are consistent, "\ - "please run a field diagnostic." -#define DCGM_FR_HIGH_LATENCY_NEXT "Verify that your maximum latency setting is appropriate for "\ - "the topology of each GPU. If so, and errors are consistent, "\ - "please run a field diagnostic." -#define DCGM_FR_CANNOT_GET_FIELD_TAG_NEXT "" -#define DCGM_FR_FIELD_VIOLATION_NEXT "" -#define DCGM_FR_FIELD_THRESHOLD_NEXT "" -#define DCGM_FR_FIELD_VIOLATION_DBL_NEXT "" -#define DCGM_FR_FIELD_THRESHOLD_DBL_NEXT "" -#define DCGM_FR_UNSUPPORTED_FIELD_TYPE_NEXT "" -#define DCGM_FR_FIELD_THRESHOLD_TS_NEXT "" -#define DCGM_FR_FIELD_THRESHOLD_TS_DBL_NEXT "" -#define DCGM_FR_THERMAL_VIOLATIONS_NEXT DEBUG_COOLING_MSG -#define DCGM_FR_THERMAL_VIOLATIONS_TS_NEXT DEBUG_COOLING_MSG -#define DCGM_FR_TEMP_VIOLATION_NEXT "Verify that the user-specified temperature maximum is set "\ - "correctly. If it is, check the cooling for this GPU and node: "\ - DEBUG_COOLING_MSG -#define DCGM_FR_THROTTLING_VIOLATION_NEXT "" -#define DCGM_FR_INTERNAL_NEXT "" -#define DCGM_FR_PCIE_GENERATION_NEXT "" -#define DCGM_FR_PCIE_WIDTH_NEXT "" -#define DCGM_FR_ABORTED_NEXT "" -#define DCGM_FR_TEST_DISABLED_NEXT "" -#define DCGM_FR_CANNOT_GET_STAT_NEXT "If running a standalone nv-hostengine, verify that it is up "\ - "and responsive." -#define DCGM_FR_STRESS_LEVEL_NEXT "" -#define DCGM_FR_CUDA_API_NEXT "" -#define DCGM_FR_FAULTY_MEMORY_NEXT TRIAGE_RUN_FIELD_DIAG_MSG -#define DCGM_FR_CANNOT_SET_WATCHES_NEXT "" -#define DCGM_FR_CUDA_UNBOUND_NEXT "" -#define DCGM_FR_ECC_DISABLED_NEXT "Enable ECC memory by running \"nvidia-smi -i -e 1\" "\ - "to enable. This may require a GPU reset or reboot to take effect." 
-#define DCGM_FR_MEMORY_ALLOC_NEXT "" -#define DCGM_FR_CUDA_DBE_NEXT TRIAGE_RUN_FIELD_DIAG_MSG -#define DCGM_FR_MEMORY_MISMATCH_NEXT TRIAGE_RUN_FIELD_DIAG_MSG -#define DCGM_FR_CUDA_DEVICE_NEXT "Make sure CUDA_VISIBLE_DEVICES is not preventing visibility of "\ - "this GPU. Also check if CUDA libraries are compatible and "\ - "correctly installed." -#define DCGM_FR_ECC_UNSUPPORTED_NEXT "" -#define DCGM_FR_ECC_PENDING_NEXT "Reboot to complete activation of the ECC memory." -#define DCGM_FR_MEMORY_BANDWIDTH_NEXT "" -#define DCGM_FR_TARGET_POWER_NEXT "Verify that the clock speeds and GPU utilization are high." -#define DCGM_FR_API_FAIL_NEXT "" -#define DCGM_FR_API_FAIL_GPU_NEXT "" -#define DCGM_FR_CUDA_CONTEXT_NEXT "Please make sure the correct driver version is installed and "\ - "verify that no conflicting libraries are present." +#define DCGM_FR_OK_NEXT "N/A" +#define DCGM_FR_UNKNOWN_NEXT "" +#define DCGM_FR_UNRECOGNIZED_NEXT "" +#define DCGM_FR_PCI_REPLAY_RATE_NEXT \ + "Reconnect PCIe card. Run system side PCIE diagnostic utilities " \ + "to verify hops off the GPU board. If issue is on the board, run " \ + "the field diagnostic." +#define DCGM_FR_VOLATILE_DBE_DETECTED_NEXT "Drain the GPU and reset it or reboot the node." +#define DCGM_FR_VOLATILE_SBE_DETECTED_NEXT "Monitor - this GPU can still perform workload." +#define DCGM_FR_PENDING_PAGE_RETIREMENTS_NEXT \ + "If volatile double bit errors exist, drain the GPU and reset it " \ + "or reboot the node. Otherwise, monitor - GPU can still perform " \ + "workload." +#define DCGM_FR_RETIRED_PAGES_LIMIT_NEXT TRIAGE_RUN_FIELD_DIAG_MSG +#define DCGM_FR_RETIRED_PAGES_DBE_LIMIT_NEXT TRIAGE_RUN_FIELD_DIAG_MSG +#define DCGM_FR_CORRUPT_INFOROM_NEXT "Flash the InfoROM to clear this corruption." +#define DCGM_FR_CLOCK_THROTTLE_THERMAL_NEXT DEBUG_COOLING_MSG +#define DCGM_FR_POWER_UNREADABLE_NEXT "" +#define DCGM_FR_CLOCK_THROTTLE_POWER_NEXT "Monitor the power conditions. This GPU can still perform workload." 
+#define DCGM_FR_NVLINK_ERROR_THRESHOLD_NEXT "Monitor the NVLink. It can still perform workload." +#define DCGM_FR_NVLINK_DOWN_NEXT TRIAGE_RUN_FIELD_DIAG_MSG +#define DCGM_FR_NVSWITCH_FATAL_ERROR_NEXT TRIAGE_RUN_FIELD_DIAG_MSG +#define DCGM_FR_NVSWITCH_NON_FATAL_ERROR_NEXT "Monitor the NVSwitch. It can still perform workload." +#define DCGM_FR_NVSWITCH_DOWN_NEXT "" +#define DCGM_FR_NO_ACCESS_TO_FILE_NEXT "Check relevant permissions, access, and existence of the file." +#define DCGM_FR_NVML_API_NEXT \ + "Check the error condition and ensure that appropriate libraries " \ + "are present and accessible." +#define DCGM_FR_DEVICE_COUNT_MISMATCH_NEXT \ + "Check for the presence of cgroups, operating system blocks, and " \ + "or unsupported / older cards" +#define DCGM_FR_BAD_PARAMETER_NEXT "" +#define DCGM_FR_CANNOT_OPEN_LIB_NEXT \ + "Check for the existence of the library and set LD_LIBRARY_PATH " \ + "if needed." +#define DCGM_FR_BLACKLISTED_DRIVER_NEXT "Please load the appropriate driver." +#define DCGM_FR_NVML_LIB_BAD_NEXT \ + "Make sure that the required version of libnvidia-ml.so " \ + "is present and accessible on the system." +#define DCGM_FR_GRAPHICS_PROCESSES_NEXT \ + "Stop the graphics processes or run this diagnostic on a server " \ + "that is not being used for display purposes." +#define DCGM_FR_HOSTENGINE_CONN_NEXT \ + "If hostengine is run separately, please ensure that it is up " \ + "and responsive." +#define DCGM_FR_FIELD_QUERY_NEXT "" +#define DCGM_FR_BAD_CUDA_ENV_NEXT "Please unset this environment variable to address test failures." +#define DCGM_FR_PERSISTENCE_MODE_NEXT \ + "Enable persistence mode by running \"nvidia-smi -i -pm " \ + "1 \" as root." +#define DCGM_FR_LOW_BANDWIDTH_NEXT \ + "Verify that your minimum bandwidth setting is appropriate for " \ + "the topology of each GPU. If so, and errors are consistent, " \ + "please run a field diagnostic." 
+#define DCGM_FR_HIGH_LATENCY_NEXT \ + "Verify that your maximum latency setting is appropriate for " \ + "the topology of each GPU. If so, and errors are consistent, " \ + "please run a field diagnostic." +#define DCGM_FR_CANNOT_GET_FIELD_TAG_NEXT "" +#define DCGM_FR_FIELD_VIOLATION_NEXT "" +#define DCGM_FR_FIELD_THRESHOLD_NEXT "" +#define DCGM_FR_FIELD_VIOLATION_DBL_NEXT "" +#define DCGM_FR_FIELD_THRESHOLD_DBL_NEXT "" +#define DCGM_FR_UNSUPPORTED_FIELD_TYPE_NEXT "" +#define DCGM_FR_FIELD_THRESHOLD_TS_NEXT "" +#define DCGM_FR_FIELD_THRESHOLD_TS_DBL_NEXT "" +#define DCGM_FR_THERMAL_VIOLATIONS_NEXT DEBUG_COOLING_MSG +#define DCGM_FR_THERMAL_VIOLATIONS_TS_NEXT DEBUG_COOLING_MSG +#define DCGM_FR_TEMP_VIOLATION_NEXT \ + "Verify that the user-specified temperature maximum is set " \ + "correctly. If it is, check the cooling for this GPU and node: " DEBUG_COOLING_MSG +#define DCGM_FR_THROTTLING_VIOLATION_NEXT "" +#define DCGM_FR_INTERNAL_NEXT "" +#define DCGM_FR_PCIE_GENERATION_NEXT "" +#define DCGM_FR_PCIE_WIDTH_NEXT "" +#define DCGM_FR_ABORTED_NEXT "" +#define DCGM_FR_TEST_DISABLED_NEXT "" +#define DCGM_FR_CANNOT_GET_STAT_NEXT \ + "If running a standalone nv-hostengine, verify that it is up " \ + "and responsive." +#define DCGM_FR_STRESS_LEVEL_NEXT "" +#define DCGM_FR_CUDA_API_NEXT "" +#define DCGM_FR_FAULTY_MEMORY_NEXT TRIAGE_RUN_FIELD_DIAG_MSG +#define DCGM_FR_CANNOT_SET_WATCHES_NEXT "" +#define DCGM_FR_CUDA_UNBOUND_NEXT "" +#define DCGM_FR_ECC_DISABLED_NEXT \ + "Enable ECC memory by running \"nvidia-smi -i -e 1\" " \ + "to enable. This may require a GPU reset or reboot to take effect." +#define DCGM_FR_MEMORY_ALLOC_NEXT "" +#define DCGM_FR_CUDA_DBE_NEXT TRIAGE_RUN_FIELD_DIAG_MSG +#define DCGM_FR_MEMORY_MISMATCH_NEXT TRIAGE_RUN_FIELD_DIAG_MSG +#define DCGM_FR_CUDA_DEVICE_NEXT \ + "Make sure CUDA_VISIBLE_DEVICES is not preventing visibility of " \ + "this GPU. Also check if CUDA libraries are compatible and " \ + "correctly installed." 
+#define DCGM_FR_ECC_UNSUPPORTED_NEXT "" +#define DCGM_FR_ECC_PENDING_NEXT "Reboot to complete activation of the ECC memory." +#define DCGM_FR_MEMORY_BANDWIDTH_NEXT "" +#define DCGM_FR_TARGET_POWER_NEXT "Verify that the clock speeds and GPU utilization are high." +#define DCGM_FR_API_FAIL_NEXT "" +#define DCGM_FR_API_FAIL_GPU_NEXT "" +#define DCGM_FR_CUDA_CONTEXT_NEXT \ + "Please make sure the correct driver version is installed and " \ + "verify that no conflicting libraries are present." #define DCGM_FR_DCGM_API_NEXT "" #define DCGM_FR_CONCURRENT_GPUS_NEXT "" #define DCGM_FR_TOO_MANY_ERRORS_NEXT "" #define DCGM_FR_NVLINK_CRC_ERROR_THRESHOLD_NEXT TRIAGE_RUN_FIELD_DIAG_MSG #define DCGM_FR_NVLINK_ERROR_CRITICAL_NEXT TRIAGE_RUN_FIELD_DIAG_MSG -#define DCGM_FR_ENFORCED_POWER_LIMIT_NEXT "If this enforced power limit is necessary, then this test "\ - "cannot be run. If it is unnecessary, then raise the enforced "\ - "power limit setting to be able to run this test." -#define DCGM_FR_MEMORY_ALLOC_HOST_NEXT "Manually kill processes or restart your machine." -#define DCGM_FR_GPU_OP_MODE_NEXT "Fix by running nvidia-smi as root with: nvidia-smi --gom=0 -i "\ - "" -#define DCGM_FR_NO_MEMORY_CLOCKS_NEXT "" -#define DCGM_FR_NO_GRAPHICS_CLOCKS_NEXT "" -#define DCGM_FR_HAD_TO_RESTORE_STATE_NEXT "" +#define DCGM_FR_ENFORCED_POWER_LIMIT_NEXT \ + "If this enforced power limit is necessary, then this test " \ + "cannot be run. If it is unnecessary, then raise the enforced " \ + "power limit setting to be able to run this test." +#define DCGM_FR_MEMORY_ALLOC_HOST_NEXT "Manually kill processes or restart your machine." 
+#define DCGM_FR_GPU_OP_MODE_NEXT \ + "Fix by running nvidia-smi as root with: nvidia-smi --gom=0 -i " \ + "" +#define DCGM_FR_NO_MEMORY_CLOCKS_NEXT "" +#define DCGM_FR_NO_GRAPHICS_CLOCKS_NEXT "" +#define DCGM_FR_HAD_TO_RESTORE_STATE_NEXT "" +#define DCGM_FR_L1TAG_UNSUPPORTED_NEXT "" +#define DCGM_FR_L1TAG_MISCOMPARE_NEXT TRIAGE_RUN_FIELD_DIAG_MSG +#define DCGM_FR_ROW_REMAP_FAILURE_NEXT DCGM_FR_VOLATILE_DBE_DETECTED_NEXT +#define DCGM_FR_UNCONTAINED_ERROR_NEXT DCGM_FR_VOLATILE_DBE_DETECTED_NEXT dcgmErrorSeverity_t dcgmErrorGetPriorityByCode(unsigned int code); const char *dcgmErrorGetFormatMsgByCode(unsigned int code); diff --git a/bindings/go/dcgm/dcgm_fields.h b/bindings/go/dcgm/dcgm_fields.h index b514766..8bbf799 100644 --- a/bindings/go/dcgm/dcgm_fields.h +++ b/bindings/go/dcgm/dcgm_fields.h @@ -15,70 +15,79 @@ #ifdef __cplusplus extern "C" { #endif - + /***************************************************************************************************/ /** @defgroup dcgmFieldTypes Field Types * Field Types are a single byte. * @{ */ -/***************************************************************************************************/ +/***************************************************************************************************/ /** * Blob of binary data representing a structure */ -#define DCGM_FT_BINARY 'b' - +#define DCGM_FT_BINARY 'b' + /** * 8-byte double precision */ -#define DCGM_FT_DOUBLE 'd' - +#define DCGM_FT_DOUBLE 'd' + /** * 8-byte signed integer */ -#define DCGM_FT_INT64 'i' - +#define DCGM_FT_INT64 'i' + /** * Null-terminated ASCII Character string */ -#define DCGM_FT_STRING 's' - +#define DCGM_FT_STRING 's' + /** * 8-byte signed integer usec since 1970 */ #define DCGM_FT_TIMESTAMP 't' - -/** @} */ - + +/** @} */ + /***************************************************************************************************/ /** @defgroup dcgmFieldScope Field Scope * Represents field association with entity scope or global scope. 
* @{ */ -/***************************************************************************************************/ +/***************************************************************************************************/ /** * Field is global (ex: driver version) */ -#define DCGM_FS_GLOBAL 0 +#define DCGM_FS_GLOBAL 0 /** * Field is associated with an entity (GPU, VGPU...etc) */ -#define DCGM_FS_ENTITY 1 +#define DCGM_FS_ENTITY 1 /** * Field is associated with a device. Deprecated. Use DCGM_FS_ENTITY */ -#define DCGM_FS_DEVICE DCGM_FS_ENTITY +#define DCGM_FS_DEVICE DCGM_FS_ENTITY + +/** @} */ + +/***************************************************************************************************/ +/** @defgroup dcgmFieldConstants Field Constants + * Constants that represent contents of individual field values. + * @{ + */ +/***************************************************************************************************/ /** * DCGM_FI_DEV_CUDA_COMPUTE_CAPABILITY is 16 bits of major version followed by * 16 bits of the minor version. These macros separate the two. */ -#define DCGM_CUDA_COMPUTE_CAPABILITY_MAJOR(x) ((uint64_t)(x) & 0xFFFF0000) -#define DCGM_CUDA_COMPUTE_CAPABILITY_MINOR(x) ((uint64_t)(x) & 0x0000FFFF) +#define DCGM_CUDA_COMPUTE_CAPABILITY_MAJOR(x) ((uint64_t)(x)&0xFFFF0000) +#define DCGM_CUDA_COMPUTE_CAPABILITY_MINOR(x) ((uint64_t)(x)&0x0000FFFF) /** * DCGM_FI_DEV_CLOCK_THROTTLE_REASONS is a bitmap of why the clock is throttled. 
@@ -88,23 +97,23 @@ extern "C" { /** Nothing is running on the GPU and the clocks are dropping to Idle state * \note This limiter may be removed in a later release */ -#define DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE 0x0000000000000001LL +#define DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE 0x0000000000000001LL /** GPU clocks are limited by current setting of applications clocks */ -#define DCGM_CLOCKS_THROTTLE_REASON_CLOCKS_SETTING 0x0000000000000002LL -/** SW Power Scaling algorithm is reducing the clocks below requested clocks +#define DCGM_CLOCKS_THROTTLE_REASON_CLOCKS_SETTING 0x0000000000000002LL +/** SW Power Scaling algorithm is reducing the clocks below requested clocks */ -#define DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP 0x0000000000000004LL +#define DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP 0x0000000000000004LL /** HW Slowdown (reducing the core clocks by a factor of 2 or more) is engaged * - *This is an indicator of: - * - temperature being too high - * - External Power Brake Assertion is triggered (e.g. by the system power supply) - * - Power draw is too high and Fast Trigger protection is reducing the clocks - * - May be also reported during PState or clock change - * - This behavior may be removed in a later release. - */ -#define DCGM_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN 0x0000000000000008LL + * This is an indicator of: + * - temperature being too high + * - External Power Brake Assertion is triggered (e.g. by the system power supply) + * - Power draw is too high and Fast Trigger protection is reducing the clocks + * - May be also reported during PState or clock change + * - This behavior may be removed in a later release. + */ +#define DCGM_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN 0x0000000000000008LL /** Sync Boost * * This GPU has been added to a Sync boost group with nvidia-smi or DCGM in @@ -113,29 +122,42 @@ extern "C" { * the throttle reasons for other GPUs in the system to see why those GPUs are * holding this one at lower clocks. 
*/ -#define DCGM_CLOCKS_THROTTLE_REASON_SYNC_BOOST 0x0000000000000010LL +#define DCGM_CLOCKS_THROTTLE_REASON_SYNC_BOOST 0x0000000000000010LL /** SW Thermal Slowdown * * This is an indicator of one or more of the following: * - Current GPU temperature above the GPU Max Operating Temperature * - Current memory temperature above the Memory Max Operating Temperature */ -#define DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL 0x0000000000000020LL +#define DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL 0x0000000000000020LL /** HW Thermal Slowdown (reducing the core clocks by a factor of 2 or more) is engaged * * This is an indicator of: * - temperature being too high */ -#define DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL 0x0000000000000040LL +#define DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL 0x0000000000000040LL /** HW Power Brake Slowdown (reducing the core clocks by a factor of 2 or more) is engaged * * This is an indicator of: * - External Power Brake Assertion being triggered (e.g. by the system power supply) */ -#define DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE 0x0000000000000080LL +#define DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE 0x0000000000000080LL /** GPU clocks are limited by current setting of Display clocks */ -#define DCGM_CLOCKS_THROTTLE_REASON_DISPLAY_CLOCKS 0x0000000000000100LL +#define DCGM_CLOCKS_THROTTLE_REASON_DISPLAY_CLOCKS 0x0000000000000100LL + +/** + * GPU virtualization mode types for DCGM_FI_DEV_VIRTUAL_MODE + */ +typedef enum +{ + DCGM_GPU_VIRTUALIZATION_MODE_NONE = 0, //!< Represents Bare Metal GPU + DCGM_GPU_VIRTUALIZATION_MODE_PASSTHROUGH = 1, //!< Device is associated with GPU-Passthrough + DCGM_GPU_VIRTUALIZATION_MODE_VGPU = 2, //!< Device is associated with vGPU inside virtual machine. 
+ DCGM_GPU_VIRTUALIZATION_MODE_HOST_VGPU = 3, //!< Device is associated with VGX hypervisor in vGPU mode + DCGM_GPU_VIRTUALIZATION_MODE_HOST_VSGA = 4, //!< Device is associated with VGX hypervisor in vSGA mode +} dcgmGpuVirtualizationMode_t; + /** @} */ @@ -151,12 +173,14 @@ extern "C" { */ typedef enum dcgm_field_entity_group_t { - DCGM_FE_NONE = 0, /** Field is not associated with an entity. Field scope should be DCGM_FS_GLOBAL */ - DCGM_FE_GPU, /** Field is associated with a GPU entity */ - DCGM_FE_VGPU, /** Field is associated with a VGPU entity */ - DCGM_FE_SWITCH, /** Field is associated with a Switch entity */ - - DCGM_FE_COUNT /** Number of elements in this enumeration. Keep this entry last */ + DCGM_FE_NONE = 0, /*!< Field is not associated with an entity. Field scope should be DCGM_FS_GLOBAL */ + DCGM_FE_GPU, /*!< Field is associated with a GPU entity */ + DCGM_FE_VGPU, /*!< Field is associated with a VGPU entity */ + DCGM_FE_SWITCH, /*!< Field is associated with a Switch entity */ + DCGM_FE_GPU_I, /*!< Field is associated with a GPU Instance entity */ + DCGM_FE_GPU_CI, /*!< Field is associated with a GPU Compute Instance entity */ + + DCGM_FE_COUNT /*!< Number of elements in this enumeration. 
Keep this entry last */ } dcgm_field_entity_group_t; /** @@ -172,98 +196,106 @@ typedef unsigned int dcgm_field_eid_t; * @{ */ /***************************************************************************************************/ - + /** * NULL field - */ -#define DCGM_FI_UNKNOWN 0 - + */ +#define DCGM_FI_UNKNOWN 0 + /** * Driver Version */ -#define DCGM_FI_DRIVER_VERSION 1 - +#define DCGM_FI_DRIVER_VERSION 1 + /* Underlying NVML version */ -#define DCGM_FI_NVML_VERSION 2 - +#define DCGM_FI_NVML_VERSION 2 + /* * Process Name */ -#define DCGM_FI_PROCESS_NAME 3 - +#define DCGM_FI_PROCESS_NAME 3 + /** * Number of Devices on the node - */ -#define DCGM_FI_DEV_COUNT 4 + */ +#define DCGM_FI_DEV_COUNT 4 + +/** + * Cuda Driver Version + * Retrieves a number with the major value in the thousands place and the minor value in the hundreds place. + * CUDA 11.1 = 11100 + */ +#define DCGM_FI_CUDA_DRIVER_VERSION 5 + /** * Name of the GPU device */ -#define DCGM_FI_DEV_NAME 50 - +#define DCGM_FI_DEV_NAME 50 + /** * Device Brand */ -#define DCGM_FI_DEV_BRAND 51 - +#define DCGM_FI_DEV_BRAND 51 + /** * NVML index of this GPU */ -#define DCGM_FI_DEV_NVML_INDEX 52 +#define DCGM_FI_DEV_NVML_INDEX 52 /** * Device Serial Number */ -#define DCGM_FI_DEV_SERIAL 53 +#define DCGM_FI_DEV_SERIAL 53 /** * UUID corresponding to the device */ -#define DCGM_FI_DEV_UUID 54 +#define DCGM_FI_DEV_UUID 54 /** * Device node minor number /dev/nvidia# */ -#define DCGM_FI_DEV_MINOR_NUMBER 55 +#define DCGM_FI_DEV_MINOR_NUMBER 55 /** * OEM inforom version */ -#define DCGM_FI_DEV_OEM_INFOROM_VER 56 +#define DCGM_FI_DEV_OEM_INFOROM_VER 56 /** * PCI attributes for the device */ -#define DCGM_FI_DEV_PCI_BUSID 57 +#define DCGM_FI_DEV_PCI_BUSID 57 /** * The combined 16-bit device id and 16-bit vendor id */ -#define DCGM_FI_DEV_PCI_COMBINED_ID 58 - +#define DCGM_FI_DEV_PCI_COMBINED_ID 58 + /** * The 32-bit Sub System Device ID */ -#define DCGM_FI_DEV_PCI_SUBSYS_ID 59 +#define DCGM_FI_DEV_PCI_SUBSYS_ID 59 /** * 
Topology of all GPUs on the system via PCI (static) */ -#define DCGM_FI_GPU_TOPOLOGY_PCI 60 +#define DCGM_FI_GPU_TOPOLOGY_PCI 60 /** * Topology of all GPUs on the system via NVLINK (static) */ -#define DCGM_FI_GPU_TOPOLOGY_NVLINK 61 +#define DCGM_FI_GPU_TOPOLOGY_NVLINK 61 /** * Affinity of all GPUs on the system (static) */ -#define DCGM_FI_GPU_TOPOLOGY_AFFINITY 62 +#define DCGM_FI_GPU_TOPOLOGY_AFFINITY 62 /** * Cuda compute capability for the device. - * The major version is the upper 32 bits and + * The major version is the upper 32 bits and * the minor version is the lower 32 bits. */ #define DCGM_FI_DEV_CUDA_COMPUTE_CAPABILITY 63 @@ -271,103 +303,114 @@ typedef unsigned int dcgm_field_eid_t; /** * Compute mode for the device */ -#define DCGM_FI_DEV_COMPUTE_MODE 65 +#define DCGM_FI_DEV_COMPUTE_MODE 65 +/** + * Persistence mode for the device + * Boolean: 0 is disabled, 1 is enabled + */ +#define DCGM_FI_DEV_PERSISTENCE_MODE 66 + +/** + * MIG mode for the device + * Boolean: 0 is disabled, 1 is enabled + */ +#define DCGM_FI_DEV_MIG_MODE 67 /** * Device CPU affinity. part 1/8 = cpus 0 - 63 */ -#define DCGM_FI_DEV_CPU_AFFINITY_0 70 - +#define DCGM_FI_DEV_CPU_AFFINITY_0 70 + /** * Device CPU affinity. part 1/8 = cpus 64 - 127 - */ -#define DCGM_FI_DEV_CPU_AFFINITY_1 71 - + */ +#define DCGM_FI_DEV_CPU_AFFINITY_1 71 + /** * Device CPU affinity. part 2/8 = cpus 128 - 191 - */ -#define DCGM_FI_DEV_CPU_AFFINITY_2 72 + */ +#define DCGM_FI_DEV_CPU_AFFINITY_2 72 /** * Device CPU affinity. 
part 3/8 = cpus 192 - 255 */ -#define DCGM_FI_DEV_CPU_AFFINITY_3 73 +#define DCGM_FI_DEV_CPU_AFFINITY_3 73 /** * ECC inforom version */ -#define DCGM_FI_DEV_ECC_INFOROM_VER 80 +#define DCGM_FI_DEV_ECC_INFOROM_VER 80 /** * Power management object inforom version */ -#define DCGM_FI_DEV_POWER_INFOROM_VER 81 +#define DCGM_FI_DEV_POWER_INFOROM_VER 81 /** * Inforom image version */ -#define DCGM_FI_DEV_INFOROM_IMAGE_VER 82 +#define DCGM_FI_DEV_INFOROM_IMAGE_VER 82 /** * Inforom configuration checksum */ -#define DCGM_FI_DEV_INFOROM_CONFIG_CHECK 83 +#define DCGM_FI_DEV_INFOROM_CONFIG_CHECK 83 /** * Reads the infoROM from the flash and verifies the checksums */ -#define DCGM_FI_DEV_INFOROM_CONFIG_VALID 84 +#define DCGM_FI_DEV_INFOROM_CONFIG_VALID 84 /** * VBIOS version of the device */ -#define DCGM_FI_DEV_VBIOS_VERSION 85 +#define DCGM_FI_DEV_VBIOS_VERSION 85 /** * Total BAR1 of the GPU in MB */ -#define DCGM_FI_DEV_BAR1_TOTAL 90 +#define DCGM_FI_DEV_BAR1_TOTAL 90 /** - * Sync boost settings on the node + * Deprecated - Sync boost settings on the node */ -#define DCGM_FI_SYNC_BOOST 91 +#define DCGM_FI_SYNC_BOOST 91 /** * Used BAR1 of the GPU in MB */ -#define DCGM_FI_DEV_BAR1_USED 92 +#define DCGM_FI_DEV_BAR1_USED 92 /** * Free BAR1 of the GPU in MB */ -#define DCGM_FI_DEV_BAR1_FREE 93 +#define DCGM_FI_DEV_BAR1_FREE 93 /** * SM clock for the device */ -#define DCGM_FI_DEV_SM_CLOCK 100 +#define DCGM_FI_DEV_SM_CLOCK 100 /** * Memory clock for the device */ -#define DCGM_FI_DEV_MEM_CLOCK 101 +#define DCGM_FI_DEV_MEM_CLOCK 101 /** * Video encoder/decoder clock for the device */ -#define DCGM_FI_DEV_VIDEO_CLOCK 102 +#define DCGM_FI_DEV_VIDEO_CLOCK 102 /** * SM Application clocks */ -#define DCGM_FI_DEV_APP_SM_CLOCK 110 +#define DCGM_FI_DEV_APP_SM_CLOCK 110 /** * Memory Application clocks */ -#define DCGM_FI_DEV_APP_MEM_CLOCK 111 +#define DCGM_FI_DEV_APP_MEM_CLOCK 111 /** * Current clock throttle reasons (bitmask of DCGM_CLOCKS_THROTTLE_REASON_*) @@ -377,42 +420,53 @@ typedef 
unsigned int dcgm_field_eid_t; /** * Maximum supported SM clock for the device */ -#define DCGM_FI_DEV_MAX_SM_CLOCK 113 +#define DCGM_FI_DEV_MAX_SM_CLOCK 113 /** * Maximum supported Memory clock for the device */ -#define DCGM_FI_DEV_MAX_MEM_CLOCK 114 +#define DCGM_FI_DEV_MAX_MEM_CLOCK 114 /** * Maximum supported Video encoder/decoder clock for the device */ -#define DCGM_FI_DEV_MAX_VIDEO_CLOCK 115 +#define DCGM_FI_DEV_MAX_VIDEO_CLOCK 115 /** * Auto-boost for the device (1 = enabled. 0 = disabled) */ -#define DCGM_FI_DEV_AUTOBOOST 120 +#define DCGM_FI_DEV_AUTOBOOST 120 /** * Supported clocks for the device */ -#define DCGM_FI_DEV_SUPPORTED_CLOCKS 130 +#define DCGM_FI_DEV_SUPPORTED_CLOCKS 130 /** * Memory temperature for the device */ -#define DCGM_FI_DEV_MEMORY_TEMP 140 +#define DCGM_FI_DEV_MEMORY_TEMP 140 /** * Current temperature readings for the device, in degrees C */ -#define DCGM_FI_DEV_GPU_TEMP 150 +#define DCGM_FI_DEV_GPU_TEMP 150 + +/** + * Maximum operating temperature for the memory of this GPU + */ +#define DCGM_FI_DEV_MEM_MAX_OP_TEMP 151 + +/** + * Maximum operating temperature for this GPU + */ +#define DCGM_FI_DEV_GPU_MAX_OP_TEMP 152 + /** * Power usage for the device in Watts */ -#define DCGM_FI_DEV_POWER_USAGE 155 +#define DCGM_FI_DEV_POWER_USAGE 155 /** * Total energy consumption for the GPU in mJ since the driver was last reloaded @@ -422,72 +476,76 @@ typedef unsigned int dcgm_field_eid_t; /** * Slowdown temperature for the device */ -#define DCGM_FI_DEV_SLOWDOWN_TEMP 158 +#define DCGM_FI_DEV_SLOWDOWN_TEMP 158 /** * Shutdown temperature for the device */ -#define DCGM_FI_DEV_SHUTDOWN_TEMP 159 +#define DCGM_FI_DEV_SHUTDOWN_TEMP 159 /** * Current Power limit for the device */ -#define DCGM_FI_DEV_POWER_MGMT_LIMIT 160 +#define DCGM_FI_DEV_POWER_MGMT_LIMIT 160 /** * Minimum power management limit for the device */ -#define DCGM_FI_DEV_POWER_MGMT_LIMIT_MIN 161 +#define DCGM_FI_DEV_POWER_MGMT_LIMIT_MIN 161 /** * Maximum power management limit for the 
device */ -#define DCGM_FI_DEV_POWER_MGMT_LIMIT_MAX 162 +#define DCGM_FI_DEV_POWER_MGMT_LIMIT_MAX 162 /** * Default power management limit for the device */ -#define DCGM_FI_DEV_POWER_MGMT_LIMIT_DEF 163 +#define DCGM_FI_DEV_POWER_MGMT_LIMIT_DEF 163 /** * Effective power limit that the driver enforces after taking into account all limiters */ -#define DCGM_FI_DEV_ENFORCED_POWER_LIMIT 164 +#define DCGM_FI_DEV_ENFORCED_POWER_LIMIT 164 /** * Performance state (P-State) 0-15. 0=highest */ -#define DCGM_FI_DEV_PSTATE 190 +#define DCGM_FI_DEV_PSTATE 190 /** * Fan speed for the device in percent 0-100 */ -#define DCGM_FI_DEV_FAN_SPEED 191 +#define DCGM_FI_DEV_FAN_SPEED 191 /** * PCIe Tx utilization information + * + * Deprecated: Use DCGM_FI_PROF_PCIE_TX_BYTES instead. */ -#define DCGM_FI_DEV_PCIE_TX_THROUGHPUT 200 - +#define DCGM_FI_DEV_PCIE_TX_THROUGHPUT 200 + /** * PCIe Rx utilization information - */ -#define DCGM_FI_DEV_PCIE_RX_THROUGHPUT 201 - + * + * Deprecated: Use DCGM_FI_PROF_PCIE_RX_BYTES instead. + */ +#define DCGM_FI_DEV_PCIE_RX_THROUGHPUT 201 + /** * PCIe replay counter */ -#define DCGM_FI_DEV_PCIE_REPLAY_COUNTER 202 +#define DCGM_FI_DEV_PCIE_REPLAY_COUNTER 202 /** * GPU Utilization */ -#define DCGM_FI_DEV_GPU_UTIL 203 +#define DCGM_FI_DEV_GPU_UTIL 203 /** * Memory Utilization */ -#define DCGM_FI_DEV_MEM_COPY_UTIL 204 +#define DCGM_FI_DEV_MEM_COPY_UTIL 204 /** * Process accounting stats. @@ -496,17 +554,17 @@ typedef unsigned int dcgm_field_eid_t; * enable accounting ahead of time. Accounting mode can be enabled by * running "nvidia-smi -am 1" as root on the same node the host engine is running on. 
*/ -#define DCGM_FI_DEV_ACCOUNTING_DATA 205 +#define DCGM_FI_DEV_ACCOUNTING_DATA 205 /** * Encoder Utilization */ -#define DCGM_FI_DEV_ENC_UTIL 206 +#define DCGM_FI_DEV_ENC_UTIL 206 /** * Decoder Utilization */ -#define DCGM_FI_DEV_DEC_UTIL 207 +#define DCGM_FI_DEV_DEC_UTIL 207 /** * Memory utilization samples @@ -516,57 +574,57 @@ typedef unsigned int dcgm_field_eid_t; /* * SM utilization samples */ -#define DCGM_FI_DEV_GPU_UTIL_SAMPLES 211 +#define DCGM_FI_DEV_GPU_UTIL_SAMPLES 211 /** * Graphics processes running on the GPU. */ -#define DCGM_FI_DEV_GRAPHICS_PIDS 220 +#define DCGM_FI_DEV_GRAPHICS_PIDS 220 /** * Compute processes running on the GPU. */ -#define DCGM_FI_DEV_COMPUTE_PIDS 221 +#define DCGM_FI_DEV_COMPUTE_PIDS 221 /** * XID errors. The value is the specific XID error */ -#define DCGM_FI_DEV_XID_ERRORS 230 +#define DCGM_FI_DEV_XID_ERRORS 230 /** * PCIe Max Link Generation */ -#define DCGM_FI_DEV_PCIE_MAX_LINK_GEN 235 +#define DCGM_FI_DEV_PCIE_MAX_LINK_GEN 235 /** * PCIe Max Link Width */ -#define DCGM_FI_DEV_PCIE_MAX_LINK_WIDTH 236 +#define DCGM_FI_DEV_PCIE_MAX_LINK_WIDTH 236 /** * PCIe Current Link Generation */ -#define DCGM_FI_DEV_PCIE_LINK_GEN 237 +#define DCGM_FI_DEV_PCIE_LINK_GEN 237 /** * PCIe Current Link Width */ -#define DCGM_FI_DEV_PCIE_LINK_WIDTH 238 +#define DCGM_FI_DEV_PCIE_LINK_WIDTH 238 /** * Power Violation time in usec */ -#define DCGM_FI_DEV_POWER_VIOLATION 240 +#define DCGM_FI_DEV_POWER_VIOLATION 240 /** * Thermal Violation time in usec */ -#define DCGM_FI_DEV_THERMAL_VIOLATION 241 +#define DCGM_FI_DEV_THERMAL_VIOLATION 241 /** * Sync Boost Violation time in usec */ -#define DCGM_FI_DEV_SYNC_BOOST_VIOLATION 242 +#define DCGM_FI_DEV_SYNC_BOOST_VIOLATION 242 /** * Board violation limit. @@ -576,7 +634,7 @@ typedef unsigned int dcgm_field_eid_t; /** *Low utilisation violation limit. */ -#define DCGM_FI_DEV_LOW_UTIL_VIOLATION 244 +#define DCGM_FI_DEV_LOW_UTIL_VIOLATION 244 /** *Reliability violation limit. 
@@ -596,1301 +654,1439 @@ typedef unsigned int dcgm_field_eid_t; /** * Total Frame Buffer of the GPU in MB */ -#define DCGM_FI_DEV_FB_TOTAL 250 +#define DCGM_FI_DEV_FB_TOTAL 250 /** * Free Frame Buffer in MB */ -#define DCGM_FI_DEV_FB_FREE 251 +#define DCGM_FI_DEV_FB_FREE 251 /** * Used Frame Buffer in MB */ -#define DCGM_FI_DEV_FB_USED 252 +#define DCGM_FI_DEV_FB_USED 252 /** * Current ECC mode for the device */ -#define DCGM_FI_DEV_ECC_CURRENT 300 - +#define DCGM_FI_DEV_ECC_CURRENT 300 + /** * Pending ECC mode for the device - */ -#define DCGM_FI_DEV_ECC_PENDING 301 - + */ +#define DCGM_FI_DEV_ECC_PENDING 301 + /** * Total single bit volatile ECC errors - */ -#define DCGM_FI_DEV_ECC_SBE_VOL_TOTAL 310 - + */ +#define DCGM_FI_DEV_ECC_SBE_VOL_TOTAL 310 + /** * Total double bit volatile ECC errors - */ -#define DCGM_FI_DEV_ECC_DBE_VOL_TOTAL 311 - + */ +#define DCGM_FI_DEV_ECC_DBE_VOL_TOTAL 311 + /** * Total single bit aggregate (persistent) ECC errors * Note: monotonically increasing - */ -#define DCGM_FI_DEV_ECC_SBE_AGG_TOTAL 312 - + */ +#define DCGM_FI_DEV_ECC_SBE_AGG_TOTAL 312 + /** * Total double bit aggregate (persistent) ECC errors * Note: monotonically increasing - */ -#define DCGM_FI_DEV_ECC_DBE_AGG_TOTAL 313 - + */ +#define DCGM_FI_DEV_ECC_DBE_AGG_TOTAL 313 + /** * L1 cache single bit volatile ECC errors */ -#define DCGM_FI_DEV_ECC_SBE_VOL_L1 314 - +#define DCGM_FI_DEV_ECC_SBE_VOL_L1 314 + /** * L1 cache double bit volatile ECC errors */ -#define DCGM_FI_DEV_ECC_DBE_VOL_L1 315 - +#define DCGM_FI_DEV_ECC_DBE_VOL_L1 315 + /** * L2 cache single bit volatile ECC errors */ -#define DCGM_FI_DEV_ECC_SBE_VOL_L2 316 - +#define DCGM_FI_DEV_ECC_SBE_VOL_L2 316 + /** * L2 cache double bit volatile ECC errors */ -#define DCGM_FI_DEV_ECC_DBE_VOL_L2 317 - +#define DCGM_FI_DEV_ECC_DBE_VOL_L2 317 + /** * Device memory single bit volatile ECC errors */ -#define DCGM_FI_DEV_ECC_SBE_VOL_DEV 318 +#define DCGM_FI_DEV_ECC_SBE_VOL_DEV 318 /** * Device memory double bit volatile ECC 
errors */ -#define DCGM_FI_DEV_ECC_DBE_VOL_DEV 319 - +#define DCGM_FI_DEV_ECC_DBE_VOL_DEV 319 + /** * Register file single bit volatile ECC errors */ -#define DCGM_FI_DEV_ECC_SBE_VOL_REG 320 - +#define DCGM_FI_DEV_ECC_SBE_VOL_REG 320 + /** * Register file double bit volatile ECC errors - */ -#define DCGM_FI_DEV_ECC_DBE_VOL_REG 321 - + */ +#define DCGM_FI_DEV_ECC_DBE_VOL_REG 321 + /** * Texture memory single bit volatile ECC errors - */ -#define DCGM_FI_DEV_ECC_SBE_VOL_TEX 322 - + */ +#define DCGM_FI_DEV_ECC_SBE_VOL_TEX 322 + /** * Texture memory double bit volatile ECC errors - */ -#define DCGM_FI_DEV_ECC_DBE_VOL_TEX 323 - + */ +#define DCGM_FI_DEV_ECC_DBE_VOL_TEX 323 + /** * L1 cache single bit aggregate (persistent) ECC errors * Note: monotonically increasing - */ -#define DCGM_FI_DEV_ECC_SBE_AGG_L1 324 - + */ +#define DCGM_FI_DEV_ECC_SBE_AGG_L1 324 + /** * L1 cache double bit aggregate (persistent) ECC errors * Note: monotonically increasing - */ -#define DCGM_FI_DEV_ECC_DBE_AGG_L1 325 - + */ +#define DCGM_FI_DEV_ECC_DBE_AGG_L1 325 + /** * L2 cache single bit aggregate (persistent) ECC errors * Note: monotonically increasing - */ -#define DCGM_FI_DEV_ECC_SBE_AGG_L2 326 + */ +#define DCGM_FI_DEV_ECC_SBE_AGG_L2 326 /** * L2 cache double bit aggregate (persistent) ECC errors * Note: monotonically increasing - */ -#define DCGM_FI_DEV_ECC_DBE_AGG_L2 327 - + */ +#define DCGM_FI_DEV_ECC_DBE_AGG_L2 327 + /** * Device memory single bit aggregate (persistent) ECC errors * Note: monotonically increasing - */ -#define DCGM_FI_DEV_ECC_SBE_AGG_DEV 328 - + */ +#define DCGM_FI_DEV_ECC_SBE_AGG_DEV 328 + /** * Device memory double bit aggregate (persistent) ECC errors * Note: monotonically increasing - */ -#define DCGM_FI_DEV_ECC_DBE_AGG_DEV 329 - + */ +#define DCGM_FI_DEV_ECC_DBE_AGG_DEV 329 + /** * Register File single bit aggregate (persistent) ECC errors * Note: monotonically increasing - */ -#define DCGM_FI_DEV_ECC_SBE_AGG_REG 330 - + */ +#define DCGM_FI_DEV_ECC_SBE_AGG_REG 
330 + /** * Register File double bit aggregate (persistent) ECC errors * Note: monotonically increasing */ -#define DCGM_FI_DEV_ECC_DBE_AGG_REG 331 - +#define DCGM_FI_DEV_ECC_DBE_AGG_REG 331 + /** * Texture memory single bit aggregate (persistent) ECC errors * Note: monotonically increasing */ -#define DCGM_FI_DEV_ECC_SBE_AGG_TEX 332 +#define DCGM_FI_DEV_ECC_SBE_AGG_TEX 332 /** * Texture memory double bit aggregate (persistent) ECC errors * Note: monotonically increasing - */ -#define DCGM_FI_DEV_ECC_DBE_AGG_TEX 333 - + */ +#define DCGM_FI_DEV_ECC_DBE_AGG_TEX 333 + /** * Number of retired pages because of single bit errors * Note: monotonically increasing */ -#define DCGM_FI_DEV_RETIRED_SBE 390 +#define DCGM_FI_DEV_RETIRED_SBE 390 /** * Number of retired pages because of double bit errors * Note: monotonically increasing */ -#define DCGM_FI_DEV_RETIRED_DBE 391 +#define DCGM_FI_DEV_RETIRED_DBE 391 /** * Number of pages pending retirement */ -#define DCGM_FI_DEV_RETIRED_PENDING 392 +#define DCGM_FI_DEV_RETIRED_PENDING 392 + +/** + * Number of remapped rows for uncorrectable errors + */ +#define DCGM_FI_DEV_UNCORRECTABLE_REMAPPED_ROWS 393 + +/** + * Number of remapped rows for correctable errors + */ +#define DCGM_FI_DEV_CORRECTABLE_REMAPPED_ROWS 394 + +/** + * Whether remapping of rows has failed + */ +#define DCGM_FI_DEV_ROW_REMAP_FAILURE 395 /* -* NV Link flow control CRC Error Counter for Lane 0 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L0 400 + * NV Link flow control CRC Error Counter for Lane 0 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L0 400 /* -* NV Link flow control CRC Error Counter for Lane 1 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L1 401 + * NV Link flow control CRC Error Counter for Lane 1 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L1 401 /* -* NV Link flow control CRC Error Counter for Lane 2 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L2 402 + * NV Link flow control CRC Error Counter for Lane 2 + 
*/ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L2 402 /* -* NV Link flow control CRC Error Counter for Lane 3 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L3 403 + * NV Link flow control CRC Error Counter for Lane 3 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L3 403 /* -* NV Link flow control CRC Error Counter for Lane 4 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L4 404 + * NV Link flow control CRC Error Counter for Lane 4 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L4 404 /* -* NV Link flow control CRC Error Counter for Lane 5 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L5 405 + * NV Link flow control CRC Error Counter for Lane 5 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L5 405 /* -* NV Link flow control CRC Error Counter total for all Lanes -*/ -#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL 409 + * NV Link flow control CRC Error Counter total for all Lanes + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL 409 /* -* NV Link data CRC Error Counter for Lane 0 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L0 410 + * NV Link data CRC Error Counter for Lane 0 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L0 410 /* -* NV Link data CRC Error Counter for Lane 1 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L1 411 + * NV Link data CRC Error Counter for Lane 1 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L1 411 /* -* NV Link data CRC Error Counter for Lane 2 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L2 412 + * NV Link data CRC Error Counter for Lane 2 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L2 412 /* -* NV Link data CRC Error Counter for Lane 3 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L3 413 + * NV Link data CRC Error Counter for Lane 3 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L3 413 /* -* NV Link data CRC Error Counter for Lane 4 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L4 414 + * NV Link data CRC 
Error Counter for Lane 4 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L4 414 /* -* NV Link data CRC Error Counter for Lane 5 -*/ -#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L5 415 + * NV Link data CRC Error Counter for Lane 5 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L5 415 /* -* NV Link data CRC Error Counter total for all Lanes -*/ -#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL 419 + * NV Link data CRC Error Counter total for all Lanes + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL 419 /* -* NV Link Replay Error Counter for Lane 0 -*/ -#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L0 420 + * NV Link Replay Error Counter for Lane 0 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L0 420 /* -* NV Link Replay Error Counter for Lane 1 -*/ -#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L1 421 + * NV Link Replay Error Counter for Lane 1 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L1 421 /* -* NV Link Replay Error Counter for Lane 2 -*/ -#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L2 422 + * NV Link Replay Error Counter for Lane 2 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L2 422 /* -* NV Link Replay Error Counter for Lane 3 -*/ -#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L3 423 + * NV Link Replay Error Counter for Lane 3 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L3 423 /* -* NV Link Replay Error Counter for Lane 4 -*/ -#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L4 424 + * NV Link Replay Error Counter for Lane 4 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L4 424 /* -* NV Link Replay Error Counter for Lane 5 -*/ -#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L5 425 + * NV Link Replay Error Counter for Lane 5 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L5 425 /* -* NV Link Replay Error Counter total for all Lanes -*/ -#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL 429 + * NV Link Replay Error Counter total for all Lanes + */ +#define 
DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL 429 /* -* NV Link Recovery Error Counter for Lane 0 -*/ -#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L0 430 + * NV Link Recovery Error Counter for Lane 0 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L0 430 /* -* NV Link Recovery Error Counter for Lane 1 -*/ -#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L1 431 + * NV Link Recovery Error Counter for Lane 1 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L1 431 /* -* NV Link Recovery Error Counter for Lane 2 -*/ -#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L2 432 + * NV Link Recovery Error Counter for Lane 2 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L2 432 /* -* NV Link Recovery Error Counter for Lane 3 -*/ -#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L3 433 + * NV Link Recovery Error Counter for Lane 3 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L3 433 /* -* NV Link Recovery Error Counter for Lane 4 -*/ -#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L4 434 + * NV Link Recovery Error Counter for Lane 4 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L4 434 /* -* NV Link Recovery Error Counter for Lane 5 -*/ -#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L5 435 + * NV Link Recovery Error Counter for Lane 5 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L5 435 /* -* NV Link Recovery Error Counter total for all Lanes -*/ -#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_TOTAL 439 + * NV Link Recovery Error Counter total for all Lanes + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_TOTAL 439 /* -* NV Link Bandwidth Counter for Lane 0 -*/ -#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L0 440 + * NV Link Bandwidth Counter for Lane 0 - Not supported in DCGM 2.0 + */ +#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L0 440 /* -* NV Link Bandwidth Counter for Lane 1 -*/ -#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L1 441 + * NV Link Bandwidth Counter for Lane 1 - Not supported in DCGM 2.0 + */ +#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L1 441 
/* -* NV Link Bandwidth Counter for Lane 2 -*/ -#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L2 442 + * NV Link Bandwidth Counter for Lane 2 - Not supported in DCGM 2.0 + */ +#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L2 442 /* -* NV Link Bandwidth Counter for Lane 3 -*/ -#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L3 443 + * NV Link Bandwidth Counter for Lane 3 - Not supported in DCGM 2.0 + */ +#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L3 443 /* -* NV Link Bandwidth Counter for Lane 4 -*/ -#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L4 444 + * NV Link Bandwidth Counter for Lane 4 - Not supported in DCGM 2.0 + */ +#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L4 444 /* -* NV Link Bandwidth Counter for Lane 5 -*/ -#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L5 445 + * NV Link Bandwidth Counter for Lane 5 - Not supported in DCGM 2.0 + */ +#define DCGM_FI_DEV_NVLINK_BANDWIDTH_L5 445 /* -* NV Link Bandwidth Counter total for all Lanes -*/ -#define DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL 449 + * NV Link Bandwidth Counter total for all Lanes + */ +#define DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL 449 /* -* GPU NVLink error information -*/ -#define DCGM_FI_DEV_GPU_NVLINK_ERRORS 450 + * GPU NVLink error information + */ +#define DCGM_FI_DEV_GPU_NVLINK_ERRORS 450 + +/* + * NV Link flow control CRC Error Counter for Lane 6 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L6 451 + +/* + * NV Link flow control CRC Error Counter for Lane 7 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L7 452 + +/* + * NV Link flow control CRC Error Counter for Lane 8 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L8 453 + +/* + * NV Link flow control CRC Error Counter for Lane 9 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L9 454 + +/* + * NV Link flow control CRC Error Counter for Lane 10 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L10 455 + +/* + * NV Link flow control CRC Error Counter for Lane 11 + */ +#define DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L11 456 + +/* + * NV Link data CRC Error Counter for Lane 6 + 
*/ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L6 457 + +/* + * NV Link data CRC Error Counter for Lane 7 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L7 458 + +/* + * NV Link data CRC Error Counter for Lane 8 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L8 459 + +/* + * NV Link data CRC Error Counter for Lane 9 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L9 460 + +/* + * NV Link data CRC Error Counter for Lane 10 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L10 461 + +/* + * NV Link data CRC Error Counter for Lane 11 + */ +#define DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L11 462 + +/* + * NV Link Replay Error Counter for Lane 6 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L6 463 + +/* + * NV Link Replay Error Counter for Lane 7 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L7 464 + +/* + * NV Link Replay Error Counter for Lane 8 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L8 465 + +/* + * NV Link Replay Error Counter for Lane 9 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L9 466 + +/* + * NV Link Replay Error Counter for Lane 10 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L10 467 + +/* + * NV Link Replay Error Counter for Lane 11 + */ +#define DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L11 468 + +/* + * NV Link Recovery Error Counter for Lane 6 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L6 469 + +/* + * NV Link Recovery Error Counter for Lane 7 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L7 470 + +/* + * NV Link Recovery Error Counter for Lane 8 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L8 471 + +/* + * NV Link Recovery Error Counter for Lane 9 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L9 472 + +/* + * NV Link Recovery Error Counter for Lane 10 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L10 473 + +/* + * NV Link Recovery Error Counter for Lane 11 + */ +#define DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L11 474 /** - * Virtualization Mode 
corresponding to the GPU + * Virtualization Mode corresponding to the GPU. + * + * One of DCGM_GPU_VIRTUALIZATION_MODE_* constants. */ -#define DCGM_FI_DEV_VIRTUAL_MODE 500 +#define DCGM_FI_DEV_VIRTUAL_MODE 500 /** * Includes Count and Static info of vGPU types supported on a device */ -#define DCGM_FI_DEV_SUPPORTED_TYPE_INFO 501 +#define DCGM_FI_DEV_SUPPORTED_TYPE_INFO 501 /** * Includes Count and currently Creatable vGPU types on a device */ -#define DCGM_FI_DEV_CREATABLE_VGPU_TYPE_IDS 502 +#define DCGM_FI_DEV_CREATABLE_VGPU_TYPE_IDS 502 /** * Includes Count and currently Active vGPU Instances on a device */ -#define DCGM_FI_DEV_VGPU_INSTANCE_IDS 503 +#define DCGM_FI_DEV_VGPU_INSTANCE_IDS 503 /** * Utilization values for vGPUs running on the device */ -#define DCGM_FI_DEV_VGPU_UTILIZATIONS 504 +#define DCGM_FI_DEV_VGPU_UTILIZATIONS 504 /** * Utilization values for processes running within vGPU VMs using the device */ -#define DCGM_FI_DEV_VGPU_PER_PROCESS_UTILIZATION 505 +#define DCGM_FI_DEV_VGPU_PER_PROCESS_UTILIZATION 505 /** * Current encoder statistics for a given device */ -#define DCGM_FI_DEV_ENC_STATS 506 +#define DCGM_FI_DEV_ENC_STATS 506 /** * Statistics of current active frame buffer capture sessions on a given device */ -#define DCGM_FI_DEV_FBC_STATS 507 +#define DCGM_FI_DEV_FBC_STATS 507 /** * Information about active frame buffer capture sessions on a target device */ -#define DCGM_FI_DEV_FBC_SESSIONS_INFO 508 +#define DCGM_FI_DEV_FBC_SESSIONS_INFO 508 /** * VM ID of the vGPU instance */ -#define DCGM_FI_DEV_VGPU_VM_ID 520 +#define DCGM_FI_DEV_VGPU_VM_ID 520 /** * VM name of the vGPU instance */ -#define DCGM_FI_DEV_VGPU_VM_NAME 521 +#define DCGM_FI_DEV_VGPU_VM_NAME 521 /** * vGPU type of the vGPU instance */ -#define DCGM_FI_DEV_VGPU_TYPE 522 +#define DCGM_FI_DEV_VGPU_TYPE 522 /** * UUID of the vGPU instance */ -#define DCGM_FI_DEV_VGPU_UUID 523 +#define DCGM_FI_DEV_VGPU_UUID 523 /** * Driver version of the vGPU instance */ -#define 
DCGM_FI_DEV_VGPU_DRIVER_VERSION 524 +#define DCGM_FI_DEV_VGPU_DRIVER_VERSION 524 /** * Memory usage of the vGPU instance */ -#define DCGM_FI_DEV_VGPU_MEMORY_USAGE 525 +#define DCGM_FI_DEV_VGPU_MEMORY_USAGE 525 /** * License status of the vGPU instance */ -#define DCGM_FI_DEV_VGPU_LICENSE_STATUS 526 +#define DCGM_FI_DEV_VGPU_LICENSE_STATUS 526 /** * Frame rate limit of the vGPU instance */ -#define DCGM_FI_DEV_VGPU_FRAME_RATE_LIMIT 527 +#define DCGM_FI_DEV_VGPU_FRAME_RATE_LIMIT 527 /** * Current encoder statistics of the vGPU instance */ -#define DCGM_FI_DEV_VGPU_ENC_STATS 528 +#define DCGM_FI_DEV_VGPU_ENC_STATS 528 /** * Information about all active encoder sessions on the vGPU instance */ -#define DCGM_FI_DEV_VGPU_ENC_SESSIONS_INFO 529 +#define DCGM_FI_DEV_VGPU_ENC_SESSIONS_INFO 529 /** * Statistics of current active frame buffer capture sessions on the vGPU instance */ -#define DCGM_FI_DEV_VGPU_FBC_STATS 530 +#define DCGM_FI_DEV_VGPU_FBC_STATS 530 /** * Information about active frame buffer capture sessions on the vGPU instance */ -#define DCGM_FI_DEV_VGPU_FBC_SESSIONS_INFO 531 +#define DCGM_FI_DEV_VGPU_FBC_SESSIONS_INFO 531 /** * Starting field ID of the vGPU instance */ -#define DCGM_FI_FIRST_VGPU_FIELD_ID 520 +#define DCGM_FI_FIRST_VGPU_FIELD_ID 520 /** * Last field ID of the vGPU instance */ -#define DCGM_FI_LAST_VGPU_FIELD_ID 570 +#define DCGM_FI_LAST_VGPU_FIELD_ID 570 /** * For now max vGPU field Ids taken as difference of DCGM_FI_LAST_VGPU_FIELD_ID and DCGM_FI_LAST_VGPU_FIELD_ID i.e. 50 */ -#define DCGM_FI_MAX_VGPU_FIELDS DCGM_FI_LAST_VGPU_FIELD_ID - DCGM_FI_FIRST_VGPU_FIELD_ID +#define DCGM_FI_MAX_VGPU_FIELDS DCGM_FI_LAST_VGPU_FIELD_ID - DCGM_FI_FIRST_VGPU_FIELD_ID /** * Starting ID for all the internal fields */ -#define DCGM_FI_INTERNAL_FIELDS_0_START 600 +#define DCGM_FI_INTERNAL_FIELDS_0_START 600 /** * Last ID for all the internal fields */ /** -*

 

-*

 

-*

 

-*

NVSwitch entity field IDs start here.

-*

 

-*

 

-*

NVSwitch latency bins for port 0

-*/ - -#define DCGM_FI_INTERNAL_FIELDS_0_END 699 - - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P00 700 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P00 701 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P00 702 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 1

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P00 703 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P01 704 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P01 705 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P01 706 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 2

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P01 707 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P02 708 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P02 709 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P02 710 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 3

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P02 711 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P03 712 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P03 713 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P03 714 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 4

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P03 715 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P04 716 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P04 717 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P04 718 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 5

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P04 719 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P05 720 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P05 721 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P05 722 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 6

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P05 723 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P06 724 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P06 725 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P06 726 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 7

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P06 727 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P07 728 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P07 729 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P07 730 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 8

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P07 731 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P08 732 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P08 733 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P08 734 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 9

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P08 735 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P09 736 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P09 737 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P09 738 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 10

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P09 739 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P10 740 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P10 741 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P10 742 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 11

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P10 743 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P11 744 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P11 745 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P11 746 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 12

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P11 747 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P12 748 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P12 749 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P12 750 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 13

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P12 751 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P13 752 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P13 753 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P13 754 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 14

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P13 755 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P14 756 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P14 757 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P14 758 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 15

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P14 759 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P15 760 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P15 761 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P15 762 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 16

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P15 763 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P16 764 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P16 765 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P16 766 -/** -* Max latency bin -*

 

-*

 

-*

NVSwitch latency bins for port 17

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P16 767 - -/** -*

Low latency bin

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P17 768 -/** -* Medium latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P17 769 -/** -* High latency bin -*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P17 770 -/** -*

Max latency bin

-*

 

-*

 

-*

 

-*

NVSwitch Tx and Rx Counter 0 for each port

-*

By default, Counter 0 counts bytes.

-*/ -#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P17 771 - -/** -*

NVSwitch Tx Bandwidth Counter 0 for port 0

-*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P00 780 -/** -* NVSwitch Rx Bandwidth Counter 0 for port 0 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P00 781 - -/** -* NVSwitch Tx Bandwidth Counter 0 for port 1 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P01 782 -/** -* NVSwitch Rx Bandwidth Counter 0 for port 1 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P01 783 - -/** -* NVSwitch Tx Bandwidth Counter 0 for port 2 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P02 784 -/** -* NVSwitch Rx Bandwidth Counter 0 for port 2 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P02 785 - -/** -* NVSwitch Tx Bandwidth Counter 0 for port 3 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P03 786 + *

 

+ *

 

+ *

 

+ *

NVSwitch entity field IDs start here.

+ *

 

+ *

 

+ *

NVSwitch latency bins for port 0

+ */ + +#define DCGM_FI_INTERNAL_FIELDS_0_END 699 + + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P00 700 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P00 701 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P00 702 /** -* NVSwitch Rx Bandwidth Counter 0 for port 3 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P03 787 + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 1

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P00 703 /** -* NVSwitch Tx Bandwidth Counter 0 for port 4 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P04 788 + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P01 704 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P01 705 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P01 706 /** -* NVSwitch Rx Bandwidth Counter 0 for port 4 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P04 789 + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 2

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P01 707 /** -* NVSwitch Tx Bandwidth Counter 0 for port 5 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P05 790 + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P02 708 /** -* NVSwitch Rx Bandwidth Counter 0 for port 5 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P05 791 + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P02 709 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P02 710 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 3

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P02 711 /** -* NVSwitch Tx Bandwidth Counter 0 for port 6 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P06 792 + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P03 712 /** -* NVSwitch Rx Bandwidth Counter 0 for port 6 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P06 793 + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P03 713 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P03 714 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 4

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P03 715 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P04 716 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P04 717 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P04 718 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 5

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P04 719 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P05 720 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P05 721 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P05 722 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 6

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P05 723 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P06 724 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P06 725 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P06 726 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 7

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P06 727 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P07 728 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P07 729 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P07 730 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 8

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P07 731 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P08 732 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P08 733 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P08 734 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 9

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P08 735 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P09 736 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P09 737 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P09 738 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 10

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P09 739 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P10 740 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P10 741 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P10 742 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 11

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P10 743 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P11 744 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P11 745 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P11 746 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 12

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P11 747 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P12 748 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P12 749 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P12 750 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 13

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P12 751 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P13 752 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P13 753 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P13 754 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 14

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P13 755 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P14 756 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P14 757 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P14 758 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 15

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P14 759 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P15 760 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P15 761 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P15 762 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 16

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P15 763 + +/** + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P16 764 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P16 765 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P16 766 +/** + * Max latency bin + *

 

+ *

 

+ *

NVSwitch latency bins for port 17

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P16 767 /** -* NVSwitch Tx Bandwidth Counter 0 for port 7 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P07 794 + *

Low latency bin

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P17 768 +/** + * Medium latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P17 769 +/** + * High latency bin + */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P17 770 /** -* NVSwitch Rx Bandwidth Counter 0 for port 7 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P07 795 + *

Max latency bin

+ *

 

+ *

 

+ *

 

+ *

NVSwitch Tx and Rx Counter 0 for each port

+ *

By default, Counter 0 counts bytes.

+ */ +#define DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P17 771 /** -* NVSwitch Tx Bandwidth Counter 0 for port 8 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P08 796 + *

NVSwitch Tx Bandwidth Counter 0 for port 0

+ */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P00 780 /** -* NVSwitch Rx Bandwidth Counter 0 for port 8 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P08 797 + * NVSwitch Rx Bandwidth Counter 0 for port 0 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P00 781 /** -* NVSwitch Tx Bandwidth Counter 0 for port 9 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P09 798 + * NVSwitch Tx Bandwidth Counter 0 for port 1 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P01 782 /** -* NVSwitch Rx Bandwidth Counter 0 for port 9 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P09 799 + * NVSwitch Rx Bandwidth Counter 0 for port 1 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P01 783 /** -* NVSwitch Tx Bandwidth Counter 0 for port 10 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P10 800 + * NVSwitch Tx Bandwidth Counter 0 for port 2 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P02 784 /** -* NVSwitch Rx Bandwidth Counter 0 for port 10 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P10 801 + * NVSwitch Rx Bandwidth Counter 0 for port 2 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P02 785 /** -* NVSwitch Tx Bandwidth Counter 0 for port 11 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P11 802 + * NVSwitch Tx Bandwidth Counter 0 for port 3 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P03 786 /** -* NVSwitch Rx Bandwidth Counter 0 for port 11 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P11 803 - + * NVSwitch Rx Bandwidth Counter 0 for port 3 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P03 787 + /** -* NVSwitch Tx Bandwidth Counter 0 for port 12 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P12 804 + * NVSwitch Tx Bandwidth Counter 0 for port 4 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P04 788 /** -* NVSwitch Rx Bandwidth Counter 0 for port 12 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P12 805 + * NVSwitch Rx Bandwidth Counter 0 for port 4 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P04 789 /** -* NVSwitch Tx 
Bandwidth Counter 0 for port 13 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P13 806 + * NVSwitch Tx Bandwidth Counter 0 for port 5 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P05 790 /** -* NVSwitch Rx Bandwidth Counter 0 for port 13 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P13 807 + * NVSwitch Rx Bandwidth Counter 0 for port 5 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P05 791 /** -* NVSwitch Tx Bandwidth Counter 0 for port 14 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P14 808 + * NVSwitch Tx Bandwidth Counter 0 for port 6 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P06 792 /** -* NVSwitch Rx Bandwidth Counter 0 for port 14 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P14 809 + * NVSwitch Rx Bandwidth Counter 0 for port 6 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P06 793 /** -* NVSwitch Tx Bandwidth Counter 0 for port 15 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P15 810 + * NVSwitch Tx Bandwidth Counter 0 for port 7 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P07 794 /** -* NVSwitch Rx Bandwidth Counter 0 for port 15 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P15 811 + * NVSwitch Rx Bandwidth Counter 0 for port 7 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P07 795 /** -* NVSwitch Tx Bandwidth Counter 0 for port 16 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P16 812 + * NVSwitch Tx Bandwidth Counter 0 for port 8 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P08 796 /** -* NVSwitch Rx Bandwidth Counter 0 for port 16 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P16 813 + * NVSwitch Rx Bandwidth Counter 0 for port 8 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P08 797 /** -* NVSwitch Tx Bandwidth Counter 0 for port 17 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P17 814 + * NVSwitch Tx Bandwidth Counter 0 for port 9 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P09 798 /** -*

NVSwitch Rx Bandwidth Counter 0 for port 17

-*

 

-*

 

-*

 

-*

NVSwitch Tx and RX Bandwidth Counter 1 for each port

-*

By default, Counter 1 counts packets.

-*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P17 815 + * NVSwitch Rx Bandwidth Counter 0 for port 9 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P09 799 /** -*

NVSwitch Tx Bandwidth Counter 1 for port 0

-*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P00 820 + * NVSwitch Tx Bandwidth Counter 0 for port 10 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P10 800 /** -* NVSwitch Rx Bandwidth Counter 1 for port 0 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P00 821 + * NVSwitch Rx Bandwidth Counter 0 for port 10 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P10 801 /** -* NVSwitch Tx Bandwidth Counter 1 for port 1 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P01 822 + * NVSwitch Tx Bandwidth Counter 0 for port 11 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P11 802 /** -* NVSwitch Rx Bandwidth Counter 1 for port 1 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P01 823 + * NVSwitch Rx Bandwidth Counter 0 for port 11 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P11 803 /** -* NVSwitch Tx Bandwidth Counter 1 for port 2 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P02 824 + * NVSwitch Tx Bandwidth Counter 0 for port 12 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P12 804 /** -* NVSwitch Rx Bandwidth Counter 1 for port 2 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P02 825 + * NVSwitch Rx Bandwidth Counter 0 for port 12 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P12 805 /** -* NVSwitch Tx Bandwidth Counter 1 for port 3 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P03 826 + * NVSwitch Tx Bandwidth Counter 0 for port 13 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P13 806 /** -* NVSwitch Rx Bandwidth Counter 1 for port 3 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P03 827 + * NVSwitch Rx Bandwidth Counter 0 for port 13 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P13 807 /** -* NVSwitch Tx Bandwidth Counter 1 for port 4 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P04 828 + * NVSwitch Tx Bandwidth Counter 0 for port 14 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P14 808 /** -* NVSwitch Rx Bandwidth Counter 1 for port 4 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P04 829 + * NVSwitch Rx Bandwidth 
Counter 0 for port 14 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P14 809 /** -* NVSwitch Tx Bandwidth Counter 1 for port 5 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P05 830 + * NVSwitch Tx Bandwidth Counter 0 for port 15 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P15 810 /** -* NVSwitch Rx Bandwidth Counter 1 for port 5 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P05 831 + * NVSwitch Rx Bandwidth Counter 0 for port 15 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P15 811 /** -* NVSwitch Tx Bandwidth Counter 1 for port 6 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P06 832 + * NVSwitch Tx Bandwidth Counter 0 for port 16 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P16 812 /** -* NVSwitch Rx Bandwidth Counter 1 for port 6 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P06 833 + * NVSwitch Rx Bandwidth Counter 0 for port 16 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P16 813 /** -* NVSwitch Tx Bandwidth Counter 1 for port 7 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P07 834 + * NVSwitch Tx Bandwidth Counter 0 for port 17 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_0_P17 814 /** -* NVSwitch Rx Bandwidth Counter 1 for port 7 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P07 835 + *

NVSwitch Rx Bandwidth Counter 0 for port 17

+ *

 

+ *

 

+ *

 

+ *

NVSwitch Tx and RX Bandwidth Counter 1 for each port

+ *

By default, Counter 1 counts packets.

+ */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_0_P17 815 /** -* NVSwitch Tx Bandwidth Counter 1 for port 8 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P08 836 + *

NVSwitch Tx Bandwidth Counter 1 for port 0

+ */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P00 820 /** -* NVSwitch Rx Bandwidth Counter 1 for port 8 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P08 837 + * NVSwitch Rx Bandwidth Counter 1 for port 0 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P00 821 /** -* NVSwitch Tx Bandwidth Counter 1 for port 9 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P09 838 + * NVSwitch Tx Bandwidth Counter 1 for port 1 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P01 822 /** -* NVSwitch Rx Bandwidth Counter 1 for port 9 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P09 839 + * NVSwitch Rx Bandwidth Counter 1 for port 1 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P01 823 /** -* NVSwitch Tx Bandwidth Counter 0 for port 10 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P10 840 + * NVSwitch Tx Bandwidth Counter 1 for port 2 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P02 824 /** -* NVSwitch Rx Bandwidth Counter 1 for port 10 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P10 841 + * NVSwitch Rx Bandwidth Counter 1 for port 2 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P02 825 /** -* NVSwitch Tx Bandwidth Counter 1 for port 11 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P11 842 + * NVSwitch Tx Bandwidth Counter 1 for port 3 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P03 826 /** -* NVSwitch Rx Bandwidth Counter 1 for port 11 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P11 843 + * NVSwitch Rx Bandwidth Counter 1 for port 3 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P03 827 /** -* NVSwitch Tx Bandwidth Counter 1 for port 12 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P12 844 + * NVSwitch Tx Bandwidth Counter 1 for port 4 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P04 828 /** -* NVSwitch Rx Bandwidth Counter 1 for port 12 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P12 845 + * NVSwitch Rx Bandwidth Counter 1 for port 4 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P04 829 /** -* NVSwitch Tx 
Bandwidth Counter 0 for port 13 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P13 846 + * NVSwitch Tx Bandwidth Counter 1 for port 5 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P05 830 /** -* NVSwitch Rx Bandwidth Counter 1 for port 13 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P13 847 + * NVSwitch Rx Bandwidth Counter 1 for port 5 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P05 831 /** -* NVSwitch Tx Bandwidth Counter 1 for port 14 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P14 848 + * NVSwitch Tx Bandwidth Counter 1 for port 6 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P06 832 /** -* NVSwitch Rx Bandwidth Counter 1 for port 14 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P14 849 + * NVSwitch Rx Bandwidth Counter 1 for port 6 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P06 833 /** -* NVSwitch Tx Bandwidth Counter 1 for port 15 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P15 850 + * NVSwitch Tx Bandwidth Counter 1 for port 7 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P07 834 /** -* NVSwitch Rx Bandwidth Counter 1 for port 15 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P15 851 + * NVSwitch Rx Bandwidth Counter 1 for port 7 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P07 835 /** -* NVSwitch Tx Bandwidth Counter 1 for port 16 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P16 852 + * NVSwitch Tx Bandwidth Counter 1 for port 8 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P08 836 /** -* NVSwitch Rx Bandwidth Counter 1 for port 16 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P16 853 + * NVSwitch Rx Bandwidth Counter 1 for port 8 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P08 837 /** -* NVSwitch Tx Bandwidth Counter 1 for port 17 -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P17 854 + * NVSwitch Tx Bandwidth Counter 1 for port 9 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P09 838 +/** + * NVSwitch Rx Bandwidth Counter 1 for port 9 + */ +#define 
DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P09 839 + +/** + * NVSwitch Tx Bandwidth Counter 0 for port 10 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P10 840 +/** + * NVSwitch Rx Bandwidth Counter 1 for port 10 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P10 841 + +/** + * NVSwitch Tx Bandwidth Counter 1 for port 11 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P11 842 +/** + * NVSwitch Rx Bandwidth Counter 1 for port 11 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P11 843 + +/** + * NVSwitch Tx Bandwidth Counter 1 for port 12 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P12 844 +/** + * NVSwitch Rx Bandwidth Counter 1 for port 12 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P12 845 + +/** + * NVSwitch Tx Bandwidth Counter 0 for port 13 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P13 846 +/** + * NVSwitch Rx Bandwidth Counter 1 for port 13 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P13 847 + +/** + * NVSwitch Tx Bandwidth Counter 1 for port 14 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P14 848 +/** + * NVSwitch Rx Bandwidth Counter 1 for port 14 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P14 849 + +/** + * NVSwitch Tx Bandwidth Counter 1 for port 15 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P15 850 /** -* NVSwitch Rx Bandwidth Counter 1 for port 17 -*

 

-*

 

-*

 

-* NVSwitch error counters -*/ -#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P17 855 + * NVSwitch Rx Bandwidth Counter 1 for port 15 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P15 851 /** -* NVSwitch fatal error information. -* Note: value field indicates the specific SXid reported -*/ -#define DCGM_FI_DEV_NVSWITCH_FATAL_ERRORS 856 + * NVSwitch Tx Bandwidth Counter 1 for port 16 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P16 852 +/** + * NVSwitch Rx Bandwidth Counter 1 for port 16 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P16 853 /** -* NVSwitch non fatal error information. -* Note: value field indicates the specific SXid reported -*/ -#define DCGM_FI_DEV_NVSWITCH_NON_FATAL_ERRORS 857 + * NVSwitch Tx Bandwidth Counter 1 for port 17 + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_TX_1_P17 854 +/** + * NVSwitch Rx Bandwidth Counter 1 for port 17 + *

 

+ *

 

+ *

 

+ * NVSwitch error counters + */ +#define DCGM_FI_DEV_NVSWITCH_BANDWIDTH_RX_1_P17 855 + +/** + * NVSwitch fatal error information. + * Note: value field indicates the specific SXid reported + */ +#define DCGM_FI_DEV_NVSWITCH_FATAL_ERRORS 856 + +/** + * NVSwitch non fatal error information. + * Note: value field indicates the specific SXid reported + */ +#define DCGM_FI_DEV_NVSWITCH_NON_FATAL_ERRORS 857 /** * Starting field ID of the NVSwitch instance */ -#define DCGM_FI_FIRST_NVSWITCH_FIELD_ID 700 +#define DCGM_FI_FIRST_NVSWITCH_FIELD_ID 700 /** * Last field ID of the NVSwitch instance */ -#define DCGM_FI_LAST_NVSWITCH_FIELD_ID 860 +#define DCGM_FI_LAST_NVSWITCH_FIELD_ID 860 /** - * For now max NVSwitch field Ids taken as difference of DCGM_FI_LAST_NVSWITCH_FIELD_ID and DCGM_FI_FIRST_NVSWITCH_FIELD_ID + 1 i.e. 200 + * For now max NVSwitch field Ids taken as difference of DCGM_FI_LAST_NVSWITCH_FIELD_ID and + * DCGM_FI_FIRST_NVSWITCH_FIELD_ID + 1 i.e. 200 */ -#define DCGM_FI_MAX_NVSWITCH_FIELDS DCGM_FI_LAST_NVSWITCH_FIELD_ID - DCGM_FI_FIRST_NVSWITCH_FIELD_ID + 1 +#define DCGM_FI_MAX_NVSWITCH_FIELDS DCGM_FI_LAST_NVSWITCH_FIELD_ID - DCGM_FI_FIRST_NVSWITCH_FIELD_ID + 1 /** * Profiling Fields. These all start with DCGM_FI_PROF_* */ /** - * Ratio of time the graphics engine is active. The graphics engine is - * active if a graphics/compute context is bound and the graphics pipe or + * Ratio of time the graphics engine is active. The graphics engine is + * active if a graphics/compute context is bound and the graphics pipe or * compute pipe is busy. 
*/ -#define DCGM_FI_PROF_GR_ENGINE_ACTIVE 1001 +#define DCGM_FI_PROF_GR_ENGINE_ACTIVE 1001 /** - * The ratio of cycles an SM has at least 1 warp assigned - * (computed from the number of cycles and elapsed cycles) + * The ratio of cycles an SM has at least 1 warp assigned + * (computed from the number of cycles and elapsed cycles) */ -#define DCGM_FI_PROF_SM_ACTIVE 1002 +#define DCGM_FI_PROF_SM_ACTIVE 1002 /** - * The ratio of number of warps resident on an SM. - * (number of resident as a ratio of the theoretical + * The ratio of number of warps resident on an SM. + * (number of resident as a ratio of the theoretical * maximum number of warps per elapsed cycle) */ -#define DCGM_FI_PROF_SM_OCCUPANCY 1003 +#define DCGM_FI_PROF_SM_OCCUPANCY 1003 /** - * The ratio of cycles the tensor (HMMA) pipe is active + * The ratio of cycles the tensor (HMMA) pipe is active * (off the peak sustained elapsed cycles) */ -#define DCGM_FI_PROF_PIPE_TENSOR_ACTIVE 1004 +#define DCGM_FI_PROF_PIPE_TENSOR_ACTIVE 1004 /** - * The ratio of cycles the device memory interface is + * The ratio of cycles the device memory interface is * active sending or receiving data. */ -#define DCGM_FI_PROF_DRAM_ACTIVE 1005 +#define DCGM_FI_PROF_DRAM_ACTIVE 1005 /** * Ratio of cycles the fp64 pipe is active. */ -#define DCGM_FI_PROF_PIPE_FP64_ACTIVE 1006 +#define DCGM_FI_PROF_PIPE_FP64_ACTIVE 1006 /** * Ratio of cycles the fp32 pipe is active. */ -#define DCGM_FI_PROF_PIPE_FP32_ACTIVE 1007 +#define DCGM_FI_PROF_PIPE_FP32_ACTIVE 1007 /** * Ratio of cycles the fp16 pipe is active. This does not include HMMA. */ -#define DCGM_FI_PROF_PIPE_FP16_ACTIVE 1008 +#define DCGM_FI_PROF_PIPE_FP16_ACTIVE 1008 /** * The number of bytes of active PCIe tx (transmit) data including both header and payload. - * + * * Note that this is from the perspective of the GPU, so copying data from device to host (DtoH) * would be reflected in this metric. 
*/ -#define DCGM_FI_PROF_PCIE_TX_BYTES 1009 +#define DCGM_FI_PROF_PCIE_TX_BYTES 1009 /** * The number of bytes of active PCIe rx (read) data including both header and payload. - * + * * Note that this is from the perspective of the GPU, so copying data from host to device (HtoD) * would be reflected in this metric. */ -#define DCGM_FI_PROF_PCIE_RX_BYTES 1010 +#define DCGM_FI_PROF_PCIE_RX_BYTES 1010 /** * The number of bytes of active NvLink tx (transmit) data including both header and payload. */ -#define DCGM_FI_PROF_NVLINK_TX_BYTES 1011 +#define DCGM_FI_PROF_NVLINK_TX_BYTES 1011 /** * The number of bytes of active NvLink rx (read) data including both header and payload. */ -#define DCGM_FI_PROF_NVLINK_RX_BYTES 1012 +#define DCGM_FI_PROF_NVLINK_RX_BYTES 1012 /** * 1 greater than maximum fields above. This is the 1 greater than the maximum field id that could be allocated */ -#define DCGM_FI_MAX_FIELDS 1013 +#define DCGM_FI_MAX_FIELDS 1013 /** @} */ @@ -1903,11 +2099,11 @@ typedef unsigned int dcgm_field_eid_t; */ typedef struct { - char shortName[10]; /* Short name corresponding to field. This short name - is used to identify columns in dmon output.*/ - char unit[4]; /* The unit of value. Eg: C(elsius), W(att), MB/s*/ - short width; /* Maximum width/number of digits that a value for field can have.*/ -} dcgm_field_output_format_t,*dcgm_field_output_format_p; + char shortName[10]; /*!< Short name corresponding to field. This short name is used to identify columns in dmon + output.*/ + char unit[4]; /*!< The unit of value. Eg: C(elsius), W(att), MB/s*/ + short width; /*!< Maximum width/number of digits that a value for field can have.*/ +} dcgm_field_output_format_t, *dcgm_field_output_format_p; /** * Structure to store meta data for the field @@ -1915,15 +2111,18 @@ typedef struct typedef struct { - unsigned short fieldId; /* Field identifier. DCGM_FI_? #define */ - char fieldType; /* Field type. DCGM_FT_? 
#define */ - unsigned char size; /* field size in bytes (raw value size). 0=variable (like DCGM_FT_STRING) */ - char tag[48]; /* Tag for this field for serialization like 'device_temperature' */ - int scope; /* Field scope. DCGM_FS_? #define of this field's association */ - int nvmlFieldId; /* Optional NVML field this DCGM field maps to. 0 = no mapping. Otherwise, - this should be a NVML_FI_? #define from nvml.h */ - - dcgm_field_output_format_p valueFormat; /* pointer to the structure that holds the formatting the values for fields */ + unsigned short fieldId; /*!< Field identifier. DCGM_FI_? #define */ + char fieldType; /*!< Field type. DCGM_FT_? #define */ + unsigned char size; /*!< field size in bytes (raw value size). 0=variable (like DCGM_FT_STRING) */ + char tag[48]; /*!< Tag for this field for serialization like 'device_temperature' */ + int scope; /*!< Field scope. DCGM_FS_? #define of this field's association */ + int nvmlFieldId; /*!< Optional NVML field this DCGM field maps to. 0 = no mapping. + Otherwise, this should be a NVML_FI_? #define from nvml.h */ + dcgm_field_entity_group_t + entityLevel; /*!< Field entity level. DCGM_FE_? specifying at what level the field is queryable */ + + dcgm_field_output_format_p valueFormat; /*!< pointer to the structure that holds the formatting the + values for fields */ } dcgm_field_meta_t, *dcgm_field_meta_p; /***************************************************************************************************/ @@ -1934,48 +2133,60 @@ typedef struct /** * Get a pointer to the metadata for a field by its field ID. See DCGM_FI_? for a list of field IDs. - * @param fieldId IN: One of the field IDs (DCGM_FI_?) + * + * @param fieldId IN: One of the field IDs (DCGM_FI_?) + * * @return - * 0 On Failure - * > 0 Pointer to field metadata structure if found. + * 0 On Failure + * >0 Pointer to field metadata structure if found. 
+ * */ dcgm_field_meta_p DcgmFieldGetById(unsigned short fieldId); /** * Get a pointer to the metadata for a field by its field tag. + * * @param tag IN: Tag for the field of interest + * * @return - * 0 On failure or not found - * > 0 Pointer to field metadata structure if found + * 0 On failure or not found + * >0 Pointer to field metadata structure if found + * */ dcgm_field_meta_p DcgmFieldGetByTag(char *tag); /** * Initialize the DcgmFields module. Call this once from inside * your program - * @return - * 0 On success - * <0 On error + * + * @return + * 0 On success + * <0 On error + * */ int DcgmFieldsInit(void); /** * Terminates the DcgmFields module. Call this once from inside your program - * @return - * 0 On success - * <0 On error + * + * @return + * 0 On success + * <0 On error + * */ int DcgmFieldsTerm(void); /** * Get the string version of a entityGroupId * - * Returns Pointer to a string like GPU/NvSwitch..etc - * Null on error + * @returns + * - Pointer to a string like GPU/NvSwitch..etc + * - Null on error + * */ char *DcgmFieldsGetEntityGroupString(dcgm_field_entity_group_t entityGroupId); -/** @} */ +/** @} */ #ifdef __cplusplus @@ -1983,4 +2194,4 @@ char *DcgmFieldsGetEntityGroupString(dcgm_field_entity_group_t entityGroupId); #endif -#endif //DCGMFIELDS_H +#endif // DCGMFIELDS_H diff --git a/bindings/go/dcgm/dcgm_structs.h b/bindings/go/dcgm/dcgm_structs.h index a882ce1..501de36 100644 --- a/bindings/go/dcgm/dcgm_structs.h +++ b/bindings/go/dcgm/dcgm_structs.h @@ -16,40 +16,41 @@ #ifndef DCGM_STRUCTS_H #define DCGM_STRUCTS_H -#ifdef __cplusplus -extern "C" { -#endif - -#include "dcgm_fields.h" +#include "dcgm_fields.h" #include + /***************************************************************************************************/ -/** @defgroup nvmlReturnEnums Enums and Macros +/** @defgroup dcgmReturnEnums Enums and Macros * @{ */ -/***************************************************************************************************/ 
+/***************************************************************************************************/ + +/** + * Creates a unique version number for each struct + */ +#define MAKE_DCGM_VERSION(typeName, ver) (unsigned int)(sizeof(typeName) | ((unsigned long)(ver) << 24U)) /** * Represents value of the field which can be returned by Host Engine in case the * operation is not successful - * */ #ifndef DCGM_BLANK_VALUES #define DCGM_BLANK_VALUES - + /** - * Base value for 32 bits integer blank. can be used as an unspecified blank + * Base value for 32 bits integer blank. can be used as an unspecified blank */ #define DCGM_INT32_BLANK 0x7ffffff0 - + /** - * Base value for 64 bits integer blank. can be used as an unspecified blank + * Base value for 64 bits integer blank. can be used as an unspecified blank */ #define DCGM_INT64_BLANK 0x7ffffffffffffff0 /** * Base value for double blank. 2 ** 47. FP 64 has 52 bits of mantissa, - * so 47 bits can still increment by 1 and represent each value from 0-15 + * so 47 bits can still increment by 1 and represent each value from 0-15 */ #define DCGM_FP64_BLANK 140737488355328.0 @@ -58,201 +59,192 @@ extern "C" { */ #define DCGM_STR_BLANK "<<>>" -/** - * Represents an error where INT32 data was not found - */ -#define DCGM_INT32_NOT_FOUND (DCGM_INT32_BLANK+1) - -/** - * Represents an error where INT64 data was not found - */ -#define DCGM_INT64_NOT_FOUND (DCGM_INT64_BLANK+1) - -/** - * Represents an error where FP64 data was not found - */ -#define DCGM_FP64_NOT_FOUND (DCGM_FP64_BLANK+1.0) - -/** - * Represents an error where STR data was not found - */ -#define DCGM_STR_NOT_FOUND "<<>>" - -/** - * Represents an error where fetching the INT32 value is not supported - */ -#define DCGM_INT32_NOT_SUPPORTED (DCGM_INT32_BLANK+2) - -/** - * Represents an error where fetching the INT64 value is not supported - */ -#define DCGM_INT64_NOT_SUPPORTED (DCGM_INT64_BLANK+2) - -/** - * Represents an error where fetching the FP64 value is not 
supported - */ -#define DCGM_FP64_NOT_SUPPORTED (DCGM_FP64_BLANK+2.0) - -/** - * Represents an error where fetching the STR value is not supported - */ -#define DCGM_STR_NOT_SUPPORTED "<<>>" - -/** - * Represents and error where fetching the INT32 value is not allowed with our current credentials - */ -#define DCGM_INT32_NOT_PERMISSIONED (DCGM_INT32_BLANK+3) - -/** - * Represents and error where fetching the INT64 value is not allowed with our current credentials - */ -#define DCGM_INT64_NOT_PERMISSIONED (DCGM_INT64_BLANK+3) - -/** - * Represents and error where fetching the FP64 value is not allowed with our current credentials - */ -#define DCGM_FP64_NOT_PERMISSIONED (DCGM_FP64_BLANK+3.0) - -/** - * Represents and error where fetching the STR value is not allowed with our current credentials - */ -#define DCGM_STR_NOT_PERMISSIONED "<<>>" - -/** - * Macro to check if a INT32 value is blank or not +/** + * Represents an error where INT32 data was not found + */ +#define DCGM_INT32_NOT_FOUND (DCGM_INT32_BLANK + 1) + +/** + * Represents an error where INT64 data was not found + */ +#define DCGM_INT64_NOT_FOUND (DCGM_INT64_BLANK + 1) + +/** + * Represents an error where FP64 data was not found + */ +#define DCGM_FP64_NOT_FOUND (DCGM_FP64_BLANK + 1.0) + +/** + * Represents an error where STR data was not found + */ +#define DCGM_STR_NOT_FOUND "<<>>" + +/** + * Represents an error where fetching the INT32 value is not supported + */ +#define DCGM_INT32_NOT_SUPPORTED (DCGM_INT32_BLANK + 2) + +/** + * Represents an error where fetching the INT64 value is not supported + */ +#define DCGM_INT64_NOT_SUPPORTED (DCGM_INT64_BLANK + 2) + +/** + * Represents an error where fetching the FP64 value is not supported + */ +#define DCGM_FP64_NOT_SUPPORTED (DCGM_FP64_BLANK + 2.0) + +/** + * Represents an error where fetching the STR value is not supported + */ +#define DCGM_STR_NOT_SUPPORTED "<<>>" + +/** + * Represents and error where fetching the INT32 value is not allowed with our 
current credentials + */ +#define DCGM_INT32_NOT_PERMISSIONED (DCGM_INT32_BLANK + 3) + +/** + * Represents and error where fetching the INT64 value is not allowed with our current credentials + */ +#define DCGM_INT64_NOT_PERMISSIONED (DCGM_INT64_BLANK + 3) + +/** + * Represents and error where fetching the FP64 value is not allowed with our current credentials + */ +#define DCGM_FP64_NOT_PERMISSIONED (DCGM_FP64_BLANK + 3.0) + +/** + * Represents and error where fetching the STR value is not allowed with our current credentials + */ +#define DCGM_STR_NOT_PERMISSIONED "<<>>" + +/** + * Macro to check if a INT32 value is blank or not */ #define DCGM_INT32_IS_BLANK(val) (((val) >= DCGM_INT32_BLANK) ? 1 : 0) - -/** - * Macro to check if a INT64 value is blank or not - */ + +/** + * Macro to check if a INT64 value is blank or not + */ #define DCGM_INT64_IS_BLANK(val) (((val) >= DCGM_INT64_BLANK) ? 1 : 0) - -/** - * Macro to check if a FP64 value is blank or not - */ + +/** + * Macro to check if a FP64 value is blank or not + */ #define DCGM_FP64_IS_BLANK(val) (((val) >= DCGM_FP64_BLANK ? 1 : 0)) - -/** - * Macro to check if a STR value is blank or not + +/** + * Macro to check if a STR value is blank or not * Works on (char *). Looks for <<< at first position and >>> inside string - */ + */ #define DCGM_STR_IS_BLANK(val) (val == strstr(val, "<<<") && strstr(val, ">>>")) -#endif //DCGM_BLANK_VALUES +#endif // DCGM_BLANK_VALUES /** * Max number of GPUs supported by DCGM - */ -#define DCGM_MAX_NUM_DEVICES 16 + */ +#define DCGM_MAX_NUM_DEVICES 32 /* DCGM 2.0 and newer = 32. DCGM 1.8 and older = 16. 
*/ /** * Number of NvLink links per GPU supported by DCGM - * This is 6 for Volta and 4 for Pascal + * This is 12 for Ampere, 6 for Volta, and 4 for Pascal + */ +#define DCGM_NVLINK_MAX_LINKS_PER_GPU 12 + +/** + * Maximum NvLink links pre-Ampere */ -#define DCGM_NVLINK_MAX_LINKS_PER_GPU 6 +#define DCGM_NVLINK_MAX_LINKS_PER_GPU_LEGACY1 6 /** - * Max number of NvSwitches supported by DCGM + * Max number of NvSwitches supported by DCGM **/ #define DCGM_MAX_NUM_SWITCHES 12 /** * Number of NvLink links per NvSwitch supported by DCGM */ -#define DCGM_NVLINK_MAX_LINKS_PER_NVSWITCH 18 +#define DCGM_NVLINK_MAX_LINKS_PER_NVSWITCH 36 /** * Maximum number of vGPU instances per physical GPU */ #define DCGM_MAX_VGPU_INSTANCES_PER_PGPU 32 -/** - * Max number of vGPUs supported on DCGM - */ -#define DCGM_MAX_NUM_VGPU_DEVICES DCGM_MAX_NUM_DEVICES * DCGM_MAX_VGPU_INSTANCES_PER_PGPU - /** * Max length of the DCGM string field */ -#define DCGM_MAX_STR_LENGTH 256 +#define DCGM_MAX_STR_LENGTH 256 /** * Max number of clocks supported for a device */ -#define DCGM_MAX_CLOCKS 256 +#define DCGM_MAX_CLOCKS 256 /** * Max limit on the number of groups supported by DCGM */ -#define DCGM_MAX_NUM_GROUPS 64 +#define DCGM_MAX_NUM_GROUPS 64 /** * Max number of active FBC sessions */ -#define DCGM_MAX_FBC_SESSIONS 256 - +#define DCGM_MAX_FBC_SESSIONS 256 /** - * Represents the size of a buffer that holds a vGPU type Name or vGPU class type or name of process running on vGPU instance. + * Represents the size of a buffer that holds a vGPU type Name or vGPU class type or name of process running on vGPU + * instance. 
*/ -#define DCGM_VGPU_NAME_BUFFER_SIZE 64 +#define DCGM_VGPU_NAME_BUFFER_SIZE 64 /** * Represents the size of a buffer that holds a vGPU license string */ -#define DCGM_GRID_LICENSE_BUFFER_SIZE 128 +#define DCGM_GRID_LICENSE_BUFFER_SIZE 128 /** * Default compute mode -- multiple contexts per device */ -#define DCGM_CONFIG_COMPUTEMODE_DEFAULT 0 - +#define DCGM_CONFIG_COMPUTEMODE_DEFAULT 0 + /** * Compute-prohibited mode -- no contexts per device */ -#define DCGM_CONFIG_COMPUTEMODE_PROHIBITED 1 - +#define DCGM_CONFIG_COMPUTEMODE_PROHIBITED 1 + /** - * Compute-exclusive-process mode -- only one context per device, usable from multiple threads at - * a time + * Compute-exclusive-process mode -- only one context per device, usable from multiple threads at a time */ -#define DCGM_CONFIG_COMPUTEMODE_EXCLUSIVE_PROCESS 2 - +#define DCGM_CONFIG_COMPUTEMODE_EXCLUSIVE_PROCESS 2 /** * Default Port Number for DCGM Host Engine */ #define DCGM_HE_PORT_NUMBER 5555 - -/** - * Creates a unique version number for each struct - */ -#define MAKE_DCGM_VERSION(typeName,ver) (unsigned int)(sizeof(typeName) | ((ver)<<24)) - -/***************************************************************************************************/ - - - +#ifdef __cplusplus +extern "C" { +#endif /** * Operation mode for DCGM - * - * DCGM can run in auto-mode where it runs additional threads in the background to collect + * + * DCGM can run in auto-mode where it runs additional threads in the background to collect * any metrics of interest and auto manages any operations needed for policy management. - * + * * DCGM can also operate in manual-mode where it's execution is controlled by the user. In * this mode, the user has to periodically call APIs such as \ref dcgmPolicyTrigger and * \ref dcgmUpdateAllFields which tells DCGM to wake up and perform data collection and * operations needed for policy management. 
*/ -typedef enum dcgmOperationMode_enum +typedef enum dcgmOperationMode_enum { DCGM_OPERATION_MODE_AUTO = 1, DCGM_OPERATION_MODE_MANUAL = 2 } dcgmOperationMode_t; - + /** * When more than one value is returned from a query, which order should it be returned in? */ @@ -262,12 +254,12 @@ typedef enum dcgmOrder_enum DCGM_ORDER_DESCENDING = 2 //!< Data with latest (highest) timestamps returned first } dcgmOrder_t; -/** - * Return values for DCGM API calls. +/** + * Return values for DCGM API calls. */ typedef enum dcgmReturn_enum { - DCGM_ST_OK = 0, //!< Success + DCGM_ST_OK = 0, //!< Success DCGM_ST_BADPARAM = -1, //!< A bad parameter was passed to a function DCGM_ST_GENERIC_ERROR = -3, //!< A generic, unspecified error DCGM_ST_MEMORY = -4, //!< An out of memory error occurred @@ -287,166 +279,105 @@ typedef enum dcgmReturn_enum DCGM_ST_GPU_IS_LOST = -18, //!< GPU is no longer reachable DCGM_ST_RESET_REQUIRED = -19, //!< GPU requires a reset DCGM_ST_FUNCTION_NOT_FOUND = -20, //!< The function that was requested was not found (bindings only error) - DCGM_ST_CONNECTION_NOT_VALID = -21, //!< The connection to the host engine is not valid any longer + DCGM_ST_CONNECTION_NOT_VALID = -21, //!< The connection to the host engine is not valid any longer DCGM_ST_GPU_NOT_SUPPORTED = -22, //!< This GPU is not supported by DCGM - DCGM_ST_GROUP_INCOMPATIBLE = -23, //!< The GPUs of the provided group are not compatible with each other for the requested operation - DCGM_ST_MAX_LIMIT = -24, //!< Max limit reached for the object - DCGM_ST_LIBRARY_NOT_FOUND = -25, //!< DCGM library could not be found - DCGM_ST_DUPLICATE_KEY = -26, //!< Duplicate key passed to a function - DCGM_ST_GPU_IN_SYNC_BOOST_GROUP = -27, //! Maxwell, setting this implies autoBoost=0 -}dcgmConfigPerfStateSettings_t; + unsigned int syncBoost; //!< Sync Boost Mode (0: Disabled, 1 : Enabled, DCGM_INT32_BLANK : Ignored). 
Note that + //!< using this setting may result in lower clocks than targetClocks + dcgmClockSet_t targetClocks; //!< Target clocks. Set smClock and memClock to DCGM_INT32_BLANK to ignore/use + //!< compatible values. For GPUs > Maxwell, setting this implies autoBoost=0 +} dcgmConfigPerfStateSettings_t; /** - * Used to represents the power capping limit for each GPU in the group or to represent the power + * Used to represents the power capping limit for each GPU in the group or to represent the power * budget for the entire group */ typedef struct { - dcgmConfigPowerLimitType_t type; //!< Flag to represent power cap for each GPU or power budget for the group of GPUs - unsigned int val; //!< Power Limit in Watts (Set a value OR DCGM_INT32_BLANK to Ignore) -}dcgmConfigPowerLimit_t; + dcgmConfigPowerLimitType_t type; //!< Flag to represent power cap for each GPU or power budget for the group of GPUs + unsigned int val; //!< Power Limit in Watts (Set a value OR DCGM_INT32_BLANK to Ignore) +} dcgmConfigPowerLimit_t; /** * Structure to represent default and target configuration for a device */ typedef struct { - unsigned int version; //!< Version number (dcgmConfig_version) - unsigned int gpuId; //!< GPU ID - unsigned int eccMode; //!< ECC Mode (0: Disabled, 1 : Enabled, DCGM_INT32_BLANK : Ignored) - unsigned int computeMode; //!< Compute Mode (One of DCGM_CONFIG_COMPUTEMODE_? OR DCGM_INT32_BLANK to Ignore) - dcgmConfigPerfStateSettings_t perfState; //!< Performance State Settings (clocks / boost mode) - dcgmConfigPowerLimit_t powerLimit; //!< Power Limits -}dcgmConfig_v1; + unsigned int version; //!< Version number (dcgmConfig_version) + unsigned int gpuId; //!< GPU ID + unsigned int eccMode; //!< ECC Mode (0: Disabled, 1 : Enabled, DCGM_INT32_BLANK : Ignored) + unsigned int computeMode; //!< Compute Mode (One of DCGM_CONFIG_COMPUTEMODE_? 
OR DCGM_INT32_BLANK to Ignore) + dcgmConfigPerfStateSettings_t perfState; //!< Performance State Settings (clocks / boost mode) + dcgmConfigPowerLimit_t powerLimit; //!< Power Limits +} dcgmConfig_v1; /** * Typedef for \ref dcgmConfig_v1 @@ -1272,56 +1234,28 @@ typedef dcgmConfig_v1 dcgmConfig_t; */ #define dcgmConfig_version dcgmConfig_version1 -/** - * Structure to represent default and target vgpu configuration for a device - */ -typedef struct -{ - unsigned int version; //!< Version number (dcgmConfig_version) - unsigned int gpuId; //!< GPU ID - unsigned int eccMode; //!< ECC Mode (0: Disabled, 1 : Enabled, DCGM_INT32_BLANK : Ignored) - unsigned int computeMode; //!< Compute Mode (One of DCGM_CONFIG_COMPUTEMODE_? OR DCGM_INT32_BLANK to Ignore) - dcgmConfigPerfStateSettings_t perfState; //!< Performance State Settings (clocks / boost mode) - dcgmConfigPowerLimit_t powerLimit; //!< Power Limits -}dcgmVgpuConfig_v1; - -/** - * Typedef for \ref dcgmVgpuConfig_v1 - */ -typedef dcgmVgpuConfig_v1 dcgmVgpuConfig_t; - -/** - * Version 1 for \ref dcgmVgpuConfig_v1 - */ -#define dcgmVgpuConfig_version1 MAKE_DCGM_VERSION(dcgmVgpuConfig_v1, 1) - -/** - * Latest version for \ref dcgmVgpuConfig_t - */ -#define dcgmVgpuConfig_version dcgmVgpuConfig_version1 - /** * Represents a callback to receive updates from asynchronous functions. * Currently the only implemented callback function is dcgmPolicyRegister * and the void * data will be a pointer to dcgmPolicyCallbackResponse_t. * Ex. 
* dcgmPolicyCallbackResponse_t *callbackResponse = (dcgmPolicyCallbackResponse_t *) userData; - * + * */ typedef int (*fpRecvUpdates)(void *userData); /*Remove from doxygen documentation * - * Define the structure that contains specific policy information + * Define the structure that contains specific policy information */ -typedef struct +typedef struct { // version must always be first - unsigned int version; //!< Version number (dcgmPolicyViolation_version) + unsigned int version; //!< Version number (dcgmPolicyViolation_version) - unsigned int notifyOnEccDbe; //!< true/false notification on ECC Double Bit Errors - unsigned int notifyOnPciEvent; //!< true/false notification on PCI Events - unsigned int notifyOnMaxRetiredPages; //!< number of retired pages to occur before notification + unsigned int notifyOnEccDbe; //!< true/false notification on ECC Double Bit Errors + unsigned int notifyOnPciEvent; //!< true/false notification on PCI Events + unsigned int notifyOnMaxRetiredPages; //!< number of retired pages to occur before notification } dcgmPolicyViolation_v1; /*Remove from doxygen documentation @@ -1344,21 +1278,22 @@ typedef dcgmPolicyViolation_v1 dcgmPolicyViolation_t; */ #define dcgmPolicyViolation_version dcgmPolicyViolation_version1 -/** +/** * Enumeration for policy conditions. 
- * When used as part of dcgmPolicy_t these have corresponding parameters to + * When used as part of dcgmPolicy_t these have corresponding parameters to * allow them to be switched on/off or set specific violation thresholds */ typedef enum dcgmPolicyCondition_enum { // these are bitwise rather than sequential - DCGM_POLICY_COND_DBE = 0x1, //!< Double bit errors -- boolean in dcgmPolicyConditionParms_t - DCGM_POLICY_COND_PCI = 0x2, //!< PCI events/errors -- boolean in dcgmPolicyConditionParms_t - DCGM_POLICY_COND_MAX_PAGES_RETIRED = 0x4, //!< Maximum number of retired pages -- number required in dcgmPolicyConditionParms_t - DCGM_POLICY_COND_THERMAL = 0x8, //!< Thermal violation -- number required in dcgmPolicyConditionParms_t - DCGM_POLICY_COND_POWER = 0x10, //!< Power violation -- number required in dcgmPolicyConditionParms_t - DCGM_POLICY_COND_NVLINK = 0x20, //!< NVLINK errors -- boolean in dcgmPolicyConditionParms_t - DCGM_POLICY_COND_XID = 0x40, //!< XID errors -- number required in dcgmPolicyConditionParms_t + DCGM_POLICY_COND_DBE = 0x1, //!< Double bit errors -- boolean in dcgmPolicyConditionParams_t + DCGM_POLICY_COND_PCI = 0x2, //!< PCI events/errors -- boolean in dcgmPolicyConditionParams_t + DCGM_POLICY_COND_MAX_PAGES_RETIRED = 0x4, //!< Maximum number of retired pages -- number + //!< required in dcgmPolicyConditionParams_t + DCGM_POLICY_COND_THERMAL = 0x8, //!< Thermal violation -- number required in dcgmPolicyConditionParams_t + DCGM_POLICY_COND_POWER = 0x10, //!< Power violation -- number required in dcgmPolicyConditionParams_t + DCGM_POLICY_COND_NVLINK = 0x20, //!< NVLINK errors -- boolean in dcgmPolicyConditionParams_t + DCGM_POLICY_COND_XID = 0x40, //!< XID errors -- number required in dcgmPolicyConditionParams_t } dcgmPolicyCondition_t; #define DCGM_POLICY_COND_MAX 7 @@ -1369,22 +1304,27 @@ typedef enum dcgmPolicyCondition_enum * as well as a "val" which is a union of the possible value types. 
For example, * to pass a true boolean: tag = BOOL, val.boolean = 1. */ -typedef struct dcgmPolicyConditionParms_st +typedef struct dcgmPolicyConditionParams_st { - enum {BOOL, LLONG} tag; - union { - unsigned int boolean; + enum + { + BOOL, + LLONG + } tag; + union + { + unsigned int boolean; unsigned long long llval; } val; -} dcgmPolicyConditionParms_t; +} dcgmPolicyConditionParams_t; /** * Enumeration for policy modes */ typedef enum dcgmPolicyMode_enum { - DCGM_POLICY_MODE_AUTOMATED = 0, //!< automatic mode - DCGM_POLICY_MODE_MANUAL = 1, //!< manual mode + DCGM_POLICY_MODE_AUTOMATED = 0, //!< automatic mode + DCGM_POLICY_MODE_MANUAL = 1, //!< manual mode } dcgmPolicyMode_t; /** @@ -1392,7 +1332,7 @@ typedef enum dcgmPolicyMode_enum */ typedef enum dcgmPolicyIsolation_enum { - DCGM_POLICY_ISOLATION_NONE = 0, //!< no isolation of GPUs on error + DCGM_POLICY_ISOLATION_NONE = 0, //!< no isolation of GPUs on error } dcgmPolicyIsolation_t; /** @@ -1400,8 +1340,8 @@ typedef enum dcgmPolicyIsolation_enum */ typedef enum dcgmPolicyAction_enum { - DCGM_POLICY_ACTION_NONE = 0, //!< no action - DCGM_POLICY_ACTION_GPURESET = 1, //!< perform a GPU reset on violation + DCGM_POLICY_ACTION_NONE = 0, //!< no action + DCGM_POLICY_ACTION_GPURESET = 1, //!< Deprecated - perform a GPU reset on violation } dcgmPolicyAction_t; /** @@ -1409,10 +1349,10 @@ typedef enum dcgmPolicyAction_enum */ typedef enum dcgmPolicyValidation_enum { - DCGM_POLICY_VALID_NONE = 0, //!< no validation after an action is performed - DCGM_POLICY_VALID_SV_SHORT = 1, //!< run a short System Validation on the system after failure - DCGM_POLICY_VALID_SV_MED = 2, //!< run a medium System Validation test after failure - DCGM_POLICY_VALID_SV_LONG = 3, //!< run a extensive System Validation test after failure + DCGM_POLICY_VALID_NONE = 0, //!< no validation after an action is performed + DCGM_POLICY_VALID_SV_SHORT = 1, //!< run a short System Validation on the system after failure + DCGM_POLICY_VALID_SV_MED = 2, 
//!< run a medium System Validation test after failure + DCGM_POLICY_VALID_SV_LONG = 3, //!< run a extensive System Validation test after failure } dcgmPolicyValidation_t; /** @@ -1420,33 +1360,33 @@ typedef enum dcgmPolicyValidation_enum */ typedef enum dcgmPolicyFailureResp_enum { - DCGM_POLICY_FAILURE_NONE = 0, //!< on failure of validation perform no action + DCGM_POLICY_FAILURE_NONE = 0, //!< on failure of validation perform no action } dcgmPolicyFailureResp_t; -/** +/** * Structure to fill when a user queries for policy violations */ -typedef struct +typedef struct { - unsigned int gpuId; //!< gpu ID - unsigned int violationOccurred; //!< a violation based on the bit values in \ref dcgmPolicyCondition_t + unsigned int gpuId; //!< gpu ID + unsigned int violationOccurred; //!< a violation based on the bit values in \ref dcgmPolicyCondition_t } dcgmPolicyViolationNotify_t; /** - * Define the structure that specifies a policy to be enforced for a GPU + * Define the structure that specifies a policy to be enforced for a GPU */ -typedef struct +typedef struct { // version must always be first - unsigned int version; //!< version number (dcgmPolicy_version) - - dcgmPolicyCondition_t condition; //!< Condition(s) to access \ref dcgmPolicyCondition_t - dcgmPolicyMode_t mode; //!< Mode of operation \ref dcgmPolicyMode_t - dcgmPolicyIsolation_t isolation; //!< Isolation level after a policy violation \ref dcgmPolicyIsolation_t - dcgmPolicyAction_t action; //!< Action to perform after a policy violation \ref dcgmPolicyAction_t action - dcgmPolicyValidation_t validation; //!< Validation to perform after action is taken \ref dcgmPolicyValidation_t - dcgmPolicyFailureResp_t response; //!< Failure to validation response \ref dcgmPolicyFailureResp_t - dcgmPolicyConditionParms_t parms[DCGM_POLICY_COND_MAX]; //!< Parameters for the \a condition fields + unsigned int version; //!< version number (dcgmPolicy_version) + + dcgmPolicyCondition_t condition; //!< Condition(s) to access 
\ref dcgmPolicyCondition_t + dcgmPolicyMode_t mode; //!< Mode of operation \ref dcgmPolicyMode_t + dcgmPolicyIsolation_t isolation; //!< Isolation level after a policy violation \ref dcgmPolicyIsolation_t + dcgmPolicyAction_t action; //!< Action to perform after a policy violation \ref dcgmPolicyAction_t action + dcgmPolicyValidation_t validation; //!< Validation to perform after action is taken \ref dcgmPolicyValidation_t + dcgmPolicyFailureResp_t response; //!< Failure to validation response \ref dcgmPolicyFailureResp_t + dcgmPolicyConditionParams_t parms[DCGM_POLICY_COND_MAX]; //!< Parameters for the \a condition fields } dcgmPolicy_v1; /** @@ -1470,9 +1410,16 @@ typedef dcgmPolicy_v1 dcgmPolicy_t; */ typedef struct { - long long timestamp; //!< timestamp of the error - enum {L1, L2, DEVICE, REGISTER, TEXTURE} location; //!< location of the error - unsigned int numerrors; //!< number of errors + long long timestamp; //!< timestamp of the error + enum + { + L1, + L2, + DEVICE, + REGISTER, + TEXTURE + } location; //!< location of the error + unsigned int numerrors; //!< number of errors } dcgmPolicyConditionDbe_t; /** @@ -1480,8 +1427,8 @@ typedef struct */ typedef struct { - long long timestamp; //!< timestamp of the error - unsigned int counter; //!< value of the PCIe replay counter + long long timestamp; //!< timestamp of the error + unsigned int counter; //!< value of the PCIe replay counter } dcgmPolicyConditionPci_t; /** @@ -1489,37 +1436,37 @@ typedef struct */ typedef struct { - long long timestamp; //!< timestamp of the error - unsigned int sbepages; //!< number of pending pages due to SBE - unsigned int dbepages; //!< number of pending pages due to DBE + long long timestamp; //!< timestamp of the error + unsigned int sbepages; //!< number of pending pages due to SBE + unsigned int dbepages; //!< number of pending pages due to DBE } dcgmPolicyConditionMpr_t; -/** +/** * Define the thermal policy violations return structure */ typedef struct { - long long 
timestamp; //!< timestamp of the error - unsigned int thermalViolation; //!< Temperature reached that violated policy + long long timestamp; //!< timestamp of the error + unsigned int thermalViolation; //!< Temperature reached that violated policy } dcgmPolicyConditionThermal_t; -/** +/** * Define the power policy violations return structure */ typedef struct { - long long timestamp; //!< timestamp of the error - unsigned int powerViolation; //!< Power value reached that violated policy + long long timestamp; //!< timestamp of the error + unsigned int powerViolation; //!< Power value reached that violated policy } dcgmPolicyConditionPower_t; -/** +/** * Define the nvlink policy violations return structure */ typedef struct { - long long timestamp; //!< timestamp of the error - unsigned short fieldId; //! + * Every pair is separated by a colon char (:). Only the very first colon is considered as a separation.
+ * Values can contain colon chars. Values and Keys cannot contain semicolon chars.
+ * Usually defined keys are: + *

+ * version : DCGM Version.
+ * arch : Target DCGM Architecture.
+ * buildid : Build ID. Usually a sequential number.
+ * commit : Commit ID (Usually a git commit hash).
+ * author : Author of the commit above.
+ * branch : Branch (Usually a git branch that was used for the build).
+ * buildtype : Build Type.
+ * builddate : Date of the build.
+ * buildplatform : Platform where the build was made.
+ *

+ * Any or all keys may be absent.
+ * This values are for reference only are not supposed to participate in some complicated logic.
+ */ + char rawBuildInfoString[DCGM_MAX_STR_LENGTH * 2]; +} dcgmVersionInfo_v2; + +/** + * Version 2 of the dcgmVersionInfo_v2 + */ +#define dcgmVersionInfo_version2 MAKE_DCGM_VERSION(dcgmVersionInfo_v2, 2) + +#define dcgmVersionInfo_version dcgmVersionInfo_version2 +typedef dcgmVersionInfo_v2 dcgmVersionInfo_t; /** @} */ -#ifdef __cplusplus +#ifdef __cplusplus } #endif -#endif /* DCGM_STRUCTS_H */ +#endif /* DCGM_STRUCTS_H */ diff --git a/bindings/go/dcgm/device_info.go b/bindings/go/dcgm/device_info.go index 8c61a55..bda27a1 100644 --- a/bindings/go/dcgm/device_info.go +++ b/bindings/go/dcgm/device_info.go @@ -120,7 +120,7 @@ func getPciBandwidth(gpuId uint) (int64, error) { func getDeviceInfo(gpuid uint) (deviceInfo Device, err error) { var device C.dcgmDeviceAttributes_t - device.version = makeVersion1(unsafe.Sizeof(device)) + device.version = makeVersion2(unsafe.Sizeof(device)) result := C.dcgmGetDeviceAttributes(handle.handle, C.uint(gpuid), &device) if err = errorString(result); err != nil { diff --git a/bindings/go/dcgm/health.go b/bindings/go/dcgm/health.go index 5ce3abc..e611e72 100644 --- a/bindings/go/dcgm/health.go +++ b/bindings/go/dcgm/health.go @@ -48,8 +48,8 @@ func healthCheckByGpuId(gpuId uint) (deviceHealth DeviceHealth, err error) { return } - var healthResults C.dcgmHealthResponse_v1 - healthResults.version = makeVersion1(unsafe.Sizeof(healthResults)) + var healthResults C.dcgmHealthResponse_v4 + healthResults.version = makeVersion2(unsafe.Sizeof(healthResults)) result := C.dcgmHealthCheck(handle.handle, groupId.handle, (*C.dcgmHealthResponse_t)(unsafe.Pointer(&healthResults))) @@ -60,18 +60,15 @@ func healthCheckByGpuId(gpuId uint) (deviceHealth DeviceHealth, err error) { status := healthStatus(int8(healthResults.overallHealth)) watches := []SystemWatch{} - // only 1 gpu - i := 0 - // number of watches that encountred error/warning - incidents := uint(healthResults.gpu[i].incidentCount) + incidents := uint(healthResults.incidentCount) for j 
:= uint(0); j < incidents; j++ { watch := SystemWatch{ - Type: systemWatch(int(healthResults.gpu[i].systems[j].system)), - Status: healthStatus(int8(healthResults.gpu[i].systems[j].health)), + Type: systemWatch(int(healthResults.incidents[j].system)), + Status: healthStatus(int8(healthResults.incidents[j].health)), - Error: *stringPtr(&healthResults.gpu[i].systems[j].errorString[0]), + Error: *stringPtr(&healthResults.incidents[j].error.msg[0]), } watches = append(watches, watch) } diff --git a/bindings/go/dcgm/hostengine_status.go b/bindings/go/dcgm/hostengine_status.go index 7848f09..4e6e6b9 100644 --- a/bindings/go/dcgm/hostengine_status.go +++ b/bindings/go/dcgm/hostengine_status.go @@ -24,7 +24,7 @@ func introspect() (engine DcgmStatus, err error) { } var memory C.dcgmIntrospectMemory_t - memory.version = makeVersion1(unsafe.Sizeof(memory)) + memory.version = makeVersion2(unsafe.Sizeof(memory)) waitIfNoData := 1 result = C.dcgmIntrospectGetHostengineMemoryUsage(handle.handle, &memory, C.int(waitIfNoData)) @@ -34,7 +34,7 @@ func introspect() (engine DcgmStatus, err error) { var cpu C.dcgmIntrospectCpuUtil_t - cpu.version = makeVersion1(unsafe.Sizeof(cpu)) + cpu.version = makeVersion2(unsafe.Sizeof(cpu)) result = C.dcgmIntrospectGetHostengineCpuUtilization(handle.handle, &cpu, C.int(waitIfNoData)) if err = errorString(result); err != nil { diff --git a/bindings/go/dcgm/policy.go b/bindings/go/dcgm/policy.go index 9352529..06be22f 100644 --- a/bindings/go/dcgm/policy.go +++ b/bindings/go/dcgm/policy.go @@ -250,7 +250,7 @@ func ViolationRegistration(data unsafe.Pointer) int { func setPolicy(groupId GroupHandle, condition C.dcgmPolicyCondition_t, paramList []policyIndex) (err error) { var policy C.dcgmPolicy_t - policy.version = makeVersion1(unsafe.Sizeof(policy)) + policy.version = makeVersion2(unsafe.Sizeof(policy)) policy.mode = C.dcgmPolicyMode_t(C.DCGM_OPERATION_MODE_AUTO) policy.action = C.DCGM_POLICY_ACTION_NONE policy.isolation = 
C.DCGM_POLICY_ISOLATION_NONE diff --git a/bindings/go/dcgm/process_info.go b/bindings/go/dcgm/process_info.go index 16f7e33..64227cf 100644 --- a/bindings/go/dcgm/process_info.go +++ b/bindings/go/dcgm/process_info.go @@ -95,7 +95,7 @@ func watchPidFields(gpus ...uint) (groupId GroupHandle, err error) { func getProcessInfo(groupId GroupHandle, pid uint) (processInfo []ProcessInfo, err error) { var pidInfo C.dcgmPidInfo_t - pidInfo.version = makeVersion1(unsafe.Sizeof(pidInfo)) + pidInfo.version = makeVersion2(unsafe.Sizeof(pidInfo)) pidInfo.pid = C.uint(pid) result := C.dcgmGetPidInfo(handle.handle, groupId.handle, &pidInfo) diff --git a/bindings/go/dcgm/topology.go b/bindings/go/dcgm/topology.go index cf1dbd5..f3afc38 100644 --- a/bindings/go/dcgm/topology.go +++ b/bindings/go/dcgm/topology.go @@ -97,7 +97,7 @@ func getCPUAffinity(busid string) (string, error) { func getBusid(gpuid uint) (string, error) { var device C.dcgmDeviceAttributes_t - device.version = makeVersion1(unsafe.Sizeof(device)) + device.version = makeVersion2(unsafe.Sizeof(device)) result := C.dcgmGetDeviceAttributes(handle.handle, C.uint(gpuid), &device) if err := errorString(result); err != nil { @@ -108,7 +108,7 @@ func getBusid(gpuid uint) (string, error) { func getDeviceTopology(gpuid uint) (links []P2PLink, err error) { var topology C.dcgmDeviceTopology_t - topology.version = makeVersion1(unsafe.Sizeof(topology)) + topology.version = makeVersion2(unsafe.Sizeof(topology)) result := C.dcgmGetDeviceTopology(handle.handle, C.uint(gpuid), &topology) if result == C.DCGM_ST_NOT_SUPPORTED { diff --git a/docker/Dockerfile.ubi8 b/docker/Dockerfile.ubi8 index 37e9c0d..9adf2da 100644 --- a/docker/Dockerfile.ubi8 +++ b/docker/Dockerfile.ubi8 @@ -6,7 +6,7 @@ COPY . . 
RUN make binary check-format -FROM registry.access.redhat.com/ubi8:latest +FROM nvidia/cuda:11.0-base-ubi8 LABEL io.k8s.display-name="NVIDIA DCGM Exporter" ARG DCGM_VERSION @@ -19,9 +19,8 @@ COPY --from=builder /go/src/github.com/NVIDIA/gpu-monitoring-tools/dcgm-exporter COPY etc/dcgm-exporter /etc/dcgm-exporter ENV NVIDIA_VISIBLE_DEVICES=all - -RUN useradd dcgm-exporter -USER dcgm-exporter +# Required for DCP metrics +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,compat32 ARG VERSION @@ -35,4 +34,8 @@ LABEL description="See summary" COPY ./LICENSE ./licenses/LICENSE -ENTRYPOINT ["/usr/bin/dcgm-exporter"] +ENV NO_SETCAP= +COPY docker/docker-entrypoint.sh /usr/local/dcgm/docker-entrypoint.sh +RUN chmod +x /usr/local/dcgm/docker-entrypoint.sh + +ENTRYPOINT ["/usr/local/dcgm/docker-entrypoint.sh"] diff --git a/docker/Dockerfile.ubuntu18.04 b/docker/Dockerfile.ubuntu18.04 index 0eb50a7..4c61bd2 100644 --- a/docker/Dockerfile.ubuntu18.04 +++ b/docker/Dockerfile.ubuntu18.04 @@ -6,7 +6,7 @@ COPY . . 
RUN make binary check-format -FROM ubuntu:18.04 +FROM nvidia/cuda:11.0-base-ubuntu18.04 LABEL io.k8s.display-name="NVIDIA DCGM Exporter" COPY --from=builder /go/src/github.com/NVIDIA/gpu-monitoring-tools/dcgm-exporter /usr/bin/ @@ -14,6 +14,7 @@ COPY etc/dcgm-exporter /etc/dcgm-exporter ARG DCGM_VERSION RUN apt-get update && apt-get install -y --no-install-recommends \ + libcap2-bin \ libgomp1 \ wget && \ rm -rf /var/lib/apt/lists/* && \ @@ -21,9 +22,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ dpkg -i datacenter-gpu-manager_*.deb && \ rm -f datacenter-gpu-manager_*.deb +# Required for DCP metrics +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,compat32 + ENV NVIDIA_VISIBLE_DEVICES=all -RUN useradd dcgm-exporter -USER dcgm-exporter +ENV NO_SETCAP= +COPY docker/docker-entrypoint.sh /usr/local/dcgm/docker-entrypoint.sh +RUN chmod +x /usr/local/dcgm/docker-entrypoint.sh -ENTRYPOINT ["/usr/bin/dcgm-exporter"] +ENTRYPOINT ["/usr/local/dcgm/docker-entrypoint.sh"] diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh new file mode 100644 index 0000000..d6c8ea6 --- /dev/null +++ b/docker/docker-entrypoint.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -euo pipefail + +# We want to setcap only when the container is started with the right permissions +DCGM_EXPORTER=$(readlink -f $(which dcgm-exporter)) +if [ -z "$NO_SETCAP" ]; then + setcap 'cap_sys_admin=+ep' $DCGM_EXPORTER + + if ! $DCGM_EXPORTER -v 1>/dev/null 2>/dev/null; then + >&2 echo "dcgm-exporter doesn't have sufficient privileges to expose profiling metrics. To use dcgm-exporter for profiling metrics use --cap-add SYS_ADMIN" + setcap 'cap_sys_admin=-ep' $DCGM_EXPORTER + fi +fi + +# Pass the command line arguments to dcgm-exporter +set -- $DCGM_EXPORTER "$@" +exec "$@"