From 2502b7b49eca14d293fa3fee3b647aa9452a85f9 Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis <cdesiniotis@nvidia.com>
Date: Sun, 9 Mar 2025 11:33:22 -0700
Subject: [PATCH 1/4] Revert "Always add 'config' emptyDir volume to GFD and
 device-plugin daemonsets"

This reverts commit 22941b5416db1d4fd26fd9c4bb2aa561f94729fd.

Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
---
 .../gpu-feature-discovery/0500_daemonset.yaml   | 10 ----------
 assets/state-device-plugin/0500_daemonset.yaml  | 10 ----------
 controllers/object_controls.go                  | 17 +++++++++++++++--
 3 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/assets/gpu-feature-discovery/0500_daemonset.yaml b/assets/gpu-feature-discovery/0500_daemonset.yaml
index 7efc1b342..75532744f 100644
--- a/assets/gpu-feature-discovery/0500_daemonset.yaml
+++ b/assets/gpu-feature-discovery/0500_daemonset.yaml
@@ -104,9 +104,6 @@ spec:
             value: ""
           - name: PROCESS_TO_SIGNAL
             value: ""
-          volumeMounts:
-            - name: config
-              mountPath: /config
       containers:
         - image: "FILLED BY THE OPERATOR"
           name: gpu-feature-discovery
@@ -130,8 +127,6 @@ spec:
             - name: host-sys
               mountPath: /sys
               readOnly: true
-            - name: config
-              mountPath: /config
           securityContext:
             privileged: true
         - image: "FILLED BY THE OPERATOR"
@@ -162,9 +157,6 @@ spec:
             value: "1" # SIGHUP
           - name: PROCESS_TO_SIGNAL
             value: "gpu-feature-discovery"
-          volumeMounts:
-            - name: config
-              mountPath: /config
       volumes:
         - name: output-dir
           hostPath:
@@ -183,5 +175,3 @@ spec:
           hostPath:
             path: /run/nvidia/driver
             type: DirectoryOrCreate
-        - name: config
-          emptyDir: {}
diff --git a/assets/state-device-plugin/0500_daemonset.yaml b/assets/state-device-plugin/0500_daemonset.yaml
index e6a68bd16..76de43420 100644
--- a/assets/state-device-plugin/0500_daemonset.yaml
+++ b/assets/state-device-plugin/0500_daemonset.yaml
@@ -61,9 +61,6 @@ spec:
           value: ""
         - name: PROCESS_TO_SIGNAL
           value: ""
-        volumeMounts:
-          - name: config
-            mountPath: /config
       containers:
       - image: "FILLED BY THE OPERATOR"
         name: nvidia-device-plugin
@@ -110,8 +107,6 @@ spec:
             mountPath: /dev/shm
           - name: mps-root
             mountPath: /mps
-          - name: config
-            mountPath: /config
       - image: "FILLED BY THE OPERATOR"
         name: config-manager
         command: ["config-manager"]
@@ -140,9 +135,6 @@ spec:
           value: "1" # SIGHUP
         - name: PROCESS_TO_SIGNAL
           value: "nvidia-device-plugin"
-        volumeMounts:
-          - name: config
-            mountPath: /config
       volumes:
         - name: nvidia-device-plugin-entrypoint
           configMap:
@@ -173,5 +165,3 @@ spec:
         - name: mps-shm
           hostPath:
             path: /run/nvidia/mps/shm
-        - name: config
-          emptyDir: {}
diff --git a/controllers/object_controls.go b/controllers/object_controls.go
index 8103b9009..5696f44b9 100644
--- a/controllers/object_controls.go
+++ b/controllers/object_controls.go
@@ -2456,7 +2456,10 @@ func isCustomPluginConfigSet(pluginConfig *gpuv1.DevicePluginConfig) bool {
 
 // adds shared volume mounts required for custom plugin config provided via a ConfigMap
 func addSharedMountsForPluginConfig(container *corev1.Container, config *gpuv1.DevicePluginConfig) {
+	emptyDirMount := corev1.VolumeMount{Name: "config", MountPath: "/config"}
 	configVolMount := corev1.VolumeMount{Name: config.Name, MountPath: "/available-configs"}
+
+	container.VolumeMounts = append(container.VolumeMounts, emptyDirMount)
 	container.VolumeMounts = append(container.VolumeMounts, configVolMount)
 }
 
@@ -2492,7 +2495,7 @@ func handleDevicePluginConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy
 			continue
 		}
 		setContainerEnv(&obj.Spec.Template.Spec.Containers[i], "CONFIG_FILE", "/config/config.yaml")
-		// add configmap volume mount
+		// add the shared emptyDir ("/config") and ConfigMap volume mounts to the main container
 		addSharedMountsForPluginConfig(&obj.Spec.Template.Spec.Containers[i], config.DevicePlugin.Config)
 	}
 
@@ -2503,8 +2506,9 @@ func handleDevicePluginConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy
 		shareProcessNamespace := true
 		obj.Spec.Template.Spec.ShareProcessNamespace = &shareProcessNamespace
 	}
-	// add configmap volume
+	// add the ConfigMap volume and the shared "config" emptyDir volume
 	obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, createConfigMapVolume(config.DevicePlugin.Config.Name, nil))
+	obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, createEmptyDirVolume("config"))
 
 	// apply env/volume changes to initContainer
 	err := transformConfigManagerInitContainer(obj, config)
@@ -3129,6 +3133,15 @@ func createConfigMapVolume(configMapName string, itemsToInclude []corev1.KeyToPa
 	return corev1.Volume{Name: configMapName, VolumeSource: volumeSource}
 }
 
+func createEmptyDirVolume(volumeName string) corev1.Volume {
+	return corev1.Volume{
+		Name: volumeName,
+		VolumeSource: corev1.VolumeSource{
+			EmptyDir: &corev1.EmptyDirVolumeSource{},
+		},
+	}
+}
+
 func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
 	driverIndex := 0
 	driverCtrFound := false

From b6747d619e55c821cd477a08dccbae2c74fbaca7 Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis <cdesiniotis@nvidia.com>
Date: Sun, 9 Mar 2025 11:35:55 -0700
Subject: [PATCH 2/4] Revert "Add init container to GFD for handling imex nodes
 config mount"

This reverts commit a076f8911841f7762ab5373e6be32a435871a701.

Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
---
 .../gpu-feature-discovery/0500_configmap.yaml | 22 ++++++
 ...500_daemonset.yaml => 0600_daemonset.yaml} | 67 ++++++-------------
 controllers/object_controls.go                |  8 ---
 3 files changed, 44 insertions(+), 53 deletions(-)
 create mode 100644 assets/gpu-feature-discovery/0500_configmap.yaml
 rename assets/gpu-feature-discovery/{0500_daemonset.yaml => 0600_daemonset.yaml} (73%)

diff --git a/assets/gpu-feature-discovery/0500_configmap.yaml b/assets/gpu-feature-discovery/0500_configmap.yaml
new file mode 100644
index 000000000..5f6c54496
--- /dev/null
+++ b/assets/gpu-feature-discovery/0500_configmap.yaml
@@ -0,0 +1,22 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: gpu-feature-discovery-entrypoint
+  namespace: "FILLED BY THE OPERATOR"
+  labels:
+    app: gpu-feature-discovery
+data:
+  entrypoint.sh: |-
+    #!/bin/bash
+
+    until [[ -f /run/nvidia/validations/driver-ready ]]
+    do
+      echo "waiting for the driver validations to be ready..."
+      sleep 5
+    done
+
+    set -o allexport
+    cat /run/nvidia/validations/driver-ready
+    . /run/nvidia/validations/driver-ready
+
+    exec gpu-feature-discovery
diff --git a/assets/gpu-feature-discovery/0500_daemonset.yaml b/assets/gpu-feature-discovery/0600_daemonset.yaml
similarity index 73%
rename from assets/gpu-feature-discovery/0500_daemonset.yaml
rename to assets/gpu-feature-discovery/0600_daemonset.yaml
index 75532744f..1c8198200 100644
--- a/assets/gpu-feature-discovery/0500_daemonset.yaml
+++ b/assets/gpu-feature-discovery/0600_daemonset.yaml
@@ -33,51 +33,9 @@ spec:
           securityContext:
             privileged: true
           volumeMounts:
-            - name: run-nvidia-validations
-              mountPath: /run/nvidia/validations
-              mountPropagation: HostToContainer
-        - name: gpu-feature-discovery-imex-init
-          image: "FILLED BY THE OPERATOR"
-          command: ["/bin/bash", "-c"]
-          args:
-            - |
-              until [[ -f /run/nvidia/validations/driver-ready ]]
-              do
-                echo "waiting for the driver validations to be ready..."
-                sleep 5
-              done
-              set -o allexport
-              cat /run/nvidia/validations/driver-ready
-              . /run/nvidia/validations/driver-ready
-              
-              IMEX_NODES_CONFIG_FILE=/etc/nvidia-imex/nodes_config.cfg
-              if [[ -f /config/${IMEX_NODES_CONFIG_FILE} ]]; then
-                echo "Removing cached IMEX nodes config"
-                rm -f /config/${IMEX_NODES_CONFIG_FILE}
-              fi
-              if [[ ! -f ${DRIVER_ROOT_CTR_PATH}/${IMEX_NODES_CONFIG_FILE} ]]; then
-                echo "No IMEX nodes config path detected; Skipping"
-                exit 0
-              fi
-              echo "Copying IMEX nodes config"
-              mkdir -p $(dirname /config/${IMEX_NODES_CONFIG_FILE})
-              cp ${DRIVER_ROOT_CTR_PATH}/${IMEX_NODES_CONFIG_FILE} /config/${IMEX_NODES_CONFIG_FILE}
-          securityContext:
-            privileged: true
-          volumeMounts:
-            - name: config
-              mountPath: /config
-            - name: run-nvidia-validations
-              mountPath: /run/nvidia/validations
+            - name: run-nvidia
+              mountPath: /run/nvidia
               mountPropagation: HostToContainer
-            - name: host-root
-              mountPath: /host/etc
-              subPath: etc
-              readOnly: true
-            - name: driver-install-dir
-              mountPath: /driver-root/etc
-              subPath: etc
-              readOnly: true
         - name: config-manager-init
           image: "FILLED BY THE OPERATOR"
           command: ["config-manager"]
@@ -107,7 +65,9 @@ spec:
       containers:
         - image: "FILLED BY THE OPERATOR"
           name: gpu-feature-discovery
-          command: ["gpu-feature-discovery"]
+          command: [ "/bin/bash", "-c" ]
+          args:
+            - /bin/entrypoint.sh
           env:
             - name: GFD_SLEEP_INTERVAL
               value: 60s
@@ -122,11 +82,24 @@ spec:
                 fieldRef:
                   fieldPath: spec.nodeName
           volumeMounts:
+            - name: gpu-feature-discovery-entrypoint
+              readOnly: true
+              mountPath: /bin/entrypoint.sh
+              subPath: entrypoint.sh
             - name: output-dir
               mountPath: "/etc/kubernetes/node-feature-discovery/features.d"
             - name: host-sys
               mountPath: /sys
               readOnly: true
+            - name: run-nvidia-validations
+              mountPath: /run/nvidia/validations
+            - name: driver-install-dir
+              mountPath: /driver-root
+              mountPropagation: HostToContainer
+            - name: host-root
+              mountPath: /host
+              readOnly: true
+              mountPropagation: HostToContainer
           securityContext:
             privileged: true
         - image: "FILLED BY THE OPERATOR"
@@ -158,6 +131,10 @@ spec:
           - name: PROCESS_TO_SIGNAL
             value: "gpu-feature-discovery"
       volumes:
+        - name: gpu-feature-discovery-entrypoint
+          configMap:
+            name: gpu-feature-discovery-entrypoint
+            defaultMode: 448
         - name: output-dir
           hostPath:
             path: "/etc/kubernetes/node-feature-discovery/features.d"
diff --git a/controllers/object_controls.go b/controllers/object_controls.go
index 5696f44b9..21f980145 100644
--- a/controllers/object_controls.go
+++ b/controllers/object_controls.go
@@ -895,14 +895,6 @@ func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPol
 	}
 	obj.Spec.Template.Spec.Containers[0].Image = img
 
-	// update image for IMEX init container
-	for i, initCtr := range obj.Spec.Template.Spec.InitContainers {
-		if initCtr.Name == "gpu-feature-discovery-imex-init" {
-			obj.Spec.Template.Spec.InitContainers[i].Image = img
-			break
-		}
-	}
-
 	// update image pull policy
 	obj.Spec.Template.Spec.Containers[0].ImagePullPolicy = gpuv1.ImagePullPolicy(config.GPUFeatureDiscovery.ImagePullPolicy)
 

From 0a6f1f2251f4643c615f8d45ad2fbf81fba1ceb9 Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis <cdesiniotis@nvidia.com>
Date: Sun, 9 Mar 2025 11:42:25 -0700
Subject: [PATCH 3/4] Revert "Make the IMEX nodes config file available to GFD"

This reverts commit 5525636ac933798698b69955496fc293c2a71f83.

Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
---
 .../gpu-feature-discovery/0500_configmap.yaml | 22 ------------
 ...600_daemonset.yaml => 0500_daemonset.yaml} | 34 +++----------------
 2 files changed, 4 insertions(+), 52 deletions(-)
 delete mode 100644 assets/gpu-feature-discovery/0500_configmap.yaml
 rename assets/gpu-feature-discovery/{0600_daemonset.yaml => 0500_daemonset.yaml} (78%)

diff --git a/assets/gpu-feature-discovery/0500_configmap.yaml b/assets/gpu-feature-discovery/0500_configmap.yaml
deleted file mode 100644
index 5f6c54496..000000000
--- a/assets/gpu-feature-discovery/0500_configmap.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: gpu-feature-discovery-entrypoint
-  namespace: "FILLED BY THE OPERATOR"
-  labels:
-    app: gpu-feature-discovery
-data:
-  entrypoint.sh: |-
-    #!/bin/bash
-
-    until [[ -f /run/nvidia/validations/driver-ready ]]
-    do
-      echo "waiting for the driver validations to be ready..."
-      sleep 5
-    done
-
-    set -o allexport
-    cat /run/nvidia/validations/driver-ready
-    . /run/nvidia/validations/driver-ready
-
-    exec gpu-feature-discovery
diff --git a/assets/gpu-feature-discovery/0600_daemonset.yaml b/assets/gpu-feature-discovery/0500_daemonset.yaml
similarity index 78%
rename from assets/gpu-feature-discovery/0600_daemonset.yaml
rename to assets/gpu-feature-discovery/0500_daemonset.yaml
index 1c8198200..8cf620184 100644
--- a/assets/gpu-feature-discovery/0600_daemonset.yaml
+++ b/assets/gpu-feature-discovery/0500_daemonset.yaml
@@ -65,9 +65,7 @@ spec:
       containers:
         - image: "FILLED BY THE OPERATOR"
           name: gpu-feature-discovery
-          command: [ "/bin/bash", "-c" ]
-          args:
-            - /bin/entrypoint.sh
+          command: ["gpu-feature-discovery"]
           env:
             - name: GFD_SLEEP_INTERVAL
               value: 60s
@@ -82,24 +80,11 @@ spec:
                 fieldRef:
                   fieldPath: spec.nodeName
           volumeMounts:
-            - name: gpu-feature-discovery-entrypoint
-              readOnly: true
-              mountPath: /bin/entrypoint.sh
-              subPath: entrypoint.sh
             - name: output-dir
               mountPath: "/etc/kubernetes/node-feature-discovery/features.d"
             - name: host-sys
               mountPath: /sys
               readOnly: true
-            - name: run-nvidia-validations
-              mountPath: /run/nvidia/validations
-            - name: driver-install-dir
-              mountPath: /driver-root
-              mountPropagation: HostToContainer
-            - name: host-root
-              mountPath: /host
-              readOnly: true
-              mountPropagation: HostToContainer
           securityContext:
             privileged: true
         - image: "FILLED BY THE OPERATOR"
@@ -131,24 +116,13 @@ spec:
           - name: PROCESS_TO_SIGNAL
             value: "gpu-feature-discovery"
       volumes:
-        - name: gpu-feature-discovery-entrypoint
-          configMap:
-            name: gpu-feature-discovery-entrypoint
-            defaultMode: 448
         - name: output-dir
           hostPath:
             path: "/etc/kubernetes/node-feature-discovery/features.d"
         - name: host-sys
           hostPath:
             path: /sys
-        - name: run-nvidia-validations
-          hostPath:
-            path: "/run/nvidia/validations"
-            type: DirectoryOrCreate
-        - name: host-root
-          hostPath:
-            path: /
-        - name: driver-install-dir
+        - name: run-nvidia
           hostPath:
-            path: /run/nvidia/driver
-            type: DirectoryOrCreate
+            path: "/run/nvidia"
+            type: Directory

From 8c4b19db957c2968f88e0932103b1cdf58416b6c Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis <cdesiniotis@nvidia.com>
Date: Tue, 11 Mar 2025 10:37:50 -0700
Subject: [PATCH 4/4] Revert "Always add 'config' emptyDir volume to MPS
 daemonset"

This reverts commit b82b91be4f7ace51aa6e96cacc773f3b626ae9e8.

Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
---
 assets/state-mps-control-daemon/0400_daemonset.yaml | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/assets/state-mps-control-daemon/0400_daemonset.yaml b/assets/state-mps-control-daemon/0400_daemonset.yaml
index 097ce8ca9..3be58af20 100644
--- a/assets/state-mps-control-daemon/0400_daemonset.yaml
+++ b/assets/state-mps-control-daemon/0400_daemonset.yaml
@@ -72,9 +72,6 @@ spec:
               value: ""
             - name: PROCESS_TO_SIGNAL
               value: ""
-          volumeMounts:
-            - name: config
-              mountPath: /config
       containers:
         - image: "FILLED BY OPERATOR"
           name: mps-control-daemon-ctr
@@ -96,8 +93,6 @@ spec:
               mountPath: /dev/shm
             - name: mps-root
               mountPath: /mps
-            - name: config
-              mountPath: /config
         - image: "FILLED BY THE OPERATOR"
           name: config-manager
           command: ["config-manager"]
@@ -126,9 +121,6 @@ spec:
               value: "1" # SIGHUP
             - name: PROCESS_TO_SIGNAL
               value: "/usr/bin/mps-control-daemon"
-          volumeMounts:
-            - name: config
-              mountPath: /config
       volumes:
         - name: run-nvidia
           hostPath:
@@ -141,5 +133,3 @@ spec:
         - name: mps-shm
           hostPath:
             path: /run/nvidia/mps/shm
-        - name: config
-          emptyDir: {}