diff --git a/assets/gpu-feature-discovery/0500_daemonset.yaml b/assets/gpu-feature-discovery/0500_daemonset.yaml index 7efc1b342..8cf620184 100644 --- a/assets/gpu-feature-discovery/0500_daemonset.yaml +++ b/assets/gpu-feature-discovery/0500_daemonset.yaml @@ -33,51 +33,9 @@ spec: securityContext: privileged: true volumeMounts: - - name: run-nvidia-validations - mountPath: /run/nvidia/validations + - name: run-nvidia + mountPath: /run/nvidia mountPropagation: HostToContainer - - name: gpu-feature-discovery-imex-init - image: "FILLED BY THE OPERATOR" - command: ["/bin/bash", "-c"] - args: - - | - until [[ -f /run/nvidia/validations/driver-ready ]] - do - echo "waiting for the driver validations to be ready..." - sleep 5 - done - set -o allexport - cat /run/nvidia/validations/driver-ready - . /run/nvidia/validations/driver-ready - - IMEX_NODES_CONFIG_FILE=/etc/nvidia-imex/nodes_config.cfg - if [[ -f /config/${IMEX_NODES_CONFIG_FILE} ]]; then - echo "Removing cached IMEX nodes config" - rm -f /config/${IMEX_NODES_CONFIG_FILE} - fi - if [[ ! -f ${DRIVER_ROOT_CTR_PATH}/${IMEX_NODES_CONFIG_FILE} ]]; then - echo "No IMEX nodes config path detected; Skipping" - exit 0 - fi - echo "Copying IMEX nodes config" - mkdir -p $(dirname /config/${IMEX_NODES_CONFIG_FILE}) - cp ${DRIVER_ROOT_CTR_PATH}/${IMEX_NODES_CONFIG_FILE} /config/${IMEX_NODES_CONFIG_FILE} - securityContext: - privileged: true - volumeMounts: - - name: config - mountPath: /config - - name: run-nvidia-validations - mountPath: /run/nvidia/validations - mountPropagation: HostToContainer - - name: host-root - mountPath: /host/etc - subPath: etc - readOnly: true - - name: driver-install-dir - mountPath: /driver-root/etc - subPath: etc - readOnly: true - name: config-manager-init image: "FILLED BY THE OPERATOR" command: ["config-manager"] @@ -104,9 +62,6 @@ spec: value: "" - name: PROCESS_TO_SIGNAL value: "" - volumeMounts: - - name: config - mountPath: /config containers: - image: "FILLED BY THE OPERATOR" name: gpu-feature-discovery @@ -130,8 +85,6 @@ spec: - name: host-sys mountPath: /sys readOnly: true - - name: config - mountPath: /config securityContext: privileged: true - image: "FILLED BY THE OPERATOR" @@ -162,9 +115,6 @@ spec: value: "1" # SIGHUP - name: PROCESS_TO_SIGNAL value: "gpu-feature-discovery" - volumeMounts: - - name: config - mountPath: /config volumes: - name: output-dir hostPath: @@ -172,16 +122,7 @@ spec: - name: host-sys hostPath: path: /sys - - name: run-nvidia-validations - hostPath: - path: "/run/nvidia/validations" - type: DirectoryOrCreate - - name: host-root - hostPath: - path: / - - name: driver-install-dir + - name: run-nvidia hostPath: - path: /run/nvidia/driver - type: DirectoryOrCreate - - name: config - emptyDir: {} + path: "/run/nvidia" + type: Directory diff --git a/assets/state-device-plugin/0500_daemonset.yaml b/assets/state-device-plugin/0500_daemonset.yaml index e6a68bd16..76de43420 100644 --- a/assets/state-device-plugin/0500_daemonset.yaml +++ b/assets/state-device-plugin/0500_daemonset.yaml @@ -61,9 +61,6 @@ spec: value: "" - name: PROCESS_TO_SIGNAL value: "" - volumeMounts: - - name: config - mountPath: /config containers: - image: "FILLED BY THE OPERATOR" name: nvidia-device-plugin @@ -110,8 +107,6 @@ spec: mountPath: /dev/shm - name: mps-root mountPath: /mps - - name: config - mountPath: /config - image: "FILLED BY THE OPERATOR" name: config-manager command: ["config-manager"] @@ -140,9 +135,6 @@ spec: value: "1" # SIGHUP - name: PROCESS_TO_SIGNAL value: "nvidia-device-plugin" - volumeMounts: - - name: config - mountPath: /config volumes: - name: nvidia-device-plugin-entrypoint configMap: @@ -173,5 +165,3 @@ spec: - name: mps-shm hostPath: path: /run/nvidia/mps/shm - - name: config - emptyDir: {} diff --git a/assets/state-mps-control-daemon/0400_daemonset.yaml b/assets/state-mps-control-daemon/0400_daemonset.yaml index 097ce8ca9..3be58af20 100644 --- a/assets/state-mps-control-daemon/0400_daemonset.yaml +++ b/assets/state-mps-control-daemon/0400_daemonset.yaml @@ -72,9 +72,6 @@ spec: value: "" - name: PROCESS_TO_SIGNAL value: "" - volumeMounts: - - name: config - mountPath: /config containers: - image: "FILLED BY OPERATOR" name: mps-control-daemon-ctr @@ -96,8 +93,6 @@ spec: mountPath: /dev/shm - name: mps-root mountPath: /mps - - name: config - mountPath: /config - image: "FILLED BY THE OPERATOR" name: config-manager command: ["config-manager"] @@ -126,9 +121,6 @@ spec: value: "1" # SIGHUP - name: PROCESS_TO_SIGNAL value: "/usr/bin/mps-control-daemon" - volumeMounts: - - name: config - mountPath: /config volumes: - name: run-nvidia hostPath: @@ -141,5 +133,3 @@ spec: - name: mps-shm hostPath: path: /run/nvidia/mps/shm - - name: config - emptyDir: {} diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 8103b9009..21f980145 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -895,14 +895,6 @@ func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPol } obj.Spec.Template.Spec.Containers[0].Image = img - // update image for IMEX init container - for i, initCtr := range obj.Spec.Template.Spec.InitContainers { - if initCtr.Name == "gpu-feature-discovery-imex-init" { - obj.Spec.Template.Spec.InitContainers[i].Image = img - break - } - } - // update image pull policy obj.Spec.Template.Spec.Containers[0].ImagePullPolicy = gpuv1.ImagePullPolicy(config.GPUFeatureDiscovery.ImagePullPolicy) @@ -2456,7 +2448,10 @@ func isCustomPluginConfigSet(pluginConfig *gpuv1.DevicePluginConfig) bool { // adds shared volume mounts required for custom plugin config provided via a ConfigMap func addSharedMountsForPluginConfig(container *corev1.Container, config *gpuv1.DevicePluginConfig) { + emptyDirMount := corev1.VolumeMount{Name: "config", MountPath: "/config"} configVolMount := corev1.VolumeMount{Name: config.Name, MountPath: "/available-configs"} + + container.VolumeMounts = append(container.VolumeMounts, emptyDirMount) container.VolumeMounts = append(container.VolumeMounts, configVolMount) } @@ -2492,7 +2487,7 @@ func handleDevicePluginConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy continue } setContainerEnv(&obj.Spec.Template.Spec.Containers[i], "CONFIG_FILE", "/config/config.yaml") - // add configmap volume mount + // setup sharedvolume(emptydir) for main container addSharedMountsForPluginConfig(&obj.Spec.Template.Spec.Containers[i], config.DevicePlugin.Config) } @@ -2503,8 +2498,9 @@ func handleDevicePluginConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy shareProcessNamespace := true obj.Spec.Template.Spec.ShareProcessNamespace = &shareProcessNamespace } - // add configmap volume + // setup volumes from configmap and shared emptyDir obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, createConfigMapVolume(config.DevicePlugin.Config.Name, nil)) + obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, createEmptyDirVolume("config")) // apply env/volume changes to initContainer err := transformConfigManagerInitContainer(obj, config) @@ -3129,6 +3125,15 @@ func createConfigMapVolume(configMapName string, itemsToInclude []corev1.KeyToPa return corev1.Volume{Name: configMapName, VolumeSource: volumeSource} } +func createEmptyDirVolume(volumeName string) corev1.Volume { + return corev1.Volume{ + Name: volumeName, + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + } +} + func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error { driverIndex := 0 driverCtrFound := false