Skip to content

Commit ef79ad3

Browse files
committed
Support the DevicePluginCDIDevices feature gate
This patch adds support for the `DevicePluginCDIDevices` feature gate by adding `spec.operator.useDevicePluginCDIDevicesFeature` to `ClusterPolicy`. When this field is set, the operator sets the `DEVICE_LIST_STRATEGY` device plug-in environment variable to `cdi-cri`. Signed-off-by: Jean-Francois Roy <[email protected]>
1 parent 8fa9ed5 commit ef79ad3

File tree

8 files changed

+39
-1
lines changed

8 files changed

+39
-1
lines changed

api/nvidia/v1/clusterpolicy_types.go

+12
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ type OperatorSpec struct {
148148
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="On OpenShift, enable DriverToolkit image to build and install driver modules"
149149
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
150150
UseOpenShiftDriverToolkit *bool `json:"use_ocp_driver_toolkit,omitempty"`
151+
152+
// UseDevicePluginCDIDevicesFeature indicates if the device plug-in should be configured to use the CDI devices feature
153+
UseDevicePluginCDIDevicesFeature *bool `json:"useDevicePluginCDIDevicesFeature,omitempty"`
151154
}
152155

153156
// HostPathsSpec defines various paths on the host needed by GPU Operator components
@@ -1827,6 +1830,15 @@ func ImagePullPolicy(pullPolicy string) corev1.PullPolicy {
18271830
return imagePullPolicy
18281831
}
18291832

1833+
// DevicePluginCDIDevicesFeatureEnabled reports whether the DevicePluginCDIDevices feature is enabled; it returns false when UseDevicePluginCDIDevicesFeature is unset.
1834+
func (s *OperatorSpec) DevicePluginCDIDevicesFeatureEnabled() bool {
1835+
if s.UseDevicePluginCDIDevicesFeature == nil {
1836+
// field not specified by the user: the feature defaults to disabled
1837+
return false
1838+
}
1839+
return *s.UseDevicePluginCDIDevicesFeature
1840+
}
1841+
18301842
// IsEnabled returns true if driver install is enabled(default) through gpu-operator
18311843
func (d *DriverSpec) IsEnabled() bool {
18321844
if d.Enabled == nil {

api/nvidia/v1/zz_generated.deepcopy.go

+5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bundle/manifests/nvidia.com_clusterpolicies.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -1558,6 +1558,10 @@ spec:
15581558
image should be used on OpenShift to build and install driver
15591559
modules
15601560
type: boolean
1561+
useDevicePluginCDIDevicesFeature:
1562+
description: UseDevicePluginCDIDevicesFeature indicates if the device plug-in
1563+
should be configured to use the CDI devices feature
1564+
type: boolean
15611565
required:
15621566
- defaultRuntime
15631567
type: object

config/crd/bases/nvidia.com_clusterpolicies.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -1558,6 +1558,10 @@ spec:
15581558
image should be used on OpenShift to build and install driver
15591559
modules
15601560
type: boolean
1561+
useDevicePluginCDIDevicesFeature:
1562+
description: UseDevicePluginCDIDevicesFeature indicates if the
1563+
device plug-in should be configured to use the CDI devices feature
1564+
type: boolean
15611565
required:
15621566
- defaultRuntime
15631567
type: object

controllers/object_controls.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -1398,7 +1398,11 @@ func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
13981398
// update env required for CDI support
13991399
if config.CDI.IsEnabled() {
14001400
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIEnabledEnvName, "true")
1401-
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "envvar,cdi-annotations")
1401+
if config.Operator.DevicePluginCDIDevicesFeatureEnabled() {
1402+
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "cdi-cri")
1403+
} else {
1404+
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "envvar,cdi-annotations")
1405+
}
14021406
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIAnnotationPrefixEnvName, "nvidia.cdi.k8s.io/")
14031407
if config.Toolkit.IsEnabled() {
14041408
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCDIHookPathEnvName, filepath.Join(config.Toolkit.InstallDir, "toolkit/nvidia-cdi-hook"))

deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -1558,6 +1558,10 @@ spec:
15581558
image should be used on OpenShift to build and install driver
15591559
modules
15601560
type: boolean
1561+
useDevicePluginCDIDevicesFeature:
1562+
description: UseDevicePluginCDIDevicesFeature indicates if the device plug-in
1563+
should be configured to use the CDI devices feature
1564+
type: boolean
15611565
required:
15621566
- defaultRuntime
15631567
type: object

deployments/gpu-operator/templates/clusterpolicy.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ spec:
4646
{{- if .Values.operator.use_ocp_driver_toolkit }}
4747
use_ocp_driver_toolkit: {{ .Values.operator.use_ocp_driver_toolkit }}
4848
{{- end }}
49+
{{- if .Values.operator.useDevicePluginCDIDevicesFeature }}
50+
useDevicePluginCDIDevicesFeature: {{ .Values.operator.useDevicePluginCDIDevicesFeature }}
51+
{{- end }}
4952
daemonsets:
5053
labels:
5154
{{- include "gpu-operator.operand-labels" . | nindent 6 }}

deployments/gpu-operator/values.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ operator:
8080
# upgrade CRD on chart upgrade, requires --disable-openapi-validation flag
8181
# to be passed during helm upgrade.
8282
upgradeCRD: true
83+
# configure the device plug-in to use the DevicePluginCDIDevices feature (only takes effect when CDI is enabled)
84+
useDevicePluginCDIDevicesFeature: false
8385
initContainer:
8486
image: cuda
8587
repository: nvcr.io/nvidia

0 commit comments

Comments
 (0)