diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 41e6abac4..13ebabf3d 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -57,16 +57,18 @@ import ( const ( // DefaultContainerdConfigFile indicates default config file path for containerd DefaultContainerdConfigFile = "/etc/containerd/config.toml" + // DefaultContainerdDropInConfigFile indicates default drop-in config file path for containerd + DefaultContainerdDropInConfigFile = "/etc/containerd/conf.d/99-nvidia.toml" // DefaultContainerdSocketFile indicates default containerd socket file DefaultContainerdSocketFile = "/run/containerd/containerd.sock" // DefaultDockerConfigFile indicates default config file path for docker DefaultDockerConfigFile = "/etc/docker/daemon.json" // DefaultDockerSocketFile indicates default docker socket file DefaultDockerSocketFile = "/var/run/docker.sock" - // DefaultCRIOConfigFile indicates default config file path for cri-o. - // Note, config files in the drop-in directory, /etc/crio/crio.conf.d, - // have a higher priority than the default /etc/crio/crio.conf file. - DefaultCRIOConfigFile = "/etc/crio/crio.conf.d/99-nvidia.conf" + // DefaultCRIOConfigFile indicates default config file path for cri-o.
+ DefaultCRIOConfigFile = "/etc/crio/config.toml" + // DefaultCRIODropInConfigFile indicates the default path to the drop-in config file for cri-o + DefaultCRIODropInConfigFile = "/etc/crio/crio.conf.d/99-nvidia.conf" // TrustedCAConfigMapName indicates configmap with custom user CA injected TrustedCAConfigMapName = "gpu-operator-trusted-ca" // TrustedCABundleFileName indicates custom user ca certificate filename @@ -95,6 +97,8 @@ const ( DefaultRuntimeSocketTargetDir = "/runtime/sock-dir/" // DefaultRuntimeConfigTargetDir represents target directory where runtime socket dirctory will be mounted DefaultRuntimeConfigTargetDir = "/runtime/config-dir/" + // DefaultRuntimeDropInConfigTargetDir represents target directory where drop-in config directory will be mounted + DefaultRuntimeDropInConfigTargetDir = "/runtime/config-dir.d/" // ValidatorImageEnvName indicates env name for validator image passed ValidatorImageEnvName = "VALIDATOR_IMAGE" // ValidatorImagePullPolicyEnvName indicates env name for validator image pull policy passed @@ -1355,12 +1359,18 @@ func transformForRuntime(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, setContainerEnv(mainContainer, CRIOConfigModeEnvName, "config") } + // For runtime config files we have top-level configs and drop-in files. + // These are supported as follows: + // * Docker only supports top-level config files. + // * Containerd supports drop-in files, but requires modification to the top-level config + // * Crio supports drop-in files at a predefined location. The top-level config may be read + // but should not be updated. 
+ // setup mounts for runtime config file - runtimeConfigFile, err := getRuntimeConfigFile(mainContainer, runtime) + topLevelConfigFile, dropInConfigFile, err := getRuntimeConfigFiles(mainContainer, runtime) if err != nil { - return fmt.Errorf("error getting path to runtime config file: %v", err) + return fmt.Errorf("error getting path to runtime config file: %w", err) } - sourceConfigFileName := path.Base(runtimeConfigFile) var configEnvvarName string switch runtime { @@ -1372,15 +1382,43 @@ func transformForRuntime(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, configEnvvarName = "CRIO_CONFIG" } - setContainerEnv(mainContainer, "RUNTIME_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName) - setContainerEnv(mainContainer, configEnvvarName, DefaultRuntimeConfigTargetDir+sourceConfigFileName) + // Handle the top-level configs + if topLevelConfigFile != "" { + sourceConfigFileName := path.Base(topLevelConfigFile) + sourceConfigDir := path.Dir(topLevelConfigFile) + containerConfigDir := DefaultRuntimeConfigTargetDir + setContainerEnv(mainContainer, "RUNTIME_CONFIG", containerConfigDir+sourceConfigFileName) + setContainerEnv(mainContainer, configEnvvarName, containerConfigDir+sourceConfigFileName) + + volMountConfigName := fmt.Sprintf("%s-config", runtime) + volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: containerConfigDir} + mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, volMountConfig) + + configVol := corev1.Volume{Name: volMountConfigName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: sourceConfigDir, Type: newHostPathType(corev1.HostPathDirectoryOrCreate)}}} + obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol) + } + + // Handle the drop-in configs + // TODO: It's a bit of a hack to skip the `nvidia-kata-manager` container here. + // Ideally if the two projects are using the SAME API then this should be + // captured more rigorously. 
+ // Note that we probably want to implement drop-in file support in the + // kata manager in any case -- in which case it will be good to use a + // similar implementation. + if dropInConfigFile != "" && containerName != "nvidia-kata-manager" { + sourceConfigFileName := path.Base(dropInConfigFile) + sourceConfigDir := path.Dir(dropInConfigFile) + containerConfigDir := DefaultRuntimeDropInConfigTargetDir + setContainerEnv(mainContainer, "RUNTIME_DROP_IN_CONFIG", containerConfigDir+sourceConfigFileName) + setContainerEnv(mainContainer, "RUNTIME_DROP_IN_CONFIG_HOST_PATH", dropInConfigFile) - volMountConfigName := fmt.Sprintf("%s-config", runtime) - volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: DefaultRuntimeConfigTargetDir} - mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, volMountConfig) + volMountConfigName := fmt.Sprintf("%s-drop-in-config", runtime) + volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: containerConfigDir} + mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, volMountConfig) - configVol := corev1.Volume{Name: volMountConfigName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(runtimeConfigFile), Type: newHostPathType(corev1.HostPathDirectoryOrCreate)}}} - obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol) + configVol := corev1.Volume{Name: volMountConfigName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: sourceConfigDir, Type: newHostPathType(corev1.HostPathDirectoryOrCreate)}}} + obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol) + } // setup mounts for runtime socket file runtimeSocketFile, err := getRuntimeSocketFile(mainContainer, runtime) @@ -2396,30 +2434,47 @@ func TransformNodeStatusExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPol return nil } -// get runtime(docker, containerd) config file path based on toolkit 
container env or default -func getRuntimeConfigFile(c *corev1.Container, runtime string) (string, error) { - var runtimeConfigFile string +// getRuntimeConfigFiles returns the path to the top-level and drop-in config files that +// should be used when configuring the specified container runtime. +func getRuntimeConfigFiles(c *corev1.Container, runtime string) (string, string, error) { switch runtime { case gpuv1.Docker.String(): - runtimeConfigFile = DefaultDockerConfigFile + topLevelConfigFile := DefaultDockerConfigFile if value := getContainerEnv(c, "DOCKER_CONFIG"); value != "" { - runtimeConfigFile = value + topLevelConfigFile = value + } else if value := getContainerEnv(c, "RUNTIME_CONFIG"); value != "" { + topLevelConfigFile = value } + // Docker does not support drop-in files. + return topLevelConfigFile, "", nil case gpuv1.Containerd.String(): - runtimeConfigFile = DefaultContainerdConfigFile + topLevelConfigFile := DefaultContainerdConfigFile if value := getContainerEnv(c, "CONTAINERD_CONFIG"); value != "" { - runtimeConfigFile = value + topLevelConfigFile = value + } else if value := getContainerEnv(c, "RUNTIME_CONFIG"); value != "" { + topLevelConfigFile = value + } + dropInConfigFile := DefaultContainerdDropInConfigFile + if value := getContainerEnv(c, "RUNTIME_DROP_IN_CONFIG"); value != "" { + dropInConfigFile = value } + return topLevelConfigFile, dropInConfigFile, nil case gpuv1.CRIO.String(): - runtimeConfigFile = DefaultCRIOConfigFile + // TODO: We should still allow the top-level config to be specified + topLevelConfigFile := DefaultCRIOConfigFile if value := getContainerEnv(c, "CRIO_CONFIG"); value != "" { - runtimeConfigFile = value + topLevelConfigFile = value + } else if value := getContainerEnv(c, "RUNTIME_CONFIG"); value != "" { + topLevelConfigFile = value + } + dropInConfigFile := DefaultCRIODropInConfigFile + if value := getContainerEnv(c, "RUNTIME_DROP_IN_CONFIG"); value != "" { + dropInConfigFile = value } + return topLevelConfigFile, 
dropInConfigFile, nil default: - return "", fmt.Errorf("invalid runtime: %s", runtime) + return "", "", fmt.Errorf("invalid runtime: %s", runtime) } - - return runtimeConfigFile, nil } // get runtime(docker, containerd) socket file path based on toolkit container env or default diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go index db2864ded..29a0c07dc 100644 --- a/controllers/transforms_test.go +++ b/controllers/transforms_test.go @@ -332,6 +332,7 @@ func TestTransformForRuntime(t *testing.T) { WithContainer(corev1.Container{Name: "test-ctr"}), expectedOutput: NewDaemonset(). WithHostPathVolume("containerd-config", filepath.Dir(DefaultContainerdConfigFile), newHostPathType(corev1.HostPathDirectoryOrCreate)). + WithHostPathVolume("containerd-drop-in-config", "/etc/containerd/conf.d", newHostPathType(corev1.HostPathDirectoryOrCreate)). WithHostPathVolume("containerd-socket", filepath.Dir(DefaultContainerdSocketFile), nil). WithContainer(corev1.Container{ Name: "test-ctr", @@ -340,11 +341,14 @@ func TestTransformForRuntime(t *testing.T) { {Name: "CONTAINERD_RUNTIME_CLASS", Value: DefaultRuntimeClass}, {Name: "RUNTIME_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultContainerdConfigFile))}, {Name: "CONTAINERD_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultContainerdConfigFile))}, + {Name: "RUNTIME_DROP_IN_CONFIG", Value: "/runtime/config-dir.d/99-nvidia.toml"}, + {Name: "RUNTIME_DROP_IN_CONFIG_HOST_PATH", Value: "/etc/containerd/conf.d/99-nvidia.toml"}, {Name: "RUNTIME_SOCKET", Value: filepath.Join(DefaultRuntimeSocketTargetDir, filepath.Base(DefaultContainerdSocketFile))}, {Name: "CONTAINERD_SOCKET", Value: filepath.Join(DefaultRuntimeSocketTargetDir, filepath.Base(DefaultContainerdSocketFile))}, }, VolumeMounts: []corev1.VolumeMount{ {Name: "containerd-config", MountPath: DefaultRuntimeConfigTargetDir}, + {Name: "containerd-drop-in-config", MountPath: 
"/runtime/config-dir.d/"}, {Name: "containerd-socket", MountPath: DefaultRuntimeSocketTargetDir}, }, }), @@ -354,17 +358,21 @@ func TestTransformForRuntime(t *testing.T) { runtime: gpuv1.CRIO, input: NewDaemonset().WithContainer(corev1.Container{Name: "test-ctr"}), expectedOutput: NewDaemonset(). - WithHostPathVolume("crio-config", filepath.Dir(DefaultCRIOConfigFile), newHostPathType(corev1.HostPathDirectoryOrCreate)). + WithHostPathVolume("crio-config", "/etc/crio", newHostPathType(corev1.HostPathDirectoryOrCreate)). + WithHostPathVolume("crio-drop-in-config", "/etc/crio/crio.conf.d", newHostPathType(corev1.HostPathDirectoryOrCreate)). WithContainer(corev1.Container{ Name: "test-ctr", Env: []corev1.EnvVar{ {Name: "RUNTIME", Value: gpuv1.CRIO.String()}, {Name: CRIOConfigModeEnvName, Value: "config"}, - {Name: "RUNTIME_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultCRIOConfigFile))}, - {Name: "CRIO_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultCRIOConfigFile))}, + {Name: "RUNTIME_CONFIG", Value: "/runtime/config-dir/config.toml"}, + {Name: "CRIO_CONFIG", Value: "/runtime/config-dir/config.toml"}, + {Name: "RUNTIME_DROP_IN_CONFIG", Value: "/runtime/config-dir.d/99-nvidia.conf"}, + {Name: "RUNTIME_DROP_IN_CONFIG_HOST_PATH", Value: "/etc/crio/crio.conf.d/99-nvidia.conf"}, }, VolumeMounts: []corev1.VolumeMount{ {Name: "crio-config", MountPath: DefaultRuntimeConfigTargetDir}, + {Name: "crio-drop-in-config", MountPath: "/runtime/config-dir.d/"}, }, }), }, @@ -657,15 +665,19 @@ func TestTransformToolkit(t *testing.T) { {Name: "CONTAINERD_RUNTIME_CLASS", Value: "nvidia"}, {Name: "RUNTIME_CONFIG", Value: "/runtime/config-dir/config.toml"}, {Name: "CONTAINERD_CONFIG", Value: "/runtime/config-dir/config.toml"}, + {Name: "RUNTIME_DROP_IN_CONFIG", Value: "/runtime/config-dir.d/99-nvidia.toml"}, + {Name: "RUNTIME_DROP_IN_CONFIG_HOST_PATH", Value: "/etc/containerd/conf.d/99-nvidia.toml"}, {Name: 
"RUNTIME_SOCKET", Value: "/runtime/sock-dir/containerd.sock"}, {Name: "CONTAINERD_SOCKET", Value: "/runtime/sock-dir/containerd.sock"}, }, VolumeMounts: []corev1.VolumeMount{ {Name: "containerd-config", MountPath: "/runtime/config-dir/"}, + {Name: "containerd-drop-in-config", MountPath: "/runtime/config-dir.d/"}, {Name: "containerd-socket", MountPath: "/runtime/sock-dir/"}, }, }). WithHostPathVolume("containerd-config", "/etc/containerd", newHostPathType(corev1.HostPathDirectoryOrCreate)). + WithHostPathVolume("containerd-drop-in-config", "/etc/containerd/conf.d", newHostPathType(corev1.HostPathDirectoryOrCreate)). WithHostPathVolume("containerd-socket", "/run/containerd", nil). WithPullSecret("pull-secret"), }, @@ -731,14 +743,18 @@ func TestTransformToolkit(t *testing.T) { {Name: "CONTAINERD_SET_AS_DEFAULT", Value: "true"}, {Name: "RUNTIME", Value: "containerd"}, {Name: "RUNTIME_CONFIG", Value: "/runtime/config-dir/config.toml"}, + {Name: "RUNTIME_DROP_IN_CONFIG", Value: "/runtime/config-dir.d/99-nvidia.toml"}, + {Name: "RUNTIME_DROP_IN_CONFIG_HOST_PATH", Value: "/etc/containerd/conf.d/99-nvidia.toml"}, {Name: "RUNTIME_SOCKET", Value: "/runtime/sock-dir/containerd.sock"}, }, VolumeMounts: []corev1.VolumeMount{ {Name: "containerd-config", MountPath: "/runtime/config-dir/"}, + {Name: "containerd-drop-in-config", MountPath: "/runtime/config-dir.d/"}, {Name: "containerd-socket", MountPath: "/runtime/sock-dir/"}, }, }). WithHostPathVolume("containerd-config", "/var/lib/rancher/k3s/agent/etc/containerd", newHostPathType(corev1.HostPathDirectoryOrCreate)). + WithHostPathVolume("containerd-drop-in-config", "/etc/containerd/conf.d", newHostPathType(corev1.HostPathDirectoryOrCreate)). WithHostPathVolume("containerd-socket", "/run/k3s/containerd", nil). 
WithPullSecret("pull-secret"), }, @@ -2261,3 +2277,129 @@ func TestTransformDevicePluginCtrForCDI(t *testing.T) { }) } } + +func TestGetRuntimeConfigFiles(t *testing.T) { + testCases := []struct { + description string + container corev1.Container + runtime string + expectedTopLevelConfigFile string + expectedDropInConfigFile string + errorExpected bool + }{ + { + description: "invalid runtime", + container: corev1.Container{}, + runtime: "foo", + errorExpected: true, + }, + { + description: "docker", + container: corev1.Container{}, + runtime: gpuv1.Docker.String(), + expectedTopLevelConfigFile: DefaultDockerConfigFile, + expectedDropInConfigFile: "", + }, + { + description: "docker, config path overridden", + container: corev1.Container{ + Env: []corev1.EnvVar{ + {Name: "RUNTIME_CONFIG", Value: "/path/to/docker/daemon.json"}, + }, + }, + runtime: gpuv1.Docker.String(), + expectedTopLevelConfigFile: "/path/to/docker/daemon.json", + expectedDropInConfigFile: "", + }, + { + description: "docker, config path overridden, DOCKER_CONFIG envvar has highest precedence", + container: corev1.Container{ + Env: []corev1.EnvVar{ + {Name: "RUNTIME_CONFIG", Value: "/path/to/docker/daemon.json"}, + {Name: "DOCKER_CONFIG", Value: "/another/path/to/docker/daemon.json"}, + }, + }, + runtime: gpuv1.Docker.String(), + expectedTopLevelConfigFile: "/another/path/to/docker/daemon.json", + expectedDropInConfigFile: "", + }, + { + description: "containerd", + container: corev1.Container{}, + runtime: gpuv1.Containerd.String(), + expectedTopLevelConfigFile: DefaultContainerdConfigFile, + expectedDropInConfigFile: DefaultContainerdDropInConfigFile, + }, + { + description: "containerd, config path overridden", + container: corev1.Container{ + Env: []corev1.EnvVar{ + {Name: "RUNTIME_CONFIG", Value: "/path/to/containerd/config.toml"}, + {Name: "RUNTIME_DROP_IN_CONFIG", Value: "/path/to/containerd/drop-in/config.toml"}, + }, + }, + runtime: gpuv1.Containerd.String(), + expectedTopLevelConfigFile: 
"/path/to/containerd/config.toml", + expectedDropInConfigFile: "/path/to/containerd/drop-in/config.toml", + }, + { + description: "containerd, config path overridden, CONTAINERD_CONFIG envvar has highest precedence", + container: corev1.Container{ + Env: []corev1.EnvVar{ + {Name: "RUNTIME_CONFIG", Value: "/path/to/containerd/config.toml"}, + {Name: "CONTAINERD_CONFIG", Value: "/another/path/to/containerd/config.toml"}, + {Name: "RUNTIME_DROP_IN_CONFIG", Value: "/path/to/containerd/drop-in/config.toml"}, + }, + }, + runtime: gpuv1.Containerd.String(), + expectedTopLevelConfigFile: "/another/path/to/containerd/config.toml", + expectedDropInConfigFile: "/path/to/containerd/drop-in/config.toml", + }, + { + description: "crio", + container: corev1.Container{}, + runtime: gpuv1.CRIO.String(), + expectedTopLevelConfigFile: DefaultCRIOConfigFile, + expectedDropInConfigFile: DefaultCRIODropInConfigFile, + }, + { + description: "crio, config path overridden", + container: corev1.Container{ + Env: []corev1.EnvVar{ + {Name: "RUNTIME_CONFIG", Value: "/path/to/crio/config.toml"}, + {Name: "RUNTIME_DROP_IN_CONFIG", Value: "/path/to/crio/drop-in/config.toml"}, + }, + }, + runtime: gpuv1.CRIO.String(), + expectedTopLevelConfigFile: "/path/to/crio/config.toml", + expectedDropInConfigFile: "/path/to/crio/drop-in/config.toml", + }, + { + description: "crio, config path overridden, CRIO_CONFIG envvar has highest precedence", + container: corev1.Container{ + Env: []corev1.EnvVar{ + {Name: "RUNTIME_CONFIG", Value: "/path/to/crio/config.toml"}, + {Name: "CRIO_CONFIG", Value: "/another/path/to/crio/config.toml"}, + {Name: "RUNTIME_DROP_IN_CONFIG", Value: "/path/to/crio/drop-in/config.toml"}, + }, + }, + runtime: gpuv1.CRIO.String(), + expectedTopLevelConfigFile: "/another/path/to/crio/config.toml", + expectedDropInConfigFile: "/path/to/crio/drop-in/config.toml", + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + topLevelConfigFile, dropInConfigFile, 
err := getRuntimeConfigFiles(&tc.container, tc.runtime) + if tc.errorExpected { + require.Error(t, err) + return + } + require.NoError(t, err) + require.EqualValues(t, tc.expectedTopLevelConfigFile, topLevelConfigFile) + require.EqualValues(t, tc.expectedDropInConfigFile, dropInConfigFile) + }) + } + +}