-
Notifications
You must be signed in to change notification settings - Fork 458
OKD-294: Migrate runtime from runc to crun on an upgrade for OKD #5389
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -60,7 +60,9 @@ const ( | |||||
| // 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s | ||||||
| maxRetries = 15 | ||||||
|
|
||||||
| builtInLabelKey = "machineconfiguration.openshift.io/mco-built-in" | ||||||
| builtInLabelKey = "machineconfiguration.openshift.io/mco-built-in" | ||||||
| configMapName = "crio-default-container-runtime" | ||||||
| forceSyncOnUpgrade = "force-sync-on-upgrade" | ||||||
| ) | ||||||
|
|
||||||
| var ( | ||||||
|
|
@@ -79,6 +81,7 @@ type Controller struct { | |||||
| templatesDir string | ||||||
|
|
||||||
| client mcfgclientset.Interface | ||||||
| kubeClient clientset.Interface | ||||||
| configClient configclientset.Interface | ||||||
| eventRecorder record.EventRecorder | ||||||
|
|
||||||
|
|
@@ -148,6 +151,7 @@ func New( | |||||
| ctrl := &Controller{ | ||||||
| templatesDir: templatesDir, | ||||||
| client: mcfgClient, | ||||||
| kubeClient: kubeClient, | ||||||
| configClient: configClient, | ||||||
| eventRecorder: ctrlcommon.NamespacedEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "machineconfigcontroller-containerruntimeconfigcontroller"})), | ||||||
| queue: workqueue.NewTypedRateLimitingQueueWithConfig( | ||||||
|
|
@@ -213,6 +217,7 @@ func New( | |||||
|
|
||||||
| ctrl.clusterVersionLister = clusterVersionInformer.Lister() | ||||||
| ctrl.clusterVersionListerSynced = clusterVersionInformer.Informer().HasSynced | ||||||
| ctrl.queue.Add(forceSyncOnUpgrade) | ||||||
|
|
||||||
| ctrl.fgHandler = fgHandler | ||||||
|
|
||||||
|
|
@@ -596,6 +601,60 @@ func (ctrl *Controller) addAnnotation(cfg *mcfgv1.ContainerRuntimeConfig, annota | |||||
| return annotationUpdateErr | ||||||
| } | ||||||
|
|
||||||
| // migrateRuncToCrun performs the upgrade migration from runc to crun as the default container runtime. | ||||||
| // This function checks for the existence of the crio-default-container-runtime ConfigMap. If it exists, | ||||||
| // it deletes the MachineConfigs for master and worker pools, then deletes the ConfigMap to prevent | ||||||
| // re-running the migration. | ||||||
| func (ctrl *Controller) migrateRuncToCrun() error { | ||||||
| // Check if the migration ConfigMap exists | ||||||
| _, err := ctrl.kubeClient.CoreV1().ConfigMaps(ctrlcommon.MCONamespace).Get(context.TODO(), configMapName, metav1.GetOptions{}) | ||||||
| if errors.IsNotFound(err) { | ||||||
| // ConfigMap doesn't exist, no migration needed | ||||||
| return nil | ||||||
| } | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("error checking for crio-default-container-runtime configmap: %w", err) | ||||||
| } | ||||||
|
|
||||||
| klog.Info("Found crio-default-container-runtime ConfigMap, starting migration from runc to crun") | ||||||
|
|
||||||
| // Get all MachineConfigPools | ||||||
| pools, err := ctrl.mcpLister.List(labels.Everything()) | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("error listing MachineConfigPools: %w", err) | ||||||
| } | ||||||
|
|
||||||
| // Only process master and worker pools for the migration | ||||||
| for _, pool := range pools { | ||||||
| if pool.Name != ctrlcommon.MachineConfigPoolMaster && pool.Name != ctrlcommon.MachineConfigPoolWorker { | ||||||
| continue | ||||||
| } | ||||||
|
Comment on lines
+627
to
+631
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the context in #4635 (comment), I'm gathering that the intention is to only target default MCPs. What about the machine-config-operator/pkg/controller/common/constants.go Lines 66 to 67 in a1ac217
cc @yuqi-zhang Since it looks like you had the original suggestion to scope to default MCPs.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @yuqi-zhang do we need to consider the arbiter here? or are we good with the changes as is?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think arbiter probably isn't relevant here, since 2-node-with-fencing is not yet GA'ed and even if it was, I don't think there would be any OKD overlap with it atm for upgrades, so we should be good. So basically this is "forcing" any OKD clusters to use crun since runc is gone on upgrades, is installs still possible for runc if you set it (and is there a guard against it if so, I forget if we deprecated it)? Is RHEL10 also dropping runc? Maybe we need this generally speaking cc @sdodson
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, RHEL10 dropped runc but AFAIK the plan is to start building it for OCP as we're hesitant to drop runc until crun sees more adoption even if crun is the default since 4.18. |
||||||
|
|
||||||
| // Get the MachineConfig name for this pool | ||||||
| mcName := fmt.Sprintf("00-override-%s-generated-crio-default-container-runtime", pool.Name) | ||||||
Prashanth684 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
|
||||||
| // Delete the existing MachineConfig | ||||||
| err := ctrl.client.MachineconfigurationV1().MachineConfigs().Delete(context.TODO(), mcName, metav1.DeleteOptions{}) | ||||||
| if errors.IsNotFound(err) { | ||||||
| klog.Infof("MachineConfig %s not found, skipping migration for pool %s", mcName, pool.Name) | ||||||
| continue | ||||||
| } | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("error deleting MachineConfig %s: %w", mcName, err) | ||||||
| } | ||||||
|
|
||||||
| klog.Infof("Successfully deleted MachineConfig %s", mcName) | ||||||
| } | ||||||
|
|
||||||
| // Delete the ConfigMap after successful migration | ||||||
| if err := ctrl.kubeClient.CoreV1().ConfigMaps(ctrlcommon.MCONamespace).Delete(context.TODO(), configMapName, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { | ||||||
| return fmt.Errorf("error deleting crio-default-container-runtime configmap: %w", err) | ||||||
| } | ||||||
|
|
||||||
| klog.Info("Successfully completed migration from runc to crun and deleted migration ConfigMap") | ||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| // syncContainerRuntimeConfig will sync the ContainerRuntimeconfig with the given key. | ||||||
| // This function is not meant to be invoked concurrently with the same key. | ||||||
| // nolint: gocyclo | ||||||
|
|
@@ -606,6 +665,17 @@ func (ctrl *Controller) syncContainerRuntimeConfig(key string) error { | |||||
| klog.V(4).Infof("Finished syncing ContainerRuntimeconfig %q (%v)", key, time.Since(startTime)) | ||||||
| }() | ||||||
|
|
||||||
| // OKD only: Run the migration function at the start of sync | ||||||
| if version.IsSCOS() { | ||||||
| if err := ctrl.migrateRuncToCrun(); err != nil { | ||||||
| return fmt.Errorf("Error during runc to crun migration: %w", err) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| if key == forceSyncOnUpgrade { | ||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| _, name, err := cache.SplitMetaNamespaceKey(key) | ||||||
| if err != nil { | ||||||
| return err | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Have you observed this to be necessary? The new controller is expected to re-generate all the MCs, so everything should always be synced after the new controller is deployed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes — I noticed that, before I added this, upgrading to an image with these changes did not change the MC at all.