Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ const (
// 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s
maxRetries = 15

builtInLabelKey = "machineconfiguration.openshift.io/mco-built-in"
builtInLabelKey = "machineconfiguration.openshift.io/mco-built-in"
configMapName = "crio-default-container-runtime"
forceSyncOnUpgrade = "force-sync-on-upgrade"
)

var (
Expand All @@ -79,6 +81,7 @@ type Controller struct {
templatesDir string

client mcfgclientset.Interface
kubeClient clientset.Interface
configClient configclientset.Interface
eventRecorder record.EventRecorder

Expand Down Expand Up @@ -148,6 +151,7 @@ func New(
ctrl := &Controller{
templatesDir: templatesDir,
client: mcfgClient,
kubeClient: kubeClient,
configClient: configClient,
eventRecorder: ctrlcommon.NamespacedEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "machineconfigcontroller-containerruntimeconfigcontroller"})),
queue: workqueue.NewTypedRateLimitingQueueWithConfig(
Expand Down Expand Up @@ -213,6 +217,7 @@ func New(

ctrl.clusterVersionLister = clusterVersionInformer.Lister()
ctrl.clusterVersionListerSynced = clusterVersionInformer.Informer().HasSynced
ctrl.queue.Add(forceSyncOnUpgrade)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you observed this to be necessary? The new controller is expected to re-gen all the MCs so it should always be synced post-new controller deployment

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes - i noticed that before i added this, the upgrade to an image with these changes did not change the MC at all.


ctrl.fgHandler = fgHandler

Expand Down Expand Up @@ -596,6 +601,60 @@ func (ctrl *Controller) addAnnotation(cfg *mcfgv1.ContainerRuntimeConfig, annota
return annotationUpdateErr
}

// migrateRuncToCrun performs the upgrade migration from runc to crun as the default container runtime.
// This function checks for the existence of the crio-default-container-runtime ConfigMap. If it exists,
// it deletes the MachineConfigs for master and worker pools, then deletes the ConfigMap to prevent
// re-running the migration.
func (ctrl *Controller) migrateRuncToCrun() error {
// Check if the migration ConfigMap exists
_, err := ctrl.kubeClient.CoreV1().ConfigMaps(ctrlcommon.MCONamespace).Get(context.TODO(), configMapName, metav1.GetOptions{})
if errors.IsNotFound(err) {
// ConfigMap doesn't exist, no migration needed
return nil
}
if err != nil {
return fmt.Errorf("error checking for crio-default-container-runtime configmap: %w", err)
}

klog.Info("Found crio-default-container-runtime ConfigMap, starting migration from runc to crun")

// Get all MachineConfigPools
pools, err := ctrl.mcpLister.List(labels.Everything())
if err != nil {
return fmt.Errorf("error listing MachineConfigPools: %w", err)
}

// Only process master and worker pools for the migration
for _, pool := range pools {
if pool.Name != ctrlcommon.MachineConfigPoolMaster && pool.Name != ctrlcommon.MachineConfigPoolWorker {
continue
}
Comment on lines +627 to +631
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the context in #4635 (comment), I'm gathering that the intention is to only target default MCPs. What about the arbiter MCP? Should it also be included in this list of default MCPs?

// MachineConfigPoolArbiter is the MachineConfigPool name given to the arbiter
MachineConfigPoolArbiter = "arbiter"

cc @yuqi-zhang Since it looks like you had the original suggestion to scope to default MCPs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yuqi-zhang do we need to consider the arbiter here? or are we good with the changes as is?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think arbiter probably isn't relevant here, since 2-node-with-fencing is not yet GA'ed and even if it was, I don't think there would be any OKD overlap with it atm for upgrades, so we should be good.

So basically this is "forcing" any OKD clusters to use crun since runc is gone on upgrades, is installs still possible for runc if you set it (and is there a guard against it if so, I forget if we deprecated it)? Is RHEL10 also dropping runc? Maybe we need this generally speaking cc @sdodson

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, RHEL10 dropped runc but AFAIK the plan is to start building it for OCP as we're hesitant to drop runc until crun sees more adoption even if crun is the default since 4.18.


// Get the MachineConfig name for this pool
mcName := fmt.Sprintf("00-override-%s-generated-crio-default-container-runtime", pool.Name)

// Delete the existing MachineConfig
err := ctrl.client.MachineconfigurationV1().MachineConfigs().Delete(context.TODO(), mcName, metav1.DeleteOptions{})
if errors.IsNotFound(err) {
klog.Infof("MachineConfig %s not found, skipping migration for pool %s", mcName, pool.Name)
continue
}
if err != nil {
return fmt.Errorf("error deleting MachineConfig %s: %w", mcName, err)
}

klog.Infof("Successfully deleted MachineConfig %s", mcName)
}

// Delete the ConfigMap after successful migration
if err := ctrl.kubeClient.CoreV1().ConfigMaps(ctrlcommon.MCONamespace).Delete(context.TODO(), configMapName, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("error deleting crio-default-container-runtime configmap: %w", err)
}

klog.Info("Successfully completed migration from runc to crun and deleted migration ConfigMap")
return nil
}

// syncContainerRuntimeConfig will sync the ContainerRuntimeconfig with the given key.
// This function is not meant to be invoked concurrently with the same key.
// nolint: gocyclo
Expand All @@ -606,6 +665,17 @@ func (ctrl *Controller) syncContainerRuntimeConfig(key string) error {
klog.V(4).Infof("Finished syncing ContainerRuntimeconfig %q (%v)", key, time.Since(startTime))
}()

// OKD only: Run the migration function at the start of sync
if version.IsSCOS() {
if err := ctrl.migrateRuncToCrun(); err != nil {
return fmt.Errorf("Error during runc to crun migration: %w", err)
}
}

if key == forceSyncOnUpgrade {
return nil
}

_, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
return err
Expand Down