From 1203a83c8729c68dff8f953a32f2b0f8cf577117 Mon Sep 17 00:00:00 2001 From: David Date: Wed, 13 Aug 2025 10:06:16 -0400 Subject: [PATCH 1/2] certcontroller: resync on infra object --- cmd/machine-config-controller/start.go | 1 + .../certrotation/certrotation_controller.go | 67 ++++++++++-------- .../certrotation/dynamic_serving.go | 42 +++++++++++ pkg/controller/certrotation/hostnames.go | 70 +++++++++++++++++++ 4 files changed, 152 insertions(+), 28 deletions(-) create mode 100644 pkg/controller/certrotation/dynamic_serving.go create mode 100644 pkg/controller/certrotation/hostnames.go diff --git a/cmd/machine-config-controller/start.go b/cmd/machine-config-controller/start.go index 6fbf870c63..0945aba22c 100644 --- a/cmd/machine-config-controller/start.go +++ b/cmd/machine-config-controller/start.go @@ -92,6 +92,7 @@ func runStartCmd(_ *cobra.Command, _ []string) { ctrlctx.KubeMAOSharedInformer.Core().V1().Secrets(), ctrlctx.KubeNamespacedInformerFactory.Core().V1().Secrets(), ctrlctx.KubeNamespacedInformerFactory.Core().V1().ConfigMaps(), + ctrlctx.ConfigInformerFactory.Config().V1().Infrastructures(), ) if err != nil { klog.Fatalf("unable to start cert rotation controller: %v", err) diff --git a/pkg/controller/certrotation/certrotation_controller.go b/pkg/controller/certrotation/certrotation_controller.go index f7dff2368d..3a0a39e641 100644 --- a/pkg/controller/certrotation/certrotation_controller.go +++ b/pkg/controller/certrotation/certrotation_controller.go @@ -5,7 +5,6 @@ import ( "context" "encoding/json" "fmt" - "net/url" "time" "github.com/vincent-petithory/dataurl" @@ -15,10 +14,12 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" coreinformersv1 "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes" corelisterv1 "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" "k8s.io/utils/clock" @@ -32,6 +33,9 @@ import ( aroclientset "github.com/Azure/ARO-RP/pkg/operator/clientset/versioned" + configinformers "github.com/openshift/client-go/config/informers/externalversions/config/v1" + configlisterv1 "github.com/openshift/client-go/config/listers/config/v1" + ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" ) @@ -42,6 +46,7 @@ const ( mcsCARefresh = 8 * oneYear mcsTLSKeyExpiry = mcsCAExpiry mcsTLSKeyRefresh = mcsCARefresh + workQueueKey = "key" ) type CertRotationController struct { @@ -51,9 +56,14 @@ type CertRotationController struct { mcoConfigMapInfomer coreinformersv1.ConfigMapInformer maoSecretInformer coreinformersv1.SecretInformer + infraInformer configinformers.InfrastructureInformer mcoSecretLister corelisterv1.SecretLister maoSecretLister corelisterv1.SecretLister + infraLister configlisterv1.InfrastructureLister + + hostnamesRotation *DynamicServingRotation + hostnamesQueue workqueue.TypedRateLimitingInterface[string] certRotators []factory.Controller @@ -71,6 +81,7 @@ func New( maoSecretInformer coreinformersv1.SecretInformer, mcoSecretInformer coreinformersv1.SecretInformer, mcoConfigMapInfomer coreinformersv1.ConfigMapInformer, + infraInformer configinformers.InfrastructureInformer, ) (*CertRotationController, error) { recorder := events.NewLoggingEventRecorder(componentName, clock.RealClock{}) @@ -88,34 +99,15 @@ func New( maoSecretInformer.Informer().HasSynced, mcoSecretInformer.Informer().HasSynced, mcoConfigMapInfomer.Informer().HasSynced, + infraInformer.Informer().HasSynced, }, - } - // This is required for the machine-config-server-tls secret rotation - cfg, err := configClient.ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{}) - if err != nil { - return nil, fmt.Errorf("unable to get cluster infrastructure resource: %w", err) - } - - serverIPs := getServerIPsFromInfra(cfg) - - if cfg.Status.APIServerInternalURL == "" { - return nil, fmt.Errorf("no APIServerInternalURL found in cluster infrastructure resource") - } - apiserverIntURL, err := url.Parse(cfg.Status.APIServerInternalURL) - if err != nil { - return nil, fmt.Errorf("failed to parse %s: %w", apiserverIntURL, err) - } - - // Only attempt to get ARO cluster resource on Azure platform - if cfg.Status.PlatformStatus != nil && cfg.Status.PlatformStatus.Type == configv1.AzurePlatformType { - aroCluster, err := c.aroClient.AroV1alpha1().Clusters().Get(context.Background(), "cluster", metav1.GetOptions{}) - if err != nil { - klog.Infof("ARO cluster resource not found or not accessible: %v", err) - } else { - klog.Infof("ARO cluster resource found w/ IPs: %s", aroCluster.Spec.APIIntIP) - serverIPs = append(serverIPs, aroCluster.Spec.APIIntIP) - } + hostnamesRotation: &DynamicServingRotation{hostnamesChanged: make(chan struct{}, 10)}, + hostnamesQueue: workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.DefaultTypedControllerRateLimiter[string](), + workqueue.TypedRateLimitingQueueConfig[string]{Name: "Hostnames"}), + infraInformer: infraInformer, + infraLister: infraInformer.Lister(), } // The cert controller will begin creating "machine-config-server-ca" configmap & secret in the MCO namespace. @@ -161,7 +153,8 @@ func New( Validity: mcsTLSKeyExpiry, Refresh: mcsTLSKeyRefresh, CertCreator: &certrotation.ServingRotation{ - Hostnames: func() []string { return append([]string{apiserverIntURL.Hostname()}, serverIPs...) }, + Hostnames: c.hostnamesRotation.GetHostnames, + HostnamesChanged: c.hostnamesRotation.hostnamesChanged, }, Informer: mcoSecretInformer, Lister: c.mcoSecretLister, @@ -214,6 +207,12 @@ func (c *CertRotationController) Run(ctx context.Context, workers int) { utilruntime.HandleError(err) } + if err := c.syncHostnames(); err != nil { + utilruntime.HandleError(err) + } + + go wait.Until(c.runHostnames, time.Second, ctx.Done()) + for _, certRotator := range c.certRotators { go certRotator.Run(ctx, workers) } @@ -224,6 +223,11 @@ func (c *CertRotationController) Run(ctx context.Context, workers int) { // This should not be directly called; it is only to be used for unit tests. func (c *CertRotationController) Sync() error { syncCtx := factory.NewSyncContext("mco-cert-rotation-sync", c.recorder) + + if err := c.syncHostnames(); err != nil { + return err + } + for _, certRotator := range c.certRotators { if err := certRotator.Sync(context.TODO(), syncCtx); err != nil { return err @@ -286,6 +290,13 @@ func (c *CertRotationController) StartInformers() error { }); err != nil { return fmt.Errorf("unable to attach secret handler: %w", err) } + if _, err := c.infraInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(_ any) { c.hostnamesQueue.Add(workQueueKey) }, + UpdateFunc: func(_, _ any) { c.hostnamesQueue.Add(workQueueKey) }, + DeleteFunc: func(_ any) { c.hostnamesQueue.Add(workQueueKey) }, + }); err != nil { + return fmt.Errorf("unable to attach infra handler: %w", err) + } return nil } diff --git a/pkg/controller/certrotation/dynamic_serving.go b/pkg/controller/certrotation/dynamic_serving.go new file mode 100644 index 0000000000..e3abc9284c --- /dev/null +++ b/pkg/controller/certrotation/dynamic_serving.go @@ -0,0 +1,42 @@ +package certrotationcontroller + +import ( + "sync" + + "k8s.io/apimachinery/pkg/util/sets" +) + +// DynamicServingRotation is a threadsafe struct to provide hostname methods for certrotation.ServingRotation info. +// This allows us to change the hostnames and get our certs regenerated. +type DynamicServingRotation struct { + lock sync.RWMutex + hostnames sets.Set[string] + hostnamesChanged chan struct{} +} + +func (r *DynamicServingRotation) setHostnames(newHostnames []string) { + if r.isSame(newHostnames) { + return + } + + r.lock.Lock() + r.hostnames = sets.New(newHostnames...) + r.lock.Unlock() + select { + case r.hostnamesChanged <- struct{}{}: + default: + } +} + +func (r *DynamicServingRotation) isSame(newHostnames []string) bool { + r.lock.RLock() + defer r.lock.RUnlock() + newSet := sets.New(newHostnames...) + return r.hostnames.Equal(newSet) +} + +func (r *DynamicServingRotation) GetHostnames() []string { + r.lock.RLock() + defer r.lock.RUnlock() + return r.hostnames.UnsortedList() +} diff --git a/pkg/controller/certrotation/hostnames.go b/pkg/controller/certrotation/hostnames.go new file mode 100644 index 0000000000..0ea9f7759c --- /dev/null +++ b/pkg/controller/certrotation/hostnames.go @@ -0,0 +1,70 @@ +package certrotationcontroller + +import ( + "context" + "fmt" + "net/url" + + configv1 "github.com/openshift/api/config/v1" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/klog/v2" +) + +func (c *CertRotationController) syncHostnames() error { + cfg, err := c.infraLister.Get("cluster") + if err != nil { + return fmt.Errorf("unable to get cluster infrastructure resource: %w", err) + } + + serverIPs := getServerIPsFromInfra(cfg) + + if cfg.Status.APIServerInternalURL == "" { + return fmt.Errorf("no APIServerInternalURL found in cluster infrastructure resource") + } + apiserverIntURL, err := url.Parse(cfg.Status.APIServerInternalURL) + if err != nil { + return fmt.Errorf("no APIServerInternalURL found in cluster infrastructure resource") + } + + // Only attempt to get ARO cluster resource on Azure platform + if cfg.Status.PlatformStatus != nil && cfg.Status.PlatformStatus.Type == configv1.AzurePlatformType { + aroCluster, err := c.aroClient.AroV1alpha1().Clusters().Get(context.Background(), "cluster", metav1.GetOptions{}) + if err != nil { + klog.Infof("ARO cluster resource not found or not accessible: %v", err) + } else { + klog.Infof("ARO cluster resource found w/ IPs: %s", aroCluster.Spec.APIIntIP) + serverIPs = append(serverIPs, aroCluster.Spec.APIIntIP) + } + } + + hostnames := append([]string{apiserverIntURL.Hostname()}, serverIPs...) + klog.Infof("syncing hostnames: %v", hostnames) + c.hostnamesRotation.setHostnames(hostnames) + return nil +} + +func (c *CertRotationController) runHostnames() { + for c.processHostnames() { + } +} + +func (c *CertRotationController) processHostnames() bool { + dsKey, quit := c.hostnamesQueue.Get() + if quit { + return false + } + defer c.hostnamesQueue.Done(dsKey) + + err := c.syncHostnames() + if err == nil { + c.hostnamesQueue.Forget(dsKey) + return true + } + + utilruntime.HandleError(fmt.Errorf("%v failed with : %v", dsKey, err)) + c.hostnamesQueue.AddRateLimited(dsKey) + + return true +} From 941c327913d9ac1a255811a3fc157bc9b6feb14d Mon Sep 17 00:00:00 2001 From: David Date: Wed, 13 Aug 2025 10:06:32 -0400 Subject: [PATCH 2/2] certcontroller: update unit tests --- .../certrotation/certrotation_controller.go | 17 ---- .../certrotation_controller_test.go | 98 ++++++++++++++++++- 2 files changed, 95 insertions(+), 20 deletions(-) diff --git a/pkg/controller/certrotation/certrotation_controller.go b/pkg/controller/certrotation/certrotation_controller.go index 3a0a39e641..823e347eb3 100644 --- a/pkg/controller/certrotation/certrotation_controller.go +++ b/pkg/controller/certrotation/certrotation_controller.go @@ -220,23 +220,6 @@ func (c *CertRotationController) Run(ctx context.Context, workers int) { <-ctx.Done() } -// This should not be directly called; it is only to be used for unit tests. -func (c *CertRotationController) Sync() error { - syncCtx := factory.NewSyncContext("mco-cert-rotation-sync", c.recorder) - - if err := c.syncHostnames(); err != nil { - return err - } - - for _, certRotator := range c.certRotators { - if err := certRotator.Sync(context.TODO(), syncCtx); err != nil { - return err - } - } - return nil - -} - func getServerIPsFromInfra(cfg *configv1.Infrastructure) []string { if cfg.Status.PlatformStatus == nil { return []string{} diff --git a/pkg/controller/certrotation/certrotation_controller_test.go b/pkg/controller/certrotation/certrotation_controller_test.go index fdac5874ce..231a1ea672 100644 --- a/pkg/controller/certrotation/certrotation_controller_test.go +++ b/pkg/controller/certrotation/certrotation_controller_test.go @@ -12,6 +12,8 @@ import ( "github.com/stretchr/testify/require" configv1 "github.com/openshift/api/config/v1" + configinformers "github.com/openshift/client-go/config/informers/externalversions" + "github.com/openshift/library-go/pkg/controller/factory" "github.com/openshift/library-go/pkg/operator/certrotation" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -43,12 +45,14 @@ type fixture struct { maoSecretLister []*corev1.Secret mcoSecretLister []*corev1.Secret mcoConfigMapLister []*corev1.ConfigMap + infraLister []*configv1.Infrastructure objects []runtime.Object configObjects []runtime.Object machineObjects []runtime.Object aroObjects []runtime.Object k8sI kubeinformers.SharedInformerFactory + infraInformer configinformers.SharedInformerFactory controller *CertRotationController } @@ -80,7 +84,7 @@ func (f *fixture) newController() *CertRotationController { Status: configv1.InfrastructureStatus{ ControlPlaneTopology: configv1.HighlyAvailableTopologyMode, PlatformStatus: platformStatus, - APIServerInternalURL: "test-url"}, + APIServerInternalURL: "https://10.0.0.1:6443"}, }) f.kubeClient = fake.NewSimpleClientset(f.objects...) @@ -88,6 +92,7 @@ func (f *fixture) newController() *CertRotationController { f.machineClient = fakemachineclientset.NewSimpleClientset(f.machineObjects...) f.aroClient = fakearoclientset.NewSimpleClientset(f.aroObjects...) f.k8sI = kubeinformers.NewSharedInformerFactory(f.kubeClient, noResyncPeriodFunc()) + f.infraInformer = configinformers.NewSharedInformerFactory(f.configClient, noResyncPeriodFunc()) for _, secret := range f.maoSecretLister { f.k8sI.Core().V1().Secrets().Informer().GetIndexer().Add(secret) @@ -101,7 +106,12 @@ func (f *fixture) newController() *CertRotationController { f.k8sI.Core().V1().ConfigMaps().Informer().GetIndexer().Add(configMap) } - c, err := New(f.kubeClient, f.configClient, f.machineClient, f.aroClient, f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().ConfigMaps()) + for _, infra := range f.configObjects { + f.infraInformer.Config().V1().Infrastructures().Informer().GetIndexer().Add(infra) + f.infraLister = append(f.infraLister, infra.(*configv1.Infrastructure)) + } + + c, err := New(f.kubeClient, f.configClient, f.machineClient, f.aroClient, f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().ConfigMaps(), f.infraInformer.Config().V1().Infrastructures()) require.NoError(f.t, err) c.StartInformers() @@ -110,9 +120,25 @@ func (f *fixture) newController() *CertRotationController { return c } +func (f *fixture) sync() error { + syncCtx := factory.NewSyncContext("mco-cert-rotation-sync", f.controller.recorder) + + if err := f.controller.syncHostnames(); err != nil { + return err + } + + for _, certRotator := range f.controller.certRotators { + if err := certRotator.Sync(context.TODO(), syncCtx); err != nil { + return err + } + } + return nil + +} + func (f *fixture) runController() { - err := f.controller.Sync() + err := f.sync() require.NoError(f.t, err) f.controller.reconcileUserDataSecrets() @@ -164,6 +190,72 @@ func (f *fixture) verifyAROIPInTLSCertificate(t *testing.T, expectedIP string) { t.Logf("Successfully verified ARO IP %s is present in TLS certificate", expectedIP) } +func TestInfraUpdateTriggersCertResync(t *testing.T) { + f := newFixture(t) + f.objects = append(f.objects, getGoodMAOSecret("test-user-data")) + f.maoSecretLister = append(f.maoSecretLister, getGoodMAOSecret("test-user-data")) + f.machineObjects = append(f.machineObjects, getMachineSet("test-machine")) + + f.controller = f.newController() + + // Perform initial sync to create initial certificates + f.runController() + + // Update the Infrastructure object with a new APIServerInternalURL + infraObj := &configv1.Infrastructure{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Status: configv1.InfrastructureStatus{ + ControlPlaneTopology: configv1.HighlyAvailableTopologyMode, + APIServerInternalURL: "https://10.0.0.2:6443", // Changed from 10.0.0.1 to 10.0.0.2 + }, + } + + // Update the Infrastructure object + _, err := f.configClient.ConfigV1().Infrastructures().Update(context.TODO(), infraObj, metav1.UpdateOptions{}) + require.NoError(t, err) + + // Update the informer with the new Infrastructure object + f.infraInformer.Config().V1().Infrastructures().Informer().GetIndexer().Update(infraObj) + + // Trigger the sync after Infrastructure update + f.syncListers(t) + f.runController() + + // Verify that the TLS certificate was regenerated with the new hostname + tlsSecret, err := f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.MachineConfigServerTLSSecretName, metav1.GetOptions{}) + require.NoError(t, err) + require.NotNil(t, tlsSecret) + + // Verify certificate contains new hostname + certData, exists := tlsSecret.Data["tls.crt"] + require.True(t, exists, "TLS certificate should exist in secret") + require.NotEmpty(t, certData, "TLS certificate data should not be empty") + + // Decode and parse certificate + block, _ := pem.Decode(certData) + require.NotNil(t, block, "Should be able to decode PEM certificate") + + cert, err := x509.ParseCertificate(block.Bytes) + require.NoError(t, err, "Should be able to parse TLS certificate") + + // Verify the new hostname is in the certificate's DNS names + expectedHostname := "10.0.0.2" + found := false + for _, dnsName := range cert.DNSNames { + if dnsName == expectedHostname { + found = true + break + } + } + require.True(t, found, "New hostname %s should be present in certificate DNS names", expectedHostname) + t.Logf("Successfully verified hostname %s is present in TLS certificate after Infrastructure update", expectedHostname) + + // Verify that user data secrets were updated (should be 1 total update) + f.verifyUserDataSecretUpdateCount(1) +} + func TestMCSCARotation(t *testing.T) { tests := []struct { name string