Skip to content

Commit 671fb37

Browse files
Merge pull request #5326 from openshift-cherrypick-robot/cherry-pick-5245-to-release-4.20
[release-4.20] OCPBUGS-62675: Cert Controller should live fetch SAN IPs during cert rotation
2 parents d707114 + 941c327 commit 671fb37

File tree

5 files changed

+241
-42
lines changed

5 files changed

+241
-42
lines changed

cmd/machine-config-controller/start.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ func runStartCmd(_ *cobra.Command, _ []string) {
9292
ctrlctx.KubeMAOSharedInformer.Core().V1().Secrets(),
9393
ctrlctx.KubeNamespacedInformerFactory.Core().V1().Secrets(),
9494
ctrlctx.KubeNamespacedInformerFactory.Core().V1().ConfigMaps(),
95+
ctrlctx.ConfigInformerFactory.Config().V1().Infrastructures(),
9596
)
9697
if err != nil {
9798
klog.Fatalf("unable to start cert rotation controller: %v", err)

pkg/controller/certrotation/certrotation_controller.go

Lines changed: 33 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"context"
66
"encoding/json"
77
"fmt"
8-
"net/url"
98
"time"
109

1110
"github.com/vincent-petithory/dataurl"
@@ -15,10 +14,12 @@ import (
1514
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
1615
"k8s.io/apimachinery/pkg/labels"
1716
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
17+
"k8s.io/apimachinery/pkg/util/wait"
1818
coreinformersv1 "k8s.io/client-go/informers/core/v1"
1919
"k8s.io/client-go/kubernetes"
2020
corelisterv1 "k8s.io/client-go/listers/core/v1"
2121
"k8s.io/client-go/tools/cache"
22+
"k8s.io/client-go/util/workqueue"
2223
"k8s.io/klog/v2"
2324
"k8s.io/utils/clock"
2425

@@ -32,6 +33,9 @@ import (
3233

3334
aroclientset "github.com/Azure/ARO-RP/pkg/operator/clientset/versioned"
3435

36+
configinformers "github.com/openshift/client-go/config/informers/externalversions/config/v1"
37+
configlisterv1 "github.com/openshift/client-go/config/listers/config/v1"
38+
3539
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
3640
)
3741

@@ -42,6 +46,7 @@ const (
4246
mcsCARefresh = 8 * oneYear
4347
mcsTLSKeyExpiry = mcsCAExpiry
4448
mcsTLSKeyRefresh = mcsCARefresh
49+
workQueueKey = "key"
4550
)
4651

4752
type CertRotationController struct {
@@ -51,9 +56,14 @@ type CertRotationController struct {
5156

5257
mcoConfigMapInfomer coreinformersv1.ConfigMapInformer
5358
maoSecretInformer coreinformersv1.SecretInformer
59+
infraInformer configinformers.InfrastructureInformer
5460

5561
mcoSecretLister corelisterv1.SecretLister
5662
maoSecretLister corelisterv1.SecretLister
63+
infraLister configlisterv1.InfrastructureLister
64+
65+
hostnamesRotation *DynamicServingRotation
66+
hostnamesQueue workqueue.TypedRateLimitingInterface[string]
5767

5868
certRotators []factory.Controller
5969

@@ -71,6 +81,7 @@ func New(
7181
maoSecretInformer coreinformersv1.SecretInformer,
7282
mcoSecretInformer coreinformersv1.SecretInformer,
7383
mcoConfigMapInfomer coreinformersv1.ConfigMapInformer,
84+
infraInformer configinformers.InfrastructureInformer,
7485
) (*CertRotationController, error) {
7586

7687
recorder := events.NewLoggingEventRecorder(componentName, clock.RealClock{})
@@ -88,34 +99,15 @@ func New(
8899
maoSecretInformer.Informer().HasSynced,
89100
mcoSecretInformer.Informer().HasSynced,
90101
mcoConfigMapInfomer.Informer().HasSynced,
102+
infraInformer.Informer().HasSynced,
91103
},
92-
}
93-
94-
// This is required for the machine-config-server-tls secret rotation
95-
cfg, err := configClient.ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{})
96-
if err != nil {
97-
return nil, fmt.Errorf("unable to get cluster infrastructure resource: %w", err)
98-
}
99104

100-
serverIPs := getServerIPsFromInfra(cfg)
101-
102-
if cfg.Status.APIServerInternalURL == "" {
103-
return nil, fmt.Errorf("no APIServerInternalURL found in cluster infrastructure resource")
104-
}
105-
apiserverIntURL, err := url.Parse(cfg.Status.APIServerInternalURL)
106-
if err != nil {
107-
return nil, fmt.Errorf("failed to parse %s: %w", apiserverIntURL, err)
108-
}
109-
110-
// Only attempt to get ARO cluster resource on Azure platform
111-
if cfg.Status.PlatformStatus != nil && cfg.Status.PlatformStatus.Type == configv1.AzurePlatformType {
112-
aroCluster, err := c.aroClient.AroV1alpha1().Clusters().Get(context.Background(), "cluster", metav1.GetOptions{})
113-
if err != nil {
114-
klog.Infof("ARO cluster resource not found or not accessible: %v", err)
115-
} else {
116-
klog.Infof("ARO cluster resource found w/ IPs: %s", aroCluster.Spec.APIIntIP)
117-
serverIPs = append(serverIPs, aroCluster.Spec.APIIntIP)
118-
}
105+
hostnamesRotation: &DynamicServingRotation{hostnamesChanged: make(chan struct{}, 10)},
106+
hostnamesQueue: workqueue.NewTypedRateLimitingQueueWithConfig(
107+
workqueue.DefaultTypedControllerRateLimiter[string](),
108+
workqueue.TypedRateLimitingQueueConfig[string]{Name: "Hostnames"}),
109+
infraInformer: infraInformer,
110+
infraLister: infraInformer.Lister(),
119111
}
120112

121113
// The cert controller will begin creating "machine-config-server-ca" configmap & secret in the MCO namespace.
@@ -161,7 +153,8 @@ func New(
161153
Validity: mcsTLSKeyExpiry,
162154
Refresh: mcsTLSKeyRefresh,
163155
CertCreator: &certrotation.ServingRotation{
164-
Hostnames: func() []string { return append([]string{apiserverIntURL.Hostname()}, serverIPs...) },
156+
Hostnames: c.hostnamesRotation.GetHostnames,
157+
HostnamesChanged: c.hostnamesRotation.hostnamesChanged,
165158
},
166159
Informer: mcoSecretInformer,
167160
Lister: c.mcoSecretLister,
@@ -214,23 +207,17 @@ func (c *CertRotationController) Run(ctx context.Context, workers int) {
214207
utilruntime.HandleError(err)
215208
}
216209

217-
for _, certRotator := range c.certRotators {
218-
go certRotator.Run(ctx, workers)
210+
if err := c.syncHostnames(); err != nil {
211+
utilruntime.HandleError(err)
219212
}
220213

221-
<-ctx.Done()
222-
}
214+
go wait.Until(c.runHostnames, time.Second, ctx.Done())
223215

224-
// This should not be directly called; it is only to be used for unit tests.
225-
func (c *CertRotationController) Sync() error {
226-
syncCtx := factory.NewSyncContext("mco-cert-rotation-sync", c.recorder)
227216
for _, certRotator := range c.certRotators {
228-
if err := certRotator.Sync(context.TODO(), syncCtx); err != nil {
229-
return err
230-
}
217+
go certRotator.Run(ctx, workers)
231218
}
232-
return nil
233219

220+
<-ctx.Done()
234221
}
235222

236223
func getServerIPsFromInfra(cfg *configv1.Infrastructure) []string {
@@ -286,6 +273,13 @@ func (c *CertRotationController) StartInformers() error {
286273
}); err != nil {
287274
return fmt.Errorf("unable to attach secret handler: %w", err)
288275
}
276+
if _, err := c.infraInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
277+
AddFunc: func(_ any) { c.hostnamesQueue.Add(workQueueKey) },
278+
UpdateFunc: func(_, _ any) { c.hostnamesQueue.Add(workQueueKey) },
279+
DeleteFunc: func(_ any) { c.hostnamesQueue.Add(workQueueKey) },
280+
}); err != nil {
281+
return fmt.Errorf("unable to attach infra handler: %w", err)
282+
}
289283
return nil
290284
}
291285

pkg/controller/certrotation/certrotation_controller_test.go

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212
"github.com/stretchr/testify/require"
1313

1414
configv1 "github.com/openshift/api/config/v1"
15+
configinformers "github.com/openshift/client-go/config/informers/externalversions"
16+
"github.com/openshift/library-go/pkg/controller/factory"
1517
"github.com/openshift/library-go/pkg/operator/certrotation"
1618
corev1 "k8s.io/api/core/v1"
1719
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -43,12 +45,14 @@ type fixture struct {
4345
maoSecretLister []*corev1.Secret
4446
mcoSecretLister []*corev1.Secret
4547
mcoConfigMapLister []*corev1.ConfigMap
48+
infraLister []*configv1.Infrastructure
4649

4750
objects []runtime.Object
4851
configObjects []runtime.Object
4952
machineObjects []runtime.Object
5053
aroObjects []runtime.Object
5154
k8sI kubeinformers.SharedInformerFactory
55+
infraInformer configinformers.SharedInformerFactory
5256

5357
controller *CertRotationController
5458
}
@@ -80,14 +84,15 @@ func (f *fixture) newController() *CertRotationController {
8084
Status: configv1.InfrastructureStatus{
8185
ControlPlaneTopology: configv1.HighlyAvailableTopologyMode,
8286
PlatformStatus: platformStatus,
83-
APIServerInternalURL: "test-url"},
87+
APIServerInternalURL: "https://10.0.0.1:6443"},
8488
})
8589

8690
f.kubeClient = fake.NewSimpleClientset(f.objects...)
8791
f.configClient = fakeconfigv1client.NewSimpleClientset(f.configObjects...)
8892
f.machineClient = fakemachineclientset.NewSimpleClientset(f.machineObjects...)
8993
f.aroClient = fakearoclientset.NewSimpleClientset(f.aroObjects...)
9094
f.k8sI = kubeinformers.NewSharedInformerFactory(f.kubeClient, noResyncPeriodFunc())
95+
f.infraInformer = configinformers.NewSharedInformerFactory(f.configClient, noResyncPeriodFunc())
9196

9297
for _, secret := range f.maoSecretLister {
9398
f.k8sI.Core().V1().Secrets().Informer().GetIndexer().Add(secret)
@@ -101,7 +106,12 @@ func (f *fixture) newController() *CertRotationController {
101106
f.k8sI.Core().V1().ConfigMaps().Informer().GetIndexer().Add(configMap)
102107
}
103108

104-
c, err := New(f.kubeClient, f.configClient, f.machineClient, f.aroClient, f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().ConfigMaps())
109+
for _, infra := range f.configObjects {
110+
f.infraInformer.Config().V1().Infrastructures().Informer().GetIndexer().Add(infra)
111+
f.infraLister = append(f.infraLister, infra.(*configv1.Infrastructure))
112+
}
113+
114+
c, err := New(f.kubeClient, f.configClient, f.machineClient, f.aroClient, f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().ConfigMaps(), f.infraInformer.Config().V1().Infrastructures())
105115
require.NoError(f.t, err)
106116

107117
c.StartInformers()
@@ -110,9 +120,25 @@ func (f *fixture) newController() *CertRotationController {
110120
return c
111121
}
112122

123+
func (f *fixture) sync() error {
124+
syncCtx := factory.NewSyncContext("mco-cert-rotation-sync", f.controller.recorder)
125+
126+
if err := f.controller.syncHostnames(); err != nil {
127+
return err
128+
}
129+
130+
for _, certRotator := range f.controller.certRotators {
131+
if err := certRotator.Sync(context.TODO(), syncCtx); err != nil {
132+
return err
133+
}
134+
}
135+
return nil
136+
137+
}
138+
113139
func (f *fixture) runController() {
114140

115-
err := f.controller.Sync()
141+
err := f.sync()
116142
require.NoError(f.t, err)
117143

118144
f.controller.reconcileUserDataSecrets()
@@ -164,6 +190,72 @@ func (f *fixture) verifyAROIPInTLSCertificate(t *testing.T, expectedIP string) {
164190
t.Logf("Successfully verified ARO IP %s is present in TLS certificate", expectedIP)
165191
}
166192

193+
func TestInfraUpdateTriggersCertResync(t *testing.T) {
194+
f := newFixture(t)
195+
f.objects = append(f.objects, getGoodMAOSecret("test-user-data"))
196+
f.maoSecretLister = append(f.maoSecretLister, getGoodMAOSecret("test-user-data"))
197+
f.machineObjects = append(f.machineObjects, getMachineSet("test-machine"))
198+
199+
f.controller = f.newController()
200+
201+
// Perform initial sync to create initial certificates
202+
f.runController()
203+
204+
// Update the Infrastructure object with a new APIServerInternalURL
205+
infraObj := &configv1.Infrastructure{
206+
ObjectMeta: metav1.ObjectMeta{
207+
Name: "cluster",
208+
},
209+
Status: configv1.InfrastructureStatus{
210+
ControlPlaneTopology: configv1.HighlyAvailableTopologyMode,
211+
APIServerInternalURL: "https://10.0.0.2:6443", // Changed from 10.0.0.1 to 10.0.0.2
212+
},
213+
}
214+
215+
// Update the Infrastructure object
216+
_, err := f.configClient.ConfigV1().Infrastructures().Update(context.TODO(), infraObj, metav1.UpdateOptions{})
217+
require.NoError(t, err)
218+
219+
// Update the informer with the new Infrastructure object
220+
f.infraInformer.Config().V1().Infrastructures().Informer().GetIndexer().Update(infraObj)
221+
222+
// Trigger the sync after Infrastructure update
223+
f.syncListers(t)
224+
f.runController()
225+
226+
// Verify that the TLS certificate was regenerated with the new hostname
227+
tlsSecret, err := f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.MachineConfigServerTLSSecretName, metav1.GetOptions{})
228+
require.NoError(t, err)
229+
require.NotNil(t, tlsSecret)
230+
231+
// Verify certificate contains new hostname
232+
certData, exists := tlsSecret.Data["tls.crt"]
233+
require.True(t, exists, "TLS certificate should exist in secret")
234+
require.NotEmpty(t, certData, "TLS certificate data should not be empty")
235+
236+
// Decode and parse certificate
237+
block, _ := pem.Decode(certData)
238+
require.NotNil(t, block, "Should be able to decode PEM certificate")
239+
240+
cert, err := x509.ParseCertificate(block.Bytes)
241+
require.NoError(t, err, "Should be able to parse TLS certificate")
242+
243+
// Verify the new hostname is in the certificate's DNS names
244+
expectedHostname := "10.0.0.2"
245+
found := false
246+
for _, dnsName := range cert.DNSNames {
247+
if dnsName == expectedHostname {
248+
found = true
249+
break
250+
}
251+
}
252+
require.True(t, found, "New hostname %s should be present in certificate DNS names", expectedHostname)
253+
t.Logf("Successfully verified hostname %s is present in TLS certificate after Infrastructure update", expectedHostname)
254+
255+
// Verify that user data secrets were updated (should be 1 total update)
256+
f.verifyUserDataSecretUpdateCount(1)
257+
}
258+
167259
func TestMCSCARotation(t *testing.T) {
168260
tests := []struct {
169261
name string
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package certrotationcontroller
2+
3+
import (
4+
"sync"
5+
6+
"k8s.io/apimachinery/pkg/util/sets"
7+
)
8+
9+
// DynamicServingRotation is a threadsafe struct to provide hostname methods for certrotation.ServingRotation info.
10+
// This allows us to change the hostnames and get our certs regenerated.
11+
type DynamicServingRotation struct {
12+
lock sync.RWMutex
13+
hostnames sets.Set[string]
14+
hostnamesChanged chan struct{}
15+
}
16+
17+
func (r *DynamicServingRotation) setHostnames(newHostnames []string) {
18+
if r.isSame(newHostnames) {
19+
return
20+
}
21+
22+
r.lock.Lock()
23+
r.hostnames = sets.New(newHostnames...)
24+
r.lock.Unlock()
25+
select {
26+
case r.hostnamesChanged <- struct{}{}:
27+
default:
28+
}
29+
}
30+
31+
func (r *DynamicServingRotation) isSame(newHostnames []string) bool {
32+
r.lock.RLock()
33+
defer r.lock.RUnlock()
34+
newSet := sets.New(newHostnames...)
35+
return r.hostnames.Equal(newSet)
36+
}
37+
38+
func (r *DynamicServingRotation) GetHostnames() []string {
39+
r.lock.RLock()
40+
defer r.lock.RUnlock()
41+
return r.hostnames.UnsortedList()
42+
}

0 commit comments

Comments
 (0)