Skip to content

Commit 82dbc48

Browse files
committed
test/extended: add CPMS boot image update e2es
1 parent 06e7b70 commit 82dbc48

File tree

9 files changed

+397
-0
lines changed

9 files changed

+397
-0
lines changed

test/extended/boot_image.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ func getRandomMachineSet(machineClient *machineclient.Clientset) machinev1beta1.
104104
}
105105

106106
// verifyMachineSetUpdate verifies that the boot image values of a MachineSet are reconciled correctly
107+
// nolint:dupl // I separated these from verifyControlPlaneMachineSetUpdate for readability
107108
func verifyMachineSetUpdate(oc *exutil.CLI, machineSet machinev1beta1.MachineSet, updateExpected bool) {
108109

109110
newProviderSpecPatch, originalProviderSpecPatch, backdatedBootImage, originalBootImage := createFakeUpdatePatch(oc, machineSet)

test/extended/boot_image_cpms.go

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
package extended
2+
3+
import (
4+
"context"
5+
"fmt"
6+
7+
osconfigv1 "github.com/openshift/api/config/v1"
8+
"sigs.k8s.io/yaml"
9+
10+
machinev1 "github.com/openshift/api/machine/v1"
11+
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
12+
exutil "github.com/openshift/machine-config-operator/test/extended/util"
13+
14+
o "github.com/onsi/gomega"
15+
16+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
17+
e2e "k8s.io/kubernetes/test/e2e/framework"
18+
)
19+
20+
// verifyControlPlaneMachineSetUpdate verifies that the the boot image values of a ControlPlaneMachineSet are reconciled correctly
21+
// nolint:dupl // I separated these from verifyMachineSetUpdate for readability
22+
func verifyControlPlaneMachineSetUpdate(oc *exutil.CLI, cpms machinev1.ControlPlaneMachineSet, updateExpected bool) {
23+
24+
newProviderSpecPatch, originalProviderSpecPatch, backdatedBootImage, originalBootImage := createFakeUpdatePatchCPMS(oc, cpms)
25+
err := oc.Run("patch").Args(ControlPlaneMachinesetQualifiedName, cpms.Name, "-p", newProviderSpecPatch, "-n", MAPINamespace, "--type=json").Execute()
26+
o.Expect(err).NotTo(o.HaveOccurred())
27+
defer func() {
28+
// Restore machineSet to original boot image as the machineset may be used by other test variants, regardless of success/fail
29+
err = oc.Run("patch").Args(ControlPlaneMachinesetQualifiedName, cpms.Name, "-p", originalProviderSpecPatch, "-n", MAPINamespace, "--type=json").Execute()
30+
o.Expect(err).NotTo(o.HaveOccurred())
31+
e2e.Logf("Restored build name in the machineset %s to \"%s\"", cpms.Name, originalBootImage)
32+
}()
33+
// Ensure boot image controller is not progressing
34+
e2e.Logf("Waiting until the boot image controller is not progressing...")
35+
waitForBootImageControllerToComplete(oc)
36+
37+
// Fetch the providerSpec of the machineset under test again
38+
providerSpec, err := oc.Run("get").Args(ControlPlaneMachinesetQualifiedName, cpms.Name, "-o", "template", "--template=`{{.spec.template.machines_v1beta1_machine_openshift_io.spec.providerSpec.value}}`", "-n", MAPINamespace).Output()
39+
o.Expect(err).NotTo(o.HaveOccurred())
40+
41+
// Verify that the machineset has the expected boot image values
42+
// If an update is expected, the backdated boot image should not be present
43+
// If an update is NOT expected, the backdated boot image should still be present; ie machineset is left untouched
44+
if updateExpected {
45+
o.Expect(providerSpec).ShouldNot(o.ContainSubstring(backdatedBootImage))
46+
} else {
47+
o.Expect(providerSpec).Should(o.ContainSubstring(backdatedBootImage))
48+
}
49+
}
50+
51+
// createFakeUpdatePatchCPMS creates an update patch for the ControlPlaneMachineSet object based on the platform
52+
func createFakeUpdatePatchCPMS(oc *exutil.CLI, cpms machinev1.ControlPlaneMachineSet) (string, string, string, string) {
53+
infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{})
54+
o.Expect(err).NotTo(o.HaveOccurred())
55+
56+
switch infra.Status.PlatformStatus.Type {
57+
case osconfigv1.AWSPlatformType:
58+
return generateAWSProviderSpecPatchCPMS(cpms)
59+
case osconfigv1.GCPPlatformType:
60+
return generateGCPProviderSpecPatchCPMS(cpms)
61+
case osconfigv1.AzurePlatformType:
62+
return generateAzureProviderSpecPatchCPMS(cpms)
63+
default:
64+
e2e.Failf("unexpected platform type; should not be here")
65+
return "", "", "", ""
66+
}
67+
}
68+
69+
// generateAWSProviderSpecPatchCPMS generates a fake update patch for the AWS ControlPlaneMachineSet
70+
func generateAWSProviderSpecPatchCPMS(cpms machinev1.ControlPlaneMachineSet) (string, string, string, string) {
71+
providerSpec := new(machinev1beta1.AWSMachineProviderConfig)
72+
err := unmarshalProviderSpecCPMS(&cpms, providerSpec)
73+
o.Expect(err).NotTo(o.HaveOccurred())
74+
75+
// Modify the boot image to an older known AMI value
76+
// See: https://issues.redhat.com/browse/OCPBUGS-57426
77+
originalBootImage := *providerSpec.AMI.ID
78+
newBootImage := "ami-000145e5a91e9ac22"
79+
jsonPatch := fmt.Sprintf(`[{"op": "replace", "path": "/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/ami/id", "value": "%s"}]`, newBootImage)
80+
81+
// Create JSON patch to restore original AMI ID
82+
originalJSONPatch := fmt.Sprintf(`[{"op": "replace", "path": "/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/ami/id", "value": "%s"}]`, originalBootImage)
83+
84+
return jsonPatch, originalJSONPatch, newBootImage, originalBootImage
85+
86+
}
87+
88+
// generateGCPProviderSpecPatchCPMS generates a fake update patch for the GCP ControlPlaneMachineSet
89+
func generateGCPProviderSpecPatchCPMS(cpms machinev1.ControlPlaneMachineSet) (string, string, string, string) {
90+
providerSpec := new(machinev1beta1.GCPMachineProviderSpec)
91+
err := unmarshalProviderSpecCPMS(&cpms, providerSpec)
92+
o.Expect(err).NotTo(o.HaveOccurred())
93+
94+
// Modify the boot image to a older known value.
95+
// See: https://issues.redhat.com/browse/OCPBUGS-57426
96+
originalBootImage := providerSpec.Disks[0].Image
97+
newBootImage := "projects/rhcos-cloud/global/images/rhcos-410-84-202210040010-0-gcp-x86-64"
98+
jsonPatch := fmt.Sprintf(`[{"op": "replace", "path": "/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/disks/0/image", "value": "%s"}]`, newBootImage)
99+
100+
// Create JSON patch to restore original disk image
101+
originalJSONPatch := fmt.Sprintf(`[{"op": "replace", "path": "/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/disks/0/image", "value": "%s"}]`, originalBootImage)
102+
103+
return jsonPatch, originalJSONPatch, newBootImage, originalBootImage
104+
}
105+
106+
// generateAzureProviderSpecPatchCPMS generates a fake update patch for the Azure ControlPlaneMachineSet
107+
func generateAzureProviderSpecPatchCPMS(cpms machinev1.ControlPlaneMachineSet) (string, string, string, string) {
108+
providerSpec := new(machinev1beta1.AzureMachineProviderSpec)
109+
err := unmarshalProviderSpecCPMS(&cpms, providerSpec)
110+
o.Expect(err).NotTo(o.HaveOccurred())
111+
112+
// Use JSON patch to precisely replace just the image field with marketplace image
113+
// This avoids any merge conflicts with existing fields
114+
// Use an older known 4.18 boot image that is available in the marketplace
115+
jsonPatch := `[{"op": "replace", "path": "/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/image", "value": {"offer": "aro4", "publisher": "azureopenshift", "resourceID": "", "sku": "418-v2", "version": "418.94.20250122", "type": "MarketplaceNoPlan"}}]`
116+
117+
// Create JSON patch to restore original image
118+
originalImage := providerSpec.Image
119+
originalJSONPatch := fmt.Sprintf(`[{"op": "replace", "path": "/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/image", "value": {"offer": "%s", "publisher": "%s", "resourceID": "%s", "sku": "%s", "version": "%s", "type": "%s"}}]`,
120+
originalImage.Offer, originalImage.Publisher, originalImage.ResourceID, originalImage.SKU, originalImage.Version, originalImage.Type)
121+
122+
return jsonPatch, originalJSONPatch, "418.94.20250122", providerSpec.Image.Version
123+
}
124+
125+
// unmarshalProviderSpecCPMS unmarshals the controlplanemachineset's provider spec into
126+
// a ProviderSpec object. Returns an error if providerSpec field is nil,
127+
// or the unmarshal fails
128+
func unmarshalProviderSpecCPMS(cpms *machinev1.ControlPlaneMachineSet, providerSpec interface{}) error {
129+
if cpms.Spec.Template.OpenShiftMachineV1Beta1Machine.Spec.ProviderSpec.Value == nil {
130+
return fmt.Errorf("providerSpec field was empty")
131+
}
132+
if err := yaml.Unmarshal(cpms.Spec.Template.OpenShiftMachineV1Beta1Machine.Spec.ProviderSpec.Value.Raw, &providerSpec); err != nil {
133+
return fmt.Errorf("unmarshal into providerSpec failed %w", err)
134+
}
135+
return nil
136+
}

test/extended/boot_image_update_agnostic.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ import (
1010
o "github.com/onsi/gomega"
1111

1212
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
"k8s.io/apimachinery/pkg/util/sets"
1314
"k8s.io/kubernetes/test/e2e/framework"
15+
16+
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
1417
)
1518

1619
func AllMachineSetTest(oc *exutil.CLI, fixture string) {
@@ -101,3 +104,117 @@ func EnsureConfigMapStampTest(oc *exutil.CLI) {
101104
}, 2*time.Minute, 5*time.Second).Should(o.BeTrue())
102105
framework.Logf("Successfully verified that the configmap has been correctly stamped")
103106
}
107+
108+
func AllControlPlaneMachineSetTest(oc *exutil.CLI, fixture string) {
109+
// This fixture applies a boot image update configuration that opts in all controlplanemachinesets
110+
// However, since CPMS is typically a singleton, it is just targeting a single resource
111+
applyMachineConfigurationFixture(oc, fixture)
112+
113+
// Grab the CPMS and verify that the boot image was reconciled correctly.
114+
machineClient, err := machineclient.NewForConfig(oc.KubeFramework().ClientConfig())
115+
o.Expect(err).NotTo(o.HaveOccurred())
116+
117+
cpms, err := machineClient.MachineV1().ControlPlaneMachineSets("openshift-machine-api").Get(context.TODO(), "cluster", metav1.GetOptions{})
118+
o.Expect(err).NotTo(o.HaveOccurred())
119+
verifyControlPlaneMachineSetUpdate(oc, *cpms, true)
120+
121+
// Delete a control plane machine to verify that CPMS reconciles it with the updated boot image
122+
// Get the list of control plane machines
123+
machines, err := machineClient.MachineV1beta1().Machines(MAPINamespace).List(context.TODO(), metav1.ListOptions{LabelSelector: MAPIMasterMachineLabelSelector})
124+
o.Expect(err).NotTo(o.HaveOccurred())
125+
o.Expect(machines.Items).NotTo(o.BeEmpty(), "No control plane machines found")
126+
127+
// Capture the initial set of control plane machine names and count before deletion
128+
initialMachineNames := sets.New[string]()
129+
for _, machine := range machines.Items {
130+
initialMachineNames.Insert(machine.Name)
131+
}
132+
initialMachineCount := initialMachineNames.Len()
133+
134+
// Delete the first control plane machine
135+
machineToDelete := machines.Items[0].Name
136+
framework.Logf("Deleting control plane machine: %s", machineToDelete)
137+
err = machineClient.MachineV1beta1().Machines(MAPINamespace).Delete(context.TODO(), machineToDelete, metav1.DeleteOptions{})
138+
o.Expect(err).NotTo(o.HaveOccurred())
139+
140+
// Wait until the new control plane machine is running and the old one is deleted
141+
// Arbitrarily picking 25 minutes timeout as scale-up time varies based on platform
142+
framework.Logf("Waiting for CPMS to reconcile and create a new control plane machine (up to 25 minutes)...")
143+
o.Eventually(func() bool {
144+
currentMachines, err := machineClient.MachineV1beta1().Machines(MAPINamespace).List(context.TODO(), metav1.ListOptions{LabelSelector: MAPIMasterMachineLabelSelector})
145+
if err != nil {
146+
framework.Logf("Error listing machines: %v", err)
147+
return false
148+
}
149+
150+
// Check that the deleted control plane machine is gone and all current machines are running
151+
currentMachineNames := sets.New[string]()
152+
runningMachines := sets.New[string]()
153+
154+
for _, machine := range currentMachines.Items {
155+
currentMachineNames.Insert(machine.Name)
156+
phase := ""
157+
if machine.Status.Phase != nil {
158+
phase = *machine.Status.Phase
159+
}
160+
if phase == machinev1beta1.PhaseRunning {
161+
runningMachines.Insert(machine.Name)
162+
} else {
163+
framework.Logf("Machine %s is in phase: %s", machine.Name, phase)
164+
}
165+
}
166+
167+
// All machines must be running
168+
if runningMachines.Len() != initialMachineCount {
169+
framework.Logf("Only %d out of %d machines are running", runningMachines.Len(), initialMachineCount)
170+
return false
171+
}
172+
173+
// The deleted machine should not be in the current set
174+
if currentMachineNames.Has(machineToDelete) {
175+
framework.Logf("Deleted machine %s still exists", machineToDelete)
176+
return false
177+
}
178+
179+
framework.Logf("All %d control plane machines are running and the deleted machine is gone", initialMachineCount)
180+
181+
// Ensure master MCP is done updating and has the correct ready count
182+
masterMCP := NewMachineConfigPool(oc, MachineConfigPoolMaster)
183+
updatedStatus, err := masterMCP.GetUpdatedStatus()
184+
if err != nil {
185+
framework.Logf("Error getting master MCP updated status: %v", err)
186+
return false
187+
}
188+
if updatedStatus != TrueString {
189+
framework.Logf("Master MCP is not yet updated (Updated=%s)", updatedStatus)
190+
return false
191+
}
192+
193+
readyMachineCount, err := masterMCP.getUpdatedMachineCount()
194+
if err != nil {
195+
framework.Logf("Error getting master MCP ready machine count: %v", err)
196+
return false
197+
}
198+
if readyMachineCount != initialMachineCount {
199+
framework.Logf("Master MCP ready machine count %d does not match initial count %d", readyMachineCount, initialMachineCount)
200+
return false
201+
}
202+
203+
framework.Logf("Master MCP is updated with %d ready machines", readyMachineCount)
204+
return true
205+
}, 25*time.Minute, 2*time.Minute).Should(o.BeTrue(), "CPMS failed to reconcile control plane machines within 25 minutes")
206+
}
207+
208+
func NoneControlPlaneMachineSetTest(oc *exutil.CLI, fixture string) {
209+
// This fixture applies a boot image update configuration that opts in no controlplanemachineset, i.e. feature is disabled.
210+
applyMachineConfigurationFixture(oc, fixture)
211+
212+
// Grab the CPMS and verify that the boot image was reconciled correctly.
213+
machineClient, err := machineclient.NewForConfig(oc.KubeFramework().ClientConfig())
214+
o.Expect(err).NotTo(o.HaveOccurred())
215+
216+
cpms, err := machineClient.MachineV1().ControlPlaneMachineSets("openshift-machine-api").Get(context.TODO(), "cluster", metav1.GetOptions{})
217+
o.Expect(err).NotTo(o.HaveOccurred())
218+
219+
verifyControlPlaneMachineSetUpdate(oc, *cpms, false)
220+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package extended
2+
3+
import (
4+
"path/filepath"
5+
6+
osconfigv1 "github.com/openshift/api/config/v1"
7+
8+
g "github.com/onsi/ginkgo/v2"
9+
exutil "github.com/openshift/machine-config-operator/test/extended/util"
10+
)
11+
12+
// These tests are [Serial] because it modifies the cluster/machineconfigurations.operator.openshift.io object in each test.
13+
var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive][Serial][OCPFeatureGate:ManagedBootImagesAWS]", g.Ordered, func() {
14+
defer g.GinkgoRecover()
15+
var (
16+
AllControlPlaneMachineSetFixture = filepath.Join("machineconfigurations", "managedbootimages-cpms-all.yaml")
17+
NoneControlPlaneMachineSetFixture = filepath.Join("machineconfigurations", "managedbootimages-cpms-none.yaml")
18+
EmptyMachineSetFixture = filepath.Join("machineconfigurations", "managedbootimages-empty.yaml")
19+
20+
oc = exutil.NewCLI("mco-bootimage", exutil.KubeConfigPath()).AsAdmin()
21+
)
22+
23+
g.BeforeEach(func() {
24+
// Skip this test if not on AWS platform
25+
skipUnlessTargetPlatform(oc, osconfigv1.AWSPlatformType)
26+
// Skip this test if the cluster is not using MachineAPI
27+
skipUnlessFunctionalMachineAPI(oc)
28+
// Skip this test on single node platforms
29+
skipOnSingleNodeTopology(oc)
30+
})
31+
32+
g.AfterEach(func() {
33+
// Clear out boot image configuration between tests
34+
applyMachineConfigurationFixture(oc, EmptyMachineSetFixture)
35+
})
36+
37+
// This test is [Disruptive] because it scales up a new control plane node after performing a boot image update, and the scales it down.
38+
g.It("[OCPFeatureGate:ManagedBootImagesCPMS][Disruptive] Should update boot images on ControlPlaneMachineSets and resize properly [apigroup:machineconfiguration.openshift.io]", func() {
39+
AllControlPlaneMachineSetTest(oc, AllControlPlaneMachineSetFixture)
40+
})
41+
42+
g.It("[OCPFeatureGate:ManagedBootImagesCPMS] Should not update boot images on ControlPlaneMachineSets when not configured [apigroup:machineconfiguration.openshift.io]", func() {
43+
NoneControlPlaneMachineSetTest(oc, NoneControlPlaneMachineSetFixture)
44+
})
45+
})

test/extended/boot_image_update_azure.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive
2626
PartialMachineSetFixture = filepath.Join("machineconfigurations", "managedbootimages-partial.yaml")
2727
EmptyMachineSetFixture = filepath.Join("machineconfigurations", "managedbootimages-empty.yaml")
2828

29+
AllControlPlaneMachineSetFixture = filepath.Join("machineconfigurations", "managedbootimages-cpms-all.yaml")
30+
NoneControlPlaneMachineSetFixture = filepath.Join("machineconfigurations", "managedbootimages-cpms-none.yaml")
31+
2932
oc = exutil.NewCLI("mco-bootimage", exutil.KubeConfigPath()).AsAdmin()
3033
)
3134

@@ -63,6 +66,15 @@ var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive
6366
g.It("[Disruptive] Should update boot images on an Azure MachineSets with a legacy boot image and scale successfully [apigroup:machineconfiguration.openshift.io]", func() {
6467
AzureLegacyBootImageTest(oc, PartialMachineSetFixture)
6568
})
69+
70+
// This test is [Disruptive] because it scales up a new control plane node after performing a boot image update, and then scales it down.
71+
g.It("[OCPFeatureGate:ManagedBootImagesCPMS][Disruptive] Should update boot images on ControlPlaneMachineSets and resize properly [apigroup:machineconfiguration.openshift.io]", func() {
72+
AllControlPlaneMachineSetTest(oc, AllControlPlaneMachineSetFixture)
73+
})
74+
75+
g.It("[OCPFeatureGate:ManagedBootImagesCPMS] Should not update boot images on ControlPlaneMachineSets when not configured [apigroup:machineconfiguration.openshift.io]", func() {
76+
NoneControlPlaneMachineSetTest(oc, NoneControlPlaneMachineSetFixture)
77+
})
6678
})
6779

6880
func AzureLegacyBootImageTest(oc *exutil.CLI, fixture string) {
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package extended
2+
3+
import (
4+
"path/filepath"
5+
6+
osconfigv1 "github.com/openshift/api/config/v1"
7+
8+
g "github.com/onsi/ginkgo/v2"
9+
exutil "github.com/openshift/machine-config-operator/test/extended/util"
10+
)
11+
12+
// These tests are [Serial] because it modifies the cluster/machineconfigurations.operator.openshift.io object in each test.
13+
var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive][Serial][OCPFeatureGate:ManagedBootImages]", g.Ordered, func() {
14+
defer g.GinkgoRecover()
15+
var (
16+
AllControlPlaneMachineSetFixture = filepath.Join("machineconfigurations", "managedbootimages-cpms-all.yaml")
17+
NoneControlPlaneMachineSetFixture = filepath.Join("machineconfigurations", "managedbootimages-cpms-none.yaml")
18+
EmptyMachineSetFixture = filepath.Join("machineconfigurations", "managedbootimages-empty.yaml")
19+
20+
oc = exutil.NewCLI("mco-bootimage", exutil.KubeConfigPath()).AsAdmin()
21+
)
22+
23+
g.BeforeEach(func() {
24+
// Skip this test if not on GCP platform
25+
skipUnlessTargetPlatform(oc, osconfigv1.GCPPlatformType)
26+
// Skip this test if the cluster is not using MachineAPI
27+
skipUnlessFunctionalMachineAPI(oc)
28+
// Skip this test on single node platforms
29+
skipOnSingleNodeTopology(oc)
30+
})
31+
32+
g.AfterEach(func() {
33+
// Clear out boot image configuration between tests
34+
applyMachineConfigurationFixture(oc, EmptyMachineSetFixture)
35+
})
36+
37+
// This test is [Disruptive] because it scales up a new control plane node after performing a boot image update, and the scales it down.
38+
g.It("[OCPFeatureGate:ManagedBootImagesCPMS][Disruptive] Should update boot images on ControlPlaneMachineSets and resize properly [apigroup:machineconfiguration.openshift.io]", func() {
39+
AllControlPlaneMachineSetTest(oc, AllControlPlaneMachineSetFixture)
40+
})
41+
42+
g.It("[OCPFeatureGate:ManagedBootImagesCPMS] Should not update boot images on ControlPlaneMachineSets when not configured [apigroup:machineconfiguration.openshift.io]", func() {
43+
NoneControlPlaneMachineSetTest(oc, NoneControlPlaneMachineSetFixture)
44+
})
45+
})

0 commit comments

Comments
 (0)