Skip to content

Commit ef21e66

Browse files
committed
Ensure MOSC updates rolls out new image built to nodes
When an MOSC is edited, ensure that the new image that is built is rolled out onto the nodes in the pool by ensuring that the node_controller is able to pick the correct and most recent MOSB even if the rendered-MC used for the new image build hasn't changed. Signed-off-by: Urvashi <[email protected]>
1 parent 8dc0f4d commit ef21e66

File tree

3 files changed

+172
-0
lines changed

3 files changed

+172
-0
lines changed

pkg/controller/node/node_controller.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,9 +1019,32 @@ func (ctrl *Controller) getConfigAndBuild(pool *mcfgv1.MachineConfigPool) (*mcfg
10191019
return nil, nil, err
10201020
}
10211021

1022+
// First, try to get the MOSB from the current-machine-os-build annotation on the MOSC
1023+
// This ensures we get the correct build when multiple MOSBs exist for the same rendered MC
1024+
if currentBuildName, hasAnnotation := ourConfig.Annotations[buildconstants.CurrentMachineOSBuildAnnotationKey]; hasAnnotation {
1025+
for _, build := range buildList {
1026+
if build.Name == currentBuildName {
1027+
// Validate that the build matches the pool's current rendered config
1028+
// to prevent using stale builds during config transitions
1029+
if build.Spec.MachineConfig.Name == pool.Spec.Configuration.Name {
1030+
ourBuild = build
1031+
klog.V(4).Infof("Found current MachineOSBuild %q from annotation for MachineOSConfig %q", currentBuildName, ourConfig.Name)
1032+
return ourConfig, ourBuild, nil
1033+
}
1034+
klog.Warningf("MachineOSBuild %q from annotation is for rendered config %q, but pool has %q - annotation is stale", currentBuildName, build.Spec.MachineConfig.Name, pool.Spec.Configuration.Name)
1035+
// Don't return here - fall through to the fallback logic
1036+
break
1037+
}
1038+
}
1039+
klog.Warningf("MachineOSConfig %q has current-machine-os-build annotation pointing to %q, but that build was not found", ourConfig.Name, currentBuildName)
1040+
}
1041+
1042+
// Fallback: if annotation is not present or build not found, use the old logic
1043+
// This handles backwards compatibility and edge cases
10221044
for _, build := range buildList {
10231045
if build.Spec.MachineOSConfig.Name == ourConfig.Name && build.Spec.MachineConfig.Name == pool.Spec.Configuration.Name {
10241046
ourBuild = build
1047+
klog.V(4).Infof("Using fallback logic to find MachineOSBuild %q for MachineOSConfig %q and rendered config %q", build.Name, ourConfig.Name, pool.Spec.Configuration.Name)
10251048
break
10261049
}
10271050
}

test/e2e-ocl/Containerfile.simple

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
FROM configs AS final
2+
RUN touch /etc/simple-test-file.txt && \
3+
ostree container commit

test/e2e-ocl/onclusterlayering_test.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
2121

2222
"github.com/openshift/machine-config-operator/pkg/apihelpers"
23+
daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants"
2324
"github.com/openshift/machine-config-operator/pkg/daemon/runtimeassets"
2425
"github.com/openshift/machine-config-operator/test/framework"
2526
"github.com/openshift/machine-config-operator/test/helpers"
@@ -68,6 +69,9 @@ var (
6869

6970
//go:embed Containerfile.okd-fcos
7071
okdFcosDockerfile string
72+
73+
//go:embed Containerfile.simple
74+
simpleDockerfile string
7175
)
7276

7377
var skipCleanupAlways bool
@@ -1456,3 +1460,145 @@ func TestImageBuildDegradedOnFailureAndClearedOnBuildStart(t *testing.T) {
14561460
assert.Equal(t, string(mcfgv1.MachineConfigPoolBuilding), degradedCondition.Reason, "ImageBuildDegraded reason should be Building")
14571461
t.Logf("ImageBuildDegraded condition correctly cleared to False with message: %s", degradedCondition.Message)
14581462
}
1463+
1464+
// TestCurrentMachineOSBuildAnnotationHandling tests that the node controller correctly uses the
1465+
// current-machine-os-build annotation on the MachineOSConfig to select the correct MOSB when
1466+
// multiple MOSBs exist for the same rendered MachineConfig. This can happen during rapid updates
1467+
// or when a rebuild annotation is applied.
1468+
func TestCurrentMachineOSBuildAnnotationHandling(t *testing.T) {
1469+
ctx, cancel := context.WithCancel(context.Background())
1470+
t.Cleanup(cancel)
1471+
1472+
cs := framework.NewClientSet("")
1473+
1474+
// Setup: Create initial layered pool and build
1475+
mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{
1476+
poolName: layeredMCPName,
1477+
customDockerfiles: map[string]string{
1478+
layeredMCPName: simpleDockerfile,
1479+
},
1480+
})
1481+
1482+
createMachineOSConfig(t, cs, mosc)
1483+
1484+
// Wait for the first build to complete
1485+
firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name)
1486+
t.Logf("First MachineOSBuild %q has started", firstMosb.Name)
1487+
1488+
firstFinishedBuild := waitForBuildToComplete(t, cs, firstMosb)
1489+
firstImagePullspec := string(firstFinishedBuild.Status.DigestedImagePushSpec)
1490+
t.Logf("First MachineOSBuild %q completed with image: %s", firstFinishedBuild.Name, firstImagePullspec)
1491+
1492+
// Verify the MOSC has the current-machine-os-build annotation set to the first build
1493+
apiMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{})
1494+
require.NoError(t, err)
1495+
currentBuildAnnotation := apiMosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey]
1496+
assert.Equal(t, firstFinishedBuild.Name, currentBuildAnnotation,
1497+
"MOSC should have current-machine-os-build annotation pointing to first build")
1498+
t.Logf("Verified MOSC has current-machine-os-build annotation: %s", currentBuildAnnotation)
1499+
1500+
// Trigger a second build by editing the MOSC (e.g., updating the containerfile)
1501+
// This does NOT create a new rendered MC, which is the scenario we're testing
1502+
t.Logf("Updating MachineOSConfig containerfile to trigger second build without new rendered MC")
1503+
apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{})
1504+
require.NoError(t, err)
1505+
1506+
// Update the containerfile to trigger a rebuild
1507+
apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{
1508+
{
1509+
ContainerfileArch: mcfgv1.NoArch,
1510+
Content: simpleDockerfile + "\nRUN echo 'test annotation handling' > /etc/test-annotation",
1511+
},
1512+
}
1513+
apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{})
1514+
require.NoError(t, err)
1515+
t.Logf("Updated MachineOSConfig %q containerfile", apiMosc.Name)
1516+
1517+
// Wait for the second build to start
1518+
t.Logf("Waiting for second build to start...")
1519+
secondMosbName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, mosc.Name, firstMosb.Name)
1520+
secondMosb, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMosbName, metav1.GetOptions{})
1521+
require.NoError(t, err)
1522+
secondMosb = waitForBuildToStart(t, cs, secondMosb)
1523+
t.Logf("Second MachineOSBuild %q has started", secondMosb.Name)
1524+
1525+
// At this point, both MOSBs exist:
1526+
// - firstMosb is completed (with original containerfile)
1527+
// - secondMosb is building (with updated containerfile, but SAME rendered MC)
1528+
// This is the critical scenario: multiple MOSBs for the same rendered MachineConfig
1529+
1530+
// Verify that the MOSC annotation now points to the second build
1531+
apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{})
1532+
require.NoError(t, err)
1533+
currentBuildAnnotation = apiMosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey]
1534+
assert.Equal(t, secondMosb.Name, currentBuildAnnotation,
1535+
"MOSC should have current-machine-os-build annotation pointing to second build")
1536+
t.Logf("Verified MOSC annotation updated to second build: %s", currentBuildAnnotation)
1537+
1538+
// List all MOSBs to confirm both exist
1539+
allMosbs, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(ctx, metav1.ListOptions{})
1540+
require.NoError(t, err)
1541+
1542+
mosbNames := []string{}
1543+
for _, mosb := range allMosbs.Items {
1544+
if mosb.Spec.MachineOSConfig.Name == mosc.Name {
1545+
mosbNames = append(mosbNames, mosb.Name)
1546+
}
1547+
}
1548+
t.Logf("Found %d MOSBs for MachineOSConfig %q: %v", len(mosbNames), mosc.Name, mosbNames)
1549+
assert.GreaterOrEqual(t, len(mosbNames), 2, "Should have at least 2 MOSBs at this point")
1550+
1551+
// The critical test: The node controller should use the MOSB specified by the annotation
1552+
// (secondMosb) even though firstMosb also exists and matches the MOSC name.
1553+
// This is implicitly tested by the fact that the pool status should reflect the second build.
1554+
// We verify this by checking the pool is waiting for the second build, not using the first.
1555+
1556+
t.Logf("Verifying that pool targets the correct (second) build based on annotation")
1557+
// The pool should be waiting for the second build to complete, not using the first completed build
1558+
// We can verify this by checking that the pool doesn't have the first image in its status
1559+
1560+
// Wait for the second build to complete
1561+
t.Logf("Waiting for second build to complete...")
1562+
secondFinishedBuild := waitForBuildToComplete(t, cs, secondMosb)
1563+
secondImagePullspec := string(secondFinishedBuild.Status.DigestedImagePushSpec)
1564+
t.Logf("Second MachineOSBuild %q completed with image: %s", secondFinishedBuild.Name, secondImagePullspec)
1565+
1566+
// Verify the images are different (proving we built a new image, not reusing the old one)
1567+
assert.NotEqual(t, firstImagePullspec, secondImagePullspec,
1568+
"First and second builds should produce different images")
1569+
1570+
// Verify that the MOSC status reflects the second build's image
1571+
waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, secondImagePullspec)
1572+
apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{})
1573+
require.NoError(t, err)
1574+
assert.Equal(t, mcfgv1.ImageDigestFormat(secondImagePullspec), apiMosc.Status.CurrentImagePullSpec,
1575+
"MOSC status should have the second build's image pullspec")
1576+
1577+
// The critical test: Verify the node controller uses the annotation to select the correct MOSB
1578+
// Add a node to the pool and verify it gets the SECOND build's image, not the first
1579+
t.Logf("Adding node to pool to verify node controller uses annotation-based MOSB selection")
1580+
node := helpers.GetRandomNode(t, cs, "worker")
1581+
1582+
unlabelFunc := makeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, node, helpers.MCPNameToRole(layeredMCPName)))
1583+
defer unlabelFunc()
1584+
1585+
// Wait for the node controller to update the node's desiredImage annotation
1586+
// The node controller should use the annotation on the MOSC to select the second MOSB
1587+
// and therefore set the desiredImage to the second build's image, NOT the first
1588+
t.Logf("Waiting for node %s to have desiredImage set to second build's image", node.Name)
1589+
helpers.WaitForNodeImageChange(t, cs, node, secondImagePullspec)
1590+
1591+
// Verify the node's desiredImage annotation matches the second build
1592+
updatedNode, err := cs.CoreV1Interface.Nodes().Get(ctx, node.Name, metav1.GetOptions{})
1593+
require.NoError(t, err)
1594+
desiredImage := updatedNode.Annotations[daemonconsts.DesiredImageAnnotationKey]
1595+
assert.Equal(t, secondImagePullspec, desiredImage,
1596+
"Node controller should use annotation to select second build, not first build")
1597+
t.Logf("Node controller correctly selected second build based on annotation")
1598+
1599+
// Also verify it's NOT the first build's image
1600+
assert.NotEqual(t, firstImagePullspec, desiredImage,
1601+
"Node should NOT have first build's image (would indicate annotation was ignored)")
1602+
1603+
t.Logf("Successfully verified that node controller uses annotation-based build selection")
1604+
}

0 commit comments

Comments
 (0)