Skip to content

Commit 99f6e3a

Browse files
Merge pull request #5383 from umohnani8/mosc-edit
OCPBUGS-63006: Ensure MOSC updates rolls out new image built to nodes
2 parents 7c04bdb + ef21e66 commit 99f6e3a

File tree

3 files changed

+172
-0
lines changed

3 files changed

+172
-0
lines changed

pkg/controller/node/node_controller.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,9 +1019,32 @@ func (ctrl *Controller) getConfigAndBuild(pool *mcfgv1.MachineConfigPool) (*mcfg
10191019
return nil, nil, err
10201020
}
10211021

1022+
// First, try to get the MOSB from the current-machine-os-build annotation on the MOSC
1023+
// This ensures we get the correct build when multiple MOSBs exist for the same rendered MC
1024+
if currentBuildName, hasAnnotation := ourConfig.Annotations[buildconstants.CurrentMachineOSBuildAnnotationKey]; hasAnnotation {
1025+
for _, build := range buildList {
1026+
if build.Name == currentBuildName {
1027+
// Validate that the build matches the pool's current rendered config
1028+
// to prevent using stale builds during config transitions
1029+
if build.Spec.MachineConfig.Name == pool.Spec.Configuration.Name {
1030+
ourBuild = build
1031+
klog.V(4).Infof("Found current MachineOSBuild %q from annotation for MachineOSConfig %q", currentBuildName, ourConfig.Name)
1032+
return ourConfig, ourBuild, nil
1033+
}
1034+
klog.Warningf("MachineOSBuild %q from annotation is for rendered config %q, but pool has %q - annotation is stale", currentBuildName, build.Spec.MachineConfig.Name, pool.Spec.Configuration.Name)
1035+
// Don't return here - fall through to the fallback logic
1036+
break
1037+
}
1038+
}
1039+
klog.Warningf("MachineOSConfig %q has current-machine-os-build annotation pointing to %q, but that build was not found", ourConfig.Name, currentBuildName)
1040+
}
1041+
1042+
// Fallback: if annotation is not present or build not found, use the old logic
1043+
// This handles backwards compatibility and edge cases
10221044
for _, build := range buildList {
10231045
if build.Spec.MachineOSConfig.Name == ourConfig.Name && build.Spec.MachineConfig.Name == pool.Spec.Configuration.Name {
10241046
ourBuild = build
1047+
klog.V(4).Infof("Using fallback logic to find MachineOSBuild %q for MachineOSConfig %q and rendered config %q", build.Name, ourConfig.Name, pool.Spec.Configuration.Name)
10251048
break
10261049
}
10271050
}

test/e2e-ocl/Containerfile.simple

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
FROM configs AS final
2+
RUN touch /etc/simple-test-file.txt && \
3+
ostree container commit

test/e2e-ocl/onclusterlayering_test.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
2121

2222
"github.com/openshift/machine-config-operator/pkg/apihelpers"
23+
daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants"
2324
"github.com/openshift/machine-config-operator/pkg/daemon/runtimeassets"
2425
"github.com/openshift/machine-config-operator/test/framework"
2526
"github.com/openshift/machine-config-operator/test/helpers"
@@ -68,6 +69,9 @@ var (
6869

6970
//go:embed Containerfile.okd-fcos
7071
okdFcosDockerfile string
72+
73+
//go:embed Containerfile.simple
74+
simpleDockerfile string
7175
)
7276

7377
var skipCleanupAlways bool
@@ -1456,3 +1460,145 @@ func TestImageBuildDegradedOnFailureAndClearedOnBuildStart(t *testing.T) {
14561460
assert.Equal(t, string(mcfgv1.MachineConfigPoolBuilding), degradedCondition.Reason, "ImageBuildDegraded reason should be Building")
14571461
t.Logf("ImageBuildDegraded condition correctly cleared to False with message: %s", degradedCondition.Message)
14581462
}
1463+
1464+
// TestCurrentMachineOSBuildAnnotationHandling tests that the node controller correctly uses the
1465+
// current-machine-os-build annotation on the MachineOSConfig to select the correct MOSB when
1466+
// multiple MOSBs exist for the same rendered MachineConfig. This can happen during rapid updates
1467+
// or when a rebuild annotation is applied.
1468+
func TestCurrentMachineOSBuildAnnotationHandling(t *testing.T) {
1469+
ctx, cancel := context.WithCancel(context.Background())
1470+
t.Cleanup(cancel)
1471+
1472+
cs := framework.NewClientSet("")
1473+
1474+
// Setup: Create initial layered pool and build
1475+
mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{
1476+
poolName: layeredMCPName,
1477+
customDockerfiles: map[string]string{
1478+
layeredMCPName: simpleDockerfile,
1479+
},
1480+
})
1481+
1482+
createMachineOSConfig(t, cs, mosc)
1483+
1484+
// Wait for the first build to complete
1485+
firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name)
1486+
t.Logf("First MachineOSBuild %q has started", firstMosb.Name)
1487+
1488+
firstFinishedBuild := waitForBuildToComplete(t, cs, firstMosb)
1489+
firstImagePullspec := string(firstFinishedBuild.Status.DigestedImagePushSpec)
1490+
t.Logf("First MachineOSBuild %q completed with image: %s", firstFinishedBuild.Name, firstImagePullspec)
1491+
1492+
// Verify the MOSC has the current-machine-os-build annotation set to the first build
1493+
apiMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{})
1494+
require.NoError(t, err)
1495+
currentBuildAnnotation := apiMosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey]
1496+
assert.Equal(t, firstFinishedBuild.Name, currentBuildAnnotation,
1497+
"MOSC should have current-machine-os-build annotation pointing to first build")
1498+
t.Logf("Verified MOSC has current-machine-os-build annotation: %s", currentBuildAnnotation)
1499+
1500+
// Trigger a second build by editing the MOSC (e.g., updating the containerfile)
1501+
// This does NOT create a new rendered MC, which is the scenario we're testing
1502+
t.Logf("Updating MachineOSConfig containerfile to trigger second build without new rendered MC")
1503+
apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{})
1504+
require.NoError(t, err)
1505+
1506+
// Update the containerfile to trigger a rebuild
1507+
apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{
1508+
{
1509+
ContainerfileArch: mcfgv1.NoArch,
1510+
Content: simpleDockerfile + "\nRUN echo 'test annotation handling' > /etc/test-annotation",
1511+
},
1512+
}
1513+
apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{})
1514+
require.NoError(t, err)
1515+
t.Logf("Updated MachineOSConfig %q containerfile", apiMosc.Name)
1516+
1517+
// Wait for the second build to start
1518+
t.Logf("Waiting for second build to start...")
1519+
secondMosbName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, mosc.Name, firstMosb.Name)
1520+
secondMosb, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMosbName, metav1.GetOptions{})
1521+
require.NoError(t, err)
1522+
secondMosb = waitForBuildToStart(t, cs, secondMosb)
1523+
t.Logf("Second MachineOSBuild %q has started", secondMosb.Name)
1524+
1525+
// At this point, both MOSBs exist:
1526+
// - firstMosb is completed (with original containerfile)
1527+
// - secondMosb is building (with updated containerfile, but SAME rendered MC)
1528+
// This is the critical scenario: multiple MOSBs for the same rendered MachineConfig
1529+
1530+
// Verify that the MOSC annotation now points to the second build
1531+
apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{})
1532+
require.NoError(t, err)
1533+
currentBuildAnnotation = apiMosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey]
1534+
assert.Equal(t, secondMosb.Name, currentBuildAnnotation,
1535+
"MOSC should have current-machine-os-build annotation pointing to second build")
1536+
t.Logf("Verified MOSC annotation updated to second build: %s", currentBuildAnnotation)
1537+
1538+
// List all MOSBs to confirm both exist
1539+
allMosbs, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(ctx, metav1.ListOptions{})
1540+
require.NoError(t, err)
1541+
1542+
mosbNames := []string{}
1543+
for _, mosb := range allMosbs.Items {
1544+
if mosb.Spec.MachineOSConfig.Name == mosc.Name {
1545+
mosbNames = append(mosbNames, mosb.Name)
1546+
}
1547+
}
1548+
t.Logf("Found %d MOSBs for MachineOSConfig %q: %v", len(mosbNames), mosc.Name, mosbNames)
1549+
assert.GreaterOrEqual(t, len(mosbNames), 2, "Should have at least 2 MOSBs at this point")
1550+
1551+
// The critical test: The node controller should use the MOSB specified by the annotation
1552+
// (secondMosb) even though firstMosb also exists and matches the MOSC name.
1553+
// This is implicitly tested by the fact that the pool status should reflect the second build.
1554+
// We verify this by checking the pool is waiting for the second build, not using the first.
1555+
1556+
t.Logf("Verifying that pool targets the correct (second) build based on annotation")
1557+
// The pool should be waiting for the second build to complete, not using the first completed build
1558+
// We can verify this by checking that the pool doesn't have the first image in its status
1559+
1560+
// Wait for the second build to complete
1561+
t.Logf("Waiting for second build to complete...")
1562+
secondFinishedBuild := waitForBuildToComplete(t, cs, secondMosb)
1563+
secondImagePullspec := string(secondFinishedBuild.Status.DigestedImagePushSpec)
1564+
t.Logf("Second MachineOSBuild %q completed with image: %s", secondFinishedBuild.Name, secondImagePullspec)
1565+
1566+
// Verify the images are different (proving we built a new image, not reusing the old one)
1567+
assert.NotEqual(t, firstImagePullspec, secondImagePullspec,
1568+
"First and second builds should produce different images")
1569+
1570+
// Verify that the MOSC status reflects the second build's image
1571+
waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, secondImagePullspec)
1572+
apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{})
1573+
require.NoError(t, err)
1574+
assert.Equal(t, mcfgv1.ImageDigestFormat(secondImagePullspec), apiMosc.Status.CurrentImagePullSpec,
1575+
"MOSC status should have the second build's image pullspec")
1576+
1577+
// The critical test: Verify the node controller uses the annotation to select the correct MOSB
1578+
// Add a node to the pool and verify it gets the SECOND build's image, not the first
1579+
t.Logf("Adding node to pool to verify node controller uses annotation-based MOSB selection")
1580+
node := helpers.GetRandomNode(t, cs, "worker")
1581+
1582+
unlabelFunc := makeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, node, helpers.MCPNameToRole(layeredMCPName)))
1583+
defer unlabelFunc()
1584+
1585+
// Wait for the node controller to update the node's desiredImage annotation
1586+
// The node controller should use the annotation on the MOSC to select the second MOSB
1587+
// and therefore set the desiredImage to the second build's image, NOT the first
1588+
t.Logf("Waiting for node %s to have desiredImage set to second build's image", node.Name)
1589+
helpers.WaitForNodeImageChange(t, cs, node, secondImagePullspec)
1590+
1591+
// Verify the node's desiredImage annotation matches the second build
1592+
updatedNode, err := cs.CoreV1Interface.Nodes().Get(ctx, node.Name, metav1.GetOptions{})
1593+
require.NoError(t, err)
1594+
desiredImage := updatedNode.Annotations[daemonconsts.DesiredImageAnnotationKey]
1595+
assert.Equal(t, secondImagePullspec, desiredImage,
1596+
"Node controller should use annotation to select second build, not first build")
1597+
t.Logf("Node controller correctly selected second build based on annotation")
1598+
1599+
// Also verify it's NOT the first build's image
1600+
assert.NotEqual(t, firstImagePullspec, desiredImage,
1601+
"Node should NOT have first build's image (would indicate annotation was ignored)")
1602+
1603+
t.Logf("Successfully verified that node controller uses annotation-based build selection")
1604+
}

0 commit comments

Comments
 (0)