Skip to content

Commit ae88523

Browse files
committed
Fix ImageBuildDegraded Status updates
After an image was built and rolled out successfully, and we triggered a new build, the ImageBuildDegraded status wasn't updating correctly as it kept focusing on the successful build and not the build currently active. Fix the logic to be able to keep track of the active build instead and update the status accordingly. Signed-off-by: Urvashi <[email protected]>
1 parent 7a56cf0 commit ae88523

File tree

3 files changed

+253
-47
lines changed

3 files changed

+253
-47
lines changed

pkg/controller/build/reconciler.go

Lines changed: 90 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -266,56 +266,20 @@ func (b *buildReconciler) updateMachineOSBuild(ctx context.Context, old, current
266266
}
267267

268268
if !oldState.IsBuildFailure() && curState.IsBuildFailure() {
269-
klog.Infof("MachineOSBuild %s failed, leaving ephemeral objects in place for inspection and setting BuildDegraded condition", current.Name)
270-
271-
// Before setting BuildDegraded, check if another MachineOSBuild is active or if this one has been superseded
272-
mosbList, err := b.getMachineOSBuildsForMachineOSConfig(mosc)
273-
if err != nil {
274-
return fmt.Errorf("could not get MachineOSBuilds for MachineOSConfig %q: %w", mosc.Name, err)
275-
}
276-
277-
// Check if there are any newer or active builds that would supersede this failure
278-
hasActiveBuild := false
279-
isCurrentBuildStale := false
280-
for _, mosb := range mosbList {
281-
// Skip the current failed build
282-
if mosb.Name == current.Name {
283-
continue
284-
}
285-
mosbState := ctrlcommon.NewMachineOSBuildState(mosb)
286-
// Check if there's an active build (building, prepared, or succeeded)
287-
if mosbState.IsBuilding() || mosbState.IsBuildPrepared() || mosbState.IsBuildSuccess() {
288-
hasActiveBuild = true
289-
klog.Infof("Found active MachineOSBuild %s, skipping BuildDegraded condition for failed build %s", mosb.Name, current.Name)
290-
break
291-
}
292-
}
293-
294-
// Also check if the failed MachineOSBuild is no longer referenced by the MachineOSConfig
295-
if mosc.Status.CurrentImagePullSpec != "" && !isMachineOSBuildCurrentForMachineOSConfigWithPullspec(mosc, current) {
296-
isCurrentBuildStale = true
297-
klog.Infof("Failed MachineOSBuild %s is no longer current for MachineOSConfig %s, skipping BuildDegraded condition", current.Name, mosc.Name)
298-
}
299-
300-
// Only set BuildDegraded if there are no active builds and this build is still current
301-
if hasActiveBuild || isCurrentBuildStale {
302-
klog.Infof("Skipping BuildDegraded condition for failed MachineOSBuild %s (hasActiveBuild=%v, isStale=%v)", current.Name, hasActiveBuild, isCurrentBuildStale)
303-
return nil
304-
}
269+
klog.Infof("MachineOSBuild %s failed, leaving ephemeral objects in place for inspection", current.Name)
305270

306271
mcp, err := b.machineConfigPoolLister.Get(mosc.Spec.MachineConfigPool.Name)
307272
if err != nil {
308273
return fmt.Errorf("could not get MachineConfigPool from MachineOSConfig %q: %w", mosc.Name, err)
309274
}
310275

311-
// Set BuildDegraded condition
312-
buildError := getBuildErrorFromMOSB(current)
313-
return b.syncBuildFailureStatus(ctx, mcp, buildError, current.Name)
276+
// Always update ImageBuildDegraded condition based on current active build status
277+
return b.updateImageBuildDegradedCondition(ctx, mcp, mosc)
314278
}
315279

316280
// If the build was successful, clean up the build objects and propagate the
317281
// final image pushspec onto the MachineOSConfig object.
318-
// Also clear BuildDegraded condition if it was set due to a previously failed build
282+
// Also update BuildDegraded condition based on current active build status
319283
if !oldState.IsBuildSuccess() && curState.IsBuildSuccess() {
320284
klog.Infof("MachineOSBuild %s succeeded, cleaning up all ephemeral objects used for the build", current.Name)
321285

@@ -324,9 +288,9 @@ func (b *buildReconciler) updateMachineOSBuild(ctx context.Context, old, current
324288
return fmt.Errorf("could not get MachineConfigPool from MachineOSConfig %q: %w", mosc.Name, err)
325289
}
326290

327-
// Clear BuildDegraded condition if set
328-
if err := b.syncBuildSuccessStatus(ctx, mcp); err != nil {
329-
klog.Errorf("Failed to clear BuildDegraded condition for pool %s: %v", mcp.Name, err)
291+
// Update BuildDegraded condition based on current active build status
292+
if err := b.updateImageBuildDegradedCondition(ctx, mcp, mosc); err != nil {
293+
klog.Errorf("Failed to update ImageBuildDegraded condition for pool %s: %v", mcp.Name, err)
330294
}
331295

332296
// Clean up ephemeral objects
@@ -372,6 +336,18 @@ func (b *buildReconciler) updateMachineOSConfigStatus(ctx context.Context, mosc
372336
klog.Infof("Updated annotations on MachineOSConfig %q", mosc.Name)
373337

374338
mosc = updatedMosc
339+
340+
// When annotations are updated (new build starts), also update observedGeneration
341+
// to signal that the controller is processing the current generation
342+
if mosc.Status.ObservedGeneration != mosc.GetGeneration() {
343+
mosc.Status.ObservedGeneration = mosc.GetGeneration()
344+
_, err = b.mcfgclient.MachineconfigurationV1().MachineOSConfigs().UpdateStatus(ctx, mosc, metav1.UpdateOptions{})
345+
if err != nil {
346+
klog.Errorf("Failed to update observedGeneration on MachineOSConfig %q: %v", mosc.Name, err)
347+
} else {
348+
klog.Infof("Updated observedGeneration on MachineOSConfig %q to %d", mosc.Name, mosc.GetGeneration())
349+
}
350+
}
375351
}
376352

377353
// Skip the status update if digest image pushspec hasn't been set yet.
@@ -460,14 +436,14 @@ func (b *buildReconciler) startBuild(ctx context.Context, mosb *mcfgv1.MachineOS
460436
return fmt.Errorf("could not update MachineOSConfig %q status for MachineOSBuild %q: %w", mosc.Name, mosb.Name, err)
461437
}
462438

463-
// Initialize BuildDegraded condition to False when build starts
439+
// Update BuildDegraded condition based on current active build status when build starts
464440
mcp, err := b.machineConfigPoolLister.Get(mosc.Spec.MachineConfigPool.Name)
465441
if err != nil {
466442
return fmt.Errorf("could not get MachineConfigPool from MachineOSConfig %q: %w", mosc.Name, err)
467443
}
468444

469-
if err := b.initializeBuildDegradedCondition(ctx, mcp); err != nil {
470-
klog.Errorf("Failed to initialize BuildDegraded condition for pool %s: %v", mcp.Name, err)
445+
if err := b.updateImageBuildDegradedCondition(ctx, mcp, mosc); err != nil {
446+
klog.Errorf("Failed to update ImageBuildDegraded condition for pool %s: %v", mcp.Name, err)
471447
}
472448

473449
return nil
@@ -1713,3 +1689,71 @@ func (b *buildReconciler) syncBuildFailureStatus(ctx context.Context, pool *mcfg
17131689
}
17141690
return buildErr
17151691
}
1692+
1693+
// updateImageBuildDegradedCondition examines all MachineOSBuilds for the MachineOSConfig
1694+
// and sets the ImageBuildDegraded condition based on the status of the currently active build
1695+
func (b *buildReconciler) updateImageBuildDegradedCondition(ctx context.Context, pool *mcfgv1.MachineConfigPool, mosc *mcfgv1.MachineOSConfig) error {
1696+
mosbList, err := b.getMachineOSBuildsForMachineOSConfig(mosc)
1697+
if err != nil {
1698+
return fmt.Errorf("could not get MachineOSBuilds for MachineOSConfig %q: %w", mosc.Name, err)
1699+
}
1700+
1701+
// Find the currently active (most relevant) build
1702+
// Priority: 1) Currently referenced by MOSC, 2) Building/Prepared, 3) Most recent
1703+
var activeBuild *mcfgv1.MachineOSBuild
1704+
var mostRecentBuild *mcfgv1.MachineOSBuild
1705+
1706+
// First, look for the build currently referenced by the MachineOSConfig
1707+
for _, mosb := range mosbList {
1708+
if isMachineOSBuildCurrentForMachineOSConfig(mosc, mosb) {
1709+
activeBuild = mosb
1710+
break
1711+
}
1712+
}
1713+
1714+
// If no current build found, look for active builds (building/prepared)
1715+
if activeBuild == nil {
1716+
for _, mosb := range mosbList {
1717+
mosbState := ctrlcommon.NewMachineOSBuildState(mosb)
1718+
if mosbState.IsBuilding() || mosbState.IsBuildPrepared() {
1719+
activeBuild = mosb
1720+
break
1721+
}
1722+
}
1723+
}
1724+
1725+
// Keep track of the most recent build regardless
1726+
for _, mosb := range mosbList {
1727+
if mostRecentBuild == nil || mosb.CreationTimestamp.After(mostRecentBuild.CreationTimestamp.Time) {
1728+
mostRecentBuild = mosb
1729+
}
1730+
}
1731+
1732+
// If still no active build, use the most recent one
1733+
if activeBuild == nil {
1734+
activeBuild = mostRecentBuild
1735+
}
1736+
1737+
// If no builds exist at all, clear any existing BuildDegraded condition
1738+
if activeBuild == nil {
1739+
return b.syncBuildSuccessStatus(ctx, pool)
1740+
}
1741+
1742+
// Update condition based on the active build's status
1743+
activeState := ctrlcommon.NewMachineOSBuildState(activeBuild)
1744+
1745+
if activeState.IsBuildFailure() {
1746+
// Set BuildDegraded=True for failed builds
1747+
buildError := getBuildErrorFromMOSB(activeBuild)
1748+
return b.syncBuildFailureStatus(ctx, pool, buildError, activeBuild.Name)
1749+
} else if activeState.IsBuildSuccess() {
1750+
// Clear BuildDegraded=False for successful builds
1751+
return b.syncBuildSuccessStatus(ctx, pool)
1752+
} else if activeState.IsBuilding() || activeState.IsBuildPrepared() {
1753+
// Clear BuildDegraded=False for builds in progress (allow retry after previous failure)
1754+
return b.initializeBuildDegradedCondition(ctx, pool)
1755+
}
1756+
1757+
// For any other states, don't change the condition
1758+
return nil
1759+
}

pkg/controller/node/status.go

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,8 @@ func (ctrl *Controller) calculateStatus(mcs []*mcfgv1.MachineConfigNode, cconfig
259259

260260
allUpdated := updatedMachineCount == machineCount &&
261261
readyMachineCount == machineCount &&
262-
unavailableMachineCount == 0
262+
unavailableMachineCount == 0 &&
263+
!isLayeredPoolBuilding(isLayeredPool, mosc, mosb)
263264

264265
if allUpdated {
265266
//TODO: update api to only have one condition regarding status of update.
@@ -563,3 +564,35 @@ func isPinnedImageSetsInProgressForPool(pool *mcfgv1.MachineConfigPool) bool {
563564
}
564565
return false
565566
}
567+
568+
// isLayeredPoolBuilding checks if a layered pool has an active build or failed build that would
569+
// make nodes not truly "updated" even if they have the current machine config
570+
func isLayeredPoolBuilding(isLayeredPool bool, mosc *mcfgv1.MachineOSConfig, mosb *mcfgv1.MachineOSBuild) bool {
571+
if !isLayeredPool || mosc == nil || mosb == nil {
572+
return false
573+
}
574+
575+
mosbState := ctrlcommon.NewMachineOSBuildState(mosb)
576+
577+
// Check if there's an active build (building or prepared)
578+
if mosbState.IsBuilding() || mosbState.IsBuildPrepared() {
579+
return true
580+
}
581+
582+
// Check if there's a failed build - this means the update attempt failed
583+
// so nodes should not be considered "updated"
584+
if mosbState.IsBuildFailure() {
585+
return true
586+
}
587+
588+
// Check if there's a successful build that nodes haven't applied yet
589+
// This happens when a build completes but the MOSC status hasn't been updated
590+
// or nodes haven't picked up the new image yet
591+
if mosbState.IsBuildSuccess() && mosb.Status.DigestedImagePushSpec != "" {
592+
// If the successful build's image differs from what MOSC thinks is current,
593+
// then nodes are not truly updated to the latest successful build
594+
return string(mosb.Status.DigestedImagePushSpec) != string(mosc.Status.CurrentImagePullSpec)
595+
}
596+
597+
return false
598+
}

test/e2e-ocl/onclusterlayering_test.go

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,11 +1362,15 @@ func TestImageBuildDegradedOnFailureAndClearedOnBuildSuccess(t *testing.T) {
13621362

13631363
cs := framework.NewClientSet("")
13641364

1365+
// Get a random worker node to add to the layered pool
1366+
targetNode := helpers.GetRandomNode(t, cs, "worker")
1367+
13651368
mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{
13661369
poolName: layeredMCPName,
13671370
customDockerfiles: map[string]string{
13681371
layeredMCPName: cowsayDockerfile,
13691372
},
1373+
targetNode: &targetNode,
13701374
})
13711375

13721376
// First, add a bad containerfile to cause a build failure
@@ -1419,10 +1423,135 @@ func TestImageBuildDegradedOnFailureAndClearedOnBuildSuccess(t *testing.T) {
14191423

14201424
t.Logf("Second build completed successfully: %s", finishedBuild.Name)
14211425

1426+
// Wait for the MachineOSConfig to get the new pullspec, which indicates full reconciliation
1427+
waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, string(finishedBuild.Status.DigestedImagePushSpec))
1428+
14221429
// Wait for and verify ImageBuildDegraded condition is now False
14231430
degradedCondition = waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionFalse)
14241431
require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should still be present")
14251432
assert.Equal(t, "BuildSucceeded", degradedCondition.Reason, "ImageBuildDegraded reason should be BuildSucceeded")
14261433

14271434
t.Logf("ImageBuildDegraded condition correctly cleared to False with message: %s", degradedCondition.Message)
1435+
1436+
// Verify MCP status is correct after successful build and full reconciliation
1437+
successMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{})
1438+
require.NoError(t, err)
1439+
1440+
// After successful build completion and full reconciliation, MCP should show:
1441+
// Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False
1442+
kubeassert.Eventually().MachineConfigPoolReachesState(successMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) {
1443+
if err != nil {
1444+
return false, err
1445+
}
1446+
// Return false (keep polling) if conditions don't match expected state
1447+
if !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) ||
1448+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) ||
1449+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) ||
1450+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) {
1451+
return false, nil
1452+
}
1453+
1454+
// Return true when expected state is reached
1455+
t.Logf("MCP status after successful build - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v",
1456+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated),
1457+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating),
1458+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded),
1459+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded))
1460+
1461+
return true, nil
1462+
}, "MCP should reach correct state after successful build (Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False)")
1463+
1464+
// Now trigger another build to test MCP status transitions when a new build starts
1465+
t.Logf("Triggering a third build to test MCP status transitions")
1466+
1467+
// Modify the containerfile slightly to trigger a new build
1468+
apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{})
1469+
require.NoError(t, err)
1470+
1471+
// Add a comment to the containerfile to change it and trigger a new build
1472+
modifiedDockerfile := cowsayDockerfile + "\n# Comment to trigger new build"
1473+
apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{
1474+
{
1475+
ContainerfileArch: mcfgv1.NoArch,
1476+
Content: modifiedDockerfile,
1477+
},
1478+
}
1479+
1480+
updated, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{})
1481+
require.NoError(t, err)
1482+
1483+
t.Logf("Modified containerfile, waiting for third build to start")
1484+
1485+
// Get the updated MCP to compute the new build
1486+
mcp, err = cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{})
1487+
require.NoError(t, err)
1488+
1489+
// Compute the new MachineOSBuild name for the third build
1490+
thirdMoscMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), updated, mcp)
1491+
1492+
// Wait for the third build to start
1493+
thirdMosb := waitForBuildToStart(t, cs, thirdMoscMosb)
1494+
t.Logf("Third build started: %s", thirdMosb.Name)
1495+
1496+
// Verify MCP status during active build:
1497+
// Updated=False, Updating=True, Degraded=False, ImageBuildDegraded=False
1498+
buildingMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{})
1499+
require.NoError(t, err)
1500+
1501+
kubeassert.Eventually().MachineConfigPoolReachesState(buildingMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) {
1502+
if err != nil {
1503+
return false, err
1504+
}
1505+
// During build, MCP should show: Updated=False, Updating=True
1506+
if apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) ||
1507+
!apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) ||
1508+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) ||
1509+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) {
1510+
return false, nil
1511+
}
1512+
1513+
t.Logf("MCP status during active build - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v",
1514+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated),
1515+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating),
1516+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded),
1517+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded))
1518+
1519+
return true, nil
1520+
}, "MCP should reach correct state during active build (Updated=False, Updating=True, Degraded=False, ImageBuildDegraded=False)")
1521+
1522+
// Wait for the third build to complete successfully
1523+
finalBuild := waitForBuildToComplete(t, cs, thirdMosb)
1524+
t.Logf("Third build completed successfully: %s", finalBuild.Name)
1525+
1526+
// Wait for the MachineOSConfig to get the new pullspec, which indicates full reconciliation
1527+
waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, string(finalBuild.Status.DigestedImagePushSpec))
1528+
1529+
// Final verification: MCP status should return to:
1530+
// Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False
1531+
finalMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{})
1532+
require.NoError(t, err)
1533+
1534+
kubeassert.Eventually().MachineConfigPoolReachesState(finalMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) {
1535+
if err != nil {
1536+
return false, err
1537+
}
1538+
// Return false (keep polling) if conditions don't match expected state
1539+
if !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) ||
1540+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) ||
1541+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) ||
1542+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) {
1543+
return false, nil
1544+
}
1545+
1546+
// Return true when expected state is reached
1547+
t.Logf("Final MCP status after third build completion - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v",
1548+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated),
1549+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating),
1550+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded),
1551+
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded))
1552+
1553+
return true, nil
1554+
}, "MCP should return to correct state after final build completion (Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False)")
1555+
1556+
t.Logf("All MCP status transitions verified successfully across build failure, success, and subsequent new build")
14281557
}

0 commit comments

Comments
 (0)