OCPBUGS-61114: Fix ImageBuildDegraded Status updates #5279
@@ -266,56 +266,20 @@ func (b *buildReconciler) updateMachineOSBuild(ctx context.Context, old, current
}

if !oldState.IsBuildFailure() && curState.IsBuildFailure() {
klog.Infof("MachineOSBuild %s failed, leaving ephemeral objects in place for inspection and setting BuildDegraded condition", current.Name)

// Before setting BuildDegraded, check if another MachineOSBuild is active or if this one has been superseded
mosbList, err := b.getMachineOSBuildsForMachineOSConfig(mosc)
if err != nil {
return fmt.Errorf("could not get MachineOSBuilds for MachineOSConfig %q: %w", mosc.Name, err)
}

// Check if there are any newer or active builds that would supersede this failure
hasActiveBuild := false
isCurrentBuildStale := false
for _, mosb := range mosbList {
// Skip the current failed build
if mosb.Name == current.Name {
continue
}
mosbState := ctrlcommon.NewMachineOSBuildState(mosb)
// Check if there's an active build (building, prepared, or succeeded)
if mosbState.IsBuilding() || mosbState.IsBuildPrepared() || mosbState.IsBuildSuccess() {
hasActiveBuild = true
klog.Infof("Found active MachineOSBuild %s, skipping BuildDegraded condition for failed build %s", mosb.Name, current.Name)
break
}
}

// Also check if the failed MachineOSBuild is no longer referenced by the MachineOSConfig
if mosc.Status.CurrentImagePullSpec != "" && !isMachineOSBuildCurrentForMachineOSConfigWithPullspec(mosc, current) {
isCurrentBuildStale = true
klog.Infof("Failed MachineOSBuild %s is no longer current for MachineOSConfig %s, skipping BuildDegraded condition", current.Name, mosc.Name)
}

// Only set BuildDegraded if there are no active builds and this build is still current
if hasActiveBuild || isCurrentBuildStale {
klog.Infof("Skipping BuildDegraded condition for failed MachineOSBuild %s (hasActiveBuild=%v, isStale=%v)", current.Name, hasActiveBuild, isCurrentBuildStale)
return nil
}
klog.Infof("MachineOSBuild %s failed, leaving ephemeral objects in place for inspection", current.Name)

mcp, err := b.machineConfigPoolLister.Get(mosc.Spec.MachineConfigPool.Name)
if err != nil {
return fmt.Errorf("could not get MachineConfigPool from MachineOSConfig %q: %w", mosc.Name, err)
}

// Set BuildDegraded condition
buildError := getBuildErrorFromMOSB(current)
return b.syncBuildFailureStatus(ctx, mcp, buildError, current.Name)
// Always update ImageBuildDegraded condition based on current active build status
return b.updateImageBuildDegradedCondition(ctx, mcp, mosc)
}

// If the build was successful, clean up the build objects and propagate the
// final image pushspec onto the MachineOSConfig object.
// Also clear BuildDegraded condition if it was set due to a previously failed build
// Also update BuildDegraded condition based on current active build status
if !oldState.IsBuildSuccess() && curState.IsBuildSuccess() {
klog.Infof("MachineOSBuild %s succeeded, cleaning up all ephemeral objects used for the build", current.Name)

@@ -324,9 +288,9 @@ func (b *buildReconciler) updateMachineOSBuild(ctx context.Context, old, current
return fmt.Errorf("could not get MachineConfigPool from MachineOSConfig %q: %w", mosc.Name, err)
}

// Clear BuildDegraded condition if set
if err := b.syncBuildSuccessStatus(ctx, mcp); err != nil {
klog.Errorf("Failed to clear BuildDegraded condition for pool %s: %v", mcp.Name, err)
// Update BuildDegraded condition based on current active build status
if err := b.updateImageBuildDegradedCondition(ctx, mcp, mosc); err != nil {
klog.Errorf("Failed to update ImageBuildDegraded condition for pool %s: %v", mcp.Name, err)
}

// Clean up ephemeral objects

@@ -372,6 +336,18 @@ func (b *buildReconciler) updateMachineOSConfigStatus(ctx context.Context, mosc
klog.Infof("Updated annotations on MachineOSConfig %q", mosc.Name)

mosc = updatedMosc

// When annotations are updated (new build starts), also update observedGeneration
// to signal that the controller is processing the current generation
if mosc.Status.ObservedGeneration != mosc.GetGeneration() {
mosc.Status.ObservedGeneration = mosc.GetGeneration()
_, err = b.mcfgclient.MachineconfigurationV1().MachineOSConfigs().UpdateStatus(ctx, mosc, metav1.UpdateOptions{})
if err != nil {
klog.Errorf("Failed to update observedGeneration on MachineOSConfig %q: %v", mosc.Name, err)
} else {
klog.Infof("Updated observedGeneration on MachineOSConfig %q to %d", mosc.Name, mosc.GetGeneration())
}
}
}

// Skip the status update if digest image pushspec hasn't been set yet.

@@ -460,14 +436,14 @@ func (b *buildReconciler) startBuild(ctx context.Context, mosb *mcfgv1.MachineOS
return fmt.Errorf("could not update MachineOSConfig %q status for MachineOSBuild %q: %w", mosc.Name, mosb.Name, err)
}

// Initialize BuildDegraded condition to False when build starts
// Update BuildDegraded condition based on current active build status when build starts
mcp, err := b.machineConfigPoolLister.Get(mosc.Spec.MachineConfigPool.Name)
if err != nil {
return fmt.Errorf("could not get MachineConfigPool from MachineOSConfig %q: %w", mosc.Name, err)
}

if err := b.initializeBuildDegradedCondition(ctx, mcp); err != nil {
klog.Errorf("Failed to initialize BuildDegraded condition for pool %s: %v", mcp.Name, err)
if err := b.updateImageBuildDegradedCondition(ctx, mcp, mosc); err != nil {
klog.Errorf("Failed to update ImageBuildDegraded condition for pool %s: %v", mcp.Name, err)
}

return nil

@@ -1713,3 +1689,72 @@ func (b *buildReconciler) syncBuildFailureStatus(ctx context.Context, pool *mcfg
}
return buildErr
}

// updateImageBuildDegradedCondition examines all MachineOSBuilds for the MachineOSConfig
// and sets the ImageBuildDegraded condition based on the status of the currently active build
func (b *buildReconciler) updateImageBuildDegradedCondition(ctx context.Context, pool *mcfgv1.MachineConfigPool, mosc *mcfgv1.MachineOSConfig) error {
mosbList, err := b.getMachineOSBuildsForMachineOSConfig(mosc)
if err != nil {
return fmt.Errorf("could not get MachineOSBuilds for MachineOSConfig %q: %w", mosc.Name, err)
}

// Find the currently active (most relevant) build
// Priority: 1) Currently referenced by MOSC, 2) Building/Prepared, 3) Most recent
var activeBuild *mcfgv1.MachineOSBuild
var mostRecentBuild *mcfgv1.MachineOSBuild

// First, look for the build currently referenced by the MachineOSConfig
for _, mosb := range mosbList {
if isMachineOSBuildCurrentForMachineOSConfig(mosc, mosb) {
activeBuild = mosb
break
}
}

// If no current build found, look for active builds (building/prepared)
if activeBuild == nil {
for _, mosb := range mosbList {
mosbState := ctrlcommon.NewMachineOSBuildState(mosb)
if mosbState.IsBuilding() || mosbState.IsBuildPrepared() {
activeBuild = mosb
break
}
}
}

// Keep track of the most recent build regardless
for _, mosb := range mosbList {
if mostRecentBuild == nil || mosb.CreationTimestamp.After(mostRecentBuild.CreationTimestamp.Time) {
mostRecentBuild = mosb
}
}

// If still no active build, use the most recent one
if activeBuild == nil {
activeBuild = mostRecentBuild
}

Comment on lines +1696 to +1736
suggestion (non-blocking): Would it make sense to refactor this logic into a separate function called …?
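For illustration, a rough sketch of how the build-selection logic could be pulled out into its own helper. The name findActiveBuild is purely hypothetical (the name proposed in the truncated suggestion above is not visible), and the sketch reuses only identifiers that already appear in this diff:

// findActiveBuild is a hypothetical helper sketching the suggested refactor.
// It picks the MachineOSBuild whose state should drive the ImageBuildDegraded
// condition: the build the MachineOSConfig currently references, else a build
// that is building/prepared, else the most recently created build (or nil).
func (b *buildReconciler) findActiveBuild(mosc *mcfgv1.MachineOSConfig, mosbList []*mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild {
    // 1) Prefer the build currently referenced by the MachineOSConfig.
    for _, mosb := range mosbList {
        if isMachineOSBuildCurrentForMachineOSConfig(mosc, mosb) {
            return mosb
        }
    }

    // 2) Otherwise prefer a build that is actively in progress.
    for _, mosb := range mosbList {
        mosbState := ctrlcommon.NewMachineOSBuildState(mosb)
        if mosbState.IsBuilding() || mosbState.IsBuildPrepared() {
            return mosb
        }
    }

    // 3) Fall back to the most recently created build, if any.
    var mostRecent *mcfgv1.MachineOSBuild
    for _, mosb := range mosbList {
        if mostRecent == nil || mosb.CreationTimestamp.After(mostRecent.CreationTimestamp.Time) {
            mostRecent = mosb
        }
    }
    return mostRecent
}

updateImageBuildDegradedCondition would then reduce to calling the helper and switching on the returned build's state.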
// If no builds exist at all, clear any existing BuildDegraded condition
if activeBuild == nil {
return b.syncBuildSuccessStatus(ctx, pool)
}

// Update condition based on the active build's status
activeState := ctrlcommon.NewMachineOSBuildState(activeBuild)

switch {
case activeState.IsBuildFailure():
// Set BuildDegraded=True for failed builds
buildError := getBuildErrorFromMOSB(activeBuild)
return b.syncBuildFailureStatus(ctx, pool, buildError, activeBuild.Name)
case activeState.IsBuildSuccess():
// Clear BuildDegraded=False for successful builds
return b.syncBuildSuccessStatus(ctx, pool)
case activeState.IsBuilding(), activeState.IsBuildPrepared():
// Clear BuildDegraded=False for builds in progress (allow retry after previous failure)
return b.initializeBuildDegradedCondition(ctx, pool)
}

// For any other states, don't change the condition
return nil
}
@@ -1362,11 +1362,15 @@ func TestImageBuildDegradedOnFailureAndClearedOnBuildSuccess(t *testing.T)

cs := framework.NewClientSet("")

// Get a random worker node to add to the layered pool
targetNode := helpers.GetRandomNode(t, cs, "worker")
question: Do we need to roll the image out to the node every time we do a build in this test? I ask because each rollout takes some time to do, and with three image builds, this test could take a long time overall.
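If the node rollout is not actually needed for every build, one possible variant, purely a sketch and assuming prepareForOnClusterLayeringTest treats targetNode as optional (this diff does not confirm that), would be to omit the target node from the setup so the builds run without being applied to a node:

// Hypothetical build-only setup: same opts as the call below, but with no
// targetNode, so a successful image build would not trigger a node rollout.
mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{
    poolName: layeredMCPName,
    customDockerfiles: map[string]string{
        layeredMCPName: cowsayDockerfile,
    },
})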

mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{
poolName: layeredMCPName,
customDockerfiles: map[string]string{
layeredMCPName: cowsayDockerfile,
},
targetNode: &targetNode,
})

// First, add a bad containerfile to cause a build failure

@@ -1419,10 +1423,135 @@ func TestImageBuildDegradedOnFailureAndClearedOnBuildSuccess(t *testing.T)

t.Logf("Second build completed successfully: %s", finishedBuild.Name)

// Wait for the MachineOSConfig to get the new pullspec, which indicates full reconciliation
waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, string(finishedBuild.Status.DigestedImagePushSpec))

// Wait for and verify ImageBuildDegraded condition is now False
degradedCondition = waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionFalse)
require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should still be present")
assert.Equal(t, "BuildSucceeded", degradedCondition.Reason, "ImageBuildDegraded reason should be BuildSucceeded")

t.Logf("ImageBuildDegraded condition correctly cleared to False with message: %s", degradedCondition.Message)

// Verify MCP status is correct after successful build and full reconciliation
successMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{})
require.NoError(t, err)

// After successful build completion and full reconciliation, MCP should show:
// Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False
kubeassert.Eventually().MachineConfigPoolReachesState(successMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) {
if err != nil {
return false, err
}
// Return false (keep polling) if conditions don't match expected state
if !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) ||
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) ||
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) ||
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) {
return false, nil
}

// Return true when expected state is reached
t.Logf("MCP status after successful build - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v",
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated),
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating),
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded),
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded))

return true, nil
}, "MCP should reach correct state after successful build (Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False)")

// Now trigger another build to test MCP status transitions when a new build starts
t.Logf("Triggering a third build to test MCP status transitions")

// Modify the containerfile slightly to trigger a new build
apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{})
require.NoError(t, err)

// Add a comment to the containerfile to change it and trigger a new build
modifiedDockerfile := cowsayDockerfile + "\n# Comment to trigger new build"
apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{
{
ContainerfileArch: mcfgv1.NoArch,
Content: modifiedDockerfile,
},
}

updated, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{})
require.NoError(t, err)

t.Logf("Modified containerfile, waiting for third build to start")

// Get the updated MCP to compute the new build
mcp, err = cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{})
require.NoError(t, err)

// Compute the new MachineOSBuild name for the third build
thirdMoscMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), updated, mcp)

// Wait for the third build to start
thirdMosb := waitForBuildToStart(t, cs, thirdMoscMosb)
t.Logf("Third build started: %s", thirdMosb.Name)

// Verify MCP status during active build:
// Updated=False, Updating=True, Degraded=False, ImageBuildDegraded=False
buildingMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{})
require.NoError(t, err)

kubeassert.Eventually().MachineConfigPoolReachesState(buildingMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) {
if err != nil {
return false, err
}
// During build, MCP should show: Updated=False, Updating=True
if apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) ||
!apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) ||
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) ||
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) {
return false, nil
}

t.Logf("MCP status during active build - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v",
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated),
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating),
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded),
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded))

return true, nil
}, "MCP should reach correct state during active build (Updated=False, Updating=True, Degraded=False, ImageBuildDegraded=False)")

// Wait for the third build to complete successfully
finalBuild := waitForBuildToComplete(t, cs, thirdMosb)
t.Logf("Third build completed successfully: %s", finalBuild.Name)

// Wait for the MachineOSConfig to get the new pullspec, which indicates full reconciliation
waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, string(finalBuild.Status.DigestedImagePushSpec))

// Final verification: MCP status should return to:
// Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False
finalMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{})
require.NoError(t, err)

kubeassert.Eventually().MachineConfigPoolReachesState(finalMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) {
if err != nil {
return false, err
}
// Return false (keep polling) if conditions don't match expected state
if !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) ||
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) ||
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) ||
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) {
return false, nil
}

// Return true when expected state is reached
t.Logf("Final MCP status after third build completion - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v",
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated),
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating),
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded),
apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded))

return true, nil
}, "MCP should return to correct state after final build completion (Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False)")

t.Logf("All MCP status transitions verified successfully across build failure, success, and subsequent new build")
}
thought (non-blocking): I feel like we need a few more of these checks throughout the build controller. However, I'm not sure where / when to use them.