diff --git a/go.mod b/go.mod index 1b37df107..4a3064ebd 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.1 github.com/operator-framework/api v0.30.0 - github.com/operator-framework/helm-operator-plugins v0.8.0 + github.com/operator-framework/helm-operator-plugins v0.8.1-0.20250222123620-6261f2574b3b github.com/operator-framework/operator-registry v1.50.0 github.com/prometheus/client_golang v1.21.1 github.com/sirupsen/logrus v1.9.3 diff --git a/go.sum b/go.sum index 2e5ddec2f..2dbb8619b 100644 --- a/go.sum +++ b/go.sum @@ -536,8 +536,8 @@ github.com/openshift/crd-schema-checker v0.0.0-20240404194209-35a9033b1d11 h1:eT github.com/openshift/crd-schema-checker v0.0.0-20240404194209-35a9033b1d11/go.mod h1:EmVJt97N+pfWFsli/ipXTBZqSG5F5KGQhm3c3IsGq1o= github.com/operator-framework/api v0.30.0 h1:44hCmGnEnZk/Miol5o44dhSldNH0EToQUG7vZTl29kk= github.com/operator-framework/api v0.30.0/go.mod h1:FYxAPhjtlXSAty/fbn5YJnFagt6SpJZJgFNNbvDe5W0= -github.com/operator-framework/helm-operator-plugins v0.8.0 h1:0f6HOQC5likkf0b/OvGvw7nhDb6h8Cj5twdCNjwNzMc= -github.com/operator-framework/helm-operator-plugins v0.8.0/go.mod h1:Sc+8bE38xTCgCChBUvtq/PxatEg9fAypr7S5iAw8nlA= +github.com/operator-framework/helm-operator-plugins v0.8.1-0.20250222123620-6261f2574b3b h1:andoOL7lpEafTXdjPGDuNLJlJQh0UmRzgj0+D31K8HU= +github.com/operator-framework/helm-operator-plugins v0.8.1-0.20250222123620-6261f2574b3b/go.mod h1:qioyxwOSI89RAtTWhccc+RyaPQdKTTJRc6sFkT6kqys= github.com/operator-framework/operator-lib v0.17.0 h1:cbz51wZ9+GpWR1ZYP4CSKSSBxDlWxmmnseaHVZZjZt4= github.com/operator-framework/operator-lib v0.17.0/go.mod h1:TGopBxIE8L6E/Cojzo26R3NFp1eNlqhQNmzqhOblaLw= github.com/operator-framework/operator-registry v1.50.0 h1:kMAwsKAEDjuSx5dGchMX+CD3SMHWwOAC/xyK3LQweB4= diff --git a/internal/operator-controller/action/helm_test.go b/internal/operator-controller/action/helm_test.go index 3f1f02f9d..b301bcd4c 100644 --- a/internal/operator-controller/action/helm_test.go +++ b/internal/operator-controller/action/helm_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "helm.sh/helm/v3/pkg/action" "helm.sh/helm/v3/pkg/chart" "helm.sh/helm/v3/pkg/release" "sigs.k8s.io/controller-runtime/pkg/client" @@ -70,6 +71,11 @@ func (m *mockActionClient) Reconcile(rel *release.Release) error { return args.Error(0) } +func (m *mockActionClient) Config() *action.Configuration { + args := m.Called() + return args.Get(0).(*action.Configuration) +} + var _ actionclient.ActionClientGetter = &mockActionClientGetter{} type mockActionClientGetter struct { diff --git a/internal/operator-controller/applier/helm.go b/internal/operator-controller/applier/helm.go index 60a03477a..0e5971a71 100644 --- a/internal/operator-controller/applier/helm.go +++ b/internal/operator-controller/applier/helm.go @@ -36,6 +36,11 @@ const ( maxHelmReleaseHistory = 10 ) +var ( + errPendingRelease = errors.New("release is in a pending state") + errBuildHelmChartFuncNil = errors.New("BundleToHelmChartFn is nil") +) + // Preflight is a check that should be run before making any changes to the cluster type Preflight interface { // Install runs checks that should be successful prior @@ -95,7 +100,24 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte labels: objectLabels, } - rel, desiredRel, state, err := h.getReleaseState(ac, ext, chrt, values, post) + rel, desiredRel, state, err := h.getReleaseState(ctx, ac, ext, chrt, values, post) + // if a release is pending, that means that a helm action + // (installation/upgrade) we were attempting was likely interrupted in-flight. + // Pending release would leave us in reconciliation error loop because helm + // wouldn't be able to progress automatically from it. + // + // one of the workarounds is to try and remove helm metadata relating to + // that pending release which should 'reset' its state communicated to helm + // and the next reconciliation should be able to successfully pick up from here + // for context see: https://github.com/helm/helm/issues/5595 and https://github.com/helm/helm/issues/7476 + // and the discussion in https://github.com/operator-framework/operator-controller/pull/1776 + if errors.Is(err, errPendingRelease) { + if _, err := ac.Config().Releases.Delete(rel.Name, rel.Version); err != nil { + return nil, "", fmt.Errorf("failed removing pending release %q version %d metadata: %w", rel.Name, rel.Version, err) + } + // return an error to try to detect proper state (installation/upgrade) at next reconciliation + return nil, "", fmt.Errorf("removed pending release %q version %d metadata", rel.Name, rel.Version) + } if err != nil { return nil, "", err } @@ -155,7 +177,7 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte func (h *Helm) buildHelmChart(bundleFS fs.FS, ext *ocv1.ClusterExtension) (*chart.Chart, error) { if h.BundleToHelmChartFn == nil { - return nil, errors.New("BundleToHelmChartFn is nil") + return nil, errBuildHelmChartFuncNil } watchNamespace, err := GetWatchNamespace(ext) if err != nil { @@ -164,9 +186,12 @@ func (h *Helm) buildHelmChart(bundleFS fs.FS, ext *ocv1.ClusterExtension) (*char return h.BundleToHelmChartFn(bundleFS, ext.Spec.Namespace, watchNamespace) } -func (h *Helm) getReleaseState(cl helmclient.ActionInterface, ext *ocv1.ClusterExtension, chrt *chart.Chart, values chartutil.Values, post postrender.PostRenderer) (*release.Release, *release.Release, string, error) { +func (h *Helm) getReleaseState(ctx context.Context, cl helmclient.ActionInterface, ext *ocv1.ClusterExtension, chrt *chart.Chart, values chartutil.Values, post postrender.PostRenderer) (*release.Release, *release.Release, string, error) { + logger := log.FromContext(ctx) currentRelease, err := cl.Get(ext.GetName()) + if errors.Is(err, driver.ErrReleaseNotFound) { + logger.V(4).Info("ClusterExtension dry-run install", "extension", ext.GetName()) desiredRelease, err := cl.Install(ext.GetName(), ext.Spec.Namespace, chrt, values, func(i *action.Install) error { i.DryRun = true i.DryRunOption = "server" @@ -184,8 +209,12 @@ func (h *Helm) getReleaseState(cl helmclient.ActionInterface, ext *ocv1.ClusterE if err != nil { return nil, nil, StateError, err } + if currentRelease.Info.Status.IsPending() { + return currentRelease, nil, StateError, errPendingRelease + } desiredRelease, err := cl.Upgrade(ext.GetName(), ext.Spec.Namespace, chrt, values, func(upgrade *action.Upgrade) error { + logger.V(4).Info("ClusterExtension dry-run upgrade", "extension", ext.GetName()) upgrade.MaxHistory = maxHelmReleaseHistory upgrade.DryRun = true upgrade.DryRunOption = "server" diff --git a/internal/operator-controller/applier/helm_test.go b/internal/operator-controller/applier/helm_test.go index faaa41783..1d9216bb8 100644 --- a/internal/operator-controller/applier/helm_test.go +++ b/internal/operator-controller/applier/helm_test.go @@ -13,6 +13,7 @@ import ( "helm.sh/helm/v3/pkg/action" "helm.sh/helm/v3/pkg/chart" "helm.sh/helm/v3/pkg/release" + "helm.sh/helm/v3/pkg/storage" "helm.sh/helm/v3/pkg/storage/driver" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -50,6 +51,7 @@ type mockActionGetter struct { reconcileErr error desiredRel *release.Release currentRel *release.Release + config *action.Configuration } func (mag *mockActionGetter) ActionClientFor(ctx context.Context, obj client.Object) (helmclient.ActionInterface, error) { @@ -98,6 +100,10 @@ func (mag *mockActionGetter) Reconcile(rel *release.Release) error { return mag.reconcileErr } +func (mag *mockActionGetter) Config() *action.Configuration { + return mag.config +} + var ( // required for unmockable call to convert.RegistryV1ToHelmChart validFS = fstest.MapFS{ @@ -179,6 +185,80 @@ func TestApply_Base(t *testing.T) { }) } +func TestApply_PendingRelease(t *testing.T) { + t.Run("fails removing a pending install release", func(t *testing.T) { + testRel := &release.Release{Name: "testrel", Version: 0, Info: &release.Info{Status: release.StatusPendingInstall}} + testStorage := storage.Init(driver.NewMemory()) + + mockAcg := &mockActionGetter{currentRel: testRel, config: &action.Configuration{Releases: testStorage}} + helmApplier := applier.Helm{ + ActionClientGetter: mockAcg, + BundleToHelmChartFn: convert.RegistryV1ToHelmChart, + } + + objs, state, err := helmApplier.Apply(context.TODO(), validFS, testCE, testObjectLabels, testStorageLabels) + require.Error(t, err) + require.ErrorContains(t, err, "removing pending release") + require.Nil(t, objs) + require.Empty(t, state) + }) + + t.Run("fails removing a pending upgrade release", func(t *testing.T) { + testRel := &release.Release{Name: "testrel", Version: 0, Info: &release.Info{Status: release.StatusPendingUpgrade}} + testStorage := storage.Init(driver.NewMemory()) + + mockAcg := &mockActionGetter{currentRel: testRel, config: &action.Configuration{Releases: testStorage}} + helmApplier := applier.Helm{ + ActionClientGetter: mockAcg, + BundleToHelmChartFn: convert.RegistryV1ToHelmChart, + } + + objs, state, err := helmApplier.Apply(context.TODO(), validFS, testCE, testObjectLabels, testStorageLabels) + require.Error(t, err) + require.ErrorContains(t, err, "removing pending release") + require.Nil(t, objs) + require.Empty(t, state) + }) + + t.Run("successfully removes a pending install release", func(t *testing.T) { + testRel := &release.Release{Name: "testrel", Version: 0, Info: &release.Info{Status: release.StatusPendingInstall}} + testStorage := storage.Init(driver.NewMemory()) + err := testStorage.Create(testRel) + require.NoError(t, err) + + mockAcg := &mockActionGetter{currentRel: testRel, config: &action.Configuration{Releases: testStorage}} + helmApplier := applier.Helm{ + ActionClientGetter: mockAcg, + BundleToHelmChartFn: convert.RegistryV1ToHelmChart, + } + + objs, state, err := helmApplier.Apply(context.TODO(), validFS, testCE, testObjectLabels, testStorageLabels) + require.Error(t, err) + require.ErrorContains(t, err, "removed pending release") + require.Nil(t, objs) + require.Empty(t, state) + }) + + t.Run("successfully removes an pending upgrade release", func(t *testing.T) { + testRel := &release.Release{Name: "testrel", Version: 0, Info: &release.Info{Status: release.StatusPendingUpgrade}} + testStorage := storage.Init(driver.NewMemory()) + err := testStorage.Create(testRel) + require.NoError(t, err) + + mockAcg := &mockActionGetter{currentRel: testRel, config: &action.Configuration{Releases: testStorage}} + helmApplier := applier.Helm{ + ActionClientGetter: mockAcg, + BundleToHelmChartFn: convert.RegistryV1ToHelmChart, + } + + objs, state, err := helmApplier.Apply(context.TODO(), validFS, testCE, testObjectLabels, testStorageLabels) + require.Error(t, err) + require.ErrorContains(t, err, "removed pending release") + require.Nil(t, objs) + require.Empty(t, state) + }) +} + func TestApply_Installation(t *testing.T) { t.Run("fails during dry-run installation", func(t *testing.T) { mockAcg := &mockActionGetter{ @@ -340,6 +420,7 @@ func TestApply_Upgrade(t *testing.T) { t.Run("fails during dry-run upgrade", func(t *testing.T) { mockAcg := &mockActionGetter{ + currentRel: testCurrentRelease, dryRunUpgradeErr: errors.New("failed attempting to dry-run upgrade chart"), } helmApplier := applier.Helm{ diff --git a/internal/operator-controller/controllers/clusterextension_controller_test.go b/internal/operator-controller/controllers/clusterextension_controller_test.go index be61891a0..1531c9318 100644 --- a/internal/operator-controller/controllers/clusterextension_controller_test.go +++ b/internal/operator-controller/controllers/clusterextension_controller_test.go @@ -12,6 +12,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "helm.sh/helm/v3/pkg/action" "helm.sh/helm/v3/pkg/chart" "helm.sh/helm/v3/pkg/release" "helm.sh/helm/v3/pkg/storage/driver" @@ -1411,6 +1412,10 @@ func (mag *MockActionGetter) Reconcile(rel *release.Release) error { return nil } +func (mag *MockActionGetter) Config() *action.Configuration { + return nil +} + func TestGetInstalledBundleHistory(t *testing.T) { getter := controllers.DefaultInstalledBundleGetter{}