Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion internal/controller/postgrescluster/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -776,10 +776,13 @@ func (r *Reconciler) rolloutInstance(
ctx context.Context, cluster *v1beta1.PostgresCluster,
instances *observedInstances, instance *Instance,
) error {
log := logging.FromContext(ctx).WithName("patroni")
log.Info("Starting to rolloutInstance...", "cluster", cluster.Name)
// The StatefulSet and number of Pods should have already been verified, but
// check again rather than panic.
// TODO(cbandy): The check for StatefulSet can go away if we watch Pod deletes.
if instance.Runner == nil || len(instance.Pods) != 1 {
log.Info("Unexpected instance state during rollout.")
return errors.Errorf(
"unexpected instance state during rollout: %v has %v pods",
instance.Name, len(instance.Pods))
Expand All @@ -805,6 +808,8 @@ func (r *Reconciler) rolloutInstance(
// NOTE(cbandy): The StatefulSet controlling this Pod reflects this change
// in its Status and triggers another reconcile.
if primary && len(instances.forCluster) > 1 {
log.Info("Starting controlled switchover...")

var span trace.Span
ctx, span = r.Tracer.Start(ctx, "patroni-change-primary")
defer span.End()
Expand All @@ -814,7 +819,27 @@ func (r *Reconciler) rolloutInstance(
return errors.Wrap(err, "failed to check if patroni v4 is used")
}

success, err := patroni.Executor(exec).ChangePrimaryAndWait(ctx, pod.Name, "", patroniVer4)
var success bool

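// If the PatroniPreferHTTP feature is enabled, try the Patroni HTTP API first,
// then fall back to PodExec.
// NOTE(review): the PodExec call below is guarded by `err != nil`, and a
// NewHttpClient failure is stored in httpErr rather than err. It looks like
// PodExec is skipped when the feature is disabled or the HTTP client cannot
// be created — confirm that err is nil at this point.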
if feature.Enabled(ctx, feature.PatroniPreferHTTP) {
log.Info("Attempting HTTP call...")

if res, httpErr := patroni.NewHttpClient(ctx, r.Client, pod.Name); httpErr == nil {
if success, err = res.ChangePrimaryAndWait(ctx, pod.Name, "", true); err == nil {
log.Info("HTTP call succeeded.")
}
}

if err != nil {
log.Info("HTTP call failed. Falling back to PodExec...")
}
}

if err != nil {
success, err = patroni.Executor(exec).ChangePrimaryAndWait(ctx, pod.Name, "", patroniVer4)
}

if err = errors.WithStack(err); err == nil && !success {
err = errors.New("unable to switchover")
}
Expand Down
108 changes: 89 additions & 19 deletions internal/controller/postgrescluster/patroni.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"k8s.io/apimachinery/pkg/util/intstr"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/percona/percona-postgresql-operator/internal/feature"
"github.com/percona/percona-postgresql-operator/internal/initialize"
"github.com/percona/percona-postgresql-operator/internal/logging"
"github.com/percona/percona-postgresql-operator/internal/naming"
Expand Down Expand Up @@ -51,6 +52,7 @@ func (r *Reconciler) handlePatroniRestarts(
) error {
const container = naming.ContainerDatabase
var primaryNeedsRestart, replicaNeedsRestart *Instance
log := logging.FromContext(ctx).WithName("[PATRONI]")

// Look for one primary and one replica that need to restart. Ignore
// containers that are terminating or not running; Kubernetes will start
Expand Down Expand Up @@ -88,11 +90,6 @@ func (r *Reconciler) handlePatroniRestarts(
// first.
if primaryNeedsRestart != nil {
pod := primaryNeedsRestart.Pods[0]
exec := patroni.Executor(func(
ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string,
) error {
return r.PodExec(ctx, pod.Namespace, pod.Name, container, stdin, stdout, stderr, command...)
})

patroniVer4, err := cluster.IsPatroniVer4()
if err != nil {
Expand All @@ -106,6 +103,27 @@ func (r *Reconciler) handlePatroniRestarts(
role = "master"
}

// If the PatroniPreferHTTP feature is enabled, try the Patroni HTTP API first,
// then fall back to PodExec.
if feature.Enabled(ctx, feature.PatroniPreferHTTP) {
log.Info("Attempting HTTP call...")

if client, err := patroni.NewHttpClient(ctx, r.Client, pod.Name); err == nil {
// Scope is not used with the HTTP REST API, so an empty string is passed.
if err := client.RestartPendingMembers(ctx, role, ""); err == nil {
log.Info("HTTP call succeeded.")
return nil
}
}

log.Info("HTTP call failed. Falling back to PodExec...")
}

exec := patroni.Executor(func(
ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string,
) error {
return r.PodExec(ctx, pod.Namespace, pod.Name, container, stdin, stdout, stderr, command...)
})

return errors.WithStack(exec.RestartPendingMembers(ctx, role, naming.PatroniScope(cluster)))
}

Expand All @@ -124,10 +142,26 @@ func (r *Reconciler) handlePatroniRestarts(
// how we decide when to restart.
// - https://www.postgresql.org/docs/current/runtime-config-replication.html
if replicaNeedsRestart != nil {
pod := replicaNeedsRestart.Pods[0]

// If the PatroniPreferHTTP feature is enabled, try the Patroni HTTP API first,
// then fall back to PodExec.
if feature.Enabled(ctx, feature.PatroniPreferHTTP) {
log.Info("Attempting HTTP call...")

if client, err := patroni.NewHttpClient(ctx, r.Client, pod.Name); err == nil {
// Scope is not used with the HTTP REST API, so an empty string is passed.
if err := client.RestartPendingMembers(ctx, "replica", ""); err == nil {
log.Info("HTTP call succeeded.")
return nil
}
}

log.Info("HTTP call failed. Falling back to PodExec...")
}

exec := patroni.Executor(func(
ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string,
) error {
pod := replicaNeedsRestart.Pods[0]
return r.PodExec(ctx, pod.Namespace, pod.Name, container, stdin, stdout, stderr, command...)
})

Expand Down Expand Up @@ -187,6 +221,8 @@ func (r *Reconciler) reconcilePatroniDynamicConfiguration(
ctx context.Context, cluster *v1beta1.PostgresCluster, instances *observedInstances,
pgHBAs postgres.HBAs, pgParameters postgres.Parameters,
) error {
log := logging.FromContext(ctx).WithName("[PATRONI]")

if !patroni.ClusterBootstrapped(cluster) {
// Patroni has not yet bootstrapped. Dynamic configuration happens through
// configuration files during bootstrap, so there's nothing to do here.
Expand All @@ -209,21 +245,35 @@ func (r *Reconciler) reconcilePatroniDynamicConfiguration(
return nil
}

// NOTE(cbandy): Despite the guards above, calling PodExec may still fail
// due to a missing or stopped container.

exec := func(ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error {
return r.PodExec(ctx, pod.Namespace, pod.Name, naming.ContainerDatabase, stdin, stdout, stderr, command...)
}

var configuration map[string]any

if cluster.Spec.Patroni != nil {
configuration = cluster.Spec.Patroni.DynamicConfiguration
}

configuration = patroni.DynamicConfiguration(cluster, configuration, pgHBAs, pgParameters)

return errors.WithStack(
patroni.Executor(exec).ReplaceConfiguration(ctx, configuration))
// If the PatroniPreferHTTP feature is enabled, try the Patroni HTTP API first,
// then fall back to PodExec.
if feature.Enabled(ctx, feature.PatroniPreferHTTP) {
log.Info("Attempting HTTP call...")

if client, err := patroni.NewHttpClient(ctx, r.Client, pod.Name); err == nil {
if err := client.ReplaceConfiguration(ctx, configuration); err == nil {
log.Info("HTTP call succeeded.")
return nil
}
}

log.Info("HTTP call failed. Falling back to PodExec...")
}

// NOTE(cbandy): Despite the guards above, calling PodExec may still fail
// due to a missing or stopped container.
exec := func(ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error {
return r.PodExec(ctx, pod.Namespace, pod.Name, naming.ContainerDatabase, stdin, stdout, stderr, command...)
}

return errors.WithStack(patroni.Executor(exec).ReplaceConfiguration(ctx, configuration))
}

// generatePatroniLeaderLeaseService returns a v1.Service that exposes the
Expand Down Expand Up @@ -462,7 +512,7 @@ func replicationCertSecretProjection(certificate *corev1.Secret) *corev1.SecretP
func (r *Reconciler) reconcilePatroniSwitchover(ctx context.Context,
cluster *v1beta1.PostgresCluster, instances *observedInstances,
) error {
log := logging.FromContext(ctx)
log := logging.FromContext(ctx).WithName("[PATRONI]")

// If switchover is not enabled, clear out the Patroni switchover status fields
// which might have been set by previous switchovers.
Expand Down Expand Up @@ -552,9 +602,29 @@ func (r *Reconciler) reconcilePatroniSwitchover(ctx context.Context,
// TODO(benjaminjb): consider pulling the timeline from the pod annotation; manual experiments
// have shown that the annotation on the Leader pod is up to date during a switchover, but
// missing from the Replica pods.
timeline, err := patroni.Executor(exec).GetTimeline(ctx)
if err != nil {
return err
var timeline int64
var err error

// If the PatroniPreferHTTP feature is enabled, try the Patroni HTTP API first,
// then fall back to PodExec.
if feature.Enabled(ctx, feature.PatroniPreferHTTP) {
log.Info("Attempting HTTP call...")

if res, httpErr := patroni.NewHttpClient(ctx, r.Client, runningPod.Name); httpErr == nil {
if timeline, err = res.GetTimeline(ctx); err == nil {
log.Info("HTTP call succeeded.")
}
}

if err != nil {
log.Error(err, "HTTP call failed. Falling back to PodExec...")
}
}

if timeline == 0 {
timeline, err = patroni.Executor(exec).GetTimeline(ctx)
if err != nil {
return err
}
}

if timeline == 0 {
Expand Down
4 changes: 4 additions & 0 deletions internal/feature/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ const (

// Support VolumeSnapshots
VolumeSnapshots = "VolumeSnapshots"

// Prefer the Patroni HTTP API over PodExec for Patroni calls; callers fall
// back to PodExec when the HTTP call fails.
PatroniPreferHTTP = "PatroniPreferHTTP"
)

// NewGate returns a MutableGate with the Features defined in this package.
Expand All @@ -98,6 +101,7 @@ func NewGate() MutableGate {
AutoGrowVolumes: {Default: false, PreRelease: featuregate.Alpha},
BridgeIdentifiers: {Default: false, PreRelease: featuregate.Alpha},
InstanceSidecars: {Default: false, PreRelease: featuregate.Alpha},
PatroniPreferHTTP: {Default: false, PreRelease: featuregate.Alpha},
PGBouncerSidecars: {Default: false, PreRelease: featuregate.Alpha},
TablespaceVolumes: {Default: false, PreRelease: featuregate.Alpha},
VolumeSnapshots: {Default: false, PreRelease: featuregate.Alpha},
Expand Down
Loading
Loading