diff --git a/apis/v1beta1/vspheremachine_types.go b/apis/v1beta1/vspheremachine_types.go index cc6d31d1aa..cbdd5ff7ac 100644 --- a/apis/v1beta1/vspheremachine_types.go +++ b/apis/v1beta1/vspheremachine_types.go @@ -81,6 +81,10 @@ const ( // Note: This reason is used only in supervisor mode. VSphereMachineVirtualMachinePoweringOnV1Beta2Reason = "PoweringOn" + // VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason surfaces that the VirtualMachine + // is waiting for its corresponding VirtualMachineGroup to be created and to include this VM as a member. + VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason = "WaitingForVirtualMachineGroup" + // VSphereMachineVirtualMachineWaitingForNetworkAddressV1Beta2Reason surfaces when the VirtualMachine that is controlled // by the VSphereMachine waiting for the machine network settings to be reported after machine being powered on. VSphereMachineVirtualMachineWaitingForNetworkAddressV1Beta2Reason = "WaitingForNetworkAddress" diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 401dd765e5..102217c078 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -21,7 +21,7 @@ spec: - "--diagnostics-address=${CAPI_DIAGNOSTICS_ADDRESS:=:8443}" - "--insecure-diagnostics=${CAPI_INSECURE_DIAGNOSTICS:=false}" - --v=4 - - "--feature-gates=MultiNetworks=${EXP_MULTI_NETWORKS:=false},NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZones=${EXP_NAMESPACE_SCOPED_ZONES:=false},PriorityQueue=${EXP_PRIORITY_QUEUE:=false}" + - "--feature-gates=MultiNetworks=${EXP_MULTI_NETWORKS:=false},NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZones=${EXP_NAMESPACE_SCOPED_ZONES:=false},NodeAutoPlacement=${EXP_NODE_AUTO_PLACEMENT:=false},PriorityQueue=${EXP_PRIORITY_QUEUE:=false}" image: controller:latest imagePullPolicy: IfNotPresent name: manager diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index ff4613da71..c57a326fb9 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -249,6 +249,7 @@ rules: - apiGroups: - vmoperator.vmware.com resources: + - virtualmachinegroups - virtualmachineimages - virtualmachineimages/status - virtualmachines @@ -264,6 +265,12 @@ rules: - patch - update - watch +- apiGroups: + - vmoperator.vmware.com + resources: + - virtualmachinegroups/status + verbs: + - get - apiGroups: - vmware.com resources: diff --git a/controllers/vmware/controllers_suite_test.go b/controllers/vmware/controllers_suite_test.go index 87d99112e0..128ee2086d 100644 --- a/controllers/vmware/controllers_suite_test.go +++ b/controllers/vmware/controllers_suite_test.go @@ -26,6 +26,7 @@ import ( . "github.com/onsi/ginkgo/v2" "github.com/onsi/ginkgo/v2/types" . 
"github.com/onsi/gomega" + vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -71,6 +72,7 @@ func setup(ctx context.Context) (*helpers.TestEnvironment, clustercache.ClusterC utilruntime.Must(infrav1.AddToScheme(scheme.Scheme)) utilruntime.Must(clusterv1.AddToScheme(scheme.Scheme)) utilruntime.Must(vmwarev1.AddToScheme(scheme.Scheme)) + utilruntime.Must(vmoprv1.AddToScheme(scheme.Scheme)) testEnv := helpers.NewTestEnvironment(ctx) diff --git a/controllers/vmware/virtualmachinegroup_controller.go b/controllers/vmware/virtualmachinegroup_controller.go new file mode 100644 index 0000000000..d0c60aee01 --- /dev/null +++ b/controllers/vmware/virtualmachinegroup_controller.go @@ -0,0 +1,138 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vmware + +import ( + "context" + + vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" + apitypes "k8s.io/apimachinery/pkg/types" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/cluster-api/util/predicates" + ctrl "sigs.k8s.io/controller-runtime" + ctrlbldr "sigs.k8s.io/controller-runtime/pkg/builder" + ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context" +) + +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get +// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups/status,verbs=get +// +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vspheremachines,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch + +// AddVirtualMachineGroupControllerToManager adds the VirtualMachineGroup controller to the provided +// manager. 
+func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerManagerCtx *capvcontext.ControllerManagerContext, mgr manager.Manager, options controller.Options) error { + predicateLog := ctrl.LoggerFrom(ctx).WithValues("controller", "virtualmachinegroup") + + reconciler := &VirtualMachineGroupReconciler{ + Client: controllerManagerCtx.Client, + Recorder: mgr.GetEventRecorderFor("virtualmachinegroup-controller"), + } + + // Predicate: only allow VMG with the cluster-name label. Ensures the controller only works on VMG objects created by CAPV. + hasClusterNameLabel := predicate.NewPredicateFuncs(func(obj ctrlclient.Object) bool { + labels := obj.GetLabels() + if labels == nil { + return false + } + _, ok := labels[clusterv1.ClusterNameLabel] + return ok + }) + + builder := ctrl.NewControllerManagedBy(mgr). + For(&vmoprv1.VirtualMachineGroup{}). + WithOptions(options). + WithEventFilter(hasClusterNameLabel). + Watches( + &clusterv1.Cluster{}, + handler.EnqueueRequestsFromMapFunc(reconciler.ClusterToVirtualMachineGroup), + ). + Watches( + &vmwarev1.VSphereMachine{}, + handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToVirtualMachineGroup), + ctrlbldr.WithPredicates( + predicate.Funcs{ + UpdateFunc: func(event.UpdateEvent) bool { return false }, + CreateFunc: func(event.CreateEvent) bool { return true }, + DeleteFunc: func(event.DeleteEvent) bool { return true }, + GenericFunc: func(event.GenericEvent) bool { return false }, + }), + ). + WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, controllerManagerCtx.WatchFilterValue)) + + return builder.Complete(reconciler) +} + +// ClusterToVirtualMachineGroup maps Cluster events to VirtualMachineGroup reconcile requests. +func (r *VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(_ context.Context, a ctrlclient.Object) []reconcile.Request { + cluster, ok := a.(*clusterv1.Cluster) + if !ok { + return nil + } + + // Always enqueue a request for the "would-be VMG" + return []reconcile.Request{{ + NamespacedName: apitypes.NamespacedName{ + Namespace: cluster.Namespace, + Name: cluster.Name, + }, + }} +} + +// VSphereMachineToVirtualMachineGroup maps VSphereMachine events to VirtualMachineGroup reconcile requests. +// This handler only processes VSphereMachine objects for Day-2 operations when VMG could be found, ensuring +// VMG member list in sync with VSphereMachines. If no corresponding VMG is found, this is a no-op. +func (r *VirtualMachineGroupReconciler) VSphereMachineToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request { + vSphereMachine, ok := a.(*vmwarev1.VSphereMachine) + if !ok { + return nil + } + + clusterName, ok := vSphereMachine.Labels[clusterv1.ClusterNameLabel] + if !ok || clusterName == "" { + return nil + } + + vmg := &vmoprv1.VirtualMachineGroup{} + err := r.Client.Get(ctx, apitypes.NamespacedName{ + Namespace: vSphereMachine.Namespace, + Name: clusterName, + }, vmg) + + if err != nil { + return nil + } + + return []reconcile.Request{{ + NamespacedName: apitypes.NamespacedName{ + Namespace: vmg.Namespace, + Name: vmg.Name, + }, + }} +} diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go new file mode 100644 index 0000000000..0be615857b --- /dev/null +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -0,0 +1,346 @@ +/* +Copyright 2025 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package vmware contains the VirtualMachineGroup Reconciler. +package vmware + +import ( + "context" + "fmt" + "sort" + "strings" + "time" + + "github.com/pkg/errors" + vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/cluster-api/util/conditions" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + infrautilv1 "sigs.k8s.io/cluster-api-provider-vsphere/pkg/util" +) + +const ( + reconciliationDelay = 10 * time.Second +) + +// VirtualMachineGroupReconciler reconciles VirtualMachineGroup. +type VirtualMachineGroupReconciler struct { + Client client.Client + Recorder record.EventRecorder +} + +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get +// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups/status,verbs=get +// +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vspheremachines,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch + +func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { + log := ctrl.LoggerFrom(ctx) + + // Fetch the Cluster instance. + cluster := &clusterv1.Cluster{} + if err := r.Client.Get(ctx, req.NamespacedName, cluster); err != nil { + if apierrors.IsNotFound(err) { + return reconcile.Result{}, nil + } + return reconcile.Result{}, err + } + + log = log.WithValues("Cluster", klog.KObj(cluster)) + + // If the Cluster is deleted, just return, as the VirtualMachineGroup will be GCed and no extra processing is needed. + if !cluster.DeletionTimestamp.IsZero() { + return reconcile.Result{}, nil + } + + // If the ControlPlane hasn't been initialized yet, requeue, since the VSphereMachines of a MachineDeployment will only be created after + // the ControlPlane is initialized. + if !conditions.IsTrue(cluster, clusterv1.ClusterControlPlaneInitializedCondition) { + log.Info("Waiting for Cluster ControlPlaneInitialized") + return reconcile.Result{RequeueAfter: reconciliationDelay}, nil + } + + // Continue with the main logic. + return r.createOrUpdateVMG(ctx, cluster) +} + +// createOrUpdateVMG creates or updates the VirtualMachineGroup.
+func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, cluster *clusterv1.Cluster) (_ reconcile.Result, reterr error) { + log := ctrl.LoggerFrom(ctx) + + // Calculate the current VSphereMachines of all MachineDeployments. + current, err := getCurrentVSphereMachines(ctx, r.Client, cluster.Namespace, cluster.Name) + if err != nil { + return reconcile.Result{}, errors.Wrapf(err, "failed to get current VSphereMachines of cluster %s/%s", + cluster.Namespace, cluster.Name) + } + + desiredVMG := &vmoprv1.VirtualMachineGroup{} + key := &client.ObjectKey{ + Namespace: cluster.Namespace, + Name: cluster.Name, + } + + if err := r.Client.Get(ctx, *key, desiredVMG); err != nil { + if !apierrors.IsNotFound(err) { + log.Error(err, "failed to get VirtualMachineGroup") + return ctrl.Result{}, err + } + + // Calculate the expected Machines of all MachineDeployments. + expected, err := getExpectedVSphereMachines(ctx, r.Client, cluster) + if err != nil { + log.Error(err, "failed to get expected Machines of all MachineDeployments") + return ctrl.Result{}, err + } + + if expected == 0 { + log.Info("none of the MachineDeployments specifies replicas; node auto placement doesn't support this scenario") + return reconcile.Result{}, nil + } + + // Wait for all intended VSphereMachines corresponding to MachineDeployments to exist; this only applies during initial Cluster creation. + current := int32(len(current)) + if current < expected { + log.Info("current VSphereMachines do not match expected", "Expected", expected, + "Current", current, "ClusterName", cluster.Name, "Namespace", cluster.Namespace) + return reconcile.Result{RequeueAfter: reconciliationDelay}, nil + } + + desiredVMG = &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: key.Name, + Namespace: key.Namespace, + }, + } + } + + // Generate VM names according to the naming strategy set on the VSphereMachine. + vmNames := make([]string, 0, len(current)) + for _, machine := range current { + name, err := GenerateVirtualMachineName(machine.Name, machine.Spec.NamingStrategy) + if err != nil { + return reconcile.Result{}, err + } + vmNames = append(vmNames, name) + } + // Sort the VM names alphabetically for consistent ordering. + sort.Slice(vmNames, func(i, j int) bool { + return vmNames[i] < vmNames[j] + }) + + members := make([]vmoprv1.GroupMember, 0, len(current)) + for _, name := range vmNames { + members = append(members, vmoprv1.GroupMember{ + Name: name, + Kind: "VirtualMachine", + }) + } + + // Get all the names of the MachineDeployments of the Cluster. + if !cluster.Spec.Topology.IsDefined() { + return reconcile.Result{}, errors.Errorf("Cluster Topology is not defined %s/%s", + cluster.Namespace, cluster.Name) + } + machineDeployments := &clusterv1.MachineDeploymentList{} + if err := r.Client.List(ctx, machineDeployments, + client.InNamespace(cluster.Namespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}); err != nil { + return reconcile.Result{}, err + } + mdNames := []string{} + for _, md := range machineDeployments.Items { + mdNames = append(mdNames, md.Name) + } + + // Use CreateOrPatch to create or update the VirtualMachineGroup.
+ _, err = controllerutil.CreateOrPatch(ctx, r.Client, desiredVMG, func() error { + // Set the desired labels. + if desiredVMG.Labels == nil { + desiredVMG.Labels = make(map[string]string) + // Set the Cluster name label. + desiredVMG.Labels[clusterv1.ClusterNameLabel] = cluster.Name + } + + // Add the per-MachineDeployment zone label for day-2 operations once placement of a VM belonging to the MachineDeployment is done. + // Do not update the per-MachineDeployment zone label once set, as the placement decision should not change unless the user explicitly + // asks for it. + placementDecisionLabels, err := GenerateVMGPlacementAnnotations(ctx, desiredVMG, mdNames) + if err != nil { + return err + } + if len(placementDecisionLabels) > 0 { + for k, v := range placementDecisionLabels { + if _, exists := desiredVMG.Labels[k]; exists { + // Skip if the label already exists. + continue + } + desiredVMG.Labels[k] = v + } + } + + // Compose bootOrder. + desiredVMG.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: members, + }, + } + + // Make sure the Cluster owns the VM Operator VirtualMachineGroup. + if err = controllerutil.SetControllerReference(cluster, desiredVMG, r.Client.Scheme()); err != nil { + return errors.Wrapf(err, "failed to mark %s %s/%s as owner of %s %s/%s", + cluster.GroupVersionKind(), + cluster.Namespace, + cluster.Name, + desiredVMG.GroupVersionKind(), + desiredVMG.Namespace, + desiredVMG.Name) + } + + return nil + }) + + return reconcile.Result{}, err +} + +// getExpectedVSphereMachines returns the total number of replicas expected across the MachineDeployments belonging to the Cluster. +func getExpectedVSphereMachines(ctx context.Context, kubeClient client.Client, cluster *clusterv1.Cluster) (int32, error) { + var mdList clusterv1.MachineDeploymentList + if err := kubeClient.List( + ctx, + &mdList, + client.InNamespace(cluster.Namespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}, + ); err != nil { + return 0, errors.Wrap(err, "failed to list MachineDeployments") + } + + var total int32 + for _, md := range mdList.Items { + if md.Spec.Replicas != nil { + total += *md.Spec.Replicas + } + } + + return total, nil +} + +// getCurrentVSphereMachines returns the list of VSphereMachines belonging to the Cluster’s MachineDeployments. +// VSphereMachines marked for removal are excluded from the result. +func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, clusterNamespace, clusterName string) ([]vmwarev1.VSphereMachine, error) { + log := ctrl.LoggerFrom(ctx) + + // List VSphereMachine objects. + var vsMachineList vmwarev1.VSphereMachineList + if err := kubeClient.List(ctx, &vsMachineList, + client.InNamespace(clusterNamespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, + client.HasLabels{clusterv1.MachineDeploymentNameLabel}, + ); err != nil { + return nil, errors.Wrapf(err, "failed to list VSphereMachines in namespace %s", clusterNamespace) + } + + var result []vmwarev1.VSphereMachine + for _, vs := range vsMachineList.Items { + if vs.DeletionTimestamp.IsZero() { + result = append(result, vs) + } + } + log.V(4).Info("Final list of VSphereMachines for VMG member generation", "count", len(result)) + + return result, nil +} + +// GenerateVMGPlacementAnnotations returns one annotation per MachineDeployment containing the zone of its placed VMs, used for day-2 operations.
+func GenerateVMGPlacementAnnotations(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) (map[string]string, error) { + log := ctrl.LoggerFrom(ctx) + annotations := make(map[string]string) + + // For each member in the status. + for _, member := range vmg.Status.Members { + // Return an error if the member is not a VirtualMachine; skip members that are not yet placement ready. + if member.Kind != "VirtualMachine" { + return nil, errors.Errorf("VirtualMachineGroup %s/%s contains a non-VirtualMachine member, member.Kind %s", vmg.Namespace, vmg.Name, member.Kind) + } + + // Once a member VM is placed, VirtualMachineGroupMemberConditionPlacementReady will be set to true. + if !conditions.IsTrue(&member, vmoprv1.VirtualMachineGroupMemberConditionPlacementReady) { + continue + } + + // Check if this VM belongs to any of our target MachineDeployments. + // Use the machine deployment name as the annotation key suffix. + for _, md := range machineDeployments { + // Check if we already found placement for this MachineDeployment. + if _, found := annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)]; found { + log.Info(fmt.Sprintf("Skipping Machine Deployment %s, placement already found in annotations", md)) + continue + } + + // Check if the VM belongs to a MachineDeployment by name (e.g. cluster-1-np-1-vm-xxx contains np-1). + // TODO: Establish membership via the machine deployment name label + if strings.Contains(member.Name, "-"+md+"-") { + // Get the VM placement information from the member status. + // VMs that have not yet undergone placement do not have Placement info set; skip them. + if member.Placement == nil { + log.V(4).Info("VM in VMG has no placement info. Placement is nil", "VM", member.Name, "VMG", vmg.Name, "Namespace", vmg.Namespace) + continue + } + + // Skip to the next member if Zone is empty. + zone := member.Placement.Zone + if zone == "" { + log.V(4).Info("VM in VMG has no placement info. Zone is empty", "VM", member.Name, "VMG", vmg.Name, "Namespace", vmg.Namespace) + continue + } + + log.Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, vmg.Namespace, vmg.Name, zone)) + annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)] = zone + } + } + } + + return annotations, nil +} + +// GenerateVirtualMachineName generates the name of a VirtualMachine based on the naming strategy. +// This logic is duplicated from pkg/services/vmoperator/vmopmachine.go. +func GenerateVirtualMachineName(machineName string, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) (string, error) { + // By default the name of the VirtualMachine should be equal to the Machine name (this is the same as "{{ .machine.name }}") + if namingStrategy == nil || namingStrategy.Template == nil { + // Note: No need to trim to max length in this case as valid Machine names will also be valid VirtualMachine names. + return machineName, nil + } + + name, err := infrautilv1.GenerateMachineNameFromTemplate(machineName, namingStrategy.Template) + if err != nil { + return "", errors.Wrap(err, "failed to generate name for VirtualMachine") + } + + return name, nil +} diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go new file mode 100644 index 0000000000..d5e54915f1 --- /dev/null +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -0,0 +1,497 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vmware + +import ( + "context" + "fmt" + "sort" + "testing" + "time" + + . "github.com/onsi/gomega" + vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "k8s.io/utils/ptr" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/cluster-api/util/conditions" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" +) + +const ( + clusterName = "test-cluster" + otherClusterName = "other-cluster" + clusterNamespace = "test-ns" + mdName1 = "md-worker-a" + mdName2 = "md-worker-b" + zoneA = "zone-a" + zoneB = "zone-b" +) + +func TestGetExpectedVSphereMachines(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + targetCluster := newTestCluster(clusterName, clusterNamespace) + + mdA := newMachineDeployment("md-a", clusterName, clusterNamespace, ptr.To(int32(3))) + mdB := newMachineDeployment("md-b", clusterName, clusterNamespace, ptr.To(int32(5))) + mdCNil := newMachineDeployment("md-c-nil", clusterName, clusterNamespace, nil) + mdDZero := newMachineDeployment("md-d-zero", clusterName, clusterNamespace, ptr.To(int32(0))) + // Create an MD for a different cluster (should be filtered) + mdOtherCluster := newMachineDeployment("md-other", otherClusterName, clusterNamespace, ptr.To(int32(5))) + + tests := []struct { + name string + initialObjects []client.Object + expectedTotal int32 + wantErr bool + }{ + { + name: "Sum of two MDs", + initialObjects: []client.Object{mdA, mdB}, + expectedTotal: 8, + wantErr: false, + }, + { + name: "Includes nil and zero replicas", + initialObjects: []client.Object{mdA, mdB, mdCNil, mdDZero}, + expectedTotal: 8, + wantErr: false, + }, + { + name: "Filters MDs from other clusters", + initialObjects: []client.Object{mdA, mdB, mdOtherCluster}, + expectedTotal: 8, + wantErr: false, + }, + { + name: "Success: No MachineDeployments found", + initialObjects: []client.Object{}, + expectedTotal: 0, + wantErr: false, + }, + } + + for _, tt := range tests { + // Looks odd, but need to reinitialize test variable + tt := tt + t.Run(tt.name, func(_ *testing.T) { + scheme := runtime.NewScheme() + g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) + + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialObjects...).Build() + total, err := getExpectedVSphereMachines(ctx, fakeClient, targetCluster) + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(total).To(Equal(tt.expectedTotal)) + } + }) + } +} + +func TestGetCurrentVSphereMachines(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + scheme := runtime.NewScheme() + g.Expect(vmwarev1.AddToScheme(scheme)).To(Succeed()) + + // VSphereMachine names are based on CAPI Machine names, but we use fake name here. 
+ vsm1 := newVSphereMachine("vsm-1", mdName1, false, false, nil) + vsm2 := newVSphereMachine("vsm-2", mdName2, false, false, nil) + vsmDeleting := newVSphereMachine("vsm-3", mdName1, false, true, nil) // Deleting + vsmControlPlane := newVSphereMachine("vsm-cp", "not-md", true, false, nil) + + tests := []struct { + name string + objects []client.Object + want int + }{ + { + name: "Success: Filtered non-deleting worker VSphereMachines", + objects: []client.Object{ + vsm1, + vsm2, + vsmDeleting, + vsmControlPlane, + }, + want: 2, + }, + { + name: "No VSphereMachines found", + objects: []client.Object{}, + want: 0, + }, + } + + for _, tt := range tests { + // Looks odd, but need to reinitialize test variable + tt := tt + t.Run(tt.name, func(_ *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.objects...).Build() + got, err := getCurrentVSphereMachines(ctx, fakeClient, clusterNamespace, clusterName) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(got).To(HaveLen(tt.want)) + + // Check that the correct Machines are present + if tt.want > 0 { + names := make([]string, len(got)) + for i, vsm := range got { + names[i] = vsm.Name + } + sort.Strings(names) + g.Expect(names).To(Equal([]string{"vsm-1", "vsm-2"})) + } + }) + } +} + +func TestGenerateVMGPlacementAnnotations(t *testing.T) { + g := NewWithT(t) + + // Define object names for members + vmName1 := fmt.Sprintf("%s-%s-vm-1", clusterName, mdName1) + vmName2 := fmt.Sprintf("%s-%s-vm-1", clusterName, mdName2) + vmNameUnplaced := fmt.Sprintf("%s-%s-vm-2", clusterName, mdName1) + vmNameWrongKind := "not-a-vm" + + tests := []struct { + name string + vmg *vmoprv1.VirtualMachineGroup + machineDeployments []string + wantAnnotations map[string]string + wantErr bool + }{ + { + name: "Success: Two placed VMs for two MDs", + vmg: &vmoprv1.VirtualMachineGroup{ + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + // Placed member for MD1 in Zone A + newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneA), + // Placed member for MD2 in Zone B + newVMGMemberStatus(vmName2, "VirtualMachine", true, zoneB), + }, + }, + }, + machineDeployments: []string{mdName1, mdName2}, + wantAnnotations: map[string]string{ + fmt.Sprintf("zone.cluster.x-k8s.io/%s", mdName1): zoneA, + fmt.Sprintf("zone.cluster.x-k8s.io/%s", mdName2): zoneB, + }, + wantErr: false, + }, + { + name: "Skip: Unplaced VM (PlacementReady false)", + vmg: &vmoprv1.VirtualMachineGroup{ + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vmName1, "VirtualMachine", false, ""), + }, + }, + }, + machineDeployments: []string{mdName1}, + wantAnnotations: map[string]string{}, + wantErr: false, + }, + { + name: "Skip: PlacementReady but missing Zone info", + vmg: &vmoprv1.VirtualMachineGroup{ + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vmName1, "VirtualMachine", true, ""), + }, + }, + }, + machineDeployments: []string{mdName1}, + wantAnnotations: map[string]string{}, + wantErr: false, + }, + { + name: "Skip: Placement already found for MD", + vmg: &vmoprv1.VirtualMachineGroup{ + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + // First VM sets the placement + newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneA), + // Second VM is ignored + newVMGMemberStatus(vmNameUnplaced, "VirtualMachine", true, zoneB), + }, + }, + }, + 
machineDeployments: []string{mdName1}, + wantAnnotations: map[string]string{ + fmt.Sprintf("zone.cluster.x-k8s.io/%s", mdName1): zoneA, + }, + wantErr: false, + }, + { + name: "Error: Member Kind is not VirtualMachine", + vmg: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vmNameWrongKind, "VirtualMachineGroup", true, zoneA), + }, + }, + }, + machineDeployments: []string{mdName1}, + wantAnnotations: nil, + wantErr: true, + }, + } + + for _, tt := range tests { + // Looks odd, but need to reinitialize test variable + tt := tt + t.Run(tt.name, func(_ *testing.T) { + ctx := ctrl.LoggerInto(context.Background(), ctrl.LoggerFrom(context.Background())) + + got, err := GenerateVMGPlacementAnnotations(ctx, tt.vmg, tt.machineDeployments) + + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(got).To(Equal(tt.wantAnnotations)) + } + }) + } +} + +func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + scheme := runtime.NewScheme() + g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) + g.Expect(vmwarev1.AddToScheme(scheme)).To(Succeed()) + g.Expect(vmoprv1.AddToScheme(scheme)).To(Succeed()) + + // Initial objects for the successful VMG creation path (Expected: 1, Current: 1) + cluster := newCluster(clusterName, clusterNamespace, true, 1, 0) + vsm1 := newVSphereMachine("vsm-1", mdName1, false, false, nil) + md1 := newMachineDeployment(mdName1, clusterName, clusterNamespace, ptr.To(int32(1))) + + tests := []struct { + name string + initialObjects []client.Object + expectedResult reconcile.Result + checkVMGExists bool + }{ + { + name: "Exit: Cluster Not Found", + initialObjects: []client.Object{}, + expectedResult: reconcile.Result{}, + checkVMGExists: false, + }, + { + name: "Exit: Cluster Deletion Timestamp Set", + initialObjects: []client.Object{ + func() client.Object { + c := cluster.DeepCopy() + c.Finalizers = []string{"test.finalizer.cluster"} + c.DeletionTimestamp = &metav1.Time{Time: time.Now()} + return c + }(), + }, + expectedResult: reconcile.Result{}, + checkVMGExists: false, + }, + { + name: "Requeue: ControlPlane Not Initialized", + initialObjects: []client.Object{ + newCluster(clusterName, clusterNamespace, false, 1, 0), + }, + expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, + checkVMGExists: false, + }, + { + name: "Requeue: VMG Not Found", + initialObjects: []client.Object{ + cluster.DeepCopy(), + md1.DeepCopy(), + }, + expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, + checkVMGExists: false, + }, + { + name: "Success: VMG Created", + initialObjects: []client.Object{ + cluster.DeepCopy(), + md1.DeepCopy(), + vsm1.DeepCopy(), + }, + expectedResult: reconcile.Result{}, + checkVMGExists: true, + }, + { + name: "Success: VMG Updated (Already Exists)", + initialObjects: []client.Object{ + cluster.DeepCopy(), + md1.DeepCopy(), + vsm1.DeepCopy(), + &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, + }, + }, + expectedResult: reconcile.Result{}, + checkVMGExists: true, + }, + } + + for _, tt := range tests { + // Looks odd, but need to reinitialize test variable + tt := tt + t.Run(tt.name, func(_ *testing.T) { + fakeClient := 
fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialObjects...).Build() + reconciler := &VirtualMachineGroupReconciler{ + Client: fakeClient, + Recorder: record.NewFakeRecorder(1), + } + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}} + + result, err := reconciler.Reconcile(ctx, req) + + g.Expect(err).NotTo(HaveOccurred(), "Reconcile should not return an error") + g.Expect(result).To(Equal(tt.expectedResult)) + + vmg := &vmoprv1.VirtualMachineGroup{} + vmgKey := types.NamespacedName{Name: clusterName, Namespace: clusterNamespace} + err = fakeClient.Get(ctx, vmgKey, vmg) + + if tt.checkVMGExists { + g.Expect(err).NotTo(HaveOccurred(), "VMG should exist") + // Check that the core fields were set by the MutateFn. + g.Expect(vmg.Labels).To(HaveKeyWithValue(clusterv1.ClusterNameLabel, clusterName)) + g.Expect(vmg.Spec.BootOrder).To(HaveLen(1)) + expected, err := getExpectedVSphereMachines(ctx, fakeClient, tt.initialObjects[0].(*clusterv1.Cluster)) + g.Expect(err).NotTo(HaveOccurred(), "Should get expected Machines") + g.Expect(vmg.Spec.BootOrder[0].Members).To(HaveLen(int(expected))) + + // VMG members should match the VSphereMachine (name: vsm-1). + g.Expect(vmg.Spec.BootOrder[0].Members[0].Name).To(Equal("vsm-1")) + } + }) + } +} + +// Helper function to create a basic Cluster object. +func newCluster(name, namespace string, initialized bool, replicasMD1, replicasMD2 int32) *clusterv1.Cluster { + cluster := &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: map[string]string{clusterv1.ClusterNameLabel: name}, + }, + Spec: clusterv1.ClusterSpec{ + Topology: clusterv1.Topology{ + Workers: clusterv1.WorkersTopology{ + MachineDeployments: []clusterv1.MachineDeploymentTopology{ + {Name: mdName1, Replicas: &replicasMD1}, + {Name: mdName2, Replicas: &replicasMD2}, + }, + }, + }, + }, + } + if initialized { + conditions.Set(cluster, metav1.Condition{ + Type: clusterv1.ClusterControlPlaneInitializedCondition, + Status: metav1.ConditionTrue, + }) + } + return cluster +} + +// Helper function to create a VSphereMachine (worker, owned by a CAPI Machine). +func newVSphereMachine(name, mdName string, isCP, deleted bool, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) *vmwarev1.VSphereMachine { + vsm := &vmwarev1.VSphereMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: clusterNamespace, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + }, + }, + Spec: vmwarev1.VSphereMachineSpec{ + NamingStrategy: namingStrategy, + }, + } + if !isCP { + vsm.Labels[clusterv1.MachineDeploymentNameLabel] = mdName + } else { + vsm.Labels[clusterv1.MachineControlPlaneLabel] = "true" + } + if deleted { + vsm.Finalizers = []string{"test.finalizer.0"} + vsm.DeletionTimestamp = &metav1.Time{Time: time.Now()} + } + return vsm +} + +// Helper function to create a VMG member status with placement info. +func newVMGMemberStatus(name, kind string, isPlacementReady bool, zone string) vmoprv1.VirtualMachineGroupMemberStatus { + memberStatus := vmoprv1.VirtualMachineGroupMemberStatus{ + Name: name, + Kind: kind, + } + + if isPlacementReady { + conditions.Set(&memberStatus, metav1.Condition{ + Type: vmoprv1.VirtualMachineGroupMemberConditionPlacementReady, + Status: metav1.ConditionTrue, + }) + memberStatus.Placement = &vmoprv1.VirtualMachinePlacementStatus{Zone: zone} + } + return memberStatus +} + +// Helper function to create a MachineDeployment object.
+func newMachineDeployment(name, clusterName, clusterNS string, replicas *int32) *clusterv1.MachineDeployment { + return &clusterv1.MachineDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: clusterNS, + Labels: map[string]string{clusterv1.ClusterNameLabel: clusterName}, + }, + Spec: clusterv1.MachineDeploymentSpec{ + Replicas: replicas, + }, + } +} + +// Helper function to create a basic Cluster object used as input. +func newTestCluster(name, namespace string) *clusterv1.Cluster { + return &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + } +} diff --git a/feature/feature.go b/feature/feature.go index a233d351c7..1799aaeb68 100644 --- a/feature/feature.go +++ b/feature/feature.go @@ -44,6 +44,11 @@ const ( // alpha: v1.11 NamespaceScopedZones featuregate.Feature = "NamespaceScopedZones" + // NodeAutoPlacement is a feature gate for the NodeAutoPlacement functionality for supervisor. + // + // alpha: v1.15 + NodeAutoPlacement featuregate.Feature = "NodeAutoPlacement" + // PriorityQueue is a feature gate that controls if the controller uses the controller-runtime PriorityQueue // instead of the default queue implementation. // @@ -61,6 +66,7 @@ var defaultCAPVFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ // Every feature should be initiated here: NodeAntiAffinity: {Default: false, PreRelease: featuregate.Alpha}, NamespaceScopedZones: {Default: false, PreRelease: featuregate.Alpha}, + NodeAutoPlacement: {Default: false, PreRelease: featuregate.Alpha}, PriorityQueue: {Default: false, PreRelease: featuregate.Alpha}, MultiNetworks: {Default: false, PreRelease: featuregate.Alpha}, } diff --git a/go.mod b/go.mod index 44f52fa9ea..df97e28e79 100644 --- a/go.mod +++ b/go.mod @@ -4,16 +4,16 @@ go 1.24.0 replace sigs.k8s.io/cluster-api => sigs.k8s.io/cluster-api v1.11.0-rc.0.0.20250905091528-eb4e38c46ff6 -replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v0.0.0-20240404200847-de75746a9505 +replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20251003150112-9b458d311c4c // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests -replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.8.6 +replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c require ( github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests - github.com/vmware-tanzu/vm-operator/api v1.8.6 + github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 github.com/vmware/govmomi v0.52.0 ) diff --git a/go.sum b/go.sum index 47a16466b0..34bb470a23 100644 --- a/go.sum +++ b/go.sum @@ -243,8 +243,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d 
h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.8.6 h1:NIndORjcnSmIlQsCMIewpIwg/ocRVDh2lYjOroTVLrU= -github.com/vmware-tanzu/vm-operator/api v1.8.6/go.mod h1:HHA2SNI9B5Yqtyp5t+Gt9WTWBi/fIkM6+MukDDSf11A= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c h1:XISTT0dw/XwMlyyiOPHPsXCxfI1Ro2Zuozi6eIacXGo= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= diff --git a/internal/test/helpers/envtest.go b/internal/test/helpers/envtest.go index 41341b70cb..0acbcd68eb 100644 --- a/internal/test/helpers/envtest.go +++ b/internal/test/helpers/envtest.go @@ -29,6 +29,7 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/pkg/errors" + vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" "github.com/vmware/govmomi/simulator" "golang.org/x/tools/go/packages" admissionv1 "k8s.io/api/admissionregistration/v1" @@ -89,6 +90,7 @@ func init() { utilruntime.Must(admissionv1.AddToScheme(scheme)) utilruntime.Must(clusterv1.AddToScheme(scheme)) utilruntime.Must(infrav1.AddToScheme(scheme)) + utilruntime.Must(vmoprv1.AddToScheme(scheme)) // Get the root of the current file to use in CRD paths. _, filename, _, ok := goruntime.Caller(0) diff --git a/main.go b/main.go index b92f48d25a..6d6ea9e011 100644 --- a/main.go +++ b/main.go @@ -94,6 +94,7 @@ var ( vSphereVMConcurrency int vSphereClusterIdentityConcurrency int vSphereDeploymentZoneConcurrency int + virtualMachineGroupConcurrency int skipCRDMigrationPhases []string managerOptions = capiflags.ManagerOptions{} @@ -141,6 +142,9 @@ func InitFlags(fs *pflag.FlagSet) { fs.IntVar(&vSphereDeploymentZoneConcurrency, "vspheredeploymentzone-concurrency", 10, "Number of vSphere deployment zones to process simultaneously") + fs.IntVar(&virtualMachineGroupConcurrency, "virtualmachinegroup-concurrency", 10, + "Number of virtual machine group to process simultaneously") + fs.StringVar( &managerOpts.PodName, "pod-name", @@ -482,6 +486,12 @@ func setupSupervisorControllers(ctx context.Context, controllerCtx *capvcontext. 
return err } + if feature.Gates.Enabled(feature.NamespaceScopedZones) && feature.Gates.Enabled(feature.NodeAutoPlacement) { + if err := vmware.AddVirtualMachineGroupControllerToManager(ctx, controllerCtx, mgr, concurrency(virtualMachineGroupConcurrency)); err != nil { + return err + } + } + return vmware.AddServiceDiscoveryControllerToManager(ctx, controllerCtx, mgr, clusterCache, concurrency(serviceDiscoveryConcurrency)) } diff --git a/packaging/go.sum b/packaging/go.sum index 14a389257b..0659c3663f 100644 --- a/packaging/go.sum +++ b/packaging/go.sum @@ -135,8 +135,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.8.6 h1:NIndORjcnSmIlQsCMIewpIwg/ocRVDh2lYjOroTVLrU= -github.com/vmware-tanzu/vm-operator/api v1.8.6/go.mod h1:HHA2SNI9B5Yqtyp5t+Gt9WTWBi/fIkM6+MukDDSf11A= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c h1:XISTT0dw/XwMlyyiOPHPsXCxfI1Ro2Zuozi6eIacXGo= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= diff --git a/pkg/services/network/netop_provider.go b/pkg/services/network/netop_provider.go index fa1c1860fa..e13de3bd4d 100644 --- a/pkg/services/network/netop_provider.go +++ b/pkg/services/network/netop_provider.go @@ -136,7 +136,7 @@ func (np *netopNetworkProvider) ConfigureVirtualMachine(ctx context.Context, clu // Set the VM primary interface vm.Spec.Network.Interfaces = append(vm.Spec.Network.Interfaces, vmoprv1.VirtualMachineNetworkInterfaceSpec{ Name: PrimaryInterfaceName, - Network: vmoprv1common.PartialObjectRef{ + Network: &vmoprv1common.PartialObjectRef{ TypeMeta: metav1.TypeMeta{ Kind: NetworkGVKNetOperator.Kind, APIVersion: NetworkGVKNetOperator.GroupVersion().String(), diff --git a/pkg/services/network/nsxt_provider.go b/pkg/services/network/nsxt_provider.go index 96a0450bb7..90885cb568 100644 --- a/pkg/services/network/nsxt_provider.go +++ b/pkg/services/network/nsxt_provider.go @@ -223,7 +223,7 @@ func (np *nsxtNetworkProvider) ConfigureVirtualMachine(_ context.Context, cluste } vm.Spec.Network.Interfaces = append(vm.Spec.Network.Interfaces, vmoprv1.VirtualMachineNetworkInterfaceSpec{ Name: fmt.Sprintf("eth%d", len(vm.Spec.Network.Interfaces)), - Network: vmoprv1common.PartialObjectRef{ + Network: &vmoprv1common.PartialObjectRef{ TypeMeta: metav1.TypeMeta{ Kind: NetworkGVKNSXT.Kind, APIVersion: NetworkGVKNSXT.GroupVersion().String(), diff --git a/pkg/services/network/nsxt_vpc_provider.go b/pkg/services/network/nsxt_vpc_provider.go index 0c3533a37c..9b2c8defa0 100644 --- a/pkg/services/network/nsxt_vpc_provider.go +++ b/pkg/services/network/nsxt_vpc_provider.go @@ -224,7 +224,7 @@ func (vp *nsxtVPCNetworkProvider) ConfigureVirtualMachine(_ 
context.Context, clu networkName := clusterCtx.VSphereCluster.Name vm.Spec.Network.Interfaces = append(vm.Spec.Network.Interfaces, vmoprv1.VirtualMachineNetworkInterfaceSpec{ Name: PrimaryInterfaceName, - Network: vmoprv1common.PartialObjectRef{ + Network: &vmoprv1common.PartialObjectRef{ TypeMeta: metav1.TypeMeta{ Kind: NetworkGVKNSXTVPCSubnetSet.Kind, APIVersion: NetworkGVKNSXTVPCSubnetSet.GroupVersion().String(), @@ -243,7 +243,7 @@ func (vp *nsxtVPCNetworkProvider) ConfigureVirtualMachine(_ context.Context, clu } vmInterface := vmoprv1.VirtualMachineNetworkInterfaceSpec{ Name: PrimaryInterfaceName, - Network: vmoprv1common.PartialObjectRef{ + Network: &vmoprv1common.PartialObjectRef{ TypeMeta: metav1.TypeMeta{ Kind: primary.Network.Kind, APIVersion: primary.Network.APIVersion, @@ -281,7 +281,7 @@ func setVMSecondaryInterfaces(machine *vmwarev1.VSphereMachine, vm *vmoprv1.Virt } vmInterface := vmoprv1.VirtualMachineNetworkInterfaceSpec{ Name: secondaryInterface.Name, - Network: vmoprv1common.PartialObjectRef{ + Network: &vmoprv1common.PartialObjectRef{ TypeMeta: metav1.TypeMeta{ Kind: secondaryInterface.Network.Kind, APIVersion: secondaryInterface.Network.APIVersion, diff --git a/pkg/services/vmoperator/constants.go b/pkg/services/vmoperator/constants.go index 011082a06c..37ca556fc6 100644 --- a/pkg/services/vmoperator/constants.go +++ b/pkg/services/vmoperator/constants.go @@ -18,8 +18,6 @@ limitations under the License. package vmoperator const ( - kubeTopologyZoneLabelKey = "topology.kubernetes.io/zone" - // ControlPlaneVMClusterModuleGroupName is the name used for the control plane Cluster Module. ControlPlaneVMClusterModuleGroupName = "control-plane-group" // ClusterModuleNameAnnotationKey is key for the Cluster Module annotation. diff --git a/pkg/services/vmoperator/control_plane_endpoint.go b/pkg/services/vmoperator/control_plane_endpoint.go index e0070188e3..3b500711d7 100644 --- a/pkg/services/vmoperator/control_plane_endpoint.go +++ b/pkg/services/vmoperator/control_plane_endpoint.go @@ -189,7 +189,7 @@ func newVirtualMachineService(ctx *vmware.ClusterContext) *vmoprv1.VirtualMachin Namespace: ctx.Cluster.Namespace, }, TypeMeta: metav1.TypeMeta{ - APIVersion: vmoprv1.SchemeGroupVersion.String(), + APIVersion: vmoprv1.GroupVersion.String(), Kind: "VirtualMachineService", }, } diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index 840b166406..a60292c2d0 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "fmt" + "sort" "github.com/pkg/errors" vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" @@ -41,6 +42,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + "sigs.k8s.io/cluster-api-provider-vsphere/feature" capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context" "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware" infrautilv1 "sigs.k8s.io/cluster-api-provider-vsphere/pkg/util" @@ -163,6 +165,13 @@ func (v *VmopMachineService) SyncFailureReason(_ context.Context, machineCtx cap return supervisorMachineCtx.VSphereMachine.Status.FailureReason != nil || supervisorMachineCtx.VSphereMachine.Status.FailureMessage != nil, nil } +// affinityInfo is an internal to store VM affinity information. 
+type affinityInfo struct { + affinitySpec *vmoprv1.AffinitySpec + vmGroupName string + failureDomain *string +} + // ReconcileNormal reconciles create and update events for VM Operator VMs. func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx capvcontext.MachineContext) (bool, error) { log := ctrl.LoggerFrom(ctx) @@ -171,10 +180,6 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap return false, errors.New("received unexpected SupervisorMachineContext type") } - if supervisorMachineCtx.Machine.Spec.FailureDomain != "" { - supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(supervisorMachineCtx.Machine.Spec.FailureDomain) - } - // If debug logging is enabled, report the number of vms in the cluster before and after the reconcile if log.V(5).Enabled() { vms, err := v.getVirtualMachinesInCluster(ctx, supervisorMachineCtx) @@ -188,6 +193,112 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap // Set the VM state. Will get reset throughout the reconcile supervisorMachineCtx.VSphereMachine.Status.VMStatus = vmwarev1.VirtualMachineStatePending + var affInfo affinityInfo + if feature.Gates.Enabled(feature.NodeAutoPlacement) && + !infrautilv1.IsControlPlaneMachine(machineCtx.GetVSphereMachine()) { + vmOperatorVMGroup := &vmoprv1.VirtualMachineGroup{} + key := client.ObjectKey{ + Namespace: supervisorMachineCtx.Cluster.Namespace, + Name: supervisorMachineCtx.Cluster.Name, + } + err := v.Client.Get(ctx, key, vmOperatorVMGroup) + if err != nil { + if !apierrors.IsNotFound(err) { + return false, err + } + if apierrors.IsNotFound(err) { + log.V(4).Info("VirtualMachineGroup not found, requeueing", "Name", key.Name, "Namespace", key.Namespace) + return true, nil + } + } + + // Proceed only if the machine is a member of the VirtualMachineGroup. + if !v.checkVirtualMachineGroupMembership(vmOperatorVMGroup, supervisorMachineCtx) { + v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ + Type: infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason, + }) + log.V(4).Info("Waiting for VirtualMachineGroup membership, requeueing", "VM Name", supervisorMachineCtx.Machine.Name) + return true, nil + } + + affInfo = affinityInfo{ + vmGroupName: vmOperatorVMGroup.Name, + } + + // Set the zone label using the annotation of the per-md zone mapping from VMG. + // This is for new VMs created during day-2 operations in VC 9.1. + nodePool := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel] + if zone, ok := vmOperatorVMGroup.Annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", nodePool)]; ok && zone != "" { + affInfo.failureDomain = ptr.To(zone) + } + + // Fetch machine deployments without explicit failureDomain specified + // to use when setting the anti-affinity rules. 
+ machineDeployments := &clusterv1.MachineDeploymentList{} + if err := v.Client.List(ctx, machineDeployments, + client.InNamespace(supervisorMachineCtx.Cluster.Namespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name}); err != nil { + return false, err + } + mdNames := []string{} + for _, machineDeployment := range machineDeployments.Items { + if machineDeployment.Spec.Template.Spec.FailureDomain == "" && machineDeployment.Name != nodePool { + mdNames = append(mdNames, machineDeployment.Name) + } + } + sort.Strings(mdNames) + + affInfo.affinitySpec = &vmoprv1.AffinitySpec{ + VMAffinity: &vmoprv1.VMAffinitySpec{ + RequiredDuringSchedulingPreferredDuringExecution: []vmoprv1.VMAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + clusterv1.MachineDeploymentNameLabel: nodePool, + }, + }, + TopologyKey: corev1.LabelTopologyZone, + }, + }, + }, + VMAntiAffinity: &vmoprv1.VMAntiAffinitySpec{ + PreferredDuringSchedulingPreferredDuringExecution: []vmoprv1.VMAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + clusterv1.MachineDeploymentNameLabel: nodePool, + }, + }, + TopologyKey: corev1.LabelHostname, + }, + }, + }, + } + if len(mdNames) > 0 { + affInfo.affinitySpec.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution = append( + affInfo.affinitySpec.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution, + vmoprv1.VMAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: clusterv1.MachineDeploymentNameLabel, + Operator: metav1.LabelSelectorOpIn, + Values: mdNames, + }, + }, + }, + TopologyKey: corev1.LabelTopologyZone, + }, + ) + } + } + + if supervisorMachineCtx.Machine.Spec.FailureDomain != "" { + supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(supervisorMachineCtx.Machine.Spec.FailureDomain) + } + // Check for the presence of an existing object vmOperatorVM := &vmoprv1.VirtualMachine{} key, err := virtualMachineObjectKey(supervisorMachineCtx.Machine.Name, supervisorMachineCtx.Machine.Namespace, supervisorMachineCtx.VSphereMachine.Spec.NamingStrategy) @@ -208,7 +319,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap } // Reconcile the VM Operator VirtualMachine. - if err := v.reconcileVMOperatorVM(ctx, supervisorMachineCtx, vmOperatorVM); err != nil { + if err := v.reconcileVMOperatorVM(ctx, supervisorMachineCtx, vmOperatorVM, &affInfo); err != nil { v1beta1conditions.MarkFalse(supervisorMachineCtx.VSphereMachine, infrav1.VMProvisionedCondition, vmwarev1.VMCreationFailedReason, clusterv1beta1.ConditionSeverityWarning, "failed to create or update VirtualMachine: %v", err) v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ @@ -378,7 +489,7 @@ func (v *VmopMachineService) GetHostInfo(ctx context.Context, machineCtx capvcon return vmOperatorVM.Status.Host, nil } -func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervisorMachineCtx *vmware.SupervisorMachineContext, vmOperatorVM *vmoprv1.VirtualMachine) error { +func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervisorMachineCtx *vmware.SupervisorMachineContext, vmOperatorVM *vmoprv1.VirtualMachine, affinityInfo *affinityInfo) error { // All Machine resources should define the version of Kubernetes to use. 
if supervisorMachineCtx.Machine.Spec.Version == "" { return errors.Errorf( @@ -472,7 +583,7 @@ func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervis } // Assign the VM's labels. - vmOperatorVM.Labels = getVMLabels(supervisorMachineCtx, vmOperatorVM.Labels) + vmOperatorVM.Labels = getVMLabels(supervisorMachineCtx, vmOperatorVM.Labels, affinityInfo) addResourcePolicyAnnotations(supervisorMachineCtx, vmOperatorVM) @@ -494,6 +605,15 @@ func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervis vmOperatorVM = typedModified } + if affinityInfo != nil && affinityInfo.affinitySpec != nil { + if vmOperatorVM.Spec.Affinity == nil { + vmOperatorVM.Spec.Affinity = affinityInfo.affinitySpec + } + if vmOperatorVM.Spec.GroupName == "" { + vmOperatorVM.Spec.GroupName = affinityInfo.vmGroupName + } + } + // Make sure the VSphereMachine owns the VM Operator VirtualMachine. if err := ctrlutil.SetControllerReference(supervisorMachineCtx.VSphereMachine, vmOperatorVM, v.Client.Scheme()); err != nil { return errors.Wrapf(err, "failed to mark %s %s/%s as owner of %s %s/%s", @@ -727,15 +847,18 @@ func (v *VmopMachineService) addVolumes(ctx context.Context, supervisorMachineCt }, } + // Before VC 9.1: // The CSI zone annotation must be set when using a zonal storage class, // which is required when the cluster has multiple (3) zones. // Single zone clusters (legacy/default) do not support zonal storage and must not // have the zone annotation set. + // Since VC 9.1: With Node Auto Placement enabled, failureDomain is optional and CAPV no longer + // sets PVC annotations. PVC placement now follows the StorageClass behavior (Immediate or WaitForFirstConsumer). zonal := len(supervisorMachineCtx.VSphereCluster.Status.FailureDomains) > 1 if zone := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; zonal && zone != nil { topology := []map[string]string{ - {kubeTopologyZoneLabelKey: *zone}, + {corev1.LabelTopologyZone: *zone}, } b, err := json.Marshal(topology) if err != nil { @@ -777,7 +900,7 @@ func (v *VmopMachineService) addVolumes(ctx context.Context, supervisorMachineCt } // getVMLabels returns the labels applied to a VirtualMachine. -func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels map[string]string) map[string]string { +func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels map[string]string, affinityInfo *affinityInfo) map[string]string { if vmLabels == nil { vmLabels = map[string]string{} } @@ -789,9 +912,12 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels vmLabels[k] = v } - // Get the labels that determine the VM's placement inside of a stretched - // cluster. - topologyLabels := getTopologyLabels(supervisorMachineCtx) + // Get the labels that determine the VM's placement + var failureDomain *string + if affinityInfo != nil && affinityInfo.failureDomain != nil { + failureDomain = affinityInfo.failureDomain + } + topologyLabels := getTopologyLabels(supervisorMachineCtx, failureDomain) for k, v := range topologyLabels { vmLabels[k] = v } @@ -800,6 +926,11 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels // resources associated with the target cluster. 
   vmLabels[clusterv1.ClusterNameLabel] = supervisorMachineCtx.GetClusterContext().Cluster.Name
+
+  // Ensure the VM has the machine deployment name label
+  if !infrautilv1.IsControlPlaneMachine(supervisorMachineCtx.Machine) {
+    vmLabels[clusterv1.MachineDeploymentNameLabel] = supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel]
+  }
+
   return vmLabels
 }
 
@@ -809,10 +940,17 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels
 //
 // and thus the code is optimized as such. However, in the future
 // this function may return a more diverse topology.
-func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext) map[string]string {
+func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, failureDomain *string) map[string]string {
+  // This is for explicit placement.
   if fd := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; fd != nil && *fd != "" {
     return map[string]string{
-      kubeTopologyZoneLabelKey: *fd,
+      corev1.LabelTopologyZone: *fd,
+    }
+  }
+  // This is for automatic placement.
+  if failureDomain != nil && *failureDomain != "" {
+    return map[string]string{
+      corev1.LabelTopologyZone: *failureDomain,
     }
   }
   return nil
@@ -823,3 +961,16 @@ func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext) ma
 func getMachineDeploymentNameForCluster(cluster *clusterv1.Cluster) string {
   return fmt.Sprintf("%s-workers-0", cluster.Name)
 }
+
+// checkVirtualMachineGroupMembership returns true if the machine is a member of the
+// first boot order group of the given VirtualMachineGroup.
+func (v *VmopMachineService) checkVirtualMachineGroupMembership(vmOperatorVMGroup *vmoprv1.VirtualMachineGroup, supervisorMachineCtx *vmware.SupervisorMachineContext) bool {
+  if len(vmOperatorVMGroup.Spec.BootOrder) > 0 {
+    for _, member := range vmOperatorVMGroup.Spec.BootOrder[0].Members {
+      if member.Name == supervisorMachineCtx.Machine.Name {
+        return true
+      }
+    }
+  }
+  return false
+}
diff --git a/pkg/services/vmoperator/vmopmachine_test.go b/pkg/services/vmoperator/vmopmachine_test.go
index aa91556341..c36c9616ee 100644
--- a/pkg/services/vmoperator/vmopmachine_test.go
+++ b/pkg/services/vmoperator/vmopmachine_test.go
@@ -18,6 +18,8 @@ package vmoperator
 import (
   "context"
+  "fmt"
+  "slices"
   "testing"
   "time"
@@ -32,6 +34,7 @@ import (
   metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
   "k8s.io/apimachinery/pkg/runtime"
   "k8s.io/apimachinery/pkg/types"
+  featuregatetesting "k8s.io/component-base/featuregate/testing"
   "k8s.io/utils/ptr"
   clusterv1beta1 "sigs.k8s.io/cluster-api/api/core/v1beta1"
   clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
@@ -40,6 +43,7 @@ import (
   infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1"
   vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
+  "sigs.k8s.io/cluster-api-provider-vsphere/feature"
   "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/fake"
   "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
   "sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/network"
@@ -65,6 +69,49 @@ func updateReconciledVMStatus(ctx context.Context, vmService VmopMachineService,
   Expect(err).ShouldNot(HaveOccurred())
 }
 
+func verifyVMAffinityRules(vmopVM *vmoprv1.VirtualMachine, machineDeploymentName string) {
+  Expect(vmopVM.Spec.Affinity.VMAffinity).ShouldNot(BeNil())
+  Expect(vmopVM.Spec.Affinity.VMAffinity.RequiredDuringSchedulingPreferredDuringExecution).To(HaveLen(1))
+
+  vmAffinityTerm :=
vmopVM.Spec.Affinity.VMAffinity.RequiredDuringSchedulingPreferredDuringExecution[0] + Expect(vmAffinityTerm.LabelSelector.MatchLabels).To(HaveKeyWithValue(clusterv1.MachineDeploymentNameLabel, machineDeploymentName)) + Expect(vmAffinityTerm.TopologyKey).To(Equal(corev1.LabelTopologyZone)) +} + +func verifyVMAntiAffinityRules(vmopVM *vmoprv1.VirtualMachine, machineDeploymentName string, extraMDs ...string) { + Expect(vmopVM.Spec.Affinity.VMAntiAffinity).ShouldNot(BeNil()) + + expectedNumAntiAffinityTerms := 1 + if len(extraMDs) > 0 { + expectedNumAntiAffinityTerms = 2 + } + + antiAffinityTerms := vmopVM.Spec.Affinity.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution + Expect(antiAffinityTerms).To(HaveLen(expectedNumAntiAffinityTerms)) + + // First anti-affinity constraint - same machine deployment, different hosts + antiAffinityTerm1 := antiAffinityTerms[0] + Expect(antiAffinityTerm1.LabelSelector.MatchLabels).To(HaveKeyWithValue(clusterv1.MachineDeploymentNameLabel, machineDeploymentName)) + Expect(antiAffinityTerm1.TopologyKey).To(Equal(corev1.LabelHostname)) + + // Second anti-affinity term - different machine deployments + if len(extraMDs) > 0 { + isSortedAlphabetically := func(actual []string) (bool, error) { + return slices.IsSorted(actual), nil + } + antiAffinityTerm2 := antiAffinityTerms[1] + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions).To(HaveLen(1)) + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Key).To(Equal(clusterv1.MachineDeploymentNameLabel)) + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Operator).To(Equal(metav1.LabelSelectorOpIn)) + + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Values).To(HaveLen(len(extraMDs))) + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Values).To( + WithTransform(isSortedAlphabetically, BeTrue()), + "Expected extra machine deployments to be sorted alphabetically", + ) + } +} + const ( machineName = "test-machine" clusterName = "test-cluster" @@ -81,6 +128,32 @@ const ( clusterNameLabel = clusterv1.ClusterNameLabel ) +func createMachineDeployment(name, namespace, clusterName, failureDomain string) *clusterv1.MachineDeployment { + md := &clusterv1.MachineDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + }, + }, + Spec: clusterv1.MachineDeploymentSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + // FailureDomain will be set conditionally below + }, + }, + }, + } + + // Only set failure domain if it's provided and not empty + if failureDomain != "" { + md.Spec.Template.Spec.FailureDomain = failureDomain + } + + return md +} + var _ = Describe("VirtualMachine tests", func() { var ( @@ -655,6 +728,304 @@ var _ = Describe("VirtualMachine tests", func() { Expect(vmopVM.Spec.Volumes[i]).To(BeEquivalentTo(vmVolume)) } }) + + Context("With node auto placement feature gate enabled", func() { + BeforeEach(func() { + t := GinkgoT() + featuregatetesting.SetFeatureGateDuringTest(t, feature.Gates, feature.NodeAutoPlacement, true) + }) + + // control plane machine is the machine with the control plane label set + Specify("Reconcile valid control plane Machine", func() { + // Control plane machines should not have auto placement logic applied + expectReconcileError = false + expectVMOpVM = true + expectedImageName = imageName + expectedRequeue = true + + // Provide valid bootstrap data + By("bootstrap data is created") + secretName := 
machine.GetName() + "-data" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: machine.GetNamespace(), + }, + Data: map[string][]byte{ + "value": []byte(bootstrapData), + }, + } + Expect(vmService.Client.Create(ctx, secret)).To(Succeed()) + + machine.Spec.Bootstrap.DataSecretName = &secretName + expectedConditions = append(expectedConditions, clusterv1beta1.Condition{ + Type: infrav1.VMProvisionedCondition, + Status: corev1.ConditionFalse, + Reason: vmwarev1.VMProvisionStartedReason, + Message: "", + }) + + By("VirtualMachine is created") + requeue, err = vmService.ReconcileNormal(ctx, supervisorMachineContext) + verifyOutput(supervisorMachineContext) + + By("Verify that control plane machine does not have affinity spec set") + vmopVM = getReconciledVM(ctx, vmService, supervisorMachineContext) + Expect(vmopVM).ShouldNot(BeNil()) + Expect(vmopVM.Spec.Affinity).To(BeNil()) + + By("Verify that control plane machine has correct labels") + Expect(vmopVM.Labels[nodeSelectorKey]).To(Equal(roleControlPlane)) + + By("Verify that machine-deployment label is not set for control plane") + Expect(vmopVM.Labels).ToNot(HaveKey(clusterv1.MachineDeploymentNameLabel)) + }) + + Context("For worker machine", func() { + var ( + machineDeploymentName string + vmGroup *vmoprv1.VirtualMachineGroup + ) + + BeforeEach(func() { + // Create a worker machine (no control plane label) + machineDeploymentName = "test-md" + workerMachineName := "test-worker-machine" + machine = util.CreateMachine(workerMachineName, clusterName, k8sVersion, false) + machine.Labels[clusterv1.MachineDeploymentNameLabel] = machineDeploymentName + + vsphereMachine = util.CreateVSphereMachine(workerMachineName, clusterName, className, imageName, storageClass, false) + + clusterContext, controllerManagerContext := util.CreateClusterContext(cluster, vsphereCluster) + supervisorMachineContext = util.CreateMachineContext(clusterContext, machine, vsphereMachine) + supervisorMachineContext.ControllerManagerContext = controllerManagerContext + + // Create a VirtualMachineGroup for the cluster + vmGroup = &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: corev1.NamespaceDefault, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + { + Name: workerMachineName, + Kind: "VirtualMachine", + }, + }, + }, + }, + }, + } + Expect(vmService.Client.Create(ctx, vmGroup)).To(Succeed()) + + // Create a MachineDeployment for the worker + machineDeployment := createMachineDeployment(machineDeploymentName, corev1.NamespaceDefault, clusterName, "") + Expect(vmService.Client.Create(ctx, machineDeployment)).To(Succeed()) + }) + + Specify("Reconcile valid Machine with no failure domain set", func() { + expectReconcileError = false + expectVMOpVM = true + expectedImageName = imageName + expectedRequeue = true + + // Provide valid bootstrap data + By("bootstrap data is created") + secretName := machine.GetName() + "-data" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: machine.GetNamespace(), + }, + Data: map[string][]byte{ + "value": []byte(bootstrapData), + }, + } + Expect(vmService.Client.Create(ctx, secret)).To(Succeed()) + + machine.Spec.Bootstrap.DataSecretName = &secretName + + By("VirtualMachine is created") + requeue, err = vmService.ReconcileNormal(ctx, supervisorMachineContext) + Expect(err).ShouldNot(HaveOccurred()) + 
Expect(requeue).Should(BeTrue()) + + By("Verify that worker machine has affinity spec set") + vmopVM = getReconciledVM(ctx, vmService, supervisorMachineContext) + Expect(vmopVM).ShouldNot(BeNil()) + Expect(vmopVM.Spec.Affinity).ShouldNot(BeNil()) + + By("Verify VM affinity rules are set correctly") + verifyVMAffinityRules(vmopVM, machineDeploymentName) + + By("Verify VM anti-affinity rules are set correctly") + verifyVMAntiAffinityRules(vmopVM, machineDeploymentName) + + By("Verify that worker machine has machine deployment label set") + Expect(vmopVM.Labels[clusterv1.MachineDeploymentNameLabel]).To(Equal(machineDeploymentName)) + + By("Verify that GroupName is set from VirtualMachineGroup") + Expect(vmopVM.Spec.GroupName).To(Equal(clusterName)) + }) + + Specify("Reconcile machine with failure domain set", func() { + expectReconcileError = false + expectVMOpVM = true + expectedImageName = imageName + expectedRequeue = true + + failureDomainName := "zone-1" + machineDeploymentName := "test-md-with-fd" + workerMachineName := "test-worker-machine-with-fd" + fdClusterName := "test-cluster-fd" + + // Create a separate cluster for this test to avoid VirtualMachineGroup conflicts + fdCluster := util.CreateCluster(fdClusterName) + fdVSphereCluster := util.CreateVSphereCluster(fdClusterName) + fdVSphereCluster.Status.ResourcePolicyName = resourcePolicyName + + // Create a worker machine with failure domain + machine = util.CreateMachine(workerMachineName, fdClusterName, k8sVersion, false) + machine.Labels[clusterv1.MachineDeploymentNameLabel] = machineDeploymentName + machine.Spec.FailureDomain = failureDomainName + + vsphereMachine = util.CreateVSphereMachine(workerMachineName, fdClusterName, className, imageName, storageClass, false) + + fdClusterContext, fdControllerManagerContext := util.CreateClusterContext(fdCluster, fdVSphereCluster) + supervisorMachineContext = util.CreateMachineContext(fdClusterContext, machine, vsphereMachine) + supervisorMachineContext.ControllerManagerContext = fdControllerManagerContext + + // Create a VirtualMachineGroup for the cluster with per-md zone annotation + vmGroup := &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: fdClusterName, + Namespace: corev1.NamespaceDefault, + Annotations: map[string]string{ + fmt.Sprintf("zone.cluster.x-k8s.io/%s", machineDeploymentName): failureDomainName, + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + { + Name: workerMachineName, + Kind: "VirtualMachine", + }, + }, + }, + }, + }, + } + Expect(vmService.Client.Create(ctx, vmGroup)).To(Succeed()) + + // Create a MachineDeployment for the worker with no explicit failure domain + machineDeployment := createMachineDeployment(machineDeploymentName, corev1.NamespaceDefault, fdClusterName, "") + Expect(vmService.Client.Create(ctx, machineDeployment)).To(Succeed()) + + // Provide valid bootstrap data + By("bootstrap data is created") + secretName := machine.GetName() + "-data" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: machine.GetNamespace(), + }, + Data: map[string][]byte{ + "value": []byte(bootstrapData), + }, + } + Expect(vmService.Client.Create(ctx, secret)).To(Succeed()) + + machine.Spec.Bootstrap.DataSecretName = &secretName + + By("VirtualMachine is created with auto placement and failure domain") + requeue, err = vmService.ReconcileNormal(ctx, supervisorMachineContext) + Expect(err).ShouldNot(HaveOccurred()) 
+ Expect(requeue).Should(BeTrue()) + + By("Verify that worker machine has affinity spec set") + vmopVM = getReconciledVM(ctx, vmService, supervisorMachineContext) + Expect(vmopVM).ShouldNot(BeNil()) + Expect(vmopVM.Spec.Affinity).ShouldNot(BeNil()) + + By("Verify VM affinity rules are set correctly") + verifyVMAffinityRules(vmopVM, machineDeploymentName) + + By("Verify VM anti-affinity rules are set correctly") + verifyVMAntiAffinityRules(vmopVM, machineDeploymentName) + + By("Verify that worker machine has correct labels including topology") + Expect(vmopVM.Labels[clusterv1.MachineDeploymentNameLabel]).To(Equal(machineDeploymentName)) + Expect(vmopVM.Labels[corev1.LabelTopologyZone]).To(Equal(failureDomainName)) + + By("Verify that GroupName is set from VirtualMachineGroup") + Expect(vmopVM.Spec.GroupName).To(Equal(fdClusterName)) + }) + + Context("For multiple machine deployments", func() { + const ( + otherMdName1 = "other-md-1" + otherMdName2 = "other-md-2" + ) + + BeforeEach(func() { + otherMd1 := createMachineDeployment(otherMdName1, corev1.NamespaceDefault, clusterName, "") + Expect(vmService.Client.Create(ctx, otherMd1)).To(Succeed()) + + otherMd2 := createMachineDeployment(otherMdName2, corev1.NamespaceDefault, clusterName, "") + Expect(vmService.Client.Create(ctx, otherMd2)).To(Succeed()) + + // Create a MachineDeployment with failure domain + otherMdWithFd := createMachineDeployment("other-md-with-fd", corev1.NamespaceDefault, clusterName, "zone-1") + Expect(vmService.Client.Create(ctx, otherMdWithFd)).To(Succeed()) + }) + + Specify("Reconcile valid machine with additional anti-affinity term added", func() { + expectReconcileError = false + expectVMOpVM = true + expectedImageName = imageName + expectedRequeue = true + + // Provide valid bootstrap data + By("bootstrap data is created") + secretName := machine.GetName() + "-data" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: machine.GetNamespace(), + }, + Data: map[string][]byte{ + "value": []byte(bootstrapData), + }, + } + Expect(vmService.Client.Create(ctx, secret)).To(Succeed()) + + machine.Spec.Bootstrap.DataSecretName = &secretName + + By("VirtualMachine is created") + requeue, err = vmService.ReconcileNormal(ctx, supervisorMachineContext) + Expect(err).ShouldNot(HaveOccurred()) + Expect(requeue).Should(BeTrue()) + + By("Verify that worker machine has affinity spec set") + vmopVM = getReconciledVM(ctx, vmService, supervisorMachineContext) + Expect(vmopVM).ShouldNot(BeNil()) + Expect(vmopVM.Spec.Affinity).ShouldNot(BeNil()) + + By("Verify VM affinity rules are set correctly") + verifyVMAffinityRules(vmopVM, machineDeploymentName) + + By("Verify VM anti-affinity rules are set correctly") + verifyVMAntiAffinityRules(vmopVM, machineDeploymentName, otherMdName1, otherMdName2) + }) + }) + }) + + }) }) Context("Delete tests", func() { diff --git a/test/framework/vmoperator/vmoperator.go b/test/framework/vmoperator/vmoperator.go index c80ec76545..2c1e367b01 100644 --- a/test/framework/vmoperator/vmoperator.go +++ b/test/framework/vmoperator/vmoperator.go @@ -534,7 +534,7 @@ func ReconcileDependencies(ctx context.Context, c client.Client, dependenciesCon Namespace: config.Namespace, }, Spec: vmoprv1.VirtualMachineImageSpec{ - ProviderRef: vmoprv1common.LocalObjectRef{ + ProviderRef: &vmoprv1common.LocalObjectRef{ Kind: "ContentLibraryItem", }, }, diff --git a/test/go.mod b/test/go.mod index bcab8743c0..db1b6ea8b6 100644 --- a/test/go.mod +++ b/test/go.mod @@ -8,15 +8,15 @@ replace 
sigs.k8s.io/cluster-api/test => sigs.k8s.io/cluster-api/test v1.11.0-rc. replace sigs.k8s.io/cluster-api-provider-vsphere => ../ -replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v0.0.0-20240404200847-de75746a9505 +replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20251003150112-9b458d311c4c -// The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-testsz -replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.8.6 +// The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests +replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c require ( github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests - github.com/vmware-tanzu/vm-operator/api v1.8.6 + github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c github.com/vmware/govmomi v0.52.0 ) diff --git a/test/go.sum b/test/go.sum index 8ac8dfd79b..e5e682ab61 100644 --- a/test/go.sum +++ b/test/go.sum @@ -360,8 +360,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.8.6 h1:NIndORjcnSmIlQsCMIewpIwg/ocRVDh2lYjOroTVLrU= -github.com/vmware-tanzu/vm-operator/api v1.8.6/go.mod h1:HHA2SNI9B5Yqtyp5t+Gt9WTWBi/fIkM6+MukDDSf11A= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c h1:XISTT0dw/XwMlyyiOPHPsXCxfI1Ro2Zuozi6eIacXGo= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw=
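For readers following the VirtualMachineGroup handling above: the new membership helper only inspects the first boot-order group of the cluster's VirtualMachineGroup. Below is a minimal, self-contained sketch of that lookup, assuming the vmoprv1 (vm-operator v1alpha2) API types already used in this patch; the function name and VM names are illustrative only and are not part of the change.

package main

import (
  "fmt"

  vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2"
)

// isInFirstBootOrderGroup reports whether vmName is listed as a member of the
// first boot-order group of the given VirtualMachineGroup. It mirrors the
// checkVirtualMachineGroupMembership helper added in this change, but as a
// free-standing function for illustration.
func isInFirstBootOrderGroup(group *vmoprv1.VirtualMachineGroup, vmName string) bool {
  if group == nil || len(group.Spec.BootOrder) == 0 {
    return false
  }
  for _, member := range group.Spec.BootOrder[0].Members {
    if member.Name == vmName {
      return true
    }
  }
  return false
}

func main() {
  // Build a VirtualMachineGroup with a single boot-order group, the same shape
  // the new tests construct for worker machines.
  group := &vmoprv1.VirtualMachineGroup{
    Spec: vmoprv1.VirtualMachineGroupSpec{
      BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{
        {
          Members: []vmoprv1.GroupMember{
            {Name: "test-worker-machine", Kind: "VirtualMachine"},
          },
        },
      },
    },
  }

  fmt.Println(isInFirstBootOrderGroup(group, "test-worker-machine")) // true
  fmt.Println(isInFirstBootOrderGroup(group, "some-other-machine"))  // false
}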