Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 31 additions & 9 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ import (
"github.com/cobaltcore-dev/cortex/internal/scheduling/pods"
"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments"
reservationscontroller "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/controller"
"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/failover"
"github.com/cobaltcore-dev/cortex/pkg/conf"
"github.com/cobaltcore-dev/cortex/pkg/monitoring"
Expand Down Expand Up @@ -487,18 +486,37 @@ func main() {
os.Exit(1)
}
}
if slices.Contains(mainConfig.EnabledControllers, "reservations-controller") {
setupLog.Info("enabling controller", "controller", "reservations-controller")
monitor := reservationscontroller.NewControllerMonitor(multiclusterClient)
if slices.Contains(mainConfig.EnabledControllers, "committed-resource-reservations-controller") {
setupLog.Info("enabling controller", "controller", "committed-resource-reservations-controller")
monitor := reservations.NewMonitor(multiclusterClient)
metrics.Registry.MustRegister(&monitor)
reservationsControllerConfig := conf.GetConfigOrDie[reservationscontroller.Config]()
commitmentsConfig := conf.GetConfigOrDie[commitments.Config]()
commitmentsDefaults := commitments.DefaultConfig()
if commitmentsConfig.RequeueIntervalActive == 0 {
commitmentsConfig.RequeueIntervalActive = commitmentsDefaults.RequeueIntervalActive
}
if commitmentsConfig.RequeueIntervalRetry == 0 {
commitmentsConfig.RequeueIntervalRetry = commitmentsDefaults.RequeueIntervalRetry
}
if commitmentsConfig.PipelineDefault == "" {
commitmentsConfig.PipelineDefault = commitmentsDefaults.PipelineDefault
}
if commitmentsConfig.SchedulerURL == "" {
commitmentsConfig.SchedulerURL = commitmentsDefaults.SchedulerURL
}
if commitmentsConfig.ChangeAPIWatchReservationsTimeout == 0 {
commitmentsConfig.ChangeAPIWatchReservationsTimeout = commitmentsDefaults.ChangeAPIWatchReservationsTimeout
}
if commitmentsConfig.ChangeAPIWatchReservationsPollInterval == 0 {
commitmentsConfig.ChangeAPIWatchReservationsPollInterval = commitmentsDefaults.ChangeAPIWatchReservationsPollInterval
}

if err := (&reservationscontroller.ReservationReconciler{
if err := (&commitments.CommitmentReservationController{
Client: multiclusterClient,
Scheme: mgr.GetScheme(),
Conf: reservationsControllerConfig,
Conf: commitmentsConfig,
}).SetupWithManager(mgr, multiclusterClient); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Reservation")
setupLog.Error(err, "unable to create controller", "controller", "CommitmentReservation")
os.Exit(1)
}
}
Expand Down Expand Up @@ -677,9 +695,13 @@ func main() {
setupLog.Info("starting commitments syncer")
syncer := commitments.NewSyncer(multiclusterClient)
syncerConfig := conf.GetConfigOrDie[commitments.SyncerConfig]()
syncerDefaults := commitments.DefaultSyncerConfig()
if syncerConfig.SyncInterval == 0 {
syncerConfig.SyncInterval = syncerDefaults.SyncInterval
}
if err := (&task.Runner{
Client: multiclusterClient,
Interval: time.Hour,
Interval: syncerConfig.SyncInterval,
Name: "commitments-sync-task",
Run: func(ctx context.Context) error { return syncer.SyncReservations(ctx) },
Init: func(ctx context.Context) error { return syncer.Init(ctx, syncerConfig) },
Expand Down
1 change: 1 addition & 0 deletions helm/bundles/cortex-nova/templates/pipelines_kvm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ spec:

This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor).
Specifically, this pipeline is used for general purpose workloads.
It is also used for committed-resource (CR) and failover (HA) reservation requests.
type: filter-weigher
createDecisions: false
# Fetch all placement candidates, ignoring nova's preselection.
Expand Down
31 changes: 27 additions & 4 deletions helm/bundles/cortex-nova/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,33 @@ cortex-scheduling-controllers:
- nova-deschedulings-executor
- hypervisor-overcommit-controller
- explanation-controller
- reservations-controller
- committed-resource-reservations-controller
- failover-reservations-controller
enabledTasks:
- nova-decisions-cleanup-task
# Endpoints configuration for reservations controller
endpoints:
novaExternalScheduler: "http://localhost:8080/scheduler/nova/external"
# CommittedResourceFlavorGroupPipelines maps flavor group IDs to pipeline names for CR reservations
# This allows different scheduling strategies per flavor group (e.g., HANA vs GP)
committedResourceFlavorGroupPipelines:
"2152": "kvm-hana-bin-packing-all-filters-enabled" # HANA flavor group
"2101": "kvm-general-purpose-load-balancing-all-filters-enabled" # General Purpose flavor group
"*": "kvm-general-purpose-load-balancing-all-filters-enabled" # Catch-all fallback
# Default pipeline for CR reservations when no CommittedResourceFlavorGroupPipelines entry matches
committedResourcePipelineDefault: "kvm-general-purpose-load-balancing-all-filters-enabled"
# How often to re-verify active reservations
# 5m = 300000000000 nanoseconds
committedResourceRequeueIntervalActive: 300000000000
# How often to retry when knowledge is not ready
# 1m = 60000000000 nanoseconds
committedResourceRequeueIntervalRetry: 60000000000
# How long the change-commitments API watches reservations to become ready before rolling back
# 10s = 10000000000 nanoseconds
committedResourceChangeAPIWatchReservationsTimeout: 10000000000
# How often to poll reservation status during watch
# 500ms = 500000000 nanoseconds
committedResourceChangeAPIWatchReservationsPollInterval: 500000000
# Whether the change-commitments API endpoint is active
# When false, the endpoint returns HTTP 503. The info endpoint remains available.
committedResourceEnableChangeCommitmentsAPI: true
# OvercommitMappings is a list of mappings that map hypervisor traits to
# overcommit ratios. Note that this list is applied in order, so if there
# are multiple mappings applying to the same hypervisors, the last mapping
Expand Down Expand Up @@ -189,6 +209,9 @@ cortex-knowledge-controllers:
- datasource-controllers
- knowledge-controllers
- kpis-controller
# How often the commitments syncer reconciles Limes commitments with Reservation CRDs
# 1h = 3600000000000 nanoseconds
committedResourceSyncInterval: 3600000000000
enabledTasks:
- commitments-sync-task

Expand Down
Loading
Loading