Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,6 @@ func main() {
Client: multiclusterClient,
Scheme: mgr.GetScheme(),
Monitor: monitor,
Conf: conf.GetConfigOrDie[openstack.OpenStackDatasourceReconcilerConfig](),
}).SetupWithManager(mgr, multiclusterClient); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "OpenStackDatasourceReconciler")
os.Exit(1)
Expand Down
6 changes: 3 additions & 3 deletions helm/bundles/cortex-cinder/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ apiVersion: v2
name: cortex-cinder
description: A Helm chart deploying Cortex for Cinder.
type: application
version: 0.0.52
version: 0.0.53
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex-postgres
Expand All @@ -16,12 +16,12 @@ dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.39
version: 0.0.40
alias: cortex-knowledge-controllers
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.39
version: 0.0.40
alias: cortex-scheduling-controllers

# Owner info adds a configmap to the kubernetes cluster with information on
Expand Down
4 changes: 2 additions & 2 deletions helm/bundles/cortex-crds/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ apiVersion: v2
name: cortex-crds
description: A Helm chart deploying Cortex CRDs.
type: application
version: 0.0.52
version: 0.0.53
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.39
version: 0.0.40

# Owner info adds a configmap to the kubernetes cluster with information on
# the service owner. This makes it easier to find out who to contact in case
Expand Down
4 changes: 2 additions & 2 deletions helm/bundles/cortex-ironcore/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ apiVersion: v2
name: cortex-ironcore
description: A Helm chart deploying Cortex for IronCore.
type: application
version: 0.0.52
version: 0.0.53
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.39
version: 0.0.40

# Owner info adds a configmap to the kubernetes cluster with information on
# the service owner. This makes it easier to find out who to contact in case
Expand Down
6 changes: 3 additions & 3 deletions helm/bundles/cortex-manila/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ apiVersion: v2
name: cortex-manila
description: A Helm chart deploying Cortex for Manila.
type: application
version: 0.0.52
version: 0.0.53
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex-postgres
Expand All @@ -16,12 +16,12 @@ dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.39
version: 0.0.40
alias: cortex-knowledge-controllers
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.39
version: 0.0.40
alias: cortex-scheduling-controllers

# Owner info adds a configmap to the kubernetes cluster with information on
Expand Down
6 changes: 3 additions & 3 deletions helm/bundles/cortex-nova/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ apiVersion: v2
name: cortex-nova
description: A Helm chart deploying Cortex for Nova.
type: application
version: 0.0.52
version: 0.0.53
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex-postgres
Expand All @@ -16,12 +16,12 @@ dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.39
version: 0.0.40
alias: cortex-knowledge-controllers
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.39
version: 0.0.40
alias: cortex-scheduling-controllers

# Owner info adds a configmap to the kubernetes cluster with information on
Expand Down
4 changes: 3 additions & 1 deletion helm/bundles/cortex-nova/alerts/nova.alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,9 @@ groups:
the datasource controller's workqueue overprioritizing other datasources.

- alert: CortexNovaExistingDatasourcesLackingBehind
expr: sum by(datasource) (cortex_datasource_seconds_until_reconcile{queued="true",domain="nova"}) < -600
expr: |
sum by(datasource) (cortex_datasource_seconds_until_reconcile{queued="true",domain="nova"}) < -600
and on(datasource) cortex_datasource_state{state="ready",domain="nova"} == 1
for: 10m
labels:
context: datasources
Expand Down
4 changes: 2 additions & 2 deletions helm/bundles/cortex-pods/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ apiVersion: v2
name: cortex-pods
description: A Helm chart deploying Cortex for Pods.
type: application
version: 0.0.52
version: 0.0.53
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.39
version: 0.0.40

# Owner info adds a configmap to the kubernetes cluster with information on
# the service owner. This makes it easier to find out who to contact in case
Expand Down
4 changes: 2 additions & 2 deletions helm/library/cortex/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
name: cortex
description: A Helm chart to distribute cortex.
type: application
version: 0.0.39
appVersion: "sha-f437366b"
version: 0.0.40
appVersion: "sha-be8840bc"
icon: "https://example.com/icon.png"
dependencies: []
29 changes: 25 additions & 4 deletions internal/knowledge/datasources/plugins/openstack/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
"github.com/cobaltcore-dev/cortex/internal/knowledge/datasources"
"github.com/cobaltcore-dev/cortex/internal/knowledge/db"
"github.com/cobaltcore-dev/cortex/pkg/conf"
"github.com/cobaltcore-dev/cortex/pkg/keystone"
"github.com/cobaltcore-dev/cortex/pkg/multicluster"
"github.com/cobaltcore-dev/cortex/pkg/sso"
Expand All @@ -24,20 +25,25 @@ import (
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

type OpenStackDatasourceReconcilerConfig struct {
type config struct {
// The controller will only touch resources with this scheduling domain.
SchedulingDomain v1alpha1.SchedulingDomain `json:"schedulingDomain"`
// Secret ref to keystone credentials stored in a k8s secret.
KeystoneSecretRef corev1.SecretReference `json:"keystoneSecretRef"`
// Secret ref to SSO credentials stored in a k8s secret, if applicable.
SSOSecretRef *corev1.SecretReference `json:"ssoSecretRef"`
// The number of parallel reconciles to allow for the controller.
// By default, this will be set to 1.
ParallelReconciles *int `json:"openstackDatasourceControllerParallelReconciles,omitempty"`
}

type Syncer interface {
Expand All @@ -54,8 +60,9 @@ type OpenStackDatasourceReconciler struct {
Scheme *runtime.Scheme
// Datasources monitor.
Monitor datasources.Monitor

// Config for the reconciler.
Conf OpenStackDatasourceReconcilerConfig
conf config
}

// Reconcile is part of the main kubernetes reconciliation loop which aims to
Expand Down Expand Up @@ -281,16 +288,21 @@ func predicateIgnoreStatusConditions() predicate.Predicate {
}

func (r *OpenStackDatasourceReconciler) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error {
var err error
r.conf, err = conf.GetConfig[config]()
if err != nil {
return err
}
bldr := multicluster.BuildController(mcl, mgr)
// Watch datasource changes across all clusters.
bldr, err := bldr.WatchesMulticluster(
bldr, err = bldr.WatchesMulticluster(
&v1alpha1.Datasource{},
&handler.EnqueueRequestForObject{},
predicate.NewPredicateFuncs(func(obj client.Object) bool {
// Only react to datasources matching the operator.
ds := obj.(*v1alpha1.Datasource)
// Ignore all datasources outside our scheduling domain.
if ds.Spec.SchedulingDomain != r.Conf.SchedulingDomain {
if ds.Spec.SchedulingDomain != r.conf.SchedulingDomain {
return false
}
// Ignore all datasources that are not of type openstack.
Expand All @@ -305,5 +317,14 @@ func (r *OpenStackDatasourceReconciler) SetupWithManager(mgr manager.Manager, mc
return err
}
return bldr.Named("cortex-openstack-datasource").
WithOptions(controller.TypedOptions[reconcile.Request]{
// Allow parallel reconciles if configured, otherwise default to 1.
MaxConcurrentReconciles: func() int {
if r.conf.ParallelReconciles != nil {
return *r.conf.ParallelReconciles
}
return 1
}(),
}).
Complete(r)
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func TestOpenStackDatasourceReconciler_Creation(t *testing.T) {
Client: client,
Scheme: scheme,
Monitor: datasources.Monitor{},
Conf: OpenStackDatasourceReconcilerConfig{SchedulingDomain: "test-operator"},
conf: config{SchedulingDomain: "test-operator"},
}

if reconciler.Client == nil {
Expand All @@ -43,8 +43,8 @@ func TestOpenStackDatasourceReconciler_Creation(t *testing.T) {
t.Error("Scheme should not be nil")
}

if reconciler.Conf.SchedulingDomain != "test-operator" {
t.Errorf("Expected scheduling domain 'test-operator', got %s", reconciler.Conf.SchedulingDomain)
if reconciler.conf.SchedulingDomain != "test-operator" {
t.Errorf("Expected scheduling domain 'test-operator', got %s", reconciler.conf.SchedulingDomain)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ func (api *novaAPI) GetAllMigrations(ctx context.Context) ([]Migration, error) {
initialURL := api.sc.Endpoint + "os-migrations"
var nextURL = &initialURL
var migrations []Migration
seen := make(map[string]struct{})
seen := make(map[int]struct{})
for nextURL != nil {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, *nextURL, http.NoBody)
if err != nil {
Expand Down Expand Up @@ -354,11 +354,11 @@ func (api *novaAPI) GetAllMigrations(ctx context.Context) ([]Migration, error) {
return nil, err
}
for _, m := range list.Migrations {
if _, ok := seen[m.UUID]; ok {
slog.Warn("skipping duplicate migration", "uuid", m.UUID)
if _, ok := seen[m.ID]; ok {
slog.Warn("skipping duplicate migration", "id", m.ID)
continue
}
seen[m.UUID] = struct{}{}
seen[m.ID] = struct{}{}
migrations = append(migrations, m)
}
nextURL = nil
Expand Down
28 changes: 13 additions & 15 deletions internal/scheduling/cinder/external_scheduler_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R
// Exit early if the request method is not POST.
if r.Method != http.MethodPost {
internalErr := fmt.Errorf("invalid request method: %s", r.Method)
c.Respond(http.StatusMethodNotAllowed, internalErr, "invalid request method")
c.Respond(nil, http.StatusMethodNotAllowed, internalErr, "invalid request method")
return
}

Expand All @@ -103,7 +103,7 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R
// If configured, log out the complete request body.
body, err := io.ReadAll(r.Body)
if err != nil {
c.Respond(http.StatusInternalServerError, err, "failed to read request body")
c.Respond(nil, http.StatusInternalServerError, err, "failed to read request body")
return
}
raw := runtime.RawExtension{Raw: body}
Expand All @@ -112,17 +112,15 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R
cp := body
reader := bytes.NewReader(cp)
if err := json.NewDecoder(reader).Decode(&requestData); err != nil {
c.Respond(http.StatusBadRequest, err, "failed to decode request body")
c.Respond(nil, http.StatusBadRequest, err, "failed to decode request body")
return
}
slog.Info(
"handling POST request", "url", "/scheduler/cinder/external",
"hosts", len(requestData.Hosts), "spec", requestData.Spec,
)
logger := slog.With(requestData.GetTraceLogArgs())
logger.Info("handling POST request", "url", "/scheduler/cinder/external", "body", string(body))

if ok, reason := httpAPI.canRunScheduler(requestData); !ok {
internalErr := fmt.Errorf("cannot run scheduler: %s", reason)
c.Respond(http.StatusBadRequest, internalErr, reason)
c.Respond(logger, http.StatusBadRequest, internalErr, reason)
return
}

Expand All @@ -131,10 +129,10 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R
var err error
requestData.Pipeline, err = httpAPI.inferPipelineName(requestData)
if err != nil {
c.Respond(http.StatusBadRequest, err, err.Error())
c.Respond(logger, http.StatusBadRequest, err, err.Error())
return
}
slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline)
logger.Info("inferred pipeline name", "pipeline", requestData.Pipeline)
}

// Create the decision object in kubernetes.
Expand All @@ -154,24 +152,24 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R
}
ctx := r.Context()
if err := httpAPI.delegate.ProcessNewDecisionFromAPI(ctx, decision); err != nil {
c.Respond(http.StatusInternalServerError, err, "failed to process scheduling decision")
c.Respond(logger, http.StatusInternalServerError, err, "failed to process scheduling decision")
return
}
// Check if the decision contains status conditions indicating an error.
if meta.IsStatusConditionFalse(decision.Status.Conditions, v1alpha1.DecisionConditionReady) {
c.Respond(http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed")
c.Respond(logger, http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed")
return
}
if decision.Status.Result == nil {
c.Respond(http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed")
c.Respond(logger, http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed")
return
}
hosts := decision.Status.Result.OrderedHosts
response := api.ExternalSchedulerResponse{Hosts: hosts}
w.Header().Set("Content-Type", "application/json")
if err = json.NewEncoder(w).Encode(response); err != nil {
c.Respond(http.StatusInternalServerError, err, "failed to encode response")
c.Respond(logger, http.StatusInternalServerError, err, "failed to encode response")
return
}
c.Respond(http.StatusOK, nil, "Success")
c.Respond(logger, http.StatusOK, nil, "Success")
}
8 changes: 6 additions & 2 deletions internal/scheduling/lib/api_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func (m *APIMonitor) Callback(w http.ResponseWriter, r *http.Request, pattern st

// Respond to the request with the given code and error.
// Also log the time it took to handle the request.
func (c MonitoredCallback) Respond(code int, err error, text string) {
func (c MonitoredCallback) Respond(logger *slog.Logger, code int, err error, text string) {
if c.apiMonitor != nil && c.apiMonitor.ApiRequestsTimer != nil {
observer := c.apiMonitor.ApiRequestsTimer.WithLabelValues(
c.r.Method,
Expand All @@ -64,7 +64,11 @@ func (c MonitoredCallback) Respond(code int, err error, text string) {
observer.Observe(time.Since(c.t).Seconds())
}
if err != nil {
slog.Error("failed to handle request", "error", err)
if logger == nil {
slog.Error("failed to handle request", "error", err)
} else {
logger.Error("failed to handle request", "error", err)
}
http.Error(c.w, text, code)
return
}
Expand Down
Loading