diff --git a/cmd/main.go b/cmd/main.go index f0923ab44..fc3c9b1b8 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -509,7 +509,6 @@ func main() { Client: multiclusterClient, Scheme: mgr.GetScheme(), Monitor: monitor, - Conf: conf.GetConfigOrDie[openstack.OpenStackDatasourceReconcilerConfig](), }).SetupWithManager(mgr, multiclusterClient); err != nil { setupLog.Error(err, "unable to create controller", "controller", "OpenStackDatasourceReconciler") os.Exit(1) diff --git a/helm/bundles/cortex-cinder/Chart.yaml b/helm/bundles/cortex-cinder/Chart.yaml index 5fc3cd067..f1e70ab3b 100644 --- a/helm/bundles/cortex-cinder/Chart.yaml +++ b/helm/bundles/cortex-cinder/Chart.yaml @@ -5,7 +5,7 @@ apiVersion: v2 name: cortex-cinder description: A Helm chart deploying Cortex for Cinder. type: application -version: 0.0.52 +version: 0.0.53 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex-postgres @@ -16,12 +16,12 @@ dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.39 + version: 0.0.40 alias: cortex-knowledge-controllers # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.39 + version: 0.0.40 alias: cortex-scheduling-controllers # Owner info adds a configmap to the kubernetes cluster with information on diff --git a/helm/bundles/cortex-crds/Chart.yaml b/helm/bundles/cortex-crds/Chart.yaml index d5d5c714f..ce69780d6 100644 --- a/helm/bundles/cortex-crds/Chart.yaml +++ b/helm/bundles/cortex-crds/Chart.yaml @@ -5,13 +5,13 @@ apiVersion: v2 name: cortex-crds description: A Helm chart deploying Cortex CRDs. type: application -version: 0.0.52 +version: 0.0.53 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.39 + version: 0.0.40 # Owner info adds a configmap to the kubernetes cluster with information on # the service owner. This makes it easier to find out who to contact in case diff --git a/helm/bundles/cortex-ironcore/Chart.yaml b/helm/bundles/cortex-ironcore/Chart.yaml index a516027c4..24d4e5610 100644 --- a/helm/bundles/cortex-ironcore/Chart.yaml +++ b/helm/bundles/cortex-ironcore/Chart.yaml @@ -5,13 +5,13 @@ apiVersion: v2 name: cortex-ironcore description: A Helm chart deploying Cortex for IronCore. type: application -version: 0.0.52 +version: 0.0.53 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.39 + version: 0.0.40 # Owner info adds a configmap to the kubernetes cluster with information on # the service owner. This makes it easier to find out who to contact in case diff --git a/helm/bundles/cortex-manila/Chart.yaml b/helm/bundles/cortex-manila/Chart.yaml index b755207a3..34f3728ee 100644 --- a/helm/bundles/cortex-manila/Chart.yaml +++ b/helm/bundles/cortex-manila/Chart.yaml @@ -5,7 +5,7 @@ apiVersion: v2 name: cortex-manila description: A Helm chart deploying Cortex for Manila. type: application -version: 0.0.52 +version: 0.0.53 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex-postgres @@ -16,12 +16,12 @@ dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.39 + version: 0.0.40 alias: cortex-knowledge-controllers # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.39 + version: 0.0.40 alias: cortex-scheduling-controllers # Owner info adds a configmap to the kubernetes cluster with information on diff --git a/helm/bundles/cortex-nova/Chart.yaml b/helm/bundles/cortex-nova/Chart.yaml index 3f403d729..097e45646 100644 --- a/helm/bundles/cortex-nova/Chart.yaml +++ b/helm/bundles/cortex-nova/Chart.yaml @@ -5,7 +5,7 @@ apiVersion: v2 name: cortex-nova description: A Helm chart deploying Cortex for Nova. type: application -version: 0.0.52 +version: 0.0.53 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex-postgres @@ -16,12 +16,12 @@ dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.39 + version: 0.0.40 alias: cortex-knowledge-controllers # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.39 + version: 0.0.40 alias: cortex-scheduling-controllers # Owner info adds a configmap to the kubernetes cluster with information on diff --git a/helm/bundles/cortex-nova/alerts/nova.alerts.yaml b/helm/bundles/cortex-nova/alerts/nova.alerts.yaml index 0fb9ec9b1..41bf29794 100644 --- a/helm/bundles/cortex-nova/alerts/nova.alerts.yaml +++ b/helm/bundles/cortex-nova/alerts/nova.alerts.yaml @@ -627,7 +627,9 @@ groups: the datasource controller's workqueue overprioritizing other datasources. - alert: CortexNovaExistingDatasourcesLackingBehind - expr: sum by(datasource) (cortex_datasource_seconds_until_reconcile{queued="true",domain="nova"}) < -600 + expr: | + sum by(datasource) (cortex_datasource_seconds_until_reconcile{queued="true",domain="nova"}) < -600 + and on(datasource) cortex_datasource_state{state="ready",domain="nova"} == 1 for: 10m labels: context: datasources diff --git a/helm/bundles/cortex-pods/Chart.yaml b/helm/bundles/cortex-pods/Chart.yaml index 3274766ff..77f33a31d 100644 --- a/helm/bundles/cortex-pods/Chart.yaml +++ b/helm/bundles/cortex-pods/Chart.yaml @@ -5,13 +5,13 @@ apiVersion: v2 name: cortex-pods description: A Helm chart deploying Cortex for Pods. type: application -version: 0.0.52 +version: 0.0.53 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.0.39 + version: 0.0.40 # Owner info adds a configmap to the kubernetes cluster with information on # the service owner. This makes it easier to find out who to contact in case diff --git a/helm/library/cortex/Chart.yaml b/helm/library/cortex/Chart.yaml index 3a44a3030..031a846e2 100644 --- a/helm/library/cortex/Chart.yaml +++ b/helm/library/cortex/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: cortex description: A Helm chart to distribute cortex. type: application -version: 0.0.39 -appVersion: "sha-f437366b" +version: 0.0.40 +appVersion: "sha-be8840bc" icon: "https://example.com/icon.png" dependencies: [] diff --git a/internal/knowledge/datasources/plugins/openstack/controller.go b/internal/knowledge/datasources/plugins/openstack/controller.go index f94cebd06..f259dd3fe 100644 --- a/internal/knowledge/datasources/plugins/openstack/controller.go +++ b/internal/knowledge/datasources/plugins/openstack/controller.go @@ -12,6 +12,7 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/datasources" "github.com/cobaltcore-dev/cortex/internal/knowledge/db" + "github.com/cobaltcore-dev/cortex/pkg/conf" "github.com/cobaltcore-dev/cortex/pkg/keystone" "github.com/cobaltcore-dev/cortex/pkg/multicluster" "github.com/cobaltcore-dev/cortex/pkg/sso" @@ -24,20 +25,25 @@ import ( "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) -type OpenStackDatasourceReconcilerConfig struct { +type config struct { // The controller will only touch resources with this scheduling domain. SchedulingDomain v1alpha1.SchedulingDomain `json:"schedulingDomain"` // Secret ref to keystone credentials stored in a k8s secret. KeystoneSecretRef corev1.SecretReference `json:"keystoneSecretRef"` // Secret ref to SSO credentials stored in a k8s secret, if applicable. SSOSecretRef *corev1.SecretReference `json:"ssoSecretRef"` + // The number of parallel reconciles to allow for the controller. + // By default, this will be set to 1. + ParallelReconciles *int `json:"openstackDatasourceControllerParallelReconciles,omitempty"` } type Syncer interface { @@ -54,8 +60,9 @@ type OpenStackDatasourceReconciler struct { Scheme *runtime.Scheme // Datasources monitor. Monitor datasources.Monitor + // Config for the reconciler. - Conf OpenStackDatasourceReconcilerConfig + conf config } // Reconcile is part of the main kubernetes reconciliation loop which aims to @@ -281,16 +288,21 @@ func predicateIgnoreStatusConditions() predicate.Predicate { } func (r *OpenStackDatasourceReconciler) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { + var err error + r.conf, err = conf.GetConfig[config]() + if err != nil { + return err + } bldr := multicluster.BuildController(mcl, mgr) // Watch datasource changes across all clusters. - bldr, err := bldr.WatchesMulticluster( + bldr, err = bldr.WatchesMulticluster( &v1alpha1.Datasource{}, &handler.EnqueueRequestForObject{}, predicate.NewPredicateFuncs(func(obj client.Object) bool { // Only react to datasources matching the operator. ds := obj.(*v1alpha1.Datasource) // Ignore all datasources outside our scheduling domain. - if ds.Spec.SchedulingDomain != r.Conf.SchedulingDomain { + if ds.Spec.SchedulingDomain != r.conf.SchedulingDomain { return false } // Ignore all datasources that are not of type openstack. @@ -305,5 +317,14 @@ func (r *OpenStackDatasourceReconciler) SetupWithManager(mgr manager.Manager, mc return err } return bldr.Named("cortex-openstack-datasource"). + WithOptions(controller.TypedOptions[reconcile.Request]{ + // Allow parallel reconciles if configured, otherwise default to 1. + MaxConcurrentReconciles: func() int { + if r.conf.ParallelReconciles != nil { + return *r.conf.ParallelReconciles + } + return 1 + }(), + }). Complete(r) } diff --git a/internal/knowledge/datasources/plugins/openstack/controller_test.go b/internal/knowledge/datasources/plugins/openstack/controller_test.go index c75778cbe..899e83237 100644 --- a/internal/knowledge/datasources/plugins/openstack/controller_test.go +++ b/internal/knowledge/datasources/plugins/openstack/controller_test.go @@ -32,7 +32,7 @@ func TestOpenStackDatasourceReconciler_Creation(t *testing.T) { Client: client, Scheme: scheme, Monitor: datasources.Monitor{}, - Conf: OpenStackDatasourceReconcilerConfig{SchedulingDomain: "test-operator"}, + conf: config{SchedulingDomain: "test-operator"}, } if reconciler.Client == nil { @@ -43,8 +43,8 @@ func TestOpenStackDatasourceReconciler_Creation(t *testing.T) { t.Error("Scheme should not be nil") } - if reconciler.Conf.SchedulingDomain != "test-operator" { - t.Errorf("Expected scheduling domain 'test-operator', got %s", reconciler.Conf.SchedulingDomain) + if reconciler.conf.SchedulingDomain != "test-operator" { + t.Errorf("Expected scheduling domain 'test-operator', got %s", reconciler.conf.SchedulingDomain) } } diff --git a/internal/knowledge/datasources/plugins/openstack/nova/nova_api.go b/internal/knowledge/datasources/plugins/openstack/nova/nova_api.go index 2f41c54a6..ca25868f2 100644 --- a/internal/knowledge/datasources/plugins/openstack/nova/nova_api.go +++ b/internal/knowledge/datasources/plugins/openstack/nova/nova_api.go @@ -325,7 +325,7 @@ func (api *novaAPI) GetAllMigrations(ctx context.Context) ([]Migration, error) { initialURL := api.sc.Endpoint + "os-migrations" var nextURL = &initialURL var migrations []Migration - seen := make(map[string]struct{}) + seen := make(map[int]struct{}) for nextURL != nil { req, err := http.NewRequestWithContext(ctx, http.MethodGet, *nextURL, http.NoBody) if err != nil { @@ -354,11 +354,11 @@ func (api *novaAPI) GetAllMigrations(ctx context.Context) ([]Migration, error) { return nil, err } for _, m := range list.Migrations { - if _, ok := seen[m.UUID]; ok { - slog.Warn("skipping duplicate migration", "uuid", m.UUID) + if _, ok := seen[m.ID]; ok { + slog.Warn("skipping duplicate migration", "id", m.ID) continue } - seen[m.UUID] = struct{}{} + seen[m.ID] = struct{}{} migrations = append(migrations, m) } nextURL = nil diff --git a/internal/scheduling/cinder/external_scheduler_api.go b/internal/scheduling/cinder/external_scheduler_api.go index 7169228f4..3f67bd080 100644 --- a/internal/scheduling/cinder/external_scheduler_api.go +++ b/internal/scheduling/cinder/external_scheduler_api.go @@ -93,7 +93,7 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R // Exit early if the request method is not POST. if r.Method != http.MethodPost { internalErr := fmt.Errorf("invalid request method: %s", r.Method) - c.Respond(http.StatusMethodNotAllowed, internalErr, "invalid request method") + c.Respond(nil, http.StatusMethodNotAllowed, internalErr, "invalid request method") return } @@ -103,7 +103,7 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R // If configured, log out the complete request body. body, err := io.ReadAll(r.Body) if err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to read request body") + c.Respond(nil, http.StatusInternalServerError, err, "failed to read request body") return } raw := runtime.RawExtension{Raw: body} @@ -112,17 +112,15 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R cp := body reader := bytes.NewReader(cp) if err := json.NewDecoder(reader).Decode(&requestData); err != nil { - c.Respond(http.StatusBadRequest, err, "failed to decode request body") + c.Respond(nil, http.StatusBadRequest, err, "failed to decode request body") return } - slog.Info( - "handling POST request", "url", "/scheduler/cinder/external", - "hosts", len(requestData.Hosts), "spec", requestData.Spec, - ) + logger := slog.With(requestData.GetTraceLogArgs()) + logger.Info("handling POST request", "url", "/scheduler/cinder/external", "body", string(body)) if ok, reason := httpAPI.canRunScheduler(requestData); !ok { internalErr := fmt.Errorf("cannot run scheduler: %s", reason) - c.Respond(http.StatusBadRequest, internalErr, reason) + c.Respond(logger, http.StatusBadRequest, internalErr, reason) return } @@ -131,10 +129,10 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R var err error requestData.Pipeline, err = httpAPI.inferPipelineName(requestData) if err != nil { - c.Respond(http.StatusBadRequest, err, err.Error()) + c.Respond(logger, http.StatusBadRequest, err, err.Error()) return } - slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline) + logger.Info("inferred pipeline name", "pipeline", requestData.Pipeline) } // Create the decision object in kubernetes. @@ -154,24 +152,24 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R } ctx := r.Context() if err := httpAPI.delegate.ProcessNewDecisionFromAPI(ctx, decision); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to process scheduling decision") + c.Respond(logger, http.StatusInternalServerError, err, "failed to process scheduling decision") return } // Check if the decision contains status conditions indicating an error. if meta.IsStatusConditionFalse(decision.Status.Conditions, v1alpha1.DecisionConditionReady) { - c.Respond(http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") + c.Respond(logger, http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") return } if decision.Status.Result == nil { - c.Respond(http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") + c.Respond(logger, http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") return } hosts := decision.Status.Result.OrderedHosts response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") if err = json.NewEncoder(w).Encode(response); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to encode response") + c.Respond(logger, http.StatusInternalServerError, err, "failed to encode response") return } - c.Respond(http.StatusOK, nil, "Success") + c.Respond(logger, http.StatusOK, nil, "Success") } diff --git a/internal/scheduling/lib/api_monitor.go b/internal/scheduling/lib/api_monitor.go index 6f03e08e6..349ea118c 100644 --- a/internal/scheduling/lib/api_monitor.go +++ b/internal/scheduling/lib/api_monitor.go @@ -53,7 +53,7 @@ func (m *APIMonitor) Callback(w http.ResponseWriter, r *http.Request, pattern st // Respond to the request with the given code and error. // Also log the time it took to handle the request. -func (c MonitoredCallback) Respond(code int, err error, text string) { +func (c MonitoredCallback) Respond(logger *slog.Logger, code int, err error, text string) { if c.apiMonitor != nil && c.apiMonitor.ApiRequestsTimer != nil { observer := c.apiMonitor.ApiRequestsTimer.WithLabelValues( c.r.Method, @@ -64,7 +64,11 @@ func (c MonitoredCallback) Respond(code int, err error, text string) { observer.Observe(time.Since(c.t).Seconds()) } if err != nil { - slog.Error("failed to handle request", "error", err) + if logger == nil { + slog.Error("failed to handle request", "error", err) + } else { + logger.Error("failed to handle request", "error", err) + } http.Error(c.w, text, code) return } diff --git a/internal/scheduling/lib/api_monitor_test.go b/internal/scheduling/lib/api_monitor_test.go index 173cf74b7..5686f5396 100644 --- a/internal/scheduling/lib/api_monitor_test.go +++ b/internal/scheduling/lib/api_monitor_test.go @@ -146,7 +146,7 @@ func TestMonitoredCallback_Respond_WithoutError(t *testing.T) { // Add a small delay to ensure measurable duration time.Sleep(1 * time.Millisecond) - callback.Respond(tt.code, nil, tt.text) + callback.Respond(nil, tt.code, nil, tt.text) // When there's no error, Respond doesn't set the HTTP status code // It only records metrics with the provided code @@ -265,7 +265,7 @@ func TestMonitoredCallback_Respond_WithError(t *testing.T) { // Add a small delay to ensure measurable duration time.Sleep(1 * time.Millisecond) - callback.Respond(tt.code, tt.err, tt.text) + callback.Respond(nil, tt.code, tt.err, tt.text) // Verify HTTP response if w.Code != tt.code { @@ -341,14 +341,14 @@ func TestMonitoredCallback_Respond_NilMonitor(t *testing.T) { } // Should not panic even with nil monitor - callback.Respond(200, nil, "") + callback.Respond(nil, 200, nil, "") // Test with nil ApiRequestsTimer monitor := &APIMonitor{ApiRequestsTimer: nil} callback.apiMonitor = monitor // Should not panic even with nil timer - callback.Respond(200, nil, "") + callback.Respond(nil, 200, nil, "") } func TestMonitoredCallback_TimeMeasurement(t *testing.T) { @@ -375,7 +375,7 @@ func TestMonitoredCallback_TimeMeasurement(t *testing.T) { sleepDuration := 10 * time.Millisecond time.Sleep(sleepDuration) - callback.Respond(200, nil, "") + callback.Respond(nil, 200, nil, "") // Verify the metric was recorded by gathering metrics metricFamily, err := registry.Gather() @@ -425,7 +425,7 @@ func TestMonitoredCallback_HTTPMethods(t *testing.T) { w := httptest.NewRecorder() callback := monitor.Callback(w, req, "/test") - callback.Respond(200, nil, "") + callback.Respond(nil, 200, nil, "") // Verify the method label is recorded correctly metricFamily, err := registry.Gather() diff --git a/internal/scheduling/manila/external_scheduler_api.go b/internal/scheduling/manila/external_scheduler_api.go index 64e96c0bd..2ab39b5d4 100644 --- a/internal/scheduling/manila/external_scheduler_api.go +++ b/internal/scheduling/manila/external_scheduler_api.go @@ -93,7 +93,7 @@ func (httpAPI *httpAPI) ManilaExternalScheduler(w http.ResponseWriter, r *http.R // Exit early if the request method is not POST. if r.Method != http.MethodPost { internalErr := fmt.Errorf("invalid request method: %s", r.Method) - c.Respond(http.StatusMethodNotAllowed, internalErr, "invalid request method") + c.Respond(nil, http.StatusMethodNotAllowed, internalErr, "invalid request method") return } @@ -103,7 +103,7 @@ func (httpAPI *httpAPI) ManilaExternalScheduler(w http.ResponseWriter, r *http.R // If configured, log out the complete request body. body, err := io.ReadAll(r.Body) if err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to read request body") + c.Respond(nil, http.StatusInternalServerError, err, "failed to read request body") return } raw := runtime.RawExtension{Raw: body} @@ -112,17 +112,15 @@ func (httpAPI *httpAPI) ManilaExternalScheduler(w http.ResponseWriter, r *http.R cp := body reader := bytes.NewReader(cp) if err := json.NewDecoder(reader).Decode(&requestData); err != nil { - c.Respond(http.StatusBadRequest, err, "failed to decode request body") + c.Respond(nil, http.StatusBadRequest, err, "failed to decode request body") return } - slog.Info( - "handling POST request", "url", "/scheduler/manila/external", - "hosts", len(requestData.Hosts), "spec", requestData.Spec, - ) + logger := slog.With(requestData.GetTraceLogArgs()) + logger.Info("handling POST request", "url", "/scheduler/manila/external", "body", string(body)) if ok, reason := httpAPI.canRunScheduler(requestData); !ok { internalErr := fmt.Errorf("cannot run scheduler: %s", reason) - c.Respond(http.StatusBadRequest, internalErr, reason) + c.Respond(logger, http.StatusBadRequest, internalErr, reason) return } @@ -131,10 +129,10 @@ func (httpAPI *httpAPI) ManilaExternalScheduler(w http.ResponseWriter, r *http.R var err error requestData.Pipeline, err = httpAPI.inferPipelineName(requestData) if err != nil { - c.Respond(http.StatusBadRequest, err, err.Error()) + c.Respond(logger, http.StatusBadRequest, err, err.Error()) return } - slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline) + logger.Info("inferred pipeline name", "pipeline", requestData.Pipeline) } // Create the decision object in kubernetes. @@ -154,24 +152,24 @@ func (httpAPI *httpAPI) ManilaExternalScheduler(w http.ResponseWriter, r *http.R } ctx := r.Context() if err := httpAPI.delegate.ProcessNewDecisionFromAPI(ctx, decision); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to process scheduling decision") + c.Respond(logger, http.StatusInternalServerError, err, "failed to process scheduling decision") return } // Check if the decision contains status conditions indicating an error. if meta.IsStatusConditionFalse(decision.Status.Conditions, v1alpha1.DecisionConditionReady) { - c.Respond(http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") + c.Respond(logger, http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") return } if decision.Status.Result == nil { - c.Respond(http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") + c.Respond(logger, http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") return } hosts := decision.Status.Result.OrderedHosts response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") if err = json.NewEncoder(w).Encode(response); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to encode response") + c.Respond(logger, http.StatusInternalServerError, err, "failed to encode response") return } - c.Respond(http.StatusOK, nil, "Success") + c.Respond(logger, http.StatusOK, nil, "Success") } diff --git a/internal/scheduling/nova/external_scheduler_api.go b/internal/scheduling/nova/external_scheduler_api.go index d731f8ffa..8179edb12 100644 --- a/internal/scheduling/nova/external_scheduler_api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -178,7 +178,7 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req // Exit early if the request method is not POST. if r.Method != http.MethodPost { internalErr := fmt.Errorf("invalid request method: %s", r.Method) - c.Respond(http.StatusMethodNotAllowed, internalErr, "invalid request method") + c.Respond(nil, http.StatusMethodNotAllowed, internalErr, "invalid request method") return } @@ -188,7 +188,7 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req // If configured, log out the complete request body. body, err := io.ReadAll(r.Body) if err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to read request body") + c.Respond(nil, http.StatusInternalServerError, err, "failed to read request body") return } raw := runtime.RawExtension{Raw: body} @@ -197,17 +197,15 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req cp := body reader := bytes.NewReader(cp) if err := json.NewDecoder(reader).Decode(&requestData); err != nil { - c.Respond(http.StatusBadRequest, err, "failed to decode request body") + c.Respond(nil, http.StatusBadRequest, err, "failed to decode request body") return } - slog.Info( - "handling POST request", "url", "/scheduler/nova/external", - "hosts", len(requestData.Hosts), "spec", requestData.Spec, - ) + logger := slog.With(requestData.GetTraceLogArgs()) + logger.Info("handling POST request", "url", "/scheduler/nova/external", "body", string(body)) if ok, reason := httpAPI.canRunScheduler(requestData); !ok { internalErr := fmt.Errorf("cannot run scheduler: %s", reason) - c.Respond(http.StatusBadRequest, internalErr, reason) + c.Respond(logger, http.StatusBadRequest, internalErr, reason) return } @@ -216,10 +214,10 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req var err error requestData.Pipeline, err = httpAPI.inferPipelineName(requestData) if err != nil { - c.Respond(http.StatusBadRequest, err, err.Error()) + c.Respond(logger, http.StatusBadRequest, err, err.Error()) return } - slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline) + logger.Info("inferred pipeline name", "pipeline", requestData.Pipeline) } decision := &v1alpha1.Decision{ @@ -242,22 +240,22 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req } ctx := r.Context() if err := httpAPI.delegate.ProcessNewDecisionFromAPI(ctx, decision); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to process scheduling decision") + c.Respond(logger, http.StatusInternalServerError, err, "failed to process scheduling decision") return } // Check if the decision contains status conditions indicating an error. if meta.IsStatusConditionFalse(decision.Status.Conditions, v1alpha1.DecisionConditionReady) { - c.Respond(http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") + c.Respond(logger, http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") return } if decision.Status.Result == nil { - c.Respond(http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") + c.Respond(logger, http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") return } hosts := decision.Status.Result.OrderedHosts if httpAPI.config.NovaLimitHostsToRequest { hosts = limitHostsToRequest(requestData, hosts) - slog.Info("limited hosts to request", + logger.Info("limited hosts to request", "hosts", hosts, "originalHosts", decision.Status.Result.OrderedHosts) } // This is a hack to address the problem that Nova only uses the first host in hosts for evacuation requests. @@ -269,8 +267,8 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") if err = json.NewEncoder(w).Encode(response); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to encode response") + c.Respond(logger, http.StatusInternalServerError, err, "failed to encode response") return } - c.Respond(http.StatusOK, nil, "Success") + c.Respond(logger, http.StatusOK, nil, "Success") }