cobaltcore-dev · mblos · Mar 24, 2026 · Mar 23, 2026 · Mar 24, 2026 · Mar 24, 2026
@@ -324,10 +324,6 @@ func main() {
 		}
 		nova.NewAPI(filterWeigherController).Init(mux)
 
-		// Initialize commitments API for LIQUID interface
-		commitmentsAPI := commitments.NewAPI(multiclusterClient)
-		commitmentsAPI.Init(mux, metrics.Registry)
-
 		// Detector pipeline controller setup.
 		novaClient := nova.NewNovaClient()
 		novaClientConfig := conf.GetConfigOrDie[nova.NovaClientConfig]()
@@ -337,6 +333,12 @@ func main() {
 			setupLog.Error(err, "unable to initialize nova client")
 			os.Exit(1)
 		}
+
+		// Initialize commitments API for LIQUID interface (with Nova client for usage reporting)
+		commitmentsConfig := conf.GetConfigOrDie[commitments.Config]()
+		commitmentsAPI := commitments.NewAPIWithConfig(multiclusterClient, commitmentsConfig, novaClient)
+		commitmentsAPI.Init(mux, metrics.Registry)
+
 		deschedulingsController := &nova.DetectorPipelineController{
 			Monitor: detectorPipelineMonitor,
 			Breaker: &nova.DetectorCycleBreaker{NovaClient: novaClient},

@@ -368,3 +368,100 @@ groups:
         to be scheduled. Affected commitment changes are rolled back and Limes
         will see them as failed. Consider investigating the scheduler performance
         or increasing the timeout configuration.
+
+  # Committed Resource Usage API Alerts
+  - alert: CortexNovaCommittedResourceUsageHttpRequest400sTooHigh
+    expr: rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
+    for: 5m
+    labels:
+      context: committed-resource-api
+      dashboard: cortex/cortex
+      service: cortex
+      severity: warning
+      support_group: workload-management
+    annotations:
+      summary: "Committed Resource usage API HTTP 400 errors too high"
+      description: >
+        The committed resource usage API (Limes LIQUID integration) is responding
+        with HTTP 4xx errors. This may indicate invalid project IDs or malformed
+        requests from Limes. Limes will typically retry these requests.
+
+  - alert: CortexNovaCommittedResourceUsageHttpRequest500sTooHigh
+    expr: rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
+    for: 5m
+    labels:
+      context: committed-resource-api
+      dashboard: cortex/cortex
+      service: cortex
+      severity: warning
+      support_group: workload-management
+    annotations:
+      summary: "Committed Resource usage API HTTP 500 errors too high"
+      description: >
+        The committed resource usage API (Limes LIQUID integration) is responding
+        with HTTP 5xx errors. This indicates internal problems fetching reservations
+        or Nova server data. Limes may receive stale or incomplete usage data.
+
+  - alert: CortexNovaCommittedResourceUsageLatencyTooHigh
+    expr: histogram_quantile(0.95, sum(rate(cortex_committed_resource_usage_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 5
+    for: 5m
+    labels:
+      context: committed-resource-api
+      dashboard: cortex/cortex
+      service: cortex
+      severity: warning
+      support_group: workload-management
+    annotations:
+      summary: "Committed Resource usage API latency too high"
+      description: >
+        The committed resource usage API (Limes LIQUID integration) is experiencing
+        high latency (p95 > 5s). This may indicate slow Nova API responses or
+        database queries. Limes scrapes may time out, affecting quota reporting.
+
+  # Committed Resource Capacity API Alerts
+  - alert: CortexNovaCommittedResourceCapacityHttpRequest400sTooHigh
+    expr: rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
+    for: 5m
+    labels:
+      context: committed-resource-api
+      dashboard: cortex/cortex
+      service: cortex
+      severity: warning
+      support_group: workload-management
+    annotations:
+      summary: "Committed Resource capacity API HTTP 400 errors too high"
+      description: >
+        The committed resource capacity API (Limes LIQUID integration) is responding
+        with HTTP 4xx errors. This may indicate malformed requests from Limes.
+
+  - alert: CortexNovaCommittedResourceCapacityHttpRequest500sTooHigh
+    expr: rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
+    for: 5m
+    labels:
+      context: committed-resource-api
+      dashboard: cortex/cortex
+      service: cortex
+      severity: warning
+      support_group: workload-management
+    annotations:
+      summary: "Committed Resource capacity API HTTP 500 errors too high"
+      description: >
+        The committed resource capacity API (Limes LIQUID integration) is responding
+        with HTTP 5xx errors. This indicates internal problems calculating cluster
+        capacity. Limes may receive stale or incomplete capacity data.
+
+  - alert: CortexNovaCommittedResourceCapacityLatencyTooHigh
+    expr: histogram_quantile(0.95, sum(rate(cortex_committed_resource_capacity_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 5
+    for: 5m
+    labels:
+      context: committed-resource-api
+      dashboard: cortex/cortex
+      service: cortex
+      severity: warning
+      support_group: workload-management
+    annotations:
+      summary: "Committed Resource capacity API latency too high"
+      description: >
+        The committed resource capacity API (Limes LIQUID integration) is experiencing
+        high latency (p95 > 5s). This may indicate slow database queries or knowledge
+        CRD retrieval. Limes scrapes may time out, affecting capacity reporting.
@@ -72,6 +72,10 @@ func (m *mockExecutorNovaClient) GetServerMigrations(ctx context.Context, id str
 	return []migration{}, nil
 }
 
+func (m *mockExecutorNovaClient) ListProjectServers(ctx context.Context, projectID string) ([]ServerDetail, error) {
+	return []ServerDetail{}, nil
+}
+
 func TestExecutor_Reconcile(t *testing.T) {
 	scheme := runtime.NewScheme()
 	err := v1alpha1.AddToScheme(scheme)

@@ -39,6 +39,10 @@ func (m *mockDetectorCycleBreakerNovaClient) GetServerMigrations(ctx context.Con
 	return []migration{}, nil
 }
 
+func (m *mockDetectorCycleBreakerNovaClient) ListProjectServers(ctx context.Context, projectID string) ([]ServerDetail, error) {
+	return []ServerDetail{}, nil
+}
+
 func TestDetectorCycleBreaker_Filter(t *testing.T) {
 	tests := []struct {
 		name       string

@@ -38,6 +38,21 @@ type migration struct {
 	DestCompute   string `json:"dest_compute"`
 }
 
+// ServerDetail contains extended server information for usage reporting.
+type ServerDetail struct {
+	ID               string `json:"id"`
+	Name             string `json:"name"`
+	Status           string `json:"status"`
+	TenantID         string `json:"tenant_id"`
+	Created          string `json:"created"`
+	AvailabilityZone string `json:"OS-EXT-AZ:availability_zone"`
+	Hypervisor       string `json:"OS-EXT-SRV-ATTR:hypervisor_hostname"`
+	FlavorName       string // Populated from nested flavor.original_name
+	FlavorRAM        uint64 // Populated from nested flavor.ram
+	FlavorVCPUs      uint64 // Populated from nested flavor.vcpus
+	FlavorDisk       uint64 // Populated from nested flavor.disk
+}
+
 type NovaClient interface {
 	// Initialize the Nova API with the Keystone authentication.
 	Init(ctx context.Context, client client.Client, conf NovaClientConfig) error
@@ -47,6 +62,8 @@ type NovaClient interface {
 	LiveMigrate(ctx context.Context, id string) error
 	// Get migrations for a server by ID.
 	GetServerMigrations(ctx context.Context, id string) ([]migration, error)
+	// List all servers for a project with detailed info.
+	ListProjectServers(ctx context.Context, projectID string) ([]ServerDetail, error)
 }
 
 type novaClient struct {
@@ -160,3 +177,86 @@ func (api *novaClient) GetServerMigrations(ctx context.Context, id string) ([]mi
 	slog.Info("fetched migrations for vm", "id", id, "count", len(migrations))
 	return migrations, nil
 }
+
+// ListProjectServers retrieves all servers for a project with detailed info.
+func (api *novaClient) ListProjectServers(ctx context.Context, projectID string) ([]ServerDetail, error) {
+	// Build URL with pagination support
+	initialURL := api.sc.Endpoint + "servers/detail?all_tenants=true&tenant_id=" + projectID
+	var nextURL = &initialURL
+	var result []ServerDetail
+
+	for nextURL != nil {
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, *nextURL, http.NoBody)
+		if err != nil {
+			return nil, err
+		}
+		req.Header.Set("X-Auth-Token", api.sc.Token())
+		req.Header.Set("X-OpenStack-Nova-API-Version", api.sc.Microversion)
+
+		resp, err := api.sc.HTTPClient.Do(req)
+		if err != nil {
+			return nil, err
+		}
+		defer resp.Body.Close()
+
+		if resp.StatusCode != http.StatusOK {
+			return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+		}
+
+		// Response structure with nested flavor
+		var list struct {
+			Servers []struct {
+				ID               string `json:"id"`
+				Name             string `json:"name"`
+				Status           string `json:"status"`
+				TenantID         string `json:"tenant_id"`
+				Created          string `json:"created"`
+				AvailabilityZone string `json:"OS-EXT-AZ:availability_zone"`
+				Hypervisor       string `json:"OS-EXT-SRV-ATTR:hypervisor_hostname"`
+				Flavor           struct {
+					OriginalName string `json:"original_name"`
+					RAM          uint64 `json:"ram"`
+					VCPUs        uint64 `json:"vcpus"`
+					Disk         uint64 `json:"disk"`
+				} `json:"flavor"`
+			} `json:"servers"`
+			Links []struct {
+				Rel  string `json:"rel"`
+				Href string `json:"href"`
+			} `json:"servers_links"`
+		}
+
+		if err := json.NewDecoder(resp.Body).Decode(&list); err != nil {
+			return nil, err
+		}
+
+		// Convert to ServerDetail
+		for _, s := range list.Servers {
+			result = append(result, ServerDetail{
+				ID:               s.ID,
+				Name:             s.Name,
+				Status:           s.Status,
+				TenantID:         s.TenantID,
+				Created:          s.Created,
+				AvailabilityZone: s.AvailabilityZone,
+				Hypervisor:       s.Hypervisor,
+				FlavorName:       s.Flavor.OriginalName,
+				FlavorRAM:        s.Flavor.RAM,
+				FlavorVCPUs:      s.Flavor.VCPUs,
+				FlavorDisk:       s.Flavor.Disk,
+			})
+		}
+
+		// Check for next page
+		nextURL = nil
+		for _, link := range list.Links {
+			if link.Rel == "next" {
+				nextURL = &link.Href
+				break
+			}
+		}
+	}
+
+	slog.Info("fetched servers for project", "projectID", projectID, "count", len(result))
+	return result, nil
+}
@@ -4,37 +4,54 @@
 package commitments
 
 import (
+	"context"
 	"net/http"
 	"sync"
 
+	"github.com/cobaltcore-dev/cortex/internal/scheduling/nova"
 	"github.com/prometheus/client_golang/prometheus"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
+// UsageNovaClient is a minimal interface for the Nova client needed by the usage API.
+// This allows for easy mocking in tests without implementing the full NovaClient interface.
+type UsageNovaClient interface {
+	ListProjectServers(ctx context.Context, projectID string) ([]nova.ServerDetail, error)
+}
+
 // HTTPAPI implements Limes LIQUID commitment validation endpoints.
 type HTTPAPI struct {
-	client  client.Client
-	config  Config
-	monitor ChangeCommitmentsAPIMonitor
+	client          client.Client
+	config          Config
+	novaClient      UsageNovaClient
+	monitor         ChangeCommitmentsAPIMonitor
+	usageMonitor    ReportUsageAPIMonitor
+	capacityMonitor ReportCapacityAPIMonitor
 	// Mutex to serialize change-commitments requests
 	changeMutex sync.Mutex
 }
 
 func NewAPI(client client.Client) *HTTPAPI {
-	return NewAPIWithConfig(client, DefaultConfig())
+	return NewAPIWithConfig(client, DefaultConfig(), nil)
 }
 
-func NewAPIWithConfig(client client.Client, config Config) *HTTPAPI {
+func NewAPIWithConfig(client client.Client, config Config, novaClient UsageNovaClient) *HTTPAPI {
 	return &HTTPAPI{
-		client:  client,
-		config:  config,
-		monitor: NewChangeCommitmentsAPIMonitor(),
+		client:          client,
+		config:          config,
+		novaClient:      novaClient,
+		monitor:         NewChangeCommitmentsAPIMonitor(),
+		usageMonitor:    NewReportUsageAPIMonitor(),
+		capacityMonitor: NewReportCapacityAPIMonitor(),
 	}
 }
 
 func (api *HTTPAPI) Init(mux *http.ServeMux, registry prometheus.Registerer) {
 	registry.MustRegister(&api.monitor)
+	registry.MustRegister(&api.usageMonitor)
+	registry.MustRegister(&api.capacityMonitor)
 	mux.HandleFunc("/v1/commitments/change-commitments", api.HandleChangeCommitments)
 	mux.HandleFunc("/v1/commitments/report-capacity", api.HandleReportCapacity)
 	mux.HandleFunc("/v1/commitments/info", api.HandleInfo)
+	mux.HandleFunc("/v1/commitments/projects/", api.HandleReportUsage) // matches /v1/commitments/projects/:project_id/report-usage
 }
@@ -991,7 +991,7 @@ func newCommitmentTestEnv(
 	// Use custom config if provided, otherwise use default
 	var api *HTTPAPI
 	if customConfig != nil {
-		api = NewAPIWithConfig(wrappedClient, *customConfig)
+		api = NewAPIWithConfig(wrappedClient, *customConfig, nil)
 	} else {
 		api = NewAPI(wrappedClient)
 	}

@@ -72,7 +72,7 @@ func (api *HTTPAPI) buildServiceInfo(ctx context.Context, logger logr.Logger) (l
 	// Build resources map
 	resources := make(map[liquid.ResourceName]liquid.ResourceInfo)
 	for groupName, groupData := range flavorGroups {
-		resourceName := liquid.ResourceName("ram_" + groupName)
+		resourceName := liquid.ResourceName(commitmentResourceNamePrefix + groupName)
 
 		flavorNames := make([]string, 0, len(groupData.Flavors))
 		for _, flavor := range groupData.Flavors {