Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions internal/prometheus/auth.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package prometheus

import (
"net/http"
"sync/atomic"
)

// AuthGate is the per-request resource read check used by handlers that read
// K8s spec data via the shared informer cache. The cache is populated using
// Radar's service-account permissions, so without this gate any authenticated
// user could fetch any namespace's spec by guessing names. Server.canRead is
// the concrete implementation; passing it via SetAuthGate avoids an import
// cycle (server imports prometheus, not the other way around).
//
// Implementations should return true when auth is disabled or the user can
// read the resource; false to refuse with 403.
//
// NOTE(review): group, resource, namespace and verb presumably identify the
// Kubernetes API group, resource type, namespace and action being checked —
// confirm against Server.canRead.
type AuthGate func(r *http.Request, group, resource, namespace, verb string) bool

// authGate holds a pointer to the currently installed AuthGate. SetAuthGate
// stores into it and canRead loads from it; a nil pointer means no gate is
// installed.
var authGate atomic.Pointer[AuthGate]

// SetAuthGate registers fn as the authorization check consulted for each
// request. A nil fn clears any installed gate, which turns gating off; that
// is only appropriate for tests.
func SetAuthGate(fn AuthGate) {
	// A nil *AuthGate is what canRead interprets as "no gate installed".
	var next *AuthGate
	if fn != nil {
		next = &fn
	}
	authGate.Store(next)
}

// canRead reports whether the request may proceed according to the installed
// AuthGate. With no gate installed (e.g. tests, transitional state during
// init) it allows the request, so the gate is strictly additive and never
// accidentally locks out the OSS no-auth path.
func canRead(r *http.Request, group, resource, namespace, verb string) bool {
	if gate := authGate.Load(); gate != nil {
		return (*gate)(r, group, resource, namespace, verb)
	}
	// No gate installed: allow.
	return true
}
2 changes: 2 additions & 0 deletions internal/prometheus/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ func RegisterRoutes(r chi.Router) {
r.Get("/prometheus/namespace/{namespace}", handleNamespaceMetrics)
r.Get("/prometheus/cluster", handleClusterMetrics)
r.Get("/prometheus/query", handleRawQuery)
r.Get("/prometheus/pvc/{namespace}/{name}", handlePVCUsage)
r.Get("/prometheus/rightsizing/{kind}/{namespace}/{name}", handleRightsizing)
}

func writeJSON(w http.ResponseWriter, status int, v interface{}) {
Expand Down
22 changes: 21 additions & 1 deletion internal/prometheus/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,14 @@ const (
CategoryNetworkRX MetricCategory = "network_rx"
CategoryNetworkTX MetricCategory = "network_tx"
CategoryFilesystem MetricCategory = "filesystem"
// CategoryRestarts is sourced from KSM and represents the number of container
// restarts observed over a trailing window (changes() on the restart counter);
// gracefully degrades when KSM isn't scraped.
CategoryRestarts MetricCategory = "restarts"
)

// AllCategories returns all metric categories in display order.
//
// CategoryRestarts is included last. The previous body had a stale first
// return without it, which left the intended return unreachable.
func AllCategories() []MetricCategory {
	return []MetricCategory{
		CategoryCPU,
		CategoryMemory,
		CategoryNetworkRX,
		CategoryNetworkTX,
		CategoryFilesystem,
		CategoryRestarts,
	}
}

// CategoryLabel returns a human-readable label for a metric category.
Expand All @@ -52,6 +55,8 @@ func CategoryLabel(cat MetricCategory) string {
return "Network Transmitted"
case CategoryFilesystem:
return "Filesystem"
case CategoryRestarts:
return "Restarts"
default:
return string(cat)
}
Expand All @@ -70,6 +75,8 @@ func CategoryUnit(cat MetricCategory) string {
return "bytes/s"
case CategoryFilesystem:
return "bytes/s"
case CategoryRestarts:
return "count"
default:
return ""
}
Expand Down Expand Up @@ -97,6 +104,8 @@ func SupportedKinds() []string {
func CategoriesForKind(kind string) []MetricCategory {
switch strings.ToLower(kind) {
case "node":
// Nodes have neither workload restart semantics nor the network/filesystem
// container metrics — node-exporter covers them separately on the Node page.
return []MetricCategory{CategoryCPU, CategoryMemory, CategoryFilesystem}
default:
return AllCategories()
Expand Down Expand Up @@ -205,6 +214,13 @@ func buildPodQuery(namespace, podName string, category MetricCategory, filterCon
}

switch category {
case CategoryRestarts:
// changes() over a 1h window gives the count of restarts during that window;
// using a long window keeps the chart legible (most pods never restart).
// Sums across containers so a multi-container pod surfaces one line per pod.
return fmt.Sprintf(
`sum by (pod,namespace) (changes(kube_pod_container_status_restarts_total{namespace='%s',pod='%s'}[1h]))`,
ns, pod)
case CategoryCPU:
return fmt.Sprintf(
`sum(rate(container_cpu_usage_seconds_total{%snamespace='%s',pod='%s'}[5m])) by (pod,namespace)`,
Expand Down Expand Up @@ -240,6 +256,10 @@ func buildWorkloadQuery(namespace, workloadName string, category MetricCategory,
}

switch category {
case CategoryRestarts:
return fmt.Sprintf(
`sum by (pod,namespace) (changes(kube_pod_container_status_restarts_total{namespace='%s',pod=~'%s'}[1h]))`,
ns, podPattern)
case CategoryCPU:
return fmt.Sprintf(
`sum(rate(container_cpu_usage_seconds_total{%snamespace='%s',pod=~'%s'}[5m])) by (pod,namespace)`,
Expand Down
Loading
Loading