diff --git a/internal/opencost/handlers.go b/internal/opencost/handlers.go
index 38a1d98e9..c6ec8c8f2 100644
--- a/internal/opencost/handlers.go
+++ b/internal/opencost/handlers.go
@@ -3,16 +3,14 @@ package opencost
 import (
 	"encoding/json"
 	"log"
-	"math"
 	"net/http"
-	"sort"
 	"strings"
-	"time"
 
 	"github.com/go-chi/chi/v5"
 
 	"github.com/skyhook-io/radar/internal/k8s"
 	prometheuspkg "github.com/skyhook-io/radar/internal/prometheus"
+	pkgopencost "github.com/skyhook-io/radar/pkg/opencost"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 )
@@ -29,217 +27,16 @@ func RegisterRoutes(r chi.Router) {
 func handleSummary(w http.ResponseWriter, r *http.Request) {
 	client := prometheuspkg.GetClient()
 	if client == nil {
-		writeJSON(w, http.StatusOK, CostSummary{Available: false, Reason: ReasonNoPrometheus})
+		writeJSON(w, http.StatusOK, pkgopencost.CostSummary{Available: false, Reason: pkgopencost.ReasonNoPrometheus})
 		return
 	}
-
-	// Check if Prometheus is reachable (triggers discovery if needed)
-	_, _, err := client.EnsureConnected(r.Context())
-	if err != nil {
+	if _, _, err := client.EnsureConnected(r.Context()); err != nil {
 		log.Printf("[opencost] EnsureConnected failed (summary): %v", err)
-		writeJSON(w, http.StatusOK, CostSummary{Available: false, Reason: ReasonNoPrometheus})
-		return
-	}
-
-	// Query per-namespace CPU cost
-	// container_cpu_allocation is a gauge (current allocated cores), not a counter — use avg_over_time.
-	// label_replace handles honor_labels=false setups where Prometheus renames the original
-	// namespace label to exported_namespace and sets namespace to the scrape target's namespace.
-	cpuResult, err := client.Query(r.Context(),
-		`sum by (namespace) (label_replace(avg_over_time(container_cpu_allocation{namespace!=""}[1h]), "namespace", "$1", "exported_namespace", "(.+)") * on(node) group_left() node_cpu_hourly_cost)`)
-	if err != nil {
-		// Try the opencost_container metric name variant (this IS a counter, so rate is correct)
-		cpuResult, err = client.Query(r.Context(),
-			`sum by (namespace) (label_replace(rate(opencost_container_cpu_cost_total[1h]), "namespace", "$1", "exported_namespace", "(.+)"))`)
-		if err != nil {
-			log.Printf("[opencost] CPU cost query failed: %v", err)
-			writeJSON(w, http.StatusOK, CostSummary{Available: false, Reason: ReasonQueryError})
-			return
-		}
-	}
-
-	// Query per-namespace memory cost
-	// container_memory_allocation_bytes is a gauge — use avg_over_time
-	memResult, err := client.Query(r.Context(),
-		`sum by (namespace) (label_replace(avg_over_time(container_memory_allocation_bytes{namespace!=""}[1h]), "namespace", "$1", "exported_namespace", "(.+)") / 1073741824 * on(node) group_left() node_ram_hourly_cost)`)
-	if err != nil {
-		// Try the opencost_container metric name variant (this IS a counter, so rate is correct)
-		memResult, err = client.Query(r.Context(),
-			`sum by (namespace) (label_replace(rate(opencost_container_memory_cost_total[1h]), "namespace", "$1", "exported_namespace", "(.+)"))`)
-		if err != nil {
-			log.Printf("[opencost] Memory cost query failed: %v", err)
-			writeJSON(w, http.StatusOK, CostSummary{Available: false, Reason: ReasonQueryError})
-			return
-		}
-	}
-
-	// If both queries returned empty results, OpenCost metrics aren't available
-	if len(cpuResult.Series) == 0 && len(memResult.Series) == 0 {
-		writeJSON(w, http.StatusOK, CostSummary{Available: false, Reason: ReasonNoMetrics})
+		writeJSON(w, http.StatusOK, pkgopencost.CostSummary{Available: false, Reason: pkgopencost.ReasonNoPrometheus})
 		return
 	}
-
-	// Query actual CPU usage cost (for efficiency calculation)
-	// cAdvisor metrics use "instance" for the node hostname, while OpenCost uses "node",
-	// so we label_replace to bridge the join.
-	cpuUsageMap := make(map[string]float64)
-	cpuUsageResult, err := client.Query(r.Context(),
-		`sum by (namespace) (label_replace(rate(container_cpu_usage_seconds_total{container!="", namespace!=""}[1h]), "node", "$1", "instance", "(.+?)(?::\\d+)?$") * on(node) group_left() node_cpu_hourly_cost)`)
-	if err == nil {
-		for _, s := range cpuUsageResult.Series {
-			ns := s.Labels["namespace"]
-			if ns != "" && len(s.DataPoints) > 0 {
-				cpuUsageMap[ns] = s.DataPoints[len(s.DataPoints)-1].Value
-			}
-		}
-	}
-
-	// Query actual memory usage cost (for efficiency calculation)
-	memUsageMap := make(map[string]float64)
-	memUsageResult, err := client.Query(r.Context(),
-		`sum by (namespace) (label_replace(container_memory_working_set_bytes{container!="", namespace!=""}, "node", "$1", "instance", "(.+?)(?::\\d+)?$") / 1073741824 * on(node) group_left() node_ram_hourly_cost)`)
-	if err == nil {
-		for _, s := range memUsageResult.Series {
-			ns := s.Labels["namespace"]
-			if ns != "" && len(s.DataPoints) > 0 {
-				memUsageMap[ns] = s.DataPoints[len(s.DataPoints)-1].Value
-			}
-		}
-	}
-
-	// Query storage (PV) cost per namespace
-	storageMap := make(map[string]float64)
-	storageResult, err := client.Query(r.Context(),
-		`sum by (namespace) (pv_hourly_cost * on(persistentvolume) group_left(namespace) kube_persistentvolume_claim_ref)`)
-	if err == nil {
-		for _, s := range storageResult.Series {
-			ns := s.Labels["namespace"]
-			if ns != "" && len(s.DataPoints) > 0 {
-				storageMap[ns] = s.DataPoints[len(s.DataPoints)-1].Value
-			}
-		}
-	}
-
-	// Build per-namespace cost map
-	nsMap := make(map[string]*NamespaceCost)
-
-	for _, s := range cpuResult.Series {
-		ns := s.Labels["namespace"]
-		if ns == "" {
-			continue
-		}
-		if _, ok := nsMap[ns]; !ok {
-			nsMap[ns] = &NamespaceCost{Name: ns}
-		}
-		if len(s.DataPoints) > 0 {
-			nsMap[ns].CPUCost = s.DataPoints[len(s.DataPoints)-1].Value
-		}
-	}
-
-	for _, s := range memResult.Series {
-		ns := s.Labels["namespace"]
-		if ns == "" {
-			continue
-		}
-		if _, ok := nsMap[ns]; !ok {
-			nsMap[ns] = &NamespaceCost{Name: ns}
-		}
-		if len(s.DataPoints) > 0 {
-			nsMap[ns].MemoryCost = s.DataPoints[len(s.DataPoints)-1].Value
-		}
-	}
-
-	// Calculate totals
-	var totalHourlyCost, totalStorageCost, totalUsageCost, totalAllocCost float64
-	namespaces := make([]NamespaceCost, 0, len(nsMap))
-	for _, nc := range nsMap {
-		nc.HourlyCost = nc.CPUCost + nc.MemoryCost
-		nc.StorageCost = storageMap[nc.Name]
-		nc.HourlyCost += nc.StorageCost
-		totalStorageCost += nc.StorageCost
-
-		// Efficiency
-		nc.CPUUsageCost = cpuUsageMap[nc.Name]
-		nc.MemoryUsageCost = memUsageMap[nc.Name]
-		allocCost := nc.CPUCost + nc.MemoryCost // allocation cost (excl storage)
-		usageCost := nc.CPUUsageCost + nc.MemoryUsageCost
-		if allocCost > 0 && usageCost > 0 {
-			nc.Efficiency = roundTo((usageCost/allocCost)*100, 1)
-			if nc.Efficiency > 100 {
-				nc.Efficiency = 100
-			}
-			nc.IdleCost = allocCost - usageCost
-			if nc.IdleCost < 0 {
-				nc.IdleCost = 0
-			}
-		}
-		totalAllocCost += allocCost
-		totalUsageCost += usageCost
-
-		totalHourlyCost += nc.HourlyCost
-		namespaces = append(namespaces, *nc)
-	}
-
-	// Also try to get node-level total cost for a more accurate total
-	nodeResult, err := client.Query(r.Context(), `sum(node_total_hourly_cost)`)
-	if err == nil && len(nodeResult.Series) > 0 && len(nodeResult.Series[0].DataPoints) > 0 {
-		nodeCost := nodeResult.Series[0].DataPoints[0].Value
-		if nodeCost > totalHourlyCost {
-			totalHourlyCost = nodeCost
-		}
-	}
-
-	// Sort by cost descending
-	sort.Slice(namespaces, func(i, j int) bool {
-		return namespaces[i].HourlyCost > namespaces[j].HourlyCost
-	})
-
-	// Cluster-level efficiency
-	var clusterEfficiency float64
-	var totalIdleCost float64
-	if totalAllocCost > 0 && totalUsageCost > 0 {
-		clusterEfficiency = roundTo((totalUsageCost/totalAllocCost)*100, 1)
-		if clusterEfficiency > 100 {
-			clusterEfficiency = 100
-		}
-		totalIdleCost = totalAllocCost - totalUsageCost
-		if totalIdleCost < 0 {
-			totalIdleCost = 0
-		}
-	}
-
-	// Round to 4 decimal places for cleaner JSON
-	totalHourlyCost = roundTo(totalHourlyCost, 4)
-	totalStorageCost = roundTo(totalStorageCost, 4)
-	totalIdleCost = roundTo(totalIdleCost, 4)
-	for i := range namespaces {
-		namespaces[i].HourlyCost = roundTo(namespaces[i].HourlyCost, 4)
-		namespaces[i].CPUCost = roundTo(namespaces[i].CPUCost, 4)
-		namespaces[i].MemoryCost = roundTo(namespaces[i].MemoryCost, 4)
-		namespaces[i].StorageCost = roundTo(namespaces[i].StorageCost, 4)
-		namespaces[i].CPUUsageCost = roundTo(namespaces[i].CPUUsageCost, 4)
-		namespaces[i].MemoryUsageCost = roundTo(namespaces[i].MemoryUsageCost, 4)
-		namespaces[i].IdleCost = roundTo(namespaces[i].IdleCost, 4)
-	}
-
-	writeJSON(w, http.StatusOK, CostSummary{
-		Available:         true,
-		Currency:          "USD",
-		Window:            "1h",
-		TotalHourlyCost:   totalHourlyCost,
-		TotalStorageCost:  totalStorageCost,
-		TotalIdleCost:     totalIdleCost,
-		ClusterEfficiency: clusterEfficiency,
-		Namespaces:        namespaces,
-	})
-}
-
-func roundTo(val float64, places int) float64 {
-	if math.IsNaN(val) || math.IsInf(val, 0) {
-		return 0
-	}
-	pow := math.Pow(10, float64(places))
-	return math.Round(val*pow) / pow
+	writeJSON(w, http.StatusOK, pkgopencost.ComputeCostSummaryFromProm(
+		r.Context(), client.Prom(), pkgopencost.SummaryOptions{}))
 }
 
 func writeJSON(w http.ResponseWriter, status int, v interface{}) {
@@ -250,12 +47,6 @@ func writeJSON(w http.ResponseWriter, status int, v interface{}) {
 	}
 }
 
-// workloadKey identifies a workload by name and kind for aggregation.
-type workloadKey struct {
-	name string
-	kind string
-}
-
 // handleWorkloads returns workload-level cost breakdown for a namespace.
 func handleWorkloads(w http.ResponseWriter, r *http.Request) {
 	ns := r.URL.Query().Get("namespace")
@@ -266,423 +57,91 @@ func handleWorkloads(w http.ResponseWriter, r *http.Request) {
 
 	client := prometheuspkg.GetClient()
 	if client == nil {
-		writeJSON(w, http.StatusOK, WorkloadCostResponse{Namespace: ns, Reason: ReasonNoPrometheus})
+		writeJSON(w, http.StatusOK, pkgopencost.WorkloadCostResponse{Namespace: ns, Reason: pkgopencost.ReasonNoPrometheus})
 		return
 	}
-
-	_, _, err := client.EnsureConnected(r.Context())
-	if err != nil {
+	if _, _, err := client.EnsureConnected(r.Context()); err != nil {
 		log.Printf("[opencost] EnsureConnected failed (workloads): %v", err)
-		writeJSON(w, http.StatusOK, WorkloadCostResponse{Namespace: ns, Reason: ReasonNoPrometheus})
+		writeJSON(w, http.StatusOK, pkgopencost.WorkloadCostResponse{Namespace: ns, Reason: pkgopencost.ReasonNoPrometheus})
 		return
 	}
 
-	// Sanitize namespace for safe PromQL label interpolation
-	safeNS := prometheuspkg.SanitizeLabelValue(ns)
-
-	// Query per-pod CPU cost in this namespace.
-	// Use "or" to handle both honor_labels configurations:
-	//   exported_namespace="X"  → honor_labels=false (namespace was renamed)
-	//   namespace="X", exported_namespace=""  → honor_labels=true (no renaming, label absent)
-	cpuQuery := `sum by (pod) ((avg_over_time(container_cpu_allocation{exported_namespace="` + safeNS + `"}[1h]) or avg_over_time(container_cpu_allocation{namespace="` + safeNS + `", exported_namespace=""}[1h])) * on(node) group_left() node_cpu_hourly_cost)`
-	cpuResult, err := client.Query(r.Context(), cpuQuery)
-	if err != nil {
-		cpuQuery = `sum by (pod) (rate(opencost_container_cpu_cost_total{exported_namespace="` + safeNS + `"}[1h]) or rate(opencost_container_cpu_cost_total{namespace="` + safeNS + `", exported_namespace=""}[1h]))`
-		cpuResult, err = client.Query(r.Context(), cpuQuery)
-		if err != nil {
-			log.Printf("[opencost] Workload CPU cost query failed for %s: %v", ns, err)
-			writeJSON(w, http.StatusOK, WorkloadCostResponse{Namespace: ns, Reason: ReasonQueryError})
-			return
-		}
-	}
-
-	// Query per-pod memory cost in this namespace
-	memQuery := `sum by (pod) ((avg_over_time(container_memory_allocation_bytes{exported_namespace="` + safeNS + `"}[1h]) or avg_over_time(container_memory_allocation_bytes{namespace="` + safeNS + `", exported_namespace=""}[1h])) / 1073741824 * on(node) group_left() node_ram_hourly_cost)`
-	memResult, err := client.Query(r.Context(), memQuery)
-	if err != nil {
-		memQuery = `sum by (pod) (rate(opencost_container_memory_cost_total{exported_namespace="` + safeNS + `"}[1h]) or rate(opencost_container_memory_cost_total{namespace="` + safeNS + `", exported_namespace=""}[1h]))`
-		memResult, err = client.Query(r.Context(), memQuery)
-		if err != nil {
-			log.Printf("[opencost] Workload memory cost query failed for %s: %v", ns, err)
-			writeJSON(w, http.StatusOK, WorkloadCostResponse{Namespace: ns, Reason: ReasonQueryError})
-			return
-		}
-	}
-
-	// Query per-pod CPU usage cost (for efficiency)
-	podCPUUsage := make(map[string]float64)
-	cpuUsageQuery := `sum by (pod) (label_replace(rate(container_cpu_usage_seconds_total{container!="", namespace="` + safeNS + `"}[1h]), "node", "$1", "instance", "(.+?)(?::\\d+)?$") * on(node) group_left() node_cpu_hourly_cost)`
-	cpuUsageResult, usageErr := client.Query(r.Context(), cpuUsageQuery)
-	if usageErr == nil {
-		for _, s := range cpuUsageResult.Series {
-			pod := s.Labels["pod"]
-			if pod != "" && len(s.DataPoints) > 0 {
-				podCPUUsage[pod] = s.DataPoints[len(s.DataPoints)-1].Value
-			}
-		}
-	}
-
-	// Query per-pod memory usage cost (for efficiency)
-	podMemUsage := make(map[string]float64)
-	memUsageQuery := `sum by (pod) (label_replace(container_memory_working_set_bytes{container!="", namespace="` + safeNS + `"}, "node", "$1", "instance", "(.+?)(?::\\d+)?$") / 1073741824 * on(node) group_left() node_ram_hourly_cost)`
-	memUsageResult, usageErr := client.Query(r.Context(), memUsageQuery)
-	if usageErr == nil {
-		for _, s := range memUsageResult.Series {
-			pod := s.Labels["pod"]
-			if pod != "" && len(s.DataPoints) > 0 {
-				podMemUsage[pod] = s.DataPoints[len(s.DataPoints)-1].Value
-			}
-		}
-	}
-
-	// Build per-pod cost map
-	type podCost struct {
-		cpuCost     float64
-		memoryCost  float64
-		cpuUsage    float64
-		memoryUsage float64
-	}
-	podCosts := make(map[string]*podCost)
-
-	for _, s := range cpuResult.Series {
-		pod := s.Labels["pod"]
-		if pod == "" {
-			continue
-		}
-		if _, ok := podCosts[pod]; !ok {
-			podCosts[pod] = &podCost{}
-		}
-		if len(s.DataPoints) > 0 {
-			podCosts[pod].cpuCost = s.DataPoints[len(s.DataPoints)-1].Value
-		}
-	}
-
-	for _, s := range memResult.Series {
-		pod := s.Labels["pod"]
-		if pod == "" {
-			continue
-		}
-		if _, ok := podCosts[pod]; !ok {
-			podCosts[pod] = &podCost{}
-		}
-		if len(s.DataPoints) > 0 {
-			podCosts[pod].memoryCost = s.DataPoints[len(s.DataPoints)-1].Value
-		}
-	}
-
-	// Merge usage data into pod costs
-	for pod, pc := range podCosts {
-		pc.cpuUsage = podCPUUsage[pod]
-		pc.memoryUsage = podMemUsage[pod]
-	}
+	writeJSON(w, http.StatusOK, pkgopencost.ComputeWorkloadsFromProm(
+		r.Context(), client.Prom(), ns, buildPodOwnerLookup(ns)))
+}
 
-	// Resolve pod -> workload using K8s cache owner references
-	podOwnerMap := make(map[string]workloadKey)
+// buildPodOwnerLookup snapshots radar's pod informer for ns so that
+// pkg/opencost.ComputeWorkloadsFromProm can resolve pod→workload without
+// client-go; it returns nil when the cache is unset, listing fails, or ns has no pods.
+func buildPodOwnerLookup(ns string) pkgopencost.PodOwnerLookup {
 	rc := k8s.GetResourceCache()
-	if rc != nil && rc.Pods() != nil {
-		pods, _ := rc.Pods().Pods(ns).List(labels.Everything())
-		for _, p := range pods {
-			podOwnerMap[p.Name] = resolveOwner(p.OwnerReferences)
-		}
+	if rc == nil || rc.Pods() == nil {
+		return nil
 	}
-
-	workloadMap := make(map[workloadKey]*WorkloadCost)
-	for podName, pc := range podCosts {
-		owner, ok := podOwnerMap[podName]
-		if !ok {
-			// Fallback: strip pod hash suffixes to guess workload name
-			owner = workloadKey{name: stripPodSuffix(podName), kind: "standalone"}
-		}
-
-		wl, exists := workloadMap[owner]
-		if !exists {
-			wl = &WorkloadCost{Name: owner.name, Kind: owner.kind}
-			workloadMap[owner] = wl
-		}
-		wl.CPUCost += pc.cpuCost
-		wl.MemoryCost += pc.memoryCost
-		wl.CPUUsageCost += pc.cpuUsage
-		wl.MemoryUsageCost += pc.memoryUsage
-		wl.Replicas++
+	pods, err := rc.Pods().Pods(ns).List(labels.Everything())
+	if err != nil || len(pods) == 0 {
+		return nil
 	}
-
-	// Build sorted result
-	workloads := make([]WorkloadCost, 0, len(workloadMap))
-	for _, wl := range workloadMap {
-		wl.HourlyCost = wl.CPUCost + wl.MemoryCost
-		// Compute efficiency
-		allocCost := wl.CPUCost + wl.MemoryCost
-		usageCost := wl.CPUUsageCost + wl.MemoryUsageCost
-		if allocCost > 0 && usageCost > 0 {
-			wl.Efficiency = roundTo((usageCost/allocCost)*100, 1)
-			if wl.Efficiency > 100 {
-				wl.Efficiency = 100
-			}
-			wl.IdleCost = allocCost - usageCost
-			if wl.IdleCost < 0 {
-				wl.IdleCost = 0
-			}
-		}
-		wl.HourlyCost = roundTo(wl.HourlyCost, 4)
-		wl.CPUCost = roundTo(wl.CPUCost, 4)
-		wl.MemoryCost = roundTo(wl.MemoryCost, 4)
-		wl.CPUUsageCost = roundTo(wl.CPUUsageCost, 4)
-		wl.MemoryUsageCost = roundTo(wl.MemoryUsageCost, 4)
-		wl.IdleCost = roundTo(wl.IdleCost, 4)
-		workloads = append(workloads, *wl)
+	owners := make(map[string]pkgopencost.WorkloadOwner, len(pods))
+	for _, p := range pods {
+		owners[p.Name] = resolvePodOwner(p.OwnerReferences)
+	}
+	return func(podName string) (pkgopencost.WorkloadOwner, bool) {
+		o, ok := owners[podName]
+		return o, ok
 	}
-	sort.Slice(workloads, func(i, j int) bool {
-		return workloads[i].HourlyCost > workloads[j].HourlyCost
-	})
-
-	writeJSON(w, http.StatusOK, WorkloadCostResponse{
-		Available: true,
-		Namespace: ns,
-		Workloads: workloads,
-	})
 }
 
-// resolveOwner walks owner references to find the top-level workload.
-// For pods owned by ReplicaSets, it strips the RS hash suffix to get the Deployment name.
-func resolveOwner(owners []metav1.OwnerReference) workloadKey {
-	if len(owners) == 0 {
-		return workloadKey{kind: "standalone"}
+// resolvePodOwner derives the workload from a pod's first owner reference
+// (no references → kind "standalone"). Pods owned by a ReplicaSet are mapped
+// back to the parent Deployment by stripping the RS hash suffix.
+func resolvePodOwner(refs []metav1.OwnerReference) pkgopencost.WorkloadOwner {
+	if len(refs) == 0 {
+		return pkgopencost.WorkloadOwner{Kind: "standalone"}
 	}
-
-	owner := owners[0]
-
-	// If owned by a ReplicaSet, strip hash suffix to get the Deployment name
+	owner := refs[0]
 	if owner.Kind == "ReplicaSet" {
-		deployName := stripReplicaSetSuffix(owner.Name)
-		if deployName != owner.Name {
-			return workloadKey{name: deployName, kind: "Deployment"}
+		if deployName := stripReplicaSetSuffix(owner.Name); deployName != owner.Name {
+			return pkgopencost.WorkloadOwner{Name: deployName, Kind: "Deployment"}
 		}
 	}
-
-	return workloadKey{name: owner.Name, kind: owner.Kind}
+	return pkgopencost.WorkloadOwner{Name: owner.Name, Kind: owner.Kind}
 }
 
-// stripReplicaSetSuffix removes the hash suffix from a ReplicaSet name
-// (e.g., "myapp-7f8d9c" -> "myapp").
 func stripReplicaSetSuffix(name string) string {
-	idx := strings.LastIndex(name, "-")
-	if idx > 0 {
+	if idx := strings.LastIndex(name, "-"); idx > 0 {
 		return name[:idx]
 	}
 	return name
 }
 
-// stripPodSuffix removes pod hash suffixes to approximate the workload name.
-// e.g., "myapp-7f8d9c-xyz12" -> "myapp"
-func stripPodSuffix(name string) string {
-	// Strip last segment (pod hash)
-	idx := strings.LastIndex(name, "-")
-	if idx <= 0 {
-		return name
-	}
-	name = name[:idx]
-	// Strip RS hash segment
-	idx = strings.LastIndex(name, "-")
-	if idx <= 0 {
-		return name
-	}
-	return name[:idx]
-}
-
-// parseCostTimeRange parses the "range" query parameter into start/end/step for cost trends.
-func parseCostTimeRange(rangeStr string) (start, end time.Time, step time.Duration, label string) {
-	end = time.Now()
-	switch rangeStr {
-	case "6h":
-		start = end.Add(-6 * time.Hour)
-		step = 15 * time.Minute
-		label = "6h"
-	case "7d":
-		start = end.Add(-7 * 24 * time.Hour)
-		step = 6 * time.Hour
-		label = "7d"
-	default: // "24h"
-		start = end.Add(-24 * time.Hour)
-		step = time.Hour
-		label = "24h"
-	}
-	return
-}
-
 // handleTrend returns cost trend data over time as a stacked series per namespace.
 func handleTrend(w http.ResponseWriter, r *http.Request) {
 	client := prometheuspkg.GetClient()
 	if client == nil {
-		writeJSON(w, http.StatusOK, CostTrendResponse{Available: false, Reason: ReasonNoPrometheus})
+		writeJSON(w, http.StatusOK, pkgopencost.CostTrendResponse{Available: false, Reason: pkgopencost.ReasonNoPrometheus})
 		return
 	}
-
-	_, _, err := client.EnsureConnected(r.Context())
-	if err != nil {
+	if _, _, err := client.EnsureConnected(r.Context()); err != nil {
 		log.Printf("[opencost] EnsureConnected failed (trend): %v", err)
-		writeJSON(w, http.StatusOK, CostTrendResponse{Available: false, Reason: ReasonNoPrometheus})
-		return
-	}
-
-	rangeStr := r.URL.Query().Get("range")
-	start, end, step, label := parseCostTimeRange(rangeStr)
-
-	// Combined CPU + memory allocation cost per namespace over time.
-	// label_replace normalises exported_namespace → namespace when honor_labels=false.
-	query := `sum by (namespace) (
-  label_replace(avg_over_time(container_cpu_allocation{namespace!=""}[1h]), "namespace", "$1", "exported_namespace", "(.+)") * on(node) group_left() node_cpu_hourly_cost
-) + sum by (namespace) (
-  label_replace(avg_over_time(container_memory_allocation_bytes{namespace!=""}[1h]), "namespace", "$1", "exported_namespace", "(.+)") / 1073741824 * on(node) group_left() node_ram_hourly_cost
-)`
-
-	result, err := client.QueryRange(r.Context(), query, start, end, step)
-	if err != nil {
-		log.Printf("[opencost] Trend query failed: %v", err)
-		writeJSON(w, http.StatusOK, CostTrendResponse{Available: false, Reason: ReasonQueryError})
+		writeJSON(w, http.StatusOK, pkgopencost.CostTrendResponse{Available: false, Reason: pkgopencost.ReasonNoPrometheus})
 		return
 	}
-
-	if len(result.Series) == 0 {
-		writeJSON(w, http.StatusOK, CostTrendResponse{Available: false, Reason: ReasonNoMetrics})
-		return
-	}
-
-	// Rank namespaces by latest cost to pick top 8
-	type nsRank struct {
-		ns       string
-		lastCost float64
-		idx      int
-	}
-	ranks := make([]nsRank, 0, len(result.Series))
-	for i, s := range result.Series {
-		ns := s.Labels["namespace"]
-		if ns == "" {
-			continue
-		}
-		var last float64
-		if len(s.DataPoints) > 0 {
-			last = s.DataPoints[len(s.DataPoints)-1].Value
-		}
-		ranks = append(ranks, nsRank{ns: ns, lastCost: last, idx: i})
-	}
-	sort.Slice(ranks, func(i, j int) bool { return ranks[i].lastCost > ranks[j].lastCost })
-
-	const maxSeries = 8
-	topSet := make(map[int]bool)
-	series := make([]CostTrendSeries, 0, maxSeries+1)
-	for i, r := range ranks {
-		if i >= maxSeries {
-			break
-		}
-		topSet[r.idx] = true
-		s := result.Series[r.idx]
-		dps := make([]CostDataPoint, 0, len(s.DataPoints))
-		for _, dp := range s.DataPoints {
-			dps = append(dps, CostDataPoint{Timestamp: dp.Timestamp, Value: roundTo(dp.Value, 4)})
-		}
-		series = append(series, CostTrendSeries{Namespace: r.ns, DataPoints: dps})
-	}
-
-	// Aggregate remaining into "other"
-	if len(ranks) > maxSeries {
-		// Collect all timestamps from any overflow series
-		otherMap := make(map[int64]float64)
-		for i, s := range result.Series {
-			if topSet[i] {
-				continue
-			}
-			for _, dp := range s.DataPoints {
-				otherMap[dp.Timestamp] += dp.Value
-			}
-		}
-		if len(otherMap) > 0 {
-			dps := make([]CostDataPoint, 0, len(otherMap))
-			for ts, val := range otherMap {
-				dps = append(dps, CostDataPoint{Timestamp: ts, Value: roundTo(val, 4)})
-			}
-			sort.Slice(dps, func(i, j int) bool { return dps[i].Timestamp < dps[j].Timestamp })
-			series = append(series, CostTrendSeries{Namespace: "other", DataPoints: dps})
-		}
-	}
-
-	writeJSON(w, http.StatusOK, CostTrendResponse{
-		Available: true,
-		Range:     label,
-		Series:    series,
-	})
+	writeJSON(w, http.StatusOK, pkgopencost.ComputeCostTrendFromProm(
+		r.Context(), client.Prom(), pkgopencost.TrendPromOptions{Range: r.URL.Query().Get("range")}))
 }
 
 // handleNodes returns per-node cost breakdown.
 func handleNodes(w http.ResponseWriter, r *http.Request) {
 	client := prometheuspkg.GetClient()
 	if client == nil {
-		writeJSON(w, http.StatusOK, NodeCostResponse{Available: false, Reason: ReasonNoPrometheus})
+		writeJSON(w, http.StatusOK, pkgopencost.NodeCostResponse{Available: false, Reason: pkgopencost.ReasonNoPrometheus})
 		return
 	}
-
-	_, _, err := client.EnsureConnected(r.Context())
-	if err != nil {
+	if _, _, err := client.EnsureConnected(r.Context()); err != nil {
 		log.Printf("[opencost] EnsureConnected failed (nodes): %v", err)
-		writeJSON(w, http.StatusOK, NodeCostResponse{Available: false, Reason: ReasonNoPrometheus})
+		writeJSON(w, http.StatusOK, pkgopencost.NodeCostResponse{Available: false, Reason: pkgopencost.ReasonNoPrometheus})
 		return
 	}
-
-	// Query per-node total hourly cost (includes labels: node, instance_type, region)
-	totalResult, err := client.Query(r.Context(), `node_total_hourly_cost`)
-	if err != nil {
-		log.Printf("[opencost] Node cost query failed: %v", err)
-		writeJSON(w, http.StatusOK, NodeCostResponse{Available: false, Reason: ReasonQueryError})
-		return
-	}
-	if len(totalResult.Series) == 0 {
-		writeJSON(w, http.StatusOK, NodeCostResponse{Available: false, Reason: ReasonNoMetrics})
-		return
-	}
-
-	// Query per-node CPU and memory costs
-	cpuMap := make(map[string]float64)
-	cpuResult, err := client.Query(r.Context(), `node_cpu_hourly_cost`)
-	if err == nil {
-		for _, s := range cpuResult.Series {
-			node := s.Labels["node"]
-			if node != "" && len(s.DataPoints) > 0 {
-				cpuMap[node] = s.DataPoints[len(s.DataPoints)-1].Value
-			}
-		}
-	}
-
-	memMap := make(map[string]float64)
-	memResult, err := client.Query(r.Context(), `node_ram_hourly_cost`)
-	if err == nil {
-		for _, s := range memResult.Series {
-			node := s.Labels["node"]
-			if node != "" && len(s.DataPoints) > 0 {
-				memMap[node] = s.DataPoints[len(s.DataPoints)-1].Value
-			}
-		}
-	}
-
-	nodes := make([]NodeCost, 0, len(totalResult.Series))
-	for _, s := range totalResult.Series {
-		node := s.Labels["node"]
-		if node == "" || len(s.DataPoints) == 0 {
-			continue
-		}
-		nc := NodeCost{
-			Name:         node,
-			InstanceType: s.Labels["instance_type"],
-			Region:       s.Labels["region"],
-			HourlyCost:   roundTo(s.DataPoints[len(s.DataPoints)-1].Value, 4),
-			CPUCost:      roundTo(cpuMap[node], 4),
-			MemoryCost:   roundTo(memMap[node], 4),
-		}
-		nodes = append(nodes, nc)
-	}
-
-	sort.Slice(nodes, func(i, j int) bool { return nodes[i].HourlyCost > nodes[j].HourlyCost })
-
-	writeJSON(w, http.StatusOK, NodeCostResponse{
-		Available: true,
-		Nodes:     nodes,
-	})
+	writeJSON(w, http.StatusOK, pkgopencost.ComputeNodeCosts(r.Context(), client.Prom()))
 }
diff --git a/internal/prometheus/client.go b/internal/prometheus/client.go
index 97660a51c..6fd4441fa 100644
--- a/internal/prometheus/client.go
+++ b/internal/prometheus/client.go
@@ -2,14 +2,10 @@ package prometheus
 
 import (
 	"context"
-	"encoding/json"
-	"fmt"
-	"io"
+	"errors"
 	"log"
 	"maps"
 	"net/http"
-	"net/url"
-	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -18,19 +14,24 @@ import (
 	"k8s.io/client-go/rest"
 
 	"github.com/skyhook-io/radar/internal/errorlog"
+	"github.com/skyhook-io/radar/pkg/prom"
 )
 
-// Client is a Prometheus HTTP API client with auto-discovery.
+// Client is radar's application-scoped Prometheus client. It holds the
+// K8s-aware state required for kubectl-like port-forward discovery, along
+// with a pkg/prom.Client that performs the actual HTTP calls once an
+// endpoint has been discovered.
 type Client struct {
 	mu sync.RWMutex
 
-	// Discovered/configured connection
-	baseURL  string // e.g. "http://localhost:54321" or "http://prometheus.monitoring.svc:9090"
-	basePath string // e.g. "/select/0/prometheus" for vmselect
+	// Effective connection (populated after discover succeeds).
+	baseURL  string
+	basePath string
+	prom     *prom.Client // cached; rebuilt when baseURL/basePath change or after SetHeaders/Reset clear it
 
 	// Discovery state
 	discovered       bool
-	discoveryService *ServiceInfo // discovered service info for port-forward
+	discoveryService *prom.ServiceInfo // discovered service info for port-forward
 	manualURL        string       // --prometheus-url override
 	headers          map[string]string
 
@@ -39,27 +40,10 @@ type Client struct {
 	k8sConfig   *rest.Config
 	contextName string
 
+	// Shared HTTP client used when constructing the underlying pkg/prom.Client.
 	httpClient *http.Client
 }
 
-// ServiceInfo holds info about a discovered Prometheus service.
-type ServiceInfo struct {
-	Namespace string `json:"namespace"`
-	Name      string `json:"name"`
-	Port      int    `json:"port"`
-	BasePath  string `json:"basePath,omitempty"`
-}
-
-// Status represents the current Prometheus connection status.
-type Status struct {
-	Available   bool         `json:"available"`
-	Connected   bool         `json:"connected"`
-	Address     string       `json:"address,omitempty"`
-	Service     *ServiceInfo `json:"service,omitempty"`
-	ContextName string       `json:"contextName,omitempty"`
-	Error       string       `json:"error,omitempty"`
-}
-
 // Global client instance
 var (
 	globalClient *Client
@@ -75,9 +59,7 @@ func Initialize(client kubernetes.Interface, config *rest.Config, contextName st
 		k8sClient:   client,
 		k8sConfig:   config,
 		contextName: contextName,
-		httpClient: &http.Client{
-			Timeout: 10 * time.Second,
-		},
+		httpClient:  &http.Client{Timeout: 10 * time.Second},
 	}
 }
 
@@ -102,6 +84,9 @@ func SetHeaders(h map[string]string) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 	c.headers = copyHeaders(h)
+	// Drop the cached prom.Client so the next request rebuilds its transport
+	// with the new headers.
+	c.prom = nil
 }
 
 func copyHeaders(h map[string]string) map[string]string {
@@ -113,18 +98,6 @@ func copyHeaders(h map[string]string) map[string]string {
 	return out
 }
 
-// SetURL overrides discovery with a specific Prometheus URL.
-// Clears existing connection state so the next EnsureConnected uses this URL.
-func (c *Client) SetURL(rawURL string) {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	c.manualURL = strings.TrimRight(rawURL, "/")
-	c.baseURL = ""
-	c.basePath = ""
-	c.discovered = false
-	c.discoveryService = nil
-}
-
 // GetClient returns the global Prometheus client (may be nil).
 func GetClient() *Client {
 	clientMu.RLock()
@@ -140,6 +113,7 @@ func Reset() {
 		globalClient.mu.Lock()
 		globalClient.baseURL = ""
 		globalClient.basePath = ""
+		globalClient.prom = nil
 		globalClient.discovered = false
 		globalClient.discoveryService = nil
 		globalClient.mu.Unlock()
@@ -154,10 +128,10 @@ func Reinitialize(client kubernetes.Interface, config *rest.Config, contextName
 	manualURL := ""
 	var headers map[string]string
 	if globalClient != nil {
-		// SetURL / SetHeaders write these under the per-client mutex after
-		// dropping clientMu, so reading without c.mu here would race even
-		// though we hold clientMu exclusively. copyHeaders also detaches the
-		// map from the old client so a late mutation can't bleed through.
+		// SetManualURL / SetHeaders write these under the per-client mutex
+		// after dropping clientMu, so reading without c.mu here would race
+		// even though we hold clientMu exclusively. copyHeaders also detaches
+		// the map from the old client so a late mutation can't bleed through.
 		globalClient.mu.RLock()
 		manualURL = globalClient.manualURL
 		headers = copyHeaders(globalClient.headers)
@@ -170,24 +144,22 @@ func Reinitialize(client kubernetes.Interface, config *rest.Config, contextName
 		contextName: contextName,
 		manualURL:   manualURL,
 		headers:     headers,
-		httpClient: &http.Client{
-			Timeout: 10 * time.Second,
-		},
+		httpClient:  &http.Client{Timeout: 10 * time.Second},
 	}
 }
 
 // GetStatus returns the current Prometheus connection status.
-func (c *Client) GetStatus() Status {
+func (c *Client) GetStatus() prom.Status {
 	c.mu.RLock()
 	defer c.mu.RUnlock()
 
-	var svc *ServiceInfo
+	var svc *prom.ServiceInfo
 	if c.discoveryService != nil {
 		cp := *c.discoveryService
 		svc = &cp
 	}
 
-	return Status{
+	return prom.Status{
 		Available:   c.baseURL != "",
 		Connected:   c.baseURL != "",
 		Address:     c.baseURL,
@@ -196,277 +168,145 @@ func (c *Client) GetStatus() Status {
 	}
 }
 
-// EnsureConnected attempts to discover and connect to Prometheus if not already connected.
-// Returns the base URL and base path, or an error.
+// EnsureConnected attempts to discover and connect to Prometheus if not
+// already connected. Returns the base URL and base path, or an error.
 func (c *Client) EnsureConnected(ctx context.Context) (string, string, error) {
 	c.mu.RLock()
-	if c.baseURL != "" {
-		// Verify cached address still works
-		base := c.baseURL
-		bp := c.basePath
-		c.mu.RUnlock()
-		if c.probe(ctx, base+bp) {
-			return base, bp, nil
+	base := c.baseURL
+	bp := c.basePath
+	c.mu.RUnlock()
+
+	if base != "" {
+		// Probe whatever we already have, building the pkg/prom.Client
+		// on-demand. The cached client may be nil here for two reasons:
+		// (a) a concurrent request hasn't yet primed getPromClient, or
+		// (b) SetHeaders cleared the cache to force a header reload.
+		// In both cases the connection itself is still valid; only the
+		// cached client wrapper needs rebuilding. Pre-extraction probed
+		// solely on base!="", so this preserves that behavior.
+		if p := c.getPromClient(); p != nil {
+			ok, reason := p.Probe(ctx)
+			if ok {
+				return base, bp, nil
+			}
+			log.Printf("[prometheus] cached connection to %s failed probe (reason=%s), rediscovering", base, reason)
+			c.mu.Lock()
+			c.baseURL = ""
+			c.basePath = ""
+			c.prom = nil
+			c.discovered = false
+			c.mu.Unlock()
 		}
-		// Stale — clear and rediscover
-		c.mu.Lock()
-		c.baseURL = ""
-		c.basePath = ""
-		c.discovered = false
-		c.mu.Unlock()
-	} else {
-		c.mu.RUnlock()
 	}
 
 	return c.discover(ctx)
 }
 
-// QueryRange executes a Prometheus range query.
-func (c *Client) QueryRange(ctx context.Context, query string, start, end time.Time, step time.Duration) (*QueryResult, error) {
-	base, basePath, err := c.EnsureConnected(ctx)
-	if err != nil {
-		return nil, err
-	}
-
-	params := url.Values{
-		"query": {query},
-		"start": {strconv.FormatInt(start.Unix(), 10)},
-		"end":   {strconv.FormatInt(end.Unix(), 10)},
-		"step":  {fmt.Sprintf("%.0f", step.Seconds())},
-	}
-
-	reqURL := fmt.Sprintf("%s%s/api/v1/query_range?%s", base, basePath, params.Encode())
-	return c.doQuery(ctx, reqURL)
+// Prom returns the underlying pkg/prom.Client for callers that compose
+// cost math on top of raw Query/QueryRange (e.g.,
+// pkg/opencost.ComputeCostSummaryFromProm). Unlike Query/QueryRange this
+// does NOT call EnsureConnected; callers must have done so to ensure a
+// baseURL is set. Returns nil while baseURL is empty (not yet discovered, or reset).
+func (c *Client) Prom() *prom.Client {
+	return c.getPromClient()
 }
 
-// Query executes a Prometheus instant query.
-func (c *Client) Query(ctx context.Context, query string) (*QueryResult, error) {
-	base, basePath, err := c.EnsureConnected(ctx)
-	if err != nil {
-		return nil, err
-	}
-
-	params := url.Values{
-		"query": {query},
-	}
-
-	reqURL := fmt.Sprintf("%s%s/api/v1/query?%s", base, basePath, params.Encode())
-	return c.doQuery(ctx, reqURL)
-}
-
-func (c *Client) doQuery(ctx context.Context, reqURL string) (*QueryResult, error) {
-	req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
-	if err != nil {
-		return nil, fmt.Errorf("creating request: %w", err)
-	}
-	c.applyHeaders(req)
-
-	resp, err := c.httpClient.Do(req)
-	if err != nil {
-		errorlog.Record("prometheus", "error", "HTTP request failed: %v", err)
-		return nil, fmt.Errorf("querying prometheus: %w", err)
-	}
-	defer resp.Body.Close()
-
-	body, err := io.ReadAll(io.LimitReader(resp.Body, 10<<20)) // 10 MB cap
-	if err != nil {
-		return nil, fmt.Errorf("reading response: %w", err)
-	}
-
-	if resp.StatusCode != http.StatusOK {
-		errorlog.Record("prometheus", "error", "returned status %d: %s", resp.StatusCode, string(body))
-		return nil, fmt.Errorf("prometheus returned status %d: %s", resp.StatusCode, string(body))
+// getPromClient returns a pkg/prom.Client pointed at the current baseURL/basePath,
+// building (and caching) one if necessary. It acquires c.mu itself, so
+// callers must NOT hold c.mu when calling it. Returns nil if baseURL is empty.
+func (c *Client) getPromClient() *prom.Client {
+	c.mu.RLock()
+	if c.prom != nil {
+		p := c.prom
+		c.mu.RUnlock()
+		return p
 	}
+	base, bp, httpC := c.baseURL, c.basePath, c.httpClient
+	headers := copyHeaders(c.headers)
+	c.mu.RUnlock()
 
-	var promResp promResponse
-	if err := json.Unmarshal(body, &promResp); err != nil {
-		return nil, fmt.Errorf("parsing response: %w", err)
+	if base == "" {
+		return nil
 	}
 
-	if promResp.Status != "success" {
-		return nil, fmt.Errorf("prometheus error: %s (%s)", promResp.Error, promResp.ErrorType)
+	tr := prom.NewHTTPTransport(base, bp, httpC)
+	tr.Headers = headers
+	p := prom.NewClient(tr)
+	c.mu.Lock()
+	// Double-check in case another goroutine built one.
+	if c.prom == nil {
+		c.prom = p
+	} else {
+		p = c.prom
 	}
-
-	return parseQueryResult(promResp.Data)
+	c.mu.Unlock()
+	return p
 }
 
-// applyHeaders attaches the configured custom headers to req under the
-// client's read lock, so a concurrent SetHeaders / Reinitialize doesn't race.
-func (c *Client) applyHeaders(req *http.Request) {
+// probe checks if a Prometheus endpoint at `addr` is reachable and has at
+// least one active scrape target, using pkg/prom.Client.Probe. Records a
+// targeted log entry for every non-OK outcome so operators can see why a
+// candidate was rejected — particularly important for auth failures (401/403)
+// and empty instances, which would otherwise silently fall through the
+// discovery candidate list.
+func (c *Client) probe(ctx context.Context, addr string) bool {
 	c.mu.RLock()
-	defer c.mu.RUnlock()
-	for k, v := range c.headers {
-		req.Header.Set(k, v)
+	httpC := c.httpClient
+	headers := copyHeaders(c.headers)
+	c.mu.RUnlock()
+	tr := prom.NewHTTPTransport(addr, "", httpC)
+	tr.Headers = headers
+	ok, reason := prom.NewClient(tr).Probe(ctx)
+	if !ok {
+		logProbeRejection(addr, reason)
 	}
+	return ok
 }
 
-// probe checks if a Prometheus endpoint is reachable and has data.
-// An instance that responds HTTP 200 but returns zero results for "up"
-// (no active scrape targets) is treated as unreachable so discovery
-// continues to the next candidate.
-func (c *Client) probe(ctx context.Context, addr string) bool {
-	testCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
-	defer cancel()
-
-	req, err := http.NewRequestWithContext(testCtx, "GET", addr+"/api/v1/query?query=up", nil)
-	if err != nil {
-		return false
+// logProbeRejection records an appropriate log entry for each rejection
+// reason. Auth failures get errorlog at error level (likely operator
+// misconfiguration); empty instances get warning level (cluster state);
+// other failures use stdlib log so they appear in the discovery audit
+// trail without flooding errorlog.
+func logProbeRejection(addr string, reason prom.ProbeReason) {
+	switch reason {
+	case prom.ProbeReasonAuthError:
+		errorlog.Record("prometheus", "error",
+			"endpoint %s rejected credentials (HTTP 401/403, check --prometheus-header)", addr)
+	case prom.ProbeReasonEmptyInstance:
+		errorlog.Record("prometheus", "warning",
+			"endpoint %s has no active scrape targets (empty instance), skipping", addr)
+	case prom.ProbeReasonNotPrometheus:
+		log.Printf("[prometheus] endpoint %s responded but not in Prometheus format, skipping", addr)
+	case prom.ProbeReasonPromError:
+		log.Printf("[prometheus] endpoint %s returned Prometheus error status, skipping", addr)
+	case prom.ProbeReasonTransportError:
+		log.Printf("[prometheus] endpoint %s unreachable, skipping", addr)
 	}
-	c.applyHeaders(req)
-
-	resp, err := c.httpClient.Do(req)
-	if err != nil {
-		return false
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		// Surface auth failures explicitly — otherwise a misconfigured Bearer
-		// token shows up as "Prometheus not found" after discovery falls
-		// through every candidate.
-		if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden {
-			errorlog.Record("prometheus", "error", "endpoint %s returned HTTP %d (check --prometheus-header credentials)", addr, resp.StatusCode)
-		}
-		return false
-	}
-
-	// Verify the instance actually has scrape targets. An empty VictoriaMetrics
-	// or Prometheus instance returns 200 with zero results — skip it.
-	// 10 MB matches doQuery's limit so a large cluster's `up` response fits.
-	body, err := io.ReadAll(io.LimitReader(resp.Body, 10<<20))
-	if err != nil {
-		return false
-	}
-	var promResp struct {
-		Status string `json:"status"`
-		Data   struct {
-			Result []json.RawMessage `json:"result"`
-		} `json:"data"`
-	}
-	if err := json.Unmarshal(body, &promResp); err != nil {
-		// A 200 response that isn't Prometheus JSON is almost certainly not
-		// Prometheus (captive portal, ingress login page, misconfigured proxy).
-		return false
-	}
-	if promResp.Status != "success" {
-		// Some proxies return 200 with a Prometheus-shaped error body.
-		return false
-	}
-	if len(promResp.Data.Result) == 0 {
-		errorlog.Record("prometheus", "warning", "endpoint %s has no active scrape targets (empty instance), skipping", addr)
-		return false
-	}
-	return true
 }
 
-// Prometheus API response types
-
-type promResponse struct {
-	Status    string          `json:"status"`
-	Data      json.RawMessage `json:"data"`
-	ErrorType string          `json:"errorType,omitempty"`
-	Error     string          `json:"error,omitempty"`
-}
-
-// QueryResult is the parsed result of a Prometheus query.
-type QueryResult struct {
-	ResultType string   `json:"resultType"`
-	Series     []Series `json:"series"`
-}
-
-// Series is a single time series from a Prometheus query.
-type Series struct {
-	Labels     map[string]string `json:"labels"`
-	DataPoints []DataPoint       `json:"dataPoints"`
-}
-
-// DataPoint is a single (timestamp, value) pair.
-type DataPoint struct {
-	Timestamp int64   `json:"timestamp"`
-	Value     float64 `json:"value"`
-}
-
-func parseQueryResult(data json.RawMessage) (*QueryResult, error) {
-	var raw struct {
-		ResultType string `json:"resultType"`
-		Result     []struct {
-			Metric map[string]string `json:"metric"`
-			Values [][]interface{}   `json:"values"` // for matrix
-			Value  []interface{}     `json:"value"`  // for vector
-		} `json:"result"`
-	}
-
-	if err := json.Unmarshal(data, &raw); err != nil {
-		return nil, fmt.Errorf("parsing result: %w", err)
-	}
-
-	result := &QueryResult{
-		ResultType: raw.ResultType,
-		Series:     make([]Series, 0, len(raw.Result)),
+// QueryRange executes a Prometheus range query via the underlying pkg/prom.Client.
+func (c *Client) QueryRange(ctx context.Context, query string, start, end time.Time, step time.Duration) (*prom.QueryResult, error) {
+	if _, _, err := c.EnsureConnected(ctx); err != nil {
+		return nil, err
 	}
-
-	for _, r := range raw.Result {
-		series := Series{
-			Labels: r.Metric,
-		}
-
-		if raw.ResultType == "matrix" {
-			series.DataPoints = make([]DataPoint, 0, len(r.Values))
-			for _, v := range r.Values {
-				dp, err := parseDataPoint(v)
-				if err != nil {
-					log.Printf("[prometheus] Skipping invalid data point: %v", err)
-					continue
-				}
-				series.DataPoints = append(series.DataPoints, dp)
-			}
-		} else if raw.ResultType == "vector" && r.Value != nil {
-			dp, err := parseDataPoint(r.Value)
-			if err != nil {
-				log.Printf("[prometheus] Skipping invalid vector data point: %v", err)
-			} else {
-				series.DataPoints = []DataPoint{dp}
-			}
-		}
-
-		result.Series = append(result.Series, series)
+	p := c.getPromClient()
+	if p == nil {
+		// Concurrent Reset cleared baseURL between EnsureConnected returning
+		// and getPromClient — the connection was reset under us.
+		return nil, errors.New("prometheus connection was reset")
 	}
-
-	return result, nil
+	return p.QueryRange(ctx, query, start, end, step)
 }
 
-func parseDataPoint(v []interface{}) (DataPoint, error) {
-	if len(v) != 2 {
-		return DataPoint{}, fmt.Errorf("expected 2 elements, got %d", len(v))
-	}
-
-	// Timestamp can be float64 or json.Number
-	var ts float64
-	switch t := v[0].(type) {
-	case float64:
-		ts = t
-	case json.Number:
-		var err error
-		ts, err = t.Float64()
-		if err != nil {
-			return DataPoint{}, fmt.Errorf("parsing timestamp: %w", err)
-		}
-	default:
-		return DataPoint{}, fmt.Errorf("unexpected timestamp type: %T", v[0])
-	}
-
-	// Value is always a string in Prometheus responses
-	valStr, ok := v[1].(string)
-	if !ok {
-		return DataPoint{}, fmt.Errorf("expected string value, got %T", v[1])
+// Query executes a Prometheus instant query via the underlying pkg/prom.Client.
+func (c *Client) Query(ctx context.Context, query string) (*prom.QueryResult, error) {
+	if _, _, err := c.EnsureConnected(ctx); err != nil {
+		return nil, err
 	}
-	val, err := strconv.ParseFloat(valStr, 64)
-	if err != nil {
-		return DataPoint{}, fmt.Errorf("parsing value %q: %w", valStr, err)
+	p := c.getPromClient()
+	if p == nil {
+		return nil, errors.New("prometheus connection was reset")
 	}
-
-	return DataPoint{
-		Timestamp: int64(ts),
-		Value:     val,
-	}, nil
+	return p.Query(ctx, query)
 }
diff --git a/internal/prometheus/client_test.go b/internal/prometheus/client_test.go
index 83fb76c11..910644f0c 100644
--- a/internal/prometheus/client_test.go
+++ b/internal/prometheus/client_test.go
@@ -87,7 +87,7 @@ func TestProbe(t *testing.T) {
 	}
 }
 
-func TestHeadersOnQuery(t *testing.T) {
+func TestHeadersOnProbe(t *testing.T) {
 	var gotAuth, gotOrg atomic.Value
 	gotAuth.Store("")
 	gotOrg.Store("")
@@ -96,8 +96,6 @@ func TestHeadersOnQuery(t *testing.T) {
 		gotAuth.Store(r.Header.Get("Authorization"))
 		gotOrg.Store(r.Header.Get("X-Scope-OrgID"))
 		w.WriteHeader(http.StatusOK)
-		// One result so probe()'s "empty instance" check passes; doQuery
-		// doesn't care about the body shape for this test.
 		_, _ = w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"job":"prometheus"},"value":[1700000000,"1"]}]}}`))
 	}))
 	defer srv.Close()
@@ -110,8 +108,8 @@ func TestHeadersOnQuery(t *testing.T) {
 		},
 	}
 
-	if _, err := c.doQuery(context.Background(), srv.URL+"/api/v1/query?query=up"); err != nil {
-		t.Fatalf("doQuery failed: %v", err)
+	if !c.probe(context.Background(), srv.URL) {
+		t.Fatal("probe() returned false for healthy server")
 	}
 	if got := gotAuth.Load().(string); got != "Bearer test-token" {
 		t.Errorf("Authorization header = %q, want %q", got, "Bearer test-token")
@@ -119,16 +117,6 @@ func TestHeadersOnQuery(t *testing.T) {
 	if got := gotOrg.Load().(string); got != "tenant-7" {
 		t.Errorf("X-Scope-OrgID header = %q, want %q", got, "tenant-7")
 	}
-
-	// probe() must carry the same headers — otherwise discovery would 401
-	// against an auth-protected endpoint before any real query runs.
-	gotAuth.Store("")
-	if !c.probe(context.Background(), srv.URL) {
-		t.Fatal("probe() returned false for healthy server")
-	}
-	if got := gotAuth.Load().(string); got != "Bearer test-token" {
-		t.Errorf("probe Authorization header = %q, want %q", got, "Bearer test-token")
-	}
 }
 
 func TestHeadersNoneWhenUnset(t *testing.T) {
@@ -139,13 +127,13 @@ func TestHeadersNoneWhenUnset(t *testing.T) {
 			sawAuth.Store(true)
 		}
 		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[]}}`))
+		_, _ = w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"job":"prometheus"},"value":[1700000000,"1"]}]}}`))
 	}))
 	defer srv.Close()
 
 	c := &Client{httpClient: &http.Client{Timeout: 5 * time.Second}}
-	if _, err := c.doQuery(context.Background(), srv.URL+"/api/v1/query?query=up"); err != nil {
-		t.Fatalf("doQuery failed: %v", err)
+	if !c.probe(context.Background(), srv.URL) {
+		t.Fatal("probe() returned false for healthy server")
 	}
 	if sawAuth.Load() {
 		t.Error("Authorization header sent when none configured")
diff --git a/internal/prometheus/discovery.go b/internal/prometheus/discovery.go
index 4994903c5..10cceede8 100644
--- a/internal/prometheus/discovery.go
+++ b/internal/prometheus/discovery.go
@@ -4,76 +4,27 @@ import (
 	"context"
 	"fmt"
 	"log"
-	"sort"
 	"strings"
 
-	corev1 "k8s.io/api/core/v1"
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-
 	"github.com/skyhook-io/radar/internal/errorlog"
 	"github.com/skyhook-io/radar/internal/portforward"
+	"github.com/skyhook-io/radar/pkg/prom"
 )
 
-// Well-known Prometheus/VictoriaMetrics service locations
-// (similar to traffic/caretta.go but with different ordering for workload metrics discovery).
-var wellKnownLocations = []struct {
-	namespace string
-	name      string
-	port      int    // 0 = use service's first port
-	basePath  string // sub-path for Prometheus API
-}{
-	// VictoriaMetrics — monitoring namespace first (workload metrics)
-	{"monitoring", "victoria-metrics-victoria-metrics-single-server", 8428, ""},
-	{"monitoring", "victoria-metrics-single-server", 8428, ""},
-	{"monitoring", "vmsingle", 8428, ""},
-	{"monitoring", "vmselect", 8481, "/select/0/prometheus"},
-	{"victoria-metrics", "victoria-metrics-victoria-metrics-single-server", 8428, ""},
-	{"victoria-metrics", "victoria-metrics-single-server", 8428, ""},
-	{"victoria-metrics", "vmsingle", 8428, ""},
-	{"victoria-metrics", "vmselect", 8481, "/select/0/prometheus"},
-	// kube-prometheus-stack
-	{"monitoring", "kube-prometheus-stack-prometheus", 9090, ""},
-	{"monitoring", "prometheus-kube-prometheus-prometheus", 9090, ""},
-	{"monitoring", "prometheus-operated", 9090, ""},
-	// Standard Prometheus
-	{"opencost", "prometheus-server", 0, ""},
-	{"monitoring", "prometheus-server", 0, ""},
-	{"prometheus", "prometheus-server", 0, ""},
-	{"observability", "prometheus-server", 0, ""},
-	{"metrics", "prometheus-server", 0, ""},
-	{"kube-system", "prometheus", 0, ""},
-	{"default", "prometheus", 0, ""},
-	// VictoriaMetrics — caretta namespace (traffic-specific, may lack workload metrics)
-	{"caretta", "caretta-vm", 8428, ""},
-}
-
-// Namespaces commonly used for metrics services
-var metricsNamespaces = map[string]bool{
-	"monitoring":       true,
-	"prometheus":       true,
-	"observability":    true,
-	"metrics":          true,
-	"victoria-metrics": true,
-	"caretta":          true,
-	"opencost":         true,
-}
-
-// Namespaces to skip during dynamic discovery
-var skipNamespaces = map[string]bool{
-	"kube-public":     true,
-	"kube-node-lease": true,
-}
-
 // discover finds and connects to Prometheus using a multi-layer approach:
 //  1. Manual URL override (--prometheus-url)
 //  2. Existing traffic system port-forward
-//  3. Well-known service locations
-//  4. Dynamic cluster-wide discovery with scoring
+//  3. Well-known service locations (via pkg/prom.Discover)
+//  4. Dynamic cluster-wide discovery with scoring (via pkg/prom.Discover)
+//
+// Well-known + dynamic candidate enumeration lives in pkg/prom.Discover so
+// it can be shared by any consumer of the package. This function owns
+// Radar's port-forward fallback, which is only needed when Radar runs
+// outside the cluster and can't reach in-cluster Service DNS directly.
 //
 // The lock is only held briefly to read/write state, not during network I/O.
 func (c *Client) discover(ctx context.Context) (string, string, error) {
-	// Layer 1: Manual URL override (read under lock)
+	// Layer 1: Manual URL override
 	c.mu.RLock()
 	manualURL := c.manualURL
 	contextName := c.contextName
@@ -91,7 +42,7 @@ func (c *Client) discover(ctx context.Context) (string, string, error) {
 		return "", "", fmt.Errorf("manual Prometheus URL %s not reachable", addr)
 	}
 
-	// Layer 2: Check if traffic system already has a port-forward
+	// Layer 2: Reuse traffic system's existing port-forward if present
 	if pfAddr := portforward.GetAddress(contextName); pfAddr != "" {
 		if c.probe(ctx, pfAddr) {
 			log.Printf("[prometheus] Using traffic system port-forward: %s", pfAddr)
@@ -104,324 +55,97 @@ func (c *Client) discover(ctx context.Context) (string, string, error) {
 		return "", "", fmt.Errorf("no Kubernetes client available for discovery")
 	}
 
-	// Layer 3: Well-known service locations — try each reachable candidate
-	candidates := c.findWellKnownServices(ctx)
-	if len(candidates) > 0 {
-		log.Printf("[prometheus] Found %d well-known service(s), probing...", len(candidates))
+	// Layers 3 + 4: Enumerate candidates via the shared pkg/prom discovery
+	// logic. Well-known first, then dynamic fallbacks.
+	candidates, err := prom.Discover(ctx, k8sClient, prom.DiscoverOptions{
+		IncludeDynamic: true,
+		Logger: func(format string, args ...interface{}) {
+			log.Printf("[prometheus] "+format, args...)
+		},
+	})
+	if err != nil {
+		log.Printf("[prometheus] Discover error: %v", err)
 	}
-
-	for _, info := range candidates {
-		if c.probe(ctx, info.clusterAddr+info.basePath) {
-			log.Printf("[prometheus] Connected to %s/%s at %s", info.namespace, info.name, info.clusterAddr)
-			c.setDiscoveryService(info)
-			c.markConnected(info.clusterAddr, info.basePath)
-			return info.clusterAddr, info.basePath, nil
-		}
-		log.Printf("[prometheus] Well-known service %s/%s not reachable in-cluster, trying next...", info.namespace, info.name)
+	if len(candidates) == 0 {
+		errorlog.Record("prometheus", "warning", "no Prometheus service found in cluster")
+		return "", "", fmt.Errorf("no Prometheus service found in cluster")
 	}
 
-	// If well-known services exist but none reachable in-cluster, try port-forward on first candidate
-	if len(candidates) > 0 {
-		info := candidates[0]
-		log.Printf("[prometheus] No well-known service reachable in-cluster, trying port-forward to %s/%s...", info.namespace, info.name)
-		c.setDiscoveryService(info)
+	log.Printf("[prometheus] Found %d candidate(s), probing...", len(candidates))
 
-		connInfo, pfErr := portforward.Start(ctx, info.namespace, info.name, info.targetPort, contextName)
-		if pfErr == nil {
-			addr := connInfo.Address
-			if c.probe(ctx, addr+info.basePath) {
-				c.markConnected(addr, info.basePath)
-				return addr, info.basePath, nil
-			}
-			log.Printf("[prometheus] Well-known service %s/%s not responding after port-forward, falling back to dynamic discovery", info.namespace, info.name)
-			portforward.Stop()
-		} else {
-			errorlog.Record("prometheus", "error", "port-forward to %s/%s failed: %v", info.namespace, info.name, pfErr)
+	// First pass: probe each candidate at its in-cluster address. Works when
+	// radar is running in-cluster OR when the user's shell can route to the
+	// cluster DNS (rare, but cheap to try).
+	for _, cand := range candidates {
+		addr := cand.ClusterAddr + cand.BasePath
+		if c.probe(ctx, addr) {
+			log.Printf("[prometheus] Connected to %s/%s at %s (source=%s, score=%d)",
+				cand.Namespace, cand.Name, cand.ClusterAddr, cand.Source, cand.Score)
+			c.setDiscoveryServiceFromCandidate(cand)
+			c.markConnected(cand.ClusterAddr, cand.BasePath)
+			return cand.ClusterAddr, cand.BasePath, nil
 		}
 	}
 
-	// Layer 4: Dynamic discovery
-	info := c.discoverDynamic(ctx)
-	if info == nil {
-		c.mu.Lock()
-		c.discoveryService = nil
-		c.mu.Unlock()
-		errorlog.Record("prometheus", "warning", "no Prometheus service found in cluster")
-		return "", "", fmt.Errorf("no Prometheus service found in cluster")
-	}
+	// Fallback: try port-forwarding candidates in priority order. This path is
+	// normally reached when Radar runs outside the cluster, where in-cluster
+	// Service DNS cannot resolve from the user's machine.
+	var lastErr error
+	for _, cand := range candidates {
+		log.Printf("[prometheus] No candidate reachable in-cluster, starting port-forward to %s/%s...",
+			cand.Namespace, cand.Name)
+		c.setDiscoveryServiceFromCandidate(cand)
 
-	c.setDiscoveryService(info)
+		connInfo, pfErr := portforward.Start(ctx, cand.Namespace, cand.Name, cand.TargetPort, contextName)
+		if pfErr != nil {
+			lastErr = fmt.Errorf("port-forward to %s/%s failed: %w", cand.Namespace, cand.Name, pfErr)
+			errorlog.Record("prometheus", "error", "port-forward to %s/%s failed: %v", cand.Namespace, cand.Name, pfErr)
+			continue
+		}
 
-	if c.probe(ctx, info.clusterAddr+info.basePath) {
-		log.Printf("[prometheus] Connected to %s/%s at %s (dynamic)", info.namespace, info.name, info.clusterAddr)
-		c.markConnected(info.clusterAddr, info.basePath)
-		return info.clusterAddr, info.basePath, nil
-	}
+		addr := connInfo.Address
+		if c.probe(ctx, addr+cand.BasePath) {
+			c.markConnected(addr, cand.BasePath)
+			return addr, cand.BasePath, nil
+		}
 
-	log.Printf("[prometheus] Service %s/%s not reachable in-cluster, starting port-forward...", info.namespace, info.name)
-	connInfo, err := portforward.Start(ctx, info.namespace, info.name, info.targetPort, contextName)
-	if err != nil {
-		errorlog.Record("prometheus", "error", "port-forward to %s/%s failed: %v", info.namespace, info.name, err)
-		return "", "", fmt.Errorf("port-forward to %s/%s failed: %w", info.namespace, info.name, err)
+		portforward.Stop()
+		lastErr = fmt.Errorf("Prometheus at %s/%s not responding after port-forward", cand.Namespace, cand.Name)
+		errorlog.Record("prometheus", "error", "Prometheus at %s/%s not responding after port-forward", cand.Namespace, cand.Name)
 	}
 
-	addr := connInfo.Address
-	if c.probe(ctx, addr+info.basePath) {
-		c.markConnected(addr, info.basePath)
-		return addr, info.basePath, nil
+	c.mu.Lock()
+	c.discoveryService = nil
+	c.mu.Unlock()
+	if lastErr != nil {
+		return "", "", lastErr
 	}
-
-	portforward.Stop()
-	errorlog.Record("prometheus", "error", "Prometheus at %s/%s not responding after port-forward", info.namespace, info.name)
-	return "", "", fmt.Errorf("Prometheus at %s/%s not responding after port-forward", info.namespace, info.name)
+	return "", "", fmt.Errorf("no Prometheus service found in cluster")
 }
 
-// setDiscoveryService records the discovered service metadata under write lock.
-func (c *Client) setDiscoveryService(info *serviceInfo) {
+// setDiscoveryServiceFromCandidate records the discovered service metadata
+// from a pkg/prom.Candidate.
+func (c *Client) setDiscoveryServiceFromCandidate(cand prom.Candidate) {
 	c.mu.Lock()
-	c.discoveryService = &ServiceInfo{
-		Namespace: info.namespace,
-		Name:      info.name,
-		Port:      info.port,
-		BasePath:  info.basePath,
+	c.discoveryService = &prom.ServiceInfo{
+		Namespace: cand.Namespace,
+		Name:      cand.Name,
+		Port:      cand.Port,
+		BasePath:  cand.BasePath,
 	}
 	c.mu.Unlock()
 }
 
-// markConnected records the active connection and marks discovery as complete.
+// markConnected records the active connection and marks discovery as
+// complete. Also clears any cached pkg/prom.Client so the next
+// getPromClient rebuilds against the (possibly new) address — otherwise
+// a stale cached client could survive a discovery that landed on a
+// different endpoint.
 func (c *Client) markConnected(addr, basePath string) {
 	c.mu.Lock()
 	c.baseURL = addr
 	c.basePath = basePath
+	c.prom = nil
 	c.discovered = true
 	c.mu.Unlock()
 }
-
-type serviceInfo struct {
-	namespace   string
-	name        string
-	port        int // service port (for cluster-internal address)
-	targetPort  int // container port (for port-forwarding to pod)
-	clusterAddr string
-	basePath    string
-}
-
-func (c *Client) findWellKnownServices(ctx context.Context) []*serviceInfo {
-	c.mu.RLock()
-	k8sClient := c.k8sClient
-	c.mu.RUnlock()
-
-	var results []*serviceInfo
-	for _, loc := range wellKnownLocations {
-		svc, err := k8sClient.CoreV1().Services(loc.namespace).Get(ctx, loc.name, metav1.GetOptions{})
-		if err != nil {
-			if !apierrors.IsNotFound(err) {
-				log.Printf("[prometheus] Error checking well-known service %s/%s: %v", loc.namespace, loc.name, err)
-			}
-			continue
-		}
-
-		port := resolvePort(*svc, loc.port)
-		addr := buildClusterAddr(svc.Name, svc.Namespace, svc.Spec.ClusterIP, port)
-		tp := resolveTargetPort(*svc, port)
-
-		log.Printf("[prometheus] Found well-known service: %s/%s:%d (targetPort=%d)", svc.Namespace, svc.Name, port, tp)
-		results = append(results, &serviceInfo{
-			namespace:   svc.Namespace,
-			name:        svc.Name,
-			port:        port,
-			targetPort:  tp,
-			clusterAddr: addr,
-			basePath:    loc.basePath,
-		})
-	}
-	return results
-}
-
-type scoredCandidate struct {
-	info  serviceInfo
-	score int
-}
-
-func (c *Client) discoverDynamic(ctx context.Context) *serviceInfo {
-	log.Printf("[prometheus] Starting dynamic discovery...")
-
-	c.mu.RLock()
-	k8sClient := c.k8sClient
-	c.mu.RUnlock()
-
-	svcs, err := k8sClient.CoreV1().Services("").List(ctx, metav1.ListOptions{})
-	if err != nil {
-		log.Printf("[prometheus] Failed to list services: %v", err)
-		return nil
-	}
-
-	var candidates []scoredCandidate
-	for _, svc := range svcs.Items {
-		score, bp := scoreService(svc)
-		if score <= 0 {
-			continue
-		}
-		port := resolvePort(svc, 0)
-		candidates = append(candidates, scoredCandidate{
-			info: serviceInfo{
-				namespace:   svc.Namespace,
-				name:        svc.Name,
-				port:        port,
-				targetPort:  resolveTargetPort(svc, port),
-				clusterAddr: buildClusterAddr(svc.Name, svc.Namespace, svc.Spec.ClusterIP, port),
-				basePath:    bp,
-			},
-			score: score,
-		})
-	}
-
-	if len(candidates) == 0 {
-		log.Printf("[prometheus] Dynamic discovery found no candidates")
-		return nil
-	}
-
-	sort.Slice(candidates, func(i, j int) bool {
-		return candidates[i].score > candidates[j].score
-	})
-
-	limit := min(len(candidates), 5)
-	log.Printf("[prometheus] Found %d candidates, top %d:", len(candidates), limit)
-	for i := range limit {
-		log.Printf("[prometheus]   %s/%s (score=%d)", candidates[i].info.namespace, candidates[i].info.name, candidates[i].score)
-	}
-
-	// Validate top candidates (no lock held during probes)
-	for i := range limit {
-		cand := &candidates[i]
-		addr := cand.info.clusterAddr
-
-		if c.probe(ctx, addr+cand.info.basePath) {
-			log.Printf("[prometheus] Validated: %s/%s", cand.info.namespace, cand.info.name)
-			return &cand.info
-		}
-	}
-
-	// Return best unvalidated candidate (caller will port-forward)
-	best := &candidates[0]
-	log.Printf("[prometheus] No candidates reachable in-cluster, returning best: %s/%s (score=%d)",
-		best.info.namespace, best.info.name, best.score)
-	return &best.info
-}
-
-// scoreService computes a heuristic score for a service being Prometheus-compatible.
-func scoreService(svc corev1.Service) (score int, basePath string) {
-	labels := svc.Labels
-	name := svc.Name
-	ns := svc.Namespace
-
-	if svc.Spec.Type == corev1.ServiceTypeExternalName {
-		return 0, ""
-	}
-	if skipNamespaces[ns] {
-		return 0, ""
-	}
-
-	// Label signals
-	appName := labels["app.kubernetes.io/name"]
-	appLabel := labels["app"]
-	component := labels["app.kubernetes.io/component"]
-
-	switch appName {
-	case "prometheus":
-		score += 100
-	case "victoria-metrics-single", "vmsingle":
-		score += 100
-	case "vmselect":
-		score += 90
-		basePath = "/select/0/prometheus"
-	case "thanos-query", "thanos-querier":
-		score += 80
-	}
-
-	switch appLabel {
-	case "prometheus", "prometheus-server":
-		score += 80
-	case "vmsingle":
-		score += 80
-	case "vmselect":
-		score += 80
-		basePath = "/select/0/prometheus"
-	}
-
-	if score > 0 && component == "server" {
-		score += 20
-	}
-
-	// Port signals
-	for _, p := range svc.Spec.Ports {
-		switch p.Port {
-		case 9090: // Prometheus default
-			score += 30
-		case 8428: // VictoriaMetrics single-node default
-			score += 30
-		case 8481: // VictoriaMetrics vmselect default
-			score += 25
-		case 9009: // Thanos Query default
-			score += 25
-		}
-		if strings.Contains(strings.ToLower(p.Name), "prometheus") {
-			score += 10
-		}
-	}
-
-	// Name signals
-	nameLower := strings.ToLower(name)
-	if strings.Contains(nameLower, "prometheus") {
-		score += 20
-	}
-	if strings.Contains(nameLower, "victoria") || strings.Contains(nameLower, "vmsingle") || strings.Contains(nameLower, "vmselect") {
-		score += 20
-		if strings.Contains(nameLower, "vmselect") && basePath == "" {
-			basePath = "/select/0/prometheus"
-		}
-	}
-	if strings.Contains(nameLower, "thanos") {
-		score += 15
-	}
-
-	// Namespace signal
-	if metricsNamespaces[ns] {
-		score += 10
-	}
-
-	return score, basePath
-}
-
-func resolvePort(svc corev1.Service, defaultPort int) int {
-	if defaultPort != 0 {
-		return defaultPort
-	}
-	if len(svc.Spec.Ports) > 0 {
-		return int(svc.Spec.Ports[0].Port)
-	}
-	return 80
-}
-
-// resolveTargetPort returns the container port for port-forwarding.
-// When the service port differs from the container's targetPort (e.g., service:80 → container:9090),
-// port-forwarding needs the container port since it bypasses the Service and connects directly to the pod.
-func resolveTargetPort(svc corev1.Service, servicePort int) int {
-	for _, p := range svc.Spec.Ports {
-		if int(p.Port) == servicePort {
-			if p.TargetPort.IntVal > 0 {
-				return int(p.TargetPort.IntVal)
-			}
-			// targetPort unset or zero defaults to the service port
-			return servicePort
-		}
-	}
-	return servicePort
-}
-
-func buildClusterAddr(name, namespace, clusterIP string, port int) string {
-	if clusterIP == "None" {
-		return fmt.Sprintf("http://%s-0.%s.%s.svc.cluster.local:%d", name, name, namespace, port)
-	}
-	return fmt.Sprintf("http://%s.%s.svc.cluster.local:%d", name, namespace, port)
-}
diff --git a/internal/prometheus/handlers.go b/internal/prometheus/handlers.go
index 8b223b406..a7239423a 100644
--- a/internal/prometheus/handlers.go
+++ b/internal/prometheus/handlers.go
@@ -6,13 +6,13 @@ import (
 	"fmt"
 	"log"
 	"net/http"
-	"net/url"
 	"strings"
 	"time"
 
 	"github.com/go-chi/chi/v5"
 	"github.com/skyhook-io/radar/internal/errorlog"
 	"github.com/skyhook-io/radar/internal/k8s"
+	"github.com/skyhook-io/radar/pkg/prom"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/labels"
 )
@@ -46,14 +46,17 @@ func writeError(w http.ResponseWriter, status int, msg string) {
 func handleStatus(w http.ResponseWriter, r *http.Request) {
 	client := GetClient()
 	if client == nil {
-		writeJSON(w, http.StatusOK, Status{Available: false, Error: "Prometheus client not initialized"})
+		writeJSON(w, http.StatusOK, prom.Status{Available: false, Error: "Prometheus client not initialized"})
 		return
 	}
 	writeJSON(w, http.StatusOK, client.GetStatus())
 }
 
-// handleConnect triggers Prometheus discovery and connection.
-// Accepts optional "url" query param to override discovery with a specific endpoint.
+// handleConnect triggers Prometheus discovery and connection. The endpoint
+// has no body or query parameters — the Prometheus URL is configured at
+// process startup via --prometheus-url, never per-request. Accepting a URL
+// here would let any caller redirect Prometheus queries to an arbitrary
+// host (SSRF) since radar binds to 0.0.0.0 by default.
 func handleConnect(w http.ResponseWriter, r *http.Request) {
 	client := GetClient()
 	if client == nil {
@@ -61,16 +64,6 @@ func handleConnect(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	// Allow URL override via query param (resets existing connection)
-	if overrideURL := r.URL.Query().Get("url"); overrideURL != "" {
-		u, err := url.Parse(overrideURL)
-		if err != nil || (u.Scheme != "http" && u.Scheme != "https") {
-			writeError(w, http.StatusBadRequest, "invalid URL: must be a valid HTTP(S) URL")
-			return
-		}
-		client.SetURL(overrideURL)
-	}
-
 	_, _, err := client.EnsureConnected(r.Context())
 	if err != nil {
 		log.Printf("[prometheus] Connection failed: %v", err)
@@ -136,10 +129,10 @@ type ResourceMetricsResponse struct {
 	Kind      string         `json:"kind"`
 	Namespace string         `json:"namespace,omitempty"`
 	Name      string         `json:"name"`
-	Category  MetricCategory `json:"category"`
+	Category  prom.MetricCategory `json:"category"`
 	Unit      string         `json:"unit"`
 	Range     string         `json:"range"`
-	Result    *QueryResult   `json:"result"`
+	Result    *prom.QueryResult   `json:"result"`
 	Query     string         `json:"query,omitempty"` // PromQL query used (included when result is empty for diagnostics)
 	Hint      string         `json:"hint,omitempty"`  // Contextual hint when results are empty (e.g. cri-docker label issues)
 }
@@ -157,14 +150,14 @@ func handleResourceMetrics(w http.ResponseWriter, r *http.Request) {
 	namespace := chi.URLParam(r, "namespace")
 	name := chi.URLParam(r, "name")
 
-	category := MetricCategory(r.URL.Query().Get("category"))
+	category := prom.MetricCategory(r.URL.Query().Get("category"))
 	if category == "" {
-		category = CategoryCPU
+		category = prom.CategoryCPU
 	}
 
 	// Validate kind is supported
 	supported := false
-	for _, k := range SupportedKinds() {
+	for _, k := range prom.SupportedKinds() {
 		if strings.EqualFold(k, kind) {
 			kind = k // normalize casing
 			supported = true
@@ -177,7 +170,7 @@ func handleResourceMetrics(w http.ResponseWriter, r *http.Request) {
 	}
 
 	// Validate category
-	validCategories := CategoriesForKind(kind)
+	validCategories := prom.CategoriesForKind(kind)
 	categoryValid := false
 	for _, c := range validCategories {
 		if c == category {
@@ -190,7 +183,7 @@ func handleResourceMetrics(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	query := BuildQuery(kind, namespace, name, category)
+	query := prom.BuildQuery(kind, namespace, name, category)
 	if query == "" {
 		writeError(w, http.StatusBadRequest, "cannot build query for "+kind+"/"+string(category))
 		return
@@ -201,22 +194,22 @@ func handleResourceMetrics(w http.ResponseWriter, r *http.Request) {
 
 	result, err := client.QueryRange(r.Context(), query, start, end, step)
 	if err != nil {
-		log.Printf("[prometheus] Query failed for %s/%s/%s (%s): %v", kind, namespace, name, category, err)
-		errorlog.Record("prometheus", "error", "query failed for %s/%s/%s (%s): %v", kind, namespace, name, category, err)
+		log.Printf("[prometheus] Query failed for %q/%q/%q (%q): %v", kind, namespace, name, category, err)
+		errorlog.Record("prometheus", "error", "query failed for %q/%q/%q (%q): %v", kind, namespace, name, category, err)
 		writeError(w, http.StatusBadGateway, "Prometheus query failed: "+err.Error())
 		return
 	}
 
 	result, query = retryWithoutContainerFilter(r.Context(), client, result, query, category, start, end, step,
-		func() string { return BuildQueryNoContainerFilter(kind, namespace, name, category) },
-		fmt.Sprintf("Primary query empty for %s/%s/%s (%s)", kind, namespace, name, category))
+		func() string { return prom.BuildQueryNoContainerFilter(kind, namespace, name, category) },
+		fmt.Sprintf("Primary query empty for %q/%q/%q (%q)", kind, namespace, name, category))
 
 	resp := ResourceMetricsResponse{
 		Kind:      kind,
 		Namespace: namespace,
 		Name:      name,
 		Category:  category,
-		Unit:      CategoryUnitForKind(kind, category),
+		Unit:      prom.CategoryUnitForKind(kind, category),
 		Range:     rangeStr,
 		Result:    result,
 	}
@@ -225,8 +218,8 @@ func handleResourceMetrics(w http.ResponseWriter, r *http.Request) {
 	if len(result.Series) == 0 {
 		resp.Query = query
 		resp.Hint = detectCRIDockerHint(kind, namespace, name)
-		log.Printf("[prometheus] Empty result for %s/%s/%s (%s), query: %s", kind, namespace, name, category, query)
-		errorlog.Record("prometheus", "warning", "empty result for %s/%s/%s (%s), query: %s", kind, namespace, name, category, query)
+		log.Printf("[prometheus] Empty result for %q/%q/%q (%q), query: %q", kind, namespace, name, category, query)
+		errorlog.Record("prometheus", "warning", "empty result for %q/%q/%q (%q), query: %q", kind, namespace, name, category, query)
 	}
 	writeJSON(w, http.StatusOK, resp)
 }
@@ -249,12 +242,12 @@ func handleClusterScopedResourceMetrics(w http.ResponseWriter, r *http.Request)
 	}
 	kind = "Node"
 
-	category := MetricCategory(r.URL.Query().Get("category"))
+	category := prom.MetricCategory(r.URL.Query().Get("category"))
 	if category == "" {
-		category = CategoryCPU
+		category = prom.CategoryCPU
 	}
 
-	validCategories := CategoriesForKind(kind)
+	validCategories := prom.CategoriesForKind(kind)
 	categoryValid := false
 	for _, c := range validCategories {
 		if c == category {
@@ -267,7 +260,7 @@ func handleClusterScopedResourceMetrics(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
-	query := BuildQuery(kind, "", name, category)
+	query := prom.BuildQuery(kind, "", name, category)
 	if query == "" {
 		writeError(w, http.StatusBadRequest, "cannot build query for "+kind+"/"+string(category))
 		return
@@ -278,8 +271,8 @@ func handleClusterScopedResourceMetrics(w http.ResponseWriter, r *http.Request)
 
 	result, err := client.QueryRange(r.Context(), query, start, end, step)
 	if err != nil {
-		log.Printf("[prometheus] Query failed for %s/%s (%s): %v", kind, name, category, err)
-		errorlog.Record("prometheus", "error", "query failed for %s/%s (%s): %v", kind, name, category, err)
+		log.Printf("[prometheus] Query failed for %q/%q (%q): %v", kind, name, category, err)
+		errorlog.Record("prometheus", "error", "query failed for %q/%q (%q): %v", kind, name, category, err)
 		writeError(w, http.StatusBadGateway, "Prometheus query failed: "+err.Error())
 		return
 	}
@@ -288,14 +281,14 @@ func handleClusterScopedResourceMetrics(w http.ResponseWriter, r *http.Request)
 		Kind:     kind,
 		Name:     name,
 		Category: category,
-		Unit:     CategoryUnitForKind(kind, category),
+		Unit:     prom.CategoryUnitForKind(kind, category),
 		Range:    rangeStr,
 		Result:   result,
 	}
 	if len(result.Series) == 0 {
 		resp.Query = query
-		log.Printf("[prometheus] Empty result for %s/%s (%s), query: %s", kind, name, category, query)
-		errorlog.Record("prometheus", "warning", "empty result for %s/%s (%s), query: %s", kind, name, category, query)
+		log.Printf("[prometheus] Empty result for %q/%q (%q), query: %q", kind, name, category, query)
+		errorlog.Record("prometheus", "warning", "empty result for %q/%q (%q), query: %q", kind, name, category, query)
 	}
 	writeJSON(w, http.StatusOK, resp)
 }
@@ -303,10 +296,10 @@ func handleClusterScopedResourceMetrics(w http.ResponseWriter, r *http.Request)
 // NamespaceMetricsResponse is the response shape for namespace-level metrics.
 type NamespaceMetricsResponse struct {
-	Namespace string         `json:"namespace"`
-	Category  MetricCategory `json:"category"`
-	Unit      string         `json:"unit"`
-	Range     string         `json:"range"`
-	Result    *QueryResult   `json:"result"`
+	Namespace string              `json:"namespace"`
+	Category  prom.MetricCategory `json:"category"`
+	Unit      string              `json:"unit"`
+	Range     string              `json:"range"`
+	Result    *prom.QueryResult   `json:"result"`
 }
 
 // handleNamespaceMetrics returns aggregate metrics for a namespace.
@@ -318,12 +311,12 @@ func handleNamespaceMetrics(w http.ResponseWriter, r *http.Request) {
 	}
 
 	namespace := chi.URLParam(r, "namespace")
-	category := MetricCategory(r.URL.Query().Get("category"))
+	category := prom.MetricCategory(r.URL.Query().Get("category"))
 	if category == "" {
-		category = CategoryCPU
+		category = prom.CategoryCPU
 	}
 
-	query := BuildNamespaceQuery(namespace, category)
+	query := prom.BuildNamespaceQuery(namespace, category)
 	if query == "" {
 		writeError(w, http.StatusBadRequest, "unsupported category for namespace: "+string(category))
 		return
@@ -334,20 +327,20 @@ func handleNamespaceMetrics(w http.ResponseWriter, r *http.Request) {
 
 	result, err := client.QueryRange(r.Context(), query, start, end, step)
 	if err != nil {
-		log.Printf("[prometheus] Namespace query failed for %s (%s): %v", namespace, category, err)
-		errorlog.Record("prometheus", "error", "namespace query failed for %s (%s): %v", namespace, category, err)
+		log.Printf("[prometheus] Namespace query failed for %q (%q): %v", namespace, category, err)
+		errorlog.Record("prometheus", "error", "namespace query failed for %q (%q): %v", namespace, category, err)
 		writeError(w, http.StatusBadGateway, "Prometheus query failed: "+err.Error())
 		return
 	}
 
 	result, _ = retryWithoutContainerFilter(r.Context(), client, result, query, category, start, end, step,
-		func() string { return BuildNamespaceQueryNoContainerFilter(namespace, category) },
-		fmt.Sprintf("Namespace query empty for %s (%s)", namespace, category))
+		func() string { return prom.BuildNamespaceQueryNoContainerFilter(namespace, category) },
+		fmt.Sprintf("Namespace query empty for %q (%q)", namespace, category))
 
 	writeJSON(w, http.StatusOK, NamespaceMetricsResponse{
 		Namespace: namespace,
 		Category:  category,
-		Unit:      CategoryUnit(category),
+		Unit:      prom.CategoryUnit(category),
 		Range:     rangeStr,
 		Result:    result,
 	})
@@ -355,10 +348,10 @@ func handleNamespaceMetrics(w http.ResponseWriter, r *http.Request) {
 
 // ClusterMetricsResponse is the response shape for cluster-level metrics.
 type ClusterMetricsResponse struct {
-	Category MetricCategory `json:"category"`
-	Unit     string         `json:"unit"`
-	Range    string         `json:"range"`
-	Result   *QueryResult   `json:"result"`
+	Category prom.MetricCategory `json:"category"`
+	Unit     string              `json:"unit"`
+	Range    string              `json:"range"`
+	Result   *prom.QueryResult   `json:"result"`
 }
 
 // handleClusterMetrics returns aggregate metrics for the entire cluster.
@@ -369,12 +362,12 @@ func handleClusterMetrics(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	category := MetricCategory(r.URL.Query().Get("category"))
+	category := prom.MetricCategory(r.URL.Query().Get("category"))
 	if category == "" {
-		category = CategoryCPU
+		category = prom.CategoryCPU
 	}
 
-	query := BuildClusterQuery(category)
+	query := prom.BuildClusterQuery(category)
 	if query == "" {
 		writeError(w, http.StatusBadRequest, "unsupported category for cluster: "+string(category))
 		return
@@ -385,19 +378,19 @@ func handleClusterMetrics(w http.ResponseWriter, r *http.Request) {
 
 	result, err := client.QueryRange(r.Context(), query, start, end, step)
 	if err != nil {
-		log.Printf("[prometheus] Cluster query failed (%s): %v", category, err)
-		errorlog.Record("prometheus", "error", "cluster query failed (%s): %v", category, err)
+		log.Printf("[prometheus] Cluster query failed (%q): %v", category, err)
+		errorlog.Record("prometheus", "error", "cluster query failed (%q): %v", category, err)
 		writeError(w, http.StatusBadGateway, "Prometheus query failed: "+err.Error())
 		return
 	}
 
 	result, _ = retryWithoutContainerFilter(r.Context(), client, result, query, category, start, end, step,
-		func() string { return BuildClusterQueryNoContainerFilter(category) },
-		fmt.Sprintf("Cluster query empty (%s)", category))
+		func() string { return prom.BuildClusterQueryNoContainerFilter(category) },
+		fmt.Sprintf("Cluster query empty (%q)", category))
 
 	writeJSON(w, http.StatusOK, ClusterMetricsResponse{
 		Category: category,
-		Unit:     CategoryUnit(category),
+		Unit:     prom.CategoryUnit(category),
 		Range:    rangeStr,
 		Result:   result,
 	})
@@ -449,8 +442,8 @@ func handleRawQuery(w http.ResponseWriter, r *http.Request) {
 // when the primary result is empty and the category uses that filter. This handles
 // cri-docker and other setups where cAdvisor metrics lack the container label.
 // Returns the updated result (original or fallback) and the query that produced it.
-func retryWithoutContainerFilter(ctx context.Context, client *Client, result *QueryResult, query string, category MetricCategory, start, end time.Time, step time.Duration, buildFallback func() string, logPrefix string) (*QueryResult, string) {
-	if len(result.Series) > 0 || !categoryUsesContainerFilter(category) {
+func retryWithoutContainerFilter(ctx context.Context, client *Client, result *prom.QueryResult, query string, category prom.MetricCategory, start, end time.Time, step time.Duration, buildFallback func() string, logPrefix string) (*prom.QueryResult, string) {
+	if len(result.Series) > 0 || !prom.CategoryUsesContainerFilter(category) {
 		return result, query
 	}
 	fallbackQuery := buildFallback()
diff --git a/internal/prometheus/queries_test.go b/internal/prometheus/queries_test.go
index ca7fd1480..bfc67fd3f 100644
--- a/internal/prometheus/queries_test.go
+++ b/internal/prometheus/queries_test.go
@@ -3,6 +3,8 @@ package prometheus
 import (
 	"strings"
 	"testing"
+
+	"github.com/skyhook-io/radar/pkg/prom"
 )
 
 func TestMemoryQueriesDedupeScrapeJobsBeforeSumming(t *testing.T) {
@@ -13,22 +15,22 @@ func TestMemoryQueriesDedupeScrapeJobsBeforeSumming(t *testing.T) {
 	}{
 		{
 			name:  "pod",
-			query: BuildQuery("Pod", "dify-new", "dify-new-postgresql-primary-0", CategoryMemory),
+			query: prom.BuildQuery("Pod", "dify-new", "dify-new-postgresql-primary-0", prom.CategoryMemory),
 			want:  "sum by (pod,namespace) (max by (pod,namespace,container)",
 		},
 		{
 			name:  "workload",
-			query: BuildQuery("StatefulSet", "dify-new", "dify-new-postgresql-primary", CategoryMemory),
+			query: prom.BuildQuery("StatefulSet", "dify-new", "dify-new-postgresql-primary", prom.CategoryMemory),
 			want:  "sum by (pod,namespace) (max by (pod,namespace,container)",
 		},
 		{
 			name:  "namespace",
-			query: BuildNamespaceQuery("dify-new", CategoryMemory),
+			query: prom.BuildNamespaceQuery("dify-new", prom.CategoryMemory),
 			want:  "sum(max by (namespace,pod,container)",
 		},
 		{
 			name:  "cluster",
-			query: BuildClusterQuery(CategoryMemory),
+			query: prom.BuildClusterQuery(prom.CategoryMemory),
 			want:  "sum(max by (namespace,pod,container)",
 		},
 	}
diff --git a/internal/prometheus/rightsizing.go b/internal/prometheus/rightsizing.go
index 0390210c2..ec52d51db 100644
--- a/internal/prometheus/rightsizing.go
+++ b/internal/prometheus/rightsizing.go
@@ -11,6 +11,7 @@ import (
 	"github.com/go-chi/chi/v5"
 	"github.com/skyhook-io/radar/internal/errorlog"
 	"github.com/skyhook-io/radar/internal/k8s"
+	"github.com/skyhook-io/radar/pkg/prom"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 )
@@ -305,9 +306,9 @@ func computeRightsizingRow(ctx context.Context, client *Client, namespace, workl
 // queryContainerP95 returns the P95 of a container's CPU/memory usage over the
 // rightsizing window. Returns nil (no error) when there's no data.
 func queryContainerP95(ctx context.Context, client *Client, namespace, workload, container, resKind string) (*float64, error) {
-	ns := SanitizeLabelValue(namespace)
-	podPattern := fmt.Sprintf("%s-.*", escapeRegexMeta(SanitizeLabelValue(workload)))
-	cn := SanitizeLabelValue(container)
+	ns := prom.SanitizeLabelValue(namespace)
+	podPattern := fmt.Sprintf("%s-.*", prom.EscapeRegexMeta(prom.SanitizeLabelValue(workload)))
+	cn := prom.SanitizeLabelValue(container)
 	windowSec := int64(rightsizingWindow.Seconds())
 
 	var query string
@@ -520,8 +521,8 @@ func handlePVCUsage(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	ns := SanitizeLabelValue(namespace)
-	pvc := SanitizeLabelValue(name)
+	ns := prom.SanitizeLabelValue(namespace)
+	pvc := prom.SanitizeLabelValue(name)
 
 	// kubelet's native label is `persistentvolumeclaim`; clusters with custom
 	// relabeling that renamed it will return no series and the gauge hides.
@@ -560,7 +561,7 @@ func handlePVCUsage(w http.ResponseWriter, r *http.Request) {
 	writeJSON(w, http.StatusOK, resp)
 }
 
-func firstValue(res *QueryResult) *float64 {
+func firstValue(res *prom.QueryResult) *float64 {
 	if res == nil || len(res.Series) == 0 || len(res.Series[0].DataPoints) == 0 {
 		return nil
 	}
diff --git a/internal/traffic/caretta.go b/internal/traffic/caretta.go
index 0505b684f..e1035049b 100644
--- a/internal/traffic/caretta.go
+++ b/internal/traffic/caretta.go
@@ -19,7 +19,7 @@ import (
 
 	"github.com/skyhook-io/radar/internal/errorlog"
 	"github.com/skyhook-io/radar/internal/portforward"
-	promclient "github.com/skyhook-io/radar/internal/prometheus"
+	"github.com/skyhook-io/radar/pkg/prom"
 )
 
 const (
@@ -336,7 +336,7 @@ func (c *CarettaSource) queryPrometheusForFlows(ctx context.Context, promAddr st
 	query := "caretta_links_observed"
 	if opts.Namespace != "" {
 		// Filter by namespace (either client or server)
-		safeNS := promclient.SanitizeLabelValue(opts.Namespace)
+		safeNS := prom.SanitizeLabelValue(opts.Namespace)
 		query = fmt.Sprintf(`caretta_links_observed{client_namespace="%s"} or caretta_links_observed{server_namespace="%s"}`,
 			safeNS, safeNS)
 	}
diff --git a/internal/traffic/istio.go b/internal/traffic/istio.go
index 9a74ea7d2..4cff077d0 100644
--- a/internal/traffic/istio.go
+++ b/internal/traffic/istio.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/skyhook-io/radar/internal/portforward"
 	promclient "github.com/skyhook-io/radar/internal/prometheus"
+	"github.com/skyhook-io/radar/pkg/prom"
 )
 
 const (
@@ -153,7 +154,7 @@ func (s *IstioSource) queryHTTPFlows(ctx context.Context, client *promclient.Cli
 	// Main query: all requests, no response_code grouping
 	query := `sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace, destination_service_name, request_protocol, reporter) (rate(istio_requests_total{reporter="destination"}[5m]))`
 	if opts.Namespace != "" {
-		safeNS := promclient.SanitizeLabelValue(opts.Namespace)
+		safeNS := prom.SanitizeLabelValue(opts.Namespace)
 		query = fmt.Sprintf(`sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace, destination_service_name, request_protocol, reporter) (rate(istio_requests_total{reporter="destination", source_workload_namespace="%s"}[5m])) or sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace, destination_service_name, request_protocol, reporter) (rate(istio_requests_total{reporter="destination", destination_workload_namespace="%s"}[5m]))`,
 			safeNS, safeNS)
 	}
@@ -161,7 +162,7 @@ func (s *IstioSource) queryHTTPFlows(ctx context.Context, client *promclient.Cli
 	// Error query: 5xx only
 	errorQuery := `sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace, reporter) (rate(istio_requests_total{reporter="destination", response_code=~"5.."}[5m]))`
 	if opts.Namespace != "" {
-		safeNS := promclient.SanitizeLabelValue(opts.Namespace)
+		safeNS := prom.SanitizeLabelValue(opts.Namespace)
 		errorQuery = fmt.Sprintf(`sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace, reporter) (rate(istio_requests_total{reporter="destination", response_code=~"5..", source_workload_namespace="%s"}[5m])) or sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace, reporter) (rate(istio_requests_total{reporter="destination", response_code=~"5..", destination_workload_namespace="%s"}[5m]))`,
 			safeNS, safeNS)
 	}
@@ -294,14 +295,14 @@ func (s *IstioSource) queryByteMetrics(ctx context.Context, client *promclient.C
 	sentQuery := `sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace) (rate(istio_request_bytes_sum{reporter="destination"}[5m]))`
 	recvQuery := `sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace) (rate(istio_response_bytes_sum{reporter="destination"}[5m]))`
 	if opts.Namespace != "" {
-		safeNS := promclient.SanitizeLabelValue(opts.Namespace)
+		safeNS := prom.SanitizeLabelValue(opts.Namespace)
 		sentQuery = fmt.Sprintf(`sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace) (rate(istio_request_bytes_sum{reporter="destination", source_workload_namespace="%s"}[5m])) or sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace) (rate(istio_request_bytes_sum{reporter="destination", destination_workload_namespace="%s"}[5m]))`,
 			safeNS, safeNS)
 		recvQuery = fmt.Sprintf(`sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace) (rate(istio_response_bytes_sum{reporter="destination", source_workload_namespace="%s"}[5m])) or sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace) (rate(istio_response_bytes_sum{reporter="destination", destination_workload_namespace="%s"}[5m]))`,
 			safeNS, safeNS)
 	}
 
-	parseByteResult := func(result *promclient.QueryResult, target map[flowKey]float64) {
+	parseByteResult := func(result *prom.QueryResult, target map[flowKey]float64) {
 		if result == nil {
 			return
 		}
@@ -345,7 +346,7 @@ func (s *IstioSource) queryByteMetrics(ctx context.Context, client *promclient.C
 func (s *IstioSource) queryTCPFlows(ctx context.Context, client *promclient.Client, opts FlowOptions) ([]Flow, error) {
 	query := `sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace, destination_service_name, reporter) (rate(istio_tcp_connections_opened_total{reporter="destination"}[5m]))`
 	if opts.Namespace != "" {
-		safeNS := promclient.SanitizeLabelValue(opts.Namespace)
+		safeNS := prom.SanitizeLabelValue(opts.Namespace)
 		query = fmt.Sprintf(`sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace, destination_service_name, reporter) (rate(istio_tcp_connections_opened_total{reporter="destination", source_workload_namespace="%s"}[5m])) or sum by (source_workload, source_workload_namespace, destination_workload, destination_workload_namespace, destination_service_name, reporter) (rate(istio_tcp_connections_opened_total{reporter="destination", destination_workload_namespace="%s"}[5m]))`,
 			safeNS, safeNS)
 	}
diff --git a/pkg/opencost/compute.go b/pkg/opencost/compute.go
new file mode 100644
index 000000000..e134c4139
--- /dev/null
+++ b/pkg/opencost/compute.go
@@ -0,0 +1,509 @@
+package opencost
+
+import (
+	"context"
+	"log"
+	"math"
+	"sort"
+	"strconv"
+	"strings"
+
+	"github.com/skyhook-io/radar/pkg/prom"
+)
+
+// windowHours parses an OpenCost window string (e.g. "1h", "24h", "7d",
+// "30d") into a number of hours. OpenCost's /allocation returns totalCost
+// summed over the whole window; to present an hourly rate (which then
+// multiplies by 730 for monthly projection) we divide by this value. Falls
+// back to 1.0 for unknown, non-positive, or non-finite inputs so callers
+// degrade gracefully rather than silently zero (or NaN) out costs.
+func windowHours(w string) float64 {
+	s := strings.TrimSpace(strings.ToLower(w))
+	if len(s) < 2 { // need at least one digit plus a unit suffix
+		return 1
+	}
+	n, err := strconv.ParseFloat(s[:len(s)-1], 64)
+	if err != nil {
+		return 1
+	}
+	var hours float64 // stays 0 for an unrecognized unit, rejected below
+	switch s[len(s)-1] {
+	case 'h':
+		hours = n
+	case 'd':
+		hours = n * 24
+	case 'w':
+		hours = n * 24 * 7
+	case 'm':
+		// Ambiguous (minutes vs months). OpenCost uses "30d" for months, so
+		// treat lone "m" as minutes for safety.
+		hours = n / 60
+	}
+	// ParseFloat accepts "nan"/"inf", and scaling can overflow or underflow.
+	if math.IsNaN(hours) || math.IsInf(hours, 0) || hours <= 0 {
+		return 1
+	}
+	return hours
+}
+
+// SummaryOptions tunes ComputeCostSummary behavior.
+type SummaryOptions struct {
+	// Currency is the currency label returned in the response (default "USD").
+	Currency string
+
+	// Window passed to OpenCost and echoed in the response (default "1h").
+	// PromQL paths treat it as a response label only (their time windows are
+	// fixed); the REST path forwards it and uses it to derive hourly rates.
+	Window string
+
+	// Aggregate controls how rows are grouped: "namespace" (default),
+	// "controller" or "pod". Passed straight to OpenCost's aggregate param.
+	Aggregate string
+
+	// Filter is an OpenCost /allocation filter expression (v1.106+),
+	// commonly used to scope pod/controller queries to a single namespace.
+	// Example: `namespace:"kube-system"`
+	Filter string
+
+	// NamespaceFilter is a client-side namespace scope applied after the
+	// OpenCost response is received: rows whose Properties["namespace"] is
+	// not exactly this value are dropped. Set it alongside Filter — older
+	// OpenCost versions silently ignore the REST `filter` param, so this
+	// post-filter is what actually honors the drill-down scope there.
+	NamespaceFilter string
+}
+
+// ComputeCostSummary is the default compute path: asks OpenCost's REST API
+// for namespace-level allocation over the window and maps the response into
+// our normalized CostSummary.
+//
+// Why REST by default: OpenCost computes cost internally (cloud pricing +
+// Kubernetes allocation data) and exposes the results two ways — REST at
+// /allocation, /assets and /cloudCost, and Prometheus metrics at /metrics. REST
+// works wherever OpenCost works; the Prometheus path requires a scrape
+// config that's often missing on clusters where OpenCost was installed
+// manually. REST is also simpler (one pre-aggregated call instead of ~6
+// PromQL queries + client-side math).
+//
+// When to reach for ComputeCostSummaryFromProm instead:
+//   - You need custom label aggregations beyond what /allocation exposes.
+//   - You want per-node hourly pricing as time series.
+//   - You're correlating cost with live Prometheus metrics (deploy events,
+//     HPA state, container_cpu_usage, etc.) in the same query.
+//
+// Contract:
+//   - REST unreachable or returns error → Available=false, Reason=ReasonQueryError.
+//   - REST returns empty data (OpenCost up but has no cost rows yet) →
+//     Available=false, Reason=ReasonNoMetrics.
+//   - Otherwise Available=true with namespace rows + totals filled in.
+//   - Numbers rounded to 4dp for JSON cleanliness.
+func ComputeCostSummary(ctx context.Context, client *RESTClient, opts SummaryOptions) *CostSummary {
+	if opts.Currency == "" {
+		opts.Currency = "USD"
+	}
+	if opts.Window == "" {
+		opts.Window = "1h"
+	}
+
+	aggregate := opts.Aggregate
+	if aggregate == "" {
+		aggregate = "namespace"
+	}
+	resp, err := client.GetAllocation(ctx, AllocationOptions{
+		Window:      opts.Window,
+		Aggregate:   aggregate,
+		Filter:      opts.Filter,
+		IncludeIdle: true,
+	})
+	if err != nil {
+		log.Printf("[opencost] /allocation summary failed: %v", err)
+		return &CostSummary{Available: false, Reason: ReasonQueryError}
+	}
+	if resp == nil || len(resp.Data) == 0 {
+		return &CostSummary{Available: false, Reason: ReasonNoMetrics}
+	}
+
+	// /allocation returns an array of time windows. For a single bucket we
+	// merge across all windows; normally there's just one.
+	//
+	// Older OpenCost versions (< v1.106) silently ignore the REST filter param,
+	// so when NamespaceFilter is set we post-filter rows by their
+	// Properties["namespace"]. The __idle__ synthetic row has no namespace, so
+	// it naturally drops out of a scoped drill-down — desired.
+	combined := make(map[string]*Allocation)
+	for _, bucket := range resp.Data {
+		for name, a := range bucket {
+			if a == nil {
+				continue
+			}
+			if opts.NamespaceFilter != "" {
+				ns, _ := a.Properties["namespace"].(string)
+				if ns != opts.NamespaceFilter {
+					continue
+				}
+			}
+			if existing, ok := combined[name]; ok {
+				existing.CPUCost += a.CPUCost
+				existing.RAMCost += a.RAMCost
+				existing.PVCost += a.PVCost
+				existing.NetworkCost += a.NetworkCost
+				existing.LoadBalancerCost += a.LoadBalancerCost
+				existing.SharedCost += a.SharedCost
+				existing.ExternalCost += a.ExternalCost
+				existing.TotalCost += a.TotalCost
+				existing.CPUCoreUsageAverage += a.CPUCoreUsageAverage
+				existing.RAMByteUsageAverage += a.RAMByteUsageAverage
+			} else {
+				cp := *a
+				combined[name] = &cp
+			}
+		}
+	}
+
+	if len(combined) == 0 {
+		return &CostSummary{Available: false, Reason: ReasonNoMetrics}
+	}
+
+	namespaces := make([]NamespaceCost, 0, len(combined))
+	var totalHourlyCost, totalStorageCost, totalNetworkCost, totalIdleCost float64
+	var totalAllocCost, totalUsageCost float64
+
+	for name, a := range combined {
+		// OpenCost emits __idle__ as a synthetic row for unallocated node
+		// capacity. Surface it as a dedicated idle total, not a namespace.
+		//
+		// Sign quirk: OpenCost can report __idle__ with negative costs when
+		// the cluster's allocated sum over-counts relative to node pricing
+		// (burstable workloads exceeding their request, or pricing-model
+		// rounding). Clamp negative idle to 0 — idle is conceptually
+		// "unused capacity cost", always non-negative.
+		if name == "__idle__" {
+			idle := a.CPUCost + a.RAMCost
+			if idle < 0 {
+				idle = 0
+			}
+			totalIdleCost += idle
+			// Intentionally do NOT add __idle__ to totalHourlyCost —
+			// totalHourlyCost is the sum of allocated spend. Idle is
+			// surfaced separately as TotalIdleCost so callers can render
+			// or sum it as needed.
+			continue
+		}
+		// OpenCost aggregates orphan pods (those with no controller) into a
+		// synthetic "__unallocated__" row when grouping by controller. On some
+		// cluster configurations this row also absorbs cluster-level idle,
+		// making it appear larger than the parent namespace. Drop it to keep
+		// the drill-down consistent — named controllers tell the real story.
+		if name == "__unallocated__" {
+			continue
+		}
+		nc := NamespaceCost{
+			Name:        name,
+			Kind:        aggregate,
+			CPUCost:     a.CPUCost,
+			MemoryCost:  a.RAMCost,
+			StorageCost: a.PVCost,
+			NetworkCost: a.NetworkCost,
+			HourlyCost:  a.TotalCost,
+		}
+		// For non-namespace aggregates, OpenCost stamps the parent namespace
+		// in Properties so the UI can thread children under their parent
+		// without a second query.
+		if aggregate != "namespace" {
+			if ns, ok := a.Properties["namespace"].(string); ok {
+				nc.Namespace = ns
+			}
+		}
+		allocCost := nc.CPUCost + nc.MemoryCost
+		if a.TotalEfficiency > 0 && allocCost > 0 {
+			// Cap per-row efficiency at 1.0 BEFORE accumulating into the
+			// cluster total. OpenCost occasionally reports TotalEfficiency
+			// > 1 (burstable pods exceeding their request, measurement
+			// noise); without this cap a single outlier could push the
+			// cluster total above 100%.
+			rowEff := a.TotalEfficiency
+			if rowEff > 1 {
+				rowEff = 1
+			}
+			usageCost := rowEff * allocCost
+			nc.CPUUsageCost = usageCost * safeRatio(nc.CPUCost, allocCost)
+			nc.MemoryUsageCost = usageCost - nc.CPUUsageCost
+			nc.Efficiency = efficiencyPct(usageCost, allocCost)
+			nc.IdleCost = idleFromUsage(usageCost, allocCost)
+			// Accumulate cost-weighted, matching ComputeCostSummaryFromProm.
+			// An unweighted mean would let a $0.01 row at 10% efficiency
+			// drag down the cluster number identically to a $100 row.
+			totalAllocCost += allocCost
+			totalUsageCost += usageCost
+		}
+		totalHourlyCost += nc.HourlyCost
+		totalStorageCost += nc.StorageCost
+		totalNetworkCost += nc.NetworkCost
+		// Per-namespace idle (allocated-not-used) is separate from the
+		// __idle__ row (unassigned node capacity). Both are real waste the
+		// user can act on, so aggregate them together.
+		totalIdleCost += nc.IdleCost
+		namespaces = append(namespaces, nc)
+	}
+
+	sort.Slice(namespaces, func(i, j int) bool {
+		return namespaces[i].HourlyCost > namespaces[j].HourlyCost
+	})
+
+	clusterEfficiency := efficiencyPct(totalUsageCost, totalAllocCost)
+
+	// Normalize window-total to hourly. OpenCost's /allocation returns
+	// totalCost summed over the entire window; we want rate so the UI can
+	// multiply by 730 for monthly projections regardless of the window
+	// picker state. Efficiency is unitless (usage/alloc ratio) so it does
+	// not need normalization.
+	hours := windowHours(opts.Window)
+	if hours <= 0 {
+		hours = 1
+	}
+	normalize := func(v float64) float64 { return v / hours }
+	totalHourlyCost = normalize(totalHourlyCost)
+	totalStorageCost = normalize(totalStorageCost)
+	totalNetworkCost = normalize(totalNetworkCost)
+	totalIdleCost = normalize(totalIdleCost)
+	for i := range namespaces {
+		namespaces[i].HourlyCost = normalize(namespaces[i].HourlyCost)
+		namespaces[i].CPUCost = normalize(namespaces[i].CPUCost)
+		namespaces[i].MemoryCost = normalize(namespaces[i].MemoryCost)
+		namespaces[i].StorageCost = normalize(namespaces[i].StorageCost)
+		namespaces[i].NetworkCost = normalize(namespaces[i].NetworkCost)
+		namespaces[i].CPUUsageCost = normalize(namespaces[i].CPUUsageCost)
+		namespaces[i].MemoryUsageCost = normalize(namespaces[i].MemoryUsageCost)
+		namespaces[i].IdleCost = normalize(namespaces[i].IdleCost)
+	}
+
+	// Round everything for JSON stability.
+	totalHourlyCost = roundTo(totalHourlyCost, 4)
+	totalStorageCost = roundTo(totalStorageCost, 4)
+	totalNetworkCost = roundTo(totalNetworkCost, 4)
+	totalIdleCost = roundTo(totalIdleCost, 4)
+	for i := range namespaces {
+		namespaces[i].HourlyCost = roundTo(namespaces[i].HourlyCost, 4)
+		namespaces[i].CPUCost = roundTo(namespaces[i].CPUCost, 4)
+		namespaces[i].MemoryCost = roundTo(namespaces[i].MemoryCost, 4)
+		namespaces[i].StorageCost = roundTo(namespaces[i].StorageCost, 4)
+		namespaces[i].NetworkCost = roundTo(namespaces[i].NetworkCost, 4)
+		namespaces[i].CPUUsageCost = roundTo(namespaces[i].CPUUsageCost, 4)
+		namespaces[i].MemoryUsageCost = roundTo(namespaces[i].MemoryUsageCost, 4)
+		namespaces[i].IdleCost = roundTo(namespaces[i].IdleCost, 4)
+	}
+
+	return &CostSummary{
+		Available:         true,
+		Currency:          opts.Currency,
+		Window:            opts.Window,
+		TotalHourlyCost:   totalHourlyCost,
+		TotalStorageCost:  totalStorageCost,
+		TotalNetworkCost:  totalNetworkCost,
+		TotalIdleCost:     totalIdleCost,
+		ClusterEfficiency: clusterEfficiency,
+		Namespaces:        namespaces,
+	}
+}
+
+// safeRatio divides num by den, yielding 0 whenever den is zero or negative.
+func safeRatio(num, den float64) float64 {
+	if nonPositive := den <= 0; !nonPositive {
+		return num / den
+	}
+	return 0
+}
+
+// ComputeCostSummaryFromProm is the PromQL-based compute path, for callers
+// that have a scraped-OpenCost Prometheus available rather than the REST
+// API (or that need to correlate cost with live Prometheus metrics in the
+// same query). The PromQL uses fixed [1h] ranges; opts.Window is only echoed.
+//
+// Contract:
+//   - A nil client yields Available=false and Reason=ReasonNoPrometheus.
+//   - If both allocation queries return no series, the summary has
+//     Available=false and Reason=ReasonNoMetrics.
+//   - If an allocation query and its opencost_* fallback both fail,
+//     Reason=ReasonQueryError. Errors are never returned, only typed reasons.
+//   - Cost figures are rounded to 4 decimal places for cleaner JSON.
+func ComputeCostSummaryFromProm(ctx context.Context, client *prom.Client, opts SummaryOptions) *CostSummary {
+	if client == nil {
+		return &CostSummary{Available: false, Reason: ReasonNoPrometheus}
+	}
+	if opts.Currency == "" {
+		opts.Currency = "USD"
+	}
+	if opts.Window == "" {
+		opts.Window = "1h"
+	}
+
+	cpuResult, err := client.Query(ctx,
+		`sum by (namespace) (label_replace(avg_over_time(container_cpu_allocation{namespace!=""}[1h]), "namespace", "$1", "exported_namespace", "(.+)") * on(node) group_left() node_cpu_hourly_cost)`)
+	if err != nil {
+		log.Printf("[opencost] CPU allocation query failed, trying opencost_container_cpu_cost_total: %v", err)
+		cpuResult, err = client.Query(ctx,
+			`sum by (namespace) (label_replace(rate(opencost_container_cpu_cost_total[1h]), "namespace", "$1", "exported_namespace", "(.+)"))`)
+		if err != nil {
+			log.Printf("[opencost] CPU allocation fallback query also failed: %v", err)
+			return &CostSummary{Available: false, Reason: ReasonQueryError}
+		}
+	}
+
+	memResult, err := client.Query(ctx,
+		`sum by (namespace) (label_replace(avg_over_time(container_memory_allocation_bytes{namespace!=""}[1h]), "namespace", "$1", "exported_namespace", "(.+)") / 1073741824 * on(node) group_left() node_ram_hourly_cost)`)
+	if err != nil {
+		log.Printf("[opencost] memory allocation query failed, trying opencost_container_memory_cost_total: %v", err)
+		memResult, err = client.Query(ctx,
+			`sum by (namespace) (label_replace(rate(opencost_container_memory_cost_total[1h]), "namespace", "$1", "exported_namespace", "(.+)"))`)
+		if err != nil {
+			log.Printf("[opencost] memory allocation fallback query also failed: %v", err)
+			return &CostSummary{Available: false, Reason: ReasonQueryError}
+		}
+	}
+
+	if len(cpuResult.Series) == 0 && len(memResult.Series) == 0 {
+		return &CostSummary{Available: false, Reason: ReasonNoMetrics}
+	}
+
+	// Usage queries are best-effort: efficiency / idle are derived from them
+	// and zero out cleanly if the queries fail, but a silent failure here can
+	// look identical to a low-utilization workload — so log when it happens.
+	cpuUsageRes, cpuUsageErr := client.Query(ctx,
+		`sum by (namespace) (label_replace(rate(container_cpu_usage_seconds_total{container!="", namespace!=""}[1h]), "node", "$1", "instance", "(.+?)(?::\\d+)?$") * on(node) group_left() node_cpu_hourly_cost)`)
+	if cpuUsageErr != nil {
+		log.Printf("[opencost] CPU usage query failed (efficiency will be 0 for affected rows): %v", cpuUsageErr)
+	}
+	cpuUsageMap := lastValuePerLabel(cpuUsageRes, cpuUsageErr, "namespace")
+
+	memUsageRes, memUsageErr := client.Query(ctx,
+		`sum by (namespace) (label_replace(container_memory_working_set_bytes{container!="", namespace!=""}, "node", "$1", "instance", "(.+?)(?::\\d+)?$") / 1073741824 * on(node) group_left() node_ram_hourly_cost)`)
+	if memUsageErr != nil {
+		log.Printf("[opencost] memory usage query failed (efficiency will be 0 for affected rows): %v", memUsageErr)
+	}
+	memUsageMap := lastValuePerLabel(memUsageRes, memUsageErr, "namespace")
+
+	storageRes, storageErr := client.Query(ctx,
+		`sum by (namespace) (pv_hourly_cost * on(persistentvolume) group_left(namespace) kube_persistentvolume_claim_ref)`)
+	if storageErr != nil {
+		log.Printf("[opencost] storage cost query failed (storage costs will be 0): %v", storageErr)
+	}
+	storageMap := lastValuePerLabel(storageRes, storageErr, "namespace")
+
+	nsMap := make(map[string]*NamespaceCost)
+	mergeSeriesIntoNamespaceField(cpuResult, nsMap, func(nc *NamespaceCost, v float64) { nc.CPUCost = v })
+	mergeSeriesIntoNamespaceField(memResult, nsMap, func(nc *NamespaceCost, v float64) { nc.MemoryCost = v })
+
+	var totalHourlyCost, totalStorageCost, totalUsageCost, totalAllocCost float64
+	namespaces := make([]NamespaceCost, 0, len(nsMap))
+	for _, nc := range nsMap {
+		nc.HourlyCost = nc.CPUCost + nc.MemoryCost
+		nc.StorageCost = storageMap[nc.Name]
+		nc.HourlyCost += nc.StorageCost
+		totalStorageCost += nc.StorageCost
+
+		nc.CPUUsageCost = cpuUsageMap[nc.Name]
+		nc.MemoryUsageCost = memUsageMap[nc.Name]
+		allocCost := nc.CPUCost + nc.MemoryCost
+		usageCost := nc.CPUUsageCost + nc.MemoryUsageCost
+		nc.Efficiency = efficiencyPct(usageCost, allocCost)
+		nc.IdleCost = idleFromUsage(usageCost, allocCost)
+		totalAllocCost += allocCost
+		totalUsageCost += usageCost
+		totalHourlyCost += nc.HourlyCost
+		namespaces = append(namespaces, *nc)
+	}
+
+	if nodeResult, err := client.Query(ctx, `sum(node_total_hourly_cost)`); err == nil && len(nodeResult.Series) > 0 && len(nodeResult.Series[0].DataPoints) > 0 {
+		if pts := nodeResult.Series[0].DataPoints; pts[len(pts)-1].Value > totalHourlyCost {
+			totalHourlyCost = pts[len(pts)-1].Value // latest sample, as for every other query here
+		}
+	}
+
+	sort.Slice(namespaces, func(i, j int) bool {
+		return namespaces[i].HourlyCost > namespaces[j].HourlyCost
+	})
+
+	clusterEfficiency := efficiencyPct(totalUsageCost, totalAllocCost)
+	totalIdleCost := idleFromUsage(totalUsageCost, totalAllocCost)
+
+	totalHourlyCost = roundTo(totalHourlyCost, 4)
+	totalStorageCost = roundTo(totalStorageCost, 4)
+	totalIdleCost = roundTo(totalIdleCost, 4)
+	for i := range namespaces {
+		namespaces[i].HourlyCost = roundTo(namespaces[i].HourlyCost, 4)
+		namespaces[i].CPUCost = roundTo(namespaces[i].CPUCost, 4)
+		namespaces[i].MemoryCost = roundTo(namespaces[i].MemoryCost, 4)
+		namespaces[i].StorageCost = roundTo(namespaces[i].StorageCost, 4)
+		namespaces[i].CPUUsageCost = roundTo(namespaces[i].CPUUsageCost, 4)
+		namespaces[i].MemoryUsageCost = roundTo(namespaces[i].MemoryUsageCost, 4)
+		namespaces[i].IdleCost = roundTo(namespaces[i].IdleCost, 4)
+	}
+
+	return &CostSummary{
+		Available:         true,
+		Currency:          opts.Currency,
+		Window:            opts.Window,
+		TotalHourlyCost:   totalHourlyCost,
+		TotalStorageCost:  totalStorageCost,
+		TotalIdleCost:     totalIdleCost,
+		ClusterEfficiency: clusterEfficiency,
+		Namespaces:        namespaces,
+	}
+}
+
+// mergeSeriesIntoNamespaceField feeds the latest sample of each series to set
+// on that namespace's nsMap entry, creating it if absent; nil result: no-op.
+func mergeSeriesIntoNamespaceField(result *prom.QueryResult, nsMap map[string]*NamespaceCost, set func(*NamespaceCost, float64)) {
+	if result == nil {
+		return
+	}
+	for _, series := range result.Series {
+		name := series.Labels["namespace"]
+		if name == "" {
+			continue
+		}
+		if _, seen := nsMap[name]; !seen {
+			nsMap[name] = &NamespaceCost{Name: name}
+		}
+		if n := len(series.DataPoints); n > 0 {
+			set(nsMap[name], series.DataPoints[n-1].Value)
+		}
+	}
+}
+
+// roundTo rounds val to `places` decimals. NaN/Inf inputs, and results that
+// overflow to NaN/Inf while scaling, yield 0 to keep JSON responses stable.
+func roundTo(val float64, places int) float64 {
+	pow := math.Pow(10, float64(places))
+	if r := math.Round(val*pow) / pow; !math.IsNaN(r) && !math.IsInf(r, 0) {
+		return r
+	}
+	return 0
+}
+
+// efficiencyPct expresses usage as a percentage of alloc, rounded to one
+// decimal place and capped at 100. A non-positive usage or alloc yields 0,
+// which callers should read as "no data" rather than as "100% idle"; the
+// result therefore always lies in [0, 100].
+func efficiencyPct(usage, alloc float64) float64 {
+	if alloc > 0 && usage > 0 {
+		if pct := roundTo(100*(usage/alloc), 1); pct < 100 {
+			return pct
+		}
+		return 100
+	}
+	return 0
+}
+
+// idleFromUsage returns the allocated-but-unused cost, max(alloc - usage, 0),
+// but only when both inputs are positive: a non-positive or NaN input yields
+// 0, mirroring efficiencyPct's "no data ≠ 100% idle" semantics and keeping a
+// single NaN sample from poisoning the totals this value is summed into.
+func idleFromUsage(usage, alloc float64) float64 {
+	if usage > 0 && alloc > 0 {
+		if unused := alloc - usage; unused > 0 {
+			return unused
+		}
+	}
+	return 0
+}
diff --git a/pkg/opencost/compute_rest_test.go b/pkg/opencost/compute_rest_test.go
new file mode 100644
index 000000000..d98b4d4f8
--- /dev/null
+++ b/pkg/opencost/compute_rest_test.go
@@ -0,0 +1,282 @@
+package opencost
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"testing"
+)
+
+// fakeOpenCost spins up an httptest server whose /allocation endpoint replies
+// with bodyForAllocation verbatim (every other path is a 404) and returns a
+// RESTClient pointed at it. The server is closed through t.Cleanup.
+func fakeOpenCost(t *testing.T, bodyForAllocation string) *RESTClient {
+	t.Helper()
+	handler := func(w http.ResponseWriter, req *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		if req.URL.Path != "/allocation" {
+			http.NotFound(w, req)
+			return
+		}
+		_, _ = w.Write([]byte(bodyForAllocation))
+	}
+	server := httptest.NewServer(http.HandlerFunc(handler))
+	t.Cleanup(server.Close)
+	return NewRESTClient(&httpTransport{baseURL: server.URL, client: server.Client()})
+}
+
+// httpTransport is a minimal net/http-backed Transport used only by tests.
+type httpTransport struct {
+	baseURL string
+	client  *http.Client
+}
+
+// Do sends method to baseURL+path?params and returns the raw body. It ignores
+// the status code, and any read error (io.EOF included) just ends the read.
+func (t *httpTransport) Do(ctx context.Context, method, path string, params url.Values) ([]byte, error) {
+	target := t.baseURL + path
+	if len(params) > 0 {
+		target += "?" + params.Encode()
+	}
+	req, err := http.NewRequestWithContext(ctx, method, target, nil)
+	if err != nil {
+		return nil, err
+	}
+	resp, err := t.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	body := make([]byte, 0, 4096)
+	chunk := make([]byte, 4096)
+	for {
+		n, readErr := resp.Body.Read(chunk)
+		body = append(body, chunk[:n]...)
+		if readErr != nil {
+			return body, nil
+		}
+	}
+}
+
+// Address reports the base URL that requests are sent to.
+func (t *httpTransport) Address() string { return t.baseURL }
+
+// buildAllocationResponse marshals a single-window OpenCost /allocation body
+// from a namespace→totalCost map. Each row's total is split 60/40 between
+// CPU and RAM so tests can check the split too, and TotalEfficiency is fixed
+// at 0.5. A marshal failure aborts the test.
+func buildAllocationResponse(t *testing.T, rows map[string]float64) string {
+	t.Helper()
+	allocs := make(map[string]*Allocation, len(rows))
+	for name, cost := range rows {
+		allocs[name] = &Allocation{
+			Name:            name,
+			TotalCost:       cost,
+			TotalEfficiency: 0.5,
+			CPUCost:         cost * 0.6,
+			RAMCost:         cost * 0.4,
+		}
+	}
+	encoded, err := json.Marshal(AllocationResponse{
+		Code: 200,
+		Data: []map[string]*Allocation{allocs},
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	return string(encoded)
+}
+
+func TestComputeCostSummary_REST_HappyPath(t *testing.T) {
+	payload := buildAllocationResponse(t, map[string]float64{
+		"checkout": 5.00,
+		"payments": 2.00,
+		"user-svc": 0.75,
+	})
+	rest := fakeOpenCost(t, payload)
+
+	summary := ComputeCostSummary(context.Background(), rest, SummaryOptions{})
+	if !summary.Available {
+		t.Fatalf("expected Available=true; got %+v", summary)
+	}
+	if summary.Currency != "USD" || summary.Window != "1h" {
+		t.Errorf("defaults not applied: currency=%q window=%q", summary.Currency, summary.Window)
+	}
+	if len(summary.Namespaces) != 3 {
+		t.Fatalf("want 3 namespaces, got %d", len(summary.Namespaces))
+	}
+	top := summary.Namespaces[0] // rows come back sorted by HourlyCost, descending
+	if top.Name != "checkout" {
+		t.Errorf("want checkout first, got %s", top.Name)
+	}
+	if top.HourlyCost != 5.00 {
+		t.Errorf("checkout HourlyCost=%v, want 5.00", top.HourlyCost)
+	}
+	if top.CPUCost != 3.00 { // the fixture puts 60% of the 5.00 total on CPU
+		t.Errorf("checkout CPUCost=%v, want 3.00", top.CPUCost)
+	}
+	// The fixture's TotalEfficiency of 0.5 must surface as 50 (percent).
+	if top.Efficiency != 50 {
+		t.Errorf("efficiency=%v, want 50", top.Efficiency)
+	}
+	// The cluster total is the sum of the rows: 5 + 2 + 0.75 = 7.75.
+	if summary.TotalHourlyCost != 7.75 {
+		t.Errorf("TotalHourlyCost=%v, want 7.75", summary.TotalHourlyCost)
+	}
+}
+
+func TestComputeCostSummary_REST_IdleRowSurfaced(t *testing.T) {
+	// The synthetic __idle__ row (unallocated node capacity) must feed
+	// TotalIdleCost only: it is neither listed as a namespace nor added to
+	// TotalHourlyCost, which stays the sum of *allocated* spend so the UI
+	// can show idle in its own cell without double-counting.
+	allocs := map[string]*Allocation{
+		"checkout": {Name: "checkout", CPUCost: 1.0, RAMCost: 0.5, TotalCost: 1.5, TotalEfficiency: 0.6},
+		"__idle__": {Name: "__idle__", CPUCost: 0.8, RAMCost: 0.2, TotalCost: 1.0},
+	}
+	payload, _ := json.Marshal(AllocationResponse{Code: 200, Data: []map[string]*Allocation{allocs}})
+	rest := fakeOpenCost(t, string(payload))
+
+	summary := ComputeCostSummary(context.Background(), rest, SummaryOptions{})
+	if !summary.Available {
+		t.Fatal("want Available=true")
+	}
+	// TotalIdleCost = the __idle__ row (0.8 + 0.2 = 1.0 of unused cluster
+	// capacity) + per-namespace idle (checkout: alloc 1.5 × (1 - eff 0.6)
+	// = 0.6). Both kinds of waste are reported together.
+	if summary.TotalIdleCost != 1.6 {
+		t.Errorf("TotalIdleCost=%v, want 1.6 (__idle__ 1.0 + checkout ns-idle 0.6)", summary.TotalIdleCost)
+	}
+	for _, row := range summary.Namespaces {
+		if row.Name == "__idle__" {
+			t.Error("__idle__ must not appear as a regular namespace row")
+		}
+	}
+	// Only checkout's 1.5 counts towards the allocated total.
+	if summary.TotalHourlyCost != 1.5 {
+		t.Errorf("TotalHourlyCost=%v, want 1.5 (allocated only; __idle__ goes to TotalIdleCost)", summary.TotalHourlyCost)
+	}
+}
+
+func TestComputeCostSummary_REST_NegativeIdleClampedToZero(t *testing.T) {
+	// OpenCost can report negative __idle__ costs when burstable workloads
+	// over-consume relative to node pricing. That row's contribution must
+	// clamp to 0, while the (positive) per-namespace idle caused by
+	// under-utilization still counts towards the total.
+	allocs := map[string]*Allocation{
+		"app":      {Name: "app", CPUCost: 0.5, RAMCost: 0.1, TotalCost: 0.6, TotalEfficiency: 0.4},
+		"__idle__": {Name: "__idle__", CPUCost: -0.3, RAMCost: -0.1},
+	}
+	payload, _ := json.Marshal(AllocationResponse{Code: 200, Data: []map[string]*Allocation{allocs}})
+	rest := fakeOpenCost(t, string(payload))
+
+	summary := ComputeCostSummary(context.Background(), rest, SummaryOptions{})
+	// __idle__ contributes 0 after clamping; app idle = 0.6 × (1 - 0.4) = 0.36.
+	if summary.TotalIdleCost != 0.36 {
+		t.Errorf("TotalIdleCost=%v, want 0.36 (__idle__ clamped, app ns-idle 0.36)", summary.TotalIdleCost)
+	}
+	if summary.TotalHourlyCost != 0.6 {
+		t.Errorf("TotalHourlyCost should still be 0.6 (allocated only); got %v", summary.TotalHourlyCost)
+	}
+}
+
+func TestComputeCostSummary_REST_WindowNormalization(t *testing.T) {
+	// /allocation reports costs summed over the whole window, so the
+	// summary has to divide by the window length in hours to yield a rate.
+	// Otherwise the UI's ×730 monthly projection would balloon whenever
+	// the user picks 24h / 7d / 30d.
+	allocs := map[string]*Allocation{
+		"svc": {Name: "svc", CPUCost: 24.0, RAMCost: 0, TotalCost: 24.0, TotalEfficiency: 0.5},
+	}
+	payload, _ := json.Marshal(AllocationResponse{Code: 200, Data: []map[string]*Allocation{allocs}})
+	rest := fakeOpenCost(t, string(payload))
+
+	summary := ComputeCostSummary(context.Background(), rest, SummaryOptions{Window: "24h"})
+	if !summary.Available {
+		t.Fatal("want Available=true")
+	}
+	// $24 spread over a 24h window is $1 per hour.
+	if summary.TotalHourlyCost != 1.0 {
+		t.Errorf("TotalHourlyCost=%v, want 1.0 ($24 total / 24h = $1/hr)", summary.TotalHourlyCost)
+	}
+	if summary.Namespaces[0].HourlyCost != 1.0 {
+		t.Errorf("svc.HourlyCost=%v, want 1.0", summary.Namespaces[0].HourlyCost)
+	}
+}
+
+func TestComputeCostSummary_REST_EfficiencyCappedBeforeAveraging(t *testing.T) {
+	// TotalEfficiency may exceed 1 for burstable workloads; one runaway
+	// row must not be allowed to dominate the cluster-wide figure.
+	allocs := map[string]*Allocation{
+		"normal":    {Name: "normal", CPUCost: 1.0, RAMCost: 0, TotalCost: 1.0, TotalEfficiency: 0.2},
+		"burstable": {Name: "burstable", CPUCost: 1.0, RAMCost: 0, TotalCost: 1.0, TotalEfficiency: 100.0},
+	}
+	payload, _ := json.Marshal(AllocationResponse{Code: 200, Data: []map[string]*Allocation{allocs}})
+	rest := fakeOpenCost(t, string(payload))
+
+	summary := ComputeCostSummary(context.Background(), rest, SummaryOptions{})
+	// burstable caps at 100%, normal is 20%; equal $1 rows average to 60%.
+	if eff := summary.ClusterEfficiency; eff < 58 || eff > 62 {
+		t.Errorf("ClusterEfficiency=%v, want ~60 (cap+avg)", eff)
+	}
+	// The cap applies to each individual row as well.
+	for _, row := range summary.Namespaces {
+		if row.Efficiency > 100 {
+			t.Errorf("%s efficiency=%v exceeds cap", row.Name, row.Efficiency)
+		}
+	}
+}
+
+func TestComputeCostSummary_REST_NoMetricsReason(t *testing.T) {
+	payload, _ := json.Marshal(AllocationResponse{Code: 200, Data: []map[string]*Allocation{{}}})
+	rest := fakeOpenCost(t, string(payload)) // one window, zero allocation rows
+
+	summary := ComputeCostSummary(context.Background(), rest, SummaryOptions{})
+	if summary.Available {
+		t.Error("expected Available=false for empty allocation data")
+	}
+	if summary.Reason != ReasonNoMetrics {
+		t.Errorf("Reason=%q, want %q", summary.Reason, ReasonNoMetrics)
+	}
+}
+
+func TestComputeCostSummary_REST_QueryErrorReason(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusBadGateway)
+	}))
+	defer server.Close()
+	rest := NewRESTClient(&httpTransport{baseURL: server.URL, client: server.Client()})
+
+	summary := ComputeCostSummary(context.Background(), rest, SummaryOptions{})
+	if summary.Available {
+		t.Error("expected Available=false on 502")
+	}
+	// httpTransport ignores the status, so the 502 arrives as an empty body that cannot be decoded.
+	if summary.Reason != ReasonQueryError {
+		t.Errorf("Reason=%q, want %q", summary.Reason, ReasonQueryError)
+	}
+}
+
+func TestComputeCostSummary_REST_ForwardsWindow(t *testing.T) {
+	var seen url.Values // query parameters of the most recent request
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+		seen = req.URL.Query()
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"code":200,"data":[{}]}`))
+	}))
+	defer server.Close()
+	rest := NewRESTClient(&httpTransport{baseURL: server.URL, client: server.Client()})
+
+	_ = ComputeCostSummary(context.Background(), rest, SummaryOptions{Window: "7d"})
+	if window := seen.Get("window"); window != "7d" {
+		t.Errorf("window not forwarded: got %q", window)
+	}
+	if aggregate := seen.Get("aggregate"); aggregate != "namespace" {
+		t.Errorf("aggregate not set: got %q", aggregate)
+	}
+	if includeIdle := seen.Get("includeIdle"); includeIdle != "true" {
+		t.Errorf("includeIdle not set: got %q", includeIdle)
+	}
+}
diff --git a/pkg/opencost/compute_test.go b/pkg/opencost/compute_test.go
new file mode 100644
index 000000000..ef658cef8
--- /dev/null
+++ b/pkg/opencost/compute_test.go
@@ -0,0 +1,253 @@
+package opencost
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strconv"
+	"strings"
+	"testing"
+
+	"github.com/skyhook-io/radar/pkg/prom"
+)
+
+// scriptedProm starts an httptest server that answers each request with the
+// body of the first scriptedCase matching its "query" parameter, or with an
+// empty successful vector when none match, and returns a prom.Client for it.
+func scriptedProm(t *testing.T, cases []scriptedCase) *prom.Client {
+	t.Helper()
+	handler := func(w http.ResponseWriter, req *http.Request) {
+		reply := `{"status":"success","data":{"resultType":"vector","result":[]}}` // default: empty success
+		promQL := req.URL.Query().Get("query")
+		for _, sc := range cases {
+			if sc.matches(promQL) {
+				reply = sc.body
+				break
+			}
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(reply))
+	}
+	server := httptest.NewServer(http.HandlerFunc(handler))
+	t.Cleanup(server.Close)
+	return prom.NewClient(prom.NewHTTPTransport(server.URL, "", nil))
+}
+
+type scriptedCase struct { // one canned reply for scriptedProm, chosen by query substring
+	contains string // substring the PromQL query must contain for this case to apply
+	body     string // raw response body written when the case applies
+}
+
+func (c scriptedCase) matches(q string) bool { // reports whether q contains c.contains
+	return strings.Contains(q, c.contains)
+}
+
+// vectorBody renders a minimal successful Prometheus vector response with
+// one sample per entry of samples, each labelled namespace=<key>.
+func vectorBody(samples map[string]float64) string {
+	type sample struct {
+		Metric map[string]string `json:"metric"`
+		Value  []interface{}     `json:"value"`
+	}
+	type payload struct {
+		ResultType string   `json:"resultType"`
+		Result     []sample `json:"result"`
+	}
+	var data payload
+	data.ResultType = "vector"
+	for namespace, value := range samples {
+		labels := map[string]string{"namespace": namespace}
+		data.Result = append(data.Result, sample{Metric: labels, Value: []interface{}{1700000000.0, formatFloat(value)}})
+	}
+	encoded, _ := json.Marshal(struct {
+		Status string  `json:"status"`
+		Data   payload `json:"data"`
+	}{Status: "success", Data: data})
+	return string(encoded)
+}
+
+// scalarBody renders a successful vector response holding one label-less sample v.
+func scalarBody(v float64) string {
+	type sample struct {
+		Metric map[string]string `json:"metric"`
+		Value  []interface{}     `json:"value"`
+	}
+	type payload struct {
+		ResultType string   `json:"resultType"`
+		Result     []sample `json:"result"`
+	}
+	only := sample{Metric: map[string]string{}, Value: []interface{}{1700000000.0, formatFloat(v)}}
+	encoded, _ := json.Marshal(struct {
+		Status string  `json:"status"`
+		Data   payload `json:"data"`
+	}{Status: "success", Data: payload{ResultType: "vector", Result: []sample{only}}})
+	return string(encoded)
+}
+
+// formatFloat renders v as the shortest plain-decimal string (no exponent)
+// that parses back to exactly v, so test inputs round-trip unchanged.
+func formatFloat(v float64) string {
+	return string(strconv.AppendFloat(nil, v, 'f', -1, 64))
+}
+
+func TestComputeCostSummary_HappyPath(t *testing.T) {
+	promClient := scriptedProm(t, []scriptedCase{
+		{contains: "container_cpu_allocation", body: vectorBody(map[string]float64{"checkout": 2.0, "payments": 1.0})},
+		{contains: "container_memory_allocation_bytes", body: vectorBody(map[string]float64{"checkout": 3.0, "payments": 0.5})},
+		{contains: "container_cpu_usage_seconds_total", body: vectorBody(map[string]float64{"checkout": 0.8, "payments": 0.6})},
+		{contains: "container_memory_working_set_bytes", body: vectorBody(map[string]float64{"checkout": 1.2, "payments": 0.25})},
+		{contains: "pv_hourly_cost", body: vectorBody(map[string]float64{"checkout": 0.05})},
+		{contains: "node_total_hourly_cost", body: scalarBody(8.0)}, // above the namespace sum, so it becomes the total
+	})
+
+	summary := ComputeCostSummaryFromProm(context.Background(), promClient, SummaryOptions{})
+	if !summary.Available {
+		t.Fatalf("summary unavailable: %+v", summary)
+	}
+	if summary.Currency != "USD" || summary.Window != "1h" {
+		t.Errorf("currency/window defaults: %+v", summary)
+	}
+	if summary.TotalHourlyCost != 8.0 {
+		t.Errorf("TotalHourlyCost=%v, want 8.0 (node_total_hourly_cost ceiling)", summary.TotalHourlyCost)
+	}
+	if summary.TotalStorageCost != 0.05 {
+		t.Errorf("TotalStorageCost=%v, want 0.05", summary.TotalStorageCost)
+	}
+	// totalAlloc = (2+3) + (1+0.5) = 6.5; totalUsage = (0.8+1.2) + (0.6+0.25) = 2.85
+	// clusterEff = 2.85/6.5 * 100 ≈ 43.846 → 43.8 at 1 dp
+	if eff := summary.ClusterEfficiency; eff < 43 || eff > 44 {
+		t.Errorf("ClusterEfficiency=%v, want ~43.8", eff)
+	}
+	// totalIdle = totalAlloc - totalUsage = 6.5 - 2.85 = 3.65
+	if idle := summary.TotalIdleCost; idle < 3.5 || idle > 3.8 {
+		t.Errorf("TotalIdleCost=%v, want ~3.65", idle)
+	}
+	if len(summary.Namespaces) != 2 {
+		t.Fatalf("expected 2 namespaces, got %d", len(summary.Namespaces))
+	}
+	// Rows sort by HourlyCost desc: checkout = 2+3+0.05 = 5.05 beats payments = 1+0.5 = 1.5
+	if first := summary.Namespaces[0].Name; first != "checkout" {
+		t.Errorf("first namespace should be checkout (higher cost); got %s", first)
+	}
+	if cost := summary.Namespaces[0].HourlyCost; cost != 5.05 {
+		t.Errorf("checkout.HourlyCost=%v, want 5.05", cost)
+	}
+}
+
+func TestComputeCostSummary_NoMetricsReason(t *testing.T) {
+	promClient := scriptedProm(t, []scriptedCase{
+		// No cases: every query gets scriptedProm's default empty vector.
+	})
+	summary := ComputeCostSummaryFromProm(context.Background(), promClient, SummaryOptions{})
+	if summary.Available {
+		t.Error("expected Available=false when no metrics")
+	}
+	if summary.Reason != ReasonNoMetrics {
+		t.Errorf("Reason=%q, want %q", summary.Reason, ReasonNoMetrics)
+	}
+}
+
+func TestComputeCostSummary_QueryErrorReason(t *testing.T) {
+	// Every request gets a 502, so the primary query and its opencost_* fallback both fail.
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusBadGateway)
+	}))
+	defer server.Close()
+	promClient := prom.NewClient(prom.NewHTTPTransport(server.URL, "", nil))
+
+	summary := ComputeCostSummaryFromProm(context.Background(), promClient, SummaryOptions{})
+	if summary.Available {
+		t.Error("expected Available=false on query error")
+	}
+	if summary.Reason != ReasonQueryError {
+		t.Errorf("Reason=%q", summary.Reason)
+	}
+}
+
+func TestComputeCostSummary_FallsBackToOpencostMetricNames(t *testing.T) {
+	// The primary CPU query (container_cpu_allocation) gets a 502, while the
+	// fallback (opencost_container_cpu_cost_total) succeeds, so the summary
+	// must be built from the fallback series.
+	//
+	// There is no call counter: the handler picks its reply purely from the
+	// metric name found in the request's query string.
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+		promQL := req.URL.Query().Get("query")
+		// Anything not matched below gets an empty successful vector.
+		reply := `{"status":"success","data":{"resultType":"vector","result":[]}}`
+		switch {
+		case strings.Contains(promQL, "container_cpu_allocation"):
+			w.WriteHeader(http.StatusBadGateway)
+			return
+		case strings.Contains(promQL, "opencost_container_cpu_cost_total"):
+			reply = vectorBody(map[string]float64{"checkout": 2.0})
+		case strings.Contains(promQL, "container_memory_allocation_bytes"):
+			reply = vectorBody(map[string]float64{"checkout": 1.0})
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(reply))
+	}))
+	defer server.Close()
+	promClient := prom.NewClient(prom.NewHTTPTransport(server.URL, "", nil))
+
+	summary := ComputeCostSummaryFromProm(context.Background(), promClient, SummaryOptions{})
+	if !summary.Available {
+		t.Fatalf("expected Available=true with fallback metrics; %+v", summary)
+	}
+	if rows := summary.Namespaces; len(rows) != 1 || rows[0].Name != "checkout" {
+		t.Errorf("unexpected namespaces: %+v", rows)
+	}
+}
+
+func TestComputeCostSummary_RoundsValues(t *testing.T) {
+	promClient := scriptedProm(t, []scriptedCase{
+		{contains: "container_cpu_allocation", body: vectorBody(map[string]float64{"x": 1.123456789})},
+		{contains: "container_memory_allocation_bytes", body: vectorBody(map[string]float64{"x": 2.987654321})},
+	})
+	summary := ComputeCostSummaryFromProm(context.Background(), promClient, SummaryOptions{})
+	if !summary.Available {
+		t.Fatalf("summary unavailable: %+v", summary)
+	}
+	row := summary.Namespaces[0] // the only namespace, "x"; costs are rounded to 4 dp
+	if row.CPUCost != 1.1235 {
+		t.Errorf("CPU rounding: got %v, want 1.1235", row.CPUCost)
+	}
+	if row.MemoryCost != 2.9877 {
+		t.Errorf("Memory rounding: got %v, want 2.9877", row.MemoryCost)
+	}
+}
+
+func TestWindowHours(t *testing.T) {
+	table := []struct {
+		in   string
+		want float64
+	}{
+		// Hours, days and weeks.
+		{"1h", 1},
+		{"24h", 24},
+		{"7d", 168},
+		{"1w", 168},
+		{"30d", 720},
+		// Fractional hours are unusual but accepted.
+		{"1.5h", 1.5},
+		// A lone "m" is deliberately minutes, never months. This case
+		// pins that decision so nobody quietly "fixes" windowHours to
+		// read "m" as months.
+		{"5m", 5.0 / 60},
+		// Inputs that fall back to 1: empty, bare unit, non-positive, unparsable.
+		{"", 1},
+		{"h", 1},
+		{"-5h", 1},
+		{"0h", 1},
+		{"abch", 1},
+		// An unrecognised unit falls back to 1 as well.
+		{"3y", 1},
+	}
+	// Exact float comparison is intended: every expectation is computed
+	// from the same constants the parser is expected to produce.
+	for _, row := range table {
+		if got := windowHours(row.in); got != row.want {
+			t.Errorf("windowHours(%q) = %v, want %v", row.in, got, row.want)
+		}
+	}
+}
diff --git a/pkg/opencost/nodes.go b/pkg/opencost/nodes.go
new file mode 100644
index 000000000..e3eda73d7
--- /dev/null
+++ b/pkg/opencost/nodes.go
@@ -0,0 +1,69 @@
+package opencost
+
+import (
+	"context"
+	"log"
+	"sort"
+
+	"github.com/skyhook-io/radar/pkg/prom"
+)
+
+// ComputeNodeCosts returns a per-node hourly cost breakdown sourced from the
+// OpenCost-exported Prometheus metrics (node_total_hourly_cost,
+// node_cpu_hourly_cost, node_ram_hourly_cost). Nodes are sorted by hourly
+// cost descending, with ties broken by node name so the order is stable
+// across calls.
+//
+// Failures are never returned as errors; they are reported in-band so the
+// HTTP layer can serve them as-is:
+//   - nil client → Available=false, Reason=ReasonNoPrometheus.
+//   - node_total_hourly_cost query fails → Reason=ReasonQueryError.
+//   - query succeeds but yields no series → Reason=ReasonNoMetrics.
+//
+// The CPU and RAM breakdown queries are best-effort: when one fails the
+// failure is logged and the corresponding field is reported as 0.
+func ComputeNodeCosts(ctx context.Context, client *prom.Client) *NodeCostResponse {
+	if client == nil {
+		return &NodeCostResponse{Available: false, Reason: ReasonNoPrometheus}
+	}
+
+	totalResult, err := client.Query(ctx, `node_total_hourly_cost`)
+	if err != nil {
+		log.Printf("[opencost] node_total_hourly_cost query failed: %v", err)
+		return &NodeCostResponse{Available: false, Reason: ReasonQueryError}
+	}
+	// A nil result is treated like an empty one rather than dereferenced.
+	if totalResult == nil || len(totalResult.Series) == 0 {
+		return &NodeCostResponse{Available: false, Reason: ReasonNoMetrics}
+	}
+
+	cpuResult, cpuErr := client.Query(ctx, `node_cpu_hourly_cost`)
+	if cpuErr != nil {
+		log.Printf("[opencost] node_cpu_hourly_cost query failed, CPU cost will be reported as 0: %v", cpuErr)
+	}
+	cpuMap := lastValuePerLabel(cpuResult, cpuErr, "node")
+
+	memResult, memErr := client.Query(ctx, `node_ram_hourly_cost`)
+	if memErr != nil {
+		log.Printf("[opencost] node_ram_hourly_cost query failed, memory cost will be reported as 0: %v", memErr)
+	}
+	memMap := lastValuePerLabel(memResult, memErr, "node")
+
+	nodes := make([]NodeCost, 0, len(totalResult.Series))
+	for _, s := range totalResult.Series {
+		node := s.Labels["node"]
+		// Series without a node label or without samples cannot be
+		// attributed to a node, so they are dropped.
+		if node == "" || len(s.DataPoints) == 0 {
+			continue
+		}
+		nodes = append(nodes, NodeCost{
+			Name:         node,
+			InstanceType: s.Labels["instance_type"],
+			Region:       s.Labels["region"],
+			HourlyCost:   roundTo(s.DataPoints[len(s.DataPoints)-1].Value, 4),
+			CPUCost:      roundTo(cpuMap[node], 4),
+			MemoryCost:   roundTo(memMap[node], 4),
+		})
+	}
+
+	sort.Slice(nodes, func(i, j int) bool {
+		if nodes[i].HourlyCost != nodes[j].HourlyCost {
+			return nodes[i].HourlyCost > nodes[j].HourlyCost
+		}
+		return nodes[i].Name < nodes[j].Name
+	})
+
+	return &NodeCostResponse{Available: true, Nodes: nodes}
+}
+
+// lastValuePerLabel indexes a query result by the value of the given label,
+// keeping the last data point of each series. Series without the label or
+// without data points are ignored. A failed query (err != nil) or a nil
+// result produces an empty, non-nil map.
+func lastValuePerLabel(result *prom.QueryResult, err error, label string) map[string]float64 {
+	values := map[string]float64{}
+	if result == nil || err != nil {
+		return values
+	}
+	for _, series := range result.Series {
+		key := series.Labels[label]
+		points := series.DataPoints
+		if key != "" && len(points) > 0 {
+			values[key] = points[len(points)-1].Value
+		}
+	}
+	return values
+}
diff --git a/pkg/opencost/rest_client.go b/pkg/opencost/rest_client.go
new file mode 100644
index 000000000..200759ec1
--- /dev/null
+++ b/pkg/opencost/rest_client.go
@@ -0,0 +1,145 @@
+package opencost
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/url"
+)
+
+// RESTClient talks to OpenCost's HTTP API via an injected Transport.
+//
+// Why this exists alongside the PromQL client: OpenCost computes cost
+// internally (combining Kubernetes allocation data with cloud pricing),
+// then exposes results two ways:
+//
+//  1. REST at /allocation, /assets, /cloudCost — this package's surface.
+//  2. Prometheus-format metrics at /metrics — requires a scrape config
+//     in a reachable Prometheus instance. Covered by pkg/prom.
+//
+// Many clusters have (1) working but (2) not wired up (Prometheus exists
+// but no scrape job for OpenCost's /metrics). REST works everywhere OpenCost
+// works, so it's the default compute path.
+type RESTClient struct {
+	// t carries every request this client makes; it is set by
+	// NewRESTClient.
+	t Transport
+}
+
+// NewRESTClient returns a RESTClient that sends all of its requests
+// through t.
+func NewRESTClient(t Transport) *RESTClient {
+	client := new(RESTClient)
+	client.t = t
+	return client
+}
+
+// AllocationOptions controls an /allocation query. toQuery turns it into
+// URL parameters; apart from Window, a zero-valued field is simply left out
+// of the request.
+type AllocationOptions struct {
+	// Window is a human-readable duration or a start/end range. When empty,
+	// toQuery sends "1h".
+	// Examples: "1h", "24h", "7d", "2024-01-01T00:00:00Z,2024-01-08T00:00:00Z"
+	Window string
+
+	// Aggregate controls how rows are grouped. Any value OpenCost supports:
+	// "namespace" (default), "controller", "pod", "container",
+	// "cluster", "label:<name>", etc. When empty, the aggregate parameter is
+	// omitted from the request.
+	Aggregate string
+
+	// Step controls time-bucketing. "1h", "1d", "1w". Empty => single bucket.
+	Step string
+
+	// IncludeIdle adds a synthetic __idle__ row representing unallocated
+	// node capacity. Usually set to true so the UI can surface idle cost.
+	// Sent as includeIdle=true only when set.
+	IncludeIdle bool
+
+	// IncludeSharedCost includes shared/overhead costs in the result.
+	// Sent as includeSharedCost=true only when set.
+	IncludeSharedCost bool
+
+	// Filter is a comma-separated OpenCost filter expression (v1.106+).
+	// Empty means no filter.
+	Filter string
+}
+
+// toQuery renders the options as /allocation query parameters. "window" is
+// always present (falling back to "1h"); every other parameter is emitted
+// only when its option is non-empty or true.
+func (o AllocationOptions) toQuery() url.Values {
+	window := o.Window
+	if window == "" {
+		window = "1h"
+	}
+	params := url.Values{}
+	params.Set("window", window)
+
+	// Optional string parameters: sent only when a value was supplied.
+	for _, p := range []struct{ key, value string }{
+		{"aggregate", o.Aggregate},
+		{"step", o.Step},
+		{"filter", o.Filter},
+	} {
+		if p.value != "" {
+			params.Set(p.key, p.value)
+		}
+	}
+
+	// Boolean flags: sent as "true" when enabled, omitted otherwise.
+	for _, f := range []struct {
+		key     string
+		enabled bool
+	}{
+		{"includeIdle", o.IncludeIdle},
+		{"includeSharedCost", o.IncludeSharedCost},
+	} {
+		if f.enabled {
+			params.Set(f.key, "true")
+		}
+	}
+	return params
+}
+
+// Allocation is the per-row allocation data OpenCost returns. Fields are
+// the subset of OpenCost's schema this package's compute path consumes;
+// full field list is in OpenCost's documentation.
+//
+// Costs are in the configured currency (USD by default) and sum to the
+// given window (not per-hour unless window=1h).
+type Allocation struct {
+	// Name, Start and End identify the row and its time window. Start is
+	// parsed as RFC 3339 by bucketTimestamp to timestamp trend buckets.
+	Name  string `json:"name"`
+	Start string `json:"start,omitempty"`
+	End   string `json:"end,omitempty"`
+
+	// CPU figures and the CPU share of the cost.
+	CPUCores              float64 `json:"cpuCores,omitempty"`
+	CPUCoreRequestAverage float64 `json:"cpuCoreRequestAverage,omitempty"`
+	CPUCoreUsageAverage   float64 `json:"cpuCoreUsageAverage,omitempty"`
+	CPUCost               float64 `json:"cpuCost,omitempty"`
+
+	// RAM figures and the RAM share of the cost.
+	RAMBytes              float64 `json:"ramBytes,omitempty"`
+	RAMByteRequestAverage float64 `json:"ramByteRequestAverage,omitempty"`
+	RAMByteUsageAverage   float64 `json:"ramByteUsageAverage,omitempty"`
+	RAMCost               float64 `json:"ramCost,omitempty"`
+
+	// GPU count and the GPU share of the cost.
+	GPUCount float64 `json:"gpuCount,omitempty"`
+	GPUCost  float64 `json:"gpuCost,omitempty"`
+
+	// Remaining cost components reported by OpenCost.
+	PVCost           float64 `json:"pvCost,omitempty"`
+	NetworkCost      float64 `json:"networkCost,omitempty"`
+	LoadBalancerCost float64 `json:"loadBalancerCost,omitempty"`
+	SharedCost       float64 `json:"sharedCost,omitempty"`
+	ExternalCost     float64 `json:"externalCost,omitempty"`
+
+	// TotalCost is the figure ComputeCostTrend divides by the bucket
+	// duration to obtain an hourly rate.
+	TotalCost       float64 `json:"totalCost,omitempty"`
+	TotalEfficiency float64 `json:"totalEfficiency,omitempty"` // 0..1
+
+	// Properties holds arbitrary dimension values (namespace, cluster, labels…).
+	// Populated per OpenCost's response shape.
+	Properties map[string]interface{} `json:"properties,omitempty"`
+}
+
+// AllocationResponse is the envelope OpenCost returns from /allocation.
+// The `data` field is an array of time-window dicts: each dict maps an
+// aggregate row name (e.g. a namespace name, or "__idle__") → Allocation.
+//
+// GetAllocation treats a Code of 0 or 200 as success; any other Code is
+// turned into an error that includes Message.
+type AllocationResponse struct {
+	Code    int                      `json:"code"`
+	Status  string                   `json:"status,omitempty"`
+	Data    []map[string]*Allocation `json:"data"`
+	Message string                   `json:"message,omitempty"`
+}
+
+// GetAllocation issues a GET /allocation call with the query parameters
+// derived from opts and decodes the JSON envelope.
+//
+// It returns an error when the client has no transport, when the transport
+// call fails, or when the body cannot be parsed. When the envelope carries
+// a Code other than 0 or 200, the decoded response is returned together
+// with an error built from Code and Message.
+func (c *RESTClient) GetAllocation(ctx context.Context, opts AllocationOptions) (*AllocationResponse, error) {
+	// A nil client or transport would panic on the call below; surface it
+	// as an ordinary error so callers can report it in-band.
+	if c == nil || c.t == nil {
+		return nil, fmt.Errorf("opencost.GetAllocation: no transport configured")
+	}
+	body, err := c.t.Do(ctx, "GET", "/allocation", opts.toQuery())
+	if err != nil {
+		return nil, fmt.Errorf("opencost.GetAllocation: %w", err)
+	}
+	var resp AllocationResponse
+	if err := json.Unmarshal(body, &resp); err != nil {
+		return nil, fmt.Errorf("opencost.GetAllocation: parse response from %s: %w", c.t.Address(), err)
+	}
+	// Code 0 means the envelope carried no code at all; both that and 200
+	// count as success.
+	if resp.Code != 0 && resp.Code != 200 {
+		return &resp, fmt.Errorf("opencost: HTTP %d: %s", resp.Code, resp.Message)
+	}
+	return &resp, nil
+}
diff --git a/pkg/opencost/transport.go b/pkg/opencost/transport.go
new file mode 100644
index 000000000..732136c97
--- /dev/null
+++ b/pkg/opencost/transport.go
@@ -0,0 +1,24 @@
+package opencost
+
+import (
+	"context"
+	"net/url"
+)
+
+// Transport is the HTTP transport used by RESTClient to reach OpenCost's
+// REST API. Same shape as pkg/prom.Transport (path + params in, body out)
+// so a single concrete type in a caller can satisfy both interfaces.
+//
+// Typical implementations: direct HTTP against a known URL (in-cluster or
+// kubectl port-forwarded), a tunneled proxy transport for callers that
+// can't reach the cluster directly, and an httptest server in unit tests.
+type Transport interface {
+	// Do issues a request to path (e.g. "/allocation") with query
+	// parameters and returns the raw response body. Non-2xx responses
+	// should be returned as errors so callers don't have to re-check.
+	// method is the HTTP method; RESTClient.GetAllocation passes "GET".
+	Do(ctx context.Context, method, path string, params url.Values) ([]byte, error)
+
+	// Address returns a diagnostic identifier for this transport (the
+	// upstream URL, or a human-readable description). RESTClient includes
+	// it in parse-error messages.
+	Address() string
+}
diff --git a/pkg/opencost/trend.go b/pkg/opencost/trend.go
new file mode 100644
index 000000000..53d418b12
--- /dev/null
+++ b/pkg/opencost/trend.go
@@ -0,0 +1,205 @@
+package opencost
+
+import (
+	"context"
+	"log"
+	"sort"
+	"time"
+)
+
+// TrendOptions controls ComputeCostTrend.
+type TrendOptions struct {
+	// Window is the overall time range (e.g. "7d", "30d"). Defaults to "24h".
+	Window string
+
+	// Step is the bucket size inside the window. When empty it is chosen by
+	// defaultStep from the window length: up to 1h → 5m, up to 24h → 6h,
+	// up to 7d → 1d, anything longer → 2d. If set, overrides the default.
+	Step string
+
+	// Aggregate controls how rows inside each bucket are grouped. Defaults
+	// to "namespace" so callers can produce both total and per-namespace
+	// series from the same response. Use "cluster" when only the fleet-total
+	// line is needed (cheaper for the backend + OpenCost).
+	Aggregate string
+}
+
+// ComputeCostTrend queries OpenCost's /allocation with a step parameter
+// and returns a bucketed cost trend. Each CostTrendSeries becomes one line
+// on the UI chart; a synthetic "__total__" series carries the cluster-level
+// sum so the default view doesn't have to re-sum on the client.
+//
+// Contract mirrors ComputeCostSummary:
+//   - nil client, REST unreachable, or parse error → Available=false,
+//     Reason=ReasonQueryError.
+//   - OpenCost responds but has no usable buckets → Available=false,
+//     Reason=ReasonNoMetrics.
+//   - Otherwise Available=true with "__total__" first, followed by one
+//     CostTrendSeries per aggregate row ordered by peak value descending
+//     (ties broken by name). Points inside each series are ordered by
+//     bucket timestamp ascending.
+//
+// Each data point's Value is normalized to $/hr for the bucket (OpenCost's
+// per-bucket totalCost ÷ bucket duration), matching the hourly-rate
+// convention used throughout the Costs UI. The UI multiplies by 730 for
+// monthly projections or hours-in-period for retrospective totals.
+func ComputeCostTrend(ctx context.Context, client *RESTClient, opts TrendOptions) *CostTrendResponse {
+	window := opts.Window
+	if window == "" {
+		window = "24h"
+	}
+	aggregate := opts.Aggregate
+	if aggregate == "" {
+		aggregate = "namespace"
+	}
+	step := opts.Step
+	if step == "" {
+		step = defaultStep(window)
+	}
+
+	// Without a client there is nothing to query. Report it in-band, like
+	// every other failure, instead of dereferencing nil.
+	if client == nil {
+		log.Printf("[opencost] /allocation trend skipped: nil REST client (window=%s step=%s)", window, step)
+		return &CostTrendResponse{Available: false, Reason: ReasonQueryError, Range: window}
+	}
+
+	resp, err := client.GetAllocation(ctx, AllocationOptions{
+		Window:      window,
+		Aggregate:   aggregate,
+		Step:        step,
+		IncludeIdle: false, // idle is a summary concept; drop it here to keep the chart focused on spend
+	})
+	if err != nil {
+		log.Printf("[opencost] /allocation trend failed (window=%s step=%s): %v", window, step, err)
+		return &CostTrendResponse{Available: false, Reason: ReasonQueryError, Range: window}
+	}
+	if resp == nil || len(resp.Data) == 0 {
+		return &CostTrendResponse{Available: false, Reason: ReasonNoMetrics, Range: window}
+	}
+
+	bucketHours := windowHours(step)
+	skippedBuckets := 0
+
+	// Walk buckets in order. For each bucket, accumulate per-aggregate
+	// totals and the bucket timestamp (parsed from one row's Start, since
+	// every row in a bucket shares the same window).
+	seriesByName := make(map[string][]CostDataPoint)
+	totals := make([]CostDataPoint, 0, len(resp.Data))
+
+	for _, bucket := range resp.Data {
+		if len(bucket) == 0 {
+			continue
+		}
+		ts := bucketTimestamp(bucket)
+		if ts == 0 {
+			// No parseable Start on any row — skip rather than stamping all
+			// points at the Unix epoch, which would collapse the chart.
+			skippedBuckets++
+			continue
+		}
+		var bucketTotal float64
+		for name, a := range bucket {
+			if a == nil || name == "__idle__" {
+				continue
+			}
+			// Normalize to hourly rate for this bucket. OpenCost returns
+			// totalCost summed across the bucket; dividing by bucket
+			// duration (hours) gives the $/hr rate the UI consumes.
+			value := a.TotalCost / bucketHours
+			seriesByName[name] = append(seriesByName[name], CostDataPoint{
+				Timestamp: ts,
+				Value:     roundTo(value, 4),
+			})
+			bucketTotal += a.TotalCost
+		}
+		totals = append(totals, CostDataPoint{
+			Timestamp: ts,
+			Value:     roundTo(bucketTotal/bucketHours, 4),
+		})
+	}
+
+	if skippedBuckets > 0 {
+		log.Printf("[opencost] trend dropped %d bucket(s) with no parseable timestamp (window=%s step=%s)", skippedBuckets, window, step)
+	}
+
+	if len(totals) == 0 {
+		return &CostTrendResponse{Available: false, Reason: ReasonNoMetrics, Range: window}
+	}
+
+	// Assemble the response. Put __total__ first so the UI can find it
+	// without scanning, then the per-row series sorted by peak spend
+	// (descending) so the chart's default stacking shows the biggest
+	// spenders consistently across refreshes.
+	series := make([]CostTrendSeries, 0, len(seriesByName)+1)
+	series = append(series, CostTrendSeries{
+		Namespace:  "__total__",
+		DataPoints: sortByTimestamp(totals),
+	})
+
+	type namedSeries struct {
+		name   string
+		peak   float64
+		points []CostDataPoint
+	}
+	byPeak := make([]namedSeries, 0, len(seriesByName))
+	for name, pts := range seriesByName {
+		pts = sortByTimestamp(pts)
+		peak := 0.0
+		for _, p := range pts {
+			if p.Value > peak {
+				peak = p.Value
+			}
+		}
+		byPeak = append(byPeak, namedSeries{name: name, peak: peak, points: pts})
+	}
+	// seriesByName is a map, so byPeak arrives in random order. Break peak
+	// ties by name; otherwise equal-peak series would swap places between
+	// refreshes.
+	sort.Slice(byPeak, func(i, j int) bool {
+		if byPeak[i].peak != byPeak[j].peak {
+			return byPeak[i].peak > byPeak[j].peak
+		}
+		return byPeak[i].name < byPeak[j].name
+	})
+	for _, s := range byPeak {
+		series = append(series, CostTrendSeries{
+			Namespace:  s.name,
+			DataPoints: s.points,
+		})
+	}
+
+	return &CostTrendResponse{
+		Available: true,
+		Range:     window,
+		Series:    series,
+	}
+}
+
+// defaultStep maps a window to the bucket size used when the caller did
+// not choose one. The tiers are deliberately coarser than a charting
+// library would pick: OpenCost's /allocation with step= scales roughly
+// with bucket count — a 24h query at 1h step takes ~30s on a test cluster
+// vs ~3s at 6h step — and callers behind short request deadlines need the
+// response well under that budget.
+//
+// Resulting bucket counts: 1h → 12, 24h → 4, 7d → 7, 30d → 15.
+func defaultStep(window string) string {
+	tiers := []struct {
+		maxHours float64
+		step     string
+	}{
+		{1, "5m"},
+		{24, "6h"},
+		{24 * 7, "1d"},
+	}
+	hours := windowHours(window)
+	for _, tier := range tiers {
+		if hours <= tier.maxHours {
+			return tier.step
+		}
+	}
+	// Anything longer than a week.
+	return "2d"
+}
+
+// bucketTimestamp derives a Unix-seconds timestamp for a bucket from the
+// first row whose Start parses as RFC 3339 (rows in a bucket share the same
+// window, so any of them is representative). It returns 0 when no row has
+// a parseable Start. Seconds are used because the PromQL trend path emits
+// seconds too, and both paths feed the same CostDataPoint.Timestamp field
+// — the UI assumes seconds at the render layer.
+func bucketTimestamp(bucket map[string]*Allocation) int64 {
+	for _, row := range bucket {
+		if row == nil || row.Start == "" {
+			continue
+		}
+		parsed, err := time.Parse(time.RFC3339, row.Start)
+		if err != nil {
+			continue
+		}
+		return parsed.Unix()
+	}
+	return 0
+}
+
+// sortByTimestamp orders pts in place by ascending Timestamp and returns
+// the same slice so the call can be used inline.
+func sortByTimestamp(pts []CostDataPoint) []CostDataPoint {
+	earlier := func(a, b int) bool { return pts[a].Timestamp < pts[b].Timestamp }
+	sort.Slice(pts, earlier)
+	return pts
+}
diff --git a/pkg/opencost/trend_prom.go b/pkg/opencost/trend_prom.go
new file mode 100644
index 000000000..f9f9a727e
--- /dev/null
+++ b/pkg/opencost/trend_prom.go
@@ -0,0 +1,125 @@
+package opencost
+
+import (
+	"context"
+	"log"
+	"sort"
+	"time"
+
+	"github.com/skyhook-io/radar/pkg/prom"
+)
+
+// TrendPromOptions controls ComputeCostTrendFromProm.
+type TrendPromOptions struct {
+	// Range is "6h", "24h", "7d" (default "24h"). Drives the start/end and
+	// step of the underlying range query and is echoed on the response.
+	// Any other value, including empty, is handled as "24h" by
+	// resolveTrendRange.
+	Range string
+
+	// MaxSeries is the top-N namespaces kept; the rest are aggregated into
+	// a single "other" series. Defaults to 8 when zero or negative.
+	MaxSeries int
+}
+
+// ComputeCostTrendFromProm returns a stacked per-namespace cost trend from
+// OpenCost-exported Prometheus metrics. The top MaxSeries namespaces by
+// latest cost are returned as individual series, highest first; the
+// remaining named namespaces are collapsed into a single "other" series
+// appended last. Series without a namespace label are dropped entirely:
+// they are neither ranked nor counted in "other".
+//
+// Contract mirrors ComputeCostSummaryFromProm:
+//   - nil client → Available=false, Reason=ReasonNoPrometheus.
+//   - Underlying range query fails → Available=false, Reason=ReasonQueryError.
+//   - No series returned, or none with a namespace label → Available=false,
+//     Reason=ReasonNoMetrics.
+func ComputeCostTrendFromProm(ctx context.Context, client *prom.Client, opts TrendPromOptions) *CostTrendResponse {
+	if client == nil {
+		return &CostTrendResponse{Available: false, Reason: ReasonNoPrometheus}
+	}
+
+	start, end, step, label := resolveTrendRange(opts.Range)
+	maxSeries := opts.MaxSeries
+	if maxSeries <= 0 {
+		maxSeries = 8
+	}
+
+	const query = `sum by (namespace) (
+  label_replace(avg_over_time(container_cpu_allocation{namespace!=""}[1h]), "namespace", "$1", "exported_namespace", "(.+)") * on(node) group_left() node_cpu_hourly_cost
+) + sum by (namespace) (
+  label_replace(avg_over_time(container_memory_allocation_bytes{namespace!=""}[1h]), "namespace", "$1", "exported_namespace", "(.+)") / 1073741824 * on(node) group_left() node_ram_hourly_cost
+)`
+
+	result, err := client.QueryRange(ctx, query, start, end, step)
+	if err != nil {
+		log.Printf("[opencost] PromQL trend range query failed (range=%s): %v", label, err)
+		return &CostTrendResponse{Available: false, Reason: ReasonQueryError}
+	}
+	// A nil result is treated like an empty one rather than dereferenced.
+	if result == nil || len(result.Series) == 0 {
+		return &CostTrendResponse{Available: false, Reason: ReasonNoMetrics}
+	}
+
+	// Rank every named series by its most recent value.
+	type nsRank struct {
+		ns       string
+		lastCost float64
+		idx      int
+	}
+	ranks := make([]nsRank, 0, len(result.Series))
+	for i, s := range result.Series {
+		ns := s.Labels["namespace"]
+		if ns == "" {
+			continue
+		}
+		var last float64
+		if len(s.DataPoints) > 0 {
+			last = s.DataPoints[len(s.DataPoints)-1].Value
+		}
+		ranks = append(ranks, nsRank{ns: ns, lastCost: last, idx: i})
+	}
+	if len(ranks) == 0 {
+		// Nothing attributable to a namespace: report "no metrics" rather
+		// than Available=true with an empty chart.
+		return &CostTrendResponse{Available: false, Reason: ReasonNoMetrics}
+	}
+	sort.Slice(ranks, func(i, j int) bool { return ranks[i].lastCost > ranks[j].lastCost })
+
+	// Split the ranking into the individually reported top-N and the rest.
+	top, rest := ranks, []nsRank(nil)
+	if len(ranks) > maxSeries {
+		top, rest = ranks[:maxSeries], ranks[maxSeries:]
+	}
+
+	series := make([]CostTrendSeries, 0, len(top)+1)
+	for _, r := range top {
+		s := result.Series[r.idx]
+		dps := make([]CostDataPoint, 0, len(s.DataPoints))
+		for _, dp := range s.DataPoints {
+			dps = append(dps, CostDataPoint{Timestamp: dp.Timestamp, Value: roundTo(dp.Value, 4)})
+		}
+		series = append(series, CostTrendSeries{Namespace: r.ns, DataPoints: dps})
+	}
+
+	// Sum only the ranked remainder into "other". Walking result.Series
+	// here would also pull in the unnamed series skipped above.
+	otherMap := make(map[int64]float64)
+	for _, r := range rest {
+		for _, dp := range result.Series[r.idx].DataPoints {
+			otherMap[dp.Timestamp] += dp.Value
+		}
+	}
+	if len(otherMap) > 0 {
+		dps := make([]CostDataPoint, 0, len(otherMap))
+		for ts, val := range otherMap {
+			dps = append(dps, CostDataPoint{Timestamp: ts, Value: roundTo(val, 4)})
+		}
+		// Map iteration order is random; restore chronological order.
+		sort.Slice(dps, func(i, j int) bool { return dps[i].Timestamp < dps[j].Timestamp })
+		series = append(series, CostTrendSeries{Namespace: "other", DataPoints: dps})
+	}
+
+	return &CostTrendResponse{Available: true, Range: label, Series: series}
+}
+
+// resolveTrendRange translates a named range into the query bounds. end is
+// the current time, start lies the range's span before it, step is the
+// resolution paired with that range, and label is the canonical name
+// echoed to callers. "6h" and "7d" are recognized; every other input is
+// handled as "24h".
+func resolveTrendRange(rangeStr string) (start, end time.Time, step time.Duration, label string) {
+	span := 24 * time.Hour
+	step, label = time.Hour, "24h"
+	switch rangeStr {
+	case "6h":
+		span, step, label = 6*time.Hour, 15*time.Minute, "6h"
+	case "7d":
+		span, step, label = 7*24*time.Hour, 6*time.Hour, "7d"
+	}
+	end = time.Now()
+	start = end.Add(-span)
+	return start, end, step, label
+}
diff --git a/pkg/opencost/trend_prom_test.go b/pkg/opencost/trend_prom_test.go
new file mode 100644
index 000000000..d1915965e
--- /dev/null
+++ b/pkg/opencost/trend_prom_test.go
@@ -0,0 +1,183 @@
+package opencost
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/skyhook-io/radar/pkg/prom"
+)
+
+// matrixBody builds a Prometheus range-query (matrix) response body. Every
+// entry in series becomes one result carrying a single "namespace" label
+// and that entry's own (ts, value) pairs, with each value rendered through
+// formatFloat.
+func matrixBody(series []namespaceSeries) string {
+	type sample = []interface{}
+	type entry struct {
+		Metric map[string]string `json:"metric"`
+		Values []sample          `json:"values"`
+	}
+
+	var results []entry
+	for _, ns := range series {
+		samples := make([]sample, 0, len(ns.points))
+		for _, pt := range ns.points {
+			samples = append(samples, sample{float64(pt.ts), formatFloat(pt.v)})
+		}
+		results = append(results, entry{
+			Metric: map[string]string{"namespace": ns.ns},
+			Values: samples,
+		})
+	}
+
+	envelope := struct {
+		Status string `json:"status"`
+		Data   struct {
+			ResultType string  `json:"resultType"`
+			Result     []entry `json:"result"`
+		} `json:"data"`
+	}{Status: "success"}
+	envelope.Data.ResultType = "matrix"
+	envelope.Data.Result = results
+
+	encoded, _ := json.Marshal(envelope)
+	return string(encoded)
+}
+
+// namespaceSeries is one fixture series for matrixBody: ns becomes the
+// value of the "namespace" label and points become the series' samples.
+type namespaceSeries struct {
+	ns     string
+	points []dpoint
+}
+
+// dpoint is a single fixture sample: ts is emitted as the sample timestamp
+// and v as its value.
+type dpoint struct {
+	ts int64
+	v  float64
+}
+
+// rangeProm starts an httptest server that answers every request with body
+// as JSON and returns a prom.Client pointed at it. The server is shut down
+// through t.Cleanup.
+func rangeProm(t *testing.T, body string) *prom.Client {
+	t.Helper()
+	handler := func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(body))
+	}
+	server := httptest.NewServer(http.HandlerFunc(handler))
+	t.Cleanup(server.Close)
+	transport := prom.NewHTTPTransport(server.URL, "", nil)
+	return prom.NewClient(transport)
+}
+
+// TestComputeCostTrendFromProm_TopNAndOther checks the top-N split: with
+// MaxSeries=2 the two namespaces with the highest latest value come back
+// individually and the remainder is summed per timestamp into "other".
+func TestComputeCostTrendFromProm_TopNAndOther(t *testing.T) {
+	// Five namespaces whose latest values rank a=10, b=8, c=5, d=3, e=1.
+	fixture := []namespaceSeries{
+		{"a", []dpoint{{1700000000, 9}, {1700003600, 10}}},
+		{"b", []dpoint{{1700000000, 7}, {1700003600, 8}}},
+		{"c", []dpoint{{1700000000, 4}, {1700003600, 5}}},
+		{"d", []dpoint{{1700000000, 2}, {1700003600, 3}}},
+		{"e", []dpoint{{1700000000, 1}, {1700003600, 1}}},
+	}
+	opts := TrendPromOptions{Range: "24h", MaxSeries: 2}
+
+	trend := ComputeCostTrendFromProm(context.Background(), rangeProm(t, matrixBody(fixture)), opts)
+	if !trend.Available {
+		t.Fatalf("expected Available=true, got %+v", trend)
+	}
+	if trend.Range != "24h" {
+		t.Errorf("Range: got %q, want %q", trend.Range, "24h")
+	}
+	if n := len(trend.Series); n != 3 {
+		t.Fatalf("expected 3 series (2 top + other), got %d: %v", n, namesOf(trend.Series))
+	}
+
+	// The two leading series are the top-N by last value, highest first.
+	first, second := trend.Series[0].Namespace, trend.Series[1].Namespace
+	if first != "a" || second != "b" {
+		t.Errorf("top series: got [%s, %s], want [a, b]", first, second)
+	}
+
+	// The trailing series is "other": c+d+e summed per timestamp.
+	rest := trend.Series[2]
+	if rest.Namespace != "other" {
+		t.Errorf("third series namespace: got %q, want %q", rest.Namespace, "other")
+	}
+	if n := len(rest.DataPoints); n != 2 {
+		t.Fatalf("other should have 2 points, got %d", n)
+	}
+	// Points come back in ascending Timestamp order.
+	if ts := rest.DataPoints[0].Timestamp; ts != 1700000000 {
+		t.Errorf("other[0].Timestamp: got %d", ts)
+	}
+	// ts=1700000000: 4+2+1 = 7
+	if v := rest.DataPoints[0].Value; v != 7 {
+		t.Errorf("other[0].Value: got %v, want 7", v)
+	}
+	// ts=1700003600: 5+3+1 = 9
+	if v := rest.DataPoints[1].Value; v != 9 {
+		t.Errorf("other[1].Value: got %v, want 9", v)
+	}
+}
+
+// TestComputeCostTrendFromProm_AllUnderMaxSeriesNoOther checks that no
+// "other" series is produced when every namespace fits within MaxSeries.
+func TestComputeCostTrendFromProm_AllUnderMaxSeriesNoOther(t *testing.T) {
+	// Two namespaces against the default MaxSeries of 8: nothing is left
+	// over to collapse.
+	fixture := []namespaceSeries{
+		{"a", []dpoint{{1700000000, 1}}},
+		{"b", []dpoint{{1700000000, 2}}},
+	}
+	trend := ComputeCostTrendFromProm(context.Background(), rangeProm(t, matrixBody(fixture)), TrendPromOptions{Range: "24h"})
+	if !trend.Available {
+		t.Fatalf("expected Available=true, got %+v", trend)
+	}
+	if n := len(trend.Series); n != 2 {
+		t.Errorf("expected 2 series (no 'other'), got %d: %v", n, namesOf(trend.Series))
+	}
+	for _, entry := range trend.Series {
+		if entry.Namespace != "other" {
+			continue
+		}
+		t.Errorf("unexpected 'other' series with %d points: %+v", len(entry.DataPoints), entry.DataPoints)
+	}
+}
+
+// TestComputeCostTrendFromProm_EmptyNamespaceLabelSkipped checks that a
+// series without a namespace label never shows up in the output: it cannot
+// be ranked or attributed, so the rank pass leaves it out.
+func TestComputeCostTrendFromProm_EmptyNamespaceLabelSkipped(t *testing.T) {
+	fixture := []namespaceSeries{
+		{"", []dpoint{{1700000000, 99}}}, // would be top by value, but unnamed
+		{"a", []dpoint{{1700000000, 1}}},
+	}
+	trend := ComputeCostTrendFromProm(context.Background(), rangeProm(t, matrixBody(fixture)), TrendPromOptions{Range: "24h"})
+	if !trend.Available {
+		t.Fatalf("expected Available=true, got %+v", trend)
+	}
+	for _, entry := range trend.Series {
+		if entry.Namespace != "" {
+			continue
+		}
+		t.Errorf("unexpected empty-namespace series in output: %+v", entry)
+	}
+}
+
+// TestComputeCostTrendFromProm_NilClient checks that a nil Prometheus
+// client is reported in-band as unavailable with ReasonNoPrometheus.
+func TestComputeCostTrendFromProm_NilClient(t *testing.T) {
+	opts := TrendPromOptions{Range: "24h"}
+	trend := ComputeCostTrendFromProm(context.Background(), nil, opts)
+	if trend.Available {
+		t.Errorf("expected Available=false with nil client")
+	}
+	if reason := trend.Reason; reason != ReasonNoPrometheus {
+		t.Errorf("Reason: got %q, want %q", reason, ReasonNoPrometheus)
+	}
+}
+
+// TestComputeCostTrendFromProm_NoSeries checks that a successful range
+// query with an empty result is reported as unavailable with
+// ReasonNoMetrics.
+func TestComputeCostTrendFromProm_NoSeries(t *testing.T) {
+	const noResults = `{"status":"success","data":{"resultType":"matrix","result":[]}}`
+	trend := ComputeCostTrendFromProm(context.Background(), rangeProm(t, noResults), TrendPromOptions{Range: "24h"})
+	if trend.Available {
+		t.Errorf("expected Available=false on no series")
+	}
+	if reason := trend.Reason; reason != ReasonNoMetrics {
+		t.Errorf("Reason: got %q, want %q", reason, ReasonNoMetrics)
+	}
+}
+
+// namesOf returns the Namespace of every series, in order; used to make
+// test failure messages readable.
+func namesOf(series []CostTrendSeries) []string {
+	names := make([]string, 0, len(series))
+	for idx := range series {
+		names = append(names, series[idx].Namespace)
+	}
+	return names
+}
diff --git a/internal/opencost/types.go b/pkg/opencost/types.go
similarity index 88%
rename from internal/opencost/types.go
rename to pkg/opencost/types.go
index c977a53f8..8c8d22b61 100644
--- a/internal/opencost/types.go
+++ b/pkg/opencost/types.go
@@ -16,18 +16,24 @@ type CostSummary struct {
 	Window            string          `json:"window,omitempty"`
 	TotalHourlyCost   float64         `json:"totalHourlyCost,omitempty"`
 	TotalStorageCost  float64         `json:"totalStorageCost,omitempty"`
+	TotalNetworkCost  float64         `json:"totalNetworkCost,omitempty"`
 	TotalIdleCost     float64         `json:"totalIdleCost,omitempty"`
 	ClusterEfficiency float64         `json:"clusterEfficiency,omitempty"` // 0-100
 	Namespaces        []NamespaceCost `json:"namespaces,omitempty"`
 }
 
-// NamespaceCost holds per-namespace cost breakdown.
+// NamespaceCost holds per-row cost breakdown. The name reflects the
+// default aggregation; the struct is also used for controller and pod
+// rows — Kind disambiguates (empty = namespace).
 type NamespaceCost struct {
 	Name            string  `json:"name"`
+	Kind            string  `json:"kind,omitempty"` // "namespace" (default if empty) | "controller" | "pod"
+	Namespace       string  `json:"namespace,omitempty"` // populated for controller/pod rows
 	HourlyCost      float64 `json:"hourlyCost"`
 	CPUCost         float64 `json:"cpuCost"`
 	MemoryCost      float64 `json:"memoryCost"`
 	StorageCost     float64 `json:"storageCost,omitempty"`
+	NetworkCost     float64 `json:"networkCost,omitempty"`
 	CPUUsageCost    float64 `json:"cpuUsageCost,omitempty"`
 	MemoryUsageCost float64 `json:"memoryUsageCost,omitempty"`
 	Efficiency      float64 `json:"efficiency,omitempty"` // 0-100
diff --git a/pkg/opencost/workloads.go b/pkg/opencost/workloads.go
new file mode 100644
index 000000000..e94cee8fe
--- /dev/null
+++ b/pkg/opencost/workloads.go
@@ -0,0 +1,175 @@
+package opencost
+
+import (
+	"context"
+	"log"
+	"sort"
+	"strings"
+
+	"github.com/skyhook-io/radar/pkg/prom"
+)
+
+// WorkloadOwner identifies a workload by name and kind.
+type WorkloadOwner struct {
+	Name string
+	Kind string
+}
+
+// PodOwnerLookup resolves a pod name to its owning workload, reporting
+// false when the owner is unknown. It receives only the pod name, so the
+// caller must bind it to the namespace being queried. It may be backed by a
+// K8s informer cache or any other pod-metadata source; keeping it abstract
+// keeps pkg/opencost free of k8s.io/client-go.
+type PodOwnerLookup func(podName string) (WorkloadOwner, bool)
+
+// ComputeWorkloadsFromProm returns workload-level cost breakdown for a
+// namespace, sourced from OpenCost-exported Prometheus metrics with a
+// caller-supplied pod→owner mapping (typically from a K8s informer cache).
+// Workloads are sorted by hourly cost descending, ties by name then kind.
+//
+// When ownerLookup is nil or can't resolve a pod, the pod is grouped under a
+// "standalone" workload named by stripPodSuffix (best-effort for orphans).
+func ComputeWorkloadsFromProm(ctx context.Context, client *prom.Client, namespace string, ownerLookup PodOwnerLookup) *WorkloadCostResponse {
+	if client == nil {
+		return &WorkloadCostResponse{Namespace: namespace, Available: false, Reason: ReasonNoPrometheus}
+	}
+	if namespace == "" {
+		return &WorkloadCostResponse{Available: false, Reason: ReasonQueryError}
+	}
+
+	safeNS := prom.SanitizeLabelValue(namespace)
+
+	cpuResult, err := client.Query(ctx,
+		`sum by (pod) ((avg_over_time(container_cpu_allocation{exported_namespace="`+safeNS+`"}[1h]) or avg_over_time(container_cpu_allocation{namespace="`+safeNS+`", exported_namespace=""}[1h])) * on(node) group_left() node_cpu_hourly_cost)`)
+	if err != nil {
+		log.Printf("[opencost] workloads CPU query failed for ns=%q, trying opencost_container_cpu_cost_total: %v", namespace, err)
+		cpuResult, err = client.Query(ctx,
+			`sum by (pod) (rate(opencost_container_cpu_cost_total{exported_namespace="`+safeNS+`"}[1h]) or rate(opencost_container_cpu_cost_total{namespace="`+safeNS+`", exported_namespace=""}[1h]))`)
+		if err != nil {
+			log.Printf("[opencost] workloads CPU fallback query also failed for ns=%q: %v", namespace, err)
+			return &WorkloadCostResponse{Namespace: namespace, Available: false, Reason: ReasonQueryError}
+		}
+	}
+
+	memResult, err := client.Query(ctx,
+		`sum by (pod) ((avg_over_time(container_memory_allocation_bytes{exported_namespace="`+safeNS+`"}[1h]) or avg_over_time(container_memory_allocation_bytes{namespace="`+safeNS+`", exported_namespace=""}[1h])) / 1073741824 * on(node) group_left() node_ram_hourly_cost)`)
+	if err != nil {
+		log.Printf("[opencost] workloads memory query failed for ns=%q, trying opencost_container_memory_cost_total: %v", namespace, err)
+		memResult, err = client.Query(ctx,
+			`sum by (pod) (rate(opencost_container_memory_cost_total{exported_namespace="`+safeNS+`"}[1h]) or rate(opencost_container_memory_cost_total{namespace="`+safeNS+`", exported_namespace=""}[1h]))`)
+		if err != nil {
+			log.Printf("[opencost] workloads memory fallback query also failed for ns=%q: %v", namespace, err)
+			return &WorkloadCostResponse{Namespace: namespace, Available: false, Reason: ReasonQueryError}
+		}
+	}
+
+	cpuUsageResult, cpuUsageErr := client.Query(ctx,
+		`sum by (pod) (label_replace(rate(container_cpu_usage_seconds_total{container!="", namespace="`+safeNS+`"}[1h]), "node", "$1", "instance", "(.+?)(?::\\d+)?$") * on(node) group_left() node_cpu_hourly_cost)`)
+	if cpuUsageErr != nil {
+		log.Printf("[opencost] workloads CPU usage query failed for ns=%q (efficiency will be 0): %v", namespace, cpuUsageErr)
+	}
+	memUsageResult, memUsageErr := client.Query(ctx,
+		`sum by (pod) (label_replace(container_memory_working_set_bytes{container!="", namespace="`+safeNS+`"}, "node", "$1", "instance", "(.+?)(?::\\d+)?$") / 1073741824 * on(node) group_left() node_ram_hourly_cost)`)
+	if memUsageErr != nil {
+		log.Printf("[opencost] workloads memory usage query failed for ns=%q (efficiency will be 0): %v", namespace, memUsageErr)
+	}
+
+	if len(cpuResult.Series) == 0 && len(memResult.Series) == 0 {
+		// Queries succeeded but returned nothing — either the namespace has
+		// no scraped pods or OpenCost metrics aren't present. Surface the
+		// typed reason so the UI can render contextual guidance rather than
+		// an empty list.
+		return &WorkloadCostResponse{Namespace: namespace, Available: false, Reason: ReasonNoMetrics}
+	}
+
+	podCPUUsage := lastValuePerLabel(cpuUsageResult, cpuUsageErr, "pod")
+	podMemUsage := lastValuePerLabel(memUsageResult, memUsageErr, "pod")
+
+	type podCost struct {
+		cpuCost, memoryCost, cpuUsage, memoryUsage float64
+	}
+	podCosts := make(map[string]*podCost)
+	setPodLast := func(result *prom.QueryResult, set func(*podCost, float64)) {
+		if result == nil {
+			return
+		}
+		for _, s := range result.Series {
+			pod := s.Labels["pod"]
+			if pod == "" || len(s.DataPoints) == 0 {
+				continue
+			}
+			pc, ok := podCosts[pod]
+			if !ok {
+				pc = &podCost{}
+				podCosts[pod] = pc
+			}
+			set(pc, s.DataPoints[len(s.DataPoints)-1].Value)
+		}
+	}
+	setPodLast(cpuResult, func(pc *podCost, v float64) { pc.cpuCost = v })
+	setPodLast(memResult, func(pc *podCost, v float64) { pc.memoryCost = v })
+	for pod, pc := range podCosts {
+		pc.cpuUsage = podCPUUsage[pod]
+		pc.memoryUsage = podMemUsage[pod]
+	}
+
+	workloadMap := make(map[WorkloadOwner]*WorkloadCost)
+	for podName, pc := range podCosts {
+		owner, ok := WorkloadOwner{}, false
+		if ownerLookup != nil {
+			owner, ok = ownerLookup(podName)
+		}
+		if !ok {
+			owner = WorkloadOwner{Name: stripPodSuffix(podName), Kind: "standalone"}
+		}
+
+		wl, exists := workloadMap[owner]
+		if !exists {
+			wl = &WorkloadCost{Name: owner.Name, Kind: owner.Kind}
+			workloadMap[owner] = wl
+		}
+		wl.CPUCost += pc.cpuCost
+		wl.MemoryCost += pc.memoryCost
+		wl.CPUUsageCost += pc.cpuUsage
+		wl.MemoryUsageCost += pc.memoryUsage
+		wl.Replicas++
+	}
+
+	workloads := make([]WorkloadCost, 0, len(workloadMap))
+	for _, wl := range workloadMap {
+		allocCost := wl.CPUCost + wl.MemoryCost
+		usageCost := wl.CPUUsageCost + wl.MemoryUsageCost
+		wl.Efficiency = efficiencyPct(usageCost, allocCost)
+		wl.IdleCost = roundTo(idleFromUsage(usageCost, allocCost), 4)
+		wl.HourlyCost = roundTo(allocCost, 4)
+		wl.CPUCost = roundTo(wl.CPUCost, 4)
+		wl.MemoryCost = roundTo(wl.MemoryCost, 4)
+		wl.CPUUsageCost = roundTo(wl.CPUUsageCost, 4)
+		wl.MemoryUsageCost = roundTo(wl.MemoryUsageCost, 4)
+		workloads = append(workloads, *wl)
+	}
+	// Map iteration order is random, so break cost ties by name then kind.
+	sort.Slice(workloads, func(i, j int) bool {
+		a, b := &workloads[i], &workloads[j]
+		if a.HourlyCost != b.HourlyCost {
+			return a.HourlyCost > b.HourlyCost
+		}
+		return a.Name < b.Name || (a.Name == b.Name && a.Kind < b.Kind)
+	})
+	return &WorkloadCostResponse{Available: true, Namespace: namespace, Workloads: workloads}
+}
+
+// stripPodSuffix approximates a workload name by dropping up to two trailing
+// "-segment" parts from a pod name, e.g. "myapp-7f8d9c-xyz12" → "myapp".
+func stripPodSuffix(name string) string {
+	// Two passes, each removing the last "-segment" (no check that it is a hash).
+	for pass := 0; pass < 2; pass++ {
+		cut := strings.LastIndex(name, "-")
+		if cut <= 0 {
+			// No dash, or only a leading dash: nothing left to strip.
+			return name
+		}
+		name = name[:cut]
+	}
+	return name
+}
diff --git a/pkg/opencost/workloads_test.go b/pkg/opencost/workloads_test.go
new file mode 100644
index 000000000..79c317a58
--- /dev/null
+++ b/pkg/opencost/workloads_test.go
@@ -0,0 +1,189 @@
+package opencost
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/skyhook-io/radar/pkg/prom"
+)
+
+// podVectorBody builds a PromQL vector response where each result row has
+// only a `pod` label — matching what `sum by (pod) (...)` queries return.
+func podVectorBody(samples map[string]float64) string {
+	type result struct {
+		Metric map[string]string `json:"metric"`
+		Value  []interface{}     `json:"value"`
+	}
+	body := struct {
+		Status string `json:"status"`
+		Data   struct {
+			ResultType string   `json:"resultType"`
+			Result     []result `json:"result"`
+		} `json:"data"`
+	}{Status: "success"}
+	body.Data.ResultType = "vector"
+	for pod, v := range samples {
+		body.Data.Result = append(body.Data.Result, result{
+			Metric: map[string]string{"pod": pod},
+			Value:  []interface{}{1700000000.0, formatFloat(v)},
+		})
+	}
+	b, _ := json.Marshal(body)
+	return string(b)
+}
+
+// workloadsProm returns a prom.Client where every PromQL query returns the
+// same canned pod-keyed body.
+func workloadsProm(t *testing.T, body string) *prom.Client {
+	t.Helper()
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(body))
+	}))
+	t.Cleanup(srv.Close)
+	return prom.NewClient(prom.NewHTTPTransport(srv.URL, "", nil))
+}
+
+func TestComputeWorkloads_OwnerLookupResolves(t *testing.T) {
+	// Three pods reported by PromQL; ownerLookup resolves all three to two
+	// distinct workloads. Replicas should be 2 + 1, not 3 standalone rows.
+	// worker pod cost (5.0) > sum of api pods (1.0 + 1.0 = 2.0) so sort
+	// is deterministic. The same vector body is returned for all four
+	// queries (CPU alloc, mem alloc, CPU usage, mem usage), so the
+	// per-pod HourlyCost is 2× the input value (cpu + mem).
+	client := workloadsProm(t, podVectorBody(map[string]float64{
+		"api-7f8d9c-xyz12":  1.0,
+		"api-7f8d9c-abc34":  1.0,
+		"worker-deadbeef01": 5.0,
+	}))
+	lookup := func(pod string) (WorkloadOwner, bool) {
+		switch pod {
+		case "api-7f8d9c-xyz12", "api-7f8d9c-abc34":
+			return WorkloadOwner{Name: "api", Kind: "Deployment"}, true
+		case "worker-deadbeef01":
+			return WorkloadOwner{Name: "worker", Kind: "Job"}, true
+		}
+		return WorkloadOwner{}, false
+	}
+	got := ComputeWorkloadsFromProm(context.Background(), client, "default", lookup)
+	if !got.Available {
+		t.Fatalf("expected Available=true, got %+v", got)
+	}
+	if len(got.Workloads) != 2 {
+		t.Fatalf("expected 2 workloads, got %d: %+v", len(got.Workloads), got.Workloads)
+	}
+	// workloads are sorted descending by HourlyCost; worker (5.0 CPU + 5.0
+	// mem = 10.0) outranks api (2 pods × (1.0 + 1.0) = 4.0) and comes first.
+	if got.Workloads[0].Name != "worker" || got.Workloads[0].Kind != "Job" {
+		t.Errorf("first workload: got %s/%s, want worker/Job", got.Workloads[0].Name, got.Workloads[0].Kind)
+	}
+	if got.Workloads[0].Replicas != 1 {
+		t.Errorf("worker replicas: got %d, want 1", got.Workloads[0].Replicas)
+	}
+	if got.Workloads[1].Name != "api" || got.Workloads[1].Kind != "Deployment" {
+		t.Errorf("second workload: got %s/%s, want api/Deployment", got.Workloads[1].Name, got.Workloads[1].Kind)
+	}
+	if got.Workloads[1].Replicas != 2 {
+		t.Errorf("api replicas: got %d, want 2", got.Workloads[1].Replicas)
+	}
+}
+
+func TestComputeWorkloads_OwnerLookupNilFallsBackToPodSuffixStrip(t *testing.T) {
+	// nil lookup → every pod falls through to stripPodSuffix; kind="standalone".
+	client := workloadsProm(t, podVectorBody(map[string]float64{
+		"api-7f8d9c-xyz12": 1.0,
+	}))
+	got := ComputeWorkloadsFromProm(context.Background(), client, "default", nil)
+	if !got.Available {
+		t.Fatalf("expected Available=true, got %+v", got)
+	}
+	if len(got.Workloads) != 1 {
+		t.Fatalf("expected 1 workload, got %d", len(got.Workloads))
+	}
+	if got.Workloads[0].Name != "api" || got.Workloads[0].Kind != "standalone" {
+		t.Errorf("got %s/%s, want api/standalone", got.Workloads[0].Name, got.Workloads[0].Kind)
+	}
+}
+
+func TestComputeWorkloads_OwnerLookupUnresolvedPodFallsBack(t *testing.T) {
+	// Lookup resolves one pod, returns false for the other — false case must
+	// still produce a row (with the stripPodSuffix-derived name) rather than
+	// silently dropping the pod.
+	client := workloadsProm(t, podVectorBody(map[string]float64{
+		"api-7f8d9c-xyz12":   1.0,
+		"orphan-pod-abc-123": 1.0,
+	}))
+	lookup := func(pod string) (WorkloadOwner, bool) {
+		if pod == "api-7f8d9c-xyz12" {
+			return WorkloadOwner{Name: "api", Kind: "Deployment"}, true
+		}
+		return WorkloadOwner{}, false
+	}
+	got := ComputeWorkloadsFromProm(context.Background(), client, "default", lookup)
+	if !got.Available {
+		t.Fatalf("expected Available=true, got %+v", got)
+	}
+	if len(got.Workloads) != 2 {
+		t.Fatalf("expected 2 workloads, got %d: %+v", len(got.Workloads), got.Workloads)
+	}
+	// Find the orphan — should have kind="standalone" and stripped name.
+	var orphan *WorkloadCost
+	for i := range got.Workloads {
+		if got.Workloads[i].Kind == "standalone" {
+			orphan = &got.Workloads[i]
+			break
+		}
+	}
+	if orphan == nil {
+		t.Fatalf("no standalone workload found in %+v", got.Workloads)
+	}
+	if orphan.Name != "orphan-pod" {
+		// stripPodSuffix strips two trailing -suffixes: orphan-pod-abc-123 → orphan-pod
+		t.Errorf("orphan name: got %q, want %q", orphan.Name, "orphan-pod")
+	}
+}
+
+func TestComputeWorkloads_EmptyResultReturnsNoMetricsReason(t *testing.T) {
+	// Queries succeed but return zero series — should surface ReasonNoMetrics
+	// (not Available=true with empty workloads list).
+	emptyBody := `{"status":"success","data":{"resultType":"vector","result":[]}}`
+	client := workloadsProm(t, emptyBody)
+	got := ComputeWorkloadsFromProm(context.Background(), client, "default", nil)
+	if got.Available {
+		t.Errorf("expected Available=false on empty results, got Available=true")
+	}
+	if got.Reason != ReasonNoMetrics {
+		t.Errorf("Reason: got %q, want %q", got.Reason, ReasonNoMetrics)
+	}
+}
+
+func TestComputeWorkloads_NilClient(t *testing.T) {
+	got := ComputeWorkloadsFromProm(context.Background(), nil, "default", nil)
+	if got.Available {
+		t.Errorf("expected Available=false with nil client")
+	}
+	if got.Reason != ReasonNoPrometheus {
+		t.Errorf("Reason: got %q, want %q", got.Reason, ReasonNoPrometheus)
+	}
+}
+
+// TestStripPodSuffix pins the suffix-stripping fallback on representative pod names.
+func TestStripPodSuffix(t *testing.T) {
+	tests := []struct {
+		podName, want string
+	}{
+		{"myapp-7f8d9c-xyz12", "myapp"},          // deployment pod (rs-hash + pod-hash)
+		{"myapp-xyz12", "myapp"},                 // single suffix (e.g. CronJob)
+		{"mywf-step-1-abc12-xyz", "mywf-step-1"}, // multi-segment workflow name
+		{"plain", "plain"},                       // no dashes
+		{"-leading", "-leading"},                 // leading-dash edge case
+	}
+	for _, tt := range tests {
+		if got := stripPodSuffix(tt.podName); got != tt.want {
+			t.Errorf("stripPodSuffix(%q) = %q, want %q", tt.podName, got, tt.want)
+		}
+	}
+}
diff --git a/pkg/prom/client.go b/pkg/prom/client.go
new file mode 100644
index 000000000..2ae91a06d
--- /dev/null
+++ b/pkg/prom/client.go
@@ -0,0 +1,172 @@
+package prom
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/url"
+	"strconv"
+	"time"
+)
+
+// Client is a Prometheus HTTP API client that delegates all network calls to
+// the injected Transport. The Client itself is stateless with respect to
+// discovery — callers are responsible for constructing an appropriate
+// Transport (direct HTTP, kubectl port-forward, or any other tunnel).
+type Client struct {
+	t Transport
+}
+
+// NewClient wraps the given Transport.
+func NewClient(t Transport) *Client {
+	return &Client{t: t}
+}
+
+// Query executes an instant PromQL query.
+func (c *Client) Query(ctx context.Context, promQL string) (*QueryResult, error) {
+	return c.issueQuery(ctx, "/api/v1/query", url.Values{"query": {promQL}})
+}
+
+// QueryRange executes a PromQL range query over [start, end] at the given step.
+func (c *Client) QueryRange(ctx context.Context, promQL string, start, end time.Time, step time.Duration) (*QueryResult, error) {
+	params := url.Values{
+		"query": {promQL},
+		"start": {strconv.FormatInt(start.Unix(), 10)},
+		"end":   {strconv.FormatInt(end.Unix(), 10)},
+		"step":  {strconv.FormatFloat(step.Seconds(), 'f', -1, 64)}, // exact float seconds, so fractional steps are not rounded away
+	}
+	return c.issueQuery(ctx, "/api/v1/query_range", params)
+}
+
+func (c *Client) issueQuery(ctx context.Context, path string, params url.Values) (*QueryResult, error) {
+	body, err := c.t.Do(ctx, "GET", path, params)
+	if err != nil {
+		return nil, err
+	}
+
+	var pr promResponse
+	if err := json.Unmarshal(body, &pr); err != nil {
+		return nil, fmt.Errorf("prom: parse response from %s: %w", c.t.Address(), err)
+	}
+	if pr.Status != "success" {
+		return nil, fmt.Errorf("prom: query error from %s: %s (%s)", c.t.Address(), pr.Error, pr.ErrorType)
+	}
+	return parseQueryResult(pr.Data)
+}
+
+// ProbeReason explains a Probe result. An empty string on true = ok.
+// On false, Reason indicates why discovery should skip this candidate.
+type ProbeReason string
+
+const (
+	ProbeReasonTransportError ProbeReason = "transport_error" // network/HTTP failure
+	ProbeReasonAuthError      ProbeReason = "auth_error"      // HTTP 401/403 — credentials rejected
+	ProbeReasonNotPrometheus  ProbeReason = "not_prometheus"  // 200 but response body isn't prom JSON (captive portal, login page)
+	ProbeReasonPromError      ProbeReason = "prom_error"      // prom responded with status=error
+	ProbeReasonEmptyInstance  ProbeReason = "empty_instance"  // prom responded success but zero "up" results
+)
+
+// Probe checks if a Prometheus endpoint is reachable and has at least one
+// active scrape target. Returns (ok, reason). When ok is true the reason is
+// empty; when ok is false the reason indicates why (callers may use this
+// for targeted logging — e.g., warn once per empty-instance discovery
+// skip).
+//
+// The probe is capped at 3 seconds to fail fast; an earlier ctx deadline still applies.
+func (c *Client) Probe(ctx context.Context) (bool, ProbeReason) {
+	probeCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
+	defer cancel()
+
+	body, err := c.t.Do(probeCtx, "GET", "/api/v1/query", url.Values{"query": {"up"}})
+	if err != nil {
+		var httpErr *HTTPError
+		if errors.As(err, &httpErr) && (httpErr.StatusCode == 401 || httpErr.StatusCode == 403) {
+			return false, ProbeReasonAuthError
+		}
+		return false, ProbeReasonTransportError
+	}
+
+	var pr struct {
+		Status string `json:"status"`
+		Data   struct {
+			Result []json.RawMessage `json:"result"`
+		} `json:"data"`
+	}
+	if err := json.Unmarshal(body, &pr); err != nil {
+		return false, ProbeReasonNotPrometheus
+	}
+	if pr.Status != "success" {
+		return false, ProbeReasonPromError
+	}
+	if len(pr.Data.Result) == 0 {
+		return false, ProbeReasonEmptyInstance
+	}
+	return true, ""
+}
+
+func parseQueryResult(data json.RawMessage) (*QueryResult, error) {
+	var raw struct {
+		ResultType string `json:"resultType"`
+		Result     []struct {
+			Metric map[string]string `json:"metric"`
+			Values [][]interface{}   `json:"values"` // for matrix
+			Value  []interface{}     `json:"value"`  // for vector
+		} `json:"result"`
+	}
+
+	if err := json.Unmarshal(data, &raw); err != nil {
+		return nil, fmt.Errorf("prom: parse result: %w", err)
+	}
+
+	result := &QueryResult{
+		ResultType: raw.ResultType,
+		Series:     make([]Series, 0, len(raw.Result)),
+	}
+
+	for _, r := range raw.Result {
+		series := Series{Labels: r.Metric}
+
+		switch raw.ResultType {
+		case "matrix":
+			series.DataPoints = make([]DataPoint, 0, len(r.Values))
+			for _, v := range r.Values {
+				if dp, ok := parseDataPoint(v); ok {
+					series.DataPoints = append(series.DataPoints, dp)
+				}
+			}
+		case "vector":
+			if r.Value != nil {
+				if dp, ok := parseDataPoint(r.Value); ok {
+					series.DataPoints = []DataPoint{dp}
+				}
+			}
+		}
+
+		result.Series = append(result.Series, series)
+	}
+
+	return result, nil
+}
+
+// parseDataPoint decodes one sample, a [timestamp number, value string] pair,
+// into a DataPoint, truncating any fractional part of the timestamp. It reports
+// false when the pair is malformed or the value does not parse as a float.
+func parseDataPoint(v []interface{}) (DataPoint, bool) {
+	var none DataPoint
+	if len(v) != 2 {
+		return none, false
+	}
+	rawTS, tsOK := v[0].(float64)
+	if !tsOK {
+		return none, false
+	}
+	text, textOK := v[1].(string)
+	if !textOK {
+		return none, false
+	}
+	if parsed, err := strconv.ParseFloat(text, 64); err == nil {
+		return DataPoint{Timestamp: int64(rawTS), Value: parsed}, true
+	}
+	return none, false
+}
diff --git a/pkg/prom/client_test.go b/pkg/prom/client_test.go
new file mode 100644
index 000000000..dd8333ca0
--- /dev/null
+++ b/pkg/prom/client_test.go
@@ -0,0 +1,177 @@
+package prom
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+)
+
+// fakeProm returns an HTTPTransport pointed at a test server with a scripted
+// response for /api/v1/query and /api/v1/query_range.
+func fakeProm(t *testing.T, handler http.HandlerFunc) *HTTPTransport {
+	t.Helper()
+	srv := httptest.NewServer(handler)
+	t.Cleanup(srv.Close)
+	return NewHTTPTransport(srv.URL, "", nil)
+}
+
+func TestClient_Query_ParsesVector(t *testing.T) {
+	body := `{
+	  "status":"success",
+	  "data":{
+	    "resultType":"vector",
+	    "result":[
+	      {"metric":{"namespace":"checkout"},"value":[1700000000, "42.5"]}
+	    ]
+	  }
+	}`
+	tr := fakeProm(t, func(w http.ResponseWriter, r *http.Request) {
+		if !strings.HasSuffix(r.URL.Path, "/api/v1/query") {
+			t.Errorf("unexpected path %q", r.URL.Path)
+		}
+		if got := r.URL.Query().Get("query"); got != "up" {
+			t.Errorf("query param = %q, want up", got)
+		}
+		_, _ = w.Write([]byte(body))
+	})
+
+	c := NewClient(tr)
+	res, err := c.Query(context.Background(), "up")
+	if err != nil {
+		t.Fatalf("Query: %v", err)
+	}
+	if res.ResultType != "vector" || len(res.Series) != 1 {
+		t.Fatalf("bad result: %+v", res)
+	}
+	s := res.Series[0]
+	if s.Labels["namespace"] != "checkout" {
+		t.Errorf("label: %v", s.Labels)
+	}
+	if len(s.DataPoints) != 1 || s.DataPoints[0].Timestamp != 1700000000 || s.DataPoints[0].Value != 42.5 {
+		t.Errorf("datapoint: %+v", s.DataPoints)
+	}
+}
+
+func TestClient_QueryRange_ParsesMatrix(t *testing.T) {
+	body := `{
+	  "status":"success",
+	  "data":{
+	    "resultType":"matrix",
+	    "result":[
+	      {"metric":{"pod":"p1"},"values":[[1700000000,"1"],[1700000060,"2"]]}
+	    ]
+	  }
+	}`
+	tr := fakeProm(t, func(w http.ResponseWriter, r *http.Request) {
+		if !strings.HasSuffix(r.URL.Path, "/api/v1/query_range") {
+			t.Errorf("unexpected path %q", r.URL.Path)
+		}
+		if r.URL.Query().Get("step") == "" {
+			t.Error("step missing")
+		}
+		_, _ = w.Write([]byte(body))
+	})
+
+	c := NewClient(tr)
+	res, err := c.QueryRange(context.Background(), `rate(x[1m])`,
+		time.Unix(1700000000, 0), time.Unix(1700000060, 0), 30*time.Second)
+	if err != nil {
+		t.Fatalf("QueryRange: %v", err)
+	}
+	if res.ResultType != "matrix" || len(res.Series[0].DataPoints) != 2 {
+		t.Fatalf("bad result: %+v", res)
+	}
+}
+
+func TestClient_Query_PropagatesPromError(t *testing.T) {
+	tr := fakeProm(t, func(w http.ResponseWriter, r *http.Request) {
+		_, _ = w.Write([]byte(`{"status":"error","errorType":"bad_data","error":"parse error"}`))
+	})
+	c := NewClient(tr)
+	_, err := c.Query(context.Background(), "up")
+	if err == nil || !strings.Contains(err.Error(), "parse error") {
+		t.Errorf("expected prom error, got %v", err)
+	}
+}
+
+func TestClient_Query_HTTPErrorIsTyped(t *testing.T) {
+	tr := fakeProm(t, func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusBadGateway)
+		_, _ = w.Write([]byte("upstream busy"))
+	})
+	c := NewClient(tr)
+	_, err := c.Query(context.Background(), "up")
+	if err == nil {
+		t.Fatal("expected error")
+	}
+	var httpErr *HTTPError
+	if !errors.As(err, &httpErr) {
+		t.Fatalf("want *HTTPError, got %T: %v", err, err)
+	}
+	if httpErr.StatusCode != http.StatusBadGateway {
+		t.Errorf("status: %d", httpErr.StatusCode)
+	}
+}
+
+func TestClient_Probe_RejectsEmptyInstance(t *testing.T) {
+	tr := fakeProm(t, func(w http.ResponseWriter, r *http.Request) {
+		_, _ = w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[]}}`))
+	})
+	c := NewClient(tr)
+	ok, reason := c.Probe(context.Background())
+	if ok {
+		t.Error("probe should reject instance with empty up result")
+	}
+	if reason != ProbeReasonEmptyInstance {
+		t.Errorf("reason = %q, want empty_instance", reason)
+	}
+}
+
+func TestClient_Probe_AcceptsActiveInstance(t *testing.T) {
+	tr := fakeProm(t, func(w http.ResponseWriter, r *http.Request) {
+		_, _ = w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{},"value":[1,"1"]}]}}`))
+	})
+	c := NewClient(tr)
+	ok, reason := c.Probe(context.Background())
+	if !ok {
+		t.Error("probe should accept active instance")
+	}
+	if reason != "" {
+		t.Errorf("reason should be empty on success, got %q", reason)
+	}
+}
+
+func TestClient_Probe_RejectsNonPromBody(t *testing.T) {
+	tr := fakeProm(t, func(w http.ResponseWriter, r *http.Request) {
+		_, _ = w.Write([]byte(`<html>captive portal</html>`))
+	})
+	c := NewClient(tr)
+	ok, reason := c.Probe(context.Background())
+	if ok {
+		t.Error("probe should reject non-JSON body")
+	}
+	if reason != ProbeReasonNotPrometheus {
+		t.Errorf("reason = %q, want not_prometheus", reason)
+	}
+}
+
+func TestHTTPTransport_BasePathIncluded(t *testing.T) {
+	var capturedPath string
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		capturedPath = r.URL.Path
+		_, _ = w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[]}}`))
+	}))
+	defer srv.Close()
+
+	tr := NewHTTPTransport(srv.URL, "/select/0/prometheus", nil)
+	c := NewClient(tr)
+	_, _ = c.Query(context.Background(), "up")
+	if capturedPath != "/select/0/prometheus/api/v1/query" {
+		t.Errorf("base path not applied: got %q", capturedPath)
+	}
+}
+
diff --git a/pkg/prom/discovery.go b/pkg/prom/discovery.go
new file mode 100644
index 000000000..88f815bf8
--- /dev/null
+++ b/pkg/prom/discovery.go
@@ -0,0 +1,299 @@
+package prom
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strings"
+
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+)
+
+// CandidateSource describes how a candidate was found.
+type CandidateSource string
+
+const (
+	CandidateSourceWellKnown CandidateSource = "well_known"
+	CandidateSourceDynamic   CandidateSource = "dynamic"
+)
+
+// Candidate is a Prometheus-compatible service the caller can attempt to
+// reach. Discover populates the fields and orders candidates by priority,
+// but does not probe — it leaves the transport choice (direct HTTP vs.
+// port-forward vs. tunneled proxy) to the caller.
+type Candidate struct {
+	Namespace   string
+	Name        string
+	Port        int             // service port (for in-cluster addressing)
+	TargetPort  int             // container port (for port-forwarding to the pod)
+	ClusterAddr string          // http://{name}.{ns}.svc.cluster.local:{port}
+	BasePath    string          // e.g. "/select/0/prometheus" for vmselect
+	Score       int             // relative likelihood of being Prometheus
+	Source      CandidateSource // well_known | dynamic
+}
+
+// DiscoverOptions tunes Discover's behavior.
+type DiscoverOptions struct {
+	// IncludeDynamic controls whether a cluster-wide service scan is performed.
+	// The scan is an O(all services) List call plus a scoring pass; skip it
+	// for callers that only need a quick well-known check.
+	IncludeDynamic bool
+
+	// MaxDynamic caps the number of dynamic candidates returned. Default 5.
+	MaxDynamic int
+
+	// Logger is optional; if set, Discover emits verbose progress messages.
+	Logger func(format string, args ...interface{})
+}
+
+// WellKnownLocations is the ordered list of namespaces + service names where
+// Prometheus-compatible services are commonly installed.
+var WellKnownLocations = []struct {
+	Namespace string
+	Name      string
+	Port      int    // 0 = use service's first port
+	BasePath  string // sub-path for Prometheus API
+}{
+	// VictoriaMetrics — monitoring namespace first (workload metrics)
+	{"monitoring", "victoria-metrics-victoria-metrics-single-server", 8428, ""},
+	{"monitoring", "victoria-metrics-single-server", 8428, ""},
+	{"monitoring", "vmsingle", 8428, ""},
+	{"monitoring", "vmselect", 8481, "/select/0/prometheus"},
+	{"victoria-metrics", "victoria-metrics-victoria-metrics-single-server", 8428, ""},
+	{"victoria-metrics", "victoria-metrics-single-server", 8428, ""},
+	{"victoria-metrics", "vmsingle", 8428, ""},
+	{"victoria-metrics", "vmselect", 8481, "/select/0/prometheus"},
+	// kube-prometheus-stack
+	{"monitoring", "kube-prometheus-stack-prometheus", 9090, ""},
+	{"monitoring", "prometheus-kube-prometheus-prometheus", 9090, ""},
+	{"monitoring", "prometheus-operated", 9090, ""},
+	// Standard Prometheus
+	{"opencost", "prometheus-server", 0, ""},
+	{"monitoring", "prometheus-server", 0, ""},
+	{"prometheus", "prometheus-server", 0, ""},
+	{"observability", "prometheus-server", 0, ""},
+	{"metrics", "prometheus-server", 0, ""},
+	{"kube-system", "prometheus", 0, ""},
+	{"default", "prometheus", 0, ""},
+	// VictoriaMetrics — caretta namespace (traffic-specific, may lack workload metrics)
+	{"caretta", "caretta-vm", 8428, ""},
+}
+
+// metricsNamespaces are commonly used for metrics services; used as a scoring
+// signal in dynamic discovery.
+var metricsNamespaces = map[string]bool{
+	"monitoring":       true,
+	"prometheus":       true,
+	"observability":    true,
+	"metrics":          true,
+	"victoria-metrics": true,
+	"caretta":          true,
+	"opencost":         true,
+}
+
+// skipNamespaces are excluded from dynamic discovery.
+var skipNamespaces = map[string]bool{
+	"kube-public":     true,
+	"kube-node-lease": true,
+}
+
+// Discover enumerates candidate Prometheus-compatible services reachable to
+// the given k8sClient. Well-known locations are returned first in declared
+// priority order, optionally followed by dynamically-discovered services
+// ranked by ScoreService.
+//
+// Discover does NOT probe any candidate — callers decide how to reach each
+// (direct HTTP, port-forward, tunneled proxy) and then use
+// pkg/prom.Client.Probe to validate.
+func Discover(ctx context.Context, k8sClient kubernetes.Interface, opts DiscoverOptions) ([]Candidate, error) {
+	if k8sClient == nil {
+		return nil, fmt.Errorf("prom.Discover: k8sClient is nil")
+	}
+	if opts.MaxDynamic <= 0 {
+		opts.MaxDynamic = 5
+	}
+	logf := opts.Logger
+	if logf == nil {
+		logf = func(string, ...interface{}) {}
+	}
+
+	var out []Candidate
+
+	// Layer 1: well-known locations. Preserve declared order for determinism.
+	for _, loc := range WellKnownLocations {
+		svc, err := k8sClient.CoreV1().Services(loc.Namespace).Get(ctx, loc.Name, metav1.GetOptions{})
+		if err != nil {
+			if !apierrors.IsNotFound(err) {
+				logf("prom.Discover: error checking %s/%s: %v", loc.Namespace, loc.Name, err)
+			}
+			continue
+		}
+		port := resolvePort(*svc, loc.Port)
+		out = append(out, Candidate{
+			Namespace:   svc.Namespace,
+			Name:        svc.Name,
+			Port:        port,
+			TargetPort:  resolveTargetPort(*svc, port),
+			ClusterAddr: buildClusterAddr(svc.Name, svc.Namespace, svc.Spec.ClusterIP, port),
+			BasePath:    loc.BasePath,
+			Source:      CandidateSourceWellKnown,
+		})
+	}
+
+	if !opts.IncludeDynamic {
+		return out, nil
+	}
+
+	// Layer 2: dynamic cluster-wide scan, scored + sorted (layer-1 hits are not filtered out).
+	svcs, err := k8sClient.CoreV1().Services("").List(ctx, metav1.ListOptions{})
+	if err != nil {
+		logf("prom.Discover: failed to list services: %v", err)
+		return out, nil // well-known results still useful
+	}
+
+	var scored []Candidate
+	for _, svc := range svcs.Items {
+		score, bp := ScoreService(svc)
+		if score <= 0 {
+			continue
+		}
+		port := resolvePort(svc, 0)
+		scored = append(scored, Candidate{
+			Namespace:   svc.Namespace,
+			Name:        svc.Name,
+			Port:        port,
+			TargetPort:  resolveTargetPort(svc, port),
+			ClusterAddr: buildClusterAddr(svc.Name, svc.Namespace, svc.Spec.ClusterIP, port),
+			BasePath:    bp,
+			Score:       score,
+			Source:      CandidateSourceDynamic,
+		})
+	}
+
+	// Stable sort: equal-score services keep the order the API listed them in,
+	// so ranking and the MaxDynamic cut-off below are reproducible.
+	sort.SliceStable(scored, func(i, j int) bool { return scored[i].Score > scored[j].Score })
+
+	if len(scored) > opts.MaxDynamic {
+		scored = scored[:opts.MaxDynamic]
+	}
+	return append(out, scored...), nil
+}
+
+// ScoreService computes a heuristic score for a service being
+// Prometheus-compatible. Returns the score and an inferred BasePath for
+// vmselect-style services.
+func ScoreService(svc corev1.Service) (score int, basePath string) {
+	if svc.Spec.Type == corev1.ServiceTypeExternalName {
+		return 0, ""
+	}
+	if skipNamespaces[svc.Namespace] {
+		return 0, ""
+	}
+
+	labels := svc.Labels
+	appName := labels["app.kubernetes.io/name"]
+	appLabel := labels["app"]
+	component := labels["app.kubernetes.io/component"]
+
+	switch appName {
+	case "prometheus":
+		score += 100
+	case "victoria-metrics-single", "vmsingle":
+		score += 100
+	case "vmselect":
+		score += 90
+		basePath = "/select/0/prometheus"
+	case "thanos-query", "thanos-querier":
+		score += 80
+	}
+
+	switch appLabel {
+	case "prometheus", "prometheus-server":
+		score += 80
+	case "vmsingle":
+		score += 80
+	case "vmselect":
+		score += 80
+		basePath = "/select/0/prometheus"
+	}
+
+	if score > 0 && component == "server" {
+		score += 20
+	}
+
+	for _, p := range svc.Spec.Ports {
+		switch p.Port {
+		case 9090: // Prometheus default
+			score += 30
+		case 8428: // VictoriaMetrics single-node default
+			score += 30
+		case 8481: // VictoriaMetrics vmselect default
+			score += 25
+		case 9009: // Thanos Query default
+			score += 25
+		}
+		if strings.Contains(strings.ToLower(p.Name), "prometheus") {
+			score += 10
+		}
+	}
+
+	nameLower := strings.ToLower(svc.Name)
+	if strings.Contains(nameLower, "prometheus") {
+		score += 20
+	}
+	if strings.Contains(nameLower, "victoria") || strings.Contains(nameLower, "vmsingle") || strings.Contains(nameLower, "vmselect") {
+		score += 20
+		if strings.Contains(nameLower, "vmselect") && basePath == "" {
+			basePath = "/select/0/prometheus"
+		}
+	}
+	if strings.Contains(nameLower, "thanos") {
+		score += 15
+	}
+
+	if metricsNamespaces[svc.Namespace] {
+		score += 10
+	}
+
+	return score, basePath
+}
+
+func resolvePort(svc corev1.Service, defaultPort int) int {
+	switch {
+	case defaultPort != 0: // an explicit port always wins
+		return defaultPort
+	case len(svc.Spec.Ports) > 0: // otherwise the service's first declared port
+		return int(svc.Spec.Ports[0].Port)
+	}
+	return 80 // no explicit port and no declared ports
+}
+
+// resolveTargetPort returns the container port behind servicePort, for
+// port-forwarding which bypasses the Service (e.g., service:80 →
+// container:9090). Only numeric targetPorts are used; a named or unset
+// targetPort, or an unknown servicePort, falls back to servicePort.
+func resolveTargetPort(svc corev1.Service, servicePort int) int {
+	for _, p := range svc.Spec.Ports {
+		if int(p.Port) == servicePort {
+			if p.TargetPort.IntVal > 0 {
+				return int(p.TargetPort.IntVal)
+			}
+			return servicePort
+		}
+	}
+	return servicePort
+}
+
+// buildClusterAddr returns the in-cluster HTTP URL for a service. Headless
+// services (ClusterIP=None) use a pod-0 hostname; this is best-effort and
+// really meant for stateful Prometheus deployments with predictable names.
+func buildClusterAddr(name, namespace, clusterIP string, port int) string {
+	if clusterIP == "None" {
+		return fmt.Sprintf("http://%s-0.%s.%s.svc.cluster.local:%d", name, name, namespace, port)
+	}
+	return fmt.Sprintf("http://%s.%s.svc.cluster.local:%d", name, namespace, port)
+}
diff --git a/pkg/prom/discovery_test.go b/pkg/prom/discovery_test.go
new file mode 100644
index 000000000..6d3b85743
--- /dev/null
+++ b/pkg/prom/discovery_test.go
@@ -0,0 +1,242 @@
+package prom
+
+import (
+	"context"
+	"testing"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+func TestScoreService_TableDriven(t *testing.T) {
+	tests := []struct {
+		name         string
+		svc          corev1.Service
+		wantMin      int    // inclusive lower bound on the score
+		wantMax      int    // inclusive upper bound; 0 means the score must be exactly 0
+		wantBasePath string // checked only when non-empty
+	}{
+		{
+			name: "plain prometheus by app.kubernetes.io/name + port",
+			svc: corev1.Service{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "prometheus-server",
+					Namespace: "monitoring",
+					Labels:    map[string]string{"app.kubernetes.io/name": "prometheus"},
+				},
+				Spec: corev1.ServiceSpec{
+					Ports: []corev1.ServicePort{{Port: 9090}},
+				},
+			},
+			wantMin: 100 + 30 + 20 + 10, // name + port + name-contains + metrics ns
+			wantMax: 500,
+		},
+		{
+			name: "vmselect sets basePath",
+			svc: corev1.Service{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "vmselect",
+					Namespace: "monitoring",
+					Labels:    map[string]string{"app.kubernetes.io/name": "vmselect"},
+				},
+				Spec: corev1.ServiceSpec{
+					Ports: []corev1.ServicePort{{Port: 8481}},
+				},
+			},
+			wantMin:      90 + 25 + 20 + 10,
+			wantMax:      200,
+			wantBasePath: "/select/0/prometheus",
+		},
+		{
+			name: "thanos-query scores lower than prometheus but non-zero",
+			svc: corev1.Service{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "thanos-query",
+					Namespace: "observability",
+					Labels:    map[string]string{"app.kubernetes.io/name": "thanos-query"},
+				},
+				Spec: corev1.ServiceSpec{
+					Ports: []corev1.ServicePort{{Port: 9009}},
+				},
+			},
+			wantMin: 80 + 25 + 15 + 10,
+			wantMax: 200,
+		},
+		{
+			name: "unrelated service scores zero",
+			svc: corev1.Service{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "redis",
+					Namespace: "default",
+					Labels:    map[string]string{"app": "redis"},
+				},
+				Spec: corev1.ServiceSpec{
+					Ports: []corev1.ServicePort{{Port: 6379}},
+				},
+			},
+			wantMax: 0,
+		},
+		{
+			name: "ExternalName excluded",
+			svc: corev1.Service{
+				ObjectMeta: metav1.ObjectMeta{Name: "prometheus", Namespace: "monitoring"},
+				Spec:       corev1.ServiceSpec{Type: corev1.ServiceTypeExternalName},
+			},
+			wantMax: 0,
+		},
+		{
+			name: "skip-namespace excluded",
+			svc: corev1.Service{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "prometheus",
+					Namespace: "kube-public",
+					Labels:    map[string]string{"app.kubernetes.io/name": "prometheus"},
+				},
+			},
+			wantMax: 0,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			score, bp := ScoreService(tc.svc)
+			if score < tc.wantMin || (tc.wantMax > 0 && score > tc.wantMax) {
+				t.Errorf("score=%d, want in [%d, %d]", score, tc.wantMin, tc.wantMax)
+			}
+			if tc.wantMax == 0 && score != 0 { // excluded or unrelated services must not score at all
+				t.Errorf("score=%d, want 0", score)
+			}
+			if tc.wantBasePath != "" && bp != tc.wantBasePath {
+				t.Errorf("basePath=%q, want %q", bp, tc.wantBasePath)
+			}
+		})
+	}
+}
+
+func TestDiscover_WellKnownFirst(t *testing.T) {
+	// Seed the fake cluster with a prometheus-server Service at a well-known
+	// location plus an unrelated redis Service.
+	wellKnown := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{Name: "prometheus-server", Namespace: "monitoring"},
+		Spec: corev1.ServiceSpec{
+			ClusterIP: "10.0.0.1",
+			Ports:     []corev1.ServicePort{{Port: 80, TargetPort: intstr.FromInt(9090)}},
+		},
+	}
+	redis := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{Name: "redis", Namespace: "default"},
+		Spec: corev1.ServiceSpec{
+			ClusterIP: "10.0.0.2",
+			Ports:     []corev1.ServicePort{{Port: 6379}},
+		},
+	}
+	// Add a Service that is not at a well-known location but should still score dynamically.
+	thanos := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "thanos-query",
+			Namespace: "observability",
+			Labels:    map[string]string{"app.kubernetes.io/name": "thanos-query"},
+		},
+		Spec: corev1.ServiceSpec{
+			ClusterIP: "10.0.0.3",
+			Ports:     []corev1.ServicePort{{Port: 9009}},
+		},
+	}
+
+	k8s := fake.NewSimpleClientset(wellKnown, redis, thanos)
+	cands, err := Discover(context.Background(), k8s, DiscoverOptions{IncludeDynamic: true, MaxDynamic: 3})
+	if err != nil {
+		t.Fatalf("Discover: %v", err)
+	}
+	if len(cands) < 2 { // expect the well-known match and the dynamic thanos match
+		t.Fatalf("want at least 2 candidates, got %d", len(cands))
+	}
+
+	// The well-known match must come first in the returned slice.
+	if cands[0].Source != CandidateSourceWellKnown {
+		t.Errorf("cands[0].Source = %q, want well_known", cands[0].Source)
+	}
+	if cands[0].Namespace != "monitoring" || cands[0].Name != "prometheus-server" {
+		t.Errorf("cands[0] = %s/%s, want monitoring/prometheus-server", cands[0].Namespace, cands[0].Name)
+	}
+	if cands[0].ClusterAddr != "http://prometheus-server.monitoring.svc.cluster.local:80" { // service port
+		t.Errorf("cluster addr = %q", cands[0].ClusterAddr)
+	}
+	if cands[0].TargetPort != 9090 { // container port taken from the Service's TargetPort
+		t.Errorf("TargetPort = %d, want 9090", cands[0].TargetPort)
+	}
+
+	// The thanos Service must show up as a dynamic candidate.
+	var sawDynamicThanos bool
+	for _, c := range cands {
+		if c.Source == CandidateSourceDynamic && c.Name == "thanos-query" {
+			sawDynamicThanos = true
+			break
+		}
+	}
+	if !sawDynamicThanos {
+		t.Errorf("expected dynamic thanos candidate; got %+v", cands)
+	}
+
+	// Redis must not appear in any form.
+	for _, c := range cands {
+		if c.Name == "redis" {
+			t.Errorf("redis should not be a candidate: %+v", c)
+		}
+	}
+}
+
+func TestDiscover_SkipsDynamicWhenDisabled(t *testing.T) {
+	// Only a Service that would need dynamic scoring is present (no well-known match).
+	prom := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-prometheus",
+			Namespace: "observability",
+			Labels:    map[string]string{"app.kubernetes.io/name": "prometheus"},
+		},
+		Spec: corev1.ServiceSpec{
+			ClusterIP: "10.0.0.5",
+			Ports:     []corev1.ServicePort{{Port: 9090}},
+		},
+	}
+
+	k8s := fake.NewSimpleClientset(prom)
+	cands, err := Discover(context.Background(), k8s, DiscoverOptions{IncludeDynamic: false}) // dynamic pass off
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(cands) != 0 {
+		t.Errorf("expected no candidates when dynamic is disabled and no well-known match; got %d", len(cands))
+	}
+}
+
+func TestDiscover_HeadlessServiceProducesPod0Addr(t *testing.T) {
+	headless := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{Name: "prometheus-server", Namespace: "monitoring"},
+		Spec: corev1.ServiceSpec{
+			ClusterIP: "None", // headless Service
+			Ports:     []corev1.ServicePort{{Port: 9090}},
+		},
+	}
+	k8s := fake.NewSimpleClientset(headless)
+	cands, err := Discover(context.Background(), k8s, DiscoverOptions{}) // zero-value options
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(cands) != 1 {
+		t.Fatalf("want 1 candidate, got %d", len(cands))
+	}
+	want := "http://prometheus-server-0.prometheus-server.monitoring.svc.cluster.local:9090" // <name>-0.<name> host
+	if cands[0].ClusterAddr != want {
+		t.Errorf("cluster addr = %q, want %q", cands[0].ClusterAddr, want)
+	}
+}
+
+func TestDiscover_NilClient(t *testing.T) {
+	_, err := Discover(context.Background(), nil, DiscoverOptions{}) // nil Kubernetes client
+	if err == nil {
+		t.Error("expected error for nil client")
+	}
+}
diff --git a/internal/prometheus/queries.go b/pkg/prom/queries.go
similarity index 96%
rename from internal/prometheus/queries.go
rename to pkg/prom/queries.go
index c8fecbfc7..75192c558 100644
--- a/internal/prometheus/queries.go
+++ b/pkg/prom/queries.go
@@ -1,4 +1,4 @@
-package prometheus
+package prom
 
 import (
 	"fmt"
@@ -16,10 +16,10 @@ func SanitizeLabelValue(s string) string {
 	})
 }
 
-// escapeRegexMeta escapes regex metacharacters for PromQL =~ matching.
+// regexMeta matches the regex metacharacters that EscapeRegexMeta escapes for PromQL =~ matching.
 var regexMeta = regexp.MustCompile(`([.+*?^${}()|[\]\\])`)
 
-func escapeRegexMeta(s string) string {
+func EscapeRegexMeta(s string) string {
 	return regexMeta.ReplaceAllString(s, `\\$1`)
 }
 
@@ -199,9 +199,9 @@ func buildClusterQueryInner(category MetricCategory, filterContainer bool) strin
 	}
 }
 
-// categoryUsesContainerFilter returns true if the category's queries include
-// the container!='' filter that may need fallback on cri-docker clusters.
-func categoryUsesContainerFilter(category MetricCategory) bool {
+// CategoryUsesContainerFilter returns true if the category's queries include
+// the container!='' filter that may need a fallback on cri-docker clusters.
+func CategoryUsesContainerFilter(category MetricCategory) bool {
 	return category == CategoryCPU || category == CategoryMemory
 }
 
@@ -249,7 +249,7 @@ func buildPodQuery(namespace, podName string, category MetricCategory, filterCon
 func buildWorkloadQuery(namespace, workloadName string, category MetricCategory, filterContainer bool) string {
 	ns := SanitizeLabelValue(namespace)
 	// Sanitize then escape regex metacharacters so e.g. "my.app" matches literally
-	podPattern := fmt.Sprintf("%s-.*", escapeRegexMeta(SanitizeLabelValue(workloadName)))
+	podPattern := fmt.Sprintf("%s-.*", EscapeRegexMeta(SanitizeLabelValue(workloadName)))
 	cf := ""
 	if filterContainer {
 		cf = "container!='',"
@@ -290,7 +290,7 @@ func buildNodeQuery(nodeName string, category MetricCategory) string {
 	// name or IP. The value often includes a port suffix, so we match with an optional port.
 	// This heuristic works for most standard deployments; clusters with custom relabeling
 	// may need the --prometheus-url flag plus adjusted recording rules.
-	sanitized := escapeRegexMeta(SanitizeLabelValue(nodeName))
+	sanitized := EscapeRegexMeta(SanitizeLabelValue(nodeName))
 	nodeFilter := fmt.Sprintf(`instance=~'%s(:\\d+)?'`, sanitized)
 
 	switch category {
diff --git a/pkg/prom/transport.go b/pkg/prom/transport.go
new file mode 100644
index 000000000..d1dfbad53
--- /dev/null
+++ b/pkg/prom/transport.go
@@ -0,0 +1,115 @@
+package prom
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+)
+
+// Transport is the pluggable HTTP transport used by Client to issue requests
+// to a Prometheus HTTP API. Implementations decide how the request physically
+// reaches Prometheus — typically either direct HTTP against a known URL
+// (in-cluster, or a kubectl port-forwarded localhost) or a tunneled proxy
+// transport that forwards requests through some external broker to an
+// in-cluster Prometheus.
+type Transport interface {
+	// Do sends one request, described by method, path and query params, and
+	// returns the raw upstream body bytes. Parsing those bytes is the
+	// Client's concern, not the Transport's.
+	Do(ctx context.Context, method, path string, params url.Values) ([]byte, error)
+
+	// Address returns a human-readable identifier for this transport, used
+	// for status reporting and error messages — typically the base URL, or
+	// a short description of the proxy path for tunneled transports.
+	Address() string
+}
+
+// HTTPTransport is a direct-HTTP Transport. It targets BaseURL + BasePath +
+// the request path, and uses HTTPClient to send the request.
+//
+// BasePath is an optional prefix applied before Prometheus API paths and is
+// useful for vmselect-style deployments where the API lives under e.g.
+// "/select/0/prometheus".
+//
+// Headers, if non-empty, are applied to every request after the default
+// Accept header, so callers may override Accept by setting it here. Typical
+// uses are Authorization: Bearer ... and tenant headers like X-Scope-OrgID.
+type HTTPTransport struct {
+	BaseURL    string            // URL prefix; NewHTTPTransport strips trailing slashes
+	BasePath   string            // inserted between BaseURL and the request path
+	HTTPClient *http.Client      // sends every request; Do dereferences it without a nil check
+	Headers    map[string]string // set on each request after the default Accept header
+}
+
+// NewHTTPTransport builds an HTTPTransport, trimming trailing slashes from
+// baseURL and basePath; a nil httpClient is replaced by one with a 10s timeout.
+func NewHTTPTransport(baseURL, basePath string, httpClient *http.Client) *HTTPTransport {
+	if httpClient == nil {
+		httpClient = &http.Client{Timeout: 10 * time.Second}
+	}
+	return &HTTPTransport{
+		BaseURL:    strings.TrimRight(baseURL, "/"),
+		BasePath:   strings.TrimRight(basePath, "/"), // avoids "//" before the API path
+		HTTPClient: httpClient,
+	}
+}
+
+// Do sends method to BaseURL+BasePath+path with params as the query string
+// and returns the response body (read up to a 10 MiB cap). Non-2xx statuses
+// yield a *HTTPError; callers can use errors.As to extract the status code
+// and upstream body (Probe distinguishes 401/403 this way, for example).
+func (t *HTTPTransport) Do(ctx context.Context, method, path string, params url.Values) ([]byte, error) {
+	full := t.BaseURL + t.BasePath + path
+	if len(params) > 0 {
+		sep := "?" // switched to "&" when full already carries a query string
+		if strings.Contains(full, "?") {
+			sep = "&"
+		}
+		full += sep + params.Encode()
+	}
+
+	req, err := http.NewRequestWithContext(ctx, method, full, nil)
+	if err != nil {
+		return nil, fmt.Errorf("prom.HTTPTransport: build request: %w", err)
+	}
+	req.Header.Set("Accept", "application/json") // default; t.Headers may override it below
+	for k, v := range t.Headers {
+		req.Header.Set(k, v)
+	}
+
+	resp, err := t.HTTPClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("prom.HTTPTransport: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	body, err := io.ReadAll(io.LimitReader(resp.Body, 10<<20)) // 10 MiB cap
+	if err != nil {
+		return nil, fmt.Errorf("prom.HTTPTransport: read body: %w", err)
+	}
+	// Any 2xx counts as success, matching the documented non-2xx error contract.
+	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
+		return nil, &HTTPError{StatusCode: resp.StatusCode, URL: full, Body: body}
+	}
+	return body, nil
+}
+
+// Address returns BaseURL+BasePath, the prefix of every request URL, for diagnostics.
+func (t *HTTPTransport) Address() string {
+	return t.BaseURL + t.BasePath
+}
+
+// HTTPError is returned when Prometheus responds with a non-2xx status.
+type HTTPError struct {
+	StatusCode int    // HTTP status code of the response
+	URL        string // full request URL, including any encoded query parameters
+	Body       []byte // response body as read by HTTPTransport.Do
+}
+
+func (e *HTTPError) Error() string { // message embeds the status code, the URL and the whole Body
+	return fmt.Sprintf("prometheus returned %d for %s: %s", e.StatusCode, e.URL, string(e.Body))
+}
diff --git a/pkg/prom/transport_test.go b/pkg/prom/transport_test.go
new file mode 100644
index 000000000..7d0ec43e2
--- /dev/null
+++ b/pkg/prom/transport_test.go
@@ -0,0 +1,57 @@
+package prom
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+)
+
+func TestHTTPTransport_AppliesHeaders(t *testing.T) {
+	var gotAuth, gotTenant, gotAccept string // headers as seen by the test server
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotAuth = r.Header.Get("Authorization")
+		gotTenant = r.Header.Get("X-Scope-OrgID")
+		gotAccept = r.Header.Get("Accept")
+		_, _ = w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[]}}`))
+	}))
+	t.Cleanup(srv.Close)
+
+	tr := NewHTTPTransport(srv.URL, "", nil) // no base path, default HTTP client
+	tr.Headers = map[string]string{
+		"Authorization": "Bearer secret",
+		"X-Scope-OrgID": "tenant-a",
+	}
+
+	if _, err := NewClient(tr).Query(context.Background(), "up"); err != nil {
+		t.Fatalf("Query: %v", err)
+	}
+	if gotAuth != "Bearer secret" {
+		t.Errorf("Authorization = %q, want %q", gotAuth, "Bearer secret")
+	}
+	if gotTenant != "tenant-a" {
+		t.Errorf("X-Scope-OrgID = %q, want %q", gotTenant, "tenant-a")
+	}
+	if gotAccept != "application/json" { // default Accept stays when Headers does not set it
+		t.Errorf("Accept = %q, want application/json", gotAccept)
+	}
+}
+
+func TestHTTPTransport_HeadersOverrideAccept(t *testing.T) {
+	var gotAccept string // Accept header as seen by the test server
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotAccept = r.Header.Get("Accept")
+		_, _ = w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[]}}`))
+	}))
+	t.Cleanup(srv.Close)
+
+	tr := NewHTTPTransport(srv.URL, "", nil)
+	tr.Headers = map[string]string{"Accept": "application/vnd.custom+json"} // replaces the default Accept
+
+	if _, err := NewClient(tr).Query(context.Background(), "up"); err != nil {
+		t.Fatalf("Query: %v", err)
+	}
+	if gotAccept != "application/vnd.custom+json" {
+		t.Errorf("Accept = %q, want override", gotAccept)
+	}
+}
diff --git a/pkg/prom/types.go b/pkg/prom/types.go
new file mode 100644
index 000000000..f3ab8e1d3
--- /dev/null
+++ b/pkg/prom/types.go
@@ -0,0 +1,59 @@
+// Package prom provides a Prometheus HTTP API client with a pluggable
+// Transport so the same query, parsing, and discovery logic can be used
+// from any context that can reach a Prometheus endpoint — directly, via
+// kubectl port-forward, or through a tunneled proxy.
+//
+// The package is intentionally pure: no global state, no singletons, no
+// k8s client dependency in the Client itself. K8s-aware discovery is a
+// separate step that constructs a Transport.
+package prom
+
+import "encoding/json"
+
+// ServiceInfo identifies a Prometheus-compatible service discovered in the
+// cluster. Used by discovery helpers and returned in Status (Status.Service).
+type ServiceInfo struct {
+	Namespace string `json:"namespace"`
+	Name      string `json:"name"`
+	Port      int    `json:"port"`
+	BasePath  string `json:"basePath,omitempty"` // e.g. "/select/0/prometheus" for vmselect
+}
+
+// Status is the current Prometheus connection status as exposed to callers/UI.
+// Address is the effective URL (port-forwarded, a tunneled proxy URL, or a
+// direct service URL, per the Transport). Empty optional fields drop from JSON.
+type Status struct {
+	Available   bool         `json:"available"`
+	Connected   bool         `json:"connected"`
+	Address     string       `json:"address,omitempty"`
+	Service     *ServiceInfo `json:"service,omitempty"`
+	ContextName string       `json:"contextName,omitempty"`
+	Error       string       `json:"error,omitempty"`
+}
+
+// QueryResult is the parsed result of a Prometheus query: a result type plus its series.
+type QueryResult struct {
+	ResultType string   `json:"resultType"`
+	Series     []Series `json:"series"`
+}
+
+// Series is a single time series from a Prometheus query: a label set plus its data points.
+type Series struct {
+	Labels     map[string]string `json:"labels"`
+	DataPoints []DataPoint       `json:"dataPoints"`
+}
+
+// DataPoint is a single (timestamp, value) pair.
+type DataPoint struct {
+	Timestamp int64   `json:"timestamp"` // NOTE(review): unit (s vs ms) is not visible here — confirm
+	Value     float64 `json:"value"`
+}
+
+// promResponse is the raw shape returned by Prometheus HTTP API
+// /api/v1/query and /api/v1/query_range endpoints.
+type promResponse struct {
+	Status    string          `json:"status"` // e.g. "success"
+	Data      json.RawMessage `json:"data"`   // kept raw; decoded in a later step
+	ErrorType string          `json:"errorType,omitempty"`
+	Error     string          `json:"error,omitempty"`
+}
diff --git a/web/src/components/cost/CostView.tsx b/web/src/components/cost/CostView.tsx
index 0a24f1023..ffcb9dc9e 100644
--- a/web/src/components/cost/CostView.tsx
+++ b/web/src/components/cost/CostView.tsx
@@ -417,7 +417,7 @@ function CostHelpDialog({ onClose }: { onClose: () => void }) {
   return (
     <div className="fixed inset-0 z-50 flex items-center justify-center">
       <div className="absolute inset-0 bg-black/60 backdrop-blur-sm" onClick={onClose} />
-      <div className="relative dialog max-w-lg w-full mx-4 max-h-[80vh] overflow-y-auto">
+      <div className="relative dialog max-w-2xl w-full mx-4 max-h-[80vh] overflow-y-auto">
         {/* Header */}
         <div className="flex items-center justify-between p-4 border-b border-theme-border sticky top-0 bg-theme-surface rounded-t-lg">
           <div className="flex items-center gap-2">