Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
e76e4fe
feat(resourcecontext): add Build generator + wire /api/ai/resources G…
nadaverell May 17, 2026
8c9e0b0
fix(ai-handlers): wire PolicyReport index into ResourceContext.Build
nadaverell May 17, 2026
2aec8ec
fix(resourcecontext): tier-aware PolicySummary — basic emits counts only
nadaverell May 17, 2026
045097c
refactor(resourcecontext): consume topology.Relationships.ManagedBy +…
nadaverell May 17, 2026
99c878c
fix(security): RBAC preflight on /api/ai/resources GET handler
nadaverell May 17, 2026
d0a91c7
fix(resourcecontext): audit cross-Kind contamination + RunsOn fallbac…
nadaverell May 17, 2026
74ff1d5
fix(security): route group-qualified AI GET to dynamic cache to avoid…
nadaverell May 17, 2026
04d8d40
feat(resourcecontext): attach summaryContext to list_resources + sear…
nadaverell May 17, 2026
8339c67
feat(resourcecontext): consolidate summary builder on T23 (ManagedBy …
nadaverell May 17, 2026
8309c10
fix(summaryContext): group-aware issue index, uncapped count, native …
nadaverell May 17, 2026
12eb7d1
fix(security+correctness): preflight RBAC on AI list + populate Probl…
nadaverell May 17, 2026
2f4ecaa
perf(summaryContext): memoize MCP topology builds + fix misleading fi…
nadaverell May 17, 2026
8ce001a
fix(summaryContext): cluster-scoped issueCount + CRD scan perf
nadaverell May 17, 2026
d1b95ed
fix(summaryContext+ai-list): dual-index search + group-aware list rou…
nadaverell May 17, 2026
e000dcb
fix(summaryContext): use Spec.Replicas + defer SummaryBuilder past tr…
nadaverell May 18, 2026
a18a9f3
chore(search): drop dead summaryBuilder parameter from buildHit
nadaverell May 18, 2026
c51a1a4
fix(resourcecontext): restore SummaryContext type after stray rename
nadaverell May 18, 2026
f3c1acf
refactor(summaryContext): lift shared core into internal/summarycontext
nadaverell May 18, 2026
c51e0b1
refactor(resourcecontext): rename SummaryContext → ResourceSummaryCon…
nadaverell May 18, 2026
34ad05c
refactor(summarycontext): centralize attach helpers + group extractors
nadaverell May 18, 2026
ac10b5b
fix(summarycontext): drop kindFilter so CRD plurals don't zero issueC…
nadaverell May 18, 2026
322d386
fix(resourcecontext): canonical kind + cross-group pseudo-kind for re…
nadaverell May 17, 2026
9f7887c
chore(resourcecontext): drop Hints prose projection from v1
nadaverell May 18, 2026
1b56e9b
docs(ai): declare /api/ai/* outside the OpenAPI spec, with reasoning
nadaverell May 18, 2026
bb3fe6d
fix(resourcecontext): pseudo-kind in fallback, swap CM/Secret reasons…
nadaverell May 18, 2026
444195a
chore(resourcecontext): drop speculative wire surface
nadaverell May 18, 2026
3e43759
fix(resourcecontext): address Bugbot findings on T6
nadaverell May 18, 2026
37983a1
fix(audit): match issue summary's nil-namespace guard for cluster-sco…
nadaverell May 18, 2026
3742ffa
fix: rebase fallout — group-aware FindingsFor + smoke-test import merge
nadaverell May 18, 2026
feb0073
test: extend fakeIssuesProvider with Kyverno methods for post-T11 Pro…
nadaverell May 18, 2026
9d6658a
fix: group-aware audit + issue summary lookups
nadaverell May 18, 2026
7a1f006
fix(resourcecontext): normalize audit severity to issue vocabulary on…
nadaverell May 18, 2026
a5c657b
merge T6 (#721) for combined testing
nadaverell May 18, 2026
98f08ff
merge T89 (#722) into T6+T89 test branch
nadaverell May 18, 2026
3d33d73
feat: T7 + log pipe primitives + content search + bench-driven descri…
nadaverell May 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 57 additions & 3 deletions internal/issues/issues.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,17 @@ func Compose(p Provider, f Filters) []Issue {
// severity desc, then last-seen desc, then kind/ns/name for stable
// tiebreaks.
func ComposeWithStats(p Provider, f Filters) ([]Issue, ComposeStats) {
// Negative Limit is the "uncapped" sentinel: callers that need the
// full matched set (per-resource issue indexes for /api/ai list +
// search summaryContext) pass NoLimit so a 5000-issue cluster
// doesn't silently drop counts for resources whose issues fall in
// the tail beyond MaxLimit. Zero still maps to DefaultLimit so the
// public /api/issues + MCP issues_list keep their tight caps.
uncapped := f.Limit < 0
if f.Limit == 0 {
f.Limit = DefaultLimit
}
if f.Limit > MaxLimit {
if !uncapped && f.Limit > MaxLimit {
f.Limit = MaxLimit
}

Expand Down Expand Up @@ -201,7 +208,7 @@ func ComposeWithStats(p Provider, f Filters) ([]Issue, ComposeStats) {
return out[i].Name < out[j].Name
})
stats.TotalMatched = len(out)
if len(out) > f.Limit {
if !uncapped && len(out) > f.Limit {
out = out[:f.Limit]
}
return out, stats
Expand All @@ -211,11 +218,24 @@ func ComposeWithStats(p Provider, f Filters) ([]Issue, ComposeStats) {
// warning Issue for each object that has a False Ready/Available/etc.
// condition. Skips kinds owned by curated checkers (Cluster API today)
// to avoid double-reporting.
//
// When f.Kinds is non-empty (e.g. summaryContext building a per-resource
// issue index for a list_resources call on a single kind), GVRs whose
// kind isn't in the filter are skipped BEFORE the ListDynamic call —
// without this gate, a pods-only request still scanned every watched
// CRD up front and applyFilters discarded the rows afterward. Kind
// comparison mirrors applyFilters: lowercase for case-insensitive
// match against the user's filter (which itself is canonicalized to
// the singular form upstream).
func detectGenericCRDIssues(p Provider, f Filters) []Issue {
gvrs := p.WatchedDynamic()
if len(gvrs) == 0 {
return nil
}
wantKind := map[string]bool{}
for _, k := range f.Kinds {
wantKind[strings.ToLower(k)] = true
}
var out []Issue
for _, gvr := range gvrs {
if isCuratedCRDGroup(gvr.Group) {
Expand All @@ -225,6 +245,15 @@ func detectGenericCRDIssues(p Provider, f Filters) []Issue {
if kind == "" {
continue
}
// applyFilters runs after Compose returns — but on hot paths that
// pin a single kind (summaryContext per-row index), routing the
// kind filter through here skips the per-GVR ListDynamic call
// entirely. Match in lowercase (same as applyFilters) so
// "Pod"/"pod" and CRD-typed "MyResource"/"myresource" both
// compare equal.
if len(wantKind) > 0 && !wantKind[strings.ToLower(kind)] {
continue
}
clusterScoped, _, _ := classifyDynamicScope(p, gvr, kind)
if clusterScoped && f.CanReadClusterScoped != nil && !f.CanReadClusterScoped(kind, gvr.Group) {
continue
Expand Down Expand Up @@ -303,6 +332,21 @@ func condTypeReason(condType, reason string) string {
// Source-specific normalization
// ---------------------------------------------------------------------------

// resolveGroup picks the API group to stamp on an Issue. A non-empty
// group supplied by the emitter always wins; an empty one is looked up
// from the built-in (Kind→Group) table via bp.GroupForBuiltinKind.
//
// The fallback exists because some legacy Problem emission sites in
// k8s.DetectProblems still leave Group="" for built-in workloads
// (Deployment, StatefulSet, etc.). Group-aware consumers such as
// computeIssueSummaryForResource look issues up by canonical group
// (e.g. "apps") and would otherwise silently miss those rows. The table
// itself is owned by pkg/audit; this helper is only a pass-through so
// the mapping stays defined in a single place.
func resolveGroup(group, kind string) string {
	if group == "" {
		return bp.GroupForBuiltinKind(kind)
	}
	return group
}

func fromProblem(p k8s.Problem, now time.Time) Issue {
sev := SeverityWarning
if p.Severity == "critical" {
Expand All @@ -313,7 +357,7 @@ func fromProblem(p k8s.Problem, now time.Time) Issue {
Severity: sev,
Source: SourceProblem,
Kind: p.Kind,
Group: p.Group,
Group: resolveGroup(p.Group, p.Kind),
Namespace: p.Namespace,
Name: p.Name,
Reason: p.Reason,
Expand All @@ -333,6 +377,7 @@ func fromAudit(fin bp.Finding, now time.Time) Issue {
Severity: sev,
Source: SourceAudit,
Kind: fin.Kind,
Group: resolveGroup(fin.Group, fin.Kind),
Namespace: fin.Namespace,
Name: fin.Name,
Reason: fin.CheckID,
Expand Down Expand Up @@ -413,10 +458,19 @@ func fromWarningEvent(e *corev1.Event) Issue {
if first.IsZero() {
first = last
}
// Event.InvolvedObject carries apiVersion (group/version); split out
// the group so cross-group consumers don't collide when a Knative
// Service and a core Service share name+ns.
group, _, _ := strings.Cut(e.InvolvedObject.APIVersion, "/")
if e.InvolvedObject.APIVersion != "" && !strings.Contains(e.InvolvedObject.APIVersion, "/") {
// "v1" → core group "".
group = ""
}
return Issue{
Severity: SeverityWarning,
Source: SourceEvent,
Kind: e.InvolvedObject.Kind,
Group: resolveGroup(group, e.InvolvedObject.Kind),
Namespace: e.Namespace,
Name: e.InvolvedObject.Name,
Reason: e.Reason,
Expand Down
84 changes: 84 additions & 0 deletions internal/issues/issues_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -622,3 +622,87 @@ func TestFlattenNamespacedProblems_EmptyInputReturnsNil(t *testing.T) {
t.Errorf("empty input should produce empty output, got %+v", out)
}
}

// countingProvider embeds fakeProvider and records how many times
// ListDynamic is invoked for each GVR. It backs
// TestDetectGenericCRDIssues_SkipsListWhenKindFiltered, which asserts
// that detectGenericCRDIssues does not call ListDynamic for GVRs whose
// kind is excluded by f.Kinds — on clusters with hundreds of watched
// CRDs, scanning every one for a pods-only summaryContext request was
// the dominant cost.
type countingProvider struct {
	fakeProvider
	// listCalls maps each GVR to its ListDynamic call count. It is
	// allocated lazily on first use, so tests may reset it to nil.
	listCalls map[schema.GroupVersionResource]int
}

// ListDynamic bumps the per-GVR call counter and then delegates to the
// embedded fakeProvider, returning its result unchanged.
func (c *countingProvider) ListDynamic(gvr schema.GroupVersionResource, ns string) ([]*unstructured.Unstructured, error) {
	calls := c.listCalls
	if calls == nil {
		calls = make(map[schema.GroupVersionResource]int)
		c.listCalls = calls
	}
	calls[gvr]++
	return c.fakeProvider.ListDynamic(gvr, ns)
}

// TestDetectGenericCRDIssues_SkipsListWhenKindFiltered guards the kind
// pre-filter inside detectGenericCRDIssues.
//
// Before the fix, a Compose call with Kinds=["Pod"] still walked every
// watched CRD GVR and issued a ListDynamic for each one; applyFilters
// only threw the non-matching rows away at the very end. On clusters
// with hundreds of watched CRDs that scan dominated the summaryContext
// per-row index build for list_resources kind=pods.
//
// The test checks two things via countingProvider's call tally:
//  1. with a kind filter, only the matching GVR is listed;
//  2. with no kind filter, every GVR is listed (the shortcut must be
//     filter-aware, not unconditional).
func TestDetectGenericCRDIssues_SkipsListWhenKindFiltered(t *testing.T) {
	var (
		podGVR = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"}
		appGVR = schema.GroupVersionResource{Group: "argoproj.io", Version: "v1alpha1", Resource: "applications"}
		npGVR  = schema.GroupVersionResource{Group: "karpenter.sh", Version: "v1", Resource: "nodepools"}
	)

	// The only populated GVR: one Application carrying a False
	// "Synced" condition. The other two GVRs are registered with empty
	// lists so that only the ListDynamic call itself is observable.
	driftedApp := &unstructured.Unstructured{Object: map[string]any{
		"metadata": map[string]any{"name": "a", "namespace": "argocd"},
		"status": map[string]any{
			"conditions": []any{
				map[string]any{"type": "Synced", "status": "False", "reason": "Drift"},
			},
		},
	}}

	p := &countingProvider{
		fakeProvider: fakeProvider{
			dynamic: map[schema.GroupVersionResource][]*unstructured.Unstructured{
				podGVR: {},
				appGVR: {driftedApp},
				npGVR:  {},
			},
			kinds: map[schema.GroupVersionResource]string{
				podGVR: "Pod",
				appGVR: "Application",
				npGVR:  "NodePool",
			},
		},
	}

	// Phase 1: filter on Application. detectGenericCRDIssues compares
	// kinds in lowercase (mirroring applyFilters), so the canonical
	// "Application" must match the argoproj.io GVR and nothing else.
	_ = detectGenericCRDIssues(p, Filters{Kinds: []string{"Application"}})

	if n := p.listCalls[podGVR]; n != 0 {
		t.Errorf("Pod GVR ListDynamic calls = %d, want 0 (kind filter must skip non-matching GVRs)", n)
	}
	if n := p.listCalls[npGVR]; n != 0 {
		t.Errorf("NodePool GVR ListDynamic calls = %d, want 0 (kind filter must skip non-matching GVRs)", n)
	}
	if n := p.listCalls[appGVR]; n == 0 {
		t.Errorf("Application GVR ListDynamic calls = %d, want >= 1 (matching kind must still be scanned)", n)
	}

	// Phase 2: reset the tally and run with no kind filter. Every GVR
	// must be listed, proving the skip only applies when the caller
	// actually asked for specific kinds.
	p.listCalls = nil
	_ = detectGenericCRDIssues(p, Filters{})
	for _, gvr := range []schema.GroupVersionResource{podGVR, appGVR, npGVR} {
		if called := p.listCalls[gvr] > 0; !called {
			t.Errorf("no kind filter: GVR %s called=%v, want %v", gvr.Resource, called, true)
		}
	}
}
7 changes: 7 additions & 0 deletions internal/issues/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,11 @@ type Filters struct {
// Result-cap settings for Filters.Limit, as applied by ComposeWithStats.
const (
	// NoLimit disables the result cap. Pass it as Filters.Limit when the
	// caller needs the full matched set (e.g. building a per-resource
	// issue index for summaryContext — capping there would silently zero
	// out counts for resources whose issues fall in the tail beyond
	// MaxLimit on large clusters). Stats.TotalMatched is reliable
	// regardless; this only turns off the post-sort slice.
	NoLimit = -1

	// DefaultLimit is the cap substituted when Filters.Limit is zero.
	DefaultLimit = 200

	// MaxLimit is the ceiling a positive Filters.Limit is clamped to.
	MaxLimit = 1000
)
7 changes: 7 additions & 0 deletions internal/k8s/problems.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ func DetectProblems(cache *ResourceCache, namespace string) []Problem {
Kind: "Deployment",
Namespace: d.Namespace,
Name: d.Name,
Group: "apps",
Severity: "critical",
Reason: fmt.Sprintf("%d/%d available", d.Status.AvailableReplicas, d.Status.Replicas),
Age: FormatAge(ageDur),
Expand All @@ -78,6 +79,7 @@ func DetectProblems(cache *ResourceCache, namespace string) []Problem {
Kind: "Deployment",
Namespace: d.Namespace,
Name: d.Name,
Group: "apps",
Severity: "critical",
Reason: "Rollout stuck",
Message: cond.Message,
Expand Down Expand Up @@ -107,6 +109,7 @@ func DetectProblems(cache *ResourceCache, namespace string) []Problem {
Kind: "StatefulSet",
Namespace: ss.Namespace,
Name: ss.Name,
Group: "apps",
Severity: "critical",
Reason: fmt.Sprintf("%d/%d ready", ss.Status.ReadyReplicas, ss.Status.Replicas),
Age: FormatAge(ageDur),
Expand All @@ -133,6 +136,7 @@ func DetectProblems(cache *ResourceCache, namespace string) []Problem {
Kind: "DaemonSet",
Namespace: ds.Namespace,
Name: ds.Name,
Group: "apps",
Severity: "critical",
Reason: fmt.Sprintf("%d unavailable", ds.Status.NumberUnavailable),
Age: FormatAge(ageDur),
Expand All @@ -157,6 +161,7 @@ func DetectProblems(cache *ResourceCache, namespace string) []Problem {
Kind: "HorizontalPodAutoscaler",
Namespace: hp.Namespace,
Name: hp.Name,
Group: "autoscaling",
Severity: "medium",
Reason: hp.Problem,
Message: hp.Reason,
Expand All @@ -177,6 +182,7 @@ func DetectProblems(cache *ResourceCache, namespace string) []Problem {
Kind: "CronJob",
Namespace: cp.Namespace,
Name: cp.Name,
Group: "batch",
Severity: "medium",
Reason: cp.Problem,
Message: cp.Reason,
Expand Down Expand Up @@ -251,6 +257,7 @@ func DetectProblems(cache *ResourceCache, namespace string) []Problem {
Kind: "Job",
Namespace: job.Namespace,
Name: job.Name,
Group: "batch",
Severity: "high",
Reason: fmt.Sprintf("Running for %s with no completions", FormatAge(ageDur)),
Age: FormatAge(ageDur),
Expand Down
Loading
Loading