Skip to content

Commit 0e1287c

Browse files
committed
Support multiCluster and single-cluster use cases
1 parent f643613 commit 0e1287c

File tree

2 files changed: +81 −24 lines changed

alerts/resource_alerts.libsonnet

Lines changed: 79 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,69 +24,126 @@
2424
rules: [
2525
{
2626
alert: 'KubeCPUOvercommit',
27+
labels: {
28+
severity: 'warning',
29+
},
30+
annotations: {
31+
summary: 'Cluster has overcommitted CPU resource requests.',
32+
},
33+
'for': '10m',
34+
} +
35+
if $._config.showMultiCluster then {
2736
expr: |||
2837
sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0
2938
and
3039
(sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0
3140
||| % $._config,
41+
annotations+: {
42+
description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.' % $._config,
43+
},
44+
} else {
45+
expr: |||
46+
sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
47+
and
48+
(sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
49+
||| % $._config,
50+
annotations+: {
51+
description: 'Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.' % $._config,
52+
},
53+
},
54+
{
55+
alert: 'KubeMemoryOvercommit',
3256
labels: {
3357
severity: 'warning',
3458
},
3559
annotations: {
36-
description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.' % $._config,
37-
summary: 'Cluster has overcommitted CPU resource requests.',
60+
summary: 'Cluster has overcommitted memory resource requests.',
3861
},
3962
'for': '10m',
40-
},
41-
{
42-
alert: 'KubeMemoryOvercommit',
63+
} +
64+
if $._config.showMultiCluster then {
4365
expr: |||
4466
sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0
4567
and
4668
(sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0
4769
||| % $._config,
48-
labels: {
49-
severity: 'warning',
50-
},
51-
annotations: {
70+
annotations+: {
5271
description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.' % $._config,
53-
summary: 'Cluster has overcommitted memory resource requests.',
5472
},
55-
'for': '10m',
73+
} else
74+
{
75+
expr: |||
76+
sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
77+
and
78+
(sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
79+
||| % $._config,
80+
annotations+: {
81+
description: 'Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.',
82+
},
5683
},
5784
{
5885
alert: 'KubeCPUQuotaOvercommit',
86+
labels: {
87+
severity: 'warning',
88+
},
89+
annotations: {
90+
summary: 'Cluster has overcommitted CPU resource requests.',
91+
},
92+
'for': '5m',
93+
} +
94+
if $._config.showMultiCluster then {
5995
expr: |||
6096
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(cpu|requests.cpu)"})) by (%(clusterLabel)s)
6197
/
6298
sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)
6399
> %(namespaceOvercommitFactor)s
64100
||| % $._config,
65-
labels: {
66-
severity: 'warning',
67-
},
68-
annotations: {
101+
annotations+: {
69102
description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted CPU resource requests for Namespaces.' % $._config,
70-
summary: 'Cluster has overcommitted CPU resource requests.',
71103
},
72-
'for': '5m',
73-
},
104+
} else
74105
{
75-
alert: 'KubeMemoryQuotaOvercommit',
76106
expr: |||
77-
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(memory|requests.memory)"})) by (%(clusterLabel)s)
107+
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(cpu|requests.cpu)"}))
78108
/
79-
sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)
109+
sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})
80110
> %(namespaceOvercommitFactor)s
81111
||| % $._config,
112+
annotations+: {
113+
description: 'Cluster has overcommitted CPU resource requests for Namespaces.',
114+
},
115+
},
116+
{
117+
alert: 'KubeMemoryQuotaOvercommit',
82118
labels: {
83119
severity: 'warning',
84120
},
85121
annotations: {
86-
description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted memory resource requests for Namespaces.' % $._config,
87122
summary: 'Cluster has overcommitted memory resource requests.',
88123
},
89124
'for': '5m',
125+
} +
126+
if $._config.showMultiCluster then {
127+
expr: |||
128+
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(memory|requests.memory)"})) by (%(clusterLabel)s)
129+
/
130+
sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)
131+
> %(namespaceOvercommitFactor)s
132+
||| % $._config,
133+
annotations+: {
134+
description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted memory resource requests for Namespaces.' % $._config,
135+
},
136+
} else
137+
{
138+
expr: |||
139+
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(memory|requests.memory)"}))
140+
/
141+
sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})
142+
> %(namespaceOvercommitFactor)s
143+
||| % $._config,
144+
annotations+: {
145+
description: 'Cluster has overcommitted memory resource requests for Namespaces.',
146+
},
90147
},
91148
{
92149
alert: 'KubeQuotaAlmostFull',

tests.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,7 +1168,7 @@ tests:
11681168
- exp_labels:
11691169
severity: "warning"
11701170
exp_annotations:
1171-
description: 'Cluster has overcommitted CPU resource requests for Namespaces.'
1171+
description: 'Cluster has overcommitted CPU resource requests for Namespaces.'
11721172
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit"
11731173
summary: "Cluster has overcommitted CPU resource requests."
11741174

@@ -1211,6 +1211,6 @@ tests:
12111211
- exp_labels:
12121212
severity: "warning"
12131213
exp_annotations:
1214-
description: 'Cluster has overcommitted memory resource requests for Namespaces.'
1214+
description: 'Cluster has overcommitted memory resource requests for Namespaces.'
12151215
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit"
12161216
summary: "Cluster has overcommitted memory resource requests."

0 commit comments

Comments
 (0)