# Reviewer notes. This text sits before the first "diff --git" header, where
# patch files conventionally carry free-form commentary (TODO confirm the
# consuming tool skips it).
#
# What the patch does:
#   1. dashboards/scheduler.libsonnet: in two Prometheus targets, the metric
#      scheduler_e2e_scheduling_duration_seconds is replaced by
#      scheduler_scheduling_attempt_duration_seconds. One target is a
#      sum(rate(..._count)) query, the other a histogram_quantile(0.99, ...)
#      query over ..._bucket. The legend format of both targets is an
#      unchanged context line and still ends in "e2e".
#   2. rules/kube_scheduler.libsonnet: the same metric name is replaced in the
#      list iterated by "for metric in [...]" (alongside quantiles 0.99, 0.9
#      and 0.5).
#   3. tests/kube_scheduler-test.yaml: new file, 131 lines per its hunk header.
#      It holds nine test cases, one per (metric, quantile) pair for three
#      metrics and quantiles 0.5 / 0.9 / 0.99. Each case feeds two bucket
#      series (le="1" and le="+Inf", both with values '0+60x5') and, at
#      eval_time 5m, expects cluster_quantile:<metric>:histogram_quantile to
#      equal the quantile value itself.
#
# NOTE(review): the original line breaks appear to have been collapsed into
# spaces, so hunk lines are no longer separated. Restore the line structure
# from the original patch before trying to apply it.
# NOTE(review): both dashboards hunks declare 7 lines ("-86,7 +86,7" and
# "-103,7 +103,7"), but only 6 old-side lines are visible in each: 3 leading
# context, 1 removed, 2 trailing context. A trailing context line seems to be
# missing; confirm against the original patch.
# NOTE(review): in the collapsed text, "+ " can be either a diff added-line
# marker or the jsonnet "+" operator at the start of a context line (for
# example "+ tsPanel.gridPos.withW(10)"). Read with care.
diff --git a/dashboards/scheduler.libsonnet b/dashboards/scheduler.libsonnet index 4a7d236a4..504f9d493 100644 --- a/dashboards/scheduler.libsonnet +++ b/dashboards/scheduler.libsonnet @@ -86,7 +86,7 @@ local var = g.dashboard.variable; + tsPanel.gridPos.withW(10) + tsPanel.standardOptions.withUnit('ops') + tsPanel.queryOptions.withTargets([ - prometheus.new('${datasource}', 'sum(rate(scheduler_e2e_scheduling_duration_seconds_count{%(clusterLabel)s="$cluster", %(kubeSchedulerSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (%(clusterLabel)s, instance)' % $._config) + prometheus.new('${datasource}', 'sum(rate(scheduler_scheduling_attempt_duration_seconds_count{%(clusterLabel)s="$cluster", %(kubeSchedulerSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (%(clusterLabel)s, instance)' % $._config) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{instance}} e2e' % $._config), prometheus.new('${datasource}', 'sum(rate(scheduler_binding_duration_seconds_count{%(clusterLabel)s="$cluster", %(kubeSchedulerSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (%(clusterLabel)s, instance)' % $._config) @@ -103,7 +103,7 @@ local var = g.dashboard.variable; + tsPanel.gridPos.withW(10) + tsPanel.standardOptions.withUnit('s') + tsPanel.queryOptions.withTargets([ - prometheus.new('${datasource}', 'histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeSchedulerSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (%(clusterLabel)s, instance, le))' % $._config) + prometheus.new('${datasource}', 'histogram_quantile(0.99, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeSchedulerSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (%(clusterLabel)s, instance, le))' % $._config) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{instance}} e2e' % $._config), prometheus.new('${datasource}', 
'histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeSchedulerSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (%(clusterLabel)s, instance, le))' % $._config) diff --git a/rules/kube_scheduler.libsonnet b/rules/kube_scheduler.libsonnet index f319642fb..2275f37f0 100644 --- a/rules/kube_scheduler.libsonnet +++ b/rules/kube_scheduler.libsonnet @@ -20,7 +20,7 @@ } for quantile in ['0.99', '0.9', '0.5'] for metric in [ - 'scheduler_e2e_scheduling_duration_seconds', + 'scheduler_scheduling_attempt_duration_seconds', 'scheduler_scheduling_algorithm_duration_seconds', 'scheduler_binding_duration_seconds', ] diff --git a/tests/kube_scheduler-test.yaml b/tests/kube_scheduler-test.yaml new file mode 100644 index 000000000..c84258e23 --- /dev/null +++ b/tests/kube_scheduler-test.yaml @@ -0,0 +1,131 @@ +rule_files: +- ../prometheus_rules.yaml + +evaluation_interval: 1m + +tests: +# Test scheduler_scheduling_attempt_duration_seconds quantile 0.5 +- interval: 1m + input_series: + - series: 'scheduler_scheduling_attempt_duration_seconds_bucket{job="kube-scheduler",le="+Inf"}' + values: '0+60x5' + - series: 'scheduler_scheduling_attempt_duration_seconds_bucket{job="kube-scheduler",le="1"}' + values: '0+60x5' + promql_expr_test: + - eval_time: 5m + expr: cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile{quantile="0.5"} + exp_samples: + - value: 0.5 + labels: 'cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile{job="kube-scheduler",quantile="0.5"}' + +# Test scheduler_scheduling_attempt_duration_seconds quantile 0.9 +- interval: 1m + input_series: + - series: 'scheduler_scheduling_attempt_duration_seconds_bucket{job="kube-scheduler",le="+Inf"}' + values: '0+60x5' + - series: 'scheduler_scheduling_attempt_duration_seconds_bucket{job="kube-scheduler",le="1"}' + values: '0+60x5' + promql_expr_test: + - eval_time: 5m + expr: 
cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile{quantile="0.9"} + exp_samples: + - value: 0.9 + labels: 'cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile{job="kube-scheduler",quantile="0.9"}' + +# Test scheduler_scheduling_attempt_duration_seconds quantile 0.99 +- interval: 1m + input_series: + - series: 'scheduler_scheduling_attempt_duration_seconds_bucket{job="kube-scheduler",le="+Inf"}' + values: '0+60x5' + - series: 'scheduler_scheduling_attempt_duration_seconds_bucket{job="kube-scheduler",le="1"}' + values: '0+60x5' + promql_expr_test: + - eval_time: 5m + expr: cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile{quantile="0.99"} + exp_samples: + - value: 0.99 + labels: 'cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile{job="kube-scheduler",quantile="0.99"}' + +# Test scheduler_scheduling_algorithm_duration_seconds quantile 0.5 +- interval: 1m + input_series: + - series: 'scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler",le="+Inf"}' + values: '0+60x5' + - series: 'scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler",le="1"}' + values: '0+60x5' + promql_expr_test: + - eval_time: 5m + expr: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile{quantile="0.5"} + exp_samples: + - value: 0.5 + labels: 'cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile{job="kube-scheduler",quantile="0.5"}' + +# Test scheduler_scheduling_algorithm_duration_seconds quantile 0.9 +- interval: 1m + input_series: + - series: 'scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler",le="+Inf"}' + values: '0+60x5' + - series: 'scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler",le="1"}' + values: '0+60x5' + promql_expr_test: + - eval_time: 5m + expr: 
cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile{quantile="0.9"} + exp_samples: + - value: 0.9 + labels: 'cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile{job="kube-scheduler",quantile="0.9"}' + +# Test scheduler_scheduling_algorithm_duration_seconds quantile 0.99 +- interval: 1m + input_series: + - series: 'scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler",le="+Inf"}' + values: '0+60x5' + - series: 'scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler",le="1"}' + values: '0+60x5' + promql_expr_test: + - eval_time: 5m + expr: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile{quantile="0.99"} + exp_samples: + - value: 0.99 + labels: 'cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile{job="kube-scheduler",quantile="0.99"}' + +# Test scheduler_binding_duration_seconds quantile 0.5 +- interval: 1m + input_series: + - series: 'scheduler_binding_duration_seconds_bucket{job="kube-scheduler",le="+Inf"}' + values: '0+60x5' + - series: 'scheduler_binding_duration_seconds_bucket{job="kube-scheduler",le="1"}' + values: '0+60x5' + promql_expr_test: + - eval_time: 5m + expr: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile{quantile="0.5"} + exp_samples: + - value: 0.5 + labels: 'cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile{job="kube-scheduler",quantile="0.5"}' + +# Test scheduler_binding_duration_seconds quantile 0.9 +- interval: 1m + input_series: + - series: 'scheduler_binding_duration_seconds_bucket{job="kube-scheduler",le="+Inf"}' + values: '0+60x5' + - series: 'scheduler_binding_duration_seconds_bucket{job="kube-scheduler",le="1"}' + values: '0+60x5' + promql_expr_test: + - eval_time: 5m + expr: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile{quantile="0.9"} + exp_samples: + - value: 0.9 + labels: 
'cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile{job="kube-scheduler",quantile="0.9"}' + +# Test scheduler_binding_duration_seconds quantile 0.99 +- interval: 1m + input_series: + - series: 'scheduler_binding_duration_seconds_bucket{job="kube-scheduler",le="+Inf"}' + values: '0+60x5' + - series: 'scheduler_binding_duration_seconds_bucket{job="kube-scheduler",le="1"}' + values: '0+60x5' + promql_expr_test: + - eval_time: 5m + expr: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile{quantile="0.99"} + exp_samples: + - value: 0.99 + labels: 'cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile{job="kube-scheduler",quantile="0.99"}'