From d5f946a2f4789e2a9c3ab0713cd255016e6378d4 Mon Sep 17 00:00:00 2001 From: upodroid Date: Thu, 2 Jun 2022 21:11:20 +0100 Subject: [PATCH 1/3] add new monitoring stack --- infra/gcp/tests/monitoring/boskos.tf | 41 +++++++++++++++ infra/gcp/tests/monitoring/variables.tf | 12 +++++ prow/cluster/build/200-monitoring.yaml | 14 +++++ prow/cluster/build/400-boskos-deployment.yaml | 52 +++++++++++++++++++ prow/cluster/build/400-boskos.yaml | 23 ++++---- 5 files changed, 132 insertions(+), 10 deletions(-) create mode 100644 infra/gcp/tests/monitoring/boskos.tf create mode 100644 infra/gcp/tests/monitoring/variables.tf create mode 100644 prow/cluster/build/200-monitoring.yaml diff --git a/infra/gcp/tests/monitoring/boskos.tf b/infra/gcp/tests/monitoring/boskos.tf new file mode 100644 index 00000000000..f1e6a295a66 --- /dev/null +++ b/infra/gcp/tests/monitoring/boskos.tf @@ -0,0 +1,41 @@ +resource "google_monitoring_alert_policy" "boskos_alerts" { + count = length(var.allowed_list) == 0 ? 1 : 0 + project = var.project + display_name = "boskos-alerts" + combiner = "OR" # required + + conditions { + display_name = "Boskos ran out of resources" + + condition_monitoring_query_language { + duration = "0s" + query = <<-EOT + fetch prometheus_target + | metric 'prometheus.googleapis.com/boskos_resources/gauge' + | { + t_0: + filter state == 'free' + ; + t_1: + ident + } + | group_by [metric.type] + | outer_join 0 + | condition t_0.value_boskos_resources_aggregate == 0 && t_1.value_boskos_resources_aggregate > 5 + | window 1m + + EOT + trigger { + count = 1 + } + } + } + + documentation { + content = "Boskos ran out of resources" + mime_type = "text/markdown" + } + + # gcloud beta monitoring channels list --project=oss-prow +# notification_channels = ["projects/${var.project}/notificationChannels/${var.notification_channel_id}"] +} diff --git a/infra/gcp/tests/monitoring/variables.tf b/infra/gcp/tests/monitoring/variables.tf new file mode 100644 index 00000000000..ee5a9c1feee --- 
/dev/null +++ b/infra/gcp/tests/monitoring/variables.tf @@ -0,0 +1,12 @@ +variable "project" { + type = string +} + +variable "notification_channel_id" { + type = string +} + +variable "allowed_list" { + type = set(string) + default = [] +} diff --git a/prow/cluster/build/200-monitoring.yaml b/prow/cluster/build/200-monitoring.yaml new file mode 100644 index 00000000000..8831c926003 --- /dev/null +++ b/prow/cluster/build/200-monitoring.yaml @@ -0,0 +1,14 @@ +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + app.kubernetes.io/name: external-secrets + name: external-secrets + namespace: default +spec: + selector: + matchLabels: + app.kubernetes.io/name: external-secrets + endpoints: + - port: metrics + interval: 30s diff --git a/prow/cluster/build/400-boskos-deployment.yaml b/prow/cluster/build/400-boskos-deployment.yaml index 8cbb25913f4..e5823ad1ba8 100644 --- a/prow/cluster/build/400-boskos-deployment.yaml +++ b/prow/cluster/build/400-boskos-deployment.yaml @@ -40,6 +40,8 @@ spec: ports: - containerPort: 8080 protocol: TCP + - name: metrics + containerPort: 9090 resources: limits: cpu: "1" @@ -111,6 +113,56 @@ data: - knative-boskos-48 - knative-boskos-49 - knative-boskos-50 + - knative-boskos-51 + - knative-boskos-52 + - knative-boskos-53 + - knative-boskos-54 + - knative-boskos-55 + - knative-boskos-56 + - knative-boskos-57 + - knative-boskos-58 + - knative-boskos-59 + - knative-boskos-60 + - knative-boskos-61 + - knative-boskos-62 + - knative-boskos-63 + - knative-boskos-64 + - knative-boskos-65 + - knative-boskos-66 + - knative-boskos-67 + - knative-boskos-68 + - knative-boskos-69 + - knative-boskos-70 + - knative-boskos-71 + - knative-boskos-72 + - knative-boskos-73 + - knative-boskos-74 + - knative-boskos-75 + - knative-boskos-76 + - knative-boskos-77 + - knative-boskos-78 + - knative-boskos-79 + - knative-boskos-80 + - knative-boskos-81 + - knative-boskos-82 + - knative-boskos-83 + - knative-boskos-84 + - knative-boskos-85 
+ - knative-boskos-86 + - knative-boskos-87 + - knative-boskos-88 + - knative-boskos-89 + - knative-boskos-90 + - knative-boskos-91 + - knative-boskos-92 + - knative-boskos-93 + - knative-boskos-94 + - knative-boskos-95 + - knative-boskos-96 + - knative-boskos-97 + - knative-boskos-98 + - knative-boskos-99 + - knative-boskos-100 state: dirty type: gke-project kind: ConfigMap diff --git a/prow/cluster/build/400-boskos.yaml b/prow/cluster/build/400-boskos.yaml index 3265e6dfad0..4d1127a4ceb 100644 --- a/prow/cluster/build/400-boskos.yaml +++ b/prow/cluster/build/400-boskos.yaml @@ -164,18 +164,21 @@ spec: protocol: TCP port: 80 targetPort: 8080 + - name: metrics + port: 9090 + protocol: TCP + targetPort: 9090 --- -apiVersion: v1 -kind: Service +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring metadata: - name: boskos-metrics + name: boskos namespace: test-pods spec: selector: - app: boskos - ports: - - name: metrics - port: 9090 - protocol: TCP - targetPort: 9090 - type: LoadBalancer + matchLabels: + app: boskos + endpoints: + - port: metrics + path: /metrics + interval: 30s From 9858c26a19436de480c800f3b71556b9543f170c Mon Sep 17 00:00:00 2001 From: upodroid Date: Thu, 2 Jun 2022 21:14:49 +0100 Subject: [PATCH 2/3] fix makefile and add prow monitoring --- infra/gcp/tests/main.tf | 6 ++ prow/Makefile | 5 +- prow/Makefile.gcloud.mk | 2 +- prow/cluster/200-monitoring.yaml | 150 +++++++++++++++++++++++++++++++ 4 files changed, 159 insertions(+), 4 deletions(-) create mode 100644 prow/cluster/200-monitoring.yaml diff --git a/infra/gcp/tests/main.tf b/infra/gcp/tests/main.tf index 21ad9aa8ee7..4fb75726481 100644 --- a/infra/gcp/tests/main.tf +++ b/infra/gcp/tests/main.tf @@ -21,3 +21,9 @@ module "project" { "compute.googleapis.com" ] } + +module "monitoring" { + source = "./monitoring" + project = module.project.project_id + notification_channel_id = "potato" +} diff --git a/prow/Makefile b/prow/Makefile index c68fc96cebf..54e33b379bb 100644 --- 
a/prow/Makefile +++ b/prow/Makefile @@ -18,8 +18,9 @@ include Makefile.gcloud.mk PROJECT ?= knative-tests PROJECT_BUILD ?= knative-tests REGION ?= us-central1 +ZONE ?= us-central1-f CLUSTER ?= prow -CLUSTER_BUILD ?= knative-prow-build-cluster +CLUSTER_BUILD ?= prow-build JOB_NAMESPACE ?= test-pods .PHONY: deploy @@ -31,8 +32,6 @@ deploy: get-cluster-credentials .PHONY: deploy-build deploy-build: get-build-cluster-credentials kubectl apply -f ./cluster/build/ - kubectl create configmap resources --from-file=config=./cluster/boskos/boskos_resources.yaml --dry-run --save-config -o yaml \ - | kubectl --namespace="$(JOB_NAMESPACE)" apply -f - .PHONY: deploy-monitoring deploy-monitoring: diff --git a/prow/Makefile.gcloud.mk b/prow/Makefile.gcloud.mk index 615bd7387d5..986fff14355 100644 --- a/prow/Makefile.gcloud.mk +++ b/prow/Makefile.gcloud.mk @@ -38,5 +38,5 @@ get-cluster-credentials: save-kubeconfig activate-serviceaccount .PHONY: get-build-cluster-credentials get-build-cluster-credentials: save-kubeconfig activate-serviceaccount - gcloud container clusters get-credentials "$(CLUSTER_BUILD)" --project="$(PROJECT_BUILD)" --zone="$(ZONE)" + gcloud container clusters get-credentials "$(CLUSTER_BUILD)" --project="$(PROJECT_BUILD)" --region="$(REGION)" # This file is sourced by ./Makefile, ./cluster/monitoring/Makefile, and ./cluster/monitoring/mixins/Makefile. diff --git a/prow/cluster/200-monitoring.yaml b/prow/cluster/200-monitoring.yaml new file mode 100644 index 00000000000..89520a278b0 --- /dev/null +++ b/prow/cluster/200-monitoring.yaml @@ -0,0 +1,150 @@ +# These will be consumed by GKE Managed Prometheus(GMP) services in the cluster. +# (Not related to prometheus-operator). +# Ref: +# https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-managed#gmp-pod-monitoring. 
+--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + app: deck + name: deck + namespace: default +spec: + endpoints: + - interval: 30s + port: metrics + scheme: http + selector: + matchLabels: + app: deck +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + app: ghproxy + name: ghproxy + namespace: default +spec: + endpoints: + - interval: 30s + port: metrics + scheme: http + selector: + matchLabels: + app: ghproxy +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + app: hook + name: hook + namespace: default +spec: + endpoints: + - interval: 30s + port: metrics + scheme: http + selector: + matchLabels: + app: hook +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + app: plank + name: plank + namespace: default +spec: + endpoints: + - interval: 30s + port: metrics + scheme: http + selector: + matchLabels: + app: prow-controller-manager +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + app: sinker + name: sinker + namespace: default +spec: + endpoints: + - interval: 30s + port: metrics + scheme: http + selector: + matchLabels: + app: sinker +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + app: tide + name: tide + namespace: default +spec: + endpoints: + - interval: 30s + port: metrics + scheme: http + selector: + matchLabels: + app: tide +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + app: horologium + name: horologium + namespace: default +spec: + endpoints: + - interval: 30s + port: metrics + scheme: http + selector: + matchLabels: + app: horologium +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + app: crier + name: crier + namespace: default +spec: + endpoints: + - interval: 30s + port: metrics + scheme: http + selector: + matchLabels: + app: 
crier + +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + app.kubernetes.io/name: kubernetes-external-secrets + app: kubernetes-external-secrets + name: kubernetes-external-secrets + namespace: default +spec: + endpoints: + - interval: 30s + port: prometheus + scheme: http + selector: + matchLabels: + app.kubernetes.io/name: kubernetes-external-secrets From d8f6a32a30fdd7bbeeea7714d694b8bf197e01ef Mon Sep 17 00:00:00 2001 From: upodroid Date: Fri, 3 Jun 2022 22:15:10 +0100 Subject: [PATCH 3/3] fix whitespace --- infra/gcp/tests/monitoring/boskos.tf | 1 - 1 file changed, 1 deletion(-) diff --git a/infra/gcp/tests/monitoring/boskos.tf b/infra/gcp/tests/monitoring/boskos.tf index f1e6a295a66..98133f2a77f 100644 --- a/infra/gcp/tests/monitoring/boskos.tf +++ b/infra/gcp/tests/monitoring/boskos.tf @@ -23,7 +23,6 @@ resource "google_monitoring_alert_policy" "boskos_alerts" { | outer_join 0 | condition t_0.value_boskos_resources_aggregate == 0 && t_1.value_boskos_resources_aggregate > 5 | window 1m - EOT trigger { count = 1