Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deploy new monitoring stack #3360

Merged
merged 3 commits into from
Jun 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions infra/gcp/tests/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,9 @@ module "project" {
"compute.googleapis.com"
]
}

# Instantiates the local ./monitoring module against the test project.
module "monitoring" {
  source  = "./monitoring"
  project = module.project.project_id

  # NOTE(review): "potato" looks like a placeholder value; confirm the real
  # notification channel ID before the module starts consuming it.
  notification_channel_id = "potato"
}
40 changes: 40 additions & 0 deletions infra/gcp/tests/monitoring/boskos.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Alert policy that fires when the Boskos resource pool has no free entries.
resource "google_monitoring_alert_policy" "boskos_alerts" {
  # One policy is created while var.allowed_list is empty; otherwise none.
  count = length(var.allowed_list) == 0 ? 1 : 0

  project      = var.project
  display_name = "boskos-alerts"
  combiner     = "OR" # required

  documentation {
    mime_type = "text/markdown"
    content   = "Boskos ran out of resources"
  }

  conditions {
    display_name = "Boskos ran out of resources"

    condition_monitoring_query_language {
      duration = "0s"

      trigger {
        count = 1
      }

      # The query builds two tables from the boskos_resources gauge:
      #   t_0 - only the series whose state is 'free'
      #   t_1 - every series (ident)
      # Both are grouped by metric.type and outer-joined with a default of 0.
      # The condition holds when the 'free' aggregate is 0 while the overall
      # aggregate is greater than 5.
      query = <<-EOT
        fetch prometheus_target
        | metric 'prometheus.googleapis.com/boskos_resources/gauge'
        | {
        t_0:
        filter state == 'free'
        ;
        t_1:
        ident
        }
        | group_by [metric.type]
        | outer_join 0
        | condition t_0.value_boskos_resources_aggregate == 0 && t_1.value_boskos_resources_aggregate > 5
        | window 1m
        EOT
    }
  }

  # Notification channels are currently disabled. To list the available ones:
  # gcloud beta monitoring channels list --project=oss-prow
  # notification_channels = ["projects/${var.project}/notificationChannels/${var.notification_channel_id}"]
}
12 changes: 12 additions & 0 deletions infra/gcp/tests/monitoring/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Project in which the monitoring resources of this module are created
# (passed to the alert policy's `project` argument).
variable "project" {
  type = string
}

# ID of the notification channel alerts should be routed to.
# NOTE(review): in boskos.tf the only reference to this variable is inside a
# commented-out `notification_channels` line - confirm whether it is used elsewhere.
variable "notification_channel_id" {
  type = string
}

# Set of strings, empty by default. The boskos alert policy is created only
# while this set is empty (see the `count` expression in boskos.tf).
variable "allowed_list" {
  type    = set(string)
  default = []
}
5 changes: 2 additions & 3 deletions prow/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ include Makefile.gcloud.mk
PROJECT ?= knative-tests
PROJECT_BUILD ?= knative-tests
REGION ?= us-central1
ZONE ?= us-central1-f
CLUSTER ?= prow
CLUSTER_BUILD ?= knative-prow-build-cluster
CLUSTER_BUILD ?= prow-build
JOB_NAMESPACE ?= test-pods

.PHONY: deploy
Expand All @@ -31,8 +32,6 @@ deploy: get-cluster-credentials
.PHONY: deploy-build
deploy-build: get-build-cluster-credentials
kubectl apply -f ./cluster/build/
kubectl create configmap resources --from-file=config=./cluster/boskos/boskos_resources.yaml --dry-run --save-config -o yaml \
| kubectl --namespace="$(JOB_NAMESPACE)" apply -f -

.PHONY: deploy-monitoring
deploy-monitoring:
Expand Down
2 changes: 1 addition & 1 deletion prow/Makefile.gcloud.mk
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@ get-cluster-credentials: save-kubeconfig activate-serviceaccount

.PHONY: get-build-cluster-credentials
get-build-cluster-credentials: save-kubeconfig activate-serviceaccount
gcloud container clusters get-credentials "$(CLUSTER_BUILD)" --project="$(PROJECT_BUILD)" --zone="$(ZONE)"
gcloud container clusters get-credentials "$(CLUSTER_BUILD)" --project="$(PROJECT_BUILD)" --region="$(REGION)"
# This file is sourced by ./Makefile, ./cluster/monitoring/Makefile, and ./cluster/monitoring/mixins/Makefile.
150 changes: 150 additions & 0 deletions prow/cluster/200-monitoring.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# These resources are consumed by the GKE Managed Prometheus (GMP) services in the cluster.
# (They are not related to prometheus-operator.)
# Ref:
# https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-managed#gmp-pod-monitoring.
---
# Scrapes the "metrics" port of pods labelled app=deck every 30s over HTTP.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: deck
  namespace: default
  labels:
    app: deck
spec:
  selector:
    matchLabels:
      app: deck
  endpoints:
    - port: metrics
      scheme: http
      interval: 30s
---
# Scrapes the "metrics" port of pods labelled app=ghproxy every 30s over HTTP.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: ghproxy
  namespace: default
  labels:
    app: ghproxy
spec:
  selector:
    matchLabels:
      app: ghproxy
  endpoints:
    - port: metrics
      scheme: http
      interval: 30s
---
# Scrapes the "metrics" port of pods labelled app=hook every 30s over HTTP.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: hook
  namespace: default
  labels:
    app: hook
spec:
  selector:
    matchLabels:
      app: hook
  endpoints:
    - port: metrics
      scheme: http
      interval: 30s
---
# Scrapes the "metrics" port every 30s over HTTP.
# Note: the resource is named/labelled "plank" but it selects pods labelled
# app=prow-controller-manager.
# NOTE(review): presumably plank runs inside prow-controller-manager - confirm.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: plank
  namespace: default
  labels:
    app: plank
spec:
  selector:
    matchLabels:
      app: prow-controller-manager
  endpoints:
    - port: metrics
      scheme: http
      interval: 30s
---
# Scrapes the "metrics" port of pods labelled app=sinker every 30s over HTTP.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: sinker
  namespace: default
  labels:
    app: sinker
spec:
  selector:
    matchLabels:
      app: sinker
  endpoints:
    - port: metrics
      scheme: http
      interval: 30s
---
# Scrapes the "metrics" port of pods labelled app=tide every 30s over HTTP.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: tide
  namespace: default
  labels:
    app: tide
spec:
  selector:
    matchLabels:
      app: tide
  endpoints:
    - port: metrics
      scheme: http
      interval: 30s
---
# Scrapes the "metrics" port of pods labelled app=horologium every 30s over HTTP.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: horologium
  namespace: default
  labels:
    app: horologium
spec:
  selector:
    matchLabels:
      app: horologium
  endpoints:
    - port: metrics
      scheme: http
      interval: 30s
---
# Scrapes the "metrics" port of pods labelled app=crier every 30s over HTTP.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: crier
  namespace: default
  labels:
    app: crier
spec:
  selector:
    matchLabels:
      app: crier
  endpoints:
    - port: metrics
      scheme: http
      interval: 30s

---
# Scrapes the "prometheus" port of pods labelled
# app.kubernetes.io/name=kubernetes-external-secrets every 30s over HTTP.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: kubernetes-external-secrets
  namespace: default
  labels:
    app: kubernetes-external-secrets
    app.kubernetes.io/name: kubernetes-external-secrets
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kubernetes-external-secrets
  endpoints:
    - port: prometheus
      scheme: http
      interval: 30s
14 changes: 14 additions & 0 deletions prow/cluster/build/200-monitoring.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Scrapes the "metrics" port of pods labelled
# app.kubernetes.io/name=external-secrets every 30s.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: external-secrets
  namespace: default
  labels:
    app.kubernetes.io/name: external-secrets
spec:
  endpoints:
    - interval: 30s
      port: metrics
  selector:
    matchLabels:
      app.kubernetes.io/name: external-secrets
52 changes: 52 additions & 0 deletions prow/cluster/build/400-boskos-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ spec:
ports:
- containerPort: 8080
protocol: TCP
- name: metrics
containerPort: 9090
resources:
limits:
cpu: "1"
Expand Down Expand Up @@ -111,6 +113,56 @@ data:
- knative-boskos-48
- knative-boskos-49
- knative-boskos-50
- knative-boskos-51
- knative-boskos-52
- knative-boskos-53
- knative-boskos-54
- knative-boskos-55
- knative-boskos-56
- knative-boskos-57
- knative-boskos-58
- knative-boskos-59
- knative-boskos-60
- knative-boskos-61
- knative-boskos-62
- knative-boskos-63
- knative-boskos-64
- knative-boskos-65
- knative-boskos-66
- knative-boskos-67
- knative-boskos-68
- knative-boskos-69
- knative-boskos-70
- knative-boskos-71
- knative-boskos-72
- knative-boskos-73
- knative-boskos-74
- knative-boskos-75
- knative-boskos-76
- knative-boskos-77
- knative-boskos-78
- knative-boskos-79
- knative-boskos-80
- knative-boskos-81
- knative-boskos-82
- knative-boskos-83
- knative-boskos-84
- knative-boskos-85
- knative-boskos-86
- knative-boskos-87
- knative-boskos-88
- knative-boskos-89
- knative-boskos-90
- knative-boskos-91
- knative-boskos-92
- knative-boskos-93
- knative-boskos-94
- knative-boskos-95
- knative-boskos-96
- knative-boskos-97
- knative-boskos-98
- knative-boskos-99
- knative-boskos-100
state: dirty
type: gke-project
kind: ConfigMap
Expand Down
23 changes: 13 additions & 10 deletions prow/cluster/build/400-boskos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -164,18 +164,21 @@ spec:
protocol: TCP
port: 80
targetPort: 8080
- name: metrics
port: 9090
protocol: TCP
targetPort: 9090
---
apiVersion: v1
kind: Service
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
name: boskos-metrics
name: boskos
namespace: test-pods
spec:
selector:
app: boskos
ports:
- name: metrics
port: 9090
protocol: TCP
targetPort: 9090
type: LoadBalancer
matchLabels:
app: boskos
endpoints:
- port: metrics
path: /metrics
interval: 30s