-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.json
152 lines (140 loc) · 4.04 KB
/
data.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
{
"title": "Server Performance Monitoring", # title
"editable": true,
"time": {
"from": "now-1d",
"to": "now"
},
"templating": {
"list": [
{
"name": "server",
"type": "query",
"datasource": "mysql",
"query": "SELECT DISTINCT server_name FROM servers" # Query that returns the server names offered by the $server variable
}
]
},
"rows": [
{
"title": "Resource Utilization",
"height": "25%",
"panels": [
{
"title": "CPU Usage",
"type": "graph",
"span": 6,
"datasource": "mysql",
"targets": [
{
"target": "SELECT avg(cpu_usage) FROM servers WHERE server_name = '$server' GROUP BY time(1m)", # filtered by the quoted $server template variable
"refId": "A"
}
],
"options": {
"colorMode": "series",
"graphMode": "line",
"reduceOptions": {
"calcs": ["avg"]
}
}
},
{
"title": "Memory Usage",
"type": "gauge",
"span": 6,
"datasource": "mysql",
"targets": [
{
"target": "SELECT avg(memory_usage) FROM servers WHERE server_name = '$server' GROUP BY time(1m)",
"refId": "B"
}
],
"min": 0,
"max": 100,
"prefix": "%"
}
]
},
{
"title": "Request Traffic (Past Hour)",
"height": "25%",
"panels": [
{
"title": "HTTP Requests",
"type": "graph",
"span": 12,
"datasource": "prometheus",
"targets": [
{
"target": "http_requests_total{server='$server'}", # Use template variable for server selection
"legendFormat": "{{ server }} server"
}
],
"limit": 10,
"options": {
"timepicker": {
"refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "6h", "12h", "1d", "7d", "30d"]
}
}
}
]
},
{
"title": "Alerts",
"height": "25%",
"panels": [
{
"title": "Resource Threshold Alerts",
"type": "alertlist",
"span": 12,
"datasource": null, # Needs configuration for your alerting system
"
--------------------------------
# Host group that collects the web servers monitored by this configuration.
# Members are listed by host name; "webserver1" and "webserver2" are the two
# host blocks declared further down in this file.
hostgroup "Web Servers" {
members = ["webserver1", "webserver2"]
}
# Web server template: shared group membership, check interval and services
# for hosts that inherit from it (HTTP status, HTTP response time, root disk space).
host "Web Server Template" {
    groups = ["Web Servers"]
    vars.check_interval = 60s # Check every minute

    # Service to check HTTP status code with email notification
    service "HTTP Status" {
        check_command = http
        # 3006 is the port, so it is separated with ":"; the previous
        # "http://localhost/3006" requested the path /3006 on the default port.
        # NOTE(review): confirm 3006 is the port the application listens on.
        vars.url = "http://localhost:3006"
        # NOTE(review): confirm the parser reads 200-399 as a range and not as a subtraction.
        http_status_codes = [200-399] # Expect success codes (2xx, 3xx)
        notifications = [
            # NOTE(review): "[email protected]" is a redaction placeholder, not a
            # deliverable address; replace it with the real recipient.
            "[email protected]" # Notify via email on failure
        ]
        notification_options = {
            # Define notification escalation options (e.g., retry after X minutes)
            "timeperiod" = "24x7" # Always send notifications
            "retry" = 3 # Retry notification 3 times
            "retry_interval" = 10m # Retry after 10 minutes
        }
    }

    # Service to check response time and store performance data
    service "HTTP Response Time" {
        check_command = http
        # Same endpoint as "HTTP Status": host localhost, port 3006.
        vars.url = "http://localhost:3006"
        http_status_codes = [200] # Expect only success code (200)
        perfdata_file = http_response_time.perfdata # Store performance data
    }

    # Additional service to check for root disk space availability
    service "Root Disk Space" {
        check_command = check_disk
        vars.mount_point = "/" # Check root disk space
        vars.warning_threshold = 10 # Warn if free space falls below 10%
        vars.critical_threshold = 5 # Critical if free space falls below 5%
    }
}
# Define specific web servers inheriting the template with URL overrides
# NOTE(review): vars.url below is a single space, so the intended URL for this
# server appears to be missing; fill in the real URL or drop the override.
host "webserver1" {
inherits "Web Server Template"
vars.url = " "
}
# Second web server inheriting "Web Server Template".
# NOTE(review): vars.url below is a single space, so the intended URL for this
# server appears to be missing; fill in the real URL or drop the override.
host "webserver2" {
inherits "Web Server Template"
vars.url = " "
}