Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions grafana/alerts-rustchain.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Prometheus alerting rule file: one rule group holding the RustChain alerts.
groups:
- name: rustchain_alerts
# Evaluation interval set on this group (30s).
interval: 30s
# Alerting rules that belong to the group follow.
rules:
# Alert: Node is down.
# Fires when the node reports itself down (rustchain_node_up == 0) OR when
# Prometheus cannot scrape the "rustchain-node" job at all. The second clause
# matters when the metric is served by the node's own /metrics endpoint: a dead
# node then produces no rustchain_node_up sample, so a bare "== 0" comparison
# returns nothing and the alert would never fire. The built-in `up` series is
# set to 0 by Prometheus itself on a failed scrape.
# NOTE(review): the job label must match job_name in prometheus.yml.
- alert: NodeDown
  expr: 'rustchain_node_up == 0 or up{job="rustchain-node"} == 0'
  for: 1m
  labels:
    severity: critical
  annotations:
    summary: "RustChain node is down"
    description: "RustChain node has been down for more than 1 minute"

# Alert: Epoch stuck (no progress for 15 minutes = 1.5 epochs).
# rustchain_epoch_current is shown on the dashboard as the current epoch
# number, not a Unix timestamp, so subtracting it from time() is always far
# above 900 and would fire permanently. Instead, count how many times the
# gauge changed inside a 15-minute window; zero changes means no progress.
# NOTE(review): if the metric is actually a timestamp, revert to the
# time()-based comparison.
- alert: EpochStuck
  expr: changes(rustchain_epoch_current[15m]) == 0
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "Epoch progress stuck"
    description: "Epoch number has not changed for more than 15 minutes"

# Alert: Database growing too fast (more than 1 GiB within one hour).
# rustchain_db_size_bytes is a gauge (current size), so rate() is the wrong
# function: it is meant for counters and returns a per-second value, which
# made the old threshold mean 1 GiB per *second*. delta() returns the change
# of a gauge across the whole range, i.e. bytes gained over the last hour.
- alert: DatabaseGrowth
  expr: delta(rustchain_db_size_bytes[1h]) > 1073741824
  for: 10m
  labels:
    severity: warning
  annotations:
    summary: "Database growing too fast"
    description: "Database is growing faster than 1GB/hour"

# Alert: No active miners.
# Critical once rustchain_miners_active has stayed at 0 for 5 minutes.
- alert: NoActiveMiners
  expr: 'rustchain_miners_active == 0'
  for: 5m
  annotations:
    summary: 'No active miners'
    description: 'No miners have attested in the last 2 epochs'
  labels:
    severity: critical

# Alert: Low enrolled miners.
# Warning once rustchain_epoch_enrolled_miners has stayed below 5 for
# 10 minutes.
- alert: LowEnrolledMiners
  expr: 'rustchain_epoch_enrolled_miners < 5'
  for: 10m
  annotations:
    summary: 'Low miner enrollment'
    description: 'Less than 5 miners enrolled in current epoch'
  labels:
    severity: warning

# Alert: High API latency.
# Warning once the 95th-percentile request duration, estimated from the
# histogram buckets over a 5-minute rate window, has stayed above 5 seconds
# for 5 minutes.
- alert: HighAPILatency
  expr: >-
    histogram_quantile(0.95, rate(rustchain_api_request_duration_seconds_bucket[5m])) > 5
  for: 5m
  annotations:
    summary: 'High API latency'
    description: 'API p95 latency is above 5 seconds'
  labels:
    severity: warning

# Alert: Backup too old.
# Warning once rustchain_backup_age_hours has stayed above 24 for 1 hour.
- alert: BackupTooOld
  expr: 'rustchain_backup_age_hours > 24'
  for: 1h
  annotations:
    summary: 'Backup is too old'
    description: 'Last backup is more than 24 hours old'
  labels:
    severity: warning

# Alert: Node restarted recently.
# Informational: raised once rustchain_node_uptime_seconds has stayed under
# 300 for 1 minute.
- alert: NodeRestarted
  expr: 'rustchain_node_uptime_seconds < 300'
  for: 1m
  annotations:
    summary: 'Node recently restarted'
    description: 'Node has been up for less than 5 minutes'
  labels:
    severity: info
191 changes: 191 additions & 0 deletions grafana/dashboard-rustchain.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"links": [],
"panels": [
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {"mode": "thresholds"},
"mappings": [{"options": {"0": {"color": "red", "index": 0, "text": "DOWN"}, "1": {"color": "green", "index": 1, "text": "UP"}}, "type": "value"}],
"thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}
},
"overrides": []
},
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 0},
"id": 1,
"options": {"orientation": "auto", "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, "showThresholdLabels": false, "showThresholdMarkers": true},
"pluginVersion": "8.0.0",
"targets": [{"expr": "rustchain_node_up", "refId": "A"}],
"title": "Node Status",
"type": "gauge"
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {"mode": "palette-classic"},
"custom": {"axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "never", "spanNulls": true},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "s"
},
"overrides": []
},
"gridPos": {"h": 8, "w": 12, "x": 6, "y": 0},
"id": 2,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "single"}},
"pluginVersion": "8.0.0",
"targets": [{"expr": "rustchain_node_uptime_seconds", "legendFormat": "Uptime", "refId": "A"}],
"title": "Node Uptime",
"type": "timeseries"
},
{
  "datasource": "Prometheus",
  "fieldConfig": {
    "defaults": {
      "color": {"mode": "thresholds"},
      "mappings": [],
      "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
      "unit": "none"
    },
    "overrides": []
  },
  "gridPos": {"h": 4, "w": 6, "x": 18, "y": 0},
  "id": 3,
  "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, "text": {}, "textMode": "auto"},
  "pluginVersion": "8.0.0",
  "targets": [{"expr": "rustchain_epoch_current", "refId": "A"}],
  "title": "Current Epoch",
  "type": "stat"
},
{
  "datasource": "Prometheus",
  "fieldConfig": {
    "defaults": {
      "color": {"mode": "thresholds"},
      "mappings": [],
      "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "yellow", "value": 5}, {"color": "green", "value": 10}]},
      "unit": "short"
    },
    "overrides": []
  },
  "gridPos": {"h": 4, "w": 6, "x": 18, "y": 4},
  "id": 4,
  "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, "text": {}, "textMode": "auto"},
  "pluginVersion": "8.0.0",
  "targets": [{"expr": "rustchain_epoch_enrolled_miners", "refId": "A"}],
  "title": "Enrolled Miners",
  "type": "stat"
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {"mode": "palette-classic"},
"custom": {"axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "never", "spanNulls": true},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "short"
},
"overrides": []
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
"id": 5,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "single"}},
"pluginVersion": "8.0.0",
"targets": [
{"expr": "rustchain_miners_active", "legendFormat": "Active Miners", "refId": "A"},
{"expr": "rustchain_miners_total", "legendFormat": "Total Miners", "refId": "B"}
],
"title": "Miners Overview",
"type": "timeseries"
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {"mode": "palette-classic"},
"custom": {"axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "never", "spanNulls": true},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
"id": 6,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "single"}},
"pluginVersion": "8.0.0",
"targets": [{"expr": "rustchain_db_size_bytes", "legendFormat": "DB Size", "refId": "A"}],
"title": "Database Size",
"type": "timeseries"
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {"mode": "palette-classic"},
"custom": {"axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "never", "spanNulls": true},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "RTC"
},
"overrides": []
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
"id": 7,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "single"}},
"pluginVersion": "8.0.0",
"targets": [{"expr": "rustchain_total_supply_rtc", "legendFormat": "Total Supply", "refId": "A"}],
"title": "Total RTC Supply",
"type": "timeseries"
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {"mode": "palette-classic"},
"custom": {"axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "never", "spanNulls": true},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "s"
},
"overrides": []
},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
"id": 8,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "single"}},
"pluginVersion": "8.0.0",
"targets": [{"expr": "histogram_quantile(0.95, rate(rustchain_api_request_duration_seconds_bucket[5m]))", "legendFormat": "p95", "refId": "A"}],
"title": "API Latency (p95)",
"type": "timeseries"
}
],
"schemaVersion": 27,
"style": "dark",
"tags": ["rustchain", "blockchain"],
"templating": {"list": []},
"time": {"from": "now-6h", "to": "now"},
"timepicker": {},
"timezone": "",
"title": "RustChain Node Dashboard",
"uid": "rustchain-node",
"version": 1
}
18 changes: 18 additions & 0 deletions grafana/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Server-wide defaults: 15s for both the scrape interval and the rule
# evaluation interval.
global:
  evaluation_interval: 15s
  scrape_interval: 15s

# Alertmanager instance that alerts are sent to: a single static target,
# localhost:9093.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'localhost:9093'

# Rule files to load; the only entry is the RustChain alert rules file.
rule_files:
  - "alerts-rustchain.yml"

# Scrape jobs. The single job polls /metrics on localhost:5000 every 15s and
# labels the resulting series with job="rustchain-node".
scrape_configs:
  - job_name: 'rustchain-node'
    metrics_path: /metrics
    scrape_interval: 15s
    static_configs:
      - targets:
          - 'localhost:5000'
Loading