diff --git a/go.mod b/go.mod index 2a117ae3..b9554c23 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,7 @@ require ( github.com/spf13/pflag v1.0.6 github.com/spf13/viper v1.19.0 github.com/stretchr/testify v1.10.0 - github.com/tidwall/gjson v1.14.4 + github.com/tidwall/gjson v1.18.0 golang.org/x/mod v0.27.0 gopkg.in/ini.v1 v1.67.0 gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index 496f8b77..6d788894 100644 --- a/go.sum +++ b/go.sum @@ -366,8 +366,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= -github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM= -github.com/tidwall/gjson v1.14.4/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= diff --git a/pkg/linters/templates/README.md b/pkg/linters/templates/README.md index 9197dc88..fda5d025 100644 --- a/pkg/linters/templates/README.md +++ b/pkg/linters/templates/README.md @@ -1883,16 +1883,30 @@ Error: File contains hardcoded 'cluster.local' substring. Use '.Values.global.cl Object: templates/configmap.yaml ``` -**Cause:** Template contains hardcoded `cluster.local` string. +## Grafana Dashboard Validation Rules -**Solutions:** +The linter now includes comprehensive validation for Grafana dashboards based on best practices from the Deckhouse project: -1. **Use dynamic cluster domain:** +### Deprecated Panel Types - ```yaml - # Before - url: "http://api.d8-my-module.svc.cluster.local:8080" - - # After - url: "http://api.d8-my-module.svc.{{ .Values.global.clusterConfiguration.clusterDomain }}:8080" - ``` +- **graph** → **timeseries**: The `graph` panel type is deprecated and should be replaced with `timeseries` +- **flant-statusmap-panel** → **state-timeline**: The custom statusmap panel should use the standard `state-timeline` panel + +### Deprecated Intervals + +- **interval_rv**, **interval_sx3**, **interval_sx4**: These custom intervals are deprecated and should be replaced with Grafana's built-in `$__rate_interval` variable + +### Legacy Alert Rules + +- **Built-in alerts**: Panels with embedded alert rules should use external Alertmanager instead of Grafana's built-in alerting + +### Datasource Validation + +- **Legacy format**: Detects old datasource UID formats that need to be resaved with newer Grafana versions +- **Hardcoded UIDs**: Identifies hardcoded datasource UIDs that should use Grafana variables +- **Prometheus UIDs**: Ensures Prometheus datasources use recommended UID patterns (`$ds_prometheus` or `${ds_prometheus}`) + +### Template Variables + +- **Required variable**: Ensures dashboards contain the required `ds_prometheus` variable of type `datasource` +- **Query variables**: Validates that query variables use recommended datasource UIDs diff --git a/pkg/linters/templates/rules/grafana.go b/pkg/linters/templates/rules/grafana.go index 1593716f..6fd01319 100644 --- a/pkg/linters/templates/rules/grafana.go +++ b/pkg/linters/templates/rules/grafana.go @@ -1,5 +1,5 @@ /* -Copyright 2021 Flant JSC +Copyright 2025 Flant JSC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,8 +17,11 @@ limitations under the License. package rules import ( + "github.com/tidwall/gjson" + "k8s.io/utils/ptr" + "github.com/deckhouse/dmt/internal/fsutils" "github.com/deckhouse/dmt/internal/module" "github.com/deckhouse/dmt/pkg" "github.com/deckhouse/dmt/pkg/errors" @@ -84,6 +87,8 @@ func (r *GrafanaRule) ValidateGrafanaDashboards(m *module.Module, errorList *err return } + // Validate individual dashboard files + r.validateDashboardFiles(m, errorList) desiredContent := `include "helm_lib_grafana_dashboard_definitions` @@ -98,3 +103,283 @@ func (r *GrafanaRule) ValidateGrafanaDashboards(m *module.Module, errorList *err errorList.WithFilePath(monitoringFilePath). Errorf("The content of the 'templates/monitoring.yaml' should be equal to:\n%s\nGot:\n%s", desiredContent, string(content)) } + +// validateDashboardFiles validates individual grafana dashboard files +func (r *GrafanaRule) validateDashboardFiles(m *module.Module, errorList *errors.LintRuleErrorsList) { + searchPath := filepath.Join(m.GetPath(), "monitoring", "grafana-dashboards") + + entries := fsutils.GetFiles(searchPath, true, fsutils.FilterFileByExtensions(".json", ".tpl")) + + for _, entry := range entries { + r.validateDashboardFile(entry, errorList) + } +} + +// validateDashboardFile validates a single grafana dashboard file +func (r *GrafanaRule) validateDashboardFile(filePath string, errorList *errors.LintRuleErrorsList) { + content, err := os.ReadFile(filePath) + if err != nil { + errorList.WithFilePath(filePath).Errorf("failed to read dashboard file: %s", err) + return + } + + // Parse JSON content + dashboard := gjson.ParseBytes(content) + if !dashboard.IsObject() { + errorList.WithFilePath(filePath).Error("dashboard file is not valid JSON") + return + } + + // Extract panels and templates + panels := r.extractDashboardPanels(&dashboard) + templates := r.extractDashboardTemplates(&dashboard) + + // Validate panels + for _, panel := range panels { + r.validatePanel(&panel, filePath, errorList) + } + + // Validate templates + r.validateTemplates(templates, filePath, errorList) +} + +// extractDashboardPanels extracts all panels from dashboard including nested ones +func (*GrafanaRule) extractDashboardPanels(dashboard *gjson.Result) []gjson.Result { + panels := make([]gjson.Result, 0) + + // Extract panels from rows + rows := dashboard.Get("rows").Array() + for _, row := range rows { + rowPanels := row.Get("panels").Array() + panels = append(panels, rowPanels...) + } + + // Extract direct panels + directPanels := dashboard.Get("panels").Array() + for _, panel := range directPanels { + panelType := panel.Get("type").String() + if panelType == "row" { + // Extract panels from row + rowPanels := panel.Get("panels").Array() + panels = append(panels, rowPanels...) + } else { + panels = append(panels, panel) + } + } + + return panels +} + +// extractDashboardTemplates extracts template variables from dashboard +func (*GrafanaRule) extractDashboardTemplates(dashboard *gjson.Result) []gjson.Result { + templating := dashboard.Get("templating") + if !templating.Exists() { + return []gjson.Result{} + } + + list := templating.Get("list") + if !list.Exists() || !list.IsArray() { + return []gjson.Result{} + } + + return list.Array() +} + +// validatePanel validates a single panel +func (r *GrafanaRule) validatePanel(panel *gjson.Result, filePath string, errorList *errors.LintRuleErrorsList) { + panelTitle := panel.Get("title").String() + if panelTitle == "" { + panelTitle = "unnamed" + } + + // Check deprecated panel types + r.checkDeprecatedPanelTypes(panel, panelTitle, filePath, errorList) + + // Check deprecated intervals + r.checkDeprecatedIntervals(panel, panelTitle, filePath, errorList) + + // Check legacy alert rules + r.checkLegacyAlertRules(panel, panelTitle, filePath, errorList) + + // Check datasource validation + r.checkDatasourceValidation(panel, panelTitle, filePath, errorList) +} + +// checkDeprecatedPanelTypes checks for deprecated panel types +func (*GrafanaRule) checkDeprecatedPanelTypes(panel *gjson.Result, panelTitle, filePath string, errorList *errors.LintRuleErrorsList) { + panelType := panel.Get("type").String() + deprecatedTypes := map[string]string{ + "graph": "timeseries", + "flant-statusmap-panel": "state-timeline", + } + + if replaceWith, isDeprecated := deprecatedTypes[panelType]; isDeprecated { + errorList.WithFilePath(filePath).Errorf( + "Panel '%s' uses deprecated type '%s', consider using '%s'", + panelTitle, panelType, replaceWith, + ) + } +} + +// checkDeprecatedIntervals checks for deprecated intervals in panel queries +func (*GrafanaRule) checkDeprecatedIntervals(panel *gjson.Result, panelTitle, filePath string, errorList *errors.LintRuleErrorsList) { + deprecatedIntervals := []string{"interval_rv", "interval_sx3", "interval_sx4"} + targets := panel.Get("targets").Array() + + for _, target := range targets { + expr := target.Get("expr").String() + for _, deprecatedInterval := range deprecatedIntervals { + if strings.Contains(expr, deprecatedInterval) { + errorList.WithFilePath(filePath).Errorf( + "Panel '%s' contains deprecated interval '%s', consider using '$__rate_interval'", + panelTitle, deprecatedInterval, + ) + } + } + } +} + +// checkLegacyAlertRules checks for legacy alert rules in panels +func (*GrafanaRule) checkLegacyAlertRules(panel *gjson.Result, panelTitle, filePath string, errorList *errors.LintRuleErrorsList) { + alertRule := panel.Get("alert") + if alertRule.Exists() { + alertRuleName := alertRule.Get("name").String() + if alertRuleName == "" { + alertRuleName = "unnamed" + } + errorList.WithFilePath(filePath).Errorf( + "Panel '%s' contains legacy alert rule '%s', consider using external alertmanager", + panelTitle, alertRuleName, + ) + } +} + +// checkDatasourceValidation checks datasource UIDs in panel targets +func (*GrafanaRule) checkDatasourceValidation(panel *gjson.Result, panelTitle, filePath string, errorList *errors.LintRuleErrorsList) { + recommendedPrometheusUIDs := []string{"$ds_prometheus", "${ds_prometheus}"} + targets := panel.Get("targets").Array() + + for _, target := range targets { + datasource := target.Get("datasource") + if !datasource.Exists() { + continue + } + + var uidStr string + uid := datasource.Get("uid") + if uid.Exists() { + uidStr = uid.String() + } else { + // Legacy format - datasource UID is stored as string + uidStr = datasource.String() + errorList.WithFilePath(filePath).Errorf( + "Panel '%s' uses legacy datasource format, consider resaving dashboard using newer Grafana version", + panelTitle, + ) + } + + // Check for hardcoded UIDs + if !strings.HasPrefix(uidStr, "$") { + errorList.WithFilePath(filePath).Errorf( + "Panel '%s' contains hardcoded datasource UID '%s', consider using grafana variable of type 'Datasource'", + panelTitle, uidStr, + ) + } + + // Check Prometheus datasource UIDs + datasourceType := datasource.Get("type") + if datasourceType.Exists() && datasourceType.String() == "prometheus" { + isRecommended := false + for _, recommendedUID := range recommendedPrometheusUIDs { + if uidStr == recommendedUID { + isRecommended = true + break + } + } + + if !isRecommended { + errorList.WithFilePath(filePath).Errorf( + "Panel '%s' datasource should be one of: %s instead of '%s'", + panelTitle, strings.Join(recommendedPrometheusUIDs, ", "), uidStr, + ) + } + } + } +} + +// validateTemplates validates dashboard template variables +func (r *GrafanaRule) validateTemplates(templates []gjson.Result, filePath string, errorList *errors.LintRuleErrorsList) { + hasPrometheusDatasourceVariable := false + recommendedPrometheusUIDs := []string{"$ds_prometheus", "${ds_prometheus}"} + + for _, template := range templates { + // Check for required Prometheus datasource variable + if r.isPrometheusDatasourceTemplateVariable(&template) { + hasPrometheusDatasourceVariable = true + } + + // Check query variables for non-recommended datasource UIDs + if r.isNonRecommendedPrometheusDatasourceQueryVariable(&template, recommendedPrometheusUIDs) { + templateName := template.Get("name").String() + errorList.WithFilePath(filePath).Errorf( + "Dashboard variable '%s' should use one of: %s as its datasource", + templateName, strings.Join(recommendedPrometheusUIDs, ", "), + ) + } + } + + // Check if required Prometheus datasource variable exists + if !hasPrometheusDatasourceVariable { + errorList.WithFilePath(filePath).Errorf( + "Dashboard must contain prometheus variable with query type: 'prometheus' and name: 'ds_prometheus'", + ) + } +} + +// isPrometheusDatasourceTemplateVariable checks if template is the required Prometheus datasource variable +func (*GrafanaRule) isPrometheusDatasourceTemplateVariable(template *gjson.Result) bool { + templateType := template.Get("type") + if !templateType.Exists() || templateType.String() != "datasource" { + return false + } + + queryType := template.Get("query") + if !queryType.Exists() || queryType.String() != "prometheus" { + return false + } + + templateName := template.Get("name") + return templateName.Exists() && templateName.String() == "ds_prometheus" +} + +// isNonRecommendedPrometheusDatasourceQueryVariable checks if query variable uses non-recommended datasource +func (*GrafanaRule) isNonRecommendedPrometheusDatasourceQueryVariable(template *gjson.Result, recommendedUIDs []string) bool { + templateType := template.Get("type") + if !templateType.Exists() || templateType.String() != "query" { + return false + } + + datasource := template.Get("datasource") + if !datasource.Exists() { + return false + } + + datasourceType := datasource.Get("type") + if !datasourceType.Exists() || datasourceType.String() != "prometheus" { + return false + } + + datasourceUID := datasource.Get("uid") + if !datasourceUID.Exists() { + return false + } + + uidStr := datasourceUID.String() + for _, recommendedUID := range recommendedUIDs { + if uidStr == recommendedUID { + return false + } + } + + return true +} diff --git a/pkg/linters/templates/rules/grafana_test.go b/pkg/linters/templates/rules/grafana_test.go new file mode 100644 index 00000000..f2be435f --- /dev/null +++ b/pkg/linters/templates/rules/grafana_test.go @@ -0,0 +1,295 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rules + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/tidwall/gjson" + + "github.com/deckhouse/dmt/pkg/errors" +) + +func TestExtractDashboardPanels(t *testing.T) { + rule := &GrafanaRule{} + + // Test dashboard with direct panels + dashboard := gjson.Parse(`{ + "panels": [ + {"type": "graph", "title": "Panel 1"}, + {"type": "row", "panels": [ + {"type": "timeseries", "title": "Panel 2"} + ]}, + {"type": "stat", "title": "Panel 3"} + ] + }`) + + panels := rule.extractDashboardPanels(&dashboard) + assert.Len(t, panels, 3) + assert.Equal(t, "Panel 1", panels[0].Get("title").String()) + assert.Equal(t, "Panel 2", panels[1].Get("title").String()) + assert.Equal(t, "Panel 3", panels[2].Get("title").String()) +} + +func TestExtractDashboardTemplates(t *testing.T) { + rule := &GrafanaRule{} + + // Test dashboard with templates + dashboard := gjson.Parse(`{ + "templating": { + "list": [ + {"name": "ds_prometheus", "type": "datasource", "query": "prometheus"}, + {"name": "namespace", "type": "query"} + ] + } + }`) + + templates := rule.extractDashboardTemplates(&dashboard) + assert.Len(t, templates, 2) + assert.Equal(t, "ds_prometheus", templates[0].Get("name").String()) + assert.Equal(t, "namespace", templates[1].Get("name").String()) +} + +func TestCheckDeprecatedPanelTypes(t *testing.T) { + tests := []struct { + name string + panelType string + expectedWarns int + }{ + { + name: "deprecated graph panel", + panelType: "graph", + expectedWarns: 1, + }, + { + name: "deprecated flant-statusmap-panel", + panelType: "flant-statusmap-panel", + expectedWarns: 1, + }, + { + name: "modern timeseries panel", + panelType: "timeseries", + expectedWarns: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rule := &GrafanaRule{} + errorList := errors.NewLintRuleErrorsList() + + panel := gjson.Parse(`{"type": "` + tt.panelType + `", "title": "Test Panel"}`) + rule.checkDeprecatedPanelTypes(&panel, "Test Panel", "/test.json", errorList) + + assert.Len(t, errorList.GetErrors(), tt.expectedWarns) + }) + } +} + +func TestCheckDeprecatedIntervals(t *testing.T) { + tests := []struct { + name string + expr string + expectedWarns int + }{ + { + name: "deprecated interval_rv", + expr: "rate(metric{job=\"test\"}[interval_rv])", + expectedWarns: 1, + }, + { + name: "deprecated interval_sx3", + expr: "rate(metric{job=\"test\"}[interval_sx3])", + expectedWarns: 1, + }, + { + name: "modern interval", + expr: "rate(metric{job=\"test\"}[$__rate_interval])", + expectedWarns: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rule := &GrafanaRule{} + errorList := errors.NewLintRuleErrorsList() + + // Create JSON with proper escaping + jsonStr := fmt.Sprintf(`{ + "title": "Test Panel", + "targets": [{"expr": %q}] + }`, tt.expr) + + panel := gjson.Parse(jsonStr) + rule.checkDeprecatedIntervals(&panel, "Test Panel", "/test.json", errorList) + + assert.Len(t, errorList.GetErrors(), tt.expectedWarns) + }) + } +} + +func TestCheckLegacyAlertRules(t *testing.T) { + tests := []struct { + name string + hasAlert bool + alertName string + expectedWarns int + }{ + { + name: "has legacy alert rule", + hasAlert: true, + alertName: "test_alert", + expectedWarns: 1, + }, + { + name: "no alert rule", + hasAlert: false, + alertName: "", + expectedWarns: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rule := &GrafanaRule{} + errorList := errors.NewLintRuleErrorsList() + + var panel gjson.Result + if tt.hasAlert { + panel = gjson.Parse(`{ + "title": "Test Panel", + "alert": {"name": "` + tt.alertName + `"} + }`) + } else { + panel = gjson.Parse(`{"title": "Test Panel"}`) + } + + rule.checkLegacyAlertRules(&panel, "Test Panel", "/test.json", errorList) + + assert.Len(t, errorList.GetErrors(), tt.expectedWarns) + }) + } +} + +func TestCheckDatasourceValidation(t *testing.T) { + tests := []struct { + name string + datasource string + expectedWarns int + }{ + { + name: "recommended prometheus datasource", + datasource: `{"type": "prometheus", "uid": "$ds_prometheus"}`, + expectedWarns: 0, + }, + { + name: "hardcoded datasource UID", + datasource: `{"type": "prometheus", "uid": "prometheus-123"}`, + expectedWarns: 2, // hardcoded UID + non-recommended prometheus UID + }, + { + name: "legacy datasource format", + datasource: `"prometheus-123"`, + expectedWarns: 2, // legacy format + hardcoded UID + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rule := &GrafanaRule{} + errorList := errors.NewLintRuleErrorsList() + + panel := gjson.Parse(`{ + "title": "Test Panel", + "targets": [{"datasource": ` + tt.datasource + `}] + }`) + rule.checkDatasourceValidation(&panel, "Test Panel", "/test.json", errorList) + + assert.Len(t, errorList.GetErrors(), tt.expectedWarns) + }) + } +} + +func TestValidateTemplates(t *testing.T) { + tests := []struct { + name string + templates string + expectedWarns int + }{ + { + name: "has required prometheus datasource variable", + templates: `[{"name": "ds_prometheus", "type": "datasource", "query": "prometheus"}, {"name": "namespace", "type": "query"}]`, + expectedWarns: 0, + }, + { + name: "missing required prometheus datasource variable", + templates: `[{"name": "namespace", "type": "query"}]`, + expectedWarns: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rule := &GrafanaRule{} + errorList := errors.NewLintRuleErrorsList() + + templates := gjson.Parse(`{"templating": {"list": ` + tt.templates + `}}`) + templateList := rule.extractDashboardTemplates(&templates) + rule.validateTemplates(templateList, "/test.json", errorList) + + assert.Len(t, errorList.GetErrors(), tt.expectedWarns) + }) + } +} + +func TestValidateDashboardFile(t *testing.T) { + // Create temporary test directory + tempDir, err := os.MkdirTemp("", "grafana-test") + require.NoError(t, err) + defer os.RemoveAll(tempDir) + + // Create test dashboard file + dashboardContent := `{ + "panels": [ + {"type": "graph", "title": "Deprecated Panel"} + ], + "templating": { + "list": [ + {"name": "ds_prometheus", "type": "datasource", "query": "prometheus"} + ] + } + }` + + testFile := filepath.Join(tempDir, "test-dashboard.json") + err = os.WriteFile(testFile, []byte(dashboardContent), 0600) + require.NoError(t, err) + + rule := &GrafanaRule{} + + errorList := errors.NewLintRuleErrorsList() + rule.validateDashboardFile(testFile, errorList) + + // Should have warning about deprecated panel type + errorListResult := errorList.GetErrors() + assert.Len(t, errorListResult, 1) + assert.Contains(t, errorListResult[0].Text, "deprecated type 'graph'") +}