diff --git a/.github/workflows/config/labeler.yml b/.github/workflows/config/labeler.yml index 5cf70f7355c08..69f079e061e6c 100644 --- a/.github/workflows/config/labeler.yml +++ b/.github/workflows/config/labeler.yml @@ -365,6 +365,8 @@ integration/klaviyo: - klaviyo/**/* integration/kong: - kong/**/* +integration/kong_mesh: +- kong_mesh/**/* integration/krakend: - krakend/**/* integration/kube_apiserver_metrics: diff --git a/kong_mesh/CHANGELOG.md b/kong_mesh/CHANGELOG.md new file mode 100644 index 0000000000000..2fbc3a35dddd1 --- /dev/null +++ b/kong_mesh/CHANGELOG.md @@ -0,0 +1,7 @@ +# CHANGELOG - Kong Mesh + +## 1.0.0 / 2025-07-11 + +***Added***: + +* Initial Release \ No newline at end of file diff --git a/kong_mesh/README.md b/kong_mesh/README.md new file mode 100644 index 0000000000000..4ffdd64bc4dff --- /dev/null +++ b/kong_mesh/README.md @@ -0,0 +1,52 @@ + +# Agent Check: Kong Mesh + +## Overview + +This check monitors [Kong Mesh][1], a universal open-source control plane for service mesh that supports both Kubernetes and Universal mode (VMs and standalone containers). Kong Mesh is the enterprise version of [Kuma][2], developed by Kong. + +With the Datadog Kong Mesh integration, you can: +- Monitor the health and performance of the Kong Mesh control plane. +- Collect logs from both the control plane and the data plane proxies. +- Gain detailed insights into the internal traffic flows within your service mesh, which helps you monitor performance and ensure reliability. + +## Setup + +For monitoring Kong Mesh (control plane and Envoy data planes): +- Use the [Kuma integration][3] to collect both metrics and logs. Follow the [Configuration instructions][5] in the Kuma documentation. +- This integration provides prebuilt dashboards and monitors for convenience. Kong Mesh can be fully monitored using only the Kuma integration, which also includes dashboards and monitors. + +**Note:** You can also use the [Kuma integration][3] to monitor your Kong Mesh deployment. 
+ +The `Kuma` Agent check required by the Kong Mesh integration is included in the [Datadog Agent][4] package. No additional installation is needed on your server. + +### Configuration + +#### Metric collection + +Metrics are collected from the control plane and the Envoy data planes. Refer to the [Kuma integration documentation][5] to set up metrics collection. + +#### Log collection + +Logs are collected from the control plane and the Envoy data planes. Refer to the [Kuma integration documentation][5] to set up logs collection. + +**Note:** For the `source` property in the logs configuration, you can optionally replace `kuma` with `kong_mesh`. + + +### Events + +The Kong Mesh integration does not include any events. + +## Troubleshooting + +Refer to the troubleshooting section of the [Kuma integration documentation][6] for troubleshooting steps. + +Need help? Contact [Datadog support][7]. + +[1]: https://konghq.com/products/kong-mesh +[2]: https://kuma.io/ +[3]: https://docs.datadoghq.com/integrations/kuma/#overview +[4]: /account/settings/agent/latest +[5]: https://docs.datadoghq.com/integrations/kuma/#configuration +[6]: https://docs.datadoghq.com/integrations/kuma/#troubleshooting +[7]: https://docs.datadoghq.com/help/ diff --git a/kong_mesh/assets/dashboards/kong_mesh_control_plane.json b/kong_mesh/assets/dashboards/kong_mesh_control_plane.json new file mode 100644 index 0000000000000..d8cbda3f794ba --- /dev/null +++ b/kong_mesh/assets/dashboards/kong_mesh_control_plane.json @@ -0,0 +1,1628 @@ +{ + "title": "Kong Mesh Control Plane", + "description": "## Kong Mesh Control Plane\n\nThis dashboard provides comprehensive monitoring for your Kong service mesh infrastructure. Track the health and performance of control plane components to ensure reliable service-to-service communication.\n\n Use the template variables to filter by control plane instance, mesh zone, or cluster.\n\nKong Mesh is the enterprise version of [Kuma](https://kuma.io/). 
\n\n**Further reading:**\n- [Datadog Kong Mesh Integration Documentation](https://docs.datadoghq.com/integrations/kong-mesh/)\n- [Kong Mesh Documentation](https://developer.konghq.com/mesh/)\n- [Kong Mesh Quickstart](https://developer.konghq.com/mesh/#install-kong-mesh)\n- [Kong Mesh Metrics Policy Reference](https://developer.konghq.com/mesh/policies/meshmetric/)\n- [Kuma Documentation](https://kuma.io/docs/latest/)", + "widgets": [ + { + "id": 7893170514368284, + "definition": { + "title": "", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 5620421605366273, + "definition": { + "type": "image", + "url": "/static/images/logos/kong-mesh_large.svg", + "url_dark_theme": "/static/images/logos/kong-mesh_reversed_large.svg", + "sizing": "contain", + "margin": "md", + "has_background": true, + "has_border": true, + "vertical_align": "center", + "horizontal_align": "center" + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 2 } + }, + { + "id": 2807283505732892, + "definition": { + "type": "note", + "content": "## Kong Mesh Control Plane\n\nThis dashboard provides comprehensive monitoring for your Kong service mesh infrastructure. Track the health and performance of control plane components to ensure reliable service-to-service communication.\n\n Use the template variables to filter by control plane instance, mesh zone, or cluster.\n\nKong Mesh is the enterprise version of [Kuma](https://kuma.io/). 
\n\n**Further reading:**\n- [Datadog Kong Mesh Integration Documentation](https://docs.datadoghq.com/integrations/kong-mesh/)\n- [Kong Mesh Documentation](https://developer.konghq.com/mesh/)\n- [Kong Mesh Quickstart](https://developer.konghq.com/mesh/#install-kong-mesh)\n- [Kong Mesh Metrics Policy Reference](https://developer.konghq.com/mesh/policies/meshmetric/)\n- [Kuma Documentation](https://kuma.io/docs/latest/)", + "background_color": "white", + "font_size": "14", + "text_align": "left", + "vertical_align": "top", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 2, "width": 6, "height": 6 } + } + ] + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 10 } + }, + { + "id": 7855114521341362, + "definition": { + "title": "API Server", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 3785836227166555, + "definition": { + "type": "note", + "content": "The API Server exposes Kong Mesh's REST API for configuration management. 
\n\n[API Reference Documentation](https://kuma.io/docs/latest/reference/http-api/)", + "background_color": "purple", + "font_size": "14", + "text_align": "center", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 1 } + }, + { + "id": 314939649143054, + "definition": { + "title": "HTTP OK Responses", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.api_server.http_request_duration_seconds.count{$instance_id AND $zone AND code_class IN (1xx,2xx,3xx) AND $cluster_name} by {code}.as_count()" + } + ], + "formulas": [{ "formula": "query1" }], + "style": { + "palette": "green", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { "x": 0, "y": 1, "width": 3, "height": 2 } + }, + { + "id": 538705636441132, + "definition": { + "title": "HTTP Request Duration (Average)", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.api_server.http_request_duration_seconds.sum{$zone,$instance_id,$cluster_name} by {handler}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:kuma.api_server.http_request_duration_seconds.count{$zone,$instance_id,$cluster_name} by {handler}.as_count()" + } + ], + "formulas": [ + { + "formula": "query1 / query2", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": 
"second" + } + } + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 3, "y": 1, "width": 3, "height": 2 } + }, + { + "id": 5599124281027502, + "definition": { + "title": "HTTP Error Percentage", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.api_server.http_request_duration_seconds.count{$instance_id AND $zone AND code_class IN (4xx,5xx) AND $cluster_name} by {code}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:kuma.api_server.http_request_duration_seconds.count{$instance_id,$zone,$cluster_name}.as_count()" + } + ], + "formulas": [ + { + "formula": "query1 * 100 / query2", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "percent" + } + } + } + ], + "style": { + "palette": "red", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ], + "yaxis": { "include_zero": true, "min": "0", "max": "100" }, + "markers": [ + { + "label": " 5% ", + "value": "0 < y < 5", + "display_type": "ok dashed" + }, + { "value": "5 < y < 100", "display_type": "warning dashed" } + ] + }, + "layout": { "x": 0, "y": 3, "width": 3, "height": 2 } + }, + { + "id": 7004689456425576, + "definition": { + "title": "HTTP Error Responses", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": 
"sum:kuma.api_server.http_request_duration_seconds.count{$instance_id AND $zone AND code_class IN (4xx,5xx) AND $cluster_name} by {code}.as_count()" + } + ], + "formulas": [{ "formula": "query1" }], + "style": { + "palette": "red", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { "x": 3, "y": 3, "width": 3, "height": 2 } + }, + { + "id": 2146909446738880, + "definition": { + "title": "HTTP Response Codes", + "title_size": "16", + "title_align": "left", + "requests": [ + { + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.api_server.http_request_duration_seconds.count{$instance_id,$zone,$cluster_name} by {code}.as_count()", + "aggregator": "avg" + } + ], + "response_format": "scalar", + "style": { "palette": "classic" }, + "formulas": [{ "formula": "query1" }], + "sort": { + "count": 500, + "order_by": [ + { "type": "formula", "index": 0, "order": "desc" } + ] + } + } + ], + "type": "sunburst", + "hide_total": false, + "legend": { "type": "automatic" } + }, + "layout": { "x": 0, "y": 5, "width": 3, "height": 4 } + }, + { + "id": 3293109215571591, + "definition": { + "title": "HTTP Request Handlers (Slowest)", + "title_size": "16", + "title_align": "left", + "type": "toplist", + "requests": [ + { + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.api_server.http_request_duration_seconds.sum{$zone,$instance_id,$cluster_name} by {handler}.as_count()", + "aggregator": "avg" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:kuma.api_server.http_request_duration_seconds.count{$zone,$instance_id,$cluster_name} by {handler}.as_count()", + "aggregator": "avg" + } + ], + "response_format": "scalar", + "formulas": [ + { + "formula": "query1 / query2", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "second" + } + } + } + ], + "sort": { + "order_by": [ + { "type": "formula", 
"index": 0, "order": "desc" } + ], + "count": 10 + } + } + ], + "style": { + "display": { "type": "stacked", "legend": "automatic" }, + "palette": "dog_classic" + } + }, + "layout": { "x": 3, "y": 5, "width": 3, "height": 4 } + } + ] + }, + "layout": { "x": 6, "y": 0, "width": 6, "height": 10 } + }, + { + "id": 70369271856783, + "definition": { + "title": "Store", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 7474291901161303, + "definition": { + "type": "note", + "content": "The Store component provides persistent storage for all Kong Mesh resources including meshes, policies, and data plane configurations.", + "background_color": "purple", + "font_size": "14", + "text_align": "center", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 0, "width": 12, "height": 1 } + }, + { + "id": 7258737497088913, + "definition": { + "title": "Store Latency", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.store.sum{$zone,$instance_id,$cluster_name} by {operation}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:kuma.store.count{$zone,$instance_id,$cluster_name} by {operation}.as_count()" + } + ], + "formulas": [ + { + "formula": "query1 / query2", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "second" + } + } + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 0, "y": 1, "width": 3, "height": 2 } + }, + { + "id": 
4488770992649380, + "definition": { + "title": "Store Operations", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "formulas": [{ "formula": "query1" }], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.store.count{$instance_id,$zone,$cluster_name} by {resource_type,operation}.as_rate()" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { "x": 3, "y": 1, "width": 3, "height": 4 } + }, + { + "id": 8266214355863501, + "definition": { + "title": "Top Store Operations", + "title_size": "16", + "title_align": "left", + "type": "toplist", + "requests": [ + { + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.store.count{$instance_id,$zone,$cluster_name} by {resource_type,operation}.as_count()", + "aggregator": "sum" + } + ], + "response_format": "scalar", + "formulas": [{ "formula": "query1" }], + "sort": { + "count": 10, + "order_by": [ + { "type": "formula", "index": 0, "order": "desc" } + ] + } + } + ], + "style": { + "display": { "type": "stacked", "legend": "automatic" } + } + }, + "layout": { "x": 6, "y": 1, "width": 6, "height": 4 } + }, + { + "id": 5156393943933849, + "definition": { + "title": "Store Cache Performance", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query2", + "query": "sum:kuma.store_cache.count{$instance_id,$zone,result:miss,$cluster_name}.as_rate().rollup(avg, 15)" + }, + { + "data_source": "metrics", + "name": 
"query1", + "query": "sum:kuma.store_cache.count{$instance_id,$zone,$cluster_name}.as_rate().rollup(avg, 15)" + }, + { + "data_source": "metrics", + "name": "query3", + "query": "sum:kuma.store_cache.count{$instance_id,$zone,result:hit-wait,$cluster_name}.as_rate().rollup(avg, 15)" + } + ], + "formulas": [ + { + "alias": "miss", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "percent" + } + }, + "formula": "query2 / query1 * 100" + }, + { + "alias": "hit-wait", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "percent" + } + }, + "formula": "query3 / query1 * 100" + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ], + "yaxis": { "include_zero": true, "min": "0", "max": "100" }, + "markers": [ + { "value": "0 < y < 20", "display_type": "ok dashed" }, + { "value": "20 < y < 100", "display_type": "warning dashed" } + ] + }, + "layout": { "x": 0, "y": 3, "width": 3, "height": 2 } + } + ] + }, + "layout": { "x": 0, "y": 10, "width": 12, "height": 6 } + }, + { + "id": 511652203180117, + "definition": { + "title": "Kong Mesh Overview", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 7014562644700263, + "definition": { + "type": "note", + "content": "Monitor the overall health and scale of your Kong Mesh deployment. This section tracks key infrastructure metrics including the number of meshes, zones, control planes, and data plane proxies. 
Use these metrics to understand your service mesh topology and identify potential capacity issues.", + "background_color": "purple", + "font_size": "14", + "text_align": "left", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 1 } + }, + { + "id": 7036472294759965, + "definition": { + "title": "Meshes", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [{ "formula": "query1" }], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.resources_count{$zone,$instance_id,resource_type:mesh,$cluster_name}" + } + ], + "response_format": "scalar", + "conditional_formats": [ + { + "comparator": ">", + "value": 0, + "palette": "white_on_green" + } + ] + } + ], + "autoscale": true, + "precision": 0, + "timeseries_background": { + "yaxis": { "include_zero": true }, + "type": "area" + } + }, + "layout": { "x": 0, "y": 1, "width": 3, "height": 1 } + }, + { + "id": 6296165553849505, + "definition": { + "title": "Control Planes", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [{ "formula": "count_nonzero(query1)" }], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.cp_info{$zone,$instance_id,$cluster_name}" + } + ], + "response_format": "scalar", + "conditional_formats": [ + { + "comparator": ">", + "value": 0, + "palette": "white_on_green" + } + ] + } + ], + "autoscale": true, + "precision": 0, + "timeseries_background": { + "yaxis": { "include_zero": true }, + "type": "area" + } + }, + "layout": { "x": 3, "y": 1, "width": 3, "height": 1 } + }, + { + "id": 1961361683236054, + "definition": { + "title": "Zones", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [{ 
"formula": "query1" }], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.resources_count{$zone,$instance_id,resource_type:zone,$cluster_name}" + } + ], + "response_format": "scalar", + "conditional_formats": [ + { + "comparator": ">", + "value": 0, + "palette": "white_on_green" + } + ] + } + ], + "autoscale": true, + "precision": 0, + "timeseries_background": { + "yaxis": { "include_zero": true }, + "type": "area" + } + }, + "layout": { "x": 0, "y": 2, "width": 3, "height": 1 } + }, + { + "id": 6954919323449161, + "definition": { + "title": "Dataplanes", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [{ "formula": "query1" }], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.resources_count{$zone,$instance_id,resource_type:dataplane,$cluster_name}" + } + ], + "response_format": "scalar", + "conditional_formats": [ + { + "comparator": ">", + "value": 0, + "palette": "white_on_green" + } + ] + } + ], + "autoscale": true, + "precision": 0, + "timeseries_background": { + "yaxis": { "include_zero": true }, + "type": "area" + } + }, + "layout": { "x": 3, "y": 2, "width": 3, "height": 1 } + }, + { + "id": 4573596145869244, + "definition": { + "title": "Control Plane Instances", + "title_size": "16", + "title_align": "left", + "type": "query_table", + "requests": [ + { + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "avg:kuma.cp_info{$zone,$instance_id,$cluster_name} by {zone,cluster_name,instance_id,kube_service,kuma_version}" + } + ], + "response_format": "scalar", + "sort": { + "count": 500, + "order_by": [ + { "type": "formula", "index": 0, "order": "desc" } + ] + }, + "formulas": [ + { + "cell_display_mode": "number", + "alias": "Control Plane", + "formula": "query1" + } + ] + } + ], + "has_search_bar": "auto" + }, + "layout": { "x": 
0, "y": 3, "width": 6, "height": 2 } + }, + { + "id": 3277487101734384, + "definition": { + "title": "Resources", + "title_size": "16", + "title_align": "left", + "requests": [ + { + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:kuma.resources_count{$zone,$instance_id,$cluster_name} by {resource_type}", + "aggregator": "avg" + } + ], + "response_format": "scalar", + "style": { "palette": "datadog16" }, + "formulas": [{ "formula": "query1" }], + "sort": { + "count": 500, + "order_by": [ + { "type": "formula", "index": 0, "order": "desc" } + ] + } + } + ], + "type": "sunburst", + "legend": { "type": "automatic" } + }, + "layout": { "x": 0, "y": 5, "width": 6, "height": 4 } + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 6, + "height": 10, + "is_column_break": true + } + }, + { + "id": 8542817691306961, + "definition": { + "title": "xDS", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 1477222529832088, + "definition": { + "type": "note", + "content": "The xDS (discovery service) protocol enables dynamic configuration updates from the control plane to Envoy data plane proxies. 
This section monitors the health and performance of configuration delivery, which is critical for applying policies and routing changes.", + "background_color": "purple", + "font_size": "14", + "text_align": "center", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 1 } + }, + { + "id": 1631242990744119, + "definition": { + "title": "Active Connections", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "formulas": [{ "formula": "query1" }], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:kuma.xds.streams_active{$zone,$instance_id,$cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "area" + } + ] + }, + "layout": { "x": 0, "y": 1, "width": 3, "height": 2 } + }, + { + "id": 3691350682865210, + "definition": { + "title": "Envoy Config Generations", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "error", + "style": { "palette": "dd20", "palette_index": 17 }, + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "operation" + } + }, + "formula": "query2" + }, + { + "alias": "success", + "style": { "palette": "dd20", "palette_index": 3 }, + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "operation" + } + }, + "formula": "query1 - query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query2", + "query": 
"sum:kuma.xds.generation_errors.count{$zone,$instance_id,$cluster_name}.as_rate().rollup(avg, 15)" + }, + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.xds.generation.count{$zone,$instance_id,$cluster_name}.as_rate().rollup(avg, 15)" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { "x": 3, "y": 1, "width": 3, "height": 2 } + }, + { + "id": 1846519345009893, + "definition": { + "title": "Responses", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "operation" + } + }, + "alias": "responses", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.xds.responses_sent.count{$zone,$instance_id,$cluster_name}.as_rate().rollup(avg, 15)" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { "x": 0, "y": 3, "width": 3, "height": 2 } + }, + { + "id": 4308218408630842, + "definition": { + "title": "Config Delivery Latency (99th Percentile)", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:kuma.xds.delivery.quantile{$zone,quantile:0.99,$instance_id,$cluster_name}" + } + ], + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + 
"unit_name": "millisecond" + } + }, + "formula": "query1" + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 3, "y": 3, "width": 3, "height": 2 } + }, + { + "id": 6667086618085281, + "definition": { + "title": "Config Generation Latency (p99)", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:kuma.xds.generation.quantile{$zone,quantile:0.99,proxy_type:dataplane,$instance_id,$cluster_name}" + } + ], + "formulas": [ + { + "formula": "query1", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "millisecond" + } + } + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 0, "y": 5, "width": 3, "height": 2 } + }, + { + "id": 5503152987182677, + "definition": { + "title": "Requests", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "operation" + } + }, + "alias": "received", + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query2", + "query": "sum:kuma.xds.requests_received.count{$zone,$instance_id,$cluster_name} by {confirmation}.as_count().rollup(avg, 15)" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" 
+ } + ] + }, + "layout": { "x": 3, "y": 5, "width": 3, "height": 2 } + }, + { + "id": 3404201926886316, + "definition": { + "title": "Endpoints Cache Performance", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query2", + "query": "avg:kuma.cla_cache{$instance_id,$zone,result:miss,$cluster_name}.rollup(avg, 15)" + }, + { + "data_source": "metrics", + "name": "query1", + "query": "avg:kuma.cla_cache{$instance_id,$zone,$cluster_name}.rollup(avg, 15)" + }, + { + "data_source": "metrics", + "name": "query3", + "query": "avg:kuma.cla_cache{$instance_id,$zone,result:hit-wait,$cluster_name}.rollup(avg, 15)" + } + ], + "formulas": [ + { + "alias": "miss", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "percent" + } + }, + "formula": "per_second(query2) / per_second(query1) * 100" + }, + { + "alias": "hit-wait", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "percent" + } + }, + "formula": "per_second(query3) / per_second(query1) * 100" + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ], + "yaxis": { "include_zero": true, "min": "0", "max": "100" }, + "markers": [ + { "value": "20 < y < 100", "display_type": "warning dashed" }, + { "value": "0 < y < 20", "display_type": "ok dashed" } + ] + }, + "layout": { "x": 0, "y": 7, "width": 4, "height": 2 } + }, + { + "id": 3404201926886317, + "definition": { + "type": "note", + "content": "**hit-wait**: Requests waiting for another request to populate cache - normal during high concurrency", + "background_color": "purple", + "font_size": "12", + "text_align": "left", + "vertical_align": "center", + "show_tick": true, + 
"tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 4, "y": 7, "width": 2, "height": 2 } + } + ] + }, + "layout": { "x": 6, "y": 0, "width": 6, "height": 10 } + }, + { + "id": 2708736787343333, + "definition": { + "title": "Leader", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 3741078862595649, + "definition": { + "type": "note", + "content": "Each Kong Mesh zone must have exactly one leader instance for proper operation.", + "background_color": "purple", + "font_size": "14", + "text_align": "left", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 1 } + }, + { + "id": 8661086842802331, + "definition": { + "title": "Leaders", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [{ "formula": "count_nonzero(query1)" }], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.leader{$zone,$instance_id,$cluster_name}" + } + ], + "response_format": "scalar", + "conditional_formats": [ + { + "comparator": ">", + "value": 0, + "palette": "white_on_green" + } + ] + } + ], + "autoscale": true, + "precision": 0, + "timeseries_background": { + "yaxis": { "include_zero": true }, + "type": "area" + } + }, + "layout": { "x": 0, "y": 1, "width": 2, "height": 2 } + }, + { + "id": 2698193577791332, + "definition": { + "title": "Leader Instances", + "title_size": "16", + "title_align": "left", + "type": "query_table", + "requests": [ + { + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "avg:kuma.leader{$zone,$instance_id,$cluster_name} by {cluster_name,zone,kube_service}" + } + ], + "response_format": "scalar", + "sort": { + "count": 500, + "order_by": [ + { "type": "formula", 
"index": 0, "order": "desc" } + ] + }, + "formulas": [ + { + "cell_display_mode": "number", + "alias": "Leader", + "formula": "query1" + } + ] + } + ], + "has_search_bar": "auto" + }, + "layout": { "x": 2, "y": 1, "width": 4, "height": 2 } + }, + { + "id": 8614340111303112, + "definition": { + "title": "Leader Election Status", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "formulas": [{ "formula": "query1" }], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:kuma.leader_election.master_status{$instance_id,$cluster_name} by {name,kube_service,host}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ], + "yaxis": { "include_zero": true, "min": "0", "max": "1.5" }, + "markers": [ + { + "label": " Backup ", + "value": "-0.5 < y < 0.5", + "display_type": "info dashed" + }, + { + "label": " Leader ", + "value": "0.5 < y < 1.5", + "display_type": "ok dashed" + } + ] + }, + "layout": { "x": 0, "y": 3, "width": 6, "height": 2 } + } + ] + }, + "layout": { "x": 0, "y": 10, "width": 6, "height": 6 } + }, + { + "id": 8781148049763819, + "definition": { + "title": "Go Runtime", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 5710359456796066, + "definition": { + "type": "note", + "content": "Go runtime metrics provide insights into the resource utilization and performance characteristics of Kong Mesh control plane components.", + "background_color": "purple", + "font_size": "14", + "text_align": "center", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 0, "width": 
6, "height": 1 } + }, + { + "id": 5151573599414004, + "definition": { + "title": "Goroutines", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.go.goroutines{$zone,$instance_id,$cluster_name}" + } + ], + "formulas": [{ "number_format": {}, "formula": "query1" }], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { "x": 0, "y": 1, "width": 3, "height": 2 } + }, + { + "id": 7354433772896516, + "definition": { + "title": "Threads", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.go.threads{$zone,$instance_id,$cluster_name}" + } + ], + "formulas": [{ "number_format": {}, "formula": "query1" }], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { "x": 3, "y": 1, "width": 3, "height": 2 } + }, + { + "id": 5669383081664033, + "definition": { + "title": "Memory Allocated", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.go.memstats.alloc_bytes{$zone,$instance_id,$cluster_name}.rollup(avg, 15)" + } + ], + "formulas": [ + { + 
"number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "byte_in_binary_bytes_family" + } + }, + "formula": "per_second(query1)" + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 0, "y": 3, "width": 3, "height": 2 } + }, + { + "id": 7170770919490113, + "definition": { + "title": "GC Latency (p75)", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:kuma.go.gc.duration_seconds.quantile{$zone,$instance_id,quantile:0.75,$cluster_name}.rollup(avg, 15)" + } + ], + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "second" + } + }, + "formula": "query1" + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 3, "y": 3, "width": 3, "height": 2 } + } + ] + }, + "layout": { "x": 6, "y": 10, "width": 6, "height": 6 } + }, + { + "id": 842504469602646, + "definition": { + "title": "Control Plane Logs", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 5895555829538668, + "definition": { + "type": "note", + "content": "Kong Mesh control plane logs provide detailed insights into system operations and error conditions. 
Logs are aggregated by patterns.", + "background_color": "purple", + "font_size": "14", + "text_align": "center", + "vertical_align": "top", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 0, "width": 12, "height": 1 } + }, + { + "id": 1173713165327207, + "definition": { + "title": "All Control Plane Logs", + "title_size": "16", + "title_align": "left", + "requests": [ + { + "columns": [ + { "field": "status_line", "width": "auto" }, + { "field": "matches", "width": "auto" }, + { "field": "volume", "width": "auto" }, + { "field": "@component", "width": "auto" }, + { "field": "source", "width": "auto" }, + { "field": "service", "width": "auto" }, + { "field": "message", "width": "auto" } + ], + "query": { + "data_source": "logs_pattern_stream", + "group_by": [ + { "facet": "@component" }, + { "facet": "source" }, + { "facet": "service" } + ], + "indexes": [], + "query_string": "source:kuma service:kuma-control-plane $cluster_name" + }, + "response_format": "event_list" + } + ], + "type": "list_stream" + }, + "layout": { "x": 0, "y": 1, "width": 12, "height": 4 } + }, + { + "id": 3679723625506016, + "definition": { + "title": "Control Plane Error Logs", + "title_size": "16", + "title_align": "left", + "requests": [ + { + "columns": [ + { "field": "status_line", "width": "auto" }, + { "field": "matches", "width": "auto" }, + { "field": "volume", "width": "auto" }, + { "field": "@component", "width": "auto" }, + { "field": "source", "width": "auto" }, + { "field": "service", "width": "auto" }, + { "field": "message", "width": "auto" } + ], + "query": { + "data_source": "logs_pattern_stream", + "group_by": [ + { "facet": "@component" }, + { "facet": "source" }, + { "facet": "service" } + ], + "indexes": [], + "query_string": "source:kuma service:kuma-control-plane status:error $cluster_name" + }, + "response_format": "event_list" + } + ], + "type": "list_stream" + }, + "layout": { "x": 0, "y": 5, 
"width": 12, "height": 5 } + } + ] + }, + "layout": { "x": 0, "y": 16, "width": 12, "height": 11 } + } + ], + "template_variables": [ + { + "name": "instance_id", + "prefix": "instance_id", + "available_values": [], + "default": "*" + }, + { + "name": "zone", + "prefix": "zone", + "available_values": [], + "default": "*" + }, + { + "name": "cluster_name", + "prefix": "cluster_name", + "available_values": [], + "default": "*" + } + ], + "layout_type": "ordered", + "notify_list": [], + "reflow_type": "fixed" +} diff --git a/kong_mesh/assets/dashboards/kong_mesh_service_communication.json b/kong_mesh/assets/dashboards/kong_mesh_service_communication.json new file mode 100644 index 0000000000000..fcda438d50e68 --- /dev/null +++ b/kong_mesh/assets/dashboards/kong_mesh_service_communication.json @@ -0,0 +1,740 @@ +{ + "title": "Kong Mesh Service Communication", + "description": "## Service Communication Insights\n\nThis dashboard tracks communication between services in your mesh, helping you:\n\n**Identify issues:**\n- Connection failures and timeouts\n- Increased latency between services\n- Traffic imbalances and hotspots\n\n**Further Reading:**\n- [Traffic Metrics Policy](https://kuma.io/docs/latest/policies/traffic-metrics/)\n- [Observability Guide](https://kuma.io/docs/latest/explore/observability/)\n\n[[suggested_dashboards]] (cloned)", + "widgets": [ + { + "id": 2802235483003109, + "definition": { + "title": "", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 7965937770186760, + "definition": { + "type": "image", + "url": "/static/images/logos/kong-mesh_large.svg", + "url_dark_theme": "/static/images/logos/kong-mesh_reversed_large.svg", + "sizing": "contain", + "margin": "md", + "has_background": true, + "has_border": true, + "vertical_align": "center", + "horizontal_align": "center" + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 2 } + }, + { + "id": 1097825529901868, + "definition": { + "type": "note", + 
"content": "## Service Communication Insights\n\nThis dashboard tracks communication between services in your mesh, helping you:\n\n- Connection failures and timeouts\n- Increased latency between services\n- Traffic imbalances and hotspots\n \nKong Mesh is the enterprise version of [Kuma](https://kuma.io/). \n\n**Further Reading:**\n- [Mesh Metrics Policy](https://developer.konghq.com/mesh/policies/meshmetric/)\n- [Observability Guide](https://developer.konghq.com/mesh/observability/)\n \n**Note:** If your environment also runs Envoy proxies that **aren’t** managed by Kong Mesh, you can set a mesh [template variable](https://docs.datadoghq.com/dashboards/template_variables/) in this dashboard to filter them out.\n", + "background_color": "white", + "font_size": "14", + "text_align": "left", + "vertical_align": "top", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 2, "width": 6, "height": 3 } + } + ] + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 10 } + }, + { + "id": 9876543210987654, + "definition": { + "title": "Service Communication Overview", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 8765432109876543, + "definition": { + "type": "note", + "content": "Get a high-level view of service communication health across your mesh.", + "background_color": "purple", + "font_size": "14", + "text_align": "left", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 1 } + }, + { + "id": 7654321098765432, + "definition": { + "title": "Request Rate", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [{ "formula": "query1" }], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": 
"sum:envoy.cluster.upstream_rq.count{$destination_cluster,$source_service,$zone,$mesh}.as_rate()" + } + ], + "response_format": "scalar", + "conditional_formats": [ + { + "comparator": ">", + "value": 0, + "palette": "white_on_green" + } + ] + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "yaxis": { "include_zero": true }, + "type": "area" + } + }, + "layout": { "x": 0, "y": 1, "width": 3, "height": 1 } + }, + { + "id": 4321098765432109, + "definition": { + "title": "Active Connections", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [{ "formula": "query1" }], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_cx_active{$destination_cluster,$source_service,$zone,$mesh}" + } + ], + "response_format": "scalar", + "conditional_formats": [ + { + "comparator": ">", + "value": 0, + "palette": "white_on_green" + } + ] + } + ], + "autoscale": true, + "precision": 0, + "timeseries_background": { + "yaxis": { "include_zero": true }, + "type": "area" + } + }, + "layout": { "x": 3, "y": 1, "width": 3, "height": 1 } + }, + { + "id": 6543210987654321, + "definition": { + "title": "Success Percentage", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "percent" + } + }, + "formula": "(1 - query1 / query2) * 100" + } + ], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_rq_xx.count{$destination_cluster AND $source_service AND $zone AND $mesh AND envoy_response_code_class IN (4, 5)}.as_rate()" + }, + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query2", + "query": "sum:envoy.cluster.upstream_rq_completed.count{$destination_cluster,$source_service,$zone,$mesh}.as_rate()" + } + ], + 
"response_format": "scalar", + "conditional_formats": [ + { + "comparator": ">=", + "value": 0.95, + "palette": "white_on_green" + }, + { + "comparator": "<", + "value": 0.95, + "palette": "white_on_yellow" + } + ] + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "yaxis": { "include_zero": true }, + "type": "area" + } + }, + "layout": { "x": 0, "y": 2, "width": 3, "height": 1 } + }, + { + "id": 5432109876543210, + "definition": { + "title": "Latency (Average)", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "millisecond" + } + }, + "formula": "query1 / query2" + } + ], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_rq_time.sum{$destination_cluster,$source_service,$zone,$mesh}.as_count()" + }, + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query2", + "query": "sum:envoy.cluster.upstream_rq_time.count{$destination_cluster,$source_service,$zone,$mesh}.as_count()" + } + ], + "response_format": "scalar" + } + ], + "autoscale": true, + "precision": 0, + "timeseries_background": { + "yaxis": { "include_zero": true }, + "type": "area" + } + }, + "layout": { "x": 3, "y": 2, "width": 3, "height": 1 } + }, + { + "id": 3210987654321098, + "definition": { + "title": "Top Service Pairs by Traffic", + "title_size": "16", + "title_align": "left", + "type": "query_table", + "requests": [ + { + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_rq.count{$zone,$mesh,$destination_cluster,$source_service} by {kuma_io_service,envoy_cluster}.as_rate()", + "aggregator": "avg" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { "type": "formula", "index": 0, "order": "desc" } + ] + }, + "formulas": [ + { + "cell_display_mode": "bar", + 
"alias": "Request Rate", + "formula": "query1" + } + ] + } + ], + "has_search_bar": "auto" + }, + "layout": { "x": 0, "y": 3, "width": 6, "height": 2 } + }, + { + "id": 2109876543210987, + "definition": { + "title": "Service Communication", + "title_size": "16", + "title_align": "left", + "requests": [ + { + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_rq.count{$zone,$mesh,$destination_cluster,$source_service} by {kuma_io_service,envoy_cluster}.as_rate()", + "aggregator": "avg" + } + ], + "response_format": "scalar", + "style": { "palette": "classic" }, + "formulas": [{ "formula": "query1" }], + "sort": { + "count": 500, + "order_by": [ + { "type": "formula", "index": 0, "order": "desc" } + ] + } + } + ], + "type": "sunburst", + "legend": { "type": "automatic" } + }, + "layout": { "x": 0, "y": 5, "width": 6, "height": 4 } + } + ] + }, + "layout": { "x": 6, "y": 0, "width": 6, "height": 10 } + }, + { + "id": 5555555555555555, + "definition": { + "title": "Request Performance", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 6666666666666666, + "definition": { + "type": "note", + "content": "Analyze request-level performance metrics to ensure services meet SLOs and identify optimization opportunities.", + "background_color": "purple", + "font_size": "14", + "text_align": "left", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 1 } + }, + { + "id": 7777777777777777, + "definition": { + "title": "Request Rate", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": 
"query1", + "query": "sum:envoy.cluster.upstream_rq.count{$destination_cluster,$source_service,$zone,$mesh}.as_rate()" + } + ], + "formulas": [{ "formula": "query1" }], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 0, "y": 1, "width": 6, "height": 2 } + }, + { + "id": 8888888888888888, + "definition": { + "title": "Request Duration (Average)", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_rq_time.sum{$destination_cluster,$source_service,$zone,$mesh}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:envoy.cluster.upstream_rq_time.count{$destination_cluster,$source_service,$zone,$mesh}.as_count()" + } + ], + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "millisecond" + } + }, + "formula": "query1 / query2" + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ], + "markers": [ + { "value": "y > 500", "display_type": "error dashed" }, + { "value": "y > 200", "display_type": "warning dashed" } + ] + }, + "layout": { "x": 0, "y": 3, "width": 6, "height": 2 } + }, + { + "id": 10000000000000000, + "definition": { + "title": "HTTP Response Codes", + "title_size": "16", + "title_align": "left", + "requests": [ + { + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_rq_xx.count{$destination_cluster,$source_service,$zone,$mesh} by {envoy_response_code_class}.as_count()", + "aggregator": "avg" + } + ], + "response_format": 
"scalar", + "style": { "palette": "semantic" }, + "formulas": [{ "formula": "query1" }], + "sort": { + "count": 500, + "order_by": [ + { "type": "formula", "index": 0, "order": "desc" } + ] + } + } + ], + "type": "sunburst", + "hide_total": false, + "legend": { "type": "automatic" } + }, + "layout": { "x": 0, "y": 5, "width": 6, "height": 4 } + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 6, + "height": 10, + "is_column_break": true + } + }, + { + "id": 3304026508757406, + "definition": { + "title": "Traffic Patterns", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 1234567890123456, + "definition": { + "type": "note", + "content": "Monitor service-to-service traffic patterns to understand communication flows and identify anomalies.", + "background_color": "purple", + "font_size": "14", + "text_align": "left", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { "x": 0, "y": 0, "width": 6, "height": 1 } + }, + { + "id": 584737675619954, + "definition": { + "title": "Bytes Transmitted to Destination", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_cx_tx_bytes.count{$destination_cluster,$source_service,$zone,$mesh}.as_count()" + } + ], + "formulas": [{ "formula": "query1" }], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 0, "y": 1, "width": 6, "height": 2 } + }, + { + "id": 8620186812746711, + "definition": { + "title": "Connection Establishment Time (Average)", + 
"title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_cx_connect_ms.sum{$zone,$source_service,$destination_cluster,$mesh}.as_rate()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:envoy.cluster.upstream_cx_connect_ms.count{$zone,$source_service,$destination_cluster,$mesh}.as_rate()" + } + ], + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "millisecond" + } + }, + "formula": "query1 / query2" + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 0, "y": 3, "width": 3, "height": 2 } + }, + { + "id": 8405221309961872, + "definition": { + "title": "Connection and Request Errors", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_cx_destroy_remote_with_active_rq.count{$destination_cluster,$source_service,$zone,$mesh}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:envoy.cluster.upstream_cx_connect_timeout.count{$destination_cluster,$source_service,$zone,$mesh}.as_count()" + }, + { + "data_source": "metrics", + "name": "query3", + "query": "sum:envoy.cluster.upstream_rq_timeout.count{$destination_cluster,$source_service,$zone,$mesh}.as_count()" + } + ], + "formulas": [ + { "formula": "query1" }, + { "formula": "query2" }, + { "formula": "query3" } + ], + "style": { + 
"palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 3, "y": 3, "width": 3, "height": 2 } + }, + { + "id": 6621706999193019, + "definition": { + "title": "Connection Duration (Average)", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:envoy.cluster.upstream_cx_length_ms.sum{$zone,$source_service,$destination_cluster,$mesh}.as_rate()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:envoy.cluster.upstream_cx_length_ms.count{$zone,$source_service,$destination_cluster,$mesh}.as_rate()" + } + ], + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "millisecond" + } + }, + "formula": "query1 / query2" + } + ], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { "x": 0, "y": 5, "width": 6, "height": 2 } + }, + { + "id": 4967730596643575, + "definition": { + "title": "Active Upstream Connections", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": ["avg", "min", "max", "value", "sum"], + "type": "timeseries", + "requests": [ + { + "response_format": "timeseries", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:envoy.cluster.upstream_cx_active{$destination_cluster,$zone,$source_service,$mesh}" + } + ], + "formulas": [{ "formula": "query1" }], + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "area" + } + ] + }, + "layout": { "x": 0, 
"y": 7, "width": 6, "height": 2 } + } + ] + }, + "layout": { "x": 6, "y": 0, "width": 6, "height": 10 } + } + ], + "template_variables": [ + { + "name": "source_service", + "prefix": "kuma_io_service", + "available_values": [], + "default": "*" + }, + { + "name": "destination_cluster", + "prefix": "envoy_cluster", + "available_values": [], + "default": "*" + }, + { + "name": "zone", + "prefix": "kuma_io_zone", + "available_values": [], + "default": "*" + }, + { "name": "mesh", "prefix": "mesh", "available_values": [], "default": "*" } + ], + "layout_type": "ordered", + "notify_list": [], + "reflow_type": "fixed" +} diff --git a/kong_mesh/assets/monitors/api_server_error_rate.json b/kong_mesh/assets/monitors/api_server_error_rate.json new file mode 100644 index 0000000000000..ccb1553cbf70d --- /dev/null +++ b/kong_mesh/assets/monitors/api_server_error_rate.json @@ -0,0 +1,34 @@ +{ + "version": 2, + "created_at": "2025-07-31", + "last_updated_at": "2025-07-31", + "title": "Kong Mesh API Server High Error Rate", + "tags": [ + "integration:kong-mesh" + ], + "description": "The Kong Mesh API Server exposes REST APIs for configuration management. This monitor alerts when the error rate (4xx and 5xx responses) exceeds 10% of total requests, indicating potential issues with API operations or client requests.", + "definition": { + "message": "{{#is_alert}}\n\nKong Mesh API Server is experiencing a high error rate on {{instance_id.name}} in zone {{zone.name}}.\n\nError rate: {{value}}%\n\nThis indicates that more than 10% of API requests are returning 4xx or 5xx status codes. 
Check the API server logs for details about the failing requests.\n\nCommon causes:\n- Invalid API requests (4xx errors)\n- Internal server errors (5xx errors)\n- Authentication/authorization issues\n- Resource constraints\n\n{{/is_alert}}", + "name": "[Kong Mesh] API Server High Error Rate", + "options": { + "thresholds": { + "critical": 10, + "warning": 5 + }, + "notify_audit": false, + "include_tags": true, + "new_group_delay": 60, + "silenced": {}, + "notify_no_data": false, + "renotify_interval": 0, + "require_full_window": false, + "escalation_message": "The Kong Mesh API server error rate remains high. This may impact configuration updates and management operations." + }, + "priority": 2, + "query": "sum(last_10m):( sum:kuma.api_server.http_request_duration_seconds.count{code_class:4xx OR code_class:5xx} by {instance_id,zone}.as_count() / sum:kuma.api_server.http_request_duration_seconds.count{*} by {instance_id,zone}.as_count() ) * 100 > 10", + "tags": [ + "integration:kong-mesh" + ], + "type": "query alert" + } +} \ No newline at end of file diff --git a/kong_mesh/assets/monitors/multiple_leaders_detected.json b/kong_mesh/assets/monitors/multiple_leaders_detected.json new file mode 100644 index 0000000000000..46bd5073a632a --- /dev/null +++ b/kong_mesh/assets/monitors/multiple_leaders_detected.json @@ -0,0 +1,33 @@ +{ + "version": 2, + "created_at": "2025-07-31", + "last_updated_at": "2025-07-31", + "title": "Kong Mesh Multiple Leaders Detected", + "tags": [ + "integration:kong-mesh" + ], + "description": "Each Kong Mesh zone must have exactly one leader instance for proper operation. 
This monitor alerts when multiple leaders are detected, which can cause split-brain scenarios and conflicting policy decisions.", + "definition": { + "message": "{{#is_alert}}\n\nMultiple Kong Mesh leaders detected in zone {{zone.name}}.\n\nLeader count: {{value}}\n\nMultiple leaders can cause split-brain scenarios.{{/is_alert}}", + "name": "[Kong Mesh] Multiple Leaders Detected", + "options": { + "thresholds": { + "critical": 1.5 + }, + "notify_audit": false, + "include_tags": true, + "new_group_delay": 60, + "silenced": {}, + "notify_no_data": false, + "renotify_interval": 300, + "require_full_window": true, + "evaluation_delay": 30 + }, + "priority": 1, + "query": "max(last_5m):sum:kuma.leader{*} by {zone,cluster_name} > 1.5", + "tags": [ + "integration:kong-mesh" + ], + "type": "query alert" + } +} diff --git a/kong_mesh/assets/monitors/no_leader_detected.json b/kong_mesh/assets/monitors/no_leader_detected.json new file mode 100644 index 0000000000000..25af805952110 --- /dev/null +++ b/kong_mesh/assets/monitors/no_leader_detected.json @@ -0,0 +1,34 @@ +{ + "version": 2, + "created_at": "2025-07-31", + "last_updated_at": "2025-07-31", + "title": "Kong Mesh No Leader Detected", + "tags": [ + "integration:kong-mesh" + ], + "description": "Each Kong Mesh zone must have exactly one leader instance for proper operation. 
This monitor alerts when no leader is detected, which prevents policy updates and proper control plane operation.", + "definition": { + "message": "{{#is_alert}}\n\nNo Kong Mesh leader detected in zone {{zone.name}}.\n\nLeader count: {{value}}{{/is_alert}}", + "name": "[Kong Mesh] No Leader Detected", + "options": { + "thresholds": { + "critical": 0.5 + }, + "notify_audit": false, + "include_tags": true, + "new_group_delay": 300, + "silenced": {}, + "notify_no_data": true, + "no_data_timeframe": 20, + "renotify_interval": 300, + "require_full_window": true, + "evaluation_delay": 60 + }, + "priority": 1, + "query": "min(last_10m):sum:kuma.leader{*} by {zone,cluster_name} < 0.5", + "tags": [ + "integration:kong-mesh" + ], + "type": "query alert" + } +} diff --git a/kong_mesh/assets/monitors/xds_delivery_latency.json b/kong_mesh/assets/monitors/xds_delivery_latency.json new file mode 100644 index 0000000000000..c8ee075d9c210 --- /dev/null +++ b/kong_mesh/assets/monitors/xds_delivery_latency.json @@ -0,0 +1,33 @@ +{ + "version": 2, + "created_at": "2025-07-31", + "last_updated_at": "2025-07-31", + "title": "Kong Mesh xDS High Configuration Delivery Latency", + "tags": [ + "integration:kong-mesh" + ], + "description": "Monitors the 99th percentile latency of xDS configuration delivery to data plane proxies. High latency can delay policy updates and impact the responsiveness of configuration changes in your service mesh.", + "definition": { + "message": "{{#is_alert}}\n\nKong Mesh xDS configuration delivery is experiencing high latency on {{instance_id.name}} in zone {{zone.name}}.\n\nP99 latency: {{value}}ms\n\nHigh configuration delivery latency delays the application of policy updates to data plane proxies. 
This can impact:\n- Time to apply traffic routing changes\n- Security policy enforcement delays\n- Service discovery update lag\n\nPossible causes:\n- Control plane resource constraints\n- Large number of data plane connections\n- Complex policy configurations\n- Network latency between control and data planes\n\n{{/is_alert}}", + "name": "[Kong Mesh] xDS High Configuration Delivery Latency", + "options": { + "thresholds": { + "critical": 5000, + "warning": 2000 + }, + "notify_audit": false, + "include_tags": true, + "new_group_delay": 60, + "silenced": {}, + "notify_no_data": false, + "renotify_interval": 0, + "require_full_window": false + }, + "priority": 3, + "query": "avg(last_10m):avg:kuma.xds.delivery.quantile{quantile:0.99} by {instance_id,zone} > 5000", + "tags": [ + "integration:kong-mesh" + ], + "type": "query alert" + } +} \ No newline at end of file diff --git a/kong_mesh/assets/monitors/xds_generation_errors.json b/kong_mesh/assets/monitors/xds_generation_errors.json new file mode 100644 index 0000000000000..36278280c2add --- /dev/null +++ b/kong_mesh/assets/monitors/xds_generation_errors.json @@ -0,0 +1,35 @@ +{ + "version": 2, + "created_at": "2025-07-31", + "last_updated_at": "2025-07-31", + "title": "Kong Mesh xDS Configuration Generation Errors", + "tags": [ + "integration:kong-mesh" + ], + "description": "The xDS protocol enables dynamic configuration updates from the control plane to data plane proxies. This monitor alerts when configuration generation errors occur, which can prevent policy updates from reaching the data plane.", + "definition": { + "message": "{{#is_alert}}\n\nKong Mesh is experiencing xDS configuration generation errors on {{instance_id.name}} in zone {{zone.name}}.\n\nError rate: {{value}} errors per second\n\nConfiguration generation errors prevent policy updates from being applied to data plane proxies. 
This can lead to:\n- Outdated routing rules\n- Missing security policies\n- Incorrect load balancing configurations\n\nCheck the control plane logs for detailed error messages.\n\n{{/is_alert}}", + "name": "[Kong Mesh] xDS Configuration Generation Errors", + "options": { + "thresholds": { + "critical": 5, + "warning": 1 + }, + "notify_audit": false, + "include_tags": true, + "new_group_delay": 60, + "silenced": {}, + "notify_no_data": false, + "renotify_interval": 300, + "require_full_window": false, + "timeout_h": 0, + "evaluation_delay": 0 + }, + "priority": 2, + "query": "avg(last_5m):sum:kuma.xds.generation_errors.count{*} by {instance_id,zone}.as_rate() > 5", + "tags": [ + "integration:kong-mesh" + ], + "type": "query alert" + } +} \ No newline at end of file diff --git a/kong_mesh/assets/saved_views/logs_overview.json b/kong_mesh/assets/saved_views/logs_overview.json new file mode 100644 index 0000000000000..3170723ba0480 --- /dev/null +++ b/kong_mesh/assets/saved_views/logs_overview.json @@ -0,0 +1,25 @@ +{ + "name": "Kong Mesh Access Logs Overview", + "type": "logs", + "page": "stream", + "query": "source:kong_mesh @kuma.destination_service:*", + "timerange": { + "interval_ms": 3600000 + }, + "visible_facets": [ + "source", + "host", + "service", + "@kuma.destination_service", + "@kuma.source_service", + "@kuma.source_address_without_port", + "@kuma.mesh", + "@kuma.upstream.host" + ], + "options": { + "show_date_column": true, + "show_message_column": true, + "message_display": "inline", + "show_timeline": true + } +} diff --git a/kong_mesh/assets/service_checks.json b/kong_mesh/assets/service_checks.json new file mode 100644 index 0000000000000..fd70df1f69a4d --- /dev/null +++ b/kong_mesh/assets/service_checks.json @@ -0,0 +1,18 @@ + +[ + { + "agent_version": "7.68.0", + "integration": "Kong Mesh", + "check": "kuma.openmetrics.health", + "statuses": [ + "ok", + "critical" + ], + "groups": [ + "host", + "endpoint" + ], + "name": "Kong Mesh OpenMetrics 
endpoint health", + "description": "Returns `CRITICAL` if the Agent is unable to connect to the Kong Mesh OpenMetrics endpoint, otherwise returns `OK`." + } +] diff --git a/kong_mesh/images/control_plane.png b/kong_mesh/images/control_plane.png new file mode 100644 index 0000000000000..ae563b4df9ede Binary files /dev/null and b/kong_mesh/images/control_plane.png differ diff --git a/kong_mesh/images/control_plane_dark.png b/kong_mesh/images/control_plane_dark.png new file mode 100644 index 0000000000000..efaaa063c2f41 Binary files /dev/null and b/kong_mesh/images/control_plane_dark.png differ diff --git a/kong_mesh/images/service_communication.png b/kong_mesh/images/service_communication.png new file mode 100644 index 0000000000000..de3ce21e85058 Binary files /dev/null and b/kong_mesh/images/service_communication.png differ diff --git a/kong_mesh/images/service_communication_dark.png b/kong_mesh/images/service_communication_dark.png new file mode 100644 index 0000000000000..a78e5f10a8e3b Binary files /dev/null and b/kong_mesh/images/service_communication_dark.png differ diff --git a/kong_mesh/manifest.json b/kong_mesh/manifest.json new file mode 100644 index 0000000000000..871ce975abd68 --- /dev/null +++ b/kong_mesh/manifest.json @@ -0,0 +1,80 @@ +{ + "manifest_version": "2.0.0", + "app_uuid": "46d3df79-b309-43ed-b49c-3f29a4983235", + "app_id": "kong-mesh", + "display_on_public_website": true, + "tile": { + "overview": "README.md#Overview", + "configuration": "README.md#Setup", + "support": "README.md#Support", + "changelog": "CHANGELOG.md", + "description": "Collect metrics and logs from Kong Mesh, an enterprise service mesh for Kubernetes and VMs.", + "title": "Kong Mesh", + "media": [ + { + "media_type": "image", + "caption": "Kong Mesh Control Plane", + "image_url": "images/control_plane.png" + }, + { + "media_type": "image", + "caption": "Kong Mesh Control Plane (Dark Theme)", + "image_url": "images/control_plane_dark.png" + }, + { + "media_type": "image", + 
"caption": "Kong Mesh Service Communication", + "image_url": "images/service_communication.png" + }, + { + "media_type": "image", + "caption": "Kong Mesh Service Communication (Dark Theme)", + "image_url": "images/service_communication_dark.png" + } + ], + "classifier_tags": [ + "Supported OS::Linux", + "Supported OS::Windows", + "Supported OS::macOS", + "Category::Network", + "Category::Kubernetes", + "Category::Containers", + "Offering::Integration", + "Submitted Data Type::Metrics", + "Submitted Data Type::Logs" + ] + }, + "assets": { + "integration": { + "auto_install": true, + "source_type_name": "Kong Mesh", + "source_type_id": 57086036, + "events": { + "creates_events": false + }, + "service_checks": { + "metadata_path": "assets/service_checks.json" + } + }, + "saved_views": { + "Kong Mesh Access Logs Overview": "assets/saved_views/logs_overview.json" + }, + "monitors": { + "Kong Mesh No Leader Detected": "assets/monitors/no_leader_detected.json", + "Kong Mesh Multiple Leaders Detected": "assets/monitors/multiple_leaders_detected.json", + "Kong Mesh API Server High Error Rate": "assets/monitors/api_server_error_rate.json", + "Kong Mesh xDS Configuration Generation Errors": "assets/monitors/xds_generation_errors.json", + "Kong Mesh xDS High Configuration Delivery Latency": "assets/monitors/xds_delivery_latency.json" + }, + "dashboards": { + "Kong Mesh Control Plane": "assets/dashboards/kong_mesh_control_plane.json", + "Kong Mesh Service Communication": "assets/dashboards/kong_mesh_service_communication.json" + } + }, + "author": { + "support_email": "help@datadoghq.com", + "name": "Datadog", + "homepage": "https://www.datadoghq.com", + "sales_email": "info@datadoghq.com" + } +}