diff --git a/Cargo.lock b/Cargo.lock index 4354e43af6..79b4b832cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3379,6 +3379,7 @@ dependencies = [ "rivet-metrics", "rivet-runner-protocol", "rivet-runtime", + "rivet-types", "serde", "serde_bare", "serde_json", diff --git a/engine/artifacts/config-schema.json b/engine/artifacts/config-schema.json index 064defe444..d800b7c2ea 100644 --- a/engine/artifacts/config-schema.json +++ b/engine/artifacts/config-schema.json @@ -960,6 +960,15 @@ "format": "uint", "minimum": 0.0 }, + "serverless_drain_grace_period": { + "description": "Drain grace period for serverless runners.\n\nThis time is subtracted from the configured request duration. Once `duration - grace` is reached, the runner is sent stop commands for all of its actors. After the grace period is over (i.e. the full duration is reached) the runner websocket is forcibly closed.\n\nDefault is 10 seconds.\n\nUnit is in milliseconds.\n\n**Experimental**", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, "serverless_retry_reset_duration": { "description": "How long a serverless runner goes without connection failures before it's retry count is reset to 0, effectively resetting its backoff to 0.\n\nUnit is in milliseconds.\n\n**Experimental**", "type": [ diff --git a/engine/artifacts/errors/guard.invalid_request_body.json b/engine/artifacts/errors/guard.invalid_request_body.json new file mode 100644 index 0000000000..c9eb742829 --- /dev/null +++ b/engine/artifacts/errors/guard.invalid_request_body.json @@ -0,0 +1,5 @@ +{ + "code": "invalid_request_body", + "group": "guard", + "message": "Unable to parse request body." 
+} \ No newline at end of file diff --git a/engine/artifacts/errors/guard.invalid_response_body.json b/engine/artifacts/errors/guard.invalid_response_body.json new file mode 100644 index 0000000000..0ac8786239 --- /dev/null +++ b/engine/artifacts/errors/guard.invalid_response_body.json @@ -0,0 +1,5 @@ +{ + "code": "invalid_response_body", + "group": "guard", + "message": "Unable to parse response body." +} \ No newline at end of file diff --git a/engine/docker/dev-host/grafana/dashboards/api.json b/engine/docker/dev-host/grafana/dashboards/api.json index 2623796db4..2af2eb2b2d 100644 --- a/engine/docker/dev-host/grafana/dashboards/api.json +++ b/engine/docker/dev-host/grafana/dashboards/api.json @@ -940,7 +940,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -948,7 +948,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -967,7 +967,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -975,7 +975,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-host/grafana/dashboards/cache.json b/engine/docker/dev-host/grafana/dashboards/cache.json index 282e099a8e..921af7008d 100644 --- a/engine/docker/dev-host/grafana/dashboards/cache.json +++ b/engine/docker/dev-host/grafana/dashboards/cache.json @@ -830,7 
+830,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -838,7 +838,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -857,7 +857,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -865,7 +865,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-host/grafana/dashboards/futures.json b/engine/docker/dev-host/grafana/dashboards/futures.json index fdca7320ea..b57d243698 100644 --- a/engine/docker/dev-host/grafana/dashboards/futures.json +++ b/engine/docker/dev-host/grafana/dashboards/futures.json @@ -128,7 +128,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -136,7 +136,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -157,7 +157,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": 
true, @@ -165,7 +165,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-host/grafana/dashboards/gasoline.json b/engine/docker/dev-host/grafana/dashboards/gasoline.json index 0c6616f4e3..64916e56f2 100644 --- a/engine/docker/dev-host/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-host/grafana/dashboards/gasoline.json @@ -91,7 +91,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -127,7 +128,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name) (rivet_gasoline_workflow_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -194,7 +195,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -230,7 +232,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (workflow_name) (rivet_gasoline_workflow_sleeping{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, rivet_project, 
rivet_datacenter) (\n rivet_gasoline_workflow_sleeping{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -297,7 +299,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -333,7 +336,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name) (rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, error, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -401,7 +404,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -437,7 +441,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name, error) (rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name, 
error) (\n max by(workflow_name, error, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}: {{error}}", "range": true, @@ -605,7 +609,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -641,7 +646,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (signal_name) (rivet_gasoline_signal_pending{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (signal_name) (\n max by(signal_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_signal_pending{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{signal_name}}", "range": true, @@ -1183,11 +1188,187 @@ "overrides": [] }, "gridPos": { - "h": 8, + "h": 9, "w": 12, "x": 0, "y": 51 }, + "id": 20, + "interval": "15s", + "options": { + "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + 
"scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "max": "60", + "min": 0, + "reverse": false, + "unit": "" + } + }, + "pluginVersion": "11.6.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(rivet_gasoline_cpu_usage_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Core Usage", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 26, + "interval": "15s", + "options": { + "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "max": "60", + "min": 0, + "reverse": false, + "unit": "percentunit" + } + }, + "pluginVersion": "11.6.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": 
"code", + "expr": "sum(increase(rivet_gasoline_load_shedding_ratio_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Load Shedding Ratio", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 60 + }, "id": 23, "interval": "15s", "options": { @@ -1274,7 +1455,7 @@ "h": 8, "w": 12, "x": 12, - "y": 51 + "y": 60 }, "id": 24, "interval": "15s", @@ -1354,12 +1535,10 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 0.1, - "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1368,6 +1547,9 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -1377,14 +1559,13 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1397,7 +1578,7 @@ } ] }, - "unit": "s" + "unit": "none" }, "overrides": [] }, @@ -1405,23 +1586,20 @@ "h": 8, "w": 12, "x": 0, - "y": 59 + "y": 68 }, - "id": 13, + "id": 36, + "interval": "15s", "options": { "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": false, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", 
"sort": "none" } }, @@ -1433,14 +1611,14 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (worker_id) (rivet_gasoline_last_pull_workflows_duration{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", - "instant": false, - "legendFormat": "{{worker_id}}", + "expr": "sum by (sub_workflow_name) (rate(rivet_gasoline_workflow_dispatched_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"} [$__rate_interval]))", + "format": "heatmap", + "legendFormat": "{{signal_name}}", "range": true, "refId": "A" } ], - "title": "Last Pull Workflows Duration", + "title": "Workflows Dispatched/s", "type": "timeseries" }, { @@ -1459,12 +1637,10 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 0.1, - "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1473,6 +1649,9 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -1482,14 +1661,13 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1502,7 +1680,7 @@ } ] }, - "unit": "s" + "unit": "none" }, "overrides": [] }, @@ -1510,200 +1688,21 @@ "h": 8, "w": 12, "x": 12, - "y": 59 + "y": 68 }, - "id": 14, + "id": 35, + "interval": "15s", "options": { "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": false, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": 
"prometheus" - }, - "editorMode": "code", - "expr": "max by (worker_id) (rivet_gasoline_last_pull_workflows_history_duration{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", - "instant": false, - "legendFormat": "{{worker_id}}", - "range": true, - "refId": "A" - } - ], - "title": "Last Pull Workflows History Duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 67 - }, - "id": 20, - "interval": "15s", - "options": { - "calculate": false, - "calculation": { - "xBuckets": { - "mode": "size" - } - }, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "max": "60", - "min": 0, - "reverse": false, - "unit": "" - } - }, - "pluginVersion": "11.6.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_cpu_usage_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", - "format": "heatmap", - "legendFormat": "{{le}}", - "range": true, - "refId": "A" - } - ], - "title": "CPU Core Usage", - "type": "heatmap" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - 
"viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 67 - }, - "id": 26, - "interval": "15s", - "options": { - "calculate": false, - "calculation": { - "xBuckets": { - "mode": "size" - } - }, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "max": "60", - "min": 0, - "reverse": false, - "unit": "percentunit" + "sort": "none" } }, "pluginVersion": "11.6.7", @@ -1714,15 +1713,15 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_load_shedding_ratio_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "expr": "sum by (sub_workflow_name) (rate(rivet_gasoline_workflow_dispatched_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=\"\"} [$__rate_interval]))", "format": "heatmap", - "legendFormat": "{{le}}", + "legendFormat": "{{sub_workflow_name}}", "range": true, "refId": "A" } ], - "title": "Load Shedding Ratio", - "type": "heatmap" + "title": "Workflows Dispatched/s (Not From Workflow)", + "type": "timeseries" }, { "datasource": { @@ -2162,41 +2161,9 @@ } ] }, - "unit": "none" + "unit": "sishort" }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "epoxy_coordinator", - "epoxy_purger", - "namespace", - "pegboard_runner", - "pegboard_runner2", - "pegboard_runner_pool", - "pegboard_runner_pool_error_tracker", - "pegboard_serverless_conn" - ], - 
"prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 9, @@ -2612,7 +2579,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -2620,7 +2587,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2630,7 +2597,9 @@ }, { "current": { - "text": "All", + "text": [ + "All" + ], "value": [ "$__all" ] @@ -2639,7 +2608,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -2647,7 +2616,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2657,9 +2626,7 @@ }, { "current": { - "text": [ - "All" - ], + "text": "All", "value": [ "$__all" ] @@ -2686,12 +2653,12 @@ ] }, "time": { - "from": "now-15m", + "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Gasoline", "uid": "636d22f9-d18f-4086-8b45-7c50886a105c", - "version": 7 + "version": 9 } \ No newline at end of file diff --git a/engine/docker/dev-host/grafana/dashboards/guard.json b/engine/docker/dev-host/grafana/dashboards/guard.json index f6190ff52c..cd9d61f4a6 100644 --- a/engine/docker/dev-host/grafana/dashboards/guard.json +++ b/engine/docker/dev-host/grafana/dashboards/guard.json @@ -1352,7 
+1352,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -1360,7 +1360,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1379,7 +1379,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -1387,7 +1387,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-host/grafana/dashboards/operation.json b/engine/docker/dev-host/grafana/dashboards/operation.json index 7ccc0d3e0c..d4348cec8f 100644 --- a/engine/docker/dev-host/grafana/dashboards/operation.json +++ b/engine/docker/dev-host/grafana/dashboards/operation.json @@ -780,7 +780,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -788,7 +788,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -809,7 +809,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": 
"Datacenter", "multi": true, @@ -817,7 +817,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-host/grafana/dashboards/pegboard.json b/engine/docker/dev-host/grafana/dashboards/pegboard.json index 52bd7422d4..8ed083805d 100644 --- a/engine/docker/dev-host/grafana/dashboards/pegboard.json +++ b/engine/docker/dev-host/grafana/dashboards/pegboard.json @@ -196,9 +196,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (rivet_datacenter) (rivet_pegboard_actor_pending_allocation{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (rivet_datacenter) (\n max by(rivet_project, rivet_datacenter) (\n rivet_pegboard_actor_pending_allocation{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -381,9 +381,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (rivet_datacenter) (rivet_pegboard_serverless_desired_slots{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (rivet_datacenter) (\n max by(rivet_project, rivet_datacenter) (\n rivet_pegboard_serverless_desired_slots{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n 
(rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -487,7 +487,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_serverless_outbound_req_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -591,7 +591,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_serverless_outbound_req_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -695,7 +695,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_runner_connection_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -799,7 +799,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_runner_connection_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -903,7 +903,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_event_multiplexer_count{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": 
"{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1007,7 +1007,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_ingested_events_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1192,7 +1192,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_runner_version_upgrade_drain_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1209,10 +1209,10 @@ { "current": { "text": [ - "prod" + "staging" ], "value": [ - "prod" + "staging" ] }, "datasource": { @@ -1270,5 +1270,5 @@ "timezone": "browser", "title": "Pegboard", "uid": "afa77odsjjk74d", - "version": 1 + "version": 2 } \ No newline at end of file diff --git a/engine/docker/dev-host/grafana/dashboards/tokio.json b/engine/docker/dev-host/grafana/dashboards/tokio.json index e9d9771791..786a3aeada 100644 --- a/engine/docker/dev-host/grafana/dashboards/tokio.json +++ b/engine/docker/dev-host/grafana/dashboards/tokio.json @@ -846,7 +846,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -854,7 +854,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -875,7 +875,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, 
"label": "Datacenter", "multi": true, @@ -883,7 +883,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-host/rivet-engine/config.jsonc b/engine/docker/dev-host/rivet-engine/config.jsonc index 87a23b0e07..814d17ecbb 100644 --- a/engine/docker/dev-host/rivet-engine/config.jsonc +++ b/engine/docker/dev-host/rivet-engine/config.jsonc @@ -37,17 +37,6 @@ "native_url": "http://127.0.0.1:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "127.0.0.1", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json index 2623796db4..2af2eb2b2d 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/api.json @@ -940,7 +940,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -948,7 +948,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -967,7 +967,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -975,7 +975,7 @@ "options": 
[], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json index 282e099a8e..921af7008d 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json @@ -830,7 +830,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -838,7 +838,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -857,7 +857,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -865,7 +865,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json index fdca7320ea..b57d243698 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json @@ -128,7 +128,7 @@ "type": 
"prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -136,7 +136,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -157,7 +157,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -165,7 +165,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json index 0c6616f4e3..64916e56f2 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json @@ -91,7 +91,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -127,7 +128,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name) (rivet_gasoline_workflow_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n 
(rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -194,7 +195,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -230,7 +232,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (workflow_name) (rivet_gasoline_workflow_sleeping{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_sleeping{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -297,7 +299,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -333,7 +336,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name) (rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, error, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) 
group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -401,7 +404,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -437,7 +441,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name, error) (rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name, error) (\n max by(workflow_name, error, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}: {{error}}", "range": true, @@ -605,7 +609,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -641,7 +646,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (signal_name) (rivet_gasoline_signal_pending{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (signal_name) (\n max by(signal_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_signal_pending{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) 
group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{signal_name}}", "range": true, @@ -1183,11 +1188,187 @@ "overrides": [] }, "gridPos": { - "h": 8, + "h": 9, "w": 12, "x": 0, "y": 51 }, + "id": 20, + "interval": "15s", + "options": { + "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "max": "60", + "min": 0, + "reverse": false, + "unit": "" + } + }, + "pluginVersion": "11.6.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(rivet_gasoline_cpu_usage_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Core Usage", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 51 
+ }, + "id": 26, + "interval": "15s", + "options": { + "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "max": "60", + "min": 0, + "reverse": false, + "unit": "percentunit" + } + }, + "pluginVersion": "11.6.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(rivet_gasoline_load_shedding_ratio_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Load Shedding Ratio", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 60 + }, "id": 23, "interval": "15s", "options": { @@ -1274,7 +1455,7 @@ "h": 8, "w": 12, "x": 12, - "y": 51 + "y": 60 }, "id": 24, "interval": "15s", @@ -1354,12 +1535,10 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 0.1, - "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1368,6 +1547,9 @@ }, "insertNulls": false, "lineInterpolation": "linear", + 
"lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -1377,14 +1559,13 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1397,7 +1578,7 @@ } ] }, - "unit": "s" + "unit": "none" }, "overrides": [] }, @@ -1405,23 +1586,20 @@ "h": 8, "w": 12, "x": 0, - "y": 59 + "y": 68 }, - "id": 13, + "id": 36, + "interval": "15s", "options": { "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": false, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -1433,14 +1611,14 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (worker_id) (rivet_gasoline_last_pull_workflows_duration{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", - "instant": false, - "legendFormat": "{{worker_id}}", + "expr": "sum by (sub_workflow_name) (rate(rivet_gasoline_workflow_dispatched_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"} [$__rate_interval]))", + "format": "heatmap", + "legendFormat": "{{signal_name}}", "range": true, "refId": "A" } ], - "title": "Last Pull Workflows Duration", + "title": "Workflows Dispatched/s", "type": "timeseries" }, { @@ -1459,12 +1637,10 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 0.1, - "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1473,6 +1649,9 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -1482,14 
+1661,13 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1502,7 +1680,7 @@ } ] }, - "unit": "s" + "unit": "none" }, "overrides": [] }, @@ -1510,200 +1688,21 @@ "h": 8, "w": 12, "x": 12, - "y": 59 + "y": 68 }, - "id": 14, + "id": 35, + "interval": "15s", "options": { "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": false, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "max by (worker_id) (rivet_gasoline_last_pull_workflows_history_duration{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", - "instant": false, - "legendFormat": "{{worker_id}}", - "range": true, - "refId": "A" - } - ], - "title": "Last Pull Workflows History Duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 67 - }, - "id": 20, - "interval": "15s", - "options": { - "calculate": false, - "calculation": { - "xBuckets": { - "mode": "size" - } - }, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": 
"auto" - }, - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "max": "60", - "min": 0, - "reverse": false, - "unit": "" - } - }, - "pluginVersion": "11.6.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_cpu_usage_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", - "format": "heatmap", - "legendFormat": "{{le}}", - "range": true, - "refId": "A" - } - ], - "title": "CPU Core Usage", - "type": "heatmap" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 67 - }, - "id": 26, - "interval": "15s", - "options": { - "calculate": false, - "calculation": { - "xBuckets": { - "mode": "size" - } - }, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "max": "60", - "min": 0, - "reverse": false, - "unit": "percentunit" + "sort": "none" } }, "pluginVersion": "11.6.7", @@ -1714,15 +1713,15 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_load_shedding_ratio_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "expr": "sum by (sub_workflow_name) 
(rate(rivet_gasoline_workflow_dispatched_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=\"\"} [$__rate_interval]))", "format": "heatmap", - "legendFormat": "{{le}}", + "legendFormat": "{{sub_workflow_name}}", "range": true, "refId": "A" } ], - "title": "Load Shedding Ratio", - "type": "heatmap" + "title": "Workflows Dispatched/s (Not From Workflow)", + "type": "timeseries" }, { "datasource": { @@ -2162,41 +2161,9 @@ } ] }, - "unit": "none" + "unit": "sishort" }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "epoxy_coordinator", - "epoxy_purger", - "namespace", - "pegboard_runner", - "pegboard_runner2", - "pegboard_runner_pool", - "pegboard_runner_pool_error_tracker", - "pegboard_serverless_conn" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 9, @@ -2612,7 +2579,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -2620,7 +2587,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2630,7 +2597,9 @@ }, { "current": { - "text": "All", + "text": [ + "All" + ], "value": [ "$__all" ] @@ -2639,7 +2608,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -2647,7 +2616,7 @@ "options": [], "query": { "qryType": 1, - "query": 
"label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2657,9 +2626,7 @@ }, { "current": { - "text": [ - "All" - ], + "text": "All", "value": [ "$__all" ] @@ -2686,12 +2653,12 @@ ] }, "time": { - "from": "now-15m", + "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Gasoline", "uid": "636d22f9-d18f-4086-8b45-7c50886a105c", - "version": 7 + "version": 9 } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json index f6190ff52c..cd9d61f4a6 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json @@ -1352,7 +1352,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -1360,7 +1360,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1379,7 +1379,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -1387,7 +1387,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git 
a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/operation.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/operation.json index 7ccc0d3e0c..d4348cec8f 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/operation.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/operation.json @@ -780,7 +780,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -788,7 +788,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -809,7 +809,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -817,7 +817,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/pegboard.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/pegboard.json index 52bd7422d4..8ed083805d 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/pegboard.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/pegboard.json @@ -196,9 +196,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (rivet_datacenter) (rivet_pegboard_actor_pending_allocation{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (rivet_datacenter) (\n max by(rivet_project, 
rivet_datacenter) (\n rivet_pegboard_actor_pending_allocation{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -381,9 +381,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (rivet_datacenter) (rivet_pegboard_serverless_desired_slots{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (rivet_datacenter) (\n max by(rivet_project, rivet_datacenter) (\n rivet_pegboard_serverless_desired_slots{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -487,7 +487,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_serverless_outbound_req_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -591,7 +591,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) 
(rate(rivet_pegboard_serverless_outbound_req_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -695,7 +695,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_runner_connection_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -799,7 +799,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_runner_connection_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -903,7 +903,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_event_multiplexer_count{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1007,7 +1007,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_ingested_events_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1192,7 +1192,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_runner_version_upgrade_drain_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1209,10 +1209,10 @@ { "current": { "text": [ - "prod" + "staging" ], "value": [ - "prod" + "staging" ] }, 
"datasource": { @@ -1270,5 +1270,5 @@ "timezone": "browser", "title": "Pegboard", "uid": "afa77odsjjk74d", - "version": 1 + "version": 2 } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/tokio.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/tokio.json index e9d9771791..786a3aeada 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/tokio.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/tokio.json @@ -846,7 +846,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -854,7 +854,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -875,7 +875,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -883,7 +883,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/0/config.jsonc b/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/0/config.jsonc index 0f74e2a346..a61a8d52f1 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/0/config.jsonc +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/0/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", 
"password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-a", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/1/config.jsonc b/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/1/config.jsonc index 0f74e2a346..a61a8d52f1 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/1/config.jsonc +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/1/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-a", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/2/config.jsonc b/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/2/config.jsonc index 0f74e2a346..a61a8d52f1 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/2/config.jsonc +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/2/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-a", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/0/config.jsonc b/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/0/config.jsonc index 0c940aaf6a..7898758e89 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/0/config.jsonc 
+++ b/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/0/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-b", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/1/config.jsonc b/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/1/config.jsonc index 0c940aaf6a..7898758e89 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/1/config.jsonc +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/1/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-b", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/2/config.jsonc b/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/2/config.jsonc index 0c940aaf6a..7898758e89 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/2/config.jsonc +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/2/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-b", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/0/config.jsonc 
b/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/0/config.jsonc index b6218cd163..4d40c5693d 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/0/config.jsonc +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/0/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-c", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/1/config.jsonc b/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/1/config.jsonc index b6218cd163..4d40c5693d 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/1/config.jsonc +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/1/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-c", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/2/config.jsonc b/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/2/config.jsonc index b6218cd163..4d40c5693d 100644 --- a/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/2/config.jsonc +++ b/engine/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/2/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - 
"host": "vector-client-dc-c", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/api.json b/engine/docker/dev-multidc/core/grafana/dashboards/api.json index 2623796db4..2af2eb2b2d 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/api.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/api.json @@ -940,7 +940,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -948,7 +948,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -967,7 +967,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -975,7 +975,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/cache.json b/engine/docker/dev-multidc/core/grafana/dashboards/cache.json index 282e099a8e..921af7008d 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/cache.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/cache.json @@ -830,7 +830,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -838,7 +838,7 @@ "options": [], "query": { "qryType": 1, - "query": 
"label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -857,7 +857,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -865,7 +865,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/futures.json b/engine/docker/dev-multidc/core/grafana/dashboards/futures.json index fdca7320ea..b57d243698 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/futures.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/futures.json @@ -128,7 +128,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -136,7 +136,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -157,7 +157,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -165,7 +165,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": 
"PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json b/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json index 0c6616f4e3..64916e56f2 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/gasoline.json @@ -91,7 +91,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -127,7 +128,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name) (rivet_gasoline_workflow_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -194,7 +195,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -230,7 +232,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (workflow_name) (rivet_gasoline_workflow_sleeping{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_sleeping{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, 
rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -297,7 +299,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -333,7 +336,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name) (rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, error, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -401,7 +404,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -437,7 +441,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name, error) (rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name, error) (\n max by(workflow_name, error, rivet_project, rivet_datacenter) (\n 
rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}: {{error}}", "range": true, @@ -605,7 +609,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -641,7 +646,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (signal_name) (rivet_gasoline_signal_pending{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (signal_name) (\n max by(signal_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_signal_pending{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{signal_name}}", "range": true, @@ -1183,11 +1188,187 @@ "overrides": [] }, "gridPos": { - "h": 8, + "h": 9, "w": 12, "x": 0, "y": 51 }, + "id": 20, + "interval": "15s", + "options": { + "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": 
"rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "max": "60", + "min": 0, + "reverse": false, + "unit": "" + } + }, + "pluginVersion": "11.6.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(rivet_gasoline_cpu_usage_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Core Usage", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 26, + "interval": "15s", + "options": { + "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "max": "60", + "min": 0, + "reverse": false, + "unit": "percentunit" + } + }, + "pluginVersion": "11.6.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": 
"sum(increase(rivet_gasoline_load_shedding_ratio_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Load Shedding Ratio", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 60 + }, "id": 23, "interval": "15s", "options": { @@ -1274,7 +1455,7 @@ "h": 8, "w": 12, "x": 12, - "y": 51 + "y": 60 }, "id": 24, "interval": "15s", @@ -1354,12 +1535,10 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 0.1, - "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1368,6 +1547,9 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -1377,14 +1559,13 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1397,7 +1578,7 @@ } ] }, - "unit": "s" + "unit": "none" }, "overrides": [] }, @@ -1405,23 +1586,20 @@ "h": 8, "w": 12, "x": 0, - "y": 59 + "y": 68 }, - "id": 13, + "id": 36, + "interval": "15s", "options": { "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": false, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, 
@@ -1433,14 +1611,14 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (worker_id) (rivet_gasoline_last_pull_workflows_duration{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", - "instant": false, - "legendFormat": "{{worker_id}}", + "expr": "sum by (sub_workflow_name) (rate(rivet_gasoline_workflow_dispatched_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"} [$__rate_interval]))", + "format": "heatmap", + "legendFormat": "{{signal_name}}", "range": true, "refId": "A" } ], - "title": "Last Pull Workflows Duration", + "title": "Workflows Dispatched/s", "type": "timeseries" }, { @@ -1459,12 +1637,10 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 0.1, - "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1473,6 +1649,9 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -1482,14 +1661,13 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1502,7 +1680,7 @@ } ] }, - "unit": "s" + "unit": "none" }, "overrides": [] }, @@ -1510,200 +1688,21 @@ "h": 8, "w": 12, "x": 12, - "y": 59 + "y": 68 }, - "id": 14, + "id": 35, + "interval": "15s", "options": { "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": false, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - 
"editorMode": "code", - "expr": "max by (worker_id) (rivet_gasoline_last_pull_workflows_history_duration{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", - "instant": false, - "legendFormat": "{{worker_id}}", - "range": true, - "refId": "A" - } - ], - "title": "Last Pull Workflows History Duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 67 - }, - "id": 20, - "interval": "15s", - "options": { - "calculate": false, - "calculation": { - "xBuckets": { - "mode": "size" - } - }, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "max": "60", - "min": 0, - "reverse": false, - "unit": "" - } - }, - "pluginVersion": "11.6.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_cpu_usage_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", - "format": "heatmap", - "legendFormat": "{{le}}", - "range": true, - "refId": "A" - } - ], - "title": "CPU Core Usage", - "type": "heatmap" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - 
"scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 67 - }, - "id": 26, - "interval": "15s", - "options": { - "calculate": false, - "calculation": { - "xBuckets": { - "mode": "size" - } - }, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "max": "60", - "min": 0, - "reverse": false, - "unit": "percentunit" + "sort": "none" } }, "pluginVersion": "11.6.7", @@ -1714,15 +1713,15 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_load_shedding_ratio_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "expr": "sum by (sub_workflow_name) (rate(rivet_gasoline_workflow_dispatched_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=\"\"} [$__rate_interval]))", "format": "heatmap", - "legendFormat": "{{le}}", + "legendFormat": "{{sub_workflow_name}}", "range": true, "refId": "A" } ], - "title": "Load Shedding Ratio", - "type": "heatmap" + "title": "Workflows Dispatched/s (Not From Workflow)", + "type": "timeseries" }, { "datasource": { @@ -2162,41 +2161,9 @@ } ] }, - "unit": "none" + "unit": "sishort" }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "epoxy_coordinator", - "epoxy_purger", - "namespace", - "pegboard_runner", - "pegboard_runner2", - "pegboard_runner_pool", - "pegboard_runner_pool_error_tracker", - "pegboard_serverless_conn" - ], - "prefix": "All except:", - 
"readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 9, @@ -2612,7 +2579,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -2620,7 +2587,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2630,7 +2597,9 @@ }, { "current": { - "text": "All", + "text": [ + "All" + ], "value": [ "$__all" ] @@ -2639,7 +2608,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -2647,7 +2616,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2657,9 +2626,7 @@ }, { "current": { - "text": [ - "All" - ], + "text": "All", "value": [ "$__all" ] @@ -2686,12 +2653,12 @@ ] }, "time": { - "from": "now-15m", + "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Gasoline", "uid": "636d22f9-d18f-4086-8b45-7c50886a105c", - "version": 7 + "version": 9 } \ No newline at end of file diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/guard.json b/engine/docker/dev-multidc/core/grafana/dashboards/guard.json index f6190ff52c..cd9d61f4a6 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/guard.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/guard.json @@ 
-1352,7 +1352,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -1360,7 +1360,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1379,7 +1379,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -1387,7 +1387,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/operation.json b/engine/docker/dev-multidc/core/grafana/dashboards/operation.json index 7ccc0d3e0c..d4348cec8f 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/operation.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/operation.json @@ -780,7 +780,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -788,7 +788,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -809,7 +809,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", 
"includeAll": true, "label": "Datacenter", "multi": true, @@ -817,7 +817,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/pegboard.json b/engine/docker/dev-multidc/core/grafana/dashboards/pegboard.json index 52bd7422d4..8ed083805d 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/pegboard.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/pegboard.json @@ -196,9 +196,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (rivet_datacenter) (rivet_pegboard_actor_pending_allocation{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (rivet_datacenter) (\n max by(rivet_project, rivet_datacenter) (\n rivet_pegboard_actor_pending_allocation{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -381,9 +381,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (rivet_datacenter) (rivet_pegboard_serverless_desired_slots{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (rivet_datacenter) (\n max by(rivet_project, rivet_datacenter) (\n rivet_pegboard_serverless_desired_slots{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * 
on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -487,7 +487,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_serverless_outbound_req_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -591,7 +591,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_serverless_outbound_req_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -695,7 +695,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_runner_connection_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -799,7 +799,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_runner_connection_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -903,7 +903,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) 
(rivet_pegboard_event_multiplexer_count{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1007,7 +1007,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_ingested_events_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1192,7 +1192,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_runner_version_upgrade_drain_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1209,10 +1209,10 @@ { "current": { "text": [ - "prod" + "staging" ], "value": [ - "prod" + "staging" ] }, "datasource": { @@ -1270,5 +1270,5 @@ "timezone": "browser", "title": "Pegboard", "uid": "afa77odsjjk74d", - "version": 1 + "version": 2 } \ No newline at end of file diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/tokio.json b/engine/docker/dev-multidc/core/grafana/dashboards/tokio.json index e9d9771791..786a3aeada 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/tokio.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/tokio.json @@ -846,7 +846,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -854,7 +854,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -875,7 +875,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": 
"label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -883,7 +883,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multidc/datacenters/dc-a/rivet-engine/config.jsonc b/engine/docker/dev-multidc/datacenters/dc-a/rivet-engine/config.jsonc index dff69c3809..4c9e465d85 100644 --- a/engine/docker/dev-multidc/datacenters/dc-a/rivet-engine/config.jsonc +++ b/engine/docker/dev-multidc/datacenters/dc-a/rivet-engine/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-a", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc/datacenters/dc-b/rivet-engine/config.jsonc b/engine/docker/dev-multidc/datacenters/dc-b/rivet-engine/config.jsonc index 6fa7e3b42d..f35557210f 100644 --- a/engine/docker/dev-multidc/datacenters/dc-b/rivet-engine/config.jsonc +++ b/engine/docker/dev-multidc/datacenters/dc-b/rivet-engine/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-b", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multidc/datacenters/dc-c/rivet-engine/config.jsonc b/engine/docker/dev-multidc/datacenters/dc-c/rivet-engine/config.jsonc index 
95818a2bfb..c12f4bdc1d 100644 --- a/engine/docker/dev-multidc/datacenters/dc-c/rivet-engine/config.jsonc +++ b/engine/docker/dev-multidc/datacenters/dc-c/rivet-engine/config.jsonc @@ -61,17 +61,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client-dc-c", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multinode/grafana/dashboards/api.json b/engine/docker/dev-multinode/grafana/dashboards/api.json index 2623796db4..2af2eb2b2d 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/api.json +++ b/engine/docker/dev-multinode/grafana/dashboards/api.json @@ -940,7 +940,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -948,7 +948,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -967,7 +967,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -975,7 +975,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multinode/grafana/dashboards/cache.json b/engine/docker/dev-multinode/grafana/dashboards/cache.json index 282e099a8e..921af7008d 100644 --- 
a/engine/docker/dev-multinode/grafana/dashboards/cache.json +++ b/engine/docker/dev-multinode/grafana/dashboards/cache.json @@ -830,7 +830,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -838,7 +838,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -857,7 +857,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -865,7 +865,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multinode/grafana/dashboards/futures.json b/engine/docker/dev-multinode/grafana/dashboards/futures.json index fdca7320ea..b57d243698 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/futures.json +++ b/engine/docker/dev-multinode/grafana/dashboards/futures.json @@ -128,7 +128,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -136,7 +136,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -157,7 +157,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": 
"label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -165,7 +165,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multinode/grafana/dashboards/gasoline.json b/engine/docker/dev-multinode/grafana/dashboards/gasoline.json index 0c6616f4e3..64916e56f2 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/gasoline.json +++ b/engine/docker/dev-multinode/grafana/dashboards/gasoline.json @@ -91,7 +91,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -127,7 +128,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name) (rivet_gasoline_workflow_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -194,7 +195,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -230,7 +232,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (workflow_name) 
(rivet_gasoline_workflow_sleeping{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_sleeping{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -297,7 +299,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -333,7 +336,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name) (rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name) (\n max by(workflow_name, error, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -401,7 +404,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -437,7 +441,7 @@ "uid": "prometheus" }, "editorMode": 
"code", - "expr": "sum by (workflow_name, error) (rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"})", + "expr": "sum by (workflow_name, error) (\n max by(workflow_name, error, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_dead{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}: {{error}}", "range": true, @@ -605,7 +609,8 @@ "value": 80 } ] - } + }, + "unit": "sishort" }, "overrides": [] }, @@ -641,7 +646,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (signal_name) (rivet_gasoline_signal_pending{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (signal_name) (\n max by(signal_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_signal_pending{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{signal_name}}", "range": true, @@ -1183,11 +1188,187 @@ "overrides": [] }, "gridPos": { - "h": 8, + "h": 9, "w": 12, "x": 0, "y": 51 }, + "id": 20, + "interval": "15s", + "options": { + 
"calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "max": "60", + "min": 0, + "reverse": false, + "unit": "" + } + }, + "pluginVersion": "11.6.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(rivet_gasoline_cpu_usage_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Core Usage", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 26, + "interval": "15s", + "options": { + "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } + }, + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": 
"left", + "max": "60", + "min": 0, + "reverse": false, + "unit": "percentunit" + } + }, + "pluginVersion": "11.6.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(rivet_gasoline_load_shedding_ratio_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Load Shedding Ratio", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 60 + }, "id": 23, "interval": "15s", "options": { @@ -1274,7 +1455,7 @@ "h": 8, "w": 12, "x": 12, - "y": 51 + "y": 60 }, "id": 24, "interval": "15s", @@ -1354,12 +1535,10 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 0.1, - "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1368,6 +1547,9 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -1377,14 +1559,13 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1397,7 +1578,7 @@ } ] }, - "unit": "s" + "unit": "none" }, "overrides": [] }, @@ -1405,23 +1586,20 @@ "h": 8, "w": 12, "x": 0, - "y": 59 + "y": 68 }, - "id": 13, + "id": 36, + "interval": "15s", "options": { "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", + 
"calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": false, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -1433,14 +1611,14 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (worker_id) (rivet_gasoline_last_pull_workflows_duration{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", - "instant": false, - "legendFormat": "{{worker_id}}", + "expr": "sum by (sub_workflow_name) (rate(rivet_gasoline_workflow_dispatched_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"} [$__rate_interval]))", + "format": "heatmap", + "legendFormat": "{{signal_name}}", "range": true, "refId": "A" } ], - "title": "Last Pull Workflows Duration", + "title": "Workflows Dispatched/s", "type": "timeseries" }, { @@ -1459,12 +1637,10 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 0.1, - "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1473,6 +1649,9 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -1482,14 +1661,13 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1502,7 +1680,7 @@ } ] }, - "unit": "s" + "unit": "none" }, "overrides": [] }, @@ -1510,200 +1688,21 @@ "h": 8, "w": 12, "x": 12, - "y": 59 + "y": 68 }, - "id": 14, + "id": 35, + "interval": "15s", "options": { "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": false, - "sortBy": "Last *", - 
"sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "max by (worker_id) (rivet_gasoline_last_pull_workflows_history_duration{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", - "instant": false, - "legendFormat": "{{worker_id}}", - "range": true, - "refId": "A" - } - ], - "title": "Last Pull Workflows History Duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 67 - }, - "id": 20, - "interval": "15s", - "options": { - "calculate": false, - "calculation": { - "xBuckets": { - "mode": "size" - } - }, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "max": "60", - "min": 0, - "reverse": false, - "unit": "" - } - }, - "pluginVersion": "11.6.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_cpu_usage_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", - "format": "heatmap", - "legendFormat": "{{le}}", - "range": true, - "refId": "A" - } - ], - "title": "CPU Core 
Usage", - "type": "heatmap" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 67 - }, - "id": 26, - "interval": "15s", - "options": { - "calculate": false, - "calculation": { - "xBuckets": { - "mode": "size" - } - }, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "max": "60", - "min": 0, - "reverse": false, - "unit": "percentunit" + "sort": "none" } }, "pluginVersion": "11.6.7", @@ -1714,15 +1713,15 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_load_shedding_ratio_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "expr": "sum by (sub_workflow_name) (rate(rivet_gasoline_workflow_dispatched_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=\"\"} [$__rate_interval]))", "format": "heatmap", - "legendFormat": "{{le}}", + "legendFormat": "{{sub_workflow_name}}", "range": true, "refId": "A" } ], - "title": "Load Shedding Ratio", - "type": "heatmap" + "title": "Workflows Dispatched/s (Not From Workflow)", + "type": "timeseries" }, { "datasource": { @@ -2162,41 +2161,9 @@ } ] }, - "unit": "none" + "unit": "sishort" }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - 
"names": [ - "epoxy_coordinator", - "epoxy_purger", - "namespace", - "pegboard_runner", - "pegboard_runner2", - "pegboard_runner_pool", - "pegboard_runner_pool_error_tracker", - "pegboard_serverless_conn" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 9, @@ -2612,7 +2579,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -2620,7 +2587,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2630,7 +2597,9 @@ }, { "current": { - "text": "All", + "text": [ + "All" + ], "value": [ "$__all" ] @@ -2639,7 +2608,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -2647,7 +2616,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2657,9 +2626,7 @@ }, { "current": { - "text": [ - "All" - ], + "text": "All", "value": [ "$__all" ] @@ -2686,12 +2653,12 @@ ] }, "time": { - "from": "now-15m", + "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Gasoline", "uid": "636d22f9-d18f-4086-8b45-7c50886a105c", - "version": 7 + "version": 9 } \ No newline at end of file diff --git a/engine/docker/dev-multinode/grafana/dashboards/guard.json 
b/engine/docker/dev-multinode/grafana/dashboards/guard.json index f6190ff52c..cd9d61f4a6 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/guard.json +++ b/engine/docker/dev-multinode/grafana/dashboards/guard.json @@ -1352,7 +1352,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -1360,7 +1360,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1379,7 +1379,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -1387,7 +1387,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multinode/grafana/dashboards/operation.json b/engine/docker/dev-multinode/grafana/dashboards/operation.json index 7ccc0d3e0c..d4348cec8f 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/operation.json +++ b/engine/docker/dev-multinode/grafana/dashboards/operation.json @@ -780,7 +780,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -788,7 +788,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -809,7 
+809,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -817,7 +817,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multinode/grafana/dashboards/pegboard.json b/engine/docker/dev-multinode/grafana/dashboards/pegboard.json index 52bd7422d4..8ed083805d 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/pegboard.json +++ b/engine/docker/dev-multinode/grafana/dashboards/pegboard.json @@ -196,9 +196,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (rivet_datacenter) (rivet_pegboard_actor_pending_allocation{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (rivet_datacenter) (\n max by(rivet_project, rivet_datacenter) (\n rivet_pegboard_actor_pending_allocation{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -381,9 +381,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (rivet_datacenter) 
(rivet_pegboard_serverless_desired_slots{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (rivet_datacenter) (\n max by(rivet_project, rivet_datacenter) (\n rivet_pegboard_serverless_desired_slots{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -487,7 +487,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_serverless_outbound_req_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -591,7 +591,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_serverless_outbound_req_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -695,7 +695,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_runner_connection_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -799,7 +799,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_runner_connection_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} 
[$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -903,7 +903,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_event_multiplexer_count{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1007,7 +1007,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_ingested_events_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1192,7 +1192,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_runner_version_upgrade_drain_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1209,10 +1209,10 @@ { "current": { "text": [ - "prod" + "staging" ], "value": [ - "prod" + "staging" ] }, "datasource": { @@ -1270,5 +1270,5 @@ "timezone": "browser", "title": "Pegboard", "uid": "afa77odsjjk74d", - "version": 1 + "version": 2 } \ No newline at end of file diff --git a/engine/docker/dev-multinode/grafana/dashboards/tokio.json b/engine/docker/dev-multinode/grafana/dashboards/tokio.json index e9d9771791..786a3aeada 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/tokio.json +++ b/engine/docker/dev-multinode/grafana/dashboards/tokio.json @@ -846,7 +846,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -854,7 +854,7 @@ "options": [], "query": { "qryType": 1, - "query": 
"label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -875,7 +875,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -883,7 +883,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev-multinode/rivet-engine/0/config.jsonc b/engine/docker/dev-multinode/rivet-engine/0/config.jsonc index 78d35ea7a7..31b5ce77fe 100644 --- a/engine/docker/dev-multinode/rivet-engine/0/config.jsonc +++ b/engine/docker/dev-multinode/rivet-engine/0/config.jsonc @@ -37,17 +37,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev-multinode/rivet-engine/1/config.jsonc b/engine/docker/dev-multinode/rivet-engine/1/config.jsonc index 78d35ea7a7..31b5ce77fe 100644 --- a/engine/docker/dev-multinode/rivet-engine/1/config.jsonc +++ b/engine/docker/dev-multinode/rivet-engine/1/config.jsonc @@ -37,17 +37,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client", - "port": 5022 } } \ No newline at end of file diff --git 
a/engine/docker/dev-multinode/rivet-engine/2/config.jsonc b/engine/docker/dev-multinode/rivet-engine/2/config.jsonc index 78d35ea7a7..31b5ce77fe 100644 --- a/engine/docker/dev-multinode/rivet-engine/2/config.jsonc +++ b/engine/docker/dev-multinode/rivet-engine/2/config.jsonc @@ -37,17 +37,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/dev/grafana/dashboards/api.json b/engine/docker/dev/grafana/dashboards/api.json index 2623796db4..2af2eb2b2d 100644 --- a/engine/docker/dev/grafana/dashboards/api.json +++ b/engine/docker/dev/grafana/dashboards/api.json @@ -940,7 +940,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -948,7 +948,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -967,7 +967,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -975,7 +975,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev/grafana/dashboards/cache.json b/engine/docker/dev/grafana/dashboards/cache.json index 282e099a8e..921af7008d 
100644 --- a/engine/docker/dev/grafana/dashboards/cache.json +++ b/engine/docker/dev/grafana/dashboards/cache.json @@ -830,7 +830,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -838,7 +838,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -857,7 +857,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -865,7 +865,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev/grafana/dashboards/futures.json b/engine/docker/dev/grafana/dashboards/futures.json index fdca7320ea..b57d243698 100644 --- a/engine/docker/dev/grafana/dashboards/futures.json +++ b/engine/docker/dev/grafana/dashboards/futures.json @@ -128,7 +128,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -136,7 +136,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -157,7 +157,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": 
"label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -165,7 +165,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev/grafana/dashboards/gasoline.json b/engine/docker/dev/grafana/dashboards/gasoline.json index 5bfb0bcb83..64916e56f2 100644 --- a/engine/docker/dev/grafana/dashboards/gasoline.json +++ b/engine/docker/dev/grafana/dashboards/gasoline.json @@ -94,48 +94,7 @@ }, "unit": "sishort" }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "epoxy_coordinator", - "epoxy_purger", - "epoxy_replica", - "namespace", - "namespace_metrics_exporter", - "pegboard_actor", - "pegboard_actor_metrics", - "pegboard_actor_runner_name_selector_backfill", - "pegboard_runner", - "pegboard_runner2", - "pegboard_runner_pool", - "pegboard_runner_pool_error_tracker", - "pegboard_runner_pool_metadata_poller", - "pegboard_serverless_backfill", - "pegboard_serverless_conn", - "pegboard_serverless_runner", - "pegboard_serverless_runner2" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, @@ -273,7 +232,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (workflow_name) (\n max by(workflow_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n 
(rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", + "expr": "sum by (workflow_name) (\n max by(workflow_name, rivet_project, rivet_datacenter) (\n rivet_gasoline_workflow_sleeping{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, "legendFormat": "{{workflow_name}}", "range": true, @@ -1229,12 +1188,12 @@ "overrides": [] }, "gridPos": { - "h": 8, + "h": 9, "w": 12, "x": 0, "y": 51 }, - "id": 23, + "id": 20, "interval": "15s", "options": { "calculate": false, @@ -1275,7 +1234,7 @@ "max": "60", "min": 0, "reverse": false, - "unit": "s" + "unit": "" } }, "pluginVersion": "11.6.7", @@ -1286,14 +1245,14 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_pull_workflows_duration_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "expr": "sum(increase(rivet_gasoline_cpu_usage_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", "format": "heatmap", "legendFormat": "{{le}}", "range": true, "refId": "A" } ], - "title": "Pull Workflows Duration", + "title": "CPU Core Usage", "type": "heatmap" }, { @@ -1317,12 +1276,12 @@ "overrides": [] }, "gridPos": { - "h": 8, + "h": 9, "w": 12, "x": 12, "y": 51 }, - "id": 
24, + "id": 26, "interval": "15s", "options": { "calculate": false, @@ -1363,7 +1322,7 @@ "max": "60", "min": 0, "reverse": false, - "unit": "s" + "unit": "percentunit" } }, "pluginVersion": "11.6.7", @@ -1374,14 +1333,14 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_pull_workflows_history_duration_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "expr": "sum(increase(rivet_gasoline_load_shedding_ratio_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", "format": "heatmap", "legendFormat": "{{le}}", "range": true, "refId": "A" } ], - "title": "Pull Workflows History Duration", + "title": "Load Shedding Ratio", "type": "heatmap" }, { @@ -1391,59 +1350,16 @@ }, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMax": 0.1, - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, "scaleDistribution": { "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + } }, "overrides": [] }, @@ -1451,129 +1367,50 @@ "h": 8, "w": 12, "x": 0, - "y": 59 + "y": 60 }, - "id": 13, + "id": 23, + "interval": "15s", "options": { - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false, - 
"sortBy": "Last *", - "sortDesc": true + "calculate": false, + "calculation": { + "xBuckets": { + "mode": "size" + } }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "max by (worker_id) (rivet_gasoline_last_pull_workflows_duration{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", - "instant": false, - "legendFormat": "{{worker_id}}", - "range": true, - "refId": "A" - } - ], - "title": "Last Pull Workflows Duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMax": 0.1, - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdBu", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 59 - }, - 
"id": 14, - "options": { "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false, - "sortBy": "Last *", - "sortDesc": true + "show": true + }, + "rowsFrame": { + "layout": "auto" }, "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "max": "60", + "min": 0, + "reverse": false, + "unit": "s" } }, "pluginVersion": "11.6.7", @@ -1584,15 +1421,15 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "max by (worker_id) (rivet_gasoline_last_pull_workflows_history_duration{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", - "instant": false, - "legendFormat": "{{worker_id}}", + "expr": "sum(increase(rivet_gasoline_pull_workflows_duration_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", "range": true, "refId": "A" } ], - "title": "Last Pull Workflows History Duration", - "type": "timeseries" + "title": "Pull Workflows Duration", + "type": "heatmap" }, { "datasource": { @@ -1615,12 +1452,12 @@ "overrides": [] }, "gridPos": { - "h": 9, + "h": 8, "w": 12, - "x": 0, - "y": 67 + "x": 12, + "y": 60 }, - "id": 20, + "id": 24, "interval": "15s", "options": { "calculate": false, @@ -1661,7 +1498,7 @@ "max": "60", "min": 0, "reverse": false, - "unit": "" + "unit": "s" } }, "pluginVersion": "11.6.7", @@ -1672,14 +1509,14 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_cpu_usage_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "expr": "sum(increase(rivet_gasoline_pull_workflows_history_duration_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", "format": "heatmap", "legendFormat": "{{le}}", "range": true, "refId": "A" } ], - 
"title": "CPU Core Usage", + "title": "Pull Workflows History Duration", "type": "heatmap" }, { @@ -1689,67 +1526,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 8, "w": 12, - "x": 12, - "y": 67 + "x": 0, + "y": 68 }, - "id": 26, + "id": 36, "interval": "15s", "options": { - "calculate": false, - "calculation": { - "xBuckets": { - "mode": "size" - } - }, - "cellGap": 0, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "max": "60", - "min": 0, - "reverse": false, - "unit": "percentunit" + "sort": "none" } }, "pluginVersion": "11.6.7", @@ -1760,15 +1611,15 @@ "uid": 
"prometheus" }, "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_load_shedding_ratio_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval])) by (le)", + "expr": "sum by (sub_workflow_name) (rate(rivet_gasoline_workflow_dispatched_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"} [$__rate_interval]))", "format": "heatmap", - "legendFormat": "{{le}}", + "legendFormat": "{{signal_name}}", "range": true, "refId": "A" } ], - "title": "Load Shedding Ratio", - "type": "heatmap" + "title": "Workflows Dispatched/s", + "type": "timeseries" }, { "datasource": { @@ -1777,68 +1628,81 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" } - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 76 + "x": 12, + "y": 68 }, - "id": 34, + "id": 35, "interval": "15s", "options": { - "calculate": false, - "calculation": { - "xBuckets": { - "mode": "size" - } - }, - "cellGap": 0, - "cellValues": {}, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdBu", - 
"steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "single", - "showColorScale": false, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "max": "60", - "min": 0, - "reverse": false, - "unit": "s" + "sort": "none" } }, "pluginVersion": "11.6.7", @@ -1849,15 +1713,15 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_workflow_wake_delta_duration_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"} [$__rate_interval])) by (le)", + "expr": "sum by (sub_workflow_name) (rate(rivet_gasoline_workflow_dispatched_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=\"\"} [$__rate_interval]))", "format": "heatmap", - "legendFormat": "{{le}}", + "legendFormat": "{{sub_workflow_name}}", "range": true, "refId": "A" } ], - "title": "Workflow Wake Delta", - "type": "heatmap" + "title": "Workflows Dispatched/s (Not From Workflow)", + "type": "timeseries" }, { "datasource": { @@ -1882,10 +1746,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, + "x": 0, "y": 76 }, - "id": 35, + "id": 34, "interval": "15s", "options": { "calculate": false, @@ -1927,7 +1791,7 @@ "max": "60", "min": 0, "reverse": false, - "unit": "none" + "unit": "s" } }, "pluginVersion": "11.6.7", @@ -1938,14 +1802,14 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(increase(rivet_gasoline_workflow_leased_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"} [$__rate_interval])) by (le)", + "expr": "sum(increase(rivet_gasoline_workflow_wake_delta_duration_bucket{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\",workflow_name=~\"$workflow_name\"} 
[$__rate_interval])) by (le)", "format": "heatmap", "legendFormat": "{{le}}", "range": true, "refId": "A" } ], - "title": "Workflow Leases Per Tick", + "title": "Workflow Wake Delta", "type": "heatmap" }, { @@ -2705,17 +2569,17 @@ { "current": { "text": [ - "All" + "prod" ], "value": [ - "$__all" + "prod" ] }, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -2723,7 +2587,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2733,7 +2597,9 @@ }, { "current": { - "text": "All", + "text": [ + "All" + ], "value": [ "$__all" ] @@ -2742,7 +2608,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -2750,7 +2616,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2787,7 +2653,7 @@ ] }, "time": { - "from": "now-15m", + "from": "now-30m", "to": "now" }, "timepicker": {}, diff --git a/engine/docker/dev/grafana/dashboards/guard.json b/engine/docker/dev/grafana/dashboards/guard.json index f6190ff52c..cd9d61f4a6 100644 --- a/engine/docker/dev/grafana/dashboards/guard.json +++ b/engine/docker/dev/grafana/dashboards/guard.json @@ -1352,7 +1352,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", 
"multi": true, @@ -1360,7 +1360,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1379,7 +1379,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -1387,7 +1387,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev/grafana/dashboards/operation.json b/engine/docker/dev/grafana/dashboards/operation.json index 7ccc0d3e0c..d4348cec8f 100644 --- a/engine/docker/dev/grafana/dashboards/operation.json +++ b/engine/docker/dev/grafana/dashboards/operation.json @@ -780,7 +780,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -788,7 +788,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -809,7 +809,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -817,7 +817,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": 
"label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev/grafana/dashboards/pegboard.json b/engine/docker/dev/grafana/dashboards/pegboard.json index 52bd7422d4..8ed083805d 100644 --- a/engine/docker/dev/grafana/dashboards/pegboard.json +++ b/engine/docker/dev/grafana/dashboards/pegboard.json @@ -196,9 +196,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (rivet_datacenter) (rivet_pegboard_actor_pending_allocation{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (rivet_datacenter) (\n max by(rivet_project, rivet_datacenter) (\n rivet_pegboard_actor_pending_allocation{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -381,9 +381,9 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum by (rivet_datacenter) (rivet_pegboard_serverless_desired_slots{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", + "expr": "sum by (rivet_datacenter) (\n max by(rivet_project, rivet_datacenter) (\n rivet_pegboard_serverless_desired_slots{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}\n * on(k8s_pod_name) group_left(rivet_project, rivet_datacenter)\n (\n (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} == bool\n on(rivet_project, rivet_datacenter) group_left()\n max by (rivet_project, 
rivet_datacenter) (rivet_gasoline_worker_last_metrics_publish{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"}))\n )\n )\n)", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -487,7 +487,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_serverless_outbound_req_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -591,7 +591,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_serverless_outbound_req_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -695,7 +695,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_runner_connection_active{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -799,7 +799,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rate(rivet_pegboard_runner_connection_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -903,7 +903,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_event_multiplexer_count{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1007,7 +1007,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) 
(rate(rivet_pegboard_ingested_events_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"} [$__rate_interval]))", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1192,7 +1192,7 @@ "editorMode": "code", "expr": "sum by (rivet_datacenter) (rivet_pegboard_runner_version_upgrade_drain_total{rivet_project=~\"$project\",rivet_datacenter=~\"$datacenter\"})", "instant": false, - "legendFormat": "{{workflow_name}}", + "legendFormat": "{{rivet_datacenter}}", "range": true, "refId": "A" } @@ -1209,10 +1209,10 @@ { "current": { "text": [ - "prod" + "staging" ], "value": [ - "prod" + "staging" ] }, "datasource": { @@ -1270,5 +1270,5 @@ "timezone": "browser", "title": "Pegboard", "uid": "afa77odsjjk74d", - "version": 1 + "version": 2 } \ No newline at end of file diff --git a/engine/docker/dev/grafana/dashboards/tokio.json b/engine/docker/dev/grafana/dashboards/tokio.json index e9d9771791..786a3aeada 100644 --- a/engine/docker/dev/grafana/dashboards/tokio.json +++ b/engine/docker/dev/grafana/dashboards/tokio.json @@ -846,7 +846,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values(rivet_project)", + "definition": "label_values(up, rivet_project)", "includeAll": true, "label": "Project", "multi": true, @@ -854,7 +854,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(rivet_project)", + "query": "label_values(up, rivet_project)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -875,7 +875,7 @@ "type": "prometheus", "uid": "prometheus" }, - "definition": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "definition": "label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "includeAll": true, "label": "Datacenter", "multi": true, @@ -883,7 +883,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values({rivet_project=~\"$project\"},rivet_datacenter)", + "query": 
"label_values(up{rivet_project=~\"$project\"},rivet_datacenter)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/engine/docker/dev/rivet-engine/config.jsonc b/engine/docker/dev/rivet-engine/config.jsonc index 9c5ea86073..3158c9bf72 100644 --- a/engine/docker/dev/rivet-engine/config.jsonc +++ b/engine/docker/dev/rivet-engine/config.jsonc @@ -37,17 +37,6 @@ "native_url": "http://clickhouse:9301", "username": "system", "password": "default", - "provision_users": { - "vector": { - "username": "vector", - "password": "vector", - "role": "write" - } - }, "secure": false - }, - "vector_http": { - "host": "vector-client", - "port": 5022 } } \ No newline at end of file diff --git a/engine/docker/template/src/docker-compose.ts b/engine/docker/template/src/docker-compose.ts index 5e7dd6719b..62ad7279e3 100644 --- a/engine/docker/template/src/docker-compose.ts +++ b/engine/docker/template/src/docker-compose.ts @@ -343,6 +343,8 @@ export function generateDockerCompose(context: TemplateContext) { environment: [ `RIVET_ENDPOINT=http://${context.getServiceHost("rivet-engine", datacenter.name, 0)}:6420`, `RIVET_RUNNER_TOTAL_SLOTS=1000000`, + `AUTOSTART_RUNNER=1`, + `AUTOCONFIGURE_SERVERLESS=0` ], stop_grace_period: "4s", ports: isPrimary && i === 0 ? 
[`5050:5050`] : undefined, diff --git a/engine/docker/template/src/services/edge/rivet-engine.ts b/engine/docker/template/src/services/edge/rivet-engine.ts index 4fa1bf00fa..d388c6dcaa 100644 --- a/engine/docker/template/src/services/edge/rivet-engine.ts +++ b/engine/docker/template/src/services/edge/rivet-engine.ts @@ -63,10 +63,6 @@ export function generateDatacenterRivetEngine( password: "default", secure: false, }, - vector_http: { - host: context.getServiceHost("vector-client", datacenter.name), - port: 5022, - }, }; context.writeDatacenterServiceFile( diff --git a/engine/packages/config/src/config/pegboard.rs b/engine/packages/config/src/config/pegboard.rs index d3817dc548..75e9df57c4 100644 --- a/engine/packages/config/src/config/pegboard.rs +++ b/engine/packages/config/src/config/pegboard.rs @@ -178,6 +178,19 @@ pub struct Pegboard { pub runner_event_demuxer_gc_interval_ms: Option, /// Max time since last seen before actor is considered stale, in milliseconds. pub runner_event_demuxer_max_last_seen_ms: Option, + + /// Drain grace period for serverless runners. + /// + /// This time is subtracted from the configured request duration. Once `duration - grace` is reached, the + /// runner is sent stop commands for all of its actors. After the grace period is over (i.e. the full + /// duration is reached) the runner websocket is forcibly closed. + /// + /// Default is 10 seconds. + /// + /// Unit is in milliseconds. 
+ /// + /// **Experimental** + pub serverless_drain_grace_period: Option, } impl Pegboard { @@ -370,4 +383,8 @@ impl Pegboard { pub fn runner_event_demuxer_max_last_seen_ms(&self) -> u64 { self.runner_event_demuxer_max_last_seen_ms.unwrap_or(30_000) } + + pub fn serverless_drain_grace_period(&self) -> u64 { + self.serverless_drain_grace_period.unwrap_or(10_000) + } } diff --git a/engine/packages/epoxy/src/http_client.rs b/engine/packages/epoxy/src/http_client.rs index 1eee2eee76..1e3a0d4af0 100644 --- a/engine/packages/epoxy/src/http_client.rs +++ b/engine/packages/epoxy/src/http_client.rs @@ -1,4 +1,4 @@ -use anyhow::*; +use anyhow::{Context, Result, bail}; use epoxy_protocol::{ PROTOCOL_VERSION, protocol::{self, ReplicaId}, @@ -37,7 +37,13 @@ where Fut: Future> + Send, T: Send, { - let quorum_size = utils::calculate_quorum(replica_ids.len(), quorum_type); + let target_responses = utils::calculate_fanout_quorum(replica_ids.len(), quorum_type); + + if target_responses == 0 { + tracing::warn!("no fanout, target is 0"); + + return Ok(Vec::new()); + } // Create futures for all replicas (excluding the sender) let mut responses = futures_util::stream::iter( @@ -57,32 +63,22 @@ where ) .collect::>() .await; - tracing::debug!(?quorum_size, len = ?responses.len(), ?quorum_type, "fanout quorum size"); - - // Choose how many successful responses we need before considering a success - let target_responses = match quorum_type { - // Only require 1 response - utils::QuorumType::Any => 1, - // Include all responses - utils::QuorumType::All => responses.len(), - // Subtract 1 from quorum size since we're not counting ourselves - utils::QuorumType::Fast | utils::QuorumType::Slow => quorum_size - 1, - }; + tracing::debug!(?target_responses, len=?responses.len(), "fanout target"); // Collect responses until we reach quorum or all futures complete let mut successful_responses = Vec::new(); while successful_responses.len() < target_responses { if let Some(response) = 
responses.next().await { match response { - std::result::Result::Ok(result) => match result { - std::result::Result::Ok(response) => { + Ok(result) => match result { + Ok(response) => { successful_responses.push(response); } - std::result::Result::Err(err) => { + Err(err) => { tracing::warn!(?err, "received error from replica"); } }, - std::result::Result::Err(err) => { + Err(err) => { tracing::warn!(?err, "received timeout from replica"); } } @@ -159,8 +155,8 @@ pub async fn send_message_to_address( .await; let response = match response_result { - std::result::Result::Ok(resp) => resp, - std::result::Result::Err(e) => { + Ok(resp) => resp, + Err(e) => { tracing::error!( to_replica = to_replica_id, replica_url = %replica_url, diff --git a/engine/packages/epoxy/src/utils.rs b/engine/packages/epoxy/src/utils.rs index cd7f51953a..a0840a0102 100644 --- a/engine/packages/epoxy/src/utils.rs +++ b/engine/packages/epoxy/src/utils.rs @@ -43,12 +43,54 @@ pub fn get_all_replicas(config: &protocol::ClusterConfig) -> Vec { config.replicas.iter().map(|r| r.replica_id).collect() } +// See EPaxos 4.3 pub fn calculate_quorum(n: usize, q: QuorumType) -> usize { - match q { - QuorumType::Fast => (n * 3) / 4 + 1, - QuorumType::Slow => n / 2 + 1, - QuorumType::All => n, - QuorumType::Any => 1, + match n { + // Nonsensical + 0 => 0, + 1 => 1, + // EPaxos does not apply to clusters with N < 3 because you cannot tolerate any faults. However we can + // still get correctness invariants to hold by requiring both nodes to agree on everything (quorum + // size is always 2) + 2 => match q { + QuorumType::Fast => 2, + QuorumType::Slow => 2, + QuorumType::All => 2, + QuorumType::Any => 1, + }, + // Note that for even N's we don't gain any extra fault tolerance but we get potentially better read + // latency. N=4 acts like N=3 in terms of fault tolerance. 
+ n => { + let f = (n - 1) / 2; + + match q { + QuorumType::Fast => f + (f + 1) / 2, + QuorumType::Slow => f + 1, + QuorumType::All => n, + QuorumType::Any => 1, + } + } + } +} + +/// Calculates quorum size assuming the sender is excluded. +pub fn calculate_fanout_quorum(n: usize, q: QuorumType) -> usize { + match n { + // Nonsensical + 0 => 0, + 1 => 0, + // NOTE: See comments in `calculate_quorum` + 2 => 1, + n => { + let f = (n - 1) / 2; + + match q { + QuorumType::Fast => (f + (f + 1) / 2) - 1, + QuorumType::Slow => f, + QuorumType::All => n - 1, + QuorumType::Any => 1, + } + } } } diff --git a/engine/packages/gasoline/src/db/kv/mod.rs b/engine/packages/gasoline/src/db/kv/mod.rs index 1f51e40295..3be187cd45 100644 --- a/engine/packages/gasoline/src/db/kv/mod.rs +++ b/engine/packages/gasoline/src/db/kv/mod.rs @@ -481,7 +481,7 @@ impl Database for DatabaseKv { let start = Instant::now(); let now = rivet_util::timestamp::now(); - let mut last_ping_cache: Vec<(Id, i64)> = Vec::new(); + let mut last_ping_cache = HashMap::::new(); let mut lost_worker_ids = HashSet::new(); let mut expired_workflow_count = 0; @@ -518,8 +518,8 @@ impl Database for DatabaseKv { let last_ping_ts_key = keys::worker::LastPingTsKey::new(worker_id); // Get last ping of worker for this lease - let last_ping_ts = if let Some((_, last_ping_ts)) = - last_ping_cache.iter().find(|(k, _)| k == &worker_id) + let last_ping_ts = if let Some(last_ping_ts) = + last_ping_cache.get(&worker_id) { *last_ping_ts } else if let Some(last_ping_entry) = tx @@ -534,12 +534,12 @@ impl Database for DatabaseKv { let last_ping_ts = last_ping_ts_key.deserialize(&last_ping_entry)?; // Update cache - last_ping_cache.push((worker_id, last_ping_ts)); + last_ping_cache.insert(worker_id, last_ping_ts); last_ping_ts } else { // Update cache - last_ping_cache.push((worker_id, 0)); + last_ping_cache.insert(worker_id, 0); 0 }; diff --git a/engine/packages/guard-core/src/errors.rs b/engine/packages/guard-core/src/errors.rs index 
da75f5bd71..dc774b2a42 100644 --- a/engine/packages/guard-core/src/errors.rs +++ b/engine/packages/guard-core/src/errors.rs @@ -1,6 +1,24 @@ use rivet_error::*; use serde::{Deserialize, Serialize}; +#[derive(RivetError, Serialize, Deserialize)] +#[error( + "guard", + "invalid_request_body", + "Unable to parse request body.", + "Unable to parse request body: {0}." +)] +pub struct InvalidRequestBody(pub String); + +#[derive(RivetError, Serialize, Deserialize)] +#[error( + "guard", + "invalid_response_body", + "Unable to parse response body.", + "Unable to parse response body: {0}." +)] +pub struct InvalidResponseBody(pub String); + #[derive(RivetError, Serialize, Deserialize)] #[error( "guard", @@ -42,7 +60,7 @@ pub struct UriParseError(pub String); pub struct RequestBuildError(pub String); #[derive(RivetError)] -#[error("guard", "upstream_error", "Upstream error.", "Upstream error: {0}")] +#[error("guard", "upstream_error", "Upstream error.", "Upstream error: {0}.")] pub struct UpstreamError(pub String); #[derive(RivetError, Serialize, Deserialize)] diff --git a/engine/packages/guard-core/src/proxy_service.rs b/engine/packages/guard-core/src/proxy_service.rs index a63f53c6ca..bda3624061 100644 --- a/engine/packages/guard-core/src/proxy_service.rs +++ b/engine/packages/guard-core/src/proxy_service.rs @@ -1,7 +1,7 @@ use anyhow::{Context, Result, bail, ensure}; use bytes::Bytes; use futures_util::{SinkExt, StreamExt}; -use http_body_util::{BodyExt, Full}; +use http_body_util::{BodyExt, Full, Limited}; use hyper::{ Request, Response, StatusCode, body::Incoming as BodyIncoming, @@ -40,6 +40,7 @@ use crate::{ pub const X_FORWARDED_FOR: HeaderName = HeaderName::from_static("x-forwarded-for"); pub const X_RIVET_ERROR: HeaderName = HeaderName::from_static("x-rivet-error"); +pub const MAX_BODY_SIZE: usize = rivet_util::size::mebibytes(20) as usize; const PROXY_STATE_CACHE_TTL: Duration = Duration::from_secs(60 * 60); // 1 hour const WEBSOCKET_CLOSE_LINGER: Duration = 
Duration::from_millis(100); // Keep TCP connection open briefly after WebSocket close @@ -723,13 +724,11 @@ impl ProxyService { ResolveRouteOutput::Target(mut target) => { // Read the request body before proceeding with retries let (req_parts, body) = req.into_parts(); - let req_body = match http_body_util::BodyExt::collect(body).await { - Ok(collected) => collected.to_bytes(), - Err(err) => { - tracing::debug!(?err, "Failed to read request body"); - Bytes::new() - } - }; + let req_body = Limited::new(body, MAX_BODY_SIZE) + .collect() + .await + .map_err(|err| errors::InvalidRequestBody(err.to_string()).build())? + .to_bytes(); // Use a value-returning loop to handle both errors and successful responses let mut attempts = 0; @@ -742,7 +741,8 @@ impl ProxyService { // Create the final request with body let proxied_req = builder - .body(Full::::new(req_body.clone())) + // NOTE: the `Bytes` type is cheaply cloneable, this is not resource intensive + .body(Full::new(req_body.clone())) .map_err(|err| errors::RequestBuildError(err.to_string()).build())?; // Send the request with timeout @@ -800,10 +800,13 @@ impl ProxyService { return Ok(Response::from_parts(parts, streaming_body)); } else { // For non-streaming responses, buffer as before - let body_bytes = match BodyExt::collect(body).await { - Ok(collected) => collected.to_bytes(), - Err(_) => Bytes::new(), - }; + let body_bytes = Limited::new(body, MAX_BODY_SIZE) + .collect() + .await + .map_err(|err| { + errors::InvalidResponseBody(err.to_string()).build() + })? 
+ .to_bytes(); let full_body = ResponseBody::Full(Full::new(body_bytes)); return Ok(Response::from_parts(parts, full_body)); @@ -857,15 +860,13 @@ impl ProxyService { ResolveRouteOutput::CustomServe(mut handler) => { // Collect request body let (req_parts, body) = req.into_parts(); - let collected_body = match http_body_util::BodyExt::collect(body).await { - Ok(collected) => collected.to_bytes(), - Err(err) => { - tracing::debug!(?err, "Failed to read request body"); - Bytes::new() - } - }; + let req_body = Limited::new(body, MAX_BODY_SIZE) + .collect() + .await + .map_err(|err| errors::InvalidRequestBody(err.to_string()).build())? + .to_bytes(); let req_collected = - hyper::Request::from_parts(req_parts, Full::::new(collected_body)); + hyper::Request::from_parts(req_parts, Full::::new(req_body)); // Attempt request let mut attempts = 0; diff --git a/engine/packages/guard-core/src/utils.rs b/engine/packages/guard-core/src/utils.rs index 8d3ecb17b6..5e1090d3df 100644 --- a/engine/packages/guard-core/src/utils.rs +++ b/engine/packages/guard-core/src/utils.rs @@ -181,6 +181,8 @@ pub(crate) fn err_into_response(err: anyhow::Error) -> Result StatusCode::SERVICE_UNAVAILABLE, ("guard", "actor_ready_timeout") => StatusCode::SERVICE_UNAVAILABLE, ("guard", "no_route") => StatusCode::NOT_FOUND, + ("guard", "invalid_request_body") => StatusCode::PAYLOAD_TOO_LARGE, + ("guard", "invalid_response_body") => StatusCode::BAD_GATEWAY, _ => StatusCode::BAD_REQUEST, }; diff --git a/engine/packages/pegboard-gateway/src/lib.rs b/engine/packages/pegboard-gateway/src/lib.rs index 73c7bf85f2..bd267ad50f 100644 --- a/engine/packages/pegboard-gateway/src/lib.rs +++ b/engine/packages/pegboard-gateway/src/lib.rs @@ -154,6 +154,7 @@ impl PegboardGateway { max_age: None, }); + // NOTE: Size constraints have already been applied by guard let body_bytes = req .into_body() .collect() diff --git a/engine/packages/pegboard-runner/Cargo.toml b/engine/packages/pegboard-runner/Cargo.toml index 
b350657a30..3566407f40 100644 --- a/engine/packages/pegboard-runner/Cargo.toml +++ b/engine/packages/pegboard-runner/Cargo.toml @@ -26,6 +26,7 @@ rivet-guard-core.workspace = true rivet-metrics.workspace = true rivet-runner-protocol.workspace = true rivet-runtime.workspace = true +rivet-types.workspace = true serde_bare.workspace = true serde_json.workspace = true serde.workspace = true diff --git a/engine/packages/pegboard-runner/src/conn.rs b/engine/packages/pegboard-runner/src/conn.rs index 4b3b5accb6..60e02a3788 100644 --- a/engine/packages/pegboard-runner/src/conn.rs +++ b/engine/packages/pegboard-runner/src/conn.rs @@ -6,13 +6,13 @@ use std::{ use anyhow::Context; use futures_util::StreamExt; use futures_util::TryStreamExt; -use gas::prelude::Id; use gas::prelude::*; use hyper_tungstenite::tungstenite::Message; use pegboard::ops::runner::update_alloc_idx::{Action, RunnerEligibility}; use rivet_data::converted::{ActorNameKeyData, MetadataKeyData}; use rivet_guard_core::WebSocketHandle; use rivet_runner_protocol::{self as protocol, versioned}; +use rivet_types::runner_configs::RunnerConfigKind; use universaldb::prelude::*; use vbare::OwnedVersionedData; @@ -280,9 +280,13 @@ pub async fn handle_init( ) })?; - let missed_commands = ctx - .udb()? - .run(|tx| { + let udb = ctx.udb()?; + let (runner_config_res, missed_commands) = tokio::try_join!( + ctx.op(pegboard::ops::runner_config::get::Input { + runners: vec![(conn.namespace_id, conn.runner_name.clone())], + bypass_cache: false, + }), + udb.run(|tx| { let init = init.clone(); async move { let tx = tx.with_subspace(pegboard::keys::subspace()); @@ -367,15 +371,23 @@ pub async fn handle_init( .await } }) - .custom_instrument(tracing::info_span!("runner_process_init_tx")) - .await?; + .custom_instrument(tracing::info_span!("runner_process_init_tx")), + )?; + + let is_serverless = runner_config_res.first().map_or(false, |c| { + matches!(c.config.kind, RunnerConfigKind::Serverless { .. 
}) + }); + let pb = ctx.config().pegboard(); // Send init packet let init_msg = versioned::ToClientMk2::wrap_latest(protocol::mk2::ToClient::ToClientInit( protocol::mk2::ToClientInit { runner_id: conn.runner_id.to_string(), metadata: protocol::mk2::ProtocolMetadata { - runner_lost_threshold: ctx.config().pegboard().runner_lost_threshold(), + runner_lost_threshold: pb.runner_lost_threshold(), + actor_stop_threshold: pb.actor_stop_threshold(), + serverless_drain_grace_period: is_serverless + .then(|| pb.serverless_drain_grace_period() as i64), }, }, )); diff --git a/engine/packages/pegboard-runner/src/errors.rs b/engine/packages/pegboard-runner/src/errors.rs index 0799858cf1..2c8fc3111d 100644 --- a/engine/packages/pegboard-runner/src/errors.rs +++ b/engine/packages/pegboard-runner/src/errors.rs @@ -1,18 +1,6 @@ use rivet_error::*; use serde::{Deserialize, Serialize}; -#[derive(RivetError, Serialize, Deserialize)] -#[error( - "guard", - "response_body_too_large", - "Response body too large.", - "Response body size {size} bytes exceeds maximum allowed {max_size} bytes." 
-)] -pub struct ResponseBodyTooLarge { - pub size: usize, - pub max_size: usize, -} - #[derive(RivetError, Debug)] #[error("ws")] pub enum WsError { diff --git a/engine/packages/pegboard-runner/src/ws_to_tunnel_task.rs b/engine/packages/pegboard-runner/src/ws_to_tunnel_task.rs index 4268dfeb27..bddedd416b 100644 --- a/engine/packages/pegboard-runner/src/ws_to_tunnel_task.rs +++ b/engine/packages/pegboard-runner/src/ws_to_tunnel_task.rs @@ -6,6 +6,7 @@ use gas::prelude::*; use hyper_tungstenite::tungstenite::Message; use pegboard::actor_kv; use pegboard::pubsub_subjects::GatewayReceiverSubject; +use rivet_guard_core::proxy_service::MAX_BODY_SIZE; use rivet_guard_core::websocket_handle::WebSocketReceiver; use rivet_runner_protocol::{self as protocol, PROTOCOL_MK2_VERSION, versioned}; use std::sync::{Arc, atomic::Ordering}; @@ -783,29 +784,15 @@ async fn handle_tunnel_message_mk2( ctx: &StandaloneCtx, msg: protocol::mk2::ToServerTunnelMessage, ) -> Result<()> { - // Check response body size limit for HTTP responses - if let protocol::mk2::ToServerTunnelMessageKind::ToServerResponseStart(ref resp) = - msg.message_kind - { - if let Some(ref body) = resp.body { - let max_response_body_size = - ctx.config().pegboard().runner_http_max_response_body_size(); - if body.len() > max_response_body_size { - return Err(errors::ResponseBodyTooLarge { - size: body.len(), - max_size: max_response_body_size, - } - .build()); - } - } + // Extract inner data length before consuming msg + let inner_data_len = tunnel_message_inner_data_len_mk2(&msg.message_kind); + + // Enforce incoming payload size + if inner_data_len > ctx.config().pegboard().runner_http_max_response_body_size() { + return Err(errors::WsError::InvalidPacket("payload too large".to_string()).build()); } - // Publish message to UPS let gateway_reply_to = GatewayReceiverSubject::new(msg.message_id.gateway_id).to_string(); - - // Extract inner data length before consuming msg - let inner_data_len = 
tunnel_message_inner_data_len(&msg.message_kind); - let msg_serialized = versioned::ToGateway::wrap_latest(protocol::mk2::ToGateway::ToServerTunnelMessage(msg)) .serialize_with_embedded_version(PROTOCOL_MK2_VERSION) @@ -817,6 +804,7 @@ async fn handle_tunnel_message_mk2( "publishing tunnel message to gateway" ); + // Publish message to UPS ctx.ups() .context("failed to get UPS instance for tunnel message")? .publish(&gateway_reply_to, &msg_serialized, PublishOpts::one()) @@ -831,22 +819,6 @@ async fn handle_tunnel_message_mk2( Ok(()) } -/// Returns the length of the inner data payload for a tunnel message kind. -fn tunnel_message_inner_data_len(kind: &protocol::mk2::ToServerTunnelMessageKind) -> usize { - use protocol::mk2::ToServerTunnelMessageKind; - match kind { - ToServerTunnelMessageKind::ToServerResponseStart(resp) => { - resp.body.as_ref().map_or(0, |b| b.len()) - } - ToServerTunnelMessageKind::ToServerResponseChunk(chunk) => chunk.body.len(), - ToServerTunnelMessageKind::ToServerWebSocketMessage(msg) => msg.data.len(), - ToServerTunnelMessageKind::ToServerResponseAbort - | ToServerTunnelMessageKind::ToServerWebSocketOpen(_) - | ToServerTunnelMessageKind::ToServerWebSocketMessageAck(_) - | ToServerTunnelMessageKind::ToServerWebSocketClose(_) => 0, - } -} - #[tracing::instrument(skip_all)] async fn handle_tunnel_message_mk1( ctx: &StandaloneCtx, @@ -860,24 +832,17 @@ async fn handle_tunnel_message_mk1( return Ok(()); } - // Check response body size limit for HTTP responses - if let protocol::ToServerTunnelMessageKind::ToServerResponseStart(ref resp) = msg.message_kind { - if let Some(ref body) = resp.body { - let max_response_body_size = - ctx.config().pegboard().runner_http_max_response_body_size(); - if body.len() > max_response_body_size { - return Err(errors::ResponseBodyTooLarge { - size: body.len(), - max_size: max_response_body_size, - } - .build()); - } - } + // Extract inner data length before consuming msg + let inner_data_len = 
tunnel_message_inner_data_len_mk1(&msg.message_kind); + + // Enforce incoming payload size + if inner_data_len > ctx.config().pegboard().runner_http_max_response_body_size() { + return Err(errors::WsError::InvalidPacket("payload too large".to_string()).build()); } // Publish message to UPS let gateway_reply_to = GatewayReceiverSubject::new(msg.message_id.gateway_id).to_string(); - let msg_serialized = versioned::ToGateway::v3_to_v4(versioned::ToGateway::V3( + let msg_serialized = versioned::ToGateway::v3_to_v6(versioned::ToGateway::V3( protocol::ToGateway::ToServerTunnelMessage(msg), ))? .serialize_with_embedded_version(PROTOCOL_MK2_VERSION) @@ -896,6 +861,39 @@ async fn handle_tunnel_message_mk1( Ok(()) } +/// Returns the length of the inner data payload for a tunnel message kind. +fn tunnel_message_inner_data_len_mk2(kind: &protocol::mk2::ToServerTunnelMessageKind) -> usize { + use protocol::mk2::ToServerTunnelMessageKind; + match kind { + ToServerTunnelMessageKind::ToServerResponseStart(resp) => { + resp.body.as_ref().map_or(0, |b| b.len()) + } + ToServerTunnelMessageKind::ToServerResponseChunk(chunk) => chunk.body.len(), + ToServerTunnelMessageKind::ToServerWebSocketMessage(msg) => msg.data.len(), + ToServerTunnelMessageKind::ToServerResponseAbort + | ToServerTunnelMessageKind::ToServerWebSocketOpen(_) + | ToServerTunnelMessageKind::ToServerWebSocketMessageAck(_) + | ToServerTunnelMessageKind::ToServerWebSocketClose(_) => 0, + } +} + +/// Returns the length of the inner data payload for a tunnel message kind. 
+fn tunnel_message_inner_data_len_mk1(kind: &protocol::ToServerTunnelMessageKind) -> usize { + use protocol::ToServerTunnelMessageKind; + match kind { + ToServerTunnelMessageKind::ToServerResponseStart(resp) => { + resp.body.as_ref().map_or(0, |b| b.len()) + } + ToServerTunnelMessageKind::ToServerResponseChunk(chunk) => chunk.body.len(), + ToServerTunnelMessageKind::ToServerWebSocketMessage(msg) => msg.data.len(), + ToServerTunnelMessageKind::ToServerResponseAbort + | ToServerTunnelMessageKind::ToServerWebSocketOpen(_) + | ToServerTunnelMessageKind::ToServerWebSocketMessageAck(_) + | ToServerTunnelMessageKind::ToServerWebSocketClose(_) + | ToServerTunnelMessageKind::DeprecatedTunnelAck => 0, + } +} + /// Send ack message for deprecated tunnel versions. /// /// We have to parse as specifically a v2 message since we need the exact request & message ID diff --git a/engine/packages/pegboard/src/workflows/actor/runtime.rs b/engine/packages/pegboard/src/workflows/actor/runtime.rs index aa8f093e90..ec701bc201 100644 --- a/engine/packages/pegboard/src/workflows/actor/runtime.rs +++ b/engine/packages/pegboard/src/workflows/actor/runtime.rs @@ -8,10 +8,7 @@ use rand::prelude::SliceRandom; use rivet_runner_protocol::{ self as protocol, PROTOCOL_MK1_VERSION, PROTOCOL_MK2_VERSION, versioned, }; -use rivet_types::{ - actors::CrashPolicy, keys::namespace::runner_config::RunnerConfigVariant, - runner_configs::RunnerConfigKind, -}; +use rivet_types::{actors::CrashPolicy, keys::namespace::runner_config::RunnerConfigVariant}; use super::FailureReason; use std::time::Instant; @@ -246,21 +243,6 @@ async fn allocate_actor_v2( let crash_policy = state.crash_policy; let runner_name_selector = &state.runner_name_selector; - // Check if valid serverless config exists for the current ns + runner name - let runner_config_res = ctx - .op(crate::ops::runner_config::get::Input { - runners: vec![(namespace_id, runner_name_selector.clone())], - bypass_cache: false, - }) - .await?; - let 
has_valid_serverless = runner_config_res - .first() - .map(|runner| match &runner.config.kind { - RunnerConfigKind::Serverless { max_runners, .. } => *max_runners != 0, - _ => false, - }) - .unwrap_or_default(); - let runner_eligible_threshold = ctx.config().pegboard().runner_eligible_threshold(); let actor_allocation_candidate_sample_size = ctx .config() @@ -274,21 +256,40 @@ async fn allocate_actor_v2( .run(|tx| async move { let ping_threshold_ts = util::timestamp::now() - runner_eligible_threshold; - // Check if runner is an serverless runner - let for_serverless = tx - .with_subspace(namespace::keys::subspace()) - .exists( - &keys::runner_config::ByVariantKey::new( - namespace_id, - RunnerConfigVariant::Serverless, - runner_name_selector.clone(), - ), - Serializable, - ) - .await?; - let tx = tx.with_subspace(keys::subspace()); + // Check if a queue exists + let pending_actor_subspace = keys::subspace().subspace( + &keys::ns::PendingActorByRunnerNameSelectorKey::subspace( + namespace_id, + runner_name_selector.clone(), + ), + ); + + let ns_tx = tx.with_subspace(namespace::keys::subspace()); + let runner_config_variant_key = keys::runner_config::ByVariantKey::new( + namespace_id, + RunnerConfigVariant::Serverless, + runner_name_selector.clone(), + ); + let mut queue_stream = tx.get_ranges_keyvalues( + universaldb::RangeOption { + mode: StreamingMode::Exact, + limit: Some(1), + ..(&pending_actor_subspace).into() + }, + // NOTE: This is not Serializable because we don't want to conflict with other + // inserts/clears to this range + Snapshot, + ); + let (for_serverless_res, queue_exists_res) = tokio::join!( + // Check if runner is an serverless runner + ns_tx.exists(&runner_config_variant_key, Serializable), + queue_stream.next(), + ); + let for_serverless = for_serverless_res?; + let queue_exists = queue_exists_res.is_some(); + if for_serverless { tx.atomic_op( &rivet_types::keys::pegboard::ns::ServerlessDesiredSlotsKey::new( @@ -300,28 +301,6 @@ async fn 
allocate_actor_v2( ); } - // Check if a queue exists - let pending_actor_subspace = keys::subspace().subspace( - &keys::ns::PendingActorByRunnerNameSelectorKey::subspace( - namespace_id, - runner_name_selector.clone(), - ), - ); - let queue_exists = tx - .get_ranges_keyvalues( - universaldb::RangeOption { - mode: StreamingMode::Exact, - limit: Some(1), - ..(&pending_actor_subspace).into() - }, - // NOTE: This is not Serializable because we don't want to conflict with other - // inserts/clears to this range - Snapshot, - ) - .next() - .await - .is_some(); - if !queue_exists { let runner_alloc_subspace = keys::subspace().subspace(&keys::ns::RunnerAllocIdxKey::subspace( @@ -454,9 +433,9 @@ async fn allocate_actor_v2( // At this point in the txn there is no availability - match (crash_policy, input.force_allocate, has_valid_serverless) { + match (crash_policy, input.force_allocate, for_serverless) { (CrashPolicy::Sleep, false, false) => Ok(AllocateActorOutputV2 { - serverless: for_serverless, + serverless: false, status: AllocateActorStatus::Sleep, }), // Write the actor to the alloc queue to wait diff --git a/engine/packages/pegboard/src/workflows/serverless/conn.rs b/engine/packages/pegboard/src/workflows/serverless/conn.rs index 6646d45a2b..a442a7a685 100644 --- a/engine/packages/pegboard/src/workflows/serverless/conn.rs +++ b/engine/packages/pegboard/src/workflows/serverless/conn.rs @@ -25,8 +25,6 @@ const X_RIVET_TOTAL_SLOTS: HeaderName = HeaderName::from_static("x-rivet-total-s const X_RIVET_RUNNER_NAME: HeaderName = HeaderName::from_static("x-rivet-runner-name"); const X_RIVET_NAMESPACE_NAME: HeaderName = HeaderName::from_static("x-rivet-namespace-name"); -const DRAIN_GRACE_PERIOD: Duration = Duration::from_secs(10); - #[derive(Debug, Serialize, Deserialize, Clone)] pub struct Input { pub pool_wf_id: Id, @@ -412,8 +410,9 @@ async fn outbound_req_inner( .with_label_values(&[&input.namespace_id.to_string(), &input.runner_name]) .inc(); - let sleep_until_drain = - 
Duration::from_secs(request_lifespan as u64).saturating_sub(DRAIN_GRACE_PERIOD); + let sleep_until_drain = Duration::from_secs(request_lifespan as u64).saturating_sub( + Duration::from_millis(ctx.config().pegboard().serverless_drain_grace_period()), + ); tokio::select! { res = stream_handler => { match res { @@ -518,7 +517,7 @@ async fn finish_non_critical_draining( // Wait for runner to shut down tokio::select! { res = wait_for_shutdown_fut => return res.map_err(Into::into), - _ = tokio::time::sleep(DRAIN_GRACE_PERIOD) => { + _ = tokio::time::sleep(Duration::from_millis(ctx.config().pegboard().serverless_drain_grace_period())) => { tracing::debug!(?runner_id, "reached drain grace period before runner shut down") } _ = term_signal.recv() => {} diff --git a/engine/sdks/rust/runner-protocol/src/lib.rs b/engine/sdks/rust/runner-protocol/src/lib.rs index 2b6ddcdb65..befa759806 100644 --- a/engine/sdks/rust/runner-protocol/src/lib.rs +++ b/engine/sdks/rust/runner-protocol/src/lib.rs @@ -6,10 +6,10 @@ pub mod versioned; // Re-export latest pub use generated::v3::*; -pub use generated::v5 as mk2; +pub use generated::v6 as mk2; pub const PROTOCOL_MK1_VERSION: u16 = 3; -pub const PROTOCOL_MK2_VERSION: u16 = 5; +pub const PROTOCOL_MK2_VERSION: u16 = 6; pub fn is_mk2(protocol_version: u16) -> bool { protocol_version > PROTOCOL_MK1_VERSION diff --git a/engine/sdks/rust/runner-protocol/src/versioned.rs b/engine/sdks/rust/runner-protocol/src/versioned.rs index 1f440010c6..f3cca5d099 100644 --- a/engine/sdks/rust/runner-protocol/src/versioned.rs +++ b/engine/sdks/rust/runner-protocol/src/versioned.rs @@ -2,23 +2,24 @@ use anyhow::{Ok, Result, bail}; use vbare::OwnedVersionedData; use crate::PROTOCOL_MK1_VERSION; -use crate::generated::{v1, v2, v3, v4, v5}; +use crate::generated::{v1, v2, v3, v4, v5, v6}; use crate::uuid_compat::{decode_bytes_from_uuid, encode_bytes_to_uuid}; pub enum ToClientMk2 { V4(v4::ToClient), V5(v5::ToClient), + V6(v6::ToClient), } impl OwnedVersionedData 
for ToClientMk2 { - type Latest = v5::ToClient; + type Latest = v6::ToClient; - fn wrap_latest(latest: v5::ToClient) -> Self { - ToClientMk2::V5(latest) + fn wrap_latest(latest: v6::ToClient) -> Self { + ToClientMk2::V6(latest) } fn unwrap_latest(self) -> Result { - if let ToClientMk2::V5(data) = self { + if let ToClientMk2::V6(data) = self { Ok(data) } else { bail!("version not latest"); @@ -29,6 +30,7 @@ impl OwnedVersionedData for ToClientMk2 { match version { 4 => Ok(ToClientMk2::V4(serde_bare::from_slice(payload)?)), 5 => Ok(ToClientMk2::V5(serde_bare::from_slice(payload)?)), + 6 => Ok(ToClientMk2::V6(serde_bare::from_slice(payload)?)), _ => bail!("invalid version: {version}"), } } @@ -37,17 +39,18 @@ impl OwnedVersionedData for ToClientMk2 { match self { ToClientMk2::V4(data) => serde_bare::to_vec(&data).map_err(Into::into), ToClientMk2::V5(data) => serde_bare::to_vec(&data).map_err(Into::into), + ToClientMk2::V6(data) => serde_bare::to_vec(&data).map_err(Into::into), } } fn deserialize_converters() -> Vec Result> { // No changes between v1 and v4 - vec![Ok, Ok, Ok, Self::v4_to_v5] + vec![Ok, Ok, Ok, Self::v4_to_v5, Self::v5_to_v6] } fn serialize_converters() -> Vec Result> { // No changes between v1 and v4 - vec![Self::v5_to_v4, Ok, Ok, Ok] + vec![Self::v6_to_v5, Self::v5_to_v4, Ok, Ok, Ok] } } @@ -225,22 +228,192 @@ impl ToClientMk2 { bail!("unexpected version"); } } + + fn v5_to_v6(self) -> Result { + if let ToClientMk2::V5(x) = self { + let inner = match x { + v5::ToClient::ToClientInit(init) => v6::ToClient::ToClientInit(v6::ToClientInit { + runner_id: init.runner_id, + metadata: v6::ProtocolMetadata { + runner_lost_threshold: init.metadata.runner_lost_threshold, + actor_stop_threshold: 0, + serverless_drain_grace_period: None, + }, + }), + v5::ToClient::ToClientCommands(commands) => v6::ToClient::ToClientCommands( + commands + .into_iter() + .map(|cmd| v6::CommandWrapper { + checkpoint: v6::ActorCheckpoint { + actor_id: cmd.checkpoint.actor_id, + 
generation: cmd.checkpoint.generation, + index: cmd.checkpoint.index, + }, + inner: match cmd.inner { + v5::Command::CommandStartActor(start) => { + v6::Command::CommandStartActor(v6::CommandStartActor { + config: v6::ActorConfig { + name: start.config.name, + key: start.config.key, + create_ts: start.config.create_ts, + input: start.config.input, + }, + hibernating_requests: start + .hibernating_requests + .into_iter() + .map(|req| v6::HibernatingRequest { + gateway_id: req.gateway_id, + request_id: req.request_id, + }) + .collect(), + }) + } + v5::Command::CommandStopActor => v6::Command::CommandStopActor, + }, + }) + .collect(), + ), + v5::ToClient::ToClientAckEvents(ack) => { + v6::ToClient::ToClientAckEvents(v6::ToClientAckEvents { + last_event_checkpoints: ack + .last_event_checkpoints + .into_iter() + .map(|cp| v6::ActorCheckpoint { + actor_id: cp.actor_id, + generation: cp.generation, + index: cp.index, + }) + .collect(), + }) + } + v5::ToClient::ToClientKvResponse(resp) => { + v6::ToClient::ToClientKvResponse(v6::ToClientKvResponse { + request_id: resp.request_id, + data: convert_kv_response_data_v5_to_v6(resp.data), + }) + } + v5::ToClient::ToClientTunnelMessage(msg) => { + v6::ToClient::ToClientTunnelMessage(v6::ToClientTunnelMessage { + message_id: v6::MessageId { + gateway_id: msg.message_id.gateway_id, + request_id: msg.message_id.request_id, + message_index: msg.message_id.message_index, + }, + message_kind: convert_to_client_tunnel_message_kind_v5_to_v6( + msg.message_kind, + ), + }) + } + v5::ToClient::ToClientPing(ping) => { + v6::ToClient::ToClientPing(v6::ToClientPing { ts: ping.ts }) + } + }; + + Ok(ToClientMk2::V6(inner)) + } else { + bail!("unexpected version"); + } + } + + fn v6_to_v5(self) -> Result { + if let ToClientMk2::V6(x) = self { + let inner = match x { + v6::ToClient::ToClientInit(init) => v5::ToClient::ToClientInit(v5::ToClientInit { + runner_id: init.runner_id, + metadata: v5::ProtocolMetadata { + runner_lost_threshold: 
init.metadata.runner_lost_threshold, + }, + }), + v6::ToClient::ToClientCommands(commands) => v5::ToClient::ToClientCommands( + commands + .into_iter() + .map(|cmd| v5::CommandWrapper { + checkpoint: v5::ActorCheckpoint { + actor_id: cmd.checkpoint.actor_id, + generation: cmd.checkpoint.generation, + index: cmd.checkpoint.index, + }, + inner: match cmd.inner { + v6::Command::CommandStartActor(start) => { + v5::Command::CommandStartActor(v5::CommandStartActor { + config: v5::ActorConfig { + name: start.config.name, + key: start.config.key, + create_ts: start.config.create_ts, + input: start.config.input, + }, + hibernating_requests: start + .hibernating_requests + .into_iter() + .map(|req| v5::HibernatingRequest { + gateway_id: req.gateway_id, + request_id: req.request_id, + }) + .collect(), + }) + } + v6::Command::CommandStopActor => v5::Command::CommandStopActor, + }, + }) + .collect(), + ), + v6::ToClient::ToClientAckEvents(ack) => { + v5::ToClient::ToClientAckEvents(v5::ToClientAckEvents { + last_event_checkpoints: ack + .last_event_checkpoints + .into_iter() + .map(|cp| v5::ActorCheckpoint { + actor_id: cp.actor_id, + generation: cp.generation, + index: cp.index, + }) + .collect(), + }) + } + v6::ToClient::ToClientKvResponse(resp) => { + v5::ToClient::ToClientKvResponse(v5::ToClientKvResponse { + request_id: resp.request_id, + data: convert_kv_response_data_v6_to_v5(resp.data), + }) + } + v6::ToClient::ToClientTunnelMessage(msg) => { + v5::ToClient::ToClientTunnelMessage(v5::ToClientTunnelMessage { + message_id: v5::MessageId { + gateway_id: msg.message_id.gateway_id, + request_id: msg.message_id.request_id, + message_index: msg.message_id.message_index, + }, + message_kind: convert_to_client_tunnel_message_kind_v6_to_v5( + msg.message_kind, + ), + }) + } + v6::ToClient::ToClientPing(ping) => { + v5::ToClient::ToClientPing(v5::ToClientPing { ts: ping.ts }) + } + }; + + Ok(ToClientMk2::V5(inner)) + } else { + bail!("unexpected version"); + } + } } pub enum 
ToServerMk2 { V4(v4::ToServer), - V5(v5::ToServer), + V6(v6::ToServer), } impl OwnedVersionedData for ToServerMk2 { - type Latest = v5::ToServer; + type Latest = v6::ToServer; - fn wrap_latest(latest: v5::ToServer) -> Self { - ToServerMk2::V5(latest) + fn wrap_latest(latest: v6::ToServer) -> Self { + ToServerMk2::V6(latest) } fn unwrap_latest(self) -> Result { - if let ToServerMk2::V5(data) = self { + if let ToServerMk2::V6(data) = self { Ok(data) } else { bail!("version not latest"); @@ -250,7 +423,8 @@ impl OwnedVersionedData for ToServerMk2 { fn deserialize_version(payload: &[u8], version: u16) -> Result { match version { 4 => Ok(ToServerMk2::V4(serde_bare::from_slice(payload)?)), - 5 => Ok(ToServerMk2::V5(serde_bare::from_slice(payload)?)), + // v5 and v6 have the same ToServer binary format + 5 | 6 => Ok(ToServerMk2::V6(serde_bare::from_slice(payload)?)), _ => bail!("invalid version: {version}"), } } @@ -258,26 +432,26 @@ impl OwnedVersionedData for ToServerMk2 { fn serialize_version(self, _version: u16) -> Result> { match self { ToServerMk2::V4(data) => serde_bare::to_vec(&data).map_err(Into::into), - ToServerMk2::V5(data) => serde_bare::to_vec(&data).map_err(Into::into), + ToServerMk2::V6(data) => serde_bare::to_vec(&data).map_err(Into::into), } } fn deserialize_converters() -> Vec Result> { - // No changes between v1 and v4 - vec![Ok, Ok, Ok, Self::v4_to_v5] + // No changes between v1 and v4, no changes between v5 and v6 + vec![Ok, Ok, Ok, Self::v4_to_v6, Ok] } fn serialize_converters() -> Vec Result> { - // No changes between v1 and v4 - vec![Self::v5_to_v4, Ok, Ok, Ok] + // No changes between v1 and v4, no changes between v5 and v6 + vec![Ok, Self::v6_to_v4, Ok, Ok, Ok] } } impl ToServerMk2 { - fn v4_to_v5(self) -> Result { + fn v4_to_v6(self) -> Result { if let ToServerMk2::V4(x) = self { let inner = match x { - v4::ToServer::ToServerInit(init) => v5::ToServer::ToServerInit(v5::ToServerInit { + v4::ToServer::ToServerInit(init) => 
v6::ToServer::ToServerInit(v6::ToServerInit { name: init.name, version: init.version, total_slots: init.total_slots, @@ -286,7 +460,7 @@ impl ToServerMk2 { .map(|(k, v)| { ( k, - v5::ActorName { + v6::ActorName { metadata: v.metadata, }, ) @@ -295,7 +469,7 @@ impl ToServerMk2 { }), metadata: init.metadata, }), - v4::ToServer::ToServerEvents(events) => v5::ToServer::ToServerEvents( + v4::ToServer::ToServerEvents(events) => v6::ToServer::ToServerEvents( events .into_iter() .map(|event| { @@ -305,27 +479,27 @@ impl ToServerMk2 { v4::Event::EventActorSetAlarm(alarm) => alarm.generation, }; - v5::EventWrapper { - checkpoint: v5::ActorCheckpoint { + v6::EventWrapper { + checkpoint: v6::ActorCheckpoint { actor_id: event.checkpoint.actor_id, generation, index: event.checkpoint.index, }, inner: match event.inner { v4::Event::EventActorIntent(intent) => { - v5::Event::EventActorIntent(v5::EventActorIntent { - intent: convert_actor_intent_v4_to_v5(intent.intent), + v6::Event::EventActorIntent(v6::EventActorIntent { + intent: convert_actor_intent_v4_to_v6(intent.intent), }) } v4::Event::EventActorStateUpdate(state) => { - v5::Event::EventActorStateUpdate( - v5::EventActorStateUpdate { - state: convert_actor_state_v4_to_v5(state.state), + v6::Event::EventActorStateUpdate( + v6::EventActorStateUpdate { + state: convert_actor_state_v4_to_v6(state.state), }, ) } v4::Event::EventActorSetAlarm(alarm) => { - v5::Event::EventActorSetAlarm(v5::EventActorSetAlarm { + v6::Event::EventActorSetAlarm(v6::EventActorSetAlarm { alarm_ts: alarm.alarm_ts, }) } @@ -335,11 +509,11 @@ impl ToServerMk2 { .collect(), ), v4::ToServer::ToServerAckCommands(ack) => { - v5::ToServer::ToServerAckCommands(v5::ToServerAckCommands { + v6::ToServer::ToServerAckCommands(v6::ToServerAckCommands { last_command_checkpoints: ack .last_command_checkpoints .into_iter() - .map(|cp| v5::ActorCheckpoint { + .map(|cp| v6::ActorCheckpoint { actor_id: cp.actor_id, generation: 0, // Unknown in v4, use default index: 
cp.index, @@ -347,41 +521,41 @@ impl ToServerMk2 { .collect(), }) } - v4::ToServer::ToServerStopping => v5::ToServer::ToServerStopping, + v4::ToServer::ToServerStopping => v6::ToServer::ToServerStopping, v4::ToServer::ToServerPong(pong) => { - v5::ToServer::ToServerPong(v5::ToServerPong { ts: pong.ts }) + v6::ToServer::ToServerPong(v6::ToServerPong { ts: pong.ts }) } v4::ToServer::ToServerKvRequest(req) => { - v5::ToServer::ToServerKvRequest(v5::ToServerKvRequest { + v6::ToServer::ToServerKvRequest(v6::ToServerKvRequest { actor_id: req.actor_id, request_id: req.request_id, - data: convert_kv_request_data_v4_to_v5(req.data), + data: convert_kv_request_data_v4_to_v6(req.data), }) } v4::ToServer::ToServerTunnelMessage(msg) => { - v5::ToServer::ToServerTunnelMessage(v5::ToServerTunnelMessage { - message_id: v5::MessageId { + v6::ToServer::ToServerTunnelMessage(v6::ToServerTunnelMessage { + message_id: v6::MessageId { gateway_id: msg.message_id.gateway_id, request_id: msg.message_id.request_id, message_index: msg.message_id.message_index, }, - message_kind: convert_to_server_tunnel_message_kind_v4_to_v5( + message_kind: convert_to_server_tunnel_message_kind_v4_to_v6( msg.message_kind, ), }) } }; - Ok(ToServerMk2::V5(inner)) + Ok(ToServerMk2::V6(inner)) } else { bail!("unexpected version"); } } - fn v5_to_v4(self) -> Result { - if let ToServerMk2::V5(x) = self { + fn v6_to_v4(self) -> Result { + if let ToServerMk2::V6(x) = self { let inner = match x { - v5::ToServer::ToServerInit(init) => v4::ToServer::ToServerInit(v4::ToServerInit { + v6::ToServer::ToServerInit(init) => v4::ToServer::ToServerInit(v4::ToServerInit { name: init.name, version: init.version, total_slots: init.total_slots, @@ -399,7 +573,7 @@ impl ToServerMk2 { }), metadata: init.metadata, }), - v5::ToServer::ToServerEvents(events) => v4::ToServer::ToServerEvents( + v6::ToServer::ToServerEvents(events) => v4::ToServer::ToServerEvents( events .into_iter() .map(|event| v4::EventWrapper { @@ -408,21 +582,21 @@ 
impl ToServerMk2 { index: event.checkpoint.index, }, inner: match event.inner { - v5::Event::EventActorIntent(intent) => { + v6::Event::EventActorIntent(intent) => { v4::Event::EventActorIntent(v4::EventActorIntent { actor_id: event.checkpoint.actor_id, generation: event.checkpoint.generation, - intent: convert_actor_intent_v5_to_v4(intent.intent), + intent: convert_actor_intent_v6_to_v4(intent.intent), }) } - v5::Event::EventActorStateUpdate(state) => { + v6::Event::EventActorStateUpdate(state) => { v4::Event::EventActorStateUpdate(v4::EventActorStateUpdate { actor_id: event.checkpoint.actor_id, generation: event.checkpoint.generation, - state: convert_actor_state_v5_to_v4(state.state), + state: convert_actor_state_v6_to_v4(state.state), }) } - v5::Event::EventActorSetAlarm(alarm) => { + v6::Event::EventActorSetAlarm(alarm) => { v4::Event::EventActorSetAlarm(v4::EventActorSetAlarm { actor_id: event.checkpoint.actor_id, generation: event.checkpoint.generation, @@ -433,7 +607,7 @@ impl ToServerMk2 { }) .collect(), ), - v5::ToServer::ToServerAckCommands(ack) => { + v6::ToServer::ToServerAckCommands(ack) => { v4::ToServer::ToServerAckCommands(v4::ToServerAckCommands { last_command_checkpoints: ack .last_command_checkpoints @@ -445,25 +619,25 @@ impl ToServerMk2 { .collect(), }) } - v5::ToServer::ToServerStopping => v4::ToServer::ToServerStopping, - v5::ToServer::ToServerPong(pong) => { + v6::ToServer::ToServerStopping => v4::ToServer::ToServerStopping, + v6::ToServer::ToServerPong(pong) => { v4::ToServer::ToServerPong(v4::ToServerPong { ts: pong.ts }) } - v5::ToServer::ToServerKvRequest(req) => { + v6::ToServer::ToServerKvRequest(req) => { v4::ToServer::ToServerKvRequest(v4::ToServerKvRequest { actor_id: req.actor_id, request_id: req.request_id, - data: convert_kv_request_data_v5_to_v4(req.data), + data: convert_kv_request_data_v6_to_v4(req.data), }) } - v5::ToServer::ToServerTunnelMessage(msg) => { + v6::ToServer::ToServerTunnelMessage(msg) => { 
v4::ToServer::ToServerTunnelMessage(v4::ToServerTunnelMessage { message_id: v4::MessageId { gateway_id: msg.message_id.gateway_id, request_id: msg.message_id.request_id, message_index: msg.message_id.message_index, }, - message_kind: convert_to_server_tunnel_message_kind_v5_to_v4( + message_kind: convert_to_server_tunnel_message_kind_v6_to_v4( msg.message_kind, )?, }) @@ -479,18 +653,18 @@ impl ToServerMk2 { pub enum ToRunnerMk2 { V4(v4::ToRunner), - V5(v5::ToRunner), + V6(v6::ToRunner), } impl OwnedVersionedData for ToRunnerMk2 { - type Latest = v5::ToRunner; + type Latest = v6::ToRunner; - fn wrap_latest(latest: v5::ToRunner) -> Self { - ToRunnerMk2::V5(latest) + fn wrap_latest(latest: v6::ToRunner) -> Self { + ToRunnerMk2::V6(latest) } fn unwrap_latest(self) -> Result { - if let ToRunnerMk2::V5(data) = self { + if let ToRunnerMk2::V6(data) = self { Ok(data) } else { bail!("version not latest"); @@ -500,7 +674,8 @@ impl OwnedVersionedData for ToRunnerMk2 { fn deserialize_version(payload: &[u8], version: u16) -> Result { match version { 4 => Ok(ToRunnerMk2::V4(serde_bare::from_slice(payload)?)), - 5 => Ok(ToRunnerMk2::V5(serde_bare::from_slice(payload)?)), + // v5 and v6 have the same ToRunner binary format + 5 | 6 => Ok(ToRunnerMk2::V6(serde_bare::from_slice(payload)?)), _ => bail!("invalid version: {version}"), } } @@ -508,36 +683,36 @@ impl OwnedVersionedData for ToRunnerMk2 { fn serialize_version(self, _version: u16) -> Result> { match self { ToRunnerMk2::V4(data) => serde_bare::to_vec(&data).map_err(Into::into), - ToRunnerMk2::V5(data) => serde_bare::to_vec(&data).map_err(Into::into), + ToRunnerMk2::V6(data) => serde_bare::to_vec(&data).map_err(Into::into), } } fn deserialize_converters() -> Vec Result> { - // No changes between v1 and v4 - vec![Ok, Ok, Ok, Self::v4_to_v5] + // No changes between v1 and v4, no changes between v5 and v6 + vec![Ok, Ok, Ok, Self::v4_to_v6, Ok] } fn serialize_converters() -> Vec Result> { - // No changes between v1 and v4 - 
vec![Self::v5_to_v4, Ok, Ok, Ok] + // No changes between v1 and v4, no changes between v5 and v6 + vec![Ok, Self::v6_to_v4, Ok, Ok, Ok] } } impl ToRunnerMk2 { - fn v4_to_v5(self) -> Result { + fn v4_to_v6(self) -> Result { if let ToRunnerMk2::V4(x) = self { let inner = match x { - v4::ToRunner::ToRunnerPing(ping) => v5::ToRunner::ToRunnerPing(v5::ToRunnerPing { + v4::ToRunner::ToRunnerPing(ping) => v6::ToRunner::ToRunnerPing(v6::ToRunnerPing { gateway_id: ping.gateway_id, request_id: ping.request_id, ts: ping.ts, }), - v4::ToRunner::ToRunnerClose => v5::ToRunner::ToRunnerClose, - v4::ToRunner::ToClientCommands(commands) => v5::ToRunner::ToClientCommands( + v4::ToRunner::ToRunnerClose => v6::ToRunner::ToRunnerClose, + v4::ToRunner::ToClientCommands(commands) => v6::ToRunner::ToClientCommands( commands .into_iter() - .map(|cmd| v5::CommandWrapper { - checkpoint: v5::ActorCheckpoint { + .map(|cmd| v6::CommandWrapper { + checkpoint: v6::ActorCheckpoint { actor_id: cmd.checkpoint.actor_id, generation: match &cmd.inner { v4::Command::CommandStartActor(start) => start.generation, @@ -547,8 +722,8 @@ impl ToRunnerMk2 { }, inner: match cmd.inner { v4::Command::CommandStartActor(start) => { - v5::Command::CommandStartActor(v5::CommandStartActor { - config: v5::ActorConfig { + v6::Command::CommandStartActor(v6::CommandStartActor { + config: v6::ActorConfig { name: start.config.name, key: start.config.key, create_ts: start.config.create_ts, @@ -557,24 +732,24 @@ impl ToRunnerMk2 { hibernating_requests: start .hibernating_requests .into_iter() - .map(|req| v5::HibernatingRequest { + .map(|req| v6::HibernatingRequest { gateway_id: req.gateway_id, request_id: req.request_id, }) .collect(), }) } - v4::Command::CommandStopActor(_) => v5::Command::CommandStopActor, + v4::Command::CommandStopActor(_) => v6::Command::CommandStopActor, }, }) .collect(), ), v4::ToRunner::ToClientAckEvents(ack) => { - v5::ToRunner::ToClientAckEvents(v5::ToClientAckEvents { + 
v6::ToRunner::ToClientAckEvents(v6::ToClientAckEvents { last_event_checkpoints: ack .last_event_checkpoints .into_iter() - .map(|cp| v5::ActorCheckpoint { + .map(|cp| v6::ActorCheckpoint { actor_id: cp.actor_id, generation: 0, // Unknown in v4, use default index: cp.index, @@ -583,35 +758,35 @@ impl ToRunnerMk2 { }) } v4::ToRunner::ToClientTunnelMessage(msg) => { - v5::ToRunner::ToClientTunnelMessage(v5::ToClientTunnelMessage { - message_id: v5::MessageId { + v6::ToRunner::ToClientTunnelMessage(v6::ToClientTunnelMessage { + message_id: v6::MessageId { gateway_id: msg.message_id.gateway_id, request_id: msg.message_id.request_id, message_index: msg.message_id.message_index, }, - message_kind: convert_to_client_tunnel_message_kind_v4_to_v5( + message_kind: convert_to_client_tunnel_message_kind_v4_to_v6( msg.message_kind, ), }) } }; - Ok(ToRunnerMk2::V5(inner)) + Ok(ToRunnerMk2::V6(inner)) } else { bail!("unexpected version"); } } - fn v5_to_v4(self) -> Result { - if let ToRunnerMk2::V5(x) = self { + fn v6_to_v4(self) -> Result { + if let ToRunnerMk2::V6(x) = self { let inner = match x { - v5::ToRunner::ToRunnerPing(ping) => v4::ToRunner::ToRunnerPing(v4::ToRunnerPing { + v6::ToRunner::ToRunnerPing(ping) => v4::ToRunner::ToRunnerPing(v4::ToRunnerPing { gateway_id: ping.gateway_id, request_id: ping.request_id, ts: ping.ts, }), - v5::ToRunner::ToRunnerClose => v4::ToRunner::ToRunnerClose, - v5::ToRunner::ToClientCommands(commands) => v4::ToRunner::ToClientCommands( + v6::ToRunner::ToRunnerClose => v4::ToRunner::ToRunnerClose, + v6::ToRunner::ToClientCommands(commands) => v4::ToRunner::ToClientCommands( commands .into_iter() .map(|cmd| v4::CommandWrapper { @@ -620,7 +795,7 @@ impl ToRunnerMk2 { index: cmd.checkpoint.index, }, inner: match cmd.inner { - v5::Command::CommandStartActor(start) => { + v6::Command::CommandStartActor(start) => { v4::Command::CommandStartActor(v4::CommandStartActor { generation: cmd.checkpoint.generation, config: v4::ActorConfig { @@ -639,7 
+814,7 @@ impl ToRunnerMk2 { .collect(), }) } - v5::Command::CommandStopActor => { + v6::Command::CommandStopActor => { v4::Command::CommandStopActor(v4::CommandStopActor { generation: cmd.checkpoint.generation, }) @@ -648,7 +823,7 @@ impl ToRunnerMk2 { }) .collect(), ), - v5::ToRunner::ToClientAckEvents(ack) => { + v6::ToRunner::ToClientAckEvents(ack) => { v4::ToRunner::ToClientAckEvents(v4::ToClientAckEvents { last_event_checkpoints: ack .last_event_checkpoints @@ -660,14 +835,14 @@ impl ToRunnerMk2 { .collect(), }) } - v5::ToRunner::ToClientTunnelMessage(msg) => { + v6::ToRunner::ToClientTunnelMessage(msg) => { v4::ToRunner::ToClientTunnelMessage(v4::ToClientTunnelMessage { message_id: v4::MessageId { gateway_id: msg.message_id.gateway_id, request_id: msg.message_id.request_id, message_index: msg.message_id.message_index, }, - message_kind: convert_to_client_tunnel_message_kind_v5_to_v4( + message_kind: convert_to_client_tunnel_message_kind_v6_to_v4( msg.message_kind, ), }) @@ -1411,19 +1586,19 @@ impl OwnedVersionedData for ToRunner { pub enum ToGateway { V3(v3::ToGateway), - V5(v5::ToGateway), + V6(v6::ToGateway), } impl OwnedVersionedData for ToGateway { - type Latest = v5::ToGateway; + type Latest = v6::ToGateway; - fn wrap_latest(latest: v5::ToGateway) -> Self { - ToGateway::V5(latest) + fn wrap_latest(latest: v6::ToGateway) -> Self { + ToGateway::V6(latest) } fn unwrap_latest(self) -> Result { #[allow(irrefutable_let_patterns)] - if let ToGateway::V5(data) = self { + if let ToGateway::V6(data) = self { Ok(data) } else { bail!("version not latest"); @@ -1433,7 +1608,8 @@ impl OwnedVersionedData for ToGateway { fn deserialize_version(payload: &[u8], version: u16) -> Result { match version { 1 | 2 | 3 => Ok(ToGateway::V3(serde_bare::from_slice(payload)?)), - 4 | 5 => Ok(ToGateway::V5(serde_bare::from_slice(payload)?)), + // v4, v5, and v6 have the same ToGateway binary format + 4 | 5 | 6 => Ok(ToGateway::V6(serde_bare::from_slice(payload)?)), _ => 
bail!("invalid version: {version}"), } } @@ -1441,34 +1617,34 @@ impl OwnedVersionedData for ToGateway { fn serialize_version(self, _version: u16) -> Result> { match self { ToGateway::V3(data) => serde_bare::to_vec(&data).map_err(Into::into), - ToGateway::V5(data) => serde_bare::to_vec(&data).map_err(Into::into), + ToGateway::V6(data) => serde_bare::to_vec(&data).map_err(Into::into), } } fn deserialize_converters() -> Vec Result> { - // No changes between v1-v5 but we need a converter to bridge mk1 to mk2 - vec![Ok, Ok, Self::v3_to_v4, Ok] + // No changes between v1-v6 but we need a converter to bridge mk1 to mk2 + vec![Ok, Ok, Self::v3_to_v6, Ok, Ok] } fn serialize_converters() -> Vec Result> { - // No changes between v1-v5 but we need a converter to bridge mk2 to mk1 - vec![Ok, Self::v4_to_v3, Ok, Ok] + // No changes between v1-v6 but we need a converter to bridge mk2 to mk1 + vec![Ok, Ok, Self::v6_to_v3, Ok, Ok] } } impl ToGateway { - pub fn v3_to_v4(self) -> Result { + pub fn v3_to_v6(self) -> Result { if let ToGateway::V3(x) = self { let inner = match x { v3::ToGateway::ToGatewayPong(pong) => { - v5::ToGateway::ToGatewayPong(v5::ToGatewayPong { + v6::ToGateway::ToGatewayPong(v6::ToGatewayPong { request_id: pong.request_id, ts: pong.ts, }) } v3::ToGateway::ToServerTunnelMessage(msg) => { - v5::ToGateway::ToServerTunnelMessage(v5::ToServerTunnelMessage { - message_id: v5::MessageId { + v6::ToGateway::ToServerTunnelMessage(v6::ToServerTunnelMessage { + message_id: v6::MessageId { gateway_id: msg.message_id.gateway_id, request_id: msg.message_id.request_id, message_index: msg.message_id.message_index, @@ -1480,22 +1656,22 @@ impl ToGateway { } }; - Ok(ToGateway::V5(inner)) + Ok(ToGateway::V6(inner)) } else { bail!("unexpected version"); } } - fn v4_to_v3(self) -> Result { - if let ToGateway::V5(x) = self { + fn v6_to_v3(self) -> Result { + if let ToGateway::V6(x) = self { let inner = match x { - v5::ToGateway::ToGatewayPong(pong) => { + 
v6::ToGateway::ToGatewayPong(pong) => { v3::ToGateway::ToGatewayPong(v3::ToGatewayPong { request_id: pong.request_id, ts: pong.ts, }) } - v5::ToGateway::ToServerTunnelMessage(msg) => { + v6::ToGateway::ToServerTunnelMessage(msg) => { v3::ToGateway::ToServerTunnelMessage(v3::ToServerTunnelMessage { message_id: v3::MessageId { gateway_id: msg.message_id.gateway_id, @@ -1518,19 +1694,19 @@ impl ToGateway { pub enum ToServerlessServer { V3(v3::ToServerlessServer), - V5(v5::ToServerlessServer), + V6(v6::ToServerlessServer), } impl OwnedVersionedData for ToServerlessServer { - type Latest = v5::ToServerlessServer; + type Latest = v6::ToServerlessServer; - fn wrap_latest(latest: v5::ToServerlessServer) -> Self { - ToServerlessServer::V5(latest) + fn wrap_latest(latest: v6::ToServerlessServer) -> Self { + ToServerlessServer::V6(latest) } fn unwrap_latest(self) -> Result { #[allow(irrefutable_let_patterns)] - if let ToServerlessServer::V5(data) = self { + if let ToServerlessServer::V6(data) = self { Ok(data) } else { bail!("version not latest"); @@ -1540,7 +1716,8 @@ impl OwnedVersionedData for ToServerlessServer { fn deserialize_version(payload: &[u8], version: u16) -> Result { match version { 1 | 2 | 3 => Ok(ToServerlessServer::V3(serde_bare::from_slice(payload)?)), - 4 | 5 => Ok(ToServerlessServer::V5(serde_bare::from_slice(payload)?)), + // v4, v5, and v6 have the same ToServerlessServer binary format + 4 | 5 | 6 => Ok(ToServerlessServer::V6(serde_bare::from_slice(payload)?)), _ => bail!("invalid version: {version}"), } } @@ -1548,43 +1725,43 @@ impl OwnedVersionedData for ToServerlessServer { fn serialize_version(self, _version: u16) -> Result> { match self { ToServerlessServer::V3(data) => serde_bare::to_vec(&data).map_err(Into::into), - ToServerlessServer::V5(data) => serde_bare::to_vec(&data).map_err(Into::into), + ToServerlessServer::V6(data) => serde_bare::to_vec(&data).map_err(Into::into), } } fn deserialize_converters() -> Vec Result> { - // No changes between 
v1-v3, v4-v5 - vec![Ok, Ok, Self::v3_to_v4, Ok] + // No changes between v1-v3, v4-v6 + vec![Ok, Ok, Self::v3_to_v6, Ok, Ok] } fn serialize_converters() -> Vec Result> { - // No changes between v1-v3, v4-v5 - vec![Ok, Self::v4_to_v3, Ok, Ok] + // No changes between v1-v3, v4-v6 + vec![Ok, Ok, Self::v6_to_v3, Ok, Ok] } } impl ToServerlessServer { - fn v3_to_v4(self) -> Result { + fn v3_to_v6(self) -> Result { if let ToServerlessServer::V3(x) = self { let inner = match x { v3::ToServerlessServer::ToServerlessServerInit(init) => { - v5::ToServerlessServer::ToServerlessServerInit(v5::ToServerlessServerInit { + v6::ToServerlessServer::ToServerlessServerInit(v6::ToServerlessServerInit { runner_id: init.runner_id, runner_protocol_version: PROTOCOL_MK1_VERSION, }) } }; - Ok(ToServerlessServer::V5(inner)) + Ok(ToServerlessServer::V6(inner)) } else { bail!("unexpected version"); } } - fn v4_to_v3(self) -> Result { - if let ToServerlessServer::V5(x) = self { + fn v6_to_v3(self) -> Result { + if let ToServerlessServer::V6(x) = self { let inner = match x { - v5::ToServerlessServer::ToServerlessServerInit(init) => { + v6::ToServerlessServer::ToServerlessServerInit(init) => { v3::ToServerlessServer::ToServerlessServerInit(v3::ToServerlessServerInit { runner_id: init.runner_id, }) @@ -1600,18 +1777,18 @@ impl ToServerlessServer { pub enum ActorCommandKeyData { V4(v4::ActorCommandKeyData), - V5(v5::ActorCommandKeyData), + V6(v6::ActorCommandKeyData), } impl OwnedVersionedData for ActorCommandKeyData { - type Latest = v5::ActorCommandKeyData; + type Latest = v6::ActorCommandKeyData; - fn wrap_latest(latest: v5::ActorCommandKeyData) -> Self { - ActorCommandKeyData::V5(latest) + fn wrap_latest(latest: v6::ActorCommandKeyData) -> Self { + ActorCommandKeyData::V6(latest) } fn unwrap_latest(self) -> Result { - if let ActorCommandKeyData::V5(data) = self { + if let ActorCommandKeyData::V6(data) = self { Ok(data) } else { bail!("version not latest"); @@ -1621,7 +1798,8 @@ impl 
OwnedVersionedData for ActorCommandKeyData { fn deserialize_version(payload: &[u8], version: u16) -> Result { match version { 4 => Ok(ActorCommandKeyData::V4(serde_bare::from_slice(payload)?)), - 5 => Ok(ActorCommandKeyData::V5(serde_bare::from_slice(payload)?)), + // v5 and v6 have the same ActorCommandKeyData binary format + 5 | 6 => Ok(ActorCommandKeyData::V6(serde_bare::from_slice(payload)?)), _ => bail!("invalid version: {version}"), } } @@ -1629,28 +1807,28 @@ impl OwnedVersionedData for ActorCommandKeyData { fn serialize_version(self, _version: u16) -> Result> { match self { ActorCommandKeyData::V4(data) => serde_bare::to_vec(&data).map_err(Into::into), - ActorCommandKeyData::V5(data) => serde_bare::to_vec(&data).map_err(Into::into), + ActorCommandKeyData::V6(data) => serde_bare::to_vec(&data).map_err(Into::into), } } fn deserialize_converters() -> Vec Result> { - // No changes between v1 and v4 - vec![Ok, Ok, Ok, Self::v4_to_v5] + // No changes between v1 and v4, no changes between v5 and v6 + vec![Ok, Ok, Ok, Self::v4_to_v6, Ok] } fn serialize_converters() -> Vec Result> { - // No changes between v1 and v4 - vec![Self::v5_to_v4, Ok, Ok, Ok] + // No changes between v1 and v4, no changes between v5 and v6 + vec![Ok, Self::v6_to_v4, Ok, Ok, Ok] } } impl ActorCommandKeyData { - fn v4_to_v5(self) -> Result { + fn v4_to_v6(self) -> Result { if let ActorCommandKeyData::V4(x) = self { let inner = match x { v4::ActorCommandKeyData::CommandStartActor(start) => { - v5::ActorCommandKeyData::CommandStartActor(v5::CommandStartActor { - config: v5::ActorConfig { + v6::ActorCommandKeyData::CommandStartActor(v6::CommandStartActor { + config: v6::ActorConfig { name: start.config.name, key: start.config.key, create_ts: start.config.create_ts, @@ -1659,7 +1837,7 @@ impl ActorCommandKeyData { hibernating_requests: start .hibernating_requests .into_iter() - .map(|req| v5::HibernatingRequest { + .map(|req| v6::HibernatingRequest { gateway_id: req.gateway_id, request_id: 
req.request_id, }) @@ -1667,22 +1845,21 @@ impl ActorCommandKeyData { }) } v4::ActorCommandKeyData::CommandStopActor(_) => { - v5::ActorCommandKeyData::CommandStopActor + v6::ActorCommandKeyData::CommandStopActor } }; - Ok(ActorCommandKeyData::V5(inner)) + Ok(ActorCommandKeyData::V6(inner)) } else { bail!("unexpected version"); } } - fn v5_to_v4(self) -> Result { - if let ActorCommandKeyData::V5(x) = self { - // Since v4 commands have generation but v5 doesn't, we can't fully convert back - // We'll use generation 0 as a placeholder + fn v6_to_v4(self) -> Result { + if let ActorCommandKeyData::V6(x) = self { + // Since v4 commands have generation but v6 doesn't, use generation 0 as a placeholder let inner = match x { - v5::ActorCommandKeyData::CommandStartActor(start) => { + v6::ActorCommandKeyData::CommandStartActor(start) => { v4::ActorCommandKeyData::CommandStartActor(v4::CommandStartActor { generation: 0, // Lost during conversion config: v4::ActorConfig { @@ -1701,7 +1878,7 @@ impl ActorCommandKeyData { .collect(), }) } - v5::ActorCommandKeyData::CommandStopActor => { + v6::ActorCommandKeyData::CommandStopActor => { v4::ActorCommandKeyData::CommandStopActor(v4::CommandStopActor { generation: 0, // Lost during conversion }) @@ -2625,10 +2802,10 @@ fn convert_kv_metadata_v3_to_v2(metadata: v3::KvMetadata) -> v2::KvMetadata { fn convert_to_server_tunnel_message_kind_v3_to_v4( kind: v3::ToServerTunnelMessageKind, -) -> v5::ToServerTunnelMessageKind { +) -> v6::ToServerTunnelMessageKind { match kind { v3::ToServerTunnelMessageKind::ToServerResponseStart(resp) => { - v5::ToServerTunnelMessageKind::ToServerResponseStart(v5::ToServerResponseStart { + v6::ToServerTunnelMessageKind::ToServerResponseStart(v6::ToServerResponseStart { status: resp.status, headers: resp.headers, body: resp.body, @@ -2636,32 +2813,32 @@ fn convert_to_server_tunnel_message_kind_v3_to_v4( }) } v3::ToServerTunnelMessageKind::ToServerResponseChunk(chunk) => { - 
v5::ToServerTunnelMessageKind::ToServerResponseChunk(v5::ToServerResponseChunk { + v6::ToServerTunnelMessageKind::ToServerResponseChunk(v6::ToServerResponseChunk { body: chunk.body, finish: chunk.finish, }) } v3::ToServerTunnelMessageKind::ToServerResponseAbort => { - v5::ToServerTunnelMessageKind::ToServerResponseAbort + v6::ToServerTunnelMessageKind::ToServerResponseAbort } v3::ToServerTunnelMessageKind::ToServerWebSocketOpen(open) => { - v5::ToServerTunnelMessageKind::ToServerWebSocketOpen(v5::ToServerWebSocketOpen { + v6::ToServerTunnelMessageKind::ToServerWebSocketOpen(v6::ToServerWebSocketOpen { can_hibernate: open.can_hibernate, }) } v3::ToServerTunnelMessageKind::ToServerWebSocketMessage(msg) => { - v5::ToServerTunnelMessageKind::ToServerWebSocketMessage(v5::ToServerWebSocketMessage { + v6::ToServerTunnelMessageKind::ToServerWebSocketMessage(v6::ToServerWebSocketMessage { data: msg.data, binary: msg.binary, }) } v3::ToServerTunnelMessageKind::ToServerWebSocketMessageAck(ack) => { - v5::ToServerTunnelMessageKind::ToServerWebSocketMessageAck( - v5::ToServerWebSocketMessageAck { index: ack.index }, + v6::ToServerTunnelMessageKind::ToServerWebSocketMessageAck( + v6::ToServerWebSocketMessageAck { index: ack.index }, ) } v3::ToServerTunnelMessageKind::ToServerWebSocketClose(close) => { - v5::ToServerTunnelMessageKind::ToServerWebSocketClose(v5::ToServerWebSocketClose { + v6::ToServerTunnelMessageKind::ToServerWebSocketClose(v6::ToServerWebSocketClose { code: close.code, reason: close.reason, hibernate: close.hibernate, @@ -2670,16 +2847,16 @@ fn convert_to_server_tunnel_message_kind_v3_to_v4( v3::ToServerTunnelMessageKind::DeprecatedTunnelAck => { // v4 removed DeprecatedTunnelAck, this should not occur in practice // but if it does, we'll convert it to a response abort as a safe fallback - v5::ToServerTunnelMessageKind::ToServerResponseAbort + v6::ToServerTunnelMessageKind::ToServerResponseAbort } } } fn convert_to_server_tunnel_message_kind_v4_to_v3( - kind: 
v5::ToServerTunnelMessageKind, + kind: v6::ToServerTunnelMessageKind, ) -> Result { Ok(match kind { - v5::ToServerTunnelMessageKind::ToServerResponseStart(resp) => { + v6::ToServerTunnelMessageKind::ToServerResponseStart(resp) => { v3::ToServerTunnelMessageKind::ToServerResponseStart(v3::ToServerResponseStart { status: resp.status, headers: resp.headers, @@ -2687,32 +2864,32 @@ fn convert_to_server_tunnel_message_kind_v4_to_v3( stream: resp.stream, }) } - v5::ToServerTunnelMessageKind::ToServerResponseChunk(chunk) => { + v6::ToServerTunnelMessageKind::ToServerResponseChunk(chunk) => { v3::ToServerTunnelMessageKind::ToServerResponseChunk(v3::ToServerResponseChunk { body: chunk.body, finish: chunk.finish, }) } - v5::ToServerTunnelMessageKind::ToServerResponseAbort => { + v6::ToServerTunnelMessageKind::ToServerResponseAbort => { v3::ToServerTunnelMessageKind::ToServerResponseAbort } - v5::ToServerTunnelMessageKind::ToServerWebSocketOpen(open) => { + v6::ToServerTunnelMessageKind::ToServerWebSocketOpen(open) => { v3::ToServerTunnelMessageKind::ToServerWebSocketOpen(v3::ToServerWebSocketOpen { can_hibernate: open.can_hibernate, }) } - v5::ToServerTunnelMessageKind::ToServerWebSocketMessage(msg) => { + v6::ToServerTunnelMessageKind::ToServerWebSocketMessage(msg) => { v3::ToServerTunnelMessageKind::ToServerWebSocketMessage(v3::ToServerWebSocketMessage { data: msg.data, binary: msg.binary, }) } - v5::ToServerTunnelMessageKind::ToServerWebSocketMessageAck(ack) => { + v6::ToServerTunnelMessageKind::ToServerWebSocketMessageAck(ack) => { v3::ToServerTunnelMessageKind::ToServerWebSocketMessageAck( v3::ToServerWebSocketMessageAck { index: ack.index }, ) } - v5::ToServerTunnelMessageKind::ToServerWebSocketClose(close) => { + v6::ToServerTunnelMessageKind::ToServerWebSocketClose(close) => { v3::ToServerTunnelMessageKind::ToServerWebSocketClose(v3::ToServerWebSocketClose { code: close.code, reason: close.reason, @@ -2724,7 +2901,7 @@ fn 
convert_to_server_tunnel_message_kind_v4_to_v3( // Used specifically for the gateway because there were no changes between mk2 and mk1 for the tunnel messages pub fn to_client_tunnel_message_mk2_to_mk1( - msg: v5::ToClientTunnelMessage, + msg: v6::ToClientTunnelMessage, ) -> v3::ToClientTunnelMessage { v3::ToClientTunnelMessage { message_id: v3::MessageId { @@ -2737,10 +2914,10 @@ pub fn to_client_tunnel_message_mk2_to_mk1( } fn convert_to_client_tunnel_message_kind_mk2_to_mk1( - kind: v5::ToClientTunnelMessageKind, + kind: v6::ToClientTunnelMessageKind, ) -> v3::ToClientTunnelMessageKind { match kind { - v5::ToClientTunnelMessageKind::ToClientRequestStart(req) => { + v6::ToClientTunnelMessageKind::ToClientRequestStart(req) => { v3::ToClientTunnelMessageKind::ToClientRequestStart(v3::ToClientRequestStart { actor_id: req.actor_id, method: req.method, @@ -2750,29 +2927,29 @@ fn convert_to_client_tunnel_message_kind_mk2_to_mk1( stream: req.stream, }) } - v5::ToClientTunnelMessageKind::ToClientRequestChunk(chunk) => { + v6::ToClientTunnelMessageKind::ToClientRequestChunk(chunk) => { v3::ToClientTunnelMessageKind::ToClientRequestChunk(v3::ToClientRequestChunk { body: chunk.body, finish: chunk.finish, }) } - v5::ToClientTunnelMessageKind::ToClientRequestAbort => { + v6::ToClientTunnelMessageKind::ToClientRequestAbort => { v3::ToClientTunnelMessageKind::ToClientRequestAbort } - v5::ToClientTunnelMessageKind::ToClientWebSocketOpen(ws) => { + v6::ToClientTunnelMessageKind::ToClientWebSocketOpen(ws) => { v3::ToClientTunnelMessageKind::ToClientWebSocketOpen(v3::ToClientWebSocketOpen { actor_id: ws.actor_id, path: ws.path, headers: ws.headers, }) } - v5::ToClientTunnelMessageKind::ToClientWebSocketMessage(msg) => { + v6::ToClientTunnelMessageKind::ToClientWebSocketMessage(msg) => { v3::ToClientTunnelMessageKind::ToClientWebSocketMessage(v3::ToClientWebSocketMessage { data: msg.data, binary: msg.binary, }) } - v5::ToClientTunnelMessageKind::ToClientWebSocketClose(close) => { + 
v6::ToClientTunnelMessageKind::ToClientWebSocketClose(close) => { v3::ToClientTunnelMessageKind::ToClientWebSocketClose(v3::ToClientWebSocketClose { code: close.code, reason: close.reason, @@ -2781,106 +2958,6 @@ fn convert_to_client_tunnel_message_kind_mk2_to_mk1( } } -// Helper conversion functions for v4 <-> v5 - -fn convert_actor_intent_v4_to_v5(intent: v4::ActorIntent) -> v5::ActorIntent { - match intent { - v4::ActorIntent::ActorIntentSleep => v5::ActorIntent::ActorIntentSleep, - v4::ActorIntent::ActorIntentStop => v5::ActorIntent::ActorIntentStop, - } -} - -fn convert_actor_intent_v5_to_v4(intent: v5::ActorIntent) -> v4::ActorIntent { - match intent { - v5::ActorIntent::ActorIntentSleep => v4::ActorIntent::ActorIntentSleep, - v5::ActorIntent::ActorIntentStop => v4::ActorIntent::ActorIntentStop, - } -} - -fn convert_actor_state_v4_to_v5(state: v4::ActorState) -> v5::ActorState { - match state { - v4::ActorState::ActorStateRunning => v5::ActorState::ActorStateRunning, - v4::ActorState::ActorStateStopped(stopped) => { - v5::ActorState::ActorStateStopped(v5::ActorStateStopped { - code: convert_stop_code_v4_to_v5(stopped.code), - message: stopped.message, - }) - } - } -} - -fn convert_actor_state_v5_to_v4(state: v5::ActorState) -> v4::ActorState { - match state { - v5::ActorState::ActorStateRunning => v4::ActorState::ActorStateRunning, - v5::ActorState::ActorStateStopped(stopped) => { - v4::ActorState::ActorStateStopped(v4::ActorStateStopped { - code: convert_stop_code_v5_to_v4(stopped.code), - message: stopped.message, - }) - } - } -} - -fn convert_stop_code_v4_to_v5(code: v4::StopCode) -> v5::StopCode { - match code { - v4::StopCode::Ok => v5::StopCode::Ok, - v4::StopCode::Error => v5::StopCode::Error, - } -} - -fn convert_stop_code_v5_to_v4(code: v5::StopCode) -> v4::StopCode { - match code { - v5::StopCode::Ok => v4::StopCode::Ok, - v5::StopCode::Error => v4::StopCode::Error, - } -} - -fn convert_kv_request_data_v4_to_v5(data: v4::KvRequestData) -> 
v5::KvRequestData { - match data { - v4::KvRequestData::KvGetRequest(req) => { - v5::KvRequestData::KvGetRequest(v5::KvGetRequest { keys: req.keys }) - } - v4::KvRequestData::KvListRequest(req) => { - v5::KvRequestData::KvListRequest(v5::KvListRequest { - query: convert_kv_list_query_v4_to_v5(req.query), - reverse: req.reverse, - limit: req.limit, - }) - } - v4::KvRequestData::KvPutRequest(req) => v5::KvRequestData::KvPutRequest(v5::KvPutRequest { - keys: req.keys, - values: req.values, - }), - v4::KvRequestData::KvDeleteRequest(req) => { - v5::KvRequestData::KvDeleteRequest(v5::KvDeleteRequest { keys: req.keys }) - } - v4::KvRequestData::KvDropRequest => v5::KvRequestData::KvDropRequest, - } -} - -fn convert_kv_request_data_v5_to_v4(data: v5::KvRequestData) -> v4::KvRequestData { - match data { - v5::KvRequestData::KvGetRequest(req) => { - v4::KvRequestData::KvGetRequest(v4::KvGetRequest { keys: req.keys }) - } - v5::KvRequestData::KvListRequest(req) => { - v4::KvRequestData::KvListRequest(v4::KvListRequest { - query: convert_kv_list_query_v5_to_v4(req.query), - reverse: req.reverse, - limit: req.limit, - }) - } - v5::KvRequestData::KvPutRequest(req) => v4::KvRequestData::KvPutRequest(v4::KvPutRequest { - keys: req.keys, - values: req.values, - }), - v5::KvRequestData::KvDeleteRequest(req) => { - v4::KvRequestData::KvDeleteRequest(v4::KvDeleteRequest { keys: req.keys }) - } - v5::KvRequestData::KvDropRequest => v4::KvRequestData::KvDropRequest, - } -} - fn convert_kv_response_data_v4_to_v5(data: v4::KvResponseData) -> v5::KvResponseData { match data { v4::KvResponseData::KvErrorResponse(err) => { @@ -2951,38 +3028,6 @@ fn convert_kv_response_data_v5_to_v4(data: v5::KvResponseData) -> v4::KvResponse } } -fn convert_kv_list_query_v4_to_v5(query: v4::KvListQuery) -> v5::KvListQuery { - match query { - v4::KvListQuery::KvListAllQuery => v5::KvListQuery::KvListAllQuery, - v4::KvListQuery::KvListRangeQuery(range) => { - 
v5::KvListQuery::KvListRangeQuery(v5::KvListRangeQuery { - start: range.start, - end: range.end, - exclusive: range.exclusive, - }) - } - v4::KvListQuery::KvListPrefixQuery(prefix) => { - v5::KvListQuery::KvListPrefixQuery(v5::KvListPrefixQuery { key: prefix.key }) - } - } -} - -fn convert_kv_list_query_v5_to_v4(query: v5::KvListQuery) -> v4::KvListQuery { - match query { - v5::KvListQuery::KvListAllQuery => v4::KvListQuery::KvListAllQuery, - v5::KvListQuery::KvListRangeQuery(range) => { - v4::KvListQuery::KvListRangeQuery(v4::KvListRangeQuery { - start: range.start, - end: range.end, - exclusive: range.exclusive, - }) - } - v5::KvListQuery::KvListPrefixQuery(prefix) => { - v4::KvListQuery::KvListPrefixQuery(v4::KvListPrefixQuery { key: prefix.key }) - } - } -} - fn convert_kv_metadata_v4_to_v5(metadata: v4::KvMetadata) -> v5::KvMetadata { v5::KvMetadata { version: metadata.version, @@ -3087,12 +3132,234 @@ fn convert_to_client_tunnel_message_kind_v5_to_v4( } } -fn convert_to_server_tunnel_message_kind_v4_to_v5( +// MARK: v4 <-> v6 helpers (ToServer and ToRunner; v5 and v6 are structurally identical) + +fn convert_actor_intent_v4_to_v6(intent: v4::ActorIntent) -> v6::ActorIntent { + match intent { + v4::ActorIntent::ActorIntentSleep => v6::ActorIntent::ActorIntentSleep, + v4::ActorIntent::ActorIntentStop => v6::ActorIntent::ActorIntentStop, + } +} + +fn convert_actor_intent_v6_to_v4(intent: v6::ActorIntent) -> v4::ActorIntent { + match intent { + v6::ActorIntent::ActorIntentSleep => v4::ActorIntent::ActorIntentSleep, + v6::ActorIntent::ActorIntentStop => v4::ActorIntent::ActorIntentStop, + } +} + +fn convert_actor_state_v4_to_v6(state: v4::ActorState) -> v6::ActorState { + match state { + v4::ActorState::ActorStateRunning => v6::ActorState::ActorStateRunning, + v4::ActorState::ActorStateStopped(stopped) => { + v6::ActorState::ActorStateStopped(v6::ActorStateStopped { + code: convert_stop_code_v4_to_v6(stopped.code), + message: stopped.message, + }) + } + } +} + +fn 
convert_actor_state_v6_to_v4(state: v6::ActorState) -> v4::ActorState { + match state { + v6::ActorState::ActorStateRunning => v4::ActorState::ActorStateRunning, + v6::ActorState::ActorStateStopped(stopped) => { + v4::ActorState::ActorStateStopped(v4::ActorStateStopped { + code: convert_stop_code_v6_to_v4(stopped.code), + message: stopped.message, + }) + } + } +} + +fn convert_stop_code_v4_to_v6(code: v4::StopCode) -> v6::StopCode { + match code { + v4::StopCode::Ok => v6::StopCode::Ok, + v4::StopCode::Error => v6::StopCode::Error, + } +} + +fn convert_stop_code_v6_to_v4(code: v6::StopCode) -> v4::StopCode { + match code { + v6::StopCode::Ok => v4::StopCode::Ok, + v6::StopCode::Error => v4::StopCode::Error, + } +} + +fn convert_kv_request_data_v4_to_v6(data: v4::KvRequestData) -> v6::KvRequestData { + match data { + v4::KvRequestData::KvGetRequest(req) => { + v6::KvRequestData::KvGetRequest(v6::KvGetRequest { keys: req.keys }) + } + v4::KvRequestData::KvListRequest(req) => { + v6::KvRequestData::KvListRequest(v6::KvListRequest { + query: convert_kv_list_query_v4_to_v6(req.query), + reverse: req.reverse, + limit: req.limit, + }) + } + v4::KvRequestData::KvPutRequest(req) => v6::KvRequestData::KvPutRequest(v6::KvPutRequest { + keys: req.keys, + values: req.values, + }), + v4::KvRequestData::KvDeleteRequest(req) => { + v6::KvRequestData::KvDeleteRequest(v6::KvDeleteRequest { keys: req.keys }) + } + v4::KvRequestData::KvDropRequest => v6::KvRequestData::KvDropRequest, + } +} + +fn convert_kv_request_data_v6_to_v4(data: v6::KvRequestData) -> v4::KvRequestData { + match data { + v6::KvRequestData::KvGetRequest(req) => { + v4::KvRequestData::KvGetRequest(v4::KvGetRequest { keys: req.keys }) + } + v6::KvRequestData::KvListRequest(req) => { + v4::KvRequestData::KvListRequest(v4::KvListRequest { + query: convert_kv_list_query_v6_to_v4(req.query), + reverse: req.reverse, + limit: req.limit, + }) + } + v6::KvRequestData::KvPutRequest(req) => 
v4::KvRequestData::KvPutRequest(v4::KvPutRequest { + keys: req.keys, + values: req.values, + }), + v6::KvRequestData::KvDeleteRequest(req) => { + v4::KvRequestData::KvDeleteRequest(v4::KvDeleteRequest { keys: req.keys }) + } + v6::KvRequestData::KvDropRequest => v4::KvRequestData::KvDropRequest, + } +} + +fn convert_kv_list_query_v4_to_v6(query: v4::KvListQuery) -> v6::KvListQuery { + match query { + v4::KvListQuery::KvListAllQuery => v6::KvListQuery::KvListAllQuery, + v4::KvListQuery::KvListRangeQuery(range) => { + v6::KvListQuery::KvListRangeQuery(v6::KvListRangeQuery { + start: range.start, + end: range.end, + exclusive: range.exclusive, + }) + } + v4::KvListQuery::KvListPrefixQuery(prefix) => { + v6::KvListQuery::KvListPrefixQuery(v6::KvListPrefixQuery { key: prefix.key }) + } + } +} + +fn convert_kv_list_query_v6_to_v4(query: v6::KvListQuery) -> v4::KvListQuery { + match query { + v6::KvListQuery::KvListAllQuery => v4::KvListQuery::KvListAllQuery, + v6::KvListQuery::KvListRangeQuery(range) => { + v4::KvListQuery::KvListRangeQuery(v4::KvListRangeQuery { + start: range.start, + end: range.end, + exclusive: range.exclusive, + }) + } + v6::KvListQuery::KvListPrefixQuery(prefix) => { + v4::KvListQuery::KvListPrefixQuery(v4::KvListPrefixQuery { key: prefix.key }) + } + } +} + +fn convert_to_client_tunnel_message_kind_v4_to_v6( + kind: v4::ToClientTunnelMessageKind, +) -> v6::ToClientTunnelMessageKind { + match kind { + v4::ToClientTunnelMessageKind::ToClientRequestStart(req) => { + v6::ToClientTunnelMessageKind::ToClientRequestStart(v6::ToClientRequestStart { + actor_id: req.actor_id, + method: req.method, + path: req.path, + headers: req.headers, + body: req.body, + stream: req.stream, + }) + } + v4::ToClientTunnelMessageKind::ToClientRequestChunk(chunk) => { + v6::ToClientTunnelMessageKind::ToClientRequestChunk(v6::ToClientRequestChunk { + body: chunk.body, + finish: chunk.finish, + }) + } + v4::ToClientTunnelMessageKind::ToClientRequestAbort => { + 
v6::ToClientTunnelMessageKind::ToClientRequestAbort + } + v4::ToClientTunnelMessageKind::ToClientWebSocketOpen(ws) => { + v6::ToClientTunnelMessageKind::ToClientWebSocketOpen(v6::ToClientWebSocketOpen { + actor_id: ws.actor_id, + path: ws.path, + headers: ws.headers, + }) + } + v4::ToClientTunnelMessageKind::ToClientWebSocketMessage(msg) => { + v6::ToClientTunnelMessageKind::ToClientWebSocketMessage(v6::ToClientWebSocketMessage { + data: msg.data, + binary: msg.binary, + }) + } + v4::ToClientTunnelMessageKind::ToClientWebSocketClose(close) => { + v6::ToClientTunnelMessageKind::ToClientWebSocketClose(v6::ToClientWebSocketClose { + code: close.code, + reason: close.reason, + }) + } + } +} + +fn convert_to_client_tunnel_message_kind_v6_to_v4( + kind: v6::ToClientTunnelMessageKind, +) -> v4::ToClientTunnelMessageKind { + match kind { + v6::ToClientTunnelMessageKind::ToClientRequestStart(req) => { + v4::ToClientTunnelMessageKind::ToClientRequestStart(v4::ToClientRequestStart { + actor_id: req.actor_id, + method: req.method, + path: req.path, + headers: req.headers, + body: req.body, + stream: req.stream, + }) + } + v6::ToClientTunnelMessageKind::ToClientRequestChunk(chunk) => { + v4::ToClientTunnelMessageKind::ToClientRequestChunk(v4::ToClientRequestChunk { + body: chunk.body, + finish: chunk.finish, + }) + } + v6::ToClientTunnelMessageKind::ToClientRequestAbort => { + v4::ToClientTunnelMessageKind::ToClientRequestAbort + } + v6::ToClientTunnelMessageKind::ToClientWebSocketOpen(ws) => { + v4::ToClientTunnelMessageKind::ToClientWebSocketOpen(v4::ToClientWebSocketOpen { + actor_id: ws.actor_id, + path: ws.path, + headers: ws.headers, + }) + } + v6::ToClientTunnelMessageKind::ToClientWebSocketMessage(msg) => { + v4::ToClientTunnelMessageKind::ToClientWebSocketMessage(v4::ToClientWebSocketMessage { + data: msg.data, + binary: msg.binary, + }) + } + v6::ToClientTunnelMessageKind::ToClientWebSocketClose(close) => { + 
v4::ToClientTunnelMessageKind::ToClientWebSocketClose(v4::ToClientWebSocketClose { + code: close.code, + reason: close.reason, + }) + } + } +} + +fn convert_to_server_tunnel_message_kind_v4_to_v6( kind: v4::ToServerTunnelMessageKind, -) -> v5::ToServerTunnelMessageKind { +) -> v6::ToServerTunnelMessageKind { match kind { v4::ToServerTunnelMessageKind::ToServerResponseStart(resp) => { - v5::ToServerTunnelMessageKind::ToServerResponseStart(v5::ToServerResponseStart { + v6::ToServerTunnelMessageKind::ToServerResponseStart(v6::ToServerResponseStart { status: resp.status, headers: resp.headers, body: resp.body, @@ -3100,32 +3367,32 @@ fn convert_to_server_tunnel_message_kind_v4_to_v5( }) } v4::ToServerTunnelMessageKind::ToServerResponseChunk(chunk) => { - v5::ToServerTunnelMessageKind::ToServerResponseChunk(v5::ToServerResponseChunk { + v6::ToServerTunnelMessageKind::ToServerResponseChunk(v6::ToServerResponseChunk { body: chunk.body, finish: chunk.finish, }) } v4::ToServerTunnelMessageKind::ToServerResponseAbort => { - v5::ToServerTunnelMessageKind::ToServerResponseAbort + v6::ToServerTunnelMessageKind::ToServerResponseAbort } v4::ToServerTunnelMessageKind::ToServerWebSocketOpen(open) => { - v5::ToServerTunnelMessageKind::ToServerWebSocketOpen(v5::ToServerWebSocketOpen { + v6::ToServerTunnelMessageKind::ToServerWebSocketOpen(v6::ToServerWebSocketOpen { can_hibernate: open.can_hibernate, }) } v4::ToServerTunnelMessageKind::ToServerWebSocketMessage(msg) => { - v5::ToServerTunnelMessageKind::ToServerWebSocketMessage(v5::ToServerWebSocketMessage { + v6::ToServerTunnelMessageKind::ToServerWebSocketMessage(v6::ToServerWebSocketMessage { data: msg.data, binary: msg.binary, }) } v4::ToServerTunnelMessageKind::ToServerWebSocketMessageAck(ack) => { - v5::ToServerTunnelMessageKind::ToServerWebSocketMessageAck( - v5::ToServerWebSocketMessageAck { index: ack.index }, + v6::ToServerTunnelMessageKind::ToServerWebSocketMessageAck( + v6::ToServerWebSocketMessageAck { index: ack.index }, 
) } v4::ToServerTunnelMessageKind::ToServerWebSocketClose(close) => { - v5::ToServerTunnelMessageKind::ToServerWebSocketClose(v5::ToServerWebSocketClose { + v6::ToServerTunnelMessageKind::ToServerWebSocketClose(v6::ToServerWebSocketClose { code: close.code, reason: close.reason, hibernate: close.hibernate, @@ -3134,11 +3401,11 @@ fn convert_to_server_tunnel_message_kind_v4_to_v5( } } -fn convert_to_server_tunnel_message_kind_v5_to_v4( - kind: v5::ToServerTunnelMessageKind, +fn convert_to_server_tunnel_message_kind_v6_to_v4( + kind: v6::ToServerTunnelMessageKind, ) -> Result { Ok(match kind { - v5::ToServerTunnelMessageKind::ToServerResponseStart(resp) => { + v6::ToServerTunnelMessageKind::ToServerResponseStart(resp) => { v4::ToServerTunnelMessageKind::ToServerResponseStart(v4::ToServerResponseStart { status: resp.status, headers: resp.headers, @@ -3146,32 +3413,32 @@ fn convert_to_server_tunnel_message_kind_v5_to_v4( stream: resp.stream, }) } - v5::ToServerTunnelMessageKind::ToServerResponseChunk(chunk) => { + v6::ToServerTunnelMessageKind::ToServerResponseChunk(chunk) => { v4::ToServerTunnelMessageKind::ToServerResponseChunk(v4::ToServerResponseChunk { body: chunk.body, finish: chunk.finish, }) } - v5::ToServerTunnelMessageKind::ToServerResponseAbort => { + v6::ToServerTunnelMessageKind::ToServerResponseAbort => { v4::ToServerTunnelMessageKind::ToServerResponseAbort } - v5::ToServerTunnelMessageKind::ToServerWebSocketOpen(open) => { + v6::ToServerTunnelMessageKind::ToServerWebSocketOpen(open) => { v4::ToServerTunnelMessageKind::ToServerWebSocketOpen(v4::ToServerWebSocketOpen { can_hibernate: open.can_hibernate, }) } - v5::ToServerTunnelMessageKind::ToServerWebSocketMessage(msg) => { + v6::ToServerTunnelMessageKind::ToServerWebSocketMessage(msg) => { v4::ToServerTunnelMessageKind::ToServerWebSocketMessage(v4::ToServerWebSocketMessage { data: msg.data, binary: msg.binary, }) } - v5::ToServerTunnelMessageKind::ToServerWebSocketMessageAck(ack) => { + 
v6::ToServerTunnelMessageKind::ToServerWebSocketMessageAck(ack) => { v4::ToServerTunnelMessageKind::ToServerWebSocketMessageAck( v4::ToServerWebSocketMessageAck { index: ack.index }, ) } - v5::ToServerTunnelMessageKind::ToServerWebSocketClose(close) => { + v6::ToServerTunnelMessageKind::ToServerWebSocketClose(close) => { v4::ToServerTunnelMessageKind::ToServerWebSocketClose(v4::ToServerWebSocketClose { code: close.code, reason: close.reason, @@ -3180,3 +3447,179 @@ fn convert_to_server_tunnel_message_kind_v5_to_v4( } }) } + +// MARK: v5 <-> v6 helpers (ToClient; only ProtocolMetadata changed, other types are identical) + +fn convert_kv_response_data_v5_to_v6(data: v5::KvResponseData) -> v6::KvResponseData { + match data { + v5::KvResponseData::KvErrorResponse(err) => { + v6::KvResponseData::KvErrorResponse(v6::KvErrorResponse { + message: err.message, + }) + } + v5::KvResponseData::KvGetResponse(resp) => { + v6::KvResponseData::KvGetResponse(v6::KvGetResponse { + keys: resp.keys, + values: resp.values, + metadata: resp + .metadata + .into_iter() + .map(convert_kv_metadata_v5_to_v6) + .collect(), + }) + } + v5::KvResponseData::KvListResponse(resp) => { + v6::KvResponseData::KvListResponse(v6::KvListResponse { + keys: resp.keys, + values: resp.values, + metadata: resp + .metadata + .into_iter() + .map(convert_kv_metadata_v5_to_v6) + .collect(), + }) + } + v5::KvResponseData::KvPutResponse => v6::KvResponseData::KvPutResponse, + v5::KvResponseData::KvDeleteResponse => v6::KvResponseData::KvDeleteResponse, + v5::KvResponseData::KvDropResponse => v6::KvResponseData::KvDropResponse, + } +} + +fn convert_kv_response_data_v6_to_v5(data: v6::KvResponseData) -> v5::KvResponseData { + match data { + v6::KvResponseData::KvErrorResponse(err) => { + v5::KvResponseData::KvErrorResponse(v5::KvErrorResponse { + message: err.message, + }) + } + v6::KvResponseData::KvGetResponse(resp) => { + v5::KvResponseData::KvGetResponse(v5::KvGetResponse { + keys: resp.keys, + values: 
resp.values, + metadata: resp + .metadata + .into_iter() + .map(convert_kv_metadata_v6_to_v5) + .collect(), + }) + } + v6::KvResponseData::KvListResponse(resp) => { + v5::KvResponseData::KvListResponse(v5::KvListResponse { + keys: resp.keys, + values: resp.values, + metadata: resp + .metadata + .into_iter() + .map(convert_kv_metadata_v6_to_v5) + .collect(), + }) + } + v6::KvResponseData::KvPutResponse => v5::KvResponseData::KvPutResponse, + v6::KvResponseData::KvDeleteResponse => v5::KvResponseData::KvDeleteResponse, + v6::KvResponseData::KvDropResponse => v5::KvResponseData::KvDropResponse, + } +} + +fn convert_kv_metadata_v5_to_v6(metadata: v5::KvMetadata) -> v6::KvMetadata { + v6::KvMetadata { + version: metadata.version, + update_ts: metadata.update_ts, + } +} + +fn convert_kv_metadata_v6_to_v5(metadata: v6::KvMetadata) -> v5::KvMetadata { + v5::KvMetadata { + version: metadata.version, + update_ts: metadata.update_ts, + } +} + +fn convert_to_client_tunnel_message_kind_v5_to_v6( + kind: v5::ToClientTunnelMessageKind, +) -> v6::ToClientTunnelMessageKind { + match kind { + v5::ToClientTunnelMessageKind::ToClientRequestStart(req) => { + v6::ToClientTunnelMessageKind::ToClientRequestStart(v6::ToClientRequestStart { + actor_id: req.actor_id, + method: req.method, + path: req.path, + headers: req.headers, + body: req.body, + stream: req.stream, + }) + } + v5::ToClientTunnelMessageKind::ToClientRequestChunk(chunk) => { + v6::ToClientTunnelMessageKind::ToClientRequestChunk(v6::ToClientRequestChunk { + body: chunk.body, + finish: chunk.finish, + }) + } + v5::ToClientTunnelMessageKind::ToClientRequestAbort => { + v6::ToClientTunnelMessageKind::ToClientRequestAbort + } + v5::ToClientTunnelMessageKind::ToClientWebSocketOpen(ws) => { + v6::ToClientTunnelMessageKind::ToClientWebSocketOpen(v6::ToClientWebSocketOpen { + actor_id: ws.actor_id, + path: ws.path, + headers: ws.headers, + }) + } + v5::ToClientTunnelMessageKind::ToClientWebSocketMessage(msg) => { + 
v6::ToClientTunnelMessageKind::ToClientWebSocketMessage(v6::ToClientWebSocketMessage { + data: msg.data, + binary: msg.binary, + }) + } + v5::ToClientTunnelMessageKind::ToClientWebSocketClose(close) => { + v6::ToClientTunnelMessageKind::ToClientWebSocketClose(v6::ToClientWebSocketClose { + code: close.code, + reason: close.reason, + }) + } + } +} + +fn convert_to_client_tunnel_message_kind_v6_to_v5( + kind: v6::ToClientTunnelMessageKind, +) -> v5::ToClientTunnelMessageKind { + match kind { + v6::ToClientTunnelMessageKind::ToClientRequestStart(req) => { + v5::ToClientTunnelMessageKind::ToClientRequestStart(v5::ToClientRequestStart { + actor_id: req.actor_id, + method: req.method, + path: req.path, + headers: req.headers, + body: req.body, + stream: req.stream, + }) + } + v6::ToClientTunnelMessageKind::ToClientRequestChunk(chunk) => { + v5::ToClientTunnelMessageKind::ToClientRequestChunk(v5::ToClientRequestChunk { + body: chunk.body, + finish: chunk.finish, + }) + } + v6::ToClientTunnelMessageKind::ToClientRequestAbort => { + v5::ToClientTunnelMessageKind::ToClientRequestAbort + } + v6::ToClientTunnelMessageKind::ToClientWebSocketOpen(ws) => { + v5::ToClientTunnelMessageKind::ToClientWebSocketOpen(v5::ToClientWebSocketOpen { + actor_id: ws.actor_id, + path: ws.path, + headers: ws.headers, + }) + } + v6::ToClientTunnelMessageKind::ToClientWebSocketMessage(msg) => { + v5::ToClientTunnelMessageKind::ToClientWebSocketMessage(v5::ToClientWebSocketMessage { + data: msg.data, + binary: msg.binary, + }) + } + v6::ToClientTunnelMessageKind::ToClientWebSocketClose(close) => { + v5::ToClientTunnelMessageKind::ToClientWebSocketClose(v5::ToClientWebSocketClose { + code: close.code, + reason: close.reason, + }) + } + } +} diff --git a/engine/sdks/schemas/runner-protocol/v6.bare b/engine/sdks/schemas/runner-protocol/v6.bare new file mode 100644 index 0000000000..d469f724fb --- /dev/null +++ b/engine/sdks/schemas/runner-protocol/v6.bare @@ -0,0 +1,432 @@ +# Runner Protocol v6 + +# 
MARK: Core Primitives + +type Id str +type Json str + +type GatewayId data[4] +type RequestId data[4] +type MessageIndex u16 + +# MARK: KV + +# Basic types +type KvKey data +type KvValue data +type KvMetadata struct { + version: data + updateTs: i64 +} + +# Query types +type KvListAllQuery void +type KvListRangeQuery struct { + start: KvKey + end: KvKey + exclusive: bool +} + +type KvListPrefixQuery struct { + key: KvKey +} + +type KvListQuery union { + KvListAllQuery | + KvListRangeQuery | + KvListPrefixQuery +} + +# Request types +type KvGetRequest struct { + keys: list +} + +type KvListRequest struct { + query: KvListQuery + reverse: optional + limit: optional +} + +type KvPutRequest struct { + keys: list + values: list +} + +type KvDeleteRequest struct { + keys: list +} + +type KvDropRequest void + +# Response types +type KvErrorResponse struct { + message: str +} + +type KvGetResponse struct { + keys: list + values: list + metadata: list +} + +type KvListResponse struct { + keys: list + values: list + metadata: list +} + +type KvPutResponse void +type KvDeleteResponse void +type KvDropResponse void + +# Request/Response unions +type KvRequestData union { + KvGetRequest | + KvListRequest | + KvPutRequest | + KvDeleteRequest | + KvDropRequest +} + +type KvResponseData union { + KvErrorResponse | + KvGetResponse | + KvListResponse | + KvPutResponse | + KvDeleteResponse | + KvDropResponse +} + +# MARK: Actor + +# Core +type StopCode enum { + OK + ERROR +} + +type ActorName struct { + metadata: Json +} + +type ActorConfig struct { + name: str + key: optional + createTs: i64 + input: optional +} + +type ActorCheckpoint struct { + actorId: Id + generation: u32 + index: i64 +} + +# Intent +type ActorIntentSleep void + +type ActorIntentStop void + +type ActorIntent union { + ActorIntentSleep | + ActorIntentStop +} + +# State +type ActorStateRunning void + +type ActorStateStopped struct { + code: StopCode + message: optional +} + +type ActorState union { + 
ActorStateRunning | + ActorStateStopped +} + +# MARK: Events +type EventActorIntent struct { + intent: ActorIntent +} + +type EventActorStateUpdate struct { + state: ActorState +} + +type EventActorSetAlarm struct { + alarmTs: optional +} + +type Event union { + EventActorIntent | + EventActorStateUpdate | + EventActorSetAlarm +} + +type EventWrapper struct { + checkpoint: ActorCheckpoint + inner: Event +} + +# MARK: Commands + +type HibernatingRequest struct { + gatewayId: GatewayId + requestId: RequestId +} + +type CommandStartActor struct { + config: ActorConfig + hibernatingRequests: list +} + +type CommandStopActor void + +type Command union { + CommandStartActor | + CommandStopActor +} + +type CommandWrapper struct { + checkpoint: ActorCheckpoint + inner: Command +} + +# We redeclare this so it's top level +type ActorCommandKeyData union { + CommandStartActor | + CommandStopActor +} + +# MARK: Tunnel + +# Message ID + +type MessageId struct { + # Globally unique ID + gatewayId: GatewayId + # Unique ID to the gateway + requestId: RequestId + # Unique ID to the request + messageIndex: MessageIndex +} + + +# HTTP +type ToClientRequestStart struct { + actorId: Id + method: str + path: str + headers: map + body: optional + stream: bool +} + +type ToClientRequestChunk struct { + body: data + finish: bool +} + +type ToClientRequestAbort void + +type ToServerResponseStart struct { + status: u16 + headers: map + body: optional + stream: bool +} + +type ToServerResponseChunk struct { + body: data + finish: bool +} + +type ToServerResponseAbort void + +# WebSocket +type ToClientWebSocketOpen struct { + actorId: Id + path: str + headers: map +} + +type ToClientWebSocketMessage struct { + data: data + binary: bool +} + +type ToClientWebSocketClose struct { + code: optional + reason: optional +} + +type ToServerWebSocketOpen struct { + canHibernate: bool +} + +type ToServerWebSocketMessage struct { + data: data + binary: bool +} + +type ToServerWebSocketMessageAck struct { 
+ index: MessageIndex +} + +type ToServerWebSocketClose struct { + code: optional + reason: optional + hibernate: bool +} + +# To Server +type ToServerTunnelMessageKind union { + # HTTP + ToServerResponseStart | + ToServerResponseChunk | + ToServerResponseAbort | + + # WebSocket + ToServerWebSocketOpen | + ToServerWebSocketMessage | + ToServerWebSocketMessageAck | + ToServerWebSocketClose +} + +type ToServerTunnelMessage struct { + messageId: MessageId + messageKind: ToServerTunnelMessageKind +} + +# To Client +type ToClientTunnelMessageKind union { + # HTTP + ToClientRequestStart | + ToClientRequestChunk | + ToClientRequestAbort | + + # WebSocket + ToClientWebSocketOpen | + ToClientWebSocketMessage | + ToClientWebSocketClose +} + +type ToClientTunnelMessage struct { + messageId: MessageId + messageKind: ToClientTunnelMessageKind +} + +type ToClientPing struct { + ts: i64 +} + +# MARK: To Server +type ToServerInit struct { + name: str + version: u32 + totalSlots: u32 + prepopulateActorNames: optional> + metadata: optional +} + +type ToServerEvents list + +type ToServerAckCommands struct { + lastCommandCheckpoints: list +} + +type ToServerStopping void + +type ToServerPong struct { + ts: i64 +} + +type ToServerKvRequest struct { + actorId: Id + requestId: u32 + data: KvRequestData +} + +type ToServer union { + ToServerInit | + ToServerEvents | + ToServerAckCommands | + ToServerStopping | + ToServerPong | + ToServerKvRequest | + ToServerTunnelMessage +} + +# MARK: To Client +type ProtocolMetadata struct { + runnerLostThreshold: i64 + actorStopThreshold: i64 + serverlessDrainGracePeriod: optional +} + +type ToClientInit struct { + runnerId: Id + metadata: ProtocolMetadata +} + +type ToClientCommands list + +type ToClientAckEvents struct { + lastEventCheckpoints: list +} + +type ToClientKvResponse struct { + requestId: u32 + data: KvResponseData +} + +type ToClient union { + ToClientInit | + ToClientCommands | + ToClientAckEvents | + ToClientKvResponse | + 
ToClientTunnelMessage | + ToClientPing +} + +# MARK: To Runner +type ToRunnerPing struct { + gatewayId: GatewayId + requestId: RequestId + ts: i64 +} + +type ToRunnerClose void + +# We have to re-declare the entire union since BARE will not generate the +# ser/de for ToClient if it's not a top-level type +type ToRunner union { + ToRunnerPing | + ToRunnerClose | + ToClientCommands | + ToClientAckEvents | + ToClientTunnelMessage +} + +# MARK: To Gateway +type ToGatewayPong struct { + requestId: RequestId + ts: i64 +} + +type ToGateway union { + ToGatewayPong | + ToServerTunnelMessage +} + +# MARK: Serverless +type ToServerlessServerInit struct { + runnerId: Id + runnerProtocolVersion: u16 +} + +type ToServerlessServer union { + ToServerlessServerInit +} diff --git a/engine/sdks/typescript/runner-protocol/src/index.ts b/engine/sdks/typescript/runner-protocol/src/index.ts index b6b7619a84..0e391552ab 100644 --- a/engine/sdks/typescript/runner-protocol/src/index.ts +++ b/engine/sdks/typescript/runner-protocol/src/index.ts @@ -1743,16 +1743,22 @@ export function decodeToServer(bytes: Uint8Array): ToServer { */ export type ProtocolMetadata = { readonly runnerLostThreshold: i64 + readonly actorStopThreshold: i64 + readonly serverlessDrainGracePeriod: i64 | null } export function readProtocolMetadata(bc: bare.ByteCursor): ProtocolMetadata { return { runnerLostThreshold: bare.readI64(bc), + actorStopThreshold: bare.readI64(bc), + serverlessDrainGracePeriod: read7(bc), } } export function writeProtocolMetadata(bc: bare.ByteCursor, x: ProtocolMetadata): void { bare.writeI64(bc, x.runnerLostThreshold) + bare.writeI64(bc, x.actorStopThreshold) + write7(bc, x.serverlessDrainGracePeriod) } export type ToClientInit = { diff --git a/engine/sdks/typescript/runner/src/mod.ts b/engine/sdks/typescript/runner/src/mod.ts index 80834e10dc..405d28443f 100644 --- a/engine/sdks/typescript/runner/src/mod.ts +++ b/engine/sdks/typescript/runner/src/mod.ts @@ -208,8 +208,10 @@ export class Runner 
{ #reconnectAttempt: number = 0; #reconnectTimeout?: NodeJS.Timeout; + // Protocol metadata + #protocolMetadata?: protocol.ProtocolMetadata; + // Runner lost threshold management - #runnerLostThreshold?: number; #runnerLostTimeout?: NodeJS.Timeout; // Event storage for resending @@ -839,14 +841,11 @@ export class Runner { this.#stopAllActors(); } - // Store the runner lost threshold from metadata - this.#runnerLostThreshold = init.metadata?.runnerLostThreshold - ? Number(init.metadata.runnerLostThreshold) - : undefined; + this.#protocolMetadata = init.metadata; this.log?.info({ msg: "received init", - runnerLostThreshold: this.#runnerLostThreshold, + protocolMetadata: this.#protocolMetadata, }); // Resend pending events @@ -888,27 +887,7 @@ export class Runner { }); if (!this.#shutdown) { - // Start runner lost timeout if we have a threshold and are not shutting down - if ( - !this.#runnerLostTimeout && - this.#runnerLostThreshold && - this.#runnerLostThreshold > 0 - ) { - this.log?.info({ - msg: "starting runner lost timeout", - seconds: this.#runnerLostThreshold / 1000, - }); - this.#runnerLostTimeout = setTimeout(() => { - try { - this.#handleLost(); - } catch (err) { - this.log?.error({ - msg: "error handling runner lost", - error: stringifyError(err), - }); - } - }, this.#runnerLostThreshold); - } + this.#startRunnerLostTimeout(); // Attempt to reconnect if not stopped this.#scheduleReconnect(); @@ -944,27 +923,7 @@ export class Runner { this.#ackInterval = undefined; } - // Start runner lost timeout if we have a threshold and are not shutting down - if ( - !this.#runnerLostTimeout && - this.#runnerLostThreshold && - this.#runnerLostThreshold > 0 - ) { - this.log?.info({ - msg: "starting runner lost timeout", - seconds: this.#runnerLostThreshold / 1000, - }); - this.#runnerLostTimeout = setTimeout(() => { - try { - this.#handleLost(); - } catch (err) { - this.log?.error({ - msg: "error handling runner lost", - error: stringifyError(err), - }); - } - }, 
this.#runnerLostThreshold); - } + this.#startRunnerLostTimeout(); // Attempt to reconnect if not stopped this.#scheduleReconnect(); @@ -976,6 +935,30 @@ export class Runner { }); } + #startRunnerLostTimeout() { + // Start runner lost timeout if we have a threshold and are not shutting down + if ( + !this.#runnerLostTimeout && + this.#protocolMetadata && + this.#protocolMetadata.runnerLostThreshold > 0 + ) { + this.log?.info({ + msg: "starting runner lost timeout", + seconds: this.#protocolMetadata.runnerLostThreshold / 1000n, + }); + this.#runnerLostTimeout = setTimeout(() => { + try { + this.#handleLost(); + } catch (err) { + this.log?.error({ + msg: "error handling runner lost", + error: stringifyError(err), + }); + } + }, Number(this.#protocolMetadata.runnerLostThreshold)); + } + } + #handleCommands(commands: protocol.ToClientCommands) { this.log?.info({ msg: "received commands", @@ -1859,4 +1842,8 @@ export class Runner { //this.#log?.log(`Cleaned up ${toDelete.length} expired KV requests`); } } + + getProtocolMetadata(): protocol.ProtocolMetadata | undefined { + return this.#protocolMetadata; + } } diff --git a/engine/sdks/typescript/runner/src/tunnel.ts b/engine/sdks/typescript/runner/src/tunnel.ts index 0175d15454..0894eaa3e0 100644 --- a/engine/sdks/typescript/runner/src/tunnel.ts +++ b/engine/sdks/typescript/runner/src/tunnel.ts @@ -15,7 +15,7 @@ import { stringifyToClientTunnelMessageKind, stringifyToServerTunnelMessageKind, } from "./stringify"; -import { arraysEqual, idToStr, stringifyError, unreachable } from "./utils"; +import { arraysEqual, idToStr, MAX_BODY_SIZE, stringifyError, unreachable } from "./utils"; import { HIBERNATABLE_SYMBOL, WebSocketTunnelAdapter, @@ -855,6 +855,10 @@ export class Tunnel { // Read the body first to get the actual content const body = response.body ? 
await response.arrayBuffer() : null; + if (body && body.byteLength > MAX_BODY_SIZE) { + throw new Error("Response body too large"); + } + // Convert headers to map and add Content-Length if not present const headers = new Map(); response.headers.forEach((value, key) => { @@ -1079,7 +1083,7 @@ export class Tunnel { }); if (clientMessageIndex < 0 || clientMessageIndex > 65535) - throw new Error("invalid websocket ack index"); + throw new Error("Invalid websocket ack index"); // Get the actor to find the gatewayId // @@ -1157,7 +1161,7 @@ function buildRequestForWebSocket( }; if (!path.startsWith("/")) { - throw new Error("path must start with leading slash"); + throw new Error("Path must start with leading slash"); } const request = new Request(`http://actor${path}`, { diff --git a/engine/sdks/typescript/runner/src/utils.ts b/engine/sdks/typescript/runner/src/utils.ts index c21c68ed64..b01d06be85 100644 --- a/engine/sdks/typescript/runner/src/utils.ts +++ b/engine/sdks/typescript/runner/src/utils.ts @@ -1,5 +1,8 @@ import { logger } from "./log"; +// 20MiB. 
Keep in sync with MAX_BODY_SIZE from engine/packages/guard-core/src/proxy_service.rs +export const MAX_BODY_SIZE = 20 * 1024 * 1024; + export function unreachable(x: never): never { throw `Unreachable: ${x}`; } diff --git a/engine/sdks/typescript/runner/src/websocket-tunnel-adapter.ts b/engine/sdks/typescript/runner/src/websocket-tunnel-adapter.ts index 42eaaf137d..75f1f6fbb8 100644 --- a/engine/sdks/typescript/runner/src/websocket-tunnel-adapter.ts +++ b/engine/sdks/typescript/runner/src/websocket-tunnel-adapter.ts @@ -1,7 +1,7 @@ import type { Logger } from "pino"; import { VirtualWebSocket, type UniversalWebSocket, type RivetMessageEvent } from "@rivetkit/virtual-websocket"; import type { Tunnel } from "./tunnel"; -import { wrappingAddU16, wrappingLteU16, wrappingSubU16 } from "./utils"; +import { MAX_BODY_SIZE, wrappingAddU16, wrappingLteU16, wrappingSubU16 } from "./utils"; export const HIBERNATABLE_SYMBOL = Symbol("hibernatable"); @@ -70,11 +70,20 @@ export class WebSocketTunnelAdapter { let messageData: string | ArrayBuffer; if (typeof data === "string") { + const encoder = new TextEncoder(); + if (encoder.encode(data).byteLength > MAX_BODY_SIZE) { + throw new Error("WebSocket message too large"); + } + messageData = data; } else if (data instanceof ArrayBuffer) { + if (data.byteLength > MAX_BODY_SIZE) throw new Error("WebSocket message too large"); + isBinary = true; messageData = data; } else if (ArrayBuffer.isView(data)) { + if (data.byteLength > MAX_BODY_SIZE) throw new Error("WebSocket message too large"); + isBinary = true; const view = data; const buffer = view.buffer instanceof SharedArrayBuffer diff --git a/rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/conn-error-serialization.ts b/rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/conn-error-serialization.ts new file mode 100644 index 0000000000..900943bbc8 --- /dev/null +++ 
b/rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/conn-error-serialization.ts @@ -0,0 +1,27 @@ +import { actor } from "rivetkit"; +import { ActorError } from "@/actor/errors"; + +// Custom error that will be thrown in createConnState +class CustomConnectionError extends ActorError { + constructor(message: string) { + super("connection", "custom_error", message, { public: true }); + } +} + +/** + * Actor that throws a custom error in createConnState to test error serialization + */ +export const connErrorSerializationActor = actor({ + state: { + value: 0, + }, + createConnState: (_c, params: { shouldThrow?: boolean }) => { + if (params.shouldThrow) { + throw new CustomConnectionError("Test error from createConnState"); + } + return { initialized: true }; + }, + actions: { + getValue: (c) => c.state.value, + }, +}); diff --git a/rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/registry.ts b/rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/registry.ts index d85c7e881e..4b9b840a39 100644 --- a/rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/registry.ts +++ b/rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/registry.ts @@ -75,6 +75,8 @@ import { workflowSleepActor, workflowStopTeardownActor, } from "./workflow"; +import { startStopRaceActor, lifecycleObserver } from "./start-stop-race"; +import { connErrorSerializationActor } from "./conn-error-serialization"; // Consolidated setup with all actors export const registry = setup({ @@ -177,5 +179,10 @@ export const registry = setup({ // From access-control.ts accessControlActor, accessControlNoQueuesActor, + // From start-stop-race.ts + startStopRaceActor, + lifecycleObserver, + // From conn-error-serialization.ts + connErrorSerializationActor, }, }); diff --git a/rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/start-stop-race.ts b/rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/start-stop-race.ts new file 
mode 100644 index 0000000000..9fad609233 --- /dev/null +++ b/rivetkit-typescript/packages/rivetkit/fixtures/driver-test-suite/start-stop-race.ts @@ -0,0 +1,71 @@ +import { actor } from "rivetkit"; + +/** + * Actor designed to test start/stop race conditions. + * Has a slow initialization to make race conditions easier to trigger. + */ +export const startStopRaceActor = actor({ + state: { + initialized: false, + startTime: 0, + destroyCalled: false, + startCompleted: false, + }, + onWake: async (c) => { + c.state.startTime = Date.now(); + + // Simulate slow initialization to create window for race condition + await new Promise((resolve) => setTimeout(resolve, 100)); + + c.state.initialized = true; + c.state.startCompleted = true; + }, + onDestroy: (c) => { + c.state.destroyCalled = true; + // Don't save state here - the actor framework will save it automatically + }, + actions: { + getState: (c) => { + return { + initialized: c.state.initialized, + startTime: c.state.startTime, + destroyCalled: c.state.destroyCalled, + startCompleted: c.state.startCompleted, + }; + }, + ping: (c) => { + return "pong"; + }, + destroy: (c) => { + c.destroy(); + }, + }, +}); + +/** + * Observer actor to track lifecycle events from other actors + */ +export const lifecycleObserver = actor({ + state: { + events: [] as Array<{ + actorKey: string; + event: string; + timestamp: number; + }>, + }, + actions: { + recordEvent: (c, params: { actorKey: string; event: string }) => { + c.state.events.push({ + actorKey: params.actorKey, + event: params.event, + timestamp: Date.now(), + }); + }, + getEvents: (c) => { + return c.state.events; + }, + clearEvents: (c) => { + c.state.events = []; + }, + }, +}); diff --git a/rivetkit-typescript/packages/rivetkit/src/actor/config.ts b/rivetkit-typescript/packages/rivetkit/src/actor/config.ts index 06a1b77ca5..ae64722afe 100644 --- a/rivetkit-typescript/packages/rivetkit/src/actor/config.ts +++ b/rivetkit-typescript/packages/rivetkit/src/actor/config.ts @@ 
-208,9 +208,7 @@ export const ActorConfigSchema = z createVarsTimeout: z.number().positive().default(5000), createConnStateTimeout: z.number().positive().default(5000), onConnectTimeout: z.number().positive().default(5000), - // This must be less than engine config > pegboard.actor_stop_threshold onSleepTimeout: z.number().positive().default(5000), - // This must be less than engine config > pegboard.actor_stop_threshold onDestroyTimeout: z.number().positive().default(5000), stateSaveInterval: z.number().positive().default(10_000), actionTimeout: z.number().positive().default(60_000), diff --git a/rivetkit-typescript/packages/rivetkit/src/actor/instance/mod.ts b/rivetkit-typescript/packages/rivetkit/src/actor/instance/mod.ts index 197cc6948d..41d0d048f0 100644 --- a/rivetkit-typescript/packages/rivetkit/src/actor/instance/mod.ts +++ b/rivetkit-typescript/packages/rivetkit/src/actor/instance/mod.ts @@ -195,6 +195,19 @@ export class ActorInstance< // MARK: - Tracing #traces!: Traces; + // MARK: - Driver Overrides + /** + * Per-instance config option overrides applied by the driver after creation. + * When set, the effective option value is the minimum of the base config + * value and the override value. + */ + overrides: { + onSleepTimeout?: number; + onDestroyTimeout?: number; + runStopTimeout?: number; + waitUntilTimeout?: number; + } = {}; + // MARK: - Constructor constructor(config: ActorConfig) { this.#config = config; @@ -495,7 +508,7 @@ export class ActorInstance< } catch { } // Wait for run handler to complete - await this.#waitForRunHandler(this.#config.options.runStopTimeout); + await this.#waitForRunHandler(this.overrides.runStopTimeout !== undefined ? 
Math.min(this.#config.options.runStopTimeout, this.overrides.runStopTimeout) : this.#config.options.runStopTimeout); // Call onStop lifecycle if (mode === "sleep") { @@ -511,7 +524,7 @@ export class ActorInstance< // Wait for background tasks await this.#waitBackgroundPromises( - this.#config.options.waitUntilTimeout, + this.overrides.waitUntilTimeout !== undefined ? Math.min(this.#config.options.waitUntilTimeout, this.overrides.waitUntilTimeout) : this.#config.options.waitUntilTimeout, ); // Clear timeouts and save state @@ -1265,7 +1278,7 @@ export class ActorInstance< if (result instanceof Promise) { await deadline( result, - this.#config.options.onSleepTimeout, + this.overrides.onSleepTimeout !== undefined ? Math.min(this.#config.options.onSleepTimeout, this.overrides.onSleepTimeout) : this.#config.options.onSleepTimeout, ); } }, @@ -1297,7 +1310,7 @@ export class ActorInstance< if (result instanceof Promise) { await deadline( result, - this.#config.options.onDestroyTimeout, + this.overrides.onDestroyTimeout !== undefined ? 
Math.min(this.#config.options.onDestroyTimeout, this.overrides.onDestroyTimeout) : this.#config.options.onDestroyTimeout, ); } }, diff --git a/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/mod.ts b/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/mod.ts index cf3590672e..98db320e1a 100644 --- a/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/mod.ts +++ b/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/mod.ts @@ -17,6 +17,7 @@ import { runActorConnTests } from "./tests/actor-conn"; import { runActorConnHibernationTests } from "./tests/actor-conn-hibernation"; import { runActorConnStateTests } from "./tests/actor-conn-state"; import { runActorDbTests } from "./tests/actor-db"; +import { runConnErrorSerializationTests } from "./tests/conn-error-serialization"; import { runActorDestroyTests } from "./tests/actor-destroy"; import { runActorDriverTests } from "./tests/actor-driver"; import { runActorErrorHandlingTests } from "./tests/actor-error-handling"; @@ -111,6 +112,8 @@ export function runDriverTests( runActorConnHibernationTests(driverTestConfig); + runConnErrorSerializationTests(driverTestConfig); + runActorDbTests(driverTestConfig); runActorDestroyTests(driverTestConfig); diff --git a/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/tests/actor-driver.ts b/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/tests/actor-driver.ts index 438348285a..efa2d96cd9 100644 --- a/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/tests/actor-driver.ts +++ b/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/tests/actor-driver.ts @@ -1,5 +1,6 @@ import { describe } from "vitest"; import type { DriverTestConfig } from "../mod"; +import { runActorLifecycleTests } from "./actor-lifecycle"; import { runActorScheduleTests } from "./actor-schedule"; import { runActorSleepTests } from "./actor-sleep"; import { runActorStateTests } from "./actor-state"; @@ -14,5 +15,8 @@ export function 
runActorDriverTests(driverTestConfig: DriverTestConfig) { // Run actor sleep tests runActorSleepTests(driverTestConfig); + + // Run actor lifecycle tests + runActorLifecycleTests(driverTestConfig); }); } diff --git a/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/tests/actor-lifecycle.ts b/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/tests/actor-lifecycle.ts new file mode 100644 index 0000000000..7333cfa977 --- /dev/null +++ b/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/tests/actor-lifecycle.ts @@ -0,0 +1,157 @@ +import { describe, expect, test } from "vitest"; +import type { DriverTestConfig } from "../mod"; +import { setupDriverTest } from "../utils"; + +export function runActorLifecycleTests(driverTestConfig: DriverTestConfig) { + describe("Actor Lifecycle Tests", () => { + test("actor stop during start waits for start to complete", async (c) => { + const { client } = await setupDriverTest(c, driverTestConfig); + + const actorKey = `test-stop-during-start-${Date.now()}`; + + // Create actor - this starts the actor + const actor = client.startStopRaceActor.getOrCreate([actorKey]); + + // Immediately try to call an action and then destroy + // This creates a race where the actor might not be fully started yet + const pingPromise = actor.ping(); + + // Get actor ID + const actorId = await actor.resolve(); + + // Destroy immediately while start might still be in progress + await actor.destroy(); + + // The ping should still complete successfully because destroy waits for start + const result = await pingPromise; + expect(result).toBe("pong"); + + // Verify actor was actually destroyed + let destroyed = false; + try { + await client.startStopRaceActor.getForId(actorId).ping(); + } catch (err: any) { + destroyed = true; + expect(err.group).toBe("actor"); + expect(err.code).toBe("not_found"); + } + expect(destroyed).toBe(true); + }); + + test("actor stop before actor instantiation completes cleans up handler", async (c) => { 
+ const { client } = await setupDriverTest(c, driverTestConfig); + + const actorKey = `test-stop-before-instantiation-${Date.now()}`; + + // Create multiple actors rapidly to increase chance of race + const actors = Array.from({ length: 5 }, (_, i) => + client.startStopRaceActor.getOrCreate([ + `${actorKey}-${i}`, + ]), + ); + + // Resolve all actor IDs (this triggers start) + const ids = await Promise.all(actors.map((a) => a.resolve())); + + // Immediately destroy all actors + await Promise.all(actors.map((a) => a.destroy())); + + // Verify all actors were cleaned up + for (const id of ids) { + let destroyed = false; + try { + await client.startStopRaceActor.getForId(id).ping(); + } catch (err: any) { + destroyed = true; + expect(err.group).toBe("actor"); + expect(err.code).toBe("not_found"); + } + expect(destroyed, `actor ${id} should be destroyed`).toBe( + true, + ); + } + }); + + test("onBeforeActorStart completes before stop proceeds", async (c) => { + const { client } = await setupDriverTest(c, driverTestConfig); + + const actorKey = `test-before-actor-start-${Date.now()}`; + + // Create actor + const actor = client.startStopRaceActor.getOrCreate([actorKey]); + + // Call action to ensure actor is starting + const statePromise = actor.getState(); + + // Destroy immediately + await actor.destroy(); + + // State should be initialized because onBeforeActorStart must complete + const state = await statePromise; + expect(state.initialized).toBe(true); + expect(state.startCompleted).toBe(true); + }); + + test("multiple rapid create/destroy cycles handle race correctly", async (c) => { + const { client } = await setupDriverTest(c, driverTestConfig); + + // Perform multiple rapid create/destroy cycles + for (let i = 0; i < 10; i++) { + const actorKey = `test-rapid-cycle-${Date.now()}-${i}`; + const actor = client.startStopRaceActor.getOrCreate([ + actorKey, + ]); + + // Trigger start + const resolvePromise = actor.resolve(); + + // Immediately destroy + const 
destroyPromise = actor.destroy(); + + // Both should complete without errors + await Promise.all([resolvePromise, destroyPromise]); + } + + // If we get here without errors, the race condition is handled correctly + expect(true).toBe(true); + }); + + test("actor stop called with no actor instance cleans up handler", async (c) => { + const { client } = await setupDriverTest(c, driverTestConfig); + + const actorKey = `test-cleanup-no-instance-${Date.now()}`; + + // Create and immediately destroy + const actor = client.startStopRaceActor.getOrCreate([actorKey]); + const id = await actor.resolve(); + await actor.destroy(); + + // Try to recreate with same key - should work without issues + const newActor = client.startStopRaceActor.getOrCreate([ + actorKey, + ]); + const result = await newActor.ping(); + expect(result).toBe("pong"); + + // Clean up + await newActor.destroy(); + }); + + test("onDestroy is called even when actor is destroyed during start", async (c) => { + const { client } = await setupDriverTest(c, driverTestConfig); + + const actorKey = `test-ondestroy-during-start-${Date.now()}`; + + // Create actor + const actor = client.startStopRaceActor.getOrCreate([actorKey]); + + // Start and immediately destroy + const statePromise = actor.getState(); + await actor.destroy(); + + // Verify onDestroy was called (requires actor to be started) + const state = await statePromise; + expect(state.destroyCalled).toBe(true); + }); + }); +} diff --git a/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/tests/conn-error-serialization.ts b/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/tests/conn-error-serialization.ts new file mode 100644 index 0000000000..e5ccf1ef23 --- /dev/null +++ b/rivetkit-typescript/packages/rivetkit/src/driver-test-suite/tests/conn-error-serialization.ts @@ -0,0 +1,64 @@ +import { describe, expect, test } from "vitest"; +import type { DriverTestConfig } from "../mod"; +import { setupDriverTest } from "../utils"; + +export 
function runConnErrorSerializationTests(driverTestConfig: DriverTestConfig) { + describe("Connection Error Serialization Tests", () => { + test("error thrown in createConnState preserves group and code through WebSocket serialization", async (c) => { + const { client } = await setupDriverTest(c, driverTestConfig); + + const actorKey = `test-error-serialization-${Date.now()}`; + + // Create actor handle with params that will trigger error in createConnState + const actor = client.connErrorSerializationActor.getOrCreate( + [actorKey], + { params: { shouldThrow: true } }, + ); + + // Try to connect, which will trigger error in createConnState + const conn = actor.connect(); + + // Wait for connection to fail + let caughtError: any; + try { + // Try to call an action, which should fail because connection couldn't be established + await conn.getValue(); + } catch (err) { + caughtError = err; + } + + // Verify the error was caught + expect(caughtError).toBeDefined(); + + // Verify the error has the correct group and code from the original error + // Original error: new CustomConnectionError("...") with group="connection", code="custom_error" + expect(caughtError.group).toBe("connection"); + expect(caughtError.code).toBe("custom_error"); + + // Clean up + await conn.dispose(); + }); + + test("successful createConnState does not throw error", async (c) => { + const { client } = await setupDriverTest(c, driverTestConfig); + + const actorKey = `test-no-error-${Date.now()}`; + + // Create actor handle with params that will NOT trigger error + const actor = client.connErrorSerializationActor.getOrCreate( + [actorKey], + { params: { shouldThrow: false } }, + ); + + // Connect without triggering error + const conn = actor.connect(); + + // This should succeed + const value = await conn.getValue(); + expect(value).toBe(0); + + // Clean up + await conn.dispose(); + }); + }); +} diff --git a/rivetkit-typescript/packages/rivetkit/src/drivers/engine/actor-driver.ts 
b/rivetkit-typescript/packages/rivetkit/src/drivers/engine/actor-driver.ts index fb3b50f133..4adb8959fa 100644 --- a/rivetkit-typescript/packages/rivetkit/src/drivers/engine/actor-driver.ts +++ b/rivetkit-typescript/packages/rivetkit/src/drivers/engine/actor-driver.ts @@ -156,7 +156,7 @@ export class EngineActorDriver implements ActorDriver { onConnected: () => { this.#runnerStarted.resolve(undefined); }, - onDisconnected: (_code, _reason) => {}, + onDisconnected: (_code, _reason) => { }, onShutdown: () => { this.#runnerStopped.resolve(undefined); this.#isRunnerStopped = true; @@ -395,7 +395,7 @@ export class EngineActorDriver implements ActorDriver { async serverlessHandleStart(c: HonoContext): Promise { return streamSSE(c, async (stream) => { // NOTE: onAbort does not work reliably - stream.onAbort(() => {}); + stream.onAbort(() => { }); c.req.raw.signal.addEventListener("abort", () => { logger().debug("SSE aborted, shutting down runner"); @@ -497,8 +497,28 @@ export class EngineActorDriver implements ActorDriver { // Create actor instance const definition = lookupInRegistry(this.#config, actorConfig.name); + handler.actor = await definition.instantiate(); + // Apply protocol limits as per-instance overrides without mutating the shared definition + const protocolMetadata = this.#runner.getProtocolMetadata(); + if (protocolMetadata) { + logger().debug({ + msg: "applying config limits from protocol", + protocolMetadata, + }); + + const stopThresholdMax = Math.max(Number(protocolMetadata.actorStopThreshold) - 1000, 0); + handler.actor.overrides.onSleepTimeout = stopThresholdMax; + handler.actor.overrides.onDestroyTimeout = stopThresholdMax; + + if (protocolMetadata.serverlessDrainGracePeriod) { + const drainMax = Math.max(Number(protocolMetadata.serverlessDrainGracePeriod) - 1000, 0); + handler.actor.overrides.runStopTimeout = drainMax; + handler.actor.overrides.waitUntilTimeout = drainMax; + } + } + // Start actor await handler.actor.start( this, @@ -514,9 +534,9 
@@ export class EngineActorDriver implements ActorDriver { const error = innerError instanceof Error ? new Error( - `Failed to start actor ${actorId}: ${innerError.message}`, - { cause: innerError }, - ) + `Failed to start actor ${actorId}: ${innerError.message}`, + { cause: innerError }, + ) : new Error(`Failed to start actor ${actorId}: ${String(innerError)}`); handler.actor = undefined; handler.actorStartError = error; @@ -559,15 +579,26 @@ export class EngineActorDriver implements ActorDriver { this.#actorStopIntent.delete(actorId); const handler = this.#actors.get(actorId); - if (handler?.actorStartPromise) { - const startError = - handler.actorStartError ?? - new Error(`Actor ${actorId} stopped before start completed`); - handler.actorStartError = startError; - handler.actorStartPromise.reject(startError); - handler.actorStartPromise = undefined; + if (!handler) { + logger().debug({ msg: "no runner actor handler to stop", actorId, reason }); + return; } - if (handler?.actor) { + + if (handler.actorStartPromise) { + try { + logger().debug({ msg: "runner actor stopping before it started, waiting", actorId, generation }); + await handler.actorStartPromise.promise; + } catch (err) { + // Start failed, but we still want to clean up the handler + logger().debug({ + msg: "actor start failed during stop, cleaning up handler", + actorId, + err: stringifyError(err), + }); + } + } + + if (handler.actor) { try { await handler.actor.onStop(reason); } catch (err) { @@ -577,7 +608,8 @@ export class EngineActorDriver implements ActorDriver { }); } } - if (handler) this.#actors.delete(actorId); + + this.#actors.delete(actorId); logger().debug({ msg: "runner actor stopped", actorId, reason }); } @@ -762,7 +794,7 @@ export class EngineActorDriver implements ActorDriver { entry.bufferedMessageSize >= CONN_BUFFERED_MESSAGE_SIZE_THRESHOLD ) { - // Reset buffered message size immeidatley (instead + // Reset buffered message size immediately (instead // of waiting for 
onAfterPersistConn) since we may // receive more messages before onAfterPersistConn // is called, which would called saveState diff --git a/scripts/misc/endian-converter.ts b/scripts/debug/endian-converter.ts similarity index 100% rename from scripts/misc/endian-converter.ts rename to scripts/debug/endian-converter.ts