diff --git a/.github/store-perf-thresholds.json b/.github/store-perf-thresholds.json
new file mode 100644
index 0000000000..1a354c1c77
--- /dev/null
+++ b/.github/store-perf-thresholds.json
@@ -0,0 +1,21 @@
+{
+  "SvDsoStoreIngestionPerformanceTest": {
+    "splice_perf_ingestion_total_time_ns": 600000000000
+  },
+
+  "ScanStoreIngestionPerformanceTest": {
+    "splice_perf_ingestion_total_time_ns": 600000000000
+  },
+
+  "UpdateHistoryIngestionPerformanceTest": {
+    "splice_perf_ingestion_total_time_ns": 1800000000000
+  },
+
+  "UpdateHistoryReadPerformanceTest-getUpdate": {
+    "splice_perf_read_total_time_ns": 5000000000
+  },
+
+  "UpdateHistoryReadPerformanceTest-encodeUpdate": {
+    "splice_perf_read_total_time_ns": 5000000000
+  }
+}
diff --git a/.github/workflows/performance_tests.yml b/.github/workflows/performance_tests.yml
index 3f6bc01550..4408415bde 100644
--- a/.github/workflows/performance_tests.yml
+++ b/.github/workflows/performance_tests.yml
@@ -69,6 +69,20 @@ jobs:
        run: |
          python3 scripts/performance/push_store_ingestion_perf_metrics.py

+      - name: Upload ingestion metrics artifact
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: store-ingestion-perf-metrics-${{ github.run_id }}
+          path: /tmp/store-ingestion-perf-metrics
+          if-no-files-found: ignore
+
+      - name: Check ingestion thresholds
+        id: perf_check
+        if: ${{ success() && github.event_name == 'schedule' }}
+        run: |
+          python3 scripts/performance/check_perf_thresholds.py /tmp/store-ingestion-perf-metrics || true
+
       # sanity check
       - name: Check logs for errors
         uses: ./splice-shared-gha/.github/actions/sbt/execute_sbt_command
@@ -86,10 +100,10 @@
           name: "logs-ingestion-performance-tests"

       - name: Report Failures on Slack & Github
-        if: failure() && github.event_name == 'schedule'
+        if: ${{ github.event_name == 'schedule' && (failure() || steps.perf_check.outputs.has_breaches == 'true') }}
         uses: ./splice-shared-gha/.github/actions/tests/failure_notifications
         with:
-          job_subname: "Daily Store
Ingestion Performance Tests"
+          job_subname: "Ingestion Tests"
           workload_identity_provider: "${{ secrets.GOOGLE_WORKLOAD_IDENTITY_PROVIDER }}"
           service_account: "${{ secrets.FAILURE_NOTIFICATIONS_INVOKER_SA }}"
           notifications_url: '${{ secrets.FAILURE_NOTIFICATIONS_INVOKER_URL }}'
@@ -149,6 +163,20 @@
         run: |
           python3 scripts/performance/push_store_ingestion_perf_metrics.py /tmp/store-read-perf-metrics

+      - name: Upload read metrics artifact
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: store-read-perf-metrics-${{ github.run_id }}
+          path: /tmp/store-read-perf-metrics
+          if-no-files-found: ignore
+
+      - name: Check read thresholds
+        id: perf_check
+        if: ${{ success() && github.event_name == 'schedule' }}
+        run: |
+          python3 scripts/performance/check_perf_thresholds.py /tmp/store-read-perf-metrics || true
+
       # sanity check
       - name: Check logs for errors
         uses: ./splice-shared-gha/.github/actions/sbt/execute_sbt_command
@@ -163,10 +191,10 @@
           name: "logs-read-performance-tests"

       - name: Report Failures on Slack & Github
-        if: failure() && github.event_name == 'schedule'
+        if: ${{ github.event_name == 'schedule' && (failure() || steps.perf_check.outputs.has_breaches == 'true') }}
         uses: ./splice-shared-gha/.github/actions/tests/failure_notifications
         with:
-          job_subname: "Daily Store Read Performance Tests"
+          job_subname: "Read Tests"
           workload_identity_provider: "${{ secrets.GOOGLE_WORKLOAD_IDENTITY_PROVIDER }}"
           service_account: "${{ secrets.FAILURE_NOTIFICATIONS_INVOKER_SA }}"
           notifications_url: '${{ secrets.FAILURE_NOTIFICATIONS_INVOKER_URL }}'
diff --git a/scripts/performance/check_perf_thresholds.py b/scripts/performance/check_perf_thresholds.py
new file mode 100644
index 0000000000..567461612c
--- /dev/null
+++ b/scripts/performance/check_perf_thresholds.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+# Copyright (c) 2024 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Compare perf metrics against thresholds and write the result to GITHUB_OUTPUT:
+  - reads every metrics *.json in the given dirs
+  - compares each metric to its thresholds in `.github/store-perf-thresholds.json`
+  - writes information of the breaches to GITHUB_OUTPUT so downstream steps can branch on them
+  - appends a Markdown summary of breaches to GITHUB_STEP_SUMMARY for visibility in GHA UI which is linked from GH issue
+
+Usage:
+    python3 check_perf_thresholds.py <metrics-dir> [<metrics-dir> ...]
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Iterable
+
+DEFAULT_THRESHOLDS = Path(".github/store-perf-thresholds.json")
+NEWLINE = "\n"
+GITHUB_OUTPUT_HEREDOC_DELIM = "EOF_PERF_THRESHOLDS"
+
+
+def load_thresholds(path: Path) -> dict:
+    """Load the thresholds file; raise ValueError if it is not a JSON object."""
+    with open(path) as f:
+        data = json.load(f)
+    if not isinstance(data, dict):
+        raise ValueError(f"thresholds file {path} must be a JSON object")
+    return data
+
+
+def iter_metric_files(dirs: Iterable[Path]) -> Iterable[Path]:
+    """Yield every *.json metrics file in the given dirs (sorted within each dir).
+
+    Missing dirs are skipped with a warning instead of failing the run.
+    """
+    for d in dirs:
+        if not d.is_dir():
+            print(f"warning: metrics dir {d} does not exist, skipping", file=sys.stderr)
+            continue
+        yield from sorted(d.glob("*.json"))
+
+
+def get_metric_value(data: dict, name: str) -> float | None:
+    """Return the value of metric `name` from a parsed metrics file, or None.
+
+    None is returned both when the metric is absent and when its value is
+    not convertible to float.
+    """
+    for m in data.get("metrics", []):
+        if m.get("name") == name:
+            try:
+                return float(m["value"])
+            except (TypeError, ValueError):
+                return None
+    return None
+
+
+def append_step_output(key: str, value: str) -> None:
+    """Append `key=value` to $GITHUB_OUTPUT so later steps can read it via
+    `steps.<id>.outputs.<key>`. Multiline values use GitHub's heredoc syntax.
+
+    Outside GitHub Actions (GITHUB_OUTPUT unset) this is a no-op with an info message.
+    """
+    out_path = os.environ.get("GITHUB_OUTPUT")
+    if not out_path:
+        print(
+            "info: GITHUB_OUTPUT not set (running outside GitHub Actions?); "
+            f"skipping step output '{key}'",
+            file=sys.stderr,
+        )
+        return
+    try:
+        with open(out_path, "a") as f:
+            if NEWLINE in value:
+                # Multiline values must be written with the heredoc delimiter syntax.
+                f.write(
+                    f"{key}<<{GITHUB_OUTPUT_HEREDOC_DELIM}\n"
+                    f"{value}\n{GITHUB_OUTPUT_HEREDOC_DELIM}\n"
+                )
+            else:
+                f.write(f"{key}={value}\n")
+    except OSError as e:
+        # Best effort: a broken output file must not fail the perf check itself.
+        print(f"warning: could not write GITHUB_OUTPUT ({out_path}): {e}", file=sys.stderr)
+
+
+def append_step_summary(lines: list[str]) -> None:
+    """Append Markdown lines to GitHub Actions' step summary (best effort)."""
+    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
+    if not summary_path:
+        print(
+            "info: GITHUB_STEP_SUMMARY not set (running outside GitHub Actions?); "
+            "skipping run-page summary",
+            file=sys.stderr,
+        )
+        return
+    try:
+        with open(summary_path, "a") as f:
+            f.write("\n".join(lines) + "\n")
+    except OSError as e:
+        print(f"warning: could not write GITHUB_STEP_SUMMARY ({summary_path}): {e}", file=sys.stderr)
+
+
+def evaluate_perf_threshold_breaches(metric_dirs: list[Path], thresholds: dict) -> tuple[list[str], int]:
+    """Compare every metrics file under `metric_dirs` to `thresholds`.
+
+    Returns (breach_lines, files_seen). Unparseable files, tests without
+    configured thresholds, non-numeric rules and missing metrics are all
+    skipped with a diagnostic instead of aborting the whole check.
+    """
+    breaches: list[str] = []
+    files_seen = 0
+
+    for f in iter_metric_files(metric_dirs):
+        files_seen += 1
+        try:
+            data = json.loads(f.read_text())
+        except Exception as e:
+            print(f"warning: cannot parse {f}: {e}", file=sys.stderr)
+            continue
+
+        # Fall back to the file stem when the payload does not name its test.
+        test_name = data.get("test_name") or f.stem
+        rules = thresholds.get(test_name)
+        if not rules:
+            print(f"info: no thresholds configured for '{test_name}', skipping ({f.name})")
+            continue
+
+        for metric_name, raw_threshold in rules.items():
+            try:
+                threshold = float(raw_threshold)
+            except (TypeError, ValueError):
+                print(
+                    f"warning: rule {test_name}::{metric_name} has non-numeric "
+                    f"value {raw_threshold}, skipping",
+                    file=sys.stderr,
+                )
+                continue
+
+            observed = get_metric_value(data, metric_name)
+            if observed is None:
+                print(
+                    f"warning: metric '{metric_name}' not in {f.name} for '{test_name}'",
+                    file=sys.stderr,
+                )
+                continue
+
+            if observed <= threshold:
+                print(f"OK {test_name} :: {metric_name} = {observed:.0f} (<= {threshold:.0f})")
+                continue
+
+            # Guard against a zero threshold when computing the overshoot percentage.
+            pct = ((observed - threshold) / threshold * 100.0) if threshold else 0.0
+            line = (
+                f"BREACH {test_name} :: {metric_name} = {observed:.0f} "
+                f"(> {threshold:.0f}, +{pct:.2f}%)"
+            )
+            print(line)
+            breaches.append(line)
+
+    return breaches, files_seen
+
+
+def main() -> int:
+    if len(sys.argv) < 2:
+        print("warning: no metrics dir given; nothing to check", file=sys.stderr)
+
+    thresholds = load_thresholds(DEFAULT_THRESHOLDS)
+    metric_dirs = [Path(p) for p in sys.argv[1:]]
+
+    breaches, files_seen = evaluate_perf_threshold_breaches(metric_dirs, thresholds)
+
+    print(f"Done. files_seen={files_seen} breaches={len(breaches)}")
+
+    # Breach signaling is done via GITHUB_OUTPUT:
+    # downstream steps branch on `steps.<id>.outputs.has_breaches`.
+    append_step_output("has_breaches", "true" if breaches else "false")
+    append_step_output("breach_count", str(len(breaches)))
+    if breaches:
+        append_step_output("breaches", "\n".join(breaches))
+        append_step_summary(
+            ["## Performance threshold breaches", "", "```", *breaches, "```"]
+        )
+
+    # Always exit 0: breaches are reported via outputs/summary, not the exit
+    # code (the workflow invokes this with `|| true` anyway).
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())