diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 055a0b95b..433880d1a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -935,6 +935,22 @@ jobs:
       - name: Lint Helm chart
         run: helm lint helm/hindsight
 
+  test-standalone-start-script:
+    needs: [detect-changes]
+    if: >-
+      (github.event_name == 'workflow_dispatch' ||
+      needs.detect-changes.outputs.docker == 'true' ||
+      needs.detect-changes.outputs.ci == 'true')
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ github.event.pull_request.head.sha || '' }}
+
+      - name: Run standalone start script tests
+        run: bash docker/standalone/test-start-all.sh
+
   build-docker-images:
     needs: [detect-changes]
     if: >-
@@ -3651,6 +3667,7 @@ jobs:
       - build-docs
       - test-rust-cli
       - lint-helm-chart
+      - test-standalone-start-script
       - build-docker-images
       - test-api
       - test-api-oracle
diff --git a/docker/standalone/start-all.sh b/docker/standalone/start-all.sh
index b5f163e6c..045db38e1 100755
--- a/docker/standalone/start-all.sh
+++ b/docker/standalone/start-all.sh
@@ -10,19 +10,62 @@ set -e
 # loss scenarios where a container restart caused the data directory to be
 # wiped despite a volume mount being present.
 # =============================================================================
-PG0_DATA_DIR="${HOME}/.pg0"
-if [ -d "$PG0_DATA_DIR" ]; then
+# Return 0 when a PG_VERSION file exists in any known pg0 layout under $1,
+# and 1 otherwise. Prints nothing.
+pg0_has_pg_version() {
+    local pg0_data_dir="$1"
+    local candidate
+
+    # pg0 has used more than one on-disk layout. Newer standalone images keep
+    # PostgreSQL data under instances/*/data, while older volumes may have
+    # placed PG_VERSION at or one level below the mount.
+    #
+    # Let the shell expand the globs and test each result: an unmatched
+    # pattern stays literal and simply fails the -f test.
+    for candidate in \
+        "$pg0_data_dir"/PG_VERSION \
+        "$pg0_data_dir"/*/PG_VERSION \
+        "$pg0_data_dir"/instances/*/data/PG_VERSION; do
+        if [ -f "$candidate" ]; then
+            return 0
+        fi
+    done
+
+    return 1
+}
+
+# Report on the pg0 data directory given as $1: print a confirmation when
+# PostgreSQL data is found, a warning when the directory is non-empty but has
+# no PG_VERSION, and nothing when it is missing or empty. Always returns 0.
+check_pg0_data_integrity() {
+    local pg0_data_dir="$1"
+
+    if [ ! -d "$pg0_data_dir" ]; then
+        return 0
+    fi
+
     # Look for actual PostgreSQL data directories (pg0 creates subdirs per instance)
-    if compgen -G "$PG0_DATA_DIR"/*/PG_VERSION > /dev/null 2>&1; then
-        echo "✅ Existing pg0 data directory detected at $PG0_DATA_DIR"
-    elif [ "$(ls -A "$PG0_DATA_DIR" 2>/dev/null)" ]; then
-        echo "⚠️ WARNING: pg0 data directory exists at $PG0_DATA_DIR but no PG_VERSION found."
+    if pg0_has_pg_version "$pg0_data_dir"; then
+        echo "✅ Existing pg0 data directory detected at $pg0_data_dir"
+    elif [ "$(ls -A "$pg0_data_dir" 2>/dev/null)" ]; then
+        echo "⚠️ WARNING: pg0 data directory exists at $pg0_data_dir but no PG_VERSION found."
         echo " This may indicate data corruption or an incomplete previous shutdown."
         echo " If you see all migrations running from scratch after this, your data may have been lost."
         echo " See: https://github.com/vectorize-io/hindsight/issues/675"
     fi
+
+    return 0
+}
+
+# Tests source this file with HINDSIGHT_START_ALL_SOURCE_ONLY=true to load the
+# functions above without running the rest of the script. `return` works when
+# sourced; the `exit` fallback covers direct execution.
+if [ "${HINDSIGHT_START_ALL_SOURCE_ONLY:-false}" = "true" ]; then
+    return 0 2>/dev/null || exit 0
 fi
 
+check_pg0_data_integrity "${HOME}/.pg0"
+
 # Service flags (default to true if not set)
 ENABLE_API="${HINDSIGHT_ENABLE_API:-true}"
 ENABLE_CP="${HINDSIGHT_ENABLE_CP:-true}"
diff --git a/docker/standalone/test-start-all.sh b/docker/standalone/test-start-all.sh
new file mode 100755
index 000000000..9f4f27e98
--- /dev/null
+++ b/docker/standalone/test-start-all.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# Unit tests for the pg0 data-directory integrity check in start-all.sh.
+# Sources start-all.sh in source-only mode and runs check_pg0_data_integrity
+# against throwaway directory layouts. Exits non-zero on the first failure.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Load the functions only; the flag makes start-all.sh return before it runs
+# the rest of the script.
+HINDSIGHT_START_ALL_SOURCE_ONLY=true
+source "$SCRIPT_DIR/start-all.sh"
+unset HINDSIGHT_START_ALL_SOURCE_ONLY
+
+TMP_DIR="$(mktemp -d)"
+trap 'rm -rf "$TMP_DIR"' EXIT
+
+# Fail unless $1 contains the substring $2.
+assert_contains() {
+    local output="$1"
+    local expected="$2"
+
+    if [[ "$output" != *"$expected"* ]]; then
+        echo "Expected output to contain: $expected"
+        echo "Actual output:"
+        echo "$output"
+        exit 1
+    fi
+}
+
+# Fail if $1 contains the substring $2.
+assert_not_contains() {
+    local output="$1"
+    local unexpected="$2"
+
+    if [[ "$output" == *"$unexpected"* ]]; then
+        echo "Expected output not to contain: $unexpected"
+        echo "Actual output:"
+        echo "$output"
+        exit 1
+    fi
+}
+
+# Fail unless $1 is the empty string.
+assert_empty() {
+    local output="$1"
+
+    if [ -n "$output" ]; then
+        echo "Expected no output, got:"
+        echo "$output"
+        exit 1
+    fi
+}
+
+# Missing directory: nothing to report.
+assert_empty "$(check_pg0_data_integrity "$TMP_DIR/missing")"
+
+# Empty directory: nothing to report.
+mkdir -p "$TMP_DIR/empty"
+assert_empty "$(check_pg0_data_integrity "$TMP_DIR/empty")"
+
+# PG_VERSION directly at the mount point.
+mkdir -p "$TMP_DIR/direct"
+touch "$TMP_DIR/direct/PG_VERSION"
+direct_output="$(check_pg0_data_integrity "$TMP_DIR/direct")"
+assert_contains "$direct_output" "Existing pg0 data directory detected"
+assert_not_contains "$direct_output" "WARNING"
+
+# PG_VERSION one level below the mount point.
+mkdir -p "$TMP_DIR/legacy/instance"
+touch "$TMP_DIR/legacy/instance/PG_VERSION"
+legacy_output="$(check_pg0_data_integrity "$TMP_DIR/legacy")"
+assert_contains "$legacy_output" "Existing pg0 data directory detected"
+assert_not_contains "$legacy_output" "WARNING"
+
+# PG_VERSION under instances/*/data.
+mkdir -p "$TMP_DIR/nested/instances/hindsight/data"
+touch "$TMP_DIR/nested/instances/hindsight/data/PG_VERSION"
+nested_output="$(check_pg0_data_integrity "$TMP_DIR/nested")"
+assert_contains "$nested_output" "Existing pg0 data directory detected"
+assert_not_contains "$nested_output" "WARNING"
+
+# Several instances side by side must still count as existing data.
+mkdir -p "$TMP_DIR/multi/instances/a/data" "$TMP_DIR/multi/instances/b/data"
+touch "$TMP_DIR/multi/instances/a/data/PG_VERSION"
+touch "$TMP_DIR/multi/instances/b/data/PG_VERSION"
+multi_output="$(check_pg0_data_integrity "$TMP_DIR/multi")"
+assert_contains "$multi_output" "Existing pg0 data directory detected"
+assert_not_contains "$multi_output" "WARNING"
+
+# Non-empty directory without any PG_VERSION: warn, and do not claim success.
+mkdir -p "$TMP_DIR/nonempty/instances/hindsight"
+touch "$TMP_DIR/nonempty/instances/hindsight/instance.json"
+nonempty_output="$(check_pg0_data_integrity "$TMP_DIR/nonempty")"
+assert_contains "$nonempty_output" "WARNING: pg0 data directory exists"
+assert_not_contains "$nonempty_output" "Existing pg0 data directory detected"
+
+echo "start-all pg0 integrity checks passed"