google · pratt4 · Jun 1, 2025 · Jun 2, 2025 · Jun 16, 2025 · Jun 16, 2025
diff --git a/e2e-tests/controller-spark/controller_spark_sql_validation.sh b/e2e-tests/controller-spark/controller_spark_sql_validation.sh
@@ -20,6 +20,11 @@
 
 set -e
 
+# -------------------------------------------------------------------
+# Shared helper for robust Parquet row-count with retry/back-off
+# -------------------------------------------------------------------
+source "$(dirname "$0")/../lib/parquet_utils.sh"
+
 #################################################
 # Prints the usage
 #################################################
@@ -190,7 +195,7 @@ function wait_for_completion() {
   local runtime="15 minute"
   local end_time=$(date -ud "$runtime" +%s)
 
-  while [[ $(date -u +%s) -le $end_time ]]
+  while [[ $(date -u +%s) -le ${end_time} ]]
   do
     local pipeline_status=$(curl --location --request GET "${PIPELINE_CONTROLLER_URL}/status?" \
     --connect-timeout 5 \
@@ -238,25 +243,51 @@ function check_parquet() {
   fi
 
   # check whether output directory has received parquet files.
-  if [[ "$(ls -A $output)" ]]
+  if [[ "$(ls -A "${output}")" ]]
   then
-    local total_patients=$(java -Xms16g -Xmx16g -jar ./parquet-tools-1.11.1.jar rowcount \
-    "${output}/*/Patient/" | awk '{print $3}')
-    local total_encounters=$(java -Xms16g -Xmx16g -jar ./parquet-tools-1.11.1.jar rowcount \
-    "${output}/*/Encounter/" | awk '{print $3}')
-    local total_observations=$(java -Xms16g -Xmx16g -jar ./parquet-tools-1.11.1.jar rowcount \
-    "${output}/*/Observation/" | awk '{print $3}')
-
-    local total_patient_flat=$(java -Xms16g -Xmx16g -jar ./parquet-tools-1.11.1.jar rowcount \
-    "${output}/*/VIEWS_TIMESTAMP_*/patient_flat/" | awk '{print $3}')
-    local total_encounter_flat=$(java -Xms16g -Xmx16g -jar ./parquet-tools-1.11.1.jar rowcount \
-    "${output}/*/VIEWS_TIMESTAMP_*/encounter_flat/" | awk '{print $3}')
-    local total_obs_flat=$(java -Xms16g -Xmx16g -jar ./parquet-tools-1.11.1.jar rowcount \
-     "${output}/*/VIEWS_TIMESTAMP_*/observation_flat/" | awk '{print $3}')
-
-    print_message "Total patients: $total_patients"
-    print_message "Total encounters: $total_encounters"
-    print_message "Total observations: $total_observations"
+    # ------------------------------------------------------------------
+    # Row-counts with retry (shared helper)
+    # ------------------------------------------------------------------
+    local total_patients
+    total_patients=$(retry_rowcount \
+      "${output}/*/Patient/" \
+      "${TOTAL_TEST_PATIENTS}" \
+      "patients") || true
+
+    local total_encounters
+    total_encounters=$(retry_rowcount \
+      "${output}/*/Encounter/" \
+      "${TOTAL_TEST_ENCOUNTERS}" \
+      "encounters") || true
+
+    local total_observations
+    total_observations=$(retry_rowcount \
+      "${output}/*/Observation/" \
+      "${TOTAL_TEST_OBS}" \
+      "observations") || true
+
+    local total_patient_flat
+    total_patient_flat=$(retry_rowcount \
+      "${output}/*/VIEWS_TIMESTAMP_*/patient_flat/" \
+      "${TOTAL_VIEW_PATIENTS}" \
+      "patient_flat") || true
+
+    local total_encounter_flat
+    total_encounter_flat=$(retry_rowcount \
+      "${output}/*/VIEWS_TIMESTAMP_*/encounter_flat/" \
+      "${TOTAL_TEST_ENCOUNTERS}" \
+      "encounter_flat") || true
+
+    local total_obs_flat
+    total_obs_flat=$(retry_rowcount \
+      "${output}/*/VIEWS_TIMESTAMP_*/observation_flat/":"${output}/*/observation_flat/" \
+      "${TOTAL_TEST_OBS}" \
+      "observation_flat") || true
+    # ------------------------------------------------------------------
+
+    print_message "Total patients: ${total_patients}"
+    print_message "Total encounters: ${total_encounters}"
+    print_message "Total observations: ${total_observations}"
 
     print_message "Total patient flat rows: ${total_patient_flat}"
     print_message "Total encounter flat rows: ${total_encounter_flat}"
@@ -271,12 +302,12 @@ function check_parquet() {
             print_message "Pipeline transformation successfully completed."
     else
             print_message "Mismatch in count of records"
-            print_message "Actual total patients: $total_patients, expected total: $TOTAL_TEST_PATIENTS"
-            print_message "Actual total encounters: $total_encounters, expected total: $TOTAL_TEST_ENCOUNTERS"
-            print_message "Total observations: $total_observations, expected total: $TOTAL_TEST_OBS"
-            print_message "Actual total materialized view patients: $total_patient_flat, expected total: $TOTAL_VIEW_PATIENTS"
-            print_message "Actual total materialized view encounters: $total_encounter_flat, expected total: $TOTAL_TEST_ENCOUNTERS"
-            print_message "Actual total materialized view observations: $total_obs_flat, expected total: $TOTAL_TEST_OBS"
+            print_message "Actual total patients: ${total_patients}, expected total: ${TOTAL_TEST_PATIENTS}"
+            print_message "Actual total encounters: ${total_encounters}, expected total: ${TOTAL_TEST_ENCOUNTERS}"
+            print_message "Total observations: ${total_observations}, expected total: ${TOTAL_TEST_OBS}"
+            print_message "Actual total materialized view patients: ${total_patient_flat}, expected total: ${TOTAL_VIEW_PATIENTS}"
+            print_message "Actual total materialized view encounters: ${total_encounter_flat}, expected total: ${TOTAL_TEST_ENCOUNTERS}"
+            print_message "Actual total materialized view observations: ${total_obs_flat}, expected total: ${TOTAL_TEST_OBS}"
             exit 2
     fi
   else
@@ -292,7 +323,7 @@ function check_parquet() {
 #   PARQUET_SUBDIR
 #######################################################################
 function clear() {
-  rm -rf $HOME_PATH/$PARQUET_SUBDIR/*.json
+  rm -rf "${HOME_PATH}/${PARQUET_SUBDIR}"/*.json  # dirs quoted; /*.json left bare so the glob still expands
 }
 
 #######################################################################

diff --git a/e2e-tests/lib/parquet_utils.sh b/e2e-tests/lib/parquet_utils.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+# Utility: robust Parquet row-count with retry/back-off.
+# Source from validation scripts.
+# NOTE(review): this file is sourced, so the options below also take effect in the sourcing script.
+set -euo pipefail
+
+# retry_rowcount <globs> <expected> <label>
+#   globs     – colon-separated shell globs to Parquet folders; rows are
+#               counted under the first glob that matches something on disk
+#   expected  – non-negative integer row count we expect to see
+#   label     – metric name for log messages
+# Env: ROWCOUNT_MAX_RETRIES (default 5), ROWCOUNT_SLEEP_SECS (default 5).
+# Prints the final count on stdout (0 when no count could be read);
+# retry progress and errors go to stderr.
+# Returns 0 if expected count is reached; 1 otherwise.
+
+retry_rowcount() {
+  local globs="$1" expected="$2" label="$3"
+
+  # Allow CI to override retry cadence without touching code
+  local max_retries=${ROWCOUNT_MAX_RETRIES:-5}
+  local sleep_secs=${ROWCOUNT_SLEEP_SECS:-5}
+  local retries=0 raw_count=0 final_count=0 p
+  local -a paths=()   # local: do not leak into the sourcing script
+
+  # An empty target would compare equal to 0 and a non-numeric one would be
+  # evaluated as an arithmetic expression by -eq, so reject both up front.
+  if [[ ! "${expected}" =~ ^[0-9]+$ ]]; then
+    echo "E2E TEST: [${label}] expected count '${expected}' is not an integer" >&2
+    echo 0
+    return 1
+  fi
+
+  IFS=':' read -r -a paths <<<"${globs}"
+
+  while true; do
+    raw_count=0
+
+    # ── 1. Count rows under the first glob that matches something on disk
+    for p in "${paths[@]}"; do
+      # compgen -G expands the pattern without word splitting and without
+      # touching the caller's nullglob setting; it fails when nothing matches.
+      if compgen -G "${p}" >/dev/null; then
+        # parquet-tools receives the glob itself. `|| true`: a failed run
+        # (pipefail) just means "no count yet" and is retried below.
+        raw_count=$(java -Xms16g -Xmx16g -jar ./parquet-tools-1.11.1.jar rowcount \
+                    "${p}" 2>/dev/null | awk '{print $3}') || true
+        break
+      fi
+    done
+
+    # ── 2. Normalise: anything that is not a plain integer counts as 0
+    final_count=0
+    if [[ "${raw_count}" =~ ^[0-9]+$ ]]; then final_count="${raw_count}"; fi
+
+    # ── 3. Success?
+    if [[ "${final_count}" -eq "${expected}" ]]; then
+      echo "${final_count}"
+      return 0
+    fi
+
+    # ── 4. Give up once the retry budget is spent
+    if [[ "${retries}" -ge "${max_retries}" ]]; then
+      echo "${final_count}"
+      return 1
+    fi
+
+    # ── 5. Sleep & retry
+    retries=$((retries + 1))
+    echo "E2E TEST: [${label}] raw=${raw_count}, expected=${expected} — retry ${retries}/${max_retries} in ${sleep_secs}s" >&2
+    sleep "${sleep_secs}"
+  done
+}