From 33f59473d8a7ce671ce6cce866f3237977966d7f Mon Sep 17 00:00:00 2001 From: Steven Ng Date: Fri, 25 Jul 2025 15:33:49 -0700 Subject: [PATCH 1/2] Fix path to ingest script --- cob_datapipeline/gencon_ingest_dag.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cob_datapipeline/gencon_ingest_dag.py b/cob_datapipeline/gencon_ingest_dag.py index f6c2c3a5..47e7209d 100644 --- a/cob_datapipeline/gencon_ingest_dag.py +++ b/cob_datapipeline/gencon_ingest_dag.py @@ -64,9 +64,10 @@ Tasks with custom logic are relegated to individual Python files. """ +AIRFLOW_HOME = "/opt/airflow" INDEX_GENCON = BashOperator( task_id="index_gencon", - bash_command="/opt/airflow/dags/cob_datapipeline/scripts/ingest_gencon.sh ", + bash_command="/dags/cob_datapipeline/scripts/ingest_gencon.sh ", retries=1, env={ "AWS_ACCESS_KEY_ID": AIRFLOW_S3.login, From 4cf9205ec4d5790c8b51c4dcf6fbb3adfee554f6 Mon Sep 17 00:00:00 2001 From: Steven Ng Date: Tue, 12 Aug 2025 10:20:11 -0600 Subject: [PATCH 2/2] Fix path to ingest script - Adds airflow user home path to ingest script path - Removes a debug message from ingest script --- cob_datapipeline/gencon_ingest_dag.py | 3 ++- cob_datapipeline/scripts/ingest_gencon.sh | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cob_datapipeline/gencon_ingest_dag.py b/cob_datapipeline/gencon_ingest_dag.py index 47e7209d..269f9be1 100644 --- a/cob_datapipeline/gencon_ingest_dag.py +++ b/cob_datapipeline/gencon_ingest_dag.py @@ -64,10 +64,11 @@ Tasks with custom logic are relegated to individual Python files. """ + AIRFLOW_HOME = "/opt/airflow" INDEX_GENCON = BashOperator( task_id="index_gencon", - bash_command="/dags/cob_datapipeline/scripts/ingest_gencon.sh ", + bash_command=AIRFLOW_HOME + "/dags/cob_datapipeline/scripts/ingest_gencon.sh ", retries=1, env={ "AWS_ACCESS_KEY_ID": AIRFLOW_S3.login, diff --git a/cob_datapipeline/scripts/ingest_gencon.sh b/cob_datapipeline/scripts/ingest_gencon.sh index 7967f4c3..259e3e21 100755 --- a/cob_datapipeline/scripts/ingest_gencon.sh +++ b/cob_datapipeline/scripts/ingest_gencon.sh @@ -14,8 +14,6 @@ export PATH="$AIRFLOW_USER_HOME/.rbenv/shims:$AIRFLOW_USER_HOME/.rbenv/bin:$PATH export SOLR_URL="${SOLR_WEB_URL//\/\////$SOLR_AUTH_USER:$SOLR_AUTH_PASSWORD@}" export GENCON_INDEX_PATH="$PWD/gencon_index" -echo ">>> My Dreictory: $PWD" - # Get the raw CSV files from S3 aws s3 sync $GENCON_CSV_S3 $GENCON_TEMP_PATH --include "*.csv"