diff --git a/cob_datapipeline/gencon_ingest_dag.py b/cob_datapipeline/gencon_ingest_dag.py
index f6c2c3a5..269f9be1 100644
--- a/cob_datapipeline/gencon_ingest_dag.py
+++ b/cob_datapipeline/gencon_ingest_dag.py
@@ -64,9 +64,11 @@ Tasks with custom logic are relegated to individual Python files.
 """
+
+AIRFLOW_HOME = "/opt/airflow"
 INDEX_GENCON = BashOperator(
     task_id="index_gencon",
-    bash_command="/opt/airflow/dags/cob_datapipeline/scripts/ingest_gencon.sh ",
+    bash_command=AIRFLOW_HOME + "/dags/cob_datapipeline/scripts/ingest_gencon.sh ",
     retries=1,
     env={
         "AWS_ACCESS_KEY_ID": AIRFLOW_S3.login,
diff --git a/cob_datapipeline/scripts/ingest_gencon.sh b/cob_datapipeline/scripts/ingest_gencon.sh
index 7967f4c3..259e3e21 100755
--- a/cob_datapipeline/scripts/ingest_gencon.sh
+++ b/cob_datapipeline/scripts/ingest_gencon.sh
@@ -14,8 +14,6 @@ export PATH="$AIRFLOW_USER_HOME/.rbenv/shims:$AIRFLOW_USER_HOME/.rbenv/bin:$PATH
 export SOLR_URL="${SOLR_WEB_URL//\/\////$SOLR_AUTH_USER:$SOLR_AUTH_PASSWORD@}"
 export GENCON_INDEX_PATH="$PWD/gencon_index"
 
-echo ">>> My Dreictory: $PWD"
-
 # Get the raw CSV files from S3
 aws s3 sync $GENCON_CSV_S3 $GENCON_TEMP_PATH --include "*.csv"