@@ -39,6 +39,7 @@ simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
# we need to escape the curly braces by adding an additional curly brace
log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log
log_processor_filename_template = {{ filename }}.log
+ dag_processor_manager_log_location = /usr/local/airflow/logs/dag_processor_manager/dag_processor_manager.log

# Hostname by providing a path to a callable, which will resolve the hostname
hostname_callable = socket:getfqdn
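For context, the Jinja template in log_filename_template above is expanded per task instance; below is a minimal sketch of that rendering done directly with jinja2, using made-up values for ti, ts and try_number.

```python
# Sketch only: rendering log_filename_template outside Airflow with jinja2.
# The task-instance values below are invented for illustration.
from types import SimpleNamespace
from jinja2 import Template

ti = SimpleNamespace(dag_id="example_dag", task_id="example_task")
template = Template("{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log")
print(template.render(ti=ti, ts="2018-01-01T00:00:00+00:00", try_number=1))
# -> example_dag/example_task/2018-01-01T00:00:00+00:00/1.log
```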
@@ -59,6 +60,9 @@ executor = SequentialExecutor
# If SqlAlchemy should pool database connections.
sql_alchemy_pool_enabled = True

+ # The encoding for the databases
+ sql_engine_encoding = utf-8
+
# The SqlAlchemy pool size is the maximum number of database connections
# in the pool. 0 indicates no limit.
sql_alchemy_pool_size = 5
@@ -73,6 +77,10 @@ sql_alchemy_pool_recycle = 1800
# disconnects. Setting this to 0 disables retries.
sql_alchemy_reconnect_timeout = 300

+ # The schema to use for the metadata database
+ # SqlAlchemy supports databases with the concept of multiple schemas.
+ sql_alchemy_schema =
+
# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
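As a hedged illustration of where the SQLAlchemy options above end up, the sketch below maps them onto SQLAlchemy 1.x create_engine() arguments; the connection URL is hypothetical, and Airflow performs this wiring internally.

```python
# Sketch (assumption): approximate mapping of the [core] options above onto
# SQLAlchemy 1.x engine arguments. The URL is a placeholder and requires the
# matching driver (psycopg2) to be installed.
from sqlalchemy import create_engine

engine = create_engine(
    "postgresql+psycopg2://airflow:airflow@localhost/airflow",  # sql_alchemy_conn (hypothetical)
    encoding="utf-8",   # sql_engine_encoding
    pool_size=5,        # sql_alchemy_pool_size
    pool_recycle=1800,  # sql_alchemy_pool_recycle
)
# sql_alchemy_schema would additionally point the metadata tables at a
# non-default schema on databases that support multiple schemas.
```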
@@ -142,6 +150,9 @@ killed_task_cleanup_time = 60
# `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params.
dag_run_conf_overrides_params = False

+ # Worker initialisation check to validate Metadata Database connection
+ worker_precheck = False
+
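To make the dag_run_conf_overrides_params note above concrete, here is a sketch of a DAG whose params default could be overridden from `airflow trigger_dag -c` when the option is set to True; the DAG id, parameter name and values are invented.

```python
# Sketch only: with dag_run_conf_overrides_params = True, running
#   airflow trigger_dag -c '{"greeting": "hi"}' conf_override_demo
# would replace the default value of params.greeting below.
from datetime import datetime

from airflow import DAG
from airflow.operators.bash_operator import BashOperator

dag = DAG(
    dag_id="conf_override_demo",
    start_date=datetime(2018, 1, 1),
    schedule_interval=None,
    params={"greeting": "hello"},  # default, overridable from trigger_dag -c
)

echo_greeting = BashOperator(
    task_id="echo_greeting",
    bash_command="echo {{ params.greeting }}",
    dag=dag,
)
```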
[cli]
# In what way should the cli access the API. The LocalClient will use the
# database directly, while the json_client will use the api running on the
@@ -180,6 +191,9 @@ default_gpus = 0
[hive]
# Default mapreduce queue for HiveOperator tasks
default_hive_mapred_queue =
+ # Template for mapred_job_name in HiveOperator, supports the following named parameters:
+ # hostname, dag_id, task_id, execution_date
+ mapred_job_name_template = Airflow HiveOperator task for {hostname}.{dag_id}.{task_id}.{execution_date}
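For illustration, the named parameters listed above are ordinary str.format() fields; a sketch with made-up values:

```python
# Sketch: filling a mapred_job_name_template with str.format(); values invented.
template = "Airflow HiveOperator task for {hostname}.{dag_id}.{task_id}.{execution_date}"
job_name = template.format(
    hostname="worker-1.example.com",
    dag_id="hive_example",
    task_id="run_hql",
    execution_date="2018-01-01T00:00:00",
)
print(job_name)
# Airflow HiveOperator task for worker-1.example.com.hive_example.run_hql.2018-01-01T00:00:00
```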

[webserver]
# The base url of your website as airflow cannot guess what domain or
@@ -227,7 +241,10 @@ access_logfile = -
error_logfile = -

# Expose the configuration file in the web server
- expose_config = False
+ # This is only applicable for the flask-admin based web UI (non FAB-based).
+ # In the FAB-based web UI with RBAC feature,
+ # access to configuration is controlled by role permissions.
+ expose_config = True

# Set to true to turn on authentication:
# https://airflow.incubator.apache.org/security.html#web-authentication
@@ -387,9 +404,7 @@ run_duration = -1
# after how much time a new DAG should be picked up from the filesystem
min_file_process_interval = 0

- # How many seconds to wait between file-parsing loops to prevent the logs from being spammed.
- min_file_parsing_loop_time = 1
-
+ # How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes.
dag_dir_list_interval = 300

# How often should stats be printed to the logs
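As a rough sketch, the scheduler intervals above can be read back through Airflow's configuration API; this assumes a 1.10-style install where airflow.configuration.conf is the loaded parser.

```python
# Sketch: reading the scheduler timing options above at runtime.
# Assumes airflow.configuration.conf is the loaded AirflowConfigParser (Airflow 1.10).
from airflow.configuration import conf

min_file_process_interval = conf.getint("scheduler", "min_file_process_interval")
dag_dir_list_interval = conf.getint("scheduler", "dag_dir_list_interval")
print(min_file_process_interval, dag_dir_list_interval)  # e.g. 0 300
```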
@@ -427,6 +442,10 @@ max_threads = 2

authenticate = False

+ # Turn off scheduler use of cron intervals by setting this to False.
+ # DAGs submitted manually in the web UI or with trigger_dag will still run.
+ use_job_schedule = True
+
[ldap]
# set this to ldaps://<your.ldap.server>:<port>
uri =
@@ -491,7 +510,6 @@ reinit_frequency = 3600
kinit_path = kinit
keytab = airflow.keytab

-
[github_enterprise]
api_rev = v3
@@ -506,9 +524,11 @@ elasticsearch_log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}
elasticsearch_end_of_log_mark = end_of_log

[kubernetes]
- # The repository and tag of the Kubernetes Image for the Worker to Run
+ # The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
worker_container_repository =
worker_container_tag =
+ worker_container_image_pull_policy = IfNotPresent
+ worker_dags_folder =

# If True (default), worker pods will be deleted upon termination
delete_worker_pods = True
@@ -562,6 +582,11 @@ gcp_service_account_keys =
# It will raise an exception if called from a process not running in a kubernetes environment.
in_cluster = True

+ [kubernetes_node_selectors]
+ # The Key-value pairs to be given to worker pods.
+ # The worker pods will be scheduled to the nodes of the specified key-value pairs.
+ # Should be supplied in the format: key = value
+
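Since the key = value pairs in [kubernetes_node_selectors] are plain ConfigParser entries, the sketch below shows how such a section parses into the dictionary that would feed the worker pods' node selector; the selector keys and values are invented.

```python
# Sketch: parsing key = value pairs under [kubernetes_node_selectors] into a dict.
# Note that ConfigParser lower-cases option names by default.
import configparser

cfg = configparser.ConfigParser()
cfg.read_string("""
[kubernetes_node_selectors]
disktype = ssd
zone = us-east-1a
""")
node_selectors = dict(cfg["kubernetes_node_selectors"])
print(node_selectors)  # {'disktype': 'ssd', 'zone': 'us-east-1a'}
```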
[kubernetes_secrets]
# The scheduler mounts the following secrets into your workers as they are launched by the
# scheduler. You may define as many secrets as needed and the kubernetes launcher will parse the