Skip to content

Commit

Permalink
Add Kerberos Auth for PrestoHook (apache#10488)
Browse files Browse the repository at this point in the history
  • Loading branch information
mik-laj authored Oct 20, 2020
1 parent e3a0839 commit 1543923
Show file tree
Hide file tree
Showing 27 changed files with 775 additions and 220 deletions.
44 changes: 42 additions & 2 deletions airflow/providers/presto/hooks/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@
# specific language governing permissions and limitations
# under the License.
from typing import Optional, Any, Iterable
import os

import prestodb
from prestodb.exceptions import DatabaseError
from prestodb.transaction import IsolationLevel

from airflow import AirflowException
from airflow.configuration import conf
from airflow.hooks.dbapi_hook import DbApiHook
from airflow.models import Connection

Expand All @@ -31,6 +34,17 @@ class PrestoException(Exception):
"""


def _boolify(value):
if isinstance(value, bool):
return value
if isinstance(value, str):
if value.lower() == 'false':
return False
elif value.lower() == 'true':
return True
return value


class PrestoHook(DbApiHook):
"""
Interact with Presto through prestodb.
Expand All @@ -49,9 +63,28 @@ def get_conn(self) -> Connection:
db = self.get_connection(
self.presto_conn_id # type: ignore[attr-defined] # pylint: disable=no-member
)
auth = prestodb.auth.BasicAuthentication(db.login, db.password) if db.password else None
extra = db.extra_dejson
auth = None
if db.password and extra.get('auth') == 'kerberos':
raise AirflowException("Kerberos authorization doesn't support password.")
elif db.password:
auth = prestodb.auth.BasicAuthentication(db.login, db.password)
elif extra.get('auth') == 'kerberos':
auth = prestodb.auth.KerberosAuthentication(
config=extra.get('kerberos__config', os.environ.get('KRB5_CONFIG')),
service_name=extra.get('kerberos__service_name'),
mutual_authentication=_boolify(extra.get('kerberos__mutual_authentication', False)),
force_preemptive=_boolify(extra.get('kerberos__force_preemptive', False)),
hostname_override=extra.get('kerberos__hostname_override'),
sanitize_mutual_error_response=_boolify(
extra.get('kerberos__sanitize_mutual_error_response', True)
),
principal=extra.get('kerberos__principal', conf.get('kerberos', 'principal')),
delegate=_boolify(extra.get('kerberos__delegate', False)),
ca_bundle=extra.get('kerberos__ca_bundle'),
)

return prestodb.dbapi.connect(
presto_conn = prestodb.dbapi.connect(
host=db.host,
port=db.port,
user=db.login,
Expand All @@ -62,6 +95,13 @@ def get_conn(self) -> Connection:
auth=auth,
isolation_level=self.get_isolation_level(), # type: ignore[func-returns-value]
)
if extra.get('verify') is not None:
# Unfortunately verify parameter is not available via public API.
# The PR is merged in the presto library, but has not been released.
# See: https://github.com/prestosql/presto-python-client/pull/31
presto_conn._http_session.verify = _boolify(extra['verify']) # pylint: disable=protected-access

return presto_conn

def get_isolation_level(self) -> Any:
"""Returns an isolation level"""
Expand Down
14 changes: 14 additions & 0 deletions breeze
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,9 @@ function breeze::parse_arguments() {
else
INTEGRATIONS+=("${INTEGRATION}")
fi
if [[ " ${INTEGRATIONS[*]} " =~ " presto " ]]; then
INTEGRATIONS+=("kerberos");
fi
echo
shift 2
;;
Expand Down Expand Up @@ -3115,6 +3118,10 @@ function breeze::run_breeze_command() {
fi
case "${command_to_run}" in
enter_breeze)
if [[ " ${INTEGRATIONS[*]} " =~ " kerberos " ]]; then
kerberos::create_kerberos_network
fi

if [[ ${PRODUCTION_IMAGE} == "true" ]]; then
"${dc_run_file}" run --service-ports --rm airflow "${@}"
"${SCRIPTS_CI_DIR}/tools/ci_fix_ownership.sh"
Expand All @@ -3134,6 +3141,10 @@ function breeze::run_breeze_command() {
"/opt/airflow/scripts/in_container/entrypoint_exec.sh" "${@}"
;;
run_tests)
if [[ " ${INTEGRATIONS[*]} " =~ " kerberos " ]]; then
kerberos::create_kerberos_network
fi

export RUN_TESTS="true"
readonly RUN_TESTS
"${BUILD_CACHE_DIR}/${DOCKER_COMPOSE_RUN_SCRIPT_FOR_CI}" run --service-ports --rm airflow "$@"
Expand All @@ -3147,6 +3158,9 @@ function breeze::run_breeze_command() {
fi
"${dc_run_file}" "${docker_compose_command}" "${EXTRA_DC_OPTIONS[@]}" "$@"
set -u
if [[ "${docker_compose_command}" = "down" ]]; then
kerberos::delete_kerberos_network
fi
;;
perform_static_checks)
breeze::make_sure_precommit_is_installed
Expand Down
46 changes: 41 additions & 5 deletions scripts/ci/docker-compose/integration-kerberos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,52 @@
---
version: "2.2"
services:
kerberos:
image: apache/airflow:krb5-kdc-server-2020.08.28
hostname: kerberos
kdc-server-example-com:
image: apache/airflow:krb5-kdc-server-2020.10.08
hostname: krb5-kdc-server-example-com
domainname: example.com
networks:
example.com:
ipv4_address: 10.5.0.2

volumes:
- kerberos-keytabs:/root/kerberos-keytabs
- /dev/urandom:/dev/random # Required to get non-blocking entropy source

environment:
- KRB5_TRACE=/dev/stderr
- POST_BOOTSTRAP_COMMAND=
/opt/kerberos-utils/create_admin.sh alice alice;
/opt/kerberos-utils/create_client.sh bob bob /root/kerberos-keytabs/airflow.keytab;
/opt/kerberos-utils/create_service.sh krb5-machine-example-com airflow
/root/kerberos-keytabs/airflow.keytab;
/opt/kerberos-utils/create_service.sh presto HTTP /root/kerberos-keytabs/presto.keytab;


airflow:
hostname: krb5-machine-example-com
domainname: example.com
networks:
example.com:
ipv4_address: 10.5.0.1
default: {}

depends_on:
- kerberos
- kdc-server-example-com
volumes:
- kerberos-keytabs:/root/kerberos-keytabs
- ../dockerfiles/krb5-kdc-server/krb5.conf:/etc/krb5.conf:ro
environment:
- INTEGRATION_KERBEROS=true
- KRB5_CONFIG=/etc/krb5.conf
- KRB5_KTNAME=/etc/airflow.keytab
- KRB5_KTNAME=/root/kerberos-keytabs/airflow.keytab
- KRB5_TRACE=/dev/stderr
- AIRFLOW__KERBEROS__KEYTAB=/root/kerberos-keytabs/airflow.keytab
- AIRFLOW__KERBEROS__PRINCIPAL=bob@EXAMPLE.COM

volumes:
kerberos-keytabs:

networks:
example.com:
external: true
22 changes: 21 additions & 1 deletion scripts/ci/docker-compose/integration-presto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,34 @@
version: "2.2"
services:
presto:
image: prestosql/presto:330
image: apache/airflow:presto-2020.10.08
container_name: presto
hostname: presto
domainname: example.com

networks:
example.com:
ipv4_address: 10.5.0.3

ports:
- "38080:8080"
- "37778:7778"

volumes:
- /dev/urandom:/dev/random # Required to get non-blocking entropy source
- ../dockerfiles/krb5-kdc-server/krb5.conf:/etc/krb5.conf:ro
- presto-db-volume:/data/presto
- kerberos-keytabs:/home/presto/kerberos-keytabs

environment:
- KRB5_CONFIG=/etc/krb5.conf
- KRB5_TRACE=/dev/stderr
- KRB5_KTNAME=/home/presto/kerberos-keytabs/presto.keytab
airflow:
environment:
- INTEGRATION_PRESTO=true
depends_on:
- presto

volumes:
presto-db-volume:
93 changes: 42 additions & 51 deletions scripts/ci/dockerfiles/krb5-kdc-server/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,64 +1,55 @@
# Dockerfile - kdc-server
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

FROM debian:buster-slim
# see docker-compose.yml

ARG AIRFLOW_KRB5KDCSERVER_VERSION
ARG COMMIT_SHA
FROM centos:7

# environment variables
ENV DEBIAN_FRONTEND noninteractive
# build environment
WORKDIR /root/

# Kerberos server
RUN apt-get update && apt-get install --no-install-recommends -y \
dumb-init \
ntp \
python-dev \
python-pip \
python-wheel \
python-setuptools \
python-pkg-resources \
krb5-admin-server \
krb5-kdc \
&& rm -rf /var/lib/apt/lists/* \
&& mkdir -p /var/log/supervisord/
# Dev stuff
RUN yum -y install curl wget

RUN mkdir /app/
# python
RUN curl "https://bootstrap.pypa.io/get-pip.py" -o /tmp/get-pip.py && \
python /tmp/get-pip.py && \
rm /tmp/get-pip.py

# Supervisord
RUN pip install supervisor==3.3.4

COPY ./krb-conf/server/kdc.conf /etc/krb5kdc/kdc.conf
COPY ./krb-conf/server/kadm5.acl /etc/krb5kdc/kadm5.acl
COPY ./krb-conf/client/krb5.conf /etc/krb5.conf
COPY ./start_kdc.sh /entrypoint
# supervisord
COPY ./supervisord.conf /etc/supervisord.conf

WORKDIR /app
RUN pip install supervisor==3.3.3 && \
mkdir -p /var/log/supervisord/

# kerberos server
RUN yum -y install ntp krb5-server krb5-libs

# kerberos server configuration
ENV KRB5_CONFIG=/etc/krb5.conf
ENV KRB5_KDC_PROFILE=/var/kerberos/krb5kdc/kdc.conf
RUN mkdir -pv /var/kerberos/krb5kdc/
COPY kdc.conf /var/kerberos/krb5kdc/kdc.conf
COPY kadm5.acl /var/kerberos/krb5kdc/kadm5.acl
COPY krb5.conf /etc/krb5.conf
RUN mkdir -pv /var/log/kerberos/ && \
touch /var/log/kerberos/kadmin.log && \
touch /var/log/kerberos/krb5lib.log && \
touch /var/log/kerberos/krb5.log && \
kdb5_util -r EXAMPLE.COM -P krb5 create -s

# kerberos utils
COPY utils /opt/kerberos-utils/

# supervisord configuration
COPY supervisord.conf /etc/supervisord.conf

# entrypoint
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

LABEL org.apache.airflow.component="krb5-kdc-server"
LABEL org.apache.airflow.airflow_krb5_kdc_server.version="${AIRFLOW_KRB5KDCSERVER_VERSION}"
LABEL org.apache.airflow.krb5-kdc-server.core.version="krb5"
LABEL org.apache.airflow.airflow_bats.version="${AIRFLOW_KRB5KDCSERVER_VERSION}"
LABEL org.apache.airflow.commit_sha="${COMMIT_SHA}"
LABEL maintainer="Apache Airflow Community <dev@airflow.apache.org>"

RUN chmod a+x /entrypoint

EXPOSE 88
# when container is starting
ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
CMD ["/usr/bin/supervisord", "-n", "-c", "/etc/supervisord.conf"]
2 changes: 1 addition & 1 deletion scripts/ci/dockerfiles/krb5-kdc-server/build_and_push.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ DOCKERHUB_REPO=${DOCKERHUB_REPO:="airflow"}
readonly DOCKERHUB_USER
readonly DOCKERHUB_REPO

AIRFLOW_KRB5KDCSERVER_VERSION="2020.08.28"
AIRFLOW_KRB5KDCSERVER_VERSION="2020.10.08"
readonly AIRFLOW_KRB5KDCSERVER_VERSION

COMMIT_SHA=$(git rev-parse HEAD)
Expand Down
15 changes: 13 additions & 2 deletions ...krb5-kdc-server/krb-conf/server/kadm5.acl → ...dockerfiles/krb5-kdc-server/entrypoint.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
Expand All @@ -15,4 +16,14 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
*/admin@EXAMPLE.COM *

# Shell options: -x traces each command, -e exits on the first failing command,
# -u treats unset variables as errors, and pipefail makes a pipeline fail if any
# command in it fails.
set -xeuo pipefail

# Background subshell: it keeps running alongside the main command that the
# `exec` at the bottom of this script starts.
(
# With no arguments, `export` prints the exported variables.
export
# Wait before running the post-bootstrap command -- presumably to give the main
# process time to come up; TODO confirm that 2 seconds is enough.
sleep 2;
# `-v` is true only when the variable is set, so this check is safe under `set -u`.
if [[ -v POST_BOOTSTRAP_COMMAND ]]; then
bash -c "$POST_BOOTSTRAP_COMMAND"
fi
) &
# Replace this shell with the command passed as arguments to the entrypoint.
exec "$@"
8 changes: 8 additions & 0 deletions scripts/ci/dockerfiles/krb5-kdc-server/kadm5.acl
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# /var/kerberos/krb5kdc/kadm5.acl -- Kerberos V5 general configuration.
#
# This file is the access control list for krb5 administration.
# When this file is edited run /etc/init.d/krb5-admin-server restart to activate
# One common way to set up Kerberos administration is to allow any principal
# ending in /admin to have full administrative rights.
# The following line enables this:
*/admin@EXAMPLE.COM *
Loading

0 comments on commit 1543923

Please sign in to comment.