Skip to content

[PECOBLR-727] Add kerberos support for proxy auth #675

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Aug 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
415 changes: 414 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pyarrow = [
{ version = ">=18.0.0", python = ">=3.13", optional=true }
]
pyjwt = "^2.0.0"
requests-kerberos = {version = "^0.15.0", optional = true}


[tool.poetry.extras]
Expand Down
8 changes: 2 additions & 6 deletions src/databricks/sql/auth/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ def __init__(
retry_stop_after_attempts_duration: Optional[float] = None,
retry_delay_default: Optional[float] = None,
retry_dangerous_codes: Optional[List[int]] = None,
http_proxy: Optional[str] = None,
proxy_username: Optional[str] = None,
proxy_password: Optional[str] = None,
proxy_auth_method: Optional[str] = None,
pool_connections: Optional[int] = None,
pool_maxsize: Optional[int] = None,
user_agent: Optional[str] = None,
Expand Down Expand Up @@ -79,9 +77,7 @@ def __init__(
)
self.retry_delay_default = retry_delay_default or 5.0
self.retry_dangerous_codes = retry_dangerous_codes or []
self.http_proxy = http_proxy
self.proxy_username = proxy_username
self.proxy_password = proxy_password
self.proxy_auth_method = proxy_auth_method
self.pool_connections = pool_connections or 10
self.pool_maxsize = pool_maxsize or 20
self.user_agent = user_agent
Expand Down
49 changes: 25 additions & 24 deletions src/databricks/sql/auth/thrift_http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,19 @@
from urllib3.util import make_headers
from databricks.sql.auth.retry import CommandType, DatabricksRetryPolicy
from databricks.sql.types import SSLOptions
from databricks.sql.common.http_utils import (
detect_and_parse_proxy,
)

logger = logging.getLogger(__name__)


class THttpClient(thrift.transport.THttpClient.THttpClient):
realhost: Optional[str]
realport: Optional[int]
proxy_uri: Optional[str]
proxy_auth: Optional[Dict[str, str]]

def __init__(
self,
auth_provider,
Expand All @@ -29,6 +37,7 @@ def __init__(
ssl_options: Optional[SSLOptions] = None,
max_connections: int = 1,
retry_policy: Union[DatabricksRetryPolicy, int] = 0,
**kwargs,
):
self._ssl_options = ssl_options

Expand Down Expand Up @@ -58,27 +67,25 @@ def __init__(
self.path = parsed.path
if parsed.query:
self.path += "?%s" % parsed.query
try:
proxy = urllib.request.getproxies()[self.scheme]
except KeyError:
proxy = None
else:
if urllib.request.proxy_bypass(self.host):
proxy = None
if proxy:
parsed = urllib.parse.urlparse(proxy)

# Handle proxy settings using shared utility
proxy_auth_method = kwargs.get("_proxy_auth_method")
proxy_uri, proxy_auth = detect_and_parse_proxy(
self.scheme, self.host, proxy_auth_method=proxy_auth_method
)

if proxy_uri:
parsed_proxy = urllib.parse.urlparse(proxy_uri)
# realhost and realport are the host and port of the actual request
self.realhost = self.host
self.realport = self.port

# this is passed to ProxyManager
self.proxy_uri: str = proxy
self.host = parsed.hostname
self.port = parsed.port
self.proxy_auth = self.basic_proxy_auth_headers(parsed)
self.proxy_uri = proxy_uri
self.host = parsed_proxy.hostname
self.port = parsed_proxy.port
self.proxy_auth = proxy_auth
else:
self.realhost = self.realport = self.proxy_auth = None
self.realhost = self.realport = self.proxy_auth = self.proxy_uri = None

self.max_connections = max_connections

Expand Down Expand Up @@ -204,15 +211,9 @@ def flush(self):
)
)

@staticmethod
def basic_proxy_auth_headers(proxy):
if proxy is None or not proxy.username:
return None
ap = "%s:%s" % (
urllib.parse.unquote(proxy.username),
urllib.parse.unquote(proxy.password),
)
return make_headers(proxy_basic_auth=ap)
def using_proxy(self) -> bool:
"""Check if proxy is being used."""
return self.realhost is not None

def set_retry_command_type(self, value: CommandType):
"""Pass the provided CommandType to the retry policy"""
Expand Down
56 changes: 21 additions & 35 deletions src/databricks/sql/backend/sea/utils/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from databricks.sql.exc import (
RequestError,
)
from databricks.sql.common.http_utils import (
detect_and_parse_proxy,
)

logger = logging.getLogger(__name__)

Expand All @@ -30,9 +33,9 @@ class SeaHttpClient:
retry_policy: Union[DatabricksRetryPolicy, int]
_pool: Optional[Union[HTTPConnectionPool, HTTPSConnectionPool]]
proxy_uri: Optional[str]
proxy_host: Optional[str]
proxy_port: Optional[int]
proxy_auth: Optional[Dict[str, str]]
realhost: Optional[str]
realport: Optional[int]

def __init__(
self,
Expand Down Expand Up @@ -121,44 +124,27 @@ def __init__(
)
self.retry_policy = 0

# Handle proxy settings
try:
# returns a dictionary of scheme -> proxy server URL mappings.
# https://docs.python.org/3/library/urllib.request.html#urllib.request.getproxies
proxy = urllib.request.getproxies().get(self.scheme)
except (KeyError, AttributeError):
# No proxy found or getproxies() failed - disable proxy
proxy = None
else:
# Proxy found, but check if this host should bypass proxy
if self.host and urllib.request.proxy_bypass(self.host):
proxy = None # Host bypasses proxy per system rules

if proxy:
parsed_proxy = urllib.parse.urlparse(proxy)
self.proxy_host = self.host
self.proxy_port = self.port
self.proxy_uri = proxy
# Handle proxy settings using shared utility
proxy_auth_method = kwargs.get("_proxy_auth_method")
proxy_uri, proxy_auth = detect_and_parse_proxy(
self.scheme, self.host, proxy_auth_method=proxy_auth_method
)

if proxy_uri:
parsed_proxy = urllib.parse.urlparse(proxy_uri)
self.realhost = self.host
self.realport = self.port
self.proxy_uri = proxy_uri
self.host = parsed_proxy.hostname
self.port = parsed_proxy.port or (443 if self.scheme == "https" else 80)
self.proxy_auth = self._basic_proxy_auth_headers(parsed_proxy)
self.proxy_auth = proxy_auth
else:
self.proxy_host = None
self.proxy_port = None
self.proxy_auth = None
self.proxy_uri = None
self.realhost = self.realport = self.proxy_auth = self.proxy_uri = None

# Initialize connection pool
self._pool = None
self._open()

def _basic_proxy_auth_headers(self, proxy_parsed) -> Optional[Dict[str, str]]:
"""Create basic auth headers for proxy if credentials are provided."""
if proxy_parsed is None or not proxy_parsed.username:
return None
ap = f"{urllib.parse.unquote(proxy_parsed.username)}:{urllib.parse.unquote(proxy_parsed.password)}"
return make_headers(proxy_basic_auth=ap)

def _open(self):
"""Initialize the connection pool."""
pool_kwargs = {"maxsize": self.max_connections}
Expand Down Expand Up @@ -186,8 +172,8 @@ def _open(self):
proxy_headers=self.proxy_auth,
)
self._pool = proxy_manager.connection_from_host(
host=self.proxy_host,
port=self.proxy_port,
host=self.realhost,
port=self.realport,
scheme=self.scheme,
pool_kwargs=pool_kwargs,
)
Expand All @@ -201,7 +187,7 @@ def close(self):

def using_proxy(self) -> bool:
"""Check if proxy is being used."""
return self.proxy_host is not None
return self.realhost is not None

def set_retry_command_type(self, command_type: CommandType):
"""Set the command type for retry policy decision making."""
Expand Down
6 changes: 6 additions & 0 deletions src/databricks/sql/backend/thrift_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,12 @@ def __init__(
self.force_dangerous_codes = kwargs.get("_retry_dangerous_codes", [])

additional_transport_args = {}

# Add proxy authentication method if specified
proxy_auth_method = kwargs.get("_proxy_auth_method")
if proxy_auth_method:
additional_transport_args["_proxy_auth_method"] = proxy_auth_method

_max_redirects: Union[None, int] = kwargs.get("_retry_max_redirects")

if _max_redirects:
Expand Down
100 changes: 100 additions & 0 deletions src/databricks/sql/common/http_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import ssl
import urllib.parse
import urllib.request
import logging
from typing import Dict, Any, Optional, Tuple, Union

from urllib3 import HTTPConnectionPool, HTTPSConnectionPool, ProxyManager
from urllib3.util import make_headers

from databricks.sql.auth.retry import DatabricksRetryPolicy
from databricks.sql.types import SSLOptions

logger = logging.getLogger(__name__)


def detect_and_parse_proxy(
    scheme: str,
    host: Optional[str],
    skip_bypass: bool = False,
    proxy_auth_method: Optional[str] = None,
) -> Tuple[Optional[str], Optional[Dict[str, str]]]:
    """
    Detect the system proxy for ``scheme`` and build its authentication headers.

    Args:
        scheme: URL scheme (http/https) used to look up the proxy.
        host: Target hostname (optional, only needed for bypass checking).
        skip_bypass: If True, skip proxy bypass checking and return the proxy
            config if one is found.
        proxy_auth_method: Authentication method: 'basic', 'negotiate', or None
            (None defaults to 'basic'). Matched case-insensitively, ignoring
            surrounding whitespace.

    Returns:
        Tuple of (proxy_uri, proxy_headers), or (None, None) if no proxy applies.
        proxy_headers may be None even when a proxy URI is returned (no
        credentials in the proxy URL, or no Kerberos header could be produced).

    Raises:
        ValueError: If a proxy is in use and proxy_auth_method is not one of the
            supported values.
    """
    try:
        # returns a dictionary of scheme -> proxy server URL mappings.
        # https://docs.python.org/3/library/urllib.request.html#urllib.request.getproxies
        proxy = urllib.request.getproxies().get(scheme)
    except (KeyError, AttributeError):
        # No proxy found or getproxies() failed - disable proxy
        proxy = None
    else:
        # Proxy found, but check if this host should bypass proxy (unless skipped)
        if not skip_bypass and host and urllib.request.proxy_bypass(host):
            proxy = None  # Host bypasses proxy per system rules

    if not proxy:
        return None, None

    parsed_proxy = urllib.parse.urlparse(proxy)

    # Normalize the method so that e.g. "Negotiate" or " BASIC " are accepted
    # instead of being rejected over a purely cosmetic difference.
    if isinstance(proxy_auth_method, str):
        auth_method: Optional[str] = proxy_auth_method.strip().lower()
    else:
        auth_method = proxy_auth_method

    # Generate appropriate auth headers based on method
    if auth_method == "negotiate":
        proxy_headers = _generate_negotiate_headers(parsed_proxy.hostname)
    elif auth_method == "basic" or auth_method is None:
        # Default to basic if method not specified (backward compatibility)
        proxy_headers = create_basic_proxy_auth_headers(parsed_proxy)
    else:
        # Report the value exactly as the caller supplied it.
        raise ValueError(f"Unsupported proxy_auth_method: {proxy_auth_method}")

    return proxy, proxy_headers


def _generate_negotiate_headers(
    proxy_hostname: Optional[str],
) -> Optional[Dict[str, str]]:
    """
    Generate Kerberos/SPNEGO authentication headers for a proxy.

    This is best-effort: any failure is logged and reported as None so the
    caller can continue without proxy auth headers.

    Args:
        proxy_hostname: Hostname of the proxy to generate a token for.

    Returns:
        Dictionary with a single "proxy-authorization" header, or None if the
        hostname is missing, the optional requests-kerberos package is not
        installed, or no token could be generated.
    """
    if not proxy_hostname:
        # Without a hostname there is no service to request a ticket for.
        logger.debug("No proxy hostname available; skipping Kerberos proxy auth")
        return None

    try:
        # Imported lazily because requests-kerberos is an optional dependency.
        from requests_kerberos import HTTPKerberosAuth

        logger.debug(
            "Attempting to generate Kerberos SPNEGO token for proxy: %s", proxy_hostname
        )
        auth = HTTPKerberosAuth()
        negotiate_details = auth.generate_request_header(
            None, proxy_hostname, is_preemptive=True
        )
        if negotiate_details:
            return {"proxy-authorization": negotiate_details}
        else:
            logger.debug("Unable to generate kerberos proxy auth headers")
    except ImportError as e:
        # Give an actionable message when the optional dependency is missing.
        logger.error(
            "Kerberos proxy auth requires the optional 'requests-kerberos' package: %s",
            e,
        )
    except Exception as e:
        # Deliberately broad: header generation must never break connection setup.
        logger.error("Error generating Kerberos proxy auth headers: %s", e)

    return None


def create_basic_proxy_auth_headers(
    parsed_proxy: Optional[urllib.parse.ParseResult],
) -> Optional[Dict[str, str]]:
    """
    Create basic auth headers for proxy if credentials are provided.

    Args:
        parsed_proxy: Parsed proxy URL from urllib.parse.urlparse()

    Returns:
        Dictionary of proxy auth headers or None if no credentials
    """
    if parsed_proxy is None or not parsed_proxy.username:
        return None

    username = urllib.parse.unquote(parsed_proxy.username)
    # A URL such as http://user@proxy:8080 has password None; unquote(None)
    # raises TypeError, so treat a missing password as empty.
    password = urllib.parse.unquote(parsed_proxy.password or "")
    return make_headers(proxy_basic_auth=f"{username}:{password}")
Loading
Loading