Skip to content

Commit 8e97878

Browse files
[PECOBLR-727] Add kerberos support for proxy auth (#675)
* unify ssl proxy Signed-off-by: Vikrant Puppala <[email protected]> * unify ssl proxy Signed-off-by: Vikrant Puppala <[email protected]> * simplify change Signed-off-by: Vikrant Puppala <[email protected]> * add utils class Signed-off-by: Vikrant Puppala <[email protected]> * Allow per request proxy decision Signed-off-by: Vikrant Puppala <[email protected]> * Add kerberos auth support Signed-off-by: Vikrant Puppala <[email protected]> * update dependencies Signed-off-by: Vikrant Puppala <[email protected]> * update dependencies Signed-off-by: Vikrant Puppala <[email protected]> * update dependencies Signed-off-by: Vikrant Puppala <[email protected]> * update dependencies Signed-off-by: Vikrant Puppala <[email protected]> * update dependencies Signed-off-by: Vikrant Puppala <[email protected]> * update dependencies Signed-off-by: Vikrant Puppala <[email protected]> * update dependencies Signed-off-by: Vikrant Puppala <[email protected]> * fix mypy Signed-off-by: Vikrant Puppala <[email protected]> * fix lint Signed-off-by: Vikrant Puppala <[email protected]> * fix lint Signed-off-by: Vikrant Puppala <[email protected]> * lazy logging Signed-off-by: Vikrant Puppala <[email protected]> --------- Signed-off-by: Vikrant Puppala <[email protected]>
1 parent d3df719 commit 8e97878

File tree

11 files changed

+690
-98
lines changed

11 files changed

+690
-98
lines changed

poetry.lock

Lines changed: 414 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pyarrow = [
2626
{ version = ">=18.0.0", python = ">=3.13", optional=true }
2727
]
2828
pyjwt = "^2.0.0"
29+
requests-kerberos = {version = "^0.15.0", optional = true}
2930

3031

3132
[tool.poetry.extras]

src/databricks/sql/auth/common.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,7 @@ def __init__(
4646
retry_stop_after_attempts_duration: Optional[float] = None,
4747
retry_delay_default: Optional[float] = None,
4848
retry_dangerous_codes: Optional[List[int]] = None,
49-
http_proxy: Optional[str] = None,
50-
proxy_username: Optional[str] = None,
51-
proxy_password: Optional[str] = None,
49+
proxy_auth_method: Optional[str] = None,
5250
pool_connections: Optional[int] = None,
5351
pool_maxsize: Optional[int] = None,
5452
user_agent: Optional[str] = None,
@@ -79,9 +77,7 @@ def __init__(
7977
)
8078
self.retry_delay_default = retry_delay_default or 5.0
8179
self.retry_dangerous_codes = retry_dangerous_codes or []
82-
self.http_proxy = http_proxy
83-
self.proxy_username = proxy_username
84-
self.proxy_password = proxy_password
80+
self.proxy_auth_method = proxy_auth_method
8581
self.pool_connections = pool_connections or 10
8682
self.pool_maxsize = pool_maxsize or 20
8783
self.user_agent = user_agent

src/databricks/sql/auth/thrift_http_client.py

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,19 @@
1515
from urllib3.util import make_headers
1616
from databricks.sql.auth.retry import CommandType, DatabricksRetryPolicy
1717
from databricks.sql.types import SSLOptions
18+
from databricks.sql.common.http_utils import (
19+
detect_and_parse_proxy,
20+
)
1821

1922
logger = logging.getLogger(__name__)
2023

2124

2225
class THttpClient(thrift.transport.THttpClient.THttpClient):
26+
realhost: Optional[str]
27+
realport: Optional[int]
28+
proxy_uri: Optional[str]
29+
proxy_auth: Optional[Dict[str, str]]
30+
2331
def __init__(
2432
self,
2533
auth_provider,
@@ -29,6 +37,7 @@ def __init__(
2937
ssl_options: Optional[SSLOptions] = None,
3038
max_connections: int = 1,
3139
retry_policy: Union[DatabricksRetryPolicy, int] = 0,
40+
**kwargs,
3241
):
3342
self._ssl_options = ssl_options
3443

@@ -58,27 +67,25 @@ def __init__(
5867
self.path = parsed.path
5968
if parsed.query:
6069
self.path += "?%s" % parsed.query
61-
try:
62-
proxy = urllib.request.getproxies()[self.scheme]
63-
except KeyError:
64-
proxy = None
65-
else:
66-
if urllib.request.proxy_bypass(self.host):
67-
proxy = None
68-
if proxy:
69-
parsed = urllib.parse.urlparse(proxy)
7070

71+
# Handle proxy settings using shared utility
72+
proxy_auth_method = kwargs.get("_proxy_auth_method")
73+
proxy_uri, proxy_auth = detect_and_parse_proxy(
74+
self.scheme, self.host, proxy_auth_method=proxy_auth_method
75+
)
76+
77+
if proxy_uri:
78+
parsed_proxy = urllib.parse.urlparse(proxy_uri)
7179
# realhost and realport are the host and port of the actual request
7280
self.realhost = self.host
7381
self.realport = self.port
74-
7582
# this is passed to ProxyManager
76-
self.proxy_uri: str = proxy
77-
self.host = parsed.hostname
78-
self.port = parsed.port
79-
self.proxy_auth = self.basic_proxy_auth_headers(parsed)
83+
self.proxy_uri = proxy_uri
84+
self.host = parsed_proxy.hostname
85+
self.port = parsed_proxy.port
86+
self.proxy_auth = proxy_auth
8087
else:
81-
self.realhost = self.realport = self.proxy_auth = None
88+
self.realhost = self.realport = self.proxy_auth = self.proxy_uri = None
8289

8390
self.max_connections = max_connections
8491

@@ -204,15 +211,9 @@ def flush(self):
204211
)
205212
)
206213

207-
@staticmethod
208-
def basic_proxy_auth_headers(proxy):
209-
if proxy is None or not proxy.username:
210-
return None
211-
ap = "%s:%s" % (
212-
urllib.parse.unquote(proxy.username),
213-
urllib.parse.unquote(proxy.password),
214-
)
215-
return make_headers(proxy_basic_auth=ap)
214+
def using_proxy(self) -> bool:
215+
"""Check if proxy is being used."""
216+
return self.realhost is not None
216217

217218
def set_retry_command_type(self, value: CommandType):
218219
"""Pass the provided CommandType to the retry policy"""

src/databricks/sql/backend/sea/utils/http_client.py

Lines changed: 21 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
from databricks.sql.exc import (
1616
RequestError,
1717
)
18+
from databricks.sql.common.http_utils import (
19+
detect_and_parse_proxy,
20+
)
1821

1922
logger = logging.getLogger(__name__)
2023

@@ -30,9 +33,9 @@ class SeaHttpClient:
3033
retry_policy: Union[DatabricksRetryPolicy, int]
3134
_pool: Optional[Union[HTTPConnectionPool, HTTPSConnectionPool]]
3235
proxy_uri: Optional[str]
33-
proxy_host: Optional[str]
34-
proxy_port: Optional[int]
3536
proxy_auth: Optional[Dict[str, str]]
37+
realhost: Optional[str]
38+
realport: Optional[int]
3639

3740
def __init__(
3841
self,
@@ -121,44 +124,27 @@ def __init__(
121124
)
122125
self.retry_policy = 0
123126

124-
# Handle proxy settings
125-
try:
126-
# returns a dictionary of scheme -> proxy server URL mappings.
127-
# https://docs.python.org/3/library/urllib.request.html#urllib.request.getproxies
128-
proxy = urllib.request.getproxies().get(self.scheme)
129-
except (KeyError, AttributeError):
130-
# No proxy found or getproxies() failed - disable proxy
131-
proxy = None
132-
else:
133-
# Proxy found, but check if this host should bypass proxy
134-
if self.host and urllib.request.proxy_bypass(self.host):
135-
proxy = None # Host bypasses proxy per system rules
136-
137-
if proxy:
138-
parsed_proxy = urllib.parse.urlparse(proxy)
139-
self.proxy_host = self.host
140-
self.proxy_port = self.port
141-
self.proxy_uri = proxy
127+
# Handle proxy settings using shared utility
128+
proxy_auth_method = kwargs.get("_proxy_auth_method")
129+
proxy_uri, proxy_auth = detect_and_parse_proxy(
130+
self.scheme, self.host, proxy_auth_method=proxy_auth_method
131+
)
132+
133+
if proxy_uri:
134+
parsed_proxy = urllib.parse.urlparse(proxy_uri)
135+
self.realhost = self.host
136+
self.realport = self.port
137+
self.proxy_uri = proxy_uri
142138
self.host = parsed_proxy.hostname
143139
self.port = parsed_proxy.port or (443 if self.scheme == "https" else 80)
144-
self.proxy_auth = self._basic_proxy_auth_headers(parsed_proxy)
140+
self.proxy_auth = proxy_auth
145141
else:
146-
self.proxy_host = None
147-
self.proxy_port = None
148-
self.proxy_auth = None
149-
self.proxy_uri = None
142+
self.realhost = self.realport = self.proxy_auth = self.proxy_uri = None
150143

151144
# Initialize connection pool
152145
self._pool = None
153146
self._open()
154147

155-
def _basic_proxy_auth_headers(self, proxy_parsed) -> Optional[Dict[str, str]]:
156-
"""Create basic auth headers for proxy if credentials are provided."""
157-
if proxy_parsed is None or not proxy_parsed.username:
158-
return None
159-
ap = f"{urllib.parse.unquote(proxy_parsed.username)}:{urllib.parse.unquote(proxy_parsed.password)}"
160-
return make_headers(proxy_basic_auth=ap)
161-
162148
def _open(self):
163149
"""Initialize the connection pool."""
164150
pool_kwargs = {"maxsize": self.max_connections}
@@ -186,8 +172,8 @@ def _open(self):
186172
proxy_headers=self.proxy_auth,
187173
)
188174
self._pool = proxy_manager.connection_from_host(
189-
host=self.proxy_host,
190-
port=self.proxy_port,
175+
host=self.realhost,
176+
port=self.realport,
191177
scheme=self.scheme,
192178
pool_kwargs=pool_kwargs,
193179
)
@@ -201,7 +187,7 @@ def close(self):
201187

202188
def using_proxy(self) -> bool:
203189
"""Check if proxy is being used."""
204-
return self.proxy_host is not None
190+
return self.realhost is not None
205191

206192
def set_retry_command_type(self, command_type: CommandType):
207193
"""Set the command type for retry policy decision making."""

src/databricks/sql/backend/thrift_backend.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,12 @@ def __init__(
191191
self.force_dangerous_codes = kwargs.get("_retry_dangerous_codes", [])
192192

193193
additional_transport_args = {}
194+
195+
# Add proxy authentication method if specified
196+
proxy_auth_method = kwargs.get("_proxy_auth_method")
197+
if proxy_auth_method:
198+
additional_transport_args["_proxy_auth_method"] = proxy_auth_method
199+
194200
_max_redirects: Union[None, int] = kwargs.get("_retry_max_redirects")
195201

196202
if _max_redirects:
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import ssl
2+
import urllib.parse
3+
import urllib.request
4+
import logging
5+
from typing import Dict, Any, Optional, Tuple, Union
6+
7+
from urllib3 import HTTPConnectionPool, HTTPSConnectionPool, ProxyManager
8+
from urllib3.util import make_headers
9+
10+
from databricks.sql.auth.retry import DatabricksRetryPolicy
11+
from databricks.sql.types import SSLOptions
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
def detect_and_parse_proxy(
    scheme: str,
    host: Optional[str],
    skip_bypass: bool = False,
    proxy_auth_method: Optional[str] = None,
) -> Tuple[Optional[str], Optional[Dict[str, str]]]:
    """
    Detect the system proxy for ``scheme`` and build its authentication headers.

    Args:
        scheme: URL scheme (http/https) used to look up the proxy mapping
        host: Target hostname (optional, only needed for bypass checking)
        skip_bypass: If True, skip proxy bypass checking and return proxy config if found
        proxy_auth_method: Authentication method ('basic', 'negotiate', or None).
            None is treated as 'basic' for backward compatibility.

    Returns:
        Tuple of (proxy_uri, proxy_headers) or (None, None) if no proxy applies.
        proxy_headers may itself be None when a proxy is used without credentials.

    Raises:
        ValueError: If a proxy is in use and proxy_auth_method is not supported.
    """
    try:
        # returns a dictionary of scheme -> proxy server URL mappings.
        # https://docs.python.org/3/library/urllib.request.html#urllib.request.getproxies
        proxy = urllib.request.getproxies().get(scheme)
    except (KeyError, AttributeError):
        # getproxies() failed - behave as if no proxy is configured
        proxy = None

    if not proxy:
        # Nothing configured for this scheme: there is no point asking the
        # system whether the host bypasses a proxy that does not exist.
        return None, None

    # A proxy exists, but the host may be excluded from it by system rules
    if not skip_bypass and host and urllib.request.proxy_bypass(host):
        return None, None

    parsed_proxy = urllib.parse.urlparse(proxy)

    # Generate appropriate auth headers based on method
    if proxy_auth_method == "negotiate":
        proxy_headers = _generate_negotiate_headers(parsed_proxy.hostname)
    elif proxy_auth_method == "basic" or proxy_auth_method is None:
        # Default to basic if method not specified (backward compatibility)
        proxy_headers = create_basic_proxy_auth_headers(parsed_proxy)
    else:
        raise ValueError(f"Unsupported proxy_auth_method: {proxy_auth_method}")

    return proxy, proxy_headers
61+
62+
63+
def _generate_negotiate_headers(
    proxy_hostname: Optional[str],
) -> Optional[Dict[str, str]]:
    """
    Generate Kerberos/SPNEGO proxy authentication headers (best effort).

    Args:
        proxy_hostname: Hostname of the proxy the token is generated for

    Returns:
        A dictionary with a single "proxy-authorization" entry, or None when
        the header could not be produced (optional dependency missing, token
        generation failed, or an empty value was returned). Failures are
        logged rather than raised so the caller can continue without proxy
        authentication headers.
    """
    try:
        # requests-kerberos is an optional dependency, so import it lazily
        from requests_kerberos import HTTPKerberosAuth
    except ImportError:
        # Report a missing package separately from a real Kerberos failure,
        # otherwise the log gives no hint that an install step is needed.
        logger.error(
            "Kerberos proxy authentication was requested but the optional "
            "'requests-kerberos' package is not installed"
        )
        return None

    try:
        logger.debug(
            "Attempting to generate Kerberos SPNEGO token for proxy: %s", proxy_hostname
        )
        auth = HTTPKerberosAuth()
        # No prior response is available, hence None and is_preemptive=True
        negotiate_details = auth.generate_request_header(
            None, proxy_hostname, is_preemptive=True
        )
    except Exception as e:
        # Deliberately broad: header generation is best effort and must not
        # abort the caller.
        logger.error("Error generating Kerberos proxy auth headers: %s", e)
        return None

    if negotiate_details:
        return {"proxy-authorization": negotiate_details}

    logger.debug("Unable to generate kerberos proxy auth headers")
    return None
85+
86+
87+
def create_basic_proxy_auth_headers(parsed_proxy) -> Optional[Dict[str, str]]:
    """
    Create basic auth headers for proxy if credentials are provided.

    Args:
        parsed_proxy: Parsed proxy URL from urllib.parse.urlparse()

    Returns:
        Dictionary of proxy auth headers or None if no credentials
    """
    if parsed_proxy is None or not parsed_proxy.username:
        return None

    username = urllib.parse.unquote(parsed_proxy.username)
    # A URL such as "http://user@proxy:8080" has a username but password is
    # None; unquote(None) raises TypeError, so treat it as an empty password.
    password = urllib.parse.unquote(parsed_proxy.password or "")
    return make_headers(proxy_basic_auth=f"{username}:{password}")

0 commit comments

Comments
 (0)