@@ -356,7 +356,7 @@ impl BalancerService {
356356 let port: u16 = port. parse ( ) . expect ( "unexpected port" ) ;
357357
358358 let https = HttpsBalancer {
359- resolver : Arc :: from ( create_default_resolver ( ) ) ,
359+ resolver : Arc :: from ( TenantDnsResolver :: new ( ) ) ,
360360 tls : https_tls,
361361 resolve_template : Arc :: from ( addr) ,
362362 port,
@@ -1049,7 +1049,7 @@ async fn cancel_request(
10491049}
10501050
10511051struct HttpsBalancer {
1052- resolver : Arc < TokioResolver > ,
1052+ resolver : Arc < TenantDnsResolver > ,
10531053 tls : Option < ReloadingSslContext > ,
10541054 resolve_template : Arc < str > ,
10551055 port : u16 ,
@@ -1060,7 +1060,7 @@ struct HttpsBalancer {
10601060
10611061impl HttpsBalancer {
10621062 async fn resolve (
1063- resolver : & TokioResolver ,
1063+ resolver : & TenantDnsResolver ,
10641064 resolve_template : & str ,
10651065 port : u16 ,
10661066 servername : Option < & str > ,
@@ -1086,7 +1086,9 @@ impl HttpsBalancer {
10861086 let tenant = resolver. tenant ( & addr) . await ;
10871087
10881088 // Now do the regular ip lookup, regardless of if there was a CNAME.
1089- let envd_addr = lookup_with_resolver ( resolver, & format ! ( "{addr}:{port}" ) ) . await ?;
1089+ let envd_addr = resolver
1090+ . resolve_tenant_from_sni_host ( & format ! ( "{addr}:{port}" ) )
1091+ . await ?;
10901092
10911093 Ok ( ResolvedAddr {
10921094 addr : envd_addr,
@@ -1100,21 +1102,15 @@ trait ResolverExt {
11001102 async fn tenant ( & self , addr : & str ) -> Option < String > ;
11011103}
11021104
1103- impl ResolverExt for TokioResolver {
1105+ impl ResolverExt for TenantDnsResolver {
11041106 /// Finds the tenant of a DNS address. Errors or lack of cname resolution here are ok, because
11051107 /// this is only used for metrics.
11061108 async fn tenant ( & self , addr : & str ) -> Option < String > {
11071109 debug ! ( "resolving tenant for {:?}" , addr) ;
1108- // Lookup the CNAME. If there's a CNAME, find the tenant.
1109- let lookup = self . lookup ( addr, RecordType :: CNAME ) . await ;
1110- if let Ok ( lookup) = lookup {
1111- for record in lookup. iter ( ) {
1112- if let Some ( cname) = record. as_cname ( ) {
1113- let cname = cname. to_string ( ) ;
1114- debug ! ( "cname: {cname}" ) ;
1115- return extract_tenant_from_cname ( & cname) ;
1116- }
1117- }
1110+ // Lookup the CNAME using the caching resolver
1111+ if let Ok ( Some ( cname) ) = self . resolve_cname ( addr) . await {
1112+ debug ! ( "cname: {cname}" ) ;
1113+ return extract_tenant_from_cname ( & cname) ;
11181114 }
11191115 None
11201116 }
@@ -1262,8 +1258,8 @@ impl<T: AsyncRead + AsyncWrite + Unpin + Send> ClientStream for T {}
12621258
12631259#[ derive( Debug ) ]
12641260pub enum BalancerResolver {
1265- Static ( TokioResolver , String ) ,
1266- MultiTenant ( TokioResolver , FronteggResolver , Option < SniResolver > ) ,
1261+ Static ( TenantDnsResolver , String ) ,
1262+ MultiTenant ( TenantDnsResolver , FronteggResolver , Option < SniResolver > ) ,
12671263}
12681264
12691265impl BalancerResolver {
@@ -1309,7 +1305,7 @@ impl BalancerResolver {
13091305 let sni_addr = sni_addr_template. replace ( "{}" , servername) ;
13101306 let tenant = dns_resolver. tenant ( & sni_addr) . await ;
13111307 let sni_addr = format ! ( "{sni_addr}:{port}" ) ;
1312- let addr = lookup_with_resolver ( dns_resolver, & sni_addr) . await ?;
1308+ let addr = dns_resolver. resolve_tenant_from_sni_host ( & sni_addr) . await ?;
13131309 if tenant. is_some ( ) {
13141310 debug ! ( "SNI header found for tenant {:?}" , tenant) ;
13151311 }
@@ -1340,7 +1336,7 @@ impl BalancerResolver {
13401336
13411337 let addr =
13421338 addr_template. replace ( "{}" , & auth_session. tenant_id ( ) . to_string ( ) ) ;
1343- let addr = lookup_with_resolver ( dns_resolver, & addr) . await ?;
1339+ let addr = dns_resolver. resolve_tenant_from_sni_host ( & addr) . await ?;
13441340 let tenant = Some ( auth_session. tenant_id ( ) . to_string ( ) ) ;
13451341 if tenant. is_some ( ) {
13461342 debug ! ( "SNI header NOT found for tenant {:?}" , tenant) ;
@@ -1363,7 +1359,7 @@ impl BalancerResolver {
13631359 }
13641360 BalancerResolver :: Static ( resolver, addr) => {
13651361 // Use the shared resolver instance for caching benefits
1366- let addr = lookup_with_resolver ( resolver, addr) . await ?;
1362+ let addr = resolver. resolve_tenant_from_sni_host ( addr) . await ?;
13671363 Ok ( ResolvedAddr {
13681364 addr,
13691365 password : None ,
@@ -1408,85 +1404,119 @@ pub fn create_default_resolver() -> TokioResolver {
14081404 . build ( )
14091405}
14101406
1411- /// Follows CNAME chain to resolve a hostname to IP addresses, ensuring each step is cached.
1412- ///
1413- /// hickory's `lookup_ip` only caches the final step when CNAMEs are involved. This function
1414- /// explicitly queries for CNAME records at each step, following the chain until we reach a
1415- /// hostname that resolves to A records. Each query is cached individually by hickory.
1407+ /// Creates a resolver with caching disabled for DNS lookups.
1408+ pub fn create_non_caching_resolver ( ) -> TokioResolver {
1409+ let mut resolver_opts = ResolverOpts :: default ( ) ;
1410+ resolver_opts. cache_size = 0 ; // Disable caching
1411+ resolver_opts. ip_strategy = LookupIpStrategy :: Ipv4thenIpv6 ;
1412+
1413+ // Read system DNS configuration or fall back to defaults
1414+ let ( config, opts) = read_system_conf ( )
1415+ . map ( |( config, mut opts) | {
1416+ // Override specific options while keeping system DNS servers
1417+ opts. cache_size = resolver_opts. cache_size ;
1418+ ( config, opts)
1419+ } )
1420+ . unwrap_or_else ( |err| {
1421+ warn ! (
1422+ "Failed to read system DNS configuration, using defaults: {}" ,
1423+ err
1424+ ) ;
1425+ ( ResolverConfig :: default ( ) , resolver_opts)
1426+ } ) ;
1427+
1428+ Resolver :: builder_with_config ( config, TokioConnectionProvider :: default ( ) )
1429+ . with_options ( opts)
1430+ . build ( )
1431+ }
1432+
1433+ /// A resolver that uses separate caching and non-caching resolvers for different record types.
14161434///
1417- /// Returns the final hostname and its A record lookup response.
1418- async fn follow_cname_chain (
1419- resolver : & TokioResolver ,
1420- initial_host : & str ,
1421- ) -> Result < hickory_resolver:: lookup_ip:: LookupIp , anyhow:: Error > {
1422- let mut current_host = initial_host. to_string ( ) ;
1423- let max_hops = 10 ; // Prevent infinite loops in case of misconfigured DNS
1424-
1425- for hop in 0 ..max_hops {
1426- // Explicitly query for CNAME records to ensure this step is cached
1427- match resolver. lookup ( & current_host, RecordType :: CNAME ) . await {
1435+ /// This struct contains:
1436+ /// - A caching resolver for CNAME lookups
1437+ /// - A non-caching resolver for A record lookups
1438+ #[ derive( Debug ) ]
1439+ pub struct TenantDnsResolver {
1440+ caching_resolver : TokioResolver ,
1441+ non_caching_resolver : TokioResolver ,
1442+ }
1443+
1444+ impl TenantDnsResolver {
1445+ /// Creates a new TenantDnsResolver with default caching and non-caching resolvers.
1446+ pub fn new ( ) -> Self {
1447+ Self {
1448+ caching_resolver : create_default_resolver ( ) ,
1449+ non_caching_resolver : create_non_caching_resolver ( ) ,
1450+ }
1451+ }
1452+
1453+ /// Resolves a CNAME record using the caching resolver.
1454+ /// Returns the CNAME target hostname, or None if no CNAME exists.
1455+ pub async fn resolve_cname ( & self , hostname : & str ) -> Result < Option < String > , anyhow:: Error > {
1456+ match self
1457+ . caching_resolver
1458+ . lookup ( hostname, RecordType :: CNAME )
1459+ . await
1460+ {
14281461 Ok ( cname_response) => {
14291462 // Check if we got a CNAME record back
14301463 if let Some ( cname_record) = cname_response. iter ( ) . next ( ) {
14311464 if let Some ( cname_data) = cname_record. as_cname ( ) {
1432- // Follow the CNAME to the next hostname
1433- let next_host = cname_data. to_string ( ) ;
1434- tracing:: debug!(
1435- hop = hop,
1436- from = %current_host,
1437- to = %next_host,
1438- "Following CNAME"
1439- ) ;
1440- current_host = next_host;
1441- continue ;
1465+ return Ok ( Some ( cname_data. to_string ( ) ) ) ;
14421466 }
14431467 }
1444- // No CNAME found, fall through to A record lookup
1445- break ;
1446- }
1447- Err ( _) => {
1448- // No CNAME record exists, this is likely the final hostname
1449- break ;
1468+ Ok ( None )
14501469 }
1470+ Err ( _) => Ok ( None ) ,
14511471 }
14521472 }
14531473
1454- // Now lookup A records for the final hostname in the chain
1455- // We don't want to cache this
1456- let response = resolver
1457- . lookup_ip ( & current_host)
1458- . await
1459- . with_context ( || format ! ( "Failed to resolve A record for hostname: {}" , current_host) ) ?;
1474+ /// Resolves A records without caching.
1475+ /// Returns a LookupIp containing all A records for the hostname.
1476+ pub async fn resolve_arec_no_cache (
1477+ & self ,
1478+ hostname : & str ,
1479+ ) -> Result < hickory_resolver:: lookup_ip:: LookupIp , anyhow:: Error > {
1480+ self . non_caching_resolver
1481+ . lookup_ip ( hostname)
1482+ . await
1483+ . with_context ( || format ! ( "Failed to resolve A record for hostname: {}" , hostname) )
1484+ }
14601485
1461- Ok ( response)
1462- // Ok((current_host, a_response))
1463- }
1486+ /// Returns the first IP address resolved from the provided hostname using the DualResolver.
1487+ ///
1488+ /// CNAMEs are resolved with caching, while A records are resolved without caching.
1489+ async fn resolve_tenant_from_sni_host (
1490+ self : & TenantDnsResolver ,
1491+ name : & str ,
1492+ ) -> Result < SocketAddr , anyhow:: Error > {
1493+ // Parse out the hostname and port from name (format: "hostname:port")
1494+ let ( host, port_str) = name
1495+ . rsplit_once ( ':' )
1496+ . ok_or_else ( || anyhow:: anyhow!( "Invalid address format, expected 'hostname:port'" ) ) ?;
1497+ let port: u16 = port_str
1498+ . parse ( )
1499+ . with_context ( || format ! ( "Invalid port in address: {}" , name) ) ?;
1500+
1501+ // Resolve initial CNAME with caching if it exists...
1502+ // these are generally static and we can generally ignore TTLS
1503+ //
1504+ // Resolve arec from host without caching... these can change at anytime
1505+ // in generally we should ignore TTLs.
1506+ let ip = if let Some ( resolved_host) = self . resolve_cname ( host) . await ? {
1507+ self . resolve_arec_no_cache ( & resolved_host) . await ?
1508+ } else {
1509+ self . resolve_arec_no_cache ( host) . await ?
1510+ } ;
1511+
1512+ // Extract IP address from A records
1513+ let maybe_socket_addr = ip. iter ( ) . find_map ( |r| Some ( SocketAddr :: new ( r, port) ) ) ;
14641514
1465- /// Returns the first IP address resolved from the provided hostname using the hickory resolver for caching.
1466- async fn lookup_with_resolver (
1467- resolver : & TokioResolver ,
1468- name : & str ,
1469- ) -> Result < SocketAddr , anyhow:: Error > {
1470- // Parse out the hostname and port from name (format: "hostname:port")
1471- let ( host, port_str) = name
1472- . rsplit_once ( ':' )
1473- . ok_or_else ( || anyhow:: anyhow!( "Invalid address format, expected 'hostname:port'" ) ) ?;
1474- let port: u16 = port_str
1475- . parse ( )
1476- . with_context ( || format ! ( "Invalid port in address: {}" , name) ) ?;
1477-
1478- // Follow the CNAME chain and get A records, ensuring each step is cached
1479- let ip_response = follow_cname_chain ( resolver, host) . await ?;
1480-
1481- // Extract IP address from A records
1482- let maybe_socket_addr = ip_response
1483- . iter ( )
1484- . find_map ( |r| Some ( SocketAddr :: new ( r, port) ) ) ;
1485-
1486- if let Some ( addr) = maybe_socket_addr {
1487- Ok ( addr)
1488- } else {
1489- anyhow:: bail!( "No A records found in DNS response" )
1515+ if let Some ( addr) = maybe_socket_addr {
1516+ Ok ( addr)
1517+ } else {
1518+ anyhow:: bail!( "No A records found in DNS response" )
1519+ }
14901520 }
14911521}
14921522
0 commit comments