grpc · easwars · Nov 14, 2025 · Nov 14, 2025 · dfawley · Nov 20, 2025
diff --git a/clientconn.go b/clientconn.go
@@ -332,7 +332,7 @@ func (cc *ClientConn) addTraceEvent(msg string) {
 			Severity: channelz.CtInfo,
 		}
 	}
-	channelz.AddTraceEvent(logger, cc.channelz, 0, ted)
+	channelz.AddTraceEvent(logger, cc.channelz, 1, ted)
 }
 
 type idler ClientConn

diff --git a/dial_test.go b/dial_test.go
@@ -20,6 +20,7 @@ package grpc
 
 import (
 	"context"
+	"fmt"
 	"net"
 	"strings"
 	"testing"
@@ -312,3 +313,36 @@ func (s) TestResolverAddressesWithTypedNilAttribute(t *testing.T) {
 type stringerVal struct{ s string }
 
 func (s stringerVal) String() string { return s.s }
+
+const errResolverBuildercheme = "test-resolver-build-failure"
+
+// errResolverBuilder is a resolver builder that returns an error from its Build
+// method.
+type errResolverBuilder struct {
+	err error
+}
+
+func (b *errResolverBuilder) Build(resolver.Target, resolver.ClientConn, resolver.BuildOptions) (resolver.Resolver, error) {
+	return nil, b.err
+}
+
+func (b *errResolverBuilder) Scheme() string {
+	return errResolverBuildercheme
+}
+
+// Tests that Dial returns an error if the resolver builder returns an error
+// from its Build method.
+func (s) TestDial_ResolverBuilder_Error(t *testing.T) {
+	resolverErr := fmt.Errorf("resolver builder error")
+	dopts := []DialOption{
+		WithTransportCredentials(insecure.NewCredentials()),
+		WithResolvers(&errResolverBuilder{err: resolverErr}),
+	}
+	_, err := Dial(errResolverBuildercheme+":///test.server", dopts...)
+	if err == nil {
+		t.Fatalf("Dial() succeeded when it should have failed")
+	}
+	if !strings.Contains(err.Error(), resolverErr.Error()) {
+		t.Fatalf("Dial() failed with error %v, want %v", err, resolverErr)
+	}
+}
diff --git a/internal/idle/idle.go b/internal/idle/idle.go
@@ -234,16 +234,22 @@ func (m *Manager) ExitIdleMode() error {
 		return nil
 	}
 
-	if err := m.enforcer.ExitIdleMode(); err != nil {
-		return fmt.Errorf("failed to exit idle mode: %w", err)
-	}
+	// This can fail if resolver creation fails. In that case, we want to
+	// return the error to the caller so that the RPC can fail. But we still
+	// need to undo the idle entry process, and ensure that the idle timer is
+	// started again.
+	err := m.enforcer.ExitIdleMode()
 
 	// Undo the idle entry process. This also respects any new RPC attempts.
 	atomic.AddInt32(&m.activeCallsCount, math.MaxInt32)
 	m.actuallyIdle = false
 
 	// Start a new timer to fire after the configured idle timeout.
 	m.resetIdleTimerLocked(m.timeout)
+
+	if err != nil {
+		return fmt.Errorf("failed to exit idle mode: %v", err)
+	}
 	return nil
 }
 

diff --git a/internal/idle/idle_test.go b/internal/idle/idle_test.go
@@ -21,6 +21,7 @@ package idle
 import (
 	"context"
 	"fmt"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"testing"
@@ -44,13 +45,14 @@ func Test(t *testing.T) {
 }
 
 type testEnforcer struct {
+	exitIdleErr error
 	exitIdleCh  chan struct{}
 	enterIdleCh chan struct{}
 }
 
 func (ti *testEnforcer) ExitIdleMode() error {
 	ti.exitIdleCh <- struct{}{}
-	return nil
+	return ti.exitIdleErr
 
 }
 
@@ -381,3 +383,42 @@ func (s) TestManager_IdleTimeoutRacesWithOnCallBegin(t *testing.T) {
 		})
 	}
 }
+
+// TestManager_ExitIdleError tests the case where ExitIdleMode on the enforcer
+// returns an error. It verifies that the idle timer is started and the channel
+// eventually attempts to enter idle mode.
+func (s) TestManager_ExitIdleError(t *testing.T) {
+	callbackCh := overrideNewTimer(t)
+	exitIdleErr := fmt.Errorf("exit idle error")
+	enforcer := newTestEnforcer()
+	enforcer.exitIdleErr = exitIdleErr
+
+	mgr := NewManager(enforcer, defaultTestIdleTimeout)
+	defer mgr.Close()
+
+	// Call ExitIdleMode and expect it to fail.
+	if err := mgr.ExitIdleMode(); err == nil || !strings.Contains(err.Error(), "exit idle error") {
+		t.Fatalf("mgr.ExitIdleMode() returned: %v, want error: %v", err, exitIdleErr)
+	}
+
+	// Verify that ExitIdleMode was called on the enforcer.
+	select {
+	case <-enforcer.exitIdleCh:
+	case <-time.After(defaultTestShortTimeout):
+		t.Fatal("Timeout waiting for ExitIdleMode to be called on the enforcer")
+	}
+
+	// The timer should have been started. Wait for it to fire.
+	select {
+	case <-callbackCh:
+	case <-time.After(2 * defaultTestIdleTimeout):
+		t.Fatal("Timeout waiting for idle timer callback to fire")
+	}
+
+	// After the timer fires, the manager should attempt to enter idle mode.
+	select {
+	case <-enforcer.enterIdleCh:
+	case <-time.After(defaultTestShortTimeout):
+		t.Fatal("Timeout waiting for EnterIdleMode to be called on the enforcer")
+	}
+}
diff --git a/resolver_wrapper.go b/resolver_wrapper.go
@@ -23,6 +23,7 @@ import (
 	"strings"
 	"sync"
 
+	"google.golang.org/grpc/connectivity"
 	"google.golang.org/grpc/internal/channelz"
 	"google.golang.org/grpc/internal/grpcsync"
 	"google.golang.org/grpc/internal/pretty"
@@ -79,6 +80,18 @@ func (ccr *ccResolverWrapper) start() error {
 			Authority:            ccr.cc.authority,
 			MetricsRecorder:      ccr.cc.metricsRecorderList,
 		}
+
+		// https://github.com/grpc/grpc/blob/master/doc/connectivity-semantics-and-api.md
+		// defines CONNECTING as follows:
+		// - The channel is trying to establish a connection and is waiting to
+		//   make progress on one of the steps involved in name resolution, TCP
+		//   connection establishment or TLS handshake. This may be used as the
+		//   initial state for channels upon creation.
+		//
+		// We are starting the name resolver here as part of exiting IDLE, so
+		// transitioning to CONNECTING is the right thing to do.
+		ccr.cc.csMgr.updateState(connectivity.Connecting)
+
 		var err error
 		// The delegating resolver is used unless:
 		//   - A custom dialer is provided via WithContextDialer dialoption or
@@ -90,6 +103,17 @@ func (ccr *ccResolverWrapper) start() error {
 		} else {
 			ccr.resolver, err = delegatingresolver.New(ccr.cc.parsedTarget, ccr, opts, ccr.cc.resolverBuilder, ccr.cc.dopts.enableLocalDNSResolution)
 		}
+
+		// If resolver creation fails, transition to TransientFailure. This is
+		// useful for channels created using `NewClient` and the returned error
+		// will be returned to the user when they try to make the first RPC.
+		// This is also useful when a channel is exiting IDLE state.
+		//
+		// The returned error will be returned to channels created using `Dial`.
+		if err != nil {
+			logger.Warningf("Failed to start resolver: %v", err)
+			ccr.cc.csMgr.updateState(connectivity.TransientFailure)
+		}
 		errCh <- err
 	})
 	return <-errCh

diff --git a/test/clientconn_state_transition_test.go b/test/clientconn_state_transition_test.go
@@ -583,8 +583,6 @@ func (s) TestConnectivityStateSubscriber(t *testing.T) {
 // Test verifies that a channel starts off in IDLE and transitions to CONNECTING
 // when Connect() is called, and stays there when there are no resolver updates.
 func (s) TestStateTransitions_WithConnect_NoResolverUpdate(t *testing.T) {
-	t.Skip("The channel remains in IDLE until the LB policy updates the state to CONNECTING. This is a bug and the channel should transition to CONNECTING as soon as Connect() is called. See issue #7686.")
-
 	backend := stubserver.StartTestService(t, nil)
 	defer backend.Stop()
 
@@ -618,8 +616,6 @@ func (s) TestStateTransitions_WithConnect_NoResolverUpdate(t *testing.T) {
 // Test verifies that a channel starts off in IDLE and transitions to CONNECTING
 // when Connect() is called, and stays there when there are no resolver updates.
 func (s) TestStateTransitions_WithRPC_NoResolverUpdate(t *testing.T) {
-	t.Skip("The channel remains in IDLE until the LB policy updates the state to CONNECTING. This is a bug and the channel should transition to CONNECTING as soon as an RPC call is made. See issue #7686.")
-
 	backend := stubserver.StartTestService(t, nil)
 	defer backend.Stop()
 
@@ -641,8 +637,7 @@ func (s) TestStateTransitions_WithRPC_NoResolverUpdate(t *testing.T) {
 
 	// Make an RPC call to transition the channel to CONNECTING.
 	go func() {
-		_, err := testgrpc.NewTestServiceClient(cc).EmptyCall(ctx, &testpb.Empty{})
-		if err == nil {
+		if _, err := testgrpc.NewTestServiceClient(cc).EmptyCall(ctx, &testpb.Empty{}); err == nil {
 			t.Errorf("Expected RPC to fail, but it succeeded")
 		}
 	}()
@@ -656,3 +651,111 @@ func (s) TestStateTransitions_WithRPC_NoResolverUpdate(t *testing.T) {
 	defer shortCancel()
 	testutils.AwaitNoStateChange(shortCtx, t, cc, connectivity.Connecting)
 }
+
+const testResolverBuildFailureScheme = "test-resolver-build-failure"
+
+// testResolverBuilder is a resolver builder that fails the first time its
+// Build method is called, and succeeds thereafter.
+type testResolverBuilder struct {
+	logger interface {
+		Logf(format string, args ...any)
+	}
+	buildCalled bool
+	manualR     *manual.Resolver
+}
+
+func (b *testResolverBuilder) Build(target resolver.Target, cc resolver.ClientConn, opts resolver.BuildOptions) (resolver.Resolver, error) {
+	b.logger.Logf("testResolverBuilder: Build called with target: %v", target)
+	if !b.buildCalled {
+		b.buildCalled = true
+		b.logger.Logf("testResolverBuilder: returning build failure")
+		return nil, fmt.Errorf("simulated resolver build failure")
+	}
+	return b.manualR.Build(target, cc, opts)
+}
+
+func (b *testResolverBuilder) Scheme() string {
+	return testResolverBuildFailureScheme
+}
+
+// Tests for state transitions when the resolver initially fails to build.
+func (s) TestStateTransitions_ResolverBuildFailure(t *testing.T) {
+	tests := []struct {
+		name         string
+		exitIdleFunc func(ctx context.Context, cc *grpc.ClientConn) error
+	}{
+		{
+			name: "exitIdleByConnecting",
+			exitIdleFunc: func(_ context.Context, cc *grpc.ClientConn) error {
+				cc.Connect()
+				return nil
+			},
+		},
+		{
+			name: "exitIdleByRPC",
+			exitIdleFunc: func(ctx context.Context, cc *grpc.ClientConn) error {
+				if _, err := testgrpc.NewTestServiceClient(cc).EmptyCall(ctx, &testpb.Empty{}); err != nil {
+					return fmt.Errorf("EmptyCall RPC failed: %v", err)
+				}
+				return nil
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mr := manual.NewBuilderWithScheme("whatever" + tt.name)
+			backend := stubserver.StartTestService(t, nil)
+			defer backend.Stop()
+			mr.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: backend.Address}}})
+
+			dopts := []grpc.DialOption{
+				grpc.WithTransportCredentials(insecure.NewCredentials()),
+				grpc.WithResolvers(&testResolverBuilder{logger: t, manualR: mr}),
+				grpc.WithIdleTimeout(time.Second),
+			}
+
+			cc, err := grpc.NewClient(testResolverBuildFailureScheme+":///", dopts...)
+			if err != nil {
+				t.Fatalf("Failed to create new client: %v", err)
+			}
+			defer cc.Close()
+
+			// Ensure that the client is in IDLE before connecting.
+			ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+			defer cancel()
+			testutils.AwaitState(ctx, t, cc, connectivity.Idle)
+
+			// Subscribe to state updates.
+			stateCh := make(chan connectivity.State, 1)
+			s := &funcConnectivityStateSubscriber{
+				onMsg: func(s connectivity.State) {
+					select {
+					case stateCh <- s:
+					case <-ctx.Done():
+					}
+				},
+			}
+			internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, s)
+
+			if state := cc.GetState(); state != connectivity.Idle {
+				t.Fatalf("Expected initial state to be IDLE, got %v", state)
+			}
+
+			cc.Connect()
+			wantStates := []connectivity.State{
+				connectivity.Connecting,       // When channel exits IDLE for the first time.
+				connectivity.TransientFailure, // Resolver build failure.
+				connectivity.Idle,             // After idle timeout.
+				connectivity.Connecting,       // When channel exits IDLE again.
+				connectivity.Ready,            // Successful resolver build and connection to backend.
+			}
+			for _, wantState := range wantStates {
+				waitForState(ctx, t, stateCh, wantState)
+				if wantState == connectivity.Idle {
+					tt.exitIdleFunc(ctx, cc)
+				}
+			}
+
+		})
+	}
+}