-
Notifications
You must be signed in to change notification settings - Fork 4.6k
client: Change connectivity state to CONNECTING when creating the name resolver #8710
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ import ( | |
| "strings" | ||
| "sync" | ||
|
|
||
| "google.golang.org/grpc/connectivity" | ||
| "google.golang.org/grpc/internal/channelz" | ||
| "google.golang.org/grpc/internal/grpcsync" | ||
| "google.golang.org/grpc/internal/pretty" | ||
|
|
@@ -79,6 +80,18 @@ func (ccr *ccResolverWrapper) start() error { | |
| Authority: ccr.cc.authority, | ||
| MetricsRecorder: ccr.cc.metricsRecorderList, | ||
| } | ||
|
|
||
| // https://github.com/grpc/grpc/blob/master/doc/connectivity-semantics-and-api.md | ||
| // defines CONNECTING as follows: | ||
| // - The channel is trying to establish a connection and is waiting to | ||
| // make progress on one of the steps involved in name resolution, TCP | ||
| // connection establishment or TLS handshake. This may be used as the | ||
| // initial state for channels upon creation. | ||
| // | ||
| // We are starting the name resolver here as part of exiting IDLE, so | ||
| // transitioning to CONNECTING is the right thing to do. | ||
|
Comment on lines
+84
to
+92
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO comments should be short and to the point. Short comments make the code take up less space, which makes it easier to read and understand. Long comments make long functions extremely long and not fit on the page. Honestly, I think a comment for this action isn't even necessary. But if you think we need one, this could be: // Set state to CONNECTING before building the name resolver
// so the channel does not remain in IDLE. |
||
| ccr.cc.csMgr.updateState(connectivity.Connecting) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could this (and the error handling below) go into |
||
|
|
||
| var err error | ||
| // The delegating resolver is used unless: | ||
| // - A custom dialer is provided via WithContextDialer dialoption or | ||
|
|
@@ -90,6 +103,17 @@ func (ccr *ccResolverWrapper) start() error { | |
| } else { | ||
| ccr.resolver, err = delegatingresolver.New(ccr.cc.parsedTarget, ccr, opts, ccr.cc.resolverBuilder, ccr.cc.dopts.enableLocalDNSResolution) | ||
| } | ||
|
|
||
| // If resolver creation fails, transition to TransientFailure. This is | ||
| // useful for channels created using `NewClient` and the returned error | ||
| // will be returned to the user when they try to make the first RPC. | ||
| // This is also useful when a channel is exiting IDLE state. | ||
| // | ||
| // The returned error will be returned to channels created using `Dial`. | ||
| if err != nil { | ||
| logger.Warningf("Failed to start resolver: %v", err) | ||
| ccr.cc.csMgr.updateState(connectivity.TransientFailure) | ||
| } | ||
| errCh <- err | ||
| }) | ||
| return <-errCh | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -583,8 +583,6 @@ func (s) TestConnectivityStateSubscriber(t *testing.T) { | |
| // Test verifies that a channel starts off in IDLE and transitions to CONNECTING | ||
| // when Connect() is called, and stays there when there are no resolver updates. | ||
| func (s) TestStateTransitions_WithConnect_NoResolverUpdate(t *testing.T) { | ||
| t.Skip("The channel remains in IDLE until the LB policy updates the state to CONNECTING. This is a bug and the channel should transition to CONNECTING as soon as Connect() is called. See issue #7686.") | ||
|
|
||
| backend := stubserver.StartTestService(t, nil) | ||
| defer backend.Stop() | ||
|
|
||
|
|
@@ -618,8 +616,6 @@ func (s) TestStateTransitions_WithConnect_NoResolverUpdate(t *testing.T) { | |
| // Test verifies that a channel starts off in IDLE and transitions to CONNECTING | ||
| // when Connect() is called, and stays there when there are no resolver updates. | ||
| func (s) TestStateTransitions_WithRPC_NoResolverUpdate(t *testing.T) { | ||
| t.Skip("The channel remains in IDLE until the LB policy updates the state to CONNECTING. This is a bug and the channel should transition to CONNECTING as soon as an RPC call is made. See issue #7686.") | ||
|
|
||
| backend := stubserver.StartTestService(t, nil) | ||
| defer backend.Stop() | ||
|
|
||
|
|
@@ -641,8 +637,7 @@ func (s) TestStateTransitions_WithRPC_NoResolverUpdate(t *testing.T) { | |
|
|
||
| // Make an RPC call to transition the channel to CONNECTING. | ||
| go func() { | ||
| _, err := testgrpc.NewTestServiceClient(cc).EmptyCall(ctx, &testpb.Empty{}) | ||
| if err == nil { | ||
| if _, err := testgrpc.NewTestServiceClient(cc).EmptyCall(ctx, &testpb.Empty{}); err == nil { | ||
| t.Errorf("Expected RPC to fail, but it succeeded") | ||
| } | ||
| }() | ||
|
|
@@ -656,3 +651,111 @@ func (s) TestStateTransitions_WithRPC_NoResolverUpdate(t *testing.T) { | |
| defer shortCancel() | ||
| testutils.AwaitNoStateChange(shortCtx, t, cc, connectivity.Connecting) | ||
| } | ||
|
|
||
| const testResolverBuildFailureScheme = "test-resolver-build-failure" | ||
|
|
||
| // testResolverBuilder is a resolver builder that fails the first time its | ||
| // Build method is called, and succeeds thereafter. | ||
| type testResolverBuilder struct { | ||
| logger interface { | ||
| Logf(format string, args ...any) | ||
| } | ||
| buildCalled bool | ||
| manualR *manual.Resolver | ||
| } | ||
|
|
||
| func (b *testResolverBuilder) Build(target resolver.Target, cc resolver.ClientConn, opts resolver.BuildOptions) (resolver.Resolver, error) { | ||
| b.logger.Logf("testResolverBuilder: Build called with target: %v", target) | ||
| if !b.buildCalled { | ||
| b.buildCalled = true | ||
| b.logger.Logf("testResolverBuilder: returning build failure") | ||
| return nil, fmt.Errorf("simulated resolver build failure") | ||
| } | ||
| return b.manualR.Build(target, cc, opts) | ||
| } | ||
|
|
||
| func (b *testResolverBuilder) Scheme() string { | ||
| return testResolverBuildFailureScheme | ||
| } | ||
|
|
||
| // Tests for state transitions when the resolver initially fails to build. | ||
| func (s) TestStateTransitions_ResolverBuildFailure(t *testing.T) { | ||
| tests := []struct { | ||
| name string | ||
| exitIdleFunc func(ctx context.Context, cc *grpc.ClientConn) error | ||
| }{ | ||
| { | ||
| name: "exitIdleByConnecting", | ||
| exitIdleFunc: func(_ context.Context, cc *grpc.ClientConn) error { | ||
| cc.Connect() | ||
| return nil | ||
| }, | ||
| }, | ||
| { | ||
| name: "exitIdleByRPC", | ||
| exitIdleFunc: func(ctx context.Context, cc *grpc.ClientConn) error { | ||
| if _, err := testgrpc.NewTestServiceClient(cc).EmptyCall(ctx, &testpb.Empty{}); err != nil { | ||
| return fmt.Errorf("EmptyCall RPC failed: %v", err) | ||
| } | ||
| return nil | ||
| }, | ||
| }, | ||
| } | ||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| mr := manual.NewBuilderWithScheme("whatever" + tt.name) | ||
| backend := stubserver.StartTestService(t, nil) | ||
| defer backend.Stop() | ||
| mr.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: backend.Address}}}) | ||
|
|
||
| dopts := []grpc.DialOption{ | ||
| grpc.WithTransportCredentials(insecure.NewCredentials()), | ||
| grpc.WithResolvers(&testResolverBuilder{logger: t, manualR: mr}), | ||
| grpc.WithIdleTimeout(time.Second), | ||
| } | ||
|
|
||
| cc, err := grpc.NewClient(testResolverBuildFailureScheme+":///", dopts...) | ||
| if err != nil { | ||
| t.Fatalf("Failed to create new client: %v", err) | ||
| } | ||
| defer cc.Close() | ||
|
|
||
| // Ensure that the client is in IDLE before connecting. | ||
| ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) | ||
| defer cancel() | ||
| testutils.AwaitState(ctx, t, cc, connectivity.Idle) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't need an |
||
|
|
||
| // Subscribe to state updates. | ||
| stateCh := make(chan connectivity.State, 1) | ||
| s := &funcConnectivityStateSubscriber{ | ||
| onMsg: func(s connectivity.State) { | ||
| select { | ||
| case stateCh <- s: | ||
| case <-ctx.Done(): | ||
| } | ||
| }, | ||
| } | ||
| internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, s) | ||
|
|
||
| if state := cc.GetState(); state != connectivity.Idle { | ||
| t.Fatalf("Expected initial state to be IDLE, got %v", state) | ||
| } | ||
|
Comment on lines
+740
to
+742
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The AwaitState above already tested this IIUC |
||
|
|
||
| cc.Connect() | ||
| wantStates := []connectivity.State{ | ||
| connectivity.Connecting, // When channel exits IDLE for the first time. | ||
| connectivity.TransientFailure, // Resolver build failure. | ||
| connectivity.Idle, // After idle timeout. | ||
| connectivity.Connecting, // When channel exits IDLE again. | ||
| connectivity.Ready, // Successful resolver build and connection to backend. | ||
| } | ||
| for _, wantState := range wantStates { | ||
| waitForState(ctx, t, stateCh, wantState) | ||
| if wantState == connectivity.Idle { | ||
| tt.exitIdleFunc(ctx, cc) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we make this test the actual RPC error when we use an RPC to exit idle? |
||
| } | ||
| } | ||
|
|
||
| }) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo