Skip to content

Commit 7bf431e

Browse files
committed
rls: Avoid missed config update from reentrancy
Since ChildPolicyWrapper() called into the child before childPolicyMap.put(), it is possible for that child to call back into RLS and further update state without that child being known. When CDS is_dynamic=true (since ca99a8c), it registers the cluster with XdsDependencyManager, which adds a watch to XdsClient. If XdsClient already has the results cached then the watch callback can be enqueued immediately onto the syncContext and execute still within the constructor. Calling into the child with the lock held isn't great, as it allows for this type of reentrancy bug. But that'll take larger changes to fix. b/464116731
1 parent 337c7b8 commit 7bf431e

File tree

2 files changed

+29
-12
lines changed

2 files changed

+29
-12
lines changed

rls/src/main/java/io/grpc/rls/LbPolicyConfiguration.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,10 @@ ChildPolicyWrapper createOrGet(String target) {
245245
RefCountedChildPolicyWrapper pooledChildPolicyWrapper = childPolicyMap.get(target);
246246
if (pooledChildPolicyWrapper == null) {
247247
ChildPolicyWrapper childPolicyWrapper = new ChildPolicyWrapper(
248-
target, childPolicy, childLbResolvedAddressFactory, childLbHelperProvider);
248+
target, childPolicy, childLbHelperProvider);
249249
pooledChildPolicyWrapper = RefCountedChildPolicyWrapper.of(childPolicyWrapper);
250250
childPolicyMap.put(target, pooledChildPolicyWrapper);
251+
childPolicyWrapper.start(childLbResolvedAddressFactory);
251252
return pooledChildPolicyWrapper.getObject();
252253
} else {
253254
ChildPolicyWrapper childPolicyWrapper = pooledChildPolicyWrapper.getObject();
@@ -294,7 +295,6 @@ static final class ChildPolicyWrapper {
294295
public ChildPolicyWrapper(
295296
String target,
296297
ChildLoadBalancingPolicy childPolicy,
297-
final ResolvedAddressFactory childLbResolvedAddressFactory,
298298
ChildLoadBalancerHelperProvider childLbHelperProvider) {
299299
this.target = target;
300300
this.helper = new ChildPolicyReportingHelper(childLbHelperProvider);
@@ -307,6 +307,9 @@ public ChildPolicyWrapper(
307307
this.childLbConfig = lbConfig.getConfig();
308308
helper.getChannelLogger().log(
309309
ChannelLogLevel.DEBUG, "RLS child lb created. config: {0}", childLbConfig);
310+
}
311+
312+
void start(ResolvedAddressFactory childLbResolvedAddressFactory) {
310313
helper.getSynchronizationContext().execute(
311314
new Runnable() {
312315
@Override

rls/src/test/java/io/grpc/rls/LbPolicyConfigurationTest.java

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import static org.mockito.ArgumentMatchers.any;
2222
import static org.mockito.Mockito.doReturn;
2323
import static org.mockito.Mockito.mock;
24+
import static org.mockito.Mockito.times;
2425
import static org.mockito.Mockito.verify;
2526
import static org.mockito.Mockito.when;
2627

@@ -44,8 +45,8 @@
4445
import io.grpc.rls.LbPolicyConfiguration.ChildPolicyWrapper.ChildPolicyReportingHelper;
4546
import io.grpc.rls.LbPolicyConfiguration.InvalidChildPolicyConfigException;
4647
import io.grpc.rls.LbPolicyConfiguration.RefCountedChildPolicyWrapperFactory;
47-
import java.lang.Thread.UncaughtExceptionHandler;
4848
import java.util.Map;
49+
import java.util.concurrent.atomic.AtomicBoolean;
4950
import org.junit.Before;
5051
import org.junit.Test;
5152
import org.junit.runner.RunWith;
@@ -60,6 +61,9 @@ public class LbPolicyConfigurationTest {
6061
private final LoadBalancer lb = mock(LoadBalancer.class);
6162
private final SubchannelStateManager subchannelStateManager = new SubchannelStateManagerImpl();
6263
private final SubchannelPicker picker = mock(SubchannelPicker.class);
64+
private final SynchronizationContext syncContext = new SynchronizationContext((t, e) -> {
65+
throw new AssertionError(e);
66+
});
6367
private final ResolvedAddressFactory resolvedAddressFactory =
6468
new ResolvedAddressFactory() {
6569
@Override
@@ -81,15 +85,7 @@ public ResolvedAddresses create(Object childLbConfig) {
8185
@Before
8286
public void setUp() {
8387
doReturn(mock(ChannelLogger.class)).when(helper).getChannelLogger();
84-
doReturn(
85-
new SynchronizationContext(
86-
new UncaughtExceptionHandler() {
87-
@Override
88-
public void uncaughtException(Thread t, Throwable e) {
89-
throw new AssertionError(e);
90-
}
91-
}))
92-
.when(helper).getSynchronizationContext();
88+
doReturn(syncContext).when(helper).getSynchronizationContext();
9389
doReturn(lb).when(lbProvider).newLoadBalancer(any(Helper.class));
9490
doReturn(ConfigOrError.fromConfig(new Object()))
9591
.when(lbProvider).parseLoadBalancingPolicyConfig(ArgumentMatchers.<Map<String, ?>>any());
@@ -186,4 +182,22 @@ public void updateBalancingState_triggersListener() {
186182
// picker governs childPickers will be reported to parent LB
187183
verify(helper).updateBalancingState(ConnectivityState.READY, picker);
188184
}
185+
186+
@Test
187+
public void refCountedGetOrCreate_addsChildBeforeConfiguringChild() {
188+
AtomicBoolean calledAlready = new AtomicBoolean();
189+
when(lb.acceptResolvedAddresses(any(ResolvedAddresses.class))).thenAnswer(i -> {
190+
if (!calledAlready.get()) {
191+
calledAlready.set(true);
192+
// Should end up calling this function again, as this child should already be added to the
193+
// list of children. In practice, this can be caused by CDS is_dynamic=true starting a watch
194+
// when XdsClient already has the cluster cached (e.g., from another channel).
195+
syncContext.execute(() ->
196+
factory.acceptResolvedAddressFactory(resolvedAddressFactory));
197+
}
198+
return Status.OK;
199+
});
200+
ChildPolicyWrapper unused = factory.createOrGet("foo.google.com");
201+
verify(lb, times(2)).acceptResolvedAddresses(any(ResolvedAddresses.class));
202+
}
189203
}

0 commit comments

Comments
 (0)