Skip to content

Commit

Permalink
[flink] FlinkRunner initializes the same split twice (#31313) (#33606)
Browse files Browse the repository at this point in the history
* [flink] FlinkRunner initializes the same split twice (#31313)
  • Loading branch information
je-ik authored Jan 20, 2025
1 parent 2af6058 commit f357174
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"runFor": "#33146"
"runFor": "#33606"
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
import org.apache.beam.runners.flink.translation.wrappers.streaming.io.source.unbounded.FlinkUnboundedSource;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.UnboundedSource;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.util.construction.UnboundedReadFromBoundedSource;
import org.apache.flink.api.common.eventtime.Watermark;
import org.apache.flink.api.connector.source.Boundedness;
import org.apache.flink.api.connector.source.Source;
Expand Down Expand Up @@ -73,18 +71,6 @@ public static <T> FlinkUnboundedSource<T> unbounded(
return new FlinkUnboundedSource<>(stepName, source, serializablePipelineOptions, numSplits);
}

/**
 * Builds an unbounded Flink source for the Beam impulse.
 *
 * <p>The bounded {@code BeamImpulseSource} is wrapped in a bounded-to-unbounded adapter and
 * registered under the step name {@code "Impulse"} with a single split. The pipeline options
 * are the Flink defaults with only the idle-shutdown timeout overridden.
 *
 * @param shutdownSourceAfterIdleMs value stored via {@code setShutdownSourcesAfterIdleMs} on the
 *     pipeline options handed to the source
 * @return the unbounded impulse source
 */
public static FlinkUnboundedSource<byte[]> unboundedImpulse(long shutdownSourceAfterIdleMs) {
  final FlinkPipelineOptions options = FlinkPipelineOptions.defaults();
  options.setShutdownSourcesAfterIdleMs(shutdownSourceAfterIdleMs);
  final SerializablePipelineOptions serializableOptions = new SerializablePipelineOptions(options);

  return new FlinkUnboundedSource<>(
      "Impulse",
      new UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter<>(
          new BeamImpulseSource()),
      serializableOptions,
      1,
      // NOTE(review): presumably the per-record timestamp extractor; every record maps to the
      // minimum window timestamp.
      record -> BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis());
}

public static FlinkBoundedSource<byte[]> boundedImpulse() {
return new FlinkBoundedSource<>(
"Impulse",
Expand Down Expand Up @@ -117,7 +103,8 @@ public Boundedness getBoundedness() {

@Override
public SplitEnumerator<FlinkSourceSplit<T>, Map<Integer, List<FlinkSourceSplit<T>>>>
createEnumerator(SplitEnumeratorContext<FlinkSourceSplit<T>> enumContext) throws Exception {
createEnumerator(SplitEnumeratorContext<FlinkSourceSplit<T>> enumContext) {

return new FlinkSourceSplitEnumerator<>(
enumContext, beamSource, serializablePipelineOptions.get(), numSplits);
}
Expand All @@ -126,11 +113,11 @@ public Boundedness getBoundedness() {
public SplitEnumerator<FlinkSourceSplit<T>, Map<Integer, List<FlinkSourceSplit<T>>>>
restoreEnumerator(
SplitEnumeratorContext<FlinkSourceSplit<T>> enumContext,
Map<Integer, List<FlinkSourceSplit<T>>> checkpoint)
throws Exception {
Map<Integer, List<FlinkSourceSplit<T>>> checkpoint) {

FlinkSourceSplitEnumerator<T> enumerator =
new FlinkSourceSplitEnumerator<>(
enumContext, beamSource, serializablePipelineOptions.get(), numSplits);
enumContext, beamSource, serializablePipelineOptions.get(), numSplits, true);
checkpoint.forEach(
(subtaskId, splitsForSubtask) -> enumerator.addSplitsBack(splitsForSubtask, subtaskId));
return enumerator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,40 @@ public FlinkSourceSplitEnumerator(
Source<T> beamSource,
PipelineOptions pipelineOptions,
int numSplits) {

this(context, beamSource, pipelineOptions, numSplits, false);
}

/**
 * Creates an enumerator, optionally marking its splits as already initialized.
 *
 * @param context enumerator context; also queried for the current parallelism when logging
 * @param beamSource the Beam source this enumerator works on
 * @param pipelineOptions pipeline options kept for later use by the enumerator
 * @param numSplits number of splits; also used as the initial capacity of the pending-splits map
 * @param splitsInitialized when {@code true}, {@code start()} skips split initialization; the
 *     caller is then expected to supply the splits itself (e.g. via {@code addSplitsBack} after
 *     restoring from a checkpoint)
 */
public FlinkSourceSplitEnumerator(
    SplitEnumeratorContext<FlinkSourceSplit<T>> context,
    Source<T> beamSource,
    PipelineOptions pipelineOptions,
    int numSplits,
    boolean splitsInitialized) {

  this.context = context;
  this.beamSource = beamSource;
  this.pipelineOptions = pipelineOptions;
  this.numSplits = numSplits;
  this.pendingSplits = new HashMap<>(numSplits);
  // Assigned exactly once, from the argument; a preceding hard-coded "false" would be a dead
  // store (and illegal for a final field).
  this.splitsInitialized = splitsInitialized;

  LOG.info(
      "Created new enumerator with parallelism {}, source {}, numSplits {}, initialized {}",
      context.currentParallelism(),
      beamSource,
      numSplits,
      splitsInitialized);
}

/**
 * Starts the enumerator, computing the splits unless they are already marked as initialized.
 */
@Override
public void start() {
  if (splitsInitialized) {
    // Splits were provided from elsewhere; computing them again would duplicate them.
    return;
  }
  initializeSplits();
}

private void initializeSplits() {
context.callAsync(
() -> {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.beam.runners.flink.FlinkPipelineOptions;
import org.apache.beam.runners.flink.translation.wrappers.streaming.io.TestBoundedCountingSource;
import org.apache.beam.runners.flink.translation.wrappers.streaming.io.TestCountingSource;
Expand Down Expand Up @@ -130,6 +132,49 @@ public void testAddSplitsBack() throws IOException {
}
}

/**
 * Checks that an enumerator created with already-initialized splits hands out only the splits
 * that were added back to it, instead of splitting the source a second time.
 */
@Test
public void testAddSplitsBackAfterRescale() throws Exception {
  final int numSubtasks = 2;
  final int numSplits = 10;
  final int totalNumRecords = 10;
  final TestingSplitEnumeratorContext<FlinkSourceSplit<KV<Integer, Integer>>> initialContext =
      new TestingSplitEnumeratorContext<>(numSubtasks);
  final TestBoundedCountingSource testSource =
      new TestBoundedCountingSource(numSplits, totalNumRecords);

  // Phase 1: a fresh enumerator splits the source and assigns the splits to all readers.
  final Map<Integer, List<FlinkSourceSplit<KV<Integer, Integer>>>> assignment;
  try (FlinkSourceSplitEnumerator<KV<Integer, Integer>> initialEnumerator =
      new FlinkSourceSplitEnumerator<>(
          initialContext, testSource, FlinkPipelineOptions.defaults(), numSplits)) {
    initialEnumerator.start();
    for (int subtask = 0; subtask < numSubtasks; subtask++) {
      initialContext.registerReader(subtask, String.valueOf(subtask));
      initialEnumerator.addReader(subtask);
    }
    initialContext.getExecutorService().triggerAll();
    assignment =
        initialContext.getSplitAssignments().entrySet().stream()
            .collect(
                Collectors.toMap(
                    Map.Entry::getKey, entry -> entry.getValue().getAssignedSplits()));
  }

  // Phase 2: a second enumerator, flagged as initialized, only receives the splits added back.
  final TestingSplitEnumeratorContext<FlinkSourceSplit<KV<Integer, Integer>>> restoredContext =
      new TestingSplitEnumeratorContext<>(numSubtasks);
  try (FlinkSourceSplitEnumerator<KV<Integer, Integer>> restoredEnumerator =
      new FlinkSourceSplitEnumerator<>(
          restoredContext, testSource, FlinkPipelineOptions.defaults(), numSplits, true)) {
    restoredEnumerator.start();
    assignment.forEach(
        (subtaskId, splits) -> restoredEnumerator.addSplitsBack(splits, subtaskId));
    restoredContext.registerReader(0, "0");
    restoredEnumerator.addReader(0);
    restoredContext.getExecutorService().triggerAll();

    final List<FlinkSourceSplit<KV<Integer, Integer>>> splitsForReader =
        restoredContext.getSplitAssignments().get(0).getAssignedSplits();
    // Reader 0 must see exactly its share of the original splits, not a doubled set.
    assertEquals(numSplits / numSubtasks, splitsForReader.size());
  }
}

private void assignSplits(
TestingSplitEnumeratorContext<FlinkSourceSplit<KV<Integer, Integer>>> context,
Source<KV<Integer, Integer>> source,
Expand Down

0 comments on commit f357174

Please sign in to comment.