-
Notifications
You must be signed in to change notification settings - Fork 14.8k
MINOR: Fix time comparison with appendLingerMs in CoordinatorRuntime#maybeFlushCurrentBatch #20739
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4779,6 +4779,7 @@ public void testCompleteTransactionEventCompletesOnlyOnce() throws Exception { | |
| assertTrue(write1.isCompletedExceptionally()); | ||
| verify(runtimeMetrics, times(1)).recordEventPurgatoryTime(writeTimeout.toMillis() + 1L); | ||
| } | ||
|
|
||
| @Test | ||
| public void testCoordinatorExecutor() { | ||
| Duration writeTimeout = Duration.ofMillis(1000); | ||
|
|
@@ -4866,6 +4867,81 @@ public void testCoordinatorExecutor() { | |
| assertTrue(write1.isDone()); | ||
| } | ||
|
|
||
| @Test | ||
| public void testLingerTimeComparisonInMaybeFlushCurrentBatch() throws Exception { | ||
| // Provides the runtime clock; we will advance it. | ||
| MockTimer clockTimer = new MockTimer(); | ||
| // Used for scheduling timer tasks; we won't advance it to avoid a timer-triggered batch flush. | ||
| MockTimer schedulerTimer = new MockTimer(); | ||
|
|
||
| MockPartitionWriter writer = new MockPartitionWriter(); | ||
|
|
||
| CoordinatorRuntime<MockCoordinatorShard, String> runtime = | ||
| new CoordinatorRuntime.Builder<MockCoordinatorShard, String>() | ||
| .withTime(clockTimer.time()) | ||
| .withTimer(schedulerTimer) | ||
| .withDefaultWriteTimeOut(Duration.ofMillis(20)) | ||
| .withLoader(new MockCoordinatorLoader()) | ||
| .withEventProcessor(new DirectEventProcessor()) | ||
| .withPartitionWriter(writer) | ||
| .withCoordinatorShardBuilderSupplier(new MockCoordinatorShardBuilderSupplier()) | ||
| .withCoordinatorRuntimeMetrics(mock(CoordinatorRuntimeMetrics.class)) | ||
| .withCoordinatorMetrics(mock(CoordinatorMetrics.class)) | ||
| .withSerializer(new StringSerializer()) | ||
| .withAppendLingerMs(10) | ||
| .withExecutorService(mock(ExecutorService.class)) | ||
| .build(); | ||
|
|
||
| // Schedule the loading. | ||
| runtime.scheduleLoadOperation(TP, 10); | ||
|
|
||
| // Verify the initial state. | ||
| CoordinatorRuntime<MockCoordinatorShard, String>.CoordinatorContext ctx = runtime.contextOrThrow(TP); | ||
| assertEquals(ACTIVE, ctx.state); | ||
| assertNull(ctx.currentBatch); | ||
|
|
||
| // Write #1. | ||
| CompletableFuture<String> write1 = runtime.scheduleWriteOperation("write#1", TP, Duration.ofMillis(20), | ||
| state -> new CoordinatorResult<>(List.of("record1"), "response1") | ||
| ); | ||
| assertFalse(write1.isDone()); | ||
| assertNotNull(ctx.currentBatch); | ||
| assertEquals(0, writer.entries(TP).size()); | ||
|
|
||
| // Verify that the linger timeout task is created; there will also be a default write timeout task. | ||
| assertEquals(2, schedulerTimer.size()); | ||
|
|
||
| // Advance past the linger time. | ||
| clockTimer.advanceClock(11); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you move this to line#4921? It makes more sense there, since the goal of advancing the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the purpose of advancing the However, I think that after advancing the This ensures that |
||
|
|
||
| // At this point, there are still two scheduled tasks; the linger task has not fired | ||
| // because we did not advance the schedulerTimer. | ||
| assertEquals(2, schedulerTimer.size()); | ||
|
|
||
| // Write #2. | ||
| CompletableFuture<String> write2 = runtime.scheduleWriteOperation("write#2", TP, Duration.ofMillis(20), | ||
| state -> new CoordinatorResult<>(List.of("record2"), "response2") | ||
| ); | ||
|
|
||
| // The batch should have been flushed. | ||
| assertEquals(1, writer.entries(TP).size()); | ||
|
|
||
| // Because flushing the batch cancels the linger task, there should now be two write timeout tasks. | ||
| assertEquals(2, schedulerTimer.size()); | ||
|
|
||
| // Verify batch contains both two records | ||
| MemoryRecords batch = writer.entries(TP).get(0); | ||
| RecordBatch recordBatch = batch.firstBatch(); | ||
| assertEquals(2, recordBatch.countOrNull()); | ||
|
|
||
| // Commit and verify that writes are completed. | ||
| writer.commit(TP); | ||
| assertTrue(write1.isDone()); | ||
| assertTrue(write2.isDone()); | ||
| // Now that all scheduled tasks have been cancelled, the scheduler queue should be empty. | ||
| assertEquals(0, schedulerTimer.size()); | ||
| } | ||
|
|
||
| private static <S extends CoordinatorShard<U>, U> ArgumentMatcher<CoordinatorPlayback<U>> coordinatorMatcher( | ||
| CoordinatorRuntime<S, U> runtime, | ||
| TopicPartition tp | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Personally, I would prefer to remove this condition, as a time-based task already exists. This approach would also mean we are not changing the current behavior, since the condition was previously a no-op
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As you mentioned, time-based scheduled tasks will ensure that the current batch is flushed to the log once it has passed the append linger time.
Perhaps performing a conditional check here is intended to detect as promptly as possible when the current batch has passed the append linger time, so that flushing to the log can be carried out more promptly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a unit test here? Since the existing tests don't update the mock time, this bug was not disclosed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch! I think that we can keep the condition here. I agree with adding a unit test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you all for your feedback and suggestions. I will add a unit test to cover this scenario.