Skip to content

Commit

Permalink
New max sustainable rate implementation (#329)
Browse files Browse the repository at this point in the history
## Motivation

Currently the WorkloadGenerator.findMaximumSustainableRate algorithm introduces
publish delay. It has been observed the algorithm settles on a publish rate that
neither producers nor consumers can keep up with. For example, on a Kafka
workload with 100 topics, 1kb message, the algorithm settled on a publish rate
of 2.5m msg/s when the producers could only actually achieve 2m msg/s.

## Changes

Implement a new algorithm that checks for both receive backlog and publish
backlog and adjusts the publish according and has a progressive rate ramp up
when there is no observed backlog.
  • Loading branch information
Dave Maughan authored Oct 24, 2022
1 parent 5967b3f commit 00502be
Show file tree
Hide file tree
Showing 9 changed files with 325 additions and 100 deletions.
22 changes: 18 additions & 4 deletions benchmark-framework/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,6 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
</dependency>
<dependency>
<groupId>org.asynchttpclient</groupId>
<artifactId>async-http-client</artifactId>
Expand All @@ -143,13 +139,31 @@
<artifactId>HdrHistogram</artifactId>
<version>2.1.12</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.github.stefanbirkner</groupId>
<artifactId>system-lambda</artifactId>
<version>1.2.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.openmessaging.benchmark;

import static java.util.concurrent.TimeUnit.SECONDS;
import static lombok.AccessLevel.PACKAGE;

import io.openmessaging.benchmark.utils.Env;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;

@Slf4j
class RateController {
private static final long ONE_SECOND_IN_NANOS = SECONDS.toNanos(1);
private final long publishBacklogLimit;
private final long receiveBacklogLimit;
private final double minRampingFactor;
private final double maxRampingFactor;

@Getter(PACKAGE)
private double rampingFactor;

private long previousTotalPublished = 0;
private long previousTotalReceived = 0;

RateController() {
publishBacklogLimit = Env.getLong("PUBLISH_BACKLOG_LIMIT", 1_000);
receiveBacklogLimit = Env.getLong("RECEIVE_BACKLOG_LIMIT", 1_000);
minRampingFactor = Env.getDouble("MIN_RAMPING_FACTOR", 0.01);
maxRampingFactor = Env.getDouble("MAX_RAMPING_FACTOR", 1);
rampingFactor = maxRampingFactor;
}

double nextRate(double rate, long periodNanos, long totalPublished, long totalReceived) {
long expected = (long) ((rate / ONE_SECOND_IN_NANOS) * periodNanos);
long published = totalPublished - previousTotalPublished;
long received = totalReceived - previousTotalReceived;

previousTotalPublished = totalPublished;
previousTotalReceived = totalReceived;

if (log.isDebugEnabled()) {
log.debug(
"Current rate: {} -- Publish rate {} -- Receive Rate: {}",
rate,
rate(published, periodNanos),
rate(received, periodNanos));
}

long receiveBacklog = totalPublished - totalReceived;
if (receiveBacklog > receiveBacklogLimit) {
return nextRate(periodNanos, received, expected, receiveBacklog, "Receive");
}

long publishBacklog = expected - published;
if (publishBacklog > publishBacklogLimit) {
return nextRate(periodNanos, published, expected, publishBacklog, "Publish");
}

rampUp();

return rate + (rate * rampingFactor);
}

private double nextRate(long periodNanos, long actual, long expected, long backlog, String type) {
log.debug("{} backlog: {}", type, backlog);
rampDown();
long nextExpected = Math.max(0, expected - backlog);
double nextExpectedRate = rate(nextExpected, periodNanos);
double actualRate = rate(actual, periodNanos);
return Math.min(actualRate, nextExpectedRate);
}

private double rate(long count, long periodNanos) {
return (count / (double) periodNanos) * ONE_SECOND_IN_NANOS;
}

private void rampUp() {
rampingFactor = Math.min(maxRampingFactor, rampingFactor * 2);
}

private void rampDown() {
rampingFactor = Math.max(minRampingFactor, rampingFactor / 2);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -191,20 +191,13 @@ private void ensureTopicsAreReady() throws IOException {
*
* @param currentRate
*/
@SuppressWarnings("checkstyle:LineLength")
private void findMaximumSustainableRate(double currentRate) throws IOException {
double maxRate = Double.MAX_VALUE; // Discovered max sustainable rate
double minRate = 0.1;

CountersStats stats = worker.getCountersStats();

long localTotalMessagesSentCounter = stats.messagesSent;
long localTotalMessagesReceivedCounter = stats.messagesReceived;

int controlPeriodMillis = 3000;
long lastControlTimestamp = System.nanoTime();

int successfulPeriods = 0;
RateController rateController = new RateController();

while (!runCompleted) {
// Check every few seconds and adjust the rate
Expand All @@ -217,97 +210,13 @@ private void findMaximumSustainableRate(double currentRate) throws IOException {
// Consider multiple copies when using multiple subscriptions
stats = worker.getCountersStats();
long currentTime = System.nanoTime();
long totalMessagesSent = stats.messagesSent;
long totalMessagesReceived = stats.messagesReceived;
long messagesPublishedInPeriod = totalMessagesSent - localTotalMessagesSentCounter;
long messagesReceivedInPeriod = totalMessagesReceived - localTotalMessagesReceivedCounter;
double publishRateInLastPeriod =
messagesPublishedInPeriod
/ (double) (currentTime - lastControlTimestamp)
* TimeUnit.SECONDS.toNanos(1);
double receiveRateInLastPeriod =
messagesReceivedInPeriod
/ (double) (currentTime - lastControlTimestamp)
* TimeUnit.SECONDS.toNanos(1);

if (log.isDebugEnabled()) {
log.debug(
"total-send: {} -- total-received: {} -- int-sent: {} -- int-received: {} -- sent-rate: {} -- received-rate: {}",
totalMessagesSent,
totalMessagesReceived,
messagesPublishedInPeriod,
messagesReceivedInPeriod,
publishRateInLastPeriod,
receiveRateInLastPeriod);
}
long periodNanos = currentTime - lastControlTimestamp;

localTotalMessagesSentCounter = totalMessagesSent;
localTotalMessagesReceivedCounter = totalMessagesReceived;
lastControlTimestamp = currentTime;

if (log.isDebugEnabled()) {
log.debug(
"Current rate: {} -- Publish rate {} -- Consume Rate: {} -- min-rate: {} -- max-rate: {}",
dec.format(currentRate),
dec.format(publishRateInLastPeriod),
dec.format(receiveRateInLastPeriod),
dec.format(minRate),
dec.format(maxRate));
}

if (publishRateInLastPeriod < currentRate * 0.95) {
// Producer is not able to publish as fast as requested
maxRate = currentRate * 1.1;
currentRate = minRate + (currentRate - minRate) / 2;

log.debug("Publishers are not meeting requested rate. reducing to {}", currentRate);
} else if (receiveRateInLastPeriod < publishRateInLastPeriod * 0.98) {
// If the consumers are building backlog, we should slow down publish rate
maxRate = currentRate;
currentRate = minRate + (currentRate - minRate) / 2;
log.debug("Consumers are not meeting requested rate. reducing to {}", currentRate);

// Slows the publishes to let the consumer time to absorb the backlog
worker.adjustPublishRate(minRate / 10);
while (true) {
stats = worker.getCountersStats();
long backlog =
workload.subscriptionsPerTopic * stats.messagesSent - stats.messagesReceived;
if (backlog < 1000) {
break;
}

try {
Thread.sleep(100);
} catch (InterruptedException e) {
return;
}
}

log.debug("Resuming load at reduced rate");
worker.adjustPublishRate(currentRate);

try {
// Wait some more time for the publish rate to catch up
Thread.sleep(500);
} catch (InterruptedException e) {
return;
}

stats = worker.getCountersStats();
localTotalMessagesSentCounter = stats.messagesSent;
localTotalMessagesReceivedCounter = stats.messagesReceived;

} else if (currentRate < maxRate) {
minRate = currentRate;
currentRate = Math.min(currentRate * 2, maxRate);
log.debug("No bottleneck found, increasing the rate to {}", currentRate);
} else if (++successfulPeriods > 3) {
minRate = currentRate * 0.95;
maxRate = currentRate * 1.05;
successfulPeriods = 0;
}

currentRate =
rateController.nextRate(
currentRate, periodNanos, stats.messagesSent, stats.messagesReceived);
worker.adjustPublishRate(currentRate);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.openmessaging.benchmark.utils;


import java.util.Optional;
import java.util.function.Function;

public final class Env {
private Env() {}

public static long getLong(String key, long defaultValue) {
return get(key, Long::parseLong, defaultValue);
}

public static double getDouble(String key, double defaultValue) {
return get(key, Double::parseDouble, defaultValue);
}

public static <T> T get(String key, Function<String, T> function, T defaultValue) {
return Optional.ofNullable(System.getenv(key)).map(function).orElse(defaultValue);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.openmessaging.benchmark;

import static java.util.concurrent.TimeUnit.SECONDS;
import static org.assertj.core.api.Assertions.assertThat;

import org.junit.jupiter.api.Test;

class RateControllerTest {
private final RateController rateController = new RateController();
private double rate = 10_000;
private long periodNanos = SECONDS.toNanos(1);

@Test
void receiveBacklog() {
assertThat(rateController.getRampingFactor()).isEqualTo(1);

// no backlog
rate = rateController.nextRate(rate, periodNanos, 10_000, 10_000);
assertThat(rate).isEqualTo(20_000);
assertThat(rateController.getRampingFactor()).isEqualTo(1);

// receive backlog
rate = rateController.nextRate(rate, periodNanos, 20_000, 15_000);
assertThat(rate).isEqualTo(5_000);
assertThat(rateController.getRampingFactor()).isEqualTo(0.5);
}

@Test
void publishBacklog() {
assertThat(rateController.getRampingFactor()).isEqualTo(1);

// no backlog
rate = rateController.nextRate(rate, periodNanos, 10_000, 10_000);
assertThat(rate).isEqualTo(20_000);
assertThat(rateController.getRampingFactor()).isEqualTo(1);

// publish backlog
rate = rateController.nextRate(rate, periodNanos, 15_000, 20_000);
assertThat(rate).isEqualTo(5_000);
assertThat(rateController.getRampingFactor()).isEqualTo(0.5);
}

@Test
void rampUp() {
assertThat(rateController.getRampingFactor()).isEqualTo(1);

// receive backlog
rate = rateController.nextRate(rate, periodNanos, 10_000, 5_000);
assertThat(rate).isEqualTo(5_000);
assertThat(rateController.getRampingFactor()).isEqualTo(0.5);

// no backlog
rate = rateController.nextRate(rate, periodNanos, 20_000, 20_000);
assertThat(rate).isEqualTo(10_000);
assertThat(rateController.getRampingFactor()).isEqualTo(1);
}
}
Loading

0 comments on commit 00502be

Please sign in to comment.