Skip to content

Hitless upgrades #3021

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 16 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions packages/client/lib/client/commands-queue.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import { SinglyLinkedList, DoublyLinkedNode, DoublyLinkedList } from './linked-list';
import { DoublyLinkedNode, DoublyLinkedList, EmptyAwareSinglyLinkedList } from './linked-list';
import encodeCommand from '../RESP/encoder';
import { Decoder, PUSH_TYPE_MAPPING, RESP_TYPES } from '../RESP/decoder';
import { TypeMapping, ReplyUnion, RespVersions, RedisArgument } from '../RESP/types';
import { ChannelListeners, PubSub, PubSubCommand, PubSubListener, PubSubType, PubSubTypeListeners } from './pub-sub';
import { AbortError, ErrorReply, TimeoutError } from '../errors';
import { MonitorCallback } from '.';
import EventEmitter from 'events';

export interface CommandOptions<T = TypeMapping> {
chainId?: symbol;
Expand Down Expand Up @@ -54,11 +55,12 @@ export default class RedisCommandsQueue {
readonly #respVersion;
readonly #maxLength;
readonly #toWrite = new DoublyLinkedList<CommandToWrite>();
readonly #waitingForReply = new SinglyLinkedList<CommandWaitingForReply>();
readonly #waitingForReply = new EmptyAwareSinglyLinkedList<CommandWaitingForReply>();
readonly #onShardedChannelMoved;
#chainInExecution: symbol | undefined;
readonly decoder;
readonly #pubSub = new PubSub();
readonly events = new EventEmitter();

get isPubSubActive() {
return this.#pubSub.isActive;
Expand Down Expand Up @@ -134,6 +136,12 @@ export default class RedisCommandsQueue {
}
break;
}
case 'MOVING': {
const [_, afterMs, url] = push;
const [host, port] = url.toString().split(':');
this.events.emit('moving', afterMs, host, Number(port))
break;
}
}
}
},
Expand All @@ -145,6 +153,17 @@ export default class RedisCommandsQueue {
this.#invalidateCallback = callback;
}

async waitForInflightCommandsToComplete(): Promise<void> {
// In-flight commands already completed
if(this.#waitingForReply.length === 0) {
return
};
// Otherwise wait for in-flight commands to fire `empty` event
return new Promise(resolve => {
this.#waitingForReply.events.on('empty', resolve)
});
}

addCommand<T>(
args: ReadonlyArray<RedisArgument>,
options?: CommandOptions
Expand Down
56 changes: 56 additions & 0 deletions packages/client/lib/client/enterprise-maintenance-manager.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import EventEmitter from "events";
import { RedisClientOptions } from ".";
import RedisCommandsQueue from "./commands-queue";
import RedisSocket from "./socket";

export default class EnterpriseMaintenanceManager extends EventEmitter {
commandsQueue: RedisCommandsQueue;
options: RedisClientOptions;
constructor(
commandsQueue: RedisCommandsQueue,
options: RedisClientOptions,
) {
super();
this.commandsQueue = commandsQueue;
this.options = options;

this.commandsQueue.events.on("moving", this.#onMoving);
}

// Queue:
// toWrite [ C D E ]
// waitingForReply [ A B ] - aka In-flight commands
//
// time: ---1-2---3-4-5-6---------------------------
//
// 1. [EVENT] MOVING PN received
// 2. [ACTION] Pause writing ( we need to wait for new socket to connect and for all in-flight commands to complete )
// 3. [EVENT] New socket connected
// 4. [EVENT] In-flight commands completed
// 5. [ACTION] Destroy old socket
// 6. [ACTION] Resume writing -> we are going to write to the new socket from now on
#onMoving = async (
_afterMs: number,
host: string,
port: number,
): Promise<void> => {
// 1 [EVENT] MOVING PN received
// 2 [ACTION] Pause writing
this.emit('pause')

const newSocket = new RedisSocket({
...this.options.socket,
host,
port,
});
await newSocket.connect();
// 3 [EVENT] New socket connected

await this.commandsQueue.waitForInflightCommandsToComplete();
// 4 [EVENT] In-flight commands completed

// 5 + 6
this.emit('resume', newSocket);

};
}
156 changes: 116 additions & 40 deletions packages/client/lib/client/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { BasicClientSideCache, ClientSideCacheConfig, ClientSideCacheProvider }
import { BasicCommandParser, CommandParser } from './parser';
import SingleEntryCache from '../single-entry-cache';
import { version } from '../../package.json'
import EnterpriseMaintenanceManager from './enterprise-maintenance-manager';

export interface RedisClientOptions<
M extends RedisModules = RedisModules,
Expand Down Expand Up @@ -144,6 +145,44 @@ export interface RedisClientOptions<
* Tag to append to library name that is sent to the Redis server
*/
clientInfoTag?: string;

/**
* Configuration for handling Redis Enterprise graceful maintenance scenarios.
*
* When Redis Enterprise performs maintenance operations, nodes will be replaced, resulting in disconnects.
* This configuration allows the client to handle these scenarios gracefully by automatically
* reconnecting and managing command execution during maintenance windows.
*
* @example Basic graceful maintenance configuration
* ```
* const client = createClient({
* gracefulMaintenance: {
* handleFailedCommands: 'retry',
* handleTimeouts: 'exception',
* }
* });
* ```
*
* @example Graceful maintenance with timeout smoothing
* ```
* const client = createClient({
* gracefulMaintenance: {
* handleFailedCommands: 'retry',
* handleTimeouts: 5000, // Extend timeouts to 5 seconds during maintenance
* }
* });
* ```
*/
gracefulMaintenance?: {
/**
* Designates how failed commands should be handled. A failed command is when the time isn’t sufficient to deal with the responses on the old connection before the server shuts it down
*/
handleFailedCommands: 'exception' | 'retry',
/**
* Specify whether we should throw a MaintenanceTimeout exception or provide more relaxed timeout, in order to minimize command timeouts during maintenance.
*/
handleTimeouts: 'exception' | number,
}
}

type WithCommands<
Expand Down Expand Up @@ -366,7 +405,7 @@ export default class RedisClient<
}

readonly #options?: RedisClientOptions<M, F, S, RESP, TYPE_MAPPING>;
readonly #socket: RedisSocket;
#socket: RedisSocket;
readonly #queue: RedisCommandsQueue;
#selectedDB = 0;
#monitorCallback?: MonitorCallback<TYPE_MAPPING>;
Expand All @@ -379,11 +418,16 @@ export default class RedisClient<
#watchEpoch?: number;
#clientSideCache?: ClientSideCacheProvider;
#credentialsSubscription: Disposable | null = null;
// Flag used to pause writing to the socket during maintenance windows.
// When true, prevents new commands from being written while waiting for:
// 1. New socket to be ready after maintenance redirect
// 2. In-flight commands on the old socket to complete
#paused = false;

get clientSideCache() {
return this._self.#clientSideCache;
}


get options(): RedisClientOptions<M, F, S, RESP> | undefined {
return this._self.#options;
}
Expand Down Expand Up @@ -417,6 +461,19 @@ export default class RedisClient<
return this._self.#dirtyWatch !== undefined
}

#pauseForMaintenance() {
this._self.#paused = true;
}

#resumeFromMaintenance(newSocket: RedisSocket) {
this._self.#socket.removeAllListeners();
this._self.#socket.destroy();
this._self.#socket = newSocket;
this._self.#paused = false;
this._self.#initiateSocket();
this._self.#maybeScheduleWrite();
}

/**
* Marks the client's WATCH command as invalidated due to a topology change.
* This will cause any subsequent EXEC in a transaction to fail with a WatchError.
Expand All @@ -431,7 +488,13 @@ export default class RedisClient<
this.#validateOptions(options)
this.#options = this.#initiateOptions(options);
this.#queue = this.#initiateQueue();
this.#socket = this.#initiateSocket();
this.#socket = this.#createSocket(this.#options);

if(options?.gracefulMaintenance) {
new EnterpriseMaintenanceManager(this.#queue, this.#options!)
.on('pause', this.#pauseForMaintenance.bind(this))
.on('resume', this.#resumeFromMaintenance.bind(this))
}

if (options?.clientSideCache) {
if (options.clientSideCache instanceof ClientSideCacheProvider) {
Expand All @@ -449,7 +512,12 @@ export default class RedisClient<
throw new Error('Client Side Caching is only supported with RESP3');
}

if (options?.gracefulMaintenance && options?.RESP !== 3) {
throw new Error('Graceful Maintenance is only supported with RESP3');
}

}

#initiateOptions(options?: RedisClientOptions<M, F, S, RESP, TYPE_MAPPING>): RedisClientOptions<M, F, S, RESP, TYPE_MAPPING> | undefined {

// Convert username/password to credentialsProvider if no credentialsProvider is already in place
Expand Down Expand Up @@ -657,38 +725,10 @@ export default class RedisClient<
return commands;
}

#initiateSocket(): RedisSocket {
const socketInitiator = async () => {
const promises = [],
chainId = Symbol('Socket Initiator');

const resubscribePromise = this.#queue.resubscribe(chainId);
if (resubscribePromise) {
promises.push(resubscribePromise);
}
async #initiateSocket(): Promise<void> {
await this.#socket.waitForReady();

if (this.#monitorCallback) {
promises.push(
this.#queue.monitor(
this.#monitorCallback,
{
typeMapping: this._commandOptions?.typeMapping,
chainId,
asap: true
}
)
);
}

promises.push(...(await this.#handshake(chainId, true)));

if (promises.length) {
this.#write();
return Promise.all(promises);
}
};

return new RedisSocket(socketInitiator, this.#options?.socket)
this.#socket
.on('data', chunk => {
try {
this.#queue.decoder.write(chunk);
Expand All @@ -706,15 +746,47 @@ export default class RedisClient<
this.#queue.flushAll(err);
}
})
.on('connect', () => this.emit('connect'))
.on('ready', () => {
this.emit('ready');
this.#setPingTimer();
this.#maybeScheduleWrite();
})
.on('reconnecting', () => this.emit('reconnecting'))
.on('drain', () => this.#maybeScheduleWrite())
.on('end', () => this.emit('end'));

const promises = [];
const chainId = Symbol('Socket Initiator');

const resubscribePromise = this.#queue.resubscribe(chainId);
if (resubscribePromise) {
promises.push(resubscribePromise);
}

if (this.#monitorCallback) {
promises.push(
this.#queue.monitor(
this.#monitorCallback,
{
typeMapping: this._commandOptions?.typeMapping,
chainId,
asap: true
}
)
);
}

promises.push(...(await this.#handshake(chainId, true)));

this.#setPingTimer();

if (promises.length) {
this.#write();
await Promise.all(promises);
}
}

#createSocket(options?: RedisClientOptions<M, F, S, RESP, TYPE_MAPPING>): RedisSocket {
return new RedisSocket(options?.socket)
.on('connect', () => this.emit('connect'))
.on('ready', () => {
this.emit('ready');
});
}

#pingTimer?: NodeJS.Timeout;
Expand Down Expand Up @@ -823,6 +895,7 @@ export default class RedisClient<

async connect() {
await this._self.#socket.connect();
await this._self.#initiateSocket();
return this as unknown as RedisClientType<M, F, S, RESP, TYPE_MAPPING>;
}

Expand Down Expand Up @@ -1055,6 +1128,9 @@ export default class RedisClient<
}

#write() {
if(this.#paused) {
return
}
this.#socket.write(this.#queue.commandsToWrite());
}

Expand Down
Loading
Loading