
re2: env based queue selection algo #1775

Merged
merged 12 commits on Mar 7, 2025
WIP
ericallam committed Mar 7, 2025
commit 614f74fd86e72f369f31233e8dc6ea57125aeb14
8 changes: 8 additions & 0 deletions .vscode/launch.json
@@ -141,6 +141,14 @@
"command": "pnpm run test --filter @internal/run-engine",
"cwd": "${workspaceFolder}",
"sourceMaps": true
},
{
"type": "node-terminal",
"request": "launch",
"name": "Debug RunQueue tests",
"command": "pnpm run test ./src/run-queue/index.test.ts",
"cwd": "${workspaceFolder}/internal-packages/run-engine",
"sourceMaps": true
}
]
}
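
Aside (not part of the diff): this launch configuration is the debugger-attached equivalent of running "pnpm run test ./src/run-queue/index.test.ts" from the internal-packages/run-engine directory; the node-terminal request type runs that command in VS Code's integrated terminal with the JavaScript debugger auto-attached.
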
4 changes: 2 additions & 2 deletions internal-packages/run-engine/src/engine/index.ts
@@ -70,7 +70,7 @@ import {
isPendingExecuting,
} from "./statuses.js";
import { HeartbeatTimeouts, RunEngineOptions, TriggerParams } from "./types.js";
import { RunQueueShortKeyProducer } from "../run-queue/keyProducer.js";
import { RunQueueFullKeyProducer } from "../run-queue/keyProducer.js";
import { retryOutcomeFromCompletion } from "./retrying";

const workerCatalog = {
@@ -153,7 +153,7 @@ export class RunEngine {
);
this.runLock = new RunLocker({ redis: this.runLockRedis });

const keys = new RunQueueShortKeyProducer("rq:");
const keys = new RunQueueFullKeyProducer();

this.runQueue = new RunQueue({
name: "rq",
@@ -4,7 +4,7 @@ import {
setupAuthenticatedEnvironment,
setupBackgroundWorker,
} from "@internal/testcontainers";
import { trace } from "@opentelemetry/api";
import { trace } from "@internal/tracing";
import { expect } from "vitest";
import { RunEngine } from "../index.js";
import { setTimeout } from "timers/promises";
4 changes: 2 additions & 2 deletions internal-packages/run-engine/src/index.ts
@@ -1,2 +1,2 @@
export { RunEngine, RunDuplicateIdempotencyKeyError } from "./engine/index";
export type { EventBusEventArgs } from "./engine/eventBus";
export { RunEngine, RunDuplicateIdempotencyKeyError } from "./engine/index.js";
export type { EventBusEventArgs } from "./engine/eventBus.js";
@@ -36,7 +36,7 @@ export type FairDequeuingStrategyBiases = {
export type FairDequeuingStrategyOptions = {
redis: RedisOptions;
keys: RunQueueKeyProducer;
defaultEnvConcurrency?: number;
defaultEnvConcurrencyLimit?: number;
parentQueueLimit?: number;
tracer?: Tracer;
seed?: string;
@@ -97,7 +97,7 @@ export class FairDequeuingStrategy implements RunQueueFairDequeueStrategy {
> = new Map();
private _redis: Redis;

private _defaultEnvConcurrency: number;
private _defaultEnvConcurrencyLimit: number;
private _parentQueueLimit: number;

constructor(private options: FairDequeuingStrategyOptions) {
@@ -115,7 +115,7 @@ export class FairDequeuingStrategy implements RunQueueFairDequeueStrategy {
this._rng = seedrandom(options.seed);
this._redis = createRedisClient(options.redis);

this._defaultEnvConcurrency = options.defaultEnvConcurrency ?? 10;
this._defaultEnvConcurrencyLimit = options.defaultEnvConcurrencyLimit ?? 10;
this._parentQueueLimit = options.parentQueueLimit ?? 100;
}

@@ -561,13 +561,13 @@ export class FairDequeuingStrategy implements RunQueueFairDequeueStrategy {
const value = await this._redis.get(key);

if (!value) {
return this._defaultEnvConcurrency;
return this._defaultEnvConcurrencyLimit;
}

return Number(value);
});

return result.val ?? this._defaultEnvConcurrency;
return result.val ?? this._defaultEnvConcurrencyLimit;
});
}

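Aside (not part of the diff): a minimal sketch of constructing the strategy with the renamed option, using only the fields visible in this file's hunks; the real options type may have additional fields, and the Redis connection details and seed are illustrative assumptions.

import { FairDequeuingStrategy } from "./fairDequeuingStrategy.js";
import { RunQueueFullKeyProducer } from "./keyProducer.js";

const strategy = new FairDequeuingStrategy({
  redis: { host: "localhost", port: 6379 }, // assumed local Redis, not from the PR
  keys: new RunQueueFullKeyProducer(),
  defaultEnvConcurrencyLimit: 10, // renamed from defaultEnvConcurrency; used when no per-env limit is stored in Redis
  parentQueueLimit: 100, // matches the default
  seed: "fixed-seed", // optional; seeds the strategy's RNG (seedrandom) for deterministic ordering
});
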
7 changes: 4 additions & 3 deletions internal-packages/run-engine/src/run-queue/index.test.ts
@@ -7,7 +7,7 @@ import { RunQueue } from "./index.js";
import { InputPayload } from "./types.js";
import { createRedisClient } from "@internal/redis";
import { FairDequeuingStrategy } from "./fairDequeuingStrategy.js";
import { RunQueueShortKeyProducer } from "./keyProducer.js";
import { RunQueueFullKeyProducer } from "./keyProducer.js";

const testOptions = {
name: "rq",
@@ -23,7 +23,7 @@ const testOptions = {
maxTimeoutInMs: 1_000,
randomize: true,
},
keys: new RunQueueShortKeyProducer("rq:"),
keys: new RunQueueFullKeyProducer(),
};

const authenticatedEnvProd = {
@@ -383,7 +383,7 @@ describe("RunQueue", () => {
}
);

redisTest(
redisTest.only(
"Dequeue multiple messages from the queue",
{ timeout: 5_000 },
async ({ redisContainer }) => {
@@ -408,6 +408,7 @@
// Create 20 messages with different runIds and some with different queues
const messages = Array.from({ length: 20 }, (_, i) => ({
...messageProd,
taskIdentifier: i < 15 ? "task/my-task" : "task/other-task", // Mix up the queues
runId: `r${i + 1}`,
queue: i < 15 ? "task/my-task" : "task/other-task", // Mix up the queues
}));
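
Aside (not part of the diff): with this change each generated test message's taskIdentifier matches the queue it is enqueued on, e.g. roughly { ...messageProd, taskIdentifier: "task/other-task", runId: "r16", queue: "task/other-task" } for i >= 15, presumably so that the task-level concurrency keys used during dequeue line up with each message's task identifier.
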
150 changes: 48 additions & 102 deletions internal-packages/run-engine/src/run-queue/index.ts
@@ -259,10 +259,11 @@ export class RunQueue {
return this.#trace(
"dequeueMessageInSharedQueue",
async (span) => {
const envQueues = await this.queuePriorityStrategy.distributeFairQueuesFromParentQueue(
masterQueue,
consumerId
);
const envQueues =
await this.options.queuePriorityStrategy.distributeFairQueuesFromParentQueue(
masterQueue,
consumerId
);

span.setAttribute("environment_count", envQueues.length);

Expand All @@ -275,76 +276,57 @@ export class RunQueue {

const messages: DequeuedMessage[] = [];

// Keep track of queues we've tried that didn't return a message
const emptyQueues = new Set<string>();

// Continue until we've hit max count or tried all queues
while (messages.length < maxCount) {
// Calculate how many more messages we need
const remainingCount = maxCount - messages.length;
if (remainingCount <= 0) break;

// Find all available queues across environments that we haven't marked as empty
const availableEnvQueues = envQueues
.map((env) => ({
env: env,
queues: env.queues.filter((queue) => !emptyQueues.has(queue)),
}))
.filter((env) => env.queues.length > 0);
// Each env starts with its list of candidate queues
const tenantQueues: Record<string, string[]> = {};

if (availableEnvQueues.length === 0) break;
// Initialize tenantQueues with the queues for each env
for (const env of envQueues) {
tenantQueues[env.envId] = [...env.queues]; // Create a copy of the queues array
}

attemptedEnvs += availableEnvQueues.length;
// Continue until we've hit max count or all tenants have empty queue lists
while (
messages.length < maxCount &&
Object.values(tenantQueues).some((queues) => queues.length > 0)
) {
for (const env of envQueues) {
attemptedEnvs++;

// Skip if this tenant has no more queues
if (tenantQueues[env.envId].length === 0) {
continue;
}

// Create a dequeue operation for each environment, taking one queue from each
const dequeueOperations = availableEnvQueues.map(({ env, queues }) => {
const queue = queues[0];
// Pop the next queue (using round-robin order)
const queue = tenantQueues[env.envId].shift()!;
attemptedQueues++;

return {
queue,
operation: this.#callDequeueMessage({
messageQueue: queue,
concurrencyLimitKey: this.keys.concurrencyLimitKeyFromQueue(queue),
currentConcurrencyKey: this.keys.currentConcurrencyKeyFromQueue(queue),
envConcurrencyLimitKey: this.keys.envConcurrencyLimitKeyFromQueue(queue),
envCurrentConcurrencyKey: this.keys.envCurrentConcurrencyKeyFromQueue(queue),
projectCurrentConcurrencyKey:
this.keys.projectCurrentConcurrencyKeyFromQueue(queue),
messageKeyPrefix: this.keys.messageKeyPrefixFromQueue(queue),
envQueueKey: this.keys.envQueueKeyFromQueue(queue),
taskCurrentConcurrentKeyPrefix:
this.keys.taskIdentifierCurrentConcurrencyKeyPrefixFromQueue(queue),
}),
};
});

// Execute all dequeue operations in parallel
const results = await Promise.all(
dequeueOperations.map(async ({ queue, operation }) => {
const message = await operation;
return { queue, message };
})
);
// Attempt to dequeue from this queue
const message = await this.#callDequeueMessage({
messageQueue: queue,
concurrencyLimitKey: this.keys.concurrencyLimitKeyFromQueue(queue),
currentConcurrencyKey: this.keys.currentConcurrencyKeyFromQueue(queue),
envConcurrencyLimitKey: this.keys.envConcurrencyLimitKeyFromQueue(queue),
envCurrentConcurrencyKey: this.keys.envCurrentConcurrencyKeyFromQueue(queue),
projectCurrentConcurrencyKey: this.keys.projectCurrentConcurrencyKeyFromQueue(queue),
messageKeyPrefix: this.keys.messageKeyPrefixFromQueue(queue),
envQueueKey: this.keys.envQueueKeyFromQueue(queue),
taskCurrentConcurrentKeyPrefix:
this.keys.taskIdentifierCurrentConcurrencyKeyPrefixFromQueue(queue),
});

// Process results
let foundAnyMessage = false;
for (const { queue, message } of results) {
if (message) {
messages.push(message);
foundAnyMessage = true;
} else {
// Mark this queue as empty
emptyQueues.add(queue);
// Re-add this queue at the end, since it might have more messages
tenantQueues[env.envId].push(queue);
}
}

// If we couldn't get a message from any queue in any env, break
if (!foundAnyMessage) break;
// If message is null, do not re-add the queue in this cycle

// If we've marked all queues as empty, break
const totalQueues = envQueues.reduce((sum, env) => sum + env.queues.length, 0);
if (emptyQueues.size >= totalQueues) break;
// If we've reached maxCount, break out of the loop
if (messages.length >= maxCount) {
break;
}
}
}

span.setAttributes({
@@ -635,42 +617,6 @@ export class RunQueue {
);
}

queueConcurrencyScanStream(
count: number = 100,
onEndCallback?: () => void,
onErrorCallback?: (error: Error) => void
) {
const pattern = this.keys.queueCurrentConcurrencyScanPattern();

this.logger.debug("Starting queue concurrency scan stream", {
pattern,
component: "runqueue",
operation: "queueConcurrencyScanStream",
service: this.name,
count,
});

const redis = this.redis.duplicate();

const stream = redis.scanStream({
match: pattern,
type: "set",
count,
});

stream.on("end", () => {
onEndCallback?.();
redis.quit();
});

stream.on("error", (error) => {
onErrorCallback?.(error);
redis.quit();
});

return { stream, redis };
}

async quit() {
await this.subscriber.unsubscribe();
await this.subscriber.quit();
@@ -1103,9 +1049,9 @@ local earliestMessage = redis.call('ZRANGE', childQueue, 0, 0, 'WITHSCORES')
for _, parentQueue in ipairs(decodedPayload.masterQueues) do
local prefixedParentQueue = keyPrefix .. parentQueue
if #earliestMessage == 0 then
redis.call('ZREM', prefixedParentQueue, childQueue)
redis.call('ZREM', prefixedParentQueue, childQueueName)
else
redis.call('ZADD', prefixedParentQueue, earliestMessage[2], childQueue)
redis.call('ZADD', prefixedParentQueue, earliestMessage[2], childQueueName)
end
end

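Aside (not part of the diff): a standalone sketch of the round-robin selection that the rewritten dequeue loop above implements. The EnvQueues shape, the generic message type, and the dequeueFromQueue callback are simplified placeholders for the real RunQueue internals, which go through #callDequeueMessage and the Redis Lua scripts.

type EnvQueues = { envId: string; queues: string[] };

async function dequeueRoundRobin<TMessage>(
  envQueues: EnvQueues[],
  maxCount: number,
  dequeueFromQueue: (queue: string) => Promise<TMessage | undefined>
): Promise<TMessage[]> {
  const messages: TMessage[] = [];

  // Each environment (tenant) keeps its own working copy of candidate queues.
  const tenantQueues: Record<string, string[]> = {};
  for (const env of envQueues) {
    tenantQueues[env.envId] = [...env.queues];
  }

  // Keep cycling over environments until we have enough messages or every env is exhausted.
  while (
    messages.length < maxCount &&
    Object.values(tenantQueues).some((queues) => queues.length > 0)
  ) {
    for (const env of envQueues) {
      if (messages.length >= maxCount) break;

      // Take the next queue for this env; skip the env if it has none left.
      const queue = tenantQueues[env.envId].shift();
      if (!queue) continue;

      const message = await dequeueFromQueue(queue);
      if (message) {
        messages.push(message);
        // The queue produced a message, so it may have more: rotate it to the back.
        tenantQueues[env.envId].push(queue);
      }
      // A queue that returned nothing is not re-added, so it drops out of the rotation.
    }
  }

  return messages;
}

Compared with the previous approach, queues that return a message are rotated to the back of their environment's list and empty queues drop out, so the loop keeps filling the batch until maxCount is reached or every candidate queue is exhausted, rather than stopping as soon as one pass finds nothing.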