fix: health check skips session liveness checks when gateway unavailable (#165)

This commit is contained in:
Lauren ten Hoor
2026-02-13 19:40:43 +08:00
parent 19a2cb3bfc
commit 8b5cfedb01

View File

@@ -79,9 +79,10 @@ export type SessionLookup = Map<string, GatewaySession>;
/** /**
* Query gateway status and build a lookup map of active sessions. * Query gateway status and build a lookup map of active sessions.
* Caches result for the duration of a health check pass. * Returns null if gateway is unavailable (timeout, error, etc).
* Callers should skip session liveness checks if null — unknown ≠ dead.
*/ */
export async function fetchGatewaySessions(): Promise<SessionLookup> { export async function fetchGatewaySessions(): Promise<SessionLookup | null> {
const lookup: SessionLookup = new Map(); const lookup: SessionLookup = new Map();
try { try {
@@ -98,20 +99,21 @@ export async function fetchGatewaySessions(): Promise<SessionLookup> {
lookup.set(session.key, session); lookup.set(session.key, session);
} }
} }
} catch {
// Gateway unavailable — return empty map (all sessions will be treated as missing)
}
return lookup; return lookup;
} catch {
// Gateway unavailable — return null (don't assume sessions are dead)
return null;
}
} }
/** /**
* Check if a session key exists in the gateway and is considered "alive". * Check if a session key exists in the gateway and is considered "alive".
* A session is alive if it exists. We don't consider percentUsed or abortedLastRun * A session is alive if it exists. We don't consider percentUsed or abortedLastRun
* as dead indicators — those are normal states for reusable sessions. * as dead indicators — those are normal states for reusable sessions.
* Returns false if the sessions lookup is null (gateway unavailable). In that case
* false means "unknown", not "dead" — callers must check for a null lookup first.
*/ */
function isSessionAlive(sessionKey: string, sessions: SessionLookup): boolean { function isSessionAlive(sessionKey: string, sessions: SessionLookup | null): boolean {
return sessions.has(sessionKey); return sessions ? sessions.has(sessionKey) : false;
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@@ -144,7 +146,7 @@ export async function checkWorkerHealth(opts: {
role: Role; role: Role;
autoFix: boolean; autoFix: boolean;
provider: IssueProvider; provider: IssueProvider;
sessions: SessionLookup; sessions: SessionLookup | null;
/** Workflow config (defaults to DEFAULT_WORKFLOW) */ /** Workflow config (defaults to DEFAULT_WORKFLOW) */
workflow?: WorkflowConfig; workflow?: WorkflowConfig;
}): Promise<HealthFix[]> { }): Promise<HealthFix[]> {
@@ -252,8 +254,9 @@ export async function checkWorkerHealth(opts: {
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Case 1: Active with correct label but session is dead/missing // Case 1: Active with correct label but session is dead/missing
// Skip if sessions lookup unavailable (gateway timeout) — unknown ≠ dead
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
if (worker.active && sessionKey && !isSessionAlive(sessionKey, sessions)) { if (worker.active && sessionKey && sessions && !isSessionAlive(sessionKey, sessions)) {
const fix: HealthFix = { const fix: HealthFix = {
issue: { issue: {
type: "session_dead", type: "session_dead",
@@ -307,8 +310,9 @@ export async function checkWorkerHealth(opts: {
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Case 3: Active with correct label and alive session — check for staleness // Case 3: Active with correct label and alive session — check for staleness
// Skip if the sessions lookup is unavailable (gateway timeout) — liveness cannot be determined
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
if (worker.active && worker.startTime && sessionKey && isSessionAlive(sessionKey, sessions)) { if (worker.active && worker.startTime && sessionKey && sessions && isSessionAlive(sessionKey, sessions)) {
const hours = (Date.now() - new Date(worker.startTime).getTime()) / 3_600_000; const hours = (Date.now() - new Date(worker.startTime).getTime()) / 3_600_000;
if (hours > 2) { if (hours > 2) {
const fix: HealthFix = { const fix: HealthFix = {