From bf75db88c0d5ca96ddf1c1300855a00f708f7816 Mon Sep 17 00:00:00 2001 From: Lauren ten Hoor Date: Fri, 13 Feb 2026 20:30:28 +0800 Subject: [PATCH] feat: health check scans for orphaned Doing/Testing labels (#170) --- lib/services/health.ts | 88 ++++++++++++++++++++++++++++++++++++++- lib/services/heartbeat.ts | 18 ++++++-- lib/tools/health.ts | 19 +++++++-- 3 files changed, 118 insertions(+), 7 deletions(-) diff --git a/lib/services/health.ts b/lib/services/health.ts index 720079f..eb41649 100644 --- a/lib/services/health.ts +++ b/lib/services/health.ts @@ -43,7 +43,8 @@ export type HealthIssue = { | "stale_worker" // Case 3: active for >2h | "stuck_label" // Case 4: inactive but issue still has active label | "orphan_issue_id" // Case 5: inactive but issueId set - | "issue_gone"; // Case 6: active but issue deleted/closed + | "issue_gone" // Case 6: active but issue deleted/closed + | "orphaned_label"; // Case 7: active label but no worker tracking it severity: "critical" | "warning"; project: string; groupId: string; @@ -396,3 +397,88 @@ export async function checkWorkerHealth(opts: { return fixes; } + +// --------------------------------------------------------------------------- +// Orphaned label scan +// --------------------------------------------------------------------------- + +/** + * Scan for issues with active labels (Doing, Testing) that are NOT tracked + * in projects.json. This catches cases where: + * - Worker crashed and state was cleared (issueId: null) + * - Label was set externally + * - State corruption + * + * Returns fixes for all orphaned labels found. 
+ */ +export async function scanOrphanedLabels(opts: { + workspaceDir: string; + groupId: string; + project: Project; + role: Role; + autoFix: boolean; + provider: IssueProvider; + /** Workflow config (defaults to DEFAULT_WORKFLOW) */ + workflow?: WorkflowConfig; +}): Promise<HealthFix[]> { + const { + workspaceDir, groupId, project, role, autoFix, provider, + workflow = DEFAULT_WORKFLOW, + } = opts; + + const fixes: HealthFix[] = []; + const worker = getWorker(project, role); + + // Get labels from workflow config + const activeLabel = getActiveLabel(workflow, role); + const queueLabel = getRevertLabel(workflow, role); + + // Fetch all issues with the active label + let issuesWithLabel: Issue[]; + try { + issuesWithLabel = await provider.listIssuesByLabel(activeLabel); + } catch { + // Provider error (timeout, network, etc) — skip this scan + return fixes; + } + + // Check each issue to see if it's tracked in worker state + for (const issue of issuesWithLabel) { + const issueIdStr = String(issue.iid); + + // Check if this issue is tracked + const isTracked = worker.active && worker.issueId === issueIdStr; + + if (!isTracked) { + // Orphaned label: issue has active label but no worker tracking it + const fix: HealthFix = { + issue: { + type: "orphaned_label", + severity: "critical", + project: project.name, + groupId, + role, + issueId: issueIdStr, + expectedLabel: queueLabel, + actualLabel: activeLabel, + message: `Issue #${issue.iid} has "${activeLabel}" label but no ${role.toUpperCase()} worker is tracking it`, + }, + fixed: false, + }; + + if (autoFix) { + try { + await provider.transitionLabel(issue.iid, activeLabel, queueLabel); + fix.fixed = true; + fix.labelReverted = `${activeLabel} → ${queueLabel}`; + } catch { + fix.labelRevertFailed = true; + } + } + + fixes.push(fix); + } + } + + return fixes; +} diff --git a/lib/services/heartbeat.ts b/lib/services/heartbeat.ts index ded699f..1b1f381 100644 --- a/lib/services/heartbeat.ts +++ b/lib/services/heartbeat.ts @@ -15,7 
+15,7 @@ import fs from "node:fs"; import path from "node:path"; import { readProjects } from "../projects.js"; import { log as auditLog } from "../audit.js"; -import { checkWorkerHealth, fetchGatewaySessions, type SessionLookup } from "./health.js"; +import { checkWorkerHealth, scanOrphanedLabels, fetchGatewaySessions, type SessionLookup } from "./health.js"; import { projectTick } from "./tick.js"; import { createProvider } from "../providers/index.js"; @@ -307,7 +307,8 @@ async function performHealthPass( let fixedCount = 0; for (const role of ["dev", "qa"] as const) { - const fixes = await checkWorkerHealth({ + // Check worker health (session liveness, label consistency, etc) + const healthFixes = await checkWorkerHealth({ workspaceDir, groupId, project, @@ -316,7 +317,18 @@ async function performHealthPass( autoFix: true, provider, }); - fixedCount += fixes.filter((f) => f.fixed).length; + fixedCount += healthFixes.filter((f) => f.fixed).length; + + // Scan for orphaned labels (active labels with no tracking worker) + const orphanFixes = await scanOrphanedLabels({ + workspaceDir, + groupId, + project, + role, + autoFix: true, + provider, + }); + fixedCount += orphanFixes.filter((f) => f.fixed).length; } return fixedCount; diff --git a/lib/tools/health.ts b/lib/tools/health.ts index 6a60275..25434a5 100644 --- a/lib/tools/health.ts +++ b/lib/tools/health.ts @@ -8,6 +8,7 @@ * - stuck_label: inactive but issue has Doing/Testing label * - orphan_issue_id: inactive but issueId set * - issue_gone: active but issue deleted/closed + * - orphaned_label: active label but no worker tracking it (NEW) * * Read-only by default (surfaces issues). Pass fix=true to apply fixes. 
*/ @@ -15,7 +16,7 @@ import { jsonResult } from "openclaw/plugin-sdk"; import type { ToolContext } from "../types.js"; import { readProjects, getProject } from "../projects.js"; import { log as auditLog } from "../audit.js"; -import { checkWorkerHealth, fetchGatewaySessions, type HealthFix } from "../services/health.js"; +import { checkWorkerHealth, scanOrphanedLabels, fetchGatewaySessions, type HealthFix } from "../services/health.js"; import { requireWorkspaceDir, resolveProvider } from "../tool-helpers.js"; export function createHealthTool() { @@ -51,7 +52,8 @@ export function createHealthTool() { const { provider } = await resolveProvider(project); for (const role of ["dev", "qa"] as const) { - const fixes = await checkWorkerHealth({ + // Worker health check (session liveness, label consistency, etc) + const healthFixes = await checkWorkerHealth({ workspaceDir, groupId: pid, project, @@ -60,7 +62,18 @@ export function createHealthTool() { autoFix: fix, provider, }); - issues.push(...fixes.map((f) => ({ ...f, project: project.name, role }))); + issues.push(...healthFixes.map((f) => ({ ...f, project: project.name, role }))); + + // Orphaned label scan (active labels with no tracking worker) + const orphanFixes = await scanOrphanedLabels({ + workspaceDir, + groupId: pid, + project, + role, + autoFix: fix, + provider, + }); + issues.push(...orphanFixes.map((f) => ({ ...f, project: project.name, role }))); } }