Merge pull request #171 from laurentenhoor/feat/170-orphaned-label-scan
feat: health check scans for orphaned Doing/Testing labels (#170)
This commit is contained in:
@@ -43,7 +43,8 @@ export type HealthIssue = {
|
||||
| "stale_worker" // Case 3: active for >2h
|
||||
| "stuck_label" // Case 4: inactive but issue still has active label
|
||||
| "orphan_issue_id" // Case 5: inactive but issueId set
|
||||
| "issue_gone"; // Case 6: active but issue deleted/closed
|
||||
| "issue_gone" // Case 6: active but issue deleted/closed
|
||||
| "orphaned_label"; // Case 7: active label but no worker tracking it
|
||||
severity: "critical" | "warning";
|
||||
project: string;
|
||||
groupId: string;
|
||||
@@ -396,3 +397,88 @@ export async function checkWorkerHealth(opts: {
|
||||
|
||||
return fixes;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Orphaned label scan
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scan for issues that carry the role's active label (e.g. Doing, Testing)
 * but are NOT the issue currently tracked by that role's worker state.
 * This catches cases where:
 *   - Worker crashed and state was cleared (issueId: null)
 *   - Label was set externally
 *   - State corruption
 *
 * Every such issue is reported as a critical "orphaned_label" health issue.
 * When `autoFix` is true, the scan also asks the provider to transition the
 * issue from the active label to the label returned by `getRevertLabel`, and
 * records on the fix whether that transition succeeded.
 *
 * @param opts.workspaceDir - Part of the options contract; not read by this scan.
 * @param opts.groupId - Copied into each reported issue.
 * @param opts.project - Project whose worker state is consulted; its `name` is reported.
 * @param opts.role - Worker role whose active label is scanned.
 * @param opts.autoFix - When true, attempt to revert the label on each orphaned issue.
 * @param opts.provider - Issue provider used to list issues and transition labels.
 * @param opts.workflow - Workflow config (defaults to DEFAULT_WORKFLOW).
 * @returns One fix entry per orphaned issue. The list is empty when nothing is
 *   orphaned or when listing issues by label fails.
 */
export async function scanOrphanedLabels(opts: {
  workspaceDir: string;
  groupId: string;
  project: Project;
  role: Role;
  autoFix: boolean;
  provider: IssueProvider;
  /** Workflow config (defaults to DEFAULT_WORKFLOW) */
  workflow?: WorkflowConfig;
}): Promise<HealthFix[]> {
  // `workspaceDir` is intentionally not destructured: this scan never reads it.
  const { groupId, project, role, autoFix, provider, workflow = DEFAULT_WORKFLOW } = opts;

  const fixes: HealthFix[] = [];
  const worker = getWorker(project, role);

  // Labels come from the workflow config: the one that marks work in progress
  // for this role, and the one an orphaned issue is moved back to.
  const activeLabel = getActiveLabel(workflow, role);
  const queueLabel = getRevertLabel(workflow, role);

  // Fetch all issues with the active label.
  let issuesWithLabel: Issue[];
  try {
    issuesWithLabel = await provider.listIssuesByLabel(activeLabel);
  } catch {
    // Provider error (timeout, network, etc) — skip this scan.
    return fixes;
  }

  for (const issue of issuesWithLabel) {
    const issueIdStr = String(issue.iid);

    // Tracked means the worker is active AND working on exactly this issue.
    // Evaluated per iteration so it reflects worker state after each awaited fix.
    if (worker.active && worker.issueId === issueIdStr) {
      continue;
    }

    // Orphaned label: issue has active label but no worker tracking it.
    const fix: HealthFix = {
      issue: {
        type: "orphaned_label",
        severity: "critical",
        project: project.name,
        groupId,
        role,
        issueId: issueIdStr,
        expectedLabel: queueLabel,
        actualLabel: activeLabel,
        message: `Issue #${issue.iid} has "${activeLabel}" label but no ${role.toUpperCase()} worker is tracking it`,
      },
      fixed: false,
    };

    if (autoFix) {
      try {
        await provider.transitionLabel(issue.iid, activeLabel, queueLabel);
        fix.fixed = true;
        fix.labelReverted = `${activeLabel} → ${queueLabel}`;
      } catch {
        // Keep scanning remaining issues; the failure is surfaced on the fix entry.
        fix.labelRevertFailed = true;
      }
    }

    fixes.push(fix);
  }

  return fixes;
}
|
||||
|
||||
@@ -15,7 +15,7 @@ import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { readProjects } from "../projects.js";
|
||||
import { log as auditLog } from "../audit.js";
|
||||
import { checkWorkerHealth, fetchGatewaySessions, type SessionLookup } from "./health.js";
|
||||
import { checkWorkerHealth, scanOrphanedLabels, fetchGatewaySessions, type SessionLookup } from "./health.js";
|
||||
import { projectTick } from "./tick.js";
|
||||
import { createProvider } from "../providers/index.js";
|
||||
|
||||
@@ -307,7 +307,8 @@ async function performHealthPass(
|
||||
let fixedCount = 0;
|
||||
|
||||
for (const role of ["dev", "qa"] as const) {
|
||||
const fixes = await checkWorkerHealth({
|
||||
// Check worker health (session liveness, label consistency, etc)
|
||||
const healthFixes = await checkWorkerHealth({
|
||||
workspaceDir,
|
||||
groupId,
|
||||
project,
|
||||
@@ -316,7 +317,18 @@ async function performHealthPass(
|
||||
autoFix: true,
|
||||
provider,
|
||||
});
|
||||
fixedCount += fixes.filter((f) => f.fixed).length;
|
||||
fixedCount += healthFixes.filter((f) => f.fixed).length;
|
||||
|
||||
// Scan for orphaned labels (active labels with no tracking worker)
|
||||
const orphanFixes = await scanOrphanedLabels({
|
||||
workspaceDir,
|
||||
groupId,
|
||||
project,
|
||||
role,
|
||||
autoFix: true,
|
||||
provider,
|
||||
});
|
||||
fixedCount += orphanFixes.filter((f) => f.fixed).length;
|
||||
}
|
||||
|
||||
return fixedCount;
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
* - stuck_label: inactive but issue has Doing/Testing label
|
||||
* - orphan_issue_id: inactive but issueId set
|
||||
* - issue_gone: active but issue deleted/closed
|
||||
* - orphaned_label: active label but no worker tracking it (NEW)
|
||||
*
|
||||
* Read-only by default (surfaces issues). Pass fix=true to apply fixes.
|
||||
*/
|
||||
@@ -15,7 +16,7 @@ import { jsonResult } from "openclaw/plugin-sdk";
|
||||
import type { ToolContext } from "../types.js";
|
||||
import { readProjects, getProject } from "../projects.js";
|
||||
import { log as auditLog } from "../audit.js";
|
||||
import { checkWorkerHealth, fetchGatewaySessions, type HealthFix } from "../services/health.js";
|
||||
import { checkWorkerHealth, scanOrphanedLabels, fetchGatewaySessions, type HealthFix } from "../services/health.js";
|
||||
import { requireWorkspaceDir, resolveProvider } from "../tool-helpers.js";
|
||||
|
||||
export function createHealthTool() {
|
||||
@@ -51,7 +52,8 @@ export function createHealthTool() {
|
||||
const { provider } = await resolveProvider(project);
|
||||
|
||||
for (const role of ["dev", "qa"] as const) {
|
||||
const fixes = await checkWorkerHealth({
|
||||
// Worker health check (session liveness, label consistency, etc)
|
||||
const healthFixes = await checkWorkerHealth({
|
||||
workspaceDir,
|
||||
groupId: pid,
|
||||
project,
|
||||
@@ -60,7 +62,18 @@ export function createHealthTool() {
|
||||
autoFix: fix,
|
||||
provider,
|
||||
});
|
||||
issues.push(...fixes.map((f) => ({ ...f, project: project.name, role })));
|
||||
issues.push(...healthFixes.map((f) => ({ ...f, project: project.name, role })));
|
||||
|
||||
// Orphaned label scan (active labels with no tracking worker)
|
||||
const orphanFixes = await scanOrphanedLabels({
|
||||
workspaceDir,
|
||||
groupId: pid,
|
||||
project,
|
||||
role,
|
||||
autoFix: fix,
|
||||
provider,
|
||||
});
|
||||
issues.push(...orphanFixes.map((f) => ({ ...f, project: project.name, role })));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user