feat: health check scans for orphaned Doing/Testing labels (#170)
This commit is contained in:
@@ -43,7 +43,8 @@ export type HealthIssue = {
|
|||||||
| "stale_worker" // Case 3: active for >2h
|
| "stale_worker" // Case 3: active for >2h
|
||||||
| "stuck_label" // Case 4: inactive but issue still has active label
|
| "stuck_label" // Case 4: inactive but issue still has active label
|
||||||
| "orphan_issue_id" // Case 5: inactive but issueId set
|
| "orphan_issue_id" // Case 5: inactive but issueId set
|
||||||
| "issue_gone"; // Case 6: active but issue deleted/closed
|
| "issue_gone" // Case 6: active but issue deleted/closed
|
||||||
|
| "orphaned_label"; // Case 7: active label but no worker tracking it
|
||||||
severity: "critical" | "warning";
|
severity: "critical" | "warning";
|
||||||
project: string;
|
project: string;
|
||||||
groupId: string;
|
groupId: string;
|
||||||
@@ -396,3 +397,88 @@ export async function checkWorkerHealth(opts: {
|
|||||||
|
|
||||||
return fixes;
|
return fixes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Orphaned label scan
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Find issues that carry a role's active label while the role's worker is
 * not tracking them, and optionally move them back to the revert label.
 *
 * For the given role, the active label and the revert ("queue") label are
 * looked up from the workflow config. Every issue the provider lists under
 * the active label is compared against the worker state: an issue counts as
 * tracked only when the worker is active AND its `issueId` equals the
 * issue's `iid` (compared as a string). Every untracked issue produces one
 * `orphaned_label` entry with severity "critical".
 *
 * @param opts.workspaceDir Accepted for parity with the other health checks;
 *   not read by this function.
 * @param opts.groupId Copied into each reported issue.
 * @param opts.project Project whose worker state is inspected; its `name` is
 *   copied into each reported issue.
 * @param opts.role Role whose worker and labels are checked.
 * @param opts.autoFix When true, each orphaned issue is transitioned from the
 *   active label to the revert label. When false, nothing is modified.
 * @param opts.provider Issue provider used to list and relabel issues.
 * @param opts.workflow Workflow config; falls back to DEFAULT_WORKFLOW when
 *   undefined.
 * @returns One HealthFix per orphaned issue. `fixed` is true only when the
 *   label transition succeeded; a failed transition sets `labelRevertFailed`
 *   instead. If listing issues fails, an empty array is returned.
 */
export async function scanOrphanedLabels(opts: {
  workspaceDir: string;
  groupId: string;
  project: Project;
  role: Role;
  autoFix: boolean;
  provider: IssueProvider;
  /** Workflow config (defaults to DEFAULT_WORKFLOW) */
  workflow?: WorkflowConfig;
}): Promise<HealthFix[]> {
  const workflow = opts.workflow === undefined ? DEFAULT_WORKFLOW : opts.workflow;
  const { groupId, project, role, autoFix, provider } = opts;

  const report: HealthFix[] = [];
  const roleWorker = getWorker(project, role);

  // Resolve the label pair for this role from the workflow config.
  const activeLabel = getActiveLabel(workflow, role);
  const queueLabel = getRevertLabel(workflow, role);

  // Candidates are all issues currently carrying the active label.
  let candidates: Issue[];
  try {
    candidates = await provider.listIssuesByLabel(activeLabel);
  } catch {
    // Provider error (timeout, network, etc) — skip this scan
    return report;
  }

  for (const candidate of candidates) {
    const candidateId = String(candidate.iid);

    // Tracked issues are legitimate: an active worker owns exactly this issue.
    if (roleWorker.active && roleWorker.issueId === candidateId) {
      continue;
    }

    // Orphaned: the active label is present but no worker is tracking it.
    const entry: HealthFix = {
      issue: {
        type: "orphaned_label",
        severity: "critical",
        project: project.name,
        groupId,
        role,
        issueId: candidateId,
        expectedLabel: queueLabel,
        actualLabel: activeLabel,
        message: `Issue #${candidate.iid} has "${activeLabel}" label but no ${role.toUpperCase()} worker is tracking it`,
      },
      fixed: false,
    };

    if (autoFix) {
      try {
        await provider.transitionLabel(candidate.iid, activeLabel, queueLabel);
        entry.fixed = true;
        entry.labelReverted = `${activeLabel} → ${queueLabel}`;
      } catch {
        // Record the failure on the entry; the scan continues with the
        // remaining issues.
        entry.labelRevertFailed = true;
      }
    }

    report.push(entry);
  }

  return report;
}
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ import fs from "node:fs";
|
|||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
import { readProjects } from "../projects.js";
|
import { readProjects } from "../projects.js";
|
||||||
import { log as auditLog } from "../audit.js";
|
import { log as auditLog } from "../audit.js";
|
||||||
import { checkWorkerHealth, fetchGatewaySessions, type SessionLookup } from "./health.js";
|
import { checkWorkerHealth, scanOrphanedLabels, fetchGatewaySessions, type SessionLookup } from "./health.js";
|
||||||
import { projectTick } from "./tick.js";
|
import { projectTick } from "./tick.js";
|
||||||
import { createProvider } from "../providers/index.js";
|
import { createProvider } from "../providers/index.js";
|
||||||
|
|
||||||
@@ -307,7 +307,8 @@ async function performHealthPass(
|
|||||||
let fixedCount = 0;
|
let fixedCount = 0;
|
||||||
|
|
||||||
for (const role of ["dev", "qa"] as const) {
|
for (const role of ["dev", "qa"] as const) {
|
||||||
const fixes = await checkWorkerHealth({
|
// Check worker health (session liveness, label consistency, etc)
|
||||||
|
const healthFixes = await checkWorkerHealth({
|
||||||
workspaceDir,
|
workspaceDir,
|
||||||
groupId,
|
groupId,
|
||||||
project,
|
project,
|
||||||
@@ -316,7 +317,18 @@ async function performHealthPass(
|
|||||||
autoFix: true,
|
autoFix: true,
|
||||||
provider,
|
provider,
|
||||||
});
|
});
|
||||||
fixedCount += fixes.filter((f) => f.fixed).length;
|
fixedCount += healthFixes.filter((f) => f.fixed).length;
|
||||||
|
|
||||||
|
// Scan for orphaned labels (active labels with no tracking worker)
|
||||||
|
const orphanFixes = await scanOrphanedLabels({
|
||||||
|
workspaceDir,
|
||||||
|
groupId,
|
||||||
|
project,
|
||||||
|
role,
|
||||||
|
autoFix: true,
|
||||||
|
provider,
|
||||||
|
});
|
||||||
|
fixedCount += orphanFixes.filter((f) => f.fixed).length;
|
||||||
}
|
}
|
||||||
|
|
||||||
return fixedCount;
|
return fixedCount;
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
* - stuck_label: inactive but issue has Doing/Testing label
|
* - stuck_label: inactive but issue has Doing/Testing label
|
||||||
* - orphan_issue_id: inactive but issueId set
|
* - orphan_issue_id: inactive but issueId set
|
||||||
* - issue_gone: active but issue deleted/closed
|
* - issue_gone: active but issue deleted/closed
|
||||||
|
* - orphaned_label: active label but no worker tracking it (NEW)
|
||||||
*
|
*
|
||||||
* Read-only by default (surfaces issues). Pass fix=true to apply fixes.
|
* Read-only by default (surfaces issues). Pass fix=true to apply fixes.
|
||||||
*/
|
*/
|
||||||
@@ -15,7 +16,7 @@ import { jsonResult } from "openclaw/plugin-sdk";
|
|||||||
import type { ToolContext } from "../types.js";
|
import type { ToolContext } from "../types.js";
|
||||||
import { readProjects, getProject } from "../projects.js";
|
import { readProjects, getProject } from "../projects.js";
|
||||||
import { log as auditLog } from "../audit.js";
|
import { log as auditLog } from "../audit.js";
|
||||||
import { checkWorkerHealth, fetchGatewaySessions, type HealthFix } from "../services/health.js";
|
import { checkWorkerHealth, scanOrphanedLabels, fetchGatewaySessions, type HealthFix } from "../services/health.js";
|
||||||
import { requireWorkspaceDir, resolveProvider } from "../tool-helpers.js";
|
import { requireWorkspaceDir, resolveProvider } from "../tool-helpers.js";
|
||||||
|
|
||||||
export function createHealthTool() {
|
export function createHealthTool() {
|
||||||
@@ -51,7 +52,8 @@ export function createHealthTool() {
|
|||||||
const { provider } = await resolveProvider(project);
|
const { provider } = await resolveProvider(project);
|
||||||
|
|
||||||
for (const role of ["dev", "qa"] as const) {
|
for (const role of ["dev", "qa"] as const) {
|
||||||
const fixes = await checkWorkerHealth({
|
// Worker health check (session liveness, label consistency, etc)
|
||||||
|
const healthFixes = await checkWorkerHealth({
|
||||||
workspaceDir,
|
workspaceDir,
|
||||||
groupId: pid,
|
groupId: pid,
|
||||||
project,
|
project,
|
||||||
@@ -60,7 +62,18 @@ export function createHealthTool() {
|
|||||||
autoFix: fix,
|
autoFix: fix,
|
||||||
provider,
|
provider,
|
||||||
});
|
});
|
||||||
issues.push(...fixes.map((f) => ({ ...f, project: project.name, role })));
|
issues.push(...healthFixes.map((f) => ({ ...f, project: project.name, role })));
|
||||||
|
|
||||||
|
// Orphaned label scan (active labels with no tracking worker)
|
||||||
|
const orphanFixes = await scanOrphanedLabels({
|
||||||
|
workspaceDir,
|
||||||
|
groupId: pid,
|
||||||
|
project,
|
||||||
|
role,
|
||||||
|
autoFix: fix,
|
||||||
|
provider,
|
||||||
|
});
|
||||||
|
issues.push(...orphanFixes.map((f) => ({ ...f, project: project.name, role })));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user