feat: redesign health check to triangulate projects.json, issue label, and session state (#143) (#145)

## Changes

- Remove `activeSessions` parameter from health check (was never populated)
- Add gateway session lookup via `openclaw gateway call status`
- Add issue label lookup via `provider.getIssue(issueId)`
- Implement detection matrix with 6 issue types:
  - session_dead: worker is active but its session is missing in the gateway
  - label_mismatch: worker is active but the issue is not labeled Doing/Testing
  - stale_worker: worker has been active for more than 2 hours
  - stuck_label: worker is inactive but the issue has a Doing/Testing label
  - orphan_issue_id: worker is inactive but issueId is set
  - issue_gone: worker is active but the issue was deleted or closed

## Files

- lib/services/health.ts — complete rewrite with three-source triangulation
- lib/tools/health.ts — remove activeSessions param, fetch sessions from gateway
- lib/services/heartbeat.ts — remove empty activeSessions calls, pass sessions map
This commit is contained in:
Lauren ten Hoor
2026-02-13 16:20:21 +08:00
committed by GitHub
parent 4a029c1b3b
commit 825c5e6f50
3 changed files with 337 additions and 68 deletions

View File

@@ -1,13 +1,21 @@
/**
* health — Worker health scan with optional auto-fix.
*
* Triangulates projects.json, issue labels, and session state to detect:
* - session_dead: worker is active but its session is missing in the gateway
* - label_mismatch: worker is active but the issue does not have the expected label
* - stale_worker: worker has been active for more than 2 hours
* - stuck_label: worker is inactive but the issue has a Doing/Testing label
* - orphan_issue_id: worker is inactive but issueId is set
* - issue_gone: worker is active but the issue was deleted or closed
*
* Read-only by default (surfaces issues). Pass fix=true to apply fixes.
*/
import { jsonResult } from "openclaw/plugin-sdk";
import type { ToolContext } from "../types.js";
import { readProjects, getProject } from "../projects.js";
import { log as auditLog } from "../audit.js";
import { checkWorkerHealth, type HealthFix } from "../services/health.js";
import { checkWorkerHealth, fetchGatewaySessions, type HealthFix } from "../services/health.js";
import { requireWorkspaceDir, resolveProvider } from "../tool-helpers.js";
export function createHealthTool() {
@@ -20,20 +28,21 @@ export function createHealthTool() {
properties: {
projectGroupId: { type: "string", description: "Filter to specific project. Omit for all." },
fix: { type: "boolean", description: "Apply fixes for detected issues. Default: false (read-only)." },
activeSessions: { type: "array", items: { type: "string" }, description: "Active session IDs for zombie detection." },
},
},
async execute(_id: string, params: Record<string, unknown>) {
const workspaceDir = requireWorkspaceDir(ctx);
const fix = (params.fix as boolean) ?? false;
const activeSessions = (params.activeSessions as string[]) ?? [];
const groupId = params.projectGroupId as string | undefined;
const data = await readProjects(workspaceDir);
const projectIds = groupId ? [groupId] : Object.keys(data.projects);
// Fetch gateway sessions once for all projects
const sessions = await fetchGatewaySessions();
const issues: Array<HealthFix & { project: string; role: string }> = [];
for (const pid of projectIds) {
@@ -43,8 +52,13 @@ export function createHealthTool() {
for (const role of ["dev", "qa"] as const) {
const fixes = await checkWorkerHealth({
workspaceDir, groupId: pid, project, role, activeSessions,
autoFix: fix, provider,
workspaceDir,
groupId: pid,
project,
role,
sessions,
autoFix: fix,
provider,
});
issues.push(...fixes.map((f) => ({ ...f, project: project.name, role })));
}
@@ -55,14 +69,15 @@ export function createHealthTool() {
fix,
issuesFound: issues.length,
issuesFixed: issues.filter((i) => i.fixed).length,
sessionsCached: sessions.size,
});
return jsonResult({
success: true,
fix,
projectsScanned: projectIds.length,
sessionsQueried: sessions.size,
issues,
note: activeSessions.length === 0 ? "No activeSessions provided — zombie detection skipped." : undefined,
});
},
});