Files
devclaw-gitea/lib/services/health.ts
Lauren ten Hoor 3a58dde3ad fix: clear startTime when deactivating workers to prevent stale timestamps
Problem:
When workers were deactivated (task completed or fixed by health checks),
the startTime field was not being cleared. This caused:
- Inactive workers to retain stale timestamps
- Misleading duration data in projects.json
- Potential confusion in health checks and status displays

Example from projects.json:
{
  "qa": {
    "active": false,
    "issueId": null,
    "startTime": "2026-02-10T08:51:50.725Z",  // Stale!
    "tier": "qa"
  }
}

Root Cause:
The deactivateWorker() function only set active: false and issueId: null,
but did not clear startTime. Similarly, health check auto-fixes that
deactivated workers also failed to clear startTime.

Solution:
Always set startTime: null when deactivating a worker to ensure clean state.

Changes:
1. lib/projects.ts:
   - deactivateWorker() now sets startTime: null
   - Updated function comment to document this behavior

2. lib/services/health.ts:
   - All three auto-fix paths that deactivate workers now clear startTime:
     * active_no_session fix (line 77)
     * zombie_session fix (line 98)
     * stale_worker fix (line 138)

Impact:
- Inactive workers now have clean state (startTime: null)
- Duration calculations only apply to active workers
- Health checks work with accurate data
- No stale timestamps persisting across task completions
- Complements fix from #108 (which ensures startTime is set on activation)

Together with #108:
- #108: Always SET startTime when activating worker
- #113: Always CLEAR startTime when deactivating worker
- Result: startTime accurately reflects current task duration

Addresses issue #113
2026-02-11 00:28:30 +08:00

147 lines
4.6 KiB
TypeScript

/**
* Health service — worker health checks and auto-fix.
*
* Detects: active_no_session, zombie_session, stale_worker, inactive_with_issue.
* Used by both `status` (read-only) and `auto_pickup` (auto-fix).
*/
import type { StateLabel } from "../providers/provider.js";
import {
getSessionForTier,
getWorker,
updateWorker,
type Project,
} from "../projects.js";
/**
 * A single problem detected on a worker by `checkWorkerHealth`.
 *
 * The optional fields are only filled in by the checks for which they are
 * relevant; see the individual field notes.
 */
export type HealthIssue = {
  /** Which health check produced this issue. */
  type:
    | "active_no_session"
    | "zombie_session"
    | "stale_worker"
    | "inactive_with_issue";
  /** `critical` for session problems, `warning` for stale / leftover-issue problems. */
  severity: "critical" | "warning";
  /** Name of the project the worker belongs to. */
  project: string;
  /** Group identifier the check was run for. */
  groupId: string;
  /** Worker role that was inspected. */
  role: "dev" | "qa";
  /** Human-readable description of the problem. */
  message: string;
  /** Worker tier at detection time (reported by the session-related checks). */
  tier?: string | null;
  /** Session key involved (reported by the zombie and stale checks). */
  sessionKey?: string | null;
  /** Hours the worker has been active, rounded to one decimal (stale check only). */
  hoursActive?: number;
  /** Issue id still attached to the worker (inactive-with-issue and stale checks). */
  issueId?: string | null;
};
/**
 * Outcome of one health check: the issue that was found plus what, if
 * anything, was done about it.
 */
export type HealthFix = {
  /** The detected problem. */
  issue: HealthIssue;
  /** True once the auto-fix for this issue has been applied; false in read-only runs. */
  fixed: boolean;
  /** Text naming the labels involved, set when the issue label was successfully reverted. */
  labelReverted?: string;
  /** True when reverting the issue label was attempted but failed. */
  labelRevertFailed?: boolean;
};
/**
 * Run the worker health checks for one project role and, when `autoFix` is
 * set, repair whatever they find.
 *
 * All checks are evaluated against a single snapshot of the worker read at
 * entry:
 * 1. `active_no_session` — worker is active but has no session key for its tier.
 * 2. `zombie_session` — worker is active, but its session key is missing from
 *    a non-empty `activeSessions` list.
 * 3. `inactive_with_issue` — worker is inactive but still holds an `issueId`.
 * 4. `stale_worker` — worker has a session and has been active for more than
 *    two hours.
 *
 * Checks 2 and 4 can both match the same worker. In that case both issues are
 * reported, but the repair (label revert + deactivation) is performed only
 * once.
 *
 * @param opts.workspaceDir - Passed through to `updateWorker` when a fix is written.
 * @param opts.groupId - Passed to `updateWorker` and copied into every reported issue.
 * @param opts.project - Project whose worker is inspected; its `name` is reported.
 * @param opts.role - Worker role to inspect; also selects which labels are used
 *   when reverting (`Doing` → `To Do` for dev, `Testing` → `To Test` for qa).
 * @param opts.activeSessions - Session keys considered alive. An empty list
 *   disables the zombie check.
 * @param opts.autoFix - When false the function only reports; nothing is written.
 * @param opts.provider - Used to move the issue label back when a worker is deactivated.
 * @returns One entry per detected issue, in check order; empty when nothing is wrong.
 *   Failures of `provider.transitionLabel` are recorded on the fix as
 *   `labelRevertFailed`; failures of `updateWorker` are not caught here.
 */
export async function checkWorkerHealth(opts: {
  workspaceDir: string;
  groupId: string;
  project: Project;
  role: "dev" | "qa";
  activeSessions: string[];
  autoFix: boolean;
  provider: {
    transitionLabel(id: number, from: StateLabel, to: StateLabel): Promise<void>;
  };
}): Promise<HealthFix[]> {
  const { workspaceDir, groupId, project, role, activeSessions, autoFix, provider } = opts;
  const fixes: HealthFix[] = [];
  const worker = getWorker(project, role);
  const sessionKey = worker.tier ? getSessionForTier(worker, worker.tier) : null;
  const revertLabel: StateLabel = role === "dev" ? "To Do" : "To Test";
  const currentLabel: StateLabel = role === "dev" ? "Doing" : "Testing";
  const staleAfterHours = 2;

  // `worker` is a snapshot and is not refreshed after `updateWorker`, so remember
  // when a check has already deactivated it to avoid repeating the repair.
  let deactivated = false;

  /** Move the worker's issue back to its queue label and record the outcome on `fix`. */
  async function revertIssueLabel(fix: HealthFix) {
    if (!worker.issueId) return;
    // Only the first id of a comma-separated list is used.
    const id = Number(worker.issueId.split(",")[0]);
    if (!Number.isInteger(id) || id <= 0) {
      // Not a usable issue number — don't send NaN/0 to the provider.
      fix.labelRevertFailed = true;
      return;
    }
    try {
      await provider.transitionLabel(id, currentLabel, revertLabel);
      fix.labelReverted = `${currentLabel} → ${revertLabel}`;
    } catch {
      // Best effort: the worker is still deactivated, the failure is surfaced on the fix.
      fix.labelRevertFailed = true;
    }
  }

  // Check 1: Active but no session key for current tier
  if (worker.active && !sessionKey) {
    const fix: HealthFix = {
      issue: {
        type: "active_no_session", severity: "critical",
        project: project.name, groupId, role,
        tier: worker.tier,
        message: `${role.toUpperCase()} active but no session for tier "${worker.tier}"`,
      },
      fixed: false,
    };
    if (autoFix) {
      // NOTE(review): unlike checks 2 and 4 this path does not revert the issue
      // label — confirm whether that is intentional.
      await updateWorker(workspaceDir, groupId, role, { active: false, issueId: null, startTime: null });
      deactivated = true;
      fix.fixed = true;
    }
    fixes.push(fix);
  }

  // Check 2: Active with session but session is dead (zombie).
  // Skipped when `activeSessions` is empty — presumably so that a missing or
  // unavailable session list is not read as "every session is dead".
  if (worker.active && sessionKey && activeSessions.length > 0 && !activeSessions.includes(sessionKey)) {
    const fix: HealthFix = {
      issue: {
        type: "zombie_session", severity: "critical",
        project: project.name, groupId, role,
        sessionKey, tier: worker.tier,
        message: `${role.toUpperCase()} session not in active sessions list`,
      },
      fixed: false,
    };
    if (autoFix) {
      await revertIssueLabel(fix);
      // Drop the dead session key for the current tier while keeping the others.
      const sessions = { ...worker.sessions };
      if (worker.tier) sessions[worker.tier] = null;
      await updateWorker(workspaceDir, groupId, role, { active: false, issueId: null, startTime: null, sessions });
      deactivated = true;
      fix.fixed = true;
    }
    fixes.push(fix);
  }

  // Check 3: Inactive but still has issueId
  if (!worker.active && worker.issueId) {
    const fix: HealthFix = {
      issue: {
        type: "inactive_with_issue", severity: "warning",
        project: project.name, groupId, role,
        issueId: worker.issueId,
        message: `${role.toUpperCase()} inactive but still has issueId "${worker.issueId}"`,
      },
      fixed: false,
    };
    if (autoFix) {
      await updateWorker(workspaceDir, groupId, role, { issueId: null });
      fix.fixed = true;
    }
    fixes.push(fix);
  }

  // Check 4: Active for >2 hours (stale)
  if (worker.active && worker.startTime && sessionKey) {
    // An unparseable startTime yields NaN, which never exceeds the threshold.
    const hours = (Date.now() - new Date(worker.startTime).getTime()) / 3_600_000;
    if (hours > staleAfterHours) {
      const hoursActive = Math.round(hours * 10) / 10;
      const fix: HealthFix = {
        issue: {
          type: "stale_worker", severity: "warning",
          project: project.name, groupId, role,
          hoursActive,
          sessionKey, issueId: worker.issueId,
          message: `${role.toUpperCase()} active for ${hoursActive}h — may need attention`,
        },
        fixed: false,
      };
      if (autoFix) {
        if (!deactivated) {
          await revertIssueLabel(fix);
          await updateWorker(workspaceDir, groupId, role, { active: false, issueId: null, startTime: null });
          deactivated = true;
        }
        // If the zombie check already deactivated this worker, the stale
        // condition is resolved too; don't revert the label a second time.
        fix.fixed = true;
      }
      fixes.push(fix);
    }
  }

  return fixes;
}