From 4f2be8e551d2c30754235fa022c5f0aeb440993d Mon Sep 17 00:00:00 2001 From: Lauren ten Hoor Date: Tue, 10 Feb 2026 15:48:16 +0800 Subject: [PATCH] feat: create TEST.md markdown file (#78) --- README.md | 29 +- TEST.md | 2 +- docs/ARCHITECTURE.md | 24 +- lib/dispatch.ts | 40 +- lib/notify.ts | 4 +- lib/tools/heartbeat-tick.ts | 35 ++ lib/tools/session-health.ts | 30 +- lib/tools/task-complete.ts | 36 +- package.json | 2 +- test/helpers.ts | 374 +++++++++++++++++ {lib => test}/projects.test.ts | 2 +- test/scenarios.test.ts | 740 +++++++++++++++++++++++++++++++++ 12 files changed, 1291 insertions(+), 27 deletions(-) create mode 100644 test/helpers.ts rename {lib => test}/projects.test.ts (99%) create mode 100644 test/scenarios.test.ts diff --git a/README.md b/README.md index 0473bc9..9309355 100644 --- a/README.md +++ b/README.md @@ -89,10 +89,10 @@ stateDiagram-v2 [*] --> Planning Planning --> ToDo: Ready for development - ToDo --> Doing: task_pickup (DEV) + ToDo --> Doing: task_pickup (DEV) ⇄ blocked Doing --> ToTest: task_complete (DEV done) - ToTest --> Testing: task_pickup (QA) or auto-chain + ToTest --> Testing: task_pickup (QA) / auto-chain ⇄ blocked Testing --> Done: task_complete (QA pass) Testing --> ToImprove: task_complete (QA fail) Testing --> Refining: task_complete (QA refine) @@ -107,13 +107,22 @@ stateDiagram-v2 Workers (DEV/QA sub-agent sessions) call `task_complete` directly when they finish — no orchestrator involvement needed for the state transition. Workers can also call `task_create` to file follow-up issues they discover during work. +### Completion enforcement + +Three layers guarantee that `task_complete` always runs, preventing tasks from getting stuck in "Doing" or "Testing" forever: + +1. **Completion contract** — Every task message includes a mandatory section requiring the worker to call `task_complete`, even on failure. Workers use `"blocked"` if stuck. +2. **Blocked result** — Both DEV and QA can return `"blocked"` to gracefully put a task back in queue (`Doing → To Do`, `Testing → To Test`) instead of silently dying. +3. **Stale worker watchdog** — The heartbeat health check detects workers active >2 hours and auto-reverts labels to queue, catching sessions that crashed or ran out of context. + ### Auto-chaining When a project has `autoChain: true`, `task_complete` automatically dispatches the next step: - **DEV "done"** → QA is dispatched immediately (using the qa tier) - **QA "fail"** → DEV fix is dispatched immediately (reuses previous DEV tier) -- **QA "pass" / "refine"** → no chaining (pipeline done or needs human input) +- **QA "pass" / "refine" / "blocked"** → no chaining (pipeline done, needs human input, or returned to queue) +- **DEV "blocked"** → no chaining (returned to queue for retry) When `autoChain` is false, `task_complete` returns a `nextAction` hint for the orchestrator to act on. @@ -237,21 +246,23 @@ Pick up a task from the issue queue for a DEV or QA worker. ### `task_complete` -Complete a task with one of four results. Called by workers (DEV/QA sub-agent sessions) directly, or by the orchestrator. +Complete a task with a result. Called by workers (DEV/QA sub-agent sessions) directly, or by the orchestrator. **Parameters:** - `role` ("dev" | "qa", required) -- `result` ("done" | "pass" | "fail" | "refine", required) +- `result` ("done" | "pass" | "fail" | "refine" | "blocked", required) - `projectGroupId` (string, required) - `summary` (string, optional) — For the Telegram announcement **Results:** - **DEV "done"** — Pulls latest code, moves label `Doing` → `To Test`, deactivates worker. If `autoChain` enabled, automatically dispatches QA. +- **DEV "blocked"** — Moves label `Doing` → `To Do`, deactivates worker. Task returns to queue for retry. - **QA "pass"** — Moves label `Testing` → `Done`, closes issue, deactivates worker - **QA "fail"** — Moves label `Testing` → `To Improve`, reopens issue. If `autoChain` enabled, automatically dispatches DEV fix (reuses previous DEV tier). - **QA "refine"** — Moves label `Testing` → `Refining`, awaits human decision +- **QA "blocked"** — Moves label `Testing` → `To Test`, deactivates worker. Task returns to QA queue for retry. ### `task_update` @@ -321,10 +332,10 @@ Detects and optionally fixes state inconsistencies. **Checks:** -- Active worker with no session key (critical) -- Active worker whose session is dead — zombie (critical) -- Worker active for >2 hours (warning) -- Inactive worker with lingering issue ID (warning) +- Active worker with no session key (critical, auto-fixable) +- Active worker whose session is dead — zombie (critical, auto-fixable) +- Worker active for >2 hours — stale watchdog (warning, auto-fixable: reverts label to queue) +- Inactive worker with lingering issue ID (warning, auto-fixable) ### `project_register` diff --git a/TEST.md b/TEST.md index 30bf343..c3e9aa2 100644 --- a/TEST.md +++ b/TEST.md @@ -10,4 +10,4 @@ This is a test file created by DevClaw. ## Content -Some sample content here. +Some sample content here. \ No newline at end of file diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 682d0c4..45a4908 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -403,6 +403,25 @@ Label: "Testing" → "Refining" Issue needs human decision. Pipeline pauses until human moves it to "To Do" or closes it. +#### 7d. Blocked (DEV or QA) + +``` +DEV Blocked: "Doing" → "To Do" +QA Blocked: "Testing" → "To Test" +``` + +Worker cannot complete (missing info, environment errors, etc.). Issue returns to queue for retry. No auto-chain — the task is available for the next heartbeat pickup. + +### Completion enforcement + +Three layers guarantee that `task_complete` always runs: + +1. **Completion contract** — Every task message sent to a worker session includes a mandatory `## MANDATORY: Task Completion` section listing available results and requiring `task_complete` even on failure. Workers are instructed to use `"blocked"` if stuck. + +2. **Blocked result** — Both DEV and QA can use `"blocked"` to gracefully return a task to queue without losing work. DEV blocked: `Doing → To Do`. QA blocked: `Testing → To Test`. This gives workers an escape hatch instead of silently dying. + +3. **Stale worker watchdog** — The heartbeat's health check detects workers active for >2 hours. With `autoFix=true`, it deactivates the worker and reverts the label back to queue. This catches sessions that crashed, ran out of context, or otherwise failed without calling `task_complete`. The `session_health` tool provides the same check for manual invocation. + ### Phase 8: Heartbeat (continuous) The heartbeat runs periodically (triggered by the agent or a scheduled message). It combines health check + queue scan: @@ -493,6 +512,8 @@ Every piece of data and where it lives: │ "✅ DEV done #42 — Login page with OAuth. Moved to QA queue."│ │ "🎉 QA PASS #42. Issue closed." │ │ "❌ QA FAIL #42 — OAuth redirect broken. Sent back to DEV." │ +│ "🚫 DEV BLOCKED #42 — Missing dependencies. Returned to queue."│ +│ "🚫 QA BLOCKED #42 — Env not available. Returned to QA queue."│ └─────────────────────────────────────────────────────────────────┘ ┌─────────────────────────────────────────────────────────────────┐ @@ -575,7 +596,8 @@ Provider selection is handled by `createProvider()` in `lib/providers/index.ts`. | projects.json corrupted | Tool can't parse JSON | Manual fix needed. Atomic writes (temp+rename) prevent partial writes. | | Label out of sync | `task_pickup` verifies label before transitioning | Throws error if label doesn't match expected state. Agent reports mismatch. | | Worker already active | `task_pickup` checks `active` flag | Throws error: "DEV worker already active on project". Must complete current task first. | -| Stale worker (>2h) | `session_health` flags as warning | Agent can investigate or `autoFix` can clear. | +| Stale worker (>2h) | `session_health` and heartbeat health check | `autoFix`: deactivates worker, reverts label to queue (To Do / To Test). Task available for next pickup. | +| Worker stuck/blocked | Worker calls `task_complete` with `"blocked"` | Deactivates worker, reverts label to queue. Issue available for retry. | | `project_register` fails | Plugin catches error during label creation or state write | Clean error returned. No partial state — labels are idempotent, projects.json not written until all labels succeed. | ## File locations diff --git a/lib/dispatch.ts b/lib/dispatch.ts index 5f908c5..40663ae 100644 --- a/lib/dispatch.ts +++ b/lib/dispatch.ts @@ -55,7 +55,7 @@ export type DispatchResult = { * Reads role-specific instructions from workspace/roles//.md * with fallback to workspace/roles/default/.md. */ -async function buildTaskMessage(opts: { +export async function buildTaskMessage(opts: { workspaceDir: string; projectName: string; role: "dev" | "qa"; @@ -104,6 +104,12 @@ async function buildTaskMessage(opts: { } } + // Build available results based on role + const availableResults = + role === "dev" + ? '"done" (completed successfully) or "blocked" (cannot complete, need help)' + : '"pass" (approved), "fail" (issues found), "refine" (needs human input), or "blocked" (cannot complete)'; + const parts = [ `${role.toUpperCase()} task for project "${projectName}" — Issue #${issueId}`, ``, @@ -118,6 +124,24 @@ async function buildTaskMessage(opts: { parts.push(``, `---`, ``, roleInstructions.trim()); } + // Mandatory completion contract + parts.push( + ``, + `---`, + ``, + `## MANDATORY: Task Completion`, + ``, + `When you finish this task, you MUST call \`task_complete\` with:`, + `- \`role\`: "${role}"`, + `- \`projectGroupId\`: "${groupId}"`, + `- \`result\`: ${availableResults}`, + `- \`summary\`: brief description of what you did`, + ``, + `⚠️ You MUST call task_complete even if you encounter errors or cannot finish.`, + `Use "blocked" with a summary explaining why you're stuck.`, + `Never end your session without calling task_complete.`, + ); + return parts.join("\n"); } @@ -193,8 +217,18 @@ export async function dispatchTask( await execFileAsync( "openclaw", - ["agent", "--session-id", sessionKey!, "--message", taskMessage], - { timeout: 60_000 }, + [ + "gateway", + "call", + "agent", + "--params", + JSON.stringify({ + idempotencyKey: randomUUID(), + sessionId: sessionKey!, + message: taskMessage, + }), + ], + { timeout: 30_000 }, ); dispatched = true; diff --git a/lib/notify.ts b/lib/notify.ts index a12aba6..9e4f482 100644 --- a/lib/notify.ts +++ b/lib/notify.ts @@ -42,7 +42,7 @@ export type NotifyEvent = groupId: string; issueId: number; role: "dev" | "qa"; - result: "done" | "pass" | "fail" | "refine"; + result: "done" | "pass" | "fail" | "refine" | "blocked"; summary?: string; nextState?: string; } @@ -76,6 +76,7 @@ function buildMessage(event: NotifyEvent): string { pass: "🎉", fail: "❌", refine: "🤔", + blocked: "🚫", }; const icon = icons[event.result] ?? "📋"; const resultText: Record = { @@ -83,6 +84,7 @@ function buildMessage(event: NotifyEvent): string { pass: "PASSED", fail: "FAILED", refine: "needs refinement", + blocked: "BLOCKED", }; const text = resultText[event.result] ?? event.result; let msg = `${icon} ${event.role.toUpperCase()} ${text} #${event.issueId}`; diff --git a/lib/tools/heartbeat-tick.ts b/lib/tools/heartbeat-tick.ts index f8090f5..5140d4d 100644 --- a/lib/tools/heartbeat-tick.ts +++ b/lib/tools/heartbeat-tick.ts @@ -214,6 +214,41 @@ async function checkAndFixWorkerHealth( }); } + // Check 4: Active for >2 hours (stale watchdog) + // A stale worker likely crashed or ran out of context without calling task_complete. + // Auto-fix reverts the label back to queue so the issue can be picked up again. + if (worker.active && worker.startTime && currentSessionKey) { + const startMs = new Date(worker.startTime).getTime(); + const nowMs = Date.now(); + const hoursActive = (nowMs - startMs) / (1000 * 60 * 60); + + if (hoursActive > 2) { + if (autoFix) { + const revertLabel: StateLabel = role === "dev" ? "To Do" : "To Test"; + const currentLabel: StateLabel = role === "dev" ? "Doing" : "Testing"; + try { + if (worker.issueId) { + const primaryIssueId = Number(worker.issueId.split(",")[0]); + await provider.transitionLabel(primaryIssueId, currentLabel, revertLabel); + } + } catch { + // Best-effort label revert + } + + await updateWorker(workspaceDir, groupId, role, { + active: false, + issueId: null, + }); + } + fixes.push({ + project: project.name, + role, + type: "stale_worker", + fixed: autoFix, + }); + } + } + return fixes; } diff --git a/lib/tools/session-health.ts b/lib/tools/session-health.ts index dd92ca8..85317e2 100644 --- a/lib/tools/session-health.ts +++ b/lib/tools/session-health.ts @@ -129,14 +129,15 @@ export function createSessionHealthTool(api: OpenClawPluginApi) { issues.push(issue); } - // Check 3: Active for >2 hours (stale) + // Check 3: Active for >2 hours (stale watchdog) + // Worker likely crashed or ran out of context without calling task_complete. if (worker.active && worker.startTime) { const startMs = new Date(worker.startTime).getTime(); const nowMs = Date.now(); const hoursActive = (nowMs - startMs) / (1000 * 60 * 60); if (hoursActive > 2) { - issues.push({ + const issue: Record = { type: "stale_worker", severity: "warning", project: project.name, @@ -146,7 +147,30 @@ export function createSessionHealthTool(api: OpenClawPluginApi) { sessionKey: currentSessionKey, issueId: worker.issueId, message: `${role.toUpperCase()} has been active for ${Math.round(hoursActive * 10) / 10}h — may need attention`, - }); + }; + + if (autoFix) { + // Revert issue label back to queue + const revertLabel: StateLabel = role === "dev" ? "To Do" : "To Test"; + const currentLabel: StateLabel = role === "dev" ? "Doing" : "Testing"; + try { + if (worker.issueId) { + const primaryIssueId = Number(worker.issueId.split(",")[0]); + await provider.transitionLabel(primaryIssueId, currentLabel, revertLabel); + issue.labelReverted = `${currentLabel} → ${revertLabel}`; + } + } catch { + issue.labelRevertFailed = true; + } + + await updateWorker(workspaceDir, groupId, role, { + active: false, + issueId: null, + }); + issue.fixed = true; + fixesApplied++; + } + issues.push(issue); } } diff --git a/lib/tools/task-complete.ts b/lib/tools/task-complete.ts index 7efc521..179a813 100644 --- a/lib/tools/task-complete.ts +++ b/lib/tools/task-complete.ts @@ -33,7 +33,7 @@ export function createTaskCompleteTool(api: OpenClawPluginApi) { return (ctx: ToolContext) => ({ name: "task_complete", label: "Task Complete", - description: `Complete a task: DEV done, QA pass, QA fail, or QA refine. Atomically handles: label transition, projects.json update, issue close/reopen, and audit logging. If the project has autoChain enabled, automatically dispatches the next step (DEV done → QA, QA fail → DEV fix).`, + description: `Complete a task: DEV done/blocked, QA pass/fail/refine/blocked. Atomically handles: label transition, projects.json update, issue close/reopen, and audit logging. If the project has autoChain enabled, automatically dispatches the next step (DEV done → QA, QA fail → DEV fix). Use "blocked" when the worker cannot complete the task (errors, missing info, etc.).`, parameters: { type: "object", required: ["role", "result", "projectGroupId"], @@ -45,9 +45,9 @@ export function createTaskCompleteTool(api: OpenClawPluginApi) { }, result: { type: "string", - enum: ["done", "pass", "fail", "refine"], + enum: ["done", "pass", "fail", "refine", "blocked"], description: - 'Completion result: "done" (DEV finished), "pass" (QA approved), "fail" (QA found issues), "refine" (needs human input)', + 'Completion result: "done" (DEV finished), "pass" (QA approved), "fail" (QA found issues), "refine" (needs human input), "blocked" (cannot complete, needs escalation)', }, projectGroupId: { type: "string", @@ -62,7 +62,7 @@ export function createTaskCompleteTool(api: OpenClawPluginApi) { async execute(_id: string, params: Record) { const role = params.role as "dev" | "qa"; - const result = params.result as "done" | "pass" | "fail" | "refine"; + const result = params.result as "done" | "pass" | "fail" | "refine" | "blocked"; const groupId = params.projectGroupId as string; const summary = params.summary as string | undefined; const workspaceDir = ctx.workspaceDir; @@ -72,14 +72,14 @@ export function createTaskCompleteTool(api: OpenClawPluginApi) { } // Validate result matches role - if (role === "dev" && result !== "done") { + if (role === "dev" && result !== "done" && result !== "blocked") { throw new Error( - `DEV can only complete with result "done", got "${result}"`, + `DEV can only complete with "done" or "blocked", got "${result}"`, ); } if (role === "qa" && result === "done") { throw new Error( - `QA cannot use result "done". Use "pass", "fail", or "refine".`, + `QA cannot use result "done". Use "pass", "fail", "refine", or "blocked".`, ); } @@ -267,6 +267,24 @@ export function createTaskCompleteTool(api: OpenClawPluginApi) { output.announcement = `🤔 QA REFINE #${issueId}${summary ? ` — ${summary}` : ""}. Awaiting human decision.`; } + // === DEV BLOCKED === + if (role === "dev" && result === "blocked") { + await deactivateWorker(workspaceDir, groupId, "dev"); + await provider.transitionLabel(issueId, "Doing", "To Do"); + + output.labelTransition = "Doing → To Do"; + output.announcement = `🚫 DEV BLOCKED #${issueId}${summary ? ` — ${summary}` : ""}. Returned to queue.`; + } + + // === QA BLOCKED === + if (role === "qa" && result === "blocked") { + await deactivateWorker(workspaceDir, groupId, "qa"); + await provider.transitionLabel(issueId, "Testing", "To Test"); + + output.labelTransition = "Testing → To Test"; + output.announcement = `🚫 QA BLOCKED #${issueId}${summary ? ` — ${summary}` : ""}. Returned to QA queue.`; + } + // Send notification to project group const pluginConfig = api.pluginConfig as Record | undefined; const notifyConfig = getNotificationConfig(pluginConfig); @@ -275,12 +293,16 @@ export function createTaskCompleteTool(api: OpenClawPluginApi) { let nextState: string | undefined; if (role === "dev" && result === "done") { nextState = "QA queue"; + } else if (role === "dev" && result === "blocked") { + nextState = "returned to queue"; } else if (role === "qa" && result === "pass") { nextState = "Done!"; } else if (role === "qa" && result === "fail") { nextState = "back to DEV"; } else if (role === "qa" && result === "refine") { nextState = "awaiting human decision"; + } else if (role === "qa" && result === "blocked") { + nextState = "returned to QA queue"; } await notify( diff --git a/package.json b/package.json index add54a5..9c563ad 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,7 @@ "scripts": { "check": "tsc --noEmit", "watch": "tsc --noEmit --watch", - "test": "node --test lib/**/*.test.ts" + "test": "npx tsx --test test/**/*.test.ts" }, "peerDependencies": { "openclaw": ">=2026.0.0" diff --git a/test/helpers.ts b/test/helpers.ts new file mode 100644 index 0000000..f07b96d --- /dev/null +++ b/test/helpers.ts @@ -0,0 +1,374 @@ +/** + * helpers.ts — Shared utilities for DevClaw integration tests. + * + * Provides: gateway RPC wrapper, GitHub issue helpers, session verification, + * mock context factories, and automatic test resource cleanup. + */ +import { execFile } from "node:child_process"; +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { promisify } from "node:util"; +import type { ToolContext } from "../lib/types.js"; +import { type ProjectsData, writeProjects } from "../lib/projects.js"; + +const execFileAsync = promisify(execFile); + +// ── Constants ─────────────────────────────────────────────────────────────── + +/** Prefix for all test session keys — used for cleanup sweeps */ +export const TEST_SESSION_PREFIX = "agent:devclaw:subagent:test-"; + +/** Group ID used for the test project in projects.json. + * Uses the real DevClaw Telegram group so notifications are visible in the channel. */ +export const TEST_GROUP_ID = "-5239235162"; + +/** Repo path for test issues (devclaw repo) */ +export const TEST_REPO = "laurentenhoor/devclaw"; + +// ── Gateway RPC ───────────────────────────────────────────────────────────── + +/** + * Call an OpenClaw gateway method. Returns parsed JSON response. + * Throws on gateway error or timeout. + */ +export async function gateway( + method: string, + params: Record = {}, +): Promise> { + const { stdout } = await execFileAsync( + "openclaw", + [ + "gateway", + "call", + method, + "--params", + JSON.stringify(params), + "--json", + ], + { timeout: 30_000 }, + ); + + // openclaw may output plugin registration lines before JSON + const jsonStart = stdout.indexOf("{"); + if (jsonStart === -1) { + throw new Error(`No JSON in gateway response for ${method}: ${stdout}`); + } + return JSON.parse(stdout.slice(jsonStart)); +} + +// ── Session helpers ───────────────────────────────────────────────────────── + +/** Check if a session exists in the gateway */ +export async function sessionExists(key: string): Promise { + try { + const result = await gateway("sessions.list", { + limit: 200, + agentId: "devclaw", + }); + const sessions = result.sessions as Array<{ key: string }>; + return sessions.some((s) => s.key === key); + } catch { + return false; + } +} + +/** Get token count for a session (0 = never started) */ +export async function getSessionTokens( + key: string, +): Promise { + try { + const result = await gateway("sessions.list", { + limit: 200, + agentId: "devclaw", + }); + const sessions = result.sessions as Array<{ + key: string; + totalTokens?: number; + }>; + const session = sessions.find((s) => s.key === key); + return session ? (session.totalTokens ?? 0) : null; + } catch { + return null; + } +} + +// ── GitHub issue helpers ──────────────────────────────────────────────────── + +/** Get current labels on a GitHub issue */ +export async function getIssueLabels( + repo: string, + issueId: number, +): Promise { + const { stdout } = await execFileAsync( + "gh", + ["issue", "view", String(issueId), "--repo", repo, "--json", "labels"], + { timeout: 15_000 }, + ); + const data = JSON.parse(stdout) as { labels: Array<{ name: string }> }; + return data.labels.map((l) => l.name); +} + +/** Get current state of a GitHub issue (OPEN/CLOSED) */ +export async function getIssueState( + repo: string, + issueId: number, +): Promise { + const { stdout } = await execFileAsync( + "gh", + ["issue", "view", String(issueId), "--repo", repo, "--json", "state"], + { timeout: 15_000 }, + ); + const data = JSON.parse(stdout) as { state: string }; + return data.state; +} + +/** Close a GitHub issue (best-effort) */ +export async function closeIssue( + repo: string, + issueId: number, +): Promise { + try { + await execFileAsync( + "gh", + ["issue", "close", String(issueId), "--repo", repo], + { timeout: 15_000 }, + ); + } catch { + // best-effort + } +} + +// ── Cleanup registry ──────────────────────────────────────────────────────── + +/** + * Tracks all test resources (sessions + issues) for guaranteed cleanup. + * + * Usage: + * const cleanup = new TestCleanup(); + * cleanup.trackSession("agent:devclaw:subagent:test-xxx"); + * cleanup.trackIssue("laurentenhoor/devclaw", 42); + * await cleanup.cleanAll(); // in after() hook + */ +export class TestCleanup { + private sessions = new Set(); + private issues: Array<{ repo: string; id: number }> = []; + + trackSession(key: string): void { + this.sessions.add(key); + } + + trackIssue(repo: string, id: number): void { + this.issues.push({ repo, id }); + } + + async cleanAll(): Promise { + // Delete tracked sessions + for (const key of this.sessions) { + try { + await gateway("sessions.delete", { + key, + deleteTranscript: true, + }); + } catch { + // best-effort + } + } + this.sessions.clear(); + + // Close tracked issues + for (const { repo, id } of this.issues) { + await closeIssue(repo, id); + } + this.issues.length = 0; + } +} + +/** + * Safety sweep: find and delete any test sessions from previous failed runs. + * Scans sessions.list for keys matching TEST_SESSION_PREFIX. + */ +export async function sweepTestSessions(): Promise { + let cleaned = 0; + try { + const result = await gateway("sessions.list", { + limit: 200, + agentId: "devclaw", + }); + const sessions = result.sessions as Array<{ key: string }>; + for (const session of sessions) { + if (session.key.startsWith(TEST_SESSION_PREFIX)) { + try { + await gateway("sessions.delete", { + key: session.key, + deleteTranscript: true, + }); + cleaned++; + } catch { + // ignore + } + } + } + } catch { + // ignore + } + return cleaned; +} + +// ── Mock factories ────────────────────────────────────────────────────────── + +/** + * Create a mock ToolContext simulating a group chat for the test project. + */ +export function makeTestContext( + groupId: string, + workspaceDir: string, +): ToolContext { + return { + config: {}, + workspaceDir, + agentDir: "/tmp/devclaw-test-agent", + agentId: "devclaw", + sessionKey: `agent:devclaw:telegram:group:${groupId}`, + messageChannel: "telegram", + sandboxed: false, + }; +} + +/** + * Create a minimal mock OpenClawPluginApi for testing. + * Only provides the fields tools actually use: pluginConfig, logger, resolvePath. + */ +export function makeTestApi(pluginConfig?: Record): any { + return { + id: "devclaw", + name: "DevClaw", + source: "test", + config: {}, + pluginConfig: pluginConfig ?? { + devClawAgentIds: ["devclaw"], + models: { + junior: "anthropic/claude-haiku-4-5", + medior: "anthropic/claude-sonnet-4-5", + senior: "anthropic/claude-opus-4-5", + qa: "anthropic/claude-sonnet-4-5", + }, + projectExecution: "parallel", + }, + logger: { + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + }, + runtime: {}, + registerTool: () => {}, + registerHook: () => {}, + registerHttpHandler: () => {}, + registerHttpRoute: () => {}, + registerChannel: () => {}, + registerGatewayMethod: () => {}, + registerCli: () => {}, + registerService: () => {}, + registerProvider: () => {}, + registerCommand: () => {}, + resolvePath: (input: string) => input.replace("~", os.homedir()), + on: () => {}, + }; +} + +// ── Workspace helpers ─────────────────────────────────────────────────────── + +/** + * Create a temp workspace directory with initial projects.json and role files. + * Returns the workspace path. Caller must clean up via fs.rm(). + */ +export async function createTestWorkspace(opts?: { + groupId?: string; + autoChain?: boolean; +}): Promise { + const groupId = opts?.groupId ?? TEST_GROUP_ID; + const autoChain = opts?.autoChain ?? false; + + const tempDir = await fs.mkdtemp( + path.join(os.tmpdir(), "devclaw-scenario-test-"), + ); + + // Create required directories + await fs.mkdir(path.join(tempDir, "memory"), { recursive: true }); + await fs.mkdir(path.join(tempDir, "roles", "default"), { recursive: true }); + await fs.mkdir(path.join(tempDir, "roles", "devclaw"), { + recursive: true, + }); + + // Write initial projects.json + const initialData: ProjectsData = { + projects: { + [groupId]: { + name: "devclaw", + repo: `~/.openclaw/extensions/devclaw`, + groupName: "DevClaw - DevClaw", + deployUrl: "", + baseBranch: "main", + deployBranch: "main", + autoChain, + channel: "telegram", + dev: { + active: false, + issueId: null, + startTime: null, + model: null, + sessions: { junior: null, medior: null, senior: null }, + }, + qa: { + active: false, + issueId: null, + startTime: null, + model: null, + sessions: { qa: null }, + }, + }, + }, + }; + await writeProjects(tempDir, initialData); + + // Write minimal role files + await fs.writeFile( + path.join(tempDir, "roles", "default", "dev.md"), + "# DEV Worker Instructions\n\nThis is a test worker. Just acknowledge the task.\n", + ); + await fs.writeFile( + path.join(tempDir, "roles", "default", "qa.md"), + "# QA Worker Instructions\n\nThis is a test QA worker. Just acknowledge the task.\n", + ); + + return tempDir; +} + +// ── Result parser ─────────────────────────────────────────────────────────── + +/** + * Parse the result from a tool's execute() call. + * Tools return jsonResult() which wraps the payload in AgentToolResult format. + */ +export function parseToolResult(result: unknown): Record { + // jsonResult returns [{ type: "text", text: JSON.stringify(payload) }] + // or { content: [{ type: "text", text: "..." }] } + if (Array.isArray(result)) { + const first = result[0]; + if (first && typeof first === "object" && "text" in first) { + return JSON.parse(first.text as string); + } + } + if ( + result && + typeof result === "object" && + "content" in result + ) { + const content = (result as any).content; + if (Array.isArray(content) && content[0]?.text) { + return JSON.parse(content[0].text); + } + } + throw new Error(`Cannot parse tool result: ${JSON.stringify(result)}`); +} diff --git a/lib/projects.test.ts b/test/projects.test.ts similarity index 99% rename from lib/projects.test.ts rename to test/projects.test.ts index b02a678..8958549 100644 --- a/lib/projects.test.ts +++ b/test/projects.test.ts @@ -13,7 +13,7 @@ import { deactivateWorker, readProjects, writeProjects, -} from "./projects.js"; +} from "../lib/projects.js"; describe("Session persistence", () => { let tempDir: string; diff --git a/test/scenarios.test.ts b/test/scenarios.test.ts new file mode 100644 index 0000000..937d730 --- /dev/null +++ b/test/scenarios.test.ts @@ -0,0 +1,740 @@ +/** + * scenarios.test.ts — Scenario-based integration tests for DevClaw plugin tools. + * + * Tests the full tool pipeline in realistic sequences against real gateway + GitHub. + * Each scenario exercises multiple tools in order, verifying BOTH return values + * AND actual side effects (session existence, issue labels, projects.json state). + * + * Prerequisites: + * - OpenClaw gateway running + * - `gh` CLI authenticated with access to laurentenhoor/devclaw + * - `openclaw` CLI in PATH + * + * Run with: npm test + */ +import { describe, it, before, after } from "node:test"; +import assert from "node:assert"; +import fs from "node:fs/promises"; + +import { createTaskCreateTool } from "../lib/tools/task-create.js"; +import { createTaskPickupTool } from "../lib/tools/task-pickup.js"; +import { createTaskCompleteTool } from "../lib/tools/task-complete.js"; +import { createQueueStatusTool } from "../lib/tools/queue-status.js"; +import { createSessionHealthTool } from "../lib/tools/session-health.js"; +import { readProjects, writeProjects } from "../lib/projects.js"; +import { resolveModel } from "../lib/tiers.js"; +import { + TestCleanup, + TEST_GROUP_ID, + TEST_REPO, + createTestWorkspace, + gateway, + getIssueLabels, + getIssueState, + makeTestApi, + makeTestContext, + parseToolResult, + sessionExists, + sweepTestSessions, +} from "./helpers.js"; + +// ── Suite-level setup ─────────────────────────────────────────────────────── + +describe("DevClaw Scenario Tests", { timeout: 240_000 }, () => { + before(async () => { + // Verify gateway is accessible + try { + await gateway("sessions.list", { limit: 1 }); + } catch (err) { + throw new Error( + `Gateway not accessible — cannot run integration tests: ${(err as Error).message}. ` + + `Ensure 'openclaw' is in PATH and gateway is running.`, + ); + } + + // Verify gh CLI is authenticated + try { + const { execFile } = await import("node:child_process"); + const { promisify } = await import("node:util"); + const execFileAsync = promisify(execFile); + await execFileAsync("gh", ["auth", "status"], { timeout: 10_000 }); + } catch (err) { + throw new Error( + `GitHub CLI not authenticated — cannot run integration tests: ${(err as Error).message}`, + ); + } + + // Sweep leftover test sessions from previous failed runs + await sweepTestSessions(); + }); + + // ── Scenario 1: Full DEV lifecycle ────────────────────────────────────── + + describe("Scenario 1: Full DEV lifecycle", () => { + const cleanup = new TestCleanup(); + let workspaceDir: string; + let api: ReturnType; + let ctx: ReturnType; + + let createdIssueId: number; + let spawnedSessionKey: string; + + before(async () => { + workspaceDir = await createTestWorkspace({ autoChain: false }); + api = makeTestApi(); + ctx = makeTestContext(TEST_GROUP_ID, workspaceDir); + }); + + after(async () => { + await cleanup.cleanAll(); + await fs.rm(workspaceDir, { recursive: true, force: true }); + }); + + it("step 1: task_create creates a test issue with To Do label", async () => { + const tool = createTaskCreateTool(api)(ctx); + const result = parseToolResult( + await tool.execute("test", { + projectGroupId: TEST_GROUP_ID, + title: "[TEST] Scenario 1 — DEV lifecycle test", + description: + "Automated integration test. This issue will be cleaned up automatically.", + label: "To Do", + }), + ); + + assert.strictEqual(result.success, true, "task_create should succeed"); + assert.ok(result.issue, "Should return issue object"); + createdIssueId = (result.issue as any).id as number; + assert.ok(createdIssueId > 0, "Issue ID should be positive"); + cleanup.trackIssue(TEST_REPO, createdIssueId); + + // Verify side effect: issue has "To Do" label in GitHub + const labels = await getIssueLabels(TEST_REPO, createdIssueId); + assert.ok( + labels.includes("To Do"), + `Issue should have "To Do" label, got: ${labels.join(", ")}`, + ); + }); + + it("step 2: task_pickup spawns a worker session", async () => { + const tool = createTaskPickupTool(api)(ctx); + const result = parseToolResult( + await tool.execute("test", { + issueId: createdIssueId, + role: "dev", + projectGroupId: TEST_GROUP_ID, + model: "junior", + }), + ); + + assert.strictEqual(result.success, true, `task_pickup should succeed: ${result.error ?? ""}`); + assert.strictEqual(result.sessionAction, "spawn", "Should spawn new session"); + + // task_pickup stores the session key in projects.json, not in the result + const data = await readProjects(workspaceDir); + const project = data.projects[TEST_GROUP_ID]; + assert.ok(project.dev.sessions.junior, "Should have session key in projects.json"); + spawnedSessionKey = project.dev.sessions.junior as string; + + // Track for cleanup + cleanup.trackSession(spawnedSessionKey); + + // Verify side effect: session exists in gateway + const exists = await sessionExists(spawnedSessionKey); + assert.ok(exists, `Session ${spawnedSessionKey} should exist in gateway`); + + // Verify side effect: issue label transitioned to "Doing" + const labels = await getIssueLabels(TEST_REPO, createdIssueId); + assert.ok( + labels.includes("Doing"), + `Issue should have "Doing" label after pickup, got: ${labels.join(", ")}`, + ); + + // Verify side effect: worker is active + assert.strictEqual(project.dev.active, true, "Worker should be active"); + assert.strictEqual( + project.dev.issueId, + String(createdIssueId), + "Worker should have correct issue ID", + ); + }); + + it("step 3: task_complete (dev done) transitions to To Test", async () => { + const tool = createTaskCompleteTool(api)(ctx); + const result = parseToolResult( + await tool.execute("test", { + role: "dev", + result: "done", + projectGroupId: TEST_GROUP_ID, + summary: "Test task completed by integration test", + }), + ); + + assert.strictEqual(result.success, true, `task_complete should succeed: ${result.error ?? ""}`); + assert.ok( + (result.labelTransition as string)?.includes("To Test"), + `Label should transition to "To Test", got: ${result.labelTransition}`, + ); + + // Verify side effect: issue label is now "To Test" + const labels = await getIssueLabels(TEST_REPO, createdIssueId); + assert.ok( + labels.includes("To Test"), + `Issue should have "To Test" label, got: ${labels.join(", ")}`, + ); + + // Verify side effect: worker deactivated but session preserved + const data = await readProjects(workspaceDir); + const project = data.projects[TEST_GROUP_ID]; + assert.strictEqual(project.dev.active, false, "Worker should be inactive"); + assert.strictEqual(project.dev.issueId, null, "Issue ID should be cleared"); + assert.strictEqual( + project.dev.sessions.junior, + spawnedSessionKey, + "Session should be PRESERVED after completion", + ); + }); + + it("step 4: task_pickup (qa) transitions to Testing", async () => { + const tool = createTaskPickupTool(api)(ctx); + const result = parseToolResult( + await tool.execute("test", { + issueId: createdIssueId, + role: "qa", + projectGroupId: TEST_GROUP_ID, + model: "qa", + }), + ); + + assert.strictEqual(result.success, true, `QA pickup should succeed: ${result.error ?? ""}`); + + // Read QA session key from projects.json + const data = await readProjects(workspaceDir); + const project = data.projects[TEST_GROUP_ID]; + assert.ok(project.qa.sessions.qa, "Should have QA session key in projects.json"); + cleanup.trackSession(project.qa.sessions.qa as string); + + // Verify side effect: issue label transitioned to "Testing" + const labels = await getIssueLabels(TEST_REPO, createdIssueId); + assert.ok( + labels.includes("Testing"), + `Issue should have "Testing" label after QA pickup, got: ${labels.join(", ")}`, + ); + + // Verify side effect: QA worker is active + assert.strictEqual(project.qa.active, true, "QA worker should be active"); + assert.strictEqual( + project.qa.issueId, + String(createdIssueId), + "QA worker should have correct issue ID", + ); + }); + + it("step 5: task_complete (qa pass) transitions to Done and closes issue", async () => { + const tool = createTaskCompleteTool(api)(ctx); + const result = parseToolResult( + await tool.execute("test", { + role: "qa", + result: "pass", + projectGroupId: TEST_GROUP_ID, + summary: "QA passed by integration test", + }), + ); + + assert.strictEqual(result.success, true, `QA complete should succeed: ${result.error ?? ""}`); + assert.ok( + (result.labelTransition as string)?.includes("Done"), + `Label should transition to "Done", got: ${result.labelTransition}`, + ); + assert.strictEqual(result.issueClosed, true, "Issue should be closed"); + + // Verify side effect: issue label is now "Done" + const labels = await getIssueLabels(TEST_REPO, createdIssueId); + assert.ok( + labels.includes("Done"), + `Issue should have "Done" label, got: ${labels.join(", ")}`, + ); + + // Verify side effect: issue is closed + const state = await getIssueState(TEST_REPO, createdIssueId); + assert.strictEqual(state, "CLOSED", "Issue should be closed in GitHub"); + + // Verify side effect: QA worker deactivated, sessions preserved + const data = await readProjects(workspaceDir); + const project = data.projects[TEST_GROUP_ID]; + assert.strictEqual(project.qa.active, false, "QA worker should be inactive"); + assert.strictEqual(project.qa.issueId, null, "QA issue ID should be cleared"); + + // DEV session should still be preserved from earlier + assert.strictEqual( + project.dev.sessions.junior, + spawnedSessionKey, + "DEV session should still be PRESERVED after full lifecycle", + ); + }); + }); + + // ── Scenario 2: Queue status accuracy ───────────────────────────────── + + describe("Scenario 2: Queue status accuracy", () => { + const cleanup = new TestCleanup(); + let workspaceDir: string; + let api: ReturnType; + let ctx: ReturnType; + let issueIds: number[] = []; + + before(async () => { + workspaceDir = await createTestWorkspace({ autoChain: false }); + api = makeTestApi(); + ctx = makeTestContext(TEST_GROUP_ID, workspaceDir); + }); + + after(async () => { + await cleanup.cleanAll(); + await fs.rm(workspaceDir, { recursive: true, force: true }); + }); + + it("step 1: create 3 issues with different labels", async () => { + const tool = createTaskCreateTool(api)(ctx); + + const labels = ["To Do", "To Improve", "To Test"]; + for (const label of labels) { + const result = parseToolResult( + await tool.execute("test", { + projectGroupId: TEST_GROUP_ID, + title: `[TEST] Queue test — ${label}`, + description: "Automated test issue for queue_status verification.", + label, + }), + ); + assert.strictEqual(result.success, true); + const issueId = (result.issue as any).id as number; + issueIds.push(issueId); + cleanup.trackIssue(TEST_REPO, issueId); + } + + assert.strictEqual(issueIds.length, 3, "Should have created 3 issues"); + }); + + it("step 2: queue_status shows all issues in correct buckets", async () => { + // Small delay for GitHub API eventual consistency + await new Promise((r) => setTimeout(r, 2_000)); + + const tool = createQueueStatusTool(api)(ctx); + const result = parseToolResult( + await tool.execute("test", { + projectGroupId: TEST_GROUP_ID, + }), + ); + + // The result should contain projects with queue data + const projects = result.projects as Array<{ + queue: { + toImprove: Array<{ id: number }>; + toTest: Array<{ id: number }>; + toDo: Array<{ id: number }>; + }; + }>; + assert.ok(projects && projects.length > 0, "Should have project data"); + + const queue = projects[0].queue; + assert.ok(queue, "Should have queue data"); + + // Verify each bucket has our test issues + const toDoIds = queue.toDo.map((i) => i.id); + const toImproveIds = queue.toImprove.map((i) => i.id); + const toTestIds = queue.toTest.map((i) => i.id); + + assert.ok( + toDoIds.includes(issueIds[0]), + `"To Do" bucket should contain issue ${issueIds[0]}`, + ); + assert.ok( + toImproveIds.includes(issueIds[1]), + `"To Improve" bucket should contain issue ${issueIds[1]}`, + ); + assert.ok( + toTestIds.includes(issueIds[2]), + `"To Test" bucket should contain issue ${issueIds[2]}`, + ); + }); + }); + + // ── Scenario 3: Session health detection ────────────────────────────── + + describe("Scenario 3: Session health detection", () => { + let workspaceDir: string; + let api: ReturnType; + let ctx: ReturnType; + + before(async () => { + workspaceDir = await createTestWorkspace({ autoChain: false }); + api = makeTestApi(); + ctx = makeTestContext(TEST_GROUP_ID, workspaceDir); + }); + + after(async () => { + await fs.rm(workspaceDir, { recursive: true, force: true }); + }); + + it("step 1: inject zombie state into projects.json", async () => { + const data = await readProjects(workspaceDir); + const project = data.projects[TEST_GROUP_ID]; + + // Set active=true with a dead session key (session doesn't exist in gateway) + project.dev.active = true; + project.dev.issueId = "999"; + project.dev.model = "medior"; + project.dev.startTime = new Date( + Date.now() - 3 * 60 * 60 * 1000, + ).toISOString(); // 3 hours ago + project.dev.sessions.medior = + "agent:devclaw:subagent:dead-zombie-session-000"; + + await writeProjects(workspaceDir, data); + + // Verify it was written + const readBack = await readProjects(workspaceDir); + assert.strictEqual( + readBack.projects[TEST_GROUP_ID].dev.active, + true, + "Zombie state should be written", + ); + }); + + it("step 2: session_health detects the zombie (no autoFix)", async () => { + const tool = createSessionHealthTool(api)(ctx); + const result = parseToolResult( + await tool.execute("test", { + autoFix: false, + activeSessions: [], // empty = zombie detection skipped, but stale_worker (3h) will be caught + }), + ); + + assert.strictEqual(result.healthy, false, "Should report unhealthy"); + assert.ok( + (result.issuesFound as number) > 0, + "Should find at least one issue", + ); + + const issues = result.issues as Array<{ + type: string; + project: string; + role: string; + }>; + const zombieIssue = issues.find( + (i) => + i.project === "devclaw" && + i.role === "dev", + ); + assert.ok(zombieIssue, "Should detect zombie for dev worker"); + }); + + it("step 3: session_health fixes the zombie (autoFix=true)", async () => { + const tool = createSessionHealthTool(api)(ctx); + // Provide a non-empty activeSessions list that does NOT include the zombie key. + // This enables zombie detection (requires activeSessions.length > 0). + const result = parseToolResult( + await tool.execute("test", { + autoFix: true, + activeSessions: ["agent:devclaw:subagent:some-alive-session"], + }), + ); + + assert.ok( + (result.fixesApplied as number) > 0, + "Should apply at least one fix", + ); + + // Verify side effect: worker is deactivated in projects.json + const data = await readProjects(workspaceDir); + const project = data.projects[TEST_GROUP_ID]; + assert.strictEqual( + project.dev.active, + false, + "Worker should be deactivated after auto-fix", + ); + }); + }); + + // ── Scenario 4: Auto-chain DEV → QA ────────────────────────────────── + + describe("Scenario 4: Auto-chain DEV → QA", () => { + const cleanup = new TestCleanup(); + let workspaceDir: string; + let api: ReturnType; + let ctx: ReturnType; + + let issueId: number; + let devSessionKey: string; + + before(async () => { + workspaceDir = await createTestWorkspace({ autoChain: true }); + api = makeTestApi(); + ctx = makeTestContext(TEST_GROUP_ID, workspaceDir); + }); + + after(async () => { + await cleanup.cleanAll(); + await fs.rm(workspaceDir, { recursive: true, force: true }); + }); + + it("step 1: create issue and pick up for DEV", async () => { + // Create + const createTool = createTaskCreateTool(api)(ctx); + const createResult = parseToolResult( + await createTool.execute("test", { + projectGroupId: TEST_GROUP_ID, + title: "[TEST] Scenario 4 — Auto-chain DEV→QA", + description: "Automated test for auto-chain pipeline.", + label: "To Do", + }), + ); + issueId = (createResult.issue as any).id as number; + cleanup.trackIssue(TEST_REPO, issueId); + + // Pickup DEV + const pickupTool = createTaskPickupTool(api)(ctx); + const pickupResult = parseToolResult( + await pickupTool.execute("test", { + issueId, + role: "dev", + projectGroupId: TEST_GROUP_ID, + model: "junior", + }), + ); + assert.strictEqual(pickupResult.success, true, `Pickup should succeed: ${pickupResult.error ?? ""}`); + + // Read session key from projects.json (not in tool result) + const pickupData = await readProjects(workspaceDir); + devSessionKey = pickupData.projects[TEST_GROUP_ID].dev.sessions.junior as string; + assert.ok(devSessionKey, "Should have session key in projects.json"); + cleanup.trackSession(devSessionKey); + }); + + it("step 2: task_complete (dev done) auto-chains to QA", async () => { + const tool = createTaskCompleteTool(api)(ctx); + const result = parseToolResult( + await tool.execute("test", { + role: "dev", + result: "done", + projectGroupId: TEST_GROUP_ID, + summary: "DEV done, should auto-chain to QA", + }), + ); + + assert.strictEqual(result.success, true, `Complete should succeed: ${result.error ?? ""}`); + + // Check auto-chain result + const autoChain = result.autoChain as Record | undefined; + if (autoChain) { + assert.strictEqual( + autoChain.dispatched, + true, + "Auto-chain should dispatch QA", + ); + assert.strictEqual(autoChain.role, "qa", "Should chain to QA role"); + + // Track QA session for cleanup + if (autoChain.sessionKey) { + cleanup.trackSession(autoChain.sessionKey as string); + } + } + + // Verify issue label moved to Testing (via auto-chain) + const labels = await getIssueLabels(TEST_REPO, issueId); + assert.ok( + labels.includes("Testing") || labels.includes("To Test"), + `Issue should have "Testing" or "To Test" label, got: ${labels.join(", ")}`, + ); + + // Verify QA worker is active in projects.json + const data = await readProjects(workspaceDir); + const project = data.projects[TEST_GROUP_ID]; + if (autoChain?.dispatched) { + assert.strictEqual( + project.qa.active, + true, + "QA worker should be active after auto-chain", + ); + } + }); + }); + + // ── Scenario 5: Blocked result escalation ──────────────────────────── + + describe("Scenario 5: Blocked result escalation", () => { + const cleanup = new TestCleanup(); + let workspaceDir: string; + let api: ReturnType; + let ctx: ReturnType; + + let issueId: number; + + before(async () => { + workspaceDir = await createTestWorkspace({ autoChain: false }); + api = makeTestApi(); + ctx = makeTestContext(TEST_GROUP_ID, workspaceDir); + }); + + after(async () => { + await cleanup.cleanAll(); + await fs.rm(workspaceDir, { recursive: true, force: true }); + }); + + it("step 1: create issue, pick up as DEV, then block", async () => { + // Create issue + const createTool = createTaskCreateTool(api)(ctx); + const createResult = parseToolResult( + await createTool.execute("test", { + projectGroupId: TEST_GROUP_ID, + title: "[TEST] Scenario 5 — DEV blocked escalation", + description: "Test that blocked DEV returns issue to queue.", + label: "To Do", + }), + ); + issueId = (createResult.issue as any).id as number; + cleanup.trackIssue(TEST_REPO, issueId); + + // Pickup DEV + const pickupTool = createTaskPickupTool(api)(ctx); + const pickupResult = parseToolResult( + await pickupTool.execute("test", { + issueId, + role: "dev", + projectGroupId: TEST_GROUP_ID, + model: "junior", + }), + ); + assert.strictEqual(pickupResult.success, true, `Pickup should succeed: ${pickupResult.error ?? ""}`); + + // Track session for cleanup + const data = await readProjects(workspaceDir); + const sessionKey = data.projects[TEST_GROUP_ID].dev.sessions.junior as string; + if (sessionKey) cleanup.trackSession(sessionKey); + + // Complete with blocked + const completeTool = createTaskCompleteTool(api)(ctx); + const completeResult = parseToolResult( + await completeTool.execute("test", { + role: "dev", + result: "blocked", + projectGroupId: TEST_GROUP_ID, + summary: "Cannot complete — missing dependencies", + }), + ); + + assert.strictEqual(completeResult.success, true, `Blocked should succeed: ${completeResult.error ?? ""}`); + assert.strictEqual(completeResult.labelTransition, "Doing → To Do", "Should revert to To Do"); + + // Verify side effect: issue label is back to "To Do" + const labels = await getIssueLabels(TEST_REPO, issueId); + assert.ok( + labels.includes("To Do"), + `Issue should have "To Do" label after DEV blocked, got: ${labels.join(", ")}`, + ); + + // Verify side effect: worker deactivated + const refreshedData = await readProjects(workspaceDir); + const project = refreshedData.projects[TEST_GROUP_ID]; + assert.strictEqual(project.dev.active, false, "DEV worker should be inactive after blocked"); + }); + + it("step 2: pick up as QA, then block", async () => { + // First do a DEV cycle to get to "To Test" + const pickupTool = createTaskPickupTool(api)(ctx); + await pickupTool.execute("test", { + issueId, + role: "dev", + projectGroupId: TEST_GROUP_ID, + model: "junior", + }); + + const completeTool = createTaskCompleteTool(api)(ctx); + await completeTool.execute("test", { + role: "dev", + result: "done", + projectGroupId: TEST_GROUP_ID, + summary: "DEV done", + }); + + // Now pick up as QA + const qaPickupResult = parseToolResult( + await pickupTool.execute("test", { + issueId, + role: "qa", + projectGroupId: TEST_GROUP_ID, + model: "qa", + }), + ); + assert.strictEqual(qaPickupResult.success, true, `QA pickup should succeed: ${qaPickupResult.error ?? ""}`); + + // Track QA session for cleanup + const data = await readProjects(workspaceDir); + const qaSessionKey = data.projects[TEST_GROUP_ID].qa.sessions.qa as string; + if (qaSessionKey) cleanup.trackSession(qaSessionKey); + + // Complete QA with blocked + const qaCompleteResult = parseToolResult( + await completeTool.execute("test", { + role: "qa", + result: "blocked", + projectGroupId: TEST_GROUP_ID, + summary: "Cannot test — environment not available", + }), + ); + + assert.strictEqual(qaCompleteResult.success, true, `QA blocked should succeed: ${qaCompleteResult.error ?? ""}`); + assert.strictEqual(qaCompleteResult.labelTransition, "Testing → To Test", "Should revert to To Test"); + + // Verify side effect: issue label is back to "To Test" + const labels = await getIssueLabels(TEST_REPO, issueId); + assert.ok( + labels.includes("To Test"), + `Issue should have "To Test" label after QA blocked, got: ${labels.join(", ")}`, + ); + + // Verify side effect: QA worker deactivated + const refreshedData = await readProjects(workspaceDir); + const project = refreshedData.projects[TEST_GROUP_ID]; + assert.strictEqual(project.qa.active, false, "QA worker should be inactive after blocked"); + }); + }); + + // ── Scenario 6: Model resolution ────────────────────────────────────── + + describe("Scenario 6: Model resolution", () => { + it("resolves tier names to correct model IDs", () => { + assert.strictEqual( + resolveModel("junior"), + "anthropic/claude-haiku-4-5", + ); + assert.strictEqual( + resolveModel("medior"), + "anthropic/claude-sonnet-4-5", + ); + assert.strictEqual( + resolveModel("senior"), + "anthropic/claude-opus-4-5", + ); + assert.strictEqual( + resolveModel("qa"), + "anthropic/claude-sonnet-4-5", + ); + }); + + it("respects plugin config overrides", () => { + assert.strictEqual( + resolveModel("junior", { models: { junior: "custom/fast-model" } }), + "custom/fast-model", + ); + }); + + it("passes through raw model IDs unchanged", () => { + assert.strictEqual( + resolveModel("openai/gpt-4o"), + "openai/gpt-4o", + ); + }); + }); +});