feat(pi-mill): align runtime with mill async API and migrate shared skills

+1

.gitignore

··· 3 3 .bun/ 4 4 dist/ 5 5 implement.factory.ts 6 + packages/pi-mill/.pi-skills/

+1 -1

README.md

+29 -29

packages/driver-pi/src/internal/pi.codec.test.ts

··· 1 1 import { describe, expect, it } from "bun:test"; 2 - import { Effect } from "effect"; 3 2 import { runWithRuntime } from "../public/test-runtime.api"; 4 3 import { decodePiProcessOutput } from "./pi.codec"; 5 4 6 5 describe("pi codec terminal sequencing", () => { 7 - it("rejects duplicate terminal agent_end lines deterministically", async () => { 6 + it("uses the last agent_end payload when multiple terminal events are emitted", async () => { 8 7 const output = [ 9 8 JSON.stringify({ type: "session", id: "session-test" }), 10 - JSON.stringify({ type: "agent_end", messages: [] }), 11 - JSON.stringify({ type: "agent_end", messages: [] }), 9 + JSON.stringify({ 10 + type: "agent_end", 11 + messages: [{ role: "assistant", content: [{ type: "text", text: "first" }] }], 12 + }), 13 + JSON.stringify({ 14 + type: "agent_end", 15 + messages: [{ role: "assistant", content: [{ type: "text", text: "second" }] }], 16 + }), 12 17 ].join("\n"); 13 18 14 - const decodeError = await runWithRuntime( 15 - Effect.flip( 16 - decodePiProcessOutput(output, { 17 - agent: "scout", 18 - model: "openai/gpt-5.3-codex", 19 - spawnId: "spawn_test", 20 - }), 21 - ), 19 + const decoded = await runWithRuntime( 20 + decodePiProcessOutput(output, { 21 + agent: "scout", 22 + model: "openai/gpt-5.3-codex", 23 + spawnId: "spawn_test", 24 + }), 22 25 ); 23 26 24 - expect(decodeError).toMatchObject({ 25 - _tag: "PiCodecError", 26 - }); 27 + expect(decoded.result.text).toBe("second"); 27 28 }); 28 29 29 - it("rejects non-terminal lines emitted after terminal agent_end", async () => { 30 + it("tolerates non-terminal retry events emitted after agent_end", async () => { 30 31 const output = [ 31 - JSON.stringify({ type: "agent_end", messages: [] }), 32 - JSON.stringify({ type: "tool_execution_start", toolName: "bash" }), 32 + JSON.stringify({ 33 + type: "agent_end", 34 + messages: [{ role: "assistant", content: [{ type: "text", text: "done" }] }], 35 + }), 36 + JSON.stringify({ type: "auto_retry_start" }), 33 37 ].join("\n"); 34 38 35 - const decodeError = await runWithRuntime( 36 - Effect.flip( 37 - decodePiProcessOutput(output, { 38 - agent: "scout", 39 - model: "openai/gpt-5.3-codex", 40 - spawnId: "spawn_test", 41 - }), 42 - ), 39 + const decoded = await runWithRuntime( 40 + decodePiProcessOutput(output, { 41 + agent: "scout", 42 + model: "openai/gpt-5.3-codex", 43 + spawnId: "spawn_test", 44 + }), 43 45 ); 44 46 45 - expect(decodeError).toMatchObject({ 46 - _tag: "PiCodecError", 47 - }); 47 + expect(decoded.result.text).toBe("done"); 48 48 }); 49 49 });

+1 -19

packages/driver-pi/src/internal/pi.codec.ts

··· 77 77 ), 78 78 ); 79 79 80 - const isTerminalEvent = (eventType: string | undefined): boolean => eventType === "agent_end"; 81 - 82 80 export const decodePiProcessOutput = ( 83 81 output: string, 84 82 input: DecodePiProcessInput, ··· 100 98 for (const decoded of decodedLines) { 101 99 const eventType = readString(decoded, "type"); 102 100 103 - if (terminalSeen && !isTerminalEvent(eventType)) { 104 - return yield* Effect.fail( 105 - new PiCodecError({ 106 - message: `Non-terminal line ${eventType ?? "unknown"} emitted after terminal agent_end.`, 107 - }), 108 - ); 109 - } 110 - 111 - if (isTerminalEvent(eventType)) { 112 - if (terminalSeen) { 113 - return yield* Effect.fail( 114 - new PiCodecError({ 115 - message: "Duplicate terminal agent_end lines are not allowed.", 116 - }), 117 - ); 118 - } 119 - 101 + if (eventType === "agent_end") { 120 102 terminalSeen = true; 121 103 122 104 const messages = decoded.messages;

+14 -17

packages/driver-pi/src/public/index.api.test.ts

··· 17 17 "console.log(JSON.stringify({type:'agent_end',messages:[{role:'assistant',content:[{type:'text',text:'fixture:'+prompt}],model,stopReason:'stop'}]}));"; 18 18 19 19 const DUPLICATE_TERMINAL_SCRIPT = 20 - "console.log(JSON.stringify({type:'agent_end',messages:[]}));" + 21 - "console.log(JSON.stringify({type:'agent_end',messages:[]}));"; 20 + "console.log(JSON.stringify({type:'agent_end',messages:[{role:'assistant',content:[{type:'text',text:'first'}]}]}));" + 21 + "console.log(JSON.stringify({type:'auto_retry_start'}));" + 22 + "console.log(JSON.stringify({type:'agent_end',messages:[{role:'assistant',content:[{type:'text',text:'second'}]}]}));"; 22 23 23 24 describe("createPiDriverRegistration", () => { 24 25 it("supports explicit model catalog overrides via codec", async () => { ··· 99 100 expect(session.pointer.length).toBeGreaterThan(0); 100 101 }); 101 102 102 - it("rejects malformed duplicate terminal output fixtures", async () => { 103 + it("accepts retry-style output and uses the last terminal payload", async () => { 103 104 const driver = createPiDriverRegistration({ 104 105 process: { 105 106 command: "bun", ··· 114 115 return; 115 116 } 116 117 117 - const spawnError = await Runtime.runPromise(runtime)( 118 + const output = await Runtime.runPromise(runtime)( 118 119 Effect.provide( 119 - Effect.flip( 120 - driver.runtime.spawn({ 121 - runId: "run_driver_duplicate", 122 - spawnId: "spawn_driver_duplicate", 123 - agent: "scout", 124 - systemPrompt: "You are concise.", 125 - prompt: "Say hello", 126 - model: "openai/gpt-5.3-codex", 127 - }), 128 - ), 120 + driver.runtime.spawn({ 121 + runId: "run_driver_duplicate", 122 + spawnId: "spawn_driver_duplicate", 123 + agent: "scout", 124 + systemPrompt: "You are concise.", 125 + prompt: "Say hello", 126 + model: "openai/gpt-5.3-codex", 127 + }), 129 128 BunContext.layer, 130 129 ), 131 130 ); 132 131 133 - expect(spawnError).toMatchObject({ 134 - _tag: "PiProcessDriverError", 135 - }); 132 + expect(output.result.text).toBe("second"); 136 133 }); 137 134 });

+75 -32

packages/pi-mill/README.md

··· 1 - # @mill/pi-mill 1 + # pi-mill 2 2 3 - A pi extension that provides the same `subagent` tool + TUI monitor workflow as your existing setup, but executes each child task through **mill** (`mill run --sync --json`) instead of spawning `pi` directly. 3 + A [pi](https://pi.dev) extension that adds a `subagent` tool, letting your AI coding agent spawn and orchestrate child agents through [mill](https://github.com/laulauland/mill). 4 4 5 - ## What stays the same 5 + When the orchestrating agent needs to delegate work — run tasks in parallel, assign specialized roles, or break a problem into sub-tasks — it writes a short TypeScript program using `mill.spawn()`. Each spawn becomes a mill run, which means driver selection, model routing, and session management all come from your mill config rather than being hardcoded. 6 6 7 - - `subagent` tool contract (`task` + `code`) 8 - - Program-mode orchestration with `factory.spawn(...)` 9 - - Async return (immediate run id, completion notification) 10 - - `/mill` overlay monitor 11 - - `pi --mill` standalone monitor 12 - - Status widget + batched completion notifications 13 - - Bundled skills (`mill-basics`, `mill-ralph-loop`, `mill-worktree`) 7 + ## Install 14 8 15 - ## What changed 9 + From npm: 16 10 17 - - Child execution is now delegated to `mill`. 18 - - Each `factory.spawn(...)` compiles to a tiny temporary mill program with one `mill.spawn(...)` call. 19 - - Driver/executor/model behavior comes from your mill defaults and config resolution. 11 + ```bash 12 + pi install npm:pi-mill 13 + ``` 20 14 21 - ## Install as a pi package 15 + From a local checkout: 22 16 23 17 ```bash 24 - pi install /absolute/path/to/mill/packages/pi-mill 18 + pi install /path/to/mill/packages/pi-mill 19 + ``` 20 + 21 + ## Prerequisites 22 + 23 + 1. `mill` must be on your `PATH` (or set a custom command in config). 24 + 2. A `mill.config.ts` with at least one driver/executor configured. 25 + 26 + ## How it works 27 + 28 + The extension registers a `subagent` tool that accepts two parameters: a `task` label and a `code` string containing TypeScript. 29 + 30 + The code runs with a `mill` global (similar to `process` or `console`). The core method is `mill.spawn()`: 31 + 32 + ```ts 33 + // Sequential — one agent after another 34 + const analysis = await mill.spawn({ 35 + agent: "analyzer", 36 + systemPrompt: "You analyze codebases for architectural patterns.", 37 + prompt: "Analyze the auth module in src/auth/", 38 + model: "anthropic/claude-sonnet-4-6", 39 + }); 40 + 41 + const fix = await mill.spawn({ 42 + agent: "fixer", 43 + systemPrompt: "You fix code issues.", 44 + prompt: `Fix the issues found: ${analysis.text}`, 45 + model: "openai-codex/gpt-5.3-codex", 46 + }); 47 + 48 + // Parallel — multiple agents at once 49 + const [tests, docs] = await Promise.all([ 50 + mill.spawn({ 51 + agent: "test-writer", 52 + systemPrompt: "You write tests.", 53 + prompt: "Write tests for src/auth/", 54 + model: "anthropic/claude-sonnet-4-6", 55 + }), 56 + mill.spawn({ 57 + agent: "documenter", 58 + systemPrompt: "You write documentation.", 59 + prompt: "Document the auth module.", 60 + model: "cerebras/zai-glm-4.7", 61 + }), 62 + ]); 25 63 ``` 26 64 27 - (or add as a local package in your pi settings). 65 + Each `mill.spawn()` submits an async mill run (`mill run --json`) and then follows completion via mill APIs (`watch` + `inspect`). Model selection, driver routing, and execution behavior all come from your mill configuration. 66 + 67 + Runs are **async by default** — the tool returns a `runId` immediately and delivers results via notification when complete. 68 + 69 + ## Monitoring 28 70 29 - ## Mill prerequisites 71 + - `/mill` — opens an overlay inside pi showing all active and completed runs 72 + - `pi --mill` — standalone full-screen monitor for watching runs from a separate terminal 73 + - A status widget shows run progress inline during conversations 30 74 31 - 1. `mill` must be on your `PATH` (or configure a custom command below). 32 - 2. Configure your global/project `mill.config.ts` with real drivers/executors as needed. 75 + Cancelling runs works via either monitor (mapped to `mill cancel`). 33 76 34 - ## Extension config 77 + ## Configuration 35 78 36 - Edit `index.ts`: 79 + Edit the `config` export in `index.ts`: 37 80 38 81 ```ts 39 82 export const config = { ··· 41 84 millCommand: "mill", 42 85 millArgs: [], 43 86 millRunsDir: undefined, 44 - prompt: "...optional extra guidance for the tool description...", 87 + prompt: "...", 45 88 }; 46 89 ``` 47 90 48 - - `maxDepth`: subagent nesting limit (`PI_FACTORY_DEPTH` guard) 49 - - `millCommand`: executable name/path for mill 50 - - `millArgs`: extra args prepended to every mill invocation 51 - - `millRunsDir`: optional override for `--runs-dir` 52 - - `prompt`: additional model/tool guidance appended to tool description 91 + | Option | Description | 92 + |---|---| 93 + | `maxDepth` | Subagent nesting limit. `1` = agents can spawn subagents, but those subagents cannot spawn their own. `0` = disabled. | 94 + | `millCommand` | Executable name or path for mill. | 95 + | `millArgs` | Extra args prepended to every mill invocation. | 96 + | `millRunsDir` | Override for `--runs-dir`. | 97 + | `prompt` | Additional guidance appended to the tool description (model selection hints, project conventions, etc). | 53 98 54 - ## Notes 99 + ## Context flow 55 100 56 - - Cancelling via `/mill` or `pi --mill` still works (PID-based). 57 - - `ExecutionResult.sessionPath` now contains mill driver `sessionRef` when available. 58 - - This package intentionally keeps the old UX while switching execution backend to mill. 101 + Each subagent receives the parent session path and can use `search_thread` to explore the orchestrator's conversation for context. Results (including each subagent's `sessionPath`) flow back to the orchestrator via `result.text`.

+1 -1

packages/pi-mill/contract.ts

··· 5 5 task: Type.String({ description: "Label/description for this program run." }), 6 6 code: Type.String({ 7 7 description: 8 - "TypeScript script using the `factory` global. Use factory.spawn() to orchestrate agents. The script runs as a top-level module — use await and Promise.all directly.", 8 + "TypeScript script using the `mill` global. Use mill.spawn() to orchestrate agents. The script runs as a top-level module — use await and Promise.all directly.", 9 9 }), 10 10 }); 11 11

+15 -15

packages/pi-mill/executors/program-executor.ts

··· 4 4 import { FactoryError, toErrorDetails } from "../errors.js"; 5 5 import type { ObservabilityStore } from "../observability.js"; 6 6 import { 7 - createFactory, 7 + createMillRuntime, 8 8 patchConsole, 9 9 patchPromiseAll, 10 10 prepareProgramModule, ··· 197 197 }); 198 198 }; 199 199 200 - let runtime: ReturnType<typeof createFactory> | null = null; 200 + let runtime: ReturnType<typeof createMillRuntime> | null = null; 201 201 try { 202 202 // Write program source as early as possible so failed preflight/confirmation runs 203 203 // still keep a legible copy of the attempted program. ··· 227 227 emit("running"); 228 228 obs.push(runId, "info", "program:start", { codeBytes: code.length }); 229 229 230 - runtime = createFactory(ctx, runId, obs, { 230 + runtime = createMillRuntime(ctx, runId, obs, { 231 231 defaultSignal: input.signal, 232 232 onTaskUpdate: (result) => { 233 233 resultsByTask.set(result.taskId, result); ··· 244 244 const restorePromise = patchPromiseAll(obs, runId); 245 245 const restoreConsole = patchConsole(obs, runId); 246 246 247 - // Inject the factory global 248 - const prev = (globalThis as any).factory; 249 - (globalThis as any).factory = runtime; 247 + // Inject runtime global. 248 + const prevMill = (globalThis as any).mill; 249 + const restoreGlobals = () => { 250 + if (prevMill === undefined) delete (globalThis as any).mill; 251 + else (globalThis as any).mill = prevMill; 252 + }; 253 + 254 + (globalThis as any).mill = runtime; 250 255 251 256 let importPromise: Promise<unknown>; 252 257 try { ··· 254 259 // Prevent unhandled rejection if importPromise rejects before being awaited 255 260 importPromise.catch(() => {}); 256 261 } catch (e) { 257 - // Restore immediately on synchronous throw 258 - if (prev === undefined) delete (globalThis as any).factory; 259 - else (globalThis as any).factory = prev; 262 + restoreGlobals(); 260 263 restorePromise(); 261 264 restoreConsole(); 262 265 throw e; ··· 264 267 265 268 if (input.signal) { 266 269 if (input.signal.aborted) { 267 - if (prev === undefined) delete (globalThis as any).factory; 268 - else (globalThis as any).factory = prev; 270 + restoreGlobals(); 269 271 restorePromise(); 270 272 restoreConsole(); 271 273 throw new FactoryError({ ··· 284 286 await Promise.race([importPromise, cancelled]); 285 287 } finally { 286 288 if (onAbort) input.signal?.removeEventListener("abort", onAbort); 287 - if (prev === undefined) delete (globalThis as any).factory; 288 - else (globalThis as any).factory = prev; 289 + restoreGlobals(); 289 290 restorePromise(); 290 291 restoreConsole(); 291 292 } ··· 293 294 try { 294 295 await importPromise; 295 296 } finally { 296 - if (prev === undefined) delete (globalThis as any).factory; 297 - else (globalThis as any).factory = prev; 297 + restoreGlobals(); 298 298 restorePromise(); 299 299 restoreConsole(); 300 300 }

+63 -19

packages/pi-mill/index.ts

··· 24 24 runId: summary.runId, 25 25 status: summary.status, 26 26 task: (summary.metadata as any)?.task, 27 + mill: { 28 + command: (summary.metadata as any)?.millCommand, 29 + args: (summary.metadata as any)?.millArgs, 30 + runsDir: (summary.metadata as any)?.millRunsDir, 31 + }, 27 32 startedAt: summary.observability?.startedAt, 28 33 completedAt: summary.observability?.endedAt ?? Date.now(), 29 34 results: summary.results.map((r) => ({ ··· 42 47 } 43 48 44 49 /** Write a partial run.json so external monitors (pi --mill) can see active runs. */ 45 - function writeRunningMarker(runId: string, task: string, artifactsDir: string): void { 50 + function writeRunningMarker( 51 + runId: string, 52 + task: string, 53 + artifactsDir: string, 54 + millConfig: { command: string; args: string[]; runsDir?: string }, 55 + ): void { 46 56 try { 47 57 const data = { 48 58 runId, 49 59 status: "running", 50 60 task, 61 + mill: { 62 + command: millConfig.command, 63 + args: millConfig.args, 64 + runsDir: millConfig.runsDir, 65 + }, 51 66 startedAt: Date.now(), 52 67 results: [], 53 68 }; ··· 252 267 function loadHistoricalRuns(ctx: ExtensionContext, registry: RunRegistry): void { 253 268 const sessionDir = ctx.sessionManager.getSessionDir(); 254 269 if (!sessionDir) return; 255 - const factoryDir = path.join(sessionDir, ".factory"); 256 - if (!fs.existsSync(factoryDir)) return; 270 + const millDir = path.join(sessionDir, ".mill"); 271 + if (!fs.existsSync(millDir)) return; 257 272 try { 258 - for (const entry of fs.readdirSync(factoryDir)) { 259 - const runJsonPath = path.join(factoryDir, entry, "run.json"); 273 + for (const entry of fs.readdirSync(millDir)) { 274 + const runJsonPath = path.join(millDir, entry, "run.json"); 260 275 if (!fs.existsSync(runJsonPath)) continue; 261 276 try { 262 277 const data = JSON.parse(fs.readFileSync(runJsonPath, "utf-8")); 278 + const artifactsDir = path.join(millDir, entry); 263 279 registry.loadHistorical({ 264 280 runId: data.runId, 265 281 status: data.status ?? "done", ··· 268 284 status: data.status ?? "done", 269 285 results: data.results ?? [], 270 286 error: data.error, 271 - metadata: { task: data.task }, 287 + metadata: { 288 + task: data.task, 289 + millCommand: data.mill?.command, 290 + millArgs: data.mill?.args, 291 + millRunsDir: data.mill?.runsDir, 292 + }, 293 + observability: { 294 + status: data.status ?? "done", 295 + events: [], 296 + artifacts: [], 297 + artifactsDir, 298 + startedAt: data.startedAt ?? Date.now(), 299 + endedAt: data.completedAt, 300 + }, 272 301 }, 273 302 startedAt: data.startedAt ?? Date.now(), 274 303 completedAt: data.completedAt, ··· 297 326 millRunsDir: undefined, 298 327 /** Extra text appended to the tool description. Use for model selection hints, project conventions, etc. */ 299 328 prompt: 300 - "Use openai/gpt-5.3-codex for most subagent operations, especially if they entail making changes across multiple files. If you need to search you can use faster models like cerebras/zai-glm-4.7. If you need to look at and reason over images (a screenshot is referenced) use google-gemini-cli/gemini-3-flash-preview to see the changes.", 329 + "Use openai-codex/gpt-5.3-codex for most subagent operations, especially if they entail making changes across multiple files. If you need to search you can use faster models like cerebras/zai-glm-4.7. If you need to look at and reason over images (a screenshot is referenced) use google-gemini-cli/gemini-3-flash-preview to see the changes.", 301 330 }; 302 331 303 332 // ──────────────────────────────────────────────────────────────────────── ··· 393 422 }); 394 423 395 424 pi.on("session_shutdown", async () => { 396 - // Don't cancel active runs — children are detached and will continue 425 + // Don't cancel active runs on extension shutdown. 397 426 stopPolling(); 398 427 }); 399 428 ··· 428 457 label: "Subagent", 429 458 description: [ 430 459 "Spawn subagents for delegated or orchestrated work.", 431 - "Execution backend: mill (mill run --sync --json). Configure drivers/executors/models via mill.config.ts.", 460 + "Execution backend: mill async APIs (submit + watch + inspect). Configure drivers/executors/models via mill.config.ts.", 432 461 `Enabled models: ${modelsText}`, 433 - "Write a TypeScript script. `factory` is a global (like `process` or `console`). Use factory.spawn() to orchestrate agents.", 434 - "factory.spawn() returns a Promise<ExecutionResult>. Use `await` for sequential, `Promise.all` for parallel.", 462 + "Write a TypeScript script. `mill` is a global (like `process` or `console`). Use mill.spawn() to orchestrate agents.", 463 + "mill.spawn() returns a Promise<ExecutionResult>. Use `await` for sequential, `Promise.all` for parallel.", 435 464 "Each spawn needs: agent, systemPrompt, prompt, model. cwd defaults to process.cwd().", 436 465 "systemPrompt defines WHO the agent is (behavior, principles, methodology). prompt defines WHAT it should do now (specific files, specific work). Don't put task details in systemPrompt.", 437 466 "Context flow: each subagent gets the parent session path and can use search_thread to explore it. Each subagent's session is persisted and available via result.sessionPath. Result text is auto-populated on result.text.", 438 467 "Async by default: returns immediately with a runId. Results are delivered via notification when complete. Do NOT poll or check for results — just continue with other work and the notification will arrive automatically.", 439 468 "Model selection: use provider/model-id format (e.g. 'anthropic/claude-opus-4-6', 'cerebras/zai-glm-4.7'). Match model capability to task complexity. Use smaller/faster models for simple tasks, stronger models for complex reasoning. Vary your choices across the enabled models — don't default to one.", 440 - "Available types: Factory, ExecutionResult, SpawnInput, UsageStats.", 469 + "Available types: Mill, ExecutionResult, SpawnInput, UsageStats.", 441 470 ...(config.prompt ? [config.prompt] : []), 442 471 ].join(" "), 443 472 parameters: SubagentSchema, ··· 490 519 491 520 const abort = new AbortController(); 492 521 493 - // Don't wire the parent tool signal — subagent runs are detached and 494 - // should survive turn cancellation. Use "c" in /mill or pi --mill 495 - // to explicitly cancel a run. 522 + // Don't wire the parent tool signal — subagent runs should survive turn 523 + // cancellation. Use "c" in /mill or pi --mill to explicitly cancel a run. 496 524 497 525 const promise = executeProgram({ 498 526 ctx, ··· 522 550 523 551 // Write running marker so external monitors (pi --mill) see active runs 524 552 const runArtifactsDir = observability.get(runId)?.artifactsDir; 525 - if (runArtifactsDir) writeRunningMarker(runId, params.task, runArtifactsDir); 553 + if (runArtifactsDir) { 554 + writeRunningMarker(runId, params.task, runArtifactsDir, { 555 + command: config.millCommand, 556 + args: config.millArgs, 557 + runsDir: config.millRunsDir, 558 + }); 559 + } 526 560 527 561 // Wire completion: update observability, widget, and notify 528 562 promise.then( ··· 539 573 const fullSummary = { 540 574 ...summary, 541 575 observability: observability.toSummary(runId), 542 - metadata: { task: params.task }, 576 + metadata: { 577 + task: params.task, 578 + millCommand: config.millCommand, 579 + millArgs: config.millArgs, 580 + millRunsDir: config.millRunsDir, 581 + }, 543 582 }; 544 583 registry.complete(runId, fullSummary); 545 584 widget.update(registry.getVisible(), ctx); ··· 560 599 results: [], 561 600 error: details, 562 601 observability: observability.toSummary(runId), 563 - metadata: { task: params.task }, 602 + metadata: { 603 + task: params.task, 604 + millCommand: config.millCommand, 605 + millArgs: config.millArgs, 606 + millRunsDir: config.millRunsDir, 607 + }, 564 608 }; 565 609 registry.fail(runId, details); 566 610 notifyCompletion(pi, registry, failedSummary); ··· 585 629 ]; 586 630 if (artifactsDir) { 587 631 lines.push(`Artifacts: ${artifactsDir}`); 588 - lines.push(`Status: ${artifactsDir}/run.json (written on completion)`); 632 + lines.push(`Status: ${artifactsDir}/run.json (running marker + final summary)`); 589 633 lines.push(`Sessions: ${artifactsDir}/sessions/`); 590 634 } 591 635 return {

+9 -8

packages/pi-mill/monitor.ts

··· 524 524 if (matchesKey(data, "c")) { 525 525 const run = runs[this.selectedRunIndex]; 526 526 if (run && run.status === "running") { 527 - if (this.registry) { 528 - // In-session: use abort controller via registry 527 + const artifactsDir = run.summary.observability?.artifactsDir; 528 + 529 + if (this.registry && run.abort) { 530 + // In-session active run: cancel through abort controller. 531 + this.registry.cancel(run.runId); 532 + } else if (artifactsDir) { 533 + // Historical/standalone: cancel through persisted run metadata. 534 + cancelRunByPidFiles(artifactsDir); 535 + } else if (this.registry) { 529 536 this.registry.cancel(run.runId); 530 - } else { 531 - // Standalone mode: kill by PID files 532 - const artifactsDir = run.summary.observability?.artifactsDir; 533 - if (artifactsDir) { 534 - cancelRunByPidFiles(artifactsDir); 535 - } 536 537 } 537 538 } 538 539 return;

+1 -1

packages/pi-mill/observability.ts

··· 34 34 }; 35 35 if (withArtifacts) { 36 36 const base = sessionDir 37 - ? path.join(sessionDir, ".factory", runId) 37 + ? path.join(sessionDir, ".mill", runId) 38 38 : fs.mkdtempSync(path.join(os.tmpdir(), "pi-subagent-observe-")); 39 39 fs.mkdirSync(base, { recursive: true }); 40 40 record.artifactsDir = base;

+32 -5

packages/pi-mill/package.json

··· 1 1 { 2 - "name": "@mill/pi-mill", 3 - "version": "0.0.0", 4 - "description": "Pi extension for subagent orchestration powered by mill", 2 + "name": "pi-mill", 3 + "version": "0.1.0", 4 + "description": "Pi extension package that routes subagent execution through mill", 5 5 "keywords": [ 6 - "pi-package" 6 + "pi-package", 7 + "pi", 8 + "extension", 9 + "mill" 7 10 ], 8 11 "license": "MIT", 9 12 "type": "module", 13 + "homepage": "https://github.com/laulauland/mill/tree/main/packages/pi-mill", 14 + "repository": { 15 + "type": "git", 16 + "url": "git+https://github.com/laulauland/mill.git", 17 + "directory": "packages/pi-mill" 18 + }, 19 + "bugs": { 20 + "url": "https://github.com/laulauland/mill/issues" 21 + }, 22 + "files": [ 23 + "*.ts", 24 + "executors", 25 + ".pi-skills", 26 + "README.md", 27 + "package.json", 28 + "scripts" 29 + ], 30 + "scripts": { 31 + "clean": "rm -rf .pi-skills", 32 + "build": "echo 'nothing to build'", 33 + "check": "echo 'nothing to check'", 34 + "migrate:runs": "node ./scripts/migrate-factory-to-mill.mjs", 35 + "prepack": "node ./scripts/sync-skills.mjs" 36 + }, 10 37 "peerDependencies": { 11 38 "@mariozechner/pi-agent-core": "*", 12 39 "@mariozechner/pi-ai": "*", ··· 19 46 "./index.ts" 20 47 ], 21 48 "skills": [ 22 - "./skills" 49 + "./.pi-skills/mill/SKILL.md" 23 50 ] 24 51 } 25 52 }

+3 -2

packages/pi-mill/program-env.d.ts

··· 43 43 signal?: AbortSignal; 44 44 } 45 45 46 - interface Factory { 46 + interface Mill { 47 47 runId: string; 48 48 spawn(input: SpawnInput): Promise<ExecutionResult>; 49 49 shutdown(cancelRunning?: boolean): Promise<void>; ··· 53 53 }; 54 54 } 55 55 56 - declare const factory: Factory; 56 + /** Runtime API inside pi-mill programs. */ 57 + declare const mill: Mill; 57 58 declare const process: { 58 59 cwd(): string; 59 60 env: Record<string, string | undefined>;

+359 -82

packages/pi-mill/runtime.ts

··· 131 131 errorMessage?: string; 132 132 } 133 133 134 - interface MillRunSyncPayload { 134 + interface MillRunSubmitPayload { 135 + runId?: string; 136 + } 137 + 138 + interface MillRunInspectPayload { 135 139 run?: { 136 140 status?: string; 137 141 }; 138 142 result?: { 143 + status?: string; 144 + errorMessage?: string; 139 145 spawns?: ReadonlyArray<MillSpawnResult>; 140 146 }; 147 + } 148 + 149 + interface MillWatchEvent { 150 + type?: string; 151 + payload?: { 152 + message?: string; 153 + toolName?: string; 154 + }; 155 + } 156 + 157 + interface CommandCapture { 158 + code: number; 159 + stdout: string; 160 + stderr: string; 161 + combined: string; 162 + aborted: boolean; 141 163 } 142 164 143 165 function newUsage() { ··· 173 195 return undefined; 174 196 }; 175 197 198 + const parseJsonObjectFromLine = (line: string): Record<string, unknown> | undefined => { 199 + try { 200 + const parsed = JSON.parse(line) as unknown; 201 + if (typeof parsed === "object" && parsed !== null) { 202 + return parsed as Record<string, unknown>; 203 + } 204 + } catch { 205 + // ignore 206 + } 207 + 208 + return undefined; 209 + }; 210 + 211 + const formatCommand = (command: string, args: ReadonlyArray<string>): string => 212 + [command, ...args].join(" "); 213 + 214 + const appendCommandLog = ( 215 + logPath: string, 216 + command: string, 217 + args: ReadonlyArray<string>, 218 + output: CommandCapture, 219 + ): void => { 220 + const header = [ 221 + `> ${formatCommand(command, args)}`, 222 + `exit=${output.code}${output.aborted ? " (aborted)" : ""}`, 223 + ].join("\n"); 224 + const body = output.combined.trim(); 225 + const chunk = `${header}${body.length > 0 ? `\n${body}` : ""}\n\n`; 226 + fs.appendFileSync(logPath, chunk, "utf-8"); 227 + }; 228 + 229 + const runCommandCapture = (input: { 230 + command: string; 231 + args: ReadonlyArray<string>; 232 + cwd: string; 233 + env?: NodeJS.ProcessEnv; 234 + signal?: AbortSignal; 235 + onLine?: (line: string, stream: "stdout" | "stderr") => void; 236 + }): Promise<CommandCapture> => 237 + new Promise((resolve) => { 238 + let stdout = ""; 239 + let stderr = ""; 240 + let stdoutBuffer = ""; 241 + let stderrBuffer = ""; 242 + let aborted = input.signal?.aborted ?? false; 243 + let killTimer: ReturnType<typeof setTimeout> | undefined; 244 + 245 + const flushLines = (which: "stdout" | "stderr") => { 246 + let buffer = which === "stdout" ? stdoutBuffer : stderrBuffer; 247 + while (true) { 248 + const newlineIndex = buffer.indexOf("\n"); 249 + if (newlineIndex < 0) break; 250 + const line = buffer.slice(0, newlineIndex).trim(); 251 + buffer = buffer.slice(newlineIndex + 1); 252 + if (line.length > 0) input.onLine?.(line, which); 253 + } 254 + if (which === "stdout") stdoutBuffer = buffer; 255 + else stderrBuffer = buffer; 256 + }; 257 + 258 + const flushTail = () => { 259 + const outTail = stdoutBuffer.trim(); 260 + if (outTail.length > 0) input.onLine?.(outTail, "stdout"); 261 + const errTail = stderrBuffer.trim(); 262 + if (errTail.length > 0) input.onLine?.(errTail, "stderr"); 263 + stdoutBuffer = ""; 264 + stderrBuffer = ""; 265 + }; 266 + 267 + const child = spawn(input.command, [...input.args], { 268 + cwd: input.cwd, 269 + shell: false, 270 + stdio: ["ignore", "pipe", "pipe"], 271 + env: input.env, 272 + }); 273 + 274 + const abortChild = () => { 275 + aborted = true; 276 + child.kill("SIGTERM"); 277 + killTimer = setTimeout(() => { 278 + if (!child.killed) child.kill("SIGKILL"); 279 + }, 3000); 280 + }; 281 + 282 + if (input.signal?.aborted) { 283 + abortChild(); 284 + } else { 285 + input.signal?.addEventListener("abort", abortChild, { once: true }); 286 + } 287 + 288 + child.stdout?.on("data", (chunk: Buffer) => { 289 + const text = chunk.toString(); 290 + stdout += text; 291 + stdoutBuffer += text; 292 + flushLines("stdout"); 293 + }); 294 + 295 + child.stderr?.on("data", (chunk: Buffer) => { 296 + const text = chunk.toString(); 297 + stderr += text; 298 + stderrBuffer += text; 299 + flushLines("stderr"); 300 + }); 301 + 302 + child.on("error", (error) => { 303 + if (killTimer) clearTimeout(killTimer); 304 + input.signal?.removeEventListener("abort", abortChild); 305 + const errorText = error instanceof Error ? error.message : String(error); 306 + resolve({ 307 + code: 1, 308 + stdout, 309 + stderr: `${stderr}${stderr.length > 0 ? "\n" : ""}${errorText}`, 310 + combined: [stdout, stderr, errorText].filter((part) => part.trim().length > 0).join("\n"), 311 + aborted, 312 + }); 313 + }); 314 + 315 + child.on("close", (exitCode) => { 316 + if (killTimer) clearTimeout(killTimer); 317 + input.signal?.removeEventListener("abort", abortChild); 318 + flushTail(); 319 + const code = exitCode ?? 1; 320 + resolve({ 321 + code, 322 + stdout, 323 + stderr, 324 + combined: [stdout, stderr].filter((part) => part.trim().length > 0).join("\n"), 325 + aborted, 326 + }); 327 + }); 328 + }); 329 + 176 330 function writeMillProgram(input: { 177 331 systemPrompt: string; 178 332 prompt: string; ··· 194 348 } 195 349 196 350 const decodeMillResult = ( 197 - payload: MillRunSyncPayload, 351 + payload: MillRunInspectPayload, 198 352 fallback: { agent: string; modelId: string; prompt: string }, 199 353 ): ExecutionResult => { 200 354 const spawns = payload.result?.spawns; 201 355 if (!Array.isArray(spawns) || spawns.length === 0) { 356 + const failedStatus = payload.result?.status ?? payload.run?.status; 357 + const message = payload.result?.errorMessage; 202 358 throw new FactoryError({ 203 359 code: "RUNTIME", 204 - message: "mill run completed without spawn results.", 360 + message: 361 + message && message.length > 0 362 + ? `mill run ${failedStatus ?? "unknown"}: ${message}` 363 + : "mill run completed without spawn results.", 205 364 recoverable: false, 206 365 }); 207 366 } ··· 209 368 const selectedSpawn = 210 369 spawns.find((spawn) => spawn.agent === fallback.agent) ?? spawns[0] ?? ({} as MillSpawnResult); 211 370 212 - const runStatus = payload.run?.status; 371 + const runStatus = payload.result?.status ?? payload.run?.status; 213 372 const derivedExitCode = 214 373 typeof selectedSpawn.exitCode === "number" 215 374 ? selectedSpawn.exitCode ··· 227 386 usage: newUsage(), 228 387 model: selectedSpawn.model ?? fallback.modelId, 229 388 stopReason: selectedSpawn.stopReason, 230 - errorMessage: selectedSpawn.errorMessage, 389 + errorMessage: selectedSpawn.errorMessage ?? payload.result?.errorMessage, 231 390 step: undefined, 232 391 text: selectedSpawn.text ?? "", 233 392 sessionPath: selectedSpawn.sessionRef, 234 393 }; 235 394 }; 236 395 396 + const extractRunId = (payload: Record<string, unknown>): string | undefined => { 397 + const direct = payload.runId; 398 + if (typeof direct === "string" && direct.length > 0) { 399 + return direct; 400 + } 401 + 402 + const nestedRun = payload.run; 403 + if (typeof nestedRun === "object" && nestedRun !== null) { 404 + const nestedId = (nestedRun as { id?: unknown }).id; 405 + if (typeof nestedId === "string" && nestedId.length > 0) { 406 + return nestedId; 407 + } 408 + } 409 + 410 + return undefined; 411 + }; 412 + 237 413 export function spawnSubagent(input: SpawnInput): Promise<ExecutionResult> { 238 414 return runSubagentProcess(input); 239 415 } ··· 250 426 fs.mkdirSync(outputDir, { recursive: true }); 251 427 252 428 const stdoutPath = path.join(outputDir, `${input.taskId}.stdout.log`); 253 - const pidPath = path.join(outputDir, `${input.taskId}.pid`); 254 429 255 430 const result: ExecutionResult = { 256 431 taskId: input.taskId, ··· 280 455 modelId: input.modelId, 281 456 }); 282 457 283 - const args = [...input.millArgs, "run", tempProgram.filePath, "--sync", "--json"]; 284 - if (input.millRunsDir && input.millRunsDir.trim().length > 0) { 285 - args.push("--runs-dir", input.millRunsDir); 286 - } 458 + const childDepth = parseInt(process.env.PI_FACTORY_DEPTH || "0", 10) + 1; 459 + const childEnv = { ...process.env, PI_FACTORY_DEPTH: String(childDepth) }; 460 + 461 + let aborted = input.signal?.aborted ?? false; 462 + const handleAbort = () => { 463 + aborted = true; 464 + }; 465 + input.signal?.addEventListener("abort", handleAbort, { once: true }); 466 + 467 + let submittedRunId: string | undefined; 468 + let cancelRequested = false; 469 + 470 + const requestRunCancel = async (): Promise<void> => { 471 + if (cancelRequested || submittedRunId === undefined) return; 472 + cancelRequested = true; 473 + const cancelArgs = [...input.millArgs, "cancel", submittedRunId, "--json"]; 474 + if (input.millRunsDir && input.millRunsDir.trim().length > 0) { 475 + cancelArgs.push("--runs-dir", input.millRunsDir); 476 + } 287 477 288 - let aborted = false; 478 + const cancelled = await runCommandCapture({ 479 + command: input.millCommand, 480 + args: cancelArgs, 481 + cwd: input.cwd, 482 + env: process.env, 483 + }); 484 + appendCommandLog(stdoutPath, input.millCommand, cancelArgs, cancelled); 485 + }; 289 486 290 487 try { 291 - const code = await new Promise<number>((resolve) => { 292 - const stdoutFd = fs.openSync(stdoutPath, "w"); 293 - const childDepth = parseInt(process.env.PI_FACTORY_DEPTH || "0", 10) + 1; 294 - const proc = spawn(input.millCommand, args, { 295 - cwd: input.cwd, 296 - detached: true, 297 - stdio: ["ignore", stdoutFd, stdoutFd], 298 - shell: false, 299 - env: { ...process.env, PI_FACTORY_DEPTH: String(childDepth) }, 488 + const submitArgs = [...input.millArgs, "run", tempProgram.filePath, "--json"]; 489 + if (input.millRunsDir && input.millRunsDir.trim().length > 0) { 490 + submitArgs.push("--runs-dir", input.millRunsDir); 491 + } 492 + 493 + const submitted = await runCommandCapture({ 494 + command: input.millCommand, 495 + args: submitArgs, 496 + cwd: input.cwd, 497 + env: childEnv, 498 + signal: input.signal, 499 + }); 500 + appendCommandLog(stdoutPath, input.millCommand, submitArgs, submitted); 501 + 502 + if (submitted.aborted || aborted) { 503 + throw new FactoryError({ code: "CANCELLED", message: "Subagent aborted.", recoverable: true }); 504 + } 505 + 506 + if (submitted.code !== 0) { 507 + throw new FactoryError({ 508 + code: "RUNTIME", 509 + message: 510 + submitted.combined.trim().length > 0 511 + ? `mill run failed:\n${submitted.combined.trim()}` 512 + : "mill run failed.", 513 + recoverable: false, 300 514 }); 301 - proc.unref(); 302 - fs.closeSync(stdoutFd); 515 + } 303 516 304 - if (proc.pid != null) { 305 - fs.writeFileSync(pidPath, String(proc.pid), "utf-8"); 306 - } 517 + const submitPayload = parseJsonObjectFromText( 518 + [submitted.stdout, submitted.stderr].join("\n"), 519 + ) as MillRunSubmitPayload | Record<string, unknown> | undefined; 520 + if (!submitPayload) { 521 + throw new FactoryError({ 522 + code: "RUNTIME", 523 + message: "mill run did not return JSON submission payload.", 524 + recoverable: false, 525 + }); 526 + } 307 527 308 - let killTimer: ReturnType<typeof setTimeout> | undefined; 309 - const kill = () => { 310 - aborted = true; 311 - proc.kill("SIGTERM"); 312 - killTimer = setTimeout(() => { 313 - if (!proc.killed) proc.kill("SIGKILL"); 314 - }, 3000); 315 - }; 528 + submittedRunId = extractRunId(submitPayload as Record<string, unknown>); 529 + if (!submittedRunId) { 530 + throw new FactoryError({ 531 + code: "RUNTIME", 532 + message: "mill run submission payload is missing runId.", 533 + recoverable: false, 534 + }); 535 + } 316 536 317 - if (input.signal?.aborted) { 318 - kill(); 319 - } 320 - input.signal?.addEventListener("abort", kill, { once: true }); 537 + input.obs.push(input.runId, "info", `spawn_submitted:${input.taskId}`, { 538 + taskId: input.taskId, 539 + childRunId: submittedRunId, 540 + }); 321 541 322 - proc.on("close", (exitCode) => { 323 - if (killTimer) clearTimeout(killTimer); 324 - try { 325 - fs.unlinkSync(pidPath); 326 - } catch { 327 - // ignore 542 + let terminalEvent: string | undefined; 543 + const watchArgs = [...input.millArgs, "watch", submittedRunId, "--json"]; 544 + if (input.millRunsDir && input.millRunsDir.trim().length > 0) { 545 + watchArgs.push("--runs-dir", input.millRunsDir); 546 + } 547 + 548 + const watched = await runCommandCapture({ 549 + command: input.millCommand, 550 + args: watchArgs, 551 + cwd: input.cwd, 552 + env: process.env, 553 + signal: input.signal, 554 + onLine: (line, stream) => { 555 + const event = parseJsonObjectFromLine(line) as MillWatchEvent | undefined; 556 + if (!event || typeof event.type !== "string") { 557 + if (stream === "stderr") { 558 + input.obs.push(input.runId, "warning", `watch:${input.taskId}`, { line }); 559 + } 560 + return; 328 561 } 329 - resolve(exitCode ?? 1); 330 - }); 331 562 332 - proc.on("error", () => { 333 - if (killTimer) clearTimeout(killTimer); 334 - try { 335 - fs.unlinkSync(pidPath); 336 - } catch { 337 - // ignore 563 + if (event.type === "spawn:milestone") { 564 + input.obs.push(input.runId, "info", `spawn:milestone:${input.taskId}`, { 565 + message: event.payload?.message, 566 + }); 338 567 } 339 - resolve(1); 568 + 569 + if (event.type === "spawn:tool_call") { 570 + input.obs.push(input.runId, "info", `spawn:tool_call:${input.taskId}`, { 571 + toolName: event.payload?.toolName, 572 + }); 573 + } 574 + 575 + if ( 576 + event.type === "run:complete" || 577 + event.type === "run:failed" || 578 + event.type === "run:cancelled" 579 + ) { 580 + terminalEvent = event.type; 581 + } 582 + }, 583 + }); 584 + appendCommandLog(stdoutPath, input.millCommand, watchArgs, watched); 585 + 586 + if (watched.aborted || aborted) { 587 + await requestRunCancel(); 588 + throw new FactoryError({ code: "CANCELLED", message: "Subagent aborted.", recoverable: true }); 589 + } 590 + 591 + if (watched.code !== 0) { 592 + throw new FactoryError({ 593 + code: "RUNTIME", 594 + message: 595 + watched.combined.trim().length > 0 596 + ? `mill watch failed:\n${watched.combined.trim()}` 597 + : "mill watch failed.", 598 + recoverable: false, 340 599 }); 600 + } 601 + 602 + if (terminalEvent === "run:cancelled") { 603 + throw new FactoryError({ 604 + code: "CANCELLED", 605 + message: "Subagent run was cancelled.", 606 + recoverable: true, 607 + }); 608 + } 609 + 610 + const inspectArgs = [...input.millArgs, "inspect", submittedRunId, "--json"]; 611 + if (input.millRunsDir && input.millRunsDir.trim().length > 0) { 612 + inspectArgs.push("--runs-dir", input.millRunsDir); 613 + } 614 + 615 + const inspected = await runCommandCapture({ 616 + command: input.millCommand, 617 + args: inspectArgs, 618 + cwd: input.cwd, 619 + env: process.env, 620 + signal: input.signal, 341 621 }); 622 + appendCommandLog(stdoutPath, input.millCommand, inspectArgs, inspected); 342 623 343 - const output = fs.existsSync(stdoutPath) ? fs.readFileSync(stdoutPath, "utf-8") : ""; 344 - const parsed = parseJsonObjectFromText(output) as MillRunSyncPayload | undefined; 624 + if (inspected.aborted || aborted) { 625 + await requestRunCancel(); 626 + throw new FactoryError({ code: "CANCELLED", message: "Subagent aborted.", recoverable: true }); 627 + } 345 628 346 - if (!parsed) { 347 - result.stderr = output.trim(); 348 - if (aborted) { 349 - throw new FactoryError({ 350 - code: "CANCELLED", 351 - message: "Subagent aborted.", 352 - recoverable: true, 353 - }); 354 - } 629 + if (inspected.code !== 0) { 355 630 throw new FactoryError({ 356 631 code: "RUNTIME", 357 632 message: 358 - result.stderr.length > 0 359 - ? `mill output was not valid JSON:\n${result.stderr}` 360 - : "mill output was empty.", 633 + inspected.combined.trim().length > 0 634 + ? `mill inspect failed:\n${inspected.combined.trim()}` 635 + : "mill inspect failed.", 636 + recoverable: false, 637 + }); 638 + } 639 + 640 + const parsed = parseJsonObjectFromText([inspected.stdout, inspected.stderr].join("\n")); 641 + if (!parsed) { 642 + throw new FactoryError({ 643 + code: "RUNTIME", 644 + message: "mill inspect output was not valid JSON.", 361 645 recoverable: false, 362 646 }); 363 647 } 364 648 365 - const decoded = decodeMillResult(parsed, { 649 + const decoded = decodeMillResult(parsed as MillRunInspectPayload, { 366 650 agent: input.agent, 367 651 modelId: input.modelId, 368 652 prompt: input.prompt, ··· 376 660 result.errorMessage = decoded.errorMessage; 377 661 result.text = decoded.text; 378 662 result.sessionPath = decoded.sessionPath; 379 - result.stderr = code === 0 ? "" : output.trim(); 380 - 381 - if (aborted) { 382 - throw new FactoryError({ 383 - code: "CANCELLED", 384 - message: "Subagent aborted.", 385 - recoverable: true, 386 - }); 387 - } 663 + result.stderr = ""; 388 664 389 665 input.onProgress?.({ ...result, messages: [] }); 390 666 return result; 391 667 } finally { 668 + input.signal?.removeEventListener("abort", handleAbort); 392 669 try { 393 670 fs.rmSync(tempProgram.dir, { recursive: true, force: true }); 394 671 } catch { ··· 397 674 } 398 675 } 399 676 400 - // ── Factory (program runtime) ────────────────────────────────────────── 677 + // ── Mill runtime (program host API) ───────────────────────────────────── 401 678 402 679 export interface RuntimeSpawnInput { 403 680 agent: string; ··· 410 687 signal?: AbortSignal; 411 688 } 412 689 413 - export interface Factory { 690 + export interface MillRuntime { 414 691 runId: string; 415 692 spawn(input: RuntimeSpawnInput): SpawnPromise; 416 693 shutdown(cancelRunning?: boolean): Promise<void>; ··· 431 708 return model; 432 709 } 433 710 434 - export function createFactory( 711 + export function createMillRuntime( 435 712 ctx: ExtensionContext, 436 713 runId: string, 437 714 obs: ObservabilityStore, ··· 444 721 millArgs?: string[]; 445 722 millRunsDir?: string; 446 723 }, 447 - ): Factory { 724 + ): MillRuntime { 448 725 let spawnCounter = 0; 449 726 const runtimeAbort = new AbortController(); 450 727 const activeTasks = new Map< ··· 456 733 const millArgs = options?.millArgs ?? []; 457 734 const millRunsDir = options?.millRunsDir ?? process.env.PI_FACTORY_MILL_RUNS_DIR; 458 735 459 - const factory: Factory = { 736 + const millRuntime: MillRuntime = { 460 737 runId, 461 738 462 739 spawn({ agent, systemPrompt, prompt, cwd, model, tools, step, signal }) { ··· 545 822 }, 546 823 }; 547 824 548 - return factory; 825 + return millRuntime; 549 826 } 550 827 551 828 // ── Preflight typecheck ────────────────────────────────────────────────

+66 -14

packages/pi-mill/scanner.ts

··· 1 + import { spawnSync } from "node:child_process"; 1 2 import * as fs from "node:fs"; 2 3 import * as path from "node:path"; 3 4 import * as os from "node:os"; ··· 6 7 7 8 /** 8 9 * Filesystem scanner for standalone --mill mode. 9 - * Reads run.json files from ~/.pi/agent/sessions/<session-dir>/.factory/<run-id>/run.json 10 + * Reads run.json files from ~/.pi/agent/sessions/<session-dir>/.mill/<run-id>/run.json 10 11 */ 11 12 12 13 /** Convert a cwd path to the session directory name pi uses. */ ··· 22 23 task?: string; 23 24 startedAt?: number; 24 25 completedAt?: number; 26 + mill?: { 27 + command?: string; 28 + args?: string[]; 29 + runsDir?: string; 30 + }; 25 31 results?: Array<{ 26 32 agent: string; 27 33 task: string; ··· 37 43 } 38 44 39 45 /** Parse a single run.json into a RunRecord (without promise/abort). */ 40 - function parseRunJson(data: RunJsonData): Omit<RunRecord, "promise" | "abort"> { 46 + function parseRunJson( 47 + data: RunJsonData, 48 + artifactsDir: string, 49 + ): Omit<RunRecord, "promise" | "abort"> { 41 50 const status: RunStatus = data.status ?? "done"; 42 51 const results: ExecutionResult[] = (data.results ?? []).map((r) => ({ 43 52 taskId: "", ··· 67 76 status, 68 77 results, 69 78 error: data.error as RunSummary["error"], 70 - metadata: { task: data.task }, 79 + metadata: { 80 + task: data.task, 81 + millCommand: data.mill?.command, 82 + millArgs: data.mill?.args, 83 + millRunsDir: data.mill?.runsDir, 84 + }, 85 + observability: { 86 + status, 87 + events: [], 88 + artifacts: [], 89 + artifactsDir, 90 + startedAt: data.startedAt ?? Date.now(), 91 + endedAt: data.completedAt, 92 + }, 71 93 }; 72 94 73 95 return { ··· 87 109 } 88 110 89 111 /** 90 - * Scan all run.json files under a session's .factory directory. 112 + * Scan all run.json files under a session's .mill directory. 91 113 * If sessionDirName is provided, scans only that session. 92 114 * If not provided, scans all sessions. 93 115 */ ··· 100 122 const sessionDirs = sessionDirName ? [sessionDirName] : listSessionDirs(sessionsBase); 101 123 102 124 for (const dir of sessionDirs) { 103 - const factoryDir = path.join(sessionsBase, dir, ".factory"); 104 - if (!fs.existsSync(factoryDir)) continue; 125 + const millDir = path.join(sessionsBase, dir, ".mill"); 126 + if (!fs.existsSync(millDir)) continue; 105 127 106 128 try { 107 - for (const entry of fs.readdirSync(factoryDir)) { 108 - const runJsonPath = path.join(factoryDir, entry, "run.json"); 129 + for (const entry of fs.readdirSync(millDir)) { 130 + const runJsonPath = path.join(millDir, entry, "run.json"); 109 131 if (!fs.existsSync(runJsonPath)) continue; 110 132 try { 111 133 const raw = fs.readFileSync(runJsonPath, "utf-8"); 112 134 const data: RunJsonData = JSON.parse(raw); 113 - records.push(parseRunJson(data)); 135 + records.push(parseRunJson(data, path.join(millDir, entry))); 114 136 } catch { 115 137 // Skip malformed run.json files 116 138 } ··· 145 167 } 146 168 147 169 /** 148 - * Cancel all running subagents for a run by scanning for PID files in the run's sessions directory. 149 - * Returns the number of processes signalled. 170 + * Cancel a run using metadata from run.json (preferred), with PID-kill fallback. 171 + * Returns the number of cancellation actions attempted. 150 172 */ 151 173 export function cancelRunByPidFiles(artifactsDir: string): number { 152 - const sessionsDir = path.join(artifactsDir, "sessions"); 153 174 let cancelled = 0; 175 + 154 176 try { 155 - if (!fs.existsSync(sessionsDir)) return 0; 177 + const runJsonPath = path.join(artifactsDir, "run.json"); 178 + if (fs.existsSync(runJsonPath)) { 179 + const data: RunJsonData = JSON.parse(fs.readFileSync(runJsonPath, "utf-8")); 180 + if (typeof data.runId === "string" && data.runId.length > 0) { 181 + const command = data.mill?.command?.trim() || "mill"; 182 + const args = [...(data.mill?.args ?? []), "cancel", data.runId]; 183 + if (data.mill?.runsDir && data.mill.runsDir.trim().length > 0) { 184 + args.push("--runs-dir", data.mill.runsDir); 185 + } 186 + 187 + const result = spawnSync(command, args, { 188 + stdio: "ignore", 189 + shell: false, 190 + }); 191 + 192 + if (result.status === 0) { 193 + cancelled++; 194 + return cancelled; 195 + } 196 + } 197 + } 198 + } catch { 199 + // fall through to PID fallback 200 + } 201 + 202 + const sessionsDir = path.join(artifactsDir, "sessions"); 203 + try { 204 + if (!fs.existsSync(sessionsDir)) return cancelled; 156 205 for (const entry of fs.readdirSync(sessionsDir)) { 157 206 if (!entry.endsWith(".pid")) continue; 158 207 const taskId = entry.replace(/\.pid$/, ""); 159 208 if (cancelByPidFile(sessionsDir, taskId)) cancelled++; 160 209 } 161 - } catch {} 210 + } catch { 211 + // ignore fallback failures 212 + } 213 + 162 214 return cancelled; 163 215 } 164 216

+219

packages/pi-mill/scripts/migrate-factory-to-mill.mjs

··· 1 + #!/usr/bin/env node 2 + import fs from "node:fs"; 3 + import os from "node:os"; 4 + import path from "node:path"; 5 + 6 + const args = process.argv.slice(2); 7 + 8 + const getArgValue = (name) => { 9 + const index = args.indexOf(name); 10 + if (index < 0) return undefined; 11 + return args[index + 1]; 12 + }; 13 + 14 + const hasArg = (name) => args.includes(name); 15 + 16 + const sessionsDir = 17 + getArgValue("--sessions-dir") ?? path.join(os.homedir(), ".pi", "agent", "sessions"); 18 + const dryRun = hasArg("--dry-run"); 19 + const mode = getArgValue("--mode") ?? "copy"; // copy | move 20 + const overwrite = hasArg("--overwrite"); 21 + const cleanup = hasArg("--cleanup"); 22 + 23 + if (hasArg("--help") || hasArg("-h")) { 24 + console.log(`migrate-factory-to-mill 25 + 26 + Migrates pi-mill run artifacts from: 27 + <session>/.factory/<runId> 28 + to: 29 + <session>/.mill/<runId> 30 + 31 + Usage: 32 + node ./scripts/migrate-factory-to-mill.mjs [options] 33 + 34 + Options: 35 + --sessions-dir <path> Sessions root (default: ~/.pi/agent/sessions) 36 + --mode <copy|move> copy (default) or move 37 + --overwrite Overwrite files if destination exists 38 + --cleanup Remove empty .factory directories after migration 39 + --dry-run Print actions without changing files 40 + -h, --help Show help 41 + `); 42 + process.exit(0); 43 + } 44 + 45 + if (mode !== "copy" && mode !== "move") { 46 + console.error(`Invalid --mode '${mode}'. Use 'copy' or 'move'.`); 47 + process.exit(1); 48 + } 49 + 50 + const exists = (p) => { 51 + try { 52 + fs.accessSync(p); 53 + return true; 54 + } catch { 55 + return false; 56 + } 57 + }; 58 + 59 + const isDirectory = (p) => { 60 + try { 61 + return fs.statSync(p).isDirectory(); 62 + } catch { 63 + return false; 64 + } 65 + }; 66 + 67 + const ensureDir = (dir) => { 68 + if (dryRun) return; 69 + fs.mkdirSync(dir, { recursive: true }); 70 + }; 71 + 72 + const copyOrMoveRecursive = (src, dst, options) => { 73 + const { overwriteFiles, moveFiles } = options; 74 + const stat = fs.statSync(src); 75 + 76 + if (stat.isDirectory()) { 77 + ensureDir(dst); 78 + for (const entry of fs.readdirSync(src)) { 79 + copyOrMoveRecursive(path.join(src, entry), path.join(dst, entry), options); 80 + } 81 + if (moveFiles) { 82 + const remaining = fs.readdirSync(src); 83 + if (remaining.length === 0 && !dryRun) { 84 + fs.rmdirSync(src); 85 + } 86 + } 87 + return; 88 + } 89 + 90 + ensureDir(path.dirname(dst)); 91 + 92 + if (exists(dst) && !overwriteFiles) { 93 + return; 94 + } 95 + 96 + if (moveFiles) { 97 + if (dryRun) return; 98 + 99 + try { 100 + fs.renameSync(src, dst); 101 + } catch { 102 + fs.copyFileSync(src, dst); 103 + fs.rmSync(src, { force: true }); 104 + } 105 + return; 106 + } 107 + 108 + if (!dryRun) { 109 + fs.copyFileSync(src, dst); 110 + } 111 + }; 112 + 113 + const stat = { 114 + sessionsScanned: 0, 115 + sessionsTouched: 0, 116 + runDirsMigrated: 0, 117 + runDirsMerged: 0, 118 + filesOverwritten: 0, 119 + skippedNoFactory: 0, 120 + }; 121 + 122 + if (!exists(sessionsDir) || !isDirectory(sessionsDir)) { 123 + console.error(`Sessions directory not found: ${sessionsDir}`); 124 + process.exit(1); 125 + } 126 + 127 + const sessionEntries = fs.readdirSync(sessionsDir).filter((entry) => 128 + isDirectory(path.join(sessionsDir, entry)), 129 + ); 130 + 131 + for (const sessionName of sessionEntries) { 132 + stat.sessionsScanned += 1; 133 + 134 + const sessionPath = path.join(sessionsDir, sessionName); 135 + const sourceRoot = path.join(sessionPath, ".factory"); 136 + const targetRoot = path.join(sessionPath, ".mill"); 137 + 138 + if (!exists(sourceRoot) || !isDirectory(sourceRoot)) { 139 + stat.skippedNoFactory += 1; 140 + continue; 141 + } 142 + 143 + const runEntries = fs.readdirSync(sourceRoot).filter((entry) => 144 + isDirectory(path.join(sourceRoot, entry)), 145 + ); 146 + 147 + if (runEntries.length === 0) { 148 + continue; 149 + } 150 + 151 + stat.sessionsTouched += 1; 152 + ensureDir(targetRoot); 153 + 154 + for (const runId of runEntries) { 155 + const srcRunDir = path.join(sourceRoot, runId); 156 + const dstRunDir = path.join(targetRoot, runId); 157 + 158 + const destinationExists = exists(dstRunDir); 159 + 160 + if (destinationExists) { 161 + stat.runDirsMerged += 1; 162 + } else { 163 + stat.runDirsMigrated += 1; 164 + } 165 + 166 + if (dryRun) { 167 + console.log( 168 + `[dry-run] ${mode} ${srcRunDir} -> ${dstRunDir}${destinationExists ? " (merge)" : ""}`, 169 + ); 170 + } 171 + 172 + if (overwrite && destinationExists) { 173 + const walk = (p) => { 174 + for (const entry of fs.readdirSync(p)) { 175 + const full = path.join(p, entry); 176 + const s = fs.statSync(full); 177 + if (s.isDirectory()) walk(full); 178 + else { 179 + const rel = path.relative(srcRunDir, full); 180 + const dst = path.join(dstRunDir, rel); 181 + if (exists(dst)) stat.filesOverwritten += 1; 182 + } 183 + } 184 + }; 185 + walk(srcRunDir); 186 + } 187 + 188 + copyOrMoveRecursive(srcRunDir, dstRunDir, { 189 + overwriteFiles: overwrite, 190 + moveFiles: mode === "move", 191 + }); 192 + } 193 + 194 + if (cleanup && mode === "move") { 195 + try { 196 + const leftovers = fs.readdirSync(sourceRoot); 197 + if (leftovers.length === 0) { 198 + if (dryRun) { 199 + console.log(`[dry-run] remove ${sourceRoot}`); 200 + } else { 201 + fs.rmdirSync(sourceRoot); 202 + } 203 + } 204 + } catch { 205 + // ignore 206 + } 207 + } 208 + } 209 + 210 + console.log("\nMigration summary:"); 211 + console.log(`- sessions scanned: ${stat.sessionsScanned}`); 212 + console.log(`- sessions touched: ${stat.sessionsTouched}`); 213 + console.log(`- run dirs migrated (new): ${stat.runDirsMigrated}`); 214 + console.log(`- run dirs merged (already existed): ${stat.runDirsMerged}`); 215 + if (overwrite) { 216 + console.log(`- files overwritten: ${stat.filesOverwritten}`); 217 + } 218 + console.log(`- sessions without .factory: ${stat.skippedNoFactory}`); 219 + console.log(`- mode: ${mode}${dryRun ? " (dry-run)" : ""}`);

+35

packages/pi-mill/scripts/sync-skills.mjs

··· 1 + import fs from "node:fs"; 2 + import path from "node:path"; 3 + import { fileURLToPath } from "node:url"; 4 + 5 + const __dirname = path.dirname(fileURLToPath(import.meta.url)); 6 + const packageDir = path.resolve(__dirname, ".."); 7 + const sourceSkillDir = path.resolve(packageDir, "../skills/mill"); 8 + const targetRootDir = path.resolve(packageDir, ".pi-skills"); 9 + const targetSkillDir = path.resolve(targetRootDir, "mill"); 10 + 11 + const copyRecursive = (from, to) => { 12 + const stat = fs.statSync(from); 13 + 14 + if (stat.isDirectory()) { 15 + fs.mkdirSync(to, { recursive: true }); 16 + 17 + for (const entry of fs.readdirSync(from)) { 18 + copyRecursive(path.join(from, entry), path.join(to, entry)); 19 + } 20 + 21 + return; 22 + } 23 + 24 + fs.mkdirSync(path.dirname(to), { recursive: true }); 25 + fs.copyFileSync(from, to); 26 + }; 27 + 28 + if (!fs.existsSync(path.join(sourceSkillDir, "SKILL.md"))) { 29 + throw new Error(`Missing source skill at ${sourceSkillDir}/SKILL.md`); 30 + } 31 + 32 + fs.rmSync(targetRootDir, { recursive: true, force: true }); 33 + copyRecursive(sourceSkillDir, targetSkillDir); 34 + 35 + console.log(`Synced skills from ${sourceSkillDir} -> ${targetSkillDir}`);

-225

packages/pi-mill/skills/mill-basics/SKILL.md

··· 1 - --- 2 - name: mill-basics 3 - description: "Write pi-mill programs to orchestrate multi-agent workflows. Use when spawning subagents, coordinating parallel/sequential tasks, building agent-driven automation, or applying common orchestration patterns like fan-out, pipelines, and synthesis." 4 - --- 5 - 6 - # Mill Basics 7 - 8 - Pi-mill enables writing scripts that orchestrate multiple AI agents. Scripts use the `factory` global to spawn subagents, coordinate work, and compose results. 9 - 10 - ## systemPrompt vs prompt 11 - 12 - These two fields have distinct roles — don't mix them: 13 - 14 - - **systemPrompt**: Defines WHO the agent is and HOW it should behave. Personality, methodology, principles, output format, tool usage conventions. 15 - - **prompt**: Defines WHAT the agent should do right now. The concrete assignment — specific files to read, bugs to fix, features to implement, commands to run. 16 - 17 - ```typescript 18 - // BAD: work leaked into systemPrompt 19 - { systemPrompt: "Review src/auth/ for security issues", prompt: "Do the review" } 20 - 21 - // BAD: systemPrompt is too weak 22 - { systemPrompt: "Lint.", prompt: "Run lint on src/ and fix errors" } 23 - 24 - // GOOD: clean separation 25 - { 26 - systemPrompt: "You are a security-focused code reviewer. Look for OWASP Top 10 vulnerabilities, injection flaws, and auth bypasses. Report findings with severity ratings.", 27 - prompt: "Review src/auth/ for security issues. Focus on the login flow and session management." 28 - } 29 - ``` 30 - 31 - ## Program Structure 32 - 33 - Factory programs are top-level TypeScript scripts. The `factory` global is available — no imports or exports needed: 34 - 35 - ```typescript 36 - const result = await factory.spawn({ 37 - agent: "researcher", 38 - systemPrompt: 39 - "You are a research assistant. You find accurate, up-to-date information and cite sources. You present findings in a structured format.", 40 - prompt: 41 - "Find information about TypeScript 5.0 — new features, breaking changes, and migration notes.", 42 - model: "anthropic/claude-opus-4-6", 43 - }); 44 - 45 - console.log(result.text); 46 - ``` 47 - 48 - The script runs as a module — use `await` at top level, `Promise.all` for parallelism, and standard imports. 49 - 50 - ## Mill API 51 - 52 - ### spawn 53 - 54 - Create a subagent task: 55 - 56 - ```typescript 57 - const result = await factory.spawn({ 58 - agent: "code-reviewer", // Role label (for logging/display) 59 - systemPrompt: "You review code...", // WHO: behavior, principles, methodology 60 - prompt: "Review main.ts for...", // WHAT: the specific work to do now 61 - model: "anthropic/claude-opus-4-6", // Model in provider/model-id format 62 - cwd: "/path/to/project", // Working directory (defaults to process.cwd()) 63 - step: 1, // Optional step number 64 - signal: abortSignal, // Optional cancellation 65 - }); 66 - ``` 67 - 68 - Returns `Promise<ExecutionResult>`. Use `await` for one agent, `Promise.all` for parallel execution. 69 - 70 - ### Parallel execution 71 - 72 - ```typescript 73 - const [security, coverage] = await Promise.all([ 74 - factory.spawn({ 75 - agent: "security", 76 - systemPrompt: "You are a security reviewer...", 77 - prompt: "Review src/auth/", 78 - model: "anthropic/claude-opus-4-6", 79 - }), 80 - factory.spawn({ 81 - agent: "coverage", 82 - systemPrompt: "You analyze test coverage...", 83 - prompt: "Check coverage for src/auth/", 84 - model: "anthropic/claude-sonnet-4-6", 85 - }), 86 - ]); 87 - ``` 88 - 89 - ### Observe 90 - 91 - ```typescript 92 - factory.observe.log("info", "Starting analysis", { fileCount: 42 }); 93 - factory.observe.log("warning", "Slow response", { duration: 5000 }); 94 - factory.observe.log("error", "Task failed", { taskId: "task-3" }); 95 - 96 - const artifactPath = factory.observe.artifact("summary.md", reportContent); 97 - ``` 98 - 99 - ### Shutdown 100 - 101 - ```typescript 102 - await factory.shutdown(true); // Cancel all running tasks 103 - await factory.shutdown(false); // Wait for running tasks to complete naturally 104 - ``` 105 - 106 - ## Execution Results 107 - 108 - Each subagent returns an `ExecutionResult`: 109 - 110 - ```typescript 111 - interface ExecutionResult { 112 - taskId: string; 113 - agent: string; 114 - task: string; // Original execution prompt string 115 - exitCode: number; 116 - 117 - text: string; 118 - sessionPath?: string; 119 - 120 - messages: unknown[]; 121 - 122 - usage: UsageStats; 123 - model?: string; 124 - stopReason?: string; 125 - errorMessage?: string; 126 - stderr: string; 127 - 128 - step?: number; 129 - } 130 - ``` 131 - 132 - ## Context flow 133 - 134 - ### Context DOWN (Parent -> Subagent) 135 - 136 - The parent session path is appended to the subagent system prompt automatically. Subagents can use `search_thread` to read parent context. 137 - 138 - ### Context UP (Subagent -> Program) 139 - 140 - 1. `result.text` for quick chaining 141 - 2. `result.sessionPath` for deep review 142 - 143 - ```typescript 144 - const research = await factory.spawn({ 145 - agent: "researcher", 146 - systemPrompt: "You are a thorough technical researcher.", 147 - prompt: "Research Rust async patterns and common pitfalls.", 148 - model: "anthropic/claude-opus-4-6", 149 - }); 150 - 151 - const summary = await factory.spawn({ 152 - agent: "summarizer", 153 - systemPrompt: "You write concise executive summaries.", 154 - prompt: `Summarize this research:\n\n${research.text}`, 155 - model: "anthropic/claude-haiku-4-5", 156 - }); 157 - 158 - const review = await factory.spawn({ 159 - agent: "reviewer", 160 - systemPrompt: "You are a technical reviewer. Verify claims and flag unsupported assertions.", 161 - prompt: `Review research session at ${research.sessionPath} for technical accuracy.`, 162 - model: "anthropic/claude-opus-4-6", 163 - }); 164 - ``` 165 - 166 - ## Error handling 167 - 168 - Check `exitCode` / `stopReason` / `errorMessage` and escalate: 169 - 170 - ```typescript 171 - const result = await factory.spawn({ ... }); 172 - 173 - const failed = 174 - result.exitCode !== 0 || 175 - result.stopReason === "error" || 176 - Boolean(result.errorMessage); 177 - 178 - if (failed) { 179 - factory.observe.log("error", "Task failed", { 180 - taskId: result.taskId, 181 - exitCode: result.exitCode, 182 - stopReason: result.stopReason, 183 - error: result.errorMessage, 184 - stderr: result.stderr, 185 - }); 186 - throw new Error(`Task ${result.taskId} failed: ${result.errorMessage || "unknown error"}`); 187 - } 188 - ``` 189 - 190 - ## Async model 191 - 192 - Programs run asynchronously by default when invoked via tool call: immediate `runId`, completion via notification. 193 - 194 - Inside your program, use `await` and `Promise.all` normally: 195 - 196 - ```typescript 197 - const [r1, r2] = await Promise.all([ 198 - factory.spawn({ 199 - agent: "a", 200 - systemPrompt: "...", 201 - prompt: "...", 202 - model: "anthropic/claude-opus-4-6", 203 - }), 204 - factory.spawn({ agent: "b", systemPrompt: "...", prompt: "...", model: "cerebras/zai-glm-4.7" }), 205 - ]); 206 - console.log(r1.text, r2.text); 207 - ``` 208 - 209 - ## Detached processes 210 - 211 - Subagent processes are detached: 212 - 213 - - Closing pi or cancelling a turn does **not** kill running subagents 214 - - Output is written to `.stdout.jsonl` files 215 - - PID files enable cancel via `/mill` or `pi --mill` 216 - 217 - ## Key principles 218 - 219 - 1. Programs coordinate, subagents execute 220 - 2. Use `result.text` for fast chaining 221 - 3. Use `result.sessionPath` for deep context 222 - 4. Check failure signals (`exitCode`, `stopReason`, `errorMessage`) 223 - 5. Log progress with `factory.observe.log()` 224 - 225 - See [patterns.md](patterns.md) for common orchestration patterns.

+8 -8

packages/pi-mill/skills/mill-basics/patterns.md packages/skills/mill/references/patterns.md

··· 8 8 9 9 ```ts 10 10 const results = await Promise.all([ 11 - factory.spawn({ 11 + mill.spawn({ 12 12 agent: "security", 13 13 systemPrompt: 14 14 "You are a security reviewer. You look for injection flaws, auth bypasses, and data exposure. Report findings with severity ratings.", ··· 16 16 model: "anthropic/claude-opus-4-6", 17 17 step: 0, 18 18 }), 19 - factory.spawn({ 19 + mill.spawn({ 20 20 agent: "perf", 21 21 systemPrompt: 22 22 "You are a performance analyst. You identify bottlenecks, unnecessary allocations, and O(n²) patterns.", ··· 32 32 Each step feeds into the next via `result.text`: 33 33 34 34 ```ts 35 - const analysis = await factory.spawn({ 35 + const analysis = await mill.spawn({ 36 36 agent: "analyzer", 37 37 systemPrompt: 38 38 "You analyze codebases systematically. You map structure, dependencies, and public interfaces.", ··· 41 41 step: 0, 42 42 }); 43 43 44 - const plan = await factory.spawn({ 44 + const plan = await mill.spawn({ 45 45 agent: "planner", 46 46 systemPrompt: "You design thorough test plans. You prioritize critical paths and edge cases.", 47 47 prompt: `Design integration tests covering the API endpoints found:\n\n${analysis.text}`, ··· 56 56 57 57 ```ts 58 58 const reviews = await Promise.all([ 59 - factory.spawn({ 59 + mill.spawn({ 60 60 agent: "frontend", 61 61 systemPrompt: 62 62 "You are a frontend specialist. You review UI code for accessibility, performance, and UX issues.", ··· 64 64 model: "anthropic/claude-sonnet-4-6", 65 65 step: 0, 66 66 }), 67 - factory.spawn({ 67 + mill.spawn({ 68 68 agent: "backend", 69 69 systemPrompt: 70 70 "You are a backend specialist. You review server code for correctness, scalability, and error handling.", ··· 72 72 model: "mistral/devstral-2512", 73 73 step: 1, 74 74 }), 75 - factory.spawn({ 75 + mill.spawn({ 76 76 agent: "infra", 77 77 systemPrompt: 78 78 "You are an infrastructure specialist. You review configs, deployments, and operational concerns.", ··· 83 83 ]); 84 84 85 85 const context = reviews.map((r) => `[${r.agent}]\n${r.text}`).join("\n\n"); 86 - const summary = await factory.spawn({ 86 + const summary = await mill.spawn({ 87 87 agent: "synthesizer", 88 88 systemPrompt: 89 89 "You synthesize multiple perspectives into clear, actionable summaries. You deduplicate, prioritize, and highlight conflicts.",

+31 -31

packages/pi-mill/skills/mill-ralph-loop/SKILL.md packages/skills/mill/references/ralph-loop.md

··· 24 24 25 25 while (!done && iteration < maxIterations) { 26 26 iteration++; 27 - factory.observe.log("info", `Iteration ${iteration}`, { maxIterations }); 27 + mill.observe.log("info", `Iteration ${iteration}`, { maxIterations }); 28 28 29 - const result = await factory.spawn({ 29 + const result = await mill.spawn({ 30 30 agent: "worker", 31 31 systemPrompt: "You are fixing issues iteratively", 32 32 prompt: "Fix the next issue", ··· 38 38 done = result.exitCode === 0 && result.text.includes("all clean"); 39 39 40 40 if (result.exitCode !== 0) { 41 - factory.observe.log("error", "Agent failed", { iteration, error: result.errorMessage }); 41 + mill.observe.log("error", "Agent failed", { iteration, error: result.errorMessage }); 42 42 break; 43 43 } 44 44 } ··· 63 63 }); 64 64 65 65 if (lintResult.status === 0) { 66 - factory.observe.log("info", "Lint clean!", { iterations: iteration }); 66 + mill.observe.log("info", "Lint clean!", { iterations: iteration }); 67 67 break; 68 68 } 69 69 70 - factory.observe.log("info", `Iteration ${iteration}`, { 70 + mill.observe.log("info", `Iteration ${iteration}`, { 71 71 exitCode: lintResult.status, 72 72 errorCount: (lintResult.stdout.match(/error/gi) || []).length, 73 73 }); 74 74 75 - const result = await factory.spawn({ 75 + const result = await mill.spawn({ 76 76 agent: "linter", 77 77 systemPrompt: `You fix lint errors iteratively. 78 78 Run 'npm run lint' to see current errors. ··· 84 84 }); 85 85 86 86 if (result.exitCode !== 0) { 87 - factory.observe.log("error", "Agent failed", { iteration }); 87 + mill.observe.log("error", "Agent failed", { iteration }); 88 88 break; 89 89 } 90 90 } ··· 123 123 const errorCount = (lintResult.stdout.match(/error/gi) || []).length; 124 124 125 125 if (lintResult.status === 0) { 126 - factory.observe.log("info", "All issues fixed!", { 126 + mill.observe.log("info", "All issues fixed!", { 127 127 iterations: iteration, 128 128 fixedIssues: progress.fixedIssues, 129 129 }); ··· 139 139 140 140 // Exit if stagnant 141 141 if (progress.stagnantIterations >= 3) { 142 - factory.observe.log("warning", "No progress for 3 iterations", { errorCount }); 142 + mill.observe.log("warning", "No progress for 3 iterations", { errorCount }); 143 143 break; 144 144 } 145 145 146 - factory.observe.log("info", `Iteration ${iteration}`, { 146 + mill.observe.log("info", `Iteration ${iteration}`, { 147 147 errorCount, 148 148 lastErrorCount: progress.lastErrorCount, 149 149 fixed: progress.fixedIssues.length, ··· 151 151 152 152 progress.lastErrorCount = errorCount; 153 153 154 - const result = await factory.spawn({ 154 + const result = await mill.spawn({ 155 155 agent: "fixer", 156 156 systemPrompt: `You fix lint errors iteratively. 157 157 Track your progress and avoid repeating unsuccessful approaches. ··· 193 193 }); 194 194 195 195 if (testResult.status === 0) { 196 - factory.observe.log("info", "Tests passing!", { iterations: iteration }); 196 + mill.observe.log("info", "Tests passing!", { iterations: iteration }); 197 197 break; 198 198 } 199 199 200 - factory.observe.log("info", `Iteration ${iteration}`, { 200 + mill.observe.log("info", `Iteration ${iteration}`, { 201 201 exitCode: testResult.status, 202 202 timeout: testResult.signal === "SIGTERM", 203 203 }); ··· 207 207 .join("\n") 208 208 .slice(-5000); // Last 5KB to avoid huge prompt payloads 209 209 210 - const result = await factory.spawn({ 210 + const result = await mill.spawn({ 211 211 agent: "test-fixer", 212 212 systemPrompt: `You fix failing tests iteratively. 213 213 Analyze test output, identify the root cause, and make minimal fixes. ··· 219 219 }); 220 220 221 221 if (result.exitCode !== 0) { 222 - factory.observe.log("error", "Agent failed", { iteration }); 222 + mill.observe.log("error", "Agent failed", { iteration }); 223 223 break; 224 224 } 225 225 } ··· 257 257 while (iteration < maxIterations) { 258 258 const nextTask = tasks.find((t) => !t.completed); 259 259 if (!nextTask) { 260 - factory.observe.log("info", "All tasks completed!", { iterations: iteration }); 260 + mill.observe.log("info", "All tasks completed!", { iterations: iteration }); 261 261 break; 262 262 } 263 263 264 264 iteration++; 265 - factory.observe.log("info", `Iteration ${iteration}: ${nextTask.id}`, { 265 + mill.observe.log("info", `Iteration ${iteration}: ${nextTask.id}`, { 266 266 remaining: tasks.filter((t) => !t.completed).length, 267 267 }); 268 268 269 - const result = await factory.spawn({ 269 + const result = await mill.spawn({ 270 270 agent: "implementer", 271 271 systemPrompt: `You implement PRD tasks iteratively. 272 272 Read the PRD at ${prdPath}. ··· 281 281 }); 282 282 283 283 if (result.exitCode !== 0) { 284 - factory.observe.log("error", "Agent failed", { iteration, task: nextTask.id }); 284 + mill.observe.log("error", "Agent failed", { iteration, task: nextTask.id }); 285 285 break; 286 286 } 287 287 ··· 336 336 }); 337 337 338 338 if (checkResult.status === 0) { 339 - factory.observe.log("info", "Check passed!", { iterations: iteration }); 339 + mill.observe.log("info", "Check passed!", { iterations: iteration }); 340 340 break; 341 341 } 342 342 ··· 344 344 const currentOutput = checkResult.stdout + checkResult.stderr; 345 345 if (currentOutput === lastCheckOutput) { 346 346 stagnantCount++; 347 - factory.observe.log("warning", "No change detected", { stagnantCount }); 347 + mill.observe.log("warning", "No change detected", { stagnantCount }); 348 348 } else { 349 349 stagnantCount = 0; 350 350 } 351 351 lastCheckOutput = currentOutput; 352 352 353 353 if (stagnantCount >= maxStagnantIterations) { 354 - factory.observe.log("error", "Stagnant iterations exceeded", { stagnantCount }); 354 + mill.observe.log("error", "Stagnant iterations exceeded", { stagnantCount }); 355 355 break; 356 356 } 357 357 358 - factory.observe.log("info", `Iteration ${iteration}`, { 358 + mill.observe.log("info", `Iteration ${iteration}`, { 359 359 stagnantCount, 360 360 failedCount, 361 361 max: maxIterations, 362 362 }); 363 363 364 - const result = await factory.spawn({ 364 + const result = await mill.spawn({ 365 365 agent: "worker", 366 366 systemPrompt: "You are fixing issues iteratively", 367 367 prompt: "Continue fixing issues", ··· 371 371 372 372 if (result.exitCode !== 0) { 373 373 failedCount++; 374 - factory.observe.log("error", "Agent failed", { iteration, failedCount }); 374 + mill.observe.log("error", "Agent failed", { iteration, failedCount }); 375 375 376 376 if (failedCount >= maxFailedIterations) { 377 - factory.observe.log("error", "Failed iterations exceeded", { failedCount }); 377 + mill.observe.log("error", "Failed iterations exceeded", { failedCount }); 378 378 break; 379 379 } 380 380 } else { ··· 429 429 Observability is critical for debugging loops: 430 430 431 431 ```typescript 432 - factory.observe.log("info", "Loop state", { 432 + mill.observe.log("info", "Loop state", { 433 433 iteration, 434 434 errorCount, 435 435 stagnantCount, ··· 468 468 - Tasks requiring deep context across iterations 469 469 - Complex multi-step reasoning within a single problem 470 470 - When the agent needs to remember detailed discussions 471 - - Parallel work (use `Promise.all` with `factory.spawn` instead) 471 + - Parallel work (use `Promise.all` with `mill.spawn` instead) 472 472 473 473 ## Advanced: Nested Loops 474 474 ··· 478 478 const modules = ["src/auth", "src/api", "src/db"]; 479 479 480 480 for (const module of modules) { 481 - factory.observe.log("info", `Processing module: ${module}`); 481 + mill.observe.log("info", `Processing module: ${module}`); 482 482 483 483 let iteration = 0; 484 484 while (iteration < 10) { 485 485 iteration++; 486 486 487 - const result = await factory.spawn({ 487 + const result = await mill.spawn({ 488 488 agent: "module-fixer", 489 489 systemPrompt: `Fix issues in ${module}`, 490 490 prompt: "Run checks and fix issues", ··· 506 506 507 507 The Ralph Loop is a simple but powerful pattern: 508 508 509 - - **While loop** around `await factory.spawn()` 509 + - **While loop** around `await mill.spawn()` 510 510 - **Filesystem persistence** between iterations 511 511 - **Bash exit conditions** for authoritative checks 512 512 - **Progress tracking** to detect stagnation

+17 -17

packages/pi-mill/skills/mill-worktree/SKILL.md packages/skills/mill/references/worktree.md

··· 70 70 throw new Error(`Failed to create workspace ${t.name}: ${result.stderr}`); 71 71 } 72 72 73 - factory.observe.log("info", `Created workspace: ${t.name}`, { path: wtPath }); 73 + mill.observe.log("info", `Created workspace: ${t.name}`, { path: wtPath }); 74 74 } 75 75 76 76 // 2. Install dependencies in each worktree 77 77 await Promise.all( 78 78 worktrees.map((wt, i) => 79 - factory.spawn({ 79 + mill.spawn({ 80 80 agent: "installer", 81 81 systemPrompt: 82 82 "Install project dependencies. Run the appropriate install command (npm install, pnpm install, bun install, etc.) and verify it succeeds.", ··· 91 91 // 3. Dispatch parallel agents 92 92 const results = await Promise.all( 93 93 tasks.map((t, i) => 94 - factory.spawn({ 94 + mill.spawn({ 95 95 agent: t.name, 96 96 systemPrompt: t.systemPrompt, 97 97 prompt: t.prompt, ··· 105 105 // 4. Check results 106 106 const failed = results.filter((r) => r.exitCode !== 0); 107 107 if (failed.length > 0) { 108 - factory.observe.log("warning", "Some agents failed", { 108 + mill.observe.log("warning", "Some agents failed", { 109 109 failed: failed.map((r) => r.agent), 110 110 }); 111 111 } 112 112 113 113 // 5. Merge results back 114 - const mergeResult = await factory.spawn({ 114 + const mergeResult = await mill.spawn({ 115 115 agent: "merger", 116 116 systemPrompt: `You merge parallel workstream results using jj. 117 117 Use 'jj log' to see all changes across workspaces. ··· 131 131 const summaryContent = results 132 132 .map((r) => `## ${r.agent}\n**Status:** ${r.exitCode === 0 ? "pass" : "fail"}\n\n${r.text}`) 133 133 .join("\n\n---\n\n"); 134 - factory.observe.artifact("worktree-report.md", summaryContent); 134 + mill.observe.artifact("worktree-report.md", summaryContent); 135 135 } finally { 136 136 // 7. Cleanup — always runs 137 137 for (const wt of worktrees) { ··· 143 143 if (fs.existsSync(wt)) { 144 144 fs.rmSync(wt, { recursive: true, force: true }); 145 145 } 146 - factory.observe.log("info", `Cleaned up workspace`, { path: wt }); 146 + mill.observe.log("info", `Cleaned up workspace`, { path: wt }); 147 147 } 148 148 } 149 149 ``` ··· 233 233 throw new Error(`Failed to create worktree ${t.name}: ${result.stderr}`); 234 234 } 235 235 236 - factory.observe.log("info", `Created worktree: ${t.name}`, { path: wtPath, branch }); 236 + mill.observe.log("info", `Created worktree: ${t.name}`, { path: wtPath, branch }); 237 237 } 238 238 239 239 // 2. Install dependencies 240 240 await Promise.all( 241 241 worktrees.map((wt, i) => 242 - factory.spawn({ 242 + mill.spawn({ 243 243 agent: "installer", 244 244 systemPrompt: "Install project dependencies.", 245 245 prompt: "Run the install command for this project (npm install, etc.)", ··· 253 253 // 3. Dispatch agents 254 254 const results = await Promise.all( 255 255 tasks.map((t, i) => 256 - factory.spawn({ 256 + mill.spawn({ 257 257 agent: t.name, 258 258 systemPrompt: t.systemPrompt, 259 259 prompt: `${t.prompt}\n\nCommit your changes to the current branch when complete.`, ··· 269 269 .map((r, i) => ({ result: r, worktree: worktrees[i] })) 270 270 .filter(({ result }) => result.exitCode === 0); 271 271 272 - await factory.spawn({ 272 + await mill.spawn({ 273 273 agent: "merger", 274 274 systemPrompt: `You merge git branches from parallel workstreams. 275 275 Merge each feature branch into ${baseBranch}. ··· 359 359 // Install deps in parallel 360 360 await Promise.all( 361 361 worktrees.map((wt, i) => 362 - factory.spawn({ 362 + mill.spawn({ 363 363 agent: "installer", 364 364 systemPrompt: "Install deps.", 365 365 prompt: "npm install", ··· 373 373 // Parallel implementation 374 374 const results = await Promise.all( 375 375 tasks.map((t, i) => 376 - factory.spawn({ 376 + mill.spawn({ 377 377 agent: t.name, 378 378 systemPrompt: t.systemPrompt, 379 379 prompt: t.prompt, ··· 386 386 387 387 // Synthesize — merge and verify 388 388 const context = results.map((r) => `[${r.agent}]\n${r.text}`).join("\n\n"); 389 - const synthesis = await factory.spawn({ 389 + const synthesis = await mill.spawn({ 390 390 agent: "integrator", 391 391 systemPrompt: `You integrate parallel workstreams. 392 392 1. Use jj to merge all workspace changes into the main workspace. ··· 446 446 // Dedicated install step 447 447 await Promise.all( 448 448 worktrees.map((wt) => 449 - factory.spawn({ 449 + mill.spawn({ 450 450 agent: "installer", 451 451 systemPrompt: "Install dependencies.", 452 452 prompt: "npm install", ··· 459 459 // Then dispatch real work 460 460 await Promise.all( 461 461 tasks.map((t, i) => 462 - factory.spawn({ 462 + mill.spawn({ 463 463 agent: t.name, 464 464 systemPrompt: t.systemPrompt, 465 465 prompt: t.prompt, ··· 507 507 Not ideal for: 508 508 509 509 - Tasks that heavily overlap in the same files 510 - - Read-only analysis (just use `Promise.all` with `factory.spawn` and same `cwd`) 510 + - Read-only analysis (just use `Promise.all` with `mill.spawn` and same `cwd`) 511 511 - Very small changes (worktree overhead isn't worth it) 512 512 - Repos with huge `node_modules` or build artifacts (disk cost) 513 513

+6

packages/skills/README.md

··· 1 + # pi-mill-skills 2 + 3 + Shared skills package for mill orchestration guidance. 4 + 5 + Primary skill: 6 + - `mill/` — reusable for pi, Claude Code, and Codex workflows.

+24

packages/skills/mill/SKILL.md

··· 1 + --- 2 + name: mill 3 + description: "Write mill orchestration programs for parallel/sequential agent workflows, iterative Ralph Loop execution, and worktree-isolated development." 4 + --- 5 + 6 + # mill 7 + 8 + Use this skill whenever you are writing or reviewing a mill-based orchestration program (including pi-mill extension flows). 9 + 10 + ## Core rules 11 + 12 + 1. Keep `systemPrompt` (WHO/how) separate from `prompt` (WHAT/task). 13 + 2. Use `await` for sequential steps and `Promise.all` for independent parallel work. 14 + 3. Always pass an explicit `model` in `provider/model-id` format. 15 + 4. Check `exitCode`, `stopReason`, and `errorMessage` before trusting results. 16 + 5. Use `mill.observe.log(...)` for progress and diagnostics. 17 + 18 + ## Available patterns 19 + 20 + - General orchestration patterns: `./references/patterns.md` 21 + - Iterative Ralph Loop pattern: `./references/ralph-loop.md` 22 + - Worktree-isolated parallel development: `./references/worktree.md` 23 + 24 + Prefer these patterns before inventing new orchestration scaffolding.

+23

packages/skills/package.json

··· 1 + { 2 + "name": "pi-mill-skills", 3 + "version": "0.1.0", 4 + "description": "Shared skill bundle for pi-mill", 5 + "keywords": [ 6 + "pi-package", 7 + "pi", 8 + "skills", 9 + "mill" 10 + ], 11 + "license": "MIT", 12 + "type": "module", 13 + "files": [ 14 + "pi-mill", 15 + "README.md", 16 + "package.json" 17 + ], 18 + "pi": { 19 + "skills": [ 20 + "./mill" 21 + ] 22 + } 23 + }

Configure Feed

Configure Feed