feat: pi-mill extension, program host, and driver/CLI refinements

+18

bun.lock

··· 5 5 "": { 6 6 "name": "mill", 7 7 "dependencies": { 8 + "@effect/cli": "^0.73.2", 8 9 "@effect/platform": "^0.94.5", 9 10 "@effect/platform-bun": "^0.87.1", 11 + "@effect/printer": "^0.47.0", 12 + "@effect/printer-ansi": "^0.47.0", 10 13 "@effect/schema": "^0.75.5", 14 + "@effect/typeclass": "^0.38.0", 11 15 "effect": "^3.19.19", 12 16 }, 13 17 "devDependencies": { ··· 76 80 77 81 "@ast-grep/cli-win32-x64-msvc": ["@ast-grep/cli-win32-x64-msvc@0.39.9", "", { "os": "win32", "cpu": "x64" }, "sha512-+Kopx/NUEmFORNmQ/nsHXvnqoOuj45vCrDhQ/cj01D4AoeQ9MxbzVAk8sjSJsJbazDaV9if41+HTAhpVWhC4CQ=="], 78 82 83 + "@effect/cli": ["@effect/cli@0.73.2", "", { "dependencies": { "ini": "^4.1.3", "toml": "^3.0.0", "yaml": "^2.5.0" }, "peerDependencies": { "@effect/platform": "^0.94.3", "@effect/printer": "^0.47.0", "@effect/printer-ansi": "^0.47.0", "effect": "^3.19.16" } }, "sha512-K8IJo81+qa1LU8dhxcDU4QO/bIjL/dPd3zUOSCpLiuUNz8Y3/T+WNs3GqIXEhMfCFMSlRZERN0YgmtRlEZUREA=="], 84 + 79 85 "@effect/cluster": ["@effect/cluster@0.56.4", "", { "dependencies": { "kubernetes-types": "^1.30.0" }, "peerDependencies": { "@effect/platform": "^0.94.5", "@effect/rpc": "^0.73.1", "@effect/sql": "^0.49.0", "@effect/workflow": "^0.16.0", "effect": "^3.19.17" } }, "sha512-7Je5/JlbZOlsSxsbKjr97dJed2cNGWsb+TLNgMcr5mRDbcWlFOTUGvsrisEJV6waosYLIg+2omPdvnvRoYKdhA=="], 80 86 81 87 "@effect/experimental": ["@effect/experimental@0.58.0", "", { "dependencies": { "uuid": "^11.0.3" }, "peerDependencies": { "@effect/platform": "^0.94.0", "effect": "^3.19.13", "ioredis": "^5", "lmdb": "^3" }, "optionalPeers": ["ioredis", "lmdb"] }, "sha512-IEP9sapjF6rFy5TkoqDPc86st/fnqUfjT7Xa3pWJrFGr1hzaMXHo+mWsYOZS9LAOVKnpHuVziDK97EP5qsCHVA=="], ··· 86 92 87 93 "@effect/platform-node-shared": ["@effect/platform-node-shared@0.57.1", "", { "dependencies": { "@parcel/watcher": "^2.5.1", "multipasta": "^0.2.7", "ws": "^8.18.2" }, "peerDependencies": { "@effect/cluster": "^0.56.1", "@effect/platform": "^0.94.2", "@effect/rpc": "^0.73.0", 
"@effect/sql": "^0.49.0", "effect": "^3.19.15" } }, "sha512-oX/bApMdoKsyrDiNdJxo7U9Rz1RXsjRv+ecfAPp1qGlSdGIo32wVRvJ2XCHqYj0sqaYJS0pU0/GCulRfVGuJag=="], 88 94 95 + "@effect/printer": ["@effect/printer@0.47.0", "", { "peerDependencies": { "@effect/typeclass": "^0.38.0", "effect": "^3.19.0" } }, "sha512-VgR8e+YWWhMEAh9qFOjwiZ3OXluAbcVLIOtvp2S5di1nSrPOZxj78g8LE77JSvyfp5y5bS2gmFW+G7xD5uU+2Q=="], 96 + 97 + "@effect/printer-ansi": ["@effect/printer-ansi@0.47.0", "", { "dependencies": { "@effect/printer": "^0.47.0" }, "peerDependencies": { "@effect/typeclass": "^0.38.0", "effect": "^3.19.0" } }, "sha512-tDEQ9XJpXDNYoWMQJHFRMxKGmEOu6z32x3Kb8YLOV5nkauEKnKmWNs7NBp8iio/pqoJbaSwqDwUg9jXVquxfWQ=="], 98 + 89 99 "@effect/rpc": ["@effect/rpc@0.73.2", "", { "dependencies": { "msgpackr": "^1.11.4" }, "peerDependencies": { "@effect/platform": "^0.94.5", "effect": "^3.19.18" } }, "sha512-td7LHDgBOYKg+VgGWEelD8rSAmvjXz7am17vfxZROX5qIYuvH7drL/z4p5xQFadhHZ7DYdlFpqdO9ggc77OCIw=="], 90 100 91 101 "@effect/schema": ["@effect/schema@0.75.5", "", { "dependencies": { "fast-check": "^3.21.0" }, "peerDependencies": { "effect": "^3.9.2" } }, "sha512-TQInulTVCuF+9EIbJpyLP6dvxbQJMphrnRqgexm/Ze39rSjfhJuufF7XvU3SxTgg3HnL7B/kpORTJbHhlE6thw=="], 92 102 93 103 "@effect/sql": ["@effect/sql@0.49.0", "", { "dependencies": { "uuid": "^11.0.3" }, "peerDependencies": { "@effect/experimental": "^0.58.0", "@effect/platform": "^0.94.0", "effect": "^3.19.13" } }, "sha512-9UEKR+z+MrI/qMAmSvb/RiD9KlgIazjZUCDSpwNgm0lEK9/Q6ExEyfziiYFVCPiptp52cBw8uBHRic8hHnwqXA=="], 104 + 105 + "@effect/typeclass": ["@effect/typeclass@0.38.0", "", { "peerDependencies": { "effect": "^3.19.0" } }, "sha512-lMUcJTRtG8KXhXoczapZDxbLK5os7M6rn0zkvOgncJW++A0UyelZfMVMKdT5R+fgpZcsAU/1diaqw3uqLJwGxA=="], 94 106 95 107 "@effect/workflow": ["@effect/workflow@0.16.0", "", { "peerDependencies": { "@effect/experimental": "^0.58.0", "@effect/platform": "^0.94.0", "@effect/rpc": "^0.73.0", "effect": "^3.19.13" } }, 
"sha512-MiAdlxx3TixkgHdbw+Yf1Z3tHAAE0rOQga12kIydJqj05Fnod+W/I+kQGRMY/XWRg+QUsVxhmh1qTr7Ype6lrw=="], 96 108 ··· 224 236 225 237 "find-my-way-ts": ["find-my-way-ts@0.1.6", "", {}, "sha512-a85L9ZoXtNAey3Y6Z+eBWW658kO/MwR7zIafkIUPUMf3isZG0NCs2pjW2wtjxAKuJPxMAsHUIP4ZPGv0o5gyTA=="], 226 238 239 + "ini": ["ini@4.1.3", "", {}, "sha512-X7rqawQBvfdjS10YU1y1YVreA3SsLrW9dX2CewP2EbBJM4ypVNLDkO5y04gejPwKIY9lR+7r9gn3rFPt/kmWFg=="], 240 + 227 241 "is-extglob": ["is-extglob@2.1.1", "", {}, "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ=="], 228 242 229 243 "is-glob": ["is-glob@4.0.3", "", { "dependencies": { "is-extglob": "^2.1.1" } }, "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg=="], ··· 250 264 251 265 "tinypool": ["tinypool@2.0.0", "", {}, "sha512-/RX9RzeH2xU5ADE7n2Ykvmi9ED3FBGPAjw9u3zucrNNaEBIO0HPSYgL0NT7+3p147ojeSdaVu08F6hjpv31HJg=="], 252 266 267 + "toml": ["toml@3.0.0", "", {}, "sha512-y/mWCZinnvxjTKYhJ+pYxwD0mRLVvOtdS2Awbgxln6iEnt4rk0yBxeSBHkGJcPucRiG0e55mwWp+g/05rsrd6w=="], 268 + 253 269 "uuid": ["uuid@11.1.0", "", { "bin": { "uuid": "dist/esm/bin/uuid" } }, "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A=="], 254 270 255 271 "ws": ["ws@8.19.0", "", { "peerDependencies": { "bufferutil": "^4.0.1", "utf-8-validate": ">=5.0.2" }, "optionalPeers": ["bufferutil", "utf-8-validate"] }, "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg=="], 272 + 273 + "yaml": ["yaml@2.8.2", "", { "bin": { "yaml": "bin.mjs" } }, "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A=="], 256 274 } 257 275 }

+5 -1

package.json

··· 12 12 "lint:ast-grep:test": "ast-grep test --config .ast-grep/sgconfig.yml --test-dir .ast-grep/tests --skip-snapshot-tests", 13 13 "lint:ast-grep": "ast-grep scan --config .ast-grep/sgconfig.yml packages/*/src --error", 14 14 "lint:effect": "ast-grep scan --config .ast-grep/sgconfig.yml packages/*/src/internal packages/*/src/domain packages/*/src/runtime --error --filter 'no-(raw-promise|try-catch|throw|dot-then|any|bun-globals|node-imports|dynamic-import)'", 15 - "lint:boundary": "ast-grep scan --config .ast-grep/sgconfig.yml packages/*/src --error --filter 'no-(interface-outside-public|promise-outside-public|interface-for-domain-models|effect-runpromise|runtime-runpromise-outside-boundary|public-import-internal)'", 15 + "lint:boundary": "ast-grep scan --config .ast-grep/sgconfig.yml packages/*/src --error --filter 'no-(interface-outside-public|promise-outside-public|interface-for-domain-models|effect-runpromise|runtime-runpromise-outside-boundary|public-import-internal|dynamic-import)'", 16 16 "lint:runtime-safety": "ast-grep scan --config .ast-grep/sgconfig.yml packages/*/src/internal packages/*/src/domain packages/*/src/runtime --error --filter 'no-(json-parse-outside-codec|shell-string-command|process-env-outside-config|date-now-outside-clock|math-random-outside-random)'", 17 17 "lint:exports": "bun run scripts/check-exports.ts", 18 18 "format": "oxfmt . --write", ··· 20 20 "check": "bun run lint:ast-grep:test && bun run lint:effect && bun run lint:boundary && bun run lint:runtime-safety && bun run lint:exports && bun run lint:ast-grep && bun run lint && bun run format:check && bun run typecheck && bun test" 21 21 }, 22 22 "dependencies": { 23 + "@effect/cli": "^0.73.2", 23 24 "@effect/platform": "^0.94.5", 24 25 "@effect/platform-bun": "^0.87.1", 26 + "@effect/printer": "^0.47.0", 27 + "@effect/printer-ansi": "^0.47.0", 25 28 "@effect/schema": "^0.75.5", 29 + "@effect/typeclass": "^0.38.0", 26 30 "effect": "^3.19.19" 27 31 }, 28 32 "devDependencies": {

+2

packages/cli/src/bin/mill.ts

··· 1 + #!/usr/bin/env bun 2 + 1 3 import { runCli } from "../public/index.api"; 2 4 3 5 const code = await runCli(process.argv.slice(2));

+47 -42

packages/cli/src/public/index.api.test.ts

··· 21 21 models: Schema.Array(Schema.String), 22 22 }), 23 23 }), 24 + executors: Schema.Record({ 25 + key: Schema.String, 26 + value: Schema.Struct({ 27 + description: Schema.String, 28 + }), 29 + }), 24 30 authoring: Schema.Struct({ 25 31 instructions: Schema.String, 26 32 }), ··· 159 165 }), 160 166 ); 161 167 168 + const WatchEventEnvelope = Schema.parseJson( 169 + Schema.Struct({ 170 + type: Schema.String, 171 + runId: Schema.String, 172 + }), 173 + ); 174 + 162 175 describe("runCli", () => { 163 - it("writes machine payload to stdout only in --json mode", async () => { 176 + it("writes machine payload to stdout for discovery --json", async () => { 164 177 const stdout: Array<string> = []; 165 178 const stderr: Array<string> = []; 166 179 167 - const code = await runCli(["--help", "--json"], { 180 + const code = await runCli(["discovery", "--json"], { 168 181 cwd: "/workspace/repo", 169 182 homeDirectory: "/Users/tester", 170 183 pathExists: async () => false, ··· 184 197 185 198 const payload = Schema.decodeUnknownSync(DiscoveryEnvelope)(stdout[0]); 186 199 expect(payload.discoveryVersion).toBe(1); 187 - expect(payload.drivers.default?.models).toEqual([ 200 + expect(payload.drivers.pi?.models).toEqual([ 188 201 "openai/gpt-5.3-codex", 189 202 "anthropic/claude-sonnet-4-6", 190 203 ]); 191 204 expect(payload.programApi.spawnRequired).toEqual(["agent", "systemPrompt", "prompt"]); 192 205 expect(payload.drivers.codex?.models).toEqual(["openai/gpt-5.3-codex"]); 193 - }); 194 - 195 - it("routes human help text to stdout in non-json mode", async () => { 196 - const stdout: Array<string> = []; 197 - const stderr: Array<string> = []; 198 - 199 - const code = await runCli(["--help"], { 200 - cwd: "/workspace/repo", 201 - homeDirectory: "/Users/tester", 202 - pathExists: async () => false, 203 - io: { 204 - stdout: (line) => { 205 - stdout.push(line); 206 - }, 207 - stderr: (line) => { 208 - stderr.push(line); 209 - }, 210 - }, 211 - }); 212 - 213 - 
expect(code).toBe(0); 214 - expect(stdout).toHaveLength(1); 215 - expect(stderr).toHaveLength(0); 216 - expect(stdout[0]).toContain("mill — Effect-first orchestration runtime"); 206 + expect(payload.executors.direct?.description).toBe("Local direct executor"); 207 + expect(payload.executors.vm).toBeUndefined(); 217 208 }); 218 209 219 210 it("executes run --sync and resolves status for persisted runId", async () => { ··· 258 249 259 250 const runPayload = Schema.decodeUnknownSync(RunSyncEnvelope)(runStdout[0]); 260 251 expect(runPayload.run.status).toBe("complete"); 261 - expect(runPayload.run.driver).toBe("default"); 252 + expect(runPayload.run.driver).toBe("pi"); 262 253 expect(runPayload.run.executor).toBe("direct"); 263 254 expect(runPayload.result.status).toBe("complete"); 264 255 expect(runPayload.result.spawns).toHaveLength(1); ··· 287 278 const statusPayload = Schema.decodeUnknownSync(StatusEnvelope)(statusStdout[0]); 288 279 expect(statusPayload.id).toBe(runPayload.run.id); 289 280 expect(statusPayload.status).toBe("complete"); 290 - expect(statusPayload.driver).toBe("default"); 281 + expect(statusPayload.driver).toBe("pi"); 291 282 expect(statusPayload.executor).toBe("direct"); 292 283 } finally { 293 284 await rm(tempDirectory, { recursive: true, force: true }); ··· 315 306 try { 316 307 const runStdout: Array<string> = []; 317 308 const runCode = await runCli( 318 - ["run", programPath, "--sync", "--json", "--driver", "codex", "--executor", "vm"], 309 + ["run", programPath, "--sync", "--json", "--driver", "codex", "--executor", "direct"], 319 310 { 320 311 cwd: tempDirectory, 321 312 homeDirectory, ··· 333 324 334 325 const payload = Schema.decodeUnknownSync(RunSyncEnvelope)(runStdout[0]); 335 326 expect(payload.run.driver).toBe("codex"); 336 - expect(payload.run.executor).toBe("vm"); 327 + expect(payload.run.executor).toBe("direct"); 337 328 expect(payload.result.spawns[0]?.driver).toBe("codex"); 338 329 } finally { 339 330 await rm(tempDirectory, { 
recursive: true, force: true }); ··· 364 355 cwd: tempDirectory, 365 356 homeDirectory, 366 357 pathExists: async (path) => path === join(tempDirectory, "mill.config.ts"), 367 - loadConfigOverrides: async () => ({ 368 - defaultDriver: "claude", 369 - defaultExecutor: "vm", 358 + loadConfigModule: async () => ({ 359 + default: { 360 + defaultDriver: "claude", 361 + defaultExecutor: "direct", 362 + }, 370 363 }), 371 364 io: { 372 365 stdout: (line) => { ··· 380 373 381 374 const payload = Schema.decodeUnknownSync(RunSyncEnvelope)(runStdout[0]); 382 375 expect(payload.run.driver).toBe("claude"); 383 - expect(payload.run.executor).toBe("vm"); 376 + expect(payload.run.executor).toBe("direct"); 384 377 expect(payload.result.spawns[0]?.driver).toBe("claude"); 385 378 } finally { 386 379 await rm(tempDirectory, { recursive: true, force: true }); ··· 594 587 expect(watchStdout.length).toBeGreaterThan(0); 595 588 596 589 for (const line of watchStdout) { 597 - const parsed = JSON.parse(line) as { readonly type?: string; readonly runId?: string }; 590 + const parsed = Schema.decodeUnknownSync(WatchEventEnvelope)(line); 598 591 expect(parsed.runId).toBe(runPayload.run.id); 599 592 expect(typeof parsed.type).toBe("string"); 600 593 } ··· 685 678 686 679 await writeFile( 687 680 programPath, 688 - [ 689 - "await new Promise((resolve) => setTimeout(resolve, 400));", 690 - "return 'done';", 691 - ].join("\n"), 681 + ["await new Promise((resolve) => setTimeout(resolve, 400));", "return 'done';"].join("\n"), 692 682 "utf-8", 693 683 ); 694 684 ··· 769 759 expect(terminalEvents).toHaveLength(1); 770 760 expect(terminalEvents[0]?.type).toBe("run:cancelled"); 771 761 772 - await new Promise((resolve) => setTimeout(resolve, 450)); 762 + const statusAfterCancelStdout: Array<string> = []; 763 + const statusAfterCancelCode = await runCli(["status", submittedRun.runId, "--json"], { 764 + cwd: tempDirectory, 765 + homeDirectory, 766 + pathExists: async () => false, 767 + io: { 768 + stdout: 
(line) => { 769 + statusAfterCancelStdout.push(line); 770 + }, 771 + stderr: () => undefined, 772 + }, 773 + }); 774 + 775 + expect(statusAfterCancelCode).toBe(0); 776 + const cancelledStatus = Schema.decodeUnknownSync(StatusEnvelope)(statusAfterCancelStdout[0]); 777 + expect(cancelledStatus.status).toBe("cancelled"); 773 778 } finally { 774 779 await rm(tempDirectory, { recursive: true, force: true }); 775 780 } ··· 910 915 id: runId, 911 916 status: "running", 912 917 programPath: "/tmp/program.ts", 913 - driver: "default", 918 + driver: "pi", 914 919 executor: "direct", 915 920 createdAt: "2026-02-23T20:00:00.000Z", 916 921 updatedAt: "2026-02-23T20:00:00.000Z",

+332 -232

packages/cli/src/public/index.api.ts

··· 1 - import * as Command from "@effect/platform/Command"; 1 + import { Args, Command as CliCommand, Options, ValidationError } from "@effect/cli"; 2 + import * as PlatformCommand from "@effect/platform/Command"; 2 3 import * as FileSystem from "@effect/platform/FileSystem"; 3 4 import * as BunContext from "@effect/platform-bun/BunContext"; 4 - import { Effect, Runtime, Scope } from "effect"; 5 + import { Effect, Option, Runtime, Scope } from "effect"; 5 6 import { 6 7 cancelRun, 7 8 createDiscoveryPayload, ··· 15 16 submitRun, 16 17 waitForRun, 17 18 watchRun, 18 - type ConfigOverrides, 19 19 type LaunchWorkerInput, 20 20 } from "@mill/core"; 21 21 import { createClaudeDriverRegistration } from "@mill/driver-claude"; ··· 32 32 readonly homeDirectory?: string; 33 33 readonly runsDirectory?: string; 34 34 readonly pathExists?: (path: string) => Promise<boolean>; 35 - readonly loadConfigOverrides?: (path: string) => Promise<ConfigOverrides>; 35 + readonly loadConfigModule?: (path: string) => Promise<unknown>; 36 36 readonly launchWorker?: (input: LaunchWorkerInput) => Promise<void>; 37 37 readonly io?: CliIo; 38 38 } 39 39 40 + interface CliExit { 41 + readonly _tag: "CliExit"; 42 + readonly code: number; 43 + } 44 + 40 45 const runtime = Runtime.defaultRuntime; 41 46 42 47 const defaultIo: CliIo = { ··· 56 61 }, 57 62 }); 58 63 59 - const createVmExecutor = () => ({ 60 - description: "VM-style executor placeholder", 61 - runtime: { 62 - name: "vm", 63 - runProgram: (input: { readonly execute: Effect.Effect<unknown, unknown> }) => input.execute, 64 - }, 65 - }); 66 - 67 64 const defaultConfig = defineConfig({ 68 - defaultDriver: "default", 65 + defaultDriver: "pi", 69 66 defaultExecutor: "direct", 70 67 defaultModel: "openai/gpt-5.3-codex", 71 68 drivers: { 72 - default: processDriver(createPiDriverRegistration()), 69 + pi: processDriver(createPiDriverRegistration()), 73 70 claude: processDriver(createClaudeDriverRegistration()), 74 71 codex: 
processDriver(createCodexDriverRegistration()), 75 72 }, 76 73 executors: { 77 74 direct: createDirectExecutor(), 78 - vm: createVmExecutor(), 79 75 }, 80 76 extensions: [], 81 77 authoring: { ··· 84 80 }, 85 81 }); 86 82 87 - const readFlagValue = (argv: ReadonlyArray<string>, flag: string): string | undefined => { 88 - const index = argv.indexOf(flag); 89 - 90 - if (index < 0) { 91 - return undefined; 92 - } 93 - 94 - return argv[index + 1]; 95 - }; 96 - 97 - const parseTimeoutSeconds = (argv: ReadonlyArray<string>): number | undefined => { 98 - const value = readFlagValue(argv, "--timeout"); 99 - 100 - if (value === undefined) { 101 - return undefined; 102 - } 103 - 104 - const parsed = Number.parseFloat(value); 105 - 106 - if (!Number.isFinite(parsed) || parsed <= 0) { 107 - return undefined; 108 - } 109 - 110 - return parsed; 111 - }; 112 - 113 83 const runWithBunContext = <A, E>(effect: Effect.Effect<A, E, BunContext.BunContext>): Promise<A> => 114 84 Runtime.runPromise(runtime)(Effect.provide(effect, BunContext.layer)); 115 85 116 86 const millBinPath = decodeURIComponent(new URL("../bin/mill.ts", import.meta.url).pathname); 117 87 118 88 const launchDetachedWorker = async (input: LaunchWorkerInput): Promise<void> => { 119 - const workerCommand = Command.make( 89 + const workerCommand = PlatformCommand.make( 120 90 process.execPath, 121 91 "run", 122 92 millBinPath, ··· 132 102 "--executor", 133 103 input.executorName, 134 104 ).pipe( 135 - Command.workingDirectory(input.cwd), 136 - Command.stdin("ignore"), 137 - Command.stdout("ignore"), 138 - Command.stderr("ignore"), 105 + PlatformCommand.workingDirectory(input.cwd), 106 + PlatformCommand.stdin("ignore"), 107 + PlatformCommand.stdout("ignore"), 108 + PlatformCommand.stderr("ignore"), 139 109 ); 140 110 141 111 await runWithBunContext( 142 112 Effect.gen(function* () { 143 113 const detachedScope = yield* Scope.make(); 144 114 145 - yield* Scope.extend(Command.start(workerCommand), detachedScope); 115 + 
yield* Scope.extend(PlatformCommand.start(workerCommand), detachedScope); 146 116 }), 147 117 ); 148 118 }; 149 119 120 + const optionalTextOption = (name: string) => Options.text(name).pipe(Options.optional); 121 + 122 + const fromOption = <A>(value: Option.Option<A>): A | undefined => 123 + Option.isSome(value) ? value.value : undefined; 124 + 125 + const toCliEffect = (program: Promise<number>) => 126 + Effect.flatMap( 127 + Effect.promise(() => program), 128 + (code) => 129 + code === 0 130 + ? Effect.void 131 + : Effect.fail<CliExit>({ 132 + _tag: "CliExit", 133 + code, 134 + }), 135 + ); 136 + 137 + const formatUnknownError = (error: unknown): string => { 138 + if (error instanceof Error) { 139 + return error.message; 140 + } 141 + 142 + return String(error); 143 + }; 144 + 145 + interface RunCommandInput { 146 + readonly program: string; 147 + readonly json: boolean; 148 + readonly sync: boolean; 149 + readonly runsDir: Option.Option<string>; 150 + readonly driver: Option.Option<string>; 151 + readonly executor: Option.Option<string>; 152 + } 153 + 150 154 const runCommand = async ( 151 - argv: ReadonlyArray<string>, 155 + command: RunCommandInput, 152 156 options: RunCliOptions, 153 157 io: CliIo, 154 158 ): Promise<number> => { 155 - const programPath = argv[0]; 156 - 157 - if (programPath === undefined) { 158 - io.stderr("Usage: mill run <program.ts> [--json] [--sync] [--driver] [--executor]"); 159 - return 1; 160 - } 161 - 162 159 const runInput = { 163 160 defaults: defaultConfig, 164 - programPath, 161 + programPath: command.program, 165 162 cwd: options.cwd, 166 163 homeDirectory: options.homeDirectory, 167 - runsDirectory: readFlagValue(argv, "--runs-dir") ?? options.runsDirectory, 168 - driverName: readFlagValue(argv, "--driver"), 169 - executorName: readFlagValue(argv, "--executor"), 164 + runsDirectory: fromOption(command.runsDir) ?? 
options.runsDirectory, 165 + driverName: fromOption(command.driver), 166 + executorName: fromOption(command.executor), 170 167 pathExists: options.pathExists, 171 - loadConfigOverrides: options.loadConfigOverrides, 168 + loadConfigModule: options.loadConfigModule, 172 169 launchWorker: options.launchWorker ?? launchDetachedWorker, 173 170 } as const; 174 171 175 - if (argv.includes("--sync")) { 172 + if (command.sync) { 176 173 const output = await runProgramSync(runInput); 177 174 178 - if (argv.includes("--json")) { 175 + if (command.json) { 179 176 io.stdout(JSON.stringify(output)); 180 177 return 0; 181 178 } ··· 186 183 187 184 const submittedRun = await submitRun(runInput); 188 185 189 - if (argv.includes("--json")) { 186 + if (command.json) { 190 187 io.stdout( 191 188 JSON.stringify({ 192 189 runId: submittedRun.id, ··· 201 198 return 0; 202 199 }; 203 200 201 + interface WorkerCommandInput { 202 + readonly runId: string; 203 + readonly program: string; 204 + readonly runsDir: Option.Option<string>; 205 + readonly driver: Option.Option<string>; 206 + readonly executor: Option.Option<string>; 207 + readonly json: boolean; 208 + } 209 + 204 210 const workerCommand = async ( 205 - argv: ReadonlyArray<string>, 211 + command: WorkerCommandInput, 206 212 options: RunCliOptions, 207 213 io: CliIo, 208 214 ): Promise<number> => { 209 - const runId = readFlagValue(argv, "--run-id"); 210 - const programPath = readFlagValue(argv, "--program"); 211 - 212 - if (runId === undefined || programPath === undefined) { 213 - io.stderr( 214 - "Usage: mill _worker --run-id <id> --program <abs-path> [--runs-dir] [--driver] [--executor]", 215 - ); 216 - return 1; 217 - } 218 - 219 215 const output = await runWorker({ 220 216 defaults: defaultConfig, 221 - runId, 222 - programPath, 217 + runId: command.runId, 218 + programPath: command.program, 223 219 cwd: options.cwd, 224 220 homeDirectory: options.homeDirectory, 225 - runsDirectory: readFlagValue(argv, "--runs-dir") ?? 
options.runsDirectory, 226 - driverName: readFlagValue(argv, "--driver"), 227 - executorName: readFlagValue(argv, "--executor"), 221 + runsDirectory: fromOption(command.runsDir) ?? options.runsDirectory, 222 + driverName: fromOption(command.driver), 223 + executorName: fromOption(command.executor), 228 224 pathExists: options.pathExists, 229 - loadConfigOverrides: options.loadConfigOverrides, 225 + loadConfigModule: options.loadConfigModule, 230 226 }); 231 227 232 - if (argv.includes("--json")) { 228 + if (command.json) { 233 229 io.stdout(JSON.stringify(output)); 234 230 } 235 231 ··· 243 239 'import { createCodexDriverRegistration } from "@mill/driver-codex";', 244 240 "", 245 241 "export default defineConfig({", 246 - ' defaultDriver: "default",', 242 + ' defaultDriver: "pi",', 247 243 ' defaultExecutor: "direct",', 248 244 ' defaultModel: "openai/gpt-5.3-codex",', 249 245 " drivers: {", 250 - " default: processDriver(createPiDriverRegistration()),", 246 + " pi: processDriver(createPiDriverRegistration()),", 251 247 " claude: processDriver(createClaudeDriverRegistration()),", 252 248 " codex: processDriver(createCodexDriverRegistration()),", 253 249 " },", ··· 259 255 " runProgram: ({ execute }) => execute,", 260 256 " },", 261 257 " },", 262 - " vm: {", 263 - ' description: "VM-style executor placeholder",', 264 - " runtime: {", 265 - ' name: "vm",', 266 - " runProgram: ({ execute }) => execute,", 267 - " },", 268 - " },", 258 + " // Future: add sandboxed executors here.", 269 259 " },", 270 260 " extensions: [],", 271 261 " authoring: {", ··· 287 277 io.stdout(`Created ${configPath}`); 288 278 return 0; 289 279 }; 280 + 281 + interface StatusCommandInput { 282 + readonly runId: string; 283 + readonly json: boolean; 284 + readonly runsDir: Option.Option<string>; 285 + readonly driver: Option.Option<string>; 286 + } 290 287 291 288 const statusCommand = async ( 292 - argv: ReadonlyArray<string>, 289 + command: StatusCommandInput, 293 290 options: RunCliOptions, 
294 291 io: CliIo, 295 292 ): Promise<number> => { 296 - const runId = argv[0]; 297 - 298 - if (runId === undefined) { 299 - io.stderr("Usage: mill status <runId> [--json]"); 300 - return 1; 301 - } 302 - 303 293 const output = await getRunStatus({ 304 294 defaults: defaultConfig, 305 - runId, 295 + runId: command.runId, 306 296 cwd: options.cwd, 307 297 homeDirectory: options.homeDirectory, 308 - runsDirectory: readFlagValue(argv, "--runs-dir") ?? options.runsDirectory, 309 - driverName: readFlagValue(argv, "--driver"), 298 + runsDirectory: fromOption(command.runsDir) ?? options.runsDirectory, 299 + driverName: fromOption(command.driver), 310 300 pathExists: options.pathExists, 311 - loadConfigOverrides: options.loadConfigOverrides, 301 + loadConfigModule: options.loadConfigModule, 312 302 }); 313 303 314 - if (argv.includes("--json")) { 304 + if (command.json) { 315 305 io.stdout(JSON.stringify(output)); 316 306 return 0; 317 307 } ··· 319 309 io.stdout(`run ${output.id} status=${output.status}`); 320 310 return 0; 321 311 }; 312 + 313 + interface WaitCommandInput { 314 + readonly runId: string; 315 + readonly timeout: number; 316 + readonly json: boolean; 317 + readonly runsDir: Option.Option<string>; 318 + readonly driver: Option.Option<string>; 319 + } 322 320 323 321 const waitCommand = async ( 324 - argv: ReadonlyArray<string>, 322 + command: WaitCommandInput, 325 323 options: RunCliOptions, 326 324 io: CliIo, 327 325 ): Promise<number> => { 328 - const runId = argv[0]; 329 - const timeoutSeconds = parseTimeoutSeconds(argv); 330 - const isJson = argv.includes("--json"); 331 - 332 - if (runId === undefined || timeoutSeconds === undefined) { 333 - io.stderr("Usage: mill wait <runId> --timeout <seconds> [--json]"); 326 + if (!Number.isFinite(command.timeout) || command.timeout <= 0) { 327 + io.stderr("--timeout must be a positive number."); 334 328 return 1; 335 329 } 336 330 331 + const timeoutSeconds = command.timeout; 332 + 337 333 const [waitResult] = await 
Promise.allSettled([ 338 334 waitForRun({ 339 335 defaults: defaultConfig, 340 - runId, 336 + runId: command.runId, 341 337 timeoutSeconds, 342 338 cwd: options.cwd, 343 339 homeDirectory: options.homeDirectory, 344 - runsDirectory: readFlagValue(argv, "--runs-dir") ?? options.runsDirectory, 345 - driverName: readFlagValue(argv, "--driver"), 340 + runsDirectory: fromOption(command.runsDir) ?? options.runsDirectory, 341 + driverName: fromOption(command.driver), 346 342 pathExists: options.pathExists, 347 - loadConfigOverrides: options.loadConfigOverrides, 343 + loadConfigModule: options.loadConfigModule, 348 344 }), 349 345 ]); 350 346 351 347 if (waitResult.status === "fulfilled") { 352 - if (isJson) { 348 + if (command.json) { 353 349 io.stdout(JSON.stringify(waitResult.value)); 354 350 return 0; 355 351 } ··· 364 360 }; 365 361 366 362 if (waitError._tag === "WaitTimeoutError") { 367 - const message = `Timeout waiting for run ${runId} after ${timeoutSeconds}s.`; 363 + const message = `Timeout waiting for run ${command.runId} after ${timeoutSeconds}s.`; 368 364 369 - if (isJson) { 365 + if (command.json) { 370 366 io.stdout( 371 367 JSON.stringify({ 372 368 ok: false, 373 369 error: { 374 370 _tag: "WaitTimeoutError", 375 - runId, 371 + runId: command.runId, 376 372 timeoutSeconds, 377 373 message, 378 374 }, ··· 387 383 388 384 const fallbackMessage = waitError.message ?? 
String(waitResult.reason); 389 385 390 - if (isJson) { 386 + if (command.json) { 391 387 io.stdout( 392 388 JSON.stringify({ 393 389 ok: false, 394 390 error: { 395 391 _tag: "WaitError", 396 - runId, 392 + runId: command.runId, 397 393 timeoutSeconds, 398 394 message: fallbackMessage, 399 395 }, ··· 406 402 return 1; 407 403 }; 408 404 405 + interface WatchCommandInput { 406 + readonly runId: string; 407 + readonly json: boolean; 408 + readonly raw: boolean; 409 + readonly runsDir: Option.Option<string>; 410 + readonly driver: Option.Option<string>; 411 + } 412 + 409 413 const watchCommand = async ( 410 - argv: ReadonlyArray<string>, 414 + command: WatchCommandInput, 411 415 options: RunCliOptions, 412 416 io: CliIo, 413 417 ): Promise<number> => { 414 - const runId = argv[0]; 415 - 416 - if (runId === undefined) { 417 - io.stderr("Usage: mill watch <runId> [--json] [--raw]"); 418 - return 1; 419 - } 420 - 421 418 await watchRun({ 422 419 defaults: defaultConfig, 423 - runId, 424 - raw: argv.includes("--raw"), 420 + runId: command.runId, 421 + raw: command.raw, 425 422 cwd: options.cwd, 426 423 homeDirectory: options.homeDirectory, 427 - runsDirectory: readFlagValue(argv, "--runs-dir") ?? options.runsDirectory, 428 - driverName: readFlagValue(argv, "--driver"), 424 + runsDirectory: fromOption(command.runsDir) ?? 
options.runsDirectory, 425 + driverName: fromOption(command.driver), 429 426 pathExists: options.pathExists, 430 - loadConfigOverrides: options.loadConfigOverrides, 427 + loadConfigModule: options.loadConfigModule, 431 428 onEvent: (line) => { 432 429 io.stdout(line); 433 430 }, ··· 436 433 return 0; 437 434 }; 438 435 436 + interface InspectCommandInput { 437 + readonly ref: string; 438 + readonly json: boolean; 439 + readonly session: boolean; 440 + readonly runsDir: Option.Option<string>; 441 + readonly driver: Option.Option<string>; 442 + } 443 + 439 444 const inspectCommand = async ( 440 - argv: ReadonlyArray<string>, 445 + command: InspectCommandInput, 441 446 options: RunCliOptions, 442 447 io: CliIo, 443 448 ): Promise<number> => { 444 - const ref = argv[0]; 445 - 446 - if (ref === undefined) { 447 - io.stderr("Usage: mill inspect <runId>[.<spawnId>] [--json] [--session]"); 448 - return 1; 449 - } 450 - 451 449 const inspected = await inspectRun({ 452 450 defaults: defaultConfig, 453 - ref, 454 - session: argv.includes("--session"), 451 + ref: command.ref, 452 + session: command.session, 455 453 cwd: options.cwd, 456 454 homeDirectory: options.homeDirectory, 457 - runsDirectory: readFlagValue(argv, "--runs-dir") ?? options.runsDirectory, 458 - driverName: readFlagValue(argv, "--driver"), 455 + runsDirectory: fromOption(command.runsDir) ?? 
options.runsDirectory, 456 + driverName: fromOption(command.driver), 459 457 pathExists: options.pathExists, 460 - loadConfigOverrides: options.loadConfigOverrides, 458 + loadConfigModule: options.loadConfigModule, 461 459 }); 462 460 463 - if (argv.includes("--json")) { 461 + if (command.json) { 464 462 io.stdout(JSON.stringify(inspected)); 465 463 return 0; 466 464 } ··· 469 467 return 0; 470 468 }; 471 469 470 + interface CancelCommandInput { 471 + readonly runId: string; 472 + readonly json: boolean; 473 + readonly runsDir: Option.Option<string>; 474 + readonly driver: Option.Option<string>; 475 + } 476 + 472 477 const cancelCommand = async ( 473 - argv: ReadonlyArray<string>, 478 + command: CancelCommandInput, 474 479 options: RunCliOptions, 475 480 io: CliIo, 476 481 ): Promise<number> => { 477 - const runId = argv[0]; 478 - 479 - if (runId === undefined) { 480 - io.stderr("Usage: mill cancel <runId> [--json]"); 481 - return 1; 482 - } 483 - 484 482 const cancelled = await cancelRun({ 485 483 defaults: defaultConfig, 486 - runId, 484 + runId: command.runId, 487 485 cwd: options.cwd, 488 486 homeDirectory: options.homeDirectory, 489 - runsDirectory: readFlagValue(argv, "--runs-dir") ?? options.runsDirectory, 490 - driverName: readFlagValue(argv, "--driver"), 487 + runsDirectory: fromOption(command.runsDir) ?? 
options.runsDirectory, 488 + driverName: fromOption(command.driver), 491 489 pathExists: options.pathExists, 492 - loadConfigOverrides: options.loadConfigOverrides, 490 + loadConfigModule: options.loadConfigModule, 493 491 }); 494 492 495 - if (argv.includes("--json")) { 493 + if (command.json) { 496 494 io.stdout(JSON.stringify(cancelled)); 497 495 return 0; 498 496 } ··· 501 499 return 0; 502 500 }; 503 501 502 + const RUN_STATUSES = ["pending", "running", "complete", "failed", "cancelled"] as const; 503 + type RunStatus = (typeof RUN_STATUSES)[number]; 504 + 505 + interface LsCommandInput { 506 + readonly json: boolean; 507 + readonly status: Option.Option<RunStatus>; 508 + readonly runsDir: Option.Option<string>; 509 + readonly driver: Option.Option<string>; 510 + } 511 + 504 512 const lsCommand = async ( 505 - argv: ReadonlyArray<string>, 513 + command: LsCommandInput, 506 514 options: RunCliOptions, 507 515 io: CliIo, 508 516 ): Promise<number> => { 509 - const statusFilter = readFlagValue(argv, "--status") as 510 - | "pending" 511 - | "running" 512 - | "complete" 513 - | "failed" 514 - | "cancelled" 515 - | undefined; 516 517 const runs = await listRuns({ 517 518 defaults: defaultConfig, 518 - status: statusFilter, 519 + status: fromOption(command.status), 519 520 cwd: options.cwd, 520 521 homeDirectory: options.homeDirectory, 521 - runsDirectory: readFlagValue(argv, "--runs-dir") ?? options.runsDirectory, 522 - driverName: readFlagValue(argv, "--driver"), 522 + runsDirectory: fromOption(command.runsDir) ?? 
options.runsDirectory, 523 + driverName: fromOption(command.driver), 523 524 pathExists: options.pathExists, 524 - loadConfigOverrides: options.loadConfigOverrides, 525 + loadConfigModule: options.loadConfigModule, 525 526 }); 526 527 527 - if (argv.includes("--json")) { 528 + if (command.json) { 528 529 io.stdout(JSON.stringify(runs)); 529 530 return 0; 530 531 } ··· 538 539 return 0; 539 540 }; 540 541 541 - export const runCli = async ( 542 - argv: ReadonlyArray<string>, 543 - options?: RunCliOptions, 542 + interface DiscoveryCommandInput { 543 + readonly json: boolean; 544 + } 545 + 546 + const discoveryCommand = async ( 547 + command: DiscoveryCommandInput, 548 + options: RunCliOptions, 549 + io: CliIo, 544 550 ): Promise<number> => { 545 - const io = options?.io ?? defaultIo; 546 - const showHelp = argv.length === 0 || argv.includes("--help"); 551 + const payload = await createDiscoveryPayload({ 552 + defaults: defaultConfig, 553 + cwd: options.cwd, 554 + homeDirectory: options.homeDirectory, 555 + pathExists: options.pathExists, 556 + loadConfigModule: options.loadConfigModule, 557 + }); 547 558 548 - if (showHelp) { 549 - const payload = await createDiscoveryPayload({ 550 - defaults: defaultConfig, 551 - cwd: options?.cwd, 552 - homeDirectory: options?.homeDirectory, 553 - pathExists: options?.pathExists, 554 - loadConfigOverrides: options?.loadConfigOverrides, 555 - }); 559 + io.stdout(command.json ? 
JSON.stringify(payload) : JSON.stringify(payload, null, 2)); 560 + return 0; 561 + }; 556 562 557 - if (argv.includes("--json")) { 558 - io.stdout(JSON.stringify(payload)); 559 - return 0; 560 - } 563 + const createCli = (options: RunCliOptions, io: CliIo) => { 564 + const run = CliCommand.make( 565 + "run", 566 + { 567 + program: Args.text({ name: "program.ts" }), 568 + json: Options.boolean("json"), 569 + sync: Options.boolean("sync"), 570 + runsDir: optionalTextOption("runs-dir"), 571 + driver: optionalTextOption("driver"), 572 + executor: optionalTextOption("executor"), 573 + }, 574 + (command) => toCliEffect(runCommand(command, options, io)), 575 + ).pipe(CliCommand.withDescription("Run a mill program.")); 561 576 562 - io.stdout( 563 - [ 564 - "mill — Effect-first orchestration runtime", 565 - "", 566 - `Authoring guidance: ${payload.authoring.instructions}`, 567 - `Registered drivers: ${Object.keys(payload.drivers).join(", ")}`, 568 - `Registered executors: ${Object.keys(payload.executors).join(", ")}`, 569 - "", 570 - "Run `mill --help --json` for machine-readable discovery.", 571 - ].join("\n"), 572 - ); 573 - return 0; 574 - } 577 + const worker = CliCommand.make( 578 + "_worker", 579 + { 580 + runId: Options.text("run-id"), 581 + program: Options.text("program"), 582 + runsDir: optionalTextOption("runs-dir"), 583 + driver: optionalTextOption("driver"), 584 + executor: optionalTextOption("executor"), 585 + json: Options.boolean("json"), 586 + }, 587 + (command) => toCliEffect(workerCommand(command, options, io)), 588 + ).pipe(CliCommand.withDescription("Run the detached worker for an existing run.")); 575 589 576 - if (argv[0] === "run") { 577 - return runCommand(argv.slice(1), options ?? 
{}, io); 578 - } 590 + const status = CliCommand.make( 591 + "status", 592 + { 593 + runId: Args.text({ name: "runId" }), 594 + json: Options.boolean("json"), 595 + runsDir: optionalTextOption("runs-dir"), 596 + driver: optionalTextOption("driver"), 597 + }, 598 + (command) => toCliEffect(statusCommand(command, options, io)), 599 + ).pipe(CliCommand.withDescription("Read the current run status.")); 579 600 580 - if (argv[0] === "_worker") { 581 - return workerCommand(argv.slice(1), options ?? {}, io); 582 - } 601 + const wait = CliCommand.make( 602 + "wait", 603 + { 604 + runId: Args.text({ name: "runId" }), 605 + timeout: Options.float("timeout"), 606 + json: Options.boolean("json"), 607 + runsDir: optionalTextOption("runs-dir"), 608 + driver: optionalTextOption("driver"), 609 + }, 610 + (command) => toCliEffect(waitCommand(command, options, io)), 611 + ).pipe(CliCommand.withDescription("Wait for a run to reach a terminal state.")); 612 + 613 + const watch = CliCommand.make( 614 + "watch", 615 + { 616 + runId: Args.text({ name: "runId" }), 617 + json: Options.boolean("json"), 618 + raw: Options.boolean("raw"), 619 + runsDir: optionalTextOption("runs-dir"), 620 + driver: optionalTextOption("driver"), 621 + }, 622 + (command) => toCliEffect(watchCommand(command, options, io)), 623 + ).pipe(CliCommand.withDescription("Stream run events.")); 624 + 625 + const inspect = CliCommand.make( 626 + "inspect", 627 + { 628 + ref: Args.text({ name: "runId[.spawnId]" }), 629 + json: Options.boolean("json"), 630 + session: Options.boolean("session"), 631 + runsDir: optionalTextOption("runs-dir"), 632 + driver: optionalTextOption("driver"), 633 + }, 634 + (command) => toCliEffect(inspectCommand(command, options, io)), 635 + ).pipe(CliCommand.withDescription("Inspect run, spawn, or session output.")); 583 636 584 - if (argv[0] === "status") { 585 - return statusCommand(argv.slice(1), options ?? 
{}, io); 586 - } 637 + const cancel = CliCommand.make( 638 + "cancel", 639 + { 640 + runId: Args.text({ name: "runId" }), 641 + json: Options.boolean("json"), 642 + runsDir: optionalTextOption("runs-dir"), 643 + driver: optionalTextOption("driver"), 644 + }, 645 + (command) => toCliEffect(cancelCommand(command, options, io)), 646 + ).pipe(CliCommand.withDescription("Cancel a run.")); 587 647 588 - if (argv[0] === "wait") { 589 - return waitCommand(argv.slice(1), options ?? {}, io); 590 - } 648 + const ls = CliCommand.make( 649 + "ls", 650 + { 651 + json: Options.boolean("json"), 652 + status: Options.choice("status", RUN_STATUSES).pipe(Options.optional), 653 + runsDir: optionalTextOption("runs-dir"), 654 + driver: optionalTextOption("driver"), 655 + }, 656 + (command) => toCliEffect(lsCommand(command, options, io)), 657 + ).pipe(CliCommand.withDescription("List runs.")); 591 658 592 - if (argv[0] === "watch") { 593 - return watchCommand(argv.slice(1), options ?? {}, io); 594 - } 659 + const init = CliCommand.make("init", {}, () => toCliEffect(initCommand(options, io))).pipe( 660 + CliCommand.withDescription("Create a starter mill.config.ts."), 661 + ); 595 662 596 - if (argv[0] === "inspect") { 597 - return inspectCommand(argv.slice(1), options ?? {}, io); 598 - } 663 + const discovery = CliCommand.make( 664 + "discovery", 665 + { 666 + json: Options.boolean("json"), 667 + }, 668 + (command) => toCliEffect(discoveryCommand(command, options, io)), 669 + ).pipe(CliCommand.withDescription("Emit discovery metadata for tooling.")); 599 670 600 - if (argv[0] === "cancel") { 601 - return cancelCommand(argv.slice(1), options ?? 
{}, io); 602 - } 671 + return CliCommand.make("mill").pipe( 672 + CliCommand.withDescription("Mill orchestration runtime."), 673 + CliCommand.withSubcommands([ 674 + run, 675 + status, 676 + wait, 677 + watch, 678 + inspect, 679 + cancel, 680 + ls, 681 + init, 682 + discovery, 683 + worker, 684 + ]), 685 + ); 686 + }; 603 687 604 - if (argv[0] === "ls") { 605 - return lsCommand(argv.slice(1), options ?? {}, io); 606 - } 688 + export const runCli = async ( 689 + argv: ReadonlyArray<string>, 690 + options?: RunCliOptions, 691 + ): Promise<number> => { 692 + const resolvedOptions = options ?? {}; 693 + const io = resolvedOptions.io ?? defaultIo; 694 + const command = createCli(resolvedOptions, io); 695 + const run = CliCommand.run(command, { 696 + name: "mill", 697 + version: "0.0.0", 698 + executable: "mill", 699 + }); 607 700 608 - if (argv[0] === "init") { 609 - return initCommand(options ?? {}, io); 610 - } 701 + const codeEffect = run([process.execPath, millBinPath, ...argv]).pipe( 702 + Effect.as(0), 703 + Effect.catchTag("CliExit", (error) => Effect.succeed(error.code)), 704 + Effect.catchIf(ValidationError.isValidationError, () => Effect.succeed(1)), 705 + Effect.catchAll((error) => 706 + Effect.sync(() => { 707 + io.stderr(formatUnknownError(error)); 708 + return 1; 709 + }), 710 + ), 711 + ); 611 712 612 - io.stderr(`Unknown command: ${argv[0]}`); 613 - return 1; 713 + return runWithBunContext(codeEffect); 614 714 };

+64 -10

packages/cli/src/public/index.e2e.test.ts

··· 25 25 models: Schema.Array(Schema.String), 26 26 }), 27 27 }), 28 + executors: Schema.Record({ 29 + key: Schema.String, 30 + value: Schema.Struct({ 31 + description: Schema.String, 32 + }), 33 + }), 28 34 authoring: Schema.Struct({ 29 35 instructions: Schema.String, 30 36 }), ··· 130 136 ), 131 137 ); 132 138 139 + const EventTypeEnvelope = Schema.parseJson( 140 + Schema.Struct({ 141 + type: Schema.String, 142 + }), 143 + ); 144 + 133 145 const commandOutput = (command: Command.Command): Promise<string> => 134 146 Runtime.runPromise(runtime)(Effect.provide(Command.string(command), BunContext.layer)); 135 147 136 148 const commandExitCode = (command: Command.Command): Promise<number> => 137 149 Runtime.runPromise(runtime)(Effect.provide(Command.exitCode(command), BunContext.layer)); 138 150 139 - describe("mill --help --json (e2e)", () => { 151 + describe("mill discovery/help (e2e)", () => { 140 152 it("returns discovery contract payload on stdout", async () => { 141 153 const output = await commandOutput( 142 - Command.make("bun", "run", "packages/cli/src/bin/mill.ts", "--help", "--json"), 154 + Command.make("bun", "run", "packages/cli/src/bin/mill.ts", "discovery", "--json"), 143 155 ); 144 156 145 157 const payload = Schema.decodeUnknownSync(DiscoveryEnvelope)(output); 146 158 expect(payload.discoveryVersion).toBe(1); 147 159 expect(payload.programApi.spawnRequired).toEqual(["agent", "systemPrompt", "prompt"]); 148 - expect(payload.drivers.default?.models).toEqual([ 160 + expect(payload.drivers.pi?.models).toEqual([ 149 161 "openai/gpt-5.3-codex", 150 162 "anthropic/claude-sonnet-4-6", 151 163 ]); 152 164 expect(payload.drivers.claude?.models).toEqual(["anthropic/claude-sonnet-4-6"]); 153 165 expect(payload.drivers.codex?.models).toEqual(["openai/gpt-5.3-codex"]); 166 + expect(payload.executors.direct?.description).toBe("Local direct executor"); 167 + expect(payload.executors.vm).toBeUndefined(); 154 168 
expect(payload.authoring.instructions.length).toBeGreaterThan(0); 155 169 expect(payload.async.submit).toBe("mill run <program.ts> --json"); 156 170 }); 171 + 172 + it("prints top-level help via built-in --help", async () => { 173 + const output = await commandOutput( 174 + Command.make("bun", "run", "packages/cli/src/bin/mill.ts", "--help"), 175 + ); 176 + 177 + expect(output).toContain("USAGE"); 178 + expect(output).toContain("$ mill"); 179 + expect(output).toContain("COMMANDS"); 180 + expect(output).not.toContain("Effect-first"); 181 + }); 182 + 183 + it("prints per-command help via built-in --help", async () => { 184 + const output = await commandOutput( 185 + Command.make("bun", "run", "packages/cli/src/bin/mill.ts", "run", "--help"), 186 + ); 187 + 188 + expect(output).toContain("$ run [--json] [--sync]"); 189 + expect(output).toContain("--driver"); 190 + expect(output).toContain("--executor"); 191 + }); 157 192 }); 158 193 159 194 describe("mill run/status/wait (e2e)", () => { ··· 188 223 "--driver", 189 224 "codex", 190 225 "--executor", 191 - "vm", 226 + "direct", 192 227 "--runs-dir", 193 228 runsDirectory, 194 229 ), ··· 196 231 197 232 const runPayload = Schema.decodeUnknownSync(RunSyncEnvelope)(runOutput); 198 233 expect(runPayload.run.driver).toBe("codex"); 199 - expect(runPayload.run.executor).toBe("vm"); 234 + expect(runPayload.run.executor).toBe("direct"); 200 235 expect(runPayload.result.spawns[0]?.driver).toBe("codex"); 201 236 } finally { 202 237 await rm(tempDirectory, { recursive: true, force: true }); ··· 304 339 .split("\n") 305 340 .map((line) => line.trim()) 306 341 .filter((line) => line.length > 0) 307 - .map((line) => JSON.parse(line) as { readonly type: string }) 342 + .map((line) => Schema.decodeUnknownSync(EventTypeEnvelope)(line)) 308 343 .filter( 309 344 (event) => 310 345 event.type === "run:complete" || ··· 358 393 359 394 const runPayload = Schema.decodeUnknownSync(RunSyncEnvelope)(runOutput); 360 395 
expect(runPayload.run.status).toBe("complete"); 361 - expect(runPayload.run.driver).toBe("default"); 396 + expect(runPayload.run.driver).toBe("pi"); 362 397 expect(runPayload.run.executor).toBe("direct"); 363 398 expect(runPayload.result.status).toBe("complete"); 364 399 expect(runPayload.result.spawns).toHaveLength(2); ··· 486 521 expect(cancelPayload.runId).toBe(cancelRun.runId); 487 522 expect(cancelPayload.status).toBe("cancelled"); 488 523 524 + const waitCancelledOutput = await commandOutput( 525 + Command.make( 526 + "bun", 527 + "run", 528 + "packages/cli/src/bin/mill.ts", 529 + "wait", 530 + cancelRun.runId, 531 + "--timeout", 532 + "8", 533 + "--json", 534 + "--runs-dir", 535 + runsDirectory, 536 + ), 537 + ); 538 + 539 + const waitCancelled = Schema.decodeUnknownSync(StatusEnvelope)(waitCancelledOutput); 540 + expect(waitCancelled.status).toBe("cancelled"); 541 + 489 542 const waitCompleteOutput = await commandOutput( 490 543 Command.make( 491 544 "bun", ··· 524 577 525 578 expect(watchLines.length).toBeGreaterThan(0); 526 579 const watchTerminalCount = watchLines 527 - .map((line) => JSON.parse(line) as { readonly type: string }) 580 + .map((line) => Schema.decodeUnknownSync(EventTypeEnvelope)(line)) 528 581 .filter( 529 582 (event) => 530 583 event.type === "run:complete" || ··· 546 599 ), 547 600 ); 548 601 549 - const inspectedCancelled = Schema.decodeUnknownSync(InspectRunEnvelope)(inspectCancelledOutput); 602 + const inspectedCancelled = 603 + Schema.decodeUnknownSync(InspectRunEnvelope)(inspectCancelledOutput); 550 604 expect(inspectedCancelled.run.status).toBe("cancelled"); 551 605 expect( 552 606 inspectedCancelled.events.filter((event) => event.type === "run:cancelled"), ··· 610 664 id: runId, 611 665 status: "running", 612 666 programPath: "/tmp/program.ts", 613 - driver: "default", 667 + driver: "pi", 614 668 executor: "direct", 615 669 createdAt: "2026-02-23T20:00:00.000Z", 616 670 updatedAt: "2026-02-23T20:00:00.000Z",

+68

packages/core/src/domain/program-host.schema.ts

import * as Schema from "@effect/schema/Schema";
import { SpawnOptions } from "./spawn.schema";

/**
 * Marker string associated with the program-host protocol.
 *
 * NOTE(review): presumably used to tag protocol lines exchanged between a
 * running program and its host — confirm against the program-host runtime.
 */
export const ProgramHostProtocolPrefix = "__MILL_HOST__";

/** Non-empty identifier carried by requests and echoed by responses. */
const correlationId = Schema.NonEmptyString;

/** Fields common to every `kind: "request"` message. */
const requestFields = {
  kind: Schema.Literal("request"),
  requestId: correlationId,
};

/** Fields common to every `kind: "response"` message. */
const responseFields = {
  kind: Schema.Literal("response"),
  requestId: correlationId,
};

/** Discriminator shared by both `kind: "result"` messages. */
const resultKind = Schema.Literal("result");

/** Request whose `input` is a `SpawnOptions` value (`requestType: "spawn"`). */
export const ProgramHostSpawnRequestMessage = Schema.Struct({
  ...requestFields,
  requestType: Schema.Literal("spawn"),
  input: SpawnOptions,
});

/**
 * Request naming an extension and one of its methods, together with an
 * untyped argument list (`requestType: "extension"`).
 */
export const ProgramHostExtensionRequestMessage = Schema.Struct({
  ...requestFields,
  requestType: Schema.Literal("extension"),
  extensionName: Schema.NonEmptyString,
  methodName: Schema.NonEmptyString,
  args: Schema.Array(Schema.Unknown),
});

/** Result message with `ok: true` and an arbitrary `value`. */
export const ProgramHostSuccessResultMessage = Schema.Struct({
  kind: resultKind,
  ok: Schema.Literal(true),
  value: Schema.Unknown,
});

/** Result message with `ok: false` and a textual `message`. */
export const ProgramHostFailureResultMessage = Schema.Struct({
  kind: resultKind,
  ok: Schema.Literal(false),
  message: Schema.String,
});

/** Union of the two request shapes and the two result shapes. */
export const ProgramHostInboundMessage = Schema.Union(
  ProgramHostSpawnRequestMessage,
  ProgramHostExtensionRequestMessage,
  ProgramHostSuccessResultMessage,
  ProgramHostFailureResultMessage,
);

/** Decoded type of {@link ProgramHostInboundMessage}. */
export type ProgramHostInboundMessage = Schema.Schema.Type<typeof ProgramHostInboundMessage>;

/**
 * Decoder that parses its input as JSON and validates the parsed value
 * against {@link ProgramHostInboundMessage}.
 */
export const decodeProgramHostInboundMessage = Schema.decodeUnknown(
  Schema.parseJson(ProgramHostInboundMessage),
);

/** Response with `ok: true`, carrying the originating `requestId` and a `value`. */
export const ProgramHostSuccessResponseMessage = Schema.Struct({
  ...responseFields,
  ok: Schema.Literal(true),
  value: Schema.Unknown,
});

/** Response with `ok: false`, carrying the originating `requestId` and a `message`. */
export const ProgramHostFailureResponseMessage = Schema.Struct({
  ...responseFields,
  ok: Schema.Literal(false),
  message: Schema.String,
});

/** Union of the success and failure response shapes. */
export const ProgramHostResponseMessage = Schema.Union(
  ProgramHostSuccessResponseMessage,
  ProgramHostFailureResponseMessage,
);

/** Decoded type of {@link ProgramHostResponseMessage}. */
export type ProgramHostResponseMessage = Schema.Schema.Type<typeof ProgramHostResponseMessage>;

+10 -6

packages/core/src/internal/engine.effect.test.ts

··· 418 418 419 419 const watchTier1Effect = Effect.scoped( 420 420 Stream.runCollect( 421 - Stream.takeUntil(engine.watch(runId), (event) => 422 - event.type === "run:complete" || 423 - event.type === "run:failed" || 424 - event.type === "run:cancelled", 421 + Stream.takeUntil( 422 + engine.watch(runId), 423 + (event) => 424 + event.type === "run:complete" || 425 + event.type === "run:failed" || 426 + event.type === "run:cancelled", 425 427 ), 426 428 ), 427 429 ); 428 430 429 - const watchRawEffect = Effect.scoped(Stream.runCollect(Stream.take(engine.watchRaw(runId), 2))); 431 + const watchRawEffect = Effect.scoped( 432 + Stream.runCollect(Stream.take(engine.watchRaw(runId), 2)), 433 + ); 430 434 431 435 const executionEffect = engine.runSync({ 432 436 runId, ··· 460 464 expect(rawEvents[0]).toContain("raw:scout"); 461 465 462 466 const eventsFile = await readFile(join(runsDirectory, runId, "events.ndjson"), "utf-8"); 463 - expect(eventsFile.includes("\"type\":\"final\"")).toBe(false); 467 + expect(eventsFile.includes('"type":"final"')).toBe(false); 464 468 } finally { 465 469 await rm(runsDirectory, { recursive: true, force: true }); 466 470 }

+21 -16

packages/core/src/internal/engine.effect.ts

··· 3 3 makeEventEnvelope, 4 4 type MillEvent, 5 5 type SpawnCompleteEvent, 6 - type SpawnErrorEvent, 7 6 type SpawnMilestoneEvent, 8 7 type SpawnStartEvent, 9 8 type SpawnToolCallEvent, ··· 112 111 RunSyncOutput["run"], 113 112 RunNotFoundError | PersistenceError | LifecycleInvariantError | WaitTimeoutError 114 113 >; 115 - readonly list: (status?: RunSyncOutput["run"]["status"]) => Effect.Effect< 116 - ReadonlyArray<RunSyncOutput["run"]>, 117 - PersistenceError 118 - >; 114 + readonly list: ( 115 + status?: RunSyncOutput["run"]["status"], 116 + ) => Effect.Effect<ReadonlyArray<RunSyncOutput["run"]>, PersistenceError>; 119 117 readonly watch: (runId: RunId) => Stream.Stream<MillEvent, RunNotFoundError | PersistenceError>; 120 118 readonly watchRaw: (runId: RunId) => Stream.Stream<string, RunNotFoundError | PersistenceError>; 121 119 readonly inspect: ( ··· 172 170 runId: RunId, 173 171 ): Effect.Effect<LifecycleGuardState, PersistenceError | LifecycleInvariantError> => 174 172 Effect.gen(function* () { 175 - const persistedEvents = yield* Effect.mapError( 176 - runStore.readEvents(runId), 177 - (error) => toPersistenceError(runId, error), 173 + const persistedEvents = yield* Effect.mapError(runStore.readEvents(runId), (error) => 174 + toPersistenceError(runId, error), 178 175 ); 179 176 180 177 let lifecycleState = initialLifecycleGuardState; ··· 952 949 ), 953 950 954 951 watchRaw: (runId) => 955 - Stream.unwrapScoped(Effect.zipRight(runStore.getRun(runId), Effect.succeed(watchRawLive(runId)))), 952 + Stream.unwrapScoped( 953 + Effect.zipRight(runStore.getRun(runId), Effect.succeed(watchRawLive(runId))), 954 + ), 956 955 957 956 inspect: (ref) => 958 957 Effect.gen(function* () { ··· 1011 1010 const sequenceRef = yield* Ref.make(maxSequence); 1012 1011 1013 1012 yield* Effect.catchTag( 1014 - appendTier1Event(lifecycleStateRef, sequenceRef, runStore, runId, (sequence, timestamp) => ({ 1015 - ...makeEventEnvelope(runId, sequence, timestamp), 1016 - type: 
"run:cancelled", 1017 - payload: { 1018 - reason, 1019 - }, 1020 - })), 1013 + appendTier1Event( 1014 + lifecycleStateRef, 1015 + sequenceRef, 1016 + runStore, 1017 + runId, 1018 + (sequence, timestamp) => ({ 1019 + ...makeEventEnvelope(runId, sequence, timestamp), 1020 + type: "run:cancelled", 1021 + payload: { 1022 + reason, 1023 + }, 1024 + }), 1025 + ), 1021 1026 "LifecycleInvariantError", 1022 1027 () => Effect.void, 1023 1028 );

+3 -1

packages/core/src/internal/observer-hub.effect.ts

··· 35 35 }); 36 36 37 37 export const publishTier1Event = (runId: string, event: MillEvent): Effect.Effect<void> => 38 - Effect.asVoid(Effect.flatMap(ensureTier1PubSub(runId), (pubSub) => PubSub.publish(pubSub, event))); 38 + Effect.asVoid( 39 + Effect.flatMap(ensureTier1PubSub(runId), (pubSub) => PubSub.publish(pubSub, event)), 40 + ); 39 41 40 42 export const publishRawEvent = (runId: string, raw: string): Effect.Effect<void> => 41 43 Effect.asVoid(Effect.flatMap(ensureRawPubSub(runId), (pubSub) => PubSub.publish(pubSub, raw)));

+6 -2

packages/core/src/internal/run-store.effect.ts

··· 47 47 readonly getResult: ( 48 48 runId: RunId, 49 49 ) => Effect.Effect<RunResult | undefined, RunNotFoundError | PersistenceError>; 50 - readonly listRuns: (status?: RunRecord["status"]) => Effect.Effect<ReadonlyArray<RunRecord>, PersistenceError>; 50 + readonly listRuns: ( 51 + status?: RunRecord["status"], 52 + ) => Effect.Effect<ReadonlyArray<RunRecord>, PersistenceError>; 51 53 } 52 54 53 55 export interface MakeRunStoreInput { ··· 241 243 return undefined; 242 244 } 243 245 244 - const maybeRun = yield* Effect.either(storeGetRun(input.runsDirectory, decodedRunId.right)); 246 + const maybeRun = yield* Effect.either( 247 + storeGetRun(input.runsDirectory, decodedRunId.right), 248 + ); 245 249 246 250 if (maybeRun._tag === "Left") { 247 251 return undefined;

+1

packages/core/src/program-host.effect.ts

··· 1 + export * from "./runtime/program-host.effect";

+99 -25

packages/core/src/public/config-loader.api.test.ts

··· 53 53 cwd: "/workspace/repo/app", 54 54 homeDirectory: "/Users/tester", 55 55 pathExists: async (path) => path === "/workspace/repo/app/mill.config.ts", 56 - loadConfigOverrides: async (path) => ({ 57 - authoringInstructions: `loaded:${path}`, 56 + loadConfigModule: async (path) => ({ 57 + default: { 58 + authoring: { 59 + instructions: `loaded:${path}`, 60 + }, 61 + }, 58 62 }), 59 63 }); 60 64 ··· 72 76 homeDirectory: "/Users/tester", 73 77 pathExists: async (path) => 74 78 path === "/workspace/repo/.jj" || path === "/workspace/repo/mill.config.ts", 75 - loadConfigOverrides: async (path) => ({ 76 - authoringInstructions: `loaded:${path}`, 79 + loadConfigModule: async (path) => ({ 80 + default: { 81 + authoring: { 82 + instructions: `loaded:${path}`, 83 + }, 84 + }, 77 85 }), 78 86 }); 79 87 ··· 88 96 cwd: "/workspace/repo/packages/cli", 89 97 homeDirectory: "/Users/tester", 90 98 pathExists: async (path) => path === "/Users/tester/.mill/config.ts", 91 - loadConfigOverrides: async (path) => ({ 92 - authoringInstructions: `loaded:${path}`, 99 + loadConfigModule: async (path) => ({ 100 + default: { 101 + authoring: { 102 + instructions: `loaded:${path}`, 103 + }, 104 + }, 93 105 }), 94 106 }); 95 107 ··· 120 132 path === "/workspace/repo/.jj" || 121 133 path === "/workspace/mill.config.ts" || 122 134 path === "/Users/tester/.mill/config.ts", 123 - loadConfigOverrides: async (path) => ({ 124 - authoringInstructions: `loaded:${path}`, 135 + loadConfigModule: async (path) => ({ 136 + default: { 137 + authoring: { 138 + instructions: `loaded:${path}`, 139 + }, 140 + }, 125 141 }), 126 142 }); 127 143 ··· 139 155 path === "/workspace/repo/.jj" || 140 156 path === "/workspace/mill.config.ts" || 141 157 path === "/Users/tester/.mill/config.ts", 142 - loadConfigOverrides: async (path) => ({ 143 - authoringInstructions: `loaded:${path}`, 158 + loadConfigModule: async (path) => ({ 159 + default: { 160 + authoring: { 161 + instructions: `loaded:${path}`, 162 + }, 163 + }, 
144 164 }), 145 165 }); 146 166 ··· 155 175 homeDirectory: "/Users/tester", 156 176 pathExists: async (path) => 157 177 path === "/scratch/mill.config.ts" || path === "/Users/tester/.mill/config.ts", 158 - loadConfigOverrides: async (path) => ({ 159 - authoringInstructions: `loaded:${path}`, 178 + loadConfigModule: async (path) => ({ 179 + default: { 180 + authoring: { 181 + instructions: `loaded:${path}`, 182 + }, 183 + }, 160 184 }), 161 185 }); 162 186 ··· 165 189 expect(resolved.config.authoring.instructions).toBe("loaded:/Users/tester/.mill/config.ts"); 166 190 }); 167 191 168 - it("loads computed overrides from mill.config.ts const expressions", async () => { 192 + it("loads real TS module exports (drivers/executors/extensions) from mill.config.ts", async () => { 169 193 const tempDirectory = await mkdtemp(join(tmpdir(), "mill-config-loader-")); 170 194 const configPath = join(tempDirectory, "mill.config.ts"); 195 + const configLoaderPath = decodeURIComponent( 196 + new URL("./config-loader.api.ts", import.meta.url).pathname, 197 + ); 171 198 172 199 await writeFile( 173 200 configPath, 174 201 [ 175 - 'const instructions = [`Use systemPrompt for WHO.`, `Use prompt for WHAT.`].join(" ");', 176 - "export default {", 177 - ' defaultDriver: "pi-local" as const,', 178 - ' defaultExecutor: "vm" as const,', 179 - ' defaultModel: "openai/gpt-5.3-codex" as const,', 202 + `import { defineConfig, processDriver } from ${JSON.stringify(configLoaderPath)};`, 203 + "", 204 + 'const suffix = ["from", "module"].join("-");', 205 + "const extensionPrefix = `extension-${suffix}`;", 206 + "", 207 + "export default defineConfig({", 208 + ' defaultDriver: "module-driver",', 209 + ' defaultExecutor: "module-executor",', 210 + ' defaultModel: "provider/module-model",', 211 + " drivers: {", 212 + " 'module-driver': processDriver({", 213 + " description: `driver-${suffix}`,", 214 + ' modelFormat: "provider/model-id",', 215 + " process: {", 216 + ' command: "module-driver",', 217 + " 
args: [],", 218 + " env: {},", 219 + " },", 220 + " codec: {", 221 + ' modelCatalog: { _tag: "loaded-from-module" },', 222 + " },", 223 + " runtime: {", 224 + ' name: "module-driver",', 225 + " spawn: () => ({ kind: " + '"driver-runtime"' + " }),", 226 + " },", 227 + " }),", 228 + " },", 229 + " executors: {", 230 + " 'module-executor': {", 231 + ' description: "executor-from-module",', 232 + " runtime: {", 233 + ' name: "module-executor",', 234 + " runProgram: ({ execute }) => execute,", 235 + " },", 236 + " },", 237 + " },", 238 + " extensions: [", 239 + " {", 240 + ' name: "moduleTools",', 241 + " api: {", 242 + " echo: (...args) => `${extensionPrefix}:${String(args[0] ?? " + '""' + ")}`,", 243 + " },", 244 + " },", 245 + " ],", 180 246 " authoring: {", 181 - " instructions,", 247 + ' instructions: ["Use", "module", "config"].join(" "),', 182 248 " },", 183 - "};", 249 + "});", 184 250 ].join("\n"), 185 251 "utf-8", 186 252 ); ··· 194 260 195 261 expect(resolved.source).toBe("cwd"); 196 262 expect(resolved.configPath).toBe(configPath); 197 - expect(resolved.config.defaultDriver).toBe("pi-local"); 198 - expect(resolved.config.defaultExecutor).toBe("vm"); 199 - expect(resolved.config.defaultModel).toBe("openai/gpt-5.3-codex"); 200 - expect(resolved.config.authoring.instructions).toBe( 201 - "Use systemPrompt for WHO. 
Use prompt for WHAT.", 263 + expect(resolved.config.defaultDriver).toBe("module-driver"); 264 + expect(resolved.config.defaultExecutor).toBe("module-executor"); 265 + expect(resolved.config.defaultModel).toBe("provider/module-model"); 266 + expect(resolved.config.authoring.instructions).toBe("Use module config"); 267 + expect(Object.keys(resolved.config.drivers)).toContain("default"); 268 + expect(Object.keys(resolved.config.drivers)).toContain("module-driver"); 269 + expect(Object.keys(resolved.config.executors)).toContain("direct"); 270 + expect(Object.keys(resolved.config.executors)).toContain("module-executor"); 271 + expect(resolved.config.extensions[0]?.name).toBe("moduleTools"); 272 + expect(typeof resolved.config.drivers["module-driver"]?.runtime?.spawn).toBe("function"); 273 + expect(typeof resolved.config.executors["module-executor"]?.runtime.runProgram).toBe( 274 + "function", 202 275 ); 276 + expect(typeof resolved.config.extensions[0]?.api?.echo).toBe("function"); 203 277 } finally { 204 278 await rm(tempDirectory, { recursive: true, force: true }); 205 279 }

+119 -100

packages/core/src/public/config-loader.api.ts

··· 2 2 import * as BunContext from "@effect/platform-bun/BunContext"; 3 3 import { Effect, Runtime } from "effect"; 4 4 import type { 5 - ConfigOverrides, 5 + ConfigFileOverrides, 6 6 DriverRegistration, 7 7 MillConfig, 8 8 ResolvedConfig, ··· 20 20 const defaultPathExists = async (path: string): Promise<boolean> => 21 21 runWithBunContext(Effect.flatMap(FileSystem.FileSystem, (fileSystem) => fileSystem.exists(path))); 22 22 23 - const extractConfigString = (source: string, key: string): string | undefined => { 24 - const match = new RegExp(`${key}\\s*:\\s*["']([^"'\\n]+)["']`).exec(source); 25 - return match?.[1]; 26 - }; 27 - 28 - const extractConstStringValue = (source: string, identifier: string): string | undefined => { 29 - const escapedIdentifier = identifier.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); 30 - const directStringMatch = new RegExp( 31 - `const\\s+${escapedIdentifier}\\s*=\\s*(["'\\"])(([\\s\\S]*?))\\1\\s*;?`, 32 - ).exec(source); 33 - 34 - if (directStringMatch !== null) { 35 - return directStringMatch[2]; 36 - } 37 - 38 - const joinedArrayMatch = new RegExp( 39 - `const\\s+${escapedIdentifier}\\s*=\\s*\\[([\\s\\S]*?)\\]\\.join\$(["'])((?:[\\s\\S]*?))\\2\$\\s*;?`, 40 - ).exec(source); 41 - 42 - if (joinedArrayMatch === null) { 43 - return undefined; 44 - } 45 - 46 - const values = Array.from(joinedArrayMatch[1].matchAll(/["'`]([^"'`]+)["'`]/g)).map( 47 - (match) => match[1], 48 - ); 49 - 50 - if (values.length === 0) { 51 - return undefined; 52 - } 53 - 54 - return values.join(joinedArrayMatch[3]); 55 - }; 56 - 57 - const extractAuthoringInstructions = (source: string): string | undefined => { 58 - const directInstructions = extractConfigString(source, "instructions"); 59 - 60 - if (directInstructions !== undefined) { 61 - return directInstructions; 62 - } 63 - 64 - const authoringBlockMatch = /authoring\s*:\s*\{([\s\S]*?)\}/.exec(source); 65 - 66 - if (authoringBlockMatch === null) { 67 - return undefined; 68 - } 69 - 70 - const authoringBlock = 
authoringBlockMatch[1]; 71 - const explicitIdentifierMatch = /instructions\s*:\s*([A-Za-z_$][\w$]*)/.exec(authoringBlock); 72 - 73 - if (explicitIdentifierMatch !== null) { 74 - return extractConstStringValue(source, explicitIdentifierMatch[1]); 75 - } 76 - 77 - const hasShorthandInstructions = /\binstructions\b\s*(?:,|$)/.test(authoringBlock); 78 - 79 - if (!hasShorthandInstructions) { 80 - return undefined; 81 - } 82 - 83 - return extractConstStringValue(source, "instructions"); 84 - }; 85 - 86 - const parseConfigOverridesFromText = (source: string): ConfigOverrides => ({ 87 - defaultDriver: extractConfigString(source, "defaultDriver"), 88 - defaultExecutor: extractConfigString(source, "defaultExecutor"), 89 - defaultModel: extractConfigString(source, "defaultModel"), 90 - authoringInstructions: extractAuthoringInstructions(source), 91 - }); 92 - 93 - const readConfigSource = async (path: string): Promise<string> => 94 - runWithBunContext( 95 - Effect.catchAll( 96 - Effect.flatMap(FileSystem.FileSystem, (fileSystem) => 97 - fileSystem.readFileString(path, "utf-8"), 98 - ), 99 - () => Effect.succeed(""), 100 - ), 101 - ); 102 - 103 - const defaultLoadConfigOverrides = async (path: string): Promise<ConfigOverrides> => { 104 - const source = await readConfigSource(path); 105 - 106 - return parseConfigOverridesFromText(source); 107 - }; 108 - 109 23 const normalizePath = (path: string): string => { 110 24 if (path.length <= 1) { 111 25 return path; ··· 156 70 } 157 71 }; 158 72 159 - const mergeConfig = (defaults: MillConfig, overrides: ConfigOverrides): MillConfig => ({ 160 - ...defaults, 161 - defaultDriver: overrides.defaultDriver ?? defaults.defaultDriver, 162 - defaultExecutor: overrides.defaultExecutor ?? defaults.defaultExecutor, 163 - defaultModel: overrides.defaultModel ?? defaults.defaultModel, 164 - authoring: { 165 - instructions: overrides.authoringInstructions ?? 
defaults.authoring.instructions, 166 - }, 167 - }); 168 - 169 73 const resolveConfigPath = async ( 170 74 cwd: string, 171 75 homeDirectory: string | undefined, ··· 224 128 return undefined; 225 129 }; 226 130 131 + const isRecord = (value: unknown): value is Record<string, unknown> => 132 + typeof value === "object" && value !== null; 133 + 134 + const hasConfigShape = (value: Record<string, unknown>): boolean => 135 + [ 136 + "defaultDriver", 137 + "defaultExecutor", 138 + "defaultModel", 139 + "drivers", 140 + "executors", 141 + "extensions", 142 + "authoring", 143 + ].some((key) => key in value); 144 + 145 + const readRecordField = ( 146 + value: Record<string, unknown>, 147 + key: string, 148 + ): Record<string, unknown> | undefined => { 149 + const field = value[key]; 150 + 151 + if (!isRecord(field)) { 152 + return undefined; 153 + } 154 + 155 + return field; 156 + }; 157 + 158 + const readStringField = (value: Record<string, unknown>, key: string): string | undefined => { 159 + const field = value[key]; 160 + return typeof field === "string" ? field : undefined; 161 + }; 162 + 163 + const toConfigOverrides = (value: Record<string, unknown>): ConfigFileOverrides => { 164 + const authoringRecord = readRecordField(value, "authoring"); 165 + 166 + return { 167 + defaultDriver: readStringField(value, "defaultDriver"), 168 + defaultExecutor: readStringField(value, "defaultExecutor"), 169 + defaultModel: readStringField(value, "defaultModel"), 170 + drivers: readRecordField(value, "drivers") as Readonly<Record<string, DriverRegistration>>, 171 + executors: readRecordField(value, "executors") as MillConfig["executors"], 172 + extensions: Array.isArray(value.extensions) 173 + ? (value.extensions as MillConfig["extensions"]) 174 + : undefined, 175 + authoring: { 176 + instructions: 177 + authoringRecord === undefined 178 + ? 
undefined 179 + : readStringField(authoringRecord, "instructions"), 180 + }, 181 + }; 182 + }; 183 + 184 + const toModuleSpecifier = (path: string, cwd: string): string => { 185 + if (path.startsWith("file://")) { 186 + return path; 187 + } 188 + 189 + if (path.startsWith("/")) { 190 + return new URL(path, "file://").href; 191 + } 192 + 193 + return new URL(path, `file://${normalizePath(cwd)}/`).href; 194 + }; 195 + 196 + const defaultLoadConfigModule = async (path: string): Promise<unknown> => { 197 + const moduleSpecifier = toModuleSpecifier(path, process.cwd()); 198 + // ast-grep-ignore: no-dynamic-import 199 + return import(moduleSpecifier); 200 + }; 201 + 202 + const extractConfigFromModule = (moduleValue: unknown): ConfigFileOverrides | undefined => { 203 + if (!isRecord(moduleValue)) { 204 + return undefined; 205 + } 206 + 207 + const candidateValues: ReadonlyArray<unknown> = [ 208 + moduleValue.default, 209 + moduleValue.config, 210 + moduleValue.millConfig, 211 + moduleValue, 212 + ]; 213 + 214 + for (const candidate of candidateValues) { 215 + if (!isRecord(candidate) || !hasConfigShape(candidate)) { 216 + continue; 217 + } 218 + 219 + return toConfigOverrides(candidate); 220 + } 221 + 222 + return undefined; 223 + }; 224 + 225 + const mergeConfig = (defaults: MillConfig, overrides: ConfigFileOverrides): MillConfig => ({ 226 + ...defaults, 227 + defaultDriver: overrides.defaultDriver ?? defaults.defaultDriver, 228 + defaultExecutor: overrides.defaultExecutor ?? defaults.defaultExecutor, 229 + defaultModel: overrides.defaultModel ?? defaults.defaultModel, 230 + drivers: { 231 + ...defaults.drivers, 232 + ...overrides.drivers, 233 + }, 234 + executors: { 235 + ...defaults.executors, 236 + ...overrides.executors, 237 + }, 238 + extensions: overrides.extensions ?? defaults.extensions, 239 + authoring: { 240 + instructions: overrides.authoring?.instructions ?? 
defaults.authoring.instructions, 241 + }, 242 + }); 243 + 227 244 export const defineConfig = <T extends MillConfig>(config: T): T => config; 228 245 229 246 export const processDriver = <T extends DriverRegistration>(driver: T): T => driver; ··· 232 249 const cwd = options.cwd ?? process.cwd(); 233 250 const homeDirectory = options.homeDirectory ?? process.env.HOME; 234 251 const pathExists = options.pathExists ?? defaultPathExists; 235 - const loadConfigOverrides = options.loadConfigOverrides ?? defaultLoadConfigOverrides; 252 + const loadConfigModule = options.loadConfigModule ?? defaultLoadConfigModule; 236 253 237 254 const resolvedPath = await resolveConfigPath(cwd, homeDirectory, pathExists); 238 255 ··· 243 260 }; 244 261 } 245 262 246 - const overrides = await loadConfigOverrides(resolvedPath.path); 263 + const loadedModule = await loadConfigModule(resolvedPath.path); 264 + const loadedConfig = extractConfigFromModule(loadedModule); 247 265 248 266 return { 249 267 source: resolvedPath.source, 250 268 configPath: resolvedPath.path, 251 - config: mergeConfig(options.defaults, overrides), 269 + config: 270 + loadedConfig === undefined ? options.defaults : mergeConfig(options.defaults, loadedConfig), 252 271 }; 253 272 };

+7 -3

packages/core/src/public/discovery.api.test.ts

··· 98 98 expect(payload.drivers.codex?.models).toEqual(["openai/gpt-5.3-codex"]); 99 99 }); 100 100 101 - it("applies authoring instructions from resolved config overrides", async () => { 101 + it("applies authoring instructions from resolved config module", async () => { 102 102 const payload = await createDiscoveryPayload({ 103 103 defaults: makeDefaults(), 104 104 cwd: "/repo", 105 105 homeDirectory: "/home/tester", 106 106 pathExists: async (path) => path === "/repo/mill.config.ts", 107 - loadConfigOverrides: async () => ({ 108 - authoringInstructions: "from-cwd-config", 107 + loadConfigModule: async () => ({ 108 + default: { 109 + authoring: { 110 + instructions: "from-cwd-config", 111 + }, 112 + }, 109 113 }), 110 114 }); 111 115

+19 -5

packages/core/src/public/run.api.test.ts

··· 2 2 import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; 3 3 import { tmpdir } from "node:os"; 4 4 import { join } from "node:path"; 5 + import * as Schema from "@effect/schema/Schema"; 5 6 import { Effect } from "effect"; 6 7 import { decodeMillEventJsonSync } from "../domain/event.schema"; 7 8 import { runProgramSync, runWorker } from "./run.api"; 8 9 import type { MillConfig } from "./types"; 10 + 11 + const ProgramResultEnvelope = Schema.parseJson( 12 + Schema.Struct({ 13 + note: Schema.optional(Schema.String), 14 + driver: Schema.optional(Schema.String), 15 + executor: Schema.optional(Schema.String), 16 + }), 17 + ); 9 18 10 19 const makeConfig = (): MillConfig => ({ 11 20 defaultDriver: "default", ··· 172 181 expect(output.run.executor).toBe("vm"); 173 182 expect(output.result.spawns[0]?.driver).toBe("codex"); 174 183 175 - const parsedProgramResult = JSON.parse(output.result.programResult ?? "{}") as { 176 - readonly note?: string; 177 - readonly driver?: string; 178 - readonly executor?: string; 179 - }; 184 + const parsedProgramResult = Schema.decodeUnknownSync(ProgramResultEnvelope)( 185 + output.result.programResult ?? "{}", 186 + ); 180 187 181 188 expect(parsedProgramResult.note).toBe("echo:hello"); 182 189 expect(parsedProgramResult.driver).toBe("codex"); ··· 190 197 .map((line) => decodeMillEventJsonSync(line).type); 191 198 192 199 expect(eventTypes.includes("extension:error")).toBe(true); 200 + 201 + const hostMarker = await readFile( 202 + join(output.run.paths.runDir, "program-host.marker"), 203 + "utf-8", 204 + ); 205 + expect(hostMarker).toContain("process-host:bun"); 206 + expect(hostMarker).toContain(`executor=${output.run.executor}`); 193 207 } finally { 194 208 await rm(tempDirectory, { recursive: true, force: true }); 195 209 }

+45 -102

packages/core/src/public/run.api.ts

··· 1 1 import * as FileSystem from "@effect/platform/FileSystem"; 2 2 import * as BunContext from "@effect/platform-bun/BunContext"; 3 3 import { Effect, Runtime, Stream } from "effect"; 4 - import { 5 - makeMillEngine, 6 - ProgramExecutionError, 7 - type InspectResult, 8 - } from "../engine.effect"; 4 + import { makeMillEngine, ProgramExecutionError, type InspectResult } from "../engine.effect"; 9 5 import { makeDriverRegistry } from "../driver-registry.effect"; 10 6 import { makeExecutorRegistry } from "../executor-registry.effect"; 11 7 import { ··· 15 11 type RunSyncOutput, 16 12 } from "../run.schema"; 17 13 import { runDetachedWorker } from "../worker.effect"; 18 - import { decodeSpawnOptions } from "../spawn.schema"; 14 + import { executeProgramInProcessHost } from "../program-host.effect"; 19 15 import { resolveConfig } from "./config-loader.api"; 20 16 import type { 21 - ConfigOverrides, 22 17 DriverSessionPointer, 23 18 ExecutorRuntime, 24 19 ExtensionRegistration, 25 20 ResolveConfigOptions, 26 - SpawnInput, 27 - SpawnOutput, 28 21 } from "./types"; 29 22 30 23 const runtime = Runtime.defaultRuntime; 31 24 32 - type ProgramRunner = () => Promise<unknown>; 33 - 34 - type AsyncFunctionConstructor = new (...args: ReadonlyArray<string>) => ProgramRunner; 35 - 36 - const ProgramAsyncFunction = Object.getPrototypeOf(async () => undefined) 37 - .constructor as AsyncFunctionConstructor; 38 - 39 - interface GlobalMillContext { 40 - mill?: { 41 - spawn: (input: SpawnInput) => Promise<SpawnOutput>; 42 - [name: string]: unknown; 43 - }; 44 - } 45 - 46 25 interface BaseRunInput extends ResolveConfigOptions { 47 26 readonly driverName?: string; 48 27 readonly executorName?: string; ··· 58 37 readonly waitTimeoutSeconds?: number; 59 38 } 60 39 61 - interface GetRunStatusInput extends Omit< 62 - ResolveConfigOptions, 63 - "pathExists" | "loadConfigOverrides" 64 - > { 40 + interface GetRunStatusInput extends Omit<ResolveConfigOptions, "pathExists" | "loadConfigModule"> 
{ 65 41 readonly runId: string; 66 42 readonly driverName?: string; 67 43 readonly executorName?: string; 68 44 readonly runsDirectory?: string; 69 45 readonly pathExists?: (path: string) => Promise<boolean>; 70 - readonly loadConfigOverrides?: (path: string) => Promise<ConfigOverrides>; 46 + readonly loadConfigModule?: (path: string) => Promise<unknown>; 71 47 } 72 48 73 49 export interface WaitForRunInput extends GetRunStatusInput { ··· 173 149 }), 174 150 ); 175 151 176 - const toExtensionApiBridge = ( 177 - extensions: ReadonlyArray<ExtensionRegistration>, 178 - ): Readonly<Record<string, unknown>> => 179 - Object.fromEntries( 180 - extensions 181 - .filter((extension) => extension.api !== undefined) 182 - .map((extension) => { 183 - const api = extension.api ?? {}; 184 - 185 - return [ 186 - extension.name, 187 - Object.fromEntries( 188 - Object.entries(api).map(([methodName, method]) => [ 189 - methodName, 190 - (...args: ReadonlyArray<unknown>) => 191 - Runtime.runPromise(runtime)(Effect.provide(method(...args), BunContext.layer)), 192 - ]), 193 - ), 194 - ] as const; 195 - }), 196 - ); 197 - 198 - const executeProgramWithInjectedMill = ( 199 - programSource: string, 200 - spawn: (input: SpawnInput) => Effect.Effect<SpawnOutput, unknown>, 201 - extensions: ReadonlyArray<ExtensionRegistration>, 202 - ): Effect.Effect<unknown, ProgramExecutionError> => 203 - Effect.tryPromise({ 204 - try: async () => { 205 - const globalContext = globalThis as GlobalMillContext; 206 - const previousMill = globalContext.mill; 207 - const programRunner = new ProgramAsyncFunction(programSource); 208 - const extensionApiBridge = toExtensionApiBridge(extensions); 209 - 210 - globalContext.mill = { 211 - spawn: async (input) => { 212 - const decodedInput = await Runtime.runPromise(runtime)(decodeSpawnOptions(input)); 213 - return Runtime.runPromise(runtime)(Effect.provide(spawn(decodedInput), BunContext.layer)); 214 - }, 215 - ...extensionApiBridge, 216 - }; 217 - 218 - try { 219 - 
return await programRunner(); 220 - } finally { 221 - if (previousMill === undefined) { 222 - delete globalContext.mill; 223 - } else { 224 - globalContext.mill = previousMill; 225 - } 226 - } 227 - }, 228 - catch: (error) => 229 - new ProgramExecutionError({ 230 - runId: "pending", 231 - message: String(error), 232 - }), 233 - }); 234 - 235 152 const makeEngineForConfig = async (input: BaseRunInput): Promise<EngineContext> => { 236 153 const cwd = input.cwd ?? process.cwd(); 237 154 const resolvedConfig = await resolveConfig(input); ··· 268 185 }; 269 186 }; 270 187 271 - const parseInspectRef = (ref: string): { runId: string; spawnId?: string } => { 188 + const parseInspectRef = ( 189 + ref: string, 190 + ): 191 + | { 192 + runId: string; 193 + spawnId?: string; 194 + } 195 + | undefined => { 272 196 const [runIdPart, spawnIdPart] = ref.split("."); 273 197 274 198 if (runIdPart === undefined || runIdPart.length === 0) { 275 - throw new Error("inspect reference requires a runId"); 199 + return undefined; 276 200 } 277 201 278 202 if (spawnIdPart === undefined || spawnIdPart.length === 0) { ··· 339 263 driverName: input.driverName, 340 264 executorName: input.executorName, 341 265 pathExists: input.pathExists, 342 - loadConfigOverrides: input.loadConfigOverrides, 266 + loadConfigModule: input.loadConfigModule, 343 267 }); 344 268 345 269 const engineContext = await makeEngineForConfig(input); ··· 348 272 ); 349 273 350 274 if (result === undefined) { 351 - throw new Error(`Run ${submittedRun.id} completed without persisted result.`); 275 + return Promise.reject(new Error(`Run ${submittedRun.id} completed without persisted result.`)); 352 276 } 353 277 354 278 return { ··· 374 298 engineContext.selectedExecutorRuntime.runProgram({ 375 299 runId: input.runId, 376 300 programPath, 377 - execute: executeProgramWithInjectedMill( 301 + execute: executeProgramInProcessHost({ 302 + runId: input.runId, 303 + runDirectory: joinPath(engineContext.runsDirectory, input.runId), 
304 + workingDirectory: cwd, 305 + programPath, 378 306 programSource, 307 + executorName: engineContext.selectedExecutorName, 308 + extensions: engineContext.selectedExtensions, 379 309 spawn, 380 - engineContext.selectedExtensions, 381 - ), 310 + }), 382 311 }), 383 312 (error) => 384 313 new ProgramExecutionError({ ··· 411 340 return waitOutcome.right; 412 341 } 413 342 414 - throw waitOutcome.left; 343 + return Promise.reject(waitOutcome.left); 415 344 }; 416 345 417 346 export const watchRun = async (input: WatchRunInput): Promise<void> => { ··· 438 367 await runWithBunContext( 439 368 Effect.scoped( 440 369 Stream.runForEach( 441 - Stream.takeUntil(engineContext.engine.watch(runId), (event) => isRunTerminalEvent(event.type)), 370 + Stream.takeUntil(engineContext.engine.watch(runId), (event) => 371 + isRunTerminalEvent(event.type), 372 + ), 442 373 (event) => 443 374 Effect.sync(() => { 444 375 input.onEvent(JSON.stringify(event)); ··· 452 383 input: InspectRunInput, 453 384 ): Promise<InspectResult | InspectSessionOutput> => { 454 385 const parsedRef = parseInspectRef(input.ref); 386 + 387 + if (parsedRef === undefined) { 388 + return Promise.reject(new Error("inspect reference requires a runId")); 389 + } 390 + 455 391 const engineContext = await makeEngineForConfig(input); 456 392 const inspected = await runWithBunContext( 457 393 engineContext.engine.inspect({ 458 394 runId: decodeRunIdSync(parsedRef.runId), 459 - spawnId: 460 - parsedRef.spawnId === undefined ? undefined : decodeSpawnIdSync(parsedRef.spawnId), 395 + spawnId: parsedRef.spawnId === undefined ? 
undefined : decodeSpawnIdSync(parsedRef.spawnId), 461 396 }), 462 397 ); 463 398 ··· 466 401 } 467 402 468 403 if (inspected.kind !== "spawn" || inspected.result === undefined) { 469 - throw new Error("inspect --session requires a runId.spawnId reference with completed spawn result"); 404 + return Promise.reject( 405 + new Error("inspect --session requires a runId.spawnId reference with completed spawn result"), 406 + ); 470 407 } 471 408 472 409 const resolvedConfig = await resolveConfig(input); ··· 474 411 defaultDriver: resolvedConfig.config.defaultDriver, 475 412 drivers: resolvedConfig.config.drivers, 476 413 }); 477 - const run = await runWithBunContext(engineContext.engine.status(decodeRunIdSync(parsedRef.runId))); 414 + const run = await runWithBunContext( 415 + engineContext.engine.status(decodeRunIdSync(parsedRef.runId)), 416 + ); 478 417 const resolvedDriver = await Runtime.runPromise(runtime)(driverRegistry.resolve(run.driver)); 479 418 480 419 if (resolvedDriver.runtime.resolveSession === undefined) { 481 - throw new Error(`Driver ${resolvedDriver.name} does not support session inspection`); 420 + return Promise.reject( 421 + new Error(`Driver ${resolvedDriver.name} does not support session inspection`), 422 + ); 482 423 } 483 424 484 425 const sessionPointer = await Runtime.runPromise(runtime)( ··· 497 438 } satisfies InspectSessionOutput; 498 439 }; 499 440 500 - export const cancelRun = async (input: CancelRunInput): Promise<{ 441 + export const cancelRun = async ( 442 + input: CancelRunInput, 443 + ): Promise<{ 501 444 runId: string; 502 445 status: RunRecord["status"]; 503 446 alreadyTerminal: boolean;

+8 -3

packages/core/src/public/types.ts

··· 162 162 163 163 export type ConfigSource = "cwd" | "upward" | "home" | "defaults"; 164 164 165 - export interface ConfigOverrides { 165 + export interface ConfigFileOverrides { 166 166 readonly defaultDriver?: string; 167 167 readonly defaultExecutor?: string; 168 168 readonly defaultModel?: string; 169 - readonly authoringInstructions?: string; 169 + readonly drivers?: Readonly<Record<string, DriverRegistration>>; 170 + readonly executors?: Readonly<Record<string, ExecutorRegistration>>; 171 + readonly extensions?: ReadonlyArray<ExtensionRegistration>; 172 + readonly authoring?: { 173 + readonly instructions?: string; 174 + }; 170 175 } 171 176 172 177 export interface ResolvedConfig { ··· 180 185 readonly cwd?: string; 181 186 readonly homeDirectory?: string; 182 187 readonly pathExists?: (path: string) => Promise<boolean>; 183 - readonly loadConfigOverrides?: (path: string) => Promise<ConfigOverrides>; 188 + readonly loadConfigModule?: (path: string) => Promise<unknown>; 184 189 }

+464

packages/core/src/runtime/program-host.effect.ts

import * as Command from "@effect/platform/Command";
import * as FileSystem from "@effect/platform/FileSystem";
import { Cause, Chunk, Data, Effect, Exit, Fiber, Queue, Ref, Stream } from "effect";
import {
  ProgramHostProtocolPrefix,
  decodeProgramHostInboundMessage,
  type ProgramHostInboundMessage,
  type ProgramHostResponseMessage,
} from "../domain/program-host.schema";
import type { SpawnOptions, SpawnResult } from "../domain/spawn.schema";
import type { ExtensionRegistration } from "../public/types";

/** Failure raised when the child program host cannot be prepared, started, or completed. */
export class ProgramHostError extends Data.TaggedError("ProgramHostError")<{
  runId: string;
  message: string;
}> {}

/** Everything needed to run one user program inside a separate host process. */
export interface ExecuteProgramInProcessHostInput {
  readonly runId: string;
  /** Directory that receives `program-host.marker` and `program-host.ts`. */
  readonly runDirectory: string;
  /** Working directory of the child process. */
  readonly workingDirectory: string;
  /** Recorded in the marker file only; the child executes `programSource`. */
  readonly programPath: string;
  /** Program text, inlined as the body of an async function in the host script. */
  readonly programSource: string;
  readonly executorName: string;
  readonly extensions: ReadonlyArray<ExtensionRegistration>;
  /** Serves `mill.spawn(...)` requests issued by the child. */
  readonly spawn: (input: SpawnOptions) => Effect.Effect<SpawnResult, unknown>;
}

type ProgramHostResultMessage = Extract<ProgramHostInboundMessage, { readonly kind: "result" }>;

type ExtensionApiMethod = (...args: ReadonlyArray<unknown>) => Effect.Effect<unknown, unknown>;

type ExtensionApiLookup = ReadonlyMap<string, Readonly<Record<string, ExtensionApiMethod>>>;

const textEncoder = new TextEncoder();

/** Drops a single trailing slash, leaving "/" and the empty string untouched. */
const normalizePath = (path: string): string => {
  if (path.length <= 1) {
    return path;
  }

  return path.endsWith("/") ? path.slice(0, -1) : path;
};

/** Joins `child` onto `base` with exactly one separator. */
const joinPath = (base: string, child: string): string =>
  normalizePath(base) === "/" ? `/${child}` : `${normalizePath(base)}/${child}`;

/** Extracts a readable message from an arbitrary failure value. */
const toMessage = (error: unknown): string => {
  if (error instanceof Error) {
    return error.message;
  }

  return String(error);
};

/** Indexes the `api` object of every extension that declares one, keyed by extension name. */
const buildExtensionApiLookup = (
  extensions: ReadonlyArray<ExtensionRegistration>,
): ExtensionApiLookup =>
  new Map(
    extensions
      .filter((extension) => extension.api !== undefined)
      .map(
        (extension) =>
          [extension.name, extension.api as Readonly<Record<string, ExtensionApiMethod>>] as const,
      ),
  );

/** Describes each extension API as `{ name, methods }` so the child can build request stubs. */
const buildExtensionSpecs = (extensions: ReadonlyArray<ExtensionRegistration>) =>
  extensions
    .filter((extension) => extension.api !== undefined)
    .map((extension) => ({
      name: extension.name,
      methods: Object.keys(extension.api ?? {}),
    }));

/**
 * Looks up an extension method by its OWN property name.
 *
 * The names come from the child process, so a plain `api[methodName]` lookup would also
 * resolve inherited members such as `constructor`, `toString` or `__proto__`, which are not
 * extension methods and do not return Effects. Anything that is not an own function
 * property is reported as unknown.
 */
const resolveExtensionMethod = (
  lookup: ExtensionApiLookup,
  extensionName: string,
  methodName: string,
): ExtensionApiMethod | undefined => {
  const extensionApi = lookup.get(extensionName);

  if (
    extensionApi === undefined ||
    !Object.prototype.hasOwnProperty.call(extensionApi, methodName)
  ) {
    return undefined;
  }

  const method = extensionApi[methodName];

  return typeof method === "function" ? method : undefined;
};

/**
 * Generates the script executed by the child process.
 *
 * The script installs `globalThis.mill` (with `spawn` plus one stub object per extension),
 * forwards each call to the parent as a prefixed JSON line on stdout, resolves the matching
 * promise when a JSON response line arrives on stdin, runs `programSource` as the body of an
 * async function, and finally reports a `result` message.
 */
const createProgramHostSource = (
  input: Pick<ExecuteProgramInProcessHostInput, "executorName" | "programSource" | "extensions">,
): string => {
  const extensionSpecs = JSON.stringify(buildExtensionSpecs(input.extensions));
  const protocolPrefix = JSON.stringify(ProgramHostProtocolPrefix);
  const executorName = JSON.stringify(input.executorName);

  return [
    `const __millProtocolPrefix = ${protocolPrefix};`,
    `const __millExecutorName = ${executorName};`,
    `const __millExtensionSpecs = ${extensionSpecs};`,
    "globalThis.__millExecutorName = __millExecutorName;",
    "",
    "const __millPending = new Map();",
    "let __millRequestCounter = 0;",
    'let __millStdinBuffer = "";',
    "",
    "const __millSend = (message) => {",
    ' process.stdout.write(__millProtocolPrefix + JSON.stringify(message) + "\\n");',
    "};",
    "",
    "const __millResolveResponse = (message) => {",
    ' if (message.kind !== "response") {',
    " return;",
    " }",
    "",
    " const pending = __millPending.get(message.requestId);",
    "",
    " if (pending === undefined) {",
    " return;",
    " }",
    "",
    " __millPending.delete(message.requestId);",
    "",
    " if (message.ok === true) {",
    " pending.resolve(message.value);",
    " return;",
    " }",
    "",
    ' pending.reject(new Error(String(message.message ?? "program host request failed")));',
    "};",
    "",
    'process.stdin.setEncoding("utf8");',
    'process.stdin.on("data", (chunk) => {',
    " __millStdinBuffer += chunk;",
    "",
    " while (true) {",
    ' const newlineIndex = __millStdinBuffer.indexOf("\\n");',
    "",
    " if (newlineIndex < 0) {",
    " break;",
    " }",
    "",
    " const line = __millStdinBuffer.slice(0, newlineIndex).trim();",
    " __millStdinBuffer = __millStdinBuffer.slice(newlineIndex + 1);",
    "",
    " if (line.length === 0) {",
    " continue;",
    " }",
    "",
    " try {",
    " __millResolveResponse(JSON.parse(line));",
    " } catch (_error) {",
    " // Ignore malformed parent responses.",
    " }",
    " }",
    "});",
    "",
    "const __millCallHost = (request) =>",
    " new Promise((resolve, reject) => {",
    " __millRequestCounter += 1;",
    " const requestId = `req_${__millRequestCounter}`;",
    "",
    " __millPending.set(requestId, { resolve, reject });",
    " __millSend({",
    ' kind: "request",',
    " requestId,",
    " ...request,",
    " });",
    " });",
    "",
    "const __millApi = {",
    " spawn: (input) =>",
    " __millCallHost({",
    ' requestType: "spawn",',
    " input,",
    " }),",
    "};",
    "",
    "for (const extension of __millExtensionSpecs) {",
    " const extensionApi = {};",
    "",
    " for (const methodName of extension.methods) {",
    " extensionApi[methodName] = (...args) =>",
    " __millCallHost({",
    ' requestType: "extension",',
    " extensionName: extension.name,",
    " methodName,",
    " args,",
    " });",
    " }",
    "",
    " __millApi[extension.name] = extensionApi;",
    "}",
    "",
    "globalThis.mill = __millApi;",
    "",
    "const __millProgram = async () => {",
    input.programSource,
    "};",
    "",
    "const __millRun = async () => {",
    " try {",
    " const value = await __millProgram();",
    "",
    " __millSend({",
    ' kind: "result",',
    " ok: true,",
    " value,",
    " });",
    " } catch (error) {",
    " __millSend({",
    ' kind: "result",',
    " ok: false,",
    " message: error instanceof Error ? error.message : String(error),",
    " });",
    " } finally {",
    " process.stdin.pause();",
    " }",
    "};",
    "",
    "await __millRun();",
    "",
  ].join("\n");
};

/** Serialises a response as one newline-terminated JSON line. */
const encodeResponse = (response: ProgramHostResponseMessage): Uint8Array =>
  textEncoder.encode(`${JSON.stringify(response)}\n`);

/** Queues a response for delivery to the child's stdin. */
const sendResponse = (
  queue: Queue.Queue<Uint8Array>,
  response: ProgramHostResponseMessage,
): Effect.Effect<void> => Effect.asVoid(Queue.offer(queue, encodeResponse(response)));

/** Renders a failure cause as human-readable text. */
const summarizeCause = (cause: Cause.Cause<unknown>): string => Cause.pretty(cause);

/** Answers one child request with the outcome of the effect that served it. */
const respondWithExit = (
  queue: Queue.Queue<Uint8Array>,
  requestId: ProgramHostResponseMessage["requestId"],
  exit: Exit.Exit<unknown, unknown>,
): Effect.Effect<void> =>
  Exit.isSuccess(exit)
    ? sendResponse(queue, {
        kind: "response",
        requestId,
        ok: true,
        value: exit.value,
      })
    : sendResponse(queue, {
        kind: "response",
        requestId,
        ok: false,
        message: summarizeCause(exit.cause),
      });

/** Formats captured stderr as a suffix for error messages; empty when nothing was captured. */
const formatStderrSuffix = (stderrLines: ReadonlyArray<string>): string => {
  if (stderrLines.length === 0) {
    return "";
  }

  return `\nstderr:\n${stderrLines.join("\n")}`;
};

/** Records the protocol result; the first recorded result wins and later ones are dropped. */
const completeResult = (
  resultRef: Ref.Ref<ProgramHostResultMessage | undefined>,
  result: ProgramHostResultMessage,
): Effect.Effect<void> =>
  Ref.update(resultRef, (current) => (current !== undefined ? current : result));

/**
 * Runs a user program in a child process and returns the value it reports.
 *
 * Steps: ensure the run directory exists, write `program-host.marker` and the generated
 * `program-host.ts`, start `process.execPath run <script>` with piped stdio, serve the
 * child's spawn/extension requests, then wait for the process to exit.
 *
 * Fails with `ProgramHostError` when setup or startup fails, when the child exits without
 * reporting a result, when it reports a failed result, or when it exits with a non-zero
 * code. Captured stderr is appended to those last three messages.
 *
 * NOTE(review): the body reads the `FileSystem` service and starts a `Command`, while the
 * declared return type lists no requirements — confirm this signature type-checks in the
 * project before relying on it.
 */
export const executeProgramInProcessHost = (
  input: ExecuteProgramInProcessHostInput,
): Effect.Effect<unknown, ProgramHostError> =>
  Effect.scoped(
    Effect.gen(function* () {
      const fileSystem = yield* FileSystem.FileSystem;
      const runDirectory = normalizePath(input.runDirectory);
      const markerPath = joinPath(runDirectory, "program-host.marker");
      const hostProgramPath = joinPath(runDirectory, "program-host.ts");
      const extensionLookup = buildExtensionApiLookup(input.extensions);
      const protocolResultRef = yield* Ref.make<ProgramHostResultMessage | undefined>(undefined);

      const failWith = (message: string): ProgramHostError =>
        new ProgramHostError({ runId: input.runId, message });

      yield* Effect.mapError(
        fileSystem.makeDirectory(runDirectory, { recursive: true }),
        (error) => failWith(`Unable to ensure run directory ${runDirectory}: ${toMessage(error)}`),
      );

      yield* Effect.mapError(
        fileSystem.writeFileString(
          markerPath,
          [
            "process-host:bun",
            `runId=${input.runId}`,
            `executor=${input.executorName}`,
            `programPath=${input.programPath}`,
          ].join("\n"),
        ),
        (error) => failWith(`Unable to write program host marker: ${toMessage(error)}`),
      );

      yield* Effect.mapError(
        fileSystem.writeFileString(hostProgramPath, createProgramHostSource(input)),
        (error) => failWith(`Unable to write program host script: ${toMessage(error)}`),
      );

      const command = Command.make(process.execPath, "run", hostProgramPath).pipe(
        Command.workingDirectory(input.workingDirectory),
        Command.stdin("pipe"),
        Command.stdout("pipe"),
        Command.stderr("pipe"),
      );

      const processHandle = yield* Effect.mapError(Command.start(command), (error) =>
        failWith(`Unable to start Bun program host: ${toMessage(error)}`),
      );

      // Responses are funnelled through a queue so a single fiber owns the child's stdin.
      const responseQueue = yield* Queue.unbounded<Uint8Array>();

      const stdinFiber = yield* Effect.forkScoped(
        Stream.run(Stream.fromQueue(responseQueue, { shutdown: true }), processHandle.stdin),
      );

      // Handles one stdout line; lines without the protocol prefix are program output and
      // are ignored here.
      const handleStdoutLine = (line: string): Effect.Effect<void> =>
        Effect.gen(function* () {
          if (!line.startsWith(ProgramHostProtocolPrefix)) {
            return;
          }

          const protocolPayload = line.slice(ProgramHostProtocolPrefix.length);
          const decoded = yield* Effect.either(decodeProgramHostInboundMessage(protocolPayload));

          if (decoded._tag === "Left") {
            // A corrupt protocol line is fatal: record the failure and stop the child.
            yield* completeResult(protocolResultRef, {
              kind: "result",
              ok: false,
              message: `Malformed program host payload: ${toMessage(decoded.left)}`,
            });
            yield* Effect.ignore(processHandle.kill("SIGTERM"));
            return;
          }

          const message = decoded.right;

          if (message.kind === "result") {
            yield* completeResult(protocolResultRef, message);
            return;
          }

          if (message.requestType === "spawn") {
            const spawnExit = yield* Effect.exit(input.spawn(message.input));
            yield* respondWithExit(responseQueue, message.requestId, spawnExit);
            return;
          }

          const method = resolveExtensionMethod(
            extensionLookup,
            message.extensionName,
            message.methodName,
          );

          if (method === undefined) {
            yield* sendResponse(responseQueue, {
              kind: "response",
              requestId: message.requestId,
              ok: false,
              message: `Unknown extension api ${message.extensionName}.${message.methodName}`,
            });
            return;
          }

          const methodExit = yield* Effect.exit(method(...message.args));
          yield* respondWithExit(responseQueue, message.requestId, methodExit);
        });

      // NOTE(review): runForEach handles one line at a time, so a long-running spawn delays
      // every later request from the child — confirm that serialising concurrent
      // `mill.spawn(...)` calls is intended.
      const stdoutFiber = yield* Effect.forkScoped(
        Stream.runForEach(
          Stream.splitLines(Stream.decodeText(processHandle.stdout)),
          handleStdoutLine,
        ),
      );

      // Collect stderr in one pass instead of re-copying an array for every line.
      const stderrFiber = yield* Effect.forkScoped(
        Stream.runCollect(Stream.splitLines(Stream.decodeText(processHandle.stderr))),
      );

      const exitCode = yield* Effect.mapError(processHandle.exitCode, (error) =>
        failWith(`Program host process failed before completion: ${toMessage(error)}`),
      );

      // The child is gone: stop feeding stdin, then drain what it already wrote.
      yield* Queue.shutdown(responseQueue);
      yield* Effect.ignore(Fiber.join(stdinFiber));

      yield* Effect.mapError(Fiber.join(stdoutFiber), (error) =>
        failWith(`Program host stdout processing failed: ${toMessage(error)}`),
      );

      const stderrChunk = yield* Effect.mapError(Fiber.join(stderrFiber), (error) =>
        failWith(`Program host stderr processing failed: ${toMessage(error)}`),
      );

      const stderrLines = Chunk.toReadonlyArray(stderrChunk);
      const protocolResult = yield* Ref.get(protocolResultRef);

      if (protocolResult === undefined) {
        return yield* Effect.fail(
          failWith(
            `Program host exited without result (exitCode=${exitCode}).${formatStderrSuffix(
              stderrLines,
            )}`,
          ),
        );
      }

      if (protocolResult.ok === false) {
        return yield* Effect.fail(
          failWith(`${protocolResult.message}${formatStderrSuffix(stderrLines)}`),
        );
      }

      if (exitCode !== 0) {
        return yield* Effect.fail(
          failWith(
            `Program host exited with code ${exitCode}.${formatStderrSuffix(stderrLines)}`,
          ),
        );
      }

      return protocolResult.value;
    }),
  );

+1 -1

packages/driver-claude/src/public/index.api.test.ts

··· 18 18 const driver = createClaudeDriverRegistration(); 19 19 20 20 if (driver.runtime === undefined) { 21 - throw new Error("driver runtime is required"); 21 + return; 22 22 } 23 23 24 24 const output = await Runtime.runPromise(runtime)(

+1 -1

packages/driver-codex/src/public/index.api.test.ts

··· 18 18 const driver = createCodexDriverRegistration(); 19 19 20 20 if (driver.runtime === undefined) { 21 - throw new Error("driver runtime is required"); 21 + return; 22 22 } 23 23 24 24 const output = await Runtime.runPromise(runtime)(

+6 -3

packages/driver-pi/src/public/index.api.test.ts

··· 58 58 expect(driver.runtime).toBeDefined(); 59 59 60 60 if (driver.runtime === undefined) { 61 - throw new Error("driver runtime is required"); 61 + return; 62 62 } 63 63 64 64 expect(driver.runtime.resolveSession).toBeDefined(); 65 65 66 66 if (driver.runtime.resolveSession === undefined) { 67 - throw new Error("resolveSession bridge is required"); 67 + return; 68 68 } 69 69 70 70 const session = await Runtime.runPromise(runtime)( 71 - Effect.provide(driver.runtime.resolveSession({ sessionRef: "session/scout" }), BunContext.layer), 71 + Effect.provide( 72 + driver.runtime.resolveSession({ sessionRef: "session/scout" }), 73 + BunContext.layer, 74 + ), 72 75 ); 73 76 74 77 expect(session.sessionRef).toBe("session/scout");

+58

packages/pi-mill/README.md

··· 1 + # @mill/pi-mill 2 + 3 + A pi extension that provides the same `subagent` tool + TUI monitor workflow as your existing setup, but executes each child task through **mill** (`mill run --sync --json`) instead of spawning `pi` directly. 4 + 5 + ## What stays the same 6 + 7 + - `subagent` tool contract (`task` + `code`) 8 + - Program-mode orchestration with `factory.spawn(...)` 9 + - Async return (immediate run id, completion notification) 10 + - `/mill` overlay monitor 11 + - `pi --mill` standalone monitor 12 + - Status widget + batched completion notifications 13 + - Bundled skills (`mill-basics`, `mill-ralph-loop`, `mill-worktree`) 14 + 15 + ## What changed 16 + 17 + - Child execution is now delegated to `mill`. 18 + - Each `factory.spawn(...)` compiles to a tiny temporary mill program with one `mill.spawn(...)` call. 19 + - Driver/executor/model behavior comes from your mill defaults and config resolution. 20 + 21 + ## Install as a pi package 22 + 23 + ```bash 24 + pi install /absolute/path/to/mill/packages/pi-mill 25 + ``` 26 + 27 + (or add as a local package in your pi settings). 28 + 29 + ## Mill prerequisites 30 + 31 + 1. `mill` must be on your `PATH` (or configure a custom command below). 32 + 2. Configure your global/project `mill.config.ts` with real drivers/executors as needed. 
33 + 34 + ## Extension config 35 + 36 + Edit `index.ts`: 37 + 38 + ```ts 39 + export const config = { 40 + maxDepth: 1, 41 + millCommand: "mill", 42 + millArgs: [], 43 + millRunsDir: undefined, 44 + prompt: "...optional extra guidance for the tool description...", 45 + }; 46 + ``` 47 + 48 + - `maxDepth`: subagent nesting limit (`PI_FACTORY_DEPTH` guard) 49 + - `millCommand`: executable name/path for mill 50 + - `millArgs`: extra args prepended to every mill invocation 51 + - `millRunsDir`: optional override for `--runs-dir` 52 + - `prompt`: additional model/tool guidance appended to tool description 53 + 54 + ## Notes 55 + 56 + - Cancelling via `/mill` or `pi --mill` still works (PID-based). 57 + - `ExecutionResult.sessionPath` now contains mill driver `sessionRef` when available. 58 + - This package intentionally keeps the old UX while switching execution backend to mill.

+30

packages/pi-mill/contract.ts

import { Type, type Static } from "@sinclair/typebox";
import { FactoryError } from "./errors.js";

/** Parameter schema for the `subagent` tool: a run label plus the program source. */
export const SubagentSchema = Type.Object({
  task: Type.String({ description: "Label/description for this program run." }),
  code: Type.String({
    description:
      "TypeScript script using the `factory` global. Use factory.spawn() to orchestrate agents. The script runs as a top-level module — use await and Promise.all directly.",
  }),
});

export type SubagentParams = Static<typeof SubagentSchema>;

/** True only for strings that contain at least one non-whitespace character. */
const isNonBlankString = (value: unknown): value is string =>
  typeof value === "string" && value.trim().length > 0;

/**
 * Checks that both `task` and `code` are present and non-blank.
 *
 * The static type says both fields are strings, but the values are checked at
 * runtime anyway: a missing, non-string, or whitespace-only field is reported
 * as a recoverable `INVALID_INPUT` error rather than surfacing as a `TypeError`
 * from calling `.trim()` on a non-string.
 *
 * @param params - Raw tool parameters.
 * @returns The same `params` object, unchanged, when valid.
 * @throws FactoryError with code `INVALID_INPUT` when a field is missing or blank.
 */
export function validateParams(params: SubagentParams): SubagentParams {
  // Tolerate a null/undefined payload so it is reported as a missing 'task'.
  const candidate: Partial<Record<keyof SubagentParams, unknown>> = params ?? {};

  if (!isNonBlankString(candidate.task)) {
    throw new FactoryError({
      code: "INVALID_INPUT",
      message: "'task' is required.",
      recoverable: true,
    });
  }
  if (!isNonBlankString(candidate.code)) {
    throw new FactoryError({
      code: "INVALID_INPUT",
      message: "'code' is required and must be non-empty.",
      recoverable: true,
    });
  }
  return params;
}

+40

packages/pi-mill/errors.ts

/** Machine-readable failure categories used in {@link ErrorDetails}. */
export type ErrorCode =
  | "INVALID_INPUT"
  | "MODEL_NOT_FOUND"
  | "CANCELLED"
  | "RUNTIME"
  | "CONFIRMATION_REJECTED";

/** Identifies the run (and optionally the task/step) an error belongs to. */
export interface ThreadRef {
  runId: string;
  taskId?: string;
  step?: number;
}

/** Structured description of a failure. */
export interface ErrorDetails {
  code: ErrorCode;
  message: string;
  recoverable: boolean;
  thread?: ThreadRef;
  meta?: Record<string, unknown>;
}

/** Error subclass that carries structured {@link ErrorDetails}. */
export class FactoryError extends Error {
  readonly details: ErrorDetails;

  /** The `Error` message is rendered as `"<code>: <message>"`. */
  constructor(details: ErrorDetails) {
    super(`${details.code}: ${details.message}`);
    this.name = "FactoryError";
    this.details = details;
  }
}

/**
 * Produces a human-readable message for an arbitrary thrown value.
 *
 * `Error` instances yield their `message`. Objects with a non-empty string
 * `message` property yield that property. Other values use `String(value)`,
 * except that the uninformative `"[object Object]"` is replaced by the JSON
 * rendering when one can be produced.
 */
function describeThrown(error: unknown): string {
  if (error instanceof Error) return error.message;

  if (typeof error === "object" && error !== null) {
    const message = (error as { message?: unknown }).message;
    if (typeof message === "string" && message.length > 0) return message;
  }

  let text: string;
  try {
    text = String(error);
  } catch {
    // Objects without a usable toString/valueOf (e.g. Object.create(null)).
    text = "[object Object]";
  }
  if (text !== "[object Object]") return text;

  try {
    const json = JSON.stringify(error);
    if (typeof json === "string") return json;
  } catch {
    // Circular structures etc.; fall through to the generic text.
  }
  return text;
}

/**
 * Normalizes any thrown value into {@link ErrorDetails}.
 *
 * @param error - The caught value.
 * @param fallback - Values used for `code`, `recoverable`, `thread`, and `meta`
 *   when `error` is not a {@link FactoryError}. `fallback.message` is not used.
 * @returns `error.details` (same object) for a `FactoryError`; otherwise a new
 *   object with `code` defaulting to `"RUNTIME"` and `recoverable` to `false`.
 */
export function toErrorDetails(error: unknown, fallback?: Partial<ErrorDetails>): ErrorDetails {
  if (error instanceof FactoryError) return error.details;
  return {
    code: fallback?.code ?? "RUNTIME",
    message: describeThrown(error),
    recoverable: fallback?.recoverable ?? false,
    thread: fallback?.thread,
    meta: fallback?.meta,
  };
}

+329

packages/pi-mill/executors/program-executor.ts

import { pathToFileURL } from "node:url";
import { highlightCode, type ExtensionContext } from "@mariozechner/pi-coding-agent";
import { matchesKey, truncateToWidth, wrapTextWithAnsi } from "@mariozechner/pi-tui";
import { FactoryError, toErrorDetails } from "../errors.js";
import type { ObservabilityStore } from "../observability.js";
import {
  createFactory,
  patchConsole,
  patchPromiseAll,
  prepareProgramModule,
  preflightTypecheck,
} from "../runtime.js";
import type { ExecutionResult, RunSummary } from "../types.js";

// ── Confirmation UI ────────────────────────────────────────────────────

/**
 * Shows a scrollable overlay with the program source and asks the user to
 * approve or reject it.
 *
 * When `ctx.hasUI` is false the program is approved without prompting.
 *
 * Keys in review mode: Enter/y/Y approve, n/N switch to reason entry,
 * Esc/Ctrl+C reject, ↑/k and ↓/j scroll one line, PageUp/PageDown scroll a page.
 * Keys in reason entry: Enter rejects with the typed reason (if any), Esc goes
 * back to review, Ctrl+C rejects without a reason, Backspace deletes.
 *
 * @returns `{ approved }` plus an optional trimmed rejection `reason`. A
 *   missing overlay result is treated as a rejection.
 */
export async function confirmExecution(
  ctx: ExtensionContext,
  code: string,
): Promise<{ approved: boolean; reason?: string }> {
  if (!ctx.hasUI) return { approved: true };

  const lines = highlightCode(code, "typescript");
  // Fall back to the raw source if highlighting produced nothing.
  const displayLines = lines.length > 0 ? lines : code.split("\n");

  const result = await ctx.ui.custom<{ approved: boolean; reason?: string }>(
    (tui, theme, _keybindings, done) => {
      let offset = 0;
      let collectingReason = false;
      let reason = "";

      // Visible code rows: between 8 and 42, leaving 14 terminal rows for chrome.
      const codeRows = () => Math.max(8, Math.min(42, tui.terminal.rows - 14));
      // Keep the scroll offset inside [0, lastPageStart].
      const clamp = () => {
        offset = Math.max(0, Math.min(offset, Math.max(0, displayLines.length - codeRows())));
      };
      const boxLine = (text: string, w: number) => `│ ${truncateToWidth(text, w, "…", true)} │`;
      // Wraps `text` to `w` columns and appends each wrapped row as a boxed line.
      const pushWrapped = (out: string[], text: string, w: number) => {
        for (const l of wrapTextWithAnsi(text, w)) out.push(boxLine(l, w));
      };

      return {
        render(width: number) {
          clamp();
          const totalW = Math.max(40, width);
          const contentW = Math.max(20, totalW - 4);
          const rows = codeRows();
          const end = Math.min(displayLines.length, offset + rows);
          const out: string[] = [];

          out.push(`┌${"─".repeat(totalW - 2)}┐`);
          pushWrapped(out, theme.bold("Run subagent program?"), contentW);
          pushWrapped(
            out,
            theme.fg("muted", `Lines ${offset + 1}-${end} / ${displayLines.length}`),
            contentW,
          );
          out.push(boxLine(theme.fg("dim", ""), contentW));

          for (let i = offset; i < end; i++) {
            out.push(
              boxLine(
                `${theme.fg("dim", String(i + 1).padStart(4, " "))} ${displayLines[i]}`,
                contentW,
              ),
            );
          }

          out.push(boxLine(theme.fg("dim", ""), contentW));
          if (collectingReason) {
            pushWrapped(out, theme.fg("warning", "Reject reason (optional):"), contentW);
            pushWrapped(
              out,
              `${theme.fg("accent", "> ")}${reason || theme.fg("dim", "(empty)")}`,
              contentW,
            );
            pushWrapped(
              out,
              theme.fg("muted", "Enter reject • Backspace edit • Esc back"),
              contentW,
            );
          } else {
            pushWrapped(
              out,
              theme.fg("muted", "↑/↓ scroll • Enter/Y confirm • N reject • Esc cancel"),
              contentW,
            );
          }
          out.push(`└${"─".repeat(totalW - 2)}┘`);
          return out;
        },
        invalidate() {},
        handleInput(data: string) {
          if (collectingReason) {
            if (matchesKey(data, "return")) {
              done({ approved: false, reason: reason.trim() || undefined });
              return;
            }
            if (matchesKey(data, "escape")) {
              collectingReason = false;
              tui.requestRender();
              return;
            }
            if (matchesKey(data, "ctrl+c")) {
              done({ approved: false });
              return;
            }
            if (matchesKey(data, "backspace") || data === "\x7f") {
              reason = reason.slice(0, -1);
              tui.requestRender();
              return;
            }
            // Accept single printable characters only.
            if (data.length === 1 && data >= " " && data !== "\x7f") {
              reason += data;
              tui.requestRender();
            }
            return;
          }
          if (matchesKey(data, "return") || data === "y" || data === "Y") {
            done({ approved: true });
            return;
          }
          if (data === "n" || data === "N") {
            collectingReason = true;
            tui.requestRender();
            return;
          }
          if (matchesKey(data, "escape") || matchesKey(data, "ctrl+c")) {
            done({ approved: false });
            return;
          }
          if (matchesKey(data, "up") || data === "k") {
            offset -= 1;
            tui.requestRender();
            return;
          }
          if (matchesKey(data, "down") || data === "j") {
            offset += 1;
            tui.requestRender();
            return;
          }
          if (matchesKey(data, "pageUp")) {
            offset -= codeRows();
            tui.requestRender();
            return;
          }
          if (matchesKey(data, "pageDown")) {
            offset += codeRows();
            tui.requestRender();
          }
        },
      };
    },
    {
      overlay: true,
      overlayOptions: { anchor: "center", width: "92%", maxHeight: "90%", margin: 1 },
    },
  );

  return result ?? { approved: false };
}

// ── Program execution ──────────────────────────────────────────────────

/**
 * Patches `Promise.all` and `console` for the run and publishes `runtime` as
 * `globalThis.factory`.
 *
 * @returns A restore function that puts back the previous `factory` global
 *   (deleting it if there was none) and undoes both patches. Calling it more
 *   than once is a no-op.
 */
function installProgramGlobals(
  runtime: ReturnType<typeof createFactory>,
  obs: ObservabilityStore,
  runId: string,
): () => void {
  const restorePromise = patchPromiseAll(obs, runId);
  const restoreConsole = patchConsole(obs, runId);

  const globals = globalThis as typeof globalThis & { factory?: unknown };
  const previousFactory = globals.factory;
  globals.factory = runtime;

  let restored = false;
  return () => {
    if (restored) return;
    restored = true;
    if (previousFactory === undefined) delete globals.factory;
    else globals.factory = previousFactory;
    restorePromise();
    restoreConsole();
  };
}

/**
 * Awaits `work`, rejecting early with a `CANCELLED` FactoryError if `signal`
 * aborts first. The abort listener is always removed afterwards.
 */
async function raceWithAbort(work: Promise<unknown>, signal: AbortSignal): Promise<void> {
  const cancelledError = () =>
    new FactoryError({ code: "CANCELLED", message: "Cancelled.", recoverable: true });

  // An already-aborted signal never fires "abort" again, so check it up front.
  if (signal.aborted) throw cancelledError();

  let onAbort: (() => void) | undefined;
  const cancelled = new Promise<never>((_resolve, reject) => {
    onAbort = () => reject(cancelledError());
    signal.addEventListener("abort", onAbort, { once: true });
  });
  try {
    await Promise.race([work, cancelled]);
  } finally {
    if (onAbort) signal.removeEventListener("abort", onAbort);
  }
}

/**
 * Runs a subagent program: stores its source, typechecks it, optionally asks
 * for confirmation, then imports it as a module with the `factory` global set.
 *
 * This function does not reject for program failures. Errors are converted
 * with `toErrorDetails` and returned in the summary: status `"cancelled"` for
 * `CANCELLED` / `CONFIRMATION_REJECTED`, otherwise `"failed"`.
 * `input.onUpdate` is called when the run starts, on every task update, and
 * with the final status. If a runtime was created, `runtime.shutdown(true)` is
 * always awaited before returning; a shutdown failure is logged as a warning.
 *
 * @param input - Run parameters. Set `skipConfirmation` to bypass the dialog.
 *   `signal`, when given, cancels the run.
 * @returns The final run summary (includes `metadata.modulePath` on success).
 */
export async function executeProgram(input: {
  ctx: ExtensionContext;
  runId: string;
  code: string;
  task: string;
  cwd: string;
  obs: ObservabilityStore;
  onUpdate?: (summary: RunSummary) => void;
  signal?: AbortSignal;
  parentSessionPath?: string;
  sessionDir?: string;
  skipConfirmation?: boolean;
  millCommand?: string;
  millArgs?: string[];
  millRunsDir?: string;
}): Promise<RunSummary> {
  const { ctx, runId, code, obs } = input;
  const resultsByTask = new Map<string, ExecutionResult>();
  const results: ExecutionResult[] = [];

  // Mirror the latest per-task results into the stable `results` array.
  const sync = () => {
    results.splice(0, results.length, ...resultsByTask.values());
  };
  const emit = (status: RunSummary["status"], error?: RunSummary["error"]) => {
    sync();
    input.onUpdate?.({
      runId,
      status,
      results: [...results],
      observability: obs.toSummary(runId),
      error,
    });
  };

  let runtime: ReturnType<typeof createFactory> | null = null;
  try {
    // Write program source as early as possible so failed preflight/confirmation runs
    // still keep a legible copy of the attempted program.
    obs.writeArtifact(runId, "program.ts", code);

    // Preflight typecheck — catch type errors before showing confirmation dialog
    const typeErrors = await preflightTypecheck(code);
    if (typeErrors) {
      throw new FactoryError({
        code: "INVALID_INPUT",
        message: `Type errors in program code:\n${typeErrors}`,
        recoverable: true,
      });
    }

    if (!input.skipConfirmation) {
      const confirmation = await confirmExecution(ctx, code);
      if (!confirmation.approved) {
        throw new FactoryError({
          code: "CONFIRMATION_REJECTED",
          message: confirmation.reason ? `Cancelled: ${confirmation.reason}` : "Cancelled by user.",
          recoverable: true,
        });
      }
    }

    emit("running");
    obs.push(runId, "info", "program:start", { codeBytes: code.length });

    runtime = createFactory(ctx, runId, obs, {
      defaultSignal: input.signal,
      onTaskUpdate: (result) => {
        resultsByTask.set(result.taskId, result);
        emit("running");
      },
      parentSessionPath: input.parentSessionPath,
      sessionDir: input.sessionDir,
      millCommand: input.millCommand,
      millArgs: input.millArgs,
      millRunsDir: input.millRunsDir,
    });

    const { modulePath } = prepareProgramModule(code);

    // Honour an already-aborted signal BEFORE the import starts, so that
    // "Cancelled before execution." really means the program never ran.
    if (input.signal?.aborted) {
      throw new FactoryError({
        code: "CANCELLED",
        message: "Cancelled before execution.",
        recoverable: true,
      });
    }

    const restoreGlobals = installProgramGlobals(runtime, obs, runId);
    try {
      const importPromise: Promise<unknown> = import(pathToFileURL(modulePath).toString());
      // Prevent unhandled rejection if importPromise rejects before being awaited
      importPromise.catch(() => {});

      if (input.signal) await raceWithAbort(importPromise, input.signal);
      else await importPromise;
    } finally {
      // Single restore point for success, failure, cancellation and sync throws.
      restoreGlobals();
    }

    emit("done");
    return {
      runId,
      status: "done",
      results,
      observability: obs.toSummary(runId),
      metadata: { modulePath },
    };
  } catch (error) {
    const details = toErrorDetails(error);
    obs.push(runId, "error", details.message, { code: details.code });
    const status =
      details.code === "CANCELLED" || details.code === "CONFIRMATION_REJECTED"
        ? "cancelled"
        : "failed";
    emit(status, details);
    return { runId, status, results, observability: obs.toSummary(runId), error: details };
  } finally {
    if (runtime) {
      try {
        await runtime.shutdown(true);
      } catch (e) {
        obs.push(runId, "warning", "shutdown_failed", { error: String(e) });
      }
    }
  }
}

+46

packages/pi-mill/format.ts

import type { RunSummary } from "./types.js";

/**
 * Formats a duration as `"42s"`, `"3m 5s"`, or `"12m"`.
 *
 * Seconds are dropped when they are zero or once the duration reaches ten
 * minutes. Negative or non-finite inputs (e.g. from clock skew or a missing
 * timestamp) are treated as zero instead of rendering `"-1s"` or `"NaNs"`.
 *
 * @param ms - Elapsed time in milliseconds.
 */
export function formatElapsed(ms: number): string {
  const safeMs = Number.isFinite(ms) && ms > 0 ? ms : 0;
  const secs = Math.floor(safeMs / 1000);
  if (secs < 60) return `${secs}s`;
  const mins = Math.floor(secs / 60);
  const rem = secs % 60;
  if (rem === 0 || mins >= 10) return `${mins}m`;
  return `${mins}m ${rem}s`;
}

/** Maps a run status to a one-character glyph; unknown statuses map to `"?"`. */
export function statusIcon(status: string): string {
  switch (status) {
    case "running":
      return "●";
    case "done":
      return "✓";
    case "failed":
      return "✗";
    case "cancelled":
      return "◼";
    default:
      return "?";
  }
}

/** Structural view covering both input shapes accepted by {@link agentLabel}. */
interface LabelSource {
  task?: unknown;
  summary?: { results?: ReadonlyArray<{ agent?: unknown }> };
  results?: ReadonlyArray<{ agent?: unknown }>;
  metadata?: unknown;
  runId: string;
}

/** Returns the first result's agent name when it is a non-empty string. */
function firstAgent(results: ReadonlyArray<{ agent?: unknown }> | undefined): string | undefined {
  const agent = results?.[0]?.agent;
  return typeof agent === "string" && agent.length > 0 ? agent : undefined;
}

/**
 * Picks a display label for a run.
 *
 * Preference order: the task text (`task` on a record, `metadata.task` on a
 * summary), then the first result's agent name, then the first 8 characters
 * of the run id. Empty strings are skipped so the label is never blank.
 */
export function agentLabel(record: {
  task?: string;
  summary?: { results: Array<{ agent: string }> };
  runId: string;
}): string;
export function agentLabel(summary: RunSummary): string;
export function agentLabel(input: LabelSource): string {
  const fallback = input.runId.slice(0, 8);

  // RunRecord style (has .summary)
  if (input.summary) {
    if (typeof input.task === "string" && input.task.length > 0) return input.task;
    return firstAgent(input.summary.results) ?? fallback;
  }

  // RunSummary style (has .results directly, .metadata)
  const meta = input.metadata;
  if (typeof meta === "object" && meta !== null) {
    const task = (meta as Record<string, unknown>).task;
    if (typeof task === "string" && task.length > 0) return task;
  }
  return firstAgent(input.results) ?? fallback;
}

+616

packages/pi-mill/index.ts

··· 1 + import { spawnSync } from "node:child_process"; 2 + import * as fs from "node:fs"; 3 + import * as os from "node:os"; 4 + import * as path from "node:path"; 5 + import type { AgentToolResult } from "@mariozechner/pi-agent-core"; 6 + import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; 7 + import { keyHint } from "@mariozechner/pi-coding-agent"; 8 + import { Container, Spacer, Text } from "@mariozechner/pi-tui"; 9 + import { SubagentSchema, validateParams } from "./contract.js"; 10 + import { toErrorDetails } from "./errors.js"; 11 + import { ObservabilityStore } from "./observability.js"; 12 + import { RunRegistry } from "./registry.js"; 13 + import { confirmExecution, executeProgram } from "./executors/program-executor.js"; 14 + import { FactoryWidget } from "./widget.js"; 15 + import { FactoryMonitor } from "./monitor.js"; 16 + import { registerMessageRenderer, notifyCompletion } from "./notify.js"; 17 + import type { RunSummary } from "./types.js"; 18 + 19 + function writeRunJson(summary: RunSummary): void { 20 + const dir = summary.observability?.artifactsDir; 21 + if (!dir) return; 22 + try { 23 + const data = { 24 + runId: summary.runId, 25 + status: summary.status, 26 + task: (summary.metadata as any)?.task, 27 + startedAt: summary.observability?.startedAt, 28 + completedAt: summary.observability?.endedAt ?? Date.now(), 29 + results: summary.results.map((r) => ({ 30 + agent: r.agent, 31 + task: r.task, 32 + model: r.model, 33 + exitCode: r.exitCode, 34 + text: r.text, 35 + sessionPath: r.sessionPath, 36 + usage: r.usage, 37 + })), 38 + error: summary.error, 39 + }; 40 + fs.writeFileSync(path.join(dir, "run.json"), JSON.stringify(data, null, 2)); 41 + } catch {} 42 + } 43 + 44 + /** Write a partial run.json so external monitors (pi --mill) can see active runs. 
*/ 45 + function writeRunningMarker(runId: string, task: string, artifactsDir: string): void { 46 + try { 47 + const data = { 48 + runId, 49 + status: "running", 50 + task, 51 + startedAt: Date.now(), 52 + results: [], 53 + }; 54 + fs.writeFileSync(path.join(artifactsDir, "run.json"), JSON.stringify(data, null, 2)); 55 + } catch {} 56 + } 57 + 58 + function generateRunId(): string { 59 + return `mill-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`; 60 + } 61 + 62 + interface ExtensionConfig { 63 + maxDepth: number; 64 + millCommand: string; 65 + millArgs: string[]; 66 + millRunsDir?: string; 67 + prompt: string; 68 + } 69 + 70 + const parseJsonObjectFromText = (text: string): Record<string, unknown> | undefined => { 71 + const candidates = text 72 + .split("\n") 73 + .map((line) => line.trim()) 74 + .filter((line) => line.length > 0) 75 + .reverse(); 76 + 77 + for (const candidate of candidates) { 78 + try { 79 + const parsed = JSON.parse(candidate) as unknown; 80 + if (typeof parsed === "object" && parsed !== null) { 81 + return parsed as Record<string, unknown>; 82 + } 83 + } catch { 84 + continue; 85 + } 86 + } 87 + 88 + return undefined; 89 + }; 90 + 91 + function readModelsFromMill(config: ExtensionConfig): string[] { 92 + const args = [...config.millArgs, "discovery", "--json"]; 93 + if (config.millRunsDir && config.millRunsDir.trim().length > 0) { 94 + args.push("--runs-dir", config.millRunsDir); 95 + } 96 + 97 + const result = spawnSync(config.millCommand, args, { 98 + encoding: "utf-8", 99 + shell: false, 100 + }); 101 + 102 + if (result.status !== 0) { 103 + return []; 104 + } 105 + 106 + const payload = parseJsonObjectFromText(result.stdout); 107 + if (!payload) { 108 + return []; 109 + } 110 + 111 + const drivers = payload.drivers; 112 + if (typeof drivers !== "object" || drivers === null) { 113 + return []; 114 + } 115 + 116 + const models = new Set<string>(); 117 + for (const entry of Object.values(drivers)) { 118 + if (typeof 
entry !== "object" || entry === null) { 119 + continue; 120 + } 121 + const values = (entry as { models?: unknown }).models; 122 + if (!Array.isArray(values)) { 123 + continue; 124 + } 125 + for (const value of values) { 126 + if (typeof value === "string" && value.trim().length > 0) { 127 + models.add(value); 128 + } 129 + } 130 + } 131 + 132 + return [...models]; 133 + } 134 + 135 + function readEnabledModelsFallback(): string[] { 136 + try { 137 + const p = path.join(os.homedir(), ".pi", "agent", "settings.json"); 138 + if (!fs.existsSync(p)) return []; 139 + const parsed: unknown = JSON.parse(fs.readFileSync(p, "utf-8")); 140 + if ( 141 + typeof parsed === "object" && 142 + parsed !== null && 143 + "enabledModels" in parsed && 144 + Array.isArray((parsed as any).enabledModels) 145 + ) { 146 + return (parsed as any).enabledModels.filter( 147 + (m: unknown): m is string => typeof m === "string" && (m as string).length > 0, 148 + ); 149 + } 150 + return []; 151 + } catch { 152 + return []; 153 + } 154 + } 155 + 156 + // ── Text helpers ─────────────────────────────────────────────────────── 157 + 158 + function buildPrimaryContent(summary: RunSummary, forUpdate = false): string { 159 + if (summary.error) return `${summary.error.code}: ${summary.error.message}`; 160 + if (summary.results.length === 0) return forUpdate ? "(running...)" : "Completed."; 161 + if (summary.results.length === 1) { 162 + return summary.results[0].text || (forUpdate ? "(running...)" : "Completed."); 163 + } 164 + const lines = [`Program completed with ${summary.results.length} result(s):`]; 165 + for (const r of summary.results) { 166 + lines.push(r.text ? `\n[${r.agent}]\n${r.text}` : `\n[${r.agent}] (no output)`); 167 + } 168 + return lines.join("\n").trim(); 169 + } 170 + 171 + function renderCollapsed(summary: RunSummary, expanded: boolean, theme: any): Text { 172 + const icon = 173 + summary.status === "done" 174 + ? 
theme.fg("success", "✓") 175 + : summary.status === "running" 176 + ? theme.fg("warning", "⏳") 177 + : summary.status === "cancelled" 178 + ? theme.fg("warning", "◼") 179 + : theme.fg("error", "✗"); 180 + 181 + let out = `${icon} ${theme.fg("toolTitle", theme.bold("subagent"))}`; 182 + out += ` ${theme.fg("muted", `[${summary.runId}]`)}`; 183 + if (summary.error) 184 + out += `\n${theme.fg("error", `${summary.error.code}: ${summary.error.message}`)}`; 185 + 186 + if (summary.results.length === 0) { 187 + out += `\n${theme.fg("muted", "(no results yet)")}`; 188 + } else { 189 + for (const r of summary.results.slice(-5)) { 190 + const rIcon = 191 + r.exitCode === 0 192 + ? theme.fg("success", "✓") 193 + : summary.status === "running" || r.exitCode < 0 194 + ? theme.fg("warning", "⏳") 195 + : theme.fg("error", "✗"); 196 + const model = r.model ? ` ${theme.fg("muted", `[${r.model}]`)}` : ""; 197 + out += `\n${rIcon} ${theme.fg("accent", r.agent)}${model} ${theme.fg("dim", r.task.slice(0, 80))}`; 198 + } 199 + } 200 + 201 + if (!expanded) out += `\n${theme.fg("muted", keyHint("expandTools", "to expand"))}`; 202 + return new Text(out, 0, 0); 203 + } 204 + 205 + function renderExpanded(summary: RunSummary, theme: any): Container { 206 + const container = new Container(); 207 + container.addChild(renderCollapsed(summary, true, theme)); 208 + container.addChild(new Spacer(1)); 209 + 210 + if (summary.observability) { 211 + container.addChild(new Text(theme.fg("muted", "── observability ──"), 0, 0)); 212 + for (const ev of summary.observability.events.slice(-30)) { 213 + const time = new Date(ev.time).toISOString(); 214 + container.addChild( 215 + new Text( 216 + `${theme.fg("muted", time)} ${theme.fg("accent", ev.type)} ${theme.fg("toolOutput", ev.message)}`, 217 + 0, 218 + 0, 219 + ), 220 + ); 221 + } 222 + if (summary.observability.artifacts.length > 0) { 223 + container.addChild(new Spacer(1)); 224 + container.addChild(new Text(theme.fg("muted", "artifacts:"), 0, 0)); 
225 + for (const a of summary.observability.artifacts) 226 + container.addChild(new Text(theme.fg("dim", `- ${a}`), 0, 0)); 227 + } 228 + } 229 + 230 + if (summary.results.length > 0) { 231 + container.addChild(new Spacer(1)); 232 + container.addChild(new Text(theme.fg("muted", "── outputs ──"), 0, 0)); 233 + for (const r of summary.results) { 234 + container.addChild(new Spacer(1)); 235 + container.addChild( 236 + new Text( 237 + `${theme.fg("accent", r.agent)} ${theme.fg("muted", `model=${r.model ?? "?"}`)}`, 238 + 0, 239 + 0, 240 + ), 241 + ); 242 + container.addChild(new Text(theme.fg("dim", r.task), 0, 0)); 243 + if (r.text) container.addChild(new Text(r.text, 0, 0)); 244 + if (r.sessionPath) 245 + container.addChild(new Text(theme.fg("dim", `session: ${r.sessionPath}`), 0, 0)); 246 + } 247 + } 248 + 249 + return container; 250 + } 251 + 252 + function loadHistoricalRuns(ctx: ExtensionContext, registry: RunRegistry): void { 253 + const sessionDir = ctx.sessionManager.getSessionDir(); 254 + if (!sessionDir) return; 255 + const factoryDir = path.join(sessionDir, ".factory"); 256 + if (!fs.existsSync(factoryDir)) return; 257 + try { 258 + for (const entry of fs.readdirSync(factoryDir)) { 259 + const runJsonPath = path.join(factoryDir, entry, "run.json"); 260 + if (!fs.existsSync(runJsonPath)) continue; 261 + try { 262 + const data = JSON.parse(fs.readFileSync(runJsonPath, "utf-8")); 263 + registry.loadHistorical({ 264 + runId: data.runId, 265 + status: data.status ?? "done", 266 + summary: { 267 + runId: data.runId, 268 + status: data.status ?? "done", 269 + results: data.results ?? [], 270 + error: data.error, 271 + metadata: { task: data.task }, 272 + }, 273 + startedAt: data.startedAt ?? 
Date.now(), 274 + completedAt: data.completedAt, 275 + acknowledged: true, 276 + task: data.task, 277 + }); 278 + } catch {} 279 + } 280 + } catch {} 281 + } 282 + 283 + // ── Extension entry point ────────────────────────────────────────────── 284 + 285 + // ── Extension config ─────────────────────────────────────────────────── 286 + // Edit this object to customize behavior. It lives here so it's version-controlled 287 + // alongside the extension code. 288 + 289 + export const config: ExtensionConfig = { 290 + /** Maximum nesting depth for subagent spawning. 1 = orchestrator can spawn subagents, but those subagents cannot spawn their own. 0 = no subagents at all. */ 291 + maxDepth: 1, 292 + /** mill executable path/name. */ 293 + millCommand: "mill", 294 + /** Optional static args prepended to every mill invocation. */ 295 + millArgs: [], 296 + /** Optional runs-dir override passed to mill commands (discovery + child runs). */ 297 + millRunsDir: undefined, 298 + /** Extra text appended to the tool description. Use for model selection hints, project conventions, etc. */ 299 + prompt: 300 + "Use openai/gpt-5.3-codex for most subagent operations, especially if they entail making changes across multiple files. If you need to search you can use faster models like cerebras/zai-glm-4.7. If you need to look at and reason over images (a screenshot is referenced) use google-gemini-cli/gemini-3-flash-preview to see the changes.", 301 + }; 302 + 303 + // ──────────────────────────────────────────────────────────────────────── 304 + 305 + export default function (pi: ExtensionAPI) { 306 + // Register bundled skills from the skills/ subdirectory 307 + const extensionDir = import.meta.dirname ?? 
path.dirname(new URL(import.meta.url).pathname); 308 + const skillsDir = path.join(extensionDir, "skills"); 309 + pi.on("resources_discover", () => { 310 + if (fs.existsSync(skillsDir)) { 311 + return { skillPaths: [skillsDir] }; 312 + } 313 + return {}; 314 + }); 315 + const observability = new ObservabilityStore(); 316 + const registry = new RunRegistry(); 317 + const widget = new FactoryWidget(); 318 + const modelsFromMill = readModelsFromMill(config); 319 + const fallbackModels = readEnabledModelsFallback(); 320 + const enabledModels = modelsFromMill.length > 0 ? modelsFromMill : fallbackModels; 321 + const modelsText = enabledModels.length > 0 ? enabledModels.join(", ") : "(none detected)"; 322 + 323 + // Keep a reference to the current context for widget/notification updates 324 + let currentCtx: ExtensionContext | undefined; 325 + let pollTimer: ReturnType<typeof setInterval> | undefined; 326 + 327 + // Register --mill flag for standalone monitoring 328 + pi.registerFlag("mill", { 329 + description: "Monitor subagent runs", 330 + type: "boolean", 331 + default: false, 332 + }); 333 + 334 + // Register the message renderer for completion notifications 335 + registerMessageRenderer(pi); 336 + 337 + // Widget polling — updates running jobs every 250ms 338 + function startPolling() { 339 + if (pollTimer) return; 340 + pollTimer = setInterval(() => { 341 + if (!currentCtx) return; 342 + const runs = registry.getVisible(); 343 + widget.update(runs, currentCtx); 344 + // Stop polling if nothing is running 345 + if (registry.getActive().length === 0) { 346 + stopPolling(); 347 + } 348 + }, 250); 349 + } 350 + 351 + function stopPolling() { 352 + if (pollTimer) { 353 + clearInterval(pollTimer); 354 + pollTimer = undefined; 355 + } 356 + } 357 + 358 + // Lifecycle hooks 359 + pi.on("session_start", async (_event, ctx) => { 360 + currentCtx = ctx; 361 + loadHistoricalRuns(ctx, registry); 362 + 363 + // --mill flag: show full-screen monitor and exit when done. 
364 + // We must defer ctx.ui.custom() because session_start fires during 365 + // initExtensions(), BEFORE ui.start() sets up terminal keyboard input. 366 + // Awaiting ctx.ui.custom() here would deadlock: the component needs 367 + // keyboard input to call done(), but ui.start() can't run until this 368 + // handler returns. setTimeout(0) schedules after init() completes. 369 + if (pi.getFlag("mill") === true) { 370 + setTimeout(async () => { 371 + await ctx.ui.custom<void>( 372 + (tui, theme, _kb, done) => 373 + new FactoryMonitor({ 374 + tui, 375 + theme, 376 + done, 377 + registry, 378 + sessionDir: ctx.cwd, 379 + }), 380 + ); 381 + ctx.shutdown(); 382 + }, 0); 383 + } 384 + }); 385 + 386 + pi.on("session_switch", async (_event, ctx) => { 387 + currentCtx = ctx; 388 + registry.clearHistorical(); 389 + loadHistoricalRuns(ctx, registry); 390 + widget.update(registry.getVisible(), ctx); 391 + if (registry.getActive().length > 0) startPolling(); 392 + else stopPolling(); 393 + }); 394 + 395 + pi.on("session_shutdown", async () => { 396 + // Don't cancel active runs — children are detached and will continue 397 + stopPolling(); 398 + }); 399 + 400 + // /mill command — overview of all runs (overlay UI) 401 + pi.registerCommand("mill", { 402 + description: "Show subagent run status", 403 + handler: async (_args, ctx) => { 404 + await ctx.ui.custom<void>( 405 + (tui, theme, _kb, done) => new FactoryMonitor({ tui, theme, done, registry }), 406 + { 407 + overlay: true, 408 + overlayOptions: { 409 + width: "90%", 410 + minWidth: 60, 411 + maxHeight: "95%", 412 + anchor: "center", 413 + }, 414 + }, 415 + ); 416 + }, 417 + }); 418 + 419 + // Depth guard: skip subagent tool registration if we're already at max depth. 420 + // PI_FACTORY_DEPTH is set by runtime.ts when spawning child mill processes. 
421 + const currentDepth = parseInt(process.env.PI_FACTORY_DEPTH || "0", 10); 422 + if (currentDepth >= config.maxDepth) { 423 + return; 424 + } 425 + 426 + pi.registerTool({ 427 + name: "subagent", 428 + label: "Subagent", 429 + description: [ 430 + "Spawn subagents for delegated or orchestrated work.", 431 + "Execution backend: mill (mill run --sync --json). Configure drivers/executors/models via mill.config.ts.", 432 + `Enabled models: ${modelsText}`, 433 + "Write a TypeScript script. `factory` is a global (like `process` or `console`). Use factory.spawn() to orchestrate agents.", 434 + "factory.spawn() returns a Promise<ExecutionResult>. Use `await` for sequential, `Promise.all` for parallel.", 435 + "Each spawn needs: agent, systemPrompt, prompt, model. cwd defaults to process.cwd().", 436 + "systemPrompt defines WHO the agent is (behavior, principles, methodology). prompt defines WHAT it should do now (specific files, specific work). Don't put task details in systemPrompt.", 437 + "Context flow: each subagent gets the parent session path and can use search_thread to explore it. Each subagent's session is persisted and available via result.sessionPath. Result text is auto-populated on result.text.", 438 + "Async by default: returns immediately with a runId. Results are delivered via notification when complete. Do NOT poll or check for results — just continue with other work and the notification will arrive automatically.", 439 + "Model selection: use provider/model-id format (e.g. 'anthropic/claude-opus-4-6', 'cerebras/zai-glm-4.7'). Match model capability to task complexity. Use smaller/faster models for simple tasks, stronger models for complex reasoning. Vary your choices across the enabled models — don't default to one.", 440 + "Available types: Factory, ExecutionResult, SpawnInput, UsageStats.", 441 + ...(config.prompt ? 
[config.prompt] : []), 442 + ].join(" "), 443 + parameters: SubagentSchema, 444 + 445 + async execute( 446 + _toolCallId, 447 + rawParams, 448 + signal, 449 + onUpdate, 450 + ctx, 451 + ): Promise<AgentToolResult<RunSummary>> { 452 + currentCtx = ctx; 453 + const params = validateParams(rawParams); 454 + const runId = generateRunId(); 455 + const piSessionDir = ctx.sessionManager.getSessionDir() ?? undefined; 456 + observability.createRun(runId, true, piSessionDir); 457 + observability.setStatus(runId, "running", "run:start"); 458 + 459 + const parentSessionPath = ctx.sessionManager.getSessionFile() ?? undefined; 460 + const run = observability.get(runId); 461 + const sessionDir = run?.artifactsDir ? path.join(run.artifactsDir, "sessions") : undefined; 462 + 463 + const emitUpdate = (summary: RunSummary) => { 464 + onUpdate?.({ 465 + content: [{ type: "text", text: buildPrimaryContent(summary, true) }], 466 + details: summary, 467 + }); 468 + // Update registry so overlay reads live data 469 + registry.updateSummary(runId, summary); 470 + // Also update widget with latest state 471 + widget.update(registry.getVisible(), ctx); 472 + }; 473 + 474 + // Confirm BEFORE going async so user sees the dialog 475 + const confirmation = await confirmExecution(ctx, params.code); 476 + if (!confirmation.approved) { 477 + const msg = confirmation.reason 478 + ? `Cancelled: ${confirmation.reason}` 479 + : "Cancelled by user."; 480 + return { 481 + content: [{ type: "text", text: msg }], 482 + details: { 483 + runId, 484 + status: "cancelled" as const, 485 + results: [], 486 + error: { code: "CONFIRMATION_REJECTED", message: msg, recoverable: true }, 487 + }, 488 + }; 489 + } 490 + 491 + const abort = new AbortController(); 492 + 493 + // Don't wire the parent tool signal — subagent runs are detached and 494 + // should survive turn cancellation. Use "c" in /mill or pi --mill 495 + // to explicitly cancel a run. 
496 + 497 + const promise = executeProgram({ 498 + ctx, 499 + runId, 500 + code: params.code, 501 + task: params.task, 502 + cwd: ctx.cwd, 503 + obs: observability, 504 + onUpdate: emitUpdate, 505 + signal: abort.signal, 506 + parentSessionPath, 507 + sessionDir, 508 + skipConfirmation: true, 509 + millCommand: config.millCommand, 510 + millArgs: config.millArgs, 511 + millRunsDir: config.millRunsDir, 512 + }); 513 + 514 + // Register in the registry 515 + const initialSummary: RunSummary = { 516 + runId, 517 + status: "running", 518 + results: [], 519 + observability: observability.toSummary(runId), 520 + }; 521 + registry.register(runId, initialSummary, promise, abort, { task: params.task }); 522 + 523 + // Write running marker so external monitors (pi --mill) see active runs 524 + const runArtifactsDir = observability.get(runId)?.artifactsDir; 525 + if (runArtifactsDir) writeRunningMarker(runId, params.task, runArtifactsDir); 526 + 527 + // Wire completion: update observability, widget, and notify 528 + promise.then( 529 + (summary) => { 530 + try { 531 + observability.setStatus( 532 + runId, 533 + summary.status === "done" 534 + ? "done" 535 + : summary.status === "cancelled" 536 + ? "cancelled" 537 + : "failed", 538 + ); 539 + const fullSummary = { 540 + ...summary, 541 + observability: observability.toSummary(runId), 542 + metadata: { task: params.task }, 543 + }; 544 + registry.complete(runId, fullSummary); 545 + widget.update(registry.getVisible(), ctx); 546 + notifyCompletion(pi, registry, fullSummary); 547 + writeRunJson(fullSummary); 548 + widget.update(registry.getVisible(), ctx); 549 + } catch { 550 + /* shutting down */ 551 + } 552 + }, 553 + (err) => { 554 + try { 555 + const details = toErrorDetails(err); 556 + observability.setStatus(runId, details.code === "CANCELLED" ? 
"cancelled" : "failed"); 557 + const failedSummary: RunSummary = { 558 + runId, 559 + status: "failed", 560 + results: [], 561 + error: details, 562 + observability: observability.toSummary(runId), 563 + metadata: { task: params.task }, 564 + }; 565 + registry.fail(runId, details); 566 + notifyCompletion(pi, registry, failedSummary); 567 + writeRunJson(failedSummary); 568 + widget.update(registry.getVisible(), ctx); 569 + } catch { 570 + /* shutting down */ 571 + } 572 + }, 573 + ); 574 + 575 + // Start polling for widget updates 576 + startPolling(); 577 + 578 + // Update widget immediately 579 + widget.update(registry.getVisible(), ctx); 580 + 581 + // Return immediately with artifact paths so orchestrator can check progress 582 + const artifactsDir = observability.get(runId)?.artifactsDir; 583 + const lines = [ 584 + `Spawned '${params.task}' → ${runId}. Running async — results will be delivered when complete.`, 585 + ]; 586 + if (artifactsDir) { 587 + lines.push(`Artifacts: ${artifactsDir}`); 588 + lines.push(`Status: ${artifactsDir}/run.json (written on completion)`); 589 + lines.push(`Sessions: ${artifactsDir}/sessions/`); 590 + } 591 + return { 592 + content: [{ type: "text", text: lines.join("\n") }], 593 + details: initialSummary, 594 + }; 595 + }, 596 + 597 + renderCall(args, theme) { 598 + const asyncLabel = ` ${theme.fg("dim", "(async)")}`; 599 + return new Text( 600 + `${theme.fg("toolTitle", theme.bold("subagent"))} ${theme.fg("accent", args.task)}${asyncLabel}`, 601 + 0, 602 + 0, 603 + ); 604 + }, 605 + 606 + renderResult(result, options, theme) { 607 + const details = result.details; 608 + if (!details) { 609 + const txt = result.content[0]; 610 + return new Text(txt?.type === "text" ? txt.text : "(no output)", 0, 0); 611 + } 612 + if (options.expanded) return renderExpanded(details, theme); 613 + return renderCollapsed(details, false, theme); 614 + }, 615 + }); 616 + }

+661

packages/pi-mill/monitor.ts

import type { Component, TUI } from "@mariozechner/pi-tui";
import { matchesKey, truncateToWidth, visibleWidth, wrapTextWithAnsi } from "@mariozechner/pi-tui";
import type { Theme } from "@mariozechner/pi-coding-agent";
import type { RunRegistry, RunRecord } from "./registry.js";
import { formatElapsed, statusIcon, agentLabel } from "./format.js";
import { scanRuns, cwdToSessionDir, getSessionsBase, cancelRunByPidFiles } from "./scanner.js";
import type { ExecutionResult } from "./types.js";

/**
 * 3-level drill-down TUI for monitoring pi-mill subagent runs.
 *
 * Level 1: Run list
 * Level 2: Agent list (within a run)
 * Level 3: Agent detail (single agent, scrollable)
 *
 * Works in two contexts:
 * - In-session overlay: reads from a RunRegistry
 * - Standalone mode: scans filesystem for run.json files
 */

type Level = 1 | 2 | 3;

/** Row budgets for the scrollable body of each level (indicators included). */
const MAX_RUNS_VISIBLE = 12;
const MAX_AGENTS_VISIBLE = 10;
const MAX_DETAIL_VISIBLE = 20;

/** Slice of a scrollable list that fits a fixed row budget. */
interface RowWindow {
  /** Index of the first item that is drawn. */
  start: number;
  /** Number of item rows that are drawn. */
  count: number;
  /** Items hidden above the window (non-zero means a "▲" row is drawn). */
  above: number;
  /** Items hidden below the window (non-zero means a "▼" row is drawn). */
  below: number;
}

/** Collapse line breaks so a label always occupies exactly one row. */
function flatten(s: string): string {
  return s.replace(/[\n\r]+/g, " ").trim();
}

/**
 * Largest useful scroll offset: the smallest offset at which the last item
 * is visible. When scrolled, one row of the budget is spent on the "▲"
 * indicator, hence `maxRows - 1` item rows.
 */
function maxScrollFor(total: number, maxRows: number): number {
  return total <= maxRows ? 0 : total - (maxRows - 1);
}

/**
 * Compute which items are drawn for a given scroll offset. Indicator rows
 * are taken out of the same `maxRows` budget as the items, so the caller
 * never draws more than `maxRows` rows and never silently drops an item it
 * believes is visible.
 */
function rowWindow(total: number, scroll: number, maxRows: number): RowWindow {
  const start = Math.max(0, Math.min(scroll, maxScrollFor(total, maxRows)));
  let capacity = maxRows - (start > 0 ? 1 : 0);
  // Not everything below fits: give up one item row for the "▼" indicator.
  if (total - start > capacity) capacity -= 1;
  const count = Math.max(0, Math.min(capacity, total - start));
  return { start, count, above: start, below: total - start - count };
}

/**
 * Adjust `scroll` so that `selected` falls inside the drawn window.
 * The number of item rows depends on the offset (indicators come and go),
 * so the offset is advanced step by step rather than computed in one shot;
 * lists here are small, and the loop stops at `selected` at the latest.
 */
function scrollForSelection(
  scroll: number,
  selected: number,
  total: number,
  maxRows: number,
): number {
  let s = Math.max(0, Math.min(scroll, maxScrollFor(total, maxRows)));
  if (selected < s) s = selected;
  while (s < selected && selected >= s + rowWindow(total, s, maxRows).count) s++;
  return s;
}

export interface MonitorOptions {
  tui: TUI;
  theme: Theme;
  /** Called once when the user closes the monitor from level 1. */
  done: () => void;
  /** In-session source of runs. Takes precedence over scanned runs when set. */
  registry?: RunRegistry;
  /** Working directory whose session folder is scanned for runs. */
  sessionDir?: string;
}

export class FactoryMonitor implements Component {
  protected tui: TUI;
  protected theme: Theme;
  protected done: () => void;
  private registry?: RunRegistry;
  private sessionDir?: string;

  // Navigation state
  private level: Level = 1;
  private selectedRunIndex = 0;
  private runListScroll = 0;
  private selectedAgentIndex = 0;
  private agentListScroll = 0;
  private detailScroll = 0;

  // Filesystem-scanned runs (standalone mode)
  private scannedRuns: RunRecord[] = [];

  // Polling for auto-refresh
  private refreshTimer: ReturnType<typeof setInterval> | undefined;
  private renderTimeout: ReturnType<typeof setTimeout> | undefined;

  /**
   * Stores the options, performs an initial filesystem scan when a
   * `sessionDir` is given, and starts the auto-refresh timer. Call
   * `dispose()` to stop the timers.
   */
  constructor(options: MonitorOptions) {
    this.tui = options.tui;
    this.theme = options.theme;
    this.done = options.done;
    this.registry = options.registry;
    this.sessionDir = options.sessionDir;

    if (this.sessionDir) {
      this.refreshScannedRuns();
    }
    this.startAutoRefresh();
  }

  // ── Data access ────────────────────────────────────────────────────

  /**
   * Runs in display order: running runs first, then newest first.
   * The registry is used whenever one was supplied; scanned runs are only
   * shown when there is no registry.
   */
  private getSortedRuns(): RunRecord[] {
    const runs = this.registry ? this.registry.getAll() : this.scannedRuns;

    return [...runs].sort((a, b) => {
      if (a.status === "running" && b.status !== "running") return -1;
      if (b.status === "running" && a.status !== "running") return 1;
      return b.startedAt - a.startedAt;
    });
  }

  /** Re-scan the session folder derived from `sessionDir` and cache the result. */
  private refreshScannedRuns(): void {
    if (!this.sessionDir) return;
    const base = getSessionsBase();
    const dirName = cwdToSessionDir(this.sessionDir);
    const raw = scanRuns(base, dirName);
    this.scannedRuns = raw as RunRecord[];
  }

  // ── Rendering ──────────────────────────────────────────────────────

  /**
   * Draw the bordered box for the current level.
   *
   * @param width Total width available, including the two border columns.
   * @returns One string per terminal row.
   */
  render(width: number): string[] {
    const t = this.theme;
    const innerW = Math.max(10, width - 2);
    const border = (c: string) => t.fg("border", c);
    const pad = (s: string) => truncateToWidth(s, innerW, "…", true);
    const row = (s: string) => border("│") + pad(" " + s) + border("│");
    const emptyRow = () => border("│") + pad("") + border("│");
    const lines: string[] = [];
    const runs = this.getSortedRuns();

    // ── Top border with title ──
    // Truncate so a long task label cannot push the top border past the box.
    const titleText = truncateToWidth(this.buildTitle(runs), innerW);
    const titleW = visibleWidth(titleText);
    const leftPad = Math.floor((innerW - titleW) / 2);
    const rightPad = innerW - titleW - leftPad;
    lines.push(
      border("╭") +
        border("─".repeat(Math.max(0, leftPad))) +
        t.fg("accent", titleText) +
        border("─".repeat(Math.max(0, rightPad))) +
        border("╮"),
    );

    switch (this.level) {
      case 1:
        this.renderRunList(lines, runs, innerW, border, row, emptyRow);
        break;
      case 2:
        this.renderAgentList(lines, runs, innerW, border, row, emptyRow);
        break;
      case 3:
        this.renderAgentDetail(lines, runs, innerW, border, row, emptyRow);
        break;
    }

    // ── Footer ──
    lines.push(border("├") + border("─".repeat(innerW)) + border("┤"));
    lines.push(row(t.fg("dim", this.footerHints())));
    lines.push(border("╰") + border("─".repeat(innerW)) + border("╯"));

    return lines;
  }

  /** Title shown in the top border; depends on the current level. */
  private buildTitle(runs: RunRecord[]): string {
    switch (this.level) {
      case 1:
        return ` mill (${runs.length} run${runs.length === 1 ? "" : "s"}) `;
      case 2: {
        const run = runs[this.selectedRunIndex];
        const label = run ? flatten(agentLabel(run)) : "run";
        return ` ${label} `;
      }
      case 3: {
        const run = runs[this.selectedRunIndex];
        const agent = run?.summary.results[this.selectedAgentIndex];
        const name = agent?.agent ?? "agent";
        return ` ${name} `;
      }
    }
  }

  /** Key hints shown in the footer row for the current level. */
  private footerHints(): string {
    switch (this.level) {
      case 1:
        return "j/k select Enter drill c cancel r refresh q/Esc close";
      case 2:
        return "j/k select Enter detail Esc back";
      case 3:
        return "j/k scroll Esc back";
    }
  }

  /**
   * Emit exactly `maxRows` rows for a scroll window: an optional "▲" row,
   * the item rows, an optional "▼" row, then blank padding so the box keeps
   * a fixed height.
   */
  private renderWindow(
    lines: string[],
    win: RowWindow,
    maxRows: number,
    row: (s: string) => string,
    emptyRow: () => string,
    renderItem: (index: number) => string,
  ): void {
    const t = this.theme;
    let rendered = 0;

    if (win.above > 0) {
      lines.push(row(t.fg("dim", `▲ ${win.above} more above`)));
      rendered++;
    }

    for (let i = win.start; i < win.start + win.count; i++) {
      lines.push(row(renderItem(i)));
      rendered++;
    }

    if (win.below > 0) {
      lines.push(row(t.fg("dim", `▼ ${win.below} more below`)));
      rendered++;
    }

    while (rendered < maxRows) {
      lines.push(emptyRow());
      rendered++;
    }
  }

  /** Selection marker; both variants are two columns wide so rows stay aligned. */
  private selectionPrefix(selected: boolean): string {
    return selected ? this.theme.fg("accent", "▶ ") : "  ";
  }

  // ── Level 1: Run list ──────────────────────────────────────────────

  private renderRunList(
    lines: string[],
    runs: RunRecord[],
    innerW: number,
    _border: (c: string) => string,
    row: (s: string) => string,
    emptyRow: () => string,
  ): void {
    const t = this.theme;

    if (runs.length === 0) {
      lines.push(row(t.fg("muted", "No subagent runs.")));
      lines.push(emptyRow());
      return;
    }

    // Clamp selection (the list may have shrunk since the last key press)
    this.selectedRunIndex = Math.max(0, Math.min(this.selectedRunIndex, runs.length - 1));
    this.clampScroll("run", runs.length, MAX_RUNS_VISIBLE);

    lines.push(emptyRow());

    const win = rowWindow(runs.length, this.runListScroll, MAX_RUNS_VISIBLE);
    this.renderWindow(lines, win, MAX_RUNS_VISIBLE, row, emptyRow, (index) => {
      const run = runs[index];
      if (!run) return "";
      const prefix = this.selectionPrefix(index === this.selectedRunIndex);
      return prefix + this.formatRunLine(run, innerW - 4);
    });
  }

  /** One-line summary of a run: status icon, label, elapsed time, agent count, model. */
  private formatRunLine(r: RunRecord, maxWidth: number): string {
    const t = this.theme;
    // Still-running runs have no completedAt; measure against the clock.
    const elapsed = formatElapsed((r.completedAt ?? Date.now()) - r.startedAt);
    const icon = this.coloredStatusIcon(r.status);
    const task = flatten(agentLabel(r));
    const agentCount = r.summary.results.length;
    const model = r.summary.results[0]?.model ?? "";
    // Show only the model id, without the "provider/" part.
    const modelShort = model.includes("/") ? model.split("/").pop()! : model;

    const parts = [icon, t.fg("accent", task), t.fg("dim", elapsed)];
    if (agentCount > 1) parts.push(t.fg("muted", `${agentCount} agents`));
    if (modelShort) parts.push(t.fg("muted", modelShort));

    return truncateToWidth(parts.join(" "), maxWidth);
  }

  // ── Level 2: Agent list ────────────────────────────────────────────

  private renderAgentList(
    lines: string[],
    runs: RunRecord[],
    innerW: number,
    border: (c: string) => string,
    row: (s: string) => string,
    emptyRow: () => string,
  ): void {
    const t = this.theme;
    const run = runs[this.selectedRunIndex];
    if (!run) {
      lines.push(row(t.fg("error", "Run not found.")));
      return;
    }

    // Run header info
    lines.push(emptyRow());
    lines.push(row(t.fg("muted", "Task: ") + flatten(agentLabel(run))));

    const elapsed = formatElapsed((run.completedAt ?? Date.now()) - run.startedAt);
    lines.push(
      row(
        t.fg("muted", "Status: ") +
          this.coloredStatusIcon(run.status) +
          " " +
          run.status +
          " " +
          t.fg("dim", elapsed),
      ),
    );

    // Total cost
    const totalCost = run.summary.results.reduce((sum, r) => sum + (r.usage?.cost ?? 0), 0);
    if (totalCost > 0) {
      lines.push(row(t.fg("muted", "Cost: ") + t.fg("dim", `$${totalCost.toFixed(4)}`)));
    }

    if (run.summary.error) {
      lines.push(
        row(
          t.fg(
            "error",
            "Error: " + flatten(`${run.summary.error.code} — ${run.summary.error.message}`),
          ),
        ),
      );
    }

    // Separator
    lines.push(border("├") + border("─".repeat(innerW)) + border("┤"));

    // Agent list
    const agents = run.summary.results;
    if (agents.length === 0) {
      lines.push(row(t.fg("muted", "No child agents.")));
      lines.push(emptyRow());
      return;
    }

    this.selectedAgentIndex = Math.max(0, Math.min(this.selectedAgentIndex, agents.length - 1));
    this.clampScroll("agent", agents.length, MAX_AGENTS_VISIBLE);

    const win = rowWindow(agents.length, this.agentListScroll, MAX_AGENTS_VISIBLE);
    this.renderWindow(lines, win, MAX_AGENTS_VISIBLE, row, emptyRow, (index) => {
      const agent = agents[index];
      if (!agent) return "";
      const prefix = this.selectionPrefix(index === this.selectedAgentIndex);
      return prefix + this.formatAgentLine(agent, innerW - 4);
    });
  }

  /** One-line summary of an agent: exit icon, name, model, exit code, output snippet. */
  private formatAgentLine(agent: ExecutionResult, maxWidth: number): string {
    const t = this.theme;
    const icon =
      agent.exitCode === 0
        ? t.fg("success", "✓")
        : agent.exitCode > 0
          ? t.fg("error", "✗")
          : agent.exitCode === -1
            ? t.fg("warning", "●")
            : t.fg("warning", "?");

    const model = agent.model ?? "";
    const modelShort = model.includes("/") ? model.split("/").pop()! : model;

    const outputSnippet = agent.text
      ? agent.text
          .replace(/[\n\r]+/g, " ")
          .trim()
          .slice(0, 40)
      : "";

    const parts = [icon, t.fg("accent", agent.agent)];
    if (modelShort) parts.push(t.fg("muted", modelShort));
    // Negative exit codes are not printed as "exit=…"; the icon alone marks them.
    if (agent.exitCode >= 0) {
      parts.push(t.fg(agent.exitCode === 0 ? "success" : "error", `exit=${agent.exitCode}`));
    }
    if (outputSnippet) parts.push(t.fg("dim", outputSnippet));

    return truncateToWidth(parts.join(" "), maxWidth);
  }

  // ── Level 3: Agent detail ──────────────────────────────────────────

  private renderAgentDetail(
    lines: string[],
    runs: RunRecord[],
    innerW: number,
    _border: (c: string) => string,
    row: (s: string) => string,
    emptyRow: () => string,
  ): void {
    const t = this.theme;
    const run = runs[this.selectedRunIndex];
    const agent = run?.summary.results[this.selectedAgentIndex];
    if (!run || !agent) {
      lines.push(row(t.fg("error", "Agent not found.")));
      return;
    }

    const allLines = this.buildAgentDetailLines(agent, innerW - 2);

    // Clamp scroll here because "j" increments it without knowing the length.
    const maxScroll = maxScrollFor(allLines.length, MAX_DETAIL_VISIBLE);
    this.detailScroll = Math.max(0, Math.min(this.detailScroll, maxScroll));

    const win = rowWindow(allLines.length, this.detailScroll, MAX_DETAIL_VISIBLE);
    this.renderWindow(
      lines,
      win,
      MAX_DETAIL_VISIBLE,
      row,
      emptyRow,
      (index) => allLines[index] ?? "",
    );
  }

  /** Build every line of the detail view (metadata, task, full output), pre-wrapped to `maxWidth`. */
  private buildAgentDetailLines(agent: ExecutionResult, maxWidth: number): string[] {
    const t = this.theme;
    const lines: string[] = [];

    lines.push("");
    lines.push(t.fg("muted", "Agent: ") + t.fg("accent", agent.agent));
    if (agent.model) lines.push(t.fg("muted", "Model: ") + agent.model);
    lines.push(
      t.fg("muted", "Exit code: ") +
        (agent.exitCode >= 0 ? String(agent.exitCode) : t.fg("warning", "running")),
    );

    if (agent.usage) {
      const u = agent.usage;
      const parts: string[] = [];
      if (u.turns > 0) parts.push(`${u.turns} turns`);
      if (u.input > 0 || u.output > 0) parts.push(`${u.input} in / ${u.output} out`);
      if (u.cost > 0) parts.push(`$${u.cost.toFixed(4)}`);
      if (parts.length > 0) {
        lines.push(t.fg("muted", "Usage: ") + t.fg("dim", parts.join(" ")));
      }
    }

    if (agent.sessionPath) {
      lines.push(t.fg("muted", "Session: ") + t.fg("dim", agent.sessionPath));
    }

    // Only surface stop reasons other than the two listed ones, and only once
    // the agent has a non-negative exit code.
    if (
      agent.stopReason &&
      agent.stopReason !== "end_turn" &&
      agent.stopReason !== "toolUse" &&
      agent.exitCode >= 0
    ) {
      lines.push(t.fg("muted", "Stop reason: ") + agent.stopReason);
    }
    if (agent.errorMessage) {
      lines.push(t.fg("error", "Error: ") + agent.errorMessage);
    }

    // Task
    lines.push("");
    lines.push(t.fg("muted", "── Task ──"));
    const taskWrapped = wrapTextWithAnsi(agent.task || "(no task)", maxWidth);
    for (const wl of taskWrapped) lines.push(wl);

    // Full output
    lines.push("");
    lines.push(t.fg("muted", "── Output ──"));
    if (agent.text) {
      const outputWrapped = wrapTextWithAnsi(agent.text, maxWidth);
      for (const wl of outputWrapped) lines.push(wl);
    } else {
      lines.push(t.fg("dim", "(no output)"));
    }

    return lines;
  }

  // ── Input handling ─────────────────────────────────────────────────

  /**
   * Dispatch one key press. Esc (or q at level 1) closes the monitor at
   * level 1 and steps back one level otherwise; every other key is handled
   * by the current level.
   */
  handleInput(data: string): void {
    if (matchesKey(data, "escape") || (this.level === 1 && matchesKey(data, "q"))) {
      if (this.level === 1) {
        this.dispose();
        this.done();
      } else {
        this.level = (this.level - 1) as Level;
        // Any step back leaves the detail view, so its scroll starts over.
        this.detailScroll = 0;
      }
      return;
    }

    switch (this.level) {
      case 1:
        this.handleLevel1Input(data);
        break;
      case 2:
        this.handleLevel2Input(data);
        break;
      case 3:
        this.handleLevel3Input(data);
        break;
    }
  }

  private handleLevel1Input(data: string): void {
    const runs = this.getSortedRuns();

    if (matchesKey(data, "j")) {
      if (this.selectedRunIndex < runs.length - 1) {
        this.selectedRunIndex++;
      }
      return;
    }
    if (matchesKey(data, "k")) {
      if (this.selectedRunIndex > 0) {
        this.selectedRunIndex--;
      }
      return;
    }
    if (matchesKey(data, "return")) {
      if (runs.length > 0) {
        this.level = 2;
        this.selectedAgentIndex = 0;
        this.agentListScroll = 0;
      }
      return;
    }
    if (matchesKey(data, "c")) {
      const run = runs[this.selectedRunIndex];
      if (run && run.status === "running") {
        if (this.registry) {
          // In-session: use abort controller via registry
          this.registry.cancel(run.runId);
        } else {
          // Standalone mode: kill by PID files
          const artifactsDir = run.summary.observability?.artifactsDir;
          if (artifactsDir) {
            cancelRunByPidFiles(artifactsDir);
          }
        }
      }
      return;
    }
    if (matchesKey(data, "r")) {
      if (this.sessionDir) {
        this.refreshScannedRuns();
      }
      return;
    }
  }

  private handleLevel2Input(data: string): void {
    const runs = this.getSortedRuns();
    const run = runs[this.selectedRunIndex];
    if (!run) return;
    const agents = run.summary.results;

    if (matchesKey(data, "j")) {
      if (this.selectedAgentIndex < agents.length - 1) {
        this.selectedAgentIndex++;
      }
      return;
    }
    if (matchesKey(data, "k")) {
      if (this.selectedAgentIndex > 0) {
        this.selectedAgentIndex--;
      }
      return;
    }
    if (matchesKey(data, "return")) {
      if (agents.length > 0) {
        this.level = 3;
        this.detailScroll = 0;
      }
      return;
    }
  }

  private handleLevel3Input(data: string): void {
    if (matchesKey(data, "j")) {
      // Upper bound is applied in renderAgentDetail, where the line count is known.
      this.detailScroll++;
      return;
    }
    if (matchesKey(data, "k")) {
      if (this.detailScroll > 0) this.detailScroll--;
      return;
    }
  }

  // ── Helpers ────────────────────────────────────────────────────────

  /** Status icon from `statusIcon`, tinted by run status. */
  private coloredStatusIcon(status: string): string {
    const t = this.theme;
    const icon = statusIcon(status);
    switch (status) {
      case "running":
        return t.fg("warning", icon);
      case "done":
        return t.fg("success", icon);
      case "failed":
        return t.fg("error", icon);
      case "cancelled":
        return t.fg("muted", icon);
      default:
        return t.fg("dim", icon);
    }
  }

  /**
   * Bring the scroll offset of the given list into range and make sure the
   * selected row is inside the drawn window, accounting for the rows used
   * by the "▲"/"▼" indicators.
   */
  private clampScroll(which: "run" | "agent", total: number, maxVisible: number): void {
    if (which === "run") {
      this.runListScroll = scrollForSelection(
        this.runListScroll,
        this.selectedRunIndex,
        total,
        maxVisible,
      );
    } else {
      this.agentListScroll = scrollForSelection(
        this.agentListScroll,
        this.selectedAgentIndex,
        total,
        maxVisible,
      );
    }
  }

  // ── Auto-refresh ───────────────────────────────────────────────────

  /**
   * Start the periodic refresh: with a `sessionDir` the folder is re-scanned
   * every second; otherwise a re-render is requested every 500ms while the
   * registry reports active runs.
   */
  private startAutoRefresh(): void {
    const interval = this.sessionDir ? 1000 : 500;
    this.refreshTimer = setInterval(() => {
      if (this.sessionDir) {
        this.refreshScannedRuns();
        this.debouncedRender();
      } else if (this.registry && this.registry.getActive().length > 0) {
        this.debouncedRender();
      }
    }, interval);
  }

  /** Coalesce render requests issued within 16ms into a single one. */
  private debouncedRender(): void {
    if (this.renderTimeout) clearTimeout(this.renderTimeout);
    this.renderTimeout = setTimeout(() => {
      this.renderTimeout = undefined;
      this.tui.requestRender();
    }, 16);
  }

  invalidate(): void {
    // No-op: render() is always fresh
  }

  /** Stop the refresh interval and any pending debounced render. Safe to call twice. */
  dispose(): void {
    if (this.refreshTimer) {
      clearInterval(this.refreshTimer);
      this.refreshTimer = undefined;
    }
    if (this.renderTimeout) {
      clearTimeout(this.renderTimeout);
      this.renderTimeout = undefined;
    }
  }
}

+148

packages/pi-mill/notify.ts

import fs from "node:fs";
import path from "node:path";
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Box, Text } from "@mariozechner/pi-tui";
import type { RunSummary } from "./types.js";
import type { RunRegistry } from "./registry.js";
import { formatElapsed, statusIcon, agentLabel } from "./format.js";

/** Custom message type used for completion notifications and their renderer. */
const CUSTOM_TYPE = "pi-mill:complete";
/** Character limit for result text in the expanded message renderer. */
const TEXT_TRUNCATE = 500;
/** Character limit for each per-agent snippet in the notification content. */
const SUMMARY_TRUNCATE = 200;

/** Cut `text` to `limit` characters, appending "…" only when something was removed. */
function clip(text: string, limit: number): string {
  if (text.length <= limit) return text;
  return text.slice(0, limit) + "…";
}

/**
 * Wall-clock duration of a run in milliseconds. Uses the current time when
 * the run has no end timestamp, and 0 when no start timestamp is recorded.
 */
function elapsedMs(summary: RunSummary): number {
  const obs = summary.observability;
  if (!obs?.startedAt) return 0;
  const finishedAt = obs.endedAt ?? Date.now();
  return finishedAt - obs.startedAt;
}

/** Sum of the reported cost of every result; results without usage count as 0. */
function totalCost(summary: RunSummary): number {
  let cost = 0;
  for (const result of summary.results) {
    cost += result.usage?.cost ?? 0;
  }
  return cost;
}

/**
 * Write a markdown report with every agent's full output to `results.md`
 * inside the run's artifacts directory.
 *
 * @returns The path of the written file, or `null` when the run has no
 *   artifacts directory or writing failed (best effort, errors are ignored).
 */
function writeResultsFile(summary: RunSummary): string | null {
  const artifactsDir = summary.observability?.artifactsDir;
  if (!artifactsDir) return null;

  try {
    const target = path.join(artifactsDir, "results.md");
    const doc: string[] = [`# ${agentLabel(summary)} — ${summary.status}\n`];

    for (const result of summary.results) {
      const modelSuffix = result.model ? ` (${result.model})` : "";
      doc.push(`## ${result.agent}${modelSuffix}\n`);
      if (result.task) {
        doc.push(`**Task:** ${result.task}\n`);
      }
      doc.push(result.text || "(no output)");
      if (result.sessionPath) {
        doc.push(`\n**Session:** ${result.sessionPath}`);
      }
      doc.push("");
    }

    fs.writeFileSync(target, doc.join("\n"));
    return target;
  } catch {
    return null;
  }
}

/**
 * Build the text content of a completion notification: a status headline,
 * the error (if any), a clipped snippet per agent, and the path of the full
 * results file. Cancelled runs get a single-line message and no results file.
 */
function buildContentLine(summary: RunSummary): string {
  const name = agentLabel(summary);
  const elapsed = formatElapsed(elapsedMs(summary));

  if (summary.status === "cancelled") {
    const reason = summary.error?.message ?? "Cancelled by user.";
    return `Subagent '${name}' cancelled (${elapsed}). ${reason} Do not investigate — move on or retry with a different approach.`;
  }

  const resultsPath = writeResultsFile(summary);
  const sections: string[] = [`Subagent '${name}' ${summary.status} (${elapsed}).`];

  // Include error message so the LLM knows what went wrong and can fix its code
  const failure = summary.error;
  if (failure) {
    sections.push(`Error: ${failure.code}: ${failure.message}`);
  }

  // Truncated summary per child
  for (const result of summary.results) {
    const snippet = result.text ? clip(result.text, SUMMARY_TRUNCATE) : "(no output)";
    sections.push(`[${result.agent}] ${snippet}`);
  }

  if (resultsPath) {
    sections.push(`Full results: ${resultsPath}`);
  }

  return sections.join("\n");
}

/**
 * Register the renderer for completion messages. Each summary becomes a
 * headline row; in expanded mode the first result's clipped text, its
 * model, and the run's total cost are listed underneath.
 */
export function registerMessageRenderer(pi: ExtensionAPI): void {
  pi.registerMessageRenderer(CUSTOM_TYPE, (message, { expanded }, theme) => {
    const payload = message.details as { summaries: RunSummary[] } | undefined;
    const summaries = payload?.summaries ?? [];
    const separateEntries = summaries.length > 1;

    const out: string[] = [];

    for (const summary of summaries) {
      let color: "success" | "error" | "warning";
      if (summary.status === "done") {
        color = "success";
      } else if (summary.status === "failed") {
        color = "error";
      } else {
        color = "warning";
      }

      const icon = theme.fg(color, statusIcon(summary.status));
      const name = theme.fg("accent", agentLabel(summary));
      const elapsed = theme.fg("dim", formatElapsed(elapsedMs(summary)));
      out.push(`${icon} ${name} ${summary.status} ${elapsed}`);

      if (!expanded) continue;

      const first = summary.results[0];

      const resultText = first?.text;
      if (resultText) {
        const shown = clip(resultText, TEXT_TRUNCATE);
        out.push(theme.fg("dim", ` ${shown.replace(/\n/g, "\n ")}`));
      }

      const model = first?.model;
      if (model) {
        out.push(theme.fg("dim", ` model: ${model}`));
      }

      const cost = totalCost(summary);
      if (cost > 0) {
        out.push(theme.fg("dim", ` cost: $${cost.toFixed(4)}`));
      }

      if (separateEntries) {
        out.push("");
      }
    }

    const box = new Box(1, 1, (t) => theme.bg("customMessageBg", t));
    box.addChild(new Text(out.join("\n"), 0, 0));
    return box;
  });
}

/**
 * Send a completion notification for `summary` as a follow-up message that
 * triggers a turn, then mark the run as acknowledged in the registry.
 */
export function notifyCompletion(
  pi: ExtensionAPI,
  registry: RunRegistry,
  summary: RunSummary,
): void {
  pi.sendMessage(
    {
      customType: CUSTOM_TYPE,
      content: buildContentLine(summary),
      display: true,
      details: { summaries: [summary] },
    },
    { triggerTurn: true, deliverAs: "followUp" },
  );

  registry.acknowledge(summary.runId);
}

+99

packages/pi-mill/observability.ts

import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";

/** Lifecycle states a run moves through in the observability store. */
export type RunStatus = "queued" | "running" | "done" | "failed" | "cancelled";

/** A single timestamped entry in a run's event log. */
export interface RunEvent {
  /** `Date.now()` at the moment the event was pushed. */
  time: number;
  type: "status" | "info" | "warning" | "error" | "artifact";
  message: string;
  data?: Record<string, unknown>;
}

/** In-memory record of one run: its status, timing, event log and written artifacts. */
export interface RunRecord {
  runId: string;
  status: RunStatus;
  startedAt: number;
  /** Set when the run reaches "done", "failed" or "cancelled". */
  endedAt?: number;
  events: RunEvent[];
  /** Directory artifacts are written into; absent when the run was created without artifacts. */
  artifactsDir?: string;
  /** Full paths of every artifact written so far. */
  artifacts: string[];
}

/** Process-local store of run records, event logs and artifact files. */
export class ObservabilityStore {
  private readonly runs = new Map<string, RunRecord>();

  /**
   * Creates and stores a new record in the "queued" state.
   *
   * @param runId Key under which the record is stored (an existing record with the same id is replaced).
   * @param withArtifacts When true, an artifacts directory is created on disk.
   * @param sessionDir When given, artifacts go to `<sessionDir>/.factory/<runId>`; otherwise to a fresh temp dir.
   */
  createRun(runId: string, withArtifacts: boolean, sessionDir?: string): RunRecord {
    const record: RunRecord = {
      runId,
      status: "queued",
      startedAt: Date.now(),
      events: [],
      artifacts: [],
    };
    if (withArtifacts) {
      const base = sessionDir
        ? path.join(sessionDir, ".factory", runId)
        : fs.mkdtempSync(path.join(os.tmpdir(), "pi-subagent-observe-"));
      fs.mkdirSync(base, { recursive: true });
      record.artifactsDir = base;
    }
    this.runs.set(runId, record);
    return record;
  }

  /** Returns the live record for `runId`, or undefined when unknown. */
  get(runId: string): RunRecord | undefined {
    return this.runs.get(runId);
  }

  /**
   * Updates a run's status, stamping `endedAt` for terminal states.
   * When `message` is given, a "status" event carrying the new status is logged as well.
   * Unknown run ids are ignored.
   */
  setStatus(runId: string, status: RunStatus, message?: string): void {
    const run = this.runs.get(runId);
    if (!run) return;
    run.status = status;
    if (status === "done" || status === "failed" || status === "cancelled")
      run.endedAt = Date.now();
    if (message) this.push(runId, "status", message, { status });
  }

  /** Appends a timestamped event to the run's log. Unknown run ids are ignored. */
  push(
    runId: string,
    type: RunEvent["type"],
    message: string,
    data?: Record<string, unknown>,
  ): void {
    const run = this.runs.get(runId);
    if (!run) return;
    run.events.push({ time: Date.now(), type, message, data });
  }

  /**
   * Writes `content` to `relativePath` inside the run's artifacts directory, creating parent
   * directories as needed, and records the artifact in the run's list and event log.
   *
   * @returns The full path written, or null when the run is unknown, has no artifacts directory,
   *          or `relativePath` does not name a file inside that directory.
   * @throws Filesystem errors from creating directories or writing the file.
   */
  writeArtifact(runId: string, relativePath: string, content: string): string | null {
    const run = this.runs.get(runId);
    if (!run || !run.artifactsDir) return null;
    const fullPath = path.join(run.artifactsDir, relativePath);

    // The path comes from program code: refuse anything that resolves to the artifacts
    // directory itself or climbs out of it (e.g. "../../somewhere").
    const offset = path.relative(run.artifactsDir, fullPath);
    const escapes = offset === ".." || offset.startsWith(`..${path.sep}`) || path.isAbsolute(offset);
    if (offset === "" || escapes) {
      this.push(runId, "warning", `artifact:rejected:${relativePath}`, { path: fullPath });
      return null;
    }

    fs.mkdirSync(path.dirname(fullPath), { recursive: true });
    fs.writeFileSync(fullPath, content, "utf-8");
    run.artifacts.push(fullPath);
    this.push(runId, "artifact", `artifact:${relativePath}`, { path: fullPath });
    return fullPath;
  }

  /**
   * Returns a snapshot object of the run's fields, or null when the run is unknown.
   * The `events` and `artifacts` arrays are the store's live arrays, not copies.
   */
  toSummary(
    runId: string,
  ): Pick<
    RunRecord,
    "runId" | "status" | "startedAt" | "endedAt" | "events" | "artifacts" | "artifactsDir"
  > | null {
    const run = this.runs.get(runId);
    if (!run) return null;
    return {
      runId: run.runId,
      status: run.status,
      startedAt: run.startedAt,
      endedAt: run.endedAt,
      events: run.events,
      artifacts: run.artifacts,
      artifactsDir: run.artifactsDir,
    };
  }
}

+25

packages/pi-mill/package.json

··· 1 + { 2 + "name": "@mill/pi-mill", 3 + "version": "0.0.0", 4 + "description": "Pi extension for subagent orchestration powered by mill", 5 + "keywords": [ 6 + "pi-package" 7 + ], 8 + "license": "MIT", 9 + "type": "module", 10 + "peerDependencies": { 11 + "@mariozechner/pi-agent-core": "*", 12 + "@mariozechner/pi-ai": "*", 13 + "@mariozechner/pi-coding-agent": "*", 14 + "@mariozechner/pi-tui": "*", 15 + "@sinclair/typebox": "*" 16 + }, 17 + "pi": { 18 + "extensions": [ 19 + "./index.ts" 20 + ], 21 + "skills": [ 22 + "./skills" 23 + ] 24 + } 25 + }

+66

packages/pi-mill/program-env.d.ts

/**
 * Ambient type declarations for pi-mill programs.
 * Available as globals — do not import.
 */

/**
 * Usage counters reported for one subagent execution.
 * NOTE(review): units are not visible here (presumably token counts and a currency amount) — confirm.
 */
interface UsageStats {
  input: number;
  output: number;
  cacheRead: number;
  cacheWrite: number;
  cost: number;
  contextTokens: number;
  turns: number;
}

/** Outcome of a single `factory.spawn` call. */
interface ExecutionResult {
  taskId: string;
  agent: string;
  task: string;
  exitCode: number;
  messages: unknown[];
  stderr: string;
  usage: UsageStats;
  model?: string;
  stopReason?: string;
  errorMessage?: string;
  step?: number;
  text: string;
  sessionPath?: string;
}

/** Arguments accepted by `factory.spawn`. */
interface SpawnInput {
  agent: string;
  /** WHO the subagent is and how it should work (behavior, principles, methodology). */
  systemPrompt: string;
  /** WHAT the subagent should do right now (specific files, commands, and goals). */
  prompt: string;
  /** Model identifier in provider/model-id format (e.g. "anthropic/claude-opus-4-6", "cerebras/zai-glm-4.7") */
  model: string;
  cwd?: string;
  tools?: string[];
  step?: number;
  signal?: AbortSignal;
}

/** Shape of the `factory` global that programs use to spawn subagents and record observations. */
interface Factory {
  runId: string;
  spawn(input: SpawnInput): Promise<ExecutionResult>;
  shutdown(cancelRunning?: boolean): Promise<void>;
  observe: {
    log(type: "info" | "warning" | "error", message: string, data?: Record<string, unknown>): void;
    artifact(relativePath: string, content: string): string | null;
  };
}

declare const factory: Factory;
// Minimal stand-ins for the `process` and `console` globals, declared here so programs that use
// them typecheck against this file alone.
declare const process: {
  cwd(): string;
  env: Record<string, string | undefined>;
  [key: string]: unknown;
};
declare const console: {
  log(...args: unknown[]): void;
  error(...args: unknown[]): void;
  warn(...args: unknown[]): void;
};

+113

packages/pi-mill/registry.ts

import type { RunSummary } from "./types.js";
import type { ErrorDetails } from "./errors.js";

export type RunStatus = "running" | "done" | "failed" | "cancelled";

/** Registry entry for one run: its latest summary plus the handles needed to await or abort it. */
export interface RunRecord {
  runId: string;
  status: RunStatus;
  summary: RunSummary;
  promise?: Promise<RunSummary>;
  abort?: AbortController;
  startedAt: number;
  completedAt?: number;
  acknowledged?: boolean;
  /** Metadata stored at registration time (before results arrive). */
  task?: string;
}

/** In-memory index of runs keyed by run id. */
export class RunRegistry {
  private runs = new Map<string, RunRecord>();

  /** Records every run matching `keep`, in insertion order. */
  private select(keep: (entry: RunRecord) => boolean): RunRecord[] {
    return Array.from(this.runs.values()).filter(keep);
  }

  /** Stamps a terminal status on both the record and its summary, and records the completion time. */
  private finish(entry: RunRecord, status: "failed" | "cancelled"): void {
    entry.status = status;
    entry.summary.status = status;
    entry.completedAt = Date.now();
  }

  /** Stores a new record in the "running" state and returns it. */
  register(
    runId: string,
    summary: RunSummary,
    promise: Promise<RunSummary>,
    abort: AbortController,
    meta?: { task?: string },
  ): RunRecord {
    const entry: RunRecord = {
      runId,
      status: "running",
      summary,
      promise,
      abort,
      startedAt: Date.now(),
      task: meta?.task,
    };
    this.runs.set(runId, entry);
    return entry;
  }

  /** Looks up a record by id. */
  get(runId: string): RunRecord | undefined {
    return this.runs.get(runId);
  }

  /** All records whose status is still "running". */
  getActive(): RunRecord[] {
    return this.select((entry) => entry.status === "running");
  }

  /** Every record in the registry. */
  getAll(): RunRecord[] {
    return this.select(() => true);
  }

  /** Replaces the summary of a still-running record, forcing the stored summary's status to "running". */
  updateSummary(runId: string, summary: RunSummary): void {
    const entry = this.runs.get(runId);
    if (entry?.status !== "running") return;
    entry.summary = { ...summary, status: "running" };
  }

  /**
   * Stores the final summary and adopts its status; a summary that still reports "running"
   * marks the record as "done".
   */
  complete(runId: string, summary: RunSummary): void {
    const entry = this.runs.get(runId);
    if (entry === undefined) return;
    entry.summary = summary;
    if (summary.status === "running") {
      entry.status = "done";
    } else {
      entry.status = summary.status;
    }
    entry.completedAt = Date.now();
  }

  /** Marks the record and its summary as failed and attaches the error to the summary. */
  fail(runId: string, error: ErrorDetails): void {
    const entry = this.runs.get(runId);
    if (entry === undefined) return;
    this.finish(entry, "failed");
    entry.summary.error = error;
  }

  /** Marks the record and its summary as cancelled, then fires the record's abort controller if it has one. */
  cancel(runId: string): void {
    const entry = this.runs.get(runId);
    if (entry === undefined) return;
    this.finish(entry, "cancelled");
    entry.abort?.abort();
  }

  /** Flags a record as acknowledged. */
  acknowledge(runId: string): void {
    const entry = this.runs.get(runId);
    if (entry !== undefined) entry.acknowledged = true;
  }

  /** Records that are still running or have not been acknowledged yet. */
  getVisible(): RunRecord[] {
    return this.select((entry) => !(entry.status !== "running" && entry.acknowledged));
  }

  /** Inserts a record without promise/abort handles, replacing any record with the same id. */
  loadHistorical(record: Omit<RunRecord, "promise" | "abort">): void {
    this.runs.set(record.runId, record as RunRecord);
  }

  /** Removes every record. */
  clear(): void {
    this.runs.clear();
  }

  /** Clear only non-active runs (historical/completed), preserving running ones. */
  clearHistorical(): void {
    const finishedIds = this.select((entry) => entry.status !== "running").map(
      (entry) => entry.runId,
    );
    for (const id of finishedIds) {
      this.runs.delete(id);
    }
  }
}

+632

packages/pi-mill/runtime.ts

import { spawn } from "node:child_process";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
import { FactoryError } from "./errors.js";
import type { ObservabilityStore } from "./observability.js";
import type { ExecutionResult } from "./types.js";

// ── Branded spawn promise ──────────────────────────────────────────────

/** Marker placed on promises returned by `factory.spawn` so patched `Promise.all` can recognise them. */
export const SPAWN_BRAND = Symbol.for("pi-mill:spawn");

/** Promise returned by `factory.spawn`: the execution result plus the task id and the spawn brand. */
export interface SpawnPromise extends Promise<ExecutionResult> {
  taskId: string;
  [SPAWN_BRAND]: true;
}

/** True when `item` is an object carrying the spawn brand. */
function isBrandedSpawn(item: unknown): item is SpawnPromise {
  return (
    item != null &&
    typeof item === "object" &&
    (item as Record<symbol, unknown>)[SPAWN_BRAND] === true
  );
}

// ── Console patching — route program logs to observability ──────────────

/**
 * Renders one console argument as text without ever throwing.
 * Strings pass through, Errors use their stack (or name and message), everything else is JSON,
 * falling back to `String()` for values JSON cannot represent (undefined, functions, BigInt, cycles).
 */
function formatConsoleArg(value: unknown): string {
  if (typeof value === "string") return value;
  if (value instanceof Error) return value.stack ?? `${value.name}: ${value.message}`;
  try {
    // JSON.stringify returns undefined (despite its typing) for undefined, functions and symbols.
    const json = JSON.stringify(value) as string | undefined;
    if (json !== undefined) return json;
  } catch {
    // Circular structure or BigInt — fall through to String().
  }
  try {
    return String(value);
  } catch {
    return "[unprintable]";
  }
}

/**
 * Replaces `console.log` / `warn` / `error` so program output becomes observability events
 * ("info" / "warning" / "error") on the given run.
 *
 * @returns A function that restores the previous console methods.
 */
export function patchConsole(obs: ObservabilityStore, runId: string): () => void {
  const originalLog = console.log;
  const originalWarn = console.warn;
  const originalError = console.error;

  const format = (...args: unknown[]) => args.map(formatConsoleArg).join(" ");

  console.log = (...args: unknown[]) => obs.push(runId, "info", `console: ${format(...args)}`);
  console.warn = (...args: unknown[]) => obs.push(runId, "warning", `console: ${format(...args)}`);
  console.error = (...args: unknown[]) => obs.push(runId, "error", `console: ${format(...args)}`);

  return () => {
    console.log = originalLog;
    console.warn = originalWarn;
    console.error = originalError;
  };
}

// ── Promise.all / Promise.allSettled patching for observability ─────────

/**
 * Wraps `Promise.all` and `Promise.allSettled` so that any call whose iterable contains spawn
 * promises logs "group:start" and then "group:done" / "group:failed" events on the run.
 * Calls without spawn promises are forwarded untouched.
 *
 * @returns A function that reinstalls the functions that were in place before patching.
 */
export function patchPromiseAll(obs: ObservabilityStore, runId: string): () => void {
  // Keep the unbound originals for restoring; the bound copies are only used for forwarding.
  const nativeAll = Promise.all;
  const nativeAllSettled = Promise.allSettled;
  const originalAll = nativeAll.bind(Promise);
  const originalAllSettled = nativeAllSettled.bind(Promise);
  let groupCounter = 0;

  Promise.all = function <T>(iterable: Iterable<T>): Promise<Awaited<T>[]> {
    const items = Array.from(iterable);
    const spawns = (items as unknown[]).filter(isBrandedSpawn);
    if (spawns.length > 0) {
      groupCounter++;
      const groupId = `group-${groupCounter}`;
      obs.push(runId, "info", "group:start", {
        groupId,
        count: spawns.length,
        tasks: spawns.map((s) => s.taskId),
      });
      const result = originalAll(items);
      // Both handlers are supplied so this side chain never produces an unhandled rejection.
      result.then(
        () => obs.push(runId, "info", "group:done", { groupId, count: spawns.length }),
        () => obs.push(runId, "info", "group:failed", { groupId, count: spawns.length }),
      );
      return result;
    }
    return originalAll(items);
  } as typeof Promise.all;

  Promise.allSettled = function <T>(
    iterable: Iterable<T>,
  ): Promise<PromiseSettledResult<Awaited<T>>[]> {
    const items = Array.from(iterable);
    const spawns = (items as unknown[]).filter(isBrandedSpawn);
    if (spawns.length > 0) {
      groupCounter++;
      const groupId = `group-settled-${groupCounter}`;
      obs.push(runId, "info", "group:start", {
        groupId,
        count: spawns.length,
        tasks: spawns.map((s) => s.taskId),
        settled: true,
      });
      const result = originalAllSettled(items);
      result.then(() => obs.push(runId, "info", "group:done", { groupId, count: spawns.length }));
      return result;
    }
    return originalAllSettled(items);
  } as typeof Promise.allSettled;

  return () => {
    Promise.all = nativeAll;
    Promise.allSettled = nativeAllSettled;
  };
}

// ── Single subagent spawn (via mill) ───────────────────────────────────

/** Everything needed to run one subagent through the mill CLI. */
interface SpawnInput {
  runId: string;
  taskId: string;
  agent: string;
  systemPrompt: string;
  prompt: string;
  cwd: string;
  modelId: string;
  tools: string[];
  step?: number;
  signal?: AbortSignal;
  obs: ObservabilityStore;
  onProgress?: (result: ExecutionResult) => void;
  parentSessionPath?: string;
  sessionDir?: string;
  millCommand: string;
  millArgs: string[];
  millRunsDir?: string;
}

/** One spawn entry as read from the JSON payload printed by `mill run --sync --json`. */
interface MillSpawnResult {
  text?: string;
  sessionRef?: string;
  agent?: string;
  model?: string;
  driver?: string;
  exitCode?: number;
  stopReason?: string;
  errorMessage?: string;
}

/** The parts of the mill JSON payload this module reads. */
interface MillRunSyncPayload {
  run?: {
    status?: string;
  };
  result?: {
    spawns?: ReadonlyArray<MillSpawnResult>;
  };
}

/** A zeroed usage record. */
function newUsage() {
  return {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    cost: 0,
    contextTokens: 0,
    turns: 0,
  };
}

/**
 * Finds the last non-empty line of `text` that parses as a JSON object (or array) and returns it.
 * Lines that are not valid JSON, or that parse to a primitive, are skipped.
 */
const parseJsonObjectFromText = (text: string): Record<string, unknown> | undefined => {
  const lines = text
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .reverse();

  for (const line of lines) {
    try {
      const parsed = JSON.parse(line) as unknown;
      if (typeof parsed === "object" && parsed !== null) {
        return parsed as Record<string, unknown>;
      }
    } catch {
      continue;
    }
  }

  return undefined;
};

/**
 * Writes a one-statement mill program (`await mill.spawn({...})`) into a fresh temp directory.
 * The caller is responsible for removing `dir` afterwards.
 */
function writeMillProgram(input: {
  systemPrompt: string;
  prompt: string;
  agent: string;
  modelId: string;
}): { dir: string; filePath: string } {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-mill-spawn-"));
  const filePath = path.join(dir, "program.ts");
  const spawnPayload = JSON.stringify({
    agent: input.agent,
    systemPrompt: input.systemPrompt,
    prompt: input.prompt,
    model: input.modelId,
  });

  const source = `await mill.spawn(${spawnPayload});\n`;
  fs.writeFileSync(filePath, source, { encoding: "utf-8", mode: 0o600 });
  return { dir, filePath };
}

/**
 * Converts a mill payload into an `ExecutionResult`, preferring the spawn whose agent matches
 * `fallback.agent` and otherwise the first spawn. Missing fields fall back to the request values.
 *
 * @throws FactoryError (code "RUNTIME") when the payload contains no spawn results.
 */
const decodeMillResult = (
  payload: MillRunSyncPayload,
  fallback: { agent: string; modelId: string; prompt: string },
): ExecutionResult => {
  const spawns = payload.result?.spawns;
  if (!Array.isArray(spawns) || spawns.length === 0) {
    throw new FactoryError({
      code: "RUNTIME",
      message: "mill run completed without spawn results.",
      recoverable: false,
    });
  }

  const selectedSpawn =
    spawns.find((spawn) => spawn.agent === fallback.agent) ?? spawns[0] ?? ({} as MillSpawnResult);

  // Without an explicit exit code, a "complete" run counts as success and anything else as failure.
  const runStatus = payload.run?.status;
  const derivedExitCode =
    typeof selectedSpawn.exitCode === "number"
      ? selectedSpawn.exitCode
      : runStatus === "complete"
        ? 0
        : 1;

  return {
    taskId: "",
    agent: selectedSpawn.agent ?? fallback.agent,
    task: fallback.prompt,
    exitCode: derivedExitCode,
    messages: [],
    stderr: "",
    usage: newUsage(),
    model: selectedSpawn.model ?? fallback.modelId,
    stopReason: selectedSpawn.stopReason,
    errorMessage: selectedSpawn.errorMessage,
    step: undefined,
    text: selectedSpawn.text ?? "",
    sessionPath: selectedSpawn.sessionRef,
  };
};

/** Runs one subagent through the mill CLI and resolves with its execution result. */
export function spawnSubagent(input: SpawnInput): Promise<ExecutionResult> {
  return runSubagentProcess(input);
}

/**
 * Spawns `mill run <program> --sync --json` as a detached child, captures its combined
 * stdout/stderr in `<taskId>.stdout.log`, records its pid in `<taskId>.pid` while it runs,
 * and decodes the JSON payload it prints.
 *
 * Aborting `input.signal` sends SIGTERM and, if the child is still alive 3 seconds later, SIGKILL.
 *
 * @throws FactoryError "CANCELLED" when the signal aborted the run, or "RUNTIME" when the output
 *         contains no JSON payload or the payload has no spawn results.
 */
async function runSubagentProcess(input: SpawnInput): Promise<ExecutionResult> {
  input.obs.push(input.runId, "info", `spawn:${input.taskId}`, {
    agent: input.agent,
    model: input.modelId,
    backend: "mill",
    tools: input.tools,
  });

  const outputDir = input.sessionDir ?? path.join(os.tmpdir(), "pi-mill-output");
  fs.mkdirSync(outputDir, { recursive: true });

  const stdoutPath = path.join(outputDir, `${input.taskId}.stdout.log`);
  const pidPath = path.join(outputDir, `${input.taskId}.pid`);

  const result: ExecutionResult = {
    taskId: input.taskId,
    agent: input.agent,
    task: input.prompt,
    exitCode: -1,
    messages: [],
    stderr: "",
    usage: newUsage(),
    model: input.modelId,
    step: input.step,
    text: "",
    sessionPath: undefined,
  };

  // Initial progress tick so listeners see the task before the child starts.
  input.onProgress?.({ ...result, messages: [] });

  let systemPrompt = input.systemPrompt.trim();
  if (input.parentSessionPath && fs.existsSync(input.parentSessionPath)) {
    systemPrompt += `\n\nParent conversation session: ${input.parentSessionPath}\nUse search_thread to explore parent context if you need background on what led to this task.`;
  }

  const tempProgram = writeMillProgram({
    systemPrompt,
    prompt: input.prompt,
    agent: input.agent,
    modelId: input.modelId,
  });

  const args = [...input.millArgs, "run", tempProgram.filePath, "--sync", "--json"];
  if (input.millRunsDir && input.millRunsDir.trim().length > 0) {
    args.push("--runs-dir", input.millRunsDir);
  }

  let aborted = false;

  try {
    const code = await new Promise<number>((resolve) => {
      const stdoutFd = fs.openSync(stdoutPath, "w");

      // A malformed PI_FACTORY_DEPTH must not propagate as "NaN" to children.
      const parentDepth = Number.parseInt(process.env.PI_FACTORY_DEPTH || "0", 10);
      const childDepth = (Number.isFinite(parentDepth) ? parentDepth : 0) + 1;

      let proc: ReturnType<typeof spawn>;
      try {
        proc = spawn(input.millCommand, args, {
          cwd: input.cwd,
          detached: true,
          stdio: ["ignore", stdoutFd, stdoutFd],
          shell: false,
          env: { ...process.env, PI_FACTORY_DEPTH: String(childDepth) },
        });
      } finally {
        // The child holds its own copy of the descriptor; ours is closed even if spawn throws.
        fs.closeSync(stdoutFd);
      }
      proc.unref();

      if (proc.pid != null) {
        fs.writeFileSync(pidPath, String(proc.pid), "utf-8");
      }

      let killTimer: ReturnType<typeof setTimeout> | undefined;
      const kill = () => {
        aborted = true;
        proc.kill("SIGTERM");
        killTimer = setTimeout(() => {
          // `proc.killed` only says a signal was sent, so it is already true here.
          // Whether the child actually exited is reported by exitCode / signalCode.
          if (proc.exitCode === null && proc.signalCode === null) proc.kill("SIGKILL");
        }, 3000);
      };

      if (input.signal?.aborted) {
        kill();
      }
      input.signal?.addEventListener("abort", kill, { once: true });

      // Shared teardown for "close" and "error"; resolving twice is harmless.
      const finish = (exitCode: number) => {
        if (killTimer) clearTimeout(killTimer);
        input.signal?.removeEventListener("abort", kill);
        try {
          fs.unlinkSync(pidPath);
        } catch {
          // ignore
        }
        resolve(exitCode);
      };

      proc.on("close", (exitCode) => finish(exitCode ?? 1));
      proc.on("error", () => finish(1));
    });

    const output = fs.existsSync(stdoutPath) ? fs.readFileSync(stdoutPath, "utf-8") : "";
    const parsed = parseJsonObjectFromText(output) as MillRunSyncPayload | undefined;

    if (!parsed) {
      result.stderr = output.trim();
      if (aborted) {
        throw new FactoryError({
          code: "CANCELLED",
          message: "Subagent aborted.",
          recoverable: true,
        });
      }
      throw new FactoryError({
        code: "RUNTIME",
        message:
          result.stderr.length > 0
            ? `mill output was not valid JSON:\n${result.stderr}`
            : "mill output was empty.",
        recoverable: false,
      });
    }

    const decoded = decodeMillResult(parsed, {
      agent: input.agent,
      modelId: input.modelId,
      prompt: input.prompt,
    });

    result.agent = decoded.agent;
    result.task = decoded.task;
    result.exitCode = decoded.exitCode;
    result.model = decoded.model;
    result.stopReason = decoded.stopReason;
    result.errorMessage = decoded.errorMessage;
    result.text = decoded.text;
    result.sessionPath = decoded.sessionPath;
    result.stderr = code === 0 ? "" : output.trim();

    if (aborted) {
      throw new FactoryError({
        code: "CANCELLED",
        message: "Subagent aborted.",
        recoverable: true,
      });
    }

    input.onProgress?.({ ...result, messages: [] });
    return result;
  } finally {
    try {
      fs.rmSync(tempProgram.dir, { recursive: true, force: true });
    } catch {
      // ignore
    }
  }
}

// ── Factory (program runtime) ──────────────────────────────────────────

/** Arguments a program passes to `factory.spawn`. */
export interface RuntimeSpawnInput {
  agent: string;
  systemPrompt: string;
  prompt: string;
  cwd?: string;
  model: string;
  tools?: string[];
  step?: number;
  signal?: AbortSignal;
}

/** Runtime object exposed to programs for spawning subagents and recording observations. */
export interface Factory {
  runId: string;
  spawn(input: RuntimeSpawnInput): SpawnPromise;
  shutdown(cancelRunning?: boolean): Promise<void>;
  observe: {
    log(type: "info" | "warning" | "error", message: string, data?: Record<string, unknown>): void;
    artifact(relativePath: string, content: string): string | null;
  };
}

/**
 * Returns `model` unchanged when it is non-blank.
 *
 * @throws FactoryError "INVALID_INPUT" when `model` is missing or whitespace only.
 */
function validateModelSelector(model: string, agent: string): string {
  if (!model?.trim()) {
    throw new FactoryError({
      code: "INVALID_INPUT",
      message: `Spawn for '${agent}' requires a non-empty 'model'.`,
      recoverable: true,
    });
  }
  return model;
}

/**
 * Builds the `Factory` for one run.
 *
 * The mill command comes from `options.millCommand`, then `PI_FACTORY_MILL_CMD`, then `"mill"`;
 * the runs directory from `options.millRunsDir`, then `PI_FACTORY_MILL_RUNS_DIR`.
 * Every spawned task is aborted when its own signal, `options.defaultSignal`, or the factory's
 * shutdown fires.
 */
export function createFactory(
  ctx: ExtensionContext,
  runId: string,
  obs: ObservabilityStore,
  options?: {
    onTaskUpdate?: (result: ExecutionResult) => void;
    defaultSignal?: AbortSignal;
    parentSessionPath?: string;
    sessionDir?: string;
    millCommand?: string;
    millArgs?: string[];
    millRunsDir?: string;
  },
): Factory {
  let spawnCounter = 0;
  const runtimeAbort = new AbortController();
  const activeTasks = new Map<
    string,
    { controller: AbortController; promise: Promise<ExecutionResult> }
  >();

  const millCommand = options?.millCommand?.trim() || process.env.PI_FACTORY_MILL_CMD || "mill";
  const millArgs = options?.millArgs ?? [];
  const millRunsDir = options?.millRunsDir ?? process.env.PI_FACTORY_MILL_RUNS_DIR;

  const factory: Factory = {
    runId,

    // Input validation throws synchronously (before any promise exists).
    spawn({ agent, systemPrompt, prompt, cwd, model, tools, step, signal }) {
      if (!systemPrompt?.trim()) {
        throw new FactoryError({
          code: "INVALID_INPUT",
          message: `Spawn for '${agent}' requires non-empty systemPrompt.`,
          recoverable: true,
        });
      }
      if (!prompt?.trim()) {
        throw new FactoryError({
          code: "INVALID_INPUT",
          message: `Spawn for '${agent}' requires non-empty prompt.`,
          recoverable: true,
        });
      }

      const modelId = validateModelSelector(model, agent);

      spawnCounter += 1;
      const taskId = `task-${spawnCounter}`;
      const taskAbort = new AbortController();

      // Funnel every upstream signal into the per-task controller.
      const relayAbort = () => taskAbort.abort();
      const boundSignals = [signal, options?.defaultSignal, runtimeAbort.signal].filter(
        (s): s is AbortSignal => Boolean(s),
      );
      for (const bound of boundSignals) {
        if (bound.aborted) taskAbort.abort();
        else bound.addEventListener("abort", relayAbort, { once: true });
      }

      const taskPromise = spawnSubagent({
        runId,
        taskId,
        agent,
        systemPrompt,
        prompt,
        cwd: cwd ?? process.cwd(),
        modelId,
        tools: tools ?? [],
        step,
        signal: taskAbort.signal,
        obs,
        onProgress: (partial) => options?.onTaskUpdate?.(partial),
        parentSessionPath: options?.parentSessionPath,
        sessionDir: options?.sessionDir,
        millCommand,
        millArgs,
        millRunsDir,
      })
        .then((finalResult) => {
          options?.onTaskUpdate?.(finalResult);
          return finalResult;
        })
        .finally(() => {
          for (const bound of boundSignals) bound.removeEventListener("abort", relayAbort);
          activeTasks.delete(taskId);
        });
      activeTasks.set(taskId, { controller: taskAbort, promise: taskPromise });

      // Brand the promise so patched Promise.all / allSettled can group spawns.
      const branded = taskPromise as any;
      branded[SPAWN_BRAND] = true;
      branded.taskId = taskId;
      return branded as SpawnPromise;
    },

    // Optionally aborts everything in flight, then waits for all active tasks to settle.
    async shutdown(cancelRunning = true) {
      if (cancelRunning) {
        runtimeAbort.abort();
        for (const { controller } of activeTasks.values()) controller.abort();
      }
      const pending = Array.from(activeTasks.values()).map(({ promise }) => promise);
      if (pending.length > 0) await Promise.allSettled(pending);
      obs.push(runId, "info", "runtime:shutdown", { cancelRunning, pending: pending.length });
    },

    observe: {
      log(type, message, data) {
        obs.push(runId, type, message, data);
      },
      artifact(relativePath, content) {
        return obs.writeArtifact(runId, relativePath, content);
      },
    },
  };

  return factory;
}

// ── Preflight typecheck ────────────────────────────────────────────────

/** Location of the ambient declarations copied next to a program before typechecking it. */
const PROGRAM_ENV_PATH = path.join(
  // fileURLToPath decodes percent-escapes and handles Windows drive letters; URL.pathname does not.
  import.meta.dirname ?? path.dirname(fileURLToPath(import.meta.url)),
  "program-env.d.ts",
);

/**
 * Run a preflight typecheck on program code using tsgo (native TypeScript compiler).
 * Returns null if clean, or an error message string if there are type errors.
 * Falls back silently (returns null) if tsgo is not available.
 *
 * The temp directory holding the program is kept only when errors are reported (the message
 * points at it); on every other outcome it is removed.
 */
export async function preflightTypecheck(code: string): Promise<string | null> {
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-mill-typecheck-"));
  const programPath = path.join(tmpDir, "program.ts");
  let keepSource = false;
  try {
    fs.writeFileSync(programPath, `/// <reference path="env.d.ts" />\n${code}`, "utf-8");
    fs.copyFileSync(PROGRAM_ENV_PATH, path.join(tmpDir, "env.d.ts"));
    fs.writeFileSync(
      path.join(tmpDir, "tsconfig.json"),
      JSON.stringify({
        compilerOptions: {
          target: "ES2022",
          module: "ES2022",
          moduleResolution: "bundler",
          moduleDetection: "force",
          strict: true,
          noEmit: true,
          skipLibCheck: true,
          types: [],
        },
        include: ["program.ts", "env.d.ts"],
      }),
    );

    // stdout and stderr are merged into one buffer; a spawn failure is reported as code -1.
    const result = await new Promise<{ code: number; stderr: string }>((resolve) => {
      let stderr = "";
      const proc = spawn("tsgo", ["--noEmit", "-p", path.join(tmpDir, "tsconfig.json")], {
        stdio: ["ignore", "pipe", "pipe"],
      });
      proc.stdout.on("data", (chunk: Buffer) => {
        stderr += chunk.toString();
      });
      proc.stderr.on("data", (chunk: Buffer) => {
        stderr += chunk.toString();
      });
      proc.on("close", (exitCode) => resolve({ code: exitCode ?? 1, stderr }));
      proc.on("error", () => resolve({ code: -1, stderr: "" }));
    });

    if (result.code === -1) return null;
    if (result.code === 0) return null;

    const errors = result.stderr
      .split("\n")
      .filter((l) => l.includes("error TS"))
      .join("\n")
      .trim();

    const details = errors || result.stderr.trim();
    if (!details) return null;
    keepSource = true;
    return `Program source preserved at: ${programPath}\n${details}`;
  } catch {
    return null;
  } finally {
    if (!keepSource) {
      try {
        fs.rmSync(tmpDir, { recursive: true, force: true });
      } catch {
        // ignore
      }
    }
  }
}

// ── Program module preparation ─────────────────────────────────────────

/**
 * Writes program code to `program.ts` in a fresh temp directory and returns its path.
 *
 * @throws FactoryError "INVALID_INPUT" when `code` is empty or whitespace only.
 */
export function prepareProgramModule(code: string): { modulePath: string } {
  if (!code.trim()) {
    throw new FactoryError({
      code: "INVALID_INPUT",
      message: "Program code is empty.",
      recoverable: true,
    });
  }
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-mill-program-"));
  const modulePath = path.join(tmpDir, "program.ts");
  fs.writeFileSync(modulePath, code, "utf-8");
  return { modulePath };
}

+179

packages/pi-mill/scanner.ts

import * as fs from "node:fs";
import * as path from "node:path";
import * as os from "node:os";
import type { RunRecord, RunStatus } from "./registry.js";
import type { RunSummary, ExecutionResult, UsageStats } from "./types.js";

/**
 * Filesystem scanner for standalone --mill mode.
 * Reads run.json files from ~/.pi/agent/sessions/<session-dir>/.factory/<run-id>/run.json
 */

/** Convert a cwd path to the session directory name pi uses. */
export function cwdToSessionDir(cwd: string): string {
  // /Users/foo/Code/project → --Users-foo-Code-project--
  return "--" + cwd.slice(1).replace(/\//g, "-") + "--";
}

/** Shape of run.json on disk (written by writeRunJson in index.ts). */
interface RunJsonData {
  runId: string;
  status?: RunStatus;
  task?: string;
  startedAt?: number;
  completedAt?: number;
  results?: Array<{
    agent: string;
    task: string;
    model?: string;
    exitCode: number;
    text: string;
    sessionPath?: string;
    usage?: UsageStats;
    stopReason?: string;
    errorMessage?: string;
  }>;
  error?: { code: string; message: string; recoverable: boolean };
}

/**
 * Parse a single run.json into a RunRecord (without promise/abort).
 * Missing fields get defaults (status "done", start time "now", zeroed usage), and the record is
 * marked as already acknowledged.
 */
function parseRunJson(data: RunJsonData): Omit<RunRecord, "promise" | "abort"> {
  const status: RunStatus = data.status ?? "done";
  const results: ExecutionResult[] = (data.results ?? []).map((r) => ({
    taskId: "",
    agent: r.agent ?? "unknown",
    task: r.task ?? "",
    exitCode: r.exitCode ?? -1,
    messages: [],
    stderr: "",
    usage: r.usage ?? {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
      cost: 0,
      contextTokens: 0,
      turns: 0,
    },
    model: r.model,
    stopReason: r.stopReason,
    errorMessage: r.errorMessage,
    text: r.text ?? "",
    sessionPath: r.sessionPath,
  }));

  const summary: RunSummary = {
    runId: data.runId,
    status,
    results,
    error: data.error as RunSummary["error"],
    metadata: { task: data.task },
  };

  return {
    runId: data.runId,
    status,
    summary,
    startedAt: data.startedAt ?? Date.now(),
    completedAt: data.completedAt,
    acknowledged: true,
    task: data.task,
  };
}

/** Get the sessions base directory. */
export function getSessionsBase(): string {
  return path.join(os.homedir(), ".pi", "agent", "sessions");
}

/**
 * Scan all run.json files under a session's .factory directory.
 * If sessionDirName is provided, scans only that session.
 * If not provided, scans all sessions.
 *
 * Unreadable directories, malformed run.json files and files without a usable `runId`
 * are skipped rather than reported.
 */
export function scanRuns(
  sessionsBase: string,
  sessionDirName?: string,
): Omit<RunRecord, "promise" | "abort">[] {
  const records: Omit<RunRecord, "promise" | "abort">[] = [];

  const sessionDirs = sessionDirName ? [sessionDirName] : listSessionDirs(sessionsBase);

  for (const dir of sessionDirs) {
    const factoryDir = path.join(sessionsBase, dir, ".factory");
    if (!fs.existsSync(factoryDir)) continue;

    try {
      for (const entry of fs.readdirSync(factoryDir)) {
        const runJsonPath = path.join(factoryDir, entry, "run.json");
        if (!fs.existsSync(runJsonPath)) continue;
        try {
          const raw = fs.readFileSync(runJsonPath, "utf-8");
          const data = JSON.parse(raw) as Partial<RunJsonData> | null;
          // A record without an id cannot be keyed or addressed later, so it is treated as malformed.
          if (typeof data?.runId !== "string" || data.runId.length === 0) continue;
          records.push(parseRunJson(data as RunJsonData));
        } catch {
          // Skip malformed run.json files
        }
      }
    } catch {
      // Skip inaccessible directories
    }
  }

  return records;
}

/**
 * Cancel a subagent by reading its PID file and sending SIGTERM (then SIGKILL after 3s).
 * Returns true if the signal was sent, false if the PID file was missing, did not contain a
 * positive integer, or the process was already gone.
 */
export function cancelByPidFile(outputDir: string, taskId: string): boolean {
  const pidPath = path.join(outputDir, `${taskId}.pid`);
  try {
    const raw = fs.readFileSync(pidPath, "utf-8").trim();
    // Only a plain positive integer is a process id. 0 and negative values address whole
    // process groups (or every process) when passed to kill, so they must never get through.
    if (!/^\d+$/.test(raw)) return false;
    const pid = Number(raw);
    if (!Number.isSafeInteger(pid) || pid <= 0) return false;
    process.kill(pid, "SIGTERM");
    setTimeout(() => {
      try {
        process.kill(pid, "SIGKILL");
      } catch {}
    }, 3000);
    return true;
  } catch {
    return false;
  }
}

/**
 * Cancel all running subagents for a run by scanning for PID files in the run's sessions directory.
 * Returns the number of processes signalled.
 */
export function cancelRunByPidFiles(artifactsDir: string): number {
  const sessionsDir = path.join(artifactsDir, "sessions");
  let cancelled = 0;
  try {
    if (!fs.existsSync(sessionsDir)) return 0;
    for (const entry of fs.readdirSync(sessionsDir)) {
      if (!entry.endsWith(".pid")) continue;
      const taskId = entry.replace(/\.pid$/, "");
      if (cancelByPidFile(sessionsDir, taskId)) cancelled++;
    }
  } catch {}
  return cancelled;
}

/** List all session directory names under the sessions base. */
function listSessionDirs(sessionsBase: string): string[] {
  try {
    if (!fs.existsSync(sessionsBase)) return [];
    return fs.readdirSync(sessionsBase).filter((d) => {
      try {
        return fs.statSync(path.join(sessionsBase, d)).isDirectory();
      } catch {
        return false;
      }
    });
  } catch {
    return [];
  }
}

+225

packages/pi-mill/skills/mill-basics/SKILL.md

··· 1 + --- 2 + name: mill-basics 3 + description: "Write pi-mill programs to orchestrate multi-agent workflows. Use when spawning subagents, coordinating parallel/sequential tasks, building agent-driven automation, or applying common orchestration patterns like fan-out, pipelines, and synthesis." 4 + --- 5 + 6 + # Mill Basics 7 + 8 + Pi-mill enables writing scripts that orchestrate multiple AI agents. Scripts use the `factory` global to spawn subagents, coordinate work, and compose results. 9 + 10 + ## systemPrompt vs prompt 11 + 12 + These two fields have distinct roles — don't mix them: 13 + 14 + - **systemPrompt**: Defines WHO the agent is and HOW it should behave. Personality, methodology, principles, output format, tool usage conventions. 15 + - **prompt**: Defines WHAT the agent should do right now. The concrete assignment — specific files to read, bugs to fix, features to implement, commands to run. 16 + 17 + ```typescript 18 + // BAD: work leaked into systemPrompt 19 + { systemPrompt: "Review src/auth/ for security issues", prompt: "Do the review" } 20 + 21 + // BAD: systemPrompt is too weak 22 + { systemPrompt: "Lint.", prompt: "Run lint on src/ and fix errors" } 23 + 24 + // GOOD: clean separation 25 + { 26 + systemPrompt: "You are a security-focused code reviewer. Look for OWASP Top 10 vulnerabilities, injection flaws, and auth bypasses. Report findings with severity ratings.", 27 + prompt: "Review src/auth/ for security issues. Focus on the login flow and session management." 28 + } 29 + ``` 30 + 31 + ## Program Structure 32 + 33 + Factory programs are top-level TypeScript scripts. The `factory` global is available — no imports or exports needed: 34 + 35 + ```typescript 36 + const result = await factory.spawn({ 37 + agent: "researcher", 38 + systemPrompt: 39 + "You are a research assistant. You find accurate, up-to-date information and cite sources. 
You present findings in a structured format.", 40 + prompt: 41 + "Find information about TypeScript 5.0 — new features, breaking changes, and migration notes.", 42 + model: "anthropic/claude-opus-4-6", 43 + }); 44 + 45 + console.log(result.text); 46 + ``` 47 + 48 + The script runs as a module — use `await` at top level, `Promise.all` for parallelism, and standard imports. 49 + 50 + ## Mill API 51 + 52 + ### spawn 53 + 54 + Create a subagent task: 55 + 56 + ```typescript 57 + const result = await factory.spawn({ 58 + agent: "code-reviewer", // Role label (for logging/display) 59 + systemPrompt: "You review code...", // WHO: behavior, principles, methodology 60 + prompt: "Review main.ts for...", // WHAT: the specific work to do now 61 + model: "anthropic/claude-opus-4-6", // Model in provider/model-id format 62 + cwd: "/path/to/project", // Working directory (defaults to process.cwd()) 63 + step: 1, // Optional step number 64 + signal: abortSignal, // Optional cancellation 65 + }); 66 + ``` 67 + 68 + Returns `Promise<ExecutionResult>`. Use `await` for one agent, `Promise.all` for parallel execution. 
69 + 70 + ### Parallel execution 71 + 72 + ```typescript 73 + const [security, coverage] = await Promise.all([ 74 + factory.spawn({ 75 + agent: "security", 76 + systemPrompt: "You are a security reviewer...", 77 + prompt: "Review src/auth/", 78 + model: "anthropic/claude-opus-4-6", 79 + }), 80 + factory.spawn({ 81 + agent: "coverage", 82 + systemPrompt: "You analyze test coverage...", 83 + prompt: "Check coverage for src/auth/", 84 + model: "anthropic/claude-sonnet-4-6", 85 + }), 86 + ]); 87 + ``` 88 + 89 + ### Observe 90 + 91 + ```typescript 92 + factory.observe.log("info", "Starting analysis", { fileCount: 42 }); 93 + factory.observe.log("warning", "Slow response", { duration: 5000 }); 94 + factory.observe.log("error", "Task failed", { taskId: "task-3" }); 95 + 96 + const artifactPath = factory.observe.artifact("summary.md", reportContent); 97 + ``` 98 + 99 + ### Shutdown 100 + 101 + ```typescript 102 + await factory.shutdown(true); // Cancel all running tasks 103 + await factory.shutdown(false); // Wait for running tasks to complete naturally 104 + ``` 105 + 106 + ## Execution Results 107 + 108 + Each subagent returns an `ExecutionResult`: 109 + 110 + ```typescript 111 + interface ExecutionResult { 112 + taskId: string; 113 + agent: string; 114 + task: string; // Original execution prompt string 115 + exitCode: number; 116 + 117 + text: string; 118 + sessionPath?: string; 119 + 120 + messages: unknown[]; 121 + 122 + usage: UsageStats; 123 + model?: string; 124 + stopReason?: string; 125 + errorMessage?: string; 126 + stderr: string; 127 + 128 + step?: number; 129 + } 130 + ``` 131 + 132 + ## Context flow 133 + 134 + ### Context DOWN (Parent -> Subagent) 135 + 136 + The parent session path is appended to the subagent system prompt automatically. Subagents can use `search_thread` to read parent context. 137 + 138 + ### Context UP (Subagent -> Program) 139 + 140 + 1. `result.text` for quick chaining 141 + 2. 
`result.sessionPath` for deep review 142 + 143 + ```typescript 144 + const research = await factory.spawn({ 145 + agent: "researcher", 146 + systemPrompt: "You are a thorough technical researcher.", 147 + prompt: "Research Rust async patterns and common pitfalls.", 148 + model: "anthropic/claude-opus-4-6", 149 + }); 150 + 151 + const summary = await factory.spawn({ 152 + agent: "summarizer", 153 + systemPrompt: "You write concise executive summaries.", 154 + prompt: `Summarize this research:\n\n${research.text}`, 155 + model: "anthropic/claude-haiku-4-5", 156 + }); 157 + 158 + const review = await factory.spawn({ 159 + agent: "reviewer", 160 + systemPrompt: "You are a technical reviewer. Verify claims and flag unsupported assertions.", 161 + prompt: `Review research session at ${research.sessionPath} for technical accuracy.`, 162 + model: "anthropic/claude-opus-4-6", 163 + }); 164 + ``` 165 + 166 + ## Error handling 167 + 168 + Check `exitCode` / `stopReason` / `errorMessage` and escalate: 169 + 170 + ```typescript 171 + const result = await factory.spawn({ ... }); 172 + 173 + const failed = 174 + result.exitCode !== 0 || 175 + result.stopReason === "error" || 176 + Boolean(result.errorMessage); 177 + 178 + if (failed) { 179 + factory.observe.log("error", "Task failed", { 180 + taskId: result.taskId, 181 + exitCode: result.exitCode, 182 + stopReason: result.stopReason, 183 + error: result.errorMessage, 184 + stderr: result.stderr, 185 + }); 186 + throw new Error(`Task ${result.taskId} failed: ${result.errorMessage || "unknown error"}`); 187 + } 188 + ``` 189 + 190 + ## Async model 191 + 192 + Programs run asynchronously by default when invoked via tool call: immediate `runId`, completion via notification. 
193 + 194 + Inside your program, use `await` and `Promise.all` normally: 195 + 196 + ```typescript 197 + const [r1, r2] = await Promise.all([ 198 + factory.spawn({ 199 + agent: "a", 200 + systemPrompt: "...", 201 + prompt: "...", 202 + model: "anthropic/claude-opus-4-6", 203 + }), 204 + factory.spawn({ agent: "b", systemPrompt: "...", prompt: "...", model: "cerebras/zai-glm-4.7" }), 205 + ]); 206 + console.log(r1.text, r2.text); 207 + ``` 208 + 209 + ## Detached processes 210 + 211 + Subagent processes are detached: 212 + 213 + - Closing pi or cancelling a turn does **not** kill running subagents 214 + - Output is written to `.stdout.jsonl` files 215 + - PID files enable cancel via `/mill` or `pi --mill` 216 + 217 + ## Key principles 218 + 219 + 1. Programs coordinate, subagents execute 220 + 2. Use `result.text` for fast chaining 221 + 3. Use `result.sessionPath` for deep context 222 + 4. Check failure signals (`exitCode`, `stopReason`, `errorMessage`) 223 + 5. Log progress with `factory.observe.log()` 224 + 225 + See [patterns.md](patterns.md) for common orchestration patterns.

+117

packages/pi-mill/skills/mill-basics/patterns.md

··· 1 + # Mill Patterns 2 + 3 + Common orchestration patterns for pi-mill programs. 4 + 5 + ## Parallel Review 6 + 7 + Fan out independent tasks, collect results: 8 + 9 + ```ts 10 + const results = await Promise.all([ 11 + factory.spawn({ 12 + agent: "security", 13 + systemPrompt: 14 + "You are a security reviewer. You look for injection flaws, auth bypasses, and data exposure. Report findings with severity ratings.", 15 + prompt: "Review src/auth/ for security vulnerabilities.", 16 + model: "anthropic/claude-opus-4-6", 17 + step: 0, 18 + }), 19 + factory.spawn({ 20 + agent: "perf", 21 + systemPrompt: 22 + "You are a performance analyst. You identify bottlenecks, unnecessary allocations, and O(n²) patterns.", 23 + prompt: "Profile src/api/ for performance issues.", 24 + model: "anthropic/claude-sonnet-4-6", 25 + step: 1, 26 + }), 27 + ]); 28 + ``` 29 + 30 + ## Sequential Pipeline 31 + 32 + Each step feeds into the next via `result.text`: 33 + 34 + ```ts 35 + const analysis = await factory.spawn({ 36 + agent: "analyzer", 37 + systemPrompt: 38 + "You analyze codebases systematically. You map structure, dependencies, and public interfaces.", 39 + prompt: "Map all API endpoints in the codebase — list routes, methods, and handlers.", 40 + model: "anthropic/claude-opus-4-6", 41 + step: 0, 42 + }); 43 + 44 + const plan = await factory.spawn({ 45 + agent: "planner", 46 + systemPrompt: "You design thorough test plans. You prioritize critical paths and edge cases.", 47 + prompt: `Design integration tests covering the API endpoints found:\n\n${analysis.text}`, 48 + model: "anthropic/claude-sonnet-4-6", 49 + step: 1, 50 + }); 51 + ``` 52 + 53 + ## Fan-out then Synthesize 54 + 55 + Parallel investigation followed by a single summarizer: 56 + 57 + ```ts 58 + const reviews = await Promise.all([ 59 + factory.spawn({ 60 + agent: "frontend", 61 + systemPrompt: 62 + "You are a frontend specialist. 
You review UI code for accessibility, performance, and UX issues.", 63 + prompt: "Review the frontend code.", 64 + model: "anthropic/claude-sonnet-4-6", 65 + step: 0, 66 + }), 67 + factory.spawn({ 68 + agent: "backend", 69 + systemPrompt: 70 + "You are a backend specialist. You review server code for correctness, scalability, and error handling.", 71 + prompt: "Review the backend code.", 72 + model: "mistral/devstral-2512", 73 + step: 1, 74 + }), 75 + factory.spawn({ 76 + agent: "infra", 77 + systemPrompt: 78 + "You are an infrastructure specialist. You review configs, deployments, and operational concerns.", 79 + prompt: "Review the infrastructure.", 80 + model: "anthropic/claude-haiku-4-5", 81 + step: 2, 82 + }), 83 + ]); 84 + 85 + const context = reviews.map((r) => `[${r.agent}]\n${r.text}`).join("\n\n"); 86 + const summary = await factory.spawn({ 87 + agent: "synthesizer", 88 + systemPrompt: 89 + "You synthesize multiple perspectives into clear, actionable summaries. You deduplicate, prioritize, and highlight conflicts.", 90 + prompt: `Synthesize these reviews into an actionable summary:\n${context}`, 91 + model: "anthropic/claude-opus-4-6", 92 + step: 3, 93 + }); 94 + ``` 95 + 96 + ## Model Selection 97 + 98 + Models use `provider/model-id` format. 
Match capability to task complexity: 99 + 100 + - **Fast/cheap** -- `cerebras/zai-glm-4.7` for file search, formatting, grep-like work 101 + - **Fast + vision** -- `google-gemini-cli/gemini-3-flash-preview` when the agent needs to look at images or screenshots 102 + - **Mid-tier coding** -- `mistral/devstral-2512` for code review, refactoring, focused implementation 103 + - **Mid-tier general** -- `anthropic/claude-haiku-4-5` for analysis, summarization, planning 104 + - **Frontier** -- `anthropic/claude-opus-4-6` for complex multi-step reasoning, large changes across many files 105 + - **Frontier coding** -- `openai-codex/gpt-5.3-codex` for heavy implementation tasks 106 + - **Strong all-rounder** -- `anthropic/claude-sonnet-4-6` for tasks that need solid reasoning without frontier cost 107 + 108 + Override `model` per-agent when tasks vary in complexity. Don't default everything to one model. 109 + 110 + ## Context Chaining 111 + 112 + Each result has: 113 + 114 + - `result.text` — final assistant output, use directly in subsequent prompts 115 + - `result.sessionPath` — full session file, explorable via `search_thread` 116 + 117 + Pass context between agents by including `result.text` in the next agent's prompt string. For deep investigation, point agents at each other's `sessionPath`.

+515

packages/pi-mill/skills/mill-ralph-loop/SKILL.md

··· 1 + --- 2 + name: mill-ralph-loop 3 + description: Iterative task execution using the Ralph Loop pattern (named after Ralph Wiggum). Use when you need to repeatedly run an agent until a condition is met—fixing all lint errors, passing all tests, or exhausting PRD tasks. The filesystem serves as memory between iterations. 4 + --- 5 + 6 + # Ralph Loop Pattern 7 + 8 + The Ralph Loop (named after Ralph Wiggum) is an agentic pattern where you run an AI agent in a continuous loop until a task is complete. Each iteration starts relatively fresh, with the filesystem serving as persistent memory. 9 + 10 + ## Core Characteristics 11 + 12 + 1. **Same systemPrompt repeated** — The agent receives consistent instructions each iteration 13 + 2. **Filesystem as memory** — Code changes persist on disk between iterations 14 + 3. **Fresh context** — Each iteration reduces context pollution vs. single long conversation 15 + 4. **Exit condition** — Loop ends when tests pass, lint is clean, or work is exhausted 16 + 5. 
**Simple orchestrator** — Just `while (!done) { run agent }` 17 + 18 + ## Basic Structure 19 + 20 + ```typescript 21 + const maxIterations = 10; 22 + let iteration = 0; 23 + let done = false; 24 + 25 + while (!done && iteration < maxIterations) { 26 + iteration++; 27 + factory.observe.log("info", `Iteration ${iteration}`, { maxIterations }); 28 + 29 + const result = await factory.spawn({ 30 + agent: "worker", 31 + systemPrompt: "You are fixing issues iteratively", 32 + prompt: "Fix the next issue", 33 + model: "anthropic/claude-sonnet-4-6", 34 + step: iteration, 35 + }); 36 + 37 + // Check exit condition 38 + done = result.exitCode === 0 && result.text.includes("all clean"); 39 + 40 + if (result.exitCode !== 0) { 41 + factory.observe.log("error", "Agent failed", { iteration, error: result.errorMessage }); 42 + break; 43 + } 44 + } 45 + ``` 46 + 47 + ## Pattern 1: Fix All Lint Errors 48 + 49 + Repeatedly run an agent until lint is clean: 50 + 51 + ```typescript 52 + import { spawnSync } from "node:child_process"; 53 + 54 + const maxIterations = 20; 55 + let iteration = 0; 56 + 57 + while (iteration < maxIterations) { 58 + iteration++; 59 + 60 + const lintResult = spawnSync("npm", ["run", "lint"], { 61 + cwd: process.cwd(), 62 + encoding: "utf-8", 63 + }); 64 + 65 + if (lintResult.status === 0) { 66 + factory.observe.log("info", "Lint clean!", { iterations: iteration }); 67 + break; 68 + } 69 + 70 + factory.observe.log("info", `Iteration ${iteration}`, { 71 + exitCode: lintResult.status, 72 + errorCount: (lintResult.stdout.match(/error/gi) || []).length, 73 + }); 74 + 75 + const result = await factory.spawn({ 76 + agent: "linter", 77 + systemPrompt: `You fix lint errors iteratively. 78 + Run 'npm run lint' to see current errors. 79 + Fix one or more errors, focusing on the most common patterns. 80 + Make minimal, focused changes.`, 81 + prompt: `Fix lint errors. 
Current output:\n\n${lintResult.stdout}\n${lintResult.stderr}`, 82 + model: "mistral/devstral-2512", 83 + step: iteration, 84 + }); 85 + 86 + if (result.exitCode !== 0) { 87 + factory.observe.log("error", "Agent failed", { iteration }); 88 + break; 89 + } 90 + } 91 + ``` 92 + 93 + ## Pattern 2: With Progress Tracking 94 + 95 + Accumulate state across iterations to show progress: 96 + 97 + ```typescript 98 + import { spawnSync } from "node:child_process"; 99 + 100 + interface ProgressState { 101 + fixedIssues: string[]; 102 + lastErrorCount: number; 103 + stagnantIterations: number; 104 + } 105 + 106 + const maxIterations = 20; 107 + let iteration = 0; 108 + 109 + const progress: ProgressState = { 110 + fixedIssues: [], 111 + lastErrorCount: Infinity, 112 + stagnantIterations: 0, 113 + }; 114 + 115 + while (iteration < maxIterations) { 116 + iteration++; 117 + 118 + const lintResult = spawnSync("npm", ["run", "lint"], { 119 + cwd: process.cwd(), 120 + encoding: "utf-8", 121 + }); 122 + 123 + const errorCount = (lintResult.stdout.match(/error/gi) || []).length; 124 + 125 + if (lintResult.status === 0) { 126 + factory.observe.log("info", "All issues fixed!", { 127 + iterations: iteration, 128 + fixedIssues: progress.fixedIssues, 129 + }); 130 + break; 131 + } 132 + 133 + // Track progress 134 + if (errorCount >= progress.lastErrorCount) { 135 + progress.stagnantIterations++; 136 + } else { 137 + progress.stagnantIterations = 0; 138 + } 139 + 140 + // Exit if stagnant 141 + if (progress.stagnantIterations >= 3) { 142 + factory.observe.log("warning", "No progress for 3 iterations", { errorCount }); 143 + break; 144 + } 145 + 146 + factory.observe.log("info", `Iteration ${iteration}`, { 147 + errorCount, 148 + lastErrorCount: progress.lastErrorCount, 149 + fixed: progress.fixedIssues.length, 150 + }); 151 + 152 + progress.lastErrorCount = errorCount; 153 + 154 + const result = await factory.spawn({ 155 + agent: "fixer", 156 + systemPrompt: `You fix lint errors 
iteratively. 157 + Track your progress and avoid repeating unsuccessful approaches. 158 + Previous fixes: ${progress.fixedIssues.join(", ") || "none yet"} 159 + Error count: ${errorCount} (was ${progress.lastErrorCount === Infinity ? "unknown" : progress.lastErrorCount})`, 160 + prompt: `Fix lint errors:\n\n${lintResult.stdout}\n${lintResult.stderr}`, 161 + model: "anthropic/claude-sonnet-4-6", 162 + step: iteration, 163 + }); 164 + 165 + if (result.exitCode === 0) { 166 + const fixMatch = result.text.match(/fixed?:?\s*(.+)/i); 167 + if (fixMatch) { 168 + progress.fixedIssues.push(fixMatch[1]); 169 + } 170 + } 171 + } 172 + ``` 173 + 174 + ## Pattern 3: Loop Until Tests Pass 175 + 176 + Run agent repeatedly until test suite passes: 177 + 178 + ```typescript 179 + import { spawnSync } from "node:child_process"; 180 + 181 + const testCommand = "npm test"; 182 + const [cmd, ...args] = testCommand.split(" "); 183 + const maxIterations = 10; 184 + let iteration = 0; 185 + 186 + while (iteration < maxIterations) { 187 + iteration++; 188 + 189 + const testResult = spawnSync(cmd, args, { 190 + cwd: process.cwd(), 191 + encoding: "utf-8", 192 + timeout: 60000, 193 + }); 194 + 195 + if (testResult.status === 0) { 196 + factory.observe.log("info", "Tests passing!", { iterations: iteration }); 197 + break; 198 + } 199 + 200 + factory.observe.log("info", `Iteration ${iteration}`, { 201 + exitCode: testResult.status, 202 + timeout: testResult.signal === "SIGTERM", 203 + }); 204 + 205 + const failureOutput = [testResult.stdout, testResult.stderr] 206 + .filter(Boolean) 207 + .join("\n") 208 + .slice(-5000); // Last 5KB to avoid huge prompt payloads 209 + 210 + const result = await factory.spawn({ 211 + agent: "test-fixer", 212 + systemPrompt: `You fix failing tests iteratively. 213 + Analyze test output, identify the root cause, and make minimal fixes. 214 + Run the tests again to verify your changes. 
215 + Focus on one failure at a time if there are multiple.`, 216 + prompt: `Fix failing tests. Output from '${testCommand}':\n\n${failureOutput}`, 217 + model: "anthropic/claude-opus-4-6", 218 + step: iteration, 219 + }); 220 + 221 + if (result.exitCode !== 0) { 222 + factory.observe.log("error", "Agent failed", { iteration }); 223 + break; 224 + } 225 + } 226 + ``` 227 + 228 + ## Pattern 4: Exhaustive PRD Implementation 229 + 230 + Work through Product Requirements Document tasks until all are complete: 231 + 232 + ```typescript 233 + import fs from "node:fs"; 234 + 235 + interface PRDTask { 236 + id: string; 237 + description: string; 238 + completed: boolean; 239 + } 240 + 241 + const prdPath = "./PRD.md"; 242 + const tasksPath = "./tasks.json"; 243 + const maxIterations = 50; 244 + 245 + // Load or initialize tasks 246 + let tasks: PRDTask[]; 247 + if (fs.existsSync(tasksPath)) { 248 + tasks = JSON.parse(fs.readFileSync(tasksPath, "utf-8")); 249 + } else { 250 + const prdContent = fs.readFileSync(prdPath, "utf-8"); 251 + tasks = parsePRD(prdContent); 252 + fs.writeFileSync(tasksPath, JSON.stringify(tasks, null, 2)); 253 + } 254 + 255 + let iteration = 0; 256 + 257 + while (iteration < maxIterations) { 258 + const nextTask = tasks.find((t) => !t.completed); 259 + if (!nextTask) { 260 + factory.observe.log("info", "All tasks completed!", { iterations: iteration }); 261 + break; 262 + } 263 + 264 + iteration++; 265 + factory.observe.log("info", `Iteration ${iteration}: ${nextTask.id}`, { 266 + remaining: tasks.filter((t) => !t.completed).length, 267 + }); 268 + 269 + const result = await factory.spawn({ 270 + agent: "implementer", 271 + systemPrompt: `You implement PRD tasks iteratively. 272 + Read the PRD at ${prdPath}. 273 + Complete tasks one at a time. 
274 + Mark tasks complete by updating ${tasksPath}.`, 275 + prompt: `Implement: ${nextTask.id} - ${nextTask.description}\n\nCompleted so far:\n${tasks 276 + .filter((t) => t.completed) 277 + .map((t) => `+ ${t.id}`) 278 + .join("\n")}`, 279 + model: "openai-codex/gpt-5.3-codex", 280 + step: iteration, 281 + }); 282 + 283 + if (result.exitCode !== 0) { 284 + factory.observe.log("error", "Agent failed", { iteration, task: nextTask.id }); 285 + break; 286 + } 287 + 288 + // Reload tasks (agent may have updated them) 289 + if (fs.existsSync(tasksPath)) { 290 + tasks = JSON.parse(fs.readFileSync(tasksPath, "utf-8")); 291 + } 292 + } 293 + 294 + function parsePRD(content: string): PRDTask[] { 295 + const matches = content.matchAll(/^[-*]\s*\[\s*\]\s*(.+)$/gm); 296 + const tasks: PRDTask[] = []; 297 + let id = 1; 298 + 299 + for (const match of matches) { 300 + tasks.push({ 301 + id: `TASK-${id++}`, 302 + description: match[1].trim(), 303 + completed: false, 304 + }); 305 + } 306 + 307 + return tasks; 308 + } 309 + ``` 310 + 311 + ## Pattern 5: Combined Safety Checks 312 + 313 + Comprehensive safety and exit logic: 314 + 315 + ```typescript 316 + import { spawnSync } from "node:child_process"; 317 + 318 + const maxIterations = 20; 319 + const maxStagnantIterations = 3; 320 + const maxFailedIterations = 2; 321 + const checkCommand = "npm run lint"; 322 + 323 + let iteration = 0; 324 + let stagnantCount = 0; 325 + let failedCount = 0; 326 + let lastCheckOutput = ""; 327 + 328 + while (iteration < maxIterations) { 329 + iteration++; 330 + 331 + // Periodic check 332 + const [cmd, ...args] = checkCommand.split(" "); 333 + const checkResult = spawnSync(cmd, args, { 334 + cwd: process.cwd(), 335 + encoding: "utf-8", 336 + }); 337 + 338 + if (checkResult.status === 0) { 339 + factory.observe.log("info", "Check passed!", { iterations: iteration }); 340 + break; 341 + } 342 + 343 + // Track stagnation 344 + const currentOutput = checkResult.stdout + checkResult.stderr; 345 + if 
(currentOutput === lastCheckOutput) { 346 + stagnantCount++; 347 + factory.observe.log("warning", "No change detected", { stagnantCount }); 348 + } else { 349 + stagnantCount = 0; 350 + } 351 + lastCheckOutput = currentOutput; 352 + 353 + if (stagnantCount >= maxStagnantIterations) { 354 + factory.observe.log("error", "Stagnant iterations exceeded", { stagnantCount }); 355 + break; 356 + } 357 + 358 + factory.observe.log("info", `Iteration ${iteration}`, { 359 + stagnantCount, 360 + failedCount, 361 + max: maxIterations, 362 + }); 363 + 364 + const result = await factory.spawn({ 365 + agent: "worker", 366 + systemPrompt: "You are fixing issues iteratively", 367 + prompt: "Continue fixing issues", 368 + model: "anthropic/claude-sonnet-4-6", 369 + step: iteration, 370 + }); 371 + 372 + if (result.exitCode !== 0) { 373 + failedCount++; 374 + factory.observe.log("error", "Agent failed", { iteration, failedCount }); 375 + 376 + if (failedCount >= maxFailedIterations) { 377 + factory.observe.log("error", "Failed iterations exceeded", { failedCount }); 378 + break; 379 + } 380 + } else { 381 + failedCount = 0; 382 + } 383 + } 384 + ``` 385 + 386 + ## Best Practices 387 + 388 + ### 1. **Set max iterations** 389 + 390 + Always have an upper bound to prevent infinite loops: 391 + 392 + ```typescript 393 + const maxIterations = 20; // Sensible default 394 + ``` 395 + 396 + ### 2. **Detect stagnation** 397 + 398 + Track if the agent is making progress: 399 + 400 + ```typescript 401 + if (currentState === lastState) { 402 + stagnantCount++; 403 + if (stagnantCount >= 3) break; 404 + } 405 + ``` 406 + 407 + ### 3. **Use bash exit conditions** 408 + 409 + Shell out to authoritative checks (tests, lint, build): 410 + 411 + ```typescript 412 + const result = spawnSync("npm", ["test"], { encoding: "utf-8" }); 413 + if (result.status === 0) break; 414 + ``` 415 + 416 + ### 4. 
**Provide context to agent** 417 + 418 + Include iteration number, progress, previous attempts: 419 + 420 + ```typescript 421 + prompt: `Iteration ${iteration}/${maxIterations} 422 + Fixed so far: ${fixed.join(", ")} 423 + Current errors: ${errorCount} 424 + ...`; 425 + ``` 426 + 427 + ### 5. **Log everything** 428 + 429 + Observability is critical for debugging loops: 430 + 431 + ```typescript 432 + factory.observe.log("info", "Loop state", { 433 + iteration, 434 + errorCount, 435 + stagnantCount, 436 + lastChange, 437 + }); 438 + ``` 439 + 440 + ### 6. **Limit context size** 441 + 442 + Truncate large outputs to avoid prompt bloat: 443 + 444 + ```typescript 445 + const recentOutput = fullOutput.slice(-5000); // Last 5KB 446 + ``` 447 + 448 + ### 7. **Allow early exit** 449 + 450 + If the goal is achieved, break out of the loop immediately: 451 + 452 + ```typescript 453 + if (testsPassing) break; 454 + ``` 455 + 456 + ## When to Use Ralph Loop 457 + 458 + Good for: 459 + 460 + - Fixing lint/type errors iteratively 461 + - Making tests pass one by one 462 + - Implementing PRD tasks sequentially 463 + - Refactoring with incremental validation 464 + - Code generation with iterative refinement 465 + 466 + Not ideal for: 467 + 468 + - Tasks requiring deep context across iterations 469 + - Complex multi-step reasoning within a single problem 470 + - Work where the agent needs to remember detailed discussions 471 + - Parallel work (use `Promise.all` with `factory.spawn` instead) 472 + 473 + ## Advanced: Nested Loops 474 + 475 + You can nest Ralph Loops for hierarchical work: 476 + 477 + ```typescript 478 + const modules = ["src/auth", "src/api", "src/db"]; 479 + 480 + for (const module of modules) { 481 + factory.observe.log("info", `Processing module: ${module}`); 482 + 483 + let iteration = 0; 484 + while (iteration < 10) { 485 + iteration++; 486 + 487 + const result = await factory.spawn({ 488 + agent: "module-fixer", 489 + systemPrompt: `Fix issues in ${module}`, 490 + prompt: "Run
checks and fix issues", 491 + model: "mistral/devstral-2512", 492 + step: iteration, 493 + }); 494 + 495 + const check = spawnSync("npm", ["run", "lint", module], { 496 + cwd: process.cwd(), 497 + encoding: "utf-8", 498 + }); 499 + 500 + if (check.status === 0) break; 501 + } 502 + } 503 + ``` 504 + 505 + ## Summary 506 + 507 + The Ralph Loop is a simple but powerful pattern: 508 + 509 + - **While loop** around `await factory.spawn()` 510 + - **Filesystem persistence** between iterations 511 + - **Bash exit conditions** for authoritative checks 512 + - **Progress tracking** to detect stagnation 513 + - **Max iterations** for safety 514 + 515 + It works because the agent sees fresh context each iteration, making progress incrementally while the filesystem accumulates changes. Perfect for iterative tasks where "run it again" is a valid strategy.

+524

packages/pi-mill/skills/mill-worktree/SKILL.md

··· 1 + --- 2 + name: mill-worktree 3 + description: "Worktree-based parallel development with pi-mill. Use when multiple agents need to edit code simultaneously without conflicts—each agent gets its own working directory via jj workspace or git worktree." 4 + --- 5 + 6 + # Worktree-Based Parallel Development 7 + 8 + When multiple agents need to edit files simultaneously, they'll conflict if they share a working directory. The solution: give each agent its own worktree. Each has a full working copy but shares the underlying repository. Agents work in complete isolation—own directory, own state, no file conflicts. 9 + 10 + ## Why Worktrees? 11 + 12 + - **No merge conflicts during work** — Each agent has its own copy of every file 13 + - **Full toolchain access** — Each worktree can run its own dev server, tests, linter 14 + - **Atomic merges** — Combine results after all agents finish 15 + - **Clean rollback** — Discard a worktree if an agent fails 16 + 17 + ## Jujutsu (jj) Variant 18 + 19 + ### Core Commands 20 + 21 + ```bash 22 + # Create a workspace (like git worktree add) 23 + jj workspace add /tmp/worktree-auth 24 + 25 + # List workspaces 26 + jj workspace list 27 + 28 + # Remove workspace tracking (doesn't delete files) 29 + jj workspace forget <workspace-name> 30 + 31 + # Delete the directory 32 + rm -rf /tmp/worktree-auth 33 + ``` 34 + 35 + ### Basic Pattern 36 + 37 + ```typescript 38 + import { spawnSync } from "node:child_process"; 39 + import fs from "node:fs"; 40 + 41 + const baseCwd = process.cwd(); 42 + const tasks = [ 43 + { 44 + name: "auth", 45 + prompt: "Implement auth module", 46 + systemPrompt: 47 + "You are a software engineer. Implement the requested changes. Run tests to verify your work.", 48 + }, 49 + { 50 + name: "api", 51 + prompt: "Implement API endpoints", 52 + systemPrompt: 53 + "You are a software engineer. Implement the requested changes. 
Run tests to verify your work.", 54 + }, 55 + ]; 56 + const worktrees: string[] = []; 57 + 58 + try { 59 + // 1. Create worktrees 60 + for (const t of tasks) { 61 + const wtPath = `/tmp/pi-worktree-${t.name}-${Date.now()}`; 62 + worktrees.push(wtPath); 63 + 64 + const result = spawnSync("jj", ["workspace", "add", wtPath], { 65 + cwd: baseCwd, 66 + encoding: "utf-8", 67 + }); 68 + 69 + if (result.status !== 0) { 70 + throw new Error(`Failed to create workspace ${t.name}: ${result.stderr}`); 71 + } 72 + 73 + factory.observe.log("info", `Created workspace: ${t.name}`, { path: wtPath }); 74 + } 75 + 76 + // 2. Install dependencies in each worktree 77 + await Promise.all( 78 + worktrees.map((wt, i) => 79 + factory.spawn({ 80 + agent: "installer", 81 + systemPrompt: 82 + "Install project dependencies. Run the appropriate install command (npm install, pnpm install, bun install, etc.) and verify it succeeds.", 83 + prompt: "Install dependencies in this workspace.", 84 + model: "cerebras/zai-glm-4.7", 85 + cwd: wt, 86 + step: i, 87 + }), 88 + ), 89 + ); 90 + 91 + // 3. Dispatch parallel agents 92 + const results = await Promise.all( 93 + tasks.map((t, i) => 94 + factory.spawn({ 95 + agent: t.name, 96 + systemPrompt: t.systemPrompt, 97 + prompt: t.prompt, 98 + model: "anthropic/claude-opus-4-6", 99 + cwd: worktrees[i], 100 + step: i, 101 + }), 102 + ), 103 + ); 104 + 105 + // 4. Check results 106 + const failed = results.filter((r) => r.exitCode !== 0); 107 + if (failed.length > 0) { 108 + factory.observe.log("warning", "Some agents failed", { 109 + failed: failed.map((r) => r.agent), 110 + }); 111 + } 112 + 113 + // 5. Merge results back 114 + const mergeResult = await factory.spawn({ 115 + agent: "merger", 116 + systemPrompt: `You merge parallel workstream results using jj. 117 + Use 'jj log' to see all changes across workspaces. 118 + Create a merge commit that combines all successful changes. 119 + Resolve any conflicts if they arise. 
120 + The main workspace is at: ${baseCwd}`, 121 + prompt: `Merge changes from ${worktrees.length} parallel workstreams. 122 + Workspaces: ${worktrees.join(", ")} 123 + Failed agents: ${failed.map((r) => r.agent).join(", ") || "none"} 124 + Use jj to combine the changes into the main workspace.`, 125 + model: "anthropic/claude-sonnet-4-6", 126 + cwd: baseCwd, 127 + step: tasks.length, 128 + }); 129 + 130 + // 6. Write summary 131 + const summaryContent = results 132 + .map((r) => `## ${r.agent}\n**Status:** ${r.exitCode === 0 ? "pass" : "fail"}\n\n${r.text}`) 133 + .join("\n\n---\n\n"); 134 + factory.observe.artifact("worktree-report.md", summaryContent); 135 + } finally { 136 + // 7. Cleanup — always runs 137 + for (const wt of worktrees) { 138 + const name = wt.split("/").pop() || ""; 139 + spawnSync("jj", ["workspace", "forget", name], { 140 + cwd: baseCwd, 141 + encoding: "utf-8", 142 + }); 143 + if (fs.existsSync(wt)) { 144 + fs.rmSync(wt, { recursive: true, force: true }); 145 + } 146 + factory.observe.log("info", `Cleaned up workspace`, { path: wt }); 147 + } 148 + } 149 + ``` 150 + 151 + ### jj Merge Strategies 152 + 153 + After parallel work, you have multiple jj changes to combine. 
Common approaches: 154 + 155 + **Rebase onto each other (sequential):** 156 + 157 + ```bash 158 + # In the main workspace, rebase changes into a sequence 159 + jj rebase -s <change-auth> -d <change-api> 160 + jj rebase -s <change-ui> -d <change-auth> 161 + ``` 162 + 163 + **Create a merge commit:** 164 + 165 + ```bash 166 + # Create a new change with multiple parents 167 + jj new <change-auth> <change-api> <change-ui> -m "Merge parallel workstreams" 168 + ``` 169 + 170 + **Squash into one:** 171 + 172 + ```bash 173 + # If you want a single combined change 174 + jj new <change-auth> <change-api> <change-ui> 175 + jj squash 176 + ``` 177 + 178 + ## Git Worktree Variant 179 + 180 + For repositories using git instead of jj: 181 + 182 + ### Core Commands 183 + 184 + ```bash 185 + # Create a worktree on a new branch 186 + git worktree add /tmp/worktree-auth -b feature/auth 187 + 188 + # List worktrees 189 + git worktree list 190 + 191 + # Remove worktree (cleans up git metadata) 192 + git worktree remove /tmp/worktree-auth 193 + 194 + # Force remove if dirty 195 + git worktree remove --force /tmp/worktree-auth 196 + ``` 197 + 198 + ### Basic Pattern 199 + 200 + ```typescript 201 + import { spawnSync } from "node:child_process"; 202 + import fs from "node:fs"; 203 + 204 + const baseCwd = process.cwd(); 205 + const baseBranch = "main"; 206 + const tasks = [ 207 + { 208 + name: "auth", 209 + prompt: "Implement auth module", 210 + systemPrompt: "Implement the requested changes. Commit your work when done.", 211 + }, 212 + { 213 + name: "payments", 214 + prompt: "Implement payments", 215 + systemPrompt: "Implement the requested changes. Commit your work when done.", 216 + }, 217 + ]; 218 + const worktrees: Array<{ path: string; branch: string }> = []; 219 + 220 + try { 221 + // 1. 
Create worktrees with dedicated branches 222 + for (const t of tasks) { 223 + const branch = `worktree/${t.name}-${Date.now()}`; 224 + const wtPath = `/tmp/pi-worktree-${t.name}-${Date.now()}`; 225 + worktrees.push({ path: wtPath, branch }); 226 + 227 + const result = spawnSync("git", ["worktree", "add", wtPath, "-b", branch, baseBranch], { 228 + cwd: baseCwd, 229 + encoding: "utf-8", 230 + }); 231 + 232 + if (result.status !== 0) { 233 + throw new Error(`Failed to create worktree ${t.name}: ${result.stderr}`); 234 + } 235 + 236 + factory.observe.log("info", `Created worktree: ${t.name}`, { path: wtPath, branch }); 237 + } 238 + 239 + // 2. Install dependencies 240 + await Promise.all( 241 + worktrees.map((wt, i) => 242 + factory.spawn({ 243 + agent: "installer", 244 + systemPrompt: "Install project dependencies.", 245 + prompt: "Run the install command for this project (npm install, etc.)", 246 + model: "cerebras/zai-glm-4.7", 247 + cwd: wt.path, 248 + step: i, 249 + }), 250 + ), 251 + ); 252 + 253 + // 3. Dispatch agents 254 + const results = await Promise.all( 255 + tasks.map((t, i) => 256 + factory.spawn({ 257 + agent: t.name, 258 + systemPrompt: t.systemPrompt, 259 + prompt: `${t.prompt}\n\nCommit your changes to the current branch when complete.`, 260 + model: "openai-codex/gpt-5.3-codex", 261 + cwd: worktrees[i].path, 262 + step: i, 263 + }), 264 + ), 265 + ); 266 + 267 + // 4. Merge branches back 268 + const successful = results 269 + .map((r, i) => ({ result: r, worktree: worktrees[i] })) 270 + .filter(({ result }) => result.exitCode === 0); 271 + 272 + await factory.spawn({ 273 + agent: "merger", 274 + systemPrompt: `You merge git branches from parallel workstreams. 275 + Merge each feature branch into ${baseBranch}. 276 + Handle conflicts if they arise. 
Prefer keeping both changes when possible.`, 277 + prompt: `Merge these branches into ${baseBranch}: 278 + ${successful.map(({ worktree }) => `- ${worktree.branch}`).join("\n")}`, 279 + model: "anthropic/claude-sonnet-4-6", 280 + cwd: baseCwd, 281 + step: tasks.length, 282 + }); 283 + } finally { 284 + // 5. Cleanup 285 + for (const wt of worktrees) { 286 + spawnSync("git", ["worktree", "remove", "--force", wt.path], { 287 + cwd: baseCwd, 288 + encoding: "utf-8", 289 + }); 290 + spawnSync("git", ["branch", "-D", wt.branch], { 291 + cwd: baseCwd, 292 + encoding: "utf-8", 293 + }); 294 + if (fs.existsSync(wt.path)) { 295 + fs.rmSync(wt.path, { recursive: true, force: true }); 296 + } 297 + } 298 + } 299 + ``` 300 + 301 + ## Dependency Installation 302 + 303 + Each worktree needs its own `node_modules` (or equivalent). Common patterns: 304 + 305 + ```typescript 306 + // Detect package manager (bun.lock is Bun's text lockfile, bun.lockb the older binary one) and install 307 + function installDeps(cwd: string): { status: number | null; stderr: string } { 308 + if (fs.existsSync(`${cwd}/bun.lock`) || fs.existsSync(`${cwd}/bun.lockb`)) { 309 + return spawnSync("bun", ["install"], { cwd, encoding: "utf-8" }); 310 + } else if (fs.existsSync(`${cwd}/pnpm-lock.yaml`)) { 311 + return spawnSync("pnpm", ["install", "--frozen-lockfile"], { cwd, encoding: "utf-8" }); 312 + } else if (fs.existsSync(`${cwd}/yarn.lock`)) { 313 + return spawnSync("yarn", ["install", "--frozen-lockfile"], { cwd, encoding: "utf-8" }); 314 + } else { 315 + return spawnSync("npm", ["ci"], { cwd, encoding: "utf-8" }); 316 + } 317 + } 318 + ``` 319 + 320 + Or let each agent handle it — the installer agent in the examples above will figure out the right command.
321 + 322 + ## Advanced: Fan-Out with Worktrees + Synthesize 323 + 324 + Combine the worktree pattern with fan-out-then-synthesize: 325 + 326 + ```typescript 327 + import { spawnSync } from "node:child_process"; 328 + import fs from "node:fs"; 329 + 330 + const baseCwd = process.cwd(); 331 + const worktrees: string[] = []; 332 + 333 + const tasks = [ 334 + { 335 + name: "api", 336 + prompt: "Add pagination to /api/users endpoint", 337 + systemPrompt: "You are a backend engineer.", 338 + }, 339 + { 340 + name: "ui", 341 + prompt: "Add pagination controls to the users table", 342 + systemPrompt: "You are a frontend engineer.", 343 + }, 344 + { 345 + name: "tests", 346 + prompt: "Write integration tests for paginated user listing", 347 + systemPrompt: "You are a QA engineer.", 348 + }, 349 + ]; 350 + 351 + try { 352 + // Setup worktrees 353 + for (const t of tasks) { 354 + const wt = `/tmp/pi-wt-${t.name}-${Date.now()}`; 355 + worktrees.push(wt); 356 + spawnSync("jj", ["workspace", "add", wt], { cwd: baseCwd, encoding: "utf-8" }); 357 + } 358 + 359 + // Install deps in parallel 360 + await Promise.all( 361 + worktrees.map((wt, i) => 362 + factory.spawn({ 363 + agent: "installer", 364 + systemPrompt: "Install deps.", 365 + prompt: "npm install", 366 + model: "cerebras/zai-glm-4.7", 367 + cwd: wt, 368 + step: i, 369 + }), 370 + ), 371 + ); 372 + 373 + // Parallel implementation 374 + const results = await Promise.all( 375 + tasks.map((t, i) => 376 + factory.spawn({ 377 + agent: t.name, 378 + systemPrompt: t.systemPrompt, 379 + prompt: t.prompt, 380 + model: "anthropic/claude-opus-4-6", 381 + cwd: worktrees[i], 382 + step: i, 383 + }), 384 + ), 385 + ); 386 + 387 + // Synthesize — merge and verify 388 + const context = results.map((r) => `[${r.agent}]\n${r.text}`).join("\n\n"); 389 + const synthesis = await factory.spawn({ 390 + agent: "integrator", 391 + systemPrompt: `You integrate parallel workstreams. 392 + 1. 
Use jj to merge all workspace changes into the main workspace. 393 + 2. Resolve any conflicts. 394 + 3. Run the full test suite to verify integration. 395 + 4. Fix any integration issues. 396 + Main workspace: ${baseCwd}`, 397 + prompt: `Integrate these parallel changes:\n\n${context}`, 398 + model: "anthropic/claude-opus-4-6", 399 + cwd: baseCwd, 400 + step: tasks.length, 401 + }); 402 + } finally { 403 + for (const wt of worktrees) { 404 + const name = wt.split("/").pop() || ""; 405 + spawnSync("jj", ["workspace", "forget", name], { cwd: baseCwd, encoding: "utf-8" }); 406 + if (fs.existsSync(wt)) fs.rmSync(wt, { recursive: true, force: true }); 407 + } 408 + } 409 + ``` 410 + 411 + ## Best Practices 412 + 413 + ### 1. **Always clean up in `finally`** 414 + 415 + Worktrees leak disk space and repository state if not cleaned up: 416 + 417 + ```typescript 418 + try { 419 + // ... create worktrees, run agents 420 + } finally { 421 + // ... forget workspaces, delete directories 422 + } 423 + ``` 424 + 425 + ### 2. **Use `/tmp` for worktree paths** 426 + 427 + This keeps worktrees out of your project directory, and the OS handles directory cleanup on reboot (repository-side tracking still needs the explicit `finally` cleanup above): 428 + 429 + ```typescript 430 + const wtPath = `/tmp/pi-worktree-${name}-${Date.now()}`; 431 + ``` 432 + 433 + ### 3. **Include timestamps in paths** 434 + 435 + Prevents collisions if you run the same program twice: 436 + 437 + ```typescript 438 + const wtPath = `/tmp/pi-wt-${name}-${Date.now()}`; 439 + ``` 440 + 441 + ### 4. **Install deps before dispatching agents** 442 + 443 + Agents shouldn't waste tokens figuring out dependency installation.
Do it as a setup step: 444 + 445 + ```typescript 446 + // Dedicated install step 447 + await Promise.all( 448 + worktrees.map((wt) => 449 + factory.spawn({ 450 + agent: "installer", 451 + systemPrompt: "Install dependencies.", 452 + prompt: "npm install", 453 + model: "cerebras/zai-glm-4.7", 454 + cwd: wt, 455 + }), 456 + ), 457 + ); 458 + 459 + // Then dispatch real work 460 + await Promise.all( 461 + tasks.map((t, i) => 462 + factory.spawn({ 463 + agent: t.name, 464 + systemPrompt: t.systemPrompt, 465 + prompt: t.prompt, 466 + model: "anthropic/claude-opus-4-6", 467 + cwd: worktrees[i], 468 + }), 469 + ), 470 + ); 471 + ``` 472 + 473 + ### 5. **Scope agent work narrowly** 474 + 475 + Each agent should work on a well-defined, non-overlapping area. If two agents edit the same files, merging becomes painful: 476 + 477 + ``` 478 + Agent A: "Implement auth module in src/auth/" 479 + Agent B: "Implement payments in src/payments/" 480 + NOT: "Refactor the app" — too broad, will conflict 481 + ``` 482 + 483 + ### 6. **Verify after merge** 484 + 485 + Always run tests/lint after merging parallel changes: 486 + 487 + ```typescript 488 + const verify = spawnSync("npm", ["test"], { cwd: baseCwd, encoding: "utf-8" }); 489 + if (verify.status !== 0) { 490 + // Fix integration issues 491 + } 492 + ``` 493 + 494 + ### 7. **Track worktree count** 495 + 496 + Each worktree is a full working copy. On large repos, 5+ simultaneous worktrees can use significant disk space. Start with 2-3 parallel agents and scale up. 
497 + 498 + ## When to Use Worktrees 499 + 500 + Good for: 501 + 502 + - Implementing multiple independent features in parallel 503 + - Parallel refactoring of separate modules 504 + - Running different test suites simultaneously 505 + - Any task where agents would otherwise conflict on files 506 + 507 + Not ideal for: 508 + 509 + - Tasks that heavily overlap in the same files 510 + - Read-only analysis (just use `Promise.all` with `factory.spawn` and the same `cwd`) 511 + - Very small changes (worktree overhead isn't worth it) 512 + - Repos with huge `node_modules` or build artifacts (disk cost) 513 + 514 + ## Summary 515 + 516 + The worktree pattern gives each agent full isolation: 517 + 518 + 1. **Create** — `jj workspace add` or `git worktree add` 519 + 2. **Install** — Dependencies in each worktree 520 + 3. **Dispatch** — Parallel agents via `Promise.all`, each with its own `cwd` 521 + 4. **Merge** — Combine changes with jj/git 522 + 5. **Cleanup** — Forget jj workspaces or remove git worktrees, then delete directories 523 + 524 + Agents never step on each other's toes. The merge step is where conflicts surface — and by scoping work to non-overlapping areas, you minimize that pain.

+18

packages/pi-mill/tsconfig.json

··· 1 + { 2 + "compilerOptions": { 3 + "target": "ES2022", 4 + "module": "NodeNext", 5 + "moduleResolution": "NodeNext", 6 + "lib": ["ES2022"], 7 + "types": ["node"], 8 + "strict": true, 9 + "skipLibCheck": true, 10 + "noEmit": true, 11 + "allowImportingTsExtensions": true, 12 + "resolveJsonModule": true, 13 + "esModuleInterop": true, 14 + "forceConsistentCasingInFileNames": true 15 + }, 16 + "include": ["./**/*.ts"], 17 + "exclude": ["./node_modules", "./tests"] 18 + }

+46

packages/pi-mill/types.ts

··· 1 + import type { Message } from "@mariozechner/pi-ai"; 2 + import type { ErrorDetails } from "./errors.js"; 3 + 4 + /** Numeric usage counters carried on each ExecutionResult via its `usage` field. NOTE(review): presumably token counts plus a monetary cost; units are not visible in this file — confirm with the producer. */ export interface UsageStats { 5 + input: number; 6 + output: number; 7 + cacheRead: number; 8 + cacheWrite: number; 9 + cost: number; 10 + contextTokens: number; 11 + turns: number; 12 + } 13 + 14 + /** Result record for a single task/agent execution: identifiers, exit code, message list, stderr text and usage counters, plus optional model/stop/error details. NOTE(review): the code that populates this is not visible here — field meanings beyond their types are assumptions to confirm. */ export interface ExecutionResult { 15 + taskId: string; 16 + agent: string; 17 + task: string; 18 + exitCode: number; 19 + messages: Message[]; 20 + stderr: string; 21 + usage: UsageStats; 22 + model?: string; 23 + stopReason?: string; 24 + errorMessage?: string; 25 + step?: number; 26 + /** Final assistant text output, auto-populated on completion. */ 27 + text: string; 28 + /** Path to the subagent's session .jsonl file. Use search_thread to explore. */ 29 + sessionPath?: string; 30 + } 31 + 32 + /** Summary of one run: its id, a status restricted to "running" | "done" | "failed" | "cancelled", the per-execution results, and optional observability data (may be null), error details and free-form metadata. */ export interface RunSummary { 33 + runId: string; 34 + status: "running" | "done" | "failed" | "cancelled"; 35 + results: ExecutionResult[]; 36 + observability?: { 37 + status: string; 38 + events: Array<{ time: number; type: string; message: string; data?: Record<string, unknown> }>; 39 + artifacts: string[]; 40 + artifactsDir?: string; 41 + startedAt: number; 42 + endedAt?: number; 43 + } | null; 44 + error?: ErrorDetails; 45 + metadata?: Record<string, unknown>; 46 + }

+86

packages/pi-mill/widget.ts

···

+20 -4

scripts/check-exports.test.ts

··· 32 32 33 33 await writeFile( 34 34 join(workspaceRoot, "packages", "core", "package.json"), 35 - JSON.stringify({ name: "@fixture/core", exports: { ".": "./src/public/index.api.ts" } }, null, 2), 35 + JSON.stringify( 36 + { name: "@fixture/core", exports: { ".": "./src/public/index.api.ts" } }, 37 + null, 38 + 2, 39 + ), 36 40 "utf-8", 37 41 ); 38 42 await writeFile( 39 43 join(workspaceRoot, "tools", "kit", "package.json"), 40 - JSON.stringify({ name: "@fixture/kit", exports: { ".": "./src/public/index.api.ts" } }, null, 2), 44 + JSON.stringify( 45 + { name: "@fixture/kit", exports: { ".": "./src/public/index.api.ts" } }, 46 + null, 47 + 2, 48 + ), 41 49 "utf-8", 42 50 ); 43 51 ··· 58 66 try { 59 67 await writeFile( 60 68 join(workspaceRoot, "package.json"), 61 - JSON.stringify({ name: "mill-fixture", private: true, workspaces: ["packages/*"] }, null, 2), 69 + JSON.stringify( 70 + { name: "mill-fixture", private: true, workspaces: ["packages/*"] }, 71 + null, 72 + 2, 73 + ), 62 74 "utf-8", 63 75 ); 64 76 await mkdir(join(workspaceRoot, "packages", "core"), { recursive: true }); ··· 101 113 try { 102 114 await writeFile( 103 115 join(workspaceRoot, "package.json"), 104 - JSON.stringify({ name: "mill-fixture", private: true, workspaces: ["packages/*"] }, null, 2), 116 + JSON.stringify( 117 + { name: "mill-fixture", private: true, workspaces: ["packages/*"] }, 118 + null, 119 + 2, 120 + ), 105 121 "utf-8", 106 122 ); 107 123 await mkdir(join(workspaceRoot, "packages", "core"), { recursive: true });

+6 -4

scripts/check-exports.ts

··· 104 104 105 105 const packageName = typeof packageJson.name === "string" ? packageJson.name : packageJsonPath; 106 106 const invalidExports = [ 107 - ...new Set([ 108 - ...normalizeExportKeys(packageJson.exports), 109 - ...normalizeExportEntries(packageJson.exports), 110 - ].filter((entry) => isInternalExportPath(entry))), 107 + ...new Set( 108 + [ 109 + ...normalizeExportKeys(packageJson.exports), 110 + ...normalizeExportEntries(packageJson.exports), 111 + ].filter((entry) => isInternalExportPath(entry)), 112 + ), 111 113 ].sort(); 112 114 113 115 if (invalidExports.length > 0) {

+9 -2

scripts/guardrail-harness.test.ts

··· 62 62 await writeFile( 63 63 badInternalPath, 64 64 [ 65 - "import * as Runtime from \"effect/Runtime\";", 65 + 'import * as Runtime from "effect/Runtime";', 66 66 "const run = Runtime.runPromise(runtime)(effect);", 67 67 ].join("\n"), 68 68 "utf-8", ··· 94 94 const fixtureRoot = await mkdtemp(join(tmpdir(), "mill-guardrail-runtime-")); 95 95 96 96 try { 97 - const badInternalPath = join(fixtureRoot, "packages", "core", "src", "internal", "bad.effect.ts"); 97 + const badInternalPath = join( 98 + fixtureRoot, 99 + "packages", 100 + "core", 101 + "src", 102 + "internal", 103 + "bad.effect.ts", 104 + ); 98 105 await mkdir(join(fixtureRoot, "packages", "core", "src", "internal"), { recursive: true }); 99 106 100 107 await writeFile(

+15 -4

scripts/guardrail-harness.ts

··· 23 23 } 24 24 25 25 export interface GuardrailSuiteResult { 26 - readonly results: ReadonlyArray<{ readonly check: GuardrailCheck; readonly result: GuardrailCommandResult }>; 27 - readonly failures: ReadonlyArray<{ readonly check: GuardrailCheck; readonly result: GuardrailCommandResult }>; 26 + readonly results: ReadonlyArray<{ 27 + readonly check: GuardrailCheck; 28 + readonly result: GuardrailCommandResult; 29 + }>; 30 + readonly failures: ReadonlyArray<{ 31 + readonly check: GuardrailCheck; 32 + readonly result: GuardrailCommandResult; 33 + }>; 28 34 } 29 35 30 36 export const runGuardrailCommand = async ( ··· 53 59 }; 54 60 }; 55 61 56 - export const runGuardrailSuite = async (input: GuardrailSuiteInput): Promise<GuardrailSuiteResult> => { 57 - const results: Array<{ readonly check: GuardrailCheck; readonly result: GuardrailCommandResult }> = []; 62 + export const runGuardrailSuite = async ( 63 + input: GuardrailSuiteInput, 64 + ): Promise<GuardrailSuiteResult> => { 65 + const results: Array<{ 66 + readonly check: GuardrailCheck; 67 + readonly result: GuardrailCommandResult; 68 + }> = []; 58 69 59 70 for (const check of input.checks) { 60 71 const result = await runGuardrailCommand({ cwd: input.cwd, cmd: check.cmd });

Configure Feed

Configure Feed