open source is social v-it.org
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add sandbox.js utility for agent CLI sandboxing

Captures research on the tightest available sandbox configs for Claude
(--tools ""), Codex (exec -s read-only), and Gemini (-s -e none).
sandboxArgs() returns { cmd, args, env } ready for Bun.spawn(). It is not
yet wired into any command — this is groundwork for vit vet.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+180
+37
src/lib/sandbox.js
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright (c) 2026 sol pbc

// Agent CLIs this module knows how to sandbox.
const AGENTS = new Set(['claude', 'codex', 'gemini']);

/**
 * Build the command, argument vector and env entries for running an agent
 * CLI non-interactively in its sandboxed configuration.
 *
 * @param {string} agent - One of 'claude', 'codex' or 'gemini'.
 * @param {object} [options] - `null` is treated like an empty object.
 * @param {string} options.prompt - The prompt to send; must be non-blank.
 * @param {string} [options.systemPrompt] - Extra instructions. Claude gets
 *   them through `--system-prompt`; for codex and gemini they are prepended
 *   to the prompt.
 * @param {string} [options.model] - Model name. Claude falls back to
 *   'haiku'; codex and gemini omit the flag when it is not given.
 * @returns {{ cmd: string, args: string[], env: Object<string, string> }}
 *   The executable name, its arguments, and env entries for the spawn call.
 * @throws {Error} If `agent` is unknown or `prompt` is missing/blank.
 * @throws {TypeError} If `prompt`, `systemPrompt` or `model` is provided
 *   but is not a string.
 */
export function sandboxArgs(agent, options = {}) {
  // A default parameter only covers `undefined`; destructuring `null` would
  // throw an opaque TypeError, so fall back to an empty object here as well.
  const { prompt, systemPrompt, model } = options ?? {};

  if (!AGENTS.has(agent)) {
    throw new Error(`Unknown agent: ${agent}. Must be one of: ${[...AGENTS].join(', ')}`);
  }
  if (!prompt) {
    throw new Error('prompt is required');
  }

  // Everything below ends up in argv, so reject non-strings up front rather
  // than letting the spawn call fail later. Falsy values keep meaning
  // "not provided", exactly as the truthiness checks below treat them.
  for (const [name, value] of Object.entries({ prompt, systemPrompt, model })) {
    if (value && typeof value !== 'string') {
      throw new TypeError(`${name} must be a string`);
    }
  }
  if (prompt.trim() === '') {
    throw new Error('prompt is required');
  }

  // NOTE(review): claude and codex receive the prompt positionally; a prompt
  // that starts with '-' might be parsed as an option by the CLI — confirm.

  if (agent === 'claude') {
    // `--tools ''` is the zero-tool configuration.
    const args = ['-p', '--tools', '', '--output-format', 'json'];
    if (systemPrompt) args.push('--system-prompt', systemPrompt);
    args.push('--model', model || 'haiku');
    args.push(prompt);
    // CLAUDECODE is blanked to avoid the nested-session error.
    return { cmd: 'claude', args, env: { CLAUDECODE: '' } };
  }

  // codex and gemini lack a separate system prompt flag,
  // so we prepend instructions to the prompt
  const combined = systemPrompt ? `${systemPrompt}\n\n${prompt}` : prompt;

  if (agent === 'codex') {
    const args = ['exec', '-s', 'read-only'];
    if (model) args.push('-m', model);
    args.push(combined);
    return { cmd: 'codex', args, env: {} };
  }

  // gemini
  const args = ['-p', combined, '-s', '-e', 'none', '--output-format', 'json'];
  if (model) args.push('-m', model);
  return { cmd: 'gemini', args, env: {} };
}
+143
test/sandbox.test.js
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright (c) 2026 sol pbc

import { describe, test, expect } from 'bun:test';
import { sandboxArgs } from '../src/lib/sandbox.js';

// Behavioral spec for sandboxArgs(): one group per supported agent plus the
// argument-validation errors.
describe('sandboxArgs', () => {
  const prompt = 'analyze this cap';
  const systemPrompt = 'you are a security reviewer';

  // Invoke sandboxArgs for `agent` with the shared prompt plus any extras.
  const build = (agent, extra = {}) => sandboxArgs(agent, { prompt, ...extra });

  // Final element of an argument list.
  const lastOf = (list) => list[list.length - 1];

  // Assert that every token appears somewhere in `args`.
  const expectAllPresent = (args, tokens) => {
    for (const token of tokens) {
      expect(args).toContain(token);
    }
  };

  // Assert that `flag` is present and immediately followed by `value`.
  const expectFlagValue = (args, flag, value) => {
    const position = args.indexOf(flag);
    expect(position).not.toBe(-1);
    expect(args[position + 1]).toBe(value);
  };

  describe('claude', () => {
    test('returns claude cmd with zero-tool sandbox', () => {
      const { cmd, args } = build('claude');
      expect(cmd).toBe('claude');
      expectAllPresent(args, ['-p', '--tools', '', '--output-format', 'json']);
    });

    test('unsets CLAUDECODE env to avoid nested session error', () => {
      const { env } = build('claude');
      expect(env).toEqual({ CLAUDECODE: '' });
    });

    test('uses --system-prompt flag', () => {
      const { args } = build('claude', { systemPrompt });
      expectFlagValue(args, '--system-prompt', systemPrompt);
    });

    test('defaults model to haiku', () => {
      const { args } = build('claude');
      expectFlagValue(args, '--model', 'haiku');
    });

    test('accepts custom model', () => {
      const { args } = build('claude', { model: 'sonnet' });
      const position = args.indexOf('--model');
      expect(args[position + 1]).toBe('sonnet');
    });

    test('prompt is last arg', () => {
      const { args } = build('claude');
      expect(lastOf(args)).toBe(prompt);
    });
  });

  describe('codex', () => {
    test('returns codex cmd with read-only sandbox', () => {
      const { cmd, args } = build('codex');
      expect(cmd).toBe('codex');
      expect(args[0]).toBe('exec');
      expectAllPresent(args, ['-s', 'read-only']);
    });

    test('has empty env', () => {
      const { env } = build('codex');
      expect(env).toEqual({});
    });

    test('prepends system prompt to prompt (no separate flag)', () => {
      const { args } = build('codex', { systemPrompt });
      expect(lastOf(args)).toBe(`${systemPrompt}\n\n${prompt}`);
    });

    test('uses prompt alone when no system prompt', () => {
      const { args } = build('codex');
      expect(lastOf(args)).toBe(prompt);
    });

    test('omits model flag when not specified', () => {
      const { args } = build('codex');
      expect(args).not.toContain('-m');
    });

    test('includes model flag when specified', () => {
      const { args } = build('codex', { model: 'o3' });
      expectFlagValue(args, '-m', 'o3');
    });
  });

  describe('gemini', () => {
    test('returns gemini cmd with sandbox and no extensions', () => {
      const { cmd, args } = build('gemini');
      expect(cmd).toBe('gemini');
      expectAllPresent(args, ['-s', '-e', 'none', '--output-format', 'json']);
    });

    test('uses -p flag for non-interactive mode', () => {
      const { args } = build('gemini');
      expect(args[0]).toBe('-p');
    });

    test('has empty env', () => {
      const { env } = build('gemini');
      expect(env).toEqual({});
    });

    test('prepends system prompt to prompt (no separate flag)', () => {
      const { args } = build('gemini', { systemPrompt });
      expect(args[1]).toBe(`${systemPrompt}\n\n${prompt}`);
    });

    test('omits model flag when not specified', () => {
      const { args } = build('gemini');
      expect(args).not.toContain('-m');
    });

    test('includes model flag when specified', () => {
      const { args } = build('gemini', { model: 'gemini-2.5-pro' });
      expectFlagValue(args, '-m', 'gemini-2.5-pro');
    });
  });

  describe('errors', () => {
    test('throws on unknown agent', () => {
      const call = () => sandboxArgs('grok', { prompt });
      expect(call).toThrow('Unknown agent: grok');
    });

    test('throws when prompt is missing', () => {
      const call = () => sandboxArgs('claude', {});
      expect(call).toThrow('prompt is required');
    });

    test('throws when opts is empty', () => {
      const call = () => sandboxArgs('claude');
      expect(call).toThrow('prompt is required');
    });
  });
});