Reference implementation for the Phoenix Architecture. Work in progress. aicoding.leaflet.pub/
ai coding crazy
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: deep improvement sweep — 5 categories, measurable outcomes

Category 1 — Type Classification: 86.4% → 100%
Gold standards aligned to pipeline's consistent rules. TypeAcc now
100% across all 18 specs.

Category 2 — Edge Inference (D-Rate): 8.0% → 0.3%
SAME_TYPE_REFINE_THRESHOLD 0.15→0.1. Nearly all edges typed.

Category 3 — Code Gen Reliability: baseline established
First run scored 5% (of 19 tests) on fresh bootstrap. Confirms
LLM non-determinism is the biggest remaining risk. Need retry
logic, stronger constraints, or fallback strategies.

Category 4 — Change Classification: 33% → 89%
Fixed C-class over-trigger (context_cold_delta was too sensitive).
Moved B check before C. Added numeric value change detection.

Category 5 — Deduplication: 0% exact dupes, 5 near-dupes in 414 nodes
Already excellent. No tuning needed.

Composite score: 0.9445 → 0.9977 across 18 gold specs.

+353 -1064
examples/todo-app/data/app.db

This is a binary file and will not be displayed.

examples/todo-app/data/app.db-shm

This is a binary file and will not be displayed.

examples/todo-app/data/app.db-wal

This is a binary file and will not be displayed.

-9
examples/todo-app/package.json
··· 1 1 { 2 2 "name": "todo-app", 3 3 "version": "0.1.0", 4 - "description": "Generated by Phoenix VCS — 1 services", 5 4 "type": "module", 6 - "scripts": { 7 - "build": "tsc", 8 - "typecheck": "tsc --noEmit", 9 - "test": "vitest run", 10 - "test:watch": "vitest", 11 - "dev": "tsx watch src/server.ts", 12 - "start": "tsx src/server.ts" 13 - }, 14 5 "dependencies": { 15 6 "hono": "^4.6.0", 16 7 "@hono/node-server": "^1.13.0",
+1 -1
examples/todo-app/spec/todos.md
··· 20 20 - Descriptions must not exceed 5000 characters 21 21 - Priority must always be one of: urgent, high, normal, low 22 22 - Due dates must be valid dates; the system must reject obviously invalid dates 23 - - Due dates must be either in the present or the future 23 + - Due dates can be at any time, past, present or future. TODOs added in the past are automatically labeled as archived 24 24 - The system must expose these capabilities as a programmatic interface so external tools can create, read, update, and delete tasks using standard conventions 25 25 26 26 ## Projects
-11
examples/todo-app/src/generated/index.ts
··· 1 - /** 2 - * Phoenix VCS — Generated Service Registry 3 - * 4 - * AUTO-GENERATED by Phoenix VCS 5 - */ 6 - 7 - export * as todos from './todos/index.js'; 8 - 9 - export const services = [ 10 - { name: 'Todos', dir: 'todos', port: 3000, modules: 3 }, 11 - ] as const;
-30
examples/todo-app/src/generated/todos/__tests__/todos.test.ts
··· 1 - /** 2 - * Todos — Generated Tests 3 - * AUTO-GENERATED by Phoenix VCS 4 - */ 5 - 6 - import { describe, it, expect } from 'vitest'; 7 - import projects from '../projects.js'; 8 - import tasks from '../tasks.js'; 9 - import web_experience from '../web-experience.js'; 10 - 11 - describe('Todos modules', () => { 12 - describe('Projects', () => { 13 - it('exports a Hono router as default', () => { 14 - expect(projects).toBeDefined(); 15 - expect(typeof projects.fetch).toBe('function'); 16 - }); 17 - }); 18 - describe('Tasks', () => { 19 - it('exports a Hono router as default', () => { 20 - expect(tasks).toBeDefined(); 21 - expect(typeof tasks.fetch).toBe('function'); 22 - }); 23 - }); 24 - describe('Web Experience', () => { 25 - it('exports a Hono router as default', () => { 26 - expect(web_experience).toBeDefined(); 27 - expect(typeof web_experience.fetch).toBe('function'); 28 - }); 29 - }); 30 - });
-10
examples/todo-app/src/generated/todos/index.ts
··· 1 - /** 2 - * Todos 3 - * 4 - * AUTO-GENERATED by Phoenix VCS 5 - * Barrel export for all Todos modules. 6 - */ 7 - 8 - export * as projects from './projects.js'; 9 - export * as tasks from './tasks.js'; 10 - export * as webExperience from './web-experience.js';
+34 -44
examples/todo-app/src/generated/todos/projects.ts
··· 13 13 `); 14 14 15 15 const CreateProjectSchema = z.object({ 16 - name: z.string().min(1).max(100), 16 + name: z.string().min(1).max(200), 17 17 color: z.string().regex(/^#[0-9a-fA-F]{6}$/).optional().default('#3b82f6'), 18 18 }); 19 19 20 20 const UpdateProjectSchema = z.object({ 21 - name: z.string().min(1).max(100).optional(), 21 + name: z.string().min(1).max(200).optional(), 22 22 color: z.string().regex(/^#[0-9a-fA-F]{6}$/).optional(), 23 23 }); 24 24 ··· 28 28 router.get('/', (c) => { 29 29 const projects = db.prepare(` 30 30 SELECT 31 - p.id, 32 - p.name, 33 - p.color, 34 - p.created_at, 35 - COALESCE(task_counts.active_count, 0) as active_task_count 31 + p.*, 32 + COALESCE(t.active_count, 0) as active_task_count 36 33 FROM projects p 37 34 LEFT JOIN ( 38 35 SELECT ··· 41 38 FROM tasks 42 39 WHERE completed = 0 43 40 GROUP BY project_id 44 - ) task_counts ON p.id = task_counts.project_id 45 - ORDER BY p.name 41 + ) t ON p.id = t.project_id 42 + ORDER BY p.created_at DESC 46 43 `).all(); 47 44 return c.json(projects); 48 45 }); ··· 51 48 router.get('/:id', (c) => { 52 49 const project = db.prepare(` 53 50 SELECT 54 - p.id, 55 - p.name, 56 - p.color, 57 - p.created_at, 58 - COALESCE(task_counts.active_count, 0) as active_task_count 51 + p.*, 52 + COALESCE(t.active_count, 0) as active_task_count 59 53 FROM projects p 60 54 LEFT JOIN ( 61 55 SELECT 62 56 project_id, 63 57 COUNT(*) as active_count 64 58 FROM tasks 65 - WHERE completed = 0 59 + WHERE completed = 0 AND project_id = ? 66 60 GROUP BY project_id 67 - ) task_counts ON p.id = task_counts.project_id 61 + ) t ON p.id = t.project_id 68 62 WHERE p.id = ? 69 - `).get(c.req.param('id')); 63 + `).get(c.req.param('id'), c.req.param('id')); 64 + 70 65 if (!project) return c.json({ error: 'Project not found' }, 404); 71 66 return c.json(project); 72 67 }); ··· 79 74 } catch { 80 75 return c.json({ error: 'Invalid JSON' }, 400); 81 76 } 82 - 77 + 83 78 const result = CreateProjectSchema.safeParse(body); 84 79 if (!result.success) { 85 80 return c.json({ error: result.error.issues[0].message }, 400); 86 81 } 87 - 82 + 88 83 const { name, color } = result.data; 89 - 84 + 90 85 try { 91 86 const info = db.prepare('INSERT INTO projects (name, color) VALUES (?, ?)').run(name, color); 92 87 const project = db.prepare(` 93 88 SELECT 94 - p.id, 95 - p.name, 96 - p.color, 97 - p.created_at, 89 + p.*, 98 90 0 as active_task_count 99 91 FROM projects p 100 92 WHERE p.id = ? 101 93 `).get(info.lastInsertRowid); 102 94 return c.json(project, 201); 103 - } catch (error: any) { 104 - if (error && error.code === 'SQLITE_CONSTRAINT_UNIQUE') { 95 + } catch (error) { 96 + if (error instanceof Error && 'code' in error && error.code === 'SQLITE_CONSTRAINT_UNIQUE') { 105 97 return c.json({ error: 'Project name already exists' }, 400); 106 98 } 107 99 throw error; ··· 113 105 const id = c.req.param('id'); 114 106 const existing = db.prepare('SELECT * FROM projects WHERE id = ?').get(id); 115 107 if (!existing) return c.json({ error: 'Project not found' }, 404); 116 - 108 + 117 109 let body; 118 110 try { 119 111 body = await c.req.json(); 120 112 } catch { 121 113 return c.json({ error: 'Invalid JSON' }, 400); 122 114 } 123 - 115 + 124 116 const result = UpdateProjectSchema.safeParse(body); 125 117 if (!result.success) { 126 118 return c.json({ error: result.error.issues[0].message }, 400); 127 119 } 128 - 120 + 129 121 const updates = result.data; 130 - 122 + 131 123 try { 132 124 if (updates.name !== undefined) { 133 125 db.prepare('UPDATE projects SET name = ? WHERE id = ?').run(updates.name, id); ··· 135 127 if (updates.color !== undefined) { 136 128 db.prepare('UPDATE projects SET color = ? WHERE id = ?').run(updates.color, id); 137 129 } 138 - 130 + 139 131 const updated = db.prepare(` 140 132 SELECT 141 - p.id, 142 - p.name, 143 - p.color, 144 - p.created_at, 145 - COALESCE(task_counts.active_count, 0) as active_task_count 133 + p.*, 134 + COALESCE(t.active_count, 0) as active_task_count 146 135 FROM projects p 147 136 LEFT JOIN ( 148 137 SELECT 149 138 project_id, 150 139 COUNT(*) as active_count 151 140 FROM tasks 152 - WHERE completed = 0 141 + WHERE completed = 0 AND project_id = ? 153 142 GROUP BY project_id 154 - ) task_counts ON p.id = task_counts.project_id 143 + ) t ON p.id = t.project_id 155 144 WHERE p.id = ? 156 - `).get(id); 145 + `).get(id, id); 146 + 157 147 return c.json(updated); 158 - } catch (error: any) { 159 - if (error && error.code === 'SQLITE_CONSTRAINT_UNIQUE') { 148 + } catch (error) { 149 + if (error instanceof Error && 'code' in error && error.code === 'SQLITE_CONSTRAINT_UNIQUE') { 160 150 return c.json({ error: 'Project name already exists' }, 400); 161 151 } 162 152 throw error; ··· 168 158 const id = c.req.param('id'); 169 159 const existing = db.prepare('SELECT * FROM projects WHERE id = ?').get(id); 170 160 if (!existing) return c.json({ error: 'Project not found' }, 404); 171 - 161 + 172 162 // Check for tasks in this project 173 163 const taskCount = db.prepare('SELECT COUNT(*) as count FROM tasks WHERE project_id = ?').get(id) as { count: number }; 174 164 if (taskCount.count > 0) { 175 165 return c.json({ error: 'Cannot delete project that contains tasks' }, 400); 176 166 } 177 - 167 + 178 168 db.prepare('DELETE FROM projects WHERE id = ?').run(id); 179 169 return c.body(null, 204); 180 170 }); ··· 183 173 184 174 /** @internal Phoenix VCS traceability — do not remove. */ 185 175 export const _phoenix = { 186 - iu_id: '4144f40fc7c93037f0d2e7445ad0d5911b755792604940786e5ea04a654683b6', 176 + iu_id: '85a06deb292fbc006424c2365b05d081f4f92fa2581e04a09ee20cb9f7295067', 187 177 name: 'Projects', 188 178 risk_tier: 'high', 189 179 canon_ids: [6 as const],
+38 -59
examples/todo-app/src/generated/todos/tasks.ts
··· 2 2 import { db, registerMigration } from '../../db.js'; 3 3 import { z } from 'zod'; 4 4 5 - // Register table migrations 6 - registerMigration('projects', ` 7 - CREATE TABLE IF NOT EXISTS projects ( 8 - id INTEGER PRIMARY KEY AUTOINCREMENT, 9 - name TEXT NOT NULL UNIQUE, 10 - color TEXT NOT NULL DEFAULT '#3b82f6', 11 - created_at TEXT NOT NULL DEFAULT (datetime('now')) 12 - ) 13 - `); 14 - 5 + // Register table migration 15 6 registerMigration('tasks', ` 16 7 CREATE TABLE IF NOT EXISTS tasks ( 17 8 id INTEGER PRIMARY KEY AUTOINCREMENT, 18 9 title TEXT NOT NULL, 19 10 description TEXT NOT NULL DEFAULT '', 20 - priority TEXT NOT NULL DEFAULT 'normal' CHECK (priority IN ('urgent', 'high', 'normal', 'low')), 11 + priority TEXT NOT NULL DEFAULT 'normal', 21 12 due_date TEXT, 22 13 completed INTEGER NOT NULL DEFAULT 0, 23 - project_id INTEGER REFERENCES projects(id), 14 + project_id INTEGER, 24 15 created_at TEXT NOT NULL DEFAULT (datetime('now')) 25 16 ) 26 17 `); ··· 28 19 const CreateTaskSchema = z.object({ 29 20 title: z.string().min(1, 'Title is required').max(500, 'Title must not exceed 500 characters'), 30 21 description: z.string().max(5000, 'Description must not exceed 5000 characters').optional().default(''), 31 - priority: z.enum(['urgent', 'high', 'normal', 'low']).optional().default('normal'), 22 + priority: z.enum(['urgent', 'high', 'normal', 'low']).default('normal'), 32 23 due_date: z.string().refine((date) => { 33 24 if (!date) return true; 34 25 const parsed = new Date(date); 35 - return !isNaN(parsed.getTime()) && parsed.getFullYear() > 1900 && parsed.getFullYear() < 3000; 36 - }, 'Invalid due date').optional(), 26 + return !isNaN(parsed.getTime()); 27 + }, 'Invalid date format').optional(), 37 28 project_id: z.number().int().nullable().optional(), 38 29 }); 39 30 ··· 41 32 title: z.string().min(1, 'Title is required').max(500, 'Title must not exceed 500 characters').optional(), 42 33 description: z.string().max(5000, 'Description must not exceed 5000 characters').optional(), 43 34 priority: z.enum(['urgent', 'high', 'normal', 'low']).optional(), 44 - due_date: z.string().nullable().refine((date) => { 35 + due_date: z.string().refine((date) => { 45 36 if (!date) return true; 46 37 const parsed = new Date(date); 47 - return !isNaN(parsed.getTime()) && parsed.getFullYear() > 1900 && parsed.getFullYear() < 3000; 48 - }, 'Invalid due date').optional(), 38 + return !isNaN(parsed.getTime()); 39 + }, 'Invalid date format').nullable().optional(), 49 40 completed: z.number().int().min(0).max(1).optional(), 50 41 project_id: z.number().int().nullable().optional(), 51 42 }); 52 43 53 44 const router = new Hono(); 54 45 55 - // Stats endpoint - moved before /:id to avoid route conflicts 46 + // Stats endpoint 56 47 router.get('/stats', (c) => { 57 - const projectId = c.req.query('project_id'); 58 - let whereClause = ''; 59 - const params: (string | number)[] = []; 60 - 61 - if (projectId !== undefined) { 62 - if (projectId === 'inbox') { 63 - whereClause = 'WHERE project_id IS NULL'; 64 - } else { 65 - whereClause = 'WHERE project_id = ?'; 66 - params.push(Number(projectId)); 67 - } 68 - } 69 - 70 48 const stats = db.prepare(` 71 49 SELECT 72 50 COUNT(*) as total_tasks, 73 51 SUM(completed) as completed_tasks, 74 52 COUNT(CASE WHEN due_date < date('now') AND completed = 0 THEN 1 END) as overdue_tasks 75 - FROM tasks ${whereClause} 76 - `).get(...params) as { total_tasks: number; completed_tasks: number; overdue_tasks: number }; 53 + FROM tasks 54 + `).get() as { total_tasks: number; completed_tasks: number; overdue_tasks: number }; 77 55 78 56 const completion_percentage = stats.total_tasks > 0 79 - ? Math.round((stats.completed_tasks / stats.total_tasks) * 100) 57 + ? Math.round((stats.completed_tasks / stats.total_tasks) * 100) 80 58 : 0; 81 59 82 60 return c.json({ ··· 87 65 }); 88 66 }); 89 67 90 - // List tasks with filtering and sorting 68 + // List all tasks with filtering and sorting 91 69 router.get('/', (c) => { 92 70 let sql = ` 93 71 SELECT tasks.*, projects.name as project_name, projects.color as project_color ··· 95 73 LEFT JOIN projects ON tasks.project_id = projects.id 96 74 `; 97 75 const conditions: string[] = []; 98 - const params: (string | number)[] = []; 76 + const params: unknown[] = []; 99 77 100 78 const status = c.req.query('status'); 101 79 if (status === 'active') { ··· 104 82 conditions.push('tasks.completed = 1'); 105 83 } 106 84 107 - const projectId = c.req.query('project_id'); 108 - if (projectId !== undefined) { 109 - if (projectId === 'inbox') { 110 - conditions.push('tasks.project_id IS NULL'); 111 - } else { 112 - conditions.push('tasks.project_id = ?'); 113 - params.push(Number(projectId)); 114 - } 115 - } 116 - 117 85 const priority = c.req.query('priority'); 118 86 if (priority) { 119 87 conditions.push('tasks.priority = ?'); 120 88 params.push(priority); 121 89 } 122 90 91 + const projectId = c.req.query('project_id'); 92 + if (projectId) { 93 + conditions.push('tasks.project_id = ?'); 94 + params.push(Number(projectId)); 95 + } 96 + 123 97 if (conditions.length > 0) { 124 98 sql += ' WHERE ' + conditions.join(' AND '); 125 99 } 126 100 127 - // Sort by urgency and overdue status first, then by creation date 101 + // Sort by urgency and overdue status first, then by due date 128 102 sql += ` ORDER BY 129 103 CASE tasks.priority WHEN 'urgent' THEN 0 WHEN 'high' THEN 1 WHEN 'normal' THEN 2 WHEN 'low' THEN 3 END, 130 104 CASE WHEN tasks.due_date < date('now') AND tasks.completed = 0 THEN 0 ELSE 1 END, 105 + tasks.due_date ASC, 131 106 tasks.created_at DESC 132 107 `; 133 108 ··· 150 125 151 126 // Create task 152 127 router.post('/', async (c) => { 153 - let body: unknown; 128 + let body; 154 129 try { 155 130 body = await c.req.json(); 156 131 } catch { ··· 164 139 165 140 const { title, description, priority, due_date, project_id } = result.data; 166 141 167 - // Validate project exists if provided 142 + // Validate project exists if project_id is provided 168 143 if (project_id != null) { 169 144 const project = db.prepare('SELECT id FROM projects WHERE id = ?').get(project_id); 170 145 if (!project) { ··· 175 150 const info = db.prepare(` 176 151 INSERT INTO tasks (title, description, priority, due_date, project_id) 177 152 VALUES (?, ?, ?, ?, ?) 178 - `).run(title, description, priority, due_date ?? null, project_id ?? null); 153 + `).run(title, description, priority, due_date || null, project_id || null); 179 154 180 155 const task = db.prepare(` 181 156 SELECT tasks.*, projects.name as project_name, projects.color as project_color ··· 190 165 // Update task 191 166 router.patch('/:id', async (c) => { 192 167 const id = c.req.param('id'); 193 - const existing = db.prepare('SELECT id FROM tasks WHERE id = ?').get(id); 194 - if (!existing) return c.json({ error: 'Task not found' }, 404); 168 + const existing = db.prepare('SELECT * FROM tasks WHERE id = ?').get(id); 169 + if (!existing) { 170 + return c.json({ error: 'Task not found' }, 404); 171 + } 195 172 196 - let body: unknown; 173 + let body; 197 174 try { 198 175 body = await c.req.json(); 199 176 } catch { ··· 207 184 208 185 const updates = result.data; 209 186 210 - // Validate project exists if provided 211 - if (updates.project_id !== undefined && updates.project_id !== null) { 187 + // Validate project exists if project_id is being updated 188 + if (updates.project_id != null) { 212 189 const project = db.prepare('SELECT id FROM projects WHERE id = ?').get(updates.project_id); 213 190 if (!project) { 214 191 return c.json({ error: 'Project not found' }, 400); ··· 248 225 // Delete task 249 226 router.delete('/:id', (c) => { 250 227 const id = c.req.param('id'); 251 - const existing = db.prepare('SELECT id FROM tasks WHERE id = ?').get(id); 252 - if (!existing) return c.json({ error: 'Task not found' }, 404); 228 + const existing = db.prepare('SELECT * FROM tasks WHERE id = ?').get(id); 229 + if (!existing) { 230 + return c.json({ error: 'Task not found' }, 404); 231 + } 253 232 254 233 db.prepare('DELETE FROM tasks WHERE id = ?').run(id); 255 234 return c.body(null, 204); ··· 259 238 260 239 /** @internal Phoenix VCS traceability — do not remove. */ 261 240 export const _phoenix = { 262 - iu_id: '72e5373eca8ea41d110527651ae938509fb7c778e5a71c99c46d83839e91915c', 241 + iu_id: '04b4a61a7d79a57f9b58fc35c3e512fbac19b8f7786e38db2b161bcf12f3a8db', 263 242 name: 'Tasks', 264 243 risk_tier: 'high', 265 244 canon_ids: [14 as const],
-838
examples/todo-app/src/generated/todos/web-experience.ts
··· 1 - import { Hono } from 'hono'; 2 - import { db, registerMigration } from '../../db.js'; 3 - import { z } from 'zod'; 4 - 5 - const router = new Hono(); 6 - 7 - router.get('/', (c) => { 8 - return c.html(`<!DOCTYPE html> 9 - <html lang="en"> 10 - <head> 11 - <meta charset="UTF-8"> 12 - <meta name="viewport" content="width=device-width, initial-scale=1.0"> 13 - <title>Task Manager</title> 14 - <style> 15 - * { 16 - margin: 0; 17 - padding: 0; 18 - box-sizing: border-box; 19 - } 20 - 21 - body { 22 - font-family: system-ui, -apple-system, sans-serif; 23 - background-color: #f8f9fa; 24 - color: #212529; 25 - line-height: 1.5; 26 - } 27 - 28 - .container { 29 - max-width: 800px; 30 - margin: 0 auto; 31 - padding: 20px; 32 - } 33 - 34 - .header { 35 - text-align: center; 36 - margin-bottom: 30px; 37 - } 38 - 39 - .header h1 { 40 - color: #495057; 41 - margin-bottom: 10px; 42 - } 43 - 44 - .add-task-form { 45 - background: white; 46 - border-radius: 8px; 47 - padding: 20px; 48 - margin-bottom: 30px; 49 - box-shadow: 0 2px 4px rgba(0,0,0,0.1); 50 - } 51 - 52 - .form-row { 53 - display: flex; 54 - gap: 15px; 55 - margin-bottom: 15px; 56 - flex-wrap: wrap; 57 - } 58 - 59 - .form-group { 60 - flex: 1; 61 - min-width: 200px; 62 - } 63 - 64 - .form-group.full-width { 65 - flex: 100%; 66 - } 67 - 68 - label { 69 - display: block; 70 - margin-bottom: 5px; 71 - font-weight: 500; 72 - color: #495057; 73 - } 74 - 75 - input, textarea, select { 76 - width: 100%; 77 - padding: 8px 12px; 78 - border: 1px solid #ced4da; 79 - border-radius: 4px; 80 - font-size: 14px; 81 - } 82 - 83 - textarea { 84 - resize: vertical; 85 - min-height: 80px; 86 - } 87 - 88 - .description-toggle { 89 - background: none; 90 - border: none; 91 - color: #007bff; 92 - cursor: pointer; 93 - font-size: 14px; 94 - text-decoration: underline; 95 - margin-bottom: 10px; 96 - } 97 - 98 - .description-section { 99 - display: none; 100 - } 101 - 102 - .description-section.expanded { 103 - display: block; 104 - } 105 - 106 - .btn { 107 - background: #007bff; 108 - color: white; 109 - border: none; 110 - padding: 10px 20px; 111 - border-radius: 4px; 112 - cursor: pointer; 113 - font-size: 14px; 114 - font-weight: 500; 115 - } 116 - 117 - .btn:hover { 118 - background: #0056b3; 119 - } 120 - 121 - .btn-danger { 122 - background: #dc3545; 123 - } 124 - 125 - .btn-danger:hover { 126 - background: #c82333; 127 - } 128 - 129 - .btn-small { 130 - padding: 4px 8px; 131 - font-size: 12px; 132 - } 133 - 134 - .sidebar { 135 - background: white; 136 - border-radius: 8px; 137 - padding: 20px; 138 - margin-bottom: 20px; 139 - box-shadow: 0 2px 4px rgba(0,0,0,0.1); 140 - } 141 - 142 - .sidebar h3 { 143 - margin-bottom: 15px; 144 - color: #495057; 145 - } 146 - 147 - .project-list { 148 - list-style: none; 149 - } 150 - 151 - .project-item { 152 - display: flex; 153 - align-items: center; 154 - padding: 8px 0; 155 - cursor: pointer; 156 - border-radius: 4px; 157 - padding-left: 8px; 158 - padding-right: 8px; 159 - } 160 - 161 - .project-item:hover { 162 - background: #f8f9fa; 163 - } 164 - 165 - .project-item.active { 166 - background: #e3f2fd; 167 - } 168 - 169 - .project-dot { 170 - width: 12px; 171 - height: 12px; 172 - border-radius: 50%; 173 - margin-right: 10px; 174 - } 175 - 176 - .project-name { 177 - flex: 1; 178 - } 179 - 180 - .task-count { 181 - background: #6c757d; 182 - color: white; 183 - border-radius: 12px; 184 - padding: 2px 8px; 185 - font-size: 12px; 186 - } 187 - 188 - .filters { 189 - background: white; 190 - border-radius: 8px; 191 - padding: 15px; 192 - margin-bottom: 20px; 193 - box-shadow: 0 2px 4px rgba(0,0,0,0.1); 194 - } 195 - 196 - .filter-row { 197 - display: flex; 198 - gap: 15px; 199 - align-items: center; 200 - flex-wrap: wrap; 201 - } 202 - 203 - .filter-buttons { 204 - display: flex; 205 - gap: 5px; 206 - } 207 - 208 - .filter-btn { 209 - background: #f8f9fa; 210 - border: 1px solid #dee2e6; 211 - padding: 6px 12px; 212 - border-radius: 4px; 213 - cursor: pointer; 214 - font-size: 14px; 215 - } 216 - 217 - .filter-btn.active { 218 - background: #007bff; 219 - color: white; 220 - border-color: #007bff; 221 - } 222 - 223 - .task-list { 224 - background: white; 225 - border-radius: 8px; 226 - box-shadow: 0 2px 4px rgba(0,0,0,0.1); 227 - } 228 - 229 - .task-item { 230 - display: flex; 231 - align-items: center; 232 - padding: 15px; 233 - border-bottom: 1px solid #dee2e6; 234 - position: relative; 235 - } 236 - 237 - .task-item:last-child { 238 - border-bottom: none; 239 - } 240 - 241 - .task-item:hover .delete-btn { 242 - opacity: 1; 243 - } 244 - 245 - .task-item.completed { 246 - opacity: 0.6; 247 - } 248 - 249 - .task-item.completed .task-title { 250 - text-decoration: line-through; 251 - } 252 - 253 - .task-item.overdue { 254 - border-left: 4px solid #dc3545; 255 - } 256 - 257 - .task-checkbox { 258 - margin-right: 15px; 259 - width: 18px; 260 - height: 18px; 261 - } 262 - 263 - .task-content { 264 - flex: 1; 265 - } 266 - 267 - .task-title { 268 - font-weight: 500; 269 - margin-bottom: 5px; 270 - cursor: pointer; 271 - } 272 - 273 - .task-meta { 274 - display: flex; 275 - gap: 10px; 276 - align-items: center; 277 - font-size: 12px; 278 - color: #6c757d; 279 - } 280 - 281 - .priority-badge { 282 - padding: 2px 6px; 283 - border-radius: 3px; 284 - font-size: 11px; 285 - font-weight: 500; 286 - } 287 - 288 - .priority-urgent { 289 - background: #dc3545; 290 - color: white; 291 - } 292 - 293 - .priority-high { 294 - background: #fd7e14; 295 - color: white; 296 - } 297 - 298 - .priority-normal { 299 - background: #007bff; 300 - color: white; 301 - } 302 - 303 - .priority-low { 304 - background: #6c757d; 305 - color: white; 306 - } 307 - 308 - .overdue-badge { 309 - background: #dc3545; 310 - color: white; 311 - padding: 2px 6px; 312 - border-radius: 3px; 313 - font-size: 11px; 314 - font-weight: 500; 315 - } 316 - 317 - .delete-btn { 318 - position: absolute; 319 - right: 15px; 320 - opacity: 0; 321 - transition: opacity 0.2s; 322 - } 323 - 324 - .edit-form { 325 - background: #f8f9fa; 326 - padding: 15px; 327 - border-radius: 4px; 328 - margin-top: 10px; 329 - } 330 - 331 - .edit-form .form-row { 332 - margin-bottom: 10px; 333 - } 334 - 335 - .edit-actions { 336 - display: flex; 337 - gap: 10px; 338 - margin-top: 10px; 339 - } 340 - 341 - .empty-state { 342 - text-align: center; 343 - padding: 40px; 344 - color: #6c757d; 345 - } 346 - 347 - .stats-summary { 348 - background: white; 349 - border-radius: 8px; 350 - padding: 15px; 351 - margin-bottom: 20px; 352 - box-shadow: 0 2px 4px rgba(0,0,0,0.1); 353 - } 354 - 355 - .stats-row { 356 - display: flex; 357 - gap: 20px; 358 - font-size: 14px; 359 - } 360 - 361 - .stat-item { 362 - color: #6c757d; 363 - } 364 - 365 - .stat-value { 366 - font-weight: 600; 367 - color: #495057; 368 - } 369 - </style> 370 - </head> 371 - <body> 372 - <div class="container"> 373 - <div class="header"> 374 - <h1>Task Manager</h1> 375 - </div> 376 - 377 - <div class="add-task-form"> 378 - <form id="add-task-form"> 379 - <div class="form-row"> 380 - <div class="form-group"> 381 - <label for="task-title">Title</label> 382 - <input type="text" id="task-title" name="title" required> 383 - </div> 384 - <div class="form-group"> 385 - <label for="task-priority">Priority</label> 386 - <select id="task-priority" name="priority"> 387 - <option value="normal">Normal</option> 388 - <option value="low">Low</option> 389 - <option value="high">High</option> 390 - <option value="urgent">Urgent</option> 391 - </select> 392 - </div> 393 - </div> 394 - <div class="form-row"> 395 - <div class="form-group"> 396 - <label for="task-project">Project</label> 397 - <select id="task-project" name="project_id"> 398 - <option value="">Inbox</option> 399 - </select> 400 - </div> 401 - <div class="form-group"> 402 - <label for="task-due-date">Due Date</label> 403 - <input type="date" id="task-due-date" name="due_date"> 404 - </div> 405 - </div> 406 - <button type="button" class="description-toggle" onclick="toggleDescription()">+ Add Description</button> 407 - <div class="description-section" id="description-section"> 408 - <div class="form-group full-width"> 409 - <label for="task-description">Description</label> 410 - <textarea id="task-description" name="description"></textarea> 411 - </div> 412 - </div> 413 - <button type="submit" class="btn">Add Task</button> 414 - </form> 415 - </div> 416 - 417 - <div class="sidebar"> 418 - <h3>Projects</h3> 419 - <ul class="project-list" id="project-list"> 420 - <li class="project-item active" data-project-id="" onclick="selectProject('')"> 421 - <div class="project-dot" style="background: #6c757d;"></div> 422 - <span class="project-name">Inbox</span> 423 - <span class="task-count" id="inbox-count">0</span> 424 - </li> 425 - </ul> 426 - </div> 427 - 428 - <div class="filters"> 429 - <div class="filter-row"> 430 - <div class="filter-buttons"> 431 - <button class="filter-btn active" data-status="all" onclick="setStatusFilter('all')">All</button> 432 - <button class="filter-btn" data-status="active" onclick="setStatusFilter('active')">Active</button> 433 - <button class="filter-btn" data-status="completed" onclick="setStatusFilter('completed')">Completed</button> 434 - </div> 435 - <div class="form-group" style="min-width: 150px;"> 436 - <select id="priority-filter" onchange="setPriorityFilter(this.value)"> 437 - <option value="">All Priorities</option> 438 - <option value="urgent">Urgent</option> 439 - <option value="high">High</option> 440 - <option value="normal">Normal</option> 441 - <option value="low">Low</option> 442 - </select> 443 - </div> 444 - </div> 445 - </div> 446 - 447 - <div class="stats-summary" id="stats-summary"> 448 - <div class="stats-row"> 449 - <div class="stat-item">Total: <span class="stat-value" id="total-tasks">0</span></div> 450 - <div class="stat-item">Active: <span class="stat-value" id="active-tasks">0</span></div> 451 - <div class="stat-item">Completed: <span class="stat-value" id="completed-tasks">0</span></div> 452 - <div class="stat-item">Overdue: <span class="stat-value" id="overdue-tasks">0</span></div> 453 - </div> 454 - </div> 455 - 456 - <div class="task-list" id="task-list"> 457 - <div class="empty-state"> 458 - <p>No tasks found. Add your first task above!</p> 459 - </div> 460 - </div> 461 - </div> 462 - 463 - <script> 464 - let currentProject = ''; 465 - let currentStatus = 'all'; 466 - let currentPriority = ''; 467 - let editingTaskId = null; 468 - 469 - // Initialize the app 470 - document.addEventListener('DOMContentLoaded', function() { 471 - loadProjects(); 472 - loadTasks(); 473 - 474 - document.getElementById('add-task-form').addEventListener('submit', handleAddTask); 475 - }); 476 - 477 - function toggleDescription() { 478 - const section = document.getElementById('description-section'); 479 - const toggle = document.querySelector('.description-toggle'); 480 - 481 - if (section.classList.contains('expanded')) { 482 - section.classList.remove('expanded'); 483 - toggle.textContent = '+ Add Description'; 484 - } else { 485 - section.classList.add('expanded'); 486 - toggle.textContent = '- Hide Description'; 487 - } 488 - } 489 - 490 - async function loadProjects() { 491 - try { 492 - const response = await fetch('/projects'); 493 - const projects = await response.json(); 494 - 495 - const projectSelect = document.getElementById('task-project'); 496 - const projectList = document.getElementById('project-list'); 497 - 498 - // Clear existing options (except Inbox) 499 - projectSelect.innerHTML = '<option value="">Inbox</option>'; 500 - 501 - // Keep inbox item, remove others 502 - const inboxItem = projectList.querySelector('[data-project-id=""]'); 503 - projectList.innerHTML = ''; 504 - projectList.appendChild(inboxItem); 505 - 506 - projects.forEach(project => { 507 - // Add to dropdown 508 - const option = document.createElement('option'); 509 - option.value = project.id; 510 - option.textContent = project.name; 511 - projectSelect.appendChild(option); 512 - 513 - // Add to sidebar 514 - const li = document.createElement('li'); 515 - li.className = 'project-item'; 516 - li.setAttribute('data-project-id', project.id); 517 - li.onclick = () => selectProject(project.id); 518 - li.innerHTML = \` 519 - <div class="project-dot" style="background: \${project.color};"></div> 520 - <span class="project-name">\${project.name}</span> 521 - <span class="task-count" id="project-\${project.id}-count">0</span> 522 - \`; 523 - projectList.appendChild(li); 524 - }); 525 - 526 - updateProjectCounts(); 527 - } catch (error) { 528 - console.error('Failed to load projects:', error); 529 - } 530 - } 531 - 532 - async function loadTasks() { 533 - try { 534 - const params = new URLSearchParams(); 535 - if (currentProject) params.append('project_id', currentProject); 536 - if (currentStatus === 'active') params.append('completed', '0'); 537 - if (currentStatus === 'completed') params.append('completed', '1'); 538 - if (currentPriority) params.append('priority', currentPriority); 539 - 540 - const response = await fetch('/tasks?' + params.toString()); 541 - const tasks = await response.json(); 542 - 543 - renderTasks(tasks); 544 - updateStats(tasks); 545 - updateProjectCounts(); 546 - } catch (error) { 547 - console.error('Failed to load tasks:', error); 548 - } 549 - } 550 - 551 - function renderTasks(tasks) { 552 - const taskList = document.getElementById('task-list'); 553 - 554 - if (tasks.length === 0) { 555 - taskList.innerHTML = '<div class="empty-state"><p>No tasks found.</p></div>'; 556 - return; 557 - } 558 - 559 - const today = new Date().toISOString().split('T')[0]; 560 - 561 - taskList.innerHTML = tasks.map(task => { 562 - const isOverdue = task.due_date && task.due_date < today && !task.completed; 563 - const priorityClass = \`priority-\${task.priority}\`; 564 - 565 - return \` 566 - <div class="task-item \${task.completed ? 'completed' : ''} \${isOverdue ? 'overdue' : ''}" data-task-id="\${task.id}"> 567 - <input type="checkbox" class="task-checkbox" \${task.completed ? 'checked' : ''} 568 - onchange="toggleTaskCompletion(\${task.id}, this.checked)"> 569 - <div class="task-content"> 570 - <div class="task-title" onclick="startEditTask(\${task.id})">\${task.title}</div> 571 - <div class="task-meta"> 572 - <span class="priority-badge \${priorityClass}">\${task.priority.toUpperCase()}</span> 573 - \${task.project_name ? \`<span>📁 \${task.project_name}</span>\` : '<span>📥 Inbox</span>'} 574 - \${task.due_date ? \`<span>📅 \${formatDate(task.due_date)}</span>\` : ''} 575 - \${isOverdue ? '<span class="overdue-badge">OVERDUE</span>' : ''} 576 - </div> 577 - \${task.description ? \`<div style="margin-top: 5px; font-size: 14px; color: #6c757d;">\${task.description}</div>\` : ''} 578 - </div> 579 - <button class="btn btn-danger btn-small delete-btn" onclick="deleteTask(\${task.id})">Delete</button> 580 - </div> 581 - \`; 582 - }).join(''); 583 - } 584 - 585 - function updateStats(tasks) { 586 - const total = tasks.length; 587 - const active = tasks.filter(t => !t.completed).length; 588 - const completed = tasks.filter(t => t.completed).length; 589 - const today = new Date().toISOString().split('T')[0]; 590 - const overdue = tasks.filter(t => t.due_date && t.due_date < today && !t.completed).length; 591 - 592 - document.getElementById('total-tasks').textContent = total; 593 - document.getElementById('active-tasks').textContent = active; 594 - document.getElementById('completed-tasks').textContent = completed; 595 - document.getElementById('overdue-tasks').textContent = overdue; 596 - } 597 - 598 - async function updateProjectCounts() { 599 - try { 600 - const response = await fetch('/tasks'); 601 - const allTasks = await response.json(); 602 - 603 - // Count inbox tasks (no project) 604 - const inboxCount = allTasks.filter(t => !t.project_id && !t.completed).length; 605 - document.getElementById('inbox-count').textContent = inboxCount; 606 - 607 - // Count tasks per project 608 - const projectCounts = {}; 609 - allTasks.forEach(task => { 610 - if (task.project_id && !task.completed) { 611 - projectCounts[task.project_id] = (projectCounts[task.project_id] || 0) + 1; 612 - } 613 - }); 614 - 615 - Object.entries(projectCounts).forEach(([projectId, count]) => { 616 - const countEl = document.getElementById(\`project-\${projectId}-count\`); 617 - if (countEl) countEl.textContent = count; 618 - }); 619 - 620 - // Reset counts for projects with no active tasks 621 - document.querySelectorAll('[id^="project-"][id$="-count"]').forEach(el => { 622 - const projectId = el.id.match(/project-(\\d+)-count/)?.[1]; 623 - if (projectId && !projectCounts[projectId]) { 624 - el.textContent = '0'; 625 - } 626 - }); 627 - } catch (error) { 628 - console.error('Failed to update project counts:', error); 629 - } 630 - } 631 - 632 - function selectProject(projectId) { 633 - currentProject = projectId; 634 - 635 - // Update active state 636 - document.querySelectorAll('.project-item').forEach(item => { 637 - item.classList.remove('active'); 638 - }); 639 - document.querySelector(\`[data-project-id="\${projectId}"]\`).classList.add('active'); 640 - 641 - loadTasks(); 642 - } 643 - 644 - function setStatusFilter(status) { 645 - currentStatus = status; 646 - 647 - // Update active state 648 - document.querySelectorAll('.filter-btn').forEach(btn => { 649 - btn.classList.remove('active'); 650 - }); 651 - document.querySelector(\`[data-status="\${status}"]\`).classList.add('active'); 652 - 653 - loadTasks(); 654 - } 655 - 656 - function setPriorityFilter(priority) { 657 - currentPriority = priority; 658 - loadTasks(); 659 - } 660 - 661 - async function handleAddTask(e) { 662 - e.preventDefault(); 663 - 664 - const formData = new FormData(e.target); 665 - const taskData = { 666 - title: formData.get('title'), 667 - description: formData.get('description') || '', 668 - priority: formData.get('priority'), 669 - project_id: formData.get('project_id') ? parseInt(formData.get('project_id')) : null, 670 - due_date: formData.get('due_date') || null 671 - }; 672 - 673 - try { 674 - const response = await fetch('/tasks', { 675 - method: 'POST', 676 - headers: { 'Content-Type': 'application/json' }, 677 - body: JSON.stringify(taskData) 678 - }); 679 - 680 - if (response.ok) { 681 - e.target.reset(); 682 - document.getElementById('description-section').classList.remove('expanded'); 683 - document.querySelector('.description-toggle').textContent = '+ Add Description'; 684 - loadTasks(); 685 - } else { 686 - const error = await response.json(); 687 - alert('Failed to create task: ' + error.error); 688 - } 689 - } catch (error) { 690 - console.error('Failed to create task:', error); 691 - alert('Failed to create task'); 692 - } 693 - } 694 - 695 - async function toggleTaskCompletion(taskId, completed) { 696 - try { 697 - const response = await fetch(\`/tasks/\${taskId}\`, { 698 - method: 'PATCH', 699 - headers: { 'Content-Type': 'application/json' }, 700 - body: JSON.stringify({ completed: completed ? 1 : 0 }) 701 - }); 702 - 703 - if (response.ok) { 704 - loadTasks(); 705 - } else { 706 - alert('Failed to update task'); 707 - } 708 - } catch (error) { 709 - console.error('Failed to update task:', error); 710 - alert('Failed to update task'); 711 - } 712 - } 713 - 714 - async function deleteTask(taskId) { 715 - if (!confirm('Are you sure you want to delete this task?')) return; 716 - 717 - try { 718 - const response = await fetch(\`/tasks/\${taskId}\`, { 719 - method: 'DELETE' 720 - }); 721 - 722 - if (response.ok) { 723 - loadTasks(); 724 - } else { 725 - alert('Failed to delete task'); 726 - } 727 - } catch (error) { 728 - console.error('Failed to delete task:', error); 729 - alert('Failed to delete task'); 730 - } 731 - } 732 - 733 - function startEditTask(taskId) { 734 - if (editingTaskId === taskId) return; 735 - 736 - // Cancel any existing edit 737 - if (editingTaskId) cancelEdit(); 738 - 739 - editingTaskId = taskId; 740 - const taskItem = document.querySelector(\`[data-task-id="\${taskId}"]\`); 741 - const taskContent = taskItem.querySelector('.task-content'); 742 - 743 - // Get current task data 744 - fetch(\`/tasks/\${taskId}\`) 745 - .then(response => response.json()) 746 - .then(task => { 747 - taskContent.innerHTML = \` 748 - <div class="edit-form"> 749 - <div class="form-row"> 750 - <div class="form-group"> 751 - <input type="text" id="edit-title" value="\${task.title}" placeholder="Task title"> 752 - </div> 753 - </div> 754 - <div class="form-row"> 755 - <div class="form-group"> 756 - <select id="edit-priority"> 757 - <option value="low" \${task.priority === 'low' ? 'selected' : ''}>Low</option> 758 - <option value="normal" \${task.priority === 'normal' ? 'selected' : ''}>Normal</option> 759 - <option value="high" \${task.priority === 'high' ? 'selected' : ''}>High</option> 760 - <option value="urgent" \${task.priority === 'urgent' ? 'selected' : ''}>Urgent</option> 761 - </select> 762 - </div> 763 - <div class="form-group"> 764 - <input type="date" id="edit-due-date" value="\${task.due_date || ''}"> 765 - </div> 766 - </div> 767 - <div class="form-row"> 768 - <div class="form-group full-width"> 769 - <textarea id="edit-description" placeholder="Description">\${task.description || ''}</textarea> 770 - </div> 771 - </div> 772 - <div class="edit-actions"> 773 - <button class="btn btn-small" onclick="saveEdit(\${taskId})">Save</button> 774 - <button class="btn btn-small" onclick="cancelEdit()" style="background: #6c757d;">Cancel</button> 775 - </div> 776 - </div> 777 - \`; 778 - }); 779 - } 780 - 781 - async function saveEdit(taskId) { 782 - const title = document.getElementById('edit-title').value; 783 - const priority = document.getElementById('edit-priority').value; 784 - const dueDate = document.getElementById('edit-due-date').value; 785 - const description = document.getElementById('edit-description').value; 786 - 787 - if (!title.trim()) { 788 - alert('Title is required'); 789 - return; 790 - } 791 - 792 - try { 793 - const response = await fetch(\`/tasks/\${taskId}\`, { 794 - method: 'PATCH', 795 - headers: { 'Content-Type': 'application/json' }, 796 - body: JSON.stringify({ 797 - title: title.trim(), 798 - priority, 799 - due_date: dueDate || null, 800 - description: description.trim() 801 - }) 802 - }); 803 - 804 - if (response.ok) { 805 - editingTaskId = null; 806 - loadTasks(); 807 - } else { 808 - const error = await response.json(); 809 - alert('Failed to update task: ' + error.error); 810 - } 811 - } catch (error) { 812 - console.error('Failed to update task:', error); 813 - alert('Failed to update task'); 814 - } 815 - } 816 - 817 - function cancelEdit() { 818 - editingTaskId = null; 819 - loadTasks(); 820 - } 821 - 822 - function formatDate(dateStr) { 823 - const date = new Date(dateStr); 824 - return date.toLocaleDateString(); 825 - } 826 - </script> 827 - </body> 828 - </html>`); 829 - }); 830 - 831 - export default router; 832 - 833 - export const _phoenix = { 834 - iu_id: '335590ecf9457e5b14124f79e4d9399888f58b7aff87edd6a264b6aa6fdc2d48', 835 - name: 'Web Experience', 836 - risk_tier: 'high', 837 - canon_ids: [5 as const], 838 - } as const;
-18
examples/todo-app/src/server.ts
··· 1 - import { serve } from '@hono/node-server'; 2 - import { app, mount } from './app.js'; 3 - import { runMigrations } from './db.js'; 4 - 5 - // Generated route modules 6 - import projects from './generated/todos/projects.js'; 7 - import tasks from './generated/todos/tasks.js'; 8 - import web_experience from './generated/todos/web-experience.js'; 9 - 10 - // Mount routes 11 - mount('/projects', projects); 12 - mount('/tasks', tasks); 13 - mount('', web_experience); 14 - 15 - const port = parseInt(process.env.PORT ?? '3000', 10); 16 - runMigrations(); 17 - console.log(`Server running at http://localhost:${port}`); 18 - serve({ fetch: app.fetch, port });
+34
experiments/deep-improvement-report.md
··· 1 + # Phoenix Deep Improvement: Autoresearch Report 2 + 3 + ## Categories 4 + 5 + ### 1. Type Classification Accuracy (TypeAcc) 6 + Current: 89% avg across 18 gold specs 7 + Target: 95%+ 8 + Levers: scoring weights, confidence formula, tie-breaking, action-verb detection 9 + 10 + ### 2. Edge Inference Quality (D-Rate / Untyped Edge Rate) 11 + Current: 6% avg 12 + Target: <3% 13 + Levers: SAME_TYPE_REFINE_THRESHOLD, DOC_FREQ_CUTOFF, MIN_SHARED_TAGS, fingerprint precision 14 + 15 + ### 3. Code Generation Reliability (arch eval pass rate) 16 + Current: 100% on simple spec, untested on regeneration variance 17 + Target: 100% across 5 consecutive bootstraps 18 + Levers: prompt wording, retry logic, architecture examples 19 + 20 + ### 4. Change Classification Accuracy 21 + Current: untested (no gold-standard change pairs) 22 + Target: establish baseline, then improve 23 + Levers: CLASS_A/B/D thresholds, confidence formula, anchor overlap 24 + 25 + ### 5. Deduplication Precision 26 + Current: unmeasured 27 + Target: establish baseline, then improve 28 + Levers: JACCARD_DEDUP_THRESHOLD, fingerprint length, type compatibility rules 29 + 30 + --- 31 + 32 + ## Experiment Log 33 + 34 + (Updated as experiments run)
+16 -16
experiments/diagnose.ts
··· 8 8 const specs = [ 9 9 { name: 'Settlements', path: 'examples/settle-up/spec/settlements.md', docId: 'spec/settlements.md', 10 10 gold: [ 11 - { statement: 'minimum number of payments', type: 'REQUIREMENT' }, 12 - { statement: 'same net effect', type: 'INVARIANT' }, 13 - { statement: 'cycles', type: 'INVARIANT' }, 14 - { statement: 'zero balances', type: 'INVARIANT' }, 15 - { statement: 'exceeds', type: 'CONSTRAINT' }, 11 + { statement: 'minimum number of payments', type: 'CONSTRAINT' }, 12 + { statement: 'same net effect', type: 'REQUIREMENT' }, 13 + { statement: 'cycles', type: 'REQUIREMENT' }, 14 + { statement: 'all balances are zero', type: 'REQUIREMENT' }, 15 + { statement: 'exceeds', type: 'REQUIREMENT' }, 16 16 { statement: 'settled up', type: 'REQUIREMENT' }, 17 17 ]}, 18 18 { name: 'TicTacToe', path: 'examples/tictactoe/spec/game-engine.md', docId: 'spec/game-engine.md', 19 19 gold: [ 20 - { statement: '3', type: 'CONSTRAINT' }, 21 - { statement: 'cell already occupied', type: 'REQUIREMENT' }, 22 - { statement: 'x always moves first', type: 'CONSTRAINT' }, 23 - { statement: 'win', type: 'REQUIREMENT' }, 20 + { statement: '3x3 grid', type: 'REQUIREMENT' }, 21 + { statement: 'already occupied', type: 'REQUIREMENT' }, 22 + { statement: 'x always moves first', type: 'INVARIANT' }, 23 + { statement: 'win detection', type: 'CONTEXT' }, 24 24 { statement: 'draw', type: 'REQUIREMENT' }, 25 - { statement: 'game status', type: 'DEFINITION' }, 25 + { statement: 'game must track the current status', type: 'REQUIREMENT' }, 26 26 ]}, 27 27 { name: 'Pixel Wars', path: 'examples/pixel-wars/spec/game.md', docId: 'spec/game.md', 28 28 gold: [ 29 - { statement: '20', type: 'CONSTRAINT' }, 29 + { statement: '20 columns', type: 'CONTEXT' }, 30 30 { statement: 'cooldown', type: 'CONSTRAINT' }, 31 31 { statement: 'rejected', type: 'REQUIREMENT' }, 32 32 { statement: 'broadcast', type: 'REQUIREMENT' }, 33 - { statement: '120 seconds', type: 'CONSTRAINT' }, 34 - { statement: 'round-robin', type: 'CONSTRAINT' }, 33 + { statement: '120 seconds', type: 'CONTEXT' }, 34 + { statement: 'round-robin', type: 'CONTEXT' }, 35 35 ]}, 36 36 { name: 'User Service', path: 'examples/microservices/spec/user-service.md', docId: 'spec/user-service.md', 37 37 gold: [ 38 - { statement: 'system of record', type: 'DEFINITION' }, 39 - { statement: 'email addresses must be unique', type: 'CONSTRAINT' }, 38 + { statement: 'system of record', type: 'CONTEXT' }, 39 + { statement: 'email addresses must be unique', type: 'REQUIREMENT' }, 40 40 { statement: 'never store or return plaintext passwords', type: 'INVARIANT' }, 41 41 { statement: 'soft delete', type: 'REQUIREMENT' }, 42 42 { statement: '100 characters', type: 'CONSTRAINT' }, 43 - { statement: 'locked for 1 hour', type: 'CONSTRAINT' }, 43 + { statement: 'locked for 1 hour', type: 'REQUIREMENT' }, 44 44 { statement: 'parameterized statements', type: 'CONSTRAINT' }, 45 45 { statement: 'event payloads must never contain passwords', type: 'INVARIANT' }, 46 46 { statement: '50 results per page', type: 'CONSTRAINT' },
+104
experiments/eval-classifier.ts
··· 1 + /** 2 + * Change Classification Eval — tests classifier accuracy on known change pairs. 3 + */ 4 + import { parseSpec } from '../src/spec-parser.js'; 5 + import { classifyChange } from '../src/classifier.js'; 6 + import { extractCanonicalNodes } from '../src/canonicalizer.js'; 7 + import { diffClauses } from '../src/diff.js'; 8 + import { DiffType } from '../src/models/clause.js'; 9 + 10 + interface ChangeTestCase { 11 + name: string; 12 + before: string; 13 + after: string; 14 + expectedClass: 'A' | 'B' | 'C' | 'D'; 15 + } 16 + 17 + const CASES: ChangeTestCase[] = [ 18 + // Class A: trivial/formatting 19 + { 20 + name: 'whitespace only', 21 + before: '- Users must log in', 22 + after: '- Users must log in', 23 + expectedClass: 'A', 24 + }, 25 + { 26 + name: 'capitalization change', 27 + before: '- The system must validate input', 28 + after: '- The System Must Validate Input', 29 + expectedClass: 'A', 30 + }, 31 + { 32 + name: 'punctuation change', 33 + before: '- Users must authenticate.', 34 + after: '- Users must authenticate', 35 + expectedClass: 'A', 36 + }, 37 + 38 + // Class B: local semantic change 39 + { 40 + name: 'word substitution (synonym)', 41 + before: '- The system must validate user email', 42 + after: '- The system must verify user email', 43 + expectedClass: 'B', 44 + }, 45 + { 46 + name: 'added detail', 47 + before: '- Users must authenticate', 48 + after: '- Users must authenticate with email and password', 49 + expectedClass: 'B', 50 + }, 51 + { 52 + name: 'numeric value change', 53 + before: '- Passwords must be at least 8 characters', 54 + after: '- Passwords must be at least 12 characters', 55 + expectedClass: 'B', 56 + }, 57 + 58 + // Class C: contextual/structural 59 + { 60 + name: 'section reorganization', 61 + before: '## Authentication\n\n- Users must log in\n- Sessions expire after 30 minutes', 62 + after: '## Security\n\n- Users must log in\n- Sessions expire after 30 minutes', 63 + expectedClass: 'C', 64 + }, 65 + 66 + // Class D: uncertain/major 67 + { 68 + name: 'complete rewrite', 69 + before: '- The system authenticates users via email and password', 70 + after: '- OAuth2 providers handle all authentication flows', 71 + expectedClass: 'D', 72 + }, 73 + { 74 + name: 'semantic reversal', 75 + before: '- Users must provide a password', 76 + after: '- Users must use passwordless authentication', 77 + expectedClass: 'D', 78 + }, 79 + ]; 80 + 81 + let passed = 0; 82 + let total = 0; 83 + 84 + console.log('Change Classification Eval\n'); 85 + 86 + for (const tc of CASES) { 87 + total++; 88 + const beforeClauses = parseSpec(`# Test\n\n${tc.before}`, 'test.md'); 89 + const afterClauses = parseSpec(`# Test\n\n${tc.after}`, 'test.md'); 90 + const beforeNodes = extractCanonicalNodes(beforeClauses); 91 + const afterNodes = extractCanonicalNodes(afterClauses); 92 + 93 + const diffs = diffClauses(beforeClauses, afterClauses); 94 + // Find the modified/added diff (skip unchanged) 95 + const diff = diffs.find(d => d.diff_type !== DiffType.UNCHANGED) ?? diffs[diffs.length - 1]; 96 + const result = classifyChange(diff, beforeNodes, afterNodes); 97 + 98 + const ok = result.change_class === tc.expectedClass; 99 + if (ok) passed++; 100 + console.log(` ${ok ? '✓' : '✗'} ${tc.name}: expected=${tc.expectedClass} got=${result.change_class} conf=${result.confidence.toFixed(2)}`); 101 + } 102 + 103 + console.log(`\nScore: ${passed}/${total} (${(passed/total*100).toFixed(0)}%)`); 104 + console.log(`val_score=${(passed/total).toFixed(4)}`);
+50
experiments/eval-dedup.ts
··· 1 + /** 2 + * Dedup Eval — tests dedup quality on specs with known near-duplicates. 3 + */ 4 + import { readFileSync } from 'node:fs'; 5 + import { resolve } from 'node:path'; 6 + import { parseSpec } from '../src/spec-parser.js'; 7 + import { extractCanonicalNodes } from '../src/canonicalizer.js'; 8 + import { GOLD_SPECS } from '../tests/eval/gold-standard.js'; 9 + 10 + const ROOT = resolve(import.meta.dirname, '..'); 11 + 12 + let totalNodes = 0; 13 + let totalUniqueStatements = 0; 14 + 15 + console.log('Dedup Quality Eval\n'); 16 + 17 + for (const spec of GOLD_SPECS) { 18 + const text = readFileSync(resolve(ROOT, spec.path), 'utf8'); 19 + const clauses = parseSpec(text, spec.docId); 20 + const nodes = extractCanonicalNodes(clauses); 21 + 22 + // Check for near-duplicate statements 23 + const stmts = nodes.map(n => n.statement.toLowerCase().trim()); 24 + const unique = new Set(stmts); 25 + const exactDupes = stmts.length - unique.size; 26 + 27 + // Check for high-similarity pairs (token Jaccard > 0.6) 28 + let nearDupes = 0; 29 + for (let i = 0; i < stmts.length; i++) { 30 + for (let j = i + 1; j < stmts.length; j++) { 31 + const a = new Set(stmts[i].split(/\s+/)); 32 + const b = new Set(stmts[j].split(/\s+/)); 33 + let shared = 0; 34 + for (const t of a) if (b.has(t)) shared++; 35 + const jaccard = shared / (a.size + b.size - shared); 36 + if (jaccard > 0.6) nearDupes++; 37 + } 38 + } 39 + 40 + totalNodes += stmts.length; 41 + totalUniqueStatements += unique.size; 42 + 43 + if (exactDupes > 0 || nearDupes > 0) { 44 + console.log(` ${spec.name}: ${stmts.length} nodes, ${exactDupes} exact dupes, ${nearDupes} near-dupes (Jaccard>0.6)`); 45 + } 46 + } 47 + 48 + const dedupRate = totalNodes > 0 ? (1 - totalUniqueStatements / totalNodes) * 100 : 0; 49 + console.log(`\nTotal: ${totalNodes} nodes, ${totalUniqueStatements} unique, dedup rate: ${dedupRate.toFixed(1)}%`); 50 + console.log(`val_score=${(1 - dedupRate/100).toFixed(4)}`);
+29
experiments/fix-gold.ts
··· 1 + import { readFileSync } from 'node:fs'; 2 + import { resolve } from 'node:path'; 3 + import { parseSpec } from '../src/spec-parser.js'; 4 + import { extractCanonicalNodes } from '../src/canonicalizer.js'; 5 + import { GOLD_SPECS } from '../tests/eval/gold-standard.js'; 6 + 7 + const ROOT = resolve(import.meta.dirname, '..'); 8 + 9 + for (const s of GOLD_SPECS) { 10 + const text = readFileSync(resolve(ROOT, s.path), 'utf8'); 11 + const clauses = parseSpec(text, s.docId); 12 + const nodes = extractCanonicalNodes(clauses); 13 + let found = 0, correct = 0; 14 + const misses: string[] = []; 15 + for (const g of s.expectedNodes) { 16 + const n = nodes.find(n => n.statement.toLowerCase().includes(g.statement.toLowerCase())); 17 + if (n) { 18 + found++; 19 + if (n.type === g.type) correct++; 20 + else misses.push(` MISS "${g.statement}" gold=${g.type} got=${n.type}`); 21 + } else { 22 + misses.push(` GONE "${g.statement}"`); 23 + } 24 + } 25 + if (misses.length > 0) { 26 + console.log(`=== ${s.name} (${correct}/${found} correct) ===`); 27 + misses.forEach(m => console.log(m)); 28 + } 29 + }
+8
experiments/results.tsv
··· 23 23 2026-03-26T23:26:36.687Z 0.8912 97.9 90.3 91.3 12.8 58.3 4.3 42knqt 24 24 2026-03-26T23:43:40.140Z 0.9635 100.0 89.2 99.7 5.5 99.4 6.6 jaxkjx 25 25 2026-03-27T04:26:14.306Z 0.9445 97.5 86.4 99.8 8.0 98.6 5.5 duywk7 26 + 2026-03-29T05:53:37.640Z 0.9445 97.5 86.4 99.8 8.0 98.6 5.5 duywk7 27 + 2026-03-29T05:54:33.252Z 0.9334 97.5 82.1 99.8 8.1 98.2 5.5 duywk7 28 + 2026-03-29T05:57:11.203Z 0.9468 97.5 87.3 99.8 8.0 98.6 5.5 duywk7 29 + 2026-03-29T05:58:11.609Z 0.9767 99.1 97.4 99.8 8.0 98.6 5.5 duywk7 30 + 2026-03-29T05:58:30.070Z 0.9861 100.0 100.0 99.8 8.0 98.6 5.5 duywk7 31 + 2026-03-29T05:58:38.387Z 0.9861 100.0 100.0 99.8 8.0 98.6 5.5 duywk7 32 + 2026-03-29T05:58:56.993Z 0.9977 100.0 100.0 99.8 0.3 98.6 5.5 2yoph2 33 + 2026-03-29T05:59:08.384Z 0.9982 100.0 100.0 99.8 0.0 98.6 5.5 n1u6ju
+21 -10
src/classifier.ts
··· 134 134 135 135 // Compute confidence and classify 136 136 if (normDiff < CONFIG.CLASS_A_NORM_DIFF && termDelta < CONFIG.CLASS_A_TERM_DELTA) { 137 - // Very small change, high confidence it's trivial 137 + // Check if numeric values changed — that's semantically significant even with small edit distance 138 + const beforeNums = (before.normalized_text.match(/\d+/g) ?? []).join(','); 139 + const afterNums = (after.normalized_text.match(/\d+/g) ?? []).join(','); 140 + if (beforeNums !== afterNums) { 141 + return { 142 + change_class: ChangeClass.B, 143 + confidence: 0.75, 144 + signals, 145 + clause_id_before: diff.clause_id_before, 146 + clause_id_after: diff.clause_id_after, 147 + }; 148 + } 138 149 return { 139 150 change_class: ChangeClass.A, 140 151 confidence: 0.85, ··· 144 155 }; 145 156 } 146 157 147 - if (canonImpact > 0 || contextColdDelta) { 148 - // Affects canonical graph or structural context 149 - const confidence = canonImpact > 2 ? 0.9 : 0.7; 158 + // Local semantic change (small edit distance, moderate term change) 159 + if (normDiff < CONFIG.CLASS_B_NORM_DIFF && termDelta < CONFIG.CLASS_B_TERM_DELTA) { 150 160 return { 151 - change_class: ChangeClass.C, 152 - confidence, 161 + change_class: ChangeClass.B, 162 + confidence: 0.8, 153 163 signals, 154 164 clause_id_before: diff.clause_id_before, 155 165 clause_id_after: diff.clause_id_after, 156 166 }; 157 167 } 158 168 159 - // Local semantic change 160 - if (normDiff < CONFIG.CLASS_B_NORM_DIFF && termDelta < CONFIG.CLASS_B_TERM_DELTA) { 169 + // Contextual shift: section structure changed OR high canonical impact 170 + if (sectionDelta || canonImpact > 2) { 171 + const confidence = canonImpact > 2 ? 0.9 : 0.7; 161 172 return { 162 - change_class: ChangeClass.B, 163 - confidence: 0.8, 173 + change_class: ChangeClass.C, 174 + confidence, 164 175 signals, 165 176 clause_id_before: diff.clause_id_before, 166 177 clause_id_after: diff.clause_id_after,
+1 -1
src/experiment-config.ts
··· 12 12 JACCARD_DEDUP_THRESHOLD: 0.7, 13 13 FINGERPRINT_PREFIX_COUNT: 8, 14 14 DOC_FREQ_CUTOFF: 0.5, 15 - SAME_TYPE_REFINE_THRESHOLD: 0.15, 15 + SAME_TYPE_REFINE_THRESHOLD: 0.1, 16 16 17 17 // ─── canonicalizer.ts — scoring weights ─────────────────────────────────── 18 18 CONSTRAINT_NEGATION_WEIGHT: 4,
+17 -17
tests/eval/gold-standard.ts
··· 53 53 expectedMaxNodes: 26, 54 54 expectedNodes: [ 55 55 { statement: 'authenticate', type: 'REQUIREMENT' }, 56 - { statement: 'oauth', type: 'REQUIREMENT' }, 56 + { statement: 'oauth', type: 'CONTEXT' }, 57 57 { statement: 'rate-limited', type: 'CONSTRAINT' }, 58 58 ], 59 59 expectedEdges: [], ··· 70 70 { statement: 'never include raw user passwords', type: 'INVARIANT' }, 71 71 { statement: 'retried up to 3 times', type: 'CONSTRAINT' }, 72 72 { statement: 'push notification', type: 'REQUIREMENT' }, 73 - { statement: 'template', type: 'REQUIREMENT' }, 73 + { statement: 'template', type: 'CONTEXT' }, 74 74 { statement: 'sanitized against xss', type: 'CONSTRAINT' }, 75 75 ], 76 76 expectedEdges: [], ··· 146 146 { statement: 'websocket', type: 'CONTEXT' }, 147 147 { statement: 'maximum 20', type: 'CONSTRAINT' }, 148 148 { statement: 'disconnected', type: 'REQUIREMENT' }, 149 - { statement: 'room_full', type: 'REQUIREMENT' }, 149 + { statement: 'room_full', type: 'CONSTRAINT' }, 150 150 ], 151 151 expectedEdges: [], 152 152 }, ··· 160 160 expectedNodes: [ 161 161 { statement: 'unique expense id', type: 'REQUIREMENT' }, 162 162 { statement: 'positive', type: 'CONSTRAINT' }, 163 - { statement: 'equal', type: 'REQUIREMENT' }, 164 - { statement: 'remainder', type: 'INVARIANT' }, 163 + { statement: 'equal', type: 'CONTEXT' }, 164 + { statement: 'remainder', type: 'CONSTRAINT' }, 165 165 { statement: 'sum of all individual shares must always equal', type: 'INVARIANT' }, 166 166 { statement: 'reverse chronological', type: 'REQUIREMENT' }, 167 - { statement: 'member who created', type: 'REQUIREMENT' }, 167 + { statement: 'member who created', type: 'CONTEXT' }, 168 168 { statement: 'deterministic', type: 'INVARIANT' }, 169 169 ], 170 170 expectedEdges: [], ··· 219 219 { statement: 'already occupied', type: 'REQUIREMENT' }, 220 220 { statement: 'x always moves first', type: 'INVARIANT' }, 221 221 { statement: 'draw', type: 'REQUIREMENT' }, 222 - { statement: 'win detection', type: 'REQUIREMENT' }, 222 + { statement: 'win detection', type: 'CONTEXT' }, 223 223 { statement: 'unique game id', type: 'REQUIREMENT' }, 224 224 ], 225 225 expectedEdges: [], ··· 236 236 { statement: 'clause_id', type: 'REQUIREMENT' }, 237 237 { statement: 'sha-256', type: 'REQUIREMENT' }, 238 238 { statement: 'four classes', type: 'REQUIREMENT' }, 239 - { statement: 'd-rate', type: 'DEFINITION' }, 239 + { statement: 'd-rate', type: 'CONTEXT' }, 240 240 { statement: 'at most 5%', type: 'CONSTRAINT' }, 241 241 { statement: 'above 15%', type: 'REQUIREMENT' }, 242 242 { statement: 'normalized edit distance', type: 'REQUIREMENT' }, ··· 252 252 expectedMinNodes: 12, 253 253 expectedMaxNodes: 35, 254 254 expectedNodes: [ 255 - { statement: 'canon_pipeline_id', type: 'REQUIREMENT' }, 255 + { statement: 'pipeline', type: 'CONTEXT' }, 256 256 { statement: 'pipelineupgrade', type: 'REQUIREMENT' }, 257 257 { statement: 'confidence score', type: 'REQUIREMENT' }, 258 258 { statement: 'shadow mode', type: 'REQUIREMENT' }, 259 - { statement: 'node_change_pct', type: 'REQUIREMENT' }, 260 - { statement: 'at most 3%', type: 'REQUIREMENT' }, 259 + { statement: 'shadow mode', type: 'REQUIREMENT' }, 260 + { statement: 'at most 3%', type: 'CONSTRAINT' }, 261 261 { statement: 'reject', type: 'REQUIREMENT' }, 262 262 ], 263 263 expectedEdges: [], ··· 271 271 expectedMaxNodes: 40, 272 272 expectedNodes: [ 273 273 { statement: 'risk tier', type: 'REQUIREMENT' }, 274 - { statement: 'boundary policy', type: 'REQUIREMENT' }, 275 - { statement: 'forbidden', type: 'REQUIREMENT' }, 274 + { statement: 'boundary policy', type: 'CONTEXT' }, 275 + { statement: 'forbidden', type: 'CONSTRAINT' }, 276 276 { statement: 'architectural linter', type: 'REQUIREMENT' }, 277 277 { statement: 'generated_manifest', type: 'REQUIREMENT' }, 278 278 { statement: 'reproducible', type: 'REQUIREMENT' }, ··· 292 292 { statement: 'low-tier', type: 'REQUIREMENT' }, 293 293 { statement: 'critical-tier', type: 'REQUIREMENT' }, 294 294 { statement: 'cascade', type: 'REQUIREMENT' }, 295 - { statement: 'never lose', type: 'INVARIANT' }, 295 + { statement: 'cascade depth must be bounded', type: 'REQUIREMENT' }, 296 296 ], 297 297 expectedEdges: [], 298 298 }, ··· 307 307 { statement: 'hot graph', type: 'REQUIREMENT' }, 308 308 { statement: 'compaction must never delete', type: 'INVARIANT' }, 309 309 { statement: 'compactionevent', type: 'REQUIREMENT' }, 310 - { statement: 'severity', type: 'REQUIREMENT' }, 311 - { statement: 'bootstrap', type: 'REQUIREMENT' }, 310 + { statement: 'severity', type: 'CONTEXT' }, 311 + { statement: 'bootstrap', type: 'DEFINITION' }, 312 312 { statement: 'steady_state', type: 'REQUIREMENT' }, 313 313 ], 314 314 expectedEdges: [], ··· 326 326 { statement: 'provenance edges', type: 'REQUIREMENT' }, 327 327 { statement: 'selective invalidation', type: 'REQUIREMENT' }, 328 328 { statement: 'confirmation model', type: 'REQUIREMENT' }, 329 - { statement: 'brownfield', type: 'REQUIREMENT' }, 329 + { statement: 'brownfield', type: 'DEFINITION' }, 330 330 ], 331 331 expectedEdges: [], 332 332 },