Real-time index of opencode sessions
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add flow decomposition plans and zero-copy/tree regenesis docs

rektide 5eee8013 b7cbef06

+933 -8
+6
.beads/issues.jsonl
··· 1 + {"id":"ocs-bon-builder","title":"Introduce bon::Builder across key types","description":"Replace manual constructor patterns with `bon::Builder` derive to reduce API surface, improve ergonomics, and make future configuration options additive without proliferating `with_*` variants.\n\nKey goals:\n- Unify mutually exclusive construction modes into fluent builder APIs\n- Reduce invalid state combinations for structs with many related fields\n- Make test fixtures and synthetic data creation cleaner\n- Establish a consistent pattern for configuration-heavy types","status":"open","priority":2,"issue_type":"epic","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-23T18:52:23.254842635-05:00","created_by":"rektide de la faye","updated_at":"2026-02-23T18:52:23.254842635-05:00"} 2 + {"id":"ocs-bon-domain-structs","title":"Add bon::Builder to large serde domain structs","description":"Add builders to message/part domain types with many optional fields to simplify test fixtures and future synthetic data generation.\n\nCandidates:\n- `AssistantMessage` (`src/types/message.rs:63-88`) - 15 fields, many optional\n- `UserMessage` (`src/types/message.rs:30-45`) - 9 fields, many optional\n- `ToolStateCompleted` (`src/types/part.rs:128-137`) - 6 fields\n- `SubtaskPart` (`src/types/part.rs:254-264`) - 7 fields\n\nThese are serde types, so builder is primarily for programmatic construction (tests, fixtures).\n\nAcceptance:\n- Selected structs derive `bon::Builder`\n- At least one test demonstrates cleaner fixture construction\n- Serde behavior unchanged","status":"open","priority":2,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-23T18:53:17.020990858-05:00","created_by":"rektide de la faye","updated_at":"2026-02-23T18:53:17.020990858-05:00","dependencies":[{"issue_id":"ocs-bon-domain-structs","depends_on_id":"ocs-bon-builder","type":"parent-child","created_at":"2026-02-23T18:56:40.272222113-05:00","created_by":"rektide de la faye"}]} 3 + {"id":"ocs-bon-index-meta","title":"Add bon::Builder to index metadata structs","description":"Add builders to `SessionMeta`, `MessageMeta`, and `PartRef` to improve hot-path construction clarity.\n\nLocations:\n- `SessionMeta` literal at `src/index.rs:295-305`\n- `MessageMeta` literal at `src/index.rs:335-343`\n- `PartRef::new()` constructor at `src/index.rs:51-64`\n\nTarget:\n```rust\nSessionMeta::builder()\n .id(session_id.clone())\n .title(session.title)\n .created(session.time.created)\n .updated(session.time.updated)\n .project_id(project_id.to_string())\n .message_count(message_count)\n .build()\n```\n\nAcceptance:\n- All three structs derive `bon::Builder`\n- Inline literals replaced with builder calls\n- Tests pass","status":"open","priority":2,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-23T18:53:13.369488571-05:00","created_by":"rektide de la faye","updated_at":"2026-02-23T18:53:13.369488571-05:00","dependencies":[{"issue_id":"ocs-bon-index-meta","depends_on_id":"ocs-bon-builder","type":"parent-child","created_at":"2026-02-23T18:56:39.554905836-05:00","created_by":"rektide de la faye"}]} 4 + {"id":"ocs-bon-materializer","title":"Add bon::Builder to SessionMaterializer","description":"Replace `new()`, `detect()`, `with_paths()`, `with_reader()` constructors with a unified builder pattern.\n\nCurrent API (`src/materializer.rs:16-33`):\n- `SessionMaterializer::new()` - auto-detect paths\n- `SessionMaterializer::detect()` - same as new\n- `SessionMaterializer::with_paths(StoragePaths)` - explicit paths\n- `SessionMaterializer::with_reader(FileReader)` - custom reader\n\nTarget API:\n```rust\nlet m = SessionMaterializer::builder()\n .paths(storage_paths) // or .detect_paths() or .reader(file_reader)\n .build()?;\n```\n\nAcceptance:\n- `SessionMaterializer` derives `bon::Builder`\n- Existing constructors deprecated or removed\n- `SessionLoader` similarly updated for consistency\n- Tests pass","status":"open","priority":1,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-23T18:53:06.117888779-05:00","created_by":"rektide de la faye","updated_at":"2026-02-23T18:53:06.117888779-05:00","dependencies":[{"issue_id":"ocs-bon-materializer","depends_on_id":"ocs-bon-builder","type":"parent-child","created_at":"2026-02-23T18:56:38.449090839-05:00","created_by":"rektide de la faye"}]} 5 + {"id":"ocs-bon-storage-paths","title":"Add bon::Builder to StoragePaths","description":"Replace manual struct assembly with builder pattern for safer path configuration.\n\nCurrent state (`src/storage/paths.rs:5-13`):\n- 7 related fields that must stay consistent\n- `detect()` and `from_base()` constructors\n- Test in `src/storage/paths.rs:141` uses literal struct assembly\n\nTarget:\n```rust\nlet paths = StoragePaths::builder()\n .base(path_buf)\n .build()?;\n\n// Or for tests:\nlet paths = StoragePaths::builder()\n .root(\"/test/storage\")\n .session(\"/test/storage/session\")\n .message(\"/test/storage/message\")\n .part(\"/test/storage/part\")\n .diff(\"/test/storage/session_diff\")\n .snapshot(\"/test/storage/snapshot\")\n .migration(\"/test/storage/migration\")\n .build();\n```\n\nAcceptance:\n- `StoragePaths` derives `bon::Builder`\n- `detect()` and `from_base()` remain as convenience methods\n- Test updated to use builder","status":"open","priority":1,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-23T18:53:09.833679196-05:00","created_by":"rektide de la faye","updated_at":"2026-02-23T18:53:09.833679196-05:00","dependencies":[{"issue_id":"ocs-bon-storage-paths","depends_on_id":"ocs-bon-builder","type":"parent-child","created_at":"2026-02-23T18:56:38.932992393-05:00","created_by":"rektide de la faye"}]} 1 6 {"id":"ocs-core-err","title":"Error Type and Result Alias","description":"Define unified Error enum with all variants and Result\u003cT\u003e alias","status":"closed","priority":1,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-18T18:07:43.184485341-05:00","created_by":"rektide de la faye","updated_at":"2026-02-18T18:08:59.153252492-05:00","closed_at":"2026-02-18T18:08:59.153252492-05:00","close_reason":"Completed"} 2 7 {"id":"ocs-core-id","title":"ID Types with Timestamp Extraction","description":"Define typed identifiers for sessions, messages, and parts with timestamp extraction, parse/display/debug traits, and serde support","status":"closed","priority":1,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-18T18:07:40.590649889-05:00","created_by":"rektide de la faye","updated_at":"2026-02-18T18:08:58.530206256-05:00","closed_at":"2026-02-18T18:08:58.530206256-05:00","close_reason":"Completed"} 3 8 {"id":"ocs-core-type","title":"Core Data Types","description":"Define Rust structs matching opencode schemas: SessionInfo, Message (User/Assistant), Part (12 variants), with serde support","status":"closed","priority":1,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-18T18:07:42.004452213-05:00","created_by":"rektide de la faye","updated_at":"2026-02-18T18:08:58.827551611-05:00","closed_at":"2026-02-18T18:08:58.827551611-05:00","close_reason":"Completed"} ··· 8 13 {"id":"ocs-load-sess","title":"Session Loader","description":"Load a complete session with all metadata and associated diff file","status":"closed","priority":2,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-18T18:09:10.541800693-05:00","created_by":"rektide de la faye","updated_at":"2026-02-18T18:11:53.49490446-05:00","closed_at":"2026-02-18T18:11:53.49490446-05:00","close_reason":"Completed"} 9 14 {"id":"ocs-mat-query","title":"Query Interface","description":"High-level query API: session tree, time filtering, project filtering, relationship navigation","status":"closed","priority":2,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-18T18:12:06.668313077-05:00","created_by":"rektide de la faye","updated_at":"2026-02-18T18:18:57.808443063-05:00","closed_at":"2026-02-18T18:18:57.808443063-05:00","close_reason":"Completed"} 10 15 {"id":"ocs-mat-sess","title":"Session Materializer","description":"SessionMaterializer with index-based lookups and lazy content loading via mmap","status":"closed","priority":2,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-18T18:12:04.965159412-05:00","created_by":"rektide de la faye","updated_at":"2026-02-18T18:18:57.59091873-05:00","closed_at":"2026-02-18T18:18:57.59091873-05:00","close_reason":"Completed"} 16 + {"id":"ocs-optimize-append-indexing","title":"Optimize update-read path to index only newly appended content","description":"Investigate what content is typically appended in update operations and optimize the indexing to only process newly appended content rather than re-indexing entire files/structures.","status":"open","priority":2,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-20T03:14:29.630420381-05:00","created_by":"rektide de la faye","updated_at":"2026-02-20T03:14:29.630420381-05:00"} 11 17 {"id":"ocs-stor-mmap","title":"Memory-Mapped File Wrapper","description":"Create safe wrapper around memmap2 with caching for shared ownership","status":"closed","priority":1,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-18T18:07:45.445449396-05:00","created_by":"rektide de la faye","updated_at":"2026-02-18T18:09:00.022211385-05:00","closed_at":"2026-02-18T18:09:00.022211385-05:00","close_reason":"Completed"} 12 18 {"id":"ocs-stor-path","title":"Storage Path Resolution","description":"Implement XDG-compliant path discovery with path builders for each entity type","status":"closed","priority":1,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-18T18:07:44.237866599-05:00","created_by":"rektide de la faye","updated_at":"2026-02-18T18:08:59.580357656-05:00","closed_at":"2026-02-18T18:08:59.580357656-05:00","close_reason":"Completed"} 13 19 {"id":"ocs-stor-read","title":"File Reader with JSON Parsing","description":"Read and parse JSON files for each entity type with mmap caching","status":"closed","priority":1,"issue_type":"task","owner":"rektide+git@voodoowarez.com","created_at":"2026-02-18T18:07:46.577111181-05:00","created_by":"rektide de la faye","updated_at":"2026-02-18T18:09:00.301894724-05:00","closed_at":"2026-02-18T18:09:00.301894724-05:00","close_reason":"Completed"}
+1
Cargo.toml
··· 20 20 tokio = { version = "1", features = ["rt", "sync"], optional = true } 21 21 watchman_client = { version = "0.1", optional = true } 22 22 notify = { version = "8", optional = true } 23 + bon = "3.9.0" 23 24 24 25 [features] 25 26 default = []
+27
README.md
··· 91 91 println!("indexed sessions={}", index.session_count()); 92 92 ``` 93 93 94 + ### 5. Decomposed flow execution (Bon builder) 95 + 96 + Use staged planning + execution when you want control over how much gets loaded. 97 + 98 + ```rust 99 + use opencode_session::{SessionFlowOptions, SessionId, SessionMaterializer}; 100 + use std::str::FromStr; 101 + 102 + let materializer = SessionMaterializer::new()?; 103 + let session_id = SessionId::from_str("ses_3975b29b7ffeDyjus9LjxKUoeX")?; 104 + 105 + let options = SessionFlowOptions::builder() 106 + .session_id(session_id) 107 + .part_limit_per_message(5) 108 + .build(); 109 + 110 + let scope = materializer.plan_session_flow(&options)?; 111 + let message_scopes = materializer.plan_message_flows(&options, &scope)?; 112 + let result = materializer.run_session_flow(&options)?; 113 + 114 + println!( 115 + "planned_messages={} loaded_messages={}", 116 + message_scopes.len(), 117 + result.messages.len(), 118 + ); 119 + ``` 120 + 94 121 ## Core types 95 122 96 123 - [`SessionIndex`](/src/index.rs): metadata graph (projects -> sessions -> messages -> parts)
+236
doc/discovery/regenesis-tree.md
··· 1 + # Regenesis Tree: Structured Session Graph APIs 2 + 3 + > This document defines the next tree API shape for `opencode-session-rs`, including problem framing, current state, and draft plans with explicit design decisions. 4 + 5 + ## Problem 6 + 7 + Consumers need two things at once: 8 + 9 + 1. A predictable graph model for navigating sessions -> messages -> parts. 10 + 2. Control over execution stages, so they can stop early (metadata only) or continue to hydrated payloads. 11 + 12 + Historically, tree loading tended to jump from IDs directly to fully hydrated structures, which couples traversal and parsing too tightly. 13 + 14 + ## Current State 15 + 16 + Current modules and capabilities: 17 + 18 + - Index graph and builder in [`/src/index.rs`](/src/index.rs) 19 + - Staged materializer methods in [`/src/materializer.rs`](/src/materializer.rs) 20 + - Loader wrappers in [`/src/loader.rs`](/src/loader.rs) 21 + - Reader/listing primitives in [`/src/storage/reader.rs`](/src/storage/reader.rs) 22 + - Public exports in [`/src/lib.rs`](/src/lib.rs) 23 + 24 + Recent progress: 25 + 26 + - Flow decomposition methods exist (`plan_*`, `run_*`) and are configurable with Bon builder options. 27 + - Tree assembly is more modular than before. 28 + 29 + Remaining gaps: 30 + 31 + 1. Tree API contracts are still mixed between reference and hydrated output semantics. 32 + 2. Projection choices are not explicit enough for consumer intent. 33 + 3. Reporting/diagnostics for partial graph construction is under-specified. 34 + 35 + ## Target Tree Model 36 + 37 + Use a three-shape model with explicit boundaries: 38 + 39 + 1. **Structure Tree**: IDs + relationships only. 40 + 2. **Reference Tree**: structure + mapped spans for payload leaves. 41 + 3. **Hydrated Tree**: reference tree plus parsed objects. 42 + 43 + ```mermaid 44 + flowchart LR 45 + Index[SessionIndex] --> StructureTree[StructureTree] 46 + StructureTree --> ReferenceTree[ReferenceTree] 47 + ReferenceTree --> HydratedTree[HydratedTree] 48 + ``` 49 + 50 + ### Why three shapes? 51 + 52 + - Structure tree is cheapest for navigation/search/filter. 53 + - Reference tree is the zero-copy contract boundary. 54 + - Hydrated tree is convenience for clients that want full structs. 55 + 56 + ## Draft Types 57 + 58 + ### Structure level 59 + 60 + ```rust 61 + pub struct SessionNode { 62 + pub session_id: SessionId, 63 + pub project_id: String, 64 + pub message_ids: Vec<MessageId>, 65 + } 66 + 67 + pub struct MessageNode { 68 + pub message_id: MessageId, 69 + pub session_id: SessionId, 70 + pub part_ids: Vec<PartId>, 71 + } 72 + ``` 73 + 74 + ### Reference level 75 + 76 + ```rust 77 + pub struct SessionRefNode { 78 + pub key: SessionKey, 79 + pub span: MappedSpan, 80 + pub messages: Vec<MessageRefNode>, 81 + } 82 + 83 + pub struct MessageRefNode { 84 + pub key: MessageKey, 85 + pub span: MappedSpan, 86 + pub parts: Vec<PartRefNode>, 87 + } 88 + 89 + pub struct PartRefNode { 90 + pub key: PartKey, 91 + pub kind: PartKind, 92 + pub span: MappedSpan, 93 + } 94 + ``` 95 + 96 + ### Hydrated level 97 + 98 + ```rust 99 + pub struct SessionHydratedNode { 100 + pub info: SessionInfo, 101 + pub messages: Vec<MessageHydratedNode>, 102 + } 103 + 104 + pub struct MessageHydratedNode { 105 + pub message: Message, 106 + pub parts: Vec<Part>, 107 + } 108 + ``` 109 + 110 + ## Draft Flow Pipeline 111 + 112 + ### Stage 1: Plan 113 + 114 + Build deterministic scopes from options and index. 115 + 116 + - input: flow options 117 + - output: planned session/message/part IDs 118 + 119 + ### Stage 2: Resolve references 120 + 121 + Convert planned IDs into mapped spans. 122 + 123 + - input: scopes 124 + - output: reference tree 125 + 126 + ### Stage 3: Optional hydrate 127 + 128 + Parse selected references into typed structs. 129 + 130 + - input: reference tree 131 + - output: hydrated tree or partial hydrated projection 132 + 133 + ## Key Design Choices 134 + 135 + ### 1) IDs remain the graph spine 136 + 137 + Decision: 138 + 139 + - All internal maps and joins remain keyed by typed IDs. 140 + 141 + Why: 142 + 143 + - Matches opencode storage relationships. 144 + - Keeps tree planning independent from payload parse cost. 145 + 146 + ### 2) Deterministic ordering is mandatory 147 + 148 + Decision: 149 + 150 + - Externally visible tree vectors are ordered consistently: 151 + - sessions: descending session ID 152 + - messages: ascending message ID 153 + - parts: ascending part ID 154 + 155 + Why: 156 + 157 + - Stable behavior for caches, pagination, and tests. 158 + 159 + ### 3) Stage outputs are first-class types 160 + 161 + Decision: 162 + 163 + - Do not collapse plan/resolve/hydrate into one opaque return type. 164 + 165 + Why: 166 + 167 + - Consumers can opt into only the stage they need. 168 + - Easier observability and diagnostics per stage. 169 + 170 + ### 4) Flow options use Bon for ergonomics 171 + 172 + Decision: 173 + 174 + - Continue using Bon builder for flow configuration. 175 + 176 + Why: 177 + 178 + - Keeps options explicit and discoverable. 179 + - Avoids telescoping constructors as options grow. 180 + 181 + ### 5) Tree assembly should not hide errors 182 + 183 + Decision: 184 + 185 + - Return typed reports for skipped nodes and failed hydrations. 186 + 187 + Why: 188 + 189 + - Consumers need policy control (strict/fail-fast vs tolerant/partial). 190 + 191 + ## Consumer-Facing API Draft 192 + 193 + Potential high-level methods on [`/src/materializer.rs`](/src/materializer.rs): 194 + 195 + - `plan_session_tree(options) -> SessionPlan` 196 + - `resolve_session_tree(plan) -> SessionRefTree` 197 + - `hydrate_session_tree(ref_tree) -> SessionHydratedTree` 198 + - `hydrate_message_nodes(ref_tree, ids) -> PartialHydratedMessages` 199 + 200 + This naming makes stage boundaries explicit and testable. 201 + 202 + ## Diagnostics and Reports 203 + 204 + Add reports for each stage: 205 + 206 + - `PlanReport` (counts, truncated by limits) 207 + - `ResolveReport` (missing files, invalid references) 208 + - `HydrateReport` (parse failures by key/path) 209 + 210 + Each report should include: 211 + 212 + - stage name 213 + - total attempted 214 + - total succeeded 215 + - skipped/failure entries with typed reason 216 + 217 + ## Acceptance Criteria 218 + 219 + 1. Tree APIs expose plan/resolve/hydrate as separate public methods. 220 + 2. Reference tree leaves are mmap spans (no implicit parse in resolve stage). 221 + 3. Hydrated helpers are wrappers over resolve + hydrate stages. 222 + 4. Deterministic ordering is enforced and tested. 223 + 5. Stage reports are available for strict and tolerant consumer policies. 224 + 225 + ## Migration Strategy 226 + 227 + 1. Introduce new stage types/methods in parallel with existing convenience methods. 228 + 2. Re-implement convenience methods on top of staged APIs. 229 + 3. Mark old ambiguous methods for cleanup in next breaking pass. 230 + 4. Update [`/README.md`](/README.md) examples to prefer staged flow. 231 + 232 + ## Out of Scope for This Tree Pass 233 + 234 + - Durable CDC replay log. 235 + - Full watch backend implementation. 236 + - Rustdoc-heavy narrative documentation (to follow after contracts settle).
+219
doc/discovery/regenesis-zerocopy.md
··· 1 + # Regenesis Zero-Copy: Mmap-First Tree Leaves 2 + 3 + > This document formalizes a strict zero-copy direction for `opencode-session-rs`: every leaf returned by tree/projection APIs must be an addressable region in a memory-mapped file. 4 + 5 + ## Problem 6 + 7 + The project goal has always been memory-aware session materialization: let the kernel manage hot/cold file pages and reclaim them under pressure. The current implementation partially achieves this but still allocates eagerly in key paths. 8 + 9 + Required invariant going forward: 10 + 11 + - Any tree leaf representing a session/message/part payload is a file mapping reference (`Arc<MappedFile>`) plus byte range. 12 + - Parsing into owned structs is an explicit opt-in step, not the default tree assembly path. 13 + 14 + ## Current State 15 + 16 + Current implementation status: 17 + 18 + - Uses `mmap` via [`/src/storage/mmap.rs`](/src/storage/mmap.rs). 19 + - Uses typed parse via `serde_json::from_slice` in [`/src/storage/reader.rs`](/src/storage/reader.rs). 20 + - Builds index metadata in [`/src/index.rs`](/src/index.rs). 21 + - Exposes staged flow controls in [`/src/materializer.rs`](/src/materializer.rs) with Bon-based options. 22 + 23 + What is good now: 24 + 25 + 1. File bytes are mmap-backed, so the OS can evict clean pages. 26 + 2. Mmap cache is shared by `Arc`, avoiding duplicate mappings. 27 + 3. Flow decomposition allows more selective loading than before. 28 + 29 + What is still not aligned with strict zero-copy: 30 + 31 + 1. Typed reads eagerly deserialize JSON into owned heap structs. 32 + 2. Index building still parses entities that could remain references. 33 + 3. Public flow output currently returns hydrated objects (`LoadedSession`, `MessageWithParts`) by default. 34 + 35 + ## Target Model 36 + 37 + ### Principle 38 + 39 + Build and return reference trees first. Hydration is layered on top. 40 + 41 + ```mermaid 42 + flowchart TD 43 + Paths[StoragePaths] --> Index[Index relationships by IDs] 44 + Index --> RefTree[Reference tree] 45 + RefTree --> SpanLeaf[MappedSpan leaves] 46 + SpanLeaf --> ParseOnDemand[Optional parse adapters] 47 + ``` 48 + 49 + ### Core leaf type 50 + 51 + ```rust 52 + pub struct MappedSpan { 53 + pub file: Arc<MappedFile>, 54 + pub offset: usize, 55 + pub len: usize, 56 + } 57 + 58 + impl MappedSpan { 59 + pub fn as_bytes(&self) -> &[u8] { 60 + &self.file.as_bytes()[self.offset..self.offset + self.len] 61 + } 62 + } 63 + ``` 64 + 65 + For current opencode layout (one JSON entity per file), most spans start as full-file spans: 66 + 67 + - `offset = 0` 68 + - `len = file.len()` 69 + 70 + This still satisfies the contract that leaves are explicit file addresses. 71 + 72 + ## Draft API Plan 73 + 74 + ### A) Add reference-first reader APIs 75 + 76 + Files: 77 + 78 + - [`/src/storage/reader.rs`](/src/storage/reader.rs) 79 + 80 + Add: 81 + 82 + - `read_span(path) -> Result<MappedSpan>` 83 + - `read_session_span(project_id, session_id)` 84 + - `read_message_span(session_id, message_id)` 85 + - `read_part_span(message_id, part_id)` 86 + 87 + Keep parse helpers as adapters: 88 + 89 + - `parse_span<T>(&MappedSpan) -> Result<T>` 90 + 91 + ### B) Add reference tree projection types 92 + 93 + Files: 94 + 95 + - [`/src/materializer.rs`](/src/materializer.rs) 96 + - (new) `/src/materializer/projection.rs` (or same module initially) 97 + 98 + Add: 99 + 100 + - `SessionRefLeaf { key, span }` 101 + - `MessageRefLeaf { key, span, part_refs }` 102 + - `PartRefLeaf { key, kind, span }` 103 + - `SessionRefTree { session, messages }` 104 + 105 + ### C) Make decomposed flow return ref trees by default 106 + 107 + Files: 108 + 109 + - [`/src/materializer.rs`](/src/materializer.rs) 110 + 111 + Adjust: 112 + 113 + - `run_session_flow` returns a ref-first result. 114 + - Provide explicit hydration adapters: 115 + - `hydrate_session_info(&SessionRefLeaf)` 116 + - `hydrate_message(&MessageRefLeaf)` 117 + - `hydrate_part(&PartRefLeaf)` 118 + 119 + ### D) Preserve hydrated convenience API as wrappers 120 + 121 + Files: 122 + 123 + - [`/src/materializer.rs`](/src/materializer.rs) 124 + 125 + Hydrated methods remain, but implemented as wrappers over ref flow + hydrate. 126 + 127 + ## Key Design Choices 128 + 129 + ### 1) Full-file spans now, subspans later 130 + 131 + Decision: 132 + 133 + - Start with full-file spans for all entities. 134 + 135 + Why: 136 + 137 + - Matches current on-disk format. 138 + - Guarantees minimal complexity for first strict-zero-copy pass. 139 + - Leaves room for future structural indexing/subspans if needed. 140 + 141 + ### 2) No implicit parse during tree assembly 142 + 143 + Decision: 144 + 145 + - Reference-tree assembly never deserializes JSON. 146 + 147 + Why: 148 + 149 + - Keeps memory profile predictable. 150 + - Ensures the OS, not heap ownership, controls most payload memory pressure. 151 + 152 + ### 3) Metadata remains index-resident 153 + 154 + Decision: 155 + 156 + - Keep small identity/relationship metadata resident (`SessionId`, `MessageId`, counts, ordering). 157 + 158 + Why: 159 + 160 + - Structural queries must stay fast and stable. 161 + - This metadata footprint is tiny compared to full payload hydration. 162 + 163 + ### 4) Cache lifecycle supports page reclamation 164 + 165 + Decision: 166 + 167 + - Keep mmap cache bounded/maintained (`prune_unused`, optional capacity policy). 168 + 169 + Why: 170 + 171 + - Arc retention determines mapping lifetime in-process. 172 + - Kernel page eviction works best when dead mappings are also removable from cache. 173 + 174 + ## Operational Behavior 175 + 176 + ### Memory behavior expectations 177 + 178 + With the target model: 179 + 180 + 1. Tree build/load mostly allocates IDs and small vectors/maps. 181 + 2. Payload bytes remain file-backed mmap pages. 182 + 3. Pages can be reclaimed by the OS and faulted back when re-accessed. 183 + 4. Hydration allocates heap only for explicitly requested entities. 184 + 185 + ### Failure behavior 186 + 187 + Reference phase failures should surface by file/key, not generic parse errors. 188 + 189 + Hydration phase failures should include entity key + path + parse context. 190 + 191 + ## Acceptance Criteria 192 + 193 + 1. A full session tree projection can be built without deserializing entity payloads. 194 + 2. All payload leaves in that tree expose `MappedSpan` addresses. 195 + 3. Existing hydrated APIs remain available as wrappers over the reference flow. 196 + 4. Integration tests prove memory-mapped references remain valid across staged operations. 197 + 5. No hidden parse path appears in reference tree code paths. 198 + 199 + ## Rollout Plan 200 + 201 + 1. Add `MappedSpan` and span readers in [`/src/storage/reader.rs`](/src/storage/reader.rs). 202 + 2. Add reference leaf/tree types in materializer module. 203 + 3. Switch flow planner/executor to ref-first results. 204 + 4. Re-implement hydrated calls as adapters. 205 + 5. Add fixtures and tests for zero-copy tree behavior. 206 + 207 + ## Risks and Mitigations 208 + 209 + Risk: API confusion between ref and hydrated paths. 210 + 211 + - Mitigation: clear naming (`*_ref_*`, `hydrate_*`) and explicit return types. 212 + 213 + Risk: retained `Arc<MappedFile>` values can keep too many mappings alive. 214 + 215 + - Mitigation: add periodic pruning and bounded cache policy knobs. 216 + 217 + Risk: consumers assume borrowed lifetimes from parsed structs. 218 + 219 + - Mitigation: avoid borrow-heavy parse API in v1; return owned parse outputs from explicit hydrate steps.
+266
doc/discovery/regenesis.md
··· 1 + # Regenesis: API Refinement Plan After Initial Implementation 2 + 3 + > This document defines the next refactor pass for `opencode-session-rs` after the first major API reshape, before writing full API docs. 4 + 5 + ## Context 6 + 7 + The project has moved from initial research and architecture design into a working implementation with a cleaner, more composable API surface. 8 + 9 + Primary design references: 10 + 11 + - [`/doc/discovery/opencode-session.md`](/doc/discovery/opencode-session.md) 12 + - [`/doc/discovery/genesis.md`](/doc/discovery/genesis.md) 13 + - [`/doc/discovery/breakdown.md`](/doc/discovery/breakdown.md) 14 + - [`/doc/discovery/watchman.md`](/doc/discovery/watchman.md) 15 + 16 + Recent implementation references: 17 + 18 + - ergonomic decomposition commit: `db417d8954c86acc1fb4dca74a506d09c1a44efe` 19 + - brainstorm/analysis commit: `370920960456d08dfac15cfd22bc9e53bc8e59cd` 20 + - latest reshape commit: `b7cbef06` 21 + 22 + ## Current Shape (Post-Refactor) 23 + 24 + The crate now has three clear layers: 25 + 26 + 1. `storage` layer for path resolution, mapped files, and typed reads 27 + 2. `index` layer for metadata graph and relationship navigation 28 + 3. `materializer` layer for high-level, ID-driven loading and orchestration 29 + 30 + This is a strong direction, but we still need a final pass to formalize contracts and event surfaces before documenting the API as stable-for-now. 31 + 32 + ## Regenesis Goals 33 + 34 + 1. **Formal contracts**: make failure behavior and lookup guarantees explicit. 35 + 2. **Composable flows**: expose staged methods so consumers can run only the pieces they need. 36 + 3. **Structured change surface**: define CDC/event types that align with watch integration plans. 37 + 4. **Deterministic behavior**: ensure stable ordering and predictable filtering semantics. 38 + 5. **Tested guarantees**: add integration tests around malformed/missing data and partial corruption. 39 + 40 + ## Guiding Principles 41 + 42 + - Keep domain-grouped modules; avoid flat API sprawl. 43 + - Prefer explicit return types over hidden side effects. 44 + - Let failures surface with enough structure for consumer policy decisions. 45 + - Separate identity keys (session/message/part IDs) from ordering/version keys (generation + sequence). 46 + - Preserve snapshot semantics for in-flight reads where possible. 47 + 48 + ## Architecture Target 49 + 50 + ```mermaid 51 + flowchart TD 52 + Storage[storage module] --> IndexBuild[index builder] 53 + IndexBuild --> IndexView[index query view] 54 + IndexView --> MaterializerFlow[materializer staged flow] 55 + 56 + WatchSource[watch source] --> EventClassifier[event classifier] 57 + EventClassifier --> ChangeHub[change hub] 58 + ChangeHub --> SessionStream[session updates stream] 59 + ChangeHub --> EntityStream[entity CDC stream] 60 + 61 + MaterializerFlow --> ChangeHub 62 + ``` 63 + 64 + ## Proposed Next-Pass Work 65 + 66 + ### 1) Index Contract Formalization 67 + 68 + Files: 69 + 70 + - [`/src/index.rs`](/src/index.rs) 71 + - [`/src/materializer.rs`](/src/materializer.rs) 72 + 73 + Changes: 74 + 75 + - Introduce explicit build outcomes for skipped entities (for example: unreadable JSON, invalid ID, missing linkage). 76 + - Replace silent `Err(_) => false` style paths with structured skip reasons. 77 + - Add a `BuildReport` returned by builder runs, including counters and skipped-item diagnostics. 78 + 79 + Desired effect: 80 + 81 + - Consumers can decide whether to tolerate partial indexes or fail fast. 82 + 83 + ### 2) Flow Decomposition API 84 + 85 + Files: 86 + 87 + - [`/src/materializer.rs`](/src/materializer.rs) 88 + - [`/src/loader.rs`](/src/loader.rs) 89 + 90 + Changes: 91 + 92 + - Add staged methods that mirror common user workflows: 93 + - resolve IDs 94 + - resolve metadata 95 + - load payloads 96 + - assemble projections 97 + - Provide projection structs for common bundles (for example: session + message headers, message + part summaries). 98 + 99 + Desired effect: 100 + 101 + - Libraries can stop at the cheapest useful stage and avoid pulling full trees by default. 102 + 103 + ### 3) CDC/Event Type Formalization 104 + 105 + Files: 106 + 107 + - [`/src/watch.rs`](/src/watch.rs) 108 + - (new) `/src/change/mod.rs` 109 + - (new) `/src/change/event.rs` 110 + - (new) `/src/change/stream.rs` 111 + 112 + Changes: 113 + 114 + - Introduce a stable event envelope with: 115 + - `EventCursor { generation, seq_in_generation }` 116 + - `ChangeEntity` keys 117 + - `ChangeOp` verbs 118 + - Add two public stream surfaces: 119 + - low-level entity CDC stream 120 + - session projection update stream 121 + 122 + Desired effect: 123 + 124 + - Downstream libraries receive structured updates instead of inferring change meaning from raw paths. 125 + 126 + ### 4) Ordering and Determinism Sweep 127 + 128 + Files: 129 + 130 + - [`/src/storage/reader.rs`](/src/storage/reader.rs) 131 + - [`/src/storage/paths.rs`](/src/storage/paths.rs) 132 + - [`/src/index.rs`](/src/index.rs) 133 + 134 + Changes: 135 + 136 + - Ensure all externally-visible iteration and list APIs are explicitly ordered and documented. 137 + - Add tests asserting order stability across repeated scans. 138 + 139 + Desired effect: 140 + 141 + - Consumer behavior is reproducible and easier to cache. 142 + 143 + ### 5) Integration Test Fixtures 144 + 145 + Files: 146 + 147 + - (new) `/tests/fixtures/...` 148 + - (new) `/tests/index_build.rs` 149 + - (new) `/tests/materializer_flow.rs` 150 + - (new) `/tests/corruption_policy.rs` 151 + 152 + Changes: 153 + 154 + - Add fixture trees for valid, partial, and corrupted storage states. 155 + - Validate index/report behavior and staged flow behavior. 156 + 157 + Desired effect: 158 + 159 + - Refactors become safer and API contracts are enforced by tests. 160 + 161 + ## Proposed Domain Grouping (Next Structure) 162 + 163 + ```text 164 + src/ 165 + core/ 166 + error.rs 167 + id.rs 168 + storage/ 169 + paths.rs 170 + mmap.rs 171 + reader.rs 172 + index/ 173 + model.rs 174 + builder.rs 175 + query.rs 176 + report.rs 177 + materialize/ 178 + session.rs 179 + flow.rs 180 + projection.rs 181 + change/ 182 + event.rs 183 + stream.rs 184 + backend.rs 185 + types/ 186 + session.rs 187 + message.rs 188 + part.rs 189 + ``` 190 + 191 + Notes: 192 + 193 + - This keeps responsibilities grouped by domain, not by technical utility alone. 194 + - Breaking changes are acceptable while crate version is `<1.0`. 195 + 196 + ## API Contract Draft (Before Docs) 197 + 198 + ### Identity keys 199 + 200 + - Session: `(project_id, session_id)` 201 + - Message: `(session_id, message_id)` 202 + - Part: `(message_id, part_id)` 203 + 204 + ### Version/order keys 205 + 206 + - `generation` (batch-level ordering boundary) 207 + - `seq_in_generation` (within-batch ordering) 208 + 209 + ### Error policy 210 + 211 + - Parsing and linkage issues should be represented as typed errors or skip reports. 212 + - Avoid collapsing distinct failure modes into generic `NotFound` where context is available. 213 + 214 + ## Acceptance Criteria for This Regenesis Pass 215 + 216 + 1. Builder returns an explicit report for indexed + skipped entities. 217 + 2. Materializer exposes staged flow methods without requiring full tree assembly. 218 + 3. Change/event envelope and stream traits are public and test-covered. 219 + 4. Public list/iteration APIs document and enforce deterministic ordering. 220 + 5. Integration tests cover valid, partial, and corrupted storage trees. 221 + 6. README usage examples align with final staged API naming. 222 + 223 + ## Out of Scope (This Pass) 224 + 225 + - Full watchman runtime implementation details. 226 + - Durable CDC event log persistence. 227 + - Full API docs text and exhaustive rustdoc examples. 228 + 229 + Those come after this pass completes. 230 + 231 + ## Post-Flow-Decomposition Next Options 232 + 233 + Flow decomposition is now implemented with staged planning/execution APIs and Bon-based options construction. The next options to pursue before API docs are: 234 + 235 + ### Option A: Flow and Index Diagnostics 236 + 237 + Scope: 238 + 239 + - add `FlowReport` and `BuildReport` outputs 240 + - record indexed counts, skipped counts, and structured skip reasons 241 + 242 + Value: 243 + 244 + - consumers can choose strict vs tolerant policies without guessing from partial results 245 + 246 + ### Option B: Integration Fixtures for Partial/Corrupt Trees 247 + 248 + Scope: 249 + 250 + - add fixture-backed integration tests for valid, partial, and corrupted storage states 251 + - assert behavior of `include_*` flags and `message_limit` / `part_limit_per_message` 252 + 253 + Value: 254 + 255 + - locks in behavior under real-world filesystem drift and malformed JSON 256 + 257 + ### Option C: Typed Projection Outputs to Reduce Overfetch 258 + 259 + Scope: 260 + 261 + - add projection structs for metadata-only and partially hydrated reads 262 + - make projection-building methods explicit in materializer flow API 263 + 264 + Value: 265 + 266 + - consumer libraries can avoid full-tree loads while keeping ergonomic, typed responses
+4 -1
src/lib.rs
··· 15 15 MessageMeta, MessageRole, PartKind, PartRef, SessionIndex, SessionIndexBuilder, SessionMeta, 16 16 }; 17 17 pub use loader::{LoadedSession, MessageWithParts, SessionLoader, SessionTree}; 18 - pub use materializer::{SessionMaterializer, Stats as MaterializerStats}; 18 + pub use materializer::{ 19 + MessageFlowScope, SessionFlowOptions, SessionFlowResult, SessionFlowScope, SessionMaterializer, 20 + Stats as MaterializerStats, 21 + }; 19 22 pub use storage::{FileReader, MappedFile, MappedFileCache, StoragePaths}; 20 23 pub use types::{ 21 24 message::{AssistantMessage, FileDiff, Message, UserMessage},
+174 -7
src/materializer.rs
··· 4 4 use crate::storage::{FileReader, MappedFile, StoragePaths}; 5 5 use crate::types::{Message, Part, SessionInfo}; 6 6 use crate::{Error, Result}; 7 + use bon::Builder; 7 8 use std::path::Path; 8 9 use std::sync::Arc; 9 10 10 11 pub struct SessionMaterializer { 11 12 reader: FileReader, 12 13 index: SessionIndex, 14 + } 15 + 16 + #[derive(Debug, Clone, Builder)] 17 + pub struct SessionFlowOptions { 18 + pub session_id: SessionId, 19 + #[builder(default = true)] 20 + pub include_diff: bool, 21 + #[builder(default = true)] 22 + pub include_messages: bool, 23 + #[builder(default = true)] 24 + pub include_parts: bool, 25 + pub message_limit: Option<usize>, 26 + pub part_limit_per_message: Option<usize>, 27 + } 28 + 29 + #[derive(Debug, Clone)] 30 + pub struct SessionFlowScope { 31 + pub session_id: SessionId, 32 + pub project_id: String, 33 + pub message_ids: Vec<MessageId>, 34 + } 35 + 36 + #[derive(Debug, Clone)] 37 + pub struct MessageFlowScope { 38 + pub session_id: SessionId, 39 + pub message_id: MessageId, 40 + pub part_ids: Vec<PartId>, 41 + } 42 + 43 + #[derive(Debug, Clone)] 44 + pub struct SessionFlowResult { 45 + pub scope: SessionFlowScope, 46 + pub message_scopes: Vec<MessageFlowScope>, 47 + pub session: LoadedSession, 48 + pub messages: Vec<MessageWithParts>, 13 49 } 14 50 15 51 impl SessionMaterializer { ··· 92 128 } 93 129 94 130 pub fn load_session(&self, session_id: &SessionId) -> Result<LoadedSession> { 131 + self.load_session_with_diff_policy(session_id, true) 132 + } 133 + 134 + pub fn load_session_with_diff_policy( 135 + &self, 136 + session_id: &SessionId, 137 + include_diff: bool, 138 + ) -> Result<LoadedSession> { 95 139 let info = self.load_session_info(session_id)?; 96 - let diff = self.reader.read_diff(session_id).ok(); 140 + let diff = if include_diff { 141 + self.reader.read_diff(session_id).ok() 142 + } else { 143 + None 144 + }; 97 145 Ok(LoadedSession { info, diff }) 98 146 } 99 147 ··· 109 157 110 158 pub fn load_parts_for_message(&self, message_id: &MessageId) -> Result<Vec<Part>> { 111 159 let part_ids = self.part_ids_for_message(message_id); 160 + self.load_parts_by_ids(part_ids) 161 + } 162 + 163 + pub fn load_parts_by_ids(&self, part_ids: &[PartId]) -> Result<Vec<Part>> { 112 164 let mut parts = Vec::with_capacity(part_ids.len()); 113 165 for part_id in part_ids { 114 166 parts.push(self.load_part(part_id)?); ··· 117 169 } 118 170 119 171 pub fn load_message_with_parts(&self, message_id: &MessageId) -> Result<MessageWithParts> { 172 + let part_ids = self.part_ids_for_message(message_id); 173 + self.load_message_with_part_ids(message_id, part_ids) 174 + } 175 + 176 + pub fn load_message_with_part_ids( 177 + &self, 178 + message_id: &MessageId, 179 + part_ids: &[PartId], 180 + ) -> Result<MessageWithParts> { 120 181 let message = self.load_message(message_id)?; 121 - let parts = self.load_parts_for_message(message_id)?; 182 + let parts = self.load_parts_by_ids(part_ids)?; 122 183 Ok(MessageWithParts { message, parts }) 123 184 } 124 185 186 + pub fn load_messages_by_ids(&self, message_ids: &[MessageId]) -> Result<Vec<Message>> { 187 + let mut messages = Vec::with_capacity(message_ids.len()); 188 + for message_id in message_ids { 189 + messages.push(self.load_message(message_id)?); 190 + } 191 + Ok(messages) 192 + } 193 + 194 + pub fn load_messages_with_scopes( 195 + &self, 196 + message_scopes: &[MessageFlowScope], 197 + ) -> Result<Vec<MessageWithParts>> { 198 + let mut messages = Vec::with_capacity(message_scopes.len()); 199 + for scope in message_scopes { 200 + messages.push(self.load_message_with_part_ids(&scope.message_id, &scope.part_ids)?); 201 + } 202 + Ok(messages) 203 + } 204 + 125 205 pub fn load_messages_with_parts_for_session( 126 206 &self, 127 207 session_id: &SessionId, 128 208 ) -> Result<Vec<MessageWithParts>> { 129 209 let message_ids = self.message_ids_for_session(session_id); 130 - let mut messages = Vec::with_capacity(message_ids.len()); 210 + let mut message_scopes = Vec::with_capacity(message_ids.len()); 211 + 131 212 for message_id in message_ids { 132 - messages.push(self.load_message_with_parts(message_id)?); 213 + message_scopes.push(MessageFlowScope { 214 + session_id: session_id.clone(), 215 + message_id: message_id.clone(), 216 + part_ids: self.part_ids_for_message(message_id).to_vec(), 217 + }); 133 218 } 134 - Ok(messages) 219 + 220 + self.load_messages_with_scopes(&message_scopes) 135 221 } 136 222 137 223 pub fn load_session_tree(&self, session_id: &SessionId) -> Result<SessionTree> { 138 - let session = self.load_session(session_id)?; 139 - let messages = self.load_messages_with_parts_for_session(session_id)?; 224 + let flow = self.run_session_flow( 225 + &SessionFlowOptions::builder() 226 + .session_id(session_id.clone()) 227 + .build(), 228 + )?; 229 + 230 + let session = flow.session; 231 + let messages = flow.messages; 140 232 Ok(SessionTree { session, messages }) 141 233 } 142 234 235 + pub fn plan_session_flow(&self, options: &SessionFlowOptions) -> Result<SessionFlowScope> { 236 + let meta = self.require_session_meta(&options.session_id)?; 237 + 238 + let mut message_ids = self.message_ids_for_session(&options.session_id).to_vec(); 239 + if let Some(limit) = options.message_limit { 240 + message_ids.truncate(limit); 241 + } 242 + 243 + Ok(SessionFlowScope { 244 + session_id: options.session_id.clone(), 245 + project_id: meta.project_id.clone(), 246 + message_ids, 247 + }) 248 + } 249 + 250 + pub fn plan_message_flows( 251 + &self, 252 + options: &SessionFlowOptions, 253 + scope: &SessionFlowScope, 254 + ) -> Result<Vec<MessageFlowScope>> { 255 + if !options.include_messages { 256 + return Ok(Vec::new()); 257 + } 258 + 259 + let mut message_scopes = Vec::with_capacity(scope.message_ids.len()); 260 + for message_id in &scope.message_ids { 261 + self.require_message_meta(message_id)?; 262 + let mut part_ids = if options.include_parts { 263 + self.part_ids_for_message(message_id).to_vec() 264 + } else { 265 + Vec::new() 266 + }; 267 + 268 + if let Some(limit) = options.part_limit_per_message { 269 + part_ids.truncate(limit); 270 + } 271 + 272 + message_scopes.push(MessageFlowScope { 273 + session_id: scope.session_id.clone(), 274 + message_id: message_id.clone(), 275 + part_ids, 276 + }); 277 + } 278 + 279 + Ok(message_scopes) 280 + } 281 + 282 + pub fn run_session_flow(&self, options: &SessionFlowOptions) -> Result<SessionFlowResult> { 283 + let scope = self.plan_session_flow(options)?; 284 + let session = 285 + self.load_session_with_diff_policy(&scope.session_id, options.include_diff)?; 286 + let message_scopes = self.plan_message_flows(options, &scope)?; 287 + let messages = self.load_messages_with_scopes(&message_scopes)?; 288 + 289 + Ok(SessionFlowResult { 290 + scope, 291 + message_scopes, 292 + session, 293 + messages, 294 + }) 295 + } 296 + 143 297 pub fn session_metas_created_since(&self, since: i64) -> Vec<&SessionMeta> { 144 298 self.index 145 299 .session_metas() ··· 204 358 #[cfg(test)] 205 359 mod tests { 206 360 use super::*; 361 + use crate::SessionId; 207 362 208 363 #[test] 209 364 fn test_stats() { ··· 215 370 cached_files: 3, 216 371 }; 217 372 assert_eq!(stats.sessions, 10); 373 + } 374 + 375 + #[test] 376 + fn test_flow_options_builder_defaults() { 377 + let session_id = SessionId::new("ses_3975b29b7ffeDyjus9LjxKUoeX").unwrap(); 378 + let options = SessionFlowOptions::builder().session_id(session_id).build(); 379 + 380 + assert!(options.include_diff); 381 + assert!(options.include_messages); 382 + assert!(options.include_parts); 383 + assert_eq!(options.message_limit, None); 384 + assert_eq!(options.part_limit_per_message, None); 218 385 } 219 386 }