Editor for papermario-dx mods
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

improve render performance greatly

+527 -296
+1 -1
CONTRIBUTING.md
··· 34 34 - Write unit tests for all public functions containing business logic, data transformations, or state management. They go in a `#[cfg(test)] mod tests` at the bottom of each file. GUI/rendering code does not require unit tests. 35 35 - Write integration tests using `egui_kittest` for GUI code. Binary crates (like `kammy`) cannot use the `tests/` directory at the crate root because there is no library target to import. Instead, use a `src/tests.rs` module gated behind `#[cfg(test)]`, with submodules in `src/tests/` organized by feature (e.g. `src/tests/undo.rs`). Shared test utilities go in `src/tests.rs`. Library crates should use the standard `tests/` directory at the crate root. 36 36 - Prefer module-level inner doc comments (`//!`) at the top of a file over outer doc comments (`///`) on the `mod` declaration. This keeps the documentation next to the code it describes. 37 - - Avoid just `#[expect]` or `#[allow]`ing lines. The checks are there for a reason. For example, `as` should usually be `.into()`. 37 + - Avoid just `#[expect]` or `#[allow]`ing lines. The checks are there for a reason. For example, `as` should usually be `.into()`, or `.try_into()?`. 38 38 39 39 ## Error handling 40 40
+1
Cargo.lock
··· 3128 3128 dependencies = [ 3129 3129 "loro", 3130 3130 "loroscope", 3131 + "parallel_rdp", 3131 3132 "rsp", 3132 3133 ] 3133 3134
+8 -1
Cargo.toml
··· 36 36 suspicious = { level = "warn", priority = -1 } 37 37 perf = { level = "warn", priority = -1 } 38 38 39 - must_use_candidate = "allow" 40 39 redundant_pub_crate = "deny" 40 + 41 + # Allows 42 + many_single_char_names = "allow" 43 + must_use_candidate = "allow" 44 + too_many_lines = "allow" 41 45 42 46 # Don't panic 43 47 unwrap_used = "deny" ··· 99 103 use_self = "deny" 100 104 needless_collect = "warn" 101 105 branches_sharing_code = "warn" 106 + 107 + [profile.dev.package.rsp] 108 + opt-level = 2
+1
crates/kammy/Cargo.toml
··· 10 10 [dependencies] 11 11 parallel_rdp = { path = "../parallel_rdp" } 12 12 pm64 = { path = "../pm64" } 13 + 13 14 loroscope = { path = "../loroscope" } 14 15 loro = { workspace = true } 15 16 winit = { workspace = true }
+5 -15
crates/kammy/src/app.rs
··· 9 9 10 10 use crate::Project; 11 11 use crate::dock::{Dock, DockPosition}; 12 - use crate::editor::display_list::DisplayListEditor; 13 12 use crate::editor::map::MapEditor; 14 13 use crate::editor::todo::TodoEditor; 15 14 use crate::editor::{Editor, EditorId, Inspect, TileBehavior, UndoBehavior}; ··· 18 17 use crate::tool::assets::AssetsTool; 19 18 use crate::tool::hierarchy::HierarchyTool; 20 19 use crate::tool::inspector::InspectorTool; 20 + 21 + /// Callback for initialising CRDT data when a new editor is added. 22 + type SetupCrdt = dyn Fn(EditorId, &Project); 21 23 22 24 /// The main application, managing a tabbed editor tree with per-tab undo 23 25 /// and collapsible tool docks. ··· 135 137 fn add_editor( 136 138 &mut self, 137 139 make_editor: impl FnOnce(EditorId) -> Box<dyn Editor>, 138 - setup_crdt: Option<&dyn Fn(EditorId, &Project)>, 140 + setup_crdt: Option<&SetupCrdt>, 139 141 ) { 140 142 let editor_id = self.alloc_editor_id(); 141 143 ··· 186 188 project.doc().commit(); 187 189 }), 188 190 ); 189 - } 190 - 191 - fn add_display_list_editor(&mut self) { 192 - self.add_editor(|id| Box::new(DisplayListEditor::new(id)), None); 193 191 } 194 192 195 193 fn add_map_editor(&mut self) { ··· 345 343 if ui.button("+ Todo").clicked() { 346 344 self.add_todo_editor(); 347 345 } 348 - if ui.button("+ Display List").clicked() { 349 - self.add_display_list_editor(); 350 - } 351 346 if ui.button("+ Map").clicked() { 352 347 self.add_map_editor(); 353 348 } ··· 394 389 // Destructure for disjoint borrows 395 390 let Self { 396 391 project, 397 - active_editor_id, 398 392 inspect, 399 393 left_dock, 400 394 right_dock, ··· 402 396 .. 403 397 } = self; 404 398 405 - let mut tool_ctx = ToolContext { 406 - project, 407 - active_editor_id: *active_editor_id, 408 - inspect, 409 - }; 399 + let mut tool_ctx = ToolContext { inspect }; 410 400 411 401 bottom_dock.show(ctx, &mut tool_ctx); 412 402 left_dock.show(ctx, &mut tool_ctx);
+2 -12
crates/kammy/src/dock.rs
··· 52 52 } 53 53 } 54 54 55 - /// Whether the dock is currently open (has an active tool). 56 - pub fn is_open(&self) -> bool { 57 - self.active.is_some() 58 - } 59 - 60 55 /// Toggles a tool by index. If the tool is already active, collapses the 61 56 /// dock. Otherwise, activates the tool. 62 57 pub fn toggle_tool(&mut self, idx: usize) { ··· 129 124 #[cfg(test)] 130 125 mod tests { 131 126 use super::*; 132 - 133 - use egui; 134 127 135 128 #[derive(Debug)] 136 129 struct DummyTool { ··· 169 162 #[test] 170 163 fn starts_collapsed() { 171 164 let dock = make_dock(None); 172 - assert!(!dock.is_open()); 165 + assert_eq!(dock.active, None); 173 166 } 174 167 175 168 #[test] 176 169 fn starts_open() { 177 170 let dock = make_dock(Some(0)); 178 - assert!(dock.is_open()); 171 + assert_eq!(dock.active, Some(0)); 179 172 } 180 173 181 174 #[test] ··· 184 177 185 178 dock.toggle_tool(0); 186 179 assert_eq!(dock.active, Some(0)); 187 - assert!(dock.is_open()); 188 180 189 181 // Toggle same tool collapses 190 182 dock.toggle_tool(0); 191 183 assert_eq!(dock.active, None); 192 - assert!(!dock.is_open()); 193 184 } 194 185 195 186 #[test] ··· 198 189 199 190 dock.toggle_tool(1); 200 191 assert_eq!(dock.active, Some(1)); 201 - assert!(dock.is_open()); 202 192 } 203 193 }
-1
crates/kammy/src/editor.rs
··· 4 4 5 5 //! Editor trait, built-in editor implementations, and tile-tree dispatch. 6 6 7 - pub mod display_list; 8 7 pub mod map; 9 8 pub mod todo; 10 9
-121
crates/kammy/src/editor/display_list.rs
··· 1 - // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 - // 3 - // SPDX-License-Identifier: AGPL-3.0-or-later 4 - 5 - //! Display list editor: renders N64 display lists via parallel-rdp. 6 - //! 7 - //! Currently renders a solid-color test pattern to verify the full pipeline: 8 - //! RDRAM write -> RDP command submit -> scanout -> wgpu texture -> egui display. 9 - 10 - use super::{Editor, EditorContext, EditorId}; 11 - use crate::widget::rdp_viewport::{DisplayList, RdpViewport, ViConfig}; 12 - 13 - /// An editor that renders N64 display lists via the RDP. 14 - pub struct DisplayListEditor { 15 - id: EditorId, 16 - viewport: RdpViewport, 17 - frame_count: u32, 18 - } 19 - 20 - impl std::fmt::Debug for DisplayListEditor { 21 - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 22 - f.debug_struct("DisplayListEditor") 23 - .field("id", &self.id) 24 - .field("frame_count", &self.frame_count) 25 - .finish_non_exhaustive() 26 - } 27 - } 28 - 29 - impl DisplayListEditor { 30 - /// Creates a new display list editor with the given stable ID. 31 - pub fn new(id: EditorId) -> Self { 32 - Self { 33 - id, 34 - viewport: RdpViewport::new(4 * 1024 * 1024), 35 - frame_count: 0, 36 - } 37 - } 38 - } 39 - 40 - /// Build an RDP display list that fills the framebuffer with a solid color. 41 - /// 42 - /// The color cycles through red, green, blue based on the frame counter, 43 - /// producing a simple animated test pattern. 44 - fn build_fill_rect_display_list(frame: u32) -> DisplayList { 45 - // Framebuffer: 320x240, 16-bit (5/5/5/1) 46 - const FB_WIDTH: u32 = 320; 47 - const FB_HEIGHT: u32 = 240; 48 - const FB_ORIGIN: u32 = 0x100; // Non-zero: parallel-rdp treats origin 0 as blank 49 - 50 - let phase = frame / 60 % 3; 51 - let fill_color: u32 = match phase { 52 - 0 => 0xF801_F801, // Red (16-bit 5551: R=31, G=0, B=0, A=1), packed twice 53 - 1 => 0x07C1_07C1, // Green 54 - _ => 0x003F_003F, // Blue 55 - }; 56 - 57 - // RDP commands (each command is 64 bits = 2 words) 58 - let commands: Vec<u32> = vec![ 59 - // Set Color Image: format=RGBA, size=16-bit, width=320, address=0 60 - // Command byte: 0x3F (Set Color Image) 61 - // Bits: [63:56]=0x3F, [55:53]=format(0=RGBA), [52:51]=size(1=16-bit), 62 - // [41:32]=width-1, [25:0]=address 63 - 0x3F10_0000 | ((FB_WIDTH - 1) & 0x3FF), 64 - FB_ORIGIN, 65 - // Set Scissor: XH=0, YH=0, XL=320<<2, YL=240<<2 66 - // Command byte: 0x2D 67 - 0x2D00_0000, 68 - ((FB_WIDTH << 2) << 12) | (FB_HEIGHT << 2), 69 - // Set Other Modes: cycle_type=Fill 70 - // Command byte: 0x2F, bit 55-52 = cycle type (3=Fill) 71 - 0x2F30_0000, 72 - 0x0000_0000, 73 - // Set Fill Color 74 - // Command byte: 0x37 75 - 0x3700_0000, 76 - fill_color, 77 - // Fill Rectangle: covers entire framebuffer 78 - // Command byte: 0x36 79 - // Bits: XL=320<<2, YL=240<<2 (word 0), XH=0, YH=0 (word 1) 80 - 0x3600_0000 | ((FB_WIDTH << 2) << 12) | (FB_HEIGHT << 2), 81 - 0x0000_0000, 82 - // Sync Full: wait for all rendering to complete 83 - // Command byte: 0x29 84 - 0x2900_0000, 85 - 0x0000_0000, 86 - ]; 87 - 88 - let vi = ViConfig { 89 - // Control: 16-bit color (bits 1:0 = 2), anti-alias + resample (bits 9:8 = 3) 90 - control: 0x0000_0302, 91 - origin: FB_ORIGIN, 92 - width: FB_WIDTH, 93 - v_sync: 525, // NTSC: 525 lines 94 - h_start: (0x006C << 16) | 0x02EC, // Typical NTSC H range 95 - v_start: (0x0025 << 16) | 0x01FF, // Typical NTSC V range 96 - x_scale: (FB_WIDTH * 1024 / 640), // Scale to fill 640 output 97 - y_scale: (FB_HEIGHT * 1024 / 480), // Scale to fill 480 output 98 - }; 99 - 100 - DisplayList { commands, vi } 101 - } 102 - 103 - impl Editor for DisplayListEditor { 104 - fn id(&self) -> EditorId { 105 - self.id 106 - } 107 - 108 - fn title(&self) -> String { 109 - "Display List".to_owned() 110 - } 111 - 112 - fn ui(&mut self, ui: &mut egui::Ui, ctx: &mut EditorContext) { 113 - let display_list = build_fill_rect_display_list(self.frame_count); 114 - self.frame_count = self.frame_count.wrapping_add(1); 115 - 116 - self.viewport 117 - .ui(ui, ctx.gpu.as_deref_mut(), &display_list, 4.0 / 3.0, |_| {}); 118 - 119 - ui.ctx().request_repaint(); 120 - } 121 - }
+21 -23
crates/kammy/src/editor/map.rs
··· 15 15 16 16 use super::{Editor, EditorContext, EditorId}; 17 17 use crate::Project; 18 - use crate::widget::rdp_viewport::{DisplayList, RdpViewport}; 18 + use crate::widget::rdp_viewport::RdpViewport; 19 19 20 20 const FB_WIDTH: u32 = 320; 21 21 const FB_HEIGHT: u32 = 240; ··· 27 27 id: EditorId, 28 28 viewport: RdpViewport, 29 29 camera: camera::OrbitCamera, 30 - /// Persistent RSP renderer — avoids 4 MB RDRAM allocation per frame. 31 - rsp_renderer: pm64::render::Renderer, 32 30 } 33 31 34 32 impl std::fmt::Debug for MapEditor { ··· 46 44 id, 47 45 viewport: RdpViewport::new(4 * 1024 * 1024), 48 46 camera: camera::OrbitCamera::default(), 49 - rsp_renderer: pm64::render::Renderer::new(), 50 47 } 51 48 } 52 49 } ··· 61 58 } 62 59 63 60 fn ui(&mut self, ui: &mut egui::Ui, ctx: &mut EditorContext) { 61 + // Handle camera input first so this frame's drag is reflected immediately. 62 + let interact_rect = ui.available_rect_before_wrap(); 63 + let interact_response = ui.interact( 64 + interact_rect, 65 + ui.id().with("camera"), 66 + egui::Sense::click_and_drag(), 67 + ); 68 + self.camera.handle_input(&interact_response); 69 + 64 70 let nodes = extract_nodes(ctx.project); 65 - let aspect = FB_WIDTH as f32 / FB_HEIGHT as f32; 71 + #[expect( 72 + clippy::cast_possible_truncation, 73 + clippy::as_conversions, 74 + reason = "320/240 is well within f32 range" 75 + )] 76 + let aspect = (f64::from(FB_WIDTH) / f64::from(FB_HEIGHT)) as f32; 66 77 let camera_matrices = self.camera.to_n64_matrices(aspect); 67 - 68 - let rdp_commands = self.rsp_renderer.render(&nodes, &camera_matrices); 78 + let vi = vi_config_ntsc(FB_ORIGIN); 69 79 70 - let display_list = DisplayList { 71 - commands: rdp_commands, 72 - vi: vi_config_ntsc(FB_ORIGIN), 73 - }; 74 - 75 - let response = self.viewport.ui( 80 + self.viewport.ui( 76 81 ui, 77 82 ctx.gpu.as_deref_mut(), 78 - &display_list, 83 + &vi, 79 84 aspect, 80 - |_rdram| {}, 85 + &nodes, 86 + &camera_matrices, 81 87 ); 82 - 83 - // Layer a drag/scroll sensor over the viewport for camera control 84 - let response = ui.interact( 85 - response.rect, 86 - response.id.with("camera"), 87 - egui::Sense::click_and_drag(), 88 - ); 89 - self.camera.handle_input(&response); 90 88 91 89 ui.ctx().request_repaint(); 92 90 }
+14 -10
crates/kammy/src/gpu.rs
··· 22 22 use winit::window::Window; 23 23 24 24 /// GPU state shared across the application. 25 + /// 26 + /// Field order matters for drop: wgpu resources must be released before 27 + /// `rdp_context` destroys the underlying VkDevice/VkInstance. 25 28 pub struct GpuState { 26 - /// The parallel-rdp Vulkan context (owns the `VkInstance` + `VkDevice`). 27 - pub rdp_context: parallel_rdp::VulkanContext, 28 - /// wgpu device wrapping Granite's `VkDevice`. 29 - pub device: wgpu::Device, 30 - /// wgpu queue wrapping Granite's graphics queue. 31 - pub queue: wgpu::Queue, 32 - /// Window surface for presentation. 33 - surface: wgpu::Surface<'static>, 34 - /// Current surface configuration. 35 - surface_config: wgpu::SurfaceConfiguration, 36 29 /// egui renderer (draws egui primitives via wgpu). 37 30 pub renderer: egui_wgpu::Renderer, 31 + /// Current surface configuration. 32 + surface_config: wgpu::SurfaceConfiguration, 33 + /// Window surface for presentation. 34 + surface: wgpu::Surface<'static>, 35 + /// wgpu queue wrapping Granite's graphics queue. 36 + pub queue: wgpu::Queue, 37 + /// wgpu device wrapping Granite's `VkDevice`. 38 + pub device: wgpu::Device, 38 39 /// Prevents the wgpu Instance from being dropped prematurely. 39 40 _instance: wgpu::Instance, 41 + /// The parallel-rdp Vulkan context (owns the `VkInstance` + `VkDevice`). 42 + /// Must be last: Granite owns the Vulkan handles that everything above wraps. 43 + pub rdp_context: parallel_rdp::VulkanContext, 40 44 } 41 45 42 46 impl std::fmt::Debug for GpuState {
+12 -8
crates/kammy/src/main.rs
··· 55 55 } 56 56 57 57 /// Runtime state created after the window is available. 58 + /// 59 + /// Field order matters: Rust drops fields in declaration order, and the 60 + /// app's editors hold parallel-rdp `Renderer`s whose destructors need 61 + /// the Vulkan device that lives inside `gpu`. So `app` must drop first. 58 62 struct AppState { 59 - window: Arc<Window>, 60 - gpu: gpu::GpuState, 61 - egui_ctx: egui::Context, 63 + app: app::KammyApp, 62 64 egui_state: egui_winit::State, 63 - app: app::KammyApp, 65 + egui_ctx: egui::Context, 66 + gpu: gpu::GpuState, 67 + window: Arc<Window>, 64 68 } 65 69 66 70 impl ApplicationHandler for WinitApp { ··· 106 110 let app = app::KammyApp::new(); 107 111 108 112 self.state = Some(AppState { 109 - window, 110 - gpu, 111 - egui_ctx, 112 - egui_state, 113 113 app, 114 + egui_state, 115 + egui_ctx, 116 + gpu, 117 + window, 114 118 }); 115 119 } 116 120
+1 -6
crates/kammy/src/tool.rs
··· 12 12 pub mod hierarchy; 13 13 pub mod inspector; 14 14 15 - use crate::Project; 16 - use crate::editor::{EditorId, Inspect}; 15 + use crate::editor::Inspect; 17 16 18 17 /// Context passed to each tool during rendering. 19 18 pub struct ToolContext<'a> { 20 - /// The shared project data (CRDT document). 21 - pub project: &'a Project, 22 - /// The currently focused editor, if any. 23 - pub active_editor_id: Option<EditorId>, 24 19 /// The current inspect object set by editors. Tools like the Inspector 25 20 /// read this to display property UI. 26 21 pub inspect: &'a mut Option<Box<dyn Inspect>>,
+137 -62
crates/kammy/src/widget/rdp_viewport.rs
··· 6 6 7 7 //! An egui widget that renders N64 display lists using parallel-rdp. 8 8 9 + use pm64::gbi::{CameraMatrices, NodeData}; 10 + use pm64::render::ParallelRdpSink; 11 + 9 12 use crate::gpu::GpuState; 10 13 11 14 /// N64 Video Interface register configuration for scanout. ··· 29 32 pub y_scale: u32, 30 33 } 31 34 32 - /// A display list to be rendered by the RDP. 33 - #[derive(Debug, Clone)] 34 - pub struct DisplayList { 35 - /// RDP command words (big-endian 32-bit). 36 - pub commands: Vec<u32>, 37 - /// Video Interface configuration for scanout. 38 - pub vi: ViConfig, 39 - } 40 - 41 35 /// Reusable egui widget that renders N64 display lists via parallel-rdp. 42 36 /// 43 37 /// Each instance owns its own [`parallel_rdp::Renderer`] (command processor + 44 38 /// RDRAM). The widget submits display list commands, performs scanout, and 45 39 /// displays the result as an egui image. 46 40 /// 47 - /// The renderer is created lazily on the first [`show`](Self::show) call that 41 + /// GPU work is pipelined: each frame submits commands and signals the GPU 42 + /// timeline (non-blocking), then waits for the *previous* frame's signal 43 + /// at the start of the next frame. This overlaps GPU rendering with the 44 + /// CPU's egui layout pass, eliminating the blocking `flush()` stall. 45 + /// 46 + /// The renderer is created lazily on the first [`ui`](Self::ui) call that 48 47 /// receives a GPU context. 49 48 pub struct RdpViewport { 50 49 renderer: Option<parallel_rdp::Renderer>, 51 50 rdram_size: u32, 51 + /// Persistent RSP renderer — avoids 4 MB RDRAM allocation per frame. 52 + rsp_renderer: pm64::render::Renderer, 52 53 /// Registered egui texture ID (reused across frames). 53 54 texture_id: Option<egui::TextureId>, 54 55 /// The current frame's scanout texture wrapper. Kept alive so egui can 55 - /// reference it during the render pass (which runs after `show()`). 56 + /// reference it during the render pass (which runs after `ui()`). 56 57 current_texture: Option<wgpu::Texture>, 58 + /// Pending GPU timeline value from the previous frame's scanout. 59 + pending_timeline: Option<u64>, 60 + /// Scanout result waiting for the timeline to complete. 61 + pending_scanout: Option<PendingScanout>, 62 + } 63 + 64 + /// A scanout result waiting to be imported into wgpu once the GPU finishes. 65 + struct PendingScanout { 66 + vk_image: ash::vk::Image, 67 + width: u32, 68 + height: u32, 57 69 } 58 70 59 71 impl std::fmt::Debug for RdpViewport { ··· 64 76 } 65 77 } 66 78 79 + impl Drop for RdpViewport { 80 + fn drop(&mut self) { 81 + // Wait for any in-flight GPU work before destroying the renderer. 82 + if let (Some(timeline), Some(renderer)) = 83 + (self.pending_timeline.take(), self.renderer.as_mut()) 84 + { 85 + renderer.wait_for_timeline(timeline); 86 + } 87 + // Release the wgpu texture wrapping a VkImage owned by the renderer 88 + // before the renderer (and its CommandProcessor) are dropped. 89 + self.current_texture = None; 90 + } 91 + } 92 + 67 93 impl RdpViewport { 68 94 /// Creates a new viewport. 69 95 /// 70 96 /// `rdram_size` is the RDRAM capacity in bytes (typically 4 MiB). The 71 - /// underlying renderer is created lazily when [`show`](Self::show) is 97 + /// underlying renderer is created lazily when [`ui`](Self::ui) is 72 98 /// first called with a GPU context. 73 99 pub fn new(rdram_size: u32) -> Self { 74 100 Self { 75 101 renderer: None, 76 102 rdram_size, 103 + rsp_renderer: pm64::render::Renderer::new(), 77 104 texture_id: None, 78 105 current_texture: None, 106 + pending_timeline: None, 107 + pending_scanout: None, 79 108 } 80 109 } 81 110 82 - /// Renders the display list and shows the result in the UI. 111 + /// Renders an N64 frame and shows the result in the UI. 83 112 /// 84 113 /// `display_aspect` is the intended display aspect ratio (width/height). 85 114 /// The scanout texture is stretched to fill the available UI space at 86 115 /// this ratio — necessary because non-interlaced VI modes produce 87 116 /// half-height scanouts that don't reflect the true display shape. 88 117 /// 89 - /// The closure receives the renderer's RDRAM for direct writes (textures, 90 - /// framebuffer data, etc.) before commands are submitted. 118 + /// Has 1 frame of latency. 91 119 /// 92 120 /// If `gpu` is `None` (headless/test), displays a placeholder label. 93 121 pub fn ui( 94 122 &mut self, 95 123 ui: &mut egui::Ui, 96 124 gpu: Option<&mut GpuState>, 97 - display_list: &DisplayList, 125 + vi: &ViConfig, 98 126 display_aspect: f32, 99 - write_rdram: impl FnOnce(&mut [u8]), 127 + nodes: &[NodeData], 128 + camera: &CameraMatrices, 100 129 ) -> egui::Response { 101 130 let Some(gpu) = gpu else { 102 131 return ui.label("GPU not available"); 103 132 }; 104 133 105 - let renderer = match &mut self.renderer { 106 - Some(r) => r, 107 - None => match parallel_rdp::Renderer::new(&gpu.rdp_context, self.rdram_size, 0) { 108 - Ok(r) => self.renderer.insert(r), 134 + // Lazily create the renderer on first use. 135 + if self.renderer.is_none() { 136 + match parallel_rdp::Renderer::new(&gpu.rdp_context, self.rdram_size, 0) { 137 + Ok(r) => { 138 + self.renderer = Some(r); 139 + } 109 140 Err(e) => { 110 141 tracing::warn!("failed to create RDP renderer: {e:?}"); 111 142 return ui.label("RDP renderer unavailable"); 112 143 } 113 - }, 144 + } 145 + } 146 + 147 + // Wait for the previous frame's GPU work and import its scanout. 148 + // This wait should be near-instant because the GPU has had a full 149 + // egui frame (~16ms) to finish since we signalled. 150 + // 151 + // Scoped separately from the command submission below so 152 + // `update_egui_texture` can borrow `&mut self`. 153 + if let Some(timeline) = self.pending_timeline.take() { 154 + if let Some(renderer) = &mut self.renderer { 155 + renderer.wait_for_timeline(timeline); 156 + } 157 + 158 + if let Some(scanout) = self.pending_scanout.take() { 159 + // SAFETY: wait_for_timeline ensures the GPU is done, and the 160 + // VkImage is still valid (no new scanout has been called yet). 161 + if let Some(texture) = unsafe { 162 + import_scanout_image( 163 + &gpu.device, 164 + scanout.vk_image, 165 + scanout.width, 166 + scanout.height, 167 + ) 168 + } { 169 + let view = texture.create_view(&wgpu::TextureViewDescriptor::default()); 170 + self.update_egui_texture(gpu, &view); 171 + self.current_texture = Some(texture); 172 + } else { 173 + tracing::warn!("failed to import scanout VkImage into wgpu"); 174 + } 175 + } 176 + } 177 + 178 + // Submit new work for this frame. 179 + let Some(renderer) = &mut self.renderer else { 180 + return ui.label("RDP renderer unavailable"); 114 181 }; 115 - 116 - write_rdram(renderer.rdram_mut()); 117 182 renderer.begin_frame(); 118 - Self::set_vi_registers(renderer, &display_list.vi); 119 - renderer.enqueue_commands(&display_list.commands); 183 + Self::set_vi_registers(renderer, vi); 184 + self.rsp_renderer 185 + .render_to(nodes, camera, &mut ParallelRdpSink(renderer)); 120 186 121 - let Some((vk_image, width, height)) = renderer.scanout() else { 122 - return ui.label("No scanout output"); 123 - }; 124 - if width == 0 || height == 0 { 125 - return ui.label("No scanout output"); 187 + if let Some((vk_image, width, height)) = renderer.scanout() 188 + && width > 0 189 + && height > 0 190 + { 191 + if self.current_texture.is_none() { 192 + // First frame: no previous texture to display yet, so 193 + // do a blocking flush to bootstrap. 194 + renderer.flush(); 195 + if let Some(texture) = 196 + unsafe { import_scanout_image(&gpu.device, vk_image, width, height) } 197 + { 198 + let view = texture.create_view(&wgpu::TextureViewDescriptor::default()); 199 + self.update_egui_texture(gpu, &view); 200 + self.current_texture = Some(texture); 201 + } 202 + } else { 203 + // Pipeline: signal non-blocking, import on next frame 204 + self.pending_timeline = Some(renderer.signal_timeline()); 205 + self.pending_scanout = Some(PendingScanout { 206 + vk_image, 207 + width, 208 + height, 209 + }); 210 + } 126 211 } 127 212 128 - // Ensure all GPU scanout work is complete before wgpu reads the image 129 - renderer.flush(); 213 + // Display the texture (either from previous frame's import or 214 + // from the bootstrap flush above) 215 + let available = ui.available_size(); 216 + let size = if available.x / available.y.max(1.0) > display_aspect { 217 + egui::vec2(available.y * display_aspect, available.y) 218 + } else { 219 + egui::vec2(available.x, available.x / display_aspect) 220 + }; 130 221 131 - // SAFETY: flush() was called above, and the VkImage from scanout() 132 - // remains valid until the wgpu::Texture is dropped (next frame at earliest). 133 - let Some(texture) = (unsafe { import_scanout_image(&gpu.device, vk_image, width, height) }) 134 - else { 135 - tracing::warn!("failed to import scanout VkImage into wgpu"); 136 - return ui.label("Scanout import failed"); 137 - }; 138 - let view = texture.create_view(&wgpu::TextureViewDescriptor::default()); 222 + if let Some(texture_id) = self.texture_id { 223 + ui.image(egui::load::SizedTexture::new(texture_id, size)) 224 + } else { 225 + ui.label("Loading…") 226 + } 227 + } 139 228 140 - // Register or update the egui texture binding 229 + /// Registers or updates the egui texture binding. 230 + fn update_egui_texture(&mut self, gpu: &mut GpuState, view: &wgpu::TextureView) { 141 231 if let Some(id) = self.texture_id { 142 232 gpu.renderer.update_egui_texture_from_wgpu_texture( 143 233 &gpu.device, 144 - &view, 234 + view, 145 235 wgpu::FilterMode::Nearest, 146 236 id, 147 237 ); 148 238 } else { 149 239 let id = 150 240 gpu.renderer 151 - .register_native_texture(&gpu.device, &view, wgpu::FilterMode::Nearest); 241 + .register_native_texture(&gpu.device, view, wgpu::FilterMode::Nearest); 152 242 self.texture_id = Some(id); 153 243 } 154 - 155 - // Keep texture alive until the render pass uses it 156 - self.current_texture = Some(texture); 157 - 158 - // Scale image to fill available UI space at the caller's display aspect ratio 159 - let available = ui.available_size(); 160 - let size = if available.x / available.y.max(1.0) > display_aspect { 161 - egui::vec2(available.y * display_aspect, available.y) 162 - } else { 163 - egui::vec2(available.x, available.x / display_aspect) 164 - }; 165 - 166 - let Some(texture_id) = self.texture_id else { 167 - return ui.label("Texture not ready"); 168 - }; 169 - ui.image(egui::load::SizedTexture::new(texture_id, size)) 170 244 } 171 245 172 246 fn set_vi_registers(renderer: &mut parallel_rdp::Renderer, vi: &ViConfig) { ··· 186 260 /// 187 261 /// # Safety 188 262 /// 189 - /// The `VkImage` must be valid and fully rendered (call `flush()` first). 190 - /// It must remain valid until the wgpu texture is dropped. 263 + /// The `VkImage` must be valid and fully rendered (call `flush()` or 264 + /// `wait_for_timeline()` first). It must remain valid until the wgpu 265 + /// texture is dropped. 191 266 unsafe fn import_scanout_image( 192 267 device: &wgpu::Device, 193 268 vk_image: ash::vk::Image,
+18 -1
crates/parallel_rdp/src/bridge.cpp
··· 157 157 158 158 void rdp_renderer_destroy(void *renderer) 159 159 { 160 - delete static_cast<RdpRenderer *>(renderer); 160 + auto *r = static_cast<RdpRenderer *>(renderer); 161 + // Ensure all GPU work completes before destroying the CommandProcessor, 162 + // otherwise its destructor may race with in-flight commands. 163 + uint64_t timeline = r->processor->signal_timeline(); 164 + r->processor->wait_for_timeline(timeline); 165 + delete r; 161 166 } 162 167 163 168 uint8_t *rdp_renderer_get_rdram(void *renderer) ··· 283 288 uint64_t timeline = r->processor->signal_timeline(); 284 289 r->processor->wait_for_timeline(timeline); 285 290 } 291 + 292 + uint64_t rdp_renderer_signal_timeline(void *renderer) 293 + { 294 + auto *r = static_cast<RdpRenderer *>(renderer); 295 + return r->processor->signal_timeline(); 296 + } 297 + 298 + void rdp_renderer_wait_for_timeline(void *renderer, uint64_t value) 299 + { 300 + auto *r = static_cast<RdpRenderer *>(renderer); 301 + r->processor->wait_for_timeline(value); 302 + }
+9
crates/parallel_rdp/src/bridge.hpp
··· 120 120 /// Signal the renderer's timeline and wait for all previous work to complete. 121 121 void rdp_renderer_flush(void *renderer); 122 122 123 + /// Signal the renderer's timeline and return the timeline value (non-blocking). 124 + /// 125 + /// Call `rdp_renderer_wait_for_timeline` with the returned value to wait 126 + /// for all work submitted before this signal to complete. 127 + uint64_t rdp_renderer_signal_timeline(void *renderer); 128 + 129 + /// Wait for the renderer's timeline to reach `value`. 130 + void rdp_renderer_wait_for_timeline(void *renderer, uint64_t value); 131 + 123 132 #ifdef __cplusplus 124 133 } 125 134 #endif
+26
crates/parallel_rdp/src/lib.rs
··· 271 271 /// 272 272 /// Write display list data, textures, and framebuffer contents here before 273 273 /// calling [`enqueue_commands`](Self::enqueue_commands) and [`scanout`](Self::scanout). 274 + /// 275 + /// # Panics 276 + /// 277 + /// Panics if the RDRAM size (a `u32`) does not fit in a `usize`. This can 278 + /// only happen on 16-bit platforms, which cannot run Vulkan. 279 + #[expect( 280 + clippy::expect_used, 281 + reason = "RDRAM size is u32, which fits in usize on all Vulkan-capable platforms" 282 + )] 274 283 pub fn rdram_mut(&mut self) -> &mut [u8] { 275 284 unsafe { 276 285 let ptr = ffi::rdp_renderer_get_rdram(self.ptr); ··· 362 371 pub fn flush(&mut self) { 363 372 unsafe { 364 373 ffi::rdp_renderer_flush(self.ptr); 374 + } 375 + } 376 + 377 + /// Signals the GPU timeline and returns a token (non-blocking). 378 + /// 379 + /// Call [`wait_for_timeline`](Self::wait_for_timeline) with the returned 380 + /// value to wait for all work submitted before this signal. 381 + pub fn signal_timeline(&mut self) -> u64 { 382 + unsafe { ffi::rdp_renderer_signal_timeline(self.ptr) } 383 + } 384 + 385 + /// Waits for the GPU timeline to reach the given value. 386 + /// 387 + /// If the GPU has already passed this point, returns immediately. 388 + pub fn wait_for_timeline(&mut self, value: u64) { 389 + unsafe { 390 + ffi::rdp_renderer_wait_for_timeline(self.ptr, value); 365 391 } 366 392 } 367 393 }
+1
crates/pm64/Cargo.toml
··· 9 9 10 10 [dependencies] 11 11 loroscope = { path = "../loroscope" } 12 + parallel_rdp = { path = "../parallel_rdp" } 12 13 rsp = { path = "../rsp" } 13 14 loro = { workspace = true } 14 15
+3 -7
crates/pm64/src/gbi.rs
··· 20 20 } 21 21 22 22 /// A triangle referencing three vertices. 23 - #[derive(Clone, Debug)] 23 + #[derive(Clone, Debug, PartialEq)] 24 24 pub struct TriangleData { 25 25 pub v0: VertexData, 26 26 pub v1: VertexData, ··· 28 28 } 29 29 30 30 /// A model node's geometry in plain (non-CRDT) form. 31 - #[derive(Clone, Debug)] 31 + #[derive(Clone, Debug, PartialEq)] 32 32 pub struct NodeData { 33 33 /// Triangles belonging to this node. 34 34 pub triangles: Vec<TriangleData>, 35 35 } 36 36 37 37 /// N64 camera matrices in s15.16 fixed-point format (64 bytes each). 38 - #[derive(Clone, Debug)] 38 + #[derive(Clone, Debug, PartialEq, Eq)] 39 39 pub struct CameraMatrices { 40 40 /// Projection matrix (64 bytes, s15.16 fixed-point). 41 41 pub projection: [u8; 64], ··· 156 156 /// - `proj_addr`: RDRAM address of the projection matrix. 157 157 /// - `mv_addr`: RDRAM address of the modelview matrix. 158 158 /// - `viewport_addr`: RDRAM address where the viewport struct will be placed. 159 - #[expect( 160 - clippy::many_single_char_names, 161 - reason = "a/b/c vertex indices are standard triangle nomenclature" 162 - )] 163 159 pub fn reconstruct( 164 160 nodes: &[NodeData], 165 161 _camera: &CameraMatrices,
+129 -12
crates/pm64/src/render.rs
··· 9 9 10 10 use crate::gbi::{self, CameraMatrices, GbiOutput, NodeData}; 11 11 12 + /// Bridges [`rsp::RdpSink`] to [`parallel_rdp::Renderer::enqueue_commands`]. 13 + #[derive(Debug)] 14 + pub struct ParallelRdpSink<'a>(pub &'a mut parallel_rdp::Renderer); 15 + 16 + impl rsp::RdpSink for ParallelRdpSink<'_> { 17 + fn receive_commands(&mut self, commands: &[u32]) { 18 + self.0.enqueue_commands(commands); 19 + } 20 + } 21 + 12 22 // Microcode binaries from the Paper Mario 64 decomp 13 23 const F3DEX2_TEXT: &[u8] = include_bytes!(concat!( 14 24 env!("PM64_ASSETS_DIR"), ··· 115 125 /// on every call. 116 126 pub fn render(nodes: &[NodeData], camera: &CameraMatrices) -> Vec<u32> { 117 127 let mut renderer = Renderer::new(); 118 - renderer.render(nodes, camera) 128 + renderer.render(nodes, camera).to_vec() 119 129 } 120 130 121 131 /// Persistent RSP render context that reuses its device across frames. ··· 123 133 /// Avoids the 4 MB RDRAM allocation that [`render`] incurs on every call. 124 134 /// Microcode is loaded once at construction; subsequent [`render`](Self::render) 125 135 /// calls only write the per-frame data (display list, vertices, matrices) 126 - /// and reset the RSP/RDP state. 136 + /// and reset the RSP/RDP control state. 127 137 pub struct Renderer { 128 138 device: rsp::Device, 139 + /// `true` after the first frame (IMEM holds F3DEX2, not rspboot). 140 + warm: bool, 129 141 } 130 142 131 143 impl std::fmt::Debug for Renderer { 132 144 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 133 - f.debug_struct("Renderer").finish_non_exhaustive() 145 + f.debug_struct("Renderer") 146 + .field("warm", &self.warm) 147 + .finish_non_exhaustive() 148 + } 149 + } 150 + 151 + impl Default for Renderer { 152 + fn default() -> Self { 153 + Self::new() 134 154 } 135 155 } 136 156 ··· 141 161 let rdram = device.rdram_mut(); 142 162 write_be_bytes_to_rdram(rdram, F3DEX2_TEXT_ADDR, F3DEX2_TEXT); 143 163 write_be_bytes_to_rdram(rdram, F3DEX2_DATA_ADDR, F3DEX2_DATA); 144 - Self { device } 164 + Self { 165 + device, 166 + warm: false, 167 + } 145 168 } 146 169 147 170 /// Renders model geometry through the RSP, producing RDP command words. 148 - pub fn render(&mut self, nodes: &[NodeData], camera: &CameraMatrices) -> Vec<u32> { 171 + /// 172 + /// The returned slice borrows from the internal RSP device and is valid 173 + /// until the next call to `render`. 174 + pub fn render(&mut self, nodes: &[NodeData], camera: &CameraMatrices) -> &[u32] { 149 175 let gbi_output = gbi::reconstruct( 150 176 nodes, 151 177 camera, ··· 158 184 self.render_gbi(&gbi_output, camera) 159 185 } 160 186 187 + /// Renders model geometry. 188 + pub fn render_to( 189 + &mut self, 190 + nodes: &[NodeData], 191 + camera: &CameraMatrices, 192 + sink: &mut dyn rsp::RdpSink, 193 + ) { 194 + let gbi_output = gbi::reconstruct( 195 + nodes, 196 + camera, 197 + addr_u32(FB_ADDR), 198 + addr_u32(VERTEX_ADDR), 199 + addr_u32(PROJ_MTX_ADDR), 200 + addr_u32(MV_MTX_ADDR), 201 + addr_u32(VIEWPORT_ADDR), 202 + ); 203 + self.prepare_frame(&gbi_output, camera); 204 + self.device.run_with_sink(sink); 205 + } 206 + 161 207 /// Renders a pre-built GBI display list through the RSP. 162 - fn render_gbi(&mut self, gbi_output: &GbiOutput, camera: &CameraMatrices) -> Vec<u32> { 163 - self.device.reset(); 208 + fn render_gbi(&mut self, gbi_output: &GbiOutput, camera: &CameraMatrices) -> &[u32] { 209 + self.prepare_frame(gbi_output, camera); 210 + self.device.run() 211 + } 212 + 213 + /// Resets the RSP/RDP, writes all per-frame data (display list, vertices, 214 + /// matrices, `OSTask`) to RDRAM/DMEM, and prepares the RSP for execution. 215 + /// 216 + /// On the first frame, a full [`reset`](rsp::Device::reset) is used. On 217 + /// subsequent frames, [`rearm`](rsp::Device::rearm) resets only the 218 + /// control registers while preserving lookup tables and dispatch tables. 219 + /// Rspboot always runs to properly initialise F3DEX2. 220 + fn prepare_frame(&mut self, gbi_output: &GbiOutput, camera: &CameraMatrices) { 221 + if self.warm { 222 + self.device.rearm(); 223 + } else { 224 + self.device.reset(); 225 + } 164 226 165 227 let rdram = self.device.rdram_mut(); 166 228 ··· 180 242 // Write viewport data (big-endian i16 values → native-endian RDRAM) 181 243 write_be_bytes_to_rdram(rdram, VIEWPORT_ADDR, &gbi_output.viewport_data); 182 244 183 - // Write rspboot to IMEM (big-endian, RSP reads directly) 245 + // Load rspboot into IMEM. It will DMA F3DEX2 text into IMEM and 246 + // data into DMEM, then jump to the microcode entry point. 184 247 self.device.imem_mut()[..RSPBOOT.len()].copy_from_slice(RSPBOOT); 185 248 186 - // Write OSTask to DMEM (big-endian, RSP reads directly with LW) 249 + // Write OSTask to DMEM (big-endian, RSP reads directly with LW). 187 250 let dmem = self.device.dmem_mut(); 188 251 let task_base = TASK_DMEM_OFFSET; 189 252 ··· 221 284 write_be_u32(dmem, task_base + TASK_DATA_SIZE, dl_size); 222 285 write_be_u32(dmem, task_base + TASK_YIELD_DATA_PTR, 0); 223 286 224 - // Decode IMEM and run RSP 225 287 self.device.decode_imem(); 226 288 self.device.set_pc(0); 227 289 self.device.clear_halt(); 228 - 229 - self.device.run().to_vec() 290 + self.warm = true; 230 291 } 231 292 } 232 293 ··· 357 418 assert!( 358 419 !rdp_commands.is_empty(), 359 420 "RSP should produce RDP commands for a single triangle" 421 + ); 422 + } 423 + 424 + #[test] 425 + fn render_to_streams_to_sink() { 426 + struct CountingSink(usize); 427 + impl rsp::RdpSink for CountingSink { 428 + fn receive_commands(&mut self, commands: &[u32]) { 429 + self.0 += commands.len(); 430 + } 431 + } 432 + 433 + let nodes = vec![NodeData { 434 + triangles: vec![TriangleData { 435 + v0: VertexData { 436 + x: 0, 437 + y: 0, 438 + z: 0, 439 + r: 255, 440 + g: 0, 441 + b: 0, 442 + a: 255, 443 + }, 444 + v1: VertexData { 445 + x: 100, 446 + y: 0, 447 + z: 0, 448 + r: 255, 449 + g: 0, 450 + b: 0, 451 + a: 255, 452 + }, 453 + v2: VertexData { 454 + x: 50, 455 + y: 100, 456 + z: 0, 457 + r: 255, 458 + g: 0, 459 + b: 0, 460 + a: 255, 461 + }, 462 + }], 463 + }]; 464 + 465 + let camera = CameraMatrices { 466 + projection: identity_n64_matrix(), 467 + modelview: identity_n64_matrix(), 468 + }; 469 + 470 + let mut renderer = Renderer::new(); 471 + let mut sink = CountingSink(0); 472 + renderer.render_to(&nodes, &camera, &mut sink); 473 + 474 + assert!( 475 + sink.0 > 0, 476 + "render_to should have streamed RDP commands to the sink" 360 477 ); 361 478 } 362 479 }
+82 -4
crates/rsp/src/lib.rs
··· 24 24 pub mod su_instructions; 25 25 pub mod vu_instructions; 26 26 27 + pub use rdp::RdpSink; 28 + 27 29 /// Branch state enum used by the RSP CPU pipeline. 28 30 #[derive(PartialEq, Copy, Clone)] 29 31 pub enum BranchStepState { ··· 68 70 pub byte_swap: usize, 69 71 /// Maximum total cycles before `run()` forcibly halts. 70 72 pub max_cycles: u64, 73 + /// Active RDP command sink, set only during [`run_with_sink`](Self::run_with_sink). 74 + /// 75 + /// Raw pointer because `run_rdp` is called deep in the RSP execution 76 + /// stack and all intermediate functions take `&mut Device`. 77 + pub(crate) sink: Option<*mut dyn rdp::RdpSink>, 71 78 } 72 79 73 80 impl Device { ··· 83 90 mi: Mi { regs: [0; 4] }, 84 91 byte_swap: 0, 85 92 max_cycles: DEFAULT_MAX_CYCLES, 93 + sink: None, 86 94 }; 87 95 rsp_interface::init(&mut device); 88 96 rdp::init(&mut device); ··· 99 107 self.rdp = rdp::Rdp::new(); 100 108 self.mi = Mi { regs: [0; 4] }; 101 109 self.byte_swap = 0; 110 + self.sink = None; 102 111 rsp_interface::init(self); 103 112 rdp::init(self); 104 113 } 105 114 115 + /// Resets RSP/RDP control state so the device can run again. 116 + /// 117 + /// Unlike [`reset`](Self::reset), this preserves RDRAM, RSP memory 118 + /// (DMEM/IMEM), the decoded instruction cache, lookup tables, and 119 + /// dispatch tables. Only CPU flags, SP registers, and DPC registers 120 + /// are cleared. 121 + pub fn rearm(&mut self) { 122 + // CPU flags 123 + self.rsp.cpu.broken = false; 124 + self.rsp.cpu.halted = false; 125 + self.rsp.cpu.running = false; 126 + self.rsp.cpu.sync_point = false; 127 + self.rsp.cpu.cycle_counter = 0; 128 + self.rsp.cpu.pipeline_full = false; 129 + self.rsp.cpu.branch_state = cpu::BranchState { 130 + state: BranchStepState::Step, 131 + pc: 0, 132 + }; 133 + self.rsp.cpu.last_instruction_type = cpu::InstructionType::Su; 134 + self.rsp.cpu.instruction_type = cpu::InstructionType::Su; 135 + 136 + // SP registers 137 + self.rsp.regs = [0; rsp_interface::SP_REGS_COUNT as usize]; 138 + self.rsp.regs2 = [0; rsp_interface::SP_REGS2_COUNT as usize]; 139 + self.rsp.fifo = [rsp_interface::RspDma { 140 + dir: rsp_interface::DmaDir::None, 141 + length: 0, 142 + memaddr: 0, 143 + dramaddr: 0, 144 + }; 2]; 145 + self.rsp.last_status_value = 0; 146 + self.rsp.run_after_dma = false; 147 + 148 + // DPC/DPS registers 149 + self.rdp.regs_dpc = [0; rdp::DPC_REGS_COUNT as usize]; 150 + self.rdp.regs_dps = [0; rdp::DPS_REGS_COUNT as usize]; 151 + self.rdp.wait_frozen = false; 152 + self.rdp.last_status_value = 0; 153 + self.rdp.collected_commands.clear(); 154 + 155 + self.mi = Mi { regs: [0; 4] }; 156 + self.byte_swap = 0; 157 + self.sink = None; 158 + 159 + // Match the initial state from reset() without regenerating tables 160 + self.rsp.regs[rsp_interface::SP_STATUS_REG as usize] = 1; // HALT 161 + self.rdp.regs_dpc[rdp::DPC_STATUS_REG as usize] |= 1 << 7; // CBUF_READY 162 + } 163 + 106 164 /// Runs the RSP until it halts or breaks, then returns the collected RDP 107 165 /// command words. 108 166 /// 109 - /// The RSP may hit sync points during DMA and DPC operations. This method 110 - /// automatically resumes execution after each sync point, looping until 111 - /// the RSP truly halts or breaks. 167 + /// Commands are buffered in `rdp.collected_commands`. For streaming 168 + /// delivery to a GPU backend, use [`run_with_sink`](Self::run_with_sink). 112 169 pub fn run(&mut self) -> &[u32] { 113 170 self.rdp.collected_commands.clear(); 171 + self.run_inner(); 172 + &self.rdp.collected_commands 173 + } 174 + 175 + /// Runs the RSP, streaming RDP commands directly to `sink`. 176 + /// 177 + /// Unlike [`run`](Self::run), this does not buffer commands in 178 + /// `collected_commands`. The RDRAM path is zero-copy — the byte 179 + /// slice is reinterpreted as `&[u32]` and passed straight through. 180 + pub fn run_with_sink(&mut self, sink: &mut dyn rdp::RdpSink) { 181 + // SAFETY: The pointer is cleared before this method returns. The 182 + // transmute erases the borrow lifetime so it can be stored in the 183 + // struct, but run_inner is synchronous and the sink reference is 184 + // valid throughout. 185 + self.sink = unsafe { Some(std::mem::transmute(std::ptr::from_mut(sink))) }; 186 + self.run_inner(); 187 + self.sink = None; 188 + } 189 + 190 + /// RSP execution loop shared by [`run`](Self::run) and 191 + /// [`run_with_sink`](Self::run_with_sink). 192 + fn run_inner(&mut self) { 114 193 let mut total_cycles: u64 = 0; 115 194 loop { 116 195 let batch_cycles = cpu::run(self); ··· 120 199 break; 121 200 } 122 201 } 123 - &self.rdp.collected_commands 124 202 } 125 203 126 204 /// Mutable access to RDRAM for writing data the RSP will read.
+56 -12
crates/rsp/src/rdp.rs
··· 5 5 6 6 //! RDP (Reality Display Processor) register handling and command collection. 7 7 //! 8 - //! Instead of sending commands to a GPU backend (as gopher64 does), this 9 - //! standalone version collects the RDP command words into a `Vec<u32>` so 10 - //! the caller can pass them to parallel-rdp or another renderer. 8 + //! By default, RDP command words are collected into a `Vec<u32>` (see 9 + //! [`Device::run`](crate::Device::run)). When a [`RdpSink`] is installed via 10 + //! [`Device::run_with_sink`](crate::Device::run_with_sink), commands are 11 + //! streamed directly to the sink — zero-copy for the RDRAM path. 12 + 13 + /// Sink for RDP command words produced during RSP execution. 14 + /// 15 + /// Implementations receive batches of commands as the RSP produces them, 16 + /// rather than waiting for a complete `Vec<u32>` at the end. Install a 17 + /// sink via [`Device::run_with_sink`](crate::Device::run_with_sink). 18 + pub trait RdpSink { 19 + /// Receives a batch of RDP command words in native byte order. 20 + fn receive_commands(&mut self, commands: &[u32]); 21 + } 11 22 12 23 pub const DPC_START_REG: u32 = 0; 13 24 pub const DPC_END_REG: u32 = 1; ··· 122 133 } 123 134 } 124 135 125 - /// Collects RDP commands from RDRAM between CURRENT and END registers. 136 + /// Dispatches RDP commands from RDRAM (or DMEM in XBUS mode) to either the 137 + /// installed [`RdpSink`] or the fallback `collected_commands` buffer. 138 + /// 139 + /// The RDRAM path is zero-copy when a sink is present: the byte slice is 140 + /// reinterpreted as `&[u32]` in-place. DPC register masking (`& 0xFFFFF8`) 141 + /// guarantees 8-byte alignment for both CURRENT and END. 126 142 fn run_rdp(device: &mut crate::Device) { 127 143 let current = device.rdp.regs_dpc[DPC_CURRENT_REG as usize] as usize; 128 144 let end = device.rdp.regs_dpc[DPC_END_REG as usize] as usize; 129 145 130 - if device.rdp.regs_dpc[DPC_STATUS_REG as usize] & DPC_STATUS_XBUS_DMEM_DMA != 0 { 131 - // XBUS mode: commands come from DMEM/IMEM instead of RDRAM 132 - let mut addr = current & 0xFFF; 133 - while addr < (end & 0xFFF) { 134 - let word = u32::from_be_bytes(device.rsp.mem[addr..addr + 4].try_into().unwrap()); 135 - device.rdp.collected_commands.push(word); 136 - addr += 4; 146 + let is_xbus = device.rdp.regs_dpc[DPC_STATUS_REG as usize] & DPC_STATUS_XBUS_DMEM_DMA != 0; 147 + 148 + if is_xbus { 149 + let start = current & 0xFFF; 150 + let end_addr = end & 0xFFF; 151 + if let Some(sink_ptr) = device.sink { 152 + let mut commands = Vec::new(); 153 + let mut addr = start; 154 + while addr < end_addr { 155 + let word = u32::from_be_bytes(device.rsp.mem[addr..addr + 4].try_into().unwrap()); 156 + commands.push(word); 157 + addr += 4; 158 + } 159 + // SAFETY: sink_ptr is valid for the duration of run_with_sink. 160 + unsafe { (*sink_ptr).receive_commands(&commands) }; 161 + } else { 162 + let mut addr = start; 163 + while addr < end_addr { 164 + let word = u32::from_be_bytes(device.rsp.mem[addr..addr + 4].try_into().unwrap()); 165 + device.rdp.collected_commands.push(word); 166 + addr += 4; 167 + } 137 168 } 169 + } else if let Some(sink_ptr) = device.sink { 170 + let end_clamped = end.min(device.rdram.mem.len()); 171 + let start_clamped = current.min(end_clamped); 172 + let slice = &device.rdram.mem[start_clamped..end_clamped]; 173 + // SAFETY: DPC registers mask addresses with & 0xFFFFF8 (8-byte aligned). 174 + // Vec<u8> allocations are at least pointer-aligned. RDRAM stores u32 175 + // words in native byte order. 176 + let (prefix, commands, suffix) = unsafe { slice.align_to::<u32>() }; 177 + debug_assert!( 178 + prefix.is_empty() && suffix.is_empty(), 179 + "RDRAM slice not u32-aligned: {start_clamped:#x}..{end_clamped:#x}" 180 + ); 181 + // SAFETY: sink_ptr is valid for the duration of run_with_sink. 182 + unsafe { (*sink_ptr).receive_commands(commands) }; 138 183 } else { 139 - // Normal mode: commands come from RDRAM (stored in native byte order) 140 184 let mut addr = current; 141 185 while addr < end { 142 186 if addr + 4 <= device.rdram.mem.len() {