···3434- Write unit tests for all public functions containing business logic, data transformations, or state management. They go in a `#[cfg(test)] mod tests` at the bottom of each file. GUI/rendering code does not require unit tests.
3535- Write integration tests using `egui_kittest` for GUI code. Binary crates (like `kammy`) cannot use the `tests/` directory at the crate root because there is no library target to import. Instead, use a `src/tests.rs` module gated behind `#[cfg(test)]`, with submodules in `src/tests/` organized by feature (e.g. `src/tests/undo.rs`). Shared test utilities go in `src/tests.rs`. Library crates should use the standard `tests/` directory at the crate root.
3636- Prefer module-level inner doc comments (`//!`) at the top of a file over outer doc comments (`///`) on the `mod` declaration. This keeps the documentation next to the code it describes.
3737-- Avoid just `#[expect]` or `#[allow]`ing lines. The checks are there for a reason. For example, `as` should usually be `.into()`.
3737+- Avoid silencing lints by just adding `#[expect]` or `#[allow]`. The checks are there for a reason. For example, `as` should usually be `.into()`, or `.try_into()?`.
38383939## Error handling
4040
···991010use crate::Project;
1111use crate::dock::{Dock, DockPosition};
1212-use crate::editor::display_list::DisplayListEditor;
1312use crate::editor::map::MapEditor;
1413use crate::editor::todo::TodoEditor;
1514use crate::editor::{Editor, EditorId, Inspect, TileBehavior, UndoBehavior};
···1817use crate::tool::assets::AssetsTool;
1918use crate::tool::hierarchy::HierarchyTool;
2019use crate::tool::inspector::InspectorTool;
2020+2121+/// Callback for initialising CRDT data when a new editor is added.
2222+type SetupCrdt = dyn Fn(EditorId, &Project);
21232224/// The main application, managing a tabbed editor tree with per-tab undo
2325/// and collapsible tool docks.
···135137 fn add_editor(
136138 &mut self,
137139 make_editor: impl FnOnce(EditorId) -> Box<dyn Editor>,
138138- setup_crdt: Option<&dyn Fn(EditorId, &Project)>,
140140+ setup_crdt: Option<&SetupCrdt>,
139141 ) {
140142 let editor_id = self.alloc_editor_id();
141143···186188 project.doc().commit();
187189 }),
188190 );
189189- }
190190-191191- fn add_display_list_editor(&mut self) {
192192- self.add_editor(|id| Box::new(DisplayListEditor::new(id)), None);
193191 }
194192195193 fn add_map_editor(&mut self) {
···345343 if ui.button("+ Todo").clicked() {
346344 self.add_todo_editor();
347345 }
348348- if ui.button("+ Display List").clicked() {
349349- self.add_display_list_editor();
350350- }
351346 if ui.button("+ Map").clicked() {
352347 self.add_map_editor();
353348 }
···394389 // Destructure for disjoint borrows
395390 let Self {
396391 project,
397397- active_editor_id,
398392 inspect,
399393 left_dock,
400394 right_dock,
···402396 ..
403397 } = self;
404398405405- let mut tool_ctx = ToolContext {
406406- project,
407407- active_editor_id: *active_editor_id,
408408- inspect,
409409- };
399399+ let mut tool_ctx = ToolContext { inspect };
410400411401 bottom_dock.show(ctx, &mut tool_ctx);
412402 left_dock.show(ctx, &mut tool_ctx);
+2-12
crates/kammy/src/dock.rs
···5252 }
5353 }
54545555- /// Whether the dock is currently open (has an active tool).
5656- pub fn is_open(&self) -> bool {
5757- self.active.is_some()
5858- }
5959-6055 /// Toggles a tool by index. If the tool is already active, collapses the
6156 /// dock. Otherwise, activates the tool.
6257 pub fn toggle_tool(&mut self, idx: usize) {
···129124#[cfg(test)]
130125mod tests {
131126 use super::*;
132132-133133- use egui;
134127135128 #[derive(Debug)]
136129 struct DummyTool {
···169162 #[test]
170163 fn starts_collapsed() {
171164 let dock = make_dock(None);
172172- assert!(!dock.is_open());
165165+ assert_eq!(dock.active, None);
173166 }
174167175168 #[test]
176169 fn starts_open() {
177170 let dock = make_dock(Some(0));
178178- assert!(dock.is_open());
171171+ assert_eq!(dock.active, Some(0));
179172 }
180173181174 #[test]
···184177185178 dock.toggle_tool(0);
186179 assert_eq!(dock.active, Some(0));
187187- assert!(dock.is_open());
188180189181 // Toggle same tool collapses
190182 dock.toggle_tool(0);
191183 assert_eq!(dock.active, None);
192192- assert!(!dock.is_open());
193184 }
194185195186 #[test]
···198189199190 dock.toggle_tool(1);
200191 assert_eq!(dock.active, Some(1));
201201- assert!(dock.is_open());
202192 }
203193}
-1
crates/kammy/src/editor.rs
···4455//! Editor trait, built-in editor implementations, and tile-tree dispatch.
6677-pub mod display_list;
87pub mod map;
98pub mod todo;
109
-121
crates/kammy/src/editor/display_list.rs
···11-// SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com>
22-//
33-// SPDX-License-Identifier: AGPL-3.0-or-later
44-55-//! Display list editor: renders N64 display lists via parallel-rdp.
66-//!
77-//! Currently renders a solid-color test pattern to verify the full pipeline:
88-//! RDRAM write -> RDP command submit -> scanout -> wgpu texture -> egui display.
99-1010-use super::{Editor, EditorContext, EditorId};
1111-use crate::widget::rdp_viewport::{DisplayList, RdpViewport, ViConfig};
1212-1313-/// An editor that renders N64 display lists via the RDP.
1414-pub struct DisplayListEditor {
1515- id: EditorId,
1616- viewport: RdpViewport,
1717- frame_count: u32,
1818-}
1919-2020-impl std::fmt::Debug for DisplayListEditor {
2121- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2222- f.debug_struct("DisplayListEditor")
2323- .field("id", &self.id)
2424- .field("frame_count", &self.frame_count)
2525- .finish_non_exhaustive()
2626- }
2727-}
2828-2929-impl DisplayListEditor {
3030- /// Creates a new display list editor with the given stable ID.
3131- pub fn new(id: EditorId) -> Self {
3232- Self {
3333- id,
3434- viewport: RdpViewport::new(4 * 1024 * 1024),
3535- frame_count: 0,
3636- }
3737- }
3838-}
3939-4040-/// Build an RDP display list that fills the framebuffer with a solid color.
4141-///
4242-/// The color cycles through red, green, blue based on the frame counter,
4343-/// producing a simple animated test pattern.
4444-fn build_fill_rect_display_list(frame: u32) -> DisplayList {
4545- // Framebuffer: 320x240, 16-bit (5/5/5/1)
4646- const FB_WIDTH: u32 = 320;
4747- const FB_HEIGHT: u32 = 240;
4848- const FB_ORIGIN: u32 = 0x100; // Non-zero: parallel-rdp treats origin 0 as blank
4949-5050- let phase = frame / 60 % 3;
5151- let fill_color: u32 = match phase {
5252- 0 => 0xF801_F801, // Red (16-bit 5551: R=31, G=0, B=0, A=1), packed twice
5353- 1 => 0x07C1_07C1, // Green
5454- _ => 0x003F_003F, // Blue
5555- };
5656-5757- // RDP commands (each command is 64 bits = 2 words)
5858- let commands: Vec<u32> = vec![
5959- // Set Color Image: format=RGBA, size=16-bit, width=320, address=0
6060- // Command byte: 0x3F (Set Color Image)
6161- // Bits: [63:56]=0x3F, [55:53]=format(0=RGBA), [52:51]=size(1=16-bit),
6262- // [41:32]=width-1, [25:0]=address
6363- 0x3F10_0000 | ((FB_WIDTH - 1) & 0x3FF),
6464- FB_ORIGIN,
6565- // Set Scissor: XH=0, YH=0, XL=320<<2, YL=240<<2
6666- // Command byte: 0x2D
6767- 0x2D00_0000,
6868- ((FB_WIDTH << 2) << 12) | (FB_HEIGHT << 2),
6969- // Set Other Modes: cycle_type=Fill
7070- // Command byte: 0x2F, bit 55-52 = cycle type (3=Fill)
7171- 0x2F30_0000,
7272- 0x0000_0000,
7373- // Set Fill Color
7474- // Command byte: 0x37
7575- 0x3700_0000,
7676- fill_color,
7777- // Fill Rectangle: covers entire framebuffer
7878- // Command byte: 0x36
7979- // Bits: XL=320<<2, YL=240<<2 (word 0), XH=0, YH=0 (word 1)
8080- 0x3600_0000 | ((FB_WIDTH << 2) << 12) | (FB_HEIGHT << 2),
8181- 0x0000_0000,
8282- // Sync Full: wait for all rendering to complete
8383- // Command byte: 0x29
8484- 0x2900_0000,
8585- 0x0000_0000,
8686- ];
8787-8888- let vi = ViConfig {
8989- // Control: 16-bit color (bits 1:0 = 2), anti-alias + resample (bits 9:8 = 3)
9090- control: 0x0000_0302,
9191- origin: FB_ORIGIN,
9292- width: FB_WIDTH,
9393- v_sync: 525, // NTSC: 525 lines
9494- h_start: (0x006C << 16) | 0x02EC, // Typical NTSC H range
9595- v_start: (0x0025 << 16) | 0x01FF, // Typical NTSC V range
9696- x_scale: (FB_WIDTH * 1024 / 640), // Scale to fill 640 output
9797- y_scale: (FB_HEIGHT * 1024 / 480), // Scale to fill 480 output
9898- };
9999-100100- DisplayList { commands, vi }
101101-}
102102-103103-impl Editor for DisplayListEditor {
104104- fn id(&self) -> EditorId {
105105- self.id
106106- }
107107-108108- fn title(&self) -> String {
109109- "Display List".to_owned()
110110- }
111111-112112- fn ui(&mut self, ui: &mut egui::Ui, ctx: &mut EditorContext) {
113113- let display_list = build_fill_rect_display_list(self.frame_count);
114114- self.frame_count = self.frame_count.wrapping_add(1);
115115-116116- self.viewport
117117- .ui(ui, ctx.gpu.as_deref_mut(), &display_list, 4.0 / 3.0, |_| {});
118118-119119- ui.ctx().request_repaint();
120120- }
121121-}
+21-23
crates/kammy/src/editor/map.rs
···15151616use super::{Editor, EditorContext, EditorId};
1717use crate::Project;
1818-use crate::widget::rdp_viewport::{DisplayList, RdpViewport};
1818+use crate::widget::rdp_viewport::RdpViewport;
19192020const FB_WIDTH: u32 = 320;
2121const FB_HEIGHT: u32 = 240;
···2727 id: EditorId,
2828 viewport: RdpViewport,
2929 camera: camera::OrbitCamera,
3030- /// Persistent RSP renderer — avoids 4 MB RDRAM allocation per frame.
3131- rsp_renderer: pm64::render::Renderer,
3230}
33313432impl std::fmt::Debug for MapEditor {
···4644 id,
4745 viewport: RdpViewport::new(4 * 1024 * 1024),
4846 camera: camera::OrbitCamera::default(),
4949- rsp_renderer: pm64::render::Renderer::new(),
5047 }
5148 }
5249}
···6158 }
62596360 fn ui(&mut self, ui: &mut egui::Ui, ctx: &mut EditorContext) {
6161+ // Handle camera input first so this frame's drag is reflected immediately.
6262+ let interact_rect = ui.available_rect_before_wrap();
6363+ let interact_response = ui.interact(
6464+ interact_rect,
6565+ ui.id().with("camera"),
6666+ egui::Sense::click_and_drag(),
6767+ );
6868+ self.camera.handle_input(&interact_response);
6969+6470 let nodes = extract_nodes(ctx.project);
6565- let aspect = FB_WIDTH as f32 / FB_HEIGHT as f32;
7171+ #[expect(
7272+ clippy::cast_possible_truncation,
7373+ clippy::as_conversions,
7474+ reason = "320/240 is well within f32 range"
7575+ )]
7676+ let aspect = (f64::from(FB_WIDTH) / f64::from(FB_HEIGHT)) as f32;
6677 let camera_matrices = self.camera.to_n64_matrices(aspect);
6767-6868- let rdp_commands = self.rsp_renderer.render(&nodes, &camera_matrices);
7878+ let vi = vi_config_ntsc(FB_ORIGIN);
69797070- let display_list = DisplayList {
7171- commands: rdp_commands,
7272- vi: vi_config_ntsc(FB_ORIGIN),
7373- };
7474-7575- let response = self.viewport.ui(
8080+ self.viewport.ui(
7681 ui,
7782 ctx.gpu.as_deref_mut(),
7878- &display_list,
8383+ &vi,
7984 aspect,
8080- |_rdram| {},
8585+ &nodes,
8686+ &camera_matrices,
8187 );
8282-8383- // Layer a drag/scroll sensor over the viewport for camera control
8484- let response = ui.interact(
8585- response.rect,
8686- response.id.with("camera"),
8787- egui::Sense::click_and_drag(),
8888- );
8989- self.camera.handle_input(&response);
90889189 ui.ctx().request_repaint();
9290 }
+14-10
crates/kammy/src/gpu.rs
···2222use winit::window::Window;
23232424/// GPU state shared across the application.
2525+///
2626+/// Field order matters for drop: wgpu resources must be released before
2727+/// `rdp_context` destroys the underlying VkDevice/VkInstance.
2528pub struct GpuState {
2626- /// The parallel-rdp Vulkan context (owns the `VkInstance` + `VkDevice`).
2727- pub rdp_context: parallel_rdp::VulkanContext,
2828- /// wgpu device wrapping Granite's `VkDevice`.
2929- pub device: wgpu::Device,
3030- /// wgpu queue wrapping Granite's graphics queue.
3131- pub queue: wgpu::Queue,
3232- /// Window surface for presentation.
3333- surface: wgpu::Surface<'static>,
3434- /// Current surface configuration.
3535- surface_config: wgpu::SurfaceConfiguration,
3629 /// egui renderer (draws egui primitives via wgpu).
3730 pub renderer: egui_wgpu::Renderer,
3131+ /// Current surface configuration.
3232+ surface_config: wgpu::SurfaceConfiguration,
3333+ /// Window surface for presentation.
3434+ surface: wgpu::Surface<'static>,
3535+ /// wgpu queue wrapping Granite's graphics queue.
3636+ pub queue: wgpu::Queue,
3737+ /// wgpu device wrapping Granite's `VkDevice`.
3838+ pub device: wgpu::Device,
3839 /// Prevents the wgpu Instance from being dropped prematurely.
3940 _instance: wgpu::Instance,
4141+ /// The parallel-rdp Vulkan context (owns the `VkInstance` + `VkDevice`).
4242+ /// Must be last: Granite owns the Vulkan handles that everything above wraps.
4343+ pub rdp_context: parallel_rdp::VulkanContext,
4044}
41454246impl std::fmt::Debug for GpuState {
+12-8
crates/kammy/src/main.rs
···5555}
56565757/// Runtime state created after the window is available.
5858+///
5959+/// Field order matters: Rust drops fields in declaration order, and the
6060+/// app's editors hold parallel-rdp `Renderer`s whose destructors need
6161+/// the Vulkan device that lives inside `gpu`. So `app` must drop first.
5862struct AppState {
5959- window: Arc<Window>,
6060- gpu: gpu::GpuState,
6161- egui_ctx: egui::Context,
6363+ app: app::KammyApp,
6264 egui_state: egui_winit::State,
6363- app: app::KammyApp,
6565+ egui_ctx: egui::Context,
6666+ gpu: gpu::GpuState,
6767+ window: Arc<Window>,
6468}
65696670impl ApplicationHandler for WinitApp {
···106110 let app = app::KammyApp::new();
107111108112 self.state = Some(AppState {
109109- window,
110110- gpu,
111111- egui_ctx,
112112- egui_state,
113113 app,
114114+ egui_state,
115115+ egui_ctx,
116116+ gpu,
117117+ window,
114118 });
115119 }
116120
+1-6
crates/kammy/src/tool.rs
···1212pub mod hierarchy;
1313pub mod inspector;
14141515-use crate::Project;
1616-use crate::editor::{EditorId, Inspect};
1515+use crate::editor::Inspect;
17161817/// Context passed to each tool during rendering.
1918pub struct ToolContext<'a> {
2020- /// The shared project data (CRDT document).
2121- pub project: &'a Project,
2222- /// The currently focused editor, if any.
2323- pub active_editor_id: Option<EditorId>,
2419 /// The current inspect object set by editors. Tools like the Inspector
2520 /// read this to display property UI.
2621 pub inspect: &'a mut Option<Box<dyn Inspect>>,
+137-62
crates/kammy/src/widget/rdp_viewport.rs
···6677//! An egui widget that renders N64 display lists using parallel-rdp.
8899+use pm64::gbi::{CameraMatrices, NodeData};
1010+use pm64::render::ParallelRdpSink;
1111+912use crate::gpu::GpuState;
10131114/// N64 Video Interface register configuration for scanout.
···2932 pub y_scale: u32,
3033}
31343232-/// A display list to be rendered by the RDP.
3333-#[derive(Debug, Clone)]
3434-pub struct DisplayList {
3535- /// RDP command words (big-endian 32-bit).
3636- pub commands: Vec<u32>,
3737- /// Video Interface configuration for scanout.
3838- pub vi: ViConfig,
3939-}
4040-4135/// Reusable egui widget that renders N64 display lists via parallel-rdp.
4236///
4337/// Each instance owns its own [`parallel_rdp::Renderer`] (command processor +
4438/// RDRAM). The widget submits display list commands, performs scanout, and
4539/// displays the result as an egui image.
4640///
4747-/// The renderer is created lazily on the first [`show`](Self::show) call that
4141+/// GPU work is pipelined: each frame submits commands and signals the GPU
4242+/// timeline (non-blocking); the following frame waits on that signal before
4343+/// importing the scanout. This overlaps GPU rendering with the CPU's egui
4444+/// layout pass, so only the first frame pays for a blocking `flush()`.
4545+///
4646+/// The renderer is created lazily on the first [`ui`](Self::ui) call that
4847/// receives a GPU context.
4948pub struct RdpViewport {
5049 renderer: Option<parallel_rdp::Renderer>,
5150 rdram_size: u32,
5151+ /// Persistent RSP renderer — avoids 4 MB RDRAM allocation per frame.
5252+ rsp_renderer: pm64::render::Renderer,
5253 /// Registered egui texture ID (reused across frames).
5354 texture_id: Option<egui::TextureId>,
5455 /// The current frame's scanout texture wrapper. Kept alive so egui can
5555- /// reference it during the render pass (which runs after `show()`).
5656+ /// reference it during the render pass (which runs after `ui()`).
5657 current_texture: Option<wgpu::Texture>,
5858+ /// Pending GPU timeline value from the previous frame's scanout.
5959+ pending_timeline: Option<u64>,
6060+ /// Scanout result waiting for the timeline to complete.
6161+ pending_scanout: Option<PendingScanout>,
6262+}
6363+6464+/// A scanout result waiting to be imported into wgpu once the GPU finishes.
6565+struct PendingScanout {
6666+ vk_image: ash::vk::Image,
6767+ width: u32,
6868+ height: u32,
5769}
58705971impl std::fmt::Debug for RdpViewport {
···6476 }
6577}
66787979+impl Drop for RdpViewport {
8080+ fn drop(&mut self) {
8181+ // Wait for any in-flight GPU work before destroying the renderer.
8282+ if let (Some(timeline), Some(renderer)) =
8383+ (self.pending_timeline.take(), self.renderer.as_mut())
8484+ {
8585+ renderer.wait_for_timeline(timeline);
8686+ }
8787+ // Release the wgpu texture wrapping a VkImage owned by the renderer
8888+ // before the renderer (and its CommandProcessor) are dropped.
8989+ self.current_texture = None;
9090+ }
9191+}
9292+6793impl RdpViewport {
6894 /// Creates a new viewport.
6995 ///
7096 /// `rdram_size` is the RDRAM capacity in bytes (typically 4 MiB). The
7171- /// underlying renderer is created lazily when [`show`](Self::show) is
9797+ /// underlying renderer is created lazily when [`ui`](Self::ui) is
7298 /// first called with a GPU context.
7399 pub fn new(rdram_size: u32) -> Self {
74100 Self {
75101 renderer: None,
76102 rdram_size,
103103+ rsp_renderer: pm64::render::Renderer::new(),
77104 texture_id: None,
78105 current_texture: None,
106106+ pending_timeline: None,
107107+ pending_scanout: None,
79108 }
80109 }
811108282- /// Renders the display list and shows the result in the UI.
111111+ /// Renders an N64 frame and shows the result in the UI.
83112 ///
84113 /// `display_aspect` is the intended display aspect ratio (width/height).
85114 /// The scanout texture is stretched to fill the available UI space at
86115 /// this ratio — necessary because non-interlaced VI modes produce
87116 /// half-height scanouts that don't reflect the true display shape.
88117 ///
8989- /// The closure receives the renderer's RDRAM for direct writes (textures,
9090- /// framebuffer data, etc.) before commands are submitted.
118118+ /// Has 1 frame of latency.
91119 ///
92120 /// If `gpu` is `None` (headless/test), displays a placeholder label.
93121 pub fn ui(
94122 &mut self,
95123 ui: &mut egui::Ui,
96124 gpu: Option<&mut GpuState>,
9797- display_list: &DisplayList,
125125+ vi: &ViConfig,
98126 display_aspect: f32,
9999- write_rdram: impl FnOnce(&mut [u8]),
127127+ nodes: &[NodeData],
128128+ camera: &CameraMatrices,
100129 ) -> egui::Response {
101130 let Some(gpu) = gpu else {
102131 return ui.label("GPU not available");
103132 };
104133105105- let renderer = match &mut self.renderer {
106106- Some(r) => r,
107107- None => match parallel_rdp::Renderer::new(&gpu.rdp_context, self.rdram_size, 0) {
108108- Ok(r) => self.renderer.insert(r),
134134+ // Lazily create the renderer on first use.
135135+ if self.renderer.is_none() {
136136+ match parallel_rdp::Renderer::new(&gpu.rdp_context, self.rdram_size, 0) {
137137+ Ok(r) => {
138138+ self.renderer = Some(r);
139139+ }
109140 Err(e) => {
110141 tracing::warn!("failed to create RDP renderer: {e:?}");
111142 return ui.label("RDP renderer unavailable");
112143 }
113113- },
144144+ }
145145+ }
146146+147147+ // Wait for the previous frame's GPU work and import its scanout.
148148+ // This wait should be near-instant because the GPU has had a full
149149+ // egui frame (~16ms) to finish since we signalled.
150150+ //
151151+ // Scoped separately from the command submission below so
152152+ // `update_egui_texture` can borrow `&mut self`.
153153+ if let Some(timeline) = self.pending_timeline.take() {
154154+ if let Some(renderer) = &mut self.renderer {
155155+ renderer.wait_for_timeline(timeline);
156156+ }
157157+158158+ if let Some(scanout) = self.pending_scanout.take() {
159159+ // SAFETY: wait_for_timeline ensures the GPU is done, and the
160160+ // VkImage is still valid (no new scanout has been called yet).
161161+ if let Some(texture) = unsafe {
162162+ import_scanout_image(
163163+ &gpu.device,
164164+ scanout.vk_image,
165165+ scanout.width,
166166+ scanout.height,
167167+ )
168168+ } {
169169+ let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
170170+ self.update_egui_texture(gpu, &view);
171171+ self.current_texture = Some(texture);
172172+ } else {
173173+ tracing::warn!("failed to import scanout VkImage into wgpu");
174174+ }
175175+ }
176176+ }
177177+178178+ // Submit new work for this frame.
179179+ let Some(renderer) = &mut self.renderer else {
180180+ return ui.label("RDP renderer unavailable");
114181 };
115115-116116- write_rdram(renderer.rdram_mut());
117182 renderer.begin_frame();
118118- Self::set_vi_registers(renderer, &display_list.vi);
119119- renderer.enqueue_commands(&display_list.commands);
183183+ Self::set_vi_registers(renderer, vi);
184184+ self.rsp_renderer
185185+ .render_to(nodes, camera, &mut ParallelRdpSink(renderer));
120186121121- let Some((vk_image, width, height)) = renderer.scanout() else {
122122- return ui.label("No scanout output");
123123- };
124124- if width == 0 || height == 0 {
125125- return ui.label("No scanout output");
187187+ if let Some((vk_image, width, height)) = renderer.scanout()
188188+ && width > 0
189189+ && height > 0
190190+ {
191191+ if self.current_texture.is_none() {
192192+ // First frame: no previous texture to display yet, so
193193+ // do a blocking flush to bootstrap.
194194+ renderer.flush();
195195+ if let Some(texture) =
196196+ unsafe { import_scanout_image(&gpu.device, vk_image, width, height) }
197197+ {
198198+ let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
199199+ self.update_egui_texture(gpu, &view);
200200+ self.current_texture = Some(texture);
201201+ }
202202+ } else {
203203+ // Pipeline: signal non-blocking, import on next frame
204204+ self.pending_timeline = Some(renderer.signal_timeline());
205205+ self.pending_scanout = Some(PendingScanout {
206206+ vk_image,
207207+ width,
208208+ height,
209209+ });
210210+ }
126211 }
127212128128- // Ensure all GPU scanout work is complete before wgpu reads the image
129129- renderer.flush();
213213+ // Display the texture (either from previous frame's import or
214214+ // from the bootstrap flush above)
215215+ let available = ui.available_size();
216216+ let size = if available.x / available.y.max(1.0) > display_aspect {
217217+ egui::vec2(available.y * display_aspect, available.y)
218218+ } else {
219219+ egui::vec2(available.x, available.x / display_aspect)
220220+ };
130221131131- // SAFETY: flush() was called above, and the VkImage from scanout()
132132- // remains valid until the wgpu::Texture is dropped (next frame at earliest).
133133- let Some(texture) = (unsafe { import_scanout_image(&gpu.device, vk_image, width, height) })
134134- else {
135135- tracing::warn!("failed to import scanout VkImage into wgpu");
136136- return ui.label("Scanout import failed");
137137- };
138138- let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
222222+ if let Some(texture_id) = self.texture_id {
223223+ ui.image(egui::load::SizedTexture::new(texture_id, size))
224224+ } else {
225225+ ui.label("Loading…")
226226+ }
227227+ }
139228140140- // Register or update the egui texture binding
229229+ /// Registers or updates the egui texture binding.
230230+ fn update_egui_texture(&mut self, gpu: &mut GpuState, view: &wgpu::TextureView) {
141231 if let Some(id) = self.texture_id {
142232 gpu.renderer.update_egui_texture_from_wgpu_texture(
143233 &gpu.device,
144144- &view,
234234+ view,
145235 wgpu::FilterMode::Nearest,
146236 id,
147237 );
148238 } else {
149239 let id =
150240 gpu.renderer
151151- .register_native_texture(&gpu.device, &view, wgpu::FilterMode::Nearest);
241241+ .register_native_texture(&gpu.device, view, wgpu::FilterMode::Nearest);
152242 self.texture_id = Some(id);
153243 }
154154-155155- // Keep texture alive until the render pass uses it
156156- self.current_texture = Some(texture);
157157-158158- // Scale image to fill available UI space at the caller's display aspect ratio
159159- let available = ui.available_size();
160160- let size = if available.x / available.y.max(1.0) > display_aspect {
161161- egui::vec2(available.y * display_aspect, available.y)
162162- } else {
163163- egui::vec2(available.x, available.x / display_aspect)
164164- };
165165-166166- let Some(texture_id) = self.texture_id else {
167167- return ui.label("Texture not ready");
168168- };
169169- ui.image(egui::load::SizedTexture::new(texture_id, size))
170244 }
171245172246 fn set_vi_registers(renderer: &mut parallel_rdp::Renderer, vi: &ViConfig) {
···186260///
187261/// # Safety
188262///
189189-/// The `VkImage` must be valid and fully rendered (call `flush()` first).
190190-/// It must remain valid until the wgpu texture is dropped.
263263+/// The `VkImage` must be valid and fully rendered (call `flush()` or
264264+/// `wait_for_timeline()` first). It must remain valid until the wgpu
265265+/// texture is dropped.
191266unsafe fn import_scanout_image(
192267 device: &wgpu::Device,
193268 vk_image: ash::vk::Image,
+18-1
crates/parallel_rdp/src/bridge.cpp
···157157158158void rdp_renderer_destroy(void *renderer)
159159{
160160- delete static_cast<RdpRenderer *>(renderer);
160160+ auto *r = static_cast<RdpRenderer *>(renderer);
161161+ // Ensure all GPU work completes before destroying the CommandProcessor,
162162+ // otherwise its destructor may race with in-flight commands.
163163+ uint64_t timeline = r->processor->signal_timeline();
164164+ r->processor->wait_for_timeline(timeline);
165165+ delete r;
161166}
162167163168uint8_t *rdp_renderer_get_rdram(void *renderer)
···283288 uint64_t timeline = r->processor->signal_timeline();
284289 r->processor->wait_for_timeline(timeline);
285290}
291291+292292+uint64_t rdp_renderer_signal_timeline(void *renderer)
293293+{
294294+ auto *r = static_cast<RdpRenderer *>(renderer);
295295+ return r->processor->signal_timeline();
296296+}
297297+298298+void rdp_renderer_wait_for_timeline(void *renderer, uint64_t value)
299299+{
300300+ auto *r = static_cast<RdpRenderer *>(renderer);
301301+ r->processor->wait_for_timeline(value);
302302+}
+9
crates/parallel_rdp/src/bridge.hpp
···120120/// Signal the renderer's timeline and wait for all previous work to complete.
121121void rdp_renderer_flush(void *renderer);
122122123123+/// Signal the renderer's timeline and return the timeline value (non-blocking).
124124+///
125125+/// Call `rdp_renderer_wait_for_timeline` with the returned value to wait
126126+/// for all work submitted before this signal to complete.
127127+uint64_t rdp_renderer_signal_timeline(void *renderer);
128128+129129+/// Wait for the renderer's timeline to reach `value`.
130130+void rdp_renderer_wait_for_timeline(void *renderer, uint64_t value);
131131+123132#ifdef __cplusplus
124133}
125134#endif
+26
crates/parallel_rdp/src/lib.rs
···271271 ///
272272 /// Write display list data, textures, and framebuffer contents here before
273273 /// calling [`enqueue_commands`](Self::enqueue_commands) and [`scanout`](Self::scanout).
274274+ ///
275275+ /// # Panics
276276+ ///
277277+ /// Panics if the RDRAM size (a `u32`) does not fit in a `usize`. This can
278278+ /// only happen on 16-bit platforms, which cannot run Vulkan.
279279+ #[expect(
280280+ clippy::expect_used,
281281+ reason = "RDRAM size is u32, which fits in usize on all Vulkan-capable platforms"
282282+ )]
274283 pub fn rdram_mut(&mut self) -> &mut [u8] {
275284 unsafe {
276285 let ptr = ffi::rdp_renderer_get_rdram(self.ptr);
···362371 pub fn flush(&mut self) {
363372 unsafe {
364373 ffi::rdp_renderer_flush(self.ptr);
374374+ }
375375+ }
376376+377377+ /// Signals the GPU timeline and returns a token (non-blocking).
378378+ ///
379379+ /// Call [`wait_for_timeline`](Self::wait_for_timeline) with the returned
380380+ /// value to wait for all work submitted before this signal.
381381+ pub fn signal_timeline(&mut self) -> u64 {
382382+ unsafe { ffi::rdp_renderer_signal_timeline(self.ptr) }
383383+ }
384384+385385+ /// Waits for the GPU timeline to reach the given value.
386386+ ///
387387+ /// If the GPU has already passed this point, returns immediately.
388388+ pub fn wait_for_timeline(&mut self, value: u64) {
389389+ unsafe {
390390+ ffi::rdp_renderer_wait_for_timeline(self.ptr, value);
365391 }
366392 }
367393}
···2020}
21212222/// A triangle referencing three vertices.
2323-#[derive(Clone, Debug)]
2323+#[derive(Clone, Debug, PartialEq)]
2424pub struct TriangleData {
2525 pub v0: VertexData,
2626 pub v1: VertexData,
···2828}
29293030/// A model node's geometry in plain (non-CRDT) form.
3131-#[derive(Clone, Debug)]
3131+#[derive(Clone, Debug, PartialEq)]
3232pub struct NodeData {
3333 /// Triangles belonging to this node.
3434 pub triangles: Vec<TriangleData>,
3535}
36363737/// N64 camera matrices in s15.16 fixed-point format (64 bytes each).
3838-#[derive(Clone, Debug)]
3838+#[derive(Clone, Debug, PartialEq, Eq)]
3939pub struct CameraMatrices {
4040 /// Projection matrix (64 bytes, s15.16 fixed-point).
4141 pub projection: [u8; 64],
···156156/// - `proj_addr`: RDRAM address of the projection matrix.
157157/// - `mv_addr`: RDRAM address of the modelview matrix.
158158/// - `viewport_addr`: RDRAM address where the viewport struct will be placed.
159159-#[expect(
160160- clippy::many_single_char_names,
161161- reason = "a/b/c vertex indices are standard triangle nomenclature"
162162-)]
163159pub fn reconstruct(
164160 nodes: &[NodeData],
165161 _camera: &CameraMatrices,
+129-12
crates/pm64/src/render.rs
···991010use crate::gbi::{self, CameraMatrices, GbiOutput, NodeData};
11111212+/// Bridges [`rsp::RdpSink`] to [`parallel_rdp::Renderer::enqueue_commands`].
1313+#[derive(Debug)]
1414+pub struct ParallelRdpSink<'a>(pub &'a mut parallel_rdp::Renderer);
1515+1616+impl rsp::RdpSink for ParallelRdpSink<'_> {
1717+ fn receive_commands(&mut self, commands: &[u32]) {
1818+ self.0.enqueue_commands(commands);
1919+ }
2020+}
2121+1222// Microcode binaries from the Paper Mario 64 decomp
1323const F3DEX2_TEXT: &[u8] = include_bytes!(concat!(
1424 env!("PM64_ASSETS_DIR"),
···115125/// on every call.
116126pub fn render(nodes: &[NodeData], camera: &CameraMatrices) -> Vec<u32> {
117127 let mut renderer = Renderer::new();
118118- renderer.render(nodes, camera)
128128+ renderer.render(nodes, camera).to_vec()
119129}
120130121131/// Persistent RSP render context that reuses its device across frames.
···123133/// Avoids the 4 MB RDRAM allocation that [`render`] incurs on every call.
124134/// Microcode is loaded once at construction; subsequent [`render`](Self::render)
125135/// calls only write the per-frame data (display list, vertices, matrices)
126126-/// and reset the RSP/RDP state.
136136+/// and reset the RSP/RDP control state.
127137pub struct Renderer {
128138 device: rsp::Device,
139139+ /// `true` after the first frame (IMEM holds F3DEX2, not rspboot).
140140+ warm: bool,
129141}
130142131143impl std::fmt::Debug for Renderer {
132144 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
133133- f.debug_struct("Renderer").finish_non_exhaustive()
145145+ f.debug_struct("Renderer")
146146+ .field("warm", &self.warm)
147147+ .finish_non_exhaustive()
148148+ }
149149+}
150150+151151+impl Default for Renderer {
152152+ fn default() -> Self {
153153+ Self::new()
134154 }
135155}
136156···141161 let rdram = device.rdram_mut();
142162 write_be_bytes_to_rdram(rdram, F3DEX2_TEXT_ADDR, F3DEX2_TEXT);
143163 write_be_bytes_to_rdram(rdram, F3DEX2_DATA_ADDR, F3DEX2_DATA);
144144- Self { device }
164164+ Self {
165165+ device,
166166+ warm: false,
167167+ }
145168 }
146169147170 /// Renders model geometry through the RSP, producing RDP command words.
148148- pub fn render(&mut self, nodes: &[NodeData], camera: &CameraMatrices) -> Vec<u32> {
171171+ ///
172172+ /// The returned slice borrows from the internal RSP device and is valid
173173+ /// until the next call to `render`.
174174+ pub fn render(&mut self, nodes: &[NodeData], camera: &CameraMatrices) -> &[u32] {
149175 let gbi_output = gbi::reconstruct(
150176 nodes,
151177 camera,
···158184 self.render_gbi(&gbi_output, camera)
159185 }
160186187187+ /// Renders model geometry.
188188+ pub fn render_to(
189189+ &mut self,
190190+ nodes: &[NodeData],
191191+ camera: &CameraMatrices,
192192+ sink: &mut dyn rsp::RdpSink,
193193+ ) {
194194+ let gbi_output = gbi::reconstruct(
195195+ nodes,
196196+ camera,
197197+ addr_u32(FB_ADDR),
198198+ addr_u32(VERTEX_ADDR),
199199+ addr_u32(PROJ_MTX_ADDR),
200200+ addr_u32(MV_MTX_ADDR),
201201+ addr_u32(VIEWPORT_ADDR),
202202+ );
203203+ self.prepare_frame(&gbi_output, camera);
204204+ self.device.run_with_sink(sink);
205205+ }
206206+161207 /// Renders a pre-built GBI display list through the RSP.
162162- fn render_gbi(&mut self, gbi_output: &GbiOutput, camera: &CameraMatrices) -> Vec<u32> {
163163- self.device.reset();
208208+ fn render_gbi(&mut self, gbi_output: &GbiOutput, camera: &CameraMatrices) -> &[u32] {
209209+ self.prepare_frame(gbi_output, camera);
210210+ self.device.run()
211211+ }
212212+213213+ /// Resets the RSP/RDP, writes all per-frame data (display list, vertices,
214214+ /// matrices, `OSTask`) to RDRAM/DMEM, and prepares the RSP for execution.
215215+ ///
216216+ /// On the first frame, a full [`reset`](rsp::Device::reset) is used. On
217217+ /// subsequent frames, [`rearm`](rsp::Device::rearm) resets only the
218218+ /// control registers while preserving lookup tables and dispatch tables.
219219+ /// Rspboot always runs to properly initialise F3DEX2.
220220+ fn prepare_frame(&mut self, gbi_output: &GbiOutput, camera: &CameraMatrices) {
221221+ if self.warm {
222222+ self.device.rearm();
223223+ } else {
224224+ self.device.reset();
225225+ }
164226165227 let rdram = self.device.rdram_mut();
166228···180242 // Write viewport data (big-endian i16 values → native-endian RDRAM)
181243 write_be_bytes_to_rdram(rdram, VIEWPORT_ADDR, &gbi_output.viewport_data);
182244183183- // Write rspboot to IMEM (big-endian, RSP reads directly)
245245+ // Load rspboot into IMEM. It will DMA F3DEX2 text into IMEM and
246246+ // data into DMEM, then jump to the microcode entry point.
184247 self.device.imem_mut()[..RSPBOOT.len()].copy_from_slice(RSPBOOT);
185248186186- // Write OSTask to DMEM (big-endian, RSP reads directly with LW)
249249+ // Write OSTask to DMEM (big-endian, RSP reads directly with LW).
187250 let dmem = self.device.dmem_mut();
188251 let task_base = TASK_DMEM_OFFSET;
189252···221284 write_be_u32(dmem, task_base + TASK_DATA_SIZE, dl_size);
222285 write_be_u32(dmem, task_base + TASK_YIELD_DATA_PTR, 0);
223286224224- // Decode IMEM and run RSP
225287 self.device.decode_imem();
226288 self.device.set_pc(0);
227289 self.device.clear_halt();
228228-229229- self.device.run().to_vec()
290290+ self.warm = true;
230291 }
231292}
232293···357418 assert!(
358419 !rdp_commands.is_empty(),
359420 "RSP should produce RDP commands for a single triangle"
421421+ );
422422+ }
423423+424424+ #[test]
425425+ fn render_to_streams_to_sink() {
426426+ struct CountingSink(usize);
427427+ impl rsp::RdpSink for CountingSink {
428428+ fn receive_commands(&mut self, commands: &[u32]) {
429429+ self.0 += commands.len();
430430+ }
431431+ }
432432+433433+ let nodes = vec![NodeData {
434434+ triangles: vec![TriangleData {
435435+ v0: VertexData {
436436+ x: 0,
437437+ y: 0,
438438+ z: 0,
439439+ r: 255,
440440+ g: 0,
441441+ b: 0,
442442+ a: 255,
443443+ },
444444+ v1: VertexData {
445445+ x: 100,
446446+ y: 0,
447447+ z: 0,
448448+ r: 255,
449449+ g: 0,
450450+ b: 0,
451451+ a: 255,
452452+ },
453453+ v2: VertexData {
454454+ x: 50,
455455+ y: 100,
456456+ z: 0,
457457+ r: 255,
458458+ g: 0,
459459+ b: 0,
460460+ a: 255,
461461+ },
462462+ }],
463463+ }];
464464+465465+ let camera = CameraMatrices {
466466+ projection: identity_n64_matrix(),
467467+ modelview: identity_n64_matrix(),
468468+ };
469469+470470+ let mut renderer = Renderer::new();
471471+ let mut sink = CountingSink(0);
472472+ renderer.render_to(&nodes, &camera, &mut sink);
473473+474474+ assert!(
475475+ sink.0 > 0,
476476+ "render_to should have streamed RDP commands to the sink"
360477 );
361478 }
362479}
+82-4
crates/rsp/src/lib.rs
···2424pub mod su_instructions;
2525pub mod vu_instructions;
26262727+pub use rdp::RdpSink;
2828+2729/// Branch state enum used by the RSP CPU pipeline.
2830#[derive(PartialEq, Copy, Clone)]
2931pub enum BranchStepState {
···6870 pub byte_swap: usize,
6971 /// Maximum total cycles before `run()` forcibly halts.
7072 pub max_cycles: u64,
7373+ /// Active RDP command sink, set only during [`run_with_sink`](Self::run_with_sink).
7474+ ///
7575+ /// Raw pointer because `run_rdp` is called deep in the RSP execution
7676+ /// stack and all intermediate functions take `&mut Device`.
7777+ pub(crate) sink: Option<*mut dyn rdp::RdpSink>,
7178}
72797380impl Device {
···8390 mi: Mi { regs: [0; 4] },
8491 byte_swap: 0,
8592 max_cycles: DEFAULT_MAX_CYCLES,
9393+ sink: None,
8694 };
8795 rsp_interface::init(&mut device);
8896 rdp::init(&mut device);
···99107 self.rdp = rdp::Rdp::new();
100108 self.mi = Mi { regs: [0; 4] };
101109 self.byte_swap = 0;
110110+ self.sink = None;
102111 rsp_interface::init(self);
103112 rdp::init(self);
104113 }
105114115115+ /// Resets RSP/RDP control state so the device can run again.
116116+ ///
117117+ /// Unlike [`reset`](Self::reset), this preserves RDRAM, RSP memory
118118+ /// (DMEM/IMEM), the decoded instruction cache, lookup tables, and
119119+ /// dispatch tables. Only CPU flags, SP registers, and DPC registers
120120+ /// are cleared.
121121+ pub fn rearm(&mut self) {
122122+ // CPU flags
123123+ self.rsp.cpu.broken = false;
124124+ self.rsp.cpu.halted = false;
125125+ self.rsp.cpu.running = false;
126126+ self.rsp.cpu.sync_point = false;
127127+ self.rsp.cpu.cycle_counter = 0;
128128+ self.rsp.cpu.pipeline_full = false;
129129+ self.rsp.cpu.branch_state = cpu::BranchState {
130130+ state: BranchStepState::Step,
131131+ pc: 0,
132132+ };
133133+ self.rsp.cpu.last_instruction_type = cpu::InstructionType::Su;
134134+ self.rsp.cpu.instruction_type = cpu::InstructionType::Su;
135135+136136+ // SP registers
137137+ self.rsp.regs = [0; rsp_interface::SP_REGS_COUNT as usize];
138138+ self.rsp.regs2 = [0; rsp_interface::SP_REGS2_COUNT as usize];
139139+ self.rsp.fifo = [rsp_interface::RspDma {
140140+ dir: rsp_interface::DmaDir::None,
141141+ length: 0,
142142+ memaddr: 0,
143143+ dramaddr: 0,
144144+ }; 2];
145145+ self.rsp.last_status_value = 0;
146146+ self.rsp.run_after_dma = false;
147147+148148+ // DPC/DPS registers
149149+ self.rdp.regs_dpc = [0; rdp::DPC_REGS_COUNT as usize];
150150+ self.rdp.regs_dps = [0; rdp::DPS_REGS_COUNT as usize];
151151+ self.rdp.wait_frozen = false;
152152+ self.rdp.last_status_value = 0;
153153+ self.rdp.collected_commands.clear();
154154+155155+ self.mi = Mi { regs: [0; 4] };
156156+ self.byte_swap = 0;
157157+ self.sink = None;
158158+159159+ // Match the initial state from reset() without regenerating tables
160160+ self.rsp.regs[rsp_interface::SP_STATUS_REG as usize] = 1; // HALT
161161+ self.rdp.regs_dpc[rdp::DPC_STATUS_REG as usize] |= 1 << 7; // CBUF_READY
162162+ }
163163+106164 /// Runs the RSP until it halts or breaks, then returns the collected RDP
107165 /// command words.
108166 ///
109109- /// The RSP may hit sync points during DMA and DPC operations. This method
110110- /// automatically resumes execution after each sync point, looping until
111111- /// the RSP truly halts or breaks.
167167+ /// Commands are buffered in `rdp.collected_commands`. For streaming
168168+ /// delivery to a GPU backend, use [`run_with_sink`](Self::run_with_sink).
112169 pub fn run(&mut self) -> &[u32] {
113170 self.rdp.collected_commands.clear();
171171+ self.run_inner();
172172+ &self.rdp.collected_commands
173173+ }
174174+175175+ /// Runs the RSP, streaming RDP commands directly to `sink`.
176176+ ///
177177+ /// Unlike [`run`](Self::run), this does not buffer commands in
178178+ /// `collected_commands`. The RDRAM path is zero-copy — the byte
179179+ /// slice is reinterpreted as `&[u32]` and passed straight through.
180180+ pub fn run_with_sink(&mut self, sink: &mut dyn rdp::RdpSink) {
181181+ // SAFETY: The pointer is cleared before this method returns. The
182182+ // transmute erases the borrow lifetime so it can be stored in the
183183+ // struct, but run_inner is synchronous and the sink reference is
184184+ // valid throughout.
185185+ self.sink = unsafe { Some(std::mem::transmute(std::ptr::from_mut(sink))) };
186186+ self.run_inner();
187187+ self.sink = None;
188188+ }
189189+190190+ /// RSP execution loop shared by [`run`](Self::run) and
191191+ /// [`run_with_sink`](Self::run_with_sink).
192192+ fn run_inner(&mut self) {
114193 let mut total_cycles: u64 = 0;
115194 loop {
116195 let batch_cycles = cpu::run(self);
···120199 break;
121200 }
122201 }
123123- &self.rdp.collected_commands
124202 }
125203126204 /// Mutable access to RDRAM for writing data the RSP will read.
+56-12
crates/rsp/src/rdp.rs
···5566//! RDP (Reality Display Processor) register handling and command collection.
77//!
88-//! Instead of sending commands to a GPU backend (as gopher64 does), this
99-//! standalone version collects the RDP command words into a `Vec<u32>` so
1010-//! the caller can pass them to parallel-rdp or another renderer.
88+//! By default, RDP command words are collected into a `Vec<u32>` (see
99+//! [`Device::run`](crate::Device::run)). When a [`RdpSink`] is installed via
1010+//! [`Device::run_with_sink`](crate::Device::run_with_sink), commands are
1111+//! streamed directly to the sink — zero-copy for the RDRAM path.
/// Consumer of the RDP command words emitted while the RSP executes.
///
/// A sink is handed commands in batches as they are produced, instead of
/// receiving one complete `Vec<u32>` after the run has finished. It is
/// installed for the duration of a single
/// [`Device::run_with_sink`](crate::Device::run_with_sink) call.
pub trait RdpSink {
    /// Accepts one batch of RDP command words, given in native byte order.
    fn receive_commands(&mut self, commands: &[u32]);
}
11221223pub const DPC_START_REG: u32 = 0;
1324pub const DPC_END_REG: u32 = 1;
···122133 }
123134}
124135125125-/// Collects RDP commands from RDRAM between CURRENT and END registers.
136136+/// Dispatches RDP commands from RDRAM (or DMEM in XBUS mode) to either the
137137+/// installed [`RdpSink`] or the fallback `collected_commands` buffer.
138138+///
139139+/// The RDRAM path is zero-copy when a sink is present: the byte slice is
140140+/// reinterpreted as `&[u32]` in-place. DPC register masking (`& 0xFFFFF8`)
141141+/// guarantees 8-byte alignment for both CURRENT and END.
126142fn run_rdp(device: &mut crate::Device) {
127143 let current = device.rdp.regs_dpc[DPC_CURRENT_REG as usize] as usize;
128144 let end = device.rdp.regs_dpc[DPC_END_REG as usize] as usize;
129145130130- if device.rdp.regs_dpc[DPC_STATUS_REG as usize] & DPC_STATUS_XBUS_DMEM_DMA != 0 {
131131- // XBUS mode: commands come from DMEM/IMEM instead of RDRAM
132132- let mut addr = current & 0xFFF;
133133- while addr < (end & 0xFFF) {
134134- let word = u32::from_be_bytes(device.rsp.mem[addr..addr + 4].try_into().unwrap());
135135- device.rdp.collected_commands.push(word);
136136- addr += 4;
146146+ let is_xbus = device.rdp.regs_dpc[DPC_STATUS_REG as usize] & DPC_STATUS_XBUS_DMEM_DMA != 0;
147147+148148+ if is_xbus {
149149+ let start = current & 0xFFF;
150150+ let end_addr = end & 0xFFF;
151151+ if let Some(sink_ptr) = device.sink {
152152+ let mut commands = Vec::new();
153153+ let mut addr = start;
154154+ while addr < end_addr {
155155+ let word = u32::from_be_bytes(device.rsp.mem[addr..addr + 4].try_into().unwrap());
156156+ commands.push(word);
157157+ addr += 4;
158158+ }
159159+ // SAFETY: sink_ptr is valid for the duration of run_with_sink.
160160+ unsafe { (*sink_ptr).receive_commands(&commands) };
161161+ } else {
162162+ let mut addr = start;
163163+ while addr < end_addr {
164164+ let word = u32::from_be_bytes(device.rsp.mem[addr..addr + 4].try_into().unwrap());
165165+ device.rdp.collected_commands.push(word);
166166+ addr += 4;
167167+ }
137168 }
169169+ } else if let Some(sink_ptr) = device.sink {
170170+ let end_clamped = end.min(device.rdram.mem.len());
171171+ let start_clamped = current.min(end_clamped);
172172+ let slice = &device.rdram.mem[start_clamped..end_clamped];
173173+ // SAFETY: DPC registers mask addresses with & 0xFFFFF8 (8-byte aligned).
174174+ // Vec<u8> allocations are at least pointer-aligned. RDRAM stores u32
175175+ // words in native byte order.
176176+ let (prefix, commands, suffix) = unsafe { slice.align_to::<u32>() };
177177+ debug_assert!(
178178+ prefix.is_empty() && suffix.is_empty(),
179179+ "RDRAM slice not u32-aligned: {start_clamped:#x}..{end_clamped:#x}"
180180+ );
181181+ // SAFETY: sink_ptr is valid for the duration of run_with_sink.
182182+ unsafe { (*sink_ptr).receive_commands(commands) };
138183 } else {
139139- // Normal mode: commands come from RDRAM (stored in native byte order)
140184 let mut addr = current;
141185 while addr < end {
142186 if addr + 4 <= device.rdram.mem.len() {