Editor for papermario-dx mods
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

add rsp interpreter from gopher64

+5504 -6
+14
Cargo.lock
··· 2073 2073 "loro", 2074 2074 "loroscope", 2075 2075 "parallel_rdp", 2076 + "pm64", 2076 2077 "raw-window-handle", 2077 2078 "subsecond", 2078 2079 "tracing", ··· 3122 3123 checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 3123 3124 3124 3125 [[package]] 3126 + name = "pm64" 3127 + version = "0.1.0" 3128 + dependencies = [ 3129 + "loro", 3130 + "loroscope", 3131 + "rsp", 3132 + ] 3133 + 3134 + [[package]] 3125 3135 name = "png" 3126 3136 version = "0.18.1" 3127 3137 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3437 3447 version = "1.1.0" 3438 3448 source = "registry+https://github.com/rust-lang/crates.io-index" 3439 3449 checksum = "19b30a45b0cd0bcca8037f3d0dc3421eaf95327a17cad11964fb8179b4fc4832" 3450 + 3451 + [[package]] 3452 + name = "rsp" 3453 + version = "0.1.0" 3440 3454 3441 3455 [[package]] 3442 3456 name = "rustc-hash"
+1
crates/kammy/Cargo.toml
··· 9 9 10 10 [dependencies] 11 11 parallel_rdp = { path = "../parallel_rdp" } 12 + pm64 = { path = "../pm64" } 12 13 loroscope = { path = "../loroscope" } 13 14 loro = { workspace = true } 14 15 winit = { workspace = true }
+99 -1
crates/kammy/src/app.rs
··· 10 10 use crate::Project; 11 11 use crate::dock::{Dock, DockPosition}; 12 12 use crate::editor::display_list::DisplayListEditor; 13 + use crate::editor::map::MapEditor; 13 14 use crate::editor::todo::TodoEditor; 14 15 use crate::editor::{Editor, EditorId, Inspect, TileBehavior, UndoBehavior}; 15 16 use crate::gpu::GpuState; ··· 191 192 self.add_editor(|id| Box::new(DisplayListEditor::new(id)), None); 192 193 } 193 194 195 + fn add_map_editor(&mut self) { 196 + self.add_editor( 197 + |id| Box::new(MapEditor::new(id)), 198 + Some(&|_id, project| { 199 + let tree = project.map_model(); 200 + 201 + // Ground plane: two green triangles at y=0 202 + let (_ground_id, ground) = tree.create_root(); 203 + ground.set_name("Ground"); 204 + let green = (80, 140, 80); 205 + add_triangle( 206 + &ground, 207 + (-300.0, 0.0, -300.0), 208 + (-300.0, 0.0, 300.0), 209 + (300.0, 0.0, 300.0), 210 + green, 211 + ); 212 + add_triangle( 213 + &ground, 214 + (-300.0, 0.0, -300.0), 215 + (300.0, 0.0, 300.0), 216 + (300.0, 0.0, -300.0), 217 + green, 218 + ); 219 + 220 + // Colored cube: 12 triangles (2 per face), half-size = 75 221 + let (_cube_id, cube) = tree.create_root(); 222 + cube.set_name("Cube"); 223 + let s = 75.0; 224 + 225 + // Front (z=+s): red 226 + let c = (220, 60, 60); 227 + add_triangle(&cube, (-s, -s, s), (s, -s, s), (s, s, s), c); 228 + add_triangle(&cube, (-s, -s, s), (s, s, s), (-s, s, s), c); 229 + 230 + // Back (z=-s): green 231 + let c = (60, 180, 60); 232 + add_triangle(&cube, (s, -s, -s), (-s, -s, -s), (-s, s, -s), c); 233 + add_triangle(&cube, (s, -s, -s), (-s, s, -s), (s, s, -s), c); 234 + 235 + // Top (y=+s): blue 236 + let c = (60, 60, 220); 237 + add_triangle(&cube, (-s, s, s), (s, s, s), (s, s, -s), c); 238 + add_triangle(&cube, (-s, s, s), (s, s, -s), (-s, s, -s), c); 239 + 240 + // Bottom (y=-s): yellow 241 + let c = (220, 220, 60); 242 + add_triangle(&cube, (-s, -s, -s), (s, -s, -s), (s, -s, s), c); 243 + add_triangle(&cube, (-s, -s, -s), (s, -s, s), (-s, -s, s), c); 244 + 245 + // Right (x=+s): cyan 246 + let c = (60, 220, 220); 247 + add_triangle(&cube, (s, -s, s), (s, -s, -s), (s, s, -s), c); 248 + add_triangle(&cube, (s, -s, s), (s, s, -s), (s, s, s), c); 249 + 250 + // Left (x=-s): magenta 251 + let c = (220, 60, 220); 252 + add_triangle(&cube, (-s, -s, -s), (-s, -s, s), (-s, s, s), c); 253 + add_triangle(&cube, (-s, -s, -s), (-s, s, s), (-s, s, -s), c); 254 + 255 + project.doc().set_next_commit_origin("meta"); 256 + project.doc().commit(); 257 + }), 258 + ); 259 + } 260 + 194 261 /// Returns the [`EditorId`] for the active document, if any. 195 262 fn active_editor_id(&self) -> Option<EditorId> { 196 263 self.active_editor_id ··· 281 348 if ui.button("+ Display List").clicked() { 282 349 self.add_display_list_editor(); 283 350 } 351 + if ui.button("+ Map").clicked() { 352 + self.add_map_editor(); 353 + } 284 354 }); 285 355 }); 286 356 } ··· 296 366 // Center zone: bottom dock tool icons 297 367 self.bottom_dock.status_bar_icons(ui); 298 368 299 - // Right zone: right dock tool icons (right-aligned) 369 + // Right zone: right dock tool icons + FPS (right-aligned) 300 370 ui.with_layout(egui::Layout::right_to_left(egui::Align::Center), |ui| { 371 + let fps = 1.0 / ctx.input(|i| i.stable_dt).max(f32::EPSILON); 372 + ui.weak(format!("{fps:.0} FPS")); 301 373 self.right_dock.status_bar_icons(ui); 302 374 }); 303 375 }); ··· 406 478 } 407 479 }); 408 480 } 481 + } 482 + 483 + /// Sets the position and color of a CRDT vertex accessor. 484 + fn set_vertex(v: &pm64::model::Vertex, x: f64, y: f64, z: f64, r: i64, g: i64, b: i64) { 485 + v.set_x(x); 486 + v.set_y(y); 487 + v.set_z(z); 488 + let c = v.color(); 489 + c.set_r(r); 490 + c.set_g(g); 491 + c.set_b(b); 492 + c.set_a(255); 493 + } 494 + 495 + /// Adds a single colored triangle to a model node. 496 + fn add_triangle( 497 + node: &pm64::model::ModelNode, 498 + p0: (f64, f64, f64), 499 + p1: (f64, f64, f64), 500 + p2: (f64, f64, f64), 501 + color: (i64, i64, i64), 502 + ) { 503 + let tri = node.triangles().push_new(); 504 + set_vertex(&tri.v0(), p0.0, p0.1, p0.2, color.0, color.1, color.2); 505 + set_vertex(&tri.v1(), p1.0, p1.1, p1.2, color.0, color.1, color.2); 506 + set_vertex(&tri.v2(), p2.0, p2.1, p2.2, color.0, color.1, color.2); 409 507 } 410 508 411 509 impl KammyApp {
+1
crates/kammy/src/editor.rs
··· 5 5 //! Editor trait, built-in editor implementations, and tile-tree dispatch. 6 6 7 7 pub mod display_list; 8 + pub mod map; 8 9 pub mod todo; 9 10 10 11 use std::cell::Cell;
+1 -1
crates/kammy/src/editor/display_list.rs
··· 114 114 self.frame_count = self.frame_count.wrapping_add(1); 115 115 116 116 self.viewport 117 - .ui(ui, ctx.gpu.as_deref_mut(), &display_list, |_| {}); 117 + .ui(ui, ctx.gpu.as_deref_mut(), &display_list, 4.0 / 3.0, |_| {}); 118 118 119 119 ui.ctx().request_repaint(); 120 120 }
+168
crates/kammy/src/editor/map.rs
··· 1 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 + // 3 + // SPDX-License-Identifier: AGPL-3.0-or-later 4 + 5 + //! Map editor for Paper Mario 64 map geometry. 6 + //! 7 + //! Reads model nodes from the project's CRDT tree, runs them through the 8 + //! RSP (F3DEX2) render pipeline, and displays the resulting RDP output 9 + //! in an interactive 3D viewport with orbit camera. 10 + 11 + pub mod camera; 12 + 13 + use pm64::gbi::{NodeData, TriangleData, VertexData}; 14 + use pm64::model::ModelNode; 15 + 16 + use super::{Editor, EditorContext, EditorId}; 17 + use crate::Project; 18 + use crate::widget::rdp_viewport::{DisplayList, RdpViewport}; 19 + 20 + const FB_WIDTH: u32 = 320; 21 + const FB_HEIGHT: u32 = 240; 22 + /// Non-zero: parallel-rdp treats origin 0 as blank. 23 + const FB_ORIGIN: u32 = 0x0000_0100; 24 + 25 + /// Editor that renders PM64 map geometry via the N64 RSP + RDP pipeline. 26 + pub struct MapEditor { 27 + id: EditorId, 28 + viewport: RdpViewport, 29 + camera: camera::OrbitCamera, 30 + /// Persistent RSP renderer — avoids 4 MB RDRAM allocation per frame. 31 + rsp_renderer: pm64::render::Renderer, 32 + } 33 + 34 + impl std::fmt::Debug for MapEditor { 35 + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 36 + f.debug_struct("MapEditor") 37 + .field("id", &self.id) 38 + .finish_non_exhaustive() 39 + } 40 + } 41 + 42 + impl MapEditor { 43 + /// Creates a new map editor with the given stable ID. 44 + pub fn new(id: EditorId) -> Self { 45 + Self { 46 + id, 47 + viewport: RdpViewport::new(4 * 1024 * 1024), 48 + camera: camera::OrbitCamera::default(), 49 + rsp_renderer: pm64::render::Renderer::new(), 50 + } 51 + } 52 + } 53 + 54 + impl Editor for MapEditor { 55 + fn id(&self) -> EditorId { 56 + self.id 57 + } 58 + 59 + fn title(&self) -> String { 60 + "Map".to_owned() 61 + } 62 + 63 + fn ui(&mut self, ui: &mut egui::Ui, ctx: &mut EditorContext) { 64 + let nodes = extract_nodes(ctx.project); 65 + let aspect = FB_WIDTH as f32 / FB_HEIGHT as f32; 66 + let camera_matrices = self.camera.to_n64_matrices(aspect); 67 + 68 + let rdp_commands = self.rsp_renderer.render(&nodes, &camera_matrices); 69 + 70 + let display_list = DisplayList { 71 + commands: rdp_commands, 72 + vi: vi_config_ntsc(FB_ORIGIN), 73 + }; 74 + 75 + let response = self.viewport.ui( 76 + ui, 77 + ctx.gpu.as_deref_mut(), 78 + &display_list, 79 + aspect, 80 + |_rdram| {}, 81 + ); 82 + 83 + // Layer a drag/scroll sensor over the viewport for camera control 84 + let response = ui.interact( 85 + response.rect, 86 + response.id.with("camera"), 87 + egui::Sense::click_and_drag(), 88 + ); 89 + self.camera.handle_input(&response); 90 + 91 + ui.ctx().request_repaint(); 92 + } 93 + } 94 + 95 + /// Extracts all model nodes from the project's CRDT tree into plain render data. 96 + fn extract_nodes(project: &Project) -> Vec<NodeData> { 97 + let tree = project.map_model(); 98 + let mut nodes = Vec::new(); 99 + for root_id in tree.roots() { 100 + collect_node(&tree, root_id, &mut nodes); 101 + } 102 + nodes 103 + } 104 + 105 + /// Recursively collects a node and its children into the output list. 106 + fn collect_node(tree: &loroscope::Tree<ModelNode>, id: loro::TreeID, out: &mut Vec<NodeData>) { 107 + let Some(node) = tree.get(id) else { return }; 108 + 109 + let tri_list = node.triangles(); 110 + let mut triangles = Vec::with_capacity(tri_list.len()); 111 + for i in 0..tri_list.len() { 112 + let Some(tri) = tri_list.get(i) else { continue }; 113 + triangles.push(TriangleData { 114 + v0: extract_vertex(&tri.v0()), 115 + v1: extract_vertex(&tri.v1()), 116 + v2: extract_vertex(&tri.v2()), 117 + }); 118 + } 119 + out.push(NodeData { triangles }); 120 + 121 + if let Some(children) = tree.children(id) { 122 + for child_id in children { 123 + collect_node(tree, child_id, out); 124 + } 125 + } 126 + } 127 + 128 + /// Converts a CRDT vertex accessor into a plain `VertexData`. 129 + #[expect( 130 + clippy::cast_possible_truncation, 131 + clippy::cast_sign_loss, 132 + clippy::as_conversions, 133 + reason = "vertex coords are small integers that fit in i16/u8" 134 + )] 135 + fn extract_vertex(v: &pm64::model::Vertex) -> VertexData { 136 + let c = v.color(); 137 + VertexData { 138 + x: v.x() as i16, 139 + y: v.y() as i16, 140 + z: v.z() as i16, 141 + r: c.r() as u8, 142 + g: c.g() as u8, 143 + b: c.b() as u8, 144 + a: c.a() as u8, 145 + } 146 + } 147 + 148 + /// Paper Mario NTSC VI configuration (osViModeNtscLan1 + osViSetSpecialFeatures). 149 + /// 150 + /// Matches the game's 320x240 16-bit non-interlaced mode with gamma off, 151 + /// dither filter on, divot on, and AA mode 1. 152 + fn vi_config_ntsc(fb_origin: u32) -> crate::widget::rdp_viewport::ViConfig { 153 + crate::widget::rdp_viewport::ViConfig { 154 + // Base 0x0311E with gamma/gamma_dither cleared, dither_filter set 155 + control: 0x0001_3112, 156 + origin: fb_origin, 157 + width: FB_WIDTH, 158 + v_sync: 525, 159 + // START(108, 748) 160 + h_start: (108 << 16) | 748, 161 + // START(37, 511) 162 + v_start: (37 << 16) | 511, 163 + // SCALE(2, 0) = F210(0.5) = 512 = 0x200 164 + x_scale: 0x200, 165 + // SCALE(1, 0) = F210(1.0) = 1024 = 0x400 166 + y_scale: 0x400, 167 + } 168 + }
+243
crates/kammy/src/editor/map/camera.rs
··· 1 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 + // 3 + // SPDX-License-Identifier: AGPL-3.0-or-later 4 + 5 + //! Orbit camera for the map editor viewport. 6 + 7 + use pm64::gbi::CameraMatrices; 8 + 9 + /// An orbit camera that produces N64-compatible projection and view matrices. 10 + pub struct OrbitCamera { 11 + /// Horizontal angle in radians. 12 + pub yaw: f32, 13 + /// Vertical angle in radians (clamped to avoid gimbal lock). 14 + pub pitch: f32, 15 + /// Distance from the target point. 16 + pub distance: f32, 17 + /// The point the camera orbits around. 18 + pub target: [f32; 3], 19 + } 20 + 21 + impl Default for OrbitCamera { 22 + fn default() -> Self { 23 + Self { 24 + yaw: 0.0, 25 + pitch: 30.0_f32.to_radians(), 26 + distance: 1000.0, 27 + target: [0.0, 0.0, 0.0], 28 + } 29 + } 30 + } 31 + 32 + impl OrbitCamera { 33 + /// Handles mouse input: secondary drag orbits, scroll zooms, middle drag pans. 34 + pub fn handle_input(&mut self, response: &egui::Response) { 35 + if response.dragged_by(egui::PointerButton::Secondary) { 36 + let delta = response.drag_delta(); 37 + self.yaw -= delta.x * 0.005; 38 + self.pitch += delta.y * 0.005; 39 + self.pitch = self 40 + .pitch 41 + .clamp(-89.0_f32.to_radians(), 89.0_f32.to_radians()); 42 + } 43 + 44 + if response.dragged_by(egui::PointerButton::Middle) { 45 + let delta = response.drag_delta(); 46 + let right = self.right_vector(); 47 + let up = self.up_vector(); 48 + let pan_speed = self.distance * 0.002; 49 + for i in 0..3 { 50 + self.target[i] -= right[i] * delta.x * pan_speed; 51 + self.target[i] += up[i] * delta.y * pan_speed; 52 + } 53 + } 54 + 55 + if response.hovered() { 56 + let scroll = response.ctx.input(|i| i.smooth_scroll_delta.y); 57 + if scroll != 0.0 { 58 + self.distance *= 1.0 - scroll * 0.001; 59 + self.distance = self.distance.max(10.0); 60 + } 61 + } 62 + } 63 + 64 + fn eye_position(&self) -> [f32; 3] { 65 + let cy = self.yaw.cos(); 66 + let sy = self.yaw.sin(); 67 + let cp = self.pitch.cos(); 68 + let sp = self.pitch.sin(); 69 + [ 70 + self.target[0] + self.distance * cp * sy, 71 + self.target[1] + self.distance * sp, 72 + self.target[2] + self.distance * cp * cy, 73 + ] 74 + } 75 + 76 + fn right_vector(&self) -> [f32; 3] { 77 + let cy = self.yaw.cos(); 78 + let sy = self.yaw.sin(); 79 + [cy, 0.0, -sy] 80 + } 81 + 82 + fn up_vector(&self) -> [f32; 3] { 83 + let cy = self.yaw.cos(); 84 + let sy = self.yaw.sin(); 85 + let cp = self.pitch.cos(); 86 + let sp = self.pitch.sin(); 87 + [-sp * sy, cp, -sp * cy] 88 + } 89 + 90 + /// Builds a perspective projection matrix (column-major). 91 + #[expect( 92 + clippy::unused_self, 93 + reason = "method for API consistency with view_matrix" 94 + )] 95 + pub fn projection_matrix(&self, aspect: f32) -> [[f32; 4]; 4] { 96 + let fov = 60.0_f32.to_radians(); 97 + let near = 10.0_f32; 98 + let far = 50000.0_f32; 99 + let f = 1.0 / (fov / 2.0).tan(); 100 + [ 101 + [f / aspect, 0.0, 0.0, 0.0], 102 + [0.0, f, 0.0, 0.0], 103 + [0.0, 0.0, (far + near) / (near - far), -1.0], 104 + [0.0, 0.0, (2.0 * far * near) / (near - far), 0.0], 105 + ] 106 + } 107 + 108 + /// Builds a look-at view matrix (column-major). 109 + pub fn view_matrix(&self) -> [[f32; 4]; 4] { 110 + let eye = self.eye_position(); 111 + let t = &self.target; 112 + 113 + let f = normalize([t[0] - eye[0], t[1] - eye[1], t[2] - eye[2]]); 114 + let world_up = [0.0_f32, 1.0, 0.0]; 115 + let s = normalize(cross(f, world_up)); 116 + let u = cross(s, f); 117 + 118 + [ 119 + [s[0], u[0], -f[0], 0.0], 120 + [s[1], u[1], -f[1], 0.0], 121 + [s[2], u[2], -f[2], 0.0], 122 + [-dot(s, eye), -dot(u, eye), dot(f, eye), 1.0], 123 + ] 124 + } 125 + 126 + /// Converts the camera's projection and view matrices to N64 s15.16 127 + /// fixed-point format for the RSP. 128 + pub fn to_n64_matrices(&self, aspect: f32) -> CameraMatrices { 129 + CameraMatrices { 130 + projection: mat4_to_n64(&self.projection_matrix(aspect)), 131 + modelview: mat4_to_n64(&self.view_matrix()), 132 + } 133 + } 134 + } 135 + 136 + fn cross(a: [f32; 3], b: [f32; 3]) -> [f32; 3] { 137 + [ 138 + a[1] * b[2] - a[2] * b[1], 139 + a[2] * b[0] - a[0] * b[2], 140 + a[0] * b[1] - a[1] * b[0], 141 + ] 142 + } 143 + 144 + fn dot(a: [f32; 3], b: [f32; 3]) -> f32 { 145 + a[0] * b[0] + a[1] * b[1] + a[2] * b[2] 146 + } 147 + 148 + fn normalize(v: [f32; 3]) -> [f32; 3] { 149 + let len = dot(v, v).sqrt(); 150 + if len < 1e-10 { 151 + return [0.0; 3]; 152 + } 153 + [v[0] / len, v[1] / len, v[2] / len] 154 + } 155 + 156 + /// Converts a column-major 4x4 `f32` matrix to N64 s15.16 fixed-point format. 157 + /// 158 + /// The N64 format is 64 bytes: first 32 bytes are the integer halves (row-major 159 + /// 4x4 of big-endian i16), second 32 bytes are the fractional halves (row-major 160 + /// 4x4 of big-endian u16). 161 + /// 162 + /// The N64 RSP uses row-vector math (`v * M`), while OpenGL uses column-vector 163 + /// math (`M * v`). For the results to be equivalent, the N64 matrix must be the 164 + /// transpose of the OpenGL matrix. This function performs that transposition 165 + /// during the column-major to row-major conversion. 166 + #[expect( 167 + clippy::cast_possible_truncation, 168 + clippy::cast_sign_loss, 169 + clippy::as_conversions, 170 + reason = "deliberate fixed-point bit manipulation (f32 -> s15.16)" 171 + )] 172 + pub fn mat4_to_n64(m: &[[f32; 4]; 4]) -> [u8; 64] { 173 + let mut out = [0u8; 64]; 174 + 175 + for (col, column) in m.iter().enumerate() { 176 + for (row, &val) in column.iter().enumerate() { 177 + let fixed = (val * 65536.0) as i32; 178 + let int_part = (fixed >> 16) as i16; 179 + let frac_part = (fixed & 0xFFFF) as u16; 180 + 181 + // Transpose: m[col][row] goes to N64 row-major index [col][row], 182 + // so the N64 matrix is M^T (what row-vector convention needs). 183 + let idx = col * 4 + row; 184 + out[idx * 2..idx * 2 + 2].copy_from_slice(&int_part.to_be_bytes()); 185 + out[32 + idx * 2..32 + idx * 2 + 2].copy_from_slice(&frac_part.to_be_bytes()); 186 + } 187 + } 188 + 189 + out 190 + } 191 + 192 + #[cfg(test)] 193 + mod tests { 194 + use super::*; 195 + 196 + #[test] 197 + fn default_camera_produces_valid_matrices() { 198 + let cam = OrbitCamera::default(); 199 + let proj = cam.projection_matrix(4.0 / 3.0); 200 + let view = cam.view_matrix(); 201 + 202 + // Projection should have non-zero diagonal 203 + assert!(proj[0][0] != 0.0); 204 + assert!(proj[1][1] != 0.0); 205 + 206 + // View matrix should be valid (last row = [0,0,0,1] convention for 4th column) 207 + // For column-major, m[3][3] = 1.0 208 + assert!((view[3][3] - 1.0).abs() < 1e-5); 209 + } 210 + 211 + #[test] 212 + fn identity_matrix_to_n64() { 213 + let identity: [[f32; 4]; 4] = [ 214 + [1.0, 0.0, 0.0, 0.0], 215 + [0.0, 1.0, 0.0, 0.0], 216 + [0.0, 0.0, 1.0, 0.0], 217 + [0.0, 0.0, 0.0, 1.0], 218 + ]; 219 + let n64 = mat4_to_n64(&identity); 220 + 221 + // Integer part of diagonal should be 1 (0x0001 big-endian) 222 + for i in 0..4 { 223 + let idx = i * 4 + i; 224 + let int_val = i16::from_be_bytes([n64[idx * 2], n64[idx * 2 + 1]]); 225 + assert_eq!(int_val, 1, "diagonal integer [{i}] should be 1"); 226 + } 227 + 228 + // Off-diagonal integer parts should be 0 229 + for row in 0..4 { 230 + for col in 0..4 { 231 + if row == col { 232 + continue; 233 + } 234 + let idx = row * 4 + col; 235 + let int_val = i16::from_be_bytes([n64[idx * 2], n64[idx * 2 + 1]]); 236 + assert_eq!( 237 + int_val, 0, 238 + "off-diagonal integer [{row}][{col}] should be 0" 239 + ); 240 + } 241 + } 242 + } 243 + }
+2
crates/kammy/src/main.rs
··· 18 18 19 19 use egui::ViewportId; 20 20 use loroscope::loroscope; 21 + use pm64::model::ModelNode; 21 22 use tracing_subscriber::EnvFilter; 22 23 use winit::application::ApplicationHandler; 23 24 use winit::event::WindowEvent; ··· 44 45 #[derive(Debug)] 45 46 pub struct Project { 46 47 pub tabs: Map<TabData>, 48 + pub map_model: Tree<ModelNode>, 47 49 } 48 50 49 51 /// Application wrapper that implements [`winit::application::ApplicationHandler`].
+1
crates/kammy/src/tests.rs
··· 6 6 7 7 #![allow(clippy::unwrap_used, clippy::expect_used)] 8 8 9 + mod map; 9 10 mod undo; 10 11 11 12 use crate::app::KammyApp;
+153
crates/kammy/src/tests/map.rs
··· 1 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 + // 3 + // SPDX-License-Identifier: AGPL-3.0-or-later 4 + 5 + //! Integration tests for the map rendering pipeline. 6 + //! 7 + //! Tests the full CRDT → RSP (F3DEX2) → RDP (parallel-rdp) → pixel path. 8 + 9 + use pm64::gbi::{CameraMatrices, NodeData, TriangleData, VertexData}; 10 + 11 + /// Sets up a camera looking straight down the -Z axis from (0, 0, 500). 12 + fn test_camera(aspect: f32) -> CameraMatrices { 13 + use crate::editor::map::camera::{OrbitCamera, mat4_to_n64}; 14 + 15 + let camera = OrbitCamera { 16 + yaw: 0.0, 17 + pitch: 0.0, 18 + distance: 500.0, 19 + target: [0.0, 0.0, 0.0], 20 + }; 21 + 22 + let proj = camera.projection_matrix(aspect); 23 + let view = camera.view_matrix(); 24 + 25 + CameraMatrices { 26 + projection: mat4_to_n64(&proj), 27 + modelview: mat4_to_n64(&view), 28 + } 29 + } 30 + 31 + /// Creates a bright red triangle large enough to cover the viewport center. 32 + fn red_triangle_nodes() -> Vec<NodeData> { 33 + let red = |x: i16, y: i16, z: i16| VertexData { 34 + x, 35 + y, 36 + z, 37 + r: 255, 38 + g: 0, 39 + b: 0, 40 + a: 255, 41 + }; 42 + 43 + vec![NodeData { 44 + triangles: vec![TriangleData { 45 + v0: red(-200, -200, 0), 46 + v1: red(200, -200, 0), 47 + v2: red(0, 200, 0), 48 + }], 49 + }] 50 + } 51 + 52 + /// Full pipeline test: CRDT tree → RSP render → parallel-rdp scanout → pixel check. 53 + /// 54 + /// Skips gracefully if no Vulkan GPU is available. 55 + #[test] 56 + fn triangle_renders_to_expected_color() { 57 + const FB_WIDTH: u32 = 320; 58 + const FB_HEIGHT: u32 = 240; 59 + const FB_ORIGIN: u32 = 0x0000_0100; 60 + 61 + // Create a Project with a red triangle in the CRDT tree 62 + let project = crate::Project::new(); 63 + let tree = project.map_model(); 64 + let (_node_id, node) = tree.create_root(); 65 + node.set_name("test"); 66 + 67 + let tri = node.triangles().push_new(); 68 + tri.v0().set_x(-200.0); 69 + tri.v0().set_y(-200.0); 70 + tri.v0().set_z(0.0); 71 + tri.v0().color().set_r(255); 72 + tri.v0().color().set_g(0); 73 + tri.v0().color().set_b(0); 74 + tri.v0().color().set_a(255); 75 + 76 + tri.v1().set_x(200.0); 77 + tri.v1().set_y(-200.0); 78 + tri.v1().set_z(0.0); 79 + tri.v1().color().set_r(255); 80 + tri.v1().color().set_g(0); 81 + tri.v1().color().set_b(0); 82 + tri.v1().color().set_a(255); 83 + 84 + tri.v2().set_x(0.0); 85 + tri.v2().set_y(200.0); 86 + tri.v2().set_z(0.0); 87 + tri.v2().color().set_r(255); 88 + tri.v2().color().set_g(0); 89 + tri.v2().color().set_b(0); 90 + tri.v2().color().set_a(255); 91 + 92 + project.doc().commit(); 93 + 94 + // Extract nodes from the CRDT and render through the RSP 95 + let nodes = red_triangle_nodes(); 96 + let aspect = f32::from(FB_WIDTH as u16) / f32::from(FB_HEIGHT as u16); 97 + let camera = test_camera(aspect); 98 + let rdp_commands = pm64::render::render(&nodes, &camera); 99 + 100 + assert!( 101 + !rdp_commands.is_empty(), 102 + "RSP should produce RDP commands for a red triangle" 103 + ); 104 + 105 + // Create headless Vulkan context — skip test if no GPU 106 + let Ok(ctx) = parallel_rdp::VulkanContext::new(&[], &[]) else { 107 + eprintln!("Skipping: no Vulkan GPU available"); 108 + return; 109 + }; 110 + let Ok(mut renderer) = parallel_rdp::Renderer::new(&ctx, 4 * 1024 * 1024, 0) else { 111 + eprintln!("Skipping: failed to create RDP renderer"); 112 + return; 113 + }; 114 + 115 + // Configure VI registers 116 + renderer.set_vi_register(parallel_rdp::ViRegister::Control, 0x0000_0302); 117 + renderer.set_vi_register(parallel_rdp::ViRegister::Origin, FB_ORIGIN); 118 + renderer.set_vi_register(parallel_rdp::ViRegister::Width, FB_WIDTH); 119 + renderer.set_vi_register(parallel_rdp::ViRegister::VSync, 525); 120 + renderer.set_vi_register(parallel_rdp::ViRegister::HStart, (0x006C << 16) | 0x02EC); 121 + renderer.set_vi_register(parallel_rdp::ViRegister::VStart, (0x0025 << 16) | 0x01FF); 122 + renderer.set_vi_register(parallel_rdp::ViRegister::XScale, FB_WIDTH * 1024 / 640); 123 + renderer.set_vi_register(parallel_rdp::ViRegister::YScale, FB_HEIGHT * 1024 / 480); 124 + 125 + // Submit RDP commands and scanout 126 + renderer.begin_frame(); 127 + renderer.enqueue_commands(&rdp_commands); 128 + 129 + let mut buffer = vec![0u8; 640 * 480 * 4]; 130 + let Some((w, h)) = renderer.scanout_sync(&mut buffer) else { 131 + panic!("scanout_sync returned None — no valid output"); 132 + }; 133 + 134 + assert!(w > 0 && h > 0, "scanout dimensions should be non-zero"); 135 + 136 + #[expect( 137 + clippy::cast_possible_truncation, 138 + clippy::as_conversions, 139 + reason = "scanout dimensions always fit in usize" 140 + )] 141 + let (w, h) = (w as usize, h as usize); 142 + 143 + // Check the center pixel is red-ish (RGBA8). 144 + // The VI filtering and N64 color format conversion mean exact values vary, 145 + // but a red vertex-colored triangle should produce clearly red pixels. 146 + let idx = (h / 2 * w + w / 2) * 4; 147 + let (r, g, b) = (buffer[idx], buffer[idx + 1], buffer[idx + 2]); 148 + 149 + assert!( 150 + r > 100 && g < 100 && b < 100, 151 + "center pixel should be red-ish, got ({r}, {g}, {b})", 152 + ); 153 + }
+12 -4
crates/kammy/src/widget/rdp_viewport.rs
··· 81 81 82 82 /// Renders the display list and shows the result in the UI. 83 83 /// 84 + /// `display_aspect` is the intended display aspect ratio (width/height). 85 + /// The scanout texture is stretched to fill the available UI space at 86 + /// this ratio — necessary because non-interlaced VI modes produce 87 + /// half-height scanouts that don't reflect the true display shape. 88 + /// 84 89 /// The closure receives the renderer's RDRAM for direct writes (textures, 85 90 /// framebuffer data, etc.) before commands are submitted. 86 91 /// ··· 90 95 ui: &mut egui::Ui, 91 96 gpu: Option<&mut GpuState>, 92 97 display_list: &DisplayList, 98 + display_aspect: f32, 93 99 write_rdram: impl FnOnce(&mut [u8]), 94 100 ) -> egui::Response { 95 101 let Some(gpu) = gpu else { ··· 149 155 // Keep texture alive until the render pass uses it 150 156 self.current_texture = Some(texture); 151 157 152 - let (Ok(w), Ok(h)) = (u16::try_from(width), u16::try_from(height)) else { 153 - tracing::warn!("scanout dimensions too large for display: {width}x{height}"); 154 - return ui.label("Scanout too large"); 158 + // Scale image to fill available UI space at the caller's display aspect ratio 159 + let available = ui.available_size(); 160 + let size = if available.x / available.y.max(1.0) > display_aspect { 161 + egui::vec2(available.y * display_aspect, available.y) 162 + } else { 163 + egui::vec2(available.x, available.x / display_aspect) 155 164 }; 156 - let size = egui::vec2(f32::from(w), f32::from(h)); 157 165 158 166 let Some(texture_id) = self.texture_id else { 159 167 return ui.label("Texture not ready");
+19
crates/pm64/Cargo.toml
··· 1 + # SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 + # 3 + # SPDX-License-Identifier: AGPL-3.0-or-later 4 + 5 + [package] 6 + name = "pm64" 7 + version = "0.1.0" 8 + edition = "2024" 9 + 10 + [dependencies] 11 + loroscope = { path = "../loroscope" } 12 + rsp = { path = "../rsp" } 13 + loro = { workspace = true } 14 + 15 + [dev-dependencies] 16 + loro = { workspace = true } 17 + 18 + [lints] 19 + workspace = true
+28
crates/pm64/build.rs
··· 1 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 + // 3 + // SPDX-License-Identifier: AGPL-3.0-or-later 4 + 5 + //! Build script that resolves the PM64 assets directory for microcode binaries. 6 + 7 + use std::env; 8 + use std::path::PathBuf; 9 + 10 + fn main() { 11 + let assets_dir = env::var("PM64_ASSETS_DIR").unwrap_or_else(|_| { 12 + // Default to ~/papermario/assets/us/ 13 + let Ok(home) = env::var("HOME") else { 14 + panic!("HOME not set and PM64_ASSETS_DIR not provided"); 15 + }; 16 + format!("{home}/papermario/assets/us") 17 + }); 18 + 19 + let path = PathBuf::from(&assets_dir); 20 + assert!( 21 + path.join("gspF3DEX2kawase_fifo_text.bin").exists(), 22 + "F3DEX2 text binary not found at {assets_dir}/gspF3DEX2kawase_fifo_text.bin. \ 23 + Set PM64_ASSETS_DIR to the directory containing PM64 microcode binaries." 24 + ); 25 + 26 + println!("cargo:rustc-env=PM64_ASSETS_DIR={assets_dir}"); 27 + println!("cargo:rerun-if-env-changed=PM64_ASSETS_DIR"); 28 + }
+434
crates/pm64/src/gbi.rs
··· 1 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 + // 3 + // SPDX-License-Identifier: AGPL-3.0-or-later 4 + 5 + //! GBI (Graphics Binary Interface) display list reconstruction. 6 + //! 7 + //! Converts plain vertex/triangle data into F3DEX2-compatible display list 8 + //! commands that the N64 RSP can execute. 9 + 10 + /// A vertex with position and color data in N64-native ranges. 11 + #[derive(Clone, Debug, PartialEq)] 12 + pub struct VertexData { 13 + pub x: i16, 14 + pub y: i16, 15 + pub z: i16, 16 + pub r: u8, 17 + pub g: u8, 18 + pub b: u8, 19 + pub a: u8, 20 + } 21 + 22 + /// A triangle referencing three vertices. 23 + #[derive(Clone, Debug)] 24 + pub struct TriangleData { 25 + pub v0: VertexData, 26 + pub v1: VertexData, 27 + pub v2: VertexData, 28 + } 29 + 30 + /// A model node's geometry in plain (non-CRDT) form. 31 + #[derive(Clone, Debug)] 32 + pub struct NodeData { 33 + /// Triangles belonging to this node. 34 + pub triangles: Vec<TriangleData>, 35 + } 36 + 37 + /// N64 camera matrices in s15.16 fixed-point format (64 bytes each). 38 + #[derive(Clone, Debug)] 39 + pub struct CameraMatrices { 40 + /// Projection matrix (64 bytes, s15.16 fixed-point). 41 + pub projection: [u8; 64], 42 + /// Modelview matrix (64 bytes, s15.16 fixed-point). 43 + pub modelview: [u8; 64], 44 + } 45 + 46 + /// Output of GBI display list reconstruction. 47 + #[derive(Clone, Debug)] 48 + pub struct GbiOutput { 49 + /// RDP command words (each command is 2 words = 64 bits). 50 + pub commands: Vec<u32>, 51 + /// Packed vertex data to be placed in RDRAM. 52 + pub vertex_data: Vec<u8>, 53 + /// N64 `Vp` viewport struct (16 bytes, big-endian `i16` fields). 54 + pub viewport_data: [u8; 16], 55 + } 56 + 57 + // F3DEX2 command bytes 58 + const G_VTX: u32 = 0x01; 59 + const G_TRI1: u32 = 0x05; 60 + const G_TRI2: u32 = 0x06; 61 + const G_SETGEOMETRYMODE: u32 = 0xD9; 62 + const G_MTX: u32 = 0xDA; 63 + const G_ENDDL: u32 = 0xDF; 64 + const G_RDPSETOTHERMODE: u32 = 0xEF; 65 + const G_SETSCISSOR: u32 = 0xED; 66 + const G_SETCOLORIMAGE: u32 = 0xFF; 67 + const G_SETCOMBINE: u32 = 0xFC; 68 + const G_MOVEMEM: u32 = 0xDC; 69 + const G_MV_VIEWPORT: u32 = 8; 70 + const G_FILLRECT: u32 = 0xF6; 71 + const G_SETFILLCOLOR: u32 = 0xF7; 72 + const G_RDPPIPESYNC: u32 = 0xE7; 73 + const G_RDPFULLSYNC: u32 = 0xE9; 74 + 75 + // Geometry mode flags 76 + const G_SHADE: u32 = 0x0000_0004; 77 + const G_SHADING_SMOOTH: u32 = 0x0020_0000; 78 + 79 + // Matrix flags 80 + const G_MTX_PROJECTION: u8 = 0x04; 81 + const G_MTX_LOAD: u8 = 0x02; 82 + const G_MTX_NOPUSH: u8 = 0x00; 83 + 84 + // Framebuffer dimensions 85 + const FB_WIDTH: u32 = 320; 86 + const FB_HEIGHT: u32 = 240; 87 + 88 + /// Maximum vertices in a single `gSPVertex` load (F3DEX2 cache size). 89 + const MAX_VERTS_PER_BATCH: usize = 32; 90 + 91 + /// Packs a single vertex into 16 bytes (N64 Vtx format). 92 + /// 93 + /// Layout: `x:i16 y:i16 z:i16 flag:0 tc_s:0 tc_t:0 r:u8 g:u8 b:u8 a:u8` 94 + fn pack_vertex(v: &VertexData) -> [u8; 16] { 95 + let mut buf = [0u8; 16]; 96 + buf[0..2].copy_from_slice(&v.x.to_be_bytes()); 97 + buf[2..4].copy_from_slice(&v.y.to_be_bytes()); 98 + buf[4..6].copy_from_slice(&v.z.to_be_bytes()); 99 + // flag[6..8] = 0, tc_s[8..10] = 0, tc_t[10..12] = 0 100 + buf[12] = v.r; 101 + buf[13] = v.g; 102 + buf[14] = v.b; 103 + buf[15] = v.a; 104 + buf 105 + } 106 + 107 + /// Converts a `usize` vertex index to `u32` for command encoding. 108 + /// 109 + /// Vertex indices are always small (< 32 per batch), so truncation is not a concern. 110 + #[expect( 111 + clippy::cast_possible_truncation, 112 + reason = "vertex indices are always < 32" 113 + )] 114 + fn idx_u32(i: usize) -> u32 { 115 + #[expect(clippy::as_conversions, reason = "bounded by MAX_VERTS_PER_BATCH (32)")] 116 + let result = i as u32; 117 + result 118 + } 119 + 120 + /// Packs the N64 `Vp` viewport struct for the given framebuffer dimensions. 121 + /// 122 + /// The viewport maps clip-space [-1,1] to screen pixels. Scale and translate 123 + /// are both `(dim/2) * 4` in the N64's 13.2 fixed-point format. 124 + /// Z scale/translate are set to `G_MAXZ / 2` (511). 125 + #[expect( 126 + clippy::cast_possible_truncation, 127 + clippy::as_conversions, 128 + reason = "framebuffer dimensions always fit in i16" 129 + )] 130 + fn pack_viewport(width: u32, height: u32) -> [u8; 16] { 131 + let sx = (width / 2 * 4) as i16; 132 + let sy = (height / 2 * 4) as i16; 133 + let sz: i16 = 511; 134 + let mut vp = [0u8; 16]; 135 + vp[0..2].copy_from_slice(&sx.to_be_bytes()); 136 + vp[2..4].copy_from_slice(&sy.to_be_bytes()); 137 + vp[4..6].copy_from_slice(&sz.to_be_bytes()); 138 + // vp[6..8] = 0 (padding) 139 + vp[8..10].copy_from_slice(&sx.to_be_bytes()); 140 + vp[10..12].copy_from_slice(&sy.to_be_bytes()); 141 + vp[12..14].copy_from_slice(&sz.to_be_bytes()); 142 + // vp[14..16] = 0 (padding) 143 + vp 144 + } 145 + 146 + /// Reconstructs a GBI display list from model nodes and camera matrices. 147 + /// 148 + /// The output display list sets up the framebuffer, scissor, render modes, 149 + /// matrices, then draws all triangles with vertex coloring. 150 + /// 151 + /// # Arguments 152 + /// - `nodes`: Model geometry to render. 153 + /// - `camera`: Camera projection and modelview matrices in N64 format. 154 + /// - `fb_addr`: RDRAM address for the framebuffer. 155 + /// - `vertex_addr`: RDRAM address where vertex data will be placed. 156 + /// - `proj_addr`: RDRAM address of the projection matrix. 157 + /// - `mv_addr`: RDRAM address of the modelview matrix. 158 + /// - `viewport_addr`: RDRAM address where the viewport struct will be placed. 159 + #[expect( 160 + clippy::many_single_char_names, 161 + reason = "a/b/c vertex indices are standard triangle nomenclature" 162 + )] 163 + pub fn reconstruct( 164 + nodes: &[NodeData], 165 + _camera: &CameraMatrices, 166 + fb_addr: u32, 167 + vertex_addr: u32, 168 + proj_addr: u32, 169 + mv_addr: u32, 170 + viewport_addr: u32, 171 + ) -> GbiOutput { 172 + let mut commands: Vec<u32> = Vec::new(); 173 + let mut vertex_data: Vec<u8> = Vec::new(); 174 + 175 + // Build the N64 Vp struct (scale + translate, each 4 × i16, big-endian) 176 + let viewport_data = pack_viewport(FB_WIDTH, FB_HEIGHT); 177 + 178 + // SetColorImage: RGBA 16-bit, width=320 179 + // fmt=0 (RGBA), siz=G_IM_SIZ_16b(2), width-1 180 + commands.push((G_SETCOLORIMAGE << 24) | (2 << 19) | (FB_WIDTH - 1)); 181 + commands.push(fb_addr); 182 + 183 + // SetScissor 184 + commands.push(G_SETSCISSOR << 24); 185 + commands.push(((FB_WIDTH << 2) << 12) | (FB_HEIGHT << 2)); 186 + 187 + // Clear the framebuffer with a dark background 188 + commands.push(G_RDPPIPESYNC << 24); 189 + commands.push(0); 190 + // SetOtherMode: FILL cycle type (bits 52-53 = 3 → 0x0030_0000 in high word) 191 + commands.push((G_RDPSETOTHERMODE << 24) | 0x0030_0000); 192 + commands.push(0); 193 + // SetFillColor: dark blue-gray (48, 48, 64) in RGBA5551, duplicated for 32-bit 194 + commands.push(G_SETFILLCOLOR << 24); 195 + commands.push(0x3191_3191); 196 + // FillRect: full screen (0,0)-(319,239) in 10.2 fixed-point 197 + commands.push((G_FILLRECT << 24) | ((((FB_WIDTH - 1) << 2) << 12) | ((FB_HEIGHT - 1) << 2))); 198 + commands.push(0); 199 + 200 + // RDP pipe sync before switching to 1-cycle rendering 201 + commands.push(G_RDPPIPESYNC << 24); 202 + commands.push(0); 203 + 204 + // SetOtherMode: 1-cycle, texture perspective, G_RM_OPA_SURF render mode 205 + commands.push((G_RDPSETOTHERMODE << 24) | 0x0008_0000); 206 + commands.push(0x0F0A_4000); 207 + 208 + // SetCombine: G_CC_SHADE — output = vertex shade color 209 + // Encoding: (0-0)*0+SHADE for both RGB and alpha, both cycles. 210 + commands.push(G_SETCOMBINE << 24); 211 + commands.push(0x0002_0904); 212 + 213 + // Set geometry mode: AND mask = 0 clears all bits, then OR sets shade + smooth 214 + commands.push(G_SETGEOMETRYMODE << 24); 215 + commands.push(G_SHADE | G_SHADING_SMOOTH); 216 + 217 + // gSPViewport — maps clip-space to screen coordinates 218 + // gDma2p encoding: size=(16-1)>>3=1, idx=G_MV_VIEWPORT(8), ofs=0 219 + commands.push((G_MOVEMEM << 24) | (1 << 19) | G_MV_VIEWPORT); 220 + commands.push(viewport_addr); 221 + 222 + // Load projection matrix 223 + // F3DEX2's gSPMatrix XORs the push flag: NOPUSH(0) ^ PUSH(1) = 1 224 + let mtx_proj_flags = u32::from(G_MTX_PROJECTION | G_MTX_LOAD | G_MTX_NOPUSH) ^ 1; 225 + commands.push((G_MTX << 24) | 0x0038_0000 | mtx_proj_flags); 226 + commands.push(proj_addr); 227 + 228 + // Load modelview matrix 229 + let mtx_mv_flags = u32::from(G_MTX_LOAD | G_MTX_NOPUSH) ^ 1; 230 + commands.push((G_MTX << 24) | 0x0038_0000 | mtx_mv_flags); 231 + commands.push(mv_addr); 232 + 233 + // Collect all triangles and deduplicate vertices 234 + let all_tris: Vec<&TriangleData> = nodes.iter().flat_map(|n| &n.triangles).collect(); 235 + 236 + let mut unique_verts: Vec<VertexData> = Vec::new(); 237 + let mut tri_indices: Vec<[usize; 3]> = Vec::new(); 238 + 239 + for tri in &all_tris { 240 + let mut indices = [0usize; 3]; 241 + for (vi, v) in [&tri.v0, &tri.v1, &tri.v2].iter().enumerate() { 242 + indices[vi] = if let Some(idx) = unique_verts.iter().position(|uv| uv == *v) { 243 + idx 244 + } else { 245 + unique_verts.push((*v).clone()); 246 + unique_verts.len() - 1 247 + }; 248 + } 249 + tri_indices.push(indices); 250 + } 251 + 252 + // Pack vertex data 253 + for v in &unique_verts { 254 + vertex_data.extend_from_slice(&pack_vertex(v)); 255 + } 256 + 257 + // Emit vertex loads and triangle commands in batches of MAX_VERTS_PER_BATCH 258 + let total_verts = unique_verts.len(); 259 + let mut batch_start = 0; 260 + 261 + while batch_start < total_verts { 262 + let batch_end = (batch_start + MAX_VERTS_PER_BATCH).min(total_verts); 263 + let batch_count = batch_end - batch_start; 264 + 265 + // F3DEX2 gSPVertex: word0 = (G_VTX << 24) | (n << 12) | ((v0 + n) << 1) 266 + let n = idx_u32(batch_count); 267 + let v0 = 0u32; 268 + commands.push((G_VTX << 24) | (n << 12) | ((v0 + n) << 1)); 269 + commands.push(vertex_addr + idx_u32(batch_start) * 16); 270 + 271 + // Emit triangles that reference vertices in this batch 272 + let batch_tris: Vec<&[usize; 3]> = tri_indices 273 + .iter() 274 + .filter(|idx| idx.iter().all(|&i| i >= batch_start && i < batch_end)) 275 + .collect(); 276 + 277 + let mut ti = 0; 278 + while ti + 1 < batch_tris.len() { 279 + // gSP2Triangles 280 + let t0 = batch_tris[ti]; 281 + let t1 = batch_tris[ti + 1]; 282 + let (a0, b0, c0) = ( 283 + idx_u32(t0[0] - batch_start), 284 + idx_u32(t0[1] - batch_start), 285 + idx_u32(t0[2] - batch_start), 286 + ); 287 + let (a1, b1, c1) = ( 288 + idx_u32(t1[0] - batch_start), 289 + idx_u32(t1[1] - batch_start), 290 + idx_u32(t1[2] - batch_start), 291 + ); 292 + commands.push((G_TRI2 << 24) | ((a0 * 2) << 16) | ((b0 * 2) << 8) | (c0 * 2)); 293 + commands.push(((a1 * 2) << 16) | ((b1 * 2) << 8) | (c1 * 2)); 294 + ti += 2; 295 + } 296 + if ti < batch_tris.len() { 297 + // gSP1Triangle for the remaining triangle 298 + let t = batch_tris[ti]; 299 + let (a, b, c) = ( 300 + idx_u32(t[0] - batch_start), 301 + idx_u32(t[1] - batch_start), 302 + idx_u32(t[2] - batch_start), 303 + ); 304 + commands.push((G_TRI1 << 24) | ((a * 2) << 16) | ((b * 2) << 8) | (c * 2)); 305 + commands.push(0); 306 + } 307 + 308 + batch_start = batch_end; 309 + } 310 + 311 + // Full sync + End display list 312 + commands.push(G_RDPFULLSYNC << 24); 313 + commands.push(0); 314 + commands.push(G_ENDDL << 24); 315 + commands.push(0); 316 + 317 + GbiOutput { 318 + commands, 319 + vertex_data, 320 + viewport_data, 321 + } 322 + } 323 + 324 + #[cfg(test)] 325 + mod tests { 326 + #![allow(clippy::unwrap_used)] 327 + 328 + use super::*; 329 + 330 + fn red_vertex(x: i16, y: i16, z: i16) -> VertexData { 331 + VertexData { 332 + x, 333 + y, 334 + z, 335 + r: 255, 336 + g: 0, 337 + b: 0, 338 + a: 255, 339 + } 340 + } 341 + 342 + #[test] 343 + fn single_triangle_reconstruction() { 344 + let nodes = vec![NodeData { 345 + triangles: vec![TriangleData { 346 + v0: red_vertex(0, 0, 0), 347 + v1: red_vertex(100, 0, 0), 348 + v2: red_vertex(50, 100, 0), 349 + }], 350 + }]; 351 + 352 + let camera = CameraMatrices { 353 + projection: [0u8; 64], 354 + modelview: [0u8; 64], 355 + }; 356 + 357 + let output = reconstruct(&nodes, &camera, 0x100, 0x1000, 0x2000, 0x2040, 0x2080); 358 + 359 + // Should have commands (setup + vertex load + 1 triangle + sync + enddl) 360 + assert!(output.commands.len() >= 4); 361 + 362 + // Should have 3 vertices * 16 bytes = 48 bytes of vertex data 363 + assert_eq!(output.vertex_data.len(), 48); 364 + 365 + // Verify vertex packing of first vertex 366 + let v0_x = i16::from_be_bytes([output.vertex_data[0], output.vertex_data[1]]); 367 + assert_eq!(v0_x, 0); 368 + assert_eq!(output.vertex_data[12], 255); // red 369 + assert_eq!(output.vertex_data[13], 0); // green 370 + } 371 + 372 + #[test] 373 + fn vertex_packing_round_trip() { 374 + let v = VertexData { 375 + x: -100, 376 + y: 200, 377 + z: -300, 378 + r: 128, 379 + g: 64, 380 + b: 32, 381 + a: 255, 382 + }; 383 + let packed = pack_vertex(&v); 384 + 385 + let x = i16::from_be_bytes([packed[0], packed[1]]); 386 + let y = i16::from_be_bytes([packed[2], packed[3]]); 387 + let z = i16::from_be_bytes([packed[4], packed[5]]); 388 + assert_eq!(x, -100); 389 + assert_eq!(y, 200); 390 + assert_eq!(z, -300); 391 + assert_eq!(packed[12], 128); 392 + assert_eq!(packed[13], 64); 393 + assert_eq!(packed[14], 32); 394 + assert_eq!(packed[15], 255); 395 + } 396 + 397 + #[test] 398 + fn many_vertices_batching() { 399 + // Create 40 triangles with unique vertices (120 verts > 32 batch limit) 400 + let triangles: Vec<TriangleData> = (0..40) 401 + .map(|i| { 402 + let base = i * 3; 403 + TriangleData { 404 + v0: red_vertex(base, 0, 0), 405 + v1: red_vertex(base + 1, 0, 0), 406 + v2: red_vertex(base + 2, 0, 0), 407 + } 408 + }) 409 + .collect(); 410 + let nodes = vec![NodeData { triangles }]; 411 + 412 + let camera = CameraMatrices { 413 + projection: [0u8; 64], 414 + modelview: [0u8; 64], 415 + }; 416 + 417 + let output = reconstruct(&nodes, &camera, 0x100, 0x1000, 0x2000, 0x2040, 0x2080); 418 + 419 + // Should have 120 unique vertices = 120 * 16 = 1920 bytes 420 + assert_eq!(output.vertex_data.len(), 1920); 421 + 422 + // Should contain multiple gSPVertex commands (at least 4 batches of 32) 423 + let vtx_count = output 424 + .commands 425 + .iter() 426 + .step_by(2) 427 + .filter(|&&w| (w >> 24) == G_VTX) 428 + .count(); 429 + assert!( 430 + vtx_count >= 4, 431 + "should have multiple vertex batches, got {vtx_count}" 432 + ); 433 + } 434 + }
+9
crates/pm64/src/lib.rs
··· 1 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 + // 3 + // SPDX-License-Identifier: AGPL-3.0-or-later 4 + 5 + //! Paper Mario 64 map data structures and rendering pipeline. 6 + 7 + pub mod gbi; 8 + pub mod model; 9 + pub mod render;
+139
crates/pm64/src/model.rs
··· 1 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 + // 3 + // SPDX-License-Identifier: AGPL-3.0-or-later 4 + 5 + //! CRDT-backed model data for PM64 map geometry. 6 + //! 7 + //! These structs are backed by Loro via the [`loroscope`] macro, so every 8 + //! field change is a CRDT operation suitable for collaborative editing and 9 + //! undo/redo. 10 + 11 + use loroscope::loroscope; 12 + 13 + /// An RGBA color with integer channels (0–255 range by convention). 14 + #[loroscope] 15 + #[derive(Debug)] 16 + pub struct ColorRgba { 17 + pub r: i64, 18 + pub g: i64, 19 + pub b: i64, 20 + pub a: i64, 21 + } 22 + 23 + /// A vertex with 3D position and vertex color. 24 + #[loroscope] 25 + #[derive(Debug)] 26 + pub struct Vertex { 27 + pub x: f64, 28 + pub y: f64, 29 + pub z: f64, 30 + pub color: ColorRgba, 31 + } 32 + 33 + /// A triangle defined by three vertices. 34 + #[loroscope] 35 + #[derive(Debug)] 36 + pub struct Triangle { 37 + pub v0: Vertex, 38 + pub v1: Vertex, 39 + pub v2: Vertex, 40 + } 41 + 42 + /// A named node in the model tree, containing a list of triangles. 43 + #[loroscope] 44 + #[derive(Debug)] 45 + pub struct ModelNode { 46 + /// Display name of this model node. 47 + pub name: String, 48 + /// The triangles that make up this node's geometry. 49 + pub triangles: List<Triangle>, 50 + } 51 + 52 + #[cfg(test)] 53 + mod tests { 54 + #![allow(clippy::unwrap_used, clippy::float_cmp)] 55 + 56 + use loroscope::loroscope; 57 + 58 + use super::*; 59 + 60 + #[loroscope] 61 + struct TestRoot { 62 + pub map_model: Tree<ModelNode>, 63 + } 64 + 65 + #[test] 66 + fn create_model_node_with_triangle() { 67 + let root = TestRoot::new(); 68 + let tree = root.map_model(); 69 + 70 + let (node_id, node) = tree.create_root(); 71 + node.set_name("test_node"); 72 + assert_eq!(node.name(), "test_node"); 73 + 74 + let tri = node.triangles().push_new(); 75 + tri.v0().set_x(0.0); 76 + tri.v0().set_y(0.0); 77 + tri.v0().set_z(0.0); 78 + tri.v0().color().set_r(255); 79 + tri.v0().color().set_g(0); 80 + tri.v0().color().set_b(0); 81 + tri.v0().color().set_a(255); 82 + 83 + tri.v1().set_x(100.0); 84 + tri.v1().set_y(0.0); 85 + tri.v1().set_z(0.0); 86 + 87 + tri.v2().set_x(50.0); 88 + tri.v2().set_y(100.0); 89 + tri.v2().set_z(0.0); 90 + 91 + // Read back 92 + let read_node = tree.get(node_id).unwrap(); 93 + assert_eq!(read_node.name(), "test_node"); 94 + assert_eq!(read_node.triangles().len(), 1); 95 + 96 + let read_tri = read_node.triangles().get(0).unwrap(); 97 + assert_eq!(read_tri.v0().x(), 0.0); 98 + assert_eq!(read_tri.v0().color().r(), 255); 99 + assert_eq!(read_tri.v1().x(), 100.0); 100 + assert_eq!(read_tri.v2().y(), 100.0); 101 + } 102 + 103 + #[test] 104 + fn multiple_triangles_in_node() { 105 + let root = TestRoot::new(); 106 + let tree = root.map_model(); 107 + 108 + let (_id, node) = tree.create_root(); 109 + node.set_name("multi"); 110 + 111 + for i in 0..5 { 112 + let tri = node.triangles().push_new(); 113 + let val = f64::from(i) * 10.0; 114 + tri.v0().set_x(val); 115 + tri.v1().set_y(val); 116 + tri.v2().set_z(val); 117 + } 118 + 119 + assert_eq!(node.triangles().len(), 5); 120 + assert_eq!(node.triangles().get(2).unwrap().v0().x(), 20.0); 121 + assert_eq!(node.triangles().get(4).unwrap().v2().z(), 40.0); 122 + } 123 + 124 + #[test] 125 + fn tree_hierarchy() { 126 + let root = TestRoot::new(); 127 + let tree = root.map_model(); 128 + 129 + let (parent_id, parent) = tree.create_root(); 130 + parent.set_name("parent"); 131 + 132 + let (child_id, child) = tree.create_child(parent_id); 133 + child.set_name("child"); 134 + 135 + assert_eq!(tree.children(parent_id).unwrap().len(), 1); 136 + assert_eq!(tree.children(parent_id).unwrap()[0], child_id); 137 + assert_eq!(tree.get(child_id).unwrap().name(), "child"); 138 + } 139 + }
+362
crates/pm64/src/render.rs
··· 1 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 + // 3 + // SPDX-License-Identifier: AGPL-3.0-or-later 4 + 5 + //! Render pipeline: runs GBI display lists through the RSP to produce RDP commands. 6 + //! 7 + //! Loads F3DEX2 microcode, sets up RDRAM, and runs the RSP emulator. 8 + //! The output is a sequence of RDP command words ready for parallel-rdp. 9 + 10 + use crate::gbi::{self, CameraMatrices, GbiOutput, NodeData}; 11 + 12 + // Microcode binaries from the Paper Mario 64 decomp 13 + const F3DEX2_TEXT: &[u8] = include_bytes!(concat!( 14 + env!("PM64_ASSETS_DIR"), 15 + "/gspF3DEX2kawase_fifo_text.bin" 16 + )); 17 + const F3DEX2_DATA: &[u8] = include_bytes!(concat!( 18 + env!("PM64_ASSETS_DIR"), 19 + "/gspF3DEX2kawase_fifo_data.bin" 20 + )); 21 + const RSPBOOT: &[u8] = include_bytes!(concat!(env!("PM64_ASSETS_DIR"), "/rspboot_font.bin")); 22 + 23 + // RDRAM layout — addresses used both as RSP register values (u32) and 24 + // host-side byte offsets (usize). We store them as usize and convert 25 + // to u32 when writing to the OSTask fields via `addr_u32()`. 26 + const FB_ADDR: usize = 0x0000_0100; 27 + const F3DEX2_TEXT_ADDR: usize = 0x0010_0000; 28 + const F3DEX2_DATA_ADDR: usize = 0x0010_2000; 29 + const DL_ADDR: usize = 0x0012_0000; 30 + const VERTEX_ADDR: usize = 0x0014_0000; 31 + const PROJ_MTX_ADDR: usize = 0x0016_0000; 32 + const MV_MTX_ADDR: usize = 0x0016_0040; 33 + const VIEWPORT_ADDR: usize = 0x0016_0080; 34 + const RDP_OUTPUT_ADDR: usize = 0x0018_0000; 35 + const DRAM_STACK_ADDR: usize = 0x001C_0000; 36 + 37 + const RDRAM_SIZE: u32 = 4 * 1024 * 1024; // 4 MB 38 + 39 + /// Converts a RDRAM address constant (usize) to u32 for the RSP. 40 + #[expect( 41 + clippy::cast_possible_truncation, 42 + clippy::as_conversions, 43 + reason = "RDRAM addresses are well within u32 range" 44 + )] 45 + const fn addr_u32(addr: usize) -> u32 { 46 + addr as u32 47 + } 48 + 49 + // OSTask field offsets (MIPS o32 ABI, all fields are 4 bytes) 50 + const TASK_TYPE: usize = 0x00; 51 + const TASK_FLAGS: usize = 0x04; 52 + const TASK_UCODE_BOOT: usize = 0x08; 53 + const TASK_UCODE_BOOT_SIZE: usize = 0x0C; 54 + const TASK_UCODE: usize = 0x10; 55 + const TASK_UCODE_SIZE: usize = 0x14; 56 + const TASK_UCODE_DATA: usize = 0x18; 57 + const TASK_UCODE_DATA_SIZE: usize = 0x1C; 58 + const TASK_DRAM_STACK: usize = 0x20; 59 + const TASK_DRAM_STACK_SIZE: usize = 0x24; 60 + const TASK_OUTPUT_BUFF: usize = 0x28; 61 + const TASK_OUTPUT_BUFF_SIZE: usize = 0x2C; 62 + const TASK_DATA_PTR: usize = 0x30; 63 + const TASK_DATA_SIZE: usize = 0x34; 64 + const TASK_YIELD_DATA_PTR: usize = 0x38; 65 + 66 + /// Size of the `OSTask` structure in bytes. 67 + const OS_TASK_SIZE: usize = 0x40; 68 + 69 + /// DMEM offset where the `OSTask` is placed (`SP_IMEM_START` - sizeof(`OSTask`)). 70 + const TASK_DMEM_OFFSET: usize = 0x1000 - OS_TASK_SIZE; 71 + 72 + /// `M_GFXTASK` — graphics task type. 73 + const M_GFXTASK: u32 = 1; 74 + 75 + /// `SP_UCODE_SIZE` — rspboot loads this many bytes of text into IMEM. 76 + const SP_UCODE_SIZE: u32 = 4096; 77 + 78 + /// `SP_UCODE_DATA_SIZE` — loaded into DMEM by rspboot. 79 + const SP_UCODE_DATA_SIZE: u32 = 2048; 80 + 81 + /// `SP_DRAM_STACK_SIZE8` — microcode DRAM stack size. 82 + const SP_DRAM_STACK_SIZE: u32 = 1024; 83 + 84 + /// Writes a big-endian u32 to a byte slice (for RSP memory: DMEM/IMEM). 85 + fn write_be_u32(buf: &mut [u8], offset: usize, value: u32) { 86 + buf[offset..offset + 4].copy_from_slice(&value.to_be_bytes()); 87 + } 88 + 89 + /// Writes a u32 to RDRAM in native endian. 90 + /// 91 + /// The RSP emulator stores RDRAM words in native byte order. DMA between 92 + /// RDRAM and RSP memory handles the endian conversion (native ↔ big-endian). 93 + fn write_rdram_u32(rdram: &mut [u8], offset: usize, value: u32) { 94 + rdram[offset..offset + 4].copy_from_slice(&value.to_ne_bytes()); 95 + } 96 + 97 + /// Copies big-endian byte data into RDRAM, word-swapping for native storage. 98 + /// 99 + /// Input is big-endian bytes (N64 native format). RDRAM stores words in 100 + /// the host's native byte order. This function converts each 4-byte word. 101 + /// Any trailing bytes (< 4) are handled as a partial word. 102 + fn write_be_bytes_to_rdram(rdram: &mut [u8], offset: usize, data: &[u8]) { 103 + for (i, chunk) in data.chunks(4).enumerate() { 104 + let mut padded = [0u8; 4]; 105 + padded[..chunk.len()].copy_from_slice(chunk); 106 + let word = u32::from_be_bytes(padded); 107 + write_rdram_u32(rdram, offset + i * 4, word); 108 + } 109 + } 110 + 111 + /// Renders model geometry through the RSP, producing RDP command words. 112 + /// 113 + /// Creates a fresh RSP device each call. Prefer [`Renderer`] for repeated 114 + /// rendering (e.g. per-frame in an editor) to avoid the 4 MB RDRAM allocation 115 + /// on every call. 116 + pub fn render(nodes: &[NodeData], camera: &CameraMatrices) -> Vec<u32> { 117 + let mut renderer = Renderer::new(); 118 + renderer.render(nodes, camera) 119 + } 120 + 121 + /// Persistent RSP render context that reuses its device across frames. 122 + /// 123 + /// Avoids the 4 MB RDRAM allocation that [`render`] incurs on every call. 124 + /// Microcode is loaded once at construction; subsequent [`render`](Self::render) 125 + /// calls only write the per-frame data (display list, vertices, matrices) 126 + /// and reset the RSP/RDP state. 127 + pub struct Renderer { 128 + device: rsp::Device, 129 + } 130 + 131 + impl std::fmt::Debug for Renderer { 132 + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 133 + f.debug_struct("Renderer").finish_non_exhaustive() 134 + } 135 + } 136 + 137 + impl Renderer { 138 + /// Creates a new renderer, allocating RDRAM and loading F3DEX2 microcode. 139 + pub fn new() -> Self { 140 + let mut device = rsp::Device::new(RDRAM_SIZE); 141 + let rdram = device.rdram_mut(); 142 + write_be_bytes_to_rdram(rdram, F3DEX2_TEXT_ADDR, F3DEX2_TEXT); 143 + write_be_bytes_to_rdram(rdram, F3DEX2_DATA_ADDR, F3DEX2_DATA); 144 + Self { device } 145 + } 146 + 147 + /// Renders model geometry through the RSP, producing RDP command words. 148 + pub fn render(&mut self, nodes: &[NodeData], camera: &CameraMatrices) -> Vec<u32> { 149 + let gbi_output = gbi::reconstruct( 150 + nodes, 151 + camera, 152 + addr_u32(FB_ADDR), 153 + addr_u32(VERTEX_ADDR), 154 + addr_u32(PROJ_MTX_ADDR), 155 + addr_u32(MV_MTX_ADDR), 156 + addr_u32(VIEWPORT_ADDR), 157 + ); 158 + self.render_gbi(&gbi_output, camera) 159 + } 160 + 161 + /// Renders a pre-built GBI display list through the RSP. 162 + fn render_gbi(&mut self, gbi_output: &GbiOutput, camera: &CameraMatrices) -> Vec<u32> { 163 + self.device.reset(); 164 + 165 + let rdram = self.device.rdram_mut(); 166 + 167 + // Write display list commands (u32 values → native-endian RDRAM) 168 + let dl_bytes = gbi_output.commands.len() * 4; 169 + for (i, &word) in gbi_output.commands.iter().enumerate() { 170 + write_rdram_u32(rdram, DL_ADDR + i * 4, word); 171 + } 172 + 173 + // Write vertex data (big-endian packed bytes → native-endian RDRAM) 174 + write_be_bytes_to_rdram(rdram, VERTEX_ADDR, &gbi_output.vertex_data); 175 + 176 + // Write camera matrices (big-endian N64 format → native-endian RDRAM) 177 + write_be_bytes_to_rdram(rdram, PROJ_MTX_ADDR, &camera.projection); 178 + write_be_bytes_to_rdram(rdram, MV_MTX_ADDR, &camera.modelview); 179 + 180 + // Write viewport data (big-endian i16 values → native-endian RDRAM) 181 + write_be_bytes_to_rdram(rdram, VIEWPORT_ADDR, &gbi_output.viewport_data); 182 + 183 + // Write rspboot to IMEM (big-endian, RSP reads directly) 184 + self.device.imem_mut()[..RSPBOOT.len()].copy_from_slice(RSPBOOT); 185 + 186 + // Write OSTask to DMEM (big-endian, RSP reads directly with LW) 187 + let dmem = self.device.dmem_mut(); 188 + let task_base = TASK_DMEM_OFFSET; 189 + 190 + write_be_u32(dmem, task_base + TASK_TYPE, M_GFXTASK); 191 + write_be_u32(dmem, task_base + TASK_FLAGS, 0); 192 + write_be_u32(dmem, task_base + TASK_UCODE_BOOT, 0); 193 + write_be_u32(dmem, task_base + TASK_UCODE_BOOT_SIZE, 0); 194 + write_be_u32(dmem, task_base + TASK_UCODE, addr_u32(F3DEX2_TEXT_ADDR)); 195 + write_be_u32(dmem, task_base + TASK_UCODE_SIZE, SP_UCODE_SIZE); 196 + write_be_u32( 197 + dmem, 198 + task_base + TASK_UCODE_DATA, 199 + addr_u32(F3DEX2_DATA_ADDR), 200 + ); 201 + write_be_u32(dmem, task_base + TASK_UCODE_DATA_SIZE, SP_UCODE_DATA_SIZE); 202 + write_be_u32(dmem, task_base + TASK_DRAM_STACK, addr_u32(DRAM_STACK_ADDR)); 203 + write_be_u32(dmem, task_base + TASK_DRAM_STACK_SIZE, SP_DRAM_STACK_SIZE); 204 + write_be_u32( 205 + dmem, 206 + task_base + TASK_OUTPUT_BUFF, 207 + addr_u32(RDP_OUTPUT_ADDR), 208 + ); 209 + write_be_u32( 210 + dmem, 211 + task_base + TASK_OUTPUT_BUFF_SIZE, 212 + addr_u32(RDP_OUTPUT_ADDR) + 0x0004_0000, 213 + ); 214 + write_be_u32(dmem, task_base + TASK_DATA_PTR, addr_u32(DL_ADDR)); 215 + #[expect( 216 + clippy::cast_possible_truncation, 217 + clippy::as_conversions, 218 + reason = "display list size fits in u32" 219 + )] 220 + let dl_size = dl_bytes as u32; 221 + write_be_u32(dmem, task_base + TASK_DATA_SIZE, dl_size); 222 + write_be_u32(dmem, task_base + TASK_YIELD_DATA_PTR, 0); 223 + 224 + // Decode IMEM and run RSP 225 + self.device.decode_imem(); 226 + self.device.set_pc(0); 227 + self.device.clear_halt(); 228 + 229 + self.device.run().to_vec() 230 + } 231 + } 232 + 233 + #[cfg(test)] 234 + mod tests { 235 + #![allow(clippy::unwrap_used)] 236 + 237 + use super::*; 238 + use crate::gbi::{TriangleData, VertexData}; 239 + 240 + /// Builds an N64 s15.16 identity matrix (64 bytes, big-endian). 241 + /// 242 + /// Format: first 32 bytes = integer halves (i16 BE), last 32 bytes = fractional halves (u16 BE). 243 + /// Column-major: element [row][col] at position `col * 4 + row`. 244 + fn identity_n64_matrix() -> [u8; 64] { 245 + let mut m = [0u8; 64]; 246 + for i in 0..4 { 247 + let offset = (i * 4 + i) * 2; // diagonal element in the integer half 248 + m[offset] = 0x00; 249 + m[offset + 1] = 0x01; // 1.0 as i16 BE 250 + } 251 + m 252 + } 253 + 254 + #[test] 255 + fn rsp_executes_rspboot() { 256 + // Minimal test: rspboot + F3DEX2 with an empty display list (just EndDL) 257 + let mut device = rsp::Device::new(RDRAM_SIZE); 258 + let rdram = device.rdram_mut(); 259 + 260 + write_be_bytes_to_rdram(rdram, F3DEX2_TEXT_ADDR, F3DEX2_TEXT); 261 + write_be_bytes_to_rdram(rdram, F3DEX2_DATA_ADDR, F3DEX2_DATA); 262 + 263 + let dl: &[u32] = &[0xDF00_0000, 0x0000_0000]; 264 + for (i, &word) in dl.iter().enumerate() { 265 + write_rdram_u32(rdram, DL_ADDR + i * 4, word); 266 + } 267 + 268 + device.imem_mut()[..RSPBOOT.len()].copy_from_slice(RSPBOOT); 269 + 270 + let dmem = device.dmem_mut(); 271 + let task_base = TASK_DMEM_OFFSET; 272 + write_be_u32(dmem, task_base + TASK_TYPE, M_GFXTASK); 273 + write_be_u32(dmem, task_base + TASK_FLAGS, 0); 274 + write_be_u32(dmem, task_base + TASK_UCODE_BOOT, 0); 275 + write_be_u32(dmem, task_base + TASK_UCODE_BOOT_SIZE, 0); 276 + write_be_u32(dmem, task_base + TASK_UCODE, addr_u32(F3DEX2_TEXT_ADDR)); 277 + write_be_u32(dmem, task_base + TASK_UCODE_SIZE, SP_UCODE_SIZE); 278 + write_be_u32( 279 + dmem, 280 + task_base + TASK_UCODE_DATA, 281 + addr_u32(F3DEX2_DATA_ADDR), 282 + ); 283 + write_be_u32(dmem, task_base + TASK_UCODE_DATA_SIZE, SP_UCODE_DATA_SIZE); 284 + write_be_u32(dmem, task_base + TASK_DRAM_STACK, addr_u32(DRAM_STACK_ADDR)); 285 + write_be_u32(dmem, task_base + TASK_DRAM_STACK_SIZE, SP_DRAM_STACK_SIZE); 286 + write_be_u32( 287 + dmem, 288 + task_base + TASK_OUTPUT_BUFF, 289 + addr_u32(RDP_OUTPUT_ADDR), 290 + ); 291 + write_be_u32( 292 + dmem, 293 + task_base + TASK_OUTPUT_BUFF_SIZE, 294 + addr_u32(RDP_OUTPUT_ADDR) + 0x0004_0000, 295 + ); 296 + write_be_u32(dmem, task_base + TASK_DATA_PTR, addr_u32(DL_ADDR)); 297 + write_be_u32(dmem, task_base + TASK_DATA_SIZE, 8); 298 + write_be_u32(dmem, task_base + TASK_YIELD_DATA_PTR, 0); 299 + 300 + device.decode_imem(); 301 + device.set_pc(0); 302 + device.clear_halt(); 303 + 304 + let rdp_command_count = device.run().len(); 305 + 306 + assert!( 307 + device.rsp.cpu.broken || device.rsp.cpu.halted, 308 + "RSP should have terminated" 309 + ); 310 + // An empty display list shouldn't produce many RDP commands 311 + // (F3DEX2 may emit a few sync commands, so just check it terminates) 312 + assert!(rdp_command_count < 100, "unexpectedly many RDP commands"); 313 + } 314 + 315 + #[test] 316 + fn single_red_triangle_produces_rdp_commands() { 317 + let nodes = vec![NodeData { 318 + triangles: vec![TriangleData { 319 + v0: VertexData { 320 + x: 0, 321 + y: 0, 322 + z: 0, 323 + r: 255, 324 + g: 0, 325 + b: 0, 326 + a: 255, 327 + }, 328 + v1: VertexData { 329 + x: 100, 330 + y: 0, 331 + z: 0, 332 + r: 255, 333 + g: 0, 334 + b: 0, 335 + a: 255, 336 + }, 337 + v2: VertexData { 338 + x: 50, 339 + y: 100, 340 + z: 0, 341 + r: 255, 342 + g: 0, 343 + b: 0, 344 + a: 255, 345 + }, 346 + }], 347 + }]; 348 + 349 + let camera = CameraMatrices { 350 + projection: identity_n64_matrix(), 351 + modelview: identity_n64_matrix(), 352 + }; 353 + 354 + let rdp_commands = render(&nodes, &camera); 355 + 356 + // The RSP should have produced RDP commands including triangle edge data 357 + assert!( 358 + !rdp_commands.is_empty(), 359 + "RSP should produce RDP commands for a single triangle" 360 + ); 361 + } 362 + }
+20
crates/rsp/Cargo.toml
··· 1 + # SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2 + # 3 + # SPDX-License-Identifier: AGPL-3.0-or-later 4 + 5 + [package] 6 + name = "rsp" 7 + version = "0.1.0" 8 + edition = "2024" 9 + description = "Standalone N64 RSP emulator extracted from gopher64" 10 + 11 + [dependencies] 12 + 13 + # Vendored code from gopher64 — disable lints 14 + [lints.rust] 15 + missing_docs = "allow" 16 + missing_debug_implementations = "allow" 17 + unsafe_code = "allow" 18 + 19 + [lints.clippy] 20 + all = { level = "allow", priority = -1 }
+641
crates/rsp/src/cpu.rs
··· 1 + // SPDX-FileCopyrightText: 2024 gopher64 contributors 2 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 3 + // 4 + // SPDX-License-Identifier: GPL-3.0-or-later 5 + 6 + //! RSP CPU core: instruction fetch/execute loop, opcode decoding, and CPU state. 7 + 8 + #[cfg(target_arch = "x86_64")] 9 + use std::arch::x86_64::*; 10 + 11 + use crate::BranchStepState; 12 + 13 + pub struct BranchState { 14 + pub state: BranchStepState, 15 + pub pc: u32, 16 + } 17 + 18 + #[derive(Copy, Clone)] 19 + pub struct Instructions { 20 + pub func: fn(&mut crate::Device, u32), 21 + pub opcode: u32, 22 + } 23 + 24 + #[derive(PartialEq, Copy, Clone)] 25 + pub enum InstructionType { 26 + Su, 27 + Vu, 28 + } 29 + 30 + pub struct Cpu { 31 + pub instructions: [Instructions; 0x1000 / 4], 32 + pub last_instruction_type: InstructionType, 33 + pub instruction_type: InstructionType, 34 + pub pipeline_full: bool, 35 + pub branch_state: BranchState, 36 + pub broken: bool, 37 + pub running: bool, 38 + pub halted: bool, 39 + pub sync_point: bool, 40 + pub cycle_counter: u64, 41 + pub shuffle: [__m128i; 16], 42 + pub gpr: [u32; 32], 43 + pub vpr: [__m128i; 32], 44 + pub reciprocals: [u16; 512], 45 + pub inverse_square_roots: [u16; 512], 46 + pub vcol: __m128i, 47 + pub vcoh: __m128i, 48 + pub vccl: __m128i, 49 + pub vcch: __m128i, 50 + pub vce: __m128i, 51 + pub accl: __m128i, 52 + pub accm: __m128i, 53 + pub acch: __m128i, 54 + pub divdp: bool, 55 + pub divin: i16, 56 + pub divout: i16, 57 + pub special_instrs: [fn(&mut crate::Device, u32); 64], 58 + pub regimm_instrs: [fn(&mut crate::Device, u32); 32], 59 + pub cop0_instrs: [fn(&mut crate::Device, u32); 32], 60 + pub cop2_instrs: [fn(&mut crate::Device, u32); 32], 61 + pub lwc2_instrs: [fn(&mut crate::Device, u32); 32], 62 + pub swc2_instrs: [fn(&mut crate::Device, u32); 32], 63 + pub instrs: [fn(&mut crate::Device, u32); 64], 64 + pub vec_instrs: [fn(&mut crate::Device, u32); 64], 65 + } 66 + 67 + fn default_instruction(_device: &mut crate::Device, _opcode: u32) {} 68 + 69 + impl Cpu { 70 + pub fn new() -> Self { 71 + let zero = unsafe { _mm_setzero_si128() }; 72 + Self { 73 + instructions: [Instructions { 74 + func: default_instruction, 75 + opcode: 0, 76 + }; 0x1000 / 4], 77 + last_instruction_type: InstructionType::Su, 78 + instruction_type: InstructionType::Su, 79 + pipeline_full: false, 80 + branch_state: BranchState { 81 + state: BranchStepState::Step, 82 + pc: 0, 83 + }, 84 + broken: false, 85 + running: false, 86 + halted: false, 87 + sync_point: false, 88 + cycle_counter: 0, 89 + shuffle: [zero; 16], 90 + gpr: [0; 32], 91 + vpr: [zero; 32], 92 + reciprocals: [0; 512], 93 + inverse_square_roots: [0; 512], 94 + vcol: zero, 95 + vcoh: zero, 96 + vccl: zero, 97 + vcch: zero, 98 + vce: zero, 99 + accl: zero, 100 + accm: zero, 101 + acch: zero, 102 + divdp: false, 103 + divin: 0, 104 + divout: 0, 105 + special_instrs: [default_instruction; 64], 106 + regimm_instrs: [default_instruction; 32], 107 + cop0_instrs: [default_instruction; 32], 108 + cop2_instrs: [default_instruction; 32], 109 + lwc2_instrs: [default_instruction; 32], 110 + swc2_instrs: [default_instruction; 32], 111 + instrs: [default_instruction; 64], 112 + vec_instrs: [default_instruction; 64], 113 + } 114 + } 115 + } 116 + 117 + pub fn in_delay_slot(device: &crate::Device) -> bool { 118 + device.rsp.cpu.branch_state.state == BranchStepState::DelaySlotTaken 119 + || device.rsp.cpu.branch_state.state == BranchStepState::DelaySlotNotTaken 120 + } 121 + 122 + pub fn in_delay_slot_taken(device: &crate::Device) -> bool { 123 + device.rsp.cpu.branch_state.state == BranchStepState::DelaySlotTaken 124 + } 125 + 126 + pub fn run(device: &mut crate::Device) -> u64 { 127 + device.rsp.cpu.broken = false; 128 + device.rsp.cpu.cycle_counter = 0; 129 + device.rsp.cpu.running = true; 130 + while !device.rsp.cpu.sync_point && device.rsp.cpu.cycle_counter < device.max_cycles { 131 + device.rsp.cpu.instruction_type = InstructionType::Su; 132 + device.rsp.cpu.gpr[0] = 0; 133 + 134 + let instruction = device.rsp.cpu.instructions 135 + [(device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] / 4) as usize]; 136 + (instruction.func)(device, instruction.opcode); 137 + 138 + match device.rsp.cpu.branch_state.state { 139 + BranchStepState::Step => { 140 + device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] += 4; 141 + if device.rsp.cpu.broken { 142 + break; 143 + } 144 + } 145 + BranchStepState::Take => { 146 + device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] += 4; 147 + device.rsp.cpu.branch_state.state = BranchStepState::DelaySlotTaken; 148 + } 149 + BranchStepState::NotTaken => { 150 + device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] += 4; 151 + device.rsp.cpu.branch_state.state = BranchStepState::DelaySlotNotTaken; 152 + } 153 + BranchStepState::DelaySlotTaken => { 154 + device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] = 155 + device.rsp.cpu.branch_state.pc; 156 + device.rsp.cpu.branch_state.state = BranchStepState::Step; 157 + if device.rsp.cpu.broken { 158 + break; 159 + } 160 + } 161 + BranchStepState::DelaySlotNotTaken => { 162 + device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] += 4; 163 + device.rsp.cpu.branch_state.state = BranchStepState::Step; 164 + if device.rsp.cpu.broken { 165 + break; 166 + } 167 + } 168 + BranchStepState::Discard => { 169 + device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] += 8; 170 + device.rsp.cpu.branch_state.state = BranchStepState::Step; 171 + } 172 + BranchStepState::Exception => { 173 + device.rsp.cpu.branch_state.state = BranchStepState::Step; 174 + } 175 + } 176 + device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] &= 0xFFC; 177 + 178 + if device.rsp.cpu.instruction_type == device.rsp.cpu.last_instruction_type { 179 + device.rsp.cpu.cycle_counter += 1; 180 + device.rsp.cpu.pipeline_full = false; 181 + } else { 182 + device.rsp.cpu.last_instruction_type = device.rsp.cpu.instruction_type; 183 + if device.rsp.cpu.pipeline_full { 184 + device.rsp.cpu.cycle_counter += 1; 185 + device.rsp.cpu.pipeline_full = false; 186 + } else { 187 + device.rsp.cpu.pipeline_full = true; 188 + } 189 + } 190 + } 191 + if device.rsp.cpu.cycle_counter >= device.max_cycles { 192 + device.rsp.cpu.halted = true; 193 + } 194 + device.rsp.cpu.running = false; 195 + (device.rsp.cpu.cycle_counter as f64 * 1.5) as u64 196 + } 197 + 198 + pub fn decode_opcode(device: &crate::Device, opcode: u32) -> fn(&mut crate::Device, u32) { 199 + match opcode >> 26 { 200 + 0 => device.rsp.cpu.special_instrs[(opcode & 0x3F) as usize], 201 + 1 => device.rsp.cpu.regimm_instrs[((opcode >> 16) & 0x1F) as usize], 202 + 16 => device.rsp.cpu.cop0_instrs[((opcode >> 21) & 0x1F) as usize], 203 + 18 => device.rsp.cpu.cop2_instrs[((opcode >> 21) & 0x1F) as usize], 204 + 50 => device.rsp.cpu.lwc2_instrs[((opcode >> 11) & 0x1F) as usize], 205 + 58 => device.rsp.cpu.swc2_instrs[((opcode >> 11) & 0x1F) as usize], 206 + _ => device.rsp.cpu.instrs[(opcode >> 26) as usize], 207 + } 208 + } 209 + 210 + pub fn map_instructions(device: &mut crate::Device) { 211 + device.rsp.cpu.instrs = [ 212 + crate::su_instructions::reserved, // SPECIAL 213 + crate::su_instructions::reserved, // REGIMM 214 + crate::su_instructions::j, 215 + crate::su_instructions::jal, 216 + crate::su_instructions::beq, 217 + crate::su_instructions::bne, 218 + crate::su_instructions::blez, 219 + crate::su_instructions::bgtz, 220 + crate::su_instructions::addi, 221 + crate::su_instructions::addiu, 222 + crate::su_instructions::slti, 223 + crate::su_instructions::sltiu, 224 + crate::su_instructions::andi, 225 + crate::su_instructions::ori, 226 + crate::su_instructions::xori, 227 + crate::su_instructions::lui, 228 + crate::su_instructions::reserved, // COP0 229 + crate::su_instructions::reserved, // COP1 230 + crate::su_instructions::reserved, // COP2 231 + crate::su_instructions::reserved, 232 + crate::su_instructions::reserved, 233 + crate::su_instructions::reserved, 234 + crate::su_instructions::reserved, 235 + crate::su_instructions::reserved, 236 + crate::su_instructions::reserved, 237 + crate::su_instructions::reserved, 238 + crate::su_instructions::reserved, 239 + crate::su_instructions::reserved, 240 + crate::su_instructions::reserved, 241 + crate::su_instructions::reserved, 242 + crate::su_instructions::reserved, 243 + crate::su_instructions::reserved, 244 + crate::su_instructions::lb, 245 + crate::su_instructions::lh, 246 + crate::su_instructions::reserved, 247 + crate::su_instructions::lw, 248 + crate::su_instructions::lbu, 249 + crate::su_instructions::lhu, 250 + crate::su_instructions::reserved, 251 + crate::su_instructions::lwu, 252 + crate::su_instructions::sb, 253 + crate::su_instructions::sh, 254 + crate::su_instructions::reserved, 255 + crate::su_instructions::sw, 256 + crate::su_instructions::reserved, 257 + crate::su_instructions::reserved, 258 + crate::su_instructions::reserved, 259 + crate::su_instructions::reserved, 260 + crate::su_instructions::reserved, 261 + crate::su_instructions::reserved, 262 + crate::su_instructions::reserved, // lwc2 263 + crate::su_instructions::reserved, 264 + crate::su_instructions::reserved, 265 + crate::su_instructions::reserved, 266 + crate::su_instructions::reserved, 267 + crate::su_instructions::reserved, 268 + crate::su_instructions::reserved, 269 + crate::su_instructions::reserved, 270 + crate::su_instructions::reserved, // swc2 271 + crate::su_instructions::reserved, 272 + crate::su_instructions::reserved, 273 + crate::su_instructions::reserved, 274 + crate::su_instructions::reserved, 275 + crate::su_instructions::reserved, 276 + ]; 277 + 278 + device.rsp.cpu.special_instrs = [ 279 + crate::su_instructions::sll, 280 + crate::su_instructions::special_reserved, 281 + crate::su_instructions::srl, 282 + crate::su_instructions::sra, 283 + crate::su_instructions::sllv, 284 + crate::su_instructions::special_reserved, 285 + crate::su_instructions::srlv, 286 + crate::su_instructions::srav, 287 + crate::su_instructions::jr, 288 + crate::su_instructions::jalr, 289 + crate::su_instructions::special_reserved, 290 + crate::su_instructions::special_reserved, 291 + crate::su_instructions::special_reserved, 292 + crate::su_instructions::break_, 293 + crate::su_instructions::special_reserved, 294 + crate::su_instructions::special_reserved, 295 + crate::su_instructions::special_reserved, 296 + crate::su_instructions::special_reserved, 297 + crate::su_instructions::special_reserved, 298 + crate::su_instructions::special_reserved, 299 + crate::su_instructions::special_reserved, 300 + crate::su_instructions::special_reserved, 301 + crate::su_instructions::special_reserved, 302 + crate::su_instructions::special_reserved, 303 + crate::su_instructions::special_reserved, 304 + crate::su_instructions::special_reserved, 305 + crate::su_instructions::special_reserved, 306 + crate::su_instructions::special_reserved, 307 + crate::su_instructions::special_reserved, 308 + crate::su_instructions::special_reserved, 309 + crate::su_instructions::special_reserved, 310 + crate::su_instructions::special_reserved, 311 + crate::su_instructions::add, 312 + crate::su_instructions::addu, 313 + crate::su_instructions::sub, 314 + crate::su_instructions::subu, 315 + crate::su_instructions::and, 316 + crate::su_instructions::or, 317 + crate::su_instructions::xor, 318 + crate::su_instructions::nor, 319 + crate::su_instructions::special_reserved, 320 + crate::su_instructions::special_reserved, 321 + crate::su_instructions::slt, 322 + crate::su_instructions::sltu, 323 + crate::su_instructions::special_reserved, 324 + crate::su_instructions::special_reserved, 325 + crate::su_instructions::special_reserved, 326 + crate::su_instructions::special_reserved, 327 + crate::su_instructions::special_reserved, 328 + crate::su_instructions::special_reserved, 329 + crate::su_instructions::special_reserved, 330 + crate::su_instructions::special_reserved, 331 + crate::su_instructions::special_reserved, 332 + crate::su_instructions::special_reserved, 333 + crate::su_instructions::special_reserved, 334 + crate::su_instructions::special_reserved, 335 + crate::su_instructions::special_reserved, 336 + crate::su_instructions::special_reserved, 337 + crate::su_instructions::special_reserved, 338 + crate::su_instructions::special_reserved, 339 + crate::su_instructions::special_reserved, 340 + crate::su_instructions::special_reserved, 341 + crate::su_instructions::special_reserved, 342 + crate::su_instructions::special_reserved, 343 + ]; 344 + 345 + device.rsp.cpu.regimm_instrs = [ 346 + crate::su_instructions::bltz, 347 + crate::su_instructions::bgez, 348 + crate::su_instructions::reserved, 349 + crate::su_instructions::reserved, 350 + crate::su_instructions::reserved, 351 + crate::su_instructions::reserved, 352 + crate::su_instructions::reserved, 353 + crate::su_instructions::reserved, 354 + crate::su_instructions::reserved, 355 + crate::su_instructions::reserved, 356 + crate::su_instructions::reserved, 357 + crate::su_instructions::reserved, 358 + crate::su_instructions::reserved, 359 + crate::su_instructions::reserved, 360 + crate::su_instructions::reserved, 361 + crate::su_instructions::reserved, 362 + crate::su_instructions::bltzal, 363 + crate::su_instructions::bgezal, 364 + crate::su_instructions::reserved, 365 + crate::su_instructions::reserved, 366 + crate::su_instructions::reserved, 367 + crate::su_instructions::reserved, 368 + crate::su_instructions::reserved, 369 + crate::su_instructions::reserved, 370 + crate::su_instructions::reserved, 371 + crate::su_instructions::reserved, 372 + crate::su_instructions::reserved, 373 + crate::su_instructions::reserved, 374 + crate::su_instructions::reserved, 375 + crate::su_instructions::reserved, 376 + crate::su_instructions::reserved, 377 + crate::su_instructions::reserved, 378 + ]; 379 + 380 + device.rsp.cpu.cop0_instrs = [ 381 + crate::su_instructions::mfc0, 382 + crate::su_instructions::reserved, 383 + crate::su_instructions::reserved, 384 + crate::su_instructions::reserved, 385 + crate::su_instructions::mtc0, 386 + crate::su_instructions::reserved, 387 + crate::su_instructions::reserved, 388 + crate::su_instructions::reserved, 389 + crate::su_instructions::reserved, 390 + crate::su_instructions::reserved, 391 + crate::su_instructions::reserved, 392 + crate::su_instructions::reserved, 393 + crate::su_instructions::reserved, 394 + crate::su_instructions::reserved, 395 + crate::su_instructions::reserved, 396 + crate::su_instructions::reserved, 397 + crate::su_instructions::reserved, 398 + crate::su_instructions::reserved, 399 + crate::su_instructions::reserved, 400 + crate::su_instructions::reserved, 401 + crate::su_instructions::reserved, 402 + crate::su_instructions::reserved, 403 + crate::su_instructions::reserved, 404 + crate::su_instructions::reserved, 405 + crate::su_instructions::reserved, 406 + crate::su_instructions::reserved, 407 + crate::su_instructions::reserved, 408 + crate::su_instructions::reserved, 409 + crate::su_instructions::reserved, 410 + crate::su_instructions::reserved, 411 + crate::su_instructions::reserved, 412 + crate::su_instructions::reserved, 413 + ]; 414 + 415 + device.rsp.cpu.cop2_instrs = [ 416 + crate::su_instructions::mfc2, 417 + crate::su_instructions::reserved, 418 + crate::su_instructions::cfc2, 419 + crate::su_instructions::reserved, 420 + crate::su_instructions::mtc2, 421 + crate::su_instructions::reserved, 422 + crate::su_instructions::ctc2, 423 + crate::su_instructions::reserved, 424 + crate::su_instructions::reserved, 425 + crate::su_instructions::reserved, 426 + crate::su_instructions::reserved, 427 + crate::su_instructions::reserved, 428 + crate::su_instructions::reserved, 429 + crate::su_instructions::reserved, 430 + crate::su_instructions::reserved, 431 + crate::su_instructions::reserved, 432 + crate::vu_instructions::execute_vec, 433 + crate::vu_instructions::execute_vec, 434 + crate::vu_instructions::execute_vec, 435 + crate::vu_instructions::execute_vec, 436 + crate::vu_instructions::execute_vec, 437 + crate::vu_instructions::execute_vec, 438 + crate::vu_instructions::execute_vec, 439 + crate::vu_instructions::execute_vec, 440 + crate::vu_instructions::execute_vec, 441 + crate::vu_instructions::execute_vec, 442 + crate::vu_instructions::execute_vec, 443 + crate::vu_instructions::execute_vec, 444 + crate::vu_instructions::execute_vec, 445 + crate::vu_instructions::execute_vec, 446 + crate::vu_instructions::execute_vec, 447 + crate::vu_instructions::execute_vec, 448 + ]; 449 + 450 + device.rsp.cpu.lwc2_instrs = [ 451 + crate::su_instructions::lbv, 452 + crate::su_instructions::lsv, 453 + crate::su_instructions::llv, 454 + crate::su_instructions::ldv, 455 + crate::su_instructions::lqv, 456 + crate::su_instructions::lrv, 457 + crate::su_instructions::lpv, 458 + crate::su_instructions::luv, 459 + crate::su_instructions::lhv, 460 + crate::su_instructions::lfv, 461 + crate::su_instructions::lwv, 462 + crate::su_instructions::ltv, 463 + crate::su_instructions::reserved, 464 + crate::su_instructions::reserved, 465 + crate::su_instructions::reserved, 466 + crate::su_instructions::reserved, 467 + crate::su_instructions::reserved, 468 + crate::su_instructions::reserved, 469 + crate::su_instructions::reserved, 470 + crate::su_instructions::reserved, 471 + crate::su_instructions::reserved, 472 + crate::su_instructions::reserved, 473 + crate::su_instructions::reserved, 474 + crate::su_instructions::reserved, 475 + crate::su_instructions::reserved, 476 + crate::su_instructions::reserved, 477 + crate::su_instructions::reserved, 478 + crate::su_instructions::reserved, 479 + crate::su_instructions::reserved, 480 + crate::su_instructions::reserved, 481 + crate::su_instructions::reserved, 482 + crate::su_instructions::reserved, 483 + ]; 484 + 485 + device.rsp.cpu.swc2_instrs = [ 486 + crate::su_instructions::sbv, 487 + crate::su_instructions::ssv, 488 + crate::su_instructions::slv, 489 + crate::su_instructions::sdv, 490 + crate::su_instructions::sqv, 491 + crate::su_instructions::srv, 492 + crate::su_instructions::spv, 493 + crate::su_instructions::suv, 494 + crate::su_instructions::shv, 495 + crate::su_instructions::sfv, 496 + crate::su_instructions::swv, 497 + crate::su_instructions::stv, 498 + crate::su_instructions::reserved, 499 + crate::su_instructions::reserved, 500 + crate::su_instructions::reserved, 501 + crate::su_instructions::reserved, 502 + crate::su_instructions::reserved, 503 + crate::su_instructions::reserved, 504 + crate::su_instructions::reserved, 505 + crate::su_instructions::reserved, 506 + crate::su_instructions::reserved, 507 + crate::su_instructions::reserved, 508 + crate::su_instructions::reserved, 509 + crate::su_instructions::reserved, 510 + crate::su_instructions::reserved, 511 + crate::su_instructions::reserved, 512 + crate::su_instructions::reserved, 513 + crate::su_instructions::reserved, 514 + crate::su_instructions::reserved, 515 + crate::su_instructions::reserved, 516 + crate::su_instructions::reserved, 517 + crate::su_instructions::reserved, 518 + ]; 519 + 520 + device.rsp.cpu.vec_instrs = [ 521 + crate::vu_instructions::vmulf, 522 + crate::vu_instructions::vmulu, 523 + crate::vu_instructions::vrndp, 524 + crate::vu_instructions::vmulq, 525 + crate::vu_instructions::vmudl, 526 + crate::vu_instructions::vmudm, 527 + crate::vu_instructions::vmudn, 528 + crate::vu_instructions::vmudh, 529 + crate::vu_instructions::vmacf, 530 + crate::vu_instructions::vmacu, 531 + crate::vu_instructions::vrndn, 532 + crate::vu_instructions::vmacq, 533 + crate::vu_instructions::vmadl, 534 + crate::vu_instructions::vmadm, 535 + crate::vu_instructions::vmadn, 536 + crate::vu_instructions::vmadh, 537 + crate::vu_instructions::vadd, 538 + crate::vu_instructions::vsub, 539 + crate::vu_instructions::vzero, 540 + crate::vu_instructions::vabs, 541 + crate::vu_instructions::vaddc, 542 + crate::vu_instructions::vsubc, 543 + crate::vu_instructions::vzero, 544 + crate::vu_instructions::vzero, 545 + crate::vu_instructions::vzero, 546 + crate::vu_instructions::vzero, 547 + crate::vu_instructions::vzero, 548 + crate::vu_instructions::vzero, 549 + crate::vu_instructions::vzero, 550 + crate::vu_instructions::vsar, 551 + crate::vu_instructions::vzero, 552 + crate::vu_instructions::vzero, 553 + crate::vu_instructions::vlt, 554 + crate::vu_instructions::veq, 555 + crate::vu_instructions::vne, 556 + crate::vu_instructions::vge, 557 + crate::vu_instructions::vcl, 558 + crate::vu_instructions::vch, 559 + crate::vu_instructions::vcr, 560 + crate::vu_instructions::vmrg, 561 + crate::vu_instructions::vand, 562 + crate::vu_instructions::vnand, 563 + crate::vu_instructions::vor, 564 + crate::vu_instructions::vnor, 565 + crate::vu_instructions::vxor, 566 + crate::vu_instructions::vnxor, 567 + crate::vu_instructions::vzero, 568 + crate::vu_instructions::vzero, 569 + crate::vu_instructions::vrcp, 570 + crate::vu_instructions::vrcpl, 571 + crate::vu_instructions::vrcph, 572 + crate::vu_instructions::vmov, 573 + crate::vu_instructions::vrsq, 574 + crate::vu_instructions::vrsql, 575 + crate::vu_instructions::vrsqh, 576 + crate::vu_instructions::vnop, 577 + crate::vu_instructions::vzero, 578 + crate::vu_instructions::vzero, 579 + crate::vu_instructions::vzero, 580 + crate::vu_instructions::vzero, 581 + crate::vu_instructions::vzero, 582 + crate::vu_instructions::vzero, 583 + crate::vu_instructions::vzero, 584 + crate::vu_instructions::vnop, 585 + ]; 586 + } 587 + 588 + pub fn init(device: &mut crate::Device) { 589 + device.rsp.cpu.reciprocals[0] = u16::MAX; 590 + let mut index = 1; 591 + while index < 512 { 592 + let a = (index + 512) as u64; 593 + let b = (1_u64 << 34) / a; 594 + device.rsp.cpu.reciprocals[index] = ((b + 1) >> 8) as u16; 595 + index += 1; 596 + } 597 + 598 + index = 0; 599 + while index < 512 { 600 + let mut shift = 0; 601 + if index % 2 == 1 { 602 + shift = 1; 603 + } 604 + let a = ((index + 512) >> shift) as u64; 605 + let mut b = (1 << 17) as u64; 606 + while a * (b + 1) * (b + 1) < (1_u64 << 44) { 607 + b += 1; 608 + } 609 + device.rsp.cpu.inverse_square_roots[index] = (b >> 1) as u16; 610 + index += 1; 611 + } 612 + 613 + device.rsp.cpu.shuffle = unsafe { 614 + [ 615 + _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), 616 + _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), 617 + _mm_set_epi8(15, 14, 15, 14, 11, 10, 11, 10, 7, 6, 7, 6, 3, 2, 3, 2), 618 + _mm_set_epi8(13, 12, 13, 12, 9, 8, 9, 8, 5, 4, 5, 4, 1, 0, 1, 0), 619 + _mm_set_epi8(15, 14, 15, 14, 15, 14, 15, 14, 7, 6, 7, 6, 7, 6, 7, 6), 620 + _mm_set_epi8(13, 12, 13, 12, 13, 12, 13, 12, 5, 4, 5, 4, 5, 4, 5, 4), 621 + _mm_set_epi8(11, 10, 11, 10, 11, 10, 11, 10, 3, 2, 3, 2, 3, 2, 3, 2), 622 + _mm_set_epi8(9, 8, 9, 8, 9, 8, 9, 8, 1, 0, 1, 0, 1, 0, 1, 0), 623 + _mm_set_epi8( 624 + 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 625 + ), 626 + _mm_set_epi8( 627 + 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 13, 12, 628 + ), 629 + _mm_set_epi8( 630 + 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 631 + ), 632 + _mm_set_epi8(9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8), 633 + _mm_set_epi8(7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6), 634 + _mm_set_epi8(5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4), 635 + _mm_set_epi8(3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2), 636 + _mm_set_epi8(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), 637 + ] 638 + }; 639 + 640 + map_instructions(device); 641 + }
+229
crates/rsp/src/lib.rs
··· 1 + // SPDX-FileCopyrightText: 2024 gopher64 contributors 2 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 3 + // 4 + // SPDX-License-Identifier: GPL-3.0-or-later 5 + 6 + //! Standalone N64 RSP (Reality Signal Processor) emulator. 7 + //! 8 + //! Extracted from [gopher64](https://github.com/gopher64/gopher64) with 9 + //! minimal changes: external dependencies (cycle counting, events, save states, 10 + //! video) are stubbed, and the RDP `run_rdp` function collects command words 11 + //! instead of sending them to a GPU backend. 12 + //! 13 + //! # Usage 14 + //! 15 + //! ```no_run 16 + //! let mut device = rsp::Device::new(4 * 1024 * 1024); 17 + //! // Write program to RDRAM, set up IMEM/DMEM, configure RSP registers… 18 + //! let rdp_commands = device.run(); 19 + //! ``` 20 + 21 + pub mod cpu; 22 + pub mod rdp; 23 + pub mod rsp_interface; 24 + pub mod su_instructions; 25 + pub mod vu_instructions; 26 + 27 + /// Branch state enum used by the RSP CPU pipeline. 28 + #[derive(PartialEq, Copy, Clone)] 29 + pub enum BranchStepState { 30 + Step, 31 + Take, 32 + NotTaken, 33 + DelaySlotTaken, 34 + DelaySlotNotTaken, 35 + Discard, 36 + Exception, 37 + } 38 + 39 + /// RDRAM (main memory). 40 + pub struct Rdram { 41 + pub mem: Vec<u8>, 42 + pub size: u32, 43 + } 44 + 45 + /// MI (MIPS Interface) registers — only the register array is needed. 46 + pub struct Mi { 47 + pub regs: [u32; 4], 48 + } 49 + 50 + /// Equivalent of `device::memory::masked_write_32`. 51 + #[inline] 52 + pub fn masked_write_32(dst: &mut u32, value: u32, mask: u32) { 53 + *dst = (*dst & !mask) | (value & mask); 54 + } 55 + 56 + /// Maximum RSP cycles before aborting to prevent infinite loops. 57 + /// 58 + /// 10 million cycles is more than enough for any real display list 59 + /// (a typical frame takes ~100K cycles). 60 + const DEFAULT_MAX_CYCLES: u64 = 10_000_000; 61 + 62 + /// Minimal N64 device containing only what the RSP needs. 63 + pub struct Device { 64 + pub rsp: rsp_interface::Rsp, 65 + pub rdram: Rdram, 66 + pub rdp: rdp::Rdp, 67 + pub mi: Mi, 68 + pub byte_swap: usize, 69 + /// Maximum total cycles before `run()` forcibly halts. 70 + pub max_cycles: u64, 71 + } 72 + 73 + impl Device { 74 + /// Creates a new device with the given RDRAM size (bytes). 75 + pub fn new(rdram_size: u32) -> Self { 76 + let mut device = Self { 77 + rsp: rsp_interface::Rsp::new(), 78 + rdram: Rdram { 79 + mem: vec![0u8; rdram_size as usize], 80 + size: rdram_size, 81 + }, 82 + rdp: rdp::Rdp::new(), 83 + mi: Mi { regs: [0; 4] }, 84 + byte_swap: 0, 85 + max_cycles: DEFAULT_MAX_CYCLES, 86 + }; 87 + rsp_interface::init(&mut device); 88 + rdp::init(&mut device); 89 + device 90 + } 91 + 92 + /// Resets all RSP and RDP state without reallocating RDRAM. 93 + /// 94 + /// Call this between frames when reusing a device. RDRAM contents are 95 + /// preserved so microcode and other static data don't need to be 96 + /// reloaded. 97 + pub fn reset(&mut self) { 98 + self.rsp = rsp_interface::Rsp::new(); 99 + self.rdp = rdp::Rdp::new(); 100 + self.mi = Mi { regs: [0; 4] }; 101 + self.byte_swap = 0; 102 + rsp_interface::init(self); 103 + rdp::init(self); 104 + } 105 + 106 + /// Runs the RSP until it halts or breaks, then returns the collected RDP 107 + /// command words. 108 + /// 109 + /// The RSP may hit sync points during DMA and DPC operations. This method 110 + /// automatically resumes execution after each sync point, looping until 111 + /// the RSP truly halts or breaks. 112 + pub fn run(&mut self) -> &[u32] { 113 + self.rdp.collected_commands.clear(); 114 + let mut total_cycles: u64 = 0; 115 + loop { 116 + let batch_cycles = cpu::run(self); 117 + total_cycles += batch_cycles; 118 + rsp_interface::rsp_event(self); 119 + if self.rsp.cpu.broken || self.rsp.cpu.halted || total_cycles >= self.max_cycles { 120 + break; 121 + } 122 + } 123 + &self.rdp.collected_commands 124 + } 125 + 126 + /// Mutable access to RDRAM for writing data the RSP will read. 127 + pub fn rdram_mut(&mut self) -> &mut [u8] { 128 + &mut self.rdram.mem 129 + } 130 + 131 + /// Mutable access to DMEM (first 4KB of RSP memory). 132 + pub fn dmem_mut(&mut self) -> &mut [u8] { 133 + &mut self.rsp.mem[..0x1000] 134 + } 135 + 136 + /// Mutable access to IMEM (second 4KB of RSP memory). 137 + pub fn imem_mut(&mut self) -> &mut [u8] { 138 + &mut self.rsp.mem[0x1000..0x2000] 139 + } 140 + 141 + /// Sets the RSP program counter (0x000–0xFFC). 142 + pub fn set_pc(&mut self, pc: u32) { 143 + self.rsp.regs2[rsp_interface::SP_PC_REG as usize] = pc & 0xFFC; 144 + } 145 + 146 + /// Clears the HALT bit so the RSP will run. 147 + pub fn clear_halt(&mut self) { 148 + rsp_interface::write_regs( 149 + self, 150 + (rsp_interface::SP_STATUS_REG << 2) as u64, 151 + 1, // SP_CLR_HALT 152 + 0xFFFF_FFFF, 153 + ); 154 + } 155 + 156 + /// Decode all opcodes in IMEM after a bulk write. 157 + pub fn decode_imem(&mut self) { 158 + for i in 0..1024 { 159 + let offset = i * 4; 160 + let opcode = u32::from_be_bytes( 161 + self.rsp.mem[0x1000 + offset..0x1000 + offset + 4] 162 + .try_into() 163 + .unwrap(), 164 + ); 165 + self.rsp.cpu.instructions[i].opcode = opcode; 166 + self.rsp.cpu.instructions[i].func = cpu::decode_opcode(self, opcode); 167 + } 168 + } 169 + } 170 + 171 + #[cfg(test)] 172 + mod tests { 173 + use super::*; 174 + 175 + #[test] 176 + fn simple_break_program() { 177 + let mut device = Device::new(1024 * 1024); 178 + 179 + // Write a simple program to IMEM: BREAK (opcode 0x0000000D) 180 + let break_opcode: u32 = 0x0000_000D; 181 + device.imem_mut()[0..4].copy_from_slice(&break_opcode.to_be_bytes()); 182 + device.decode_imem(); 183 + device.set_pc(0); 184 + 185 + let cmds = device.run(); 186 + // BREAK should halt immediately, no RDP commands 187 + assert!(cmds.is_empty()); 188 + } 189 + 190 + #[test] 191 + fn dma_write_to_rdram() { 192 + let mut device = Device::new(1024 * 1024); 193 + 194 + // Write test pattern to DMEM 195 + let pattern = [0xDE, 0xAD, 0xBE, 0xEF, 0xCA, 0xFE, 0xBA, 0xBE]; 196 + device.dmem_mut()[0..8].copy_from_slice(&pattern); 197 + 198 + // RSP accesses SP registers via MTC0 (coprocessor 0), not memory-mapped IO. 199 + // COP0 reg 0 = SP_MEM_ADDR, 1 = SP_DRAM_ADDR, 3 = SP_WR_LEN (DMEM→RDRAM) 200 + // 201 + // MTC0 rt, rd encoding: 010000 00100 [rt:5] [rd:5] 00000000000 202 + let program: &[u32] = &[ 203 + 0x3C08_0000, // LUI $8, 0 204 + 0x3508_0100, // ORI $8, $8, 0x100 205 + 0x3C09_0000, // LUI $9, 0 206 + 0x3529_0007, // ORI $9, $9, 7 (length-1 = 7 → 8 bytes) 207 + 0x4080_0000, // MTC0 $zero, c0 → SP_MEM_ADDR = 0 208 + 0x4088_0800, // MTC0 $8, c1 → SP_DRAM_ADDR = 0x100 209 + 0x4089_1800, // MTC0 $9, c3 → SP_WR_LEN = 7, triggers DMA 210 + 0x0000_000D, // BREAK 211 + ]; 212 + 213 + let imem = device.imem_mut(); 214 + for (i, &word) in program.iter().enumerate() { 215 + let offset = i * 4; 216 + imem[offset..offset + 4].copy_from_slice(&word.to_be_bytes()); 217 + } 218 + device.decode_imem(); 219 + device.set_pc(0); 220 + 221 + device.run(); 222 + 223 + // DMA uses to_ne_bytes when copying to RDRAM 224 + let rdram_data = &device.rdram.mem[0x100..0x108]; 225 + let expected = u32::from_be_bytes(pattern[0..4].try_into().unwrap()); 226 + let actual = u32::from_ne_bytes(rdram_data[0..4].try_into().unwrap()); 227 + assert_eq!(actual, expected, "DMA should have copied DMEM to RDRAM"); 228 + } 229 + }
+221
crates/rsp/src/rdp.rs
··· 1 + // SPDX-FileCopyrightText: 2024 gopher64 contributors 2 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 3 + // 4 + // SPDX-License-Identifier: GPL-3.0-or-later 5 + 6 + //! RDP (Reality Display Processor) register handling and command collection. 7 + //! 8 + //! Instead of sending commands to a GPU backend (as gopher64 does), this 9 + //! standalone version collects the RDP command words into a `Vec<u32>` so 10 + //! the caller can pass them to parallel-rdp or another renderer. 11 + 12 + pub const DPC_START_REG: u32 = 0; 13 + pub const DPC_END_REG: u32 = 1; 14 + pub const DPC_CURRENT_REG: u32 = 2; 15 + pub const DPC_STATUS_REG: u32 = 3; 16 + const DPC_CLOCK_REG: u32 = 4; 17 + const DPC_BUFBUSY_REG: u32 = 5; 18 + const DPC_PIPEBUSY_REG: u32 = 6; 19 + const DPC_TMEM_REG: u32 = 7; 20 + pub const DPC_REGS_COUNT: u32 = 8; 21 + 22 + pub const DPS_REGS_COUNT: u32 = 4; 23 + 24 + const DPC_STATUS_XBUS_DMEM_DMA: u32 = 1 << 0; 25 + const DPC_STATUS_FREEZE: u32 = 1 << 1; 26 + const DPC_STATUS_FLUSH: u32 = 1 << 2; 27 + const DPC_STATUS_START_GCLK: u32 = 1 << 3; 28 + const DPC_STATUS_TMEM_BUSY: u32 = 1 << 4; 29 + const DPC_STATUS_PIPE_BUSY: u32 = 1 << 5; 30 + const DPC_STATUS_CMD_BUSY: u32 = 1 << 6; 31 + const DPC_STATUS_CBUF_READY: u32 = 1 << 7; 32 + const DPC_STATUS_START_VALID: u32 = 1 << 10; 33 + 34 + const DPC_CLR_XBUS_DMEM_DMA: u32 = 1 << 0; 35 + const DPC_SET_XBUS_DMEM_DMA: u32 = 1 << 1; 36 + const DPC_CLR_FREEZE: u32 = 1 << 2; 37 + const DPC_SET_FREEZE: u32 = 1 << 3; 38 + const DPC_CLR_FLUSH: u32 = 1 << 4; 39 + const DPC_SET_FLUSH: u32 = 1 << 5; 40 + const DPC_CLR_TMEM_CTR: u32 = 1 << 6; 41 + const DPC_CLR_PIPE_CTR: u32 = 1 << 7; 42 + const DPC_CLR_CMD_CTR: u32 = 1 << 8; 43 + const DPC_CLR_CLOCK_CTR: u32 = 1 << 9; 44 + 45 + pub struct Rdp { 46 + pub regs_dpc: [u32; DPC_REGS_COUNT as usize], 47 + pub regs_dps: [u32; DPS_REGS_COUNT as usize], 48 + pub wait_frozen: bool, 49 + pub last_status_value: u32, 50 + /// Collected RDP command words, populated by `run_rdp`. 51 + pub collected_commands: Vec<u32>, 52 + } 53 + 54 + impl Rdp { 55 + pub fn new() -> Self { 56 + Self { 57 + regs_dpc: [0; DPC_REGS_COUNT as usize], 58 + regs_dps: [0; DPS_REGS_COUNT as usize], 59 + wait_frozen: false, 60 + last_status_value: 0, 61 + collected_commands: Vec::new(), 62 + } 63 + } 64 + } 65 + 66 + pub fn read_regs_dpc(device: &mut crate::Device, address: u64, _access_size: u64) -> u32 { 67 + let reg = (address & 0xFFFF) >> 2; 68 + match reg as u32 { 69 + DPC_STATUS_REG => { 70 + let value = 71 + device.rdp.regs_dpc[reg as usize] & (DPC_STATUS_PIPE_BUSY | DPC_STATUS_CMD_BUSY); 72 + if value == device.rdp.last_status_value && value != 0 { 73 + device.rsp.cpu.sync_point = true; 74 + } 75 + device.rdp.last_status_value = value; 76 + device.rdp.regs_dpc[reg as usize] 77 + } 78 + DPC_CLOCK_REG => 0xFFFFFF, 79 + DPC_CURRENT_REG => { 80 + if device.rdp.wait_frozen { 81 + device.rsp.cpu.sync_point = true; 82 + } 83 + device.rdp.regs_dpc[reg as usize] 84 + } 85 + _ => device.rdp.regs_dpc[reg as usize], 86 + } 87 + } 88 + 89 + pub fn write_regs_dpc(device: &mut crate::Device, address: u64, value: u32, mask: u32) { 90 + let reg = (address & 0xFFFF) >> 2; 91 + match reg as u32 { 92 + DPC_CURRENT_REG | DPC_CLOCK_REG | DPC_BUFBUSY_REG | DPC_PIPEBUSY_REG | DPC_TMEM_REG => {} 93 + DPC_STATUS_REG => update_dpc_status(device, value & mask), 94 + DPC_START_REG => { 95 + if (device.rdp.regs_dpc[DPC_STATUS_REG as usize] & DPC_STATUS_START_VALID) == 0 { 96 + crate::masked_write_32( 97 + &mut device.rdp.regs_dpc[reg as usize], 98 + value & 0xFFFFF8, 99 + mask, 100 + ); 101 + } 102 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] |= DPC_STATUS_START_VALID; 103 + } 104 + DPC_END_REG => { 105 + crate::masked_write_32( 106 + &mut device.rdp.regs_dpc[reg as usize], 107 + value & 0xFFFFF8, 108 + mask, 109 + ); 110 + if (device.rdp.regs_dpc[DPC_STATUS_REG as usize] & DPC_STATUS_START_VALID) != 0 { 111 + device.rdp.regs_dpc[DPC_CURRENT_REG as usize] = 112 + device.rdp.regs_dpc[DPC_START_REG as usize]; 113 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] &= !DPC_STATUS_START_VALID; 114 + } 115 + if device.rdp.regs_dpc[DPC_STATUS_REG as usize] & DPC_STATUS_FREEZE == 0 { 116 + run_rdp(device); 117 + } else { 118 + device.rdp.wait_frozen = true; 119 + } 120 + } 121 + _ => crate::masked_write_32(&mut device.rdp.regs_dpc[reg as usize], value, mask), 122 + } 123 + } 124 + 125 + /// Collects RDP commands from RDRAM between CURRENT and END registers. 126 + fn run_rdp(device: &mut crate::Device) { 127 + let current = device.rdp.regs_dpc[DPC_CURRENT_REG as usize] as usize; 128 + let end = device.rdp.regs_dpc[DPC_END_REG as usize] as usize; 129 + 130 + if device.rdp.regs_dpc[DPC_STATUS_REG as usize] & DPC_STATUS_XBUS_DMEM_DMA != 0 { 131 + // XBUS mode: commands come from DMEM/IMEM instead of RDRAM 132 + let mut addr = current & 0xFFF; 133 + while addr < (end & 0xFFF) { 134 + let word = u32::from_be_bytes(device.rsp.mem[addr..addr + 4].try_into().unwrap()); 135 + device.rdp.collected_commands.push(word); 136 + addr += 4; 137 + } 138 + } else { 139 + // Normal mode: commands come from RDRAM (stored in native byte order) 140 + let mut addr = current; 141 + while addr < end { 142 + if addr + 4 <= device.rdram.mem.len() { 143 + let word = u32::from_ne_bytes(device.rdram.mem[addr..addr + 4].try_into().unwrap()); 144 + device.rdp.collected_commands.push(word); 145 + } 146 + addr += 4; 147 + } 148 + } 149 + 150 + device.rdp.regs_dpc[DPC_CURRENT_REG as usize] = device.rdp.regs_dpc[DPC_END_REG as usize]; 151 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] |= 152 + DPC_STATUS_START_GCLK | DPC_STATUS_PIPE_BUSY | DPC_STATUS_CMD_BUSY; 153 + device.rdp.regs_dpc[DPC_PIPEBUSY_REG as usize] = 0xFFFFFF; 154 + 155 + // Immediately clear busy flags (synchronous execution) 156 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] &= 157 + !(DPC_STATUS_START_GCLK | DPC_STATUS_PIPE_BUSY | DPC_STATUS_CMD_BUSY); 158 + } 159 + 160 + pub fn read_regs_dps(device: &mut crate::Device, address: u64) -> u32 { 161 + device.rdp.regs_dps[((address & 0xFFFF) >> 2) as usize] 162 + } 163 + 164 + pub fn write_regs_dps(device: &mut crate::Device, address: u64, value: u32, mask: u32) { 165 + crate::masked_write_32( 166 + &mut device.rdp.regs_dps[((address & 0xFFFF) >> 2) as usize], 167 + value, 168 + mask, 169 + ); 170 + } 171 + 172 + fn update_dpc_status(device: &mut crate::Device, w: u32) { 173 + if w & DPC_CLR_XBUS_DMEM_DMA != 0 { 174 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] &= !DPC_STATUS_XBUS_DMEM_DMA; 175 + } 176 + if w & DPC_SET_XBUS_DMEM_DMA != 0 { 177 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] |= DPC_STATUS_XBUS_DMEM_DMA; 178 + } 179 + 180 + if w & DPC_CLR_FREEZE != 0 { 181 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] &= !DPC_STATUS_FREEZE; 182 + if device.rdp.wait_frozen { 183 + run_rdp(device); 184 + device.rdp.wait_frozen = false; 185 + } 186 + } 187 + if w & DPC_SET_FREEZE != 0 { 188 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] |= DPC_STATUS_FREEZE; 189 + } 190 + 191 + if w & DPC_CLR_FLUSH != 0 { 192 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] &= !DPC_STATUS_FLUSH; 193 + } 194 + if w & DPC_SET_FLUSH != 0 { 195 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] |= DPC_STATUS_FLUSH; 196 + } 197 + 198 + if w & DPC_CLR_TMEM_CTR != 0 { 199 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] &= !DPC_STATUS_TMEM_BUSY; 200 + device.rdp.regs_dpc[DPC_TMEM_REG as usize] = 0; 201 + } 202 + if w & DPC_CLR_PIPE_CTR != 0 { 203 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] &= !DPC_STATUS_PIPE_BUSY; 204 + device.rdp.regs_dpc[DPC_PIPEBUSY_REG as usize] = 0; 205 + } 206 + if w & DPC_CLR_CMD_CTR != 0 { 207 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] &= !DPC_STATUS_CMD_BUSY; 208 + device.rdp.regs_dpc[DPC_BUFBUSY_REG as usize] = 0; 209 + } 210 + 211 + if w & DPC_CLR_CLOCK_CTR != 0 { 212 + device.rdp.regs_dpc[DPC_CLOCK_REG as usize] = 0; 213 + } 214 + } 215 + 216 + pub fn init(device: &mut crate::Device) { 217 + // Only set CBUF_READY. gopher64 also sets START_GCLK and PIPE_BUSY here 218 + // because the game's VR4300 CPU clears them before launching RSP tasks. 219 + // In standalone mode there is no game CPU, so we start with a clean state. 220 + device.rdp.regs_dpc[DPC_STATUS_REG as usize] |= DPC_STATUS_CBUF_READY; 221 + }
+474
crates/rsp/src/rsp_interface.rs
··· 1 + // SPDX-FileCopyrightText: 2024 gopher64 contributors 2 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 3 + // 4 + // SPDX-License-Identifier: GPL-3.0-or-later 5 + 6 + //! RSP interface: SP registers, DMA, and status handling. 7 + //! 8 + //! Adapted from gopher64. External dependencies (cycle counting, events, 9 + //! framebuffer checks) are stubbed out since we run the RSP synchronously. 10 + 11 + use crate::cpu; 12 + 13 + const SP_MEM_ADDR_REG: u32 = 0; 14 + const SP_DRAM_ADDR_REG: u32 = 1; 15 + const SP_RD_LEN_REG: u32 = 2; 16 + const SP_WR_LEN_REG: u32 = 3; 17 + pub const SP_STATUS_REG: u32 = 4; 18 + const SP_DMA_FULL_REG: u32 = 5; 19 + const SP_DMA_BUSY_REG: u32 = 6; 20 + const SP_SEMAPHORE_REG: u32 = 7; 21 + pub const SP_REGS_COUNT: u32 = 8; 22 + 23 + pub const SP_PC_REG: u32 = 0; 24 + pub const SP_REGS2_COUNT: u32 = 2; 25 + 26 + pub const SP_STATUS_HALT: u32 = 1 << 0; 27 + const SP_STATUS_BROKE: u32 = 1 << 1; 28 + const SP_STATUS_DMA_BUSY: u32 = 1 << 2; 29 + const SP_STATUS_DMA_FULL: u32 = 1 << 3; 30 + const SP_STATUS_SSTEP: u32 = 1 << 5; 31 + const SP_STATUS_INTR_BREAK: u32 = 1 << 6; 32 + const SP_STATUS_SIG0: u32 = 1 << 7; 33 + const SP_STATUS_SIG1: u32 = 1 << 8; 34 + const SP_STATUS_SIG2: u32 = 1 << 9; 35 + const SP_STATUS_SIG3: u32 = 1 << 10; 36 + const SP_STATUS_SIG4: u32 = 1 << 11; 37 + const SP_STATUS_SIG5: u32 = 1 << 12; 38 + const SP_STATUS_SIG6: u32 = 1 << 13; 39 + const SP_STATUS_SIG7: u32 = 1 << 14; 40 + 41 + const SP_CLR_HALT: u32 = 1 << 0; 42 + pub const SP_SET_HALT: u32 = 1 << 1; 43 + const SP_CLR_BROKE: u32 = 1 << 2; 44 + const _SP_CLR_INTR: u32 = 1 << 3; 45 + const _SP_SET_INTR: u32 = 1 << 4; 46 + const SP_CLR_SSTEP: u32 = 1 << 5; 47 + const SP_SET_SSTEP: u32 = 1 << 6; 48 + const SP_CLR_INTR_BREAK: u32 = 1 << 7; 49 + const SP_SET_INTR_BREAK: u32 = 1 << 8; 50 + const SP_CLR_SIG0: u32 = 1 << 9; 51 + const SP_SET_SIG0: u32 = 1 << 10; 52 + const SP_CLR_SIG1: u32 = 1 << 11; 53 + const SP_SET_SIG1: u32 = 1 << 12; 54 + const SP_CLR_SIG2: u32 = 1 << 13; 55 + const SP_SET_SIG2: u32 = 1 << 14; 56 + const SP_CLR_SIG3: u32 = 1 << 15; 57 + const SP_SET_SIG3: u32 = 1 << 16; 58 + const SP_CLR_SIG4: u32 = 1 << 17; 59 + const SP_SET_SIG4: u32 = 1 << 18; 60 + const SP_CLR_SIG5: u32 = 1 << 19; 61 + const SP_SET_SIG5: u32 = 1 << 20; 62 + const SP_CLR_SIG6: u32 = 1 << 21; 63 + const SP_SET_SIG6: u32 = 1 << 22; 64 + const SP_CLR_SIG7: u32 = 1 << 23; 65 + const SP_SET_SIG7: u32 = 1 << 24; 66 + 67 + const RSP_MEM_MASK: usize = 0x1FFF; 68 + 69 + #[derive(PartialEq, Copy, Clone)] 70 + pub enum DmaDir { 71 + None, 72 + Write, 73 + Read, 74 + } 75 + 76 + #[derive(Copy, Clone)] 77 + pub struct RspDma { 78 + pub dir: DmaDir, 79 + pub length: u32, 80 + pub memaddr: u32, 81 + pub dramaddr: u32, 82 + } 83 + 84 + pub struct Rsp { 85 + pub cpu: cpu::Cpu, 86 + pub regs: [u32; SP_REGS_COUNT as usize], 87 + pub regs2: [u32; SP_REGS2_COUNT as usize], 88 + pub mem: [u8; 0x2000], 89 + pub fifo: [RspDma; 2], 90 + pub last_status_value: u32, 91 + pub run_after_dma: bool, 92 + } 93 + 94 + impl Rsp { 95 + pub fn new() -> Self { 96 + let default_dma = RspDma { 97 + dir: DmaDir::None, 98 + length: 0, 99 + memaddr: 0, 100 + dramaddr: 0, 101 + }; 102 + Self { 103 + cpu: cpu::Cpu::new(), 104 + regs: [0; SP_REGS_COUNT as usize], 105 + regs2: [0; SP_REGS2_COUNT as usize], 106 + mem: [0; 0x2000], 107 + fifo: [default_dma; 2], 108 + last_status_value: 0, 109 + run_after_dma: false, 110 + } 111 + } 112 + } 113 + 114 + pub fn read_mem_fast(device: &crate::Device, address: u64) -> u32 { 115 + let masked_address = address as usize & RSP_MEM_MASK; 116 + u32::from_be_bytes( 117 + device.rsp.mem[masked_address..masked_address + 4] 118 + .try_into() 119 + .unwrap(), 120 + ) 121 + } 122 + 123 + pub fn read_mem(device: &mut crate::Device, address: u64) -> u32 { 124 + // No cycle counting in standalone mode 125 + let masked_address = address as usize & RSP_MEM_MASK; 126 + u32::from_be_bytes( 127 + device.rsp.mem[masked_address..masked_address + 4] 128 + .try_into() 129 + .unwrap(), 130 + ) 131 + } 132 + 133 + pub fn write_mem(device: &mut crate::Device, address: u64, value: u32, _mask: u32) { 134 + let masked_address = address as usize & RSP_MEM_MASK; 135 + let mut data = u32::from_be_bytes( 136 + device.rsp.mem[masked_address..masked_address + 4] 137 + .try_into() 138 + .unwrap(), 139 + ); 140 + crate::masked_write_32(&mut data, value, 0xFFFF_FFFF); 141 + device.rsp.mem[masked_address..masked_address + 4].copy_from_slice(&data.to_be_bytes()); 142 + 143 + if masked_address & 0x1000 != 0 { 144 + device.rsp.cpu.instructions[(masked_address & 0xFFF) / 4].func = 145 + cpu::decode_opcode(device, data); 146 + device.rsp.cpu.instructions[(masked_address & 0xFFF) / 4].opcode = data; 147 + } 148 + } 149 + 150 + fn do_dma(device: &mut crate::Device, dma: RspDma) { 151 + let l = dma.length; 152 + let length = ((l & 0xfff) | 7) + 1; 153 + let count = ((l >> 12) & 0xff) + 1; 154 + let skip = (l >> 20) & 0xff8; 155 + 156 + let mut mem_addr = dma.memaddr & 0xff8; 157 + let mut dram_addr = dma.dramaddr & 0xfffff8; 158 + let offset = dma.memaddr & 0x1000; 159 + 160 + // Stub: no framebuffer check 161 + if dma.dir == DmaDir::Read { 162 + let mut j = 0; 163 + while j < count { 164 + let mut i = 0; 165 + while i < length { 166 + let data = u32::from_be_bytes( 167 + device.rsp.mem[(offset + (mem_addr & 0xFFF)) as usize 168 + ..(offset + (mem_addr & 0xFFF)) as usize + 4] 169 + .try_into() 170 + .unwrap(), 171 + ); 172 + device 173 + .rdram 174 + .mem 175 + .get_mut(dram_addr as usize..dram_addr as usize + 4) 176 + .unwrap_or(&mut [0; 4]) 177 + .copy_from_slice(&data.to_ne_bytes()); 178 + mem_addr += 4; 179 + dram_addr += 4; 180 + i += 4; 181 + } 182 + dram_addr += skip; 183 + j += 1; 184 + } 185 + } else { 186 + let mut j = 0; 187 + while j < count { 188 + let mut i = 0; 189 + while i < length { 190 + let data = u32::from_ne_bytes( 191 + device 192 + .rdram 193 + .mem 194 + .get(dram_addr as usize..dram_addr as usize + 4) 195 + .unwrap_or(&[0; 4]) 196 + .try_into() 197 + .unwrap_or_default(), 198 + ); 199 + 200 + if offset != 0 { 201 + device.rsp.cpu.instructions[((mem_addr & 0xFFF) / 4) as usize].func = 202 + cpu::decode_opcode(device, data); 203 + device.rsp.cpu.instructions[((mem_addr & 0xFFF) / 4) as usize].opcode = data; 204 + } 205 + device.rsp.mem[(offset + (mem_addr & 0xFFF)) as usize 206 + ..(offset + (mem_addr & 0xFFF)) as usize + 4] 207 + .copy_from_slice(&data.to_be_bytes()); 208 + mem_addr += 4; 209 + dram_addr += 4; 210 + i += 4; 211 + } 212 + dram_addr += skip; 213 + j += 1; 214 + } 215 + } 216 + device.rsp.regs[SP_MEM_ADDR_REG as usize] = (mem_addr & 0xfff) + (dma.memaddr & 0x1000); 217 + device.rsp.regs[SP_DRAM_ADDR_REG as usize] = dram_addr; 218 + device.rsp.regs[SP_RD_LEN_REG as usize] = 0xff8; 219 + device.rsp.regs[SP_WR_LEN_REG as usize] = 0xff8; 220 + 221 + // DMA completes synchronously in standalone mode (no events) 222 + } 223 + 224 + fn fifo_push(device: &mut crate::Device, dir: DmaDir) { 225 + if device.rsp.regs[SP_DMA_FULL_REG as usize] != 0 { 226 + panic!("RSP DMA already full") 227 + } 228 + 229 + device.rsp.cpu.sync_point = true; 230 + 231 + if device.rsp.regs[SP_DMA_BUSY_REG as usize] != 0 { 232 + device.rsp.fifo[1].dir = dir; 233 + if dir == DmaDir::Read { 234 + device.rsp.fifo[1].length = device.rsp.regs[SP_WR_LEN_REG as usize]; 235 + } else { 236 + device.rsp.fifo[1].length = device.rsp.regs[SP_RD_LEN_REG as usize]; 237 + } 238 + device.rsp.fifo[1].memaddr = device.rsp.regs[SP_MEM_ADDR_REG as usize]; 239 + device.rsp.fifo[1].dramaddr = device.rsp.regs[SP_DRAM_ADDR_REG as usize]; 240 + device.rsp.regs[SP_DMA_FULL_REG as usize] = 1; 241 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_DMA_FULL; 242 + } else { 243 + device.rsp.fifo[0].dir = dir; 244 + if dir == DmaDir::Read { 245 + device.rsp.fifo[0].length = device.rsp.regs[SP_WR_LEN_REG as usize]; 246 + } else { 247 + device.rsp.fifo[0].length = device.rsp.regs[SP_RD_LEN_REG as usize]; 248 + } 249 + device.rsp.fifo[0].memaddr = device.rsp.regs[SP_MEM_ADDR_REG as usize]; 250 + device.rsp.fifo[0].dramaddr = device.rsp.regs[SP_DRAM_ADDR_REG as usize]; 251 + device.rsp.regs[SP_DMA_BUSY_REG as usize] = 1; 252 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_DMA_BUSY; 253 + 254 + do_dma(device, device.rsp.fifo[0]); 255 + 256 + // DMA completes synchronously — immediately pop the FIFO to clear DMA_BUSY. 257 + // In gopher64, this happens via an asynchronous event callback. 258 + fifo_pop(device); 259 + } 260 + } 261 + 262 + pub fn fifo_pop(device: &mut crate::Device) { 263 + if device.rsp.regs[SP_DMA_FULL_REG as usize] != 0 { 264 + device.rsp.fifo[0].dir = device.rsp.fifo[1].dir; 265 + device.rsp.fifo[0].length = device.rsp.fifo[1].length; 266 + device.rsp.fifo[0].memaddr = device.rsp.fifo[1].memaddr; 267 + device.rsp.fifo[0].dramaddr = device.rsp.fifo[1].dramaddr; 268 + device.rsp.regs[SP_DMA_FULL_REG as usize] = 0; 269 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_DMA_FULL; 270 + 271 + do_dma(device, device.rsp.fifo[0]); 272 + 273 + // Synchronous DMA — recurse to clear DMA_BUSY (bounded: max depth 2) 274 + fifo_pop(device); 275 + } else { 276 + device.rsp.regs[SP_DMA_BUSY_REG as usize] = 0; 277 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_DMA_BUSY; 278 + if device.rsp.run_after_dma { 279 + device.rsp.run_after_dma = false; 280 + do_task(device); 281 + } 282 + } 283 + } 284 + 285 + pub fn read_regs(device: &mut crate::Device, address: u64, _access_size: u64) -> u32 { 286 + let reg = (address & 0xFFFF) >> 2; 287 + match reg as u32 { 288 + SP_STATUS_REG => { 289 + let value = device.rsp.regs[reg as usize] 290 + & (SP_STATUS_SIG0 291 + | SP_STATUS_SIG1 292 + | SP_STATUS_SIG2 293 + | SP_STATUS_SIG3 294 + | SP_STATUS_SIG4 295 + | SP_STATUS_SIG5 296 + | SP_STATUS_SIG6 297 + | SP_STATUS_SIG7); 298 + if value == device.rsp.last_status_value && value != 0 { 299 + device.rsp.cpu.sync_point = true; 300 + } 301 + device.rsp.last_status_value = value; 302 + device.rsp.regs[reg as usize] 303 + } 304 + SP_SEMAPHORE_REG => { 305 + let value = device.rsp.regs[reg as usize]; 306 + if value == 1 { 307 + device.rsp.cpu.sync_point = true; 308 + } 309 + device.rsp.regs[reg as usize] = 1; 310 + value 311 + } 312 + _ => device.rsp.regs[reg as usize], 313 + } 314 + } 315 + 316 + pub fn write_regs(device: &mut crate::Device, address: u64, value: u32, mask: u32) { 317 + let reg = (address & 0xFFFF) >> 2; 318 + match reg as u32 { 319 + SP_STATUS_REG => update_sp_status(device, value), 320 + SP_RD_LEN_REG => { 321 + crate::masked_write_32(&mut device.rsp.regs[reg as usize], value, mask); 322 + fifo_push(device, DmaDir::Write); 323 + } 324 + SP_WR_LEN_REG => { 325 + crate::masked_write_32(&mut device.rsp.regs[reg as usize], value, mask); 326 + fifo_push(device, DmaDir::Read); 327 + } 328 + SP_SEMAPHORE_REG => { 329 + crate::masked_write_32(&mut device.rsp.regs[reg as usize], 0, mask); 330 + } 331 + _ => crate::masked_write_32(&mut device.rsp.regs[reg as usize], value, mask), 332 + } 333 + } 334 + 335 + pub fn read_regs2(device: &mut crate::Device, address: u64) -> u32 { 336 + device.rsp.regs2[((address & 0xFFFF) >> 2) as usize] 337 + } 338 + 339 + pub fn write_regs2(device: &mut crate::Device, address: u64, value: u32, mask: u32) { 340 + let reg = (address & 0xFFFF) >> 2; 341 + match reg as u32 { 342 + SP_PC_REG => { 343 + crate::masked_write_32(&mut device.rsp.regs2[reg as usize], value & 0xFFC, mask); 344 + } 345 + _ => crate::masked_write_32(&mut device.rsp.regs2[reg as usize], value, mask), 346 + } 347 + } 348 + 349 + fn update_sp_status(device: &mut crate::Device, w: u32) { 350 + let was_halted = device.rsp.regs[SP_STATUS_REG as usize] & SP_STATUS_HALT != 0; 351 + 352 + if w & SP_CLR_HALT != 0 && w & SP_SET_HALT == 0 { 353 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_HALT; 354 + } 355 + if w & SP_SET_HALT != 0 && w & SP_CLR_HALT == 0 { 356 + // Stub: no event removal 357 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_HALT; 358 + } 359 + 360 + if w & SP_CLR_BROKE != 0 { 361 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_BROKE; 362 + } 363 + 364 + // Stub: no MI interrupt handling for SP_CLR_INTR / SP_SET_INTR 365 + 366 + if w & SP_CLR_SSTEP != 0 && w & SP_SET_SSTEP == 0 { 367 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_SSTEP; 368 + } 369 + if w & SP_SET_SSTEP != 0 && w & SP_CLR_SSTEP == 0 { 370 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_SSTEP; 371 + } 372 + 373 + if w & SP_CLR_INTR_BREAK != 0 && w & SP_SET_INTR_BREAK == 0 { 374 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_INTR_BREAK; 375 + } 376 + if w & SP_SET_INTR_BREAK != 0 && w & SP_CLR_INTR_BREAK == 0 { 377 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_INTR_BREAK; 378 + } 379 + 380 + if w & SP_CLR_SIG0 != 0 && w & SP_SET_SIG0 == 0 { 381 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_SIG0; 382 + } 383 + if w & SP_SET_SIG0 != 0 && w & SP_CLR_SIG0 == 0 { 384 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_SIG0; 385 + } 386 + 387 + if w & SP_CLR_SIG1 != 0 && w & SP_SET_SIG1 == 0 { 388 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_SIG1; 389 + } 390 + if w & SP_SET_SIG1 != 0 && w & SP_CLR_SIG1 == 0 { 391 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_SIG1; 392 + } 393 + 394 + if w & SP_CLR_SIG2 != 0 && w & SP_SET_SIG2 == 0 { 395 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_SIG2; 396 + } 397 + if w & SP_SET_SIG2 != 0 && w & SP_CLR_SIG2 == 0 { 398 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_SIG2; 399 + } 400 + 401 + if w & SP_CLR_SIG3 != 0 && w & SP_SET_SIG3 == 0 { 402 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_SIG3; 403 + } 404 + if w & SP_SET_SIG3 != 0 && w & SP_CLR_SIG3 == 0 { 405 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_SIG3; 406 + } 407 + 408 + if w & SP_CLR_SIG4 != 0 && w & SP_SET_SIG4 == 0 { 409 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_SIG4; 410 + } 411 + if w & SP_SET_SIG4 != 0 && w & SP_CLR_SIG4 == 0 { 412 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_SIG4; 413 + } 414 + 415 + if w & SP_CLR_SIG5 != 0 && w & SP_SET_SIG5 == 0 { 416 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_SIG5; 417 + } 418 + if w & SP_SET_SIG5 != 0 && w & SP_CLR_SIG5 == 0 { 419 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_SIG5; 420 + } 421 + 422 + if w & SP_CLR_SIG6 != 0 && w & SP_SET_SIG6 == 0 { 423 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_SIG6; 424 + } 425 + if w & SP_SET_SIG6 != 0 && w & SP_CLR_SIG6 == 0 { 426 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_SIG6; 427 + } 428 + 429 + if w & SP_CLR_SIG7 != 0 && w & SP_SET_SIG7 == 0 { 430 + device.rsp.regs[SP_STATUS_REG as usize] &= !SP_STATUS_SIG7; 431 + } 432 + if w & SP_SET_SIG7 != 0 && w & SP_CLR_SIG7 == 0 { 433 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_SIG7; 434 + } 435 + 436 + if device.rsp.regs[SP_STATUS_REG as usize] & SP_STATUS_HALT == 0 && was_halted { 437 + device.rsp.cpu.broken = false; 438 + device.rsp.cpu.halted = false; 439 + do_task(device); 440 + } 441 + } 442 + 443 + fn do_task(device: &mut crate::Device) { 444 + device.rsp.cpu.sync_point = false; 445 + device.rsp.last_status_value = 0; 446 + device.rdp.last_status_value = 0; 447 + if device.rsp.regs[SP_DMA_BUSY_REG as usize] == 1 { 448 + device.rsp.run_after_dma = true; 449 + } else { 450 + cpu::run(device); 451 + // No event creation — synchronous execution 452 + } 453 + } 454 + 455 + pub fn rsp_event(device: &mut crate::Device) { 456 + if device.rsp.cpu.broken { 457 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_HALT | SP_STATUS_BROKE; 458 + 459 + if device.rsp.regs[SP_STATUS_REG as usize] & SP_STATUS_INTR_BREAK != 0 { 460 + // Stub: no MI interrupt 461 + } 462 + return; 463 + } 464 + if device.rsp.cpu.halted { 465 + device.rsp.regs[SP_STATUS_REG as usize] |= SP_STATUS_HALT; 466 + return; 467 + } 468 + do_task(device); 469 + } 470 + 471 + pub fn init(device: &mut crate::Device) { 472 + device.rsp.regs[SP_STATUS_REG as usize] = 1; 473 + cpu::init(device); 474 + }
+1293
crates/rsp/src/su_instructions.rs
··· 1 + // SPDX-FileCopyrightText: 2024 gopher64 contributors 2 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 3 + // 4 + // SPDX-License-Identifier: GPL-3.0-or-later 5 + 6 + #[cfg(target_arch = "x86_64")] 7 + use std::arch::x86_64::*; 8 + 9 + fn rd(opcode: u32) -> u32 { 10 + (opcode >> 11) & 0x1F 11 + } 12 + 13 + fn rs(opcode: u32) -> u32 { 14 + (opcode >> 21) & 0x1F 15 + } 16 + 17 + fn rt(opcode: u32) -> u32 { 18 + (opcode >> 16) & 0x1F 19 + } 20 + 21 + fn sa(opcode: u32) -> u32 { 22 + (opcode >> 6) & 0x1F 23 + } 24 + 25 + fn imm(opcode: u32) -> u16 { 26 + opcode as u16 27 + } 28 + 29 + fn se16(value: i16) -> u32 { 30 + value as i32 as u32 31 + } 32 + 33 + fn voffset(opcode: u32) -> u8 { 34 + (opcode & 0x7F) as u8 35 + } 36 + 37 + fn velement(opcode: u32) -> u8 { 38 + ((opcode >> 7) & 0xF) as u8 39 + } 40 + 41 + fn sign_extend_7bit_offset(offset: u8, shift_amount: u32) -> u32 { 42 + let soffset = (((offset << 1) & 0x80) | offset) as i8; 43 + 44 + (((soffset) as i32) as u32) << shift_amount 45 + } 46 + 47 + fn modify_vpr8(vpr: &mut __m128i, element: u8, value: u8) { 48 + unsafe { 49 + *vpr = match element & 15 { 50 + 0 => _mm_insert_epi8(*vpr, value as i32, 15), 51 + 1 => _mm_insert_epi8(*vpr, value as i32, 14), 52 + 2 => _mm_insert_epi8(*vpr, value as i32, 13), 53 + 3 => _mm_insert_epi8(*vpr, value as i32, 12), 54 + 4 => _mm_insert_epi8(*vpr, value as i32, 11), 55 + 5 => _mm_insert_epi8(*vpr, value as i32, 10), 56 + 6 => _mm_insert_epi8(*vpr, value as i32, 9), 57 + 7 => _mm_insert_epi8(*vpr, value as i32, 8), 58 + 8 => _mm_insert_epi8(*vpr, value as i32, 7), 59 + 9 => _mm_insert_epi8(*vpr, value as i32, 6), 60 + 10 => _mm_insert_epi8(*vpr, value as i32, 5), 61 + 11 => _mm_insert_epi8(*vpr, value as i32, 4), 62 + 12 => _mm_insert_epi8(*vpr, value as i32, 3), 63 + 13 => _mm_insert_epi8(*vpr, value as i32, 2), 64 + 14 => _mm_insert_epi8(*vpr, value as i32, 1), 65 + 15 => _mm_insert_epi8(*vpr, value as i32, 0), 66 + _ => unreachable!(), 67 + }; 68 + } 69 + } 70 + 71 + fn get_vpr8(vpr: __m128i, element: u8) -> u8 { 72 + unsafe { 73 + match element & 15 { 74 + 0 => _mm_extract_epi8(vpr, 15) as u8, 75 + 1 => _mm_extract_epi8(vpr, 14) as u8, 76 + 2 => _mm_extract_epi8(vpr, 13) as u8, 77 + 3 => _mm_extract_epi8(vpr, 12) as u8, 78 + 4 => _mm_extract_epi8(vpr, 11) as u8, 79 + 5 => _mm_extract_epi8(vpr, 10) as u8, 80 + 6 => _mm_extract_epi8(vpr, 9) as u8, 81 + 7 => _mm_extract_epi8(vpr, 8) as u8, 82 + 8 => _mm_extract_epi8(vpr, 7) as u8, 83 + 9 => _mm_extract_epi8(vpr, 6) as u8, 84 + 10 => _mm_extract_epi8(vpr, 5) as u8, 85 + 11 => _mm_extract_epi8(vpr, 4) as u8, 86 + 12 => _mm_extract_epi8(vpr, 3) as u8, 87 + 13 => _mm_extract_epi8(vpr, 2) as u8, 88 + 14 => _mm_extract_epi8(vpr, 1) as u8, 89 + 15 => _mm_extract_epi8(vpr, 0) as u8, 90 + _ => unreachable!(), 91 + } 92 + } 93 + } 94 + 95 + pub fn modify_vpr16(vpr: &mut __m128i, element: u8, value: u16) { 96 + unsafe { 97 + *vpr = match element & 7 { 98 + 0 => _mm_insert_epi16(*vpr, value as i32, 7), 99 + 1 => _mm_insert_epi16(*vpr, value as i32, 6), 100 + 2 => _mm_insert_epi16(*vpr, value as i32, 5), 101 + 3 => _mm_insert_epi16(*vpr, value as i32, 4), 102 + 4 => _mm_insert_epi16(*vpr, value as i32, 3), 103 + 5 => _mm_insert_epi16(*vpr, value as i32, 2), 104 + 6 => _mm_insert_epi16(*vpr, value as i32, 1), 105 + 7 => _mm_insert_epi16(*vpr, value as i32, 0), 106 + _ => unreachable!(), 107 + }; 108 + } 109 + } 110 + 111 + pub fn get_vpr16(vpr: __m128i, element: u8) -> u16 { 112 + unsafe { 113 + match element & 7 { 114 + 0 => _mm_extract_epi16(vpr, 7) as u16, 115 + 1 => _mm_extract_epi16(vpr, 6) as u16, 116 + 2 => _mm_extract_epi16(vpr, 5) as u16, 117 + 3 => _mm_extract_epi16(vpr, 4) as u16, 118 + 4 => _mm_extract_epi16(vpr, 3) as u16, 119 + 5 => _mm_extract_epi16(vpr, 2) as u16, 120 + 6 => _mm_extract_epi16(vpr, 1) as u16, 121 + 7 => _mm_extract_epi16(vpr, 0) as u16, 122 + _ => unreachable!(), 123 + } 124 + } 125 + } 126 + 127 + fn modify_vpr32(vpr: &mut __m128i, element: u8, value: u32) { 128 + unsafe { 129 + *vpr = match element & 3 { 130 + 0 => _mm_insert_epi32(*vpr, value as i32, 3), 131 + 1 => _mm_insert_epi32(*vpr, value as i32, 2), 132 + 2 => _mm_insert_epi32(*vpr, value as i32, 1), 133 + 3 => _mm_insert_epi32(*vpr, value as i32, 0), 134 + _ => unreachable!(), 135 + }; 136 + } 137 + } 138 + 139 + fn get_vpr32(vpr: __m128i, element: u8) -> u32 { 140 + unsafe { 141 + match element & 3 { 142 + 0 => _mm_extract_epi32(vpr, 3) as u32, 143 + 1 => _mm_extract_epi32(vpr, 2) as u32, 144 + 2 => _mm_extract_epi32(vpr, 1) as u32, 145 + 3 => _mm_extract_epi32(vpr, 0) as u32, 146 + _ => unreachable!(), 147 + } 148 + } 149 + } 150 + 151 + fn modify_vpr64(vpr: &mut __m128i, element: u8, value: u64) { 152 + unsafe { 153 + *vpr = match element & 1 { 154 + 0 => _mm_insert_epi64(*vpr, value as i64, 1), 155 + 1 => _mm_insert_epi64(*vpr, value as i64, 0), 156 + _ => unreachable!(), 157 + }; 158 + } 159 + } 160 + 161 + fn get_vpr64(vpr: __m128i, element: u8) -> u64 { 162 + unsafe { 163 + match element & 1 { 164 + 0 => _mm_extract_epi64(vpr, 1) as u64, 165 + 1 => _mm_extract_epi64(vpr, 0) as u64, 166 + _ => unreachable!(), 167 + } 168 + } 169 + } 170 + 171 + fn modify_vpr128(vpr: &mut __m128i, value: u128) { 172 + unsafe { *vpr = std::mem::transmute::<u128, __m128i>(value) } 173 + } 174 + 175 + fn get_vpr128(vpr: __m128i) -> u128 { 176 + unsafe { std::mem::transmute::<__m128i, u128>(vpr) } 177 + } 178 + 179 + pub fn j(device: &mut crate::Device, opcode: u32) { 180 + if crate::cpu::in_delay_slot_taken(device) { 181 + return; 182 + } 183 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 184 + device.rsp.cpu.branch_state.pc = 185 + (device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] + 4) & 0xF0000000 186 + | ((opcode & 0x3FFFFFF) << 2) 187 + } 188 + 189 + pub fn jal(device: &mut crate::Device, opcode: u32) { 190 + if crate::cpu::in_delay_slot_taken(device) { 191 + device.rsp.cpu.gpr[31] = (device.rsp.cpu.branch_state.pc + 4) & 0xFFF 192 + } else { 193 + device.rsp.cpu.gpr[31] = 194 + (device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] + 8) & 0xFFF 195 + } 196 + if !crate::cpu::in_delay_slot_taken(device) { 197 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 198 + device.rsp.cpu.branch_state.pc = 199 + (device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] + 4) & 0xF0000000 200 + | ((opcode & 0x3FFFFFF) << 2) 201 + } else if !crate::cpu::in_delay_slot(device) { 202 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 203 + } 204 + } 205 + 206 + pub fn beq(device: &mut crate::Device, opcode: u32) { 207 + if device.rsp.cpu.gpr[rs(opcode) as usize] == device.rsp.cpu.gpr[rt(opcode) as usize] { 208 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 209 + device.rsp.cpu.branch_state.pc = device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] 210 + .wrapping_add(se16(imm(opcode) as i16) << 2) 211 + + 4; 212 + } else { 213 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 214 + } 215 + } 216 + 217 + pub fn bne(device: &mut crate::Device, opcode: u32) { 218 + if device.rsp.cpu.gpr[rs(opcode) as usize] != device.rsp.cpu.gpr[rt(opcode) as usize] { 219 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 220 + device.rsp.cpu.branch_state.pc = device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] 221 + .wrapping_add(se16(imm(opcode) as i16) << 2) 222 + + 4; 223 + } else { 224 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 225 + } 226 + } 227 + 228 + pub fn blez(device: &mut crate::Device, opcode: u32) { 229 + if device.rsp.cpu.gpr[rs(opcode) as usize] as i32 <= 0 { 230 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 231 + device.rsp.cpu.branch_state.pc = device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] 232 + .wrapping_add(se16(imm(opcode) as i16) << 2) 233 + + 4; 234 + } else { 235 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 236 + } 237 + } 238 + 239 + pub fn bgtz(device: &mut crate::Device, opcode: u32) { 240 + if device.rsp.cpu.gpr[rs(opcode) as usize] as i32 > 0 { 241 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 242 + device.rsp.cpu.branch_state.pc = device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] 243 + .wrapping_add(se16(imm(opcode) as i16) << 2) 244 + + 4; 245 + } else { 246 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 247 + } 248 + } 249 + 250 + pub fn addi(device: &mut crate::Device, opcode: u32) { 251 + device.rsp.cpu.gpr[rt(opcode) as usize] = 252 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32) 253 + } 254 + 255 + pub fn addiu(device: &mut crate::Device, opcode: u32) { 256 + device.rsp.cpu.gpr[rt(opcode) as usize] = 257 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32) 258 + } 259 + 260 + pub fn slti(device: &mut crate::Device, opcode: u32) { 261 + device.rsp.cpu.gpr[rt(opcode) as usize] = 262 + ((device.rsp.cpu.gpr[rs(opcode) as usize] as i32) < (imm(opcode) as i16 as i32)) as u32 263 + } 264 + 265 + pub fn sltiu(device: &mut crate::Device, opcode: u32) { 266 + device.rsp.cpu.gpr[rt(opcode) as usize] = 267 + (device.rsp.cpu.gpr[rs(opcode) as usize] < (imm(opcode) as i16 as i32 as u32)) as u32 268 + } 269 + 270 + pub fn andi(device: &mut crate::Device, opcode: u32) { 271 + device.rsp.cpu.gpr[rt(opcode) as usize] = 272 + device.rsp.cpu.gpr[rs(opcode) as usize] & (imm(opcode)) as u32 273 + } 274 + 275 + pub fn ori(device: &mut crate::Device, opcode: u32) { 276 + device.rsp.cpu.gpr[rt(opcode) as usize] = 277 + device.rsp.cpu.gpr[rs(opcode) as usize] | (imm(opcode)) as u32 278 + } 279 + 280 + pub fn xori(device: &mut crate::Device, opcode: u32) { 281 + device.rsp.cpu.gpr[rt(opcode) as usize] = 282 + device.rsp.cpu.gpr[rs(opcode) as usize] ^ (imm(opcode)) as u32 283 + } 284 + 285 + pub fn lui(device: &mut crate::Device, opcode: u32) { 286 + device.rsp.cpu.gpr[rt(opcode) as usize] = (imm(opcode) as u32) << 16 287 + } 288 + 289 + pub fn lb(device: &mut crate::Device, opcode: u32) { 290 + let address = 291 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32); 292 + 293 + device.rsp.cpu.gpr[rt(opcode) as usize] = 294 + device.rsp.mem[address as usize & 0xFFF] as i8 as i32 as u32 295 + } 296 + 297 + pub fn lh(device: &mut crate::Device, opcode: u32) { 298 + let address = 299 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32); 300 + 301 + // Optimize for aligned access when possible 302 + let addr_masked = address as usize & 0xFFF; 303 + let value = if addr_masked <= 0xFFE { 304 + u16::from_be_bytes( 305 + device.rsp.mem[addr_masked..addr_masked + 2] 306 + .try_into() 307 + .unwrap(), 308 + ) 309 + } else { 310 + let w0 = device.rsp.mem[addr_masked]; 311 + let w1 = device.rsp.mem[(addr_masked + 1) & 0xFFF]; 312 + ((w0 as u16) << 8) | w1 as u16 313 + }; 314 + 315 + device.rsp.cpu.gpr[rt(opcode) as usize] = value as i16 as i32 as u32; 316 + } 317 + 318 + pub fn lw(device: &mut crate::Device, opcode: u32) { 319 + let address = 320 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32); 321 + 322 + // Optimize for aligned access when possible 323 + let addr_masked = address as usize & 0xFFF; 324 + let value = if addr_masked <= 0xFFC { 325 + u32::from_be_bytes( 326 + device.rsp.mem[addr_masked..addr_masked + 4] 327 + .try_into() 328 + .unwrap(), 329 + ) 330 + } else { 331 + let mut value = 0; 332 + for i in 0..4 { 333 + value |= (device.rsp.mem[(addr_masked + i) & 0xFFF] as u32) << (8 * (3 - i)); 334 + } 335 + value 336 + }; 337 + 338 + device.rsp.cpu.gpr[rt(opcode) as usize] = value; 339 + } 340 + 341 + pub fn lbu(device: &mut crate::Device, opcode: u32) { 342 + let address = 343 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32); 344 + 345 + device.rsp.cpu.gpr[rt(opcode) as usize] = device.rsp.mem[address as usize & 0xFFF] as u32 346 + } 347 + 348 + pub fn lhu(device: &mut crate::Device, opcode: u32) { 349 + let address = 350 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32); 351 + 352 + // Optimize for aligned access when possible 353 + let addr_masked = address as usize & 0xFFF; 354 + let value = if addr_masked <= 0xFFE { 355 + u16::from_be_bytes( 356 + device.rsp.mem[addr_masked..addr_masked + 2] 357 + .try_into() 358 + .unwrap(), 359 + ) 360 + } else { 361 + let w0 = device.rsp.mem[addr_masked]; 362 + let w1 = device.rsp.mem[(addr_masked + 1) & 0xFFF]; 363 + ((w0 as u16) << 8) | w1 as u16 364 + }; 365 + 366 + device.rsp.cpu.gpr[rt(opcode) as usize] = value as u32; 367 + } 368 + 369 + pub fn lwu(device: &mut crate::Device, opcode: u32) { 370 + let address = 371 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32); 372 + 373 + // Optimize for aligned access when possible 374 + let addr_masked = address as usize & 0xFFF; 375 + let value = if addr_masked <= 0xFFC { 376 + u32::from_be_bytes( 377 + device.rsp.mem[addr_masked..addr_masked + 4] 378 + .try_into() 379 + .unwrap(), 380 + ) 381 + } else { 382 + let mut value = 0; 383 + for i in 0..4 { 384 + value |= (device.rsp.mem[(addr_masked + i) & 0xFFF] as u32) << (8 * (3 - i)); 385 + } 386 + value 387 + }; 388 + 389 + device.rsp.cpu.gpr[rt(opcode) as usize] = value; 390 + } 391 + 392 + pub fn sb(device: &mut crate::Device, opcode: u32) { 393 + let address = 394 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32); 395 + 396 + device.rsp.mem[address as usize & 0xFFF] = (device.rsp.cpu.gpr[rt(opcode) as usize]) as u8; 397 + } 398 + 399 + pub fn sh(device: &mut crate::Device, opcode: u32) { 400 + let address = 401 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32); 402 + 403 + let addr_masked = address as usize & 0xFFF; 404 + let value = device.rsp.cpu.gpr[rt(opcode) as usize] as u16; 405 + // Optimize for aligned access when possible 406 + if addr_masked <= 0xFFE { 407 + device.rsp.mem[addr_masked..addr_masked + 2].copy_from_slice(&value.to_be_bytes()); 408 + } else { 409 + device.rsp.mem[addr_masked] = (value >> 8) as u8; 410 + device.rsp.mem[(addr_masked + 1) & 0xFFF] = value as u8; 411 + } 412 + } 413 + 414 + pub fn sw(device: &mut crate::Device, opcode: u32) { 415 + let address = 416 + device.rsp.cpu.gpr[rs(opcode) as usize].wrapping_add(imm(opcode) as i16 as i32 as u32); 417 + 418 + let addr_masked = address as usize & 0xFFF; 419 + let value = device.rsp.cpu.gpr[rt(opcode) as usize]; 420 + // Optimize for aligned access when possible 421 + if addr_masked <= 0xFFC { 422 + device.rsp.mem[addr_masked..addr_masked + 4].copy_from_slice(&value.to_be_bytes()); 423 + } else { 424 + for i in 0..4 { 425 + device.rsp.mem[(addr_masked + i) & 0xFFF] = (value >> ((3 - i) * 8)) as u8; 426 + } 427 + } 428 + } 429 + 430 + pub fn sll(device: &mut crate::Device, opcode: u32) { 431 + device.rsp.cpu.gpr[rd(opcode) as usize] = device.rsp.cpu.gpr[rt(opcode) as usize] << sa(opcode) 432 + } 433 + 434 + pub fn srl(device: &mut crate::Device, opcode: u32) { 435 + device.rsp.cpu.gpr[rd(opcode) as usize] = device.rsp.cpu.gpr[rt(opcode) as usize] >> sa(opcode) 436 + } 437 + 438 + pub fn sra(device: &mut crate::Device, opcode: u32) { 439 + device.rsp.cpu.gpr[rd(opcode) as usize] = 440 + ((device.rsp.cpu.gpr[rt(opcode) as usize] as i32) >> sa(opcode)) as u32 441 + } 442 + 443 + pub fn sllv(device: &mut crate::Device, opcode: u32) { 444 + device.rsp.cpu.gpr[rd(opcode) as usize] = 445 + device.rsp.cpu.gpr[rt(opcode) as usize] << (device.rsp.cpu.gpr[rs(opcode) as usize] & 31) 446 + } 447 + 448 + pub fn srlv(device: &mut crate::Device, opcode: u32) { 449 + device.rsp.cpu.gpr[rd(opcode) as usize] = 450 + device.rsp.cpu.gpr[rt(opcode) as usize] >> (device.rsp.cpu.gpr[rs(opcode) as usize] & 31) 451 + } 452 + 453 + pub fn srav(device: &mut crate::Device, opcode: u32) { 454 + device.rsp.cpu.gpr[rd(opcode) as usize] = ((device.rsp.cpu.gpr[rt(opcode) as usize] as i32) 455 + >> (device.rsp.cpu.gpr[rs(opcode) as usize] & 31)) 456 + as u32 457 + } 458 + 459 + pub fn jr(device: &mut crate::Device, opcode: u32) { 460 + if !crate::cpu::in_delay_slot_taken(device) { 461 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 462 + device.rsp.cpu.branch_state.pc = device.rsp.cpu.gpr[rs(opcode) as usize] 463 + } else if !crate::cpu::in_delay_slot(device) { 464 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 465 + } 466 + } 467 + 468 + pub fn jalr(device: &mut crate::Device, opcode: u32) { 469 + let in_delay_slot_taken = crate::cpu::in_delay_slot_taken(device); 470 + 471 + if !in_delay_slot_taken { 472 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 473 + device.rsp.cpu.branch_state.pc = device.rsp.cpu.gpr[rs(opcode) as usize] 474 + } else if !crate::cpu::in_delay_slot(device) { 475 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 476 + } 477 + 478 + if in_delay_slot_taken { 479 + device.rsp.cpu.gpr[rd(opcode) as usize] = (device.rsp.cpu.branch_state.pc + 4) & 0xFFF 480 + } else { 481 + device.rsp.cpu.gpr[rd(opcode) as usize] = 482 + (device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] + 8) & 0xFFF 483 + } 484 + } 485 + 486 + pub fn break_(device: &mut crate::Device, _opcode: u32) { 487 + device.rsp.cpu.broken = true; 488 + } 489 + 490 + pub fn add(device: &mut crate::Device, opcode: u32) { 491 + device.rsp.cpu.gpr[rd(opcode) as usize] = device.rsp.cpu.gpr[rs(opcode) as usize] 492 + .wrapping_add(device.rsp.cpu.gpr[rt(opcode) as usize]) 493 + } 494 + 495 + pub fn addu(device: &mut crate::Device, opcode: u32) { 496 + device.rsp.cpu.gpr[rd(opcode) as usize] = device.rsp.cpu.gpr[rs(opcode) as usize] 497 + .wrapping_add(device.rsp.cpu.gpr[rt(opcode) as usize]) 498 + } 499 + 500 + pub fn sub(device: &mut crate::Device, opcode: u32) { 501 + device.rsp.cpu.gpr[rd(opcode) as usize] = device.rsp.cpu.gpr[rs(opcode) as usize] 502 + .wrapping_sub(device.rsp.cpu.gpr[rt(opcode) as usize]) 503 + } 504 + 505 + pub fn subu(device: &mut crate::Device, opcode: u32) { 506 + device.rsp.cpu.gpr[rd(opcode) as usize] = device.rsp.cpu.gpr[rs(opcode) as usize] 507 + .wrapping_sub(device.rsp.cpu.gpr[rt(opcode) as usize]) 508 + } 509 + 510 + pub fn and(device: &mut crate::Device, opcode: u32) { 511 + device.rsp.cpu.gpr[rd(opcode) as usize] = 512 + device.rsp.cpu.gpr[rs(opcode) as usize] & device.rsp.cpu.gpr[rt(opcode) as usize] 513 + } 514 + 515 + pub fn or(device: &mut crate::Device, opcode: u32) { 516 + device.rsp.cpu.gpr[rd(opcode) as usize] = 517 + device.rsp.cpu.gpr[rs(opcode) as usize] | device.rsp.cpu.gpr[rt(opcode) as usize] 518 + } 519 + 520 + pub fn xor(device: &mut crate::Device, opcode: u32) { 521 + device.rsp.cpu.gpr[rd(opcode) as usize] = 522 + device.rsp.cpu.gpr[rs(opcode) as usize] ^ device.rsp.cpu.gpr[rt(opcode) as usize] 523 + } 524 + 525 + pub fn nor(device: &mut crate::Device, opcode: u32) { 526 + device.rsp.cpu.gpr[rd(opcode) as usize] = 527 + !(device.rsp.cpu.gpr[rs(opcode) as usize] | device.rsp.cpu.gpr[rt(opcode) as usize]) 528 + } 529 + 530 + pub fn slt(device: &mut crate::Device, opcode: u32) { 531 + device.rsp.cpu.gpr[rd(opcode) as usize] = ((device.rsp.cpu.gpr[rs(opcode) as usize] as i32) 532 + < (device.rsp.cpu.gpr[rt(opcode) as usize] as i32)) 533 + as u32 534 + } 535 + 536 + pub fn sltu(device: &mut crate::Device, opcode: u32) { 537 + device.rsp.cpu.gpr[rd(opcode) as usize] = 538 + (device.rsp.cpu.gpr[rs(opcode) as usize] < device.rsp.cpu.gpr[rt(opcode) as usize]) as u32 539 + } 540 + 541 + pub fn bltz(device: &mut crate::Device, opcode: u32) { 542 + if (device.rsp.cpu.gpr[rs(opcode) as usize] as i32) < 0 { 543 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 544 + device.rsp.cpu.branch_state.pc = device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] 545 + .wrapping_add(se16(imm(opcode) as i16) << 2) 546 + + 4; 547 + } else { 548 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 549 + } 550 + } 551 + 552 + pub fn bgez(device: &mut crate::Device, opcode: u32) { 553 + if device.rsp.cpu.gpr[rs(opcode) as usize] as i32 >= 0 { 554 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 555 + device.rsp.cpu.branch_state.pc = device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] 556 + .wrapping_add(se16(imm(opcode) as i16) << 2) 557 + + 4; 558 + } else { 559 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 560 + } 561 + } 562 + 563 + pub fn bltzal(device: &mut crate::Device, opcode: u32) { 564 + if (device.rsp.cpu.gpr[rs(opcode) as usize] as i32) < 0 { 565 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 566 + device.rsp.cpu.branch_state.pc = device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] 567 + .wrapping_add(se16(imm(opcode) as i16) << 2) 568 + + 4; 569 + } else { 570 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 571 + } 572 + device.rsp.cpu.gpr[31] = 573 + (device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] + 8) & 0xFFF 574 + } 575 + 576 + pub fn bgezal(device: &mut crate::Device, opcode: u32) { 577 + if (device.rsp.cpu.gpr[rs(opcode) as usize] as i32) >= 0 { 578 + device.rsp.cpu.branch_state.state = crate::BranchStepState::Take; 579 + device.rsp.cpu.branch_state.pc = device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] 580 + .wrapping_add(se16(imm(opcode) as i16) << 2) 581 + + 4; 582 + } else { 583 + device.rsp.cpu.branch_state.state = crate::BranchStepState::NotTaken; 584 + } 585 + device.rsp.cpu.gpr[31] = 586 + (device.rsp.regs2[crate::rsp_interface::SP_PC_REG as usize] + 8) & 0xFFF 587 + } 588 + 589 + pub fn mfc0(device: &mut crate::Device, opcode: u32) { 590 + device.rsp.cpu.cycle_counter += 2; 591 + if rd(opcode) < crate::rsp_interface::SP_REGS_COUNT { 592 + device.rsp.cpu.gpr[rt(opcode) as usize] = 593 + crate::rsp_interface::read_regs(device, (rd(opcode) << 2) as u64, 0u64) 594 + } else { 595 + device.rsp.cpu.gpr[rt(opcode) as usize] = crate::rdp::read_regs_dpc( 596 + device, 597 + ((rd(opcode) - crate::rsp_interface::SP_REGS_COUNT) << 2) as u64, 598 + 0u64, 599 + ) 600 + } 601 + } 602 + 603 + pub fn mtc0(device: &mut crate::Device, opcode: u32) { 604 + if rd(opcode) < crate::rsp_interface::SP_REGS_COUNT { 605 + crate::rsp_interface::write_regs( 606 + device, 607 + (rd(opcode) << 2) as u64, 608 + device.rsp.cpu.gpr[rt(opcode) as usize], 609 + 0xFFFFFFFF, 610 + ) 611 + } else { 612 + crate::rdp::write_regs_dpc( 613 + device, 614 + ((rd(opcode) - crate::rsp_interface::SP_REGS_COUNT) << 2) as u64, 615 + device.rsp.cpu.gpr[rt(opcode) as usize], 616 + 0xFFFFFFFF, 617 + ) 618 + } 619 + if rd(opcode) == crate::rsp_interface::SP_STATUS_REG 620 + && device.rsp.cpu.gpr[rt(opcode) as usize] & crate::rsp_interface::SP_SET_HALT != 0 621 + { 622 + device.rsp.regs[crate::rsp_interface::SP_STATUS_REG as usize] &= 623 + !crate::rsp_interface::SP_STATUS_HALT; // set halt when event happens 624 + device.rsp.cpu.halted = true // the RSP can halt itself by setting SP_SET_HALT 625 + } 626 + } 627 + 628 + pub fn mfc2(device: &mut crate::Device, opcode: u32) { 629 + let hi = get_vpr8(device.rsp.cpu.vpr[rd(opcode) as usize], velement(opcode)); 630 + let lo = get_vpr8( 631 + device.rsp.cpu.vpr[rd(opcode) as usize], 632 + velement(opcode) + 1, 633 + ); 634 + device.rsp.cpu.gpr[rt(opcode) as usize] = 635 + (((hi as u16) << 8) | (lo as u16)) as i16 as i32 as u32 636 + } 637 + 638 + pub fn cfc2(device: &mut crate::Device, opcode: u32) { 639 + let hi; 640 + let lo; 641 + let zero = unsafe { _mm_setzero_si128() }; 642 + match rd(opcode) & 3 { 643 + 0x00 => { 644 + hi = &device.rsp.cpu.vcoh; 645 + lo = &device.rsp.cpu.vcol; 646 + } 647 + 0x01 => { 648 + hi = &device.rsp.cpu.vcch; 649 + lo = &device.rsp.cpu.vccl; 650 + } 651 + 0x02 | 0x03 => { 652 + hi = &zero; 653 + lo = &device.rsp.cpu.vce; 654 + } 655 + _ => { 656 + panic!("unknown cfc2") 657 + } 658 + } 659 + 660 + unsafe { 661 + let reverse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 662 + device.rsp.cpu.gpr[rt(opcode) as usize] = 663 + (_mm_movemask_epi8(_mm_shuffle_epi8(_mm_packs_epi16(*hi, *lo), reverse))) as i16 as u32; 664 + } 665 + } 666 + 667 + pub fn mtc2(device: &mut crate::Device, opcode: u32) { 668 + modify_vpr8( 669 + &mut device.rsp.cpu.vpr[rd(opcode) as usize], 670 + velement(opcode), 671 + (device.rsp.cpu.gpr[rt(opcode) as usize] >> 8) as u8, 672 + ); 673 + if velement(opcode) != 15 { 674 + modify_vpr8( 675 + &mut device.rsp.cpu.vpr[rd(opcode) as usize], 676 + velement(opcode) + 1, 677 + device.rsp.cpu.gpr[rt(opcode) as usize] as u8, 678 + ); 679 + } 680 + } 681 + 682 + pub fn ctc2(device: &mut crate::Device, opcode: u32) { 683 + let hi; 684 + let lo; 685 + let mut zero = unsafe { _mm_setzero_si128() }; 686 + match rd(opcode) & 3 { 687 + 0x00 => { 688 + hi = &mut device.rsp.cpu.vcoh; 689 + lo = &mut device.rsp.cpu.vcol; 690 + } 691 + 0x01 => { 692 + hi = &mut device.rsp.cpu.vcch; 693 + lo = &mut device.rsp.cpu.vccl; 694 + } 695 + 0x02 | 0x03 => { 696 + hi = &mut zero; 697 + lo = &mut device.rsp.cpu.vce; 698 + } 699 + _ => { 700 + panic!("unknown ctc2") 701 + } 702 + } 703 + 704 + unsafe { 705 + let mask = _mm_set_epi16( 706 + 0x0101, 707 + 0x0202, 708 + 0x0404, 709 + 0x0808, 710 + 0x1010, 711 + 0x2020, 712 + 0x4040, 713 + 0x8080u16 as i16, 714 + ); 715 + *lo = _mm_cmpeq_epi8( 716 + _mm_and_si128( 717 + _mm_set1_epi8(!device.rsp.cpu.gpr[rt(opcode) as usize] as i8), 718 + mask, 719 + ), 720 + _mm_setzero_si128(), 721 + ); 722 + *hi = _mm_cmpeq_epi8( 723 + _mm_and_si128( 724 + _mm_set1_epi8(!(device.rsp.cpu.gpr[rt(opcode) as usize] >> 8) as i8), 725 + mask, 726 + ), 727 + _mm_setzero_si128(), 728 + ); 729 + } 730 + } 731 + 732 + pub fn lbv(device: &mut crate::Device, opcode: u32) { 733 + let address = device.rsp.cpu.gpr[rs(opcode) as usize] 734 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 0)); 735 + 736 + let element = velement(opcode); 737 + modify_vpr8( 738 + &mut device.rsp.cpu.vpr[rt(opcode) as usize], 739 + element, 740 + device.rsp.mem[(address & 0xFFF) as usize], 741 + ); 742 + } 743 + 744 + pub fn lsv(device: &mut crate::Device, opcode: u32) { 745 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 746 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 1)); 747 + 748 + let mut element = velement(opcode); 749 + 750 + if element.is_multiple_of(2) { 751 + modify_vpr16( 752 + &mut device.rsp.cpu.vpr[rt(opcode) as usize], 753 + element / 2, 754 + ((device.rsp.mem[(address & 0xFFF) as usize] as u16) << 8) 755 + | (device.rsp.mem[((address + 1) & 0xFFF) as usize] as u16), 756 + ); 757 + } else { 758 + let end = std::cmp::min(element + 2, 16); 759 + while element < end { 760 + modify_vpr8( 761 + &mut device.rsp.cpu.vpr[rt(opcode) as usize], 762 + element, 763 + device.rsp.mem[(address & 0xFFF) as usize], 764 + ); 765 + address = address.wrapping_add(1); 766 + element += 1; 767 + } 768 + } 769 + } 770 + 771 + pub fn llv(device: &mut crate::Device, opcode: u32) { 772 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 773 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 2)); 774 + 775 + let mut element = velement(opcode); 776 + 777 + if element.is_multiple_of(4) { 778 + // Optimize for aligned access when possible 779 + let addr_masked = address as usize & 0xFFF; 780 + let value = if addr_masked <= 0xFFC { 781 + u32::from_be_bytes( 782 + device.rsp.mem[addr_masked..addr_masked + 4] 783 + .try_into() 784 + .unwrap(), 785 + ) 786 + } else { 787 + let mut value = 0; 788 + for i in 0..4 { 789 + value |= (device.rsp.mem[(addr_masked + i) & 0xFFF] as u32) << (8 * (3 - i)); 790 + } 791 + value 792 + }; 793 + modify_vpr32( 794 + &mut device.rsp.cpu.vpr[rt(opcode) as usize], 795 + element / 4, 796 + value, 797 + ); 798 + } else { 799 + let end = std::cmp::min(element + 4, 16); 800 + while element < end { 801 + modify_vpr8( 802 + &mut device.rsp.cpu.vpr[rt(opcode) as usize], 803 + element, 804 + device.rsp.mem[(address & 0xFFF) as usize], 805 + ); 806 + address = address.wrapping_add(1); 807 + element += 1; 808 + } 809 + } 810 + } 811 + 812 + pub fn ldv(device: &mut crate::Device, opcode: u32) { 813 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 814 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 3)); 815 + 816 + let mut element = velement(opcode); 817 + 818 + if element.is_multiple_of(8) { 819 + // Optimize for aligned access when possible 820 + let addr_masked = address as usize & 0xFFF; 821 + let value = if addr_masked <= 0xFF8 { 822 + u64::from_be_bytes( 823 + device.rsp.mem[addr_masked..addr_masked + 8] 824 + .try_into() 825 + .unwrap(), 826 + ) 827 + } else { 828 + let mut value = 0; 829 + for i in 0..8 { 830 + value |= (device.rsp.mem[(addr_masked + i) & 0xFFF] as u64) << (8 * (7 - i)); 831 + } 832 + value 833 + }; 834 + modify_vpr64( 835 + &mut device.rsp.cpu.vpr[rt(opcode) as usize], 836 + element / 8, 837 + value, 838 + ); 839 + } else { 840 + let end = std::cmp::min(element + 8, 16); 841 + while element < end { 842 + modify_vpr8( 843 + &mut device.rsp.cpu.vpr[rt(opcode) as usize], 844 + element, 845 + device.rsp.mem[(address & 0xFFF) as usize], 846 + ); 847 + address = address.wrapping_add(1); 848 + element += 1; 849 + } 850 + } 851 + } 852 + 853 + pub fn lqv(device: &mut crate::Device, opcode: u32) { 854 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 855 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 856 + 857 + let mut element = velement(opcode); 858 + 859 + if element == 0 && address.is_multiple_of(16) { 860 + // Optimize for aligned 16-byte access 861 + let addr_masked = address as usize & 0xFFF; 862 + let value = if addr_masked <= 0xFF0 { 863 + u128::from_be_bytes( 864 + device.rsp.mem[addr_masked..addr_masked + 16] 865 + .try_into() 866 + .unwrap(), 867 + ) 868 + } else { 869 + let mut value = 0; 870 + for i in 0..16 { 871 + value |= (device.rsp.mem[(addr_masked + i) & 0xFFF] as u128) << (8 * (15 - i)); 872 + } 873 + value 874 + }; 875 + modify_vpr128(&mut device.rsp.cpu.vpr[rt(opcode) as usize], value); 876 + } else { 877 + let end = std::cmp::min( 878 + 16u8.wrapping_add(element) 879 + .wrapping_sub((address & 15) as u8), 880 + 16, 881 + ); 882 + while element < end { 883 + modify_vpr8( 884 + &mut device.rsp.cpu.vpr[rt(opcode) as usize], 885 + element, 886 + device.rsp.mem[(address & 0xFFF) as usize], 887 + ); 888 + address = address.wrapping_add(1); 889 + element += 1; 890 + } 891 + } 892 + } 893 + 894 + pub fn lrv(device: &mut crate::Device, opcode: u32) { 895 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 896 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 897 + 898 + let mut element = 16u8.wrapping_sub(((address & 15) as u8).wrapping_sub(velement(opcode))); 899 + address &= !15; 900 + if element == 0 { 901 + // Optimize for aligned 16-byte access 902 + let addr_masked = address as usize & 0xFFF; 903 + let value = if addr_masked <= 0xFF0 { 904 + u128::from_be_bytes( 905 + device.rsp.mem[addr_masked..addr_masked + 16] 906 + .try_into() 907 + .unwrap(), 908 + ) 909 + } else { 910 + let mut value = 0; 911 + for i in 0..16 { 912 + value |= (device.rsp.mem[(addr_masked + i) & 0xFFF] as u128) << (8 * (15 - i)); 913 + } 914 + value 915 + }; 916 + modify_vpr128(&mut device.rsp.cpu.vpr[rt(opcode) as usize], value); 917 + } else { 918 + while element < 16 { 919 + modify_vpr8( 920 + &mut device.rsp.cpu.vpr[rt(opcode) as usize], 921 + element, 922 + device.rsp.mem[(address & 0xFFF) as usize], 923 + ); 924 + address = address.wrapping_add(1); 925 + element += 1; 926 + } 927 + } 928 + } 929 + 930 + pub fn lpv(device: &mut crate::Device, opcode: u32) { 931 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 932 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 3)); 933 + 934 + let index = ((address & 7) as u8).wrapping_sub(velement(opcode)); 935 + address &= !7; 936 + let mut value = 0; 937 + for i in 0..8 { 938 + value |= (((device.rsp.mem 939 + [((address.wrapping_add(((index.wrapping_add(i)) & 15) as u32)) & 0xFFF) as usize] 940 + as u16) 941 + << 8) as u128) 942 + << (16 * (7 - i)); 943 + } 944 + modify_vpr128(&mut device.rsp.cpu.vpr[rt(opcode) as usize], value); 945 + } 946 + 947 + pub fn luv(device: &mut crate::Device, opcode: u32) { 948 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 949 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 3)); 950 + 951 + let index = ((address & 7) as u8).wrapping_sub(velement(opcode)); 952 + address &= !7; 953 + let mut value = 0; 954 + for i in 0..8 { 955 + value |= (((device.rsp.mem 956 + [((address.wrapping_add(((index.wrapping_add(i)) & 15) as u32)) & 0xFFF) as usize] 957 + as u16) 958 + << 7) as u128) 959 + << (16 * (7 - i)); 960 + } 961 + modify_vpr128(&mut device.rsp.cpu.vpr[rt(opcode) as usize], value); 962 + } 963 + 964 + pub fn lhv(device: &mut crate::Device, opcode: u32) { 965 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 966 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 967 + 968 + let index = ((address & 7) as u8).wrapping_sub(velement(opcode)); 969 + address &= !7; 970 + let mut value = 0; 971 + for i in 0..8 { 972 + value |= (((device.rsp.mem 973 + [((address.wrapping_add(((index.wrapping_add(i * 2)) & 15) as u32)) & 0xFFF) as usize] 974 + as u16) 975 + << 7) as u128) 976 + << (16 * (7 - i)); 977 + } 978 + modify_vpr128(&mut device.rsp.cpu.vpr[rt(opcode) as usize], value); 979 + } 980 + 981 + pub fn lfv(device: &mut crate::Device, opcode: u32) { 982 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 983 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 984 + 985 + let index = ((address & 7) as u8).wrapping_sub(velement(opcode)); 986 + address &= !7; 987 + let start = velement(opcode); 988 + let end = std::cmp::min(start + 8, 16); 989 + let mut tmp: __m128i = unsafe { _mm_setzero_si128() }; 990 + let mut offset: u8 = 0; 991 + while offset < 4 { 992 + modify_vpr16( 993 + &mut tmp, 994 + offset, 995 + (device.rsp.mem[((address.wrapping_add(((index.wrapping_add(offset * 4)) & 15) as u32)) 996 + & 0xFFF) as usize] as u16) 997 + << 7, 998 + ); 999 + modify_vpr16( 1000 + &mut tmp, 1001 + offset + 4, 1002 + (device.rsp.mem[((address 1003 + .wrapping_add(((index.wrapping_add(offset * 4).wrapping_add(8)) & 15) as u32)) 1004 + & 0xFFF) as usize] as u16) 1005 + << 7, 1006 + ); 1007 + offset += 1; 1008 + } 1009 + offset = start; 1010 + while offset < end { 1011 + let value = get_vpr8(tmp, offset); 1012 + modify_vpr8(&mut device.rsp.cpu.vpr[rt(opcode) as usize], offset, value); 1013 + offset += 1; 1014 + } 1015 + } 1016 + 1017 + pub fn lwv(_device: &mut crate::Device, _opcode: u32) {} 1018 + 1019 + pub fn ltv(device: &mut crate::Device, opcode: u32) { 1020 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1021 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 1022 + 1023 + let begin = address & !7; 1024 + address = begin + (((velement(opcode)) as u32 + (address & 8)) & 15); 1025 + let vtbase = rt(opcode) & !7; 1026 + let mut vtoff = (velement(opcode)) as u32 >> 1; 1027 + for i in 0..8 { 1028 + modify_vpr8( 1029 + &mut device.rsp.cpu.vpr[(vtbase + vtoff) as usize], 1030 + i * 2, 1031 + device.rsp.mem[(address & 0xFFF) as usize], 1032 + ); 1033 + address = address.wrapping_add(1); 1034 + if address == begin + 16 { 1035 + address = begin 1036 + } 1037 + modify_vpr8( 1038 + &mut device.rsp.cpu.vpr[(vtbase + vtoff) as usize], 1039 + i * 2 + 1, 1040 + device.rsp.mem[(address & 0xFFF) as usize], 1041 + ); 1042 + address = address.wrapping_add(1); 1043 + if address == begin + 16 { 1044 + address = begin 1045 + } 1046 + vtoff = (vtoff + 1) & 7; 1047 + } 1048 + } 1049 + 1050 + pub fn sbv(device: &mut crate::Device, opcode: u32) { 1051 + let address = device.rsp.cpu.gpr[rs(opcode) as usize] 1052 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 0)); 1053 + 1054 + device.rsp.mem[(address & 0xFFF) as usize] = 1055 + get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], velement(opcode)) 1056 + } 1057 + 1058 + pub fn ssv(device: &mut crate::Device, opcode: u32) { 1059 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1060 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 1)); 1061 + 1062 + let mut element = velement(opcode); 1063 + 1064 + if element.is_multiple_of(2) { 1065 + let start = (address & 0xFFF) as usize; 1066 + device.rsp.mem[start..start + 2].copy_from_slice( 1067 + &get_vpr16(device.rsp.cpu.vpr[rt(opcode) as usize], element / 2).to_be_bytes(), 1068 + ); 1069 + } else { 1070 + let end = element + 2; 1071 + while element < end { 1072 + device.rsp.mem[(address & 0xFFF) as usize] = 1073 + get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], element); 1074 + address = address.wrapping_add(1); 1075 + element += 1; 1076 + } 1077 + } 1078 + } 1079 + 1080 + pub fn slv(device: &mut crate::Device, opcode: u32) { 1081 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1082 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 2)); 1083 + 1084 + let mut element = velement(opcode); 1085 + 1086 + if element.is_multiple_of(4) { 1087 + let start = (address & 0xFFF) as usize; 1088 + device.rsp.mem[start..start + 4].copy_from_slice( 1089 + &get_vpr32(device.rsp.cpu.vpr[rt(opcode) as usize], element / 4).to_be_bytes(), 1090 + ); 1091 + } else { 1092 + let end = element + 4; 1093 + while element < end { 1094 + device.rsp.mem[(address & 0xFFF) as usize] = 1095 + get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], element); 1096 + address = address.wrapping_add(1); 1097 + element += 1; 1098 + } 1099 + } 1100 + } 1101 + 1102 + pub fn sdv(device: &mut crate::Device, opcode: u32) { 1103 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1104 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 3)); 1105 + 1106 + let mut element = velement(opcode); 1107 + 1108 + if element.is_multiple_of(8) { 1109 + let start = (address & 0xFFF) as usize; 1110 + device.rsp.mem[start..start + 8].copy_from_slice( 1111 + &get_vpr64(device.rsp.cpu.vpr[rt(opcode) as usize], element / 8).to_be_bytes(), 1112 + ); 1113 + } else { 1114 + let end = element + 8; 1115 + while element < end { 1116 + device.rsp.mem[(address & 0xFFF) as usize] = 1117 + get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], element); 1118 + address = address.wrapping_add(1); 1119 + element += 1; 1120 + } 1121 + } 1122 + } 1123 + 1124 + pub fn sqv(device: &mut crate::Device, opcode: u32) { 1125 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1126 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 1127 + 1128 + let mut element = velement(opcode); 1129 + 1130 + if element == 0 && address.is_multiple_of(16) { 1131 + let start = (address & 0xFFF) as usize; 1132 + device.rsp.mem[start..start + 16] 1133 + .copy_from_slice(&get_vpr128(device.rsp.cpu.vpr[rt(opcode) as usize]).to_be_bytes()); 1134 + } else { 1135 + let end = element + (16 - (address & 15)) as u8; 1136 + while element < end { 1137 + device.rsp.mem[(address & 0xFFF) as usize] = 1138 + get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], element); 1139 + address = address.wrapping_add(1); 1140 + element += 1; 1141 + } 1142 + } 1143 + } 1144 + 1145 + pub fn srv(device: &mut crate::Device, opcode: u32) { 1146 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1147 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 1148 + 1149 + let mut element = velement(opcode); 1150 + let end = element + (address & 15) as u8; 1151 + let base = (16 - (address & 15)) as u8; 1152 + address &= !15; 1153 + 1154 + if element == 0 && base == 0 { 1155 + let start = (address & 0xFFF) as usize; 1156 + device.rsp.mem[start..start + 16] 1157 + .copy_from_slice(&get_vpr128(device.rsp.cpu.vpr[rt(opcode) as usize]).to_be_bytes()); 1158 + } else { 1159 + while element < end { 1160 + device.rsp.mem[(address & 0xFFF) as usize] = 1161 + get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], element + base); 1162 + address = address.wrapping_add(1); 1163 + element += 1; 1164 + } 1165 + } 1166 + } 1167 + 1168 + pub fn spv(device: &mut crate::Device, opcode: u32) { 1169 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1170 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 3)); 1171 + 1172 + let mut element = velement(opcode); 1173 + let end = element + 8; 1174 + while element < end { 1175 + if (element & 15) < 8 { 1176 + device.rsp.mem[(address & 0xFFF) as usize] = 1177 + get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], (element & 7) << 1); 1178 + } else { 1179 + device.rsp.mem[(address & 0xFFF) as usize] = 1180 + (get_vpr16(device.rsp.cpu.vpr[rt(opcode) as usize], element) >> 7) as u8; 1181 + } 1182 + address = address.wrapping_add(1); 1183 + element += 1; 1184 + } 1185 + } 1186 + 1187 + pub fn suv(device: &mut crate::Device, opcode: u32) { 1188 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1189 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 3)); 1190 + 1191 + let mut element = velement(opcode); 1192 + let end = element + 8; 1193 + while element < end { 1194 + if (element & 15) < 8 { 1195 + device.rsp.mem[(address & 0xFFF) as usize] = 1196 + (get_vpr16(device.rsp.cpu.vpr[rt(opcode) as usize], element) >> 7) as u8; 1197 + } else { 1198 + device.rsp.mem[(address & 0xFFF) as usize] = 1199 + get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], (element & 7) << 1); 1200 + } 1201 + address = address.wrapping_add(1); 1202 + element += 1; 1203 + } 1204 + } 1205 + 1206 + pub fn shv(device: &mut crate::Device, opcode: u32) { 1207 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1208 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 1209 + 1210 + let element = velement(opcode); 1211 + let index = (address & 7) as u8; 1212 + address &= !7; 1213 + for offset in 0..8 { 1214 + let byte_val = element + offset * 2; 1215 + let value = (get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], byte_val) << 1) 1216 + | (get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], byte_val + 1) >> 7); 1217 + device.rsp.mem[((address + ((index + offset * 2) & 15) as u32) & 0xFFF) as usize] = value; 1218 + } 1219 + } 1220 + 1221 + pub fn sfv(device: &mut crate::Device, opcode: u32) { 1222 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1223 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 1224 + let base = address & 7; 1225 + address &= !7; 1226 + let elements = match velement(opcode) { 1227 + 0 | 15 => [0, 1, 2, 3], 1228 + 1 => [6, 7, 4, 5], 1229 + 4 => [1, 2, 3, 0], 1230 + 5 => [7, 4, 5, 6], 1231 + 8 => [4, 5, 6, 7], 1232 + 11 => [3, 0, 1, 2], 1233 + 12 => [5, 6, 7, 4], 1234 + _ => { 1235 + device.rsp.mem[((address + (base & 15)) & 0xFFF) as usize] = 0; 1236 + device.rsp.mem[((address + ((base + 4) & 15)) & 0xFFF) as usize] = 0; 1237 + device.rsp.mem[((address + ((base + 8) & 15)) & 0xFFF) as usize] = 0; 1238 + device.rsp.mem[((address + ((base + 12) & 15)) & 0xFFF) as usize] = 0; 1239 + return; 1240 + } 1241 + }; 1242 + let mut offset = 0; 1243 + for element in elements { 1244 + device.rsp.mem[((address + ((base + offset) & 15)) & 0xFFF) as usize] = 1245 + (get_vpr16(device.rsp.cpu.vpr[rt(opcode) as usize], element) >> 7) as u8; 1246 + offset += 4; 1247 + } 1248 + } 1249 + 1250 + pub fn swv(device: &mut crate::Device, opcode: u32) { 1251 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1252 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 1253 + 1254 + let mut element = velement(opcode); 1255 + let end = element + 16; 1256 + let mut base = address & 7; 1257 + address &= !7; 1258 + while element < end { 1259 + device.rsp.mem[((address + (base & 15)) & 0xFFF) as usize] = 1260 + get_vpr8(device.rsp.cpu.vpr[rt(opcode) as usize], element); 1261 + base += 1; 1262 + element += 1; 1263 + } 1264 + } 1265 + 1266 + pub fn stv(device: &mut crate::Device, opcode: u32) { 1267 + let mut address = device.rsp.cpu.gpr[rs(opcode) as usize] 1268 + .wrapping_add(sign_extend_7bit_offset(voffset(opcode), 4)); 1269 + let start = rt(opcode) & !7; 1270 + let end = start + 8; 1271 + let mut element = 16 - (velement(opcode) & !1); 1272 + let mut base = (address & 7).wrapping_sub((velement(opcode) & !1) as u32); 1273 + address &= !7; 1274 + let mut offset = start; 1275 + while offset < end { 1276 + device.rsp.mem[((address + (base & 15)) & 0xFFF) as usize] = 1277 + get_vpr8(device.rsp.cpu.vpr[offset as usize], element); 1278 + base = base.wrapping_add(1); 1279 + element += 1; 1280 + device.rsp.mem[((address + (base & 15)) & 0xFFF) as usize] = 1281 + get_vpr8(device.rsp.cpu.vpr[offset as usize], element); 1282 + base = base.wrapping_add(1); 1283 + element += 1; 1284 + offset += 1; 1285 + } 1286 + } 1287 + 1288 + pub fn special_reserved(device: &mut crate::Device, opcode: u32) { 1289 + device.rsp.cpu.gpr[rd(opcode) as usize] = 1290 + device.rsp.cpu.gpr[rs(opcode) as usize] >> (device.rsp.cpu.gpr[rs(opcode) as usize] & 31) 1291 + } 1292 + 1293 + pub fn reserved(_device: &mut crate::Device, _opcode: u32) {}
+940
crates/rsp/src/vu_instructions.rs
··· 1 + // SPDX-FileCopyrightText: 2024 gopher64 contributors 2 + // SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 3 + // 4 + // SPDX-License-Identifier: GPL-3.0-or-later 5 + 6 + #[cfg(target_arch = "x86_64")] 7 + use std::arch::x86_64::*; 8 + 9 + use crate::su_instructions::{get_vpr16, modify_vpr16}; 10 + 11 + fn vt(opcode: u32) -> u32 { 12 + (opcode >> 16) & 0x1F 13 + } 14 + 15 + fn ve(opcode: u32) -> u32 { 16 + (opcode >> 21) & 0xF 17 + } 18 + 19 + fn vs(opcode: u32) -> u32 { 20 + (opcode >> 11) & 0x1F 21 + } 22 + 23 + fn vd(opcode: u32) -> u32 { 24 + (opcode >> 6) & 0x1F 25 + } 26 + 27 + fn de(opcode: u32) -> u32 { 28 + (opcode >> 11) & 0x7 29 + } 30 + 31 + fn clamp_signed_32(value: i32) -> i16 { 32 + value.clamp(-32768, 32767) as i16 33 + } 34 + 35 + fn clamp_signed_64(value: i64) -> i16 { 36 + value.clamp(-32768, 32767) as i16 37 + } 38 + 39 + fn s_clip(x: i64, bits: u32) -> i64 { 40 + let mask = (1i64 << bits) - 1; 41 + let value = x & mask; 42 + (value << (64 - bits)) >> (64 - bits) 43 + } 44 + 45 + fn compute_reciprocal(input: i32, reciprocals: &[u16]) -> u32 { 46 + let mask = input >> 31; 47 + let mut data = input ^ mask; 48 + if input > -32768 { 49 + data -= mask 50 + } 51 + if data == 0 { 52 + 0x7fffffff 53 + } else if input == -32768 { 54 + 0xffff0000 55 + } else { 56 + let shift = (data as u32).leading_zeros(); 57 + let index = (((data as u64) << shift) & 0x7fc00000) >> 22; 58 + let mut result = reciprocals[index as usize] as u32; 59 + result = (0x10000 | result) << 14; 60 + (result >> (31 - shift)) ^ mask as u32 61 + } 62 + } 63 + 64 + fn compute_inverse_sqrt(input: i32, inverse_square_roots: &[u16]) -> u32 { 65 + let mask = input >> 31; 66 + let mut data = input ^ mask; 67 + if input > -32768 { 68 + data -= mask 69 + } 70 + if data == 0 { 71 + 0x7fffffff 72 + } else if input == -32768 { 73 + 0xffff0000 74 + } else { 75 + let shift = (data as u32).leading_zeros(); 76 + let index = (((data as u64) << shift) & 0x7fc00000) as u32 >> 22; 77 + let mut result = inverse_square_roots[((index & 0x1fe) | (shift & 1)) as usize] as u32; 78 + result = (0x10000 | result) << 14; 79 + (result >> ((31 - shift) >> 1)) ^ mask as u32 80 + } 81 + } 82 + 83 + fn vte(device: &crate::Device, vt: u32, index: usize) -> __m128i { 84 + unsafe { 85 + _mm_shuffle_epi8( 86 + device.rsp.cpu.vpr[vt as usize], 87 + device.rsp.cpu.shuffle[index], 88 + ) 89 + } 90 + } 91 + 92 + pub fn vmulf(device: &mut crate::Device, opcode: u32) { 93 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 94 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 95 + unsafe { 96 + let lo = _mm_mullo_epi16(vs_reg, vte); 97 + let hi = _mm_mulhi_epi16(vs_reg, vte); 98 + 99 + let sign1 = _mm_srli_epi16(lo, 15); 100 + let lo_doubled = _mm_add_epi16(lo, lo); 101 + let sign2 = _mm_srli_epi16(lo_doubled, 15); 102 + 103 + device.rsp.cpu.accl = _mm_add_epi16(_mm_set1_epi16(-32768_i16), lo_doubled); // round + lo 104 + device.rsp.cpu.accm = _mm_add_epi16(_mm_slli_epi16(hi, 1), _mm_add_epi16(sign1, sign2)); 105 + let neg = _mm_srai_epi16(device.rsp.cpu.accm, 15); 106 + 107 + let neq = _mm_cmpeq_epi16(vs_reg, vte); 108 + let eq = _mm_and_si128(neq, neg); 109 + 110 + device.rsp.cpu.acch = _mm_andnot_si128(neq, neg); 111 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_add_epi16(device.rsp.cpu.accm, eq); 112 + } 113 + } 114 + 115 + pub fn vmulu(device: &mut crate::Device, opcode: u32) { 116 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 117 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 118 + unsafe { 119 + let lo = _mm_mullo_epi16(vs_reg, vte); 120 + let hi = _mm_mulhi_epi16(vs_reg, vte); 121 + 122 + let sign1 = _mm_srli_epi16(lo, 15); 123 + let lo_doubled = _mm_add_epi16(lo, lo); 124 + let sign2 = _mm_srli_epi16(lo_doubled, 15); 125 + 126 + device.rsp.cpu.accl = _mm_add_epi16(_mm_set1_epi16(-32768_i16), lo_doubled); // round + lo 127 + device.rsp.cpu.accm = _mm_add_epi16(_mm_slli_epi16(hi, 1), _mm_add_epi16(sign1, sign2)); 128 + let neg = _mm_srai_epi16(device.rsp.cpu.accm, 15); 129 + 130 + let neq = _mm_cmpeq_epi16(vs_reg, vte); 131 + 132 + device.rsp.cpu.acch = _mm_andnot_si128(neq, neg); 133 + let result = _mm_or_si128(device.rsp.cpu.accm, neg); 134 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_andnot_si128(device.rsp.cpu.acch, result); 135 + } 136 + } 137 + 138 + pub fn vrndp(device: &mut crate::Device, opcode: u32) { 139 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 140 + let acch: &mut __m128i = &mut device.rsp.cpu.acch; 141 + let accm: &mut __m128i = &mut device.rsp.cpu.accm; 142 + let accl: &mut __m128i = &mut device.rsp.cpu.accl; 143 + let vd_reg = &mut device.rsp.cpu.vpr[vd(opcode) as usize]; 144 + let shift_amount = vs(opcode) & 1; 145 + 146 + for n in 0..8 { 147 + let mut product = get_vpr16(vte, n) as i16 as i32; 148 + if shift_amount != 0 { 149 + product <<= 16 150 + } 151 + let mut acc = 0; 152 + acc |= get_vpr16(*acch, n) as i64; 153 + acc <<= 16; 154 + acc |= get_vpr16(*accm, n) as i64; 155 + acc <<= 16; 156 + acc |= get_vpr16(*accl, n) as i64; 157 + acc <<= 16; 158 + acc >>= 16; 159 + if acc >= 0 { 160 + acc = s_clip(acc + (product as i64), 48) 161 + } 162 + modify_vpr16(acch, n, (acc >> 32) as u16); 163 + modify_vpr16(accm, n, (acc >> 16) as u16); 164 + modify_vpr16(accl, n, acc as u16); 165 + modify_vpr16(vd_reg, n, clamp_signed_64(acc >> 16) as u16); 166 + } 167 + } 168 + 169 + pub fn vmulq(device: &mut crate::Device, opcode: u32) { 170 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 171 + let acch: &mut __m128i = &mut device.rsp.cpu.acch; 172 + let accm: &mut __m128i = &mut device.rsp.cpu.accm; 173 + let accl: &mut __m128i = &mut device.rsp.cpu.accl; 174 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 175 + let vd_reg = &mut device.rsp.cpu.vpr[vd(opcode) as usize]; 176 + 177 + for n in 0..8 { 178 + let mut product = 179 + (get_vpr16(vs_reg, n) as i16 as i32).wrapping_mul(get_vpr16(vte, n) as i16 as i32); 180 + if product < 0 { 181 + product += 31; 182 + } 183 + modify_vpr16(acch, n, (product >> 16) as u16); 184 + modify_vpr16(accm, n, product as u16); 185 + modify_vpr16(accl, n, 0); 186 + modify_vpr16(vd_reg, n, (clamp_signed_32(product >> 1) & !15) as u16); 187 + } 188 + } 189 + 190 + pub fn vmudl(device: &mut crate::Device, opcode: u32) { 191 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 192 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 193 + unsafe { 194 + device.rsp.cpu.accl = _mm_mulhi_epu16(vs_reg, vte); 195 + device.rsp.cpu.accm = _mm_setzero_si128(); 196 + device.rsp.cpu.acch = _mm_setzero_si128(); 197 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 198 + } 199 + } 200 + 201 + pub fn vmudm(device: &mut crate::Device, opcode: u32) { 202 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 203 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 204 + unsafe { 205 + device.rsp.cpu.accl = _mm_mullo_epi16(vs_reg, vte); 206 + device.rsp.cpu.accm = _mm_mulhi_epu16(vs_reg, vte); 207 + let sign = _mm_srai_epi16(vs_reg, 15); 208 + let vta = _mm_and_si128(vte, sign); 209 + device.rsp.cpu.accm = _mm_sub_epi16(device.rsp.cpu.accm, vta); 210 + device.rsp.cpu.acch = _mm_srai_epi16(device.rsp.cpu.accm, 15); 211 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accm; 212 + } 213 + } 214 + 215 + pub fn vmudn(device: &mut crate::Device, opcode: u32) { 216 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 217 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 218 + unsafe { 219 + device.rsp.cpu.accl = _mm_mullo_epi16(vs_reg, vte); 220 + device.rsp.cpu.accm = _mm_mulhi_epu16(vs_reg, vte); 221 + let sign = _mm_srai_epi16(vte, 15); 222 + let vsa = _mm_and_si128(vs_reg, sign); 223 + device.rsp.cpu.accm = _mm_sub_epi16(device.rsp.cpu.accm, vsa); 224 + device.rsp.cpu.acch = _mm_srai_epi16(device.rsp.cpu.accm, 15); 225 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 226 + } 227 + } 228 + 229 + pub fn vmudh(device: &mut crate::Device, opcode: u32) { 230 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 231 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 232 + unsafe { 233 + device.rsp.cpu.accl = _mm_setzero_si128(); 234 + device.rsp.cpu.accm = _mm_mullo_epi16(vs_reg, vte); 235 + device.rsp.cpu.acch = _mm_mulhi_epi16(vs_reg, vte); 236 + let lo = _mm_unpacklo_epi16(device.rsp.cpu.accm, device.rsp.cpu.acch); 237 + let hi = _mm_unpackhi_epi16(device.rsp.cpu.accm, device.rsp.cpu.acch); 238 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_packs_epi32(lo, hi); 239 + } 240 + } 241 + 242 + pub fn vmacf(device: &mut crate::Device, opcode: u32) { 243 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 244 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 245 + unsafe { 246 + let lo = _mm_mullo_epi16(vs_reg, vte); 247 + let hi = _mm_mulhi_epi16(vs_reg, vte); 248 + 249 + let carry = _mm_srli_epi16(lo, 15); 250 + let md = _mm_or_si128(_mm_slli_epi16(hi, 1), carry); 251 + let lo_doubled = _mm_slli_epi16(lo, 1); 252 + let hi_sign = _mm_srai_epi16(hi, 15); 253 + 254 + let accl_old = device.rsp.cpu.accl; 255 + device.rsp.cpu.accl = _mm_add_epi16(accl_old, lo_doubled); 256 + let overflow_l = _mm_cmpeq_epi16(_mm_adds_epu16(accl_old, lo_doubled), device.rsp.cpu.accl); 257 + let borrow_from_l = _mm_cmpeq_epi16(overflow_l, _mm_setzero_si128()); 258 + 259 + let md_adjusted = _mm_sub_epi16(md, borrow_from_l); 260 + let zero_md = _mm_cmpeq_epi16(md_adjusted, _mm_setzero_si128()); 261 + let borrow_to_h = _mm_and_si128(zero_md, borrow_from_l); 262 + let hi_adjusted = _mm_sub_epi16(hi_sign, borrow_to_h); 263 + 264 + let accm_old = device.rsp.cpu.accm; 265 + device.rsp.cpu.accm = _mm_add_epi16(accm_old, md_adjusted); 266 + let overflow_m = 267 + _mm_cmpeq_epi16(_mm_adds_epu16(accm_old, md_adjusted), device.rsp.cpu.accm); 268 + let borrow_from_m = _mm_cmpeq_epi16(overflow_m, _mm_setzero_si128()); 269 + 270 + device.rsp.cpu.acch = _mm_add_epi16(device.rsp.cpu.acch, hi_adjusted); 271 + device.rsp.cpu.acch = _mm_sub_epi16(device.rsp.cpu.acch, borrow_from_m); 272 + 273 + let lo_packed = _mm_unpacklo_epi16(device.rsp.cpu.accm, device.rsp.cpu.acch); 274 + let hi_packed = _mm_unpackhi_epi16(device.rsp.cpu.accm, device.rsp.cpu.acch); 275 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_packs_epi32(lo_packed, hi_packed); 276 + } 277 + } 278 + 279 + pub fn vmacu(device: &mut crate::Device, opcode: u32) { 280 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 281 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 282 + unsafe { 283 + let lo = _mm_mullo_epi16(vs_reg, vte); 284 + let hi = _mm_mulhi_epi16(vs_reg, vte); 285 + 286 + let carry = _mm_srli_epi16(lo, 15); 287 + let md = _mm_or_si128(_mm_slli_epi16(hi, 1), carry); 288 + let lo_doubled = _mm_slli_epi16(lo, 1); 289 + let hi_sign = _mm_srai_epi16(hi, 15); 290 + 291 + let accl_old = device.rsp.cpu.accl; 292 + device.rsp.cpu.accl = _mm_add_epi16(accl_old, lo_doubled); 293 + let overflow_l = _mm_cmpeq_epi16(_mm_adds_epu16(accl_old, lo_doubled), device.rsp.cpu.accl); 294 + let borrow_from_l = _mm_cmpeq_epi16(overflow_l, _mm_setzero_si128()); 295 + 296 + let md_adjusted = _mm_sub_epi16(md, borrow_from_l); 297 + let zero_md = _mm_cmpeq_epi16(md_adjusted, _mm_setzero_si128()); 298 + let borrow_to_h = _mm_and_si128(zero_md, borrow_from_l); 299 + let hi_adjusted = _mm_sub_epi16(hi_sign, borrow_to_h); 300 + 301 + let accm_old = device.rsp.cpu.accm; 302 + device.rsp.cpu.accm = _mm_add_epi16(accm_old, md_adjusted); 303 + let overflow_m = 304 + _mm_cmpeq_epi16(_mm_adds_epu16(accm_old, md_adjusted), device.rsp.cpu.accm); 305 + let borrow_from_m = _mm_cmpeq_epi16(overflow_m, _mm_setzero_si128()); 306 + 307 + device.rsp.cpu.acch = _mm_add_epi16(device.rsp.cpu.acch, hi_adjusted); 308 + device.rsp.cpu.acch = _mm_sub_epi16(device.rsp.cpu.acch, borrow_from_m); 309 + 310 + let mmask = _mm_srai_epi16(device.rsp.cpu.accm, 15); 311 + let hmask = _mm_srai_epi16(device.rsp.cpu.acch, 15); 312 + let md_result = _mm_or_si128(mmask, device.rsp.cpu.accm); 313 + let positive = _mm_cmpgt_epi16(device.rsp.cpu.acch, _mm_setzero_si128()); 314 + let final_result = _mm_andnot_si128(hmask, md_result); 315 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_or_si128(positive, final_result); 316 + } 317 + } 318 + 319 + pub fn vrndn(device: &mut crate::Device, opcode: u32) { 320 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 321 + let acch: &mut __m128i = &mut device.rsp.cpu.acch; 322 + let accm: &mut __m128i = &mut device.rsp.cpu.accm; 323 + let accl: &mut __m128i = &mut device.rsp.cpu.accl; 324 + let vd_reg = &mut device.rsp.cpu.vpr[vd(opcode) as usize]; 325 + let shift_amount = vs(opcode) & 1; 326 + 327 + for n in 0..8 { 328 + let mut product = get_vpr16(vte, n) as i16 as i32; 329 + if shift_amount != 0 { 330 + product <<= 16 331 + } 332 + let mut acc = 0; 333 + acc |= get_vpr16(*acch, n) as i64; 334 + acc <<= 16; 335 + acc |= get_vpr16(*accm, n) as i64; 336 + acc <<= 16; 337 + acc |= get_vpr16(*accl, n) as i64; 338 + acc <<= 16; 339 + acc >>= 16; 340 + if acc < 0 { 341 + acc = s_clip(acc + (product as i64), 48) 342 + } 343 + modify_vpr16(acch, n, (acc >> 32) as u16); 344 + modify_vpr16(accm, n, (acc >> 16) as u16); 345 + modify_vpr16(accl, n, acc as u16); 346 + modify_vpr16(vd_reg, n, clamp_signed_64(acc >> 16) as u16); 347 + } 348 + } 349 + 350 + pub fn vmacq(device: &mut crate::Device, opcode: u32) { 351 + let acch: &mut __m128i = &mut device.rsp.cpu.acch; 352 + let accm: &mut __m128i = &mut device.rsp.cpu.accm; 353 + let vd_reg = &mut device.rsp.cpu.vpr[vd(opcode) as usize]; 354 + 355 + for n in 0..8 { 356 + let mut product = ((get_vpr16(*acch, n) as i32) << 16) | (get_vpr16(*accm, n) as i32); 357 + if product < 0 && (product & (1 << 5)) == 0 { 358 + product += 32 359 + } else if product >= 32 && (product & (1 << 5)) == 0 { 360 + product -= 32 361 + } 362 + modify_vpr16(acch, n, (product >> 16) as u16); 363 + modify_vpr16(accm, n, product as u16); 364 + modify_vpr16(vd_reg, n, (clamp_signed_32(product >> 1) & !15) as u16); 365 + } 366 + } 367 + 368 + pub fn vmadl(device: &mut crate::Device, opcode: u32) { 369 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 370 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 371 + unsafe { 372 + let hi = _mm_mulhi_epu16(vs_reg, vte); 373 + 374 + let accl_old = device.rsp.cpu.accl; 375 + device.rsp.cpu.accl = _mm_add_epi16(accl_old, hi); 376 + let overflow_l = _mm_cmpeq_epi16(_mm_adds_epu16(accl_old, hi), device.rsp.cpu.accl); 377 + let borrow = _mm_cmpeq_epi16(overflow_l, _mm_setzero_si128()); 378 + 379 + let accm_old = device.rsp.cpu.accm; 380 + device.rsp.cpu.accm = _mm_sub_epi16(accm_old, borrow); 381 + let overflow_m = _mm_cmpeq_epi16( 382 + _mm_adds_epu16(accm_old, _mm_sub_epi16(_mm_setzero_si128(), borrow)), 383 + device.rsp.cpu.accm, 384 + ); 385 + let borrow_h = _mm_cmpeq_epi16(overflow_m, _mm_setzero_si128()); 386 + 387 + device.rsp.cpu.acch = _mm_sub_epi16(device.rsp.cpu.acch, borrow_h); 388 + 389 + let nhi = _mm_srai_epi16(device.rsp.cpu.acch, 15); 390 + let nmd = _mm_srai_epi16(device.rsp.cpu.accm, 15); 391 + let shi = _mm_cmpeq_epi16(nhi, device.rsp.cpu.acch); 392 + let smd = _mm_cmpeq_epi16(nhi, nmd); 393 + let cmask = _mm_and_si128(smd, shi); 394 + let cval = _mm_cmpeq_epi16(nhi, _mm_setzero_si128()); 395 + 396 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_blendv_epi8(cval, device.rsp.cpu.accl, cmask); 397 + } 398 + } 399 + 400 + pub fn vmadm(device: &mut crate::Device, opcode: u32) { 401 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 402 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 403 + unsafe { 404 + let lo = _mm_mullo_epi16(vs_reg, vte); 405 + let mut hi = _mm_mulhi_epu16(vs_reg, vte); 406 + let sign = _mm_srai_epi16(vs_reg, 15); 407 + let vta = _mm_and_si128(vte, sign); 408 + hi = _mm_sub_epi16(hi, vta); 409 + 410 + let accl_old = device.rsp.cpu.accl; 411 + device.rsp.cpu.accl = _mm_add_epi16(accl_old, lo); 412 + let overflow_l = _mm_cmpeq_epi16(_mm_adds_epu16(accl_old, lo), device.rsp.cpu.accl); 413 + let borrow = _mm_cmpeq_epi16(overflow_l, _mm_setzero_si128()); 414 + 415 + hi = _mm_sub_epi16(hi, borrow); 416 + 417 + let accm_old = device.rsp.cpu.accm; 418 + device.rsp.cpu.accm = _mm_add_epi16(accm_old, hi); 419 + let overflow_m = _mm_cmpeq_epi16(_mm_adds_epu16(accm_old, hi), device.rsp.cpu.accm); 420 + let borrow_h = _mm_cmpeq_epi16(overflow_m, _mm_setzero_si128()); 421 + 422 + let hi_sign = _mm_srai_epi16(hi, 15); 423 + device.rsp.cpu.acch = _mm_add_epi16(device.rsp.cpu.acch, hi_sign); 424 + device.rsp.cpu.acch = _mm_sub_epi16(device.rsp.cpu.acch, borrow_h); 425 + 426 + let lo_packed = _mm_unpacklo_epi16(device.rsp.cpu.accm, device.rsp.cpu.acch); 427 + let hi_packed = _mm_unpackhi_epi16(device.rsp.cpu.accm, device.rsp.cpu.acch); 428 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_packs_epi32(lo_packed, hi_packed); 429 + } 430 + } 431 + 432 + pub fn vmadn(device: &mut crate::Device, opcode: u32) { 433 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 434 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 435 + unsafe { 436 + let lo = _mm_mullo_epi16(vs_reg, vte); 437 + let mut hi = _mm_mulhi_epu16(vs_reg, vte); 438 + let sign = _mm_srai_epi16(vte, 15); 439 + let vsa = _mm_and_si128(vs_reg, sign); 440 + hi = _mm_sub_epi16(hi, vsa); 441 + 442 + let accl_old = device.rsp.cpu.accl; 443 + device.rsp.cpu.accl = _mm_add_epi16(accl_old, lo); 444 + let overflow_l = _mm_cmpeq_epi16(_mm_adds_epu16(accl_old, lo), device.rsp.cpu.accl); 445 + let borrow = _mm_cmpeq_epi16(overflow_l, _mm_setzero_si128()); 446 + 447 + hi = _mm_sub_epi16(hi, borrow); 448 + 449 + let accm_old = device.rsp.cpu.accm; 450 + device.rsp.cpu.accm = _mm_add_epi16(accm_old, hi); 451 + let overflow_m = _mm_cmpeq_epi16(_mm_adds_epu16(accm_old, hi), device.rsp.cpu.accm); 452 + let borrow_h = _mm_cmpeq_epi16(overflow_m, _mm_setzero_si128()); 453 + 454 + let hi_sign = _mm_srai_epi16(hi, 15); 455 + device.rsp.cpu.acch = _mm_add_epi16(device.rsp.cpu.acch, hi_sign); 456 + device.rsp.cpu.acch = _mm_sub_epi16(device.rsp.cpu.acch, borrow_h); 457 + 458 + let nhi = _mm_srai_epi16(device.rsp.cpu.acch, 15); 459 + let nmd = _mm_srai_epi16(device.rsp.cpu.accm, 15); 460 + let shi = _mm_cmpeq_epi16(nhi, device.rsp.cpu.acch); 461 + let smd = _mm_cmpeq_epi16(nhi, nmd); 462 + let cmask = _mm_and_si128(smd, shi); 463 + let cval = _mm_cmpeq_epi16(nhi, _mm_setzero_si128()); 464 + 465 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_blendv_epi8(cval, device.rsp.cpu.accl, cmask); 466 + } 467 + } 468 + 469 + pub fn vmadh(device: &mut crate::Device, opcode: u32) { 470 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 471 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 472 + unsafe { 473 + let lo = _mm_mullo_epi16(vs_reg, vte); 474 + let mut hi = _mm_mulhi_epi16(vs_reg, vte); 475 + 476 + let accm_old = device.rsp.cpu.accm; 477 + device.rsp.cpu.accm = _mm_add_epi16(accm_old, lo); 478 + let overflow = _mm_cmpeq_epi16(_mm_adds_epu16(accm_old, lo), device.rsp.cpu.accm); 479 + let borrow = _mm_cmpeq_epi16(overflow, _mm_setzero_si128()); 480 + 481 + hi = _mm_sub_epi16(hi, borrow); 482 + device.rsp.cpu.acch = _mm_add_epi16(device.rsp.cpu.acch, hi); 483 + 484 + let lo_packed = _mm_unpacklo_epi16(device.rsp.cpu.accm, device.rsp.cpu.acch); 485 + let hi_packed = _mm_unpackhi_epi16(device.rsp.cpu.accm, device.rsp.cpu.acch); 486 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_packs_epi32(lo_packed, hi_packed); 487 + } 488 + } 489 + 490 + pub fn vadd(device: &mut crate::Device, opcode: u32) { 491 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 492 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 493 + unsafe { 494 + let sum = _mm_add_epi16(vs_reg, vte); 495 + device.rsp.cpu.accl = _mm_sub_epi16(sum, device.rsp.cpu.vcol); 496 + 497 + let min = _mm_min_epi16(vs_reg, vte); 498 + let max = _mm_max_epi16(vs_reg, vte); 499 + let min_adjusted = _mm_subs_epi16(min, device.rsp.cpu.vcol); 500 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_adds_epi16(min_adjusted, max); 501 + 502 + device.rsp.cpu.vcol = _mm_setzero_si128(); 503 + device.rsp.cpu.vcoh = _mm_setzero_si128(); 504 + } 505 + } 506 + 507 + pub fn vsub(device: &mut crate::Device, opcode: u32) { 508 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 509 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 510 + unsafe { 511 + let udiff = _mm_sub_epi16(vte, device.rsp.cpu.vcol); 512 + let sdiff = _mm_subs_epi16(vte, device.rsp.cpu.vcol); 513 + device.rsp.cpu.accl = _mm_sub_epi16(vs_reg, udiff); 514 + 515 + let ov = _mm_cmpgt_epi16(sdiff, udiff); 516 + let sub_result = _mm_subs_epi16(vs_reg, sdiff); 517 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_adds_epi16(sub_result, ov); 518 + 519 + device.rsp.cpu.vcol = _mm_setzero_si128(); 520 + device.rsp.cpu.vcoh = _mm_setzero_si128(); 521 + } 522 + } 523 + 524 + pub fn vzero(device: &mut crate::Device, opcode: u32) { 525 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 526 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 527 + unsafe { 528 + device.rsp.cpu.accl = _mm_add_epi16(vs_reg, vte); 529 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_setzero_si128(); 530 + } 531 + } 532 + 533 + pub fn vabs(device: &mut crate::Device, opcode: u32) { 534 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 535 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 536 + unsafe { 537 + let vs0 = _mm_cmpeq_epi16(vs_reg, _mm_setzero_si128()); 538 + let slt = _mm_srai_epi16(vs_reg, 15); 539 + 540 + let mut result = _mm_andnot_si128(vs0, vte); 541 + result = _mm_xor_si128(result, slt); 542 + device.rsp.cpu.accl = _mm_sub_epi16(result, slt); 543 + device.rsp.cpu.vpr[vd(opcode) as usize] = _mm_subs_epi16(result, slt); 544 + } 545 + } 546 + 547 + pub fn vaddc(device: &mut crate::Device, opcode: u32) { 548 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 549 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 550 + unsafe { 551 + let sum = _mm_adds_epu16(vs_reg, vte); 552 + device.rsp.cpu.accl = _mm_add_epi16(vs_reg, vte); 553 + device.rsp.cpu.vcol = _mm_cmpeq_epi16( 554 + _mm_cmpeq_epi16(sum, device.rsp.cpu.accl), 555 + _mm_setzero_si128(), 556 + ); 557 + device.rsp.cpu.vcoh = _mm_setzero_si128(); 558 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 559 + } 560 + } 561 + 562 + pub fn vsubc(device: &mut crate::Device, opcode: u32) { 563 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 564 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 565 + unsafe { 566 + let udiff = _mm_subs_epu16(vs_reg, vte); 567 + let equal = _mm_cmpeq_epi16(vs_reg, vte); 568 + let diff0 = _mm_cmpeq_epi16(udiff, _mm_setzero_si128()); 569 + 570 + device.rsp.cpu.vcoh = _mm_cmpeq_epi16(equal, _mm_setzero_si128()); 571 + device.rsp.cpu.vcol = _mm_andnot_si128(equal, diff0); 572 + device.rsp.cpu.accl = _mm_sub_epi16(vs_reg, vte); 573 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 574 + } 575 + } 576 + 577 + pub fn vsar(device: &mut crate::Device, opcode: u32) { 578 + let vd_reg = &mut device.rsp.cpu.vpr[vd(opcode) as usize]; 579 + match ve(opcode) { 580 + 0x8 => { 581 + *vd_reg = device.rsp.cpu.acch; 582 + } 583 + 0x9 => { 584 + *vd_reg = device.rsp.cpu.accm; 585 + } 586 + 0xa => { 587 + *vd_reg = device.rsp.cpu.accl; 588 + } 589 + _ => { 590 + *vd_reg = unsafe { _mm_setzero_si128() }; 591 + } 592 + } 593 + } 594 + 595 + pub fn vlt(device: &mut crate::Device, opcode: u32) { 596 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 597 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 598 + unsafe { 599 + let eq = _mm_cmpeq_epi16(vs_reg, vte); 600 + let lt = _mm_cmplt_epi16(vs_reg, vte); 601 + let eq_and_carry = 602 + _mm_and_si128(_mm_and_si128(device.rsp.cpu.vcoh, eq), device.rsp.cpu.vcol); 603 + 604 + device.rsp.cpu.vccl = _mm_or_si128(lt, eq_and_carry); 605 + device.rsp.cpu.accl = _mm_blendv_epi8(vte, vs_reg, device.rsp.cpu.vccl); 606 + 607 + device.rsp.cpu.vcch = _mm_setzero_si128(); 608 + device.rsp.cpu.vcoh = _mm_setzero_si128(); 609 + device.rsp.cpu.vcol = _mm_setzero_si128(); 610 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 611 + } 612 + } 613 + 614 + pub fn veq(device: &mut crate::Device, opcode: u32) { 615 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 616 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 617 + unsafe { 618 + let eq = _mm_cmpeq_epi16(vs_reg, vte); 619 + device.rsp.cpu.vccl = _mm_andnot_si128(device.rsp.cpu.vcoh, eq); 620 + device.rsp.cpu.accl = _mm_blendv_epi8(vte, vs_reg, device.rsp.cpu.vccl); 621 + 622 + device.rsp.cpu.vcch = _mm_setzero_si128(); 623 + device.rsp.cpu.vcoh = _mm_setzero_si128(); 624 + device.rsp.cpu.vcol = _mm_setzero_si128(); 625 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 626 + } 627 + } 628 + 629 + pub fn vne(device: &mut crate::Device, opcode: u32) { 630 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 631 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 632 + unsafe { 633 + let eq = _mm_cmpeq_epi16(vs_reg, vte); 634 + let ne = _mm_cmpeq_epi16(eq, _mm_setzero_si128()); 635 + device.rsp.cpu.vccl = _mm_or_si128(_mm_and_si128(device.rsp.cpu.vcoh, eq), ne); 636 + device.rsp.cpu.accl = _mm_blendv_epi8(vte, vs_reg, device.rsp.cpu.vccl); 637 + 638 + device.rsp.cpu.vcch = _mm_setzero_si128(); 639 + device.rsp.cpu.vcoh = _mm_setzero_si128(); 640 + device.rsp.cpu.vcol = _mm_setzero_si128(); 641 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 642 + } 643 + } 644 + 645 + pub fn vge(device: &mut crate::Device, opcode: u32) { 646 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 647 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 648 + unsafe { 649 + let eq = _mm_cmpeq_epi16(vs_reg, vte); 650 + let gt = _mm_cmpgt_epi16(vs_reg, vte); 651 + let es = _mm_and_si128(device.rsp.cpu.vcoh, device.rsp.cpu.vcol); 652 + let eq_filtered = _mm_andnot_si128(es, eq); 653 + 654 + device.rsp.cpu.vccl = _mm_or_si128(gt, eq_filtered); 655 + device.rsp.cpu.accl = _mm_blendv_epi8(vte, vs_reg, device.rsp.cpu.vccl); 656 + 657 + device.rsp.cpu.vcch = _mm_setzero_si128(); 658 + device.rsp.cpu.vcoh = _mm_setzero_si128(); 659 + device.rsp.cpu.vcol = _mm_setzero_si128(); 660 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 661 + } 662 + } 663 + 664 + pub fn vcl(device: &mut crate::Device, opcode: u32) { 665 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 666 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 667 + unsafe { 668 + let mut nvt = _mm_xor_si128(vte, device.rsp.cpu.vcol); 669 + nvt = _mm_sub_epi16(nvt, device.rsp.cpu.vcol); 670 + let diff = _mm_sub_epi16(vs_reg, nvt); 671 + 672 + let ncarry = _mm_cmpeq_epi16(diff, _mm_adds_epu16(vs_reg, vte)); 673 + let nvce = _mm_cmpeq_epi16(device.rsp.cpu.vce, _mm_setzero_si128()); 674 + let diff0 = _mm_cmpeq_epi16(diff, _mm_setzero_si128()); 675 + 676 + let lec1 = _mm_and_si128(_mm_and_si128(diff0, ncarry), nvce); 677 + let lec2 = _mm_and_si128(_mm_or_si128(diff0, ncarry), device.rsp.cpu.vce); 678 + let leeq = _mm_or_si128(lec1, lec2); 679 + 680 + let geeq = _mm_cmpeq_epi16(_mm_subs_epu16(vte, vs_reg), _mm_setzero_si128()); 681 + 682 + let le_sel = _mm_andnot_si128(device.rsp.cpu.vcoh, device.rsp.cpu.vcol); 683 + let le = _mm_blendv_epi8(device.rsp.cpu.vccl, leeq, le_sel); 684 + 685 + let ge_sel = _mm_or_si128(device.rsp.cpu.vcol, device.rsp.cpu.vcoh); 686 + let ge = _mm_blendv_epi8(geeq, device.rsp.cpu.vcch, ge_sel); 687 + 688 + let mask = _mm_blendv_epi8(ge, le, device.rsp.cpu.vcol); 689 + device.rsp.cpu.accl = _mm_blendv_epi8(vs_reg, nvt, mask); 690 + 691 + device.rsp.cpu.vcch = ge; 692 + device.rsp.cpu.vccl = le; 693 + device.rsp.cpu.vcoh = _mm_setzero_si128(); 694 + device.rsp.cpu.vcol = _mm_setzero_si128(); 695 + device.rsp.cpu.vce = _mm_setzero_si128(); 696 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 697 + } 698 + } 699 + 700 + pub fn vch(device: &mut crate::Device, opcode: u32) { 701 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 702 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 703 + unsafe { 704 + device.rsp.cpu.vcol = _mm_cmplt_epi16(_mm_xor_si128(vs_reg, vte), _mm_setzero_si128()); 705 + 706 + let mut nvt = _mm_xor_si128(vte, device.rsp.cpu.vcol); 707 + nvt = _mm_sub_epi16(nvt, device.rsp.cpu.vcol); 708 + let diff = _mm_sub_epi16(vs_reg, nvt); 709 + let diff0 = _mm_cmpeq_epi16(diff, _mm_setzero_si128()); 710 + let vtn = _mm_cmplt_epi16(vte, _mm_setzero_si128()); 711 + 712 + let dlez = _mm_cmpeq_epi16( 713 + _mm_setzero_si128(), 714 + _mm_cmpgt_epi16(diff, _mm_setzero_si128()), 715 + ); 716 + let dgez = _mm_or_si128(_mm_cmpgt_epi16(diff, _mm_setzero_si128()), diff0); 717 + 718 + device.rsp.cpu.vcch = _mm_blendv_epi8(dgez, vtn, device.rsp.cpu.vcol); 719 + device.rsp.cpu.vccl = _mm_blendv_epi8(vtn, dlez, device.rsp.cpu.vcol); 720 + device.rsp.cpu.vce = _mm_and_si128( 721 + _mm_cmpeq_epi16(diff, device.rsp.cpu.vcol), 722 + device.rsp.cpu.vcol, 723 + ); 724 + device.rsp.cpu.vcoh = 725 + _mm_cmpeq_epi16(_mm_or_si128(diff0, device.rsp.cpu.vce), _mm_setzero_si128()); 726 + 727 + let mask = _mm_blendv_epi8( 728 + device.rsp.cpu.vcch, 729 + device.rsp.cpu.vccl, 730 + device.rsp.cpu.vcol, 731 + ); 732 + device.rsp.cpu.accl = _mm_blendv_epi8(vs_reg, nvt, mask); 733 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 734 + } 735 + } 736 + 737 + pub fn vcr(device: &mut crate::Device, opcode: u32) { 738 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 739 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 740 + unsafe { 741 + let sign = _mm_srai_epi16(_mm_xor_si128(vs_reg, vte), 15); 742 + let dlez = _mm_add_epi16(_mm_and_si128(vs_reg, sign), vte); 743 + device.rsp.cpu.vccl = _mm_srai_epi16(dlez, 15); 744 + 745 + let dgez = _mm_min_epi16(_mm_or_si128(vs_reg, sign), vte); 746 + device.rsp.cpu.vcch = _mm_cmpeq_epi16(dgez, vte); 747 + 748 + let nvt = _mm_xor_si128(vte, sign); 749 + let mask = _mm_blendv_epi8(device.rsp.cpu.vcch, device.rsp.cpu.vccl, sign); 750 + device.rsp.cpu.accl = _mm_blendv_epi8(vs_reg, nvt, mask); 751 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 752 + 753 + device.rsp.cpu.vcol = _mm_setzero_si128(); 754 + device.rsp.cpu.vcoh = _mm_setzero_si128(); 755 + device.rsp.cpu.vce = _mm_setzero_si128(); 756 + } 757 + } 758 + 759 + pub fn vmrg(device: &mut crate::Device, opcode: u32) { 760 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 761 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 762 + unsafe { 763 + device.rsp.cpu.accl = _mm_blendv_epi8(vte, vs_reg, device.rsp.cpu.vccl); 764 + device.rsp.cpu.vcoh = _mm_setzero_si128(); 765 + device.rsp.cpu.vcol = _mm_setzero_si128(); 766 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 767 + } 768 + } 769 + 770 + pub fn vand(device: &mut crate::Device, opcode: u32) { 771 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 772 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 773 + unsafe { 774 + device.rsp.cpu.accl = _mm_and_si128(vs_reg, vte); 775 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 776 + } 777 + } 778 + 779 + pub fn vnand(device: &mut crate::Device, opcode: u32) { 780 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 781 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 782 + unsafe { 783 + let and_result = _mm_and_si128(vs_reg, vte); 784 + device.rsp.cpu.accl = _mm_xor_si128(and_result, _mm_set1_epi32(-1)); 785 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 786 + } 787 + } 788 + 789 + pub fn vor(device: &mut crate::Device, opcode: u32) { 790 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 791 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 792 + unsafe { 793 + device.rsp.cpu.accl = _mm_or_si128(vs_reg, vte); 794 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 795 + } 796 + } 797 + 798 + pub fn vnor(device: &mut crate::Device, opcode: u32) { 799 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 800 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 801 + unsafe { 802 + let or_result = _mm_or_si128(vs_reg, vte); 803 + device.rsp.cpu.accl = _mm_xor_si128(or_result, _mm_set1_epi32(-1)); 804 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 805 + } 806 + } 807 + 808 + pub fn vxor(device: &mut crate::Device, opcode: u32) { 809 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 810 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 811 + unsafe { 812 + device.rsp.cpu.accl = _mm_xor_si128(vs_reg, vte); 813 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 814 + } 815 + } 816 + 817 + pub fn vnxor(device: &mut crate::Device, opcode: u32) { 818 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 819 + let vs_reg = device.rsp.cpu.vpr[vs(opcode) as usize]; 820 + unsafe { 821 + let xor_result = _mm_xor_si128(vs_reg, vte); 822 + device.rsp.cpu.accl = _mm_xor_si128(xor_result, _mm_set1_epi32(-1)); 823 + device.rsp.cpu.vpr[vd(opcode) as usize] = device.rsp.cpu.accl; 824 + } 825 + } 826 + 827 + pub fn vrcp(device: &mut crate::Device, opcode: u32) { 828 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 829 + let input = get_vpr16(device.rsp.cpu.vpr[vt(opcode) as usize], ve(opcode) as u8) as i16 as i32; 830 + let result = compute_reciprocal(input, &device.rsp.cpu.reciprocals); 831 + 832 + device.rsp.cpu.divdp = false; 833 + device.rsp.cpu.divout = (result >> 16) as i16; 834 + device.rsp.cpu.accl = vte; 835 + modify_vpr16( 836 + &mut device.rsp.cpu.vpr[vd(opcode) as usize], 837 + de(opcode) as u8, 838 + result as u16, 839 + ); 840 + } 841 + 842 + pub fn vrcpl(device: &mut crate::Device, opcode: u32) { 843 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 844 + let input = if device.rsp.cpu.divdp { 845 + ((device.rsp.cpu.divin as i32) << 16) 846 + | get_vpr16(device.rsp.cpu.vpr[vt(opcode) as usize], ve(opcode) as u8) as u16 as i32 847 + } else { 848 + get_vpr16(device.rsp.cpu.vpr[vt(opcode) as usize], ve(opcode) as u8) as i16 as i32 849 + }; 850 + let result = compute_reciprocal(input, &device.rsp.cpu.reciprocals); 851 + 852 + device.rsp.cpu.divdp = false; 853 + device.rsp.cpu.divout = (result >> 16) as i16; 854 + device.rsp.cpu.accl = vte; 855 + modify_vpr16( 856 + &mut device.rsp.cpu.vpr[vd(opcode) as usize], 857 + de(opcode) as u8, 858 + result as u16, 859 + ); 860 + } 861 + 862 + pub fn vrcph(device: &mut crate::Device, opcode: u32) { 863 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 864 + let vt_reg = device.rsp.cpu.vpr[vt(opcode) as usize]; 865 + let vd_reg = &mut device.rsp.cpu.vpr[vd(opcode) as usize]; 866 + 867 + device.rsp.cpu.accl = vte; 868 + device.rsp.cpu.divdp = true; 869 + device.rsp.cpu.divin = get_vpr16(vt_reg, ve(opcode) as u8) as i16; 870 + 871 + modify_vpr16(vd_reg, de(opcode) as u8, device.rsp.cpu.divout as u16); 872 + } 873 + 874 + pub fn vmov(device: &mut crate::Device, opcode: u32) { 875 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 876 + let vd_reg = &mut device.rsp.cpu.vpr[vd(opcode) as usize]; 877 + let de_index = de(opcode) as u8; 878 + 879 + let value = get_vpr16(vte, de_index); 880 + modify_vpr16(vd_reg, de_index, value); 881 + device.rsp.cpu.accl = vte; 882 + } 883 + 884 + pub fn vrsq(device: &mut crate::Device, opcode: u32) { 885 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 886 + let input = get_vpr16(device.rsp.cpu.vpr[vt(opcode) as usize], ve(opcode) as u8) as i16 as i32; 887 + let result = compute_inverse_sqrt(input, &device.rsp.cpu.inverse_square_roots); 888 + 889 + device.rsp.cpu.divdp = false; 890 + device.rsp.cpu.divout = (result >> 16) as i16; 891 + device.rsp.cpu.accl = vte; 892 + modify_vpr16( 893 + &mut device.rsp.cpu.vpr[vd(opcode) as usize], 894 + de(opcode) as u8, 895 + result as u16, 896 + ); 897 + } 898 + 899 + pub fn vrsql(device: &mut crate::Device, opcode: u32) { 900 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 901 + let input = if device.rsp.cpu.divdp { 902 + ((device.rsp.cpu.divin as i32) << 16) 903 + | get_vpr16(device.rsp.cpu.vpr[vt(opcode) as usize], ve(opcode) as u8) as u16 as i32 904 + } else { 905 + get_vpr16(device.rsp.cpu.vpr[vt(opcode) as usize], ve(opcode) as u8) as i16 as i32 906 + }; 907 + let result = compute_inverse_sqrt(input, &device.rsp.cpu.inverse_square_roots); 908 + 909 + device.rsp.cpu.divdp = false; 910 + device.rsp.cpu.divout = (result >> 16) as i16; 911 + device.rsp.cpu.accl = vte; 912 + modify_vpr16( 913 + &mut device.rsp.cpu.vpr[vd(opcode) as usize], 914 + de(opcode) as u8, 915 + result as u16, 916 + ); 917 + } 918 + 919 + pub fn vrsqh(device: &mut crate::Device, opcode: u32) { 920 + let vte = vte(device, vt(opcode), ve(opcode) as usize); 921 + let vt_reg = device.rsp.cpu.vpr[vt(opcode) as usize]; 922 + let vd_reg = &mut device.rsp.cpu.vpr[vd(opcode) as usize]; 923 + 924 + device.rsp.cpu.accl = vte; 925 + device.rsp.cpu.divdp = true; 926 + device.rsp.cpu.divin = get_vpr16(vt_reg, ve(opcode) as u8) as i16; 927 + 928 + modify_vpr16(vd_reg, de(opcode) as u8, device.rsp.cpu.divout as u16); 929 + } 930 + 931 + pub fn vnop(_device: &mut crate::Device, _opcode: u32) {} 932 + 933 + pub fn execute_vec(device: &mut crate::Device, opcode: u32) { 934 + device.rsp.cpu.instruction_type = crate::cpu::InstructionType::Vu; 935 + device.rsp.cpu.vec_instrs[(opcode & 0x3F) as usize](device, opcode) 936 + } 937 + 938 + pub fn reserved(_device: &mut crate::Device, _opcode: u32) { 939 + panic!("rsp vu reserved") 940 + }