we (web engine): Experimental web browser project to understand the limits of Claude

Implement AArch64 assembler infrastructure for JIT compiler

Add the low-level building blocks for JIT compilation in crates/js/src/jit/:

- ExecutableMemory: mmap-based allocator with a W^X policy, using MAP_JIT and
  pthread_jit_write_protect_np on Apple Silicon
- JitBuffer: manages code pages with automatic overflow to new pages and
  tracks used/free space per page
- CodePtr: type-safe wrapper for calling into JIT-compiled code
- Assembler: full AArch64 instruction emitter with label-based forward
  references and backpatching, covering:
  - Arithmetic: add, sub, mul, sdiv (register and immediate)
  - Logic: and, orr, eor, lsl, lsr, asr
  - Memory: ldr, str (register/immediate/pre-index/post-index), stp, ldp
  - Branch: b, b.cond, bl, blr, br, ret, cbz, cbnz
  - Comparison: cmp, tst
  - Move: mov, movz, movk, mov_imm64
- Entry/exit stubs: prologue/epilogue saving and restoring the callee-saved
  registers (x19-x28, fp, lr) with proper frame pointer setup
- FP: fmov, fadd, fsub, fmul, fdiv, fcmp, scvtf, fcvtzs
- 88 tests, including instruction encoding verification and execution tests
  that JIT-compile and run AArch64 code

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
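
As a minimal end-to-end sketch of how the pieces compose (mirroring the commit's own test_execute_add; module paths are assumed from the crate layout, and this runs only on Apple Silicon macOS since the memory layer is built on MAP_JIT):

    use crate::jit::assembler::{Assembler, X0};
    use crate::jit::buffer::JitBuffer;

    // JIT-compile the function |x| x + 5 and call it.
    let mut asm = Assembler::new();
    asm.emit_prologue();    // save FP/LR and X19-X28, set up the frame pointer
    asm.add_imm(X0, X0, 5); // first argument arrives in X0; result returns in X0
    asm.emit_epilogue();    // restore saved registers, then RET

    let mut buf = JitBuffer::new().expect("executable page"); // one 64 KiB code page
    let ptr = asm.finalize(&mut buf).expect("finalize");      // backpatch labels, copy code
    // SAFETY: ptr points at freshly emitted, valid AArch64 code.
    let result = unsafe { ptr.call_u64_to_u64(10) };
    assert_eq!(result, 15);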

+1953
+1500
crates/js/src/jit/assembler.rs
//! AArch64 machine code assembler.
//!
//! Emits fixed-width 32-bit AArch64 instructions into a byte buffer.
//! Supports label-based forward references with backpatching.

use super::buffer::{CodePtr, JitBuffer};
use super::memory::MemoryError;

// ── AArch64 register definitions ─────────────────────────────────────────────

/// A 64-bit general-purpose register (X0–X30, SP/ZR).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Reg(u8);

impl Reg {
    pub const fn new(n: u8) -> Self {
        assert!(n < 32, "register index must be 0-31");
        Reg(n)
    }

    pub const fn index(self) -> u8 {
        self.0
    }
}

// Arguments / return value
pub const X0: Reg = Reg(0);
pub const X1: Reg = Reg(1);
pub const X2: Reg = Reg(2);
pub const X3: Reg = Reg(3);
pub const X4: Reg = Reg(4);
pub const X5: Reg = Reg(5);
pub const X6: Reg = Reg(6);
pub const X7: Reg = Reg(7);

// Indirect result location
pub const X8: Reg = Reg(8);

// Temporaries (caller-saved)
pub const X9: Reg = Reg(9);
pub const X10: Reg = Reg(10);
pub const X11: Reg = Reg(11);
pub const X12: Reg = Reg(12);
pub const X13: Reg = Reg(13);
pub const X14: Reg = Reg(14);
pub const X15: Reg = Reg(15);

// Intra-procedure-call temporary
pub const X16: Reg = Reg(16);
pub const X17: Reg = Reg(17);

// Platform register (reserved)
pub const X18: Reg = Reg(18);

// Callee-saved registers
pub const X19: Reg = Reg(19);
pub const X20: Reg = Reg(20);
pub const X21: Reg = Reg(21);
pub const X22: Reg = Reg(22);
pub const X23: Reg = Reg(23);
pub const X24: Reg = Reg(24);
pub const X25: Reg = Reg(25);
pub const X26: Reg = Reg(26);
pub const X27: Reg = Reg(27);
pub const X28: Reg = Reg(28);

// Frame pointer
pub const FP: Reg = Reg(29);
// Link register
pub const LR: Reg = Reg(30);
// Stack pointer / zero register (context-dependent)
pub const SP: Reg = Reg(31);
pub const ZR: Reg = Reg(31);

/// A 64-bit floating-point / SIMD register (D0–D31).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FpReg(u8);

impl FpReg {
    pub const fn new(n: u8) -> Self {
        assert!(n < 32, "FP register index must be 0-31");
        FpReg(n)
    }

    pub const fn index(self) -> u8 {
        self.0
    }
}

pub const D0: FpReg = FpReg(0);
pub const D1: FpReg = FpReg(1);
pub const D2: FpReg = FpReg(2);
pub const D3: FpReg = FpReg(3);
pub const D4: FpReg = FpReg(4);
pub const D5: FpReg = FpReg(5);
pub const D6: FpReg = FpReg(6);
pub const D7: FpReg = FpReg(7);

// ── Condition codes ──────────────────────────────────────────────────────────

/// AArch64 condition codes for conditional branches.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum Cond {
    EQ = 0b0000, // Equal (Z=1)
    NE = 0b0001, // Not equal (Z=0)
    CS = 0b0010, // Carry set / unsigned higher or same (C=1)
    CC = 0b0011, // Carry clear / unsigned lower (C=0)
    MI = 0b0100, // Minus / negative (N=1)
    PL = 0b0101, // Plus / positive or zero (N=0)
    VS = 0b0110, // Overflow (V=1)
    VC = 0b0111, // No overflow (V=0)
    HI = 0b1000, // Unsigned higher (C=1 && Z=0)
    LS = 0b1001, // Unsigned lower or same (C=0 || Z=1)
    GE = 0b1010, // Signed greater or equal (N==V)
    LT = 0b1011, // Signed less than (N!=V)
    GT = 0b1100, // Signed greater than (Z==0 && N==V)
    LE = 0b1101, // Signed less or equal (Z==1 || N!=V)
    AL = 0b1110, // Always
}

// ── Labels ───────────────────────────────────────────────────────────────────

/// A label for forward/backward branch references.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Label(u32);

/// Tracks pending fixups for forward references.
struct PendingFixup {
    /// Byte offset in the code buffer where the branch instruction lives.
    instruction_offset: usize,
    /// The label being referenced.
    label: Label,
    /// What kind of fixup is needed.
    kind: FixupKind,
}

#[derive(Debug, Clone, Copy)]
enum FixupKind {
    /// B/BL: 26-bit signed offset (imm26), in words
    Branch26,
    /// B.cond/CBZ/CBNZ: 19-bit signed offset (imm19), in words
    Branch19,
}

// ── Assembler ────────────────────────────────────────────────────────────────

/// AArch64 machine code assembler.
///
/// Emits instructions into an internal byte buffer, then finalizes into
/// executable code via a JitBuffer.
pub struct Assembler {
    /// Emitted machine code bytes.
    code: Vec<u8>,
    /// Label ID → byte offset (None if not yet bound).
    labels: Vec<Option<usize>>,
    /// Forward references to patch when labels are bound.
    fixups: Vec<PendingFixup>,
}

impl Default for Assembler {
    fn default() -> Self {
        Self::new()
    }
}

impl Assembler {
    pub fn new() -> Self {
        Assembler {
            code: Vec::with_capacity(1024),
            labels: Vec::new(),
            fixups: Vec::new(),
        }
    }

    /// Current byte offset in the code buffer.
    pub fn offset(&self) -> usize {
        self.code.len()
    }

    /// Get the emitted code as a byte slice.
    pub fn code(&self) -> &[u8] {
        &self.code
    }

    /// Create a new unbound label.
    pub fn new_label(&mut self) -> Label {
        let id = self.labels.len() as u32;
        self.labels.push(None);
        Label(id)
    }

    /// Bind a label to the current code offset.
    /// Patches all pending forward references to this label.
    pub fn bind_label(&mut self, label: Label) {
        let offset = self.code.len();
        assert!(
            self.labels[label.0 as usize].is_none(),
            "label already bound"
        );
        self.labels[label.0 as usize] = Some(offset);

        // Backpatch pending fixups
        let fixups: Vec<_> = self
            .fixups
            .iter()
            .filter(|f| f.label == label)
            .map(|f| (f.instruction_offset, f.kind))
            .collect();

        for (inst_offset, kind) in fixups {
            let word_offset = (offset as i64 - inst_offset as i64) / 4;
            self.patch_branch(inst_offset, word_offset, kind);
        }
        self.fixups.retain(|f| f.label != label);
    }

    /// Finalize: resolve all labels, emit code into a JitBuffer, return CodePtr.
    pub fn finalize(&self, buffer: &mut JitBuffer) -> Result<CodePtr, MemoryError> {
        assert!(self.fixups.is_empty(), "unresolved forward references");
        buffer.emit_code(&self.code)
    }

    /// Finalize and return the raw code bytes (for testing without executable memory).
    pub fn finalize_to_vec(&self) -> Vec<u8> {
        assert!(self.fixups.is_empty(), "unresolved forward references");
        self.code.clone()
    }

    // ── Instruction emission helpers ─────────────────────────────────────

    fn emit_u32(&mut self, inst: u32) {
        self.code.extend_from_slice(&inst.to_le_bytes());
    }

    fn patch_branch(&mut self, inst_offset: usize, word_offset: i64, kind: FixupKind) {
        let mut inst =
            u32::from_le_bytes(self.code[inst_offset..inst_offset + 4].try_into().unwrap());

        match kind {
            FixupKind::Branch26 => {
                assert!(
                    (-(1i64 << 25)..(1i64 << 25)).contains(&word_offset),
                    "branch26 offset out of range: {}",
                    word_offset
                );
                let imm26 = (word_offset as u32) & 0x03FF_FFFF;
                inst = (inst & 0xFC00_0000) | imm26;
            }
            FixupKind::Branch19 => {
                assert!(
                    (-(1i64 << 18)..(1i64 << 18)).contains(&word_offset),
                    "branch19 offset out of range: {}",
                    word_offset
                );
                let imm19 = (word_offset as u32) & 0x7FFFF;
                inst = (inst & 0xFF00_001F) | (imm19 << 5);
            }
        }

        self.code[inst_offset..inst_offset + 4].copy_from_slice(&inst.to_le_bytes());
    }

    fn emit_branch_to_label(&mut self, label: Label, kind: FixupKind, base_inst: u32) {
        if let Some(target) = self.labels[label.0 as usize] {
            // Backward reference — patch immediately
            let word_offset = (target as i64 - self.code.len() as i64) / 4;
            let inst = match kind {
                FixupKind::Branch26 => {
                    let imm26 = (word_offset as u32) & 0x03FF_FFFF;
                    (base_inst & 0xFC00_0000) | imm26
                }
                FixupKind::Branch19 => {
                    let imm19 = (word_offset as u32) & 0x7FFFF;
                    (base_inst & 0xFF00_001F) | (imm19 << 5)
                }
            };
            self.emit_u32(inst);
        } else {
            // Forward reference — emit placeholder and record fixup
            self.fixups.push(PendingFixup {
                instruction_offset: self.code.len(),
                label,
                kind,
            });
            self.emit_u32(base_inst);
        }
    }

    // ── Arithmetic instructions ──────────────────────────────────────────

    /// ADD Xd, Xn, Xm
    pub fn add_reg(&mut self, rd: Reg, rn: Reg, rm: Reg) {
        // 1_00_01011_00_0_Rm_000000_Rn_Rd
        let inst = 0x8B000000
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// ADD Xd, Xn, #imm12
    pub fn add_imm(&mut self, rd: Reg, rn: Reg, imm12: u16) {
        assert!(imm12 < 4096, "immediate must be < 4096");
        // 1_00_100010_0_imm12_Rn_Rd
        let inst =
            0x91000000 | ((imm12 as u32) << 10) | ((rn.index() as u32) << 5) | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// SUB Xd, Xn, Xm
    pub fn sub_reg(&mut self, rd: Reg, rn: Reg, rm: Reg) {
        // 1_10_01011_00_0_Rm_000000_Rn_Rd
        let inst = 0xCB000000
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// SUB Xd, Xn, #imm12
    pub fn sub_imm(&mut self, rd: Reg, rn: Reg, imm12: u16) {
        assert!(imm12 < 4096, "immediate must be < 4096");
        // 1_10_100010_0_imm12_Rn_Rd
        let inst =
            0xD1000000 | ((imm12 as u32) << 10) | ((rn.index() as u32) << 5) | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// MUL Xd, Xn, Xm (alias for MADD Xd, Xn, Xm, XZR)
    pub fn mul(&mut self, rd: Reg, rn: Reg, rm: Reg) {
        // 1_00_11011_000_Rm_0_11111_Rn_Rd (MADD with Ra=XZR)
        let inst = 0x9B007C00
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// SDIV Xd, Xn, Xm
    pub fn sdiv(&mut self, rd: Reg, rn: Reg, rm: Reg) {
        // 1_00_11010110_Rm_00001_1_Rn_Rd
        let inst = 0x9AC00C00
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    // ── Logic instructions ───────────────────────────────────────────────

    /// AND Xd, Xn, Xm
    pub fn and_reg(&mut self, rd: Reg, rn: Reg, rm: Reg) {
        // 1_00_01010_00_0_Rm_000000_Rn_Rd
        let inst = 0x8A000000
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// ORR Xd, Xn, Xm
    pub fn orr_reg(&mut self, rd: Reg, rn: Reg, rm: Reg) {
        // 1_01_01010_00_0_Rm_000000_Rn_Rd
        let inst = 0xAA000000
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// EOR Xd, Xn, Xm
    pub fn eor_reg(&mut self, rd: Reg, rn: Reg, rm: Reg) {
        // 1_10_01010_00_0_Rm_000000_Rn_Rd
        let inst = 0xCA000000
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// LSL Xd, Xn, Xm (alias for LSLV)
    pub fn lsl_reg(&mut self, rd: Reg, rn: Reg, rm: Reg) {
        // 1_00_11010110_Rm_00100_0_Rn_Rd
        let inst = 0x9AC02000
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// LSR Xd, Xn, Xm (alias for LSRV)
    pub fn lsr_reg(&mut self, rd: Reg, rn: Reg, rm: Reg) {
        // 1_00_11010110_Rm_00100_1_Rn_Rd
        let inst = 0x9AC02400
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// ASR Xd, Xn, Xm (alias for ASRV)
    pub fn asr_reg(&mut self, rd: Reg, rn: Reg, rm: Reg) {
        // 1_00_11010110_Rm_00101_0_Rn_Rd
        let inst = 0x9AC02800
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    // ── Memory instructions ──────────────────────────────────────────────

    /// LDR Xd, [Xn, Xm] (register offset)
    pub fn ldr_reg(&mut self, rt: Reg, rn: Reg, rm: Reg) {
        // 11_111_0_00_01_1_Rm_011_0_10_Rn_Rt (LSL #0)
        let inst = 0xF8606800
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rt.index() as u32);
        self.emit_u32(inst);
    }

    /// LDR Xd, [Xn, #imm12] (unsigned offset, scaled by 8)
    pub fn ldr_imm(&mut self, rt: Reg, rn: Reg, imm12: u16) {
        assert!(imm12.is_multiple_of(8), "offset must be 8-byte aligned");
        let scaled = imm12 / 8;
        assert!(scaled < 4096, "scaled offset must be < 4096");
        // 11_111_0_01_01_imm12_Rn_Rt
        let inst =
            0xF9400000 | ((scaled as u32) << 10) | ((rn.index() as u32) << 5) | (rt.index() as u32);
        self.emit_u32(inst);
    }

    /// LDR Xd, [Xn, #simm9]! (pre-index)
    pub fn ldr_pre(&mut self, rt: Reg, rn: Reg, simm9: i16) {
        assert!(
            (-256..=255).contains(&simm9),
            "pre-index offset must be -256..255"
        );
        let imm9 = (simm9 as u32) & 0x1FF;
        // 11_111_0_00_01_0_imm9_11_Rn_Rt
        let inst = 0xF8400C00 | (imm9 << 12) | ((rn.index() as u32) << 5) | (rt.index() as u32);
        self.emit_u32(inst);
    }

    /// LDR Xd, [Xn], #simm9 (post-index)
    pub fn ldr_post(&mut self, rt: Reg, rn: Reg, simm9: i16) {
        assert!(
            (-256..=255).contains(&simm9),
            "post-index offset must be -256..255"
        );
        let imm9 = (simm9 as u32) & 0x1FF;
        // 11_111_0_00_01_0_imm9_01_Rn_Rt
        let inst = 0xF8400400 | (imm9 << 12) | ((rn.index() as u32) << 5) | (rt.index() as u32);
        self.emit_u32(inst);
    }

    /// STR Xd, [Xn, Xm] (register offset)
    pub fn str_reg(&mut self, rt: Reg, rn: Reg, rm: Reg) {
        // 11_111_0_00_00_1_Rm_011_0_10_Rn_Rt (LSL #0)
        let inst = 0xF8206800
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rt.index() as u32);
        self.emit_u32(inst);
    }

    /// STR Xd, [Xn, #imm12] (unsigned offset, scaled by 8)
    pub fn str_imm(&mut self, rt: Reg, rn: Reg, imm12: u16) {
        assert!(imm12.is_multiple_of(8), "offset must be 8-byte aligned");
        let scaled = imm12 / 8;
        assert!(scaled < 4096, "scaled offset must be < 4096");
        // 11_111_0_01_00_imm12_Rn_Rt
        let inst =
            0xF9000000 | ((scaled as u32) << 10) | ((rn.index() as u32) << 5) | (rt.index() as u32);
        self.emit_u32(inst);
    }

    /// STR Xd, [Xn, #simm9]! (pre-index)
    pub fn str_pre(&mut self, rt: Reg, rn: Reg, simm9: i16) {
        assert!(
            (-256..=255).contains(&simm9),
            "pre-index offset must be -256..255"
        );
        let imm9 = (simm9 as u32) & 0x1FF;
        // 11_111_0_00_00_0_imm9_11_Rn_Rt
        let inst = 0xF8000C00 | (imm9 << 12) | ((rn.index() as u32) << 5) | (rt.index() as u32);
        self.emit_u32(inst);
    }

    /// STR Xd, [Xn], #simm9 (post-index)
    pub fn str_post(&mut self, rt: Reg, rn: Reg, simm9: i16) {
        assert!(
            (-256..=255).contains(&simm9),
            "post-index offset must be -256..255"
        );
        let imm9 = (simm9 as u32) & 0x1FF;
        // 11_111_0_00_00_0_imm9_01_Rn_Rt
        let inst = 0xF8000400 | (imm9 << 12) | ((rn.index() as u32) << 5) | (rt.index() as u32);
        self.emit_u32(inst);
    }

    /// STP Xt1, Xt2, [Xn, #imm7]! (pre-index, store pair)
    pub fn stp_pre(&mut self, rt1: Reg, rt2: Reg, rn: Reg, simm7: i16) {
        assert!(simm7 % 8 == 0, "offset must be 8-byte aligned");
        let scaled = simm7 / 8;
        assert!(
            (-64..=63).contains(&scaled),
            "scaled offset must be -64..63"
        );
        let imm7 = (scaled as u32) & 0x7F;
        // 10_101_0_011_imm7_Rt2_Rn_Rt1
        let inst = 0xA9800000
            | (imm7 << 15)
            | ((rt2.index() as u32) << 10)
            | ((rn.index() as u32) << 5)
            | (rt1.index() as u32);
        self.emit_u32(inst);
    }

    /// LDP Xt1, Xt2, [Xn], #imm7 (post-index, load pair)
    pub fn ldp_post(&mut self, rt1: Reg, rt2: Reg, rn: Reg, simm7: i16) {
        assert!(simm7 % 8 == 0, "offset must be 8-byte aligned");
        let scaled = simm7 / 8;
        assert!(
            (-64..=63).contains(&scaled),
            "scaled offset must be -64..63"
        );
        let imm7 = (scaled as u32) & 0x7F;
        // 10_101_0_001_1_imm7_Rt2_Rn_Rt1
        let inst = 0xA8C00000
            | (imm7 << 15)
            | ((rt2.index() as u32) << 10)
            | ((rn.index() as u32) << 5)
            | (rt1.index() as u32);
        self.emit_u32(inst);
    }

    // ── Branch instructions ──────────────────────────────────────────────

    /// B <label> (unconditional branch)
    pub fn b(&mut self, label: Label) {
        // 0_00101_imm26
        self.emit_branch_to_label(label, FixupKind::Branch26, 0x14000000);
    }

    /// B <offset> (unconditional branch, immediate word offset)
    pub fn b_offset(&mut self, word_offset: i32) {
        let imm26 = (word_offset as u32) & 0x03FF_FFFF;
        self.emit_u32(0x14000000 | imm26);
    }

    /// B.cond <label> (conditional branch)
    pub fn b_cond(&mut self, cond: Cond, label: Label) {
        // 0101010_0_imm19_0_cond
        let base = 0x54000000 | (cond as u32);
        self.emit_branch_to_label(label, FixupKind::Branch19, base);
    }

    /// BL <label> (branch with link — function call)
    pub fn bl(&mut self, label: Label) {
        // 1_00101_imm26
        self.emit_branch_to_label(label, FixupKind::Branch26, 0x94000000);
    }

    /// BL <offset> (branch with link, immediate word offset)
    pub fn bl_offset(&mut self, word_offset: i32) {
        let imm26 = (word_offset as u32) & 0x03FF_FFFF;
        self.emit_u32(0x94000000 | imm26);
    }

    /// BLR Xn (branch with link to register — indirect call)
    pub fn blr(&mut self, rn: Reg) {
        // 1101011_0_0_01_11111_0000_0_0_Rn_00000
        self.emit_u32(0xD63F0000 | ((rn.index() as u32) << 5));
    }

    /// BR Xn (branch to register — indirect jump)
    pub fn br(&mut self, rn: Reg) {
        // 1101011_0_0_00_11111_0000_0_0_Rn_00000
        self.emit_u32(0xD61F0000 | ((rn.index() as u32) << 5));
    }

    /// RET (return from subroutine, default LR=X30)
    pub fn ret(&mut self) {
        self.ret_reg(LR);
    }

    /// RET Xn (return from subroutine via specified register)
    pub fn ret_reg(&mut self, rn: Reg) {
        // 1101011_0_0_10_11111_0000_0_0_Rn_00000
        self.emit_u32(0xD65F0000 | ((rn.index() as u32) << 5));
    }

    /// CBZ Xt, <label> (compare and branch if zero, 64-bit)
    pub fn cbz(&mut self, rt: Reg, label: Label) {
        // 1_011010_0_imm19_Rt
        let base = 0xB4000000 | (rt.index() as u32);
        self.emit_branch_to_label(label, FixupKind::Branch19, base);
    }

    /// CBNZ Xt, <label> (compare and branch if not zero, 64-bit)
    pub fn cbnz(&mut self, rt: Reg, label: Label) {
        // 1_011010_1_imm19_Rt
        let base = 0xB5000000 | (rt.index() as u32);
        self.emit_branch_to_label(label, FixupKind::Branch19, base);
    }

    // ── Comparison instructions ──────────────────────────────────────────

    /// CMP Xn, Xm (alias for SUBS XZR, Xn, Xm)
    pub fn cmp_reg(&mut self, rn: Reg, rm: Reg) {
        // SUBS XZR, Xn, Xm
        // 1_11_01011_00_0_Rm_000000_Rn_11111
        let inst = 0xEB00001F | ((rm.index() as u32) << 16) | ((rn.index() as u32) << 5);
        self.emit_u32(inst);
    }

    /// CMP Xn, #imm12 (alias for SUBS XZR, Xn, #imm12)
    pub fn cmp_imm(&mut self, rn: Reg, imm12: u16) {
        assert!(imm12 < 4096, "immediate must be < 4096");
        // 1_11_100010_0_imm12_Rn_11111
        let inst = 0xF100001F | ((imm12 as u32) << 10) | ((rn.index() as u32) << 5);
        self.emit_u32(inst);
    }

    /// TST Xn, Xm (alias for ANDS XZR, Xn, Xm)
    pub fn tst_reg(&mut self, rn: Reg, rm: Reg) {
        // 1_11_01010_00_0_Rm_000000_Rn_11111
        let inst = 0xEA00001F | ((rm.index() as u32) << 16) | ((rn.index() as u32) << 5);
        self.emit_u32(inst);
    }

    // ── Move instructions ────────────────────────────────────────────────

    /// MOV Xd, Xm (alias for ORR Xd, XZR, Xm)
    pub fn mov_reg(&mut self, rd: Reg, rm: Reg) {
        self.orr_reg(rd, ZR, rm);
    }

    /// MOVZ Xd, #imm16 (move wide with zero, at specified shift)
    /// shift must be 0, 16, 32, or 48.
    pub fn movz(&mut self, rd: Reg, imm16: u16, shift: u8) {
        assert!(
            shift == 0 || shift == 16 || shift == 32 || shift == 48,
            "shift must be 0, 16, 32, or 48"
        );
        let hw = (shift / 16) as u32;
        // 1_10_100101_hw_imm16_Rd
        let inst = 0xD2800000 | (hw << 21) | ((imm16 as u32) << 5) | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// MOVK Xd, #imm16, LSL #shift (move wide with keep)
    /// shift must be 0, 16, 32, or 48.
    pub fn movk(&mut self, rd: Reg, imm16: u16, shift: u8) {
        assert!(
            shift == 0 || shift == 16 || shift == 32 || shift == 48,
            "shift must be 0, 16, 32, or 48"
        );
        let hw = (shift / 16) as u32;
        // 1_11_100101_hw_imm16_Rd
        let inst = 0xF2800000 | (hw << 21) | ((imm16 as u32) << 5) | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// Load a full 64-bit immediate into a register using a movz/movk sequence.
    /// Uses the minimum number of instructions needed.
    pub fn mov_imm64(&mut self, rd: Reg, value: u64) {
        let hw0 = value as u16;
        let hw1 = (value >> 16) as u16;
        let hw2 = (value >> 32) as u16;
        let hw3 = (value >> 48) as u16;

        // Find the first non-zero halfword for movz, then movk the rest
        let halfwords = [(hw0, 0u8), (hw1, 16), (hw2, 32), (hw3, 48)];
        let non_zero: Vec<(u16, u8)> = halfwords
            .iter()
            .filter(|(hw, _)| *hw != 0)
            .copied()
            .collect();

        if non_zero.is_empty() {
            // Value is zero
            self.movz(rd, 0, 0);
            return;
        }

        let (first_hw, first_shift) = non_zero[0];
        self.movz(rd, first_hw, first_shift);

        for &(hw, shift) in &non_zero[1..] {
            self.movk(rd, hw, shift);
        }
    }

    // ── Floating-point instructions ──────────────────────────────────────

    /// FMOV Dd, Dn (double-precision move)
    pub fn fmov(&mut self, rd: FpReg, rn: FpReg) {
        // 0_00_11110_01_1_000000_10000_Rn_Rd
        let inst = 0x1E604000 | ((rn.index() as u32) << 5) | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// FMOV Dd, Xn (move general register to FP register, 64-bit)
    pub fn fmov_from_gpr(&mut self, rd: FpReg, rn: Reg) {
        // 1_00_11110_01_1_00_111_000000_Rn_Rd
        let inst = 0x9E670000 | ((rn.index() as u32) << 5) | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// FMOV Xd, Dn (move FP register to general register, 64-bit)
    pub fn fmov_to_gpr(&mut self, rd: Reg, rn: FpReg) {
        // 1_00_11110_01_1_00_110_000000_Rn_Rd
        let inst = 0x9E660000 | ((rn.index() as u32) << 5) | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// FADD Dd, Dn, Dm (double-precision add)
    pub fn fadd(&mut self, rd: FpReg, rn: FpReg, rm: FpReg) {
        // 0_00_11110_01_1_Rm_001_0_10_Rn_Rd
        let inst = 0x1E602800
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// FSUB Dd, Dn, Dm (double-precision subtract)
    pub fn fsub(&mut self, rd: FpReg, rn: FpReg, rm: FpReg) {
        // 0_00_11110_01_1_Rm_001_1_10_Rn_Rd
        let inst = 0x1E603800
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// FMUL Dd, Dn, Dm (double-precision multiply)
    pub fn fmul(&mut self, rd: FpReg, rn: FpReg, rm: FpReg) {
        // 0_00_11110_01_1_Rm_000_0_10_Rn_Rd
        let inst = 0x1E600800
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// FDIV Dd, Dn, Dm (double-precision divide)
    pub fn fdiv(&mut self, rd: FpReg, rn: FpReg, rm: FpReg) {
        // 0_00_11110_01_1_Rm_000_1_10_Rn_Rd
        let inst = 0x1E601800
            | ((rm.index() as u32) << 16)
            | ((rn.index() as u32) << 5)
            | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// FCMP Dn, Dm (double-precision compare, sets NZCV)
    pub fn fcmp(&mut self, rn: FpReg, rm: FpReg) {
        // 0_00_11110_01_1_Rm_00_1000_Rn_00_000
        let inst = 0x1E602000 | ((rm.index() as u32) << 16) | ((rn.index() as u32) << 5);
        self.emit_u32(inst);
    }

    /// SCVTF Dd, Xn (signed integer to double-precision float)
    pub fn scvtf(&mut self, rd: FpReg, rn: Reg) {
        // 1_00_11110_01_1_00_010_000000_Rn_Rd
        let inst = 0x9E620000 | ((rn.index() as u32) << 5) | (rd.index() as u32);
        self.emit_u32(inst);
    }

    /// FCVTZS Xd, Dn (double-precision float to signed integer, round toward zero)
    pub fn fcvtzs(&mut self, rd: Reg, rn: FpReg) {
        // 1_00_11110_01_1_11_000_000000_Rn_Rd
        let inst = 0x9E780000 | ((rn.index() as u32) << 5) | (rd.index() as u32);
        self.emit_u32(inst);
    }

    // ── FP memory instructions ───────────────────────────────────────────

    /// LDR Dd, [Xn, #imm12] (load double from memory, unsigned offset scaled by 8)
    pub fn fp_ldr_imm(&mut self, rt: FpReg, rn: Reg, imm12: u16) {
        assert!(imm12.is_multiple_of(8), "offset must be 8-byte aligned");
        let scaled = imm12 / 8;
        assert!(scaled < 4096, "scaled offset must be < 4096");
        // 11_111_1_01_01_imm12_Rn_Rt
        let inst =
            0xFD400000 | ((scaled as u32) << 10) | ((rn.index() as u32) << 5) | (rt.index() as u32);
        self.emit_u32(inst);
    }

    /// STR Dd, [Xn, #imm12] (store double to memory, unsigned offset scaled by 8)
    pub fn fp_str_imm(&mut self, rt: FpReg, rn: Reg, imm12: u16) {
        assert!(imm12.is_multiple_of(8), "offset must be 8-byte aligned");
        let scaled = imm12 / 8;
        assert!(scaled < 4096, "scaled offset must be < 4096");
        // 11_111_1_01_00_imm12_Rn_Rt
        let inst =
            0xFD000000 | ((scaled as u32) << 10) | ((rn.index() as u32) << 5) | (rt.index() as u32);
        self.emit_u32(inst);
    }

    // ── Entry/exit stubs ─────────────────────────────────────────────────

    /// Emit a function prologue that saves callee-saved registers and sets up
    /// the frame pointer. This is the entry stub for JIT-compiled functions.
    ///
    /// Saves: FP, LR, X19-X28 (12 registers = 96 bytes)
    /// Frame layout after prologue:
    /// ```text
    /// [SP+88] X28
    /// [SP+80] X27
    /// [SP+72] X26
    /// [SP+64] X25
    /// [SP+56] X24
    /// [SP+48] X23
    /// [SP+40] X22
    /// [SP+32] X21
    /// [SP+24] X20
    /// [SP+16] X19
    /// [SP+8]  LR
    /// [SP+0]  FP  <-- FP points here
    /// ```
    pub fn emit_prologue(&mut self) {
        // Save FP and LR, pre-index SP by -96 (room for 12 registers)
        self.stp_pre(FP, LR, SP, -96);
        // Set up the frame pointer. MOV Xd, SP is an alias of ADD Xd, SP, #0;
        // the ORR-based mov_reg would read register 31 as XZR, not SP, and
        // would zero FP instead of copying SP.
        self.add_imm(FP, SP, 0);
        // Save callee-saved registers
        self.str_imm(X19, SP, 16);
        self.str_imm(X20, SP, 24);
        self.str_imm(X21, SP, 32);
        self.str_imm(X22, SP, 40);
        self.str_imm(X23, SP, 48);
        self.str_imm(X24, SP, 56);
        self.str_imm(X25, SP, 64);
        self.str_imm(X26, SP, 72);
        self.str_imm(X27, SP, 80);
        self.str_imm(X28, SP, 88);
    }

    /// Emit a function epilogue that restores callee-saved registers and returns.
    /// This is the exit stub for JIT-compiled functions.
    pub fn emit_epilogue(&mut self) {
        // Restore callee-saved registers
        self.ldr_imm(X19, SP, 16);
        self.ldr_imm(X20, SP, 24);
        self.ldr_imm(X21, SP, 32);
        self.ldr_imm(X22, SP, 40);
        self.ldr_imm(X23, SP, 48);
        self.ldr_imm(X24, SP, 56);
        self.ldr_imm(X25, SP, 64);
        self.ldr_imm(X26, SP, 72);
        self.ldr_imm(X27, SP, 80);
        self.ldr_imm(X28, SP, 88);
        // Restore FP and LR, post-index SP by +96
        self.ldp_post(FP, LR, SP, 96);
        self.ret();
    }

    /// Emit a NOP (no-operation) instruction.
    pub fn nop(&mut self) {
        self.emit_u32(0xD503201F);
    }

    /// Emit a BRK #imm16 (breakpoint) instruction.
    pub fn brk(&mut self, imm16: u16) {
        // 1101_0100_001_imm16_000_00
        self.emit_u32(0xD4200000 | ((imm16 as u32) << 5));
    }
}

// ── Tests ────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: assemble one instruction and return the 32-bit encoding.
    fn encode_one(f: impl FnOnce(&mut Assembler)) -> u32 {
        let mut asm = Assembler::new();
        f(&mut asm);
        assert_eq!(asm.code().len(), 4, "expected exactly one instruction");
        u32::from_le_bytes(asm.code().try_into().unwrap())
    }

    // ── Arithmetic ───────────────────────────────────────────────────────

    #[test]
    fn test_add_reg() {
        // ADD X0, X1, X2 → 0x8B020020
        let inst = encode_one(|a| a.add_reg(X0, X1, X2));
        assert_eq!(inst, 0x8B020020);
    }

    #[test]
    fn test_add_imm() {
        // ADD X0, X1, #42 → 0x9100A820
        let inst = encode_one(|a| a.add_imm(X0, X1, 42));
        assert_eq!(inst, 0x9100A820);
    }

    #[test]
    fn test_sub_reg() {
        // SUB X3, X4, X5 → 0xCB050083
        let inst = encode_one(|a| a.sub_reg(X3, X4, X5));
        assert_eq!(inst, 0xCB050083);
    }

    #[test]
    fn test_sub_imm() {
        // SUB X0, X1, #10 → 0xD1002820
        let inst = encode_one(|a| a.sub_imm(X0, X1, 10));
        assert_eq!(inst, 0xD1002820);
    }

    #[test]
    fn test_mul() {
        // MUL X0, X1, X2 → MADD X0, X1, X2, XZR → 0x9B027C20
        let inst = encode_one(|a| a.mul(X0, X1, X2));
        assert_eq!(inst, 0x9B027C20);
    }

    #[test]
    fn test_sdiv() {
        // SDIV X0, X1, X2 → 0x9AC20C20
        let inst = encode_one(|a| a.sdiv(X0, X1, X2));
        assert_eq!(inst, 0x9AC20C20);
    }

    // ── Logic ────────────────────────────────────────────────────────────

    #[test]
    fn test_and_reg() {
        // AND X0, X1, X2 → 0x8A020020
        let inst = encode_one(|a| a.and_reg(X0, X1, X2));
        assert_eq!(inst, 0x8A020020);
    }

    #[test]
    fn test_orr_reg() {
        // ORR X0, X1, X2 → 0xAA020020
        let inst = encode_one(|a| a.orr_reg(X0, X1, X2));
        assert_eq!(inst, 0xAA020020);
    }

    #[test]
    fn test_eor_reg() {
        // EOR X0, X1, X2 → 0xCA020020
        let inst = encode_one(|a| a.eor_reg(X0, X1, X2));
        assert_eq!(inst, 0xCA020020);
    }

    #[test]
    fn test_lsl_reg() {
        // LSL X0, X1, X2 → LSLV → 0x9AC22020
        let inst = encode_one(|a| a.lsl_reg(X0, X1, X2));
        assert_eq!(inst, 0x9AC22020);
    }

    #[test]
    fn test_lsr_reg() {
        // LSR X0, X1, X2 → LSRV → 0x9AC22420
        let inst = encode_one(|a| a.lsr_reg(X0, X1, X2));
        assert_eq!(inst, 0x9AC22420);
    }

    #[test]
    fn test_asr_reg() {
        // ASR X0, X1, X2 → ASRV → 0x9AC22820
        let inst = encode_one(|a| a.asr_reg(X0, X1, X2));
        assert_eq!(inst, 0x9AC22820);
    }

    // ── Memory ───────────────────────────────────────────────────────────

    #[test]
    fn test_ldr_reg() {
        // LDR X0, [X1, X2] → 0xF8626820
        let inst = encode_one(|a| a.ldr_reg(X0, X1, X2));
        assert_eq!(inst, 0xF8626820);
    }

    #[test]
    fn test_ldr_imm() {
        // LDR X0, [X1, #16] → offset=16/8=2 → 0xF9400820
        let inst = encode_one(|a| a.ldr_imm(X0, X1, 16));
        assert_eq!(inst, 0xF9400820);
    }

    #[test]
    fn test_str_imm() {
        // STR X0, [X1, #8] → offset=8/8=1 → 0xF9000420
        let inst = encode_one(|a| a.str_imm(X0, X1, 8));
        assert_eq!(inst, 0xF9000420);
    }

    #[test]
    fn test_ldr_pre() {
        // LDR X0, [X1, #-16]! → simm9=-16 → 0xF85F0C20
        let inst = encode_one(|a| a.ldr_pre(X0, X1, -16));
        let imm9 = ((-16i16) as u32) & 0x1FF;
        let expected = 0xF8400C00 | (imm9 << 12) | (1 << 5) | 0;
        assert_eq!(inst, expected);
    }

    #[test]
    fn test_str_post() {
        // STR X0, [X1], #8 → simm9=8 → 0xF8000420 | (8<<12) | (1<<5) | 0
        let inst = encode_one(|a| a.str_post(X0, X1, 8));
        let expected = 0xF8000400 | (8 << 12) | (1 << 5) | 0;
        assert_eq!(inst, expected);
    }

    #[test]
    fn test_stp_pre() {
        // STP FP, LR, [SP, #-96]! → scaled=-12 → imm7 bits
        let inst = encode_one(|a| a.stp_pre(FP, LR, SP, -96));
        let scaled = ((-12i16) as u32) & 0x7F;
        let expected =
            0xA9800000 | (scaled << 15) | ((LR.index() as u32) << 10) | ((31u32) << 5) | 29;
        assert_eq!(inst, expected);
    }

    #[test]
    fn test_ldp_post() {
        // LDP FP, LR, [SP], #96 → scaled=12 → imm7 bits
        let inst = encode_one(|a| a.ldp_post(FP, LR, SP, 96));
        let expected = 0xA8C00000 | (12 << 15) | ((LR.index() as u32) << 10) | ((31u32) << 5) | 29;
        assert_eq!(inst, expected);
    }

    // ── Branch ───────────────────────────────────────────────────────────

    #[test]
    fn test_ret() {
        // RET → 0xD65F03C0
        let inst = encode_one(|a| a.ret());
        assert_eq!(inst, 0xD65F03C0);
    }

    #[test]
    fn test_blr() {
        // BLR X8 → 0xD63F0100
        let inst = encode_one(|a| a.blr(X8));
        assert_eq!(inst, 0xD63F0100);
    }

    #[test]
    fn test_br() {
        // BR X16 → 0xD61F0200
        let inst = encode_one(|a| a.br(X16));
        assert_eq!(inst, 0xD61F0200);
    }

    #[test]
    fn test_b_offset() {
        // B +4 words → 0x14000004
        let inst = encode_one(|a| a.b_offset(4));
        assert_eq!(inst, 0x14000004);
    }

    #[test]
    fn test_bl_offset() {
        // BL +1 word → 0x94000001
        let inst = encode_one(|a| a.bl_offset(1));
        assert_eq!(inst, 0x94000001);
    }

    // ── Comparison ───────────────────────────────────────────────────────

    #[test]
    fn test_cmp_reg() {
        // CMP X0, X1 → SUBS XZR, X0, X1 → 0xEB01001F
        let inst = encode_one(|a| a.cmp_reg(X0, X1));
        assert_eq!(inst, 0xEB01001F);
    }

    #[test]
    fn test_cmp_imm() {
        // CMP X0, #5 → SUBS XZR, X0, #5 → 0xF100141F
        let inst = encode_one(|a| a.cmp_imm(X0, 5));
        assert_eq!(inst, 0xF100141F);
    }

    #[test]
    fn test_tst_reg() {
        // TST X0, X1 → ANDS XZR, X0, X1 → 0xEA01001F
        let inst = encode_one(|a| a.tst_reg(X0, X1));
        assert_eq!(inst, 0xEA01001F);
    }

    // ── Move ─────────────────────────────────────────────────────────────

    #[test]
    fn test_mov_reg() {
        // MOV X0, X1 → ORR X0, XZR, X1 → 0xAA0103E0
        let inst = encode_one(|a| a.mov_reg(X0, X1));
        assert_eq!(inst, 0xAA0103E0);
    }

    #[test]
    fn test_movz() {
        // MOVZ X0, #42 → 0xD2800540
        let inst = encode_one(|a| a.movz(X0, 42, 0));
        assert_eq!(inst, 0xD2800540);
    }

    #[test]
    fn test_movz_shifted() {
        // MOVZ X0, #0xFFFF, LSL #16 → hw=1
        let inst = encode_one(|a| a.movz(X0, 0xFFFF, 16));
        let expected = 0xD2800000 | (1 << 21) | (0xFFFF << 5) | 0;
        assert_eq!(inst, expected);
    }

    #[test]
    fn test_movk() {
        // MOVK X0, #0x1234, LSL #16 → hw=1
        let inst = encode_one(|a| a.movk(X0, 0x1234, 16));
        let expected = 0xF2800000 | (1 << 21) | (0x1234 << 5) | 0;
        assert_eq!(inst, expected);
    }

    #[test]
    fn test_mov_imm64_small() {
        // Small value: just one MOVZ
        let mut asm = Assembler::new();
        asm.mov_imm64(X0, 42);
        assert_eq!(asm.code().len(), 4); // Single MOVZ
        let inst = u32::from_le_bytes(asm.code().try_into().unwrap());
        assert_eq!(inst, 0xD2800540); // MOVZ X0, #42
    }

    #[test]
    fn test_mov_imm64_large() {
        // 0xDEAD_BEEF_CAFE_BABE needs all four halfwords
        let mut asm = Assembler::new();
        asm.mov_imm64(X0, 0xDEAD_BEEF_CAFE_BABE);
        // 4 non-zero halfwords → 4 instructions (1 movz + 3 movk)
        assert_eq!(asm.code().len(), 16);
    }

    #[test]
    fn test_mov_imm64_zero() {
        let mut asm = Assembler::new();
        asm.mov_imm64(X0, 0);
        assert_eq!(asm.code().len(), 4); // Single MOVZ X0, #0
    }

    // ── Floating-point ───────────────────────────────────────────────────

    #[test]
    fn test_fadd() {
        // FADD D0, D1, D2 → 0x1E622820
        let inst = encode_one(|a| a.fadd(D0, D1, D2));
        assert_eq!(inst, 0x1E622820);
    }

    #[test]
    fn test_fsub() {
        // FSUB D0, D1, D2 → 0x1E623820
        let inst = encode_one(|a| a.fsub(D0, D1, D2));
        assert_eq!(inst, 0x1E623820);
    }

    #[test]
    fn test_fmul() {
        // FMUL D0, D1, D2 → 0x1E620820
        let inst = encode_one(|a| a.fmul(D0, D1, D2));
        assert_eq!(inst, 0x1E620820);
    }

    #[test]
    fn test_fdiv() {
        // FDIV D0, D1, D2 → 0x1E621820
        let inst = encode_one(|a| a.fdiv(D0, D1, D2));
        assert_eq!(inst, 0x1E621820);
    }

    #[test]
    fn test_fcmp() {
        // FCMP D0, D1 → 0x1E612000
        let inst = encode_one(|a| a.fcmp(D0, D1));
        assert_eq!(inst, 0x1E612000);
    }

    #[test]
    fn test_scvtf() {
        // SCVTF D0, X0 → 0x9E620000
        let inst = encode_one(|a| a.scvtf(D0, X0));
        assert_eq!(inst, 0x9E620000);
    }

    #[test]
    fn test_fcvtzs() {
        // FCVTZS X0, D0 → 0x9E780000
        let inst = encode_one(|a| a.fcvtzs(X0, D0));
        assert_eq!(inst, 0x9E780000);
    }

    #[test]
    fn test_fmov() {
        // FMOV D0, D1 → 0x1E604020
        let inst = encode_one(|a| a.fmov(D0, D1));
        assert_eq!(inst, 0x1E604020);
    }

    #[test]
    fn test_fmov_from_gpr() {
        // FMOV D0, X0 → 0x9E670000
        let inst = encode_one(|a| a.fmov_from_gpr(D0, X0));
        assert_eq!(inst, 0x9E670000);
    }

    #[test]
    fn test_fmov_to_gpr() {
        // FMOV X0, D0 → 0x9E660000
        let inst = encode_one(|a| a.fmov_to_gpr(X0, D0));
        assert_eq!(inst, 0x9E660000);
    }

    // ── Labels and branches ──────────────────────────────────────────────

    #[test]
    fn test_backward_branch() {
        let mut asm = Assembler::new();
        let top = asm.new_label();
        asm.bind_label(top); // offset 0
        asm.nop(); // offset 0, 4 bytes
        asm.b(top); // offset 4, branch back to 0 → offset = -1 word
        let code = asm.finalize_to_vec();
        let branch_inst = u32::from_le_bytes(code[4..8].try_into().unwrap());
        // B -1 → imm26 = 0x03FFFFFF (2's complement of -1 masked to 26 bits)
        assert_eq!(branch_inst, 0x14000000 | (0x03FFFFFFu32));
    }

    #[test]
    fn test_forward_branch() {
        let mut asm = Assembler::new();
        let skip = asm.new_label();
        asm.b(skip); // offset 0, forward ref
        asm.nop(); // offset 4
        asm.bind_label(skip); // offset 8 → branch offset = +2 words
        let code = asm.finalize_to_vec();
        let branch_inst = u32::from_le_bytes(code[0..4].try_into().unwrap());
        assert_eq!(branch_inst, 0x14000002); // B +2
    }

    #[test]
    fn test_conditional_branch() {
        let mut asm = Assembler::new();
        let target = asm.new_label();
        asm.cmp_imm(X0, 0); // offset 0
        asm.b_cond(Cond::EQ, target); // offset 4, forward ref
        asm.nop(); // offset 8
        asm.bind_label(target); // offset 12 → branch offset = +2 words
        let code = asm.finalize_to_vec();
        let branch_inst = u32::from_le_bytes(code[4..8].try_into().unwrap());
        // B.EQ +2 → 0x54000040 (imm19=2, cond=0)
        assert_eq!(branch_inst, 0x54000040);
    }

    #[test]
    fn test_cbz() {
        let mut asm = Assembler::new();
        let target = asm.new_label();
        asm.cbz(X0, target); // offset 0, forward ref
        asm.nop(); // offset 4
        asm.bind_label(target); // offset 8 → branch offset = +2 words
        let code = asm.finalize_to_vec();
        let inst = u32::from_le_bytes(code[0..4].try_into().unwrap());
        // CBZ X0, +2 → imm19=2 → 0xB4000040
        assert_eq!(inst, 0xB4000040);
    }

    #[test]
    fn test_cbnz() {
        let mut asm = Assembler::new();
        let target = asm.new_label();
        asm.cbnz(X1, target); // offset 0
        asm.nop(); // offset 4
        asm.nop(); // offset 8
        asm.bind_label(target); // offset 12 → +3 words
        let code = asm.finalize_to_vec();
        let inst = u32::from_le_bytes(code[0..4].try_into().unwrap());
        // CBNZ X1, +3 → imm19=3 → 0xB5000061
        assert_eq!(inst, 0xB5000061);
    }

    // ── Misc ─────────────────────────────────────────────────────────────

    #[test]
    fn test_nop() {
        let inst = encode_one(|a| a.nop());
        assert_eq!(inst, 0xD503201F);
    }

    #[test]
    fn test_brk() {
        // BRK #0 → 0xD4200000
        let inst = encode_one(|a| a.brk(0));
        assert_eq!(inst, 0xD4200000);
        // BRK #1 → 0xD4200020
        let inst = encode_one(|a| a.brk(1));
        assert_eq!(inst, 0xD4200020);
    }

    // ── Prologue/epilogue ────────────────────────────────────────────────

    #[test]
    fn test_prologue_epilogue_size() {
        let mut asm = Assembler::new();
        asm.emit_prologue();
        let prologue_size = asm.offset();
        asm.emit_epilogue();
        let total_size = asm.offset();

        // Prologue: STP + MOV + 10 STR = 12 instructions = 48 bytes
        assert_eq!(prologue_size, 48);
        // Epilogue: 10 LDR + LDP + RET = 12 instructions = 48 bytes
        assert_eq!(total_size - prologue_size, 48);
    }

    // ── Execution tests (run actual JIT code) ────────────────────────────

    #[test]
    fn test_execute_add() {
        let mut asm = Assembler::new();
        asm.emit_prologue();
        // x0 already has first arg, add 5
        asm.add_imm(X0, X0, 5);
        asm.emit_epilogue();

        let mut buf = JitBuffer::new().expect("buffer");
        let ptr = asm.finalize(&mut buf).expect("finalize");
        let result = unsafe { ptr.call_u64_to_u64(10) };
        assert_eq!(result, 15);
    }

    #[test]
    fn test_execute_multiply() {
        let mut asm = Assembler::new();
        asm.emit_prologue();
        // return x0 * x1
        asm.mul(X0, X0, X1);
        asm.emit_epilogue();

        let mut buf = JitBuffer::new().expect("buffer");
        let ptr = asm.finalize(&mut buf).expect("finalize");
        let result = unsafe { ptr.call_u64x2_to_u64(7, 6) };
        assert_eq!(result, 42);
    }

    #[test]
    fn test_execute_branch_loop() {
        // Compute sum 1..=10 using a loop
        let mut asm = Assembler::new();
        asm.emit_prologue();

        // x19 = counter (1..=10), x20 = accumulator
        asm.movz(X19, 1, 0); // counter = 1
        asm.movz(X20, 0, 0); // sum = 0
        asm.movz(X21, 10, 0); // limit = 10

        let loop_top = asm.new_label();
        let loop_end = asm.new_label();

        asm.bind_label(loop_top);
        asm.cmp_reg(X19, X21);
        asm.b_cond(Cond::GT, loop_end); // if counter > 10, exit

        asm.add_reg(X20, X20, X19); // sum += counter
        asm.add_imm(X19, X19, 1); // counter++
        asm.b(loop_top);

        asm.bind_label(loop_end);
        asm.mov_reg(X0, X20); // return sum

        asm.emit_epilogue();

        let mut buf = JitBuffer::new().expect("buffer");
        let ptr = asm.finalize(&mut buf).expect("finalize");
        let result = unsafe { ptr.call_void_to_u64() };
        assert_eq!(result, 55); // 1+2+...+10 = 55
    }

    #[test]
    fn test_execute_conditional() {
        // if (x0 == 0) return 100 else return 200
        let mut asm = Assembler::new();
        asm.emit_prologue();

        let is_zero = asm.new_label();
        let done = asm.new_label();

        asm.cbz(X0, is_zero);
        asm.movz(X0, 200, 0);
        asm.b(done);
        asm.bind_label(is_zero);
        asm.movz(X0, 100, 0);
        asm.bind_label(done);

        asm.emit_epilogue();

        let mut buf = JitBuffer::new().expect("buffer");
        let ptr = asm.finalize(&mut buf).expect("finalize");
        assert_eq!(unsafe { ptr.call_u64_to_u64(0) }, 100);
        assert_eq!(unsafe { ptr.call_u64_to_u64(1) }, 200);
        assert_eq!(unsafe { ptr.call_u64_to_u64(42) }, 200);
    }

    #[test]
    fn test_execute_memory_ops() {
        // Store a value to stack, load it back
        let mut asm = Assembler::new();
        asm.emit_prologue();

        // Allocate 16 bytes on stack
        asm.sub_imm(SP, SP, 16);
        // Store x0 at [SP]
        asm.str_imm(X0, SP, 0);
        // Load it back into x1
        asm.ldr_imm(X1, SP, 0);
        // Add 1 to prove we loaded the right value
        asm.add_imm(X0, X1, 1);
        // Deallocate
        asm.add_imm(SP, SP, 16);

        asm.emit_epilogue();

        let mut buf = JitBuffer::new().expect("buffer");
        let ptr = asm.finalize(&mut buf).expect("finalize");
        let result = unsafe { ptr.call_u64_to_u64(99) };
        assert_eq!(result, 100); // 99 + 1
    }

    #[test]
    fn test_execute_mov_imm64() {
        let mut asm = Assembler::new();
        asm.emit_prologue();
        asm.mov_imm64(X0, 0x0000_DEAD_0000_BEEF);
        asm.emit_epilogue();

        let mut buf = JitBuffer::new().expect("buffer");
        let ptr = asm.finalize(&mut buf).expect("finalize");
        let result = unsafe { ptr.call_void_to_u64() };
        assert_eq!(result, 0x0000_DEAD_0000_BEEF);
    }

    #[test]
    fn test_execute_fp_arithmetic() {
        // Return 3.0 + 4.0 as u64 bits
        let mut asm = Assembler::new();
        asm.emit_prologue();

        // Load 3.0 into D0: move bits via GPR
        asm.mov_imm64(X0, 3.0f64.to_bits());
        asm.fmov_from_gpr(D0, X0);

        // Load 4.0 into D1
        asm.mov_imm64(X0, 4.0f64.to_bits());
        asm.fmov_from_gpr(D1, X0);

        // D0 = D0 + D1 (3.0 + 4.0 = 7.0)
        asm.fadd(D0, D0, D1);

        // Move result back to X0 as raw bits
        asm.fmov_to_gpr(X0, D0);

        asm.emit_epilogue();

        let mut buf = JitBuffer::new().expect("buffer");
        let ptr = asm.finalize(&mut buf).expect("finalize");
        let result = unsafe { ptr.call_void_to_u64() };
        let value = f64::from_bits(result);
        assert_eq!(value, 7.0);
    }

    #[test]
    fn test_execute_int_to_float_roundtrip() {
        // Convert 42 (int) to float, then back to int
        let mut asm = Assembler::new();
        asm.emit_prologue();

        asm.movz(X0, 42, 0);
        asm.scvtf(D0, X0); // D0 = 42.0
        asm.fcvtzs(X0, D0); // X0 = 42

        asm.emit_epilogue();

        let mut buf = JitBuffer::new().expect("buffer");
        let ptr = asm.finalize(&mut buf).expect("finalize");
        let result = unsafe { ptr.call_void_to_u64() };
        assert_eq!(result, 42);
    }
}
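
As a quick worked example of mov_imm64's instruction selection (in the style of the encoding tests above, reusing the constant from test_execute_mov_imm64): a value with two non-zero halfwords costs exactly two instructions.

    // 0x0000_DEAD_0000_BEEF has non-zero halfwords only at bit offsets 0 and 32,
    // so mov_imm64 emits MOVZ X0, #0xBEEF followed by MOVK X0, #0xDEAD, LSL #32.
    let mut asm = Assembler::new();
    asm.mov_imm64(X0, 0x0000_DEAD_0000_BEEF);
    assert_eq!(asm.code().len(), 8); // 2 instructions x 4 bytes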
+218
crates/js/src/jit/buffer.rs
··· 1 + //! JIT code buffer: manages executable memory pages and compiled code regions. 2 + 3 + use super::memory::{ExecutableMemory, MemoryError}; 4 + 5 + /// Default code page size: 64 KiB. 6 + const DEFAULT_PAGE_SIZE: usize = 64 * 1024; 7 + 8 + /// A compiled code entry point that can be called. 9 + /// 10 + /// Wraps a function pointer into JIT-compiled code. The caller is responsible 11 + /// for ensuring the underlying memory remains valid (not freed) while the 12 + /// CodePtr is in use. 13 + #[derive(Clone, Copy)] 14 + pub struct CodePtr { 15 + ptr: *const u8, 16 + } 17 + 18 + impl CodePtr { 19 + /// Create a new CodePtr from a raw pointer to executable code. 20 + /// 21 + /// # Safety 22 + /// The pointer must point to valid AArch64 machine code in executable memory. 23 + pub unsafe fn new(ptr: *const u8) -> Self { 24 + CodePtr { ptr } 25 + } 26 + 27 + /// Get the raw function pointer. 28 + pub fn as_ptr(&self) -> *const u8 { 29 + self.ptr 30 + } 31 + 32 + /// Call this code as a function that takes no arguments and returns a u64. 33 + /// 34 + /// # Safety 35 + /// The code must be valid AArch64 machine code that follows the calling 36 + /// convention for `extern "C" fn() -> u64`. 37 + pub unsafe fn call_void_to_u64(&self) -> u64 { 38 + let func: extern "C" fn() -> u64 = std::mem::transmute(self.ptr); 39 + func() 40 + } 41 + 42 + /// Call this code as a function that takes one u64 argument and returns u64. 43 + /// 44 + /// # Safety 45 + /// The code must be valid AArch64 machine code that follows the calling 46 + /// convention for `extern "C" fn(u64) -> u64`. 47 + pub unsafe fn call_u64_to_u64(&self, arg: u64) -> u64 { 48 + let func: extern "C" fn(u64) -> u64 = std::mem::transmute(self.ptr); 49 + func(arg) 50 + } 51 + 52 + /// Call this code as a function that takes two u64 arguments and returns u64. 53 + /// 54 + /// # Safety 55 + /// The code must be valid AArch64 machine code that follows the calling 56 + /// convention for `extern "C" fn(u64, u64) -> u64`. 57 + pub unsafe fn call_u64x2_to_u64(&self, arg0: u64, arg1: u64) -> u64 { 58 + let func: extern "C" fn(u64, u64) -> u64 = std::mem::transmute(self.ptr); 59 + func(arg0, arg1) 60 + } 61 + } 62 + 63 + // SAFETY: CodePtr is a plain function pointer — safe to send across threads. 64 + unsafe impl Send for CodePtr {} 65 + unsafe impl Sync for CodePtr {} 66 + 67 + /// Manages allocated code pages and tracks used/free space. 68 + /// 69 + /// Each page is an independently allocated region of executable memory. 70 + /// Code is appended linearly within a page. When a page is full, a new 71 + /// page is allocated. 72 + pub struct JitBuffer { 73 + /// All allocated code pages. 74 + pages: Vec<ExecutableMemory>, 75 + /// Byte offset of the next free byte in the current (last) page. 76 + offset: usize, 77 + /// Size of each page allocation. 78 + page_size: usize, 79 + } 80 + 81 + impl JitBuffer { 82 + /// Create a new JitBuffer with the default page size (64 KiB). 83 + pub fn new() -> Result<Self, MemoryError> { 84 + Self::with_page_size(DEFAULT_PAGE_SIZE) 85 + } 86 + 87 + /// Create a new JitBuffer with a custom page size. 88 + pub fn with_page_size(page_size: usize) -> Result<Self, MemoryError> { 89 + let page = ExecutableMemory::allocate(page_size)?; 90 + Ok(JitBuffer { 91 + pages: vec![page], 92 + offset: 0, 93 + page_size, 94 + }) 95 + } 96 + 97 + /// Write compiled code into the buffer and return a CodePtr to it. 98 + /// 99 + /// The code is written in writable mode, then the page is made executable. 
    /// If the current page doesn't have enough space, a new page is allocated.
    pub fn emit_code(&mut self, code: &[u8]) -> Result<CodePtr, MemoryError> {
        // Spill to a fresh page if the code doesn't fit in the current one.
        // Note: code larger than a single page will still fail with OutOfBounds.
        if self.offset + code.len() > self.current_page().size() {
            self.allocate_new_page()?;
        }

        let page = self.pages.last_mut().unwrap();
        page.make_writable();
        let code_offset = self.offset;
        page.write_at(code_offset, code)?;
        self.offset += code.len();
        page.make_executable();

        unsafe { Ok(CodePtr::new(page.as_ptr().add(code_offset))) }
    }

    /// Total bytes consumed across all pages. Earlier pages count as fully
    /// used, including any tail space wasted when code spilled to a new page.
    pub fn total_code_size(&self) -> usize {
        if self.pages.is_empty() {
            return 0;
        }
        (self.pages.len() - 1) * self.page_size + self.offset
    }

    /// Available bytes in the current page.
    pub fn available(&self) -> usize {
        self.current_page().size() - self.offset
    }

    /// Number of allocated pages.
    pub fn page_count(&self) -> usize {
        self.pages.len()
    }

    fn current_page(&self) -> &ExecutableMemory {
        self.pages.last().unwrap()
    }

    fn allocate_new_page(&mut self) -> Result<(), MemoryError> {
        let page = ExecutableMemory::allocate(self.page_size)?;
        self.pages.push(page);
        self.offset = 0;
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn emit_and_call() {
        let mut buf = JitBuffer::new().expect("buffer creation failed");
        // movz x0, #7; ret
        let code: [u8; 8] = [
            0xE0, 0x00, 0x80, 0xD2, // movz x0, #7
            0xC0, 0x03, 0x5F, 0xD6, // ret
        ];
        let ptr = buf.emit_code(&code).expect("emit failed");
        let result = unsafe { ptr.call_void_to_u64() };
        assert_eq!(result, 7);
    }

    #[test]
    fn multiple_functions() {
        let mut buf = JitBuffer::new().expect("buffer creation failed");

        // Function 1: return 10
        let code1: [u8; 8] = [
            0x40, 0x01, 0x80, 0xD2, // movz x0, #10
            0xC0, 0x03, 0x5F, 0xD6, // ret
        ];
        let ptr1 = buf.emit_code(&code1).expect("emit1 failed");

        // Function 2: return 20
        let code2: [u8; 8] = [
            0x80, 0x02, 0x80, 0xD2, // movz x0, #20
            0xC0, 0x03, 0x5F, 0xD6, // ret
        ];
        let ptr2 = buf.emit_code(&code2).expect("emit2 failed");

        assert_eq!(unsafe { ptr1.call_void_to_u64() }, 10);
        assert_eq!(unsafe { ptr2.call_void_to_u64() }, 20);
        assert_eq!(buf.total_code_size(), 16);
    }

    #[test]
    fn page_overflow_allocates_new_page() {
        // Use a tiny page size to force overflow
        let mut buf = JitBuffer::with_page_size(4096).expect("buffer creation failed");
        assert_eq!(buf.page_count(), 1);

        // Fill the page with ret instructions
        let ret_inst = [0xC0u8, 0x03, 0x5F, 0xD6]; // ret
        let count = 4096 / 4;
        for _ in 0..count {
            buf.emit_code(&ret_inst).expect("emit failed");
        }
        assert_eq!(buf.page_count(), 1);

        // One more should trigger a new page
        buf.emit_code(&ret_inst).expect("emit failed");
        assert_eq!(buf.page_count(), 2);
    }

    #[test]
    fn code_ptr_with_argument() {
        let mut buf = JitBuffer::new().expect("buffer creation failed");
        // add x0, x0, #5; ret (return arg + 5)
        let code: [u8; 8] = [
            0x00, 0x14, 0x00, 0x91, // add x0, x0, #5
            0xC0, 0x03, 0x5F, 0xD6, // ret
        ];
        let ptr = buf.emit_code(&code).expect("emit failed");
        let result = unsafe { ptr.call_u64_to_u64(10) };
        assert_eq!(result, 15);
    }
}
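The call_void_to_u64 / call_u64_to_u64 helpers used in these tests belong to CodePtr, whose definition sits earlier in buffer.rs and outside this excerpt. As a rough sketch of the pattern (assuming CodePtr is a plain wrapper around the raw code pointer, which is all emit_code hands it), each helper amounts to a transmute to the matching extern "C" function type, the same approach the memory.rs tests below use inline:

// Sketch only: the real CodePtr lives in buffer.rs and is not shown here.
#[derive(Debug, Clone, Copy)]
pub struct CodePtr(*const u8);

impl CodePtr {
    pub fn new(ptr: *const u8) -> Self {
        CodePtr(ptr)
    }

    /// Call as `extern "C" fn() -> u64`.
    ///
    /// # Safety
    /// The pointer must reference valid, executable AArch64 code that
    /// follows the AAPCS64 calling convention.
    pub unsafe fn call_void_to_u64(self) -> u64 {
        let f: extern "C" fn() -> u64 = std::mem::transmute(self.0);
        f()
    }

    /// Call as `extern "C" fn(u64) -> u64` (argument and result in x0).
    pub unsafe fn call_u64_to_u64(self, arg: u64) -> u64 {
        let f: extern "C" fn(u64) -> u64 = std::mem::transmute(self.0);
        f(arg)
    }
}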
+220
crates/js/src/jit/memory.rs
//! Executable memory allocation using mmap with a W^X policy.
//!
//! Memory is allocated with write permissions for code emission, then flipped
//! to read+execute before the code is called. This follows the Write XOR
//! Execute (W^X) security policy. On Apple Silicon the flip is per-thread,
//! via MAP_JIT and pthread_jit_write_protect_np rather than mprotect.

use std::ptr;

// macOS mmap constants and FFI
mod ffi {
    pub const PROT_READ: i32 = 0x01;
    pub const PROT_WRITE: i32 = 0x02;
    pub const PROT_EXEC: i32 = 0x04;

    pub const MAP_PRIVATE: i32 = 0x0002;
    pub const MAP_ANONYMOUS: i32 = 0x1000;
    pub const MAP_JIT: i32 = 0x0800;

    pub const MAP_FAILED: *mut u8 = !0usize as *mut u8;

    extern "C" {
        pub fn mmap(
            addr: *mut u8,
            len: usize,
            prot: i32,
            flags: i32,
            fd: i32,
            offset: i64,
        ) -> *mut u8;
        pub fn munmap(addr: *mut u8, len: usize) -> i32;

        // macOS specific: required for MAP_JIT pages on Apple Silicon
        pub fn pthread_jit_write_protect_np(enabled: i32);
    }
}

/// A region of memory that can be made executable.
///
/// Manages the lifecycle of mmap'd pages and provides W^X transitions.
pub struct ExecutableMemory {
    ptr: *mut u8,
    size: usize,
}

impl ExecutableMemory {
    /// Allocate `size` bytes of memory (rounded up to page size).
    /// The memory starts with read+write permissions for code emission.
    pub fn allocate(size: usize) -> Result<Self, MemoryError> {
        // Bookkeeping granularity. Note: macOS on Apple Silicon uses 16 KiB
        // hardware pages, so the kernel may round the mapping up further.
        let page_size = 4096usize;
        let size = (size + page_size - 1) & !(page_size - 1);

        // On Apple Silicon with MAP_JIT, we allocate with RWX and use
        // pthread_jit_write_protect_np to toggle between W and X.
        let prot = ffi::PROT_READ | ffi::PROT_WRITE | ffi::PROT_EXEC;
        let flags = ffi::MAP_PRIVATE | ffi::MAP_ANONYMOUS | ffi::MAP_JIT;

        let ptr = unsafe { ffi::mmap(ptr::null_mut(), size, prot, flags, -1, 0) };

        if ptr == ffi::MAP_FAILED {
            return Err(MemoryError::AllocationFailed);
        }

        // Start in writable mode
        unsafe {
            ffi::pthread_jit_write_protect_np(0);
        }

        Ok(ExecutableMemory { ptr, size })
    }

    /// Get a mutable pointer to the memory for writing code.
    /// Must be in writable mode (call `make_writable` first if needed).
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.ptr
    }

    /// Get an immutable pointer to the memory.
    pub fn as_ptr(&self) -> *const u8 {
        self.ptr
    }

    /// Total allocated size in bytes.
    pub fn size(&self) -> usize {
        self.size
    }

    /// Switch to writable mode (disable execute permission on this thread).
    /// On Apple Silicon, this uses pthread_jit_write_protect_np.
    pub fn make_writable(&self) {
        unsafe {
            ffi::pthread_jit_write_protect_np(0);
        }
    }

    /// Switch to executable mode (disable write permission on this thread).
    /// Must be called before executing any code written to this memory.
    pub fn make_executable(&self) {
        unsafe {
            ffi::pthread_jit_write_protect_np(1);
            // Instruction cache flush — required on AArch64 after writing code
            self.flush_icache();
        }
    }

    /// Flush the instruction cache for the entire allocated region.
    /// Required on AArch64 because instruction and data caches are not coherent.
    unsafe fn flush_icache(&self) {
        // sys_icache_invalidate is provided by libkern on macOS/ARM64
        extern "C" {
            fn sys_icache_invalidate(start: *const u8, size: usize);
        }
        sys_icache_invalidate(self.ptr, self.size);
    }

    /// Write bytes to the memory at the given offset.
    /// Returns an error if the write would exceed the allocated size.
    pub fn write_at(&mut self, offset: usize, data: &[u8]) -> Result<(), MemoryError> {
        if offset + data.len() > self.size {
            return Err(MemoryError::OutOfBounds);
        }
        unsafe {
            ptr::copy_nonoverlapping(data.as_ptr(), self.ptr.add(offset), data.len());
        }
        Ok(())
    }
}

impl Drop for ExecutableMemory {
    fn drop(&mut self) {
        unsafe {
            ffi::munmap(self.ptr, self.size);
        }
    }
}

// SAFETY: The memory region is owned and only accessed through &mut self for writes.
// The raw pointer is not shared — only one ExecutableMemory instance owns each region.
unsafe impl Send for ExecutableMemory {}

/// Errors from executable memory operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MemoryError {
    /// mmap failed to allocate memory
    AllocationFailed,
    /// Write would exceed allocated memory bounds
    OutOfBounds,
}

impl std::fmt::Display for MemoryError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            MemoryError::AllocationFailed => write!(f, "failed to allocate executable memory"),
            MemoryError::OutOfBounds => write!(f, "write exceeds allocated memory bounds"),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn allocate_and_write() {
        let mut mem = ExecutableMemory::allocate(4096).expect("allocation failed");
        assert!(mem.size() >= 4096);
        // Write a ret instruction (0xD65F03C0)
        mem.write_at(0, &[0xC0, 0x03, 0x5F, 0xD6])
            .expect("write failed");
    }

    #[test]
    fn write_out_of_bounds() {
        let mut mem = ExecutableMemory::allocate(4096).expect("allocation failed");
        let result = mem.write_at(4095, &[0x00, 0x00]);
        assert_eq!(result, Err(MemoryError::OutOfBounds));
    }

    #[test]
    fn execute_ret_stub() {
        let mut mem = ExecutableMemory::allocate(4096).expect("allocation failed");
        // AArch64: mov x0, #42; ret
        // movz x0, #42 = 0xD2800540
        // ret          = 0xD65F03C0
        let code: [u8; 8] = [
            0x40, 0x05, 0x80, 0xD2, // movz x0, #42
            0xC0, 0x03, 0x5F, 0xD6, // ret
        ];
        mem.write_at(0, &code).expect("write failed");
        mem.make_executable();

        let func: extern "C" fn() -> u64 = unsafe { std::mem::transmute(mem.as_ptr()) };
        let result = func();
        assert_eq!(result, 42);
    }

    #[test]
    fn wxe_transitions() {
        let mut mem = ExecutableMemory::allocate(4096).expect("allocation failed");
        // Write code
        let code: [u8; 8] = [
            0x00, 0x01, 0x80, 0xD2, // movz x0, #8
            0xC0, 0x03, 0x5F, 0xD6, // ret
        ];
        mem.write_at(0, &code).expect("write failed");
        mem.make_executable();

        let func: extern "C" fn() -> u64 = unsafe { std::mem::transmute(mem.as_ptr()) };
        assert_eq!(func(), 8);

        // Transition back to writable, patch code
        mem.make_writable();
        // movz x0, #99 = 0xD2800C60
        mem.write_at(0, &[0x60, 0x0C, 0x80, 0xD2])
            .expect("write failed");
        mem.make_executable();

        let func2: extern "C" fn() -> u64 = unsafe { std::mem::transmute(mem.as_ptr()) };
        assert_eq!(func2(), 99);
    }
}
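Because pthread_jit_write_protect_np toggles a per-thread permission, every write path has to pair make_writable with make_executable, or a later call into the code (or a later write) faults. One possible hardening, not part of this diff, is an RAII guard that restores execute mode (and flushes the icache via make_executable) even on an early return:

// Sketch only: a guard type layered on the ExecutableMemory API above.
struct WriteGuard<'a> {
    mem: &'a mut ExecutableMemory,
}

impl<'a> WriteGuard<'a> {
    fn new(mem: &'a mut ExecutableMemory) -> Self {
        mem.make_writable();
        WriteGuard { mem }
    }

    fn write_at(&mut self, offset: usize, data: &[u8]) -> Result<(), MemoryError> {
        self.mem.write_at(offset, data)
    }
}

impl Drop for WriteGuard<'_> {
    fn drop(&mut self) {
        // Restores executable mode and flushes the icache.
        self.mem.make_executable();
    }
}

A caller would write through the guard and let scope end do the flip back; since the guard holds the &mut borrow, all writes are funneled through it, and drop order guarantees the region is executable again before the borrow is released.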
+14
crates/js/src/jit/mod.rs
//! JIT compiler infrastructure for AArch64.
//!
//! This module provides the low-level building blocks for JIT compilation:
//! - Executable memory allocation with W^X protection
//! - AArch64 machine code assembler
//! - Entry/exit stubs for VM ↔ JIT transitions

pub mod assembler;
pub mod buffer;
pub mod memory;

pub use assembler::Assembler;
pub use buffer::{CodePtr, JitBuffer};
pub use memory::ExecutableMemory;
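Taken together, the intended flow is assemble, emit, call. A sketch under assumed names (Assembler::new, mov_imm64, ret, and a finish accessor returning the encoded bytes are inferred from the instruction list in the commit message, not quoted from the API):

// Sketch only: method names on Assembler are assumptions, not a verbatim
// excerpt of the API added in this diff.
use crate::jit::assembler::X0;
use crate::jit::{Assembler, JitBuffer};

fn jit_constant(value: u64) -> u64 {
    let mut asm = Assembler::new();
    asm.mov_imm64(X0, value); // materialize the constant in the return register
    asm.ret();                // AAPCS64: u64 results come back in x0

    let mut buf = JitBuffer::new().expect("buffer creation failed");
    let ptr = buf.emit_code(asm.finish()).expect("emit failed");
    // SAFETY: emit_code flipped the page to executable before returning ptr.
    unsafe { ptr.call_void_to_u64() }
}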
+1
crates/js/src/lib.rs
···
  pub mod gc;
  pub mod iframe_bridge;
  pub mod indexeddb;
+ pub mod jit;
  pub mod lexer;
  pub mod parser;
  pub mod regex;