Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

gpu: nova-core: add PIO support for loading firmware images

Turing and GA100 use programmed I/O (PIO) instead of DMA to upload
firmware images into Falcon memory.

Signed-off-by: Timur Tabi <ttabi@nvidia.com>
Co-developed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Link: https://patch.msgid.link/20260306-turing_prep-v11-6-8f0042c5d026@nvidia.com

authored by

Timur Tabi and committed by
Alexandre Courbot
c1d2f747 9725005e

+251 -3
+216 -2
drivers/gpu/nova-core/falcon.rs
··· 367 367 368 368 /// Returns the load parameters for `DMEM`. 369 369 fn dmem_load_params(&self) -> FalconDmaLoadTarget; 370 + 371 + /// Returns an adapter that provides the required parameter to load this firmware using PIO. 372 + /// 373 + /// This can only fail if some `u32` fields cannot be converted to `u16`, or if the indices in 374 + /// the headers are invalid. 375 + fn try_as_pio_loadable(&self) -> Result<FalconDmaFirmwarePioAdapter<'_, Self>> { 376 + let new_pio_imem = |params: FalconDmaLoadTarget, secure| { 377 + let start = usize::from_safe_cast(params.src_start); 378 + let end = start + usize::from_safe_cast(params.len); 379 + let data = self.as_slice().get(start..end).ok_or(EINVAL)?; 380 + 381 + let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?; 382 + 383 + Ok::<_, Error>(FalconPioImemLoadTarget { 384 + data, 385 + dst_start, 386 + secure, 387 + start_tag: dst_start >> 8, 388 + }) 389 + }; 390 + 391 + let imem_sec = new_pio_imem(self.imem_sec_load_params(), true)?; 392 + 393 + let imem_ns = if let Some(params) = self.imem_ns_load_params() { 394 + Some(new_pio_imem(params, false)?) 395 + } else { 396 + None 397 + }; 398 + 399 + let dmem = { 400 + let params = self.dmem_load_params(); 401 + let start = usize::from_safe_cast(params.src_start); 402 + let end = start + usize::from_safe_cast(params.len); 403 + let data = self.as_slice().get(start..end).ok_or(EINVAL)?; 404 + 405 + let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?; 406 + 407 + FalconPioDmemLoadTarget { data, dst_start } 408 + }; 409 + 410 + Ok(FalconDmaFirmwarePioAdapter { 411 + fw: self, 412 + imem_sec, 413 + imem_ns, 414 + dmem, 415 + }) 416 + } 417 + } 418 + 419 + /// Represents a portion of the firmware to be loaded into IMEM using PIO. 
420 + #[derive(Clone)] 421 + pub(crate) struct FalconPioImemLoadTarget<'a> { 422 + pub(crate) data: &'a [u8], 423 + pub(crate) dst_start: u16, 424 + pub(crate) secure: bool, 425 + pub(crate) start_tag: u16, 426 + } 427 + 428 + /// Represents a portion of the firmware to be loaded into DMEM using PIO. 429 + #[derive(Clone)] 430 + pub(crate) struct FalconPioDmemLoadTarget<'a> { 431 + pub(crate) data: &'a [u8], 432 + pub(crate) dst_start: u16, 433 + } 434 + 435 + /// Trait for providing PIO load parameters of falcon firmwares. 436 + pub(crate) trait FalconPioLoadable { 437 + /// Returns the load parameters for Secure `IMEM`, if any. 438 + fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>; 439 + 440 + /// Returns the load parameters for Non-Secure `IMEM`, if any. 441 + fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>; 442 + 443 + /// Returns the load parameters for `DMEM`. 444 + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_>; 445 + } 446 + 447 + /// Adapter type that makes any DMA-loadable firmware also loadable via PIO. 448 + /// 449 + /// Created using [`FalconDmaLoadable::try_as_pio_loadable`]. 450 + pub(crate) struct FalconDmaFirmwarePioAdapter<'a, T: FalconDmaLoadable + ?Sized> { 451 + /// Reference to the DMA firmware. 452 + fw: &'a T, 453 + /// Validated secure IMEM parameters. 454 + imem_sec: FalconPioImemLoadTarget<'a>, 455 + /// Validated non-secure IMEM parameters. 456 + imem_ns: Option<FalconPioImemLoadTarget<'a>>, 457 + /// Validated DMEM parameters. 
458 + dmem: FalconPioDmemLoadTarget<'a>, 459 + } 460 + 461 + impl<'a, T> FalconPioLoadable for FalconDmaFirmwarePioAdapter<'a, T> 462 + where 463 + T: FalconDmaLoadable + ?Sized, 464 + { 465 + fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { 466 + Some(self.imem_sec.clone()) 467 + } 468 + 469 + fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { 470 + self.imem_ns.clone() 471 + } 472 + 473 + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> { 474 + self.dmem.clone() 475 + } 476 + } 477 + 478 + impl<'a, T> FalconFirmware for FalconDmaFirmwarePioAdapter<'a, T> 479 + where 480 + T: FalconDmaLoadable + FalconFirmware + ?Sized, 481 + { 482 + type Target = <T as FalconFirmware>::Target; 483 + 484 + fn brom_params(&self) -> FalconBromParams { 485 + self.fw.brom_params() 486 + } 487 + 488 + fn boot_addr(&self) -> u32 { 489 + self.fw.boot_addr() 490 + } 370 491 } 371 492 372 493 /// Trait for a falcon firmware. ··· 533 412 534 413 regs::NV_PFALCON_FALCON_RM::default() 535 414 .set_value(regs::NV_PMC_BOOT_0::read(bar).into()) 415 + .write(bar, &E::ID); 416 + 417 + Ok(()) 418 + } 419 + 420 + /// Falcons support up to four ports, but we only ever use one, so just hard-code it. 421 + const PIO_PORT: usize = 0; 422 + 423 + /// Write a slice to Falcon IMEM memory using programmed I/O (PIO). 424 + /// 425 + /// Returns `EINVAL` if `load_offsets.data.len()` is not a multiple of 4. 426 + fn pio_wr_imem_slice(&self, bar: &Bar0, load_offsets: FalconPioImemLoadTarget<'_>) -> Result { 427 + // Rejecting misaligned images here allows us to avoid checking 428 + // inside the loops. 
429 + if load_offsets.data.len() % 4 != 0 { 430 + return Err(EINVAL); 431 + } 432 + 433 + regs::NV_PFALCON_FALCON_IMEMC::default() 434 + .set_secure(load_offsets.secure) 435 + .set_aincw(true) 436 + .set_offs(load_offsets.dst_start) 437 + .write(bar, &E::ID, Self::PIO_PORT); 438 + 439 + for (n, block) in load_offsets.data.chunks(MEM_BLOCK_ALIGNMENT).enumerate() { 440 + let n = u16::try_from(n)?; 441 + let tag: u16 = load_offsets.start_tag.checked_add(n).ok_or(ERANGE)?; 442 + regs::NV_PFALCON_FALCON_IMEMT::default().set_tag(tag).write( 443 + bar, 444 + &E::ID, 445 + Self::PIO_PORT, 446 + ); 447 + for word in block.chunks_exact(4) { 448 + let w = [word[0], word[1], word[2], word[3]]; 449 + regs::NV_PFALCON_FALCON_IMEMD::default() 450 + .set_data(u32::from_le_bytes(w)) 451 + .write(bar, &E::ID, Self::PIO_PORT); 452 + } 453 + } 454 + 455 + Ok(()) 456 + } 457 + 458 + /// Write a slice to Falcon DMEM memory using programmed I/O (PIO). 459 + /// 460 + /// Returns `EINVAL` if `load_offsets.data.len()` is not a multiple of 4. 461 + fn pio_wr_dmem_slice(&self, bar: &Bar0, load_offsets: FalconPioDmemLoadTarget<'_>) -> Result { 462 + // Rejecting misaligned images here allows us to avoid checking 463 + // inside the loops. 464 + if load_offsets.data.len() % 4 != 0 { 465 + return Err(EINVAL); 466 + } 467 + 468 + regs::NV_PFALCON_FALCON_DMEMC::default() 469 + .set_aincw(true) 470 + .set_offs(load_offsets.dst_start) 471 + .write(bar, &E::ID, Self::PIO_PORT); 472 + 473 + for word in load_offsets.data.chunks_exact(4) { 474 + let w = [word[0], word[1], word[2], word[3]]; 475 + regs::NV_PFALCON_FALCON_DMEMD::default() 476 + .set_data(u32::from_le_bytes(w)) 477 + .write(bar, &E::ID, Self::PIO_PORT); 478 + } 479 + 480 + Ok(()) 481 + } 482 + 483 + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. 
484 + pub(crate) fn pio_load<F: FalconFirmware<Target = E> + FalconPioLoadable>( 485 + &self, 486 + bar: &Bar0, 487 + fw: &F, 488 + ) -> Result { 489 + regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID) 490 + .set_allow_phys_no_ctx(true) 491 + .write(bar, &E::ID); 492 + 493 + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); 494 + 495 + if let Some(imem_ns) = fw.imem_ns_load_params() { 496 + self.pio_wr_imem_slice(bar, imem_ns)?; 497 + } 498 + if let Some(imem_sec) = fw.imem_sec_load_params() { 499 + self.pio_wr_imem_slice(bar, imem_sec)?; 500 + } 501 + self.pio_wr_dmem_slice(bar, fw.dmem_load_params())?; 502 + 503 + self.hal.program_brom(self, bar, &fw.brom_params())?; 504 + 505 + regs::NV_PFALCON_FALCON_BOOTVEC::default() 506 + .set_value(fw.boot_addr()) 536 507 .write(bar, &E::ID); 537 508 538 509 Ok(()) ··· 872 659 self.hal.is_riscv_active(bar) 873 660 } 874 661 875 - // Load a firmware image into Falcon memory 662 + /// Load a firmware image into Falcon memory, using the preferred method for the current 663 + /// chipset. 876 664 pub(crate) fn load<F: FalconFirmware<Target = E> + FalconDmaLoadable>( 877 665 &self, 878 666 dev: &Device<device::Bound>, ··· 882 668 ) -> Result { 883 669 match self.hal.load_method() { 884 670 LoadMethod::Dma => self.dma_load(dev, bar, fw), 885 - LoadMethod::Pio => Err(ENOTSUPP), 671 + LoadMethod::Pio => self.pio_load(bar, &fw.try_as_pio_loadable()?), 886 672 } 887 673 } 888 674
+5 -1
drivers/gpu/nova-core/falcon/hal.rs
··· 58 58 /// Reset the falcon engine. 59 59 fn reset_eng(&self, bar: &Bar0) -> Result; 60 60 61 - /// returns the method needed to load data into Falcon memory 61 + /// Returns the method used to load data into the falcon's memory. 62 + /// 63 + /// The only chipsets supporting PIO are those < GA102, and PIO is the preferred method for 64 + /// these. For GA102 and later, the PIO registers appear to be masked to the CPU, so DMA is the 65 + /// only usable method. 62 66 fn load_method(&self) -> LoadMethod; 63 67 } 64 68
+30
drivers/gpu/nova-core/regs.rs
··· 364 364 1:1 startcpu as bool; 365 365 }); 366 366 367 + // IMEM access control register. Up to 4 ports are available for IMEM access. 368 + register!(NV_PFALCON_FALCON_IMEMC @ PFalconBase[0x00000180[4; 16]] { 369 + 15:0 offs as u16, "IMEM block and word offset"; 370 + 24:24 aincw as bool, "Auto-increment on write"; 371 + 28:28 secure as bool, "Access secure IMEM"; 372 + }); 373 + 374 + // IMEM data register. Reading/writing this register accesses IMEM at the address 375 + // specified by the corresponding IMEMC register. 376 + register!(NV_PFALCON_FALCON_IMEMD @ PFalconBase[0x00000184[4; 16]] { 377 + 31:0 data as u32; 378 + }); 379 + 380 + // IMEM tag register. Used to set the tag for the current IMEM block. 381 + register!(NV_PFALCON_FALCON_IMEMT @ PFalconBase[0x00000188[4; 16]] { 382 + 15:0 tag as u16; 383 + }); 384 + 385 + // DMEM access control register. Up to 8 ports are available for DMEM access. 386 + register!(NV_PFALCON_FALCON_DMEMC @ PFalconBase[0x000001c0[8; 8]] { 387 + 15:0 offs as u16, "DMEM block and word offset"; 388 + 24:24 aincw as bool, "Auto-increment on write"; 389 + }); 390 + 391 + // DMEM data register. Reading/writing this register accesses DMEM at the address 392 + // specified by the corresponding DMEMC register. 393 + register!(NV_PFALCON_FALCON_DMEMD @ PFalconBase[0x000001c4[8; 8]] { 394 + 31:0 data as u32; 395 + }); 396 + 367 397 // Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon 368 398 // instance. 369 399 register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] {