Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

gpu: nova-core: create falcon firmware DMA objects lazily

When DMA was the only loading option for falcon firmwares, we decided to
store them in DMA objects as soon as they were loaded from disk and
patch them in-place to avoid having to do an extra copy.

This decision complicates the PIO loading patch considerably, and does
not even stand on its own merits: it requires 8 unsafe statements in the
code that would not exist if we stored the firmware in a `KVVec` and
copied it into a DMA object at the last minute.

The cost of the copy is, as can be expected, imperceptible at runtime.
Thus, switch to a lazy DMA object creation model and simplify our code
a bit. This also has the nice side effect of being a better fit for
PIO loading.

Reviewed-by: Eliot Courtney <ecourtney@nvidia.com>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Link: https://patch.msgid.link/20260306-turing_prep-v11-1-8f0042c5d026@nvidia.com
[acourbot@nvidia.com: add TODO item to switch back to a coherent
allocation when it becomes convenient to do so.]
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>

+108 -127
+38 -19
drivers/gpu/nova-core/falcon.rs
··· 2 2 3 3 //! Falcon microprocessor base support 4 4 5 - use core::ops::Deref; 6 - 7 5 use hal::FalconHal; 8 6 9 7 use kernel::{ 10 - device, 8 + device::{ 9 + self, 10 + Device, // 11 + }, 11 12 dma::{ 12 13 DmaAddress, 13 14 DmaMask, // ··· 16 15 io::poll::read_poll_timeout, 17 16 prelude::*, 18 17 sync::aref::ARef, 19 - time::{ 20 - Delta, // 21 - }, 18 + time::Delta, 22 19 }; 23 20 24 21 use crate::{ ··· 350 351 351 352 /// Trait for providing load parameters of falcon firmwares. 352 353 pub(crate) trait FalconLoadParams { 354 + /// Returns the firmware data as a slice of bytes. 355 + fn as_slice(&self) -> &[u8]; 356 + 353 357 /// Returns the load parameters for Secure `IMEM`. 354 358 fn imem_sec_load_params(&self) -> FalconLoadTarget; 355 359 ··· 372 370 373 371 /// Trait for a falcon firmware. 374 372 /// 375 - /// A falcon firmware can be loaded on a given engine, and is presented in the form of a DMA 376 - /// object. 377 - pub(crate) trait FalconFirmware: FalconLoadParams + Deref<Target = DmaObject> { 373 + /// A falcon firmware can be loaded on a given engine. 374 + pub(crate) trait FalconFirmware: FalconLoadParams { 378 375 /// Engine on which this firmware is to be loaded. 379 376 type Target: FalconEngine; 380 377 } ··· 416 415 /// `target_mem`. 417 416 /// 418 417 /// `sec` is set if the loaded firmware is expected to run in secure mode. 419 - fn dma_wr<F: FalconFirmware<Target = E>>( 418 + fn dma_wr( 420 419 &self, 421 420 bar: &Bar0, 422 - fw: &F, 421 + dma_obj: &DmaObject, 423 422 target_mem: FalconMem, 424 423 load_offsets: FalconLoadTarget, 425 424 ) -> Result { ··· 431 430 // For DMEM we can fold the start offset into the DMA handle. 
432 431 let (src_start, dma_start) = match target_mem { 433 432 FalconMem::ImemSecure | FalconMem::ImemNonSecure => { 434 - (load_offsets.src_start, fw.dma_handle()) 433 + (load_offsets.src_start, dma_obj.dma_handle()) 435 434 } 436 435 FalconMem::Dmem => ( 437 436 0, 438 - fw.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?, 437 + dma_obj.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?, 439 438 ), 440 439 }; 441 440 if dma_start % DmaAddress::from(DMA_LEN) > 0 { ··· 467 466 dev_err!(self.dev, "DMA transfer length overflow\n"); 468 467 return Err(EOVERFLOW); 469 468 } 470 - Some(upper_bound) if usize::from_safe_cast(upper_bound) > fw.size() => { 469 + Some(upper_bound) if usize::from_safe_cast(upper_bound) > dma_obj.size() => { 471 470 dev_err!(self.dev, "DMA transfer goes beyond range of DMA object\n"); 472 471 return Err(EINVAL); 473 472 } ··· 516 515 } 517 516 518 517 /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. 519 - fn dma_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { 518 + fn dma_load<F: FalconFirmware<Target = E>>( 519 + &self, 520 + dev: &Device<device::Bound>, 521 + bar: &Bar0, 522 + fw: &F, 523 + ) -> Result { 520 524 // The Non-Secure section only exists on firmware used by Turing and GA100, and 521 525 // those platforms do not use DMA. 522 526 if fw.imem_ns_load_params().is_some() { ··· 529 523 return Err(EINVAL); 530 524 } 531 525 526 + // Create DMA object with firmware content as the source of the DMA engine. 
527 + let dma_obj = DmaObject::from_data(dev, fw.as_slice())?; 528 + 532 529 self.dma_reset(bar); 533 530 regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 0, |v| { 534 531 v.set_target(FalconFbifTarget::CoherentSysmem) 535 532 .set_mem_type(FalconFbifMemType::Physical) 536 533 }); 537 534 538 - self.dma_wr(bar, fw, FalconMem::ImemSecure, fw.imem_sec_load_params())?; 539 - self.dma_wr(bar, fw, FalconMem::Dmem, fw.dmem_load_params())?; 535 + self.dma_wr( 536 + bar, 537 + &dma_obj, 538 + FalconMem::ImemSecure, 539 + fw.imem_sec_load_params(), 540 + )?; 541 + self.dma_wr(bar, &dma_obj, FalconMem::Dmem, fw.dmem_load_params())?; 540 542 541 543 self.hal.program_brom(self, bar, &fw.brom_params())?; 542 544 ··· 655 641 } 656 642 657 643 // Load a firmware image into Falcon memory 658 - pub(crate) fn load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { 644 + pub(crate) fn load<F: FalconFirmware<Target = E>>( 645 + &self, 646 + dev: &Device<device::Bound>, 647 + bar: &Bar0, 648 + fw: &F, 649 + ) -> Result { 659 650 match self.hal.load_method() { 660 - LoadMethod::Dma => self.dma_load(bar, fw), 651 + LoadMethod::Dma => self.dma_load(dev, bar, fw), 661 652 LoadMethod::Pio => Err(ENOTSUPP), 662 653 } 663 654 }
+20 -20
drivers/gpu/nova-core/firmware.rs
··· 15 15 }; 16 16 17 17 use crate::{ 18 - dma::DmaObject, 19 18 falcon::{ 20 19 FalconFirmware, 21 20 FalconLoadTarget, // ··· 291 292 struct Signed; 292 293 impl SignedState for Signed {} 293 294 294 - /// A [`DmaObject`] containing a specific microcode ready to be loaded into a falcon. 295 + /// Microcode to be loaded into a specific falcon. 295 296 /// 296 297 /// This is module-local and meant for sub-modules to use internally. 297 298 /// ··· 299 300 /// before it can be loaded (with an exception for development hardware). The 300 301 /// [`Self::patch_signature`] and [`Self::no_patch_signature`] methods are used to transition the 301 302 /// firmware to its [`Signed`] state. 302 - struct FirmwareDmaObject<F: FalconFirmware, S: SignedState>(DmaObject, PhantomData<(F, S)>); 303 + // TODO: Consider replacing this with a coherent memory object once `CoherentAllocation` supports 304 + // temporary CPU-exclusive access to the object without unsafe methods. 305 + struct FirmwareObject<F: FalconFirmware, S: SignedState>(KVVec<u8>, PhantomData<(F, S)>); 303 306 304 307 /// Trait for signatures to be patched directly into a given firmware. 305 308 /// 306 309 /// This is module-local and meant for sub-modules to use internally. 307 310 trait FirmwareSignature<F: FalconFirmware>: AsRef<[u8]> {} 308 311 309 - impl<F: FalconFirmware> FirmwareDmaObject<F, Unsigned> { 310 - /// Patches the firmware at offset `sig_base_img` with `signature`. 312 + impl<F: FalconFirmware> FirmwareObject<F, Unsigned> { 313 + /// Patches the firmware at offset `signature_start` with `signature`. 
311 314 fn patch_signature<S: FirmwareSignature<F>>( 312 315 mut self, 313 316 signature: &S, 314 - sig_base_img: usize, 315 - ) -> Result<FirmwareDmaObject<F, Signed>> { 317 + signature_start: usize, 318 + ) -> Result<FirmwareObject<F, Signed>> { 316 319 let signature_bytes = signature.as_ref(); 317 - if sig_base_img + signature_bytes.len() > self.0.size() { 318 - return Err(EINVAL); 319 - } 320 + let signature_end = signature_start 321 + .checked_add(signature_bytes.len()) 322 + .ok_or(EOVERFLOW)?; 323 + let dst = self 324 + .0 325 + .get_mut(signature_start..signature_end) 326 + .ok_or(EINVAL)?; 320 327 321 - // SAFETY: We are the only user of this object, so there cannot be any race. 322 - let dst = unsafe { self.0.start_ptr_mut().add(sig_base_img) }; 328 + // PANIC: `dst` and `signature_bytes` have the same length. 329 + dst.copy_from_slice(signature_bytes); 323 330 324 - // SAFETY: `signature` and `dst` are valid, properly aligned, and do not overlap. 325 - unsafe { 326 - core::ptr::copy_nonoverlapping(signature_bytes.as_ptr(), dst, signature_bytes.len()) 327 - }; 328 - 329 - Ok(FirmwareDmaObject(self.0, PhantomData)) 331 + Ok(FirmwareObject(self.0, PhantomData)) 330 332 } 331 333 332 334 /// Mark the firmware as signed without patching it. ··· 335 335 /// This method is used to explicitly confirm that we do not need to sign the firmware, while 336 336 /// allowing us to continue as if it was. This is typically only needed for development 337 337 /// hardware. 338 - fn no_patch_signature(self) -> FirmwareDmaObject<F, Signed> { 339 - FirmwareDmaObject(self.0, PhantomData) 338 + fn no_patch_signature(self) -> FirmwareObject<F, Signed> { 339 + FirmwareObject(self.0, PhantomData) 340 340 } 341 341 } 342 342
+14 -19
drivers/gpu/nova-core/firmware/booter.rs
··· 4 4 //! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon 5 5 //! (and optionally unload it through a separate firmware image). 6 6 7 - use core::{ 8 - marker::PhantomData, 9 - ops::Deref, // 10 - }; 7 + use core::marker::PhantomData; 11 8 12 9 use kernel::{ 13 10 device, ··· 13 16 }; 14 17 15 18 use crate::{ 16 - dma::DmaObject, 17 19 driver::Bar0, 18 20 falcon::{ 19 21 sec2::Sec2, ··· 24 28 }, 25 29 firmware::{ 26 30 BinFirmware, 27 - FirmwareDmaObject, 31 + FirmwareObject, 28 32 FirmwareSignature, 29 33 Signed, 30 34 Unsigned, // ··· 265 269 // BROM falcon parameters. 266 270 brom_params: FalconBromParams, 267 271 // Device-mapped firmware image. 268 - ucode: FirmwareDmaObject<Self, Signed>, 272 + ucode: FirmwareObject<Self, Signed>, 269 273 } 270 274 271 - impl FirmwareDmaObject<BooterFirmware, Unsigned> { 272 - fn new_booter(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> { 273 - DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData)) 275 + impl FirmwareObject<BooterFirmware, Unsigned> { 276 + fn new_booter(data: &[u8]) -> Result<Self> { 277 + let mut ucode = KVVec::new(); 278 + ucode.extend_from_slice(data, GFP_KERNEL)?; 279 + 280 + Ok(Self(ucode, PhantomData)) 274 281 } 275 282 } 276 283 ··· 327 328 let ucode = bin_fw 328 329 .data() 329 330 .ok_or(EINVAL) 330 - .and_then(|data| FirmwareDmaObject::<Self, _>::new_booter(dev, data))?; 331 + .and_then(FirmwareObject::<Self, _>::new_booter)?; 331 332 332 333 let ucode_signed = { 333 334 let mut signatures = hs_fw.signatures_iter()?.peekable(); ··· 399 400 } 400 401 401 402 impl FalconLoadParams for BooterFirmware { 403 + fn as_slice(&self) -> &[u8] { 404 + self.ucode.0.as_slice() 405 + } 406 + 402 407 fn imem_sec_load_params(&self) -> FalconLoadTarget { 403 408 self.imem_sec_load_target.clone() 404 409 } ··· 425 422 } else { 426 423 self.imem_sec_load_target.src_start 427 424 } 428 - } 429 - } 430 - 431 - impl Deref for 
BooterFirmware { 432 - type Target = DmaObject; 433 - 434 - fn deref(&self) -> &Self::Target { 435 - &self.ucode.0 436 425 } 437 426 } 438 427
+35 -68
drivers/gpu/nova-core/firmware/fwsec.rs
··· 10 10 //! - The command to be run, as this firmware can perform several tasks ; 11 11 //! - The ucode signature, so the GSP falcon can run FWSEC in HS mode. 12 12 13 - use core::{ 14 - marker::PhantomData, 15 - ops::Deref, // 16 - }; 13 + use core::marker::PhantomData; 17 14 18 15 use kernel::{ 19 16 device::{ ··· 25 28 }; 26 29 27 30 use crate::{ 28 - dma::DmaObject, 29 31 driver::Bar0, 30 32 falcon::{ 31 33 gsp::Gsp, ··· 36 40 }, 37 41 firmware::{ 38 42 FalconUCodeDesc, 39 - FirmwareDmaObject, 43 + FirmwareObject, 40 44 FirmwareSignature, 41 45 Signed, 42 46 Unsigned, // ··· 170 174 171 175 impl FirmwareSignature<FwsecFirmware> for Bcrt30Rsa3kSignature {} 172 176 173 - /// Reinterpret the area starting from `offset` in `fw` as an instance of `T` (which must implement 174 - /// [`FromBytes`]) and return a reference to it. 175 - /// 176 - /// # Safety 177 - /// 178 - /// * Callers must ensure that the device does not read/write to/from memory while the returned 179 - /// reference is live. 180 - /// * Callers must ensure that this call does not race with a write to the same region while 181 - /// the returned reference is live. 182 - unsafe fn transmute<T: Sized + FromBytes>(fw: &DmaObject, offset: usize) -> Result<&T> { 183 - // SAFETY: The safety requirements of the function guarantee the device won't read 184 - // or write to memory while the reference is alive and that this call won't race 185 - // with writes to the same memory region. 186 - T::from_bytes(unsafe { fw.as_slice(offset, size_of::<T>())? }).ok_or(EINVAL) 187 - } 188 - 189 - /// Reinterpret the area starting from `offset` in `fw` as a mutable instance of `T` (which must 190 - /// implement [`FromBytes`]) and return a reference to it. 191 - /// 192 - /// # Safety 193 - /// 194 - /// * Callers must ensure that the device does not read/write to/from memory while the returned 195 - /// slice is live. 
196 - /// * Callers must ensure that this call does not race with a read or write to the same region 197 - /// while the returned slice is live. 198 - unsafe fn transmute_mut<T: Sized + FromBytes + AsBytes>( 199 - fw: &mut DmaObject, 200 - offset: usize, 201 - ) -> Result<&mut T> { 202 - // SAFETY: The safety requirements of the function guarantee the device won't read 203 - // or write to memory while the reference is alive and that this call won't race 204 - // with writes or reads to the same memory region. 205 - T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())? }).ok_or(EINVAL) 206 - } 207 - 208 177 /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. 209 178 /// 210 179 /// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow. 211 180 pub(crate) struct FwsecFirmware { 212 181 /// Descriptor of the firmware. 213 182 desc: FalconUCodeDesc, 214 - /// GPU-accessible DMA object containing the firmware. 215 - ucode: FirmwareDmaObject<Self, Signed>, 183 + /// Object containing the firmware binary. 
184 + ucode: FirmwareObject<Self, Signed>, 216 185 } 217 186 218 187 impl FalconLoadParams for FwsecFirmware { 188 + fn as_slice(&self) -> &[u8] { 189 + self.ucode.0.as_slice() 190 + } 191 + 219 192 fn imem_sec_load_params(&self) -> FalconLoadTarget { 220 193 self.desc.imem_sec_load_params() 221 194 } ··· 210 245 } 211 246 } 212 247 213 - impl Deref for FwsecFirmware { 214 - type Target = DmaObject; 215 - 216 - fn deref(&self) -> &Self::Target { 217 - &self.ucode.0 218 - } 219 - } 220 - 221 248 impl FalconFirmware for FwsecFirmware { 222 249 type Target = Gsp; 223 250 } 224 251 225 - impl FirmwareDmaObject<FwsecFirmware, Unsigned> { 226 - fn new_fwsec(dev: &Device<device::Bound>, bios: &Vbios, cmd: FwsecCommand) -> Result<Self> { 252 + impl FirmwareObject<FwsecFirmware, Unsigned> { 253 + fn new_fwsec(bios: &Vbios, cmd: FwsecCommand) -> Result<Self> { 227 254 let desc = bios.fwsec_image().header()?; 228 - let ucode = bios.fwsec_image().ucode(&desc)?; 229 - let mut dma_object = DmaObject::from_data(dev, ucode)?; 255 + let mut ucode = KVVec::new(); 256 + ucode.extend_from_slice(bios.fwsec_image().ucode(&desc)?, GFP_KERNEL)?; 230 257 231 258 let hdr_offset = desc 232 259 .imem_load_size() ··· 226 269 .map(usize::from_safe_cast) 227 270 .ok_or(EINVAL)?; 228 271 229 - // SAFETY: we have exclusive access to `dma_object`. 230 - let hdr: &FalconAppifHdrV1 = unsafe { transmute(&dma_object, hdr_offset) }?; 272 + let hdr = ucode 273 + .get(hdr_offset..) 274 + .and_then(FalconAppifHdrV1::from_bytes_prefix) 275 + .ok_or(EINVAL)? 276 + .0; 231 277 232 278 if hdr.version != 1 { 233 279 return Err(EINVAL); ··· 244 284 .and_then(|o| o.checked_add(i.checked_mul(usize::from(hdr.entry_size))?)) 245 285 .ok_or(EINVAL)?; 246 286 247 - // SAFETY: we have exclusive access to `dma_object`. 248 - let app: &FalconAppifV1 = unsafe { transmute(&dma_object, entry_offset) }?; 287 + let app = ucode 288 + .get(entry_offset..) 289 + .and_then(FalconAppifV1::from_bytes_prefix) 290 + .ok_or(EINVAL)? 
291 + .0; 249 292 250 293 if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER { 251 294 continue; ··· 261 298 .map(usize::from_safe_cast) 262 299 .ok_or(EINVAL)?; 263 300 264 - let dmem_mapper: &mut FalconAppifDmemmapperV3 = 265 - // SAFETY: we have exclusive access to `dma_object`. 266 - unsafe { transmute_mut(&mut dma_object, dmem_mapper_offset) }?; 301 + let dmem_mapper = ucode 302 + .get_mut(dmem_mapper_offset..) 303 + .and_then(FalconAppifDmemmapperV3::from_bytes_mut_prefix) 304 + .ok_or(EINVAL)? 305 + .0; 267 306 268 307 dmem_mapper.init_cmd = match cmd { 269 308 FwsecCommand::Frts { .. } => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS, ··· 279 314 .map(usize::from_safe_cast) 280 315 .ok_or(EINVAL)?; 281 316 282 - let frts_cmd: &mut FrtsCmd = 283 - // SAFETY: we have exclusive access to `dma_object`. 284 - unsafe { transmute_mut(&mut dma_object, frts_cmd_offset) }?; 317 + let frts_cmd = ucode 318 + .get_mut(frts_cmd_offset..) 319 + .and_then(FrtsCmd::from_bytes_mut_prefix) 320 + .ok_or(EINVAL)? 321 + .0; 285 322 286 323 frts_cmd.read_vbios = ReadVbios { 287 324 ver: 1, ··· 307 340 } 308 341 309 342 // Return early as we found and patched the DMEMMAPPER region. 310 - return Ok(Self(dma_object, PhantomData)); 343 + return Ok(Self(ucode, PhantomData)); 311 344 } 312 345 313 346 Err(ENOTSUPP) ··· 324 357 bios: &Vbios, 325 358 cmd: FwsecCommand, 326 359 ) -> Result<Self> { 327 - let ucode_dma = FirmwareDmaObject::<Self, _>::new_fwsec(dev, bios, cmd)?; 360 + let ucode_dma = FirmwareObject::<Self, _>::new_fwsec(bios, cmd)?; 328 361 329 362 // Patch signature if needed. 330 363 let desc = bios.fwsec_image().header()?; ··· 396 429 .reset(bar) 397 430 .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; 398 431 falcon 399 - .load(bar, self) 432 + .load(dev, bar, self) 400 433 .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; 401 434 let (mbox0, _) = falcon 402 435 .boot(bar, Some(0), None)
+1 -1
drivers/gpu/nova-core/gsp/boot.rs
··· 178 178 ); 179 179 180 180 sec2_falcon.reset(bar)?; 181 - sec2_falcon.load(bar, &booter_loader)?; 181 + sec2_falcon.load(dev, bar, &booter_loader)?; 182 182 let wpr_handle = wpr_meta.dma_handle(); 183 183 let (mbox0, mbox1) = sec2_falcon.boot( 184 184 bar,