Merge tag 'drm-rust-next-2026-03-30' of https://gitlab.freedesktop.org/drm/rust/kernel into drm-next

-76

Documentation/gpu/nova/core/todo.rst

··· 51 51 | Link: https://lore.kernel.org/all/cover.1750689857.git.y.j3ms.n@gmail.com/ [1] 52 52 | Link: https://rust-for-linux.zulipchat.com/#narrow/channel/288089-General/topic/Implement.20.60FromPrimitive.60.20trait.20.2B.20derive.20macro.20for.20nova-core/with/541971854 [2] 53 53 54 - Generic register abstraction [REGA] 55 - ----------------------------------- 56 - 57 - Work out how register constants and structures can be automatically generated 58 - through generalized macros. 59 - 60 - Example: 61 - 62 - .. code-block:: rust 63 - 64 - register!(BOOT0, 0x0, u32, pci::Bar<SIZE>, Fields [ 65 - MINOR_REVISION(3:0, RO), 66 - MAJOR_REVISION(7:4, RO), 67 - REVISION(7:0, RO), // Virtual register combining major and minor rev. 68 - ]) 69 - 70 - This could expand to something like: 71 - 72 - .. code-block:: rust 73 - 74 - const BOOT0_OFFSET: usize = 0x00000000; 75 - const BOOT0_MINOR_REVISION_SHIFT: u8 = 0; 76 - const BOOT0_MINOR_REVISION_MASK: u32 = 0x0000000f; 77 - const BOOT0_MAJOR_REVISION_SHIFT: u8 = 4; 78 - const BOOT0_MAJOR_REVISION_MASK: u32 = 0x000000f0; 79 - const BOOT0_REVISION_SHIFT: u8 = BOOT0_MINOR_REVISION_SHIFT; 80 - const BOOT0_REVISION_MASK: u32 = BOOT0_MINOR_REVISION_MASK | BOOT0_MAJOR_REVISION_MASK; 81 - 82 - struct Boot0(u32); 83 - 84 - impl Boot0 { 85 - #[inline] 86 - fn read(bar: &RevocableGuard<'_, pci::Bar<SIZE>>) -> Self { 87 - Self(bar.readl(BOOT0_OFFSET)) 88 - } 89 - 90 - #[inline] 91 - fn minor_revision(&self) -> u32 { 92 - (self.0 & BOOT0_MINOR_REVISION_MASK) >> BOOT0_MINOR_REVISION_SHIFT 93 - } 94 - 95 - #[inline] 96 - fn major_revision(&self) -> u32 { 97 - (self.0 & BOOT0_MAJOR_REVISION_MASK) >> BOOT0_MAJOR_REVISION_SHIFT 98 - } 99 - 100 - #[inline] 101 - fn revision(&self) -> u32 { 102 - (self.0 & BOOT0_REVISION_MASK) >> BOOT0_REVISION_SHIFT 103 - } 104 - } 105 - 106 - Usage: 107 - 108 - .. 
code-block:: rust 109 - 110 - let bar = bar.try_access().ok_or(ENXIO)?; 111 - 112 - let boot0 = Boot0::read(&bar); 113 - pr_info!("Revision: {}\n", boot0.revision()); 114 - 115 - A work-in-progress implementation currently resides in 116 - `drivers/gpu/nova-core/regs/macros.rs` and is used in nova-core. It would be 117 - nice to improve it (possibly using proc macros) and move it to the `kernel` 118 - crate so it can be used by other components as well. 119 - 120 - Features desired before this happens: 121 - 122 - * Make I/O optional (for field values that are not registers), 123 - * Support other sizes than `u32`, 124 - * Allow visibility control for registers and individual fields, 125 - * Use Rust slice syntax to express field ranges. 126 - 127 - | Complexity: Advanced 128 - | Contact: Alexandre Courbot 129 - 130 54 Numerical operations [NUMM] 131 55 --------------------------- 132 56

+17 -1

MAINTAINERS

··· 7534 7534 F: include/linux/dma-buf.h 7535 7535 F: include/linux/dma-buf/ 7536 7536 F: include/linux/dma-resv.h 7537 + F: rust/helpers/dma-resv.c 7537 7538 K: \bdma_(?:buf|fence|resv)\b 7538 7539 7539 7540 DMA GENERIC OFFLOAD ENGINE SUBSYSTEM ··· 8514 8513 F: drivers/gpu/drm/nova/ 8515 8514 F: drivers/gpu/drm/tyr/ 8516 8515 F: drivers/gpu/nova-core/ 8516 + F: rust/helpers/gpu.c 8517 8517 F: rust/kernel/drm/ 8518 + F: rust/kernel/gpu.rs 8519 + F: rust/kernel/gpu/ 8518 8520 8519 8521 DRM DRIVERS FOR ALLWINNER A10 8520 8522 M: Chen-Yu Tsai <wens@kernel.org> ··· 8935 8931 GPU BUDDY ALLOCATOR 8936 8932 M: Matthew Auld <matthew.auld@intel.com> 8937 8933 M: Arun Pravin <arunpravin.paneerselvam@amd.com> 8938 - R: Christian Koenig <christian.koenig@amd.com> 8934 + R: Joel Fernandes <joelagnelf@nvidia.com> 8939 8935 L: dri-devel@lists.freedesktop.org 8940 8936 S: Maintained 8941 8937 T: git https://gitlab.freedesktop.org/drm/misc/kernel.git ··· 8944 8940 F: drivers/gpu/tests/gpu_buddy_test.c 8945 8941 F: include/drm/drm_buddy.h 8946 8942 F: include/linux/gpu_buddy.h 8943 + F: rust/helpers/gpu.c 8944 + F: rust/kernel/gpu.rs 8945 + F: rust/kernel/gpu/ 8947 8946 8948 8947 DRM AUTOMATED TESTING 8949 8948 M: Helen Koike <helen.fornazier@gmail.com> ··· 23214 23207 T: git https://github.com/Rust-for-Linux/linux.git alloc-next 23215 23208 F: rust/kernel/alloc.rs 23216 23209 F: rust/kernel/alloc/ 23210 + 23211 + RUST [INTEROP] 23212 + M: Joel Fernandes <joelagnelf@nvidia.com> 23213 + M: Alexandre Courbot <acourbot@nvidia.com> 23214 + L: rust-for-linux@vger.kernel.org 23215 + S: Maintained 23216 + T: git https://github.com/Rust-for-Linux/linux.git interop-next 23217 + F: rust/kernel/interop.rs 23218 + F: rust/kernel/interop/ 23217 23219 23218 23220 RUST [NUM] 23219 23221 M: Alexandre Courbot <acourbot@nvidia.com>

+7

drivers/gpu/drm/Kconfig

··· 268 268 help 269 269 Choose this if you need the GEM shmem helper functions 270 270 271 + config RUST_DRM_GEM_SHMEM_HELPER 272 + bool 273 + depends on DRM && MMU 274 + select DRM_GEM_SHMEM_HELPER 275 + help 276 + Choose this if you need the GEM shmem helper functions in Rust 277 + 271 278 config DRM_SUBALLOC_HELPER 272 279 tristate 273 280 depends on DRM

+3 -2

drivers/gpu/drm/nova/gem.rs

··· 19 19 20 20 impl gem::DriverObject for NovaObject { 21 21 type Driver = NovaDriver; 22 + type Args = (); 22 23 23 - fn new(_dev: &NovaDevice, _size: usize) -> impl PinInit<Self, Error> { 24 + fn new(_dev: &NovaDevice, _size: usize, _args: Self::Args) -> impl PinInit<Self, Error> { 24 25 try_pin_init!(NovaObject {}) 25 26 } 26 27 } ··· 34 33 } 35 34 let aligned_size = page::page_align(size).ok_or(EINVAL)?; 36 35 37 - gem::Object::new(dev, aligned_size) 36 + gem::Object::new(dev, aligned_size, ()) 38 37 } 39 38 40 39 /// Look up a GEM object handle for a `File` and return an `ObjectRef` for it.

+55 -43

drivers/gpu/drm/tyr/driver.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 or MIT 2 2 3 - use kernel::clk::Clk; 4 - use kernel::clk::OptionalClk; 5 - use kernel::device::Bound; 6 - use kernel::device::Core; 7 - use kernel::device::Device; 8 - use kernel::devres::Devres; 9 - use kernel::drm; 10 - use kernel::drm::ioctl; 11 - use kernel::io::poll; 12 - use kernel::new_mutex; 13 - use kernel::of; 14 - use kernel::platform; 15 - use kernel::prelude::*; 16 - use kernel::regulator; 17 - use kernel::regulator::Regulator; 18 - use kernel::sizes::SZ_2M; 19 - use kernel::sync::aref::ARef; 20 - use kernel::sync::Arc; 21 - use kernel::sync::Mutex; 22 - use kernel::time; 3 + use kernel::{ 4 + clk::{ 5 + Clk, 6 + OptionalClk, // 7 + }, 8 + device::{ 9 + Bound, 10 + Core, 11 + Device, // 12 + }, 13 + devres::Devres, 14 + drm, 15 + drm::ioctl, 16 + io::poll, 17 + new_mutex, 18 + of, 19 + platform, 20 + prelude::*, 21 + regulator, 22 + regulator::Regulator, 23 + sizes::SZ_2M, 24 + sync::{ 25 + aref::ARef, 26 + Arc, 27 + Mutex, // 28 + }, 29 + time, // 30 + }; 23 31 24 - use crate::file::File; 25 - use crate::gem::TyrObject; 26 - use crate::gpu; 27 - use crate::gpu::GpuInfo; 28 - use crate::regs; 32 + use crate::{ 33 + file::TyrDrmFileData, 34 + gem::TyrObject, 35 + gpu, 36 + gpu::GpuInfo, 37 + regs, // 38 + }; 29 39 30 40 pub(crate) type IoMem = kernel::io::mem::IoMem<SZ_2M>; 31 41 42 + pub(crate) struct TyrDrmDriver; 43 + 32 44 /// Convenience type alias for the DRM device type for this driver. 
33 - pub(crate) type TyrDevice = drm::Device<TyrDriver>; 45 + pub(crate) type TyrDrmDevice = drm::Device<TyrDrmDriver>; 34 46 35 47 #[pin_data(PinnedDrop)] 36 - pub(crate) struct TyrDriver { 37 - _device: ARef<TyrDevice>, 48 + pub(crate) struct TyrPlatformDriverData { 49 + _device: ARef<TyrDrmDevice>, 38 50 } 39 51 40 52 #[pin_data(PinnedDrop)] 41 - pub(crate) struct TyrData { 53 + pub(crate) struct TyrDrmDeviceData { 42 54 pub(crate) pdev: ARef<platform::Device>, 43 55 44 56 #[pin] ··· 73 61 // that it will be removed in a future patch. 74 62 // 75 63 // SAFETY: This will be removed in a future patch. 76 - unsafe impl Send for TyrData {} 64 + unsafe impl Send for TyrDrmDeviceData {} 77 65 // SAFETY: This will be removed in a future patch. 78 - unsafe impl Sync for TyrData {} 66 + unsafe impl Sync for TyrDrmDeviceData {} 79 67 80 68 fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result { 81 69 regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?; ··· 94 82 kernel::of_device_table!( 95 83 OF_TABLE, 96 84 MODULE_OF_TABLE, 97 - <TyrDriver as platform::Driver>::IdInfo, 85 + <TyrPlatformDriverData as platform::Driver>::IdInfo, 98 86 [ 99 87 (of::DeviceId::new(c"rockchip,rk3588-mali"), ()), 100 88 (of::DeviceId::new(c"arm,mali-valhall-csf"), ()) 101 89 ] 102 90 ); 103 91 104 - impl platform::Driver for TyrDriver { 92 + impl platform::Driver for TyrPlatformDriverData { 105 93 type IdInfo = (); 106 94 const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); 107 95 ··· 131 119 132 120 let platform: ARef<platform::Device> = pdev.into(); 133 121 134 - let data = try_pin_init!(TyrData { 122 + let data = try_pin_init!(TyrDrmDeviceData { 135 123 pdev: platform.clone(), 136 124 clks <- new_mutex!(Clocks { 137 125 core: core_clk, ··· 145 133 gpu_info, 146 134 }); 147 135 148 - let tdev: ARef<TyrDevice> = drm::Device::new(pdev.as_ref(), data)?; 149 - drm::driver::Registration::new_foreign_owned(&tdev, pdev.as_ref(), 0)?; 136 + let ddev: 
ARef<TyrDrmDevice> = drm::Device::new(pdev.as_ref(), data)?; 137 + drm::driver::Registration::new_foreign_owned(&ddev, pdev.as_ref(), 0)?; 150 138 151 - let driver = TyrDriver { _device: tdev }; 139 + let driver = TyrPlatformDriverData { _device: ddev }; 152 140 153 141 // We need this to be dev_info!() because dev_dbg!() does not work at 154 142 // all in Rust for now, and we need to see whether probe succeeded. ··· 158 146 } 159 147 160 148 #[pinned_drop] 161 - impl PinnedDrop for TyrDriver { 149 + impl PinnedDrop for TyrPlatformDriverData { 162 150 fn drop(self: Pin<&mut Self>) {} 163 151 } 164 152 165 153 #[pinned_drop] 166 - impl PinnedDrop for TyrData { 154 + impl PinnedDrop for TyrDrmDeviceData { 167 155 fn drop(self: Pin<&mut Self>) { 168 156 // TODO: the type-state pattern for Clks will fix this. 169 157 let clks = self.clks.lock(); ··· 184 172 }; 185 173 186 174 #[vtable] 187 - impl drm::Driver for TyrDriver { 188 - type Data = TyrData; 189 - type File = File; 175 + impl drm::Driver for TyrDrmDriver { 176 + type Data = TyrDrmDeviceData; 177 + type File = TyrDrmFileData; 190 178 type Object = drm::gem::Object<TyrObject>; 191 179 192 180 const INFO: drm::DriverInfo = INFO; 193 181 194 182 kernel::declare_drm_ioctls! { 195 - (PANTHOR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, File::dev_query), 183 + (PANTHOR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, TyrDrmFileData::dev_query), 196 184 } 197 185 } 198 186

+19 -15

drivers/gpu/drm/tyr/file.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 or MIT 2 2 3 - use kernel::drm; 4 - use kernel::prelude::*; 5 - use kernel::uaccess::UserSlice; 6 - use kernel::uapi; 3 + use kernel::{ 4 + drm, 5 + prelude::*, 6 + uaccess::UserSlice, 7 + uapi, // 8 + }; 7 9 8 - use crate::driver::TyrDevice; 9 - use crate::TyrDriver; 10 + use crate::driver::{ 11 + TyrDrmDevice, 12 + TyrDrmDriver, // 13 + }; 10 14 11 15 #[pin_data] 12 - pub(crate) struct File {} 16 + pub(crate) struct TyrDrmFileData {} 13 17 14 18 /// Convenience type alias for our DRM `File` type 15 - pub(crate) type DrmFile = drm::file::File<File>; 19 + pub(crate) type TyrDrmFile = drm::file::File<TyrDrmFileData>; 16 20 17 - impl drm::file::DriverFile for File { 18 - type Driver = TyrDriver; 21 + impl drm::file::DriverFile for TyrDrmFileData { 22 + type Driver = TyrDrmDriver; 19 23 20 24 fn open(_dev: &drm::Device<Self::Driver>) -> Result<Pin<KBox<Self>>> { 21 25 KBox::try_pin_init(try_pin_init!(Self {}), GFP_KERNEL) 22 26 } 23 27 } 24 28 25 - impl File { 29 + impl TyrDrmFileData { 26 30 pub(crate) fn dev_query( 27 - tdev: &TyrDevice, 31 + ddev: &TyrDrmDevice, 28 32 devquery: &mut uapi::drm_panthor_dev_query, 29 - _file: &DrmFile, 33 + _file: &TyrDrmFile, 30 34 ) -> Result<u32> { 31 35 if devquery.pointer == 0 { 32 36 match devquery.type_ { 33 37 uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { 34 - devquery.size = core::mem::size_of_val(&tdev.gpu_info) as u32; 38 + devquery.size = core::mem::size_of_val(&ddev.gpu_info) as u32; 35 39 Ok(0) 36 40 } 37 41 _ => Err(EINVAL), ··· 49 45 ) 50 46 .writer(); 51 47 52 - writer.write(&tdev.gpu_info)?; 48 + writer.write(&ddev.gpu_info)?; 53 49 54 50 Ok(0) 55 51 }

+12 -6

drivers/gpu/drm/tyr/gem.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 or MIT 2 2 3 - use crate::driver::TyrDevice; 4 - use crate::driver::TyrDriver; 5 - use kernel::drm::gem; 6 - use kernel::prelude::*; 3 + use kernel::{ 4 + drm::gem, 5 + prelude::*, // 6 + }; 7 + 8 + use crate::driver::{ 9 + TyrDrmDevice, 10 + TyrDrmDriver, // 11 + }; 7 12 8 13 /// GEM Object inner driver data 9 14 #[pin_data] 10 15 pub(crate) struct TyrObject {} 11 16 12 17 impl gem::DriverObject for TyrObject { 13 - type Driver = TyrDriver; 18 + type Driver = TyrDrmDriver; 19 + type Args = (); 14 20 15 - fn new(_dev: &TyrDevice, _size: usize) -> impl PinInit<Self, Error> { 21 + fn new(_dev: &TyrDrmDevice, _size: usize, _args: ()) -> impl PinInit<Self, Error> { 16 22 try_pin_init!(TyrObject {}) 17 23 } 18 24 }

+31 -25

drivers/gpu/drm/tyr/gpu.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 or MIT 2 2 3 - use core::ops::Deref; 4 - use core::ops::DerefMut; 5 - use kernel::bits::genmask_u32; 6 - use kernel::device::Bound; 7 - use kernel::device::Device; 8 - use kernel::devres::Devres; 9 - use kernel::io::poll; 10 - use kernel::platform; 11 - use kernel::prelude::*; 12 - use kernel::time::Delta; 13 - use kernel::transmute::AsBytes; 14 - use kernel::uapi; 3 + use core::ops::{ 4 + Deref, 5 + DerefMut, // 6 + }; 7 + use kernel::{ 8 + bits::genmask_u32, 9 + device::{ 10 + Bound, 11 + Device, // 12 + }, 13 + devres::Devres, 14 + io::poll, 15 + platform, 16 + prelude::*, 17 + time::Delta, 18 + transmute::AsBytes, 19 + uapi, // 20 + }; 15 21 16 - use crate::driver::IoMem; 17 - use crate::regs; 22 + use crate::{ 23 + driver::IoMem, 24 + regs, // 25 + }; 18 26 19 27 /// Struct containing information that can be queried by userspace. This is read from 20 28 /// the GPU's registers. ··· 92 84 } 93 85 94 86 pub(crate) fn log(&self, pdev: &platform::Device) { 95 - let major = (self.gpu_id >> 16) & 0xff; 96 - let minor = (self.gpu_id >> 8) & 0xff; 97 - let status = self.gpu_id & 0xff; 87 + let gpu_id = GpuId::from(self.gpu_id); 98 88 99 89 let model_name = if let Some(model) = GPU_MODELS 100 90 .iter() 101 - .find(|&f| f.major == major && f.minor == minor) 91 + .find(|&f| f.arch_major == gpu_id.arch_major && f.prod_major == gpu_id.prod_major) 102 92 { 103 93 model.name 104 94 } else { ··· 108 102 "mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}", 109 103 model_name, 110 104 self.gpu_id >> 16, 111 - major, 112 - minor, 113 - status 105 + gpu_id.ver_major, 106 + gpu_id.ver_minor, 107 + gpu_id.ver_status 114 108 ); 115 109 116 110 dev_info!( ··· 172 166 173 167 struct GpuModels { 174 168 name: &'static str, 175 - major: u32, 176 - minor: u32, 169 + arch_major: u32, 170 + prod_major: u32, 177 171 } 178 172 179 173 const GPU_MODELS: [GpuModels; 1] = [GpuModels { 180 174 name: "g610", 181 - major: 10, 182 - minor: 7, 175 
+ arch_major: 10, 176 + prod_major: 7, 183 177 }]; 184 178 185 179 #[allow(dead_code)]

+10 -6

drivers/gpu/drm/tyr/regs.rs

··· 7 7 // does. 8 8 #![allow(dead_code)] 9 9 10 - use kernel::bits::bit_u32; 11 - use kernel::device::Bound; 12 - use kernel::device::Device; 13 - use kernel::devres::Devres; 14 - use kernel::io::Io; 15 - use kernel::prelude::*; 10 + use kernel::{ 11 + bits::bit_u32, 12 + device::{ 13 + Bound, 14 + Device, // 15 + }, 16 + devres::Devres, 17 + io::Io, 18 + prelude::*, // 19 + }; 16 20 17 21 use crate::driver::IoMem; 18 22

+2 -2

drivers/gpu/drm/tyr/tyr.rs

··· 5 5 //! The name "Tyr" is inspired by Norse mythology, reflecting Arm's tradition of 6 6 //! naming their GPUs after Nordic mythological figures and places. 7 7 8 - use crate::driver::TyrDriver; 8 + use crate::driver::TyrPlatformDriverData; 9 9 10 10 mod driver; 11 11 mod file; ··· 14 14 mod regs; 15 15 16 16 kernel::module_platform_driver! { 17 - type: TyrDriver, 17 + type: TyrPlatformDriverData, 18 18 name: "tyr", 19 19 authors: ["The Tyr driver authors"], 20 20 description: "Arm Mali Tyr DRM driver",

+1 -1

drivers/gpu/nova-core/Kconfig

··· 3 3 depends on 64BIT 4 4 depends on PCI 5 5 depends on RUST 6 - select RUST_FW_LOADER_ABSTRACTIONS 7 6 select AUXILIARY_BUS 7 + select RUST_FW_LOADER_ABSTRACTIONS 8 8 default n 9 9 help 10 10 Choose this if you want to build the Nova Core driver for Nvidia

-54

drivers/gpu/nova-core/dma.rs

··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - 3 - //! Simple DMA object wrapper. 4 - 5 - use core::ops::{ 6 - Deref, 7 - DerefMut, // 8 - }; 9 - 10 - use kernel::{ 11 - device, 12 - dma::CoherentAllocation, 13 - page::PAGE_SIZE, 14 - prelude::*, // 15 - }; 16 - 17 - pub(crate) struct DmaObject { 18 - dma: CoherentAllocation<u8>, 19 - } 20 - 21 - impl DmaObject { 22 - pub(crate) fn new(dev: &device::Device<device::Bound>, len: usize) -> Result<Self> { 23 - let len = core::alloc::Layout::from_size_align(len, PAGE_SIZE) 24 - .map_err(|_| EINVAL)? 25 - .pad_to_align() 26 - .size(); 27 - let dma = CoherentAllocation::alloc_coherent(dev, len, GFP_KERNEL | __GFP_ZERO)?; 28 - 29 - Ok(Self { dma }) 30 - } 31 - 32 - pub(crate) fn from_data(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> { 33 - Self::new(dev, data.len()).and_then(|mut dma_obj| { 34 - // SAFETY: We have just allocated the DMA memory, we are the only users and 35 - // we haven't made the device aware of the handle yet. 36 - unsafe { dma_obj.write(data, 0)? } 37 - Ok(dma_obj) 38 - }) 39 - } 40 - } 41 - 42 - impl Deref for DmaObject { 43 - type Target = CoherentAllocation<u8>; 44 - 45 - fn deref(&self) -> &Self::Target { 46 - &self.dma 47 - } 48 - } 49 - 50 - impl DerefMut for DmaObject { 51 - fn deref_mut(&mut self) -> &mut Self::Target { 52 - &mut self.dma 53 - } 54 - }

+14 -3

drivers/gpu/nova-core/driver.rs

··· 14 14 }, 15 15 prelude::*, 16 16 sizes::SZ_16M, 17 - sync::Arc, // 17 + sync::{ 18 + atomic::{ 19 + Atomic, 20 + Relaxed, // 21 + }, 22 + Arc, 23 + }, 18 24 }; 19 25 20 26 use crate::gpu::Gpu; 27 + 28 + /// Counter for generating unique auxiliary device IDs. 29 + static AUXILIARY_ID_COUNTER: Atomic<u32> = Atomic::new(0); 21 30 22 31 #[pin_data] 23 32 pub(crate) struct NovaCore { ··· 79 70 80 71 fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> { 81 72 pin_init::pin_init_scope(move || { 82 - dev_dbg!(pdev.as_ref(), "Probe Nova Core GPU driver.\n"); 73 + dev_dbg!(pdev, "Probe Nova Core GPU driver.\n"); 83 74 84 75 pdev.enable_device_mem()?; 85 76 pdev.set_master(); ··· 99 90 _reg <- auxiliary::Registration::new( 100 91 pdev.as_ref(), 101 92 c"nova-drm", 102 - 0, // TODO[XARR]: Once it lands, use XArray; for now we don't use the ID. 93 + // TODO[XARR]: Use XArray or perhaps IDA for proper ID allocation/recycling. For 94 + // now, use a simple atomic counter that never recycles IDs. 95 + AUXILIARY_ID_COUNTER.fetch_add(1, Relaxed), 103 96 crate::MODULE_NAME 104 97 ), 105 98 }))

+452 -337

drivers/gpu/nova-core/falcon.rs

··· 2 2 3 3 //! Falcon microprocessor base support 4 4 5 - use core::ops::Deref; 6 - 7 5 use hal::FalconHal; 8 6 9 7 use kernel::{ 10 - device, 8 + device::{ 9 + self, 10 + Device, // 11 + }, 11 12 dma::{ 13 + Coherent, 12 14 DmaAddress, 13 15 DmaMask, // 14 16 }, 15 - io::poll::read_poll_timeout, 17 + io::{ 18 + poll::read_poll_timeout, 19 + register::{ 20 + RegisterBase, 21 + WithBase, // 22 + }, 23 + Io, 24 + }, 16 25 prelude::*, 17 26 sync::aref::ARef, 18 - time::{ 19 - Delta, // 20 - }, 27 + time::Delta, 21 28 }; 22 29 23 30 use crate::{ 24 - dma::DmaObject, 31 + bounded_enum, 25 32 driver::Bar0, 26 33 falcon::hal::LoadMethod, 27 34 gpu::Chipset, 28 35 num::{ 29 - FromSafeCast, 30 - IntoSafeCast, // 36 + self, 37 + FromSafeCast, // 31 38 }, 32 39 regs, 33 - regs::macros::RegisterBase, // 34 40 }; 35 41 36 42 pub(crate) mod gsp; 37 43 mod hal; 38 44 pub(crate) mod sec2; 39 45 40 - // TODO[FPRI]: Replace with `ToPrimitive`. 41 - macro_rules! impl_from_enum_to_u8 { 42 - ($enum_type:ty) => { 43 - impl From<$enum_type> for u8 { 44 - fn from(value: $enum_type) -> Self { 45 - value as u8 46 - } 47 - } 48 - }; 49 - } 46 + /// Alignment (in bytes) of falcon memory blocks. 47 + pub(crate) const MEM_BLOCK_ALIGNMENT: usize = 256; 50 48 51 - /// Revision number of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] 52 - /// register. 53 - #[repr(u8)] 54 - #[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] 55 - pub(crate) enum FalconCoreRev { 56 - #[default] 57 - Rev1 = 1, 58 - Rev2 = 2, 59 - Rev3 = 3, 60 - Rev4 = 4, 61 - Rev5 = 5, 62 - Rev6 = 6, 63 - Rev7 = 7, 64 - } 65 - impl_from_enum_to_u8!(FalconCoreRev); 66 - 67 - // TODO[FPRI]: replace with `FromPrimitive`. 
68 - impl TryFrom<u8> for FalconCoreRev { 69 - type Error = Error; 70 - 71 - fn try_from(value: u8) -> Result<Self> { 72 - use FalconCoreRev::*; 73 - 74 - let rev = match value { 75 - 1 => Rev1, 76 - 2 => Rev2, 77 - 3 => Rev3, 78 - 4 => Rev4, 79 - 5 => Rev5, 80 - 6 => Rev6, 81 - 7 => Rev7, 82 - _ => return Err(EINVAL), 83 - }; 84 - 85 - Ok(rev) 49 + bounded_enum! { 50 + /// Revision number of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] 51 + /// register. 52 + #[derive(Debug, Copy, Clone)] 53 + pub(crate) enum FalconCoreRev with TryFrom<Bounded<u32, 4>> { 54 + Rev1 = 1, 55 + Rev2 = 2, 56 + Rev3 = 3, 57 + Rev4 = 4, 58 + Rev5 = 5, 59 + Rev6 = 6, 60 + Rev7 = 7, 86 61 } 87 62 } 88 63 89 - /// Revision subversion number of a falcon core, used in the 90 - /// [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] register. 91 - #[repr(u8)] 92 - #[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] 93 - pub(crate) enum FalconCoreRevSubversion { 94 - #[default] 95 - Subversion0 = 0, 96 - Subversion1 = 1, 97 - Subversion2 = 2, 98 - Subversion3 = 3, 99 - } 100 - impl_from_enum_to_u8!(FalconCoreRevSubversion); 101 - 102 - // TODO[FPRI]: replace with `FromPrimitive`. 103 - impl TryFrom<u8> for FalconCoreRevSubversion { 104 - type Error = Error; 105 - 106 - fn try_from(value: u8) -> Result<Self> { 107 - use FalconCoreRevSubversion::*; 108 - 109 - let sub_version = match value & 0b11 { 110 - 0 => Subversion0, 111 - 1 => Subversion1, 112 - 2 => Subversion2, 113 - 3 => Subversion3, 114 - _ => return Err(EINVAL), 115 - }; 116 - 117 - Ok(sub_version) 64 + bounded_enum! { 65 + /// Revision subversion number of a falcon core, used in the 66 + /// [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] register. 
67 + #[derive(Debug, Copy, Clone)] 68 + pub(crate) enum FalconCoreRevSubversion with From<Bounded<u32, 2>> { 69 + Subversion0 = 0, 70 + Subversion1 = 1, 71 + Subversion2 = 2, 72 + Subversion3 = 3, 118 73 } 119 74 } 120 75 121 - /// Security model of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] 122 - /// register. 123 - #[repr(u8)] 124 - #[derive(Debug, Default, Copy, Clone)] 125 - /// Security mode of the Falcon microprocessor. 126 - /// 127 - /// See `falcon.rst` for more details. 128 - pub(crate) enum FalconSecurityModel { 129 - /// Non-Secure: runs unsigned code without privileges. 130 - #[default] 131 - None = 0, 132 - /// Light-Secured (LS): Runs signed code with some privileges. 133 - /// Entry into this mode is only possible from 'Heavy-secure' mode, which verifies the code's 134 - /// signature. 76 + bounded_enum! { 77 + /// Security mode of the Falcon microprocessor. 135 78 /// 136 - /// Also known as Low-Secure, Privilege Level 2 or PL2. 137 - Light = 2, 138 - /// Heavy-Secured (HS): Runs signed code with full privileges. 139 - /// The code's signature is verified by the Falcon Boot ROM (BROM). 140 - /// 141 - /// Also known as High-Secure, Privilege Level 3 or PL3. 142 - Heavy = 3, 143 - } 144 - impl_from_enum_to_u8!(FalconSecurityModel); 145 - 146 - // TODO[FPRI]: replace with `FromPrimitive`. 147 - impl TryFrom<u8> for FalconSecurityModel { 148 - type Error = Error; 149 - 150 - fn try_from(value: u8) -> Result<Self> { 151 - use FalconSecurityModel::*; 152 - 153 - let sec_model = match value { 154 - 0 => None, 155 - 2 => Light, 156 - 3 => Heavy, 157 - _ => return Err(EINVAL), 158 - }; 159 - 160 - Ok(sec_model) 79 + /// See `falcon.rst` for more details. 80 + #[derive(Debug, Copy, Clone)] 81 + pub(crate) enum FalconSecurityModel with TryFrom<Bounded<u32, 2>> { 82 + /// Non-Secure: runs unsigned code without privileges. 83 + None = 0, 84 + /// Light-Secured (LS): Runs signed code with some privileges. 
85 + /// Entry into this mode is only possible from 'Heavy-secure' mode, which verifies the 86 + /// code's signature. 87 + /// 88 + /// Also known as Low-Secure, Privilege Level 2 or PL2. 89 + Light = 2, 90 + /// Heavy-Secured (HS): Runs signed code with full privileges. 91 + /// The code's signature is verified by the Falcon Boot ROM (BROM). 92 + /// 93 + /// Also known as High-Secure, Privilege Level 3 or PL3. 94 + Heavy = 3, 161 95 } 162 96 } 163 97 164 - /// Signing algorithm for a given firmware, used in the [`crate::regs::NV_PFALCON2_FALCON_MOD_SEL`] 165 - /// register. It is passed to the Falcon Boot ROM (BROM) as a parameter. 166 - #[repr(u8)] 167 - #[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] 168 - pub(crate) enum FalconModSelAlgo { 169 - /// AES. 170 - #[expect(dead_code)] 171 - Aes = 0, 172 - /// RSA3K. 173 - #[default] 174 - Rsa3k = 1, 175 - } 176 - impl_from_enum_to_u8!(FalconModSelAlgo); 177 - 178 - // TODO[FPRI]: replace with `FromPrimitive`. 179 - impl TryFrom<u8> for FalconModSelAlgo { 180 - type Error = Error; 181 - 182 - fn try_from(value: u8) -> Result<Self> { 183 - match value { 184 - 1 => Ok(FalconModSelAlgo::Rsa3k), 185 - _ => Err(EINVAL), 186 - } 98 + bounded_enum! { 99 + /// Signing algorithm for a given firmware, used in the 100 + /// [`crate::regs::NV_PFALCON2_FALCON_MOD_SEL`] register. It is passed to the Falcon Boot ROM 101 + /// (BROM) as a parameter. 102 + #[derive(Debug, Copy, Clone)] 103 + pub(crate) enum FalconModSelAlgo with TryFrom<Bounded<u32, 8>> { 104 + /// AES. 105 + Aes = 0, 106 + /// RSA3K. 107 + Rsa3k = 1, 187 108 } 188 109 } 189 110 190 - /// Valid values for the `size` field of the [`crate::regs::NV_PFALCON_FALCON_DMATRFCMD`] register. 191 - #[repr(u8)] 192 - #[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] 193 - pub(crate) enum DmaTrfCmdSize { 194 - /// 256 bytes transfer. 
195 - #[default] 196 - Size256B = 0x6, 197 - } 198 - impl_from_enum_to_u8!(DmaTrfCmdSize); 199 - 200 - // TODO[FPRI]: replace with `FromPrimitive`. 201 - impl TryFrom<u8> for DmaTrfCmdSize { 202 - type Error = Error; 203 - 204 - fn try_from(value: u8) -> Result<Self> { 205 - match value { 206 - 0x6 => Ok(Self::Size256B), 207 - _ => Err(EINVAL), 208 - } 111 + bounded_enum! { 112 + /// Valid values for the `size` field of the [`crate::regs::NV_PFALCON_FALCON_DMATRFCMD`] 113 + /// register. 114 + #[derive(Debug, Copy, Clone)] 115 + pub(crate) enum DmaTrfCmdSize with TryFrom<Bounded<u32, 3>> { 116 + /// 256 bytes transfer. 117 + Size256B = 0x6, 209 118 } 210 119 } 211 120 212 - /// Currently active core on a dual falcon/riscv (Peregrine) controller. 213 - #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] 214 - pub(crate) enum PeregrineCoreSelect { 215 - /// Falcon core is active. 216 - #[default] 217 - Falcon = 0, 218 - /// RISC-V core is active. 219 - Riscv = 1, 220 - } 221 - 222 - impl From<bool> for PeregrineCoreSelect { 223 - fn from(value: bool) -> Self { 224 - match value { 225 - false => PeregrineCoreSelect::Falcon, 226 - true => PeregrineCoreSelect::Riscv, 227 - } 228 - } 229 - } 230 - 231 - impl From<PeregrineCoreSelect> for bool { 232 - fn from(value: PeregrineCoreSelect) -> Self { 233 - match value { 234 - PeregrineCoreSelect::Falcon => false, 235 - PeregrineCoreSelect::Riscv => true, 236 - } 121 + bounded_enum! { 122 + /// Currently active core on a dual falcon/riscv (Peregrine) controller. 123 + #[derive(Debug, Copy, Clone, PartialEq, Eq)] 124 + pub(crate) enum PeregrineCoreSelect with From<Bounded<u32, 1>> { 125 + /// Falcon core is active. 126 + Falcon = 0, 127 + /// RISC-V core is active. 128 + Riscv = 1, 237 129 } 238 130 } 239 131 240 132 /// Different types of memory present in a falcon core. 
241 - #[derive(Debug, Clone, Copy, PartialEq, Eq)] 133 + #[derive(Debug, Copy, Clone, PartialEq, Eq)] 242 134 pub(crate) enum FalconMem { 243 135 /// Secure Instruction Memory. 244 136 ImemSecure, ··· 141 249 Dmem, 142 250 } 143 251 144 - /// Defines the Framebuffer Interface (FBIF) aperture type. 145 - /// This determines the memory type for external memory access during a DMA transfer, which is 146 - /// performed by the Falcon's Framebuffer DMA (FBDMA) engine. See falcon.rst for more details. 147 - #[derive(Debug, Clone, Default)] 148 - pub(crate) enum FalconFbifTarget { 149 - /// VRAM. 150 - #[default] 151 - /// Local Framebuffer (GPU's VRAM memory). 152 - LocalFb = 0, 153 - /// Coherent system memory (System DRAM). 154 - CoherentSysmem = 1, 155 - /// Non-coherent system memory (System DRAM). 156 - NoncoherentSysmem = 2, 157 - } 158 - impl_from_enum_to_u8!(FalconFbifTarget); 159 - 160 - // TODO[FPRI]: replace with `FromPrimitive`. 161 - impl TryFrom<u8> for FalconFbifTarget { 162 - type Error = Error; 163 - 164 - fn try_from(value: u8) -> Result<Self> { 165 - let res = match value { 166 - 0 => Self::LocalFb, 167 - 1 => Self::CoherentSysmem, 168 - 2 => Self::NoncoherentSysmem, 169 - _ => return Err(EINVAL), 170 - }; 171 - 172 - Ok(res) 252 + bounded_enum! { 253 + /// Defines the Framebuffer Interface (FBIF) aperture type. 254 + /// This determines the memory type for external memory access during a DMA transfer, which is 255 + /// performed by the Falcon's Framebuffer DMA (FBDMA) engine. See falcon.rst for more details. 256 + #[derive(Debug, Copy, Clone)] 257 + pub(crate) enum FalconFbifTarget with TryFrom<Bounded<u32, 2>> { 258 + /// Local Framebuffer (GPU's VRAM memory). 259 + LocalFb = 0, 260 + /// Coherent system memory (System DRAM). 261 + CoherentSysmem = 1, 262 + /// Non-coherent system memory (System DRAM). 263 + NoncoherentSysmem = 2, 173 264 } 174 265 } 175 266 176 - /// Type of memory addresses to use. 
177 - #[derive(Debug, Clone, Default)] 178 - pub(crate) enum FalconFbifMemType { 179 - /// Virtual memory addresses. 180 - #[default] 181 - Virtual = 0, 182 - /// Physical memory addresses. 183 - Physical = 1, 184 - } 185 - 186 - /// Conversion from a single-bit register field. 187 - impl From<bool> for FalconFbifMemType { 188 - fn from(value: bool) -> Self { 189 - match value { 190 - false => Self::Virtual, 191 - true => Self::Physical, 192 - } 193 - } 194 - } 195 - 196 - impl From<FalconFbifMemType> for bool { 197 - fn from(value: FalconFbifMemType) -> Self { 198 - match value { 199 - FalconFbifMemType::Virtual => false, 200 - FalconFbifMemType::Physical => true, 201 - } 267 + bounded_enum! { 268 + /// Type of memory addresses to use. 269 + #[derive(Debug, Copy, Clone)] 270 + pub(crate) enum FalconFbifMemType with From<Bounded<u32, 1>> { 271 + /// Virtual memory addresses. 272 + Virtual = 0, 273 + /// Physical memory addresses. 274 + Physical = 1, 202 275 } 203 276 } 204 277 ··· 175 318 176 319 /// Trait defining the parameters of a given Falcon engine. 177 320 /// 178 - /// Each engine provides one base for `PFALCON` and `PFALCON2` registers. The `ID` constant is used 179 - /// to identify a given Falcon instance with register I/O methods. 321 + /// Each engine provides one base for `PFALCON` and `PFALCON2` registers. 180 322 pub(crate) trait FalconEngine: 181 323 Send + Sync + RegisterBase<PFalconBase> + RegisterBase<PFalcon2Base> + Sized 182 324 { 183 - /// Singleton of the engine, used to identify it with register I/O methods. 184 - const ID: Self; 185 325 } 186 326 187 - /// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM). 327 + /// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM) 328 + /// using DMA. 
188 329 #[derive(Debug, Clone)] 189 - pub(crate) struct FalconLoadTarget { 330 + pub(crate) struct FalconDmaLoadTarget { 190 331 /// Offset from the start of the source object to copy from. 191 332 pub(crate) src_start: u32, 192 333 /// Offset from the start of the destination memory to copy into. ··· 204 349 pub(crate) ucode_id: u8, 205 350 } 206 351 207 - /// Trait for providing load parameters of falcon firmwares. 208 - pub(crate) trait FalconLoadParams { 352 + /// Trait implemented by falcon firmwares that can be loaded using DMA. 353 + pub(crate) trait FalconDmaLoadable { 354 + /// Returns the firmware data as a slice of bytes. 355 + fn as_slice(&self) -> &[u8]; 356 + 209 357 /// Returns the load parameters for Secure `IMEM`. 210 - fn imem_sec_load_params(&self) -> FalconLoadTarget; 358 + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget; 211 359 212 360 /// Returns the load parameters for Non-Secure `IMEM`, 213 361 /// used only on Turing and GA100. 214 - fn imem_ns_load_params(&self) -> Option<FalconLoadTarget>; 362 + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget>; 215 363 216 364 /// Returns the load parameters for `DMEM`. 217 - fn dmem_load_params(&self) -> FalconLoadTarget; 365 + fn dmem_load_params(&self) -> FalconDmaLoadTarget; 366 + 367 + /// Returns an adapter that provides the required parameter to load this firmware using PIO. 368 + /// 369 + /// This can only fail if some `u32` fields cannot be converted to `u16`, or if the indices in 370 + /// the headers are invalid. 
371 + fn try_as_pio_loadable(&self) -> Result<FalconDmaFirmwarePioAdapter<'_, Self>> { 372 + let new_pio_imem = |params: FalconDmaLoadTarget, secure| { 373 + let start = usize::from_safe_cast(params.src_start); 374 + let end = start + usize::from_safe_cast(params.len); 375 + let data = self.as_slice().get(start..end).ok_or(EINVAL)?; 376 + 377 + let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?; 378 + 379 + Ok::<_, Error>(FalconPioImemLoadTarget { 380 + data, 381 + dst_start, 382 + secure, 383 + start_tag: dst_start >> 8, 384 + }) 385 + }; 386 + 387 + let imem_sec = new_pio_imem(self.imem_sec_load_params(), true)?; 388 + 389 + let imem_ns = if let Some(params) = self.imem_ns_load_params() { 390 + Some(new_pio_imem(params, false)?) 391 + } else { 392 + None 393 + }; 394 + 395 + let dmem = { 396 + let params = self.dmem_load_params(); 397 + let start = usize::from_safe_cast(params.src_start); 398 + let end = start + usize::from_safe_cast(params.len); 399 + let data = self.as_slice().get(start..end).ok_or(EINVAL)?; 400 + 401 + let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?; 402 + 403 + FalconPioDmemLoadTarget { data, dst_start } 404 + }; 405 + 406 + Ok(FalconDmaFirmwarePioAdapter { 407 + fw: self, 408 + imem_sec, 409 + imem_ns, 410 + dmem, 411 + }) 412 + } 413 + } 414 + 415 + /// Represents a portion of the firmware to be loaded into IMEM using PIO. 416 + #[derive(Clone)] 417 + pub(crate) struct FalconPioImemLoadTarget<'a> { 418 + pub(crate) data: &'a [u8], 419 + pub(crate) dst_start: u16, 420 + pub(crate) secure: bool, 421 + pub(crate) start_tag: u16, 422 + } 423 + 424 + /// Represents a portion of the firmware to be loaded into DMEM using PIO. 425 + #[derive(Clone)] 426 + pub(crate) struct FalconPioDmemLoadTarget<'a> { 427 + pub(crate) data: &'a [u8], 428 + pub(crate) dst_start: u16, 429 + } 430 + 431 + /// Trait for providing PIO load parameters of falcon firmwares. 
432 + pub(crate) trait FalconPioLoadable { 433 + /// Returns the load parameters for Secure `IMEM`, if any. 434 + fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>; 435 + 436 + /// Returns the load parameters for Non-Secure `IMEM`, if any. 437 + fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>; 438 + 439 + /// Returns the load parameters for `DMEM`. 440 + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_>; 441 + } 442 + 443 + /// Adapter type that makes any DMA-loadable firmware also loadable via PIO. 444 + /// 445 + /// Created using [`FalconDmaLoadable::try_as_pio_loadable`]. 446 + pub(crate) struct FalconDmaFirmwarePioAdapter<'a, T: FalconDmaLoadable + ?Sized> { 447 + /// Reference to the DMA firmware. 448 + fw: &'a T, 449 + /// Validated secure IMEM parameters. 450 + imem_sec: FalconPioImemLoadTarget<'a>, 451 + /// Validated non-secure IMEM parameters. 452 + imem_ns: Option<FalconPioImemLoadTarget<'a>>, 453 + /// Validated DMEM parameters. 454 + dmem: FalconPioDmemLoadTarget<'a>, 455 + } 456 + 457 + impl<'a, T> FalconPioLoadable for FalconDmaFirmwarePioAdapter<'a, T> 458 + where 459 + T: FalconDmaLoadable + ?Sized, 460 + { 461 + fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { 462 + Some(self.imem_sec.clone()) 463 + } 464 + 465 + fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { 466 + self.imem_ns.clone() 467 + } 468 + 469 + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> { 470 + self.dmem.clone() 471 + } 472 + } 473 + 474 + impl<'a, T> FalconFirmware for FalconDmaFirmwarePioAdapter<'a, T> 475 + where 476 + T: FalconDmaLoadable + FalconFirmware + ?Sized, 477 + { 478 + type Target = <T as FalconFirmware>::Target; 479 + 480 + fn brom_params(&self) -> FalconBromParams { 481 + self.fw.brom_params() 482 + } 483 + 484 + fn boot_addr(&self) -> u32 { 485 + self.fw.boot_addr() 486 + } 487 + } 488 + 489 + /// Trait for a falcon firmware. 
490 + /// 491 + /// A falcon firmware can be loaded on a given engine. 492 + pub(crate) trait FalconFirmware { 493 + /// Engine on which this firmware is to be loaded. 494 + type Target: FalconEngine; 218 495 219 496 /// Returns the parameters to write into the BROM registers. 220 497 fn brom_params(&self) -> FalconBromParams; 221 498 222 499 /// Returns the start address of the firmware. 223 500 fn boot_addr(&self) -> u32; 224 - } 225 - 226 - /// Trait for a falcon firmware. 227 - /// 228 - /// A falcon firmware can be loaded on a given engine, and is presented in the form of a DMA 229 - /// object. 230 - pub(crate) trait FalconFirmware: FalconLoadParams + Deref<Target = DmaObject> { 231 - /// Engine on which this firmware is to be loaded. 232 - type Target: FalconEngine; 233 501 } 234 502 235 503 /// Contains the base parameters common to all Falcon instances. ··· 372 394 373 395 /// Resets DMA-related registers. 374 396 pub(crate) fn dma_reset(&self, bar: &Bar0) { 375 - regs::NV_PFALCON_FBIF_CTL::update(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); 376 - regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); 397 + bar.update(regs::NV_PFALCON_FBIF_CTL::of::<E>(), |v| { 398 + v.with_allow_phys_no_ctx(true) 399 + }); 400 + 401 + bar.write( 402 + WithBase::of::<E>(), 403 + regs::NV_PFALCON_FALCON_DMACTL::zeroed(), 404 + ); 377 405 } 378 406 379 407 /// Reset the controller, select the falcon core, and wait for memory scrubbing to complete. ··· 388 404 self.hal.select_core(self, bar)?; 389 405 self.hal.reset_wait_mem_scrubbing(bar)?; 390 406 391 - regs::NV_PFALCON_FALCON_RM::default() 392 - .set_value(regs::NV_PMC_BOOT_0::read(bar).into()) 393 - .write(bar, &E::ID); 407 + bar.write( 408 + WithBase::of::<E>(), 409 + regs::NV_PFALCON_FALCON_RM::from(bar.read(regs::NV_PMC_BOOT_0).into_raw()), 410 + ); 411 + 412 + Ok(()) 413 + } 414 + 415 + /// Falcons supports up to four ports, but we only ever use one, so just hard-code it. 
416 + const PIO_PORT: usize = 0; 417 + 418 + /// Write a slice to Falcon IMEM memory using programmed I/O (PIO). 419 + /// 420 + /// Returns `EINVAL` if `img.len()` is not a multiple of 4. 421 + fn pio_wr_imem_slice(&self, bar: &Bar0, load_offsets: FalconPioImemLoadTarget<'_>) -> Result { 422 + // Rejecting misaligned images here allows us to avoid checking 423 + // inside the loops. 424 + if load_offsets.data.len() % 4 != 0 { 425 + return Err(EINVAL); 426 + } 427 + 428 + bar.write( 429 + WithBase::of::<E>().at(Self::PIO_PORT), 430 + regs::NV_PFALCON_FALCON_IMEMC::zeroed() 431 + .with_secure(load_offsets.secure) 432 + .with_aincw(true) 433 + .with_offs(load_offsets.dst_start), 434 + ); 435 + 436 + for (n, block) in load_offsets.data.chunks(MEM_BLOCK_ALIGNMENT).enumerate() { 437 + let n = u16::try_from(n)?; 438 + let tag: u16 = load_offsets.start_tag.checked_add(n).ok_or(ERANGE)?; 439 + bar.write( 440 + WithBase::of::<E>().at(Self::PIO_PORT), 441 + regs::NV_PFALCON_FALCON_IMEMT::zeroed().with_tag(tag), 442 + ); 443 + for word in block.chunks_exact(4) { 444 + let w = [word[0], word[1], word[2], word[3]]; 445 + bar.write( 446 + WithBase::of::<E>().at(Self::PIO_PORT), 447 + regs::NV_PFALCON_FALCON_IMEMD::zeroed().with_data(u32::from_le_bytes(w)), 448 + ); 449 + } 450 + } 451 + 452 + Ok(()) 453 + } 454 + 455 + /// Write a slice to Falcon DMEM memory using programmed I/O (PIO). 456 + /// 457 + /// Returns `EINVAL` if `img.len()` is not a multiple of 4. 458 + fn pio_wr_dmem_slice(&self, bar: &Bar0, load_offsets: FalconPioDmemLoadTarget<'_>) -> Result { 459 + // Rejecting misaligned images here allows us to avoid checking 460 + // inside the loops. 
461 + if load_offsets.data.len() % 4 != 0 { 462 + return Err(EINVAL); 463 + } 464 + 465 + bar.write( 466 + WithBase::of::<E>().at(Self::PIO_PORT), 467 + regs::NV_PFALCON_FALCON_DMEMC::zeroed() 468 + .with_aincw(true) 469 + .with_offs(load_offsets.dst_start), 470 + ); 471 + 472 + for word in load_offsets.data.chunks_exact(4) { 473 + let w = [word[0], word[1], word[2], word[3]]; 474 + bar.write( 475 + WithBase::of::<E>().at(Self::PIO_PORT), 476 + regs::NV_PFALCON_FALCON_DMEMD::zeroed().with_data(u32::from_le_bytes(w)), 477 + ); 478 + } 479 + 480 + Ok(()) 481 + } 482 + 483 + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. 484 + pub(crate) fn pio_load<F: FalconFirmware<Target = E> + FalconPioLoadable>( 485 + &self, 486 + bar: &Bar0, 487 + fw: &F, 488 + ) -> Result { 489 + bar.update(regs::NV_PFALCON_FBIF_CTL::of::<E>(), |v| { 490 + v.with_allow_phys_no_ctx(true) 491 + }); 492 + 493 + bar.write( 494 + WithBase::of::<E>(), 495 + regs::NV_PFALCON_FALCON_DMACTL::zeroed(), 496 + ); 497 + 498 + if let Some(imem_ns) = fw.imem_ns_load_params() { 499 + self.pio_wr_imem_slice(bar, imem_ns)?; 500 + } 501 + if let Some(imem_sec) = fw.imem_sec_load_params() { 502 + self.pio_wr_imem_slice(bar, imem_sec)?; 503 + } 504 + self.pio_wr_dmem_slice(bar, fw.dmem_load_params())?; 505 + 506 + self.hal.program_brom(self, bar, &fw.brom_params())?; 507 + 508 + bar.write( 509 + WithBase::of::<E>(), 510 + regs::NV_PFALCON_FALCON_BOOTVEC::zeroed().with_value(fw.boot_addr()), 511 + ); 394 512 395 513 Ok(()) 396 514 } ··· 501 415 /// `target_mem`. 502 416 /// 503 417 /// `sec` is set if the loaded firmware is expected to run in secure mode. 
504 - fn dma_wr<F: FalconFirmware<Target = E>>( 418 + fn dma_wr( 505 419 &self, 506 420 bar: &Bar0, 507 - fw: &F, 421 + dma_obj: &Coherent<[u8]>, 508 422 target_mem: FalconMem, 509 - load_offsets: FalconLoadTarget, 423 + load_offsets: FalconDmaLoadTarget, 510 424 ) -> Result { 511 - const DMA_LEN: u32 = 256; 425 + const DMA_LEN: u32 = num::usize_into_u32::<{ MEM_BLOCK_ALIGNMENT }>(); 512 426 513 427 // For IMEM, we want to use the start offset as a virtual address tag for each page, since 514 428 // code addresses in the firmware (and the boot vector) are virtual. ··· 516 430 // For DMEM we can fold the start offset into the DMA handle. 517 431 let (src_start, dma_start) = match target_mem { 518 432 FalconMem::ImemSecure | FalconMem::ImemNonSecure => { 519 - (load_offsets.src_start, fw.dma_handle()) 433 + (load_offsets.src_start, dma_obj.dma_handle()) 520 434 } 521 435 FalconMem::Dmem => ( 522 436 0, 523 - fw.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?, 437 + dma_obj.dma_handle() + DmaAddress::from(load_offsets.src_start), 524 438 ), 525 439 }; 526 440 if dma_start % DmaAddress::from(DMA_LEN) > 0 { ··· 552 466 dev_err!(self.dev, "DMA transfer length overflow\n"); 553 467 return Err(EOVERFLOW); 554 468 } 555 - Some(upper_bound) if usize::from_safe_cast(upper_bound) > fw.size() => { 469 + Some(upper_bound) if usize::from_safe_cast(upper_bound) > dma_obj.size() => { 556 470 dev_err!(self.dev, "DMA transfer goes beyond range of DMA object\n"); 557 471 return Err(EINVAL); 558 472 } ··· 561 475 562 476 // Set up the base source DMA address. 563 477 564 - regs::NV_PFALCON_FALCON_DMATRFBASE::default() 565 - // CAST: `as u32` is used on purpose since we do want to strip the upper bits, which 566 - // will be written to `NV_PFALCON_FALCON_DMATRFBASE1`. 
567 - .set_base((dma_start >> 8) as u32) 568 - .write(bar, &E::ID); 569 - regs::NV_PFALCON_FALCON_DMATRFBASE1::default() 570 - // CAST: `as u16` is used on purpose since the remaining bits are guaranteed to fit 571 - // within a `u16`. 572 - .set_base((dma_start >> 40) as u16) 573 - .write(bar, &E::ID); 478 + bar.write( 479 + WithBase::of::<E>(), 480 + regs::NV_PFALCON_FALCON_DMATRFBASE::zeroed().with_base( 481 + // CAST: `as u32` is used on purpose since we do want to strip the upper bits, 482 + // which will be written to `NV_PFALCON_FALCON_DMATRFBASE1`. 483 + (dma_start >> 8) as u32, 484 + ), 485 + ); 486 + bar.write( 487 + WithBase::of::<E>(), 488 + regs::NV_PFALCON_FALCON_DMATRFBASE1::zeroed().try_with_base(dma_start >> 40)?, 489 + ); 574 490 575 - let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::default() 576 - .set_size(DmaTrfCmdSize::Size256B) 491 + let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::zeroed() 492 + .with_size(DmaTrfCmdSize::Size256B) 577 493 .with_falcon_mem(target_mem); 578 494 579 495 for pos in (0..num_transfers).map(|i| i * DMA_LEN) { 580 496 // Perform a transfer of size `DMA_LEN`. 581 - regs::NV_PFALCON_FALCON_DMATRFMOFFS::default() 582 - .set_offs(load_offsets.dst_start + pos) 583 - .write(bar, &E::ID); 584 - regs::NV_PFALCON_FALCON_DMATRFFBOFFS::default() 585 - .set_offs(src_start + pos) 586 - .write(bar, &E::ID); 587 - cmd.write(bar, &E::ID); 497 + bar.write( 498 + WithBase::of::<E>(), 499 + regs::NV_PFALCON_FALCON_DMATRFMOFFS::zeroed() 500 + .try_with_offs(load_offsets.dst_start + pos)?, 501 + ); 502 + bar.write( 503 + WithBase::of::<E>(), 504 + regs::NV_PFALCON_FALCON_DMATRFFBOFFS::zeroed().with_offs(src_start + pos), 505 + ); 506 + 507 + bar.write(WithBase::of::<E>(), cmd); 588 508 589 509 // Wait for the transfer to complete. 590 510 // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories 591 511 // should ever take that long. 
592 512 read_poll_timeout( 593 - || Ok(regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID)), 513 + || Ok(bar.read(regs::NV_PFALCON_FALCON_DMATRFCMD::of::<E>())), 594 514 |r| r.idle(), 595 515 Delta::ZERO, 596 516 Delta::from_secs(2), ··· 607 515 } 608 516 609 517 /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. 610 - fn dma_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { 611 - // The Non-Secure section only exists on firmware used by Turing and GA100, and 612 - // those platforms do not use DMA. 613 - if fw.imem_ns_load_params().is_some() { 614 - debug_assert!(false); 615 - return Err(EINVAL); 616 - } 518 + fn dma_load<F: FalconFirmware<Target = E> + FalconDmaLoadable>( 519 + &self, 520 + dev: &Device<device::Bound>, 521 + bar: &Bar0, 522 + fw: &F, 523 + ) -> Result { 524 + // Create DMA object with firmware content as the source of the DMA engine. 525 + let dma_obj = Coherent::from_slice(dev, fw.as_slice(), GFP_KERNEL)?; 617 526 618 527 self.dma_reset(bar); 619 - regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 0, |v| { 620 - v.set_target(FalconFbifTarget::CoherentSysmem) 621 - .set_mem_type(FalconFbifMemType::Physical) 528 + bar.update(regs::NV_PFALCON_FBIF_TRANSCFG::of::<E>().at(0), |v| { 529 + v.with_target(FalconFbifTarget::CoherentSysmem) 530 + .with_mem_type(FalconFbifMemType::Physical) 622 531 }); 623 532 624 - self.dma_wr(bar, fw, FalconMem::ImemSecure, fw.imem_sec_load_params())?; 625 - self.dma_wr(bar, fw, FalconMem::Dmem, fw.dmem_load_params())?; 533 + self.dma_wr( 534 + bar, 535 + &dma_obj, 536 + FalconMem::ImemSecure, 537 + fw.imem_sec_load_params(), 538 + )?; 539 + self.dma_wr(bar, &dma_obj, FalconMem::Dmem, fw.dmem_load_params())?; 626 540 627 541 self.hal.program_brom(self, bar, &fw.brom_params())?; 628 542 629 543 // Set `BootVec` to start of non-secure code. 
630 - regs::NV_PFALCON_FALCON_BOOTVEC::default() 631 - .set_value(fw.boot_addr()) 632 - .write(bar, &E::ID); 544 + bar.write( 545 + WithBase::of::<E>(), 546 + regs::NV_PFALCON_FALCON_BOOTVEC::zeroed().with_value(fw.boot_addr()), 547 + ); 633 548 634 549 Ok(()) 635 550 } ··· 645 546 pub(crate) fn wait_till_halted(&self, bar: &Bar0) -> Result<()> { 646 547 // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. 647 548 read_poll_timeout( 648 - || Ok(regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID)), 549 + || Ok(bar.read(regs::NV_PFALCON_FALCON_CPUCTL::of::<E>())), 649 550 |r| r.halted(), 650 551 Delta::ZERO, 651 552 Delta::from_secs(2), ··· 656 557 657 558 /// Start the falcon CPU. 658 559 pub(crate) fn start(&self, bar: &Bar0) -> Result<()> { 659 - match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { 660 - true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() 661 - .set_startcpu(true) 662 - .write(bar, &E::ID), 663 - false => regs::NV_PFALCON_FALCON_CPUCTL::default() 664 - .set_startcpu(true) 665 - .write(bar, &E::ID), 560 + match bar 561 + .read(regs::NV_PFALCON_FALCON_CPUCTL::of::<E>()) 562 + .alias_en() 563 + { 564 + true => bar.write( 565 + WithBase::of::<E>(), 566 + regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::zeroed().with_startcpu(true), 567 + ), 568 + false => bar.write( 569 + WithBase::of::<E>(), 570 + regs::NV_PFALCON_FALCON_CPUCTL::zeroed().with_startcpu(true), 571 + ), 666 572 } 667 573 668 574 Ok(()) ··· 676 572 /// Writes values to the mailbox registers if provided. 
677 573 pub(crate) fn write_mailboxes(&self, bar: &Bar0, mbox0: Option<u32>, mbox1: Option<u32>) { 678 574 if let Some(mbox0) = mbox0 { 679 - regs::NV_PFALCON_FALCON_MAILBOX0::default() 680 - .set_value(mbox0) 681 - .write(bar, &E::ID); 575 + bar.write( 576 + WithBase::of::<E>(), 577 + regs::NV_PFALCON_FALCON_MAILBOX0::zeroed().with_value(mbox0), 578 + ); 682 579 } 683 580 684 581 if let Some(mbox1) = mbox1 { 685 - regs::NV_PFALCON_FALCON_MAILBOX1::default() 686 - .set_value(mbox1) 687 - .write(bar, &E::ID); 582 + bar.write( 583 + WithBase::of::<E>(), 584 + regs::NV_PFALCON_FALCON_MAILBOX1::zeroed().with_value(mbox1), 585 + ); 688 586 } 689 587 } 690 588 691 589 /// Reads the value from `mbox0` register. 692 590 pub(crate) fn read_mailbox0(&self, bar: &Bar0) -> u32 { 693 - regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value() 591 + bar.read(regs::NV_PFALCON_FALCON_MAILBOX0::of::<E>()) 592 + .value() 694 593 } 695 594 696 595 /// Reads the value from `mbox1` register. 697 596 pub(crate) fn read_mailbox1(&self, bar: &Bar0) -> u32 { 698 - regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value() 597 + bar.read(regs::NV_PFALCON_FALCON_MAILBOX1::of::<E>()) 598 + .value() 699 599 } 700 600 701 601 /// Reads values from both mailbox registers. ··· 748 640 self.hal.is_riscv_active(bar) 749 641 } 750 642 751 - // Load a firmware image into Falcon memory 752 - pub(crate) fn load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { 643 + /// Load a firmware image into Falcon memory, using the preferred method for the current 644 + /// chipset. 
645 + pub(crate) fn load<F: FalconFirmware<Target = E> + FalconDmaLoadable>( 646 + &self, 647 + dev: &Device<device::Bound>, 648 + bar: &Bar0, 649 + fw: &F, 650 + ) -> Result { 753 651 match self.hal.load_method() { 754 - LoadMethod::Dma => self.dma_load(bar, fw), 755 - LoadMethod::Pio => Err(ENOTSUPP), 652 + LoadMethod::Dma => self.dma_load(dev, bar, fw), 653 + LoadMethod::Pio => self.pio_load(bar, &fw.try_as_pio_loadable()?), 756 654 } 757 655 } 758 656 759 657 /// Write the application version to the OS register. 760 658 pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) { 761 - regs::NV_PFALCON_FALCON_OS::default() 762 - .set_value(app_version) 763 - .write(bar, &E::ID); 659 + bar.write( 660 + WithBase::of::<E>(), 661 + regs::NV_PFALCON_FALCON_OS::zeroed().with_value(app_version), 662 + ); 764 663 } 765 664 }

+15 -12

drivers/gpu/nova-core/falcon/gsp.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 use kernel::{ 4 - io::poll::read_poll_timeout, 4 + io::{ 5 + poll::read_poll_timeout, 6 + register::{ 7 + RegisterBase, 8 + WithBase, // 9 + }, 10 + Io, 11 + }, 5 12 prelude::*, 6 13 time::Delta, // 7 14 }; ··· 21 14 PFalcon2Base, 22 15 PFalconBase, // 23 16 }, 24 - regs::{ 25 - self, 26 - macros::RegisterBase, // 27 - }, 17 + regs, 28 18 }; 29 19 30 20 /// Type specifying the `Gsp` falcon engine. Cannot be instantiated. ··· 35 31 const BASE: usize = 0x00111000; 36 32 } 37 33 38 - impl FalconEngine for Gsp { 39 - const ID: Self = Gsp(()); 40 - } 34 + impl FalconEngine for Gsp {} 41 35 42 36 impl Falcon<Gsp> { 43 37 /// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to 44 38 /// allow GSP to signal CPU for processing new messages in message queue. 45 39 pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) { 46 - regs::NV_PFALCON_FALCON_IRQSCLR::default() 47 - .set_swgen0(true) 48 - .write(bar, &Gsp::ID); 40 + bar.write( 41 + WithBase::of::<Gsp>(), 42 + regs::NV_PFALCON_FALCON_IRQSCLR::zeroed().with_swgen0(true), 43 + ); 49 44 } 50 45 51 46 /// Checks if GSP reload/resume has completed during the boot process. 52 47 pub(crate) fn check_reload_completed(&self, bar: &Bar0, timeout: Delta) -> Result<bool> { 53 48 read_poll_timeout( 54 - || Ok(regs::NV_PGC6_BSI_SECURE_SCRATCH_14::read(bar)), 49 + || Ok(bar.read(regs::NV_PGC6_BSI_SECURE_SCRATCH_14)), 55 50 |val| val.boot_stage_3_handoff(), 56 51 Delta::ZERO, 57 52 timeout,

+5 -1

drivers/gpu/nova-core/falcon/hal.rs

··· 58 58 /// Reset the falcon engine. 59 59 fn reset_eng(&self, bar: &Bar0) -> Result; 60 60 61 - /// returns the method needed to load data into Falcon memory 61 + /// Returns the method used to load data into the falcon's memory. 62 + /// 63 + /// The only chipsets supporting PIO are those < GA102, and PIO is the preferred method for 64 + /// these. For anything above, the PIO registers appear to be masked to the CPU, so DMA is the 65 + /// only usable method. 62 66 fn load_method(&self) -> LoadMethod; 63 67 } 64 68

+43 -27

drivers/gpu/nova-core/falcon/hal/ga102.rs

··· 4 4 5 5 use kernel::{ 6 6 device, 7 - io::poll::read_poll_timeout, 7 + io::{ 8 + poll::read_poll_timeout, 9 + register::{ 10 + Array, 11 + WithBase, // 12 + }, 13 + Io, // 14 + }, 8 15 prelude::*, 9 16 time::Delta, // 10 17 }; ··· 32 25 use super::FalconHal; 33 26 34 27 fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result { 35 - let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); 28 + let bcr_ctrl = bar.read(regs::NV_PRISCV_RISCV_BCR_CTRL::of::<E>()); 36 29 if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon { 37 - regs::NV_PRISCV_RISCV_BCR_CTRL::default() 38 - .set_core_select(PeregrineCoreSelect::Falcon) 39 - .write(bar, &E::ID); 30 + bar.write( 31 + WithBase::of::<E>(), 32 + regs::NV_PRISCV_RISCV_BCR_CTRL::zeroed().with_core_select(PeregrineCoreSelect::Falcon), 33 + ); 40 34 41 35 // TIMEOUT: falcon core should take less than 10ms to report being enabled. 42 36 read_poll_timeout( 43 - || Ok(regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID)), 37 + || Ok(bar.read(regs::NV_PRISCV_RISCV_BCR_CTRL::of::<E>())), 44 38 |r| r.valid(), 45 39 Delta::ZERO, 46 40 Delta::from_millis(10), ··· 68 60 69 61 // `ucode_idx` is guaranteed to be in the range [0..15], making the `read` calls provable valid 70 62 // at build-time. 
71 - let reg_fuse_version = if engine_id_mask & 0x0001 != 0 { 72 - regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::read(bar, ucode_idx).data() 63 + let reg_fuse_version: u16 = if engine_id_mask & 0x0001 != 0 { 64 + bar.read(regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::at(ucode_idx)) 65 + .data() 73 66 } else if engine_id_mask & 0x0004 != 0 { 74 - regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::read(bar, ucode_idx).data() 67 + bar.read(regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::at(ucode_idx)) 68 + .data() 75 69 } else if engine_id_mask & 0x0400 != 0 { 76 - regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::read(bar, ucode_idx).data() 70 + bar.read(regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::at(ucode_idx)) 71 + .data() 77 72 } else { 78 73 dev_err!(dev, "unexpected engine_id_mask {:#x}\n", engine_id_mask); 79 74 return Err(EINVAL); ··· 87 76 } 88 77 89 78 fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) -> Result { 90 - regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default() 91 - .set_value(params.pkc_data_offset) 92 - .write(bar, &E::ID, 0); 93 - regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default() 94 - .set_value(u32::from(params.engine_id_mask)) 95 - .write(bar, &E::ID); 96 - regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default() 97 - .set_ucode_id(params.ucode_id) 98 - .write(bar, &E::ID); 99 - regs::NV_PFALCON2_FALCON_MOD_SEL::default() 100 - .set_algo(FalconModSelAlgo::Rsa3k) 101 - .write(bar, &E::ID); 79 + bar.write( 80 + WithBase::of::<E>().at(0), 81 + regs::NV_PFALCON2_FALCON_BROM_PARAADDR::zeroed().with_value(params.pkc_data_offset), 82 + ); 83 + bar.write( 84 + WithBase::of::<E>(), 85 + regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::zeroed() 86 + .with_value(u32::from(params.engine_id_mask)), 87 + ); 88 + bar.write( 89 + WithBase::of::<E>(), 90 + regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::zeroed().with_ucode_id(params.ucode_id), 91 + ); 92 + bar.write( 93 + WithBase::of::<E>(), 94 + regs::NV_PFALCON2_FALCON_MOD_SEL::zeroed().with_algo(FalconModSelAlgo::Rsa3k), 
95 + ); 102 96 103 97 Ok(()) 104 98 } ··· 136 120 } 137 121 138 122 fn is_riscv_active(&self, bar: &Bar0) -> bool { 139 - let cpuctl = regs::NV_PRISCV_RISCV_CPUCTL::read(bar, &E::ID); 140 - cpuctl.active_stat() 123 + bar.read(regs::NV_PRISCV_RISCV_CPUCTL::of::<E>()) 124 + .active_stat() 141 125 } 142 126 143 127 fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { 144 128 // TIMEOUT: memory scrubbing should complete in less than 20ms. 145 129 read_poll_timeout( 146 - || Ok(regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID)), 130 + || Ok(bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>())), 147 131 |r| r.mem_scrubbing_done(), 148 132 Delta::ZERO, 149 133 Delta::from_millis(20), ··· 152 136 } 153 137 154 138 fn reset_eng(&self, bar: &Bar0) -> Result { 155 - let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); 139 + let _ = bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>()); 156 140 157 141 // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set 158 142 // RESET_READY so a non-failing timeout is used. 159 143 let _ = read_poll_timeout( 160 - || Ok(regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID)), 144 + || Ok(bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>())), 161 145 |r| r.reset_ready(), 162 146 Delta::ZERO, 163 147 Delta::from_micros(150),

+8 -4

drivers/gpu/nova-core/falcon/hal/tu102.rs

··· 3 3 use core::marker::PhantomData; 4 4 5 5 use kernel::{ 6 - io::poll::read_poll_timeout, 6 + io::{ 7 + poll::read_poll_timeout, 8 + register::WithBase, 9 + Io, // 10 + }, 7 11 prelude::*, 8 12 time::Delta, // 9 13 }; ··· 53 49 } 54 50 55 51 fn is_riscv_active(&self, bar: &Bar0) -> bool { 56 - let cpuctl = regs::NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS::read(bar, &E::ID); 57 - cpuctl.active_stat() 52 + bar.read(regs::NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS::of::<E>()) 53 + .active_stat() 58 54 } 59 55 60 56 fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { 61 57 // TIMEOUT: memory scrubbing should complete in less than 10ms. 62 58 read_poll_timeout( 63 - || Ok(regs::NV_PFALCON_FALCON_DMACTL::read(bar, &E::ID)), 59 + || Ok(bar.read(regs::NV_PFALCON_FALCON_DMACTL::of::<E>())), 64 60 |r| r.mem_scrubbing_done(), 65 61 Delta::ZERO, 66 62 Delta::from_millis(10),

+7 -10

drivers/gpu/nova-core/falcon/sec2.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - use crate::{ 4 - falcon::{ 5 - FalconEngine, 6 - PFalcon2Base, 7 - PFalconBase, // 8 - }, 9 - regs::macros::RegisterBase, 3 + use kernel::io::register::RegisterBase; 4 + 5 + use crate::falcon::{ 6 + FalconEngine, 7 + PFalcon2Base, 8 + PFalconBase, // 10 9 }; 11 10 12 11 /// Type specifying the `Sec2` falcon engine. Cannot be instantiated. ··· 19 20 const BASE: usize = 0x00841000; 20 21 } 21 22 22 - impl FalconEngine for Sec2 { 23 - const ID: Self = Sec2(()); 24 - } 23 + impl FalconEngine for Sec2 {}

+80 -21

drivers/gpu/nova-core/fb.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - use core::ops::Range; 3 + use core::ops::{ 4 + Deref, 5 + Range, // 6 + }; 4 7 5 8 use kernel::{ 6 9 device, 10 + dma::CoherentHandle, 11 + fmt, 12 + io::Io, 7 13 prelude::*, 8 14 ptr::{ 9 15 Alignable, ··· 20 14 }; 21 15 22 16 use crate::{ 23 - dma::DmaObject, 24 17 driver::Bar0, 25 18 firmware::gsp::GspFirmware, 26 19 gpu::Chipset, ··· 53 48 chipset: Chipset, 54 49 device: ARef<device::Device>, 55 50 /// Keep the page alive as long as we need it. 56 - page: DmaObject, 51 + page: CoherentHandle, 57 52 } 58 53 59 54 impl SysmemFlush { ··· 63 58 bar: &Bar0, 64 59 chipset: Chipset, 65 60 ) -> Result<Self> { 66 - let page = DmaObject::new(dev, kernel::page::PAGE_SIZE)?; 61 + let page = CoherentHandle::alloc(dev, kernel::page::PAGE_SIZE, GFP_KERNEL)?; 67 62 68 63 hal::fb_hal(chipset).write_sysmem_flush_page(bar, page.dma_handle())?; 69 64 ··· 99 94 } 100 95 } 101 96 97 + pub(crate) struct FbRange(Range<u64>); 98 + 99 + impl FbRange { 100 + pub(crate) fn len(&self) -> u64 { 101 + self.0.end - self.0.start 102 + } 103 + } 104 + 105 + impl From<Range<u64>> for FbRange { 106 + fn from(range: Range<u64>) -> Self { 107 + Self(range) 108 + } 109 + } 110 + 111 + impl Deref for FbRange { 112 + type Target = Range<u64>; 113 + 114 + fn deref(&self) -> &Self::Target { 115 + &self.0 116 + } 117 + } 118 + 119 + impl fmt::Debug for FbRange { 120 + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 121 + // Use alternate format ({:#?}) to include size, compact format ({:?}) for just the range. 
122 + if f.alternate() { 123 + let size = self.len(); 124 + 125 + if size < usize_as_u64(SZ_1M) { 126 + let size_kib = size / usize_as_u64(SZ_1K); 127 + f.write_fmt(fmt!( 128 + "{:#x}..{:#x} ({} KiB)", 129 + self.0.start, 130 + self.0.end, 131 + size_kib 132 + )) 133 + } else { 134 + let size_mib = size / usize_as_u64(SZ_1M); 135 + f.write_fmt(fmt!( 136 + "{:#x}..{:#x} ({} MiB)", 137 + self.0.start, 138 + self.0.end, 139 + size_mib 140 + )) 141 + } 142 + } else { 143 + f.write_fmt(fmt!("{:#x}..{:#x}", self.0.start, self.0.end)) 144 + } 145 + } 146 + } 147 + 102 148 /// Layout of the GPU framebuffer memory. 103 149 /// 104 150 /// Contains ranges of GPU memory reserved for a given purpose during the GSP boot process. 105 151 #[derive(Debug)] 106 152 pub(crate) struct FbLayout { 107 153 /// Range of the framebuffer. Starts at `0`. 108 - pub(crate) fb: Range<u64>, 154 + pub(crate) fb: FbRange, 109 155 /// VGA workspace, small area of reserved memory at the end of the framebuffer. 110 - pub(crate) vga_workspace: Range<u64>, 156 + pub(crate) vga_workspace: FbRange, 111 157 /// FRTS range. 112 - pub(crate) frts: Range<u64>, 158 + pub(crate) frts: FbRange, 113 159 /// Memory area containing the GSP bootloader image. 114 - pub(crate) boot: Range<u64>, 160 + pub(crate) boot: FbRange, 115 161 /// Memory area containing the GSP firmware image. 116 - pub(crate) elf: Range<u64>, 162 + pub(crate) elf: FbRange, 117 163 /// WPR2 heap. 118 - pub(crate) wpr2_heap: Range<u64>, 164 + pub(crate) wpr2_heap: FbRange, 119 165 /// WPR2 region range, starting with an instance of `GspFwWprMeta`. 
120 - pub(crate) wpr2: Range<u64>, 121 - pub(crate) heap: Range<u64>, 166 + pub(crate) wpr2: FbRange, 167 + pub(crate) heap: FbRange, 122 168 pub(crate) vf_partition_count: u8, 123 169 } 124 170 ··· 181 125 let fb = { 182 126 let fb_size = hal.vidmem_size(bar); 183 127 184 - 0..fb_size 128 + FbRange(0..fb_size) 185 129 }; 186 130 187 131 let vga_workspace = { ··· 190 134 let base = fb.end - NV_PRAMIN_SIZE; 191 135 192 136 if hal.supports_display(bar) { 193 - match regs::NV_PDISP_VGA_WORKSPACE_BASE::read(bar).vga_workspace_addr() { 137 + match bar 138 + .read(regs::NV_PDISP_VGA_WORKSPACE_BASE) 139 + .vga_workspace_addr() 140 + { 194 141 Some(addr) => { 195 142 if addr < base { 196 143 const VBIOS_WORKSPACE_SIZE: u64 = usize_as_u64(SZ_128K); ··· 211 152 } 212 153 }; 213 154 214 - vga_base..fb.end 155 + FbRange(vga_base..fb.end) 215 156 }; 216 157 217 158 let frts = { ··· 219 160 const FRTS_SIZE: u64 = usize_as_u64(SZ_1M); 220 161 let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE; 221 162 222 - frts_base..frts_base + FRTS_SIZE 163 + FbRange(frts_base..frts_base + FRTS_SIZE) 223 164 }; 224 165 225 166 let boot = { ··· 227 168 let bootloader_size = u64::from_safe_cast(gsp_fw.bootloader.ucode.size()); 228 169 let bootloader_base = (frts.start - bootloader_size).align_down(BOOTLOADER_DOWN_ALIGN); 229 170 230 - bootloader_base..bootloader_base + bootloader_size 171 + FbRange(bootloader_base..bootloader_base + bootloader_size) 231 172 }; 232 173 233 174 let elf = { ··· 235 176 let elf_size = u64::from_safe_cast(gsp_fw.size); 236 177 let elf_addr = (boot.start - elf_size).align_down(ELF_DOWN_ALIGN); 237 178 238 - elf_addr..elf_addr + elf_size 179 + FbRange(elf_addr..elf_addr + elf_size) 239 180 }; 240 181 241 182 let wpr2_heap = { ··· 244 185 gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end); 245 186 let wpr2_heap_addr = (elf.start - wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN); 246 187 247 - 
wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN) 188 + FbRange(wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN)) 248 189 }; 249 190 250 191 let wpr2 = { ··· 252 193 let wpr2_addr = (wpr2_heap.start - u64::from_safe_cast(size_of::<gsp::GspFwWprMeta>())) 253 194 .align_down(WPR2_DOWN_ALIGN); 254 195 255 - wpr2_addr..frts.end 196 + FbRange(wpr2_addr..frts.end) 256 197 }; 257 198 258 199 let heap = { 259 200 const HEAP_SIZE: u64 = usize_as_u64(SZ_1M); 260 201 261 - wpr2.start - HEAP_SIZE..wpr2.start 202 + FbRange(wpr2.start - HEAP_SIZE..wpr2.start) 262 203 }; 263 204 264 205 Ok(Self {

+23 -14

drivers/gpu/nova-core/fb/hal/ga100.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - use kernel::prelude::*; 3 + use kernel::{ 4 + io::Io, 5 + num::Bounded, 6 + prelude::*, // 7 + }; 4 8 5 9 use crate::{ 6 10 driver::Bar0, ··· 17 13 struct Ga100; 18 14 19 15 pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { 20 - u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT 21 - | u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::read(bar).adr_63_40()) 16 + u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT 17 + | u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI).adr_63_40()) 22 18 << FLUSH_SYSMEM_ADDR_SHIFT_HI 23 19 } 24 20 25 21 pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) { 26 - regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::default() 27 - // CAST: `as u32` is used on purpose since the remaining bits are guaranteed to fit within 28 - // a `u32`. 29 - .set_adr_63_40((addr >> FLUSH_SYSMEM_ADDR_SHIFT_HI) as u32) 30 - .write(bar); 31 - regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() 32 - // CAST: `as u32` is used on purpose since we want to strip the upper bits that have been 33 - // written to `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`. 34 - .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) 35 - .write(bar); 22 + bar.write_reg( 23 + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr_63_40( 24 + Bounded::<u64, _>::from(addr) 25 + .shr::<FLUSH_SYSMEM_ADDR_SHIFT_HI, _>() 26 + .cast(), 27 + ), 28 + ); 29 + 30 + bar.write_reg( 31 + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::zeroed() 32 + // CAST: `as u32` is used on purpose since we want to strip the upper bits that have 33 + // been written to `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`. 
34 + .with_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32), 35 + ); 36 36 } 37 37 38 38 pub(super) fn display_enabled_ga100(bar: &Bar0) -> bool { 39 - !regs::ga100::NV_FUSE_STATUS_OPT_DISPLAY::read(bar).display_disabled() 39 + !bar.read(regs::ga100::NV_FUSE_STATUS_OPT_DISPLAY) 40 + .display_disabled() 40 41 } 41 42 42 43 /// Shift applied to the sysmem address before it is written into

+5 -2

drivers/gpu/nova-core/fb/hal/ga102.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - use kernel::prelude::*; 3 + use kernel::{ 4 + io::Io, 5 + prelude::*, // 6 + }; 4 7 5 8 use crate::{ 6 9 driver::Bar0, ··· 12 9 }; 13 10 14 11 fn vidmem_size_ga102(bar: &Bar0) -> u64 { 15 - regs::NV_USABLE_FB_SIZE_IN_MB::read(bar).usable_fb_size() 12 + bar.read(regs::NV_USABLE_FB_SIZE_IN_MB).usable_fb_size() 16 13 } 17 14 18 15 struct Ga102;

+10 -7

drivers/gpu/nova-core/fb/hal/tu102.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - use kernel::prelude::*; 3 + use kernel::{ 4 + io::Io, 5 + prelude::*, // 6 + }; 4 7 5 8 use crate::{ 6 9 driver::Bar0, ··· 16 13 pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8; 17 14 18 15 pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 { 19 - u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT 16 + u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT 20 17 } 21 18 22 19 pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { ··· 24 21 u32::try_from(addr >> FLUSH_SYSMEM_ADDR_SHIFT) 25 22 .map_err(|_| EINVAL) 26 23 .map(|addr| { 27 - regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() 28 - .set_adr_39_08(addr) 29 - .write(bar) 24 + bar.write_reg(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::zeroed().with_adr_39_08(addr)) 30 25 }) 31 26 } 32 27 33 28 pub(super) fn display_enabled_gm107(bar: &Bar0) -> bool { 34 - !regs::gm107::NV_FUSE_STATUS_OPT_DISPLAY::read(bar).display_disabled() 29 + !bar.read(regs::gm107::NV_FUSE_STATUS_OPT_DISPLAY) 30 + .display_disabled() 35 31 } 36 32 37 33 pub(super) fn vidmem_size_gp102(bar: &Bar0) -> u64 { 38 - regs::NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE::read(bar).usable_fb_size() 34 + bar.read(regs::NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE) 35 + .usable_fb_size() 39 36 } 40 37 41 38 struct Tu102;

+146 -48

drivers/gpu/nova-core/firmware.rs

··· 15 15 }; 16 16 17 17 use crate::{ 18 - dma::DmaObject, 19 18 falcon::{ 20 - FalconFirmware, 21 - FalconLoadTarget, // 19 + FalconDmaLoadTarget, 20 + FalconFirmware, // 22 21 }, 23 22 gpu, 24 23 num::{ ··· 63 64 pub(crate) interface_offset: u32, 64 65 /// Base address at which to load the code segment into 'IMEM'. 65 66 pub(crate) imem_phys_base: u32, 66 - /// Size in bytes of the code to copy into 'IMEM'. 67 + /// Size in bytes of the code to copy into 'IMEM' (includes both secure and non-secure 68 + /// segments). 67 69 pub(crate) imem_load_size: u32, 68 70 /// Virtual 'IMEM' address (i.e. 'tag') at which the code should start. 69 71 pub(crate) imem_virt_base: u32, ··· 171 171 ((hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT).into_safe_cast() 172 172 } 173 173 174 - fn imem_sec_load_params(&self) -> FalconLoadTarget; 175 - fn imem_ns_load_params(&self) -> Option<FalconLoadTarget>; 176 - fn dmem_load_params(&self) -> FalconLoadTarget; 174 + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget; 175 + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget>; 176 + fn dmem_load_params(&self) -> FalconDmaLoadTarget; 177 177 } 178 178 179 179 impl FalconUCodeDescriptor for FalconUCodeDescV2 { ··· 205 205 0 206 206 } 207 207 208 - fn imem_sec_load_params(&self) -> FalconLoadTarget { 209 - FalconLoadTarget { 210 - src_start: 0, 211 - dst_start: self.imem_sec_base, 208 + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { 209 + // `imem_sec_base` is the *virtual* start address of the secure IMEM segment, so subtract 210 + // `imem_virt_base` to get its physical offset. 
211 + let imem_sec_start = self.imem_sec_base.saturating_sub(self.imem_virt_base); 212 + 213 + FalconDmaLoadTarget { 214 + src_start: imem_sec_start, 215 + dst_start: self.imem_phys_base.saturating_add(imem_sec_start), 212 216 len: self.imem_sec_size, 213 217 } 214 218 } 215 219 216 - fn imem_ns_load_params(&self) -> Option<FalconLoadTarget> { 217 - Some(FalconLoadTarget { 220 + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> { 221 + Some(FalconDmaLoadTarget { 222 + // Non-secure code always starts at offset 0. 218 223 src_start: 0, 219 224 dst_start: self.imem_phys_base, 220 - len: self.imem_load_size.checked_sub(self.imem_sec_size)?, 225 + // `imem_load_size` includes the size of the secure segment, so subtract it to 226 + // get the correct amount of data to copy. 227 + len: self.imem_load_size.saturating_sub(self.imem_sec_size), 221 228 }) 222 229 } 223 230 224 - fn dmem_load_params(&self) -> FalconLoadTarget { 225 - FalconLoadTarget { 231 + fn dmem_load_params(&self) -> FalconDmaLoadTarget { 232 + FalconDmaLoadTarget { 226 233 src_start: self.dmem_offset, 227 234 dst_start: self.dmem_phys_base, 228 235 len: self.dmem_load_size, ··· 266 259 self.signature_versions 267 260 } 268 261 269 - fn imem_sec_load_params(&self) -> FalconLoadTarget { 270 - FalconLoadTarget { 262 + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { 263 + FalconDmaLoadTarget { 264 + // IMEM segment always starts at offset 0. 271 265 src_start: 0, 272 266 dst_start: self.imem_phys_base, 273 267 len: self.imem_load_size, 274 268 } 275 269 } 276 270 277 - fn imem_ns_load_params(&self) -> Option<FalconLoadTarget> { 271 + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> { 278 272 // Not used on V3 platforms 279 273 None 280 274 } 281 275 282 - fn dmem_load_params(&self) -> FalconLoadTarget { 283 - FalconLoadTarget { 276 + fn dmem_load_params(&self) -> FalconDmaLoadTarget { 277 + FalconDmaLoadTarget { 278 + // DMEM segment starts right after the IMEM one. 
284 279 src_start: self.imem_load_size, 285 280 dst_start: self.dmem_phys_base, 286 281 len: self.dmem_load_size, ··· 301 292 struct Signed; 302 293 impl SignedState for Signed {} 303 294 304 - /// A [`DmaObject`] containing a specific microcode ready to be loaded into a falcon. 295 + /// Microcode to be loaded into a specific falcon. 305 296 /// 306 297 /// This is module-local and meant for sub-modules to use internally. 307 298 /// ··· 309 300 /// before it can be loaded (with an exception for development hardware). The 310 301 /// [`Self::patch_signature`] and [`Self::no_patch_signature`] methods are used to transition the 311 302 /// firmware to its [`Signed`] state. 312 - struct FirmwareDmaObject<F: FalconFirmware, S: SignedState>(DmaObject, PhantomData<(F, S)>); 303 + // TODO: Consider replacing this with a coherent memory object once `CoherentAllocation` supports 304 + // temporary CPU-exclusive access to the object without unsafe methods. 305 + struct FirmwareObject<F: FalconFirmware, S: SignedState>(KVVec<u8>, PhantomData<(F, S)>); 313 306 314 307 /// Trait for signatures to be patched directly into a given firmware. 315 308 /// 316 309 /// This is module-local and meant for sub-modules to use internally. 317 310 trait FirmwareSignature<F: FalconFirmware>: AsRef<[u8]> {} 318 311 319 - impl<F: FalconFirmware> FirmwareDmaObject<F, Unsigned> { 320 - /// Patches the firmware at offset `sig_base_img` with `signature`. 312 + impl<F: FalconFirmware> FirmwareObject<F, Unsigned> { 313 + /// Patches the firmware at offset `signature_start` with `signature`. 
321 314 fn patch_signature<S: FirmwareSignature<F>>( 322 315 mut self, 323 316 signature: &S, 324 - sig_base_img: usize, 325 - ) -> Result<FirmwareDmaObject<F, Signed>> { 317 + signature_start: usize, 318 + ) -> Result<FirmwareObject<F, Signed>> { 326 319 let signature_bytes = signature.as_ref(); 327 - if sig_base_img + signature_bytes.len() > self.0.size() { 328 - return Err(EINVAL); 329 - } 320 + let signature_end = signature_start 321 + .checked_add(signature_bytes.len()) 322 + .ok_or(EOVERFLOW)?; 323 + let dst = self 324 + .0 325 + .get_mut(signature_start..signature_end) 326 + .ok_or(EINVAL)?; 330 327 331 - // SAFETY: We are the only user of this object, so there cannot be any race. 332 - let dst = unsafe { self.0.start_ptr_mut().add(sig_base_img) }; 328 + // PANIC: `dst` and `signature_bytes` have the same length. 329 + dst.copy_from_slice(signature_bytes); 333 330 334 - // SAFETY: `signature` and `dst` are valid, properly aligned, and do not overlap. 335 - unsafe { 336 - core::ptr::copy_nonoverlapping(signature_bytes.as_ptr(), dst, signature_bytes.len()) 337 - }; 338 - 339 - Ok(FirmwareDmaObject(self.0, PhantomData)) 331 + Ok(FirmwareObject(self.0, PhantomData)) 340 332 } 341 333 342 334 /// Mark the firmware as signed without patching it. ··· 345 335 /// This method is used to explicitly confirm that we do not need to sign the firmware, while 346 336 /// allowing us to continue as if it was. This is typically only needed for development 347 337 /// hardware. 
348 - fn no_patch_signature(self) -> FirmwareDmaObject<F, Signed> { 349 - FirmwareDmaObject(self.0, PhantomData) 338 + fn no_patch_signature(self) -> FirmwareObject<F, Signed> { 339 + FirmwareObject(self.0, PhantomData) 350 340 } 351 341 } 352 342 ··· 404 394 fn data(&self) -> Option<&[u8]> { 405 395 let fw_start = usize::from_safe_cast(self.hdr.data_offset); 406 396 let fw_size = usize::from_safe_cast(self.hdr.data_size); 397 + let fw_end = fw_start.checked_add(fw_size)?; 407 398 408 - self.fw.get(fw_start..fw_start + fw_size) 399 + self.fw.get(fw_start..fw_end) 409 400 } 410 401 } 411 402 ··· 427 416 ) 428 417 } 429 418 430 - const fn make_entry_chipset(self, chipset: &str) -> Self { 431 - self.make_entry_file(chipset, "booter_load") 432 - .make_entry_file(chipset, "booter_unload") 433 - .make_entry_file(chipset, "bootloader") 434 - .make_entry_file(chipset, "gsp") 419 + const fn make_entry_chipset(self, chipset: gpu::Chipset) -> Self { 420 + let name = chipset.name(); 421 + 422 + let this = self 423 + .make_entry_file(name, "booter_load") 424 + .make_entry_file(name, "booter_unload") 425 + .make_entry_file(name, "bootloader") 426 + .make_entry_file(name, "gsp"); 427 + 428 + if chipset.needs_fwsec_bootloader() { 429 + this.make_entry_file(name, "gen_bootloader") 430 + } else { 431 + this 432 + } 435 433 } 436 434 437 435 pub(crate) const fn create( 438 - module_name: &'static kernel::str::CStr, 436 + module_name: &'static core::ffi::CStr, 439 437 ) -> firmware::ModInfoBuilder<N> { 440 438 let mut this = Self(firmware::ModInfoBuilder::new(module_name)); 441 439 let mut i = 0; 442 440 443 441 while i < gpu::Chipset::ALL.len() { 444 - this = this.make_entry_chipset(gpu::Chipset::ALL[i].name()); 442 + this = this.make_entry_chipset(gpu::Chipset::ALL[i]); 445 443 i += 1; 446 444 } 447 445 448 446 this.0 447 + } 448 + } 449 + 450 + /// Ad-hoc and temporary module to extract sections from ELF images. 
451 + /// 452 + /// Some firmware images are currently packaged as ELF files, where sections names are used as keys 453 + /// to specific and related bits of data. Future firmware versions are scheduled to move away from 454 + /// that scheme before nova-core becomes stable, which means this module will eventually be 455 + /// removed. 456 + mod elf { 457 + use core::mem::size_of; 458 + 459 + use kernel::{ 460 + bindings, 461 + str::CStr, 462 + transmute::FromBytes, // 463 + }; 464 + 465 + /// Newtype to provide a [`FromBytes`] implementation. 466 + #[repr(transparent)] 467 + struct Elf64Hdr(bindings::elf64_hdr); 468 + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. 469 + unsafe impl FromBytes for Elf64Hdr {} 470 + 471 + #[repr(transparent)] 472 + struct Elf64SHdr(bindings::elf64_shdr); 473 + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. 474 + unsafe impl FromBytes for Elf64SHdr {} 475 + 476 + /// Returns a NULL-terminated string from the ELF image at `offset`. 477 + fn elf_str(elf: &[u8], offset: u64) -> Option<&str> { 478 + let idx = usize::try_from(offset).ok()?; 479 + let bytes = elf.get(idx..)?; 480 + CStr::from_bytes_until_nul(bytes).ok()?.to_str().ok() 481 + } 482 + 483 + /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it. 484 + pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> { 485 + let hdr = &elf 486 + .get(0..size_of::<bindings::elf64_hdr>()) 487 + .and_then(Elf64Hdr::from_bytes)? 488 + .0; 489 + 490 + // Get all the section headers. 491 + let mut shdr = { 492 + let shdr_num = usize::from(hdr.e_shnum); 493 + let shdr_start = usize::try_from(hdr.e_shoff).ok()?; 494 + let shdr_end = shdr_num 495 + .checked_mul(size_of::<Elf64SHdr>()) 496 + .and_then(|v| v.checked_add(shdr_start))?; 497 + 498 + elf.get(shdr_start..shdr_end) 499 + .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))? 
500 + }; 501 + 502 + // Get the strings table. 503 + let strhdr = shdr 504 + .clone() 505 + .nth(usize::from(hdr.e_shstrndx)) 506 + .and_then(Elf64SHdr::from_bytes)?; 507 + 508 + // Find the section which name matches `name` and return it. 509 + shdr.find_map(|sh| { 510 + let hdr = Elf64SHdr::from_bytes(sh)?; 511 + let name_offset = strhdr.0.sh_offset.checked_add(u64::from(hdr.0.sh_name))?; 512 + let section_name = elf_str(elf, name_offset)?; 513 + 514 + if section_name != name { 515 + return None; 516 + } 517 + 518 + let start = usize::try_from(hdr.0.sh_offset).ok()?; 519 + let end = usize::try_from(hdr.0.sh_size) 520 + .ok() 521 + .and_then(|sh_size| start.checked_add(sh_size))?; 522 + 523 + elf.get(start..end) 524 + }) 449 525 } 450 526 }

+45 -42

drivers/gpu/nova-core/firmware/booter.rs

··· 4 4 //! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon 5 5 //! (and optionally unload it through a separate firmware image). 6 6 7 - use core::{ 8 - marker::PhantomData, 9 - ops::Deref, // 10 - }; 7 + use core::marker::PhantomData; 11 8 12 9 use kernel::{ 13 10 device, ··· 13 16 }; 14 17 15 18 use crate::{ 16 - dma::DmaObject, 17 19 driver::Bar0, 18 20 falcon::{ 19 21 sec2::Sec2, 20 22 Falcon, 21 23 FalconBromParams, 22 - FalconFirmware, 23 - FalconLoadParams, 24 - FalconLoadTarget, // 24 + FalconDmaLoadTarget, 25 + FalconDmaLoadable, 26 + FalconFirmware, // 25 27 }, 26 28 firmware::{ 27 29 BinFirmware, 28 - FirmwareDmaObject, 30 + FirmwareObject, 29 31 FirmwareSignature, 30 32 Signed, 31 33 Unsigned, // ··· 39 43 /// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at 40 44 /// `offset` in `slice`. 41 45 fn frombytes_at<S: FromBytes + Sized>(slice: &[u8], offset: usize) -> Result<S> { 46 + let end = offset.checked_add(size_of::<S>()).ok_or(EINVAL)?; 42 47 slice 43 - .get(offset..offset + size_of::<S>()) 48 + .get(offset..end) 44 49 .and_then(S::from_bytes_copy) 45 50 .ok_or(EINVAL) 46 51 } ··· 116 119 Some(sig_size) => { 117 120 let patch_sig = 118 121 frombytes_at::<u32>(self.fw, self.hdr.patch_sig_offset.into_safe_cast())?; 119 - let signatures_start = usize::from_safe_cast(self.hdr.sig_prod_offset + patch_sig); 122 + 123 + let signatures_start = self 124 + .hdr 125 + .sig_prod_offset 126 + .checked_add(patch_sig) 127 + .map(usize::from_safe_cast) 128 + .ok_or(EINVAL)?; 129 + 130 + let signatures_end = signatures_start 131 + .checked_add(usize::from_safe_cast(self.hdr.sig_prod_size)) 132 + .ok_or(EINVAL)?; 120 133 121 134 self.fw 122 135 // Get signatures range. 123 - .get( 124 - signatures_start 125 - ..signatures_start + usize::from_safe_cast(self.hdr.sig_prod_size), 126 - ) 136 + .get(signatures_start..signatures_end) 127 137 .ok_or(EINVAL)? 
128 138 .chunks_exact(sig_size.into_safe_cast()) 129 139 } ··· 256 252 /// The `Booter` loader firmware, responsible for loading the GSP. 257 253 pub(crate) struct BooterFirmware { 258 254 // Load parameters for Secure `IMEM` falcon memory. 259 - imem_sec_load_target: FalconLoadTarget, 255 + imem_sec_load_target: FalconDmaLoadTarget, 260 256 // Load parameters for Non-Secure `IMEM` falcon memory, 261 257 // used only on Turing and GA100 262 - imem_ns_load_target: Option<FalconLoadTarget>, 258 + imem_ns_load_target: Option<FalconDmaLoadTarget>, 263 259 // Load parameters for `DMEM` falcon memory. 264 - dmem_load_target: FalconLoadTarget, 260 + dmem_load_target: FalconDmaLoadTarget, 265 261 // BROM falcon parameters. 266 262 brom_params: FalconBromParams, 267 263 // Device-mapped firmware image. 268 - ucode: FirmwareDmaObject<Self, Signed>, 264 + ucode: FirmwareObject<Self, Signed>, 269 265 } 270 266 271 - impl FirmwareDmaObject<BooterFirmware, Unsigned> { 272 - fn new_booter(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> { 273 - DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData)) 267 + impl FirmwareObject<BooterFirmware, Unsigned> { 268 + fn new_booter(data: &[u8]) -> Result<Self> { 269 + let mut ucode = KVVec::new(); 270 + ucode.extend_from_slice(data, GFP_KERNEL)?; 271 + 272 + Ok(Self(ucode, PhantomData)) 274 273 } 275 274 } 276 275 ··· 327 320 let ucode = bin_fw 328 321 .data() 329 322 .ok_or(EINVAL) 330 - .and_then(|data| FirmwareDmaObject::<Self, _>::new_booter(dev, data))?; 323 + .and_then(FirmwareObject::<Self, _>::new_booter)?; 331 324 332 325 let ucode_signed = { 333 326 let mut signatures = hs_fw.signatures_iter()?.peekable(); ··· 370 363 let (imem_sec_dst_start, imem_ns_load_target) = if chipset <= Chipset::GA100 { 371 364 ( 372 365 app0.offset, 373 - Some(FalconLoadTarget { 366 + Some(FalconDmaLoadTarget { 374 367 src_start: 0, 375 368 dst_start: load_hdr.os_code_offset, 376 369 len: load_hdr.os_code_size, ··· 381 374 
}; 382 375 383 376 Ok(Self { 384 - imem_sec_load_target: FalconLoadTarget { 377 + imem_sec_load_target: FalconDmaLoadTarget { 385 378 src_start: app0.offset, 386 379 dst_start: imem_sec_dst_start, 387 380 len: app0.len, 388 381 }, 389 382 imem_ns_load_target, 390 - dmem_load_target: FalconLoadTarget { 383 + dmem_load_target: FalconDmaLoadTarget { 391 384 src_start: load_hdr.os_data_offset, 392 385 dst_start: 0, 393 386 len: load_hdr.os_data_size, ··· 398 391 } 399 392 } 400 393 401 - impl FalconLoadParams for BooterFirmware { 402 - fn imem_sec_load_params(&self) -> FalconLoadTarget { 394 + impl FalconDmaLoadable for BooterFirmware { 395 + fn as_slice(&self) -> &[u8] { 396 + self.ucode.0.as_slice() 397 + } 398 + 399 + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { 403 400 self.imem_sec_load_target.clone() 404 401 } 405 402 406 - fn imem_ns_load_params(&self) -> Option<FalconLoadTarget> { 403 + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> { 407 404 self.imem_ns_load_target.clone() 408 405 } 409 406 410 - fn dmem_load_params(&self) -> FalconLoadTarget { 407 + fn dmem_load_params(&self) -> FalconDmaLoadTarget { 411 408 self.dmem_load_target.clone() 412 409 } 410 + } 411 + 412 + impl FalconFirmware for BooterFirmware { 413 + type Target = Sec2; 413 414 414 415 fn brom_params(&self) -> FalconBromParams { 415 416 self.brom_params.clone() ··· 430 415 self.imem_sec_load_target.src_start 431 416 } 432 417 } 433 - } 434 - 435 - impl Deref for BooterFirmware { 436 - type Target = DmaObject; 437 - 438 - fn deref(&self) -> &Self::Target { 439 - &self.ucode.0 440 - } 441 - } 442 - 443 - impl FalconFirmware for BooterFirmware { 444 - type Target = Sec2; 445 418 }

+82 -99

drivers/gpu/nova-core/firmware/fwsec.rs

··· 10 10 //! - The command to be run, as this firmware can perform several tasks ; 11 11 //! - The ucode signature, so the GSP falcon can run FWSEC in HS mode. 12 12 13 - use core::{ 14 - marker::PhantomData, 15 - ops::Deref, // 16 - }; 13 + pub(crate) mod bootloader; 14 + 15 + use core::marker::PhantomData; 17 16 18 17 use kernel::{ 19 18 device::{ ··· 27 28 }; 28 29 29 30 use crate::{ 30 - dma::DmaObject, 31 31 driver::Bar0, 32 32 falcon::{ 33 33 gsp::Gsp, 34 34 Falcon, 35 35 FalconBromParams, 36 - FalconFirmware, 37 - FalconLoadParams, 38 - FalconLoadTarget, // 36 + FalconDmaLoadTarget, 37 + FalconDmaLoadable, 38 + FalconFirmware, // 39 39 }, 40 40 firmware::{ 41 41 FalconUCodeDesc, 42 - FirmwareDmaObject, 42 + FirmwareObject, 43 43 FirmwareSignature, 44 44 Signed, 45 45 Unsigned, // 46 46 }, 47 - num::{ 48 - FromSafeCast, 49 - IntoSafeCast, // 50 - }, 47 + num::FromSafeCast, 51 48 vbios::Vbios, 52 49 }; 53 50 ··· 172 177 173 178 impl FirmwareSignature<FwsecFirmware> for Bcrt30Rsa3kSignature {} 174 179 175 - /// Reinterpret the area starting from `offset` in `fw` as an instance of `T` (which must implement 176 - /// [`FromBytes`]) and return a reference to it. 177 - /// 178 - /// # Safety 179 - /// 180 - /// * Callers must ensure that the device does not read/write to/from memory while the returned 181 - /// reference is live. 182 - /// * Callers must ensure that this call does not race with a write to the same region while 183 - /// the returned reference is live. 184 - unsafe fn transmute<T: Sized + FromBytes>(fw: &DmaObject, offset: usize) -> Result<&T> { 185 - // SAFETY: The safety requirements of the function guarantee the device won't read 186 - // or write to memory while the reference is alive and that this call won't race 187 - // with writes to the same memory region. 188 - T::from_bytes(unsafe { fw.as_slice(offset, size_of::<T>())? 
}).ok_or(EINVAL) 189 - } 190 - 191 - /// Reinterpret the area starting from `offset` in `fw` as a mutable instance of `T` (which must 192 - /// implement [`FromBytes`]) and return a reference to it. 193 - /// 194 - /// # Safety 195 - /// 196 - /// * Callers must ensure that the device does not read/write to/from memory while the returned 197 - /// slice is live. 198 - /// * Callers must ensure that this call does not race with a read or write to the same region 199 - /// while the returned slice is live. 200 - unsafe fn transmute_mut<T: Sized + FromBytes + AsBytes>( 201 - fw: &mut DmaObject, 202 - offset: usize, 203 - ) -> Result<&mut T> { 204 - // SAFETY: The safety requirements of the function guarantee the device won't read 205 - // or write to memory while the reference is alive and that this call won't race 206 - // with writes or reads to the same memory region. 207 - T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())? }).ok_or(EINVAL) 208 - } 209 - 210 180 /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. 211 181 /// 212 182 /// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow. 213 183 pub(crate) struct FwsecFirmware { 214 184 /// Descriptor of the firmware. 215 185 desc: FalconUCodeDesc, 216 - /// GPU-accessible DMA object containing the firmware. 217 - ucode: FirmwareDmaObject<Self, Signed>, 186 + /// Object containing the firmware binary. 
187 + ucode: FirmwareObject<Self, Signed>, 218 188 } 219 189 220 - impl FalconLoadParams for FwsecFirmware { 221 - fn imem_sec_load_params(&self) -> FalconLoadTarget { 190 + impl FalconDmaLoadable for FwsecFirmware { 191 + fn as_slice(&self) -> &[u8] { 192 + self.ucode.0.as_slice() 193 + } 194 + 195 + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { 222 196 self.desc.imem_sec_load_params() 223 197 } 224 198 225 - fn imem_ns_load_params(&self) -> Option<FalconLoadTarget> { 199 + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> { 226 200 self.desc.imem_ns_load_params() 227 201 } 228 202 229 - fn dmem_load_params(&self) -> FalconLoadTarget { 203 + fn dmem_load_params(&self) -> FalconDmaLoadTarget { 230 204 self.desc.dmem_load_params() 231 205 } 206 + } 207 + 208 + impl FalconFirmware for FwsecFirmware { 209 + type Target = Gsp; 232 210 233 211 fn brom_params(&self) -> FalconBromParams { 234 212 FalconBromParams { ··· 216 248 } 217 249 } 218 250 219 - impl Deref for FwsecFirmware { 220 - type Target = DmaObject; 221 - 222 - fn deref(&self) -> &Self::Target { 223 - &self.ucode.0 224 - } 225 - } 226 - 227 - impl FalconFirmware for FwsecFirmware { 228 - type Target = Gsp; 229 - } 230 - 231 - impl FirmwareDmaObject<FwsecFirmware, Unsigned> { 232 - fn new_fwsec(dev: &Device<device::Bound>, bios: &Vbios, cmd: FwsecCommand) -> Result<Self> { 251 + impl FirmwareObject<FwsecFirmware, Unsigned> { 252 + fn new_fwsec(bios: &Vbios, cmd: FwsecCommand) -> Result<Self> { 233 253 let desc = bios.fwsec_image().header()?; 234 - let ucode = bios.fwsec_image().ucode(&desc)?; 235 - let mut dma_object = DmaObject::from_data(dev, ucode)?; 254 + let mut ucode = KVVec::new(); 255 + ucode.extend_from_slice(bios.fwsec_image().ucode(&desc)?, GFP_KERNEL)?; 236 256 237 - let hdr_offset = usize::from_safe_cast(desc.imem_load_size() + desc.interface_offset()); 238 - // SAFETY: we have exclusive access to `dma_object`. 
239 - let hdr: &FalconAppifHdrV1 = unsafe { transmute(&dma_object, hdr_offset) }?; 257 + let hdr_offset = desc 258 + .imem_load_size() 259 + .checked_add(desc.interface_offset()) 260 + .map(usize::from_safe_cast) 261 + .ok_or(EINVAL)?; 262 + 263 + let hdr = ucode 264 + .get(hdr_offset..) 265 + .and_then(FalconAppifHdrV1::from_bytes_prefix) 266 + .ok_or(EINVAL)? 267 + .0; 240 268 241 269 if hdr.version != 1 { 242 270 return Err(EINVAL); ··· 240 276 241 277 // Find the DMEM mapper section in the firmware. 242 278 for i in 0..usize::from(hdr.entry_count) { 243 - // SAFETY: we have exclusive access to `dma_object`. 244 - let app: &FalconAppifV1 = unsafe { 245 - transmute( 246 - &dma_object, 247 - hdr_offset + usize::from(hdr.header_size) + i * usize::from(hdr.entry_size), 248 - ) 249 - }?; 279 + // CALC: hdr_offset + header_size + i * entry_size. 280 + let entry_offset = hdr_offset 281 + .checked_add(usize::from(hdr.header_size)) 282 + .and_then(|o| o.checked_add(i.checked_mul(usize::from(hdr.entry_size))?)) 283 + .ok_or(EINVAL)?; 284 + 285 + let app = ucode 286 + .get(entry_offset..) 287 + .and_then(FalconAppifV1::from_bytes_prefix) 288 + .ok_or(EINVAL)? 289 + .0; 250 290 251 291 if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER { 252 292 continue; 253 293 } 254 294 let dmem_base = app.dmem_base; 255 295 256 - // SAFETY: we have exclusive access to `dma_object`. 257 - let dmem_mapper: &mut FalconAppifDmemmapperV3 = unsafe { 258 - transmute_mut( 259 - &mut dma_object, 260 - (desc.imem_load_size() + dmem_base).into_safe_cast(), 261 - ) 262 - }?; 296 + let dmem_mapper_offset = desc 297 + .imem_load_size() 298 + .checked_add(dmem_base) 299 + .map(usize::from_safe_cast) 300 + .ok_or(EINVAL)?; 301 + 302 + let dmem_mapper = ucode 303 + .get_mut(dmem_mapper_offset..) 304 + .and_then(FalconAppifDmemmapperV3::from_bytes_mut_prefix) 305 + .ok_or(EINVAL)? 306 + .0; 263 307 264 308 dmem_mapper.init_cmd = match cmd { 265 309 FwsecCommand::Frts { .. 
} => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS, ··· 275 303 }; 276 304 let cmd_in_buffer_offset = dmem_mapper.cmd_in_buffer_offset; 277 305 278 - // SAFETY: we have exclusive access to `dma_object`. 279 - let frts_cmd: &mut FrtsCmd = unsafe { 280 - transmute_mut( 281 - &mut dma_object, 282 - (desc.imem_load_size() + cmd_in_buffer_offset).into_safe_cast(), 283 - ) 284 - }?; 306 + let frts_cmd_offset = desc 307 + .imem_load_size() 308 + .checked_add(cmd_in_buffer_offset) 309 + .map(usize::from_safe_cast) 310 + .ok_or(EINVAL)?; 311 + 312 + let frts_cmd = ucode 313 + .get_mut(frts_cmd_offset..) 314 + .and_then(FrtsCmd::from_bytes_mut_prefix) 315 + .ok_or(EINVAL)? 316 + .0; 285 317 286 318 frts_cmd.read_vbios = ReadVbios { 287 319 ver: 1, ··· 309 333 } 310 334 311 335 // Return early as we found and patched the DMEMMAPPER region. 312 - return Ok(Self(dma_object, PhantomData)); 336 + return Ok(Self(ucode, PhantomData)); 313 337 } 314 338 315 339 Err(ENOTSUPP) ··· 326 350 bios: &Vbios, 327 351 cmd: FwsecCommand, 328 352 ) -> Result<Self> { 329 - let ucode_dma = FirmwareDmaObject::<Self, _>::new_fwsec(dev, bios, cmd)?; 353 + let ucode_dma = FirmwareObject::<Self, _>::new_fwsec(bios, cmd)?; 330 354 331 355 // Patch signature if needed. 332 356 let desc = bios.fwsec_image().header()?; 333 357 let ucode_signed = if desc.signature_count() != 0 { 334 - let sig_base_img = 335 - usize::from_safe_cast(desc.imem_load_size() + desc.pkc_data_offset()); 358 + let sig_base_img = desc 359 + .imem_load_size() 360 + .checked_add(desc.pkc_data_offset()) 361 + .map(usize::from_safe_cast) 362 + .ok_or(EINVAL)?; 336 363 let desc_sig_versions = u32::from(desc.signature_versions()); 337 364 let reg_fuse_version = 338 365 falcon.signature_reg_fuse_version(bar, desc.engine_id_mask(), desc.ucode_id())?; ··· 387 408 } 388 409 389 410 /// Loads the FWSEC firmware into `falcon` and execute it. 
411 + /// 412 + /// This must only be called on chipsets that do not need the FWSEC bootloader (i.e., where 413 + /// [`Chipset::needs_fwsec_bootloader()`](crate::gpu::Chipset::needs_fwsec_bootloader) returns 414 + /// `false`). On chipsets that do, use [`bootloader::FwsecFirmwareWithBl`] instead. 390 415 pub(crate) fn run( 391 416 &self, 392 417 dev: &Device<device::Bound>, ··· 402 419 .reset(bar) 403 420 .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; 404 421 falcon 405 - .load(bar, self) 422 + .load(dev, bar, self) 406 423 .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; 407 424 let (mbox0, _) = falcon 408 425 .boot(bar, Some(0), None)

+350

drivers/gpu/nova-core/firmware/fwsec/bootloader.rs

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! Bootloader support for the FWSEC firmware. 4 + //! 5 + //! On Turing, the FWSEC firmware is not loaded directly, but is instead loaded through a small 6 + //! bootloader program that performs the required DMA operations. This bootloader itself needs to 7 + //! be loaded using PIO. 8 + 9 + use kernel::{ 10 + alloc::KVec, 11 + device::{ 12 + self, 13 + Device, // 14 + }, 15 + dma::Coherent, 16 + io::{ 17 + register::WithBase, // 18 + Io, 19 + }, 20 + prelude::*, 21 + ptr::{ 22 + Alignable, 23 + Alignment, // 24 + }, 25 + sizes, 26 + transmute::{ 27 + AsBytes, 28 + FromBytes, // 29 + }, 30 + }; 31 + 32 + use crate::{ 33 + driver::Bar0, 34 + falcon::{ 35 + self, 36 + gsp::Gsp, 37 + Falcon, 38 + FalconBromParams, 39 + FalconDmaLoadable, 40 + FalconFbifMemType, 41 + FalconFbifTarget, 42 + FalconFirmware, 43 + FalconPioDmemLoadTarget, 44 + FalconPioImemLoadTarget, 45 + FalconPioLoadable, // 46 + }, 47 + firmware::{ 48 + fwsec::FwsecFirmware, 49 + request_firmware, 50 + BinHdr, 51 + FIRMWARE_VERSION, // 52 + }, 53 + gpu::Chipset, 54 + num::FromSafeCast, 55 + regs, 56 + }; 57 + 58 + /// Descriptor used by RM to figure out the requirements of the boot loader. 59 + /// 60 + /// Most of its fields appear to be legacy and carry incorrect values, so they are left unused. 61 + #[repr(C)] 62 + #[derive(Debug, Clone)] 63 + struct BootloaderDesc { 64 + /// Starting tag of bootloader. 65 + start_tag: u32, 66 + /// DMEM load offset - unused here as we always load at offset `0`. 67 + _dmem_load_off: u32, 68 + /// Offset of code section in the image. Unused as there is only one section in the bootloader 69 + /// binary. 70 + _code_off: u32, 71 + /// Size of code section in the image. 72 + code_size: u32, 73 + /// Offset of data section in the image. Unused as we build the data section ourselves. 74 + _data_off: u32, 75 + /// Size of data section in the image. Unused as we build the data section ourselves. 
76 + _data_size: u32, 77 + } 78 + // SAFETY: any byte sequence is valid for this struct. 79 + unsafe impl FromBytes for BootloaderDesc {} 80 + 81 + /// Structure used by the boot-loader to load the rest of the code. 82 + /// 83 + /// This has to be filled by the GPU driver and copied into DMEM at offset 84 + /// [`BootloaderDesc.dmem_load_off`]. 85 + #[repr(C, packed)] 86 + #[derive(Debug, Clone)] 87 + struct BootloaderDmemDescV2 { 88 + /// Reserved, should always be first element. 89 + reserved: [u32; 4], 90 + /// 16B signature for secure code, 0s if no secure code. 91 + signature: [u32; 4], 92 + /// DMA context used by the bootloader while loading code/data. 93 + ctx_dma: u32, 94 + /// 256B-aligned physical FB address where code is located. 95 + code_dma_base: u64, 96 + /// Offset from `code_dma_base` where the non-secure code is located. 97 + /// 98 + /// Also used as destination IMEM offset of non-secure code as the DMA firmware object is 99 + /// expected to be a mirror image of its loaded state. 100 + /// 101 + /// Must be multiple of 256. 102 + non_sec_code_off: u32, 103 + /// Size of the non-secure code part. 104 + non_sec_code_size: u32, 105 + /// Offset from `code_dma_base` where the secure code is located (must be multiple of 256). 106 + /// 107 + /// Also used as destination IMEM offset of secure code as the DMA firmware object is expected 108 + /// to be a mirror image of its loaded state. 109 + /// 110 + /// Must be multiple of 256. 111 + sec_code_off: u32, 112 + /// Size of the secure code part. 113 + sec_code_size: u32, 114 + /// Code entry point invoked by the bootloader after code is loaded. 115 + code_entry_point: u32, 116 + /// 256B-aligned physical FB address where data is located. 117 + data_dma_base: u64, 118 + /// Size of data block (should be multiple of 256B). 119 + data_size: u32, 120 + /// Number of arguments to be passed to the target firmware being loaded. 
121 + argc: u32, 122 + /// Arguments to be passed to the target firmware being loaded. 123 + argv: u32, 124 + } 125 + // SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. 126 + unsafe impl AsBytes for BootloaderDmemDescV2 {} 127 + 128 + /// Wrapper for [`FwsecFirmware`] that includes the bootloader performing the actual load 129 + /// operation. 130 + pub(crate) struct FwsecFirmwareWithBl { 131 + /// DMA object the bootloader will copy the firmware from. 132 + _firmware_dma: Coherent<[u8]>, 133 + /// Code of the bootloader to be loaded into non-secure IMEM. 134 + ucode: KVec<u8>, 135 + /// Descriptor to be loaded into DMEM for the bootloader to read. 136 + dmem_desc: BootloaderDmemDescV2, 137 + /// Range-validated start offset of the firmware code in IMEM. 138 + imem_dst_start: u16, 139 + /// BROM parameters of the loaded firmware. 140 + brom_params: FalconBromParams, 141 + /// Range-validated `desc.start_tag`. 142 + start_tag: u16, 143 + } 144 + 145 + impl FwsecFirmwareWithBl { 146 + /// Loads the bootloader firmware for `dev` and `chipset`, and wrap `firmware` so it can be 147 + /// loaded using it. 148 + pub(crate) fn new( 149 + firmware: FwsecFirmware, 150 + dev: &Device<device::Bound>, 151 + chipset: Chipset, 152 + ) -> Result<Self> { 153 + let fw = request_firmware(dev, chipset, "gen_bootloader", FIRMWARE_VERSION)?; 154 + let hdr = fw 155 + .data() 156 + .get(0..size_of::<BinHdr>()) 157 + .and_then(BinHdr::from_bytes_copy) 158 + .ok_or(EINVAL)?; 159 + 160 + let desc = { 161 + let desc_offset = usize::from_safe_cast(hdr.header_offset); 162 + 163 + fw.data() 164 + .get(desc_offset..) 165 + .and_then(BootloaderDesc::from_bytes_copy_prefix) 166 + .ok_or(EINVAL)? 167 + .0 168 + }; 169 + 170 + let ucode = { 171 + let ucode_start = usize::from_safe_cast(hdr.data_offset); 172 + let code_size = usize::from_safe_cast(desc.code_size); 173 + // Align to falcon block size (256 bytes). 
174 + let aligned_code_size = code_size 175 + .align_up(Alignment::new::<{ falcon::MEM_BLOCK_ALIGNMENT }>()) 176 + .ok_or(EINVAL)?; 177 + 178 + let mut ucode = KVec::with_capacity(aligned_code_size, GFP_KERNEL)?; 179 + ucode.extend_from_slice( 180 + fw.data() 181 + .get(ucode_start..ucode_start + code_size) 182 + .ok_or(EINVAL)?, 183 + GFP_KERNEL, 184 + )?; 185 + ucode.resize(aligned_code_size, 0, GFP_KERNEL)?; 186 + 187 + ucode 188 + }; 189 + 190 + // `BootloaderDmemDescV2` expects the source to be a mirror image of the destination and 191 + // uses the same offset parameter for both. 192 + // 193 + // Thus, the start of the source object needs to be padded with the difference between the 194 + // destination and source offsets. 195 + // 196 + // In practice, this is expected to always be zero but is required for code correctness. 197 + let (align_padding, firmware_dma) = { 198 + let align_padding = { 199 + let imem_sec = firmware.imem_sec_load_params(); 200 + 201 + imem_sec 202 + .dst_start 203 + .checked_sub(imem_sec.src_start) 204 + .map(usize::from_safe_cast) 205 + .ok_or(EOVERFLOW)? 206 + }; 207 + 208 + let mut firmware_obj = KVVec::new(); 209 + firmware_obj.extend_with(align_padding, 0u8, GFP_KERNEL)?; 210 + firmware_obj.extend_from_slice(firmware.ucode.0.as_slice(), GFP_KERNEL)?; 211 + 212 + ( 213 + align_padding, 214 + Coherent::from_slice(dev, firmware_obj.as_slice(), GFP_KERNEL)?, 215 + ) 216 + }; 217 + 218 + let dmem_desc = { 219 + // Bootloader payload is in non-coherent system memory. 220 + const FALCON_DMAIDX_PHYS_SYS_NCOH: u32 = 4; 221 + 222 + let imem_sec = firmware.imem_sec_load_params(); 223 + let imem_ns = firmware.imem_ns_load_params().ok_or(EINVAL)?; 224 + let dmem = firmware.dmem_load_params(); 225 + 226 + // The bootloader does not have a data destination offset field and copies the data at 227 + // the start of DMEM, so it can only be used if the destination offset of the firmware 228 + // is 0. 
229 + if dmem.dst_start != 0 { 230 + return Err(EINVAL); 231 + } 232 + 233 + BootloaderDmemDescV2 { 234 + reserved: [0; 4], 235 + signature: [0; 4], 236 + ctx_dma: FALCON_DMAIDX_PHYS_SYS_NCOH, 237 + code_dma_base: firmware_dma.dma_handle(), 238 + // `dst_start` is also valid as the source offset since the firmware DMA object is 239 + // a mirror image of the target IMEM layout. 240 + non_sec_code_off: imem_ns.dst_start, 241 + non_sec_code_size: imem_ns.len, 242 + // `dst_start` is also valid as the source offset since the firmware DMA object is 243 + // a mirror image of the target IMEM layout. 244 + sec_code_off: imem_sec.dst_start, 245 + sec_code_size: imem_sec.len, 246 + code_entry_point: 0, 247 + // Start of data section is the added padding + the DMEM `src_start` field. 248 + data_dma_base: firmware_dma 249 + .dma_handle() 250 + .checked_add(u64::from_safe_cast(align_padding)) 251 + .and_then(|offset| offset.checked_add(dmem.src_start.into())) 252 + .ok_or(EOVERFLOW)?, 253 + data_size: dmem.len, 254 + argc: 0, 255 + argv: 0, 256 + } 257 + }; 258 + 259 + // The bootloader's code must be loaded in the area right below the first 64K of IMEM. 260 + const BOOTLOADER_LOAD_CEILING: usize = sizes::SZ_64K; 261 + let imem_dst_start = BOOTLOADER_LOAD_CEILING 262 + .checked_sub(ucode.len()) 263 + .ok_or(EOVERFLOW)?; 264 + 265 + Ok(Self { 266 + _firmware_dma: firmware_dma, 267 + ucode, 268 + dmem_desc, 269 + brom_params: firmware.brom_params(), 270 + imem_dst_start: u16::try_from(imem_dst_start)?, 271 + start_tag: u16::try_from(desc.start_tag)?, 272 + }) 273 + } 274 + 275 + /// Loads the bootloader into `falcon` and execute it. 276 + /// 277 + /// The bootloader will load the FWSEC firmware and then execute it. This function returns 278 + /// after FWSEC has reached completion. 279 + pub(crate) fn run( 280 + &self, 281 + dev: &Device<device::Bound>, 282 + falcon: &Falcon<Gsp>, 283 + bar: &Bar0, 284 + ) -> Result<()> { 285 + // Reset falcon, load the firmware, and run it. 
286 + falcon 287 + .reset(bar) 288 + .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; 289 + falcon 290 + .pio_load(bar, self) 291 + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; 292 + 293 + // Configure DMA index for the bootloader to fetch the FWSEC firmware from system memory. 294 + bar.update( 295 + regs::NV_PFALCON_FBIF_TRANSCFG::of::<Gsp>() 296 + .try_at(usize::from_safe_cast(self.dmem_desc.ctx_dma)) 297 + .ok_or(EINVAL)?, 298 + |v| { 299 + v.with_target(FalconFbifTarget::CoherentSysmem) 300 + .with_mem_type(FalconFbifMemType::Physical) 301 + }, 302 + ); 303 + 304 + let (mbox0, _) = falcon 305 + .boot(bar, Some(0), None) 306 + .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?; 307 + if mbox0 != 0 { 308 + dev_err!(dev, "FWSEC firmware returned error {}\n", mbox0); 309 + Err(EIO) 310 + } else { 311 + Ok(()) 312 + } 313 + } 314 + } 315 + 316 + impl FalconFirmware for FwsecFirmwareWithBl { 317 + type Target = Gsp; 318 + 319 + fn brom_params(&self) -> FalconBromParams { 320 + self.brom_params.clone() 321 + } 322 + 323 + fn boot_addr(&self) -> u32 { 324 + // On V2 platforms, the boot address is extracted from the generic bootloader, because the 325 + // gbl is what actually copies FWSEC into memory, so that is what needs to be booted. 326 + u32::from(self.start_tag) << 8 327 + } 328 + } 329 + 330 + impl FalconPioLoadable for FwsecFirmwareWithBl { 331 + fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { 332 + None 333 + } 334 + 335 + fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { 336 + Some(FalconPioImemLoadTarget { 337 + data: self.ucode.as_ref(), 338 + dst_start: self.imem_dst_start, 339 + secure: false, 340 + start_tag: self.start_tag, 341 + }) 342 + } 343 + 344 + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> { 345 + FalconPioDmemLoadTarget { 346 + data: self.dmem_desc.as_bytes(), 347 + dst_start: 0, 348 + } 349 + } 350 + }

+19 -99

drivers/gpu/nova-core/firmware/gsp.rs

··· 3 3 use kernel::{ 4 4 device, 5 5 dma::{ 6 + Coherent, 7 + CoherentBox, 6 8 DataDirection, 7 9 DmaAddress, // 8 10 }, 9 - kvec, 10 11 prelude::*, 11 12 scatterlist::{ 12 13 Owned, ··· 16 15 }; 17 16 18 17 use crate::{ 19 - dma::DmaObject, 20 - firmware::riscv::RiscvFirmware, 18 + firmware::{ 19 + elf, 20 + riscv::RiscvFirmware, // 21 + }, 21 22 gpu::{ 22 23 Architecture, 23 24 Chipset, // ··· 27 24 gsp::GSP_PAGE_SIZE, 28 25 num::FromSafeCast, 29 26 }; 30 - 31 - /// Ad-hoc and temporary module to extract sections from ELF images. 32 - /// 33 - /// Some firmware images are currently packaged as ELF files, where sections names are used as keys 34 - /// to specific and related bits of data. Future firmware versions are scheduled to move away from 35 - /// that scheme before nova-core becomes stable, which means this module will eventually be 36 - /// removed. 37 - mod elf { 38 - use kernel::{ 39 - bindings, 40 - prelude::*, 41 - transmute::FromBytes, // 42 - }; 43 - 44 - /// Newtype to provide a [`FromBytes`] implementation. 45 - #[repr(transparent)] 46 - struct Elf64Hdr(bindings::elf64_hdr); 47 - // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. 48 - unsafe impl FromBytes for Elf64Hdr {} 49 - 50 - #[repr(transparent)] 51 - struct Elf64SHdr(bindings::elf64_shdr); 52 - // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. 53 - unsafe impl FromBytes for Elf64SHdr {} 54 - 55 - /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it. 56 - pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> { 57 - let hdr = &elf 58 - .get(0..size_of::<bindings::elf64_hdr>()) 59 - .and_then(Elf64Hdr::from_bytes)? 60 - .0; 61 - 62 - // Get all the section headers. 
63 - let mut shdr = { 64 - let shdr_num = usize::from(hdr.e_shnum); 65 - let shdr_start = usize::try_from(hdr.e_shoff).ok()?; 66 - let shdr_end = shdr_num 67 - .checked_mul(size_of::<Elf64SHdr>()) 68 - .and_then(|v| v.checked_add(shdr_start))?; 69 - 70 - elf.get(shdr_start..shdr_end) 71 - .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))? 72 - }; 73 - 74 - // Get the strings table. 75 - let strhdr = shdr 76 - .clone() 77 - .nth(usize::from(hdr.e_shstrndx)) 78 - .and_then(Elf64SHdr::from_bytes)?; 79 - 80 - // Find the section which name matches `name` and return it. 81 - shdr.find(|&sh| { 82 - let Some(hdr) = Elf64SHdr::from_bytes(sh) else { 83 - return false; 84 - }; 85 - 86 - let Some(name_idx) = strhdr 87 - .0 88 - .sh_offset 89 - .checked_add(u64::from(hdr.0.sh_name)) 90 - .and_then(|idx| usize::try_from(idx).ok()) 91 - else { 92 - return false; 93 - }; 94 - 95 - // Get the start of the name. 96 - elf.get(name_idx..) 97 - .and_then(|nstr| CStr::from_bytes_until_nul(nstr).ok()) 98 - // Convert into str. 99 - .and_then(|c_str| c_str.to_str().ok()) 100 - // Check that the name matches. 101 - .map(|str| str == name) 102 - .unwrap_or(false) 103 - }) 104 - // Return the slice containing the section. 105 - .and_then(|sh| { 106 - let hdr = Elf64SHdr::from_bytes(sh)?; 107 - let start = usize::try_from(hdr.0.sh_offset).ok()?; 108 - let end = usize::try_from(hdr.0.sh_size) 109 - .ok() 110 - .and_then(|sh_size| start.checked_add(sh_size))?; 111 - 112 - elf.get(start..end) 113 - }) 114 - } 115 - } 116 27 117 28 /// GSP firmware with 3-level radix page tables for the GSP bootloader. 118 29 /// ··· 53 136 #[pin] 54 137 level1: SGTable<Owned<VVec<u8>>>, 55 138 /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page. 56 - level0: DmaObject, 139 + level0: Coherent<[u64]>, 57 140 /// Size in bytes of the firmware contained in [`Self::fw`]. 
58 141 pub(crate) size: usize, 59 142 /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. 60 - pub(crate) signatures: DmaObject, 143 + pub(crate) signatures: Coherent<[u8]>, 61 144 /// GSP bootloader, verifies the GSP firmware before loading and running it. 62 145 pub(crate) bootloader: RiscvFirmware, 63 146 } ··· 114 197 // Allocate the level 0 page table as a device-visible DMA object, and map the 115 198 // level 1 page table onto it. 116 199 117 - // Level 0 page table data. 118 - let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?; 119 - 120 200 // Fill level 1 page entry. 121 201 let level1_entry = level1.iter().next().ok_or(EINVAL)?; 122 202 let level1_entry_addr = level1_entry.dma_address(); 123 - let dst = &mut level0_data[..size_of_val(&level1_entry_addr)]; 124 - dst.copy_from_slice(&level1_entry_addr.to_le_bytes()); 125 203 126 - // Turn the level0 page table into a [`DmaObject`]. 127 - DmaObject::from_data(dev, &level0_data)? 204 + // Create level 0 page table data and fill its first entry with the level 1 205 + // table. 206 + let mut level0 = CoherentBox::<[u64]>::zeroed_slice( 207 + dev, 208 + GSP_PAGE_SIZE / size_of::<u64>(), 209 + GFP_KERNEL 210 + )?; 211 + level0[0] = level1_entry_addr.to_le(); 212 + 213 + level0.into() 128 214 }, 129 215 size, 130 216 signatures: { ··· 146 226 147 227 elf::elf64_section(firmware.data(), sigs_section) 148 228 .ok_or(EINVAL) 149 - .and_then(|data| DmaObject::from_data(dev, data))? 229 + .and_then(|data| Coherent::from_slice(dev, data, GFP_KERNEL))? 150 230 }, 151 231 bootloader: { 152 232 let bl = super::request_firmware(dev, chipset, "bootloader", ver)?;

+6 -4

drivers/gpu/nova-core/firmware/riscv.rs

··· 5 5 6 6 use kernel::{ 7 7 device, 8 + dma::Coherent, 8 9 firmware::Firmware, 9 10 prelude::*, 10 11 transmute::FromBytes, // 11 12 }; 12 13 13 14 use crate::{ 14 - dma::DmaObject, 15 15 firmware::BinFirmware, 16 16 num::FromSafeCast, // 17 17 }; ··· 45 45 /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. 46 46 fn new(bin_fw: &BinFirmware<'_>) -> Result<Self> { 47 47 let offset = usize::from_safe_cast(bin_fw.hdr.header_offset); 48 + let end = offset.checked_add(size_of::<Self>()).ok_or(EINVAL)?; 48 49 49 50 bin_fw 50 51 .fw 51 - .get(offset..offset + size_of::<Self>()) 52 + .get(offset..end) 52 53 .and_then(Self::from_bytes_copy) 53 54 .ok_or(EINVAL) 54 55 } ··· 66 65 /// Application version. 67 66 pub(crate) app_version: u32, 68 67 /// Device-mapped firmware image. 69 - pub(crate) ucode: DmaObject, 68 + pub(crate) ucode: Coherent<[u8]>, 70 69 } 71 70 72 71 impl RiscvFirmware { ··· 79 78 let ucode = { 80 79 let start = usize::from_safe_cast(bin_fw.hdr.data_offset); 81 80 let len = usize::from_safe_cast(bin_fw.hdr.data_size); 81 + let end = start.checked_add(len).ok_or(EINVAL)?; 82 82 83 - DmaObject::from_data(dev, fw.data().get(start..start + len).ok_or(EINVAL)?)? 83 + Coherent::from_slice(dev, fw.data().get(start..end).ok_or(EINVAL)?, GFP_KERNEL)? 84 84 }; 85 85 86 86 Ok(Self {

+8 -3

drivers/gpu/nova-core/gfw.rs

··· 19 19 //! Note that the devinit sequence also needs to run during suspend/resume. 20 20 21 21 use kernel::{ 22 - io::poll::read_poll_timeout, 22 + io::{ 23 + poll::read_poll_timeout, 24 + Io, // 25 + }, 23 26 prelude::*, 24 27 time::Delta, // 25 28 }; ··· 61 58 Ok( 62 59 // Check that FWSEC has lowered its protection level before reading the GFW_BOOT 63 60 // status. 64 - regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) 61 + bar.read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK) 65 62 .read_protection_level0() 66 - && regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT::read(bar).completed(), 63 + && bar 64 + .read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT) 65 + .completed(), 67 66 ) 68 67 }, 69 68 |&gfw_booted| gfw_booted,

+26 -40

drivers/gpu/nova-core/gpu.rs

··· 4 4 device, 5 5 devres::Devres, 6 6 fmt, 7 + io::Io, 8 + num::Bounded, 7 9 pci, 8 10 prelude::*, 9 11 sync::Arc, // 10 12 }; 11 13 12 14 use crate::{ 15 + bounded_enum, 13 16 driver::Bar0, 14 17 falcon::{ 15 18 gsp::Gsp as GspFalcon, ··· 95 92 }); 96 93 97 94 impl Chipset { 98 - pub(crate) fn arch(&self) -> Architecture { 95 + pub(crate) const fn arch(self) -> Architecture { 99 96 match self { 100 97 Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => { 101 98 Architecture::Turing ··· 107 104 Architecture::Ada 108 105 } 109 106 } 107 + } 108 + 109 + /// Returns `true` if this chipset requires the PIO-loaded bootloader in order to boot FWSEC. 110 + /// 111 + /// This includes all chipsets < GA102. 112 + pub(crate) const fn needs_fwsec_bootloader(self) -> bool { 113 + matches!(self.arch(), Architecture::Turing) || matches!(self, Self::GA100) 110 114 } 111 115 } 112 116 ··· 131 121 } 132 122 } 133 123 134 - /// Enum representation of the GPU generation. 135 - /// 136 - /// TODO: remove the `Default` trait implementation, and the `#[default]` 137 - /// attribute, once the register!() macro (which creates Architecture items) no 138 - /// longer requires it for read-only fields. 139 - #[derive(fmt::Debug, Default, Copy, Clone)] 140 - #[repr(u8)] 141 - pub(crate) enum Architecture { 142 - #[default] 143 - Turing = 0x16, 144 - Ampere = 0x17, 145 - Ada = 0x19, 146 - } 147 - 148 - impl TryFrom<u8> for Architecture { 149 - type Error = Error; 150 - 151 - fn try_from(value: u8) -> Result<Self> { 152 - match value { 153 - 0x16 => Ok(Self::Turing), 154 - 0x17 => Ok(Self::Ampere), 155 - 0x19 => Ok(Self::Ada), 156 - _ => Err(ENODEV), 157 - } 158 - } 159 - } 160 - 161 - impl From<Architecture> for u8 { 162 - fn from(value: Architecture) -> Self { 163 - // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless. 164 - value as u8 124 + bounded_enum! { 125 + /// Enum representation of the GPU generation. 
126 + #[derive(fmt::Debug, Copy, Clone)] 127 + pub(crate) enum Architecture with TryFrom<Bounded<u32, 6>> { 128 + Turing = 0x16, 129 + Ampere = 0x17, 130 + Ada = 0x19, 165 131 } 166 132 } 167 133 168 134 pub(crate) struct Revision { 169 - major: u8, 170 - minor: u8, 135 + major: Bounded<u8, 4>, 136 + minor: Bounded<u8, 4>, 171 137 } 172 138 173 139 impl From<regs::NV_PMC_BOOT_42> for Revision { 174 140 fn from(boot0: regs::NV_PMC_BOOT_42) -> Self { 175 141 Self { 176 - major: boot0.major_revision(), 177 - minor: boot0.minor_revision(), 142 + major: boot0.major_revision().cast(), 143 + minor: boot0.minor_revision().cast(), 178 144 } 179 145 } 180 146 } ··· 187 201 // from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU. 188 202 // Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs. 189 203 190 - let boot0 = regs::NV_PMC_BOOT_0::read(bar); 204 + let boot0 = bar.read(regs::NV_PMC_BOOT_0); 191 205 192 206 if boot0.is_older_than_fermi() { 193 207 return Err(ENODEV); 194 208 } 195 209 196 - let boot42 = regs::NV_PMC_BOOT_42::read(bar); 210 + let boot42 = bar.read(regs::NV_PMC_BOOT_42); 197 211 Spec::try_from(boot42).inspect_err(|_| { 198 212 dev_err!(dev, "Unsupported chipset: {}\n", boot42); 199 213 }) ··· 248 262 ) -> impl PinInit<Self, Error> + 'a { 249 263 try_pin_init!(Self { 250 264 spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| { 251 - dev_info!(pdev.as_ref(),"NVIDIA ({})\n", spec); 265 + dev_info!(pdev,"NVIDIA ({})\n", spec); 252 266 })?, 253 267 254 268 // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. 255 269 _: { 256 270 gfw::wait_gfw_boot_completion(bar) 257 - .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete\n"))?; 271 + .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?; 258 272 }, 259 273 260 274 sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?,

+72 -46

drivers/gpu/nova-core/gsp.rs

··· 3 3 mod boot; 4 4 5 5 use kernel::{ 6 + debugfs, 6 7 device, 7 8 dma::{ 8 - CoherentAllocation, 9 + Coherent, 10 + CoherentBox, 9 11 DmaAddress, // 10 12 }, 11 - dma_write, 12 13 pci, 13 14 prelude::*, 14 - transmute::AsBytes, // 15 + transmute::{ 16 + AsBytes, 17 + FromBytes, // 18 + }, // 15 19 }; 16 20 17 21 pub(crate) mod cmdq; ··· 42 38 43 39 /// Number of GSP pages to use in a RM log buffer. 44 40 const RM_LOG_BUFFER_NUM_PAGES: usize = 0x10; 41 + const LOG_BUFFER_SIZE: usize = RM_LOG_BUFFER_NUM_PAGES * GSP_PAGE_SIZE; 45 42 46 43 /// Array of page table entries, as understood by the GSP bootloader. 47 44 #[repr(C)] 48 45 struct PteArray<const NUM_ENTRIES: usize>([u64; NUM_ENTRIES]); 46 + 47 + /// SAFETY: arrays of `u64` implement `FromBytes` and we are but a wrapper around one. 48 + unsafe impl<const NUM_ENTRIES: usize> FromBytes for PteArray<NUM_ENTRIES> {} 49 49 50 50 /// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around one. 51 51 unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {} ··· 78 70 /// then pp points to index into the buffer where the next logging entry will 79 71 /// be written. Therefore, the logging data is valid if: 80 72 /// 1 <= pp < sizeof(buffer)/sizeof(u64) 81 - struct LogBuffer(CoherentAllocation<u8>); 73 + struct LogBuffer(Coherent<[u8; LOG_BUFFER_SIZE]>); 82 74 83 75 impl LogBuffer { 84 76 /// Creates a new `LogBuffer` mapped on `dev`. 85 77 fn new(dev: &device::Device<device::Bound>) -> Result<Self> { 86 - const NUM_PAGES: usize = RM_LOG_BUFFER_NUM_PAGES; 87 - 88 - let mut obj = Self(CoherentAllocation::<u8>::alloc_coherent( 89 - dev, 90 - NUM_PAGES * GSP_PAGE_SIZE, 91 - GFP_KERNEL | __GFP_ZERO, 92 - )?); 78 + let obj = Self(Coherent::zeroed(dev, GFP_KERNEL)?); 93 79 94 80 let start_addr = obj.0.dma_handle(); 95 81 96 82 // SAFETY: `obj` has just been created and we are its sole user. 
97 83 let pte_region = unsafe { 98 - obj.0 99 - .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())? 84 + &mut obj.0.as_mut()[size_of::<u64>()..][..RM_LOG_BUFFER_NUM_PAGES * size_of::<u64>()] 100 85 }; 101 86 102 87 // Write values one by one to avoid an on-stack instance of `PteArray`. ··· 103 102 } 104 103 } 105 104 106 - /// GSP runtime data. 107 - #[pin_data] 108 - pub(crate) struct Gsp { 109 - /// Libos arguments. 110 - pub(crate) libos: CoherentAllocation<LibosMemoryRegionInitArgument>, 105 + struct LogBuffers { 111 106 /// Init log buffer. 112 107 loginit: LogBuffer, 113 108 /// Interrupts log buffer. 114 109 logintr: LogBuffer, 115 110 /// RM log buffer. 116 111 logrm: LogBuffer, 112 + } 113 + 114 + /// GSP runtime data. 115 + #[pin_data] 116 + pub(crate) struct Gsp { 117 + /// Libos arguments. 118 + pub(crate) libos: Coherent<[LibosMemoryRegionInitArgument]>, 119 + /// Log buffers, optionally exposed via debugfs. 120 + #[pin] 121 + logs: debugfs::Scope<LogBuffers>, 117 122 /// Command queue. 123 + #[pin] 118 124 pub(crate) cmdq: Cmdq, 119 125 /// RM arguments. 120 - rmargs: CoherentAllocation<GspArgumentsPadded>, 126 + rmargs: Coherent<GspArgumentsPadded>, 121 127 } 122 128 123 129 impl Gsp { ··· 133 125 pin_init::pin_init_scope(move || { 134 126 let dev = pdev.as_ref(); 135 127 128 + let loginit = LogBuffer::new(dev)?; 129 + let logintr = LogBuffer::new(dev)?; 130 + let logrm = LogBuffer::new(dev)?; 131 + 132 + // Initialise the logging structures. 
The OpenRM equivalents are in: 133 + // _kgspInitLibosLoggingStructures (allocates memory for buffers) 134 + // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) 136 135 Ok(try_pin_init!(Self { 137 - libos: CoherentAllocation::<LibosMemoryRegionInitArgument>::alloc_coherent( 138 - dev, 139 - GSP_PAGE_SIZE / size_of::<LibosMemoryRegionInitArgument>(), 140 - GFP_KERNEL | __GFP_ZERO, 141 - )?, 142 - loginit: LogBuffer::new(dev)?, 143 - logintr: LogBuffer::new(dev)?, 144 - logrm: LogBuffer::new(dev)?, 145 - cmdq: Cmdq::new(dev)?, 146 - rmargs: CoherentAllocation::<GspArgumentsPadded>::alloc_coherent( 147 - dev, 148 - 1, 149 - GFP_KERNEL | __GFP_ZERO, 150 - )?, 151 - _: { 152 - // Initialise the logging structures. The OpenRM equivalents are in: 153 - // _kgspInitLibosLoggingStructures (allocates memory for buffers) 154 - // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) 155 - dma_write!( 156 - libos, [0]?, LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0) 157 - ); 158 - dma_write!( 159 - libos, [1]?, LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0) 160 - ); 161 - dma_write!(libos, [2]?, LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0)); 162 - dma_write!(rmargs, [0]?.inner, fw::GspArgumentsCached::new(cmdq)); 163 - dma_write!(libos, [3]?, LibosMemoryRegionInitArgument::new("RMARGS", rmargs)); 136 + cmdq <- Cmdq::new(dev), 137 + rmargs: Coherent::init(dev, GFP_KERNEL, GspArgumentsPadded::new(&cmdq))?, 138 + libos: { 139 + let mut libos = CoherentBox::zeroed_slice( 140 + dev, 141 + GSP_PAGE_SIZE / size_of::<LibosMemoryRegionInitArgument>(), 142 + GFP_KERNEL, 143 + )?; 144 + 145 + libos.init_at(0, LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0))?; 146 + libos.init_at(1, LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0))?; 147 + libos.init_at(2, LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?; 148 + libos.init_at(3, LibosMemoryRegionInitArgument::new("RMARGS", rmargs))?; 149 + 150 + libos.into() 151 + 
}, 152 + logs <- { 153 + let log_buffers = LogBuffers { 154 + loginit, 155 + logintr, 156 + logrm, 157 + }; 158 + 159 + #[allow(static_mut_refs)] 160 + // SAFETY: `DEBUGFS_ROOT` is created before driver registration and cleared 161 + // after driver unregistration, so no probe() can race with its modification. 162 + // 163 + // PANIC: `DEBUGFS_ROOT` cannot be `None` here. It is set before driver 164 + // registration and cleared after driver unregistration, so it is always 165 + // `Some` for the entire lifetime that probe() can be called. 166 + let log_parent: &debugfs::Dir = unsafe { crate::DEBUGFS_ROOT.as_ref() } 167 + .expect("DEBUGFS_ROOT not initialized"); 168 + 169 + log_parent.scope(log_buffers, dev.name(), |logs, dir| { 170 + dir.read_binary_file(c"loginit", &logs.loginit.0); 171 + dir.read_binary_file(c"logintr", &logs.logintr.0); 172 + dir.read_binary_file(c"logrm", &logs.logrm.0); 173 + }) 164 174 }, 165 175 })) 166 176 })

+37 -45

drivers/gpu/nova-core/gsp/boot.rs

··· 2 2 3 3 use kernel::{ 4 4 device, 5 - dma::CoherentAllocation, 6 - dma_write, 5 + dma::Coherent, 7 6 io::poll::read_poll_timeout, 7 + io::Io, 8 8 pci, 9 9 prelude::*, 10 10 time::Delta, // ··· 24 24 BooterKind, // 25 25 }, 26 26 fwsec::{ 27 + bootloader::FwsecFirmwareWithBl, 27 28 FwsecCommand, 28 29 FwsecFirmware, // 29 30 }, ··· 49 48 /// created the WPR2 region. 50 49 fn run_fwsec_frts( 51 50 dev: &device::Device<device::Bound>, 51 + chipset: Chipset, 52 52 falcon: &Falcon<Gsp>, 53 53 bar: &Bar0, 54 54 bios: &Vbios, ··· 57 55 ) -> Result<()> { 58 56 // Check that the WPR2 region does not already exists - if it does, we cannot run 59 57 // FWSEC-FRTS until the GPU is reset. 60 - if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { 58 + if bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound() != 0 { 61 59 dev_err!( 62 60 dev, 63 61 "WPR2 region already exists - GPU needs to be reset to proceed\n" ··· 65 63 return Err(EBUSY); 66 64 } 67 65 66 + // FWSEC-FRTS will create the WPR2 region. 68 67 let fwsec_frts = FwsecFirmware::new( 69 68 dev, 70 69 falcon, ··· 73 70 bios, 74 71 FwsecCommand::Frts { 75 72 frts_addr: fb_layout.frts.start, 76 - frts_size: fb_layout.frts.end - fb_layout.frts.start, 73 + frts_size: fb_layout.frts.len(), 77 74 }, 78 75 )?; 79 76 80 - // Run FWSEC-FRTS to create the WPR2 region. 81 - fwsec_frts.run(dev, falcon, bar)?; 77 + if chipset.needs_fwsec_bootloader() { 78 + let fwsec_frts_bl = FwsecFirmwareWithBl::new(fwsec_frts, dev, chipset)?; 79 + // Load and run the bootloader, which will load FWSEC-FRTS and run it. 80 + fwsec_frts_bl.run(dev, falcon, bar)?; 81 + } else { 82 + // Load and run FWSEC-FRTS directly. 83 + fwsec_frts.run(dev, falcon, bar)?; 84 + } 82 85 83 86 // SCRATCH_E contains the error code for FWSEC-FRTS. 
84 - let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code(); 87 + let frts_status = bar 88 + .read(regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR) 89 + .frts_err_code(); 85 90 if frts_status != 0 { 86 91 dev_err!( 87 92 dev, ··· 102 91 103 92 // Check that the WPR2 region has been created as we requested. 104 93 let (wpr2_lo, wpr2_hi) = ( 105 - regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), 106 - regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), 94 + bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO).lower_bound(), 95 + bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound(), 107 96 ); 108 97 109 98 match (wpr2_lo, wpr2_hi) { ··· 139 128 /// 140 129 /// Upon return, the GSP is up and running, and its runtime object given as return value. 141 130 pub(crate) fn boot( 142 - mut self: Pin<&mut Self>, 131 + self: Pin<&mut Self>, 143 132 pdev: &pci::Device<device::Bound>, 144 133 bar: &Bar0, 145 134 chipset: Chipset, ··· 155 144 let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?; 156 145 dev_dbg!(dev, "{:#x?}\n", fb_layout); 157 146 158 - Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; 147 + Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, &fb_layout)?; 159 148 160 149 let booter_loader = BooterFirmware::new( 161 150 dev, ··· 166 155 bar, 167 156 )?; 168 157 169 - let wpr_meta = 170 - CoherentAllocation::<GspFwWprMeta>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; 171 - dma_write!(wpr_meta, [0]?, GspFwWprMeta::new(&gsp_fw, &fb_layout)); 158 + let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?; 172 159 173 160 self.cmdq 174 - .send_command(bar, commands::SetSystemInfo::new(pdev))?; 175 - self.cmdq.send_command(bar, commands::SetRegistry::new())?; 161 + .send_command_no_wait(bar, commands::SetSystemInfo::new(pdev))?; 162 + self.cmdq 163 + .send_command_no_wait(bar, commands::SetRegistry::new())?; 176 164 177 165 gsp_falcon.reset(bar)?; 178 166 let libos_handle = 
self.libos.dma_handle(); ··· 180 170 Some(libos_handle as u32), 181 171 Some((libos_handle >> 32) as u32), 182 172 )?; 183 - dev_dbg!( 184 - pdev.as_ref(), 185 - "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", 186 - mbox0, 187 - mbox1 188 - ); 173 + dev_dbg!(pdev, "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1); 189 174 190 175 dev_dbg!( 191 - pdev.as_ref(), 176 + pdev, 192 177 "Using SEC2 to load and run the booter_load firmware...\n" 193 178 ); 194 179 195 180 sec2_falcon.reset(bar)?; 196 - sec2_falcon.load(bar, &booter_loader)?; 181 + sec2_falcon.load(dev, bar, &booter_loader)?; 197 182 let wpr_handle = wpr_meta.dma_handle(); 198 183 let (mbox0, mbox1) = sec2_falcon.boot( 199 184 bar, 200 185 Some(wpr_handle as u32), 201 186 Some((wpr_handle >> 32) as u32), 202 187 )?; 203 - dev_dbg!( 204 - pdev.as_ref(), 205 - "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n", 206 - mbox0, 207 - mbox1 208 - ); 188 + dev_dbg!(pdev, "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n", mbox0, mbox1); 209 189 210 190 if mbox0 != 0 { 211 - dev_err!( 212 - pdev.as_ref(), 213 - "Booter-load failed with error {:#x}\n", 214 - mbox0 215 - ); 191 + dev_err!(pdev, "Booter-load failed with error {:#x}\n", mbox0); 216 192 return Err(ENODEV); 217 193 } 218 194 ··· 212 216 Delta::from_secs(5), 213 217 )?; 214 218 215 - dev_dbg!( 216 - pdev.as_ref(), 217 - "RISC-V active? {}\n", 218 - gsp_falcon.is_riscv_active(bar), 219 - ); 219 + dev_dbg!(pdev, "RISC-V active? {}\n", gsp_falcon.is_riscv_active(bar),); 220 220 221 221 // Create and run the GSP sequencer. 222 222 let seq_params = GspSequencerParams { ··· 223 231 dev: pdev.as_ref().into(), 224 232 bar, 225 233 }; 226 - GspSequencer::run(&mut self.cmdq, seq_params)?; 234 + GspSequencer::run(&self.cmdq, seq_params)?; 227 235 228 236 // Wait until GSP is fully initialized. 229 - commands::wait_gsp_init_done(&mut self.cmdq)?; 237 + commands::wait_gsp_init_done(&self.cmdq)?; 230 238 231 239 // Obtain and display basic GPU information. 
232 - let info = commands::get_gsp_info(&mut self.cmdq, bar)?; 240 + let info = commands::get_gsp_info(&self.cmdq, bar)?; 233 241 match info.gpu_name() { 234 - Ok(name) => dev_info!(pdev.as_ref(), "GPU name: {}\n", name), 235 - Err(e) => dev_warn!(pdev.as_ref(), "GPU name unavailable: {:?}\n", e), 242 + Ok(name) => dev_info!(pdev, "GPU name: {}\n", name), 243 + Err(e) => dev_warn!(pdev, "GPU name unavailable: {:?}\n", e), 236 244 } 237 245 238 246 Ok(())

+285 -108

drivers/gpu/nova-core/gsp/cmdq.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - use core::{ 4 - cmp, 5 - mem, // 6 - }; 3 + mod continuation; 4 + 5 + use core::mem; 7 6 8 7 use kernel::{ 9 8 device, 10 9 dma::{ 11 - CoherentAllocation, 10 + Coherent, 12 11 DmaAddress, // 13 12 }, 14 13 dma_write, 15 - io::poll::read_poll_timeout, 14 + io::{ 15 + poll::read_poll_timeout, 16 + Io, // 17 + }, 18 + new_mutex, 16 19 prelude::*, 17 - sync::aref::ARef, 20 + sync::{ 21 + aref::ARef, 22 + Mutex, // 23 + }, 18 24 time::Delta, 19 25 transmute::{ 20 26 AsBytes, 21 27 FromBytes, // 22 28 }, 23 29 }; 30 + 31 + use continuation::{ 32 + ContinuationRecord, 33 + SplitState, // 34 + }; 35 + 36 + use pin_init::pin_init_scope; 24 37 25 38 use crate::{ 26 39 driver::Bar0, ··· 42 29 GspMsgElement, 43 30 MsgFunction, 44 31 MsgqRxHeader, 45 - MsgqTxHeader, // 32 + MsgqTxHeader, 33 + GSP_MSG_QUEUE_ELEMENT_SIZE_MAX, // 46 34 }, 47 35 PteArray, 48 36 GSP_PAGE_SHIFT, ··· 54 40 sbuffer::SBufferIter, // 55 41 }; 56 42 43 + /// Marker type representing the absence of a reply for a command. Commands using this as their 44 + /// reply type are sent using [`Cmdq::send_command_no_wait`]. 45 + pub(crate) struct NoReply; 46 + 57 47 /// Trait implemented by types representing a command to send to the GSP. 58 48 /// 59 - /// The main purpose of this trait is to provide [`Cmdq::send_command`] with the information it 60 - /// needs to send a given command. 49 + /// The main purpose of this trait is to provide [`Cmdq`] with the information it needs to send 50 + /// a given command. 61 51 /// 62 52 /// [`CommandToGsp::init`] in particular is responsible for initializing the command directly 63 53 /// into the space reserved for it in the command queue buffer. ··· 75 57 76 58 /// Type generated by [`CommandToGsp::init`], to be written into the command queue buffer. 77 59 type Command: FromBytes + AsBytes; 60 + 61 + /// Type of the reply expected from the GSP, or [`NoReply`] for commands that don't 62 + /// have a reply. 
63 + type Reply; 78 64 79 65 /// Error type returned by [`CommandToGsp::init`]. 80 66 type InitError; ··· 111 89 _dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>, 112 90 ) -> Result { 113 91 Ok(()) 92 + } 93 + 94 + /// Total size of the command (including its variable-length payload) without the 95 + /// [`GspMsgElement`] header. 96 + fn size(&self) -> usize { 97 + size_of::<Self::Command>() + self.variable_payload_len() 114 98 } 115 99 } 116 100 ··· 187 159 /// Self-mapping page table entries. 188 160 ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, 189 161 /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the 190 - /// write and read pointers that the CPU updates. 162 + /// write and read pointers that the CPU updates. This means that the read pointer here is an 163 + /// index into the GSP queue. 191 164 /// 192 165 /// This member is read-only for the GSP. 193 166 pub(super) cpuq: Msgq, 194 167 /// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the 195 - /// write and read pointers that the GSP updates. 168 + /// write and read pointers that the GSP updates. This means that the read pointer here is an 169 + /// index into the CPU queue. 196 170 /// 197 171 /// This member is read-only for the driver. 198 172 pub(super) gspq: Msgq, ··· 212 182 // that is not a problem because they are not used outside the kernel. 213 183 unsafe impl FromBytes for GspMem {} 214 184 215 - /// Wrapper around [`GspMem`] to share it with the GPU using a [`CoherentAllocation`]. 185 + /// Wrapper around [`GspMem`] to share it with the GPU using a [`Coherent`]. 216 186 /// 217 187 /// This provides the low-level functionality to communicate with the GSP, including allocation of 218 188 /// queue space to write messages to and management of read/write pointers. ··· 223 193 /// pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`]. 224 194 /// * The driver owns (i.e. 
can read from) the part of the GSP message queue between the CPU read 225 195 /// pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`]. 226 - struct DmaGspMem(CoherentAllocation<GspMem>); 196 + struct DmaGspMem(Coherent<GspMem>); 227 197 228 198 impl DmaGspMem { 229 199 /// Allocate a new instance and map it for `dev`. ··· 231 201 const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::<Msgq>() }>(); 232 202 const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>(); 233 203 234 - let gsp_mem = 235 - CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; 204 + let gsp_mem = Coherent::<GspMem>::zeroed(dev, GFP_KERNEL)?; 236 205 237 206 let start = gsp_mem.dma_handle(); 238 207 // Write values one by one to avoid an on-stack instance of `PteArray`. 239 208 for i in 0..GspMem::PTE_ARRAY_SIZE { 240 - dma_write!(gsp_mem, [0]?.ptes.0[i], PteArray::<0>::entry(start, i)?); 209 + dma_write!(gsp_mem, .ptes.0[i], PteArray::<0>::entry(start, i)?); 241 210 } 242 211 243 212 dma_write!( 244 213 gsp_mem, 245 - [0]?.cpuq.tx, 214 + .cpuq.tx, 246 215 MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES) 247 216 ); 248 - dma_write!(gsp_mem, [0]?.cpuq.rx, MsgqRxHeader::new()); 217 + dma_write!(gsp_mem, .cpuq.rx, MsgqRxHeader::new()); 249 218 250 219 Ok(Self(gsp_mem)) 251 220 } ··· 259 230 let rx = self.gsp_read_ptr() as usize; 260 231 261 232 // SAFETY: 262 - // - The `CoherentAllocation` contains exactly one object. 263 233 // - We will only access the driver-owned part of the shared memory. 264 234 // - Per the safety statement of the function, no concurrent access will be performed. 265 - let gsp_mem = &mut unsafe { self.0.as_slice_mut(0, 1) }.unwrap()[0]; 266 - // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `<= MSGQ_NUM_PAGES`. 235 + let gsp_mem = unsafe { &mut *self.0.as_mut() }; 236 + // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `< MSGQ_NUM_PAGES`. 
267 237 let (before_tx, after_tx) = gsp_mem.cpuq.msgq.data.split_at_mut(tx); 268 238 269 - if rx <= tx { 270 - // The area from `tx` up to the end of the ring, and from the beginning of the ring up 271 - // to `rx`, minus one unit, belongs to the driver. 272 - if rx == 0 { 273 - let last = after_tx.len() - 1; 274 - (&mut after_tx[..last], &mut before_tx[0..0]) 275 - } else { 276 - (after_tx, &mut before_tx[..rx]) 277 - } 239 + // The area starting at `tx` and ending at `rx - 2` modulo MSGQ_NUM_PAGES, inclusive, 240 + // belongs to the driver for writing. 241 + 242 + if rx == 0 { 243 + // Since `rx` is zero, leave an empty slot at end of the buffer. 244 + let last = after_tx.len() - 1; 245 + (&mut after_tx[..last], &mut []) 246 + } else if rx <= tx { 247 + // The area is discontiguous and we leave an empty slot before `rx`. 248 + // PANIC: 249 + // - The index `rx - 1` is non-negative because `rx != 0` in this branch. 250 + // - The index does not exceed `before_tx.len()` (which equals `tx`) because 251 + // `rx <= tx` in this branch. 252 + (after_tx, &mut before_tx[..(rx - 1)]) 278 253 } else { 279 - // The area from `tx` to `rx`, minus one unit, belongs to the driver. 280 - // 281 - // PANIC: per the invariants of `cpu_write_ptr` and `gsp_read_ptr`, `rx` and `tx` are 282 - // `<= MSGQ_NUM_PAGES`, and the test above ensured that `rx > tx`. 283 - (after_tx.split_at_mut(rx - tx).0, &mut before_tx[0..0]) 254 + // The area is contiguous and we leave an empty slot before `rx`. 255 + // PANIC: 256 + // - The index `rx - tx - 1` is non-negative because `rx > tx` in this branch. 257 + // - The index does not exceed `after_tx.len()` (which is `MSGQ_NUM_PAGES - tx`) 258 + // because `rx < MSGQ_NUM_PAGES` by the `gsp_read_ptr` invariant. 259 + (&mut after_tx[..(rx - tx - 1)], &mut []) 284 260 } 261 + } 262 + 263 + /// Returns the size of the region of the CPU message queue that the driver is currently allowed 264 + /// to write to, in bytes. 
265 + fn driver_write_area_size(&self) -> usize { 266 + let tx = self.cpu_write_ptr(); 267 + let rx = self.gsp_read_ptr(); 268 + 269 + // `rx` and `tx` are both in `0..MSGQ_NUM_PAGES` per the invariants of `gsp_read_ptr` and 270 + // `cpu_write_ptr`. The minimum value case is where `rx == 0` and `tx == MSGQ_NUM_PAGES - 271 + // 1`, which gives `0 + MSGQ_NUM_PAGES - (MSGQ_NUM_PAGES - 1) - 1 == 0`. 272 + let slots = (rx + MSGQ_NUM_PAGES - tx - 1) % MSGQ_NUM_PAGES; 273 + num::u32_as_usize(slots) * GSP_PAGE_SIZE 285 274 } 286 275 287 276 /// Returns the region of the GSP message queue that the driver is currently allowed to read ··· 312 265 let rx = self.cpu_read_ptr() as usize; 313 266 314 267 // SAFETY: 315 - // - The `CoherentAllocation` contains exactly one object. 316 268 // - We will only access the driver-owned part of the shared memory. 317 269 // - Per the safety statement of the function, no concurrent access will be performed. 318 - let gsp_mem = &unsafe { self.0.as_slice(0, 1) }.unwrap()[0]; 319 - // PANIC: per the invariant of `cpu_read_ptr`, `xx` is `<= MSGQ_NUM_PAGES`. 320 - let (before_rx, after_rx) = gsp_mem.gspq.msgq.data.split_at(rx); 270 + let gsp_mem = unsafe { &*self.0.as_ptr() }; 271 + let data = &gsp_mem.gspq.msgq.data; 321 272 322 - match tx.cmp(&rx) { 323 - cmp::Ordering::Equal => (&after_rx[0..0], &after_rx[0..0]), 324 - cmp::Ordering::Greater => (&after_rx[..tx], &before_rx[0..0]), 325 - cmp::Ordering::Less => (after_rx, &before_rx[..tx]), 273 + // The area starting at `rx` and ending at `tx - 1` modulo MSGQ_NUM_PAGES, inclusive, 274 + // belongs to the driver for reading. 275 + // PANIC: 276 + // - per the invariant of `cpu_read_ptr`, `rx < MSGQ_NUM_PAGES` 277 + // - per the invariant of `gsp_write_ptr`, `tx < MSGQ_NUM_PAGES` 278 + if rx <= tx { 279 + // The area is contiguous. 280 + (&data[rx..tx], &[]) 281 + } else { 282 + // The area is discontiguous. 
283 + (&data[rx..], &data[..tx]) 326 284 } 327 285 } 328 286 329 287 /// Allocates a region on the command queue that is large enough to send a command of `size` 330 - /// bytes. 288 + /// bytes, waiting for space to become available based on the provided timeout. 331 289 /// 332 290 /// This returns a [`GspCommand`] ready to be written to by the caller. 333 291 /// 334 292 /// # Errors 335 293 /// 336 - /// - `EAGAIN` if the driver area is too small to hold the requested command. 294 + /// - `EMSGSIZE` if the command is larger than [`GSP_MSG_QUEUE_ELEMENT_SIZE_MAX`]. 295 + /// - `ETIMEDOUT` if space does not become available within the timeout. 337 296 /// - `EIO` if the command header is not properly aligned. 338 - fn allocate_command(&mut self, size: usize) -> Result<GspCommand<'_>> { 297 + fn allocate_command(&mut self, size: usize, timeout: Delta) -> Result<GspCommand<'_>> { 298 + if size_of::<GspMsgElement>() + size > GSP_MSG_QUEUE_ELEMENT_SIZE_MAX { 299 + return Err(EMSGSIZE); 300 + } 301 + read_poll_timeout( 302 + || Ok(self.driver_write_area_size()), 303 + |available_bytes| *available_bytes >= size_of::<GspMsgElement>() + size, 304 + Delta::from_micros(1), 305 + timeout, 306 + )?; 307 + 339 308 // Get the current writable area as an array of bytes. 340 309 let (slice_1, slice_2) = { 341 310 let (slice_1, slice_2) = self.driver_write_area(); ··· 359 296 #[allow(clippy::incompatible_msrv)] 360 297 (slice_1.as_flattened_mut(), slice_2.as_flattened_mut()) 361 298 }; 362 - 363 - // If the GSP is still processing previous messages the shared region 364 - // may be full in which case we will have to retry once the GSP has 365 - // processed the existing commands. 366 - if size_of::<GspMsgElement>() + size > slice_1.len() + slice_2.len() { 367 - return Err(EAGAIN); 368 - } 369 299 370 300 // Extract area for the `GspMsgElement`. 
371 301 let (header, slice_1) = GspMsgElement::from_bytes_mut_prefix(slice_1).ok_or(EIO)?; ··· 383 327 // 384 328 // # Invariants 385 329 // 386 - // - The returned value is between `0` and `MSGQ_NUM_PAGES`. 330 + // - The returned value is within `0..MSGQ_NUM_PAGES`. 387 331 fn gsp_write_ptr(&self) -> u32 { 388 332 super::fw::gsp_mem::gsp_write_ptr(&self.0) 389 333 } ··· 392 336 // 393 337 // # Invariants 394 338 // 395 - // - The returned value is between `0` and `MSGQ_NUM_PAGES`. 339 + // - The returned value is within `0..MSGQ_NUM_PAGES`. 396 340 fn gsp_read_ptr(&self) -> u32 { 397 341 super::fw::gsp_mem::gsp_read_ptr(&self.0) 398 342 } ··· 401 345 // 402 346 // # Invariants 403 347 // 404 - // - The returned value is between `0` and `MSGQ_NUM_PAGES`. 348 + // - The returned value is within `0..MSGQ_NUM_PAGES`. 405 349 fn cpu_read_ptr(&self) -> u32 { 406 350 super::fw::gsp_mem::cpu_read_ptr(&self.0) 407 351 } ··· 415 359 // 416 360 // # Invariants 417 361 // 418 - // - The returned value is between `0` and `MSGQ_NUM_PAGES`. 362 + // - The returned value is within `0..MSGQ_NUM_PAGES`. 419 363 fn cpu_write_ptr(&self) -> u32 { 420 364 super::fw::gsp_mem::cpu_write_ptr(&self.0) 421 365 } ··· 452 396 /// 453 397 /// Provides the ability to send commands and receive messages from the GSP using a shared memory 454 398 /// area. 399 + #[pin_data] 455 400 pub(crate) struct Cmdq { 456 - /// Device this command queue belongs to. 457 - dev: ARef<device::Device>, 458 - /// Current command sequence number. 459 - seq: u32, 460 - /// Memory area shared with the GSP for communicating commands and messages. 461 - gsp_mem: DmaGspMem, 401 + /// Inner mutex-protected state. 402 + #[pin] 403 + inner: Mutex<CmdqInner>, 404 + /// DMA handle of the command queue's shared memory region. 405 + pub(super) dma_handle: DmaAddress, 462 406 } 463 407 464 408 impl Cmdq { ··· 478 422 /// Number of page table entries for the GSP shared region. 
479 423 pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT; 480 424 481 - /// Creates a new command queue for `dev`. 482 - pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<Cmdq> { 483 - let gsp_mem = DmaGspMem::new(dev)?; 425 + /// Default timeout for receiving a message from the GSP. 426 + pub(super) const RECEIVE_TIMEOUT: Delta = Delta::from_secs(5); 484 427 485 - Ok(Cmdq { 486 - dev: dev.into(), 487 - seq: 0, 488 - gsp_mem, 428 + /// Creates a new command queue for `dev`. 429 + pub(crate) fn new(dev: &device::Device<device::Bound>) -> impl PinInit<Self, Error> + '_ { 430 + pin_init_scope(move || { 431 + let gsp_mem = DmaGspMem::new(dev)?; 432 + 433 + Ok(try_pin_init!(Self { 434 + dma_handle: gsp_mem.0.dma_handle(), 435 + inner <- new_mutex!(CmdqInner { 436 + dev: dev.into(), 437 + gsp_mem, 438 + seq: 0, 439 + }), 440 + })) 489 441 }) 490 442 } 491 443 ··· 512 448 513 449 /// Notifies the GSP that we have updated the command queue pointers. 514 450 fn notify_gsp(bar: &Bar0) { 515 - regs::NV_PGSP_QUEUE_HEAD::default() 516 - .set_address(0) 517 - .write(bar); 451 + bar.write_reg(regs::NV_PGSP_QUEUE_HEAD::zeroed().with_address(0u32)); 518 452 } 519 453 520 - /// Sends `command` to the GSP. 454 + /// Sends `command` to the GSP and waits for the reply. 455 + /// 456 + /// Messages with non-matching function codes are silently consumed until the expected reply 457 + /// arrives. 458 + /// 459 + /// The queue is locked for the entire send+receive cycle to ensure that no other command can 460 + /// be interleaved. 521 461 /// 522 462 /// # Errors 523 463 /// 524 - /// - `EAGAIN` if there was not enough space in the command queue to send the command. 464 + /// - `ETIMEDOUT` if space does not become available to send the command, or if the reply is 465 + /// not received within the timeout. 
466 + /// - `EIO` if the variable payload requested by the command has not been entirely 467 + /// written to by its [`CommandToGsp::init_variable_payload`] method. 468 + /// 469 + /// Error codes returned by the command and reply initializers are propagated as-is. 470 + pub(crate) fn send_command<M>(&self, bar: &Bar0, command: M) -> Result<M::Reply> 471 + where 472 + M: CommandToGsp, 473 + M::Reply: MessageFromGsp, 474 + Error: From<M::InitError>, 475 + Error: From<<M::Reply as MessageFromGsp>::InitError>, 476 + { 477 + let mut inner = self.inner.lock(); 478 + inner.send_command(bar, command)?; 479 + 480 + loop { 481 + match inner.receive_msg::<M::Reply>(Self::RECEIVE_TIMEOUT) { 482 + Ok(reply) => break Ok(reply), 483 + Err(ERANGE) => continue, 484 + Err(e) => break Err(e), 485 + } 486 + } 487 + } 488 + 489 + /// Sends `command` to the GSP without waiting for a reply. 490 + /// 491 + /// # Errors 492 + /// 493 + /// - `ETIMEDOUT` if space does not become available within the timeout. 525 494 /// - `EIO` if the variable payload requested by the command has not been entirely 526 495 /// written to by its [`CommandToGsp::init_variable_payload`] method. 527 496 /// 528 497 /// Error codes returned by the command initializers are propagated as-is. 529 - pub(crate) fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result 498 + pub(crate) fn send_command_no_wait<M>(&self, bar: &Bar0, command: M) -> Result 499 + where 500 + M: CommandToGsp<Reply = NoReply>, 501 + Error: From<M::InitError>, 502 + { 503 + self.inner.lock().send_command(bar, command) 504 + } 505 + 506 + /// Receive a message from the GSP. 507 + /// 508 + /// See [`CmdqInner::receive_msg`] for details. 509 + pub(crate) fn receive_msg<M: MessageFromGsp>(&self, timeout: Delta) -> Result<M> 510 + where 511 + // This allows all error types, including `Infallible`, to be used for `M::InitError`. 
512 + Error: From<M::InitError>, 513 + { 514 + self.inner.lock().receive_msg(timeout) 515 + } 516 + } 517 + 518 + /// Inner mutex protected state of [`Cmdq`]. 519 + struct CmdqInner { 520 + /// Device this command queue belongs to. 521 + dev: ARef<device::Device>, 522 + /// Current command sequence number. 523 + seq: u32, 524 + /// Memory area shared with the GSP for communicating commands and messages. 525 + gsp_mem: DmaGspMem, 526 + } 527 + 528 + impl CmdqInner { 529 + /// Timeout for waiting for space on the command queue. 530 + const ALLOCATE_TIMEOUT: Delta = Delta::from_secs(1); 531 + 532 + /// Sends `command` to the GSP, without splitting it. 533 + /// 534 + /// # Errors 535 + /// 536 + /// - `EMSGSIZE` if the command exceeds the maximum queue element size. 537 + /// - `ETIMEDOUT` if space does not become available within the timeout. 538 + /// - `EIO` if the variable payload requested by the command has not been entirely 539 + /// written to by its [`CommandToGsp::init_variable_payload`] method. 540 + /// 541 + /// Error codes returned by the command initializers are propagated as-is. 542 + fn send_single_command<M>(&mut self, bar: &Bar0, command: M) -> Result 530 543 where 531 544 M: CommandToGsp, 532 545 // This allows all error types, including `Infallible`, to be used for `M::InitError`. 533 546 Error: From<M::InitError>, 534 547 { 535 - let command_size = size_of::<M::Command>() + command.variable_payload_len(); 536 - let dst = self.gsp_mem.allocate_command(command_size)?; 548 + let size_in_bytes = command.size(); 549 + let dst = self 550 + .gsp_mem 551 + .allocate_command(size_in_bytes, Self::ALLOCATE_TIMEOUT)?; 537 552 538 - // Extract area for the command itself. 553 + // Extract area for the command itself. The GSP message header and the command header 554 + // together are guaranteed to fit entirely into a single page, so it's ok to only look 555 + // at `dst.contents.0` here. 
539 556 let (cmd, payload_1) = M::Command::from_bytes_mut_prefix(dst.contents.0).ok_or(EIO)?; 540 557 541 558 // Fill the header and command in-place. 542 - let msg_element = GspMsgElement::init(self.seq, command_size, M::FUNCTION); 559 + let msg_element = GspMsgElement::init(self.seq, size_in_bytes, M::FUNCTION); 543 560 // SAFETY: `msg_header` and `cmd` are valid references, and not touched if the initializer 544 561 // fails. 545 562 unsafe { ··· 628 483 command.init().__init(core::ptr::from_mut(cmd))?; 629 484 } 630 485 631 - // Fill the variable-length payload. 632 - if command_size > size_of::<M::Command>() { 633 - let mut sbuffer = 634 - SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]); 635 - command.init_variable_payload(&mut sbuffer)?; 486 + // Fill the variable-length payload, which may be empty. 487 + let mut sbuffer = SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]); 488 + command.init_variable_payload(&mut sbuffer)?; 636 489 637 - if !sbuffer.is_empty() { 638 - return Err(EIO); 639 - } 490 + if !sbuffer.is_empty() { 491 + return Err(EIO); 640 492 } 493 + drop(sbuffer); 641 494 642 495 // Compute checksum now that the whole message is ready. 643 496 dst.header ··· 647 504 648 505 dev_dbg!( 649 506 &self.dev, 650 - "GSP RPC: send: seq# {}, function={}, length=0x{:x}\n", 507 + "GSP RPC: send: seq# {}, function={:?}, length=0x{:x}\n", 651 508 self.seq, 652 509 M::FUNCTION, 653 510 dst.header.length(), ··· 660 517 Cmdq::notify_gsp(bar); 661 518 662 519 Ok(()) 520 + } 521 + 522 + /// Sends `command` to the GSP. 523 + /// 524 + /// The command may be split into multiple messages if it is large. 525 + /// 526 + /// # Errors 527 + /// 528 + /// - `ETIMEDOUT` if space does not become available within the timeout. 529 + /// - `EIO` if the variable payload requested by the command has not been entirely 530 + /// written to by its [`CommandToGsp::init_variable_payload`] method. 
531 + /// 532 + /// Error codes returned by the command initializers are propagated as-is. 533 + fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result 534 + where 535 + M: CommandToGsp, 536 + Error: From<M::InitError>, 537 + { 538 + match SplitState::new(command)? { 539 + SplitState::Single(command) => self.send_single_command(bar, command), 540 + SplitState::Split(command, mut continuations) => { 541 + self.send_single_command(bar, command)?; 542 + 543 + while let Some(continuation) = continuations.next() { 544 + // Turbofish needed because the compiler cannot infer M here. 545 + self.send_single_command::<ContinuationRecord<'_>>(bar, continuation)?; 546 + } 547 + 548 + Ok(()) 549 + } 550 + } 663 551 } 664 552 665 553 /// Wait for a message to become available on the message queue. ··· 728 554 let (header, slice_1) = GspMsgElement::from_bytes_prefix(slice_1).ok_or(EIO)?; 729 555 730 556 dev_dbg!( 731 - self.dev, 557 + &self.dev, 732 558 "GSP RPC: receive: seq# {}, function={:?}, length=0x{:x}\n", 733 559 header.sequence(), 734 560 header.function(), ··· 763 589 ])) != 0 764 590 { 765 591 dev_err!( 766 - self.dev, 592 + &self.dev, 767 593 "GSP RPC: receive: Call {} - bad checksum\n", 768 594 header.sequence() 769 595 ); ··· 778 604 779 605 /// Receive a message from the GSP. 780 606 /// 781 - /// `init` is a closure tasked with processing the message. It receives a reference to the 782 - /// message in the message queue, and a [`SBufferIter`] pointing to its variable-length 783 - /// payload, if any. 607 + /// The expected message type is specified using the `M` generic parameter. If the pending 608 + /// message has a different function code, `ERANGE` is returned and the message is consumed. 784 609 /// 785 - /// The expected message is specified using the `M` generic parameter. If the pending message 786 - /// is different, `EAGAIN` is returned and the unexpected message is dropped. 
787 - /// 788 - /// This design is by no means final, but it is simple and will let us go through GSP 789 - /// initialization. 610 + /// The read pointer is always advanced past the message, regardless of whether it matched. 790 611 /// 791 612 /// # Errors 792 613 /// 793 614 /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available. 794 615 /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the 795 616 /// message queue. 796 - /// - `EINVAL` if the function of the message was unrecognized. 797 - pub(crate) fn receive_msg<M: MessageFromGsp>(&mut self, timeout: Delta) -> Result<M> 617 + /// - `EINVAL` if the function code of the message was not recognized. 618 + /// - `ERANGE` if the message had a recognized but non-matching function code. 619 + /// 620 + /// Error codes returned by [`MessageFromGsp::read`] are propagated as-is. 621 + fn receive_msg<M: MessageFromGsp>(&mut self, timeout: Delta) -> Result<M> 798 622 where 799 623 // This allows all error types, including `Infallible`, to be used for `M::InitError`. 800 624 Error: From<M::InitError>, ··· 806 634 let (cmd, contents_1) = M::Message::from_bytes_prefix(message.contents.0).ok_or(EIO)?; 807 635 let mut sbuffer = SBufferIter::new_reader([contents_1, message.contents.1]); 808 636 809 - M::read(cmd, &mut sbuffer).map_err(|e| e.into()) 637 + M::read(cmd, &mut sbuffer) 638 + .map_err(|e| e.into()) 639 + .inspect(|_| { 640 + if !sbuffer.is_empty() { 641 + dev_warn!( 642 + &self.dev, 643 + "GSP message {:?} has unprocessed data\n", 644 + function 645 + ); 646 + } 647 + }) 810 648 } else { 811 649 Err(ERANGE) 812 650 }; ··· 827 645 )?); 828 646 829 647 result 830 - } 831 - 832 - /// Returns the DMA handle of the command queue's shared memory region. 833 - pub(crate) fn dma_handle(&self) -> DmaAddress { 834 - self.gsp_mem.0.dma_handle() 835 648 } 836 649 }

+307

drivers/gpu/nova-core/gsp/cmdq/continuation.rs

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! Support for splitting large GSP commands across continuation records. 4 + 5 + use core::convert::Infallible; 6 + 7 + use kernel::prelude::*; 8 + 9 + use super::{ 10 + CommandToGsp, 11 + NoReply, // 12 + }; 13 + 14 + use crate::{ 15 + gsp::fw::{ 16 + GspMsgElement, 17 + MsgFunction, 18 + GSP_MSG_QUEUE_ELEMENT_SIZE_MAX, // 19 + }, 20 + sbuffer::SBufferIter, 21 + }; 22 + 23 + /// Maximum command size that fits in a single queue element. 24 + const MAX_CMD_SIZE: usize = GSP_MSG_QUEUE_ELEMENT_SIZE_MAX - size_of::<GspMsgElement>(); 25 + 26 + /// Acts as an iterator over the continuation records for a split command. 27 + pub(super) struct ContinuationRecords { 28 + payload: KVVec<u8>, 29 + offset: usize, 30 + } 31 + 32 + impl ContinuationRecords { 33 + /// Creates a new iterator over continuation records for the given payload. 34 + fn new(payload: KVVec<u8>) -> Self { 35 + Self { payload, offset: 0 } 36 + } 37 + 38 + /// Returns the next continuation record, or [`None`] if there are no more. 39 + pub(super) fn next(&mut self) -> Option<ContinuationRecord<'_>> { 40 + let remaining = self.payload.len() - self.offset; 41 + 42 + if remaining > 0 { 43 + let chunk_size = remaining.min(MAX_CMD_SIZE); 44 + let record = 45 + ContinuationRecord::new(&self.payload[self.offset..(self.offset + chunk_size)]); 46 + self.offset += chunk_size; 47 + Some(record) 48 + } else { 49 + None 50 + } 51 + } 52 + } 53 + 54 + /// The [`ContinuationRecord`] command. 55 + pub(super) struct ContinuationRecord<'a> { 56 + data: &'a [u8], 57 + } 58 + 59 + impl<'a> ContinuationRecord<'a> { 60 + /// Creates a new [`ContinuationRecord`] command with the given data. 
61 + fn new(data: &'a [u8]) -> Self { 62 + Self { data } 63 + } 64 + } 65 + 66 + impl<'a> CommandToGsp for ContinuationRecord<'a> { 67 + const FUNCTION: MsgFunction = MsgFunction::ContinuationRecord; 68 + type Command = (); 69 + type Reply = NoReply; 70 + type InitError = Infallible; 71 + 72 + fn init(&self) -> impl Init<Self::Command, Self::InitError> { 73 + <()>::init_zeroed() 74 + } 75 + 76 + fn variable_payload_len(&self) -> usize { 77 + self.data.len() 78 + } 79 + 80 + fn init_variable_payload( 81 + &self, 82 + dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>, 83 + ) -> Result { 84 + dst.write_all(self.data) 85 + } 86 + } 87 + 88 + /// Whether a command needs to be split across continuation records or not. 89 + pub(super) enum SplitState<C: CommandToGsp> { 90 + /// A command that fits in a single queue element. 91 + Single(C), 92 + /// A command split across continuation records. 93 + Split(SplitCommand<C>, ContinuationRecords), 94 + } 95 + 96 + impl<C: CommandToGsp> SplitState<C> { 97 + /// Maximum variable payload size that fits in the first command alongside the command header. 98 + const MAX_FIRST_PAYLOAD: usize = MAX_CMD_SIZE - size_of::<C::Command>(); 99 + 100 + /// Creates a new [`SplitState`] for the given command. 101 + /// 102 + /// If the command is too large, it will be split into a main command and some number of 103 + /// continuation records. 
104 + pub(super) fn new(command: C) -> Result<Self> { 105 + let payload_len = command.variable_payload_len(); 106 + 107 + if command.size() > MAX_CMD_SIZE { 108 + let mut command_payload = 109 + KVVec::<u8>::from_elem(0u8, payload_len.min(Self::MAX_FIRST_PAYLOAD), GFP_KERNEL)?; 110 + let mut continuation_payload = 111 + KVVec::<u8>::from_elem(0u8, payload_len - command_payload.len(), GFP_KERNEL)?; 112 + let mut sbuffer = SBufferIter::new_writer([ 113 + command_payload.as_mut_slice(), 114 + continuation_payload.as_mut_slice(), 115 + ]); 116 + 117 + command.init_variable_payload(&mut sbuffer)?; 118 + if !sbuffer.is_empty() { 119 + return Err(EIO); 120 + } 121 + drop(sbuffer); 122 + 123 + Ok(Self::Split( 124 + SplitCommand::new(command, command_payload), 125 + ContinuationRecords::new(continuation_payload), 126 + )) 127 + } else { 128 + Ok(Self::Single(command)) 129 + } 130 + } 131 + } 132 + 133 + /// A command that has been truncated to maximum accepted length of the command queue. 134 + /// 135 + /// The remainder of its payload is expected to be sent using [`ContinuationRecords`]. 136 + pub(super) struct SplitCommand<C: CommandToGsp> { 137 + command: C, 138 + payload: KVVec<u8>, 139 + } 140 + 141 + impl<C: CommandToGsp> SplitCommand<C> { 142 + /// Creates a new [`SplitCommand`] wrapping `command` with the given truncated payload. 
143 + fn new(command: C, payload: KVVec<u8>) -> Self { 144 + Self { command, payload } 145 + } 146 + } 147 + 148 + impl<C: CommandToGsp> CommandToGsp for SplitCommand<C> { 149 + const FUNCTION: MsgFunction = C::FUNCTION; 150 + type Command = C::Command; 151 + type Reply = C::Reply; 152 + type InitError = C::InitError; 153 + 154 + fn init(&self) -> impl Init<Self::Command, Self::InitError> { 155 + self.command.init() 156 + } 157 + 158 + fn variable_payload_len(&self) -> usize { 159 + self.payload.len() 160 + } 161 + 162 + fn init_variable_payload( 163 + &self, 164 + dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>, 165 + ) -> Result { 166 + dst.write_all(&self.payload) 167 + } 168 + } 169 + 170 + #[kunit_tests(nova_core_gsp_continuation)] 171 + mod tests { 172 + use super::*; 173 + 174 + use kernel::transmute::{ 175 + AsBytes, 176 + FromBytes, // 177 + }; 178 + 179 + /// Non-zero-sized command header for testing. 180 + #[repr(C)] 181 + #[derive(Clone, Copy, Zeroable)] 182 + struct TestHeader([u8; 64]); 183 + 184 + // SAFETY: `TestHeader` is a plain array of bytes for which all bit patterns are valid. 185 + unsafe impl FromBytes for TestHeader {} 186 + 187 + // SAFETY: `TestHeader` is a plain array of bytes for which all bit patterns are valid. 188 + unsafe impl AsBytes for TestHeader {} 189 + 190 + struct TestPayload { 191 + data: KVVec<u8>, 192 + } 193 + 194 + impl TestPayload { 195 + fn generate_pattern(len: usize) -> Result<KVVec<u8>> { 196 + let mut data = KVVec::with_capacity(len, GFP_KERNEL)?; 197 + for i in 0..len { 198 + // Mix in higher bits so the pattern does not repeat every 256 bytes. 
199 + data.push((i ^ (i >> 8)) as u8, GFP_KERNEL)?; 200 + } 201 + Ok(data) 202 + } 203 + 204 + fn new(len: usize) -> Result<Self> { 205 + Ok(Self { 206 + data: Self::generate_pattern(len)?, 207 + }) 208 + } 209 + } 210 + 211 + impl CommandToGsp for TestPayload { 212 + const FUNCTION: MsgFunction = MsgFunction::Nop; 213 + type Command = TestHeader; 214 + type Reply = NoReply; 215 + type InitError = Infallible; 216 + 217 + fn init(&self) -> impl Init<Self::Command, Self::InitError> { 218 + TestHeader::init_zeroed() 219 + } 220 + 221 + fn variable_payload_len(&self) -> usize { 222 + self.data.len() 223 + } 224 + 225 + fn init_variable_payload( 226 + &self, 227 + dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>, 228 + ) -> Result { 229 + dst.write_all(self.data.as_slice()) 230 + } 231 + } 232 + 233 + /// Maximum variable payload size that fits in the first command alongside the header. 234 + const MAX_FIRST_PAYLOAD: usize = SplitState::<TestPayload>::MAX_FIRST_PAYLOAD; 235 + 236 + fn read_payload(cmd: impl CommandToGsp) -> Result<KVVec<u8>> { 237 + let len = cmd.variable_payload_len(); 238 + let mut buf = KVVec::from_elem(0u8, len, GFP_KERNEL)?; 239 + let mut sbuf = SBufferIter::new_writer([buf.as_mut_slice(), &mut []]); 240 + cmd.init_variable_payload(&mut sbuf)?; 241 + drop(sbuf); 242 + Ok(buf) 243 + } 244 + 245 + struct SplitTest { 246 + payload_size: usize, 247 + num_continuations: usize, 248 + } 249 + 250 + fn check_split(t: SplitTest) -> Result { 251 + let payload = TestPayload::new(t.payload_size)?; 252 + let mut num_continuations = 0; 253 + 254 + let buf = match SplitState::new(payload)? 
{ 255 + SplitState::Single(cmd) => read_payload(cmd)?, 256 + SplitState::Split(cmd, mut continuations) => { 257 + let mut buf = read_payload(cmd)?; 258 + assert!(size_of::<TestHeader>() + buf.len() <= MAX_CMD_SIZE); 259 + 260 + while let Some(cont) = continuations.next() { 261 + let payload = read_payload(cont)?; 262 + assert!(payload.len() <= MAX_CMD_SIZE); 263 + buf.extend_from_slice(&payload, GFP_KERNEL)?; 264 + num_continuations += 1; 265 + } 266 + 267 + buf 268 + } 269 + }; 270 + 271 + assert_eq!(num_continuations, t.num_continuations); 272 + assert_eq!( 273 + buf.as_slice(), 274 + TestPayload::generate_pattern(t.payload_size)?.as_slice() 275 + ); 276 + Ok(()) 277 + } 278 + 279 + #[test] 280 + fn split_command() -> Result { 281 + check_split(SplitTest { 282 + payload_size: 0, 283 + num_continuations: 0, 284 + })?; 285 + check_split(SplitTest { 286 + payload_size: MAX_FIRST_PAYLOAD, 287 + num_continuations: 0, 288 + })?; 289 + check_split(SplitTest { 290 + payload_size: MAX_FIRST_PAYLOAD + 1, 291 + num_continuations: 1, 292 + })?; 293 + check_split(SplitTest { 294 + payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE, 295 + num_continuations: 1, 296 + })?; 297 + check_split(SplitTest { 298 + payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE + 1, 299 + num_continuations: 2, 300 + })?; 301 + check_split(SplitTest { 302 + payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE * 3 + MAX_CMD_SIZE / 2, 303 + num_continuations: 4, 304 + })?; 305 + Ok(()) 306 + } 307 + }

+9 -14

drivers/gpu/nova-core/gsp/commands.rs

··· 11 11 device, 12 12 pci, 13 13 prelude::*, 14 - time::Delta, 15 14 transmute::{ 16 15 AsBytes, 17 16 FromBytes, // ··· 23 24 cmdq::{ 24 25 Cmdq, 25 26 CommandToGsp, 26 - MessageFromGsp, // 27 + MessageFromGsp, 28 + NoReply, // 27 29 }, 28 30 fw::{ 29 31 commands::*, ··· 49 49 impl<'a> CommandToGsp for SetSystemInfo<'a> { 50 50 const FUNCTION: MsgFunction = MsgFunction::GspSetSystemInfo; 51 51 type Command = GspSetSystemInfo; 52 + type Reply = NoReply; 52 53 type InitError = Error; 53 54 54 55 fn init(&self) -> impl Init<Self::Command, Self::InitError> { ··· 101 100 impl CommandToGsp for SetRegistry { 102 101 const FUNCTION: MsgFunction = MsgFunction::SetRegistry; 103 102 type Command = PackedRegistryTable; 103 + type Reply = NoReply; 104 104 type InitError = Infallible; 105 105 106 106 fn init(&self) -> impl Init<Self::Command, Self::InitError> { ··· 165 163 } 166 164 167 165 /// Waits for GSP initialization to complete. 168 - pub(crate) fn wait_gsp_init_done(cmdq: &mut Cmdq) -> Result { 166 + pub(crate) fn wait_gsp_init_done(cmdq: &Cmdq) -> Result { 169 167 loop { 170 - match cmdq.receive_msg::<GspInitDone>(Delta::from_secs(10)) { 168 + match cmdq.receive_msg::<GspInitDone>(Cmdq::RECEIVE_TIMEOUT) { 171 169 Ok(_) => break Ok(()), 172 170 Err(ERANGE) => continue, 173 171 Err(e) => break Err(e), ··· 181 179 impl CommandToGsp for GetGspStaticInfo { 182 180 const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo; 183 181 type Command = GspStaticConfigInfo; 182 + type Reply = GetGspStaticInfoReply; 184 183 type InitError = Infallible; 185 184 186 185 fn init(&self) -> impl Init<Self::Command, Self::InitError> { ··· 234 231 } 235 232 236 233 /// Send the [`GetGspInfo`] command and awaits for its reply. 
237 - pub(crate) fn get_gsp_info(cmdq: &mut Cmdq, bar: &Bar0) -> Result<GetGspStaticInfoReply> { 238 - cmdq.send_command(bar, GetGspStaticInfo)?; 239 - 240 - loop { 241 - match cmdq.receive_msg::<GetGspStaticInfoReply>(Delta::from_secs(5)) { 242 - Ok(info) => return Ok(info), 243 - Err(ERANGE) => continue, 244 - Err(e) => return Err(e), 245 - } 246 - } 234 + pub(crate) fn get_gsp_info(cmdq: &Cmdq, bar: &Bar0) -> Result<GetGspStaticInfoReply> { 235 + cmdq.send_command(bar, GetGspStaticInfo) 247 236 }

+128 -181

drivers/gpu/nova-core/gsp/fw.rs

··· 9 9 use core::ops::Range; 10 10 11 11 use kernel::{ 12 - dma::CoherentAllocation, 13 - fmt, 12 + dma::Coherent, 14 13 prelude::*, 15 14 ptr::{ 16 15 Alignable, 17 - Alignment, // 16 + Alignment, 17 + KnownSize, // 18 18 }, 19 19 sizes::{ 20 20 SZ_128K, ··· 40 40 }, 41 41 }; 42 42 43 - // TODO: Replace with `IoView` projections once available; the `unwrap()` calls go away once we 44 - // switch to the new `dma::Coherent` API. 43 + // TODO: Replace with `IoView` projections once available. 45 44 pub(super) mod gsp_mem { 46 45 use core::sync::atomic::{ 47 46 fence, ··· 48 49 }; 49 50 50 51 use kernel::{ 51 - dma::CoherentAllocation, 52 + dma::Coherent, 52 53 dma_read, 53 - dma_write, 54 - prelude::*, // 54 + dma_write, // 55 55 }; 56 56 57 57 use crate::gsp::cmdq::{ ··· 58 60 MSGQ_NUM_PAGES, // 59 61 }; 60 62 61 - pub(in crate::gsp) fn gsp_write_ptr(qs: &CoherentAllocation<GspMem>) -> u32 { 62 - // PANIC: A `dma::CoherentAllocation` always contains at least one element. 63 - || -> Result<u32> { Ok(dma_read!(qs, [0]?.gspq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap() 63 + pub(in crate::gsp) fn gsp_write_ptr(qs: &Coherent<GspMem>) -> u32 { 64 + dma_read!(qs, .gspq.tx.0.writePtr) % MSGQ_NUM_PAGES 64 65 } 65 66 66 - pub(in crate::gsp) fn gsp_read_ptr(qs: &CoherentAllocation<GspMem>) -> u32 { 67 - // PANIC: A `dma::CoherentAllocation` always contains at least one element. 68 - || -> Result<u32> { Ok(dma_read!(qs, [0]?.gspq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap() 67 + pub(in crate::gsp) fn gsp_read_ptr(qs: &Coherent<GspMem>) -> u32 { 68 + dma_read!(qs, .gspq.rx.0.readPtr) % MSGQ_NUM_PAGES 69 69 } 70 70 71 - pub(in crate::gsp) fn cpu_read_ptr(qs: &CoherentAllocation<GspMem>) -> u32 { 72 - // PANIC: A `dma::CoherentAllocation` always contains at least one element. 
73 - || -> Result<u32> { Ok(dma_read!(qs, [0]?.cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap() 71 + pub(in crate::gsp) fn cpu_read_ptr(qs: &Coherent<GspMem>) -> u32 { 72 + dma_read!(qs, .cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES 74 73 } 75 74 76 - pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &CoherentAllocation<GspMem>, count: u32) { 75 + pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &Coherent<GspMem>, count: u32) { 77 76 let rptr = cpu_read_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES; 78 77 79 78 // Ensure read pointer is properly ordered. 80 79 fence(Ordering::SeqCst); 81 80 82 - // PANIC: A `dma::CoherentAllocation` always contains at least one element. 83 - || -> Result { 84 - dma_write!(qs, [0]?.cpuq.rx.0.readPtr, rptr); 85 - Ok(()) 86 - }() 87 - .unwrap() 81 + dma_write!(qs, .cpuq.rx.0.readPtr, rptr); 88 82 } 89 83 90 - pub(in crate::gsp) fn cpu_write_ptr(qs: &CoherentAllocation<GspMem>) -> u32 { 91 - // PANIC: A `dma::CoherentAllocation` always contains at least one element. 92 - || -> Result<u32> { Ok(dma_read!(qs, [0]?.cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap() 84 + pub(in crate::gsp) fn cpu_write_ptr(qs: &Coherent<GspMem>) -> u32 { 85 + dma_read!(qs, .cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES 93 86 } 94 87 95 - pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &CoherentAllocation<GspMem>, count: u32) { 88 + pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &Coherent<GspMem>, count: u32) { 96 89 let wptr = cpu_write_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES; 97 90 98 - // PANIC: A `dma::CoherentAllocation` always contains at least one element. 99 - || -> Result { 100 - dma_write!(qs, [0]?.cpuq.tx.0.writePtr, wptr); 101 - Ok(()) 102 - }() 103 - .unwrap(); 91 + dma_write!(qs, .cpuq.tx.0.writePtr, wptr); 104 92 105 93 // Ensure all command data is visible before triggering the GSP read. 106 94 fence(Ordering::SeqCst); 107 95 } 108 96 } 97 + 98 + /// Maximum size of a single GSP message queue element in bytes. 
99 + pub(crate) const GSP_MSG_QUEUE_ELEMENT_SIZE_MAX: usize = 100 + num::u32_as_usize(bindings::GSP_MSG_QUEUE_ELEMENT_SIZE_MAX); 109 101 110 102 /// Empty type to group methods related to heap parameters for running the GSP firmware. 111 103 enum GspFwHeapParams {} ··· 189 201 /// Structure passed to the GSP bootloader, containing the framebuffer layout as well as the DMA 190 202 /// addresses of the GSP bootloader and firmware. 191 203 #[repr(transparent)] 192 - pub(crate) struct GspFwWprMeta(bindings::GspFwWprMeta); 204 + pub(crate) struct GspFwWprMeta { 205 + inner: bindings::GspFwWprMeta, 206 + } 193 207 194 208 // SAFETY: Padding is explicit and does not contain uninitialized data. 195 209 unsafe impl AsBytes for GspFwWprMeta {} ··· 204 214 type GspFwWprMetaBootInfo = bindings::GspFwWprMeta__bindgen_ty_1__bindgen_ty_1; 205 215 206 216 impl GspFwWprMeta { 207 - /// Fill in and return a `GspFwWprMeta` suitable for booting `gsp_firmware` using the 217 + /// Returns an initializer for a `GspFwWprMeta` suitable for booting `gsp_firmware` using the 208 218 /// `fb_layout` layout. 209 - pub(crate) fn new(gsp_firmware: &GspFirmware, fb_layout: &FbLayout) -> Self { 210 - Self(bindings::GspFwWprMeta { 219 + pub(crate) fn new<'a>( 220 + gsp_firmware: &'a GspFirmware, 221 + fb_layout: &'a FbLayout, 222 + ) -> impl Init<Self> + 'a { 223 + #[allow(non_snake_case)] 224 + let init_inner = init!(bindings::GspFwWprMeta { 211 225 // CAST: we want to store the bits of `GSP_FW_WPR_META_MAGIC` unmodified. 
212 226 magic: bindings::GSP_FW_WPR_META_MAGIC as u64, 213 227 revision: u64::from(bindings::GSP_FW_WPR_META_REVISION), ··· 246 252 fbSize: fb_layout.fb.end - fb_layout.fb.start, 247 253 vgaWorkspaceOffset: fb_layout.vga_workspace.start, 248 254 vgaWorkspaceSize: fb_layout.vga_workspace.end - fb_layout.vga_workspace.start, 249 - ..Default::default() 255 + ..Zeroable::init_zeroed() 256 + }); 257 + 258 + init!(GspFwWprMeta { 259 + inner <- init_inner, 250 260 }) 251 261 } 252 262 } ··· 259 261 #[repr(u32)] 260 262 pub(crate) enum MsgFunction { 261 263 // Common function codes 262 - Nop = bindings::NV_VGPU_MSG_FUNCTION_NOP, 263 - SetGuestSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO, 264 - AllocRoot = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT, 264 + AllocChannelDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA, 265 + AllocCtxDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA, 265 266 AllocDevice = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE, 266 267 AllocMemory = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY, 267 - AllocCtxDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA, 268 - AllocChannelDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA, 269 - MapMemory = bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY, 270 - BindCtxDma = bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA, 271 268 AllocObject = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT, 269 + AllocRoot = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT, 270 + BindCtxDma = bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA, 271 + ContinuationRecord = bindings::NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD, 272 272 Free = bindings::NV_VGPU_MSG_FUNCTION_FREE, 273 - Log = bindings::NV_VGPU_MSG_FUNCTION_LOG, 274 273 GetGspStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO, 275 - SetRegistry = bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY, 276 - GspSetSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO, 274 + GetStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO, 277 275 GspInitPostObjGpu = 
bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU, 278 276 GspRmControl = bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL, 279 - GetStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO, 277 + GspSetSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO, 278 + Log = bindings::NV_VGPU_MSG_FUNCTION_LOG, 279 + MapMemory = bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY, 280 + Nop = bindings::NV_VGPU_MSG_FUNCTION_NOP, 281 + SetGuestSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO, 282 + SetRegistry = bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY, 280 283 281 284 // Event codes 282 285 GspInitDone = bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE, 286 + GspLockdownNotice = bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE, 287 + GspPostNoCat = bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD, 283 288 GspRunCpuSequencer = bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER, 284 - PostEvent = bindings::NV_VGPU_MSG_EVENT_POST_EVENT, 285 - RcTriggered = bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED, 286 289 MmuFaultQueued = bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED, 287 290 OsErrorLog = bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG, 288 - GspPostNoCat = bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD, 289 - GspLockdownNotice = bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE, 291 + PostEvent = bindings::NV_VGPU_MSG_EVENT_POST_EVENT, 292 + RcTriggered = bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED, 290 293 UcodeLibOsPrint = bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, 291 - } 292 - 293 - impl fmt::Display for MsgFunction { 294 - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 295 - match self { 296 - // Common function codes 297 - MsgFunction::Nop => write!(f, "NOP"), 298 - MsgFunction::SetGuestSystemInfo => write!(f, "SET_GUEST_SYSTEM_INFO"), 299 - MsgFunction::AllocRoot => write!(f, "ALLOC_ROOT"), 300 - MsgFunction::AllocDevice => write!(f, "ALLOC_DEVICE"), 301 - MsgFunction::AllocMemory => write!(f, "ALLOC_MEMORY"), 302 - MsgFunction::AllocCtxDma => 
write!(f, "ALLOC_CTX_DMA"), 303 - MsgFunction::AllocChannelDma => write!(f, "ALLOC_CHANNEL_DMA"), 304 - MsgFunction::MapMemory => write!(f, "MAP_MEMORY"), 305 - MsgFunction::BindCtxDma => write!(f, "BIND_CTX_DMA"), 306 - MsgFunction::AllocObject => write!(f, "ALLOC_OBJECT"), 307 - MsgFunction::Free => write!(f, "FREE"), 308 - MsgFunction::Log => write!(f, "LOG"), 309 - MsgFunction::GetGspStaticInfo => write!(f, "GET_GSP_STATIC_INFO"), 310 - MsgFunction::SetRegistry => write!(f, "SET_REGISTRY"), 311 - MsgFunction::GspSetSystemInfo => write!(f, "GSP_SET_SYSTEM_INFO"), 312 - MsgFunction::GspInitPostObjGpu => write!(f, "GSP_INIT_POST_OBJGPU"), 313 - MsgFunction::GspRmControl => write!(f, "GSP_RM_CONTROL"), 314 - MsgFunction::GetStaticInfo => write!(f, "GET_STATIC_INFO"), 315 - 316 - // Event codes 317 - MsgFunction::GspInitDone => write!(f, "INIT_DONE"), 318 - MsgFunction::GspRunCpuSequencer => write!(f, "RUN_CPU_SEQUENCER"), 319 - MsgFunction::PostEvent => write!(f, "POST_EVENT"), 320 - MsgFunction::RcTriggered => write!(f, "RC_TRIGGERED"), 321 - MsgFunction::MmuFaultQueued => write!(f, "MMU_FAULT_QUEUED"), 322 - MsgFunction::OsErrorLog => write!(f, "OS_ERROR_LOG"), 323 - MsgFunction::GspPostNoCat => write!(f, "NOCAT"), 324 - MsgFunction::GspLockdownNotice => write!(f, "LOCKDOWN_NOTICE"), 325 - MsgFunction::UcodeLibOsPrint => write!(f, "LIBOS_PRINT"), 326 - } 327 - } 328 294 } 329 295 330 296 impl TryFrom<u32> for MsgFunction { ··· 296 334 297 335 fn try_from(value: u32) -> Result<MsgFunction> { 298 336 match value { 299 - bindings::NV_VGPU_MSG_FUNCTION_NOP => Ok(MsgFunction::Nop), 300 - bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO => { 301 - Ok(MsgFunction::SetGuestSystemInfo) 302 - } 303 - bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT => Ok(MsgFunction::AllocRoot), 337 + // Common function codes 338 + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA => Ok(MsgFunction::AllocChannelDma), 339 + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA => 
Ok(MsgFunction::AllocCtxDma), 304 340 bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE => Ok(MsgFunction::AllocDevice), 305 341 bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY => Ok(MsgFunction::AllocMemory), 306 - bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA => Ok(MsgFunction::AllocCtxDma), 307 - bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA => Ok(MsgFunction::AllocChannelDma), 308 - bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY => Ok(MsgFunction::MapMemory), 309 - bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA => Ok(MsgFunction::BindCtxDma), 310 342 bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT => Ok(MsgFunction::AllocObject), 343 + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT => Ok(MsgFunction::AllocRoot), 344 + bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA => Ok(MsgFunction::BindCtxDma), 345 + bindings::NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD => { 346 + Ok(MsgFunction::ContinuationRecord) 347 + } 311 348 bindings::NV_VGPU_MSG_FUNCTION_FREE => Ok(MsgFunction::Free), 312 - bindings::NV_VGPU_MSG_FUNCTION_LOG => Ok(MsgFunction::Log), 313 349 bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO => Ok(MsgFunction::GetGspStaticInfo), 314 - bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY => Ok(MsgFunction::SetRegistry), 315 - bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO => Ok(MsgFunction::GspSetSystemInfo), 350 + bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO => Ok(MsgFunction::GetStaticInfo), 316 351 bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU => { 317 352 Ok(MsgFunction::GspInitPostObjGpu) 318 353 } 319 354 bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL => Ok(MsgFunction::GspRmControl), 320 - bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO => Ok(MsgFunction::GetStaticInfo), 355 + bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO => Ok(MsgFunction::GspSetSystemInfo), 356 + bindings::NV_VGPU_MSG_FUNCTION_LOG => Ok(MsgFunction::Log), 357 + bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY => Ok(MsgFunction::MapMemory), 358 + bindings::NV_VGPU_MSG_FUNCTION_NOP => Ok(MsgFunction::Nop), 359 + 
bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO => { 360 + Ok(MsgFunction::SetGuestSystemInfo) 361 + } 362 + bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY => Ok(MsgFunction::SetRegistry), 363 + 364 + // Event codes 321 365 bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE => Ok(MsgFunction::GspInitDone), 366 + bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE => Ok(MsgFunction::GspLockdownNotice), 367 + bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD => Ok(MsgFunction::GspPostNoCat), 322 368 bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER => { 323 369 Ok(MsgFunction::GspRunCpuSequencer) 324 370 } 325 - bindings::NV_VGPU_MSG_EVENT_POST_EVENT => Ok(MsgFunction::PostEvent), 326 - bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED => Ok(MsgFunction::RcTriggered), 327 371 bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED => Ok(MsgFunction::MmuFaultQueued), 328 372 bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG => Ok(MsgFunction::OsErrorLog), 329 - bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD => Ok(MsgFunction::GspPostNoCat), 330 - bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE => Ok(MsgFunction::GspLockdownNotice), 373 + bindings::NV_VGPU_MSG_EVENT_POST_EVENT => Ok(MsgFunction::PostEvent), 374 + bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED => Ok(MsgFunction::RcTriggered), 331 375 bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT => Ok(MsgFunction::UcodeLibOsPrint), 332 376 _ => Err(EINVAL), 333 377 } ··· 365 397 RegPoll = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL, 366 398 RegStore = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE, 367 399 RegWrite = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE, 368 - } 369 - 370 - impl fmt::Display for SeqBufOpcode { 371 - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 372 - match self { 373 - SeqBufOpcode::CoreReset => write!(f, "CORE_RESET"), 374 - SeqBufOpcode::CoreResume => write!(f, "CORE_RESUME"), 375 - SeqBufOpcode::CoreStart => write!(f, "CORE_START"), 376 - SeqBufOpcode::CoreWaitForHalt => write!(f, 
"CORE_WAIT_FOR_HALT"), 377 - SeqBufOpcode::DelayUs => write!(f, "DELAY_US"), 378 - SeqBufOpcode::RegModify => write!(f, "REG_MODIFY"), 379 - SeqBufOpcode::RegPoll => write!(f, "REG_POLL"), 380 - SeqBufOpcode::RegStore => write!(f, "REG_STORE"), 381 - SeqBufOpcode::RegWrite => write!(f, "REG_WRITE"), 382 - } 383 - } 384 400 } 385 401 386 402 impl TryFrom<u32> for SeqBufOpcode { ··· 405 453 406 454 /// Wrapper for GSP sequencer register write payload. 407 455 #[repr(transparent)] 408 - #[derive(Copy, Clone)] 456 + #[derive(Copy, Clone, Debug)] 409 457 pub(crate) struct RegWritePayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_WRITE); 410 458 411 459 impl RegWritePayload { ··· 428 476 429 477 /// Wrapper for GSP sequencer register modify payload. 430 478 #[repr(transparent)] 431 - #[derive(Copy, Clone)] 479 + #[derive(Copy, Clone, Debug)] 432 480 pub(crate) struct RegModifyPayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_MODIFY); 433 481 434 482 impl RegModifyPayload { ··· 456 504 457 505 /// Wrapper for GSP sequencer register poll payload. 458 506 #[repr(transparent)] 459 - #[derive(Copy, Clone)] 507 + #[derive(Copy, Clone, Debug)] 460 508 pub(crate) struct RegPollPayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_POLL); 461 509 462 510 impl RegPollPayload { ··· 489 537 490 538 /// Wrapper for GSP sequencer delay payload. 491 539 #[repr(transparent)] 492 - #[derive(Copy, Clone)] 540 + #[derive(Copy, Clone, Debug)] 493 541 pub(crate) struct DelayUsPayload(bindings::GSP_SEQ_BUF_PAYLOAD_DELAY_US); 494 542 495 543 impl DelayUsPayload { ··· 507 555 508 556 /// Wrapper for GSP sequencer register store payload. 509 557 #[repr(transparent)] 510 - #[derive(Copy, Clone)] 558 + #[derive(Copy, Clone, Debug)] 511 559 pub(crate) struct RegStorePayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_STORE); 512 560 513 561 impl RegStorePayload { ··· 547 595 return Err(EINVAL); 548 596 } 549 597 // SAFETY: Opcode is verified to be `RegWrite`, so union contains valid `RegWritePayload`. 
550 - let payload_bytes = unsafe { 551 - core::slice::from_raw_parts( 552 - core::ptr::addr_of!(self.0.payload.regWrite).cast::<u8>(), 553 - core::mem::size_of::<RegWritePayload>(), 554 - ) 555 - }; 556 - Ok(*RegWritePayload::from_bytes(payload_bytes).ok_or(EINVAL)?) 598 + Ok(RegWritePayload(unsafe { self.0.payload.regWrite })) 557 599 } 558 600 559 601 /// Returns the register modify payload by value. ··· 558 612 return Err(EINVAL); 559 613 } 560 614 // SAFETY: Opcode is verified to be `RegModify`, so union contains valid `RegModifyPayload`. 561 - let payload_bytes = unsafe { 562 - core::slice::from_raw_parts( 563 - core::ptr::addr_of!(self.0.payload.regModify).cast::<u8>(), 564 - core::mem::size_of::<RegModifyPayload>(), 565 - ) 566 - }; 567 - Ok(*RegModifyPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) 615 + Ok(RegModifyPayload(unsafe { self.0.payload.regModify })) 568 616 } 569 617 570 618 /// Returns the register poll payload by value. ··· 569 629 return Err(EINVAL); 570 630 } 571 631 // SAFETY: Opcode is verified to be `RegPoll`, so union contains valid `RegPollPayload`. 572 - let payload_bytes = unsafe { 573 - core::slice::from_raw_parts( 574 - core::ptr::addr_of!(self.0.payload.regPoll).cast::<u8>(), 575 - core::mem::size_of::<RegPollPayload>(), 576 - ) 577 - }; 578 - Ok(*RegPollPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) 632 + Ok(RegPollPayload(unsafe { self.0.payload.regPoll })) 579 633 } 580 634 581 635 /// Returns the delay payload by value. ··· 580 646 return Err(EINVAL); 581 647 } 582 648 // SAFETY: Opcode is verified to be `DelayUs`, so union contains valid `DelayUsPayload`. 583 - let payload_bytes = unsafe { 584 - core::slice::from_raw_parts( 585 - core::ptr::addr_of!(self.0.payload.delayUs).cast::<u8>(), 586 - core::mem::size_of::<DelayUsPayload>(), 587 - ) 588 - }; 589 - Ok(*DelayUsPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) 
649 + Ok(DelayUsPayload(unsafe { self.0.payload.delayUs })) 590 650 } 591 651 592 652 /// Returns the register store payload by value. ··· 591 663 return Err(EINVAL); 592 664 } 593 665 // SAFETY: Opcode is verified to be `RegStore`, so union contains valid `RegStorePayload`. 594 - let payload_bytes = unsafe { 595 - core::slice::from_raw_parts( 596 - core::ptr::addr_of!(self.0.payload.regStore).cast::<u8>(), 597 - core::mem::size_of::<RegStorePayload>(), 598 - ) 599 - }; 600 - Ok(*RegStorePayload::from_bytes(payload_bytes).ok_or(EINVAL)?) 666 + Ok(RegStorePayload(unsafe { self.0.payload.regStore })) 601 667 } 602 668 } 603 669 ··· 633 711 /// The memory allocated for the arguments must remain until the GSP sends the 634 712 /// init_done RPC. 635 713 #[repr(transparent)] 636 - pub(crate) struct LibosMemoryRegionInitArgument(bindings::LibosMemoryRegionInitArgument); 714 + pub(crate) struct LibosMemoryRegionInitArgument { 715 + inner: bindings::LibosMemoryRegionInitArgument, 716 + } 637 717 638 718 // SAFETY: Padding is explicit and does not contain uninitialized data. 639 719 unsafe impl AsBytes for LibosMemoryRegionInitArgument {} ··· 645 721 unsafe impl FromBytes for LibosMemoryRegionInitArgument {} 646 722 647 723 impl LibosMemoryRegionInitArgument { 648 - pub(crate) fn new<A: AsBytes + FromBytes>( 724 + pub(crate) fn new<'a, A: AsBytes + FromBytes + KnownSize + ?Sized>( 649 725 name: &'static str, 650 - obj: &CoherentAllocation<A>, 651 - ) -> Self { 726 + obj: &'a Coherent<A>, 727 + ) -> impl Init<Self> + 'a { 652 728 /// Generates the `ID8` identifier required for some GSP objects. 
653 729 fn id8(name: &str) -> u64 { 654 730 let mut bytes = [0u8; core::mem::size_of::<u64>()]; ··· 660 736 u64::from_ne_bytes(bytes) 661 737 } 662 738 663 - Self(bindings::LibosMemoryRegionInitArgument { 739 + #[allow(non_snake_case)] 740 + let init_inner = init!(bindings::LibosMemoryRegionInitArgument { 664 741 id8: id8(name), 665 742 pa: obj.dma_handle(), 666 743 size: num::usize_as_u64(obj.size()), ··· 671 746 loc: num::u32_into_u8::< 672 747 { bindings::LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM }, 673 748 >(), 674 - ..Default::default() 749 + ..Zeroable::init_zeroed() 750 + }); 751 + 752 + init!(LibosMemoryRegionInitArgument { 753 + inner <- init_inner, 675 754 }) 676 755 } 677 756 } ··· 854 925 855 926 /// Arguments for GSP startup. 856 927 #[repr(transparent)] 857 - pub(crate) struct GspArgumentsCached(bindings::GSP_ARGUMENTS_CACHED); 928 + #[derive(Zeroable)] 929 + pub(crate) struct GspArgumentsCached { 930 + inner: bindings::GSP_ARGUMENTS_CACHED, 931 + } 858 932 859 933 impl GspArgumentsCached { 860 934 /// Creates the arguments for starting the GSP up using `cmdq` as its command queue. 861 - pub(crate) fn new(cmdq: &Cmdq) -> Self { 862 - Self(bindings::GSP_ARGUMENTS_CACHED { 863 - messageQueueInitArguments: MessageQueueInitArguments::new(cmdq).0, 935 + pub(crate) fn new(cmdq: &Cmdq) -> impl Init<Self> + '_ { 936 + #[allow(non_snake_case)] 937 + let init_inner = init!(bindings::GSP_ARGUMENTS_CACHED { 938 + messageQueueInitArguments <- MessageQueueInitArguments::new(cmdq), 864 939 bDmemStack: 1, 865 - ..Default::default() 940 + ..Zeroable::init_zeroed() 941 + }); 942 + 943 + init!(GspArgumentsCached { 944 + inner <- init_inner, 866 945 }) 867 946 } 868 947 } ··· 882 945 /// must all be a multiple of GSP_PAGE_SIZE in size, so add padding to force it 883 946 /// to that size. 
884 947 #[repr(C)] 948 + #[derive(Zeroable)] 885 949 pub(crate) struct GspArgumentsPadded { 886 950 pub(crate) inner: GspArgumentsCached, 887 951 _padding: [u8; GSP_PAGE_SIZE - core::mem::size_of::<bindings::GSP_ARGUMENTS_CACHED>()], 952 + } 953 + 954 + impl GspArgumentsPadded { 955 + pub(crate) fn new(cmdq: &Cmdq) -> impl Init<Self> + '_ { 956 + init!(GspArgumentsPadded { 957 + inner <- GspArgumentsCached::new(cmdq), 958 + ..Zeroable::init_zeroed() 959 + }) 960 + } 888 961 } 889 962 890 963 // SAFETY: Padding is explicit and will not contain uninitialized data. ··· 905 958 unsafe impl FromBytes for GspArgumentsPadded {} 906 959 907 960 /// Init arguments for the message queue. 908 - #[repr(transparent)] 909 - struct MessageQueueInitArguments(bindings::MESSAGE_QUEUE_INIT_ARGUMENTS); 961 + type MessageQueueInitArguments = bindings::MESSAGE_QUEUE_INIT_ARGUMENTS; 910 962 911 963 impl MessageQueueInitArguments { 912 964 /// Creates a new init arguments structure for `cmdq`. 913 - fn new(cmdq: &Cmdq) -> Self { 914 - Self(bindings::MESSAGE_QUEUE_INIT_ARGUMENTS { 915 - sharedMemPhysAddr: cmdq.dma_handle(), 965 + #[allow(non_snake_case)] 966 + fn new(cmdq: &Cmdq) -> impl Init<Self> + '_ { 967 + init!(MessageQueueInitArguments { 968 + sharedMemPhysAddr: cmdq.dma_handle, 916 969 pageTableEntryCount: num::usize_into_u32::<{ Cmdq::NUM_PTES }>(), 917 970 cmdQueueOffset: num::usize_as_u64(Cmdq::CMDQ_OFFSET), 918 971 statQueueOffset: num::usize_as_u64(Cmdq::STATQ_OFFSET), 919 - ..Default::default() 972 + ..Zeroable::init_zeroed() 920 973 }) 921 974 } 922 975 }

+10 -7

drivers/gpu/nova-core/gsp/fw/commands.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - use kernel::prelude::*; 4 - use kernel::transmute::{AsBytes, FromBytes}; 5 - use kernel::{device, pci}; 3 + use kernel::{ 4 + device, 5 + pci, 6 + prelude::*, 7 + transmute::{ 8 + AsBytes, 9 + FromBytes, // 10 + }, // 11 + }; 6 12 7 13 use crate::gsp::GSP_PAGE_SIZE; 8 14 ··· 113 107 114 108 /// Payload of the `GetGspStaticInfo` command and message. 115 109 #[repr(transparent)] 110 + #[derive(Zeroable)] 116 111 pub(crate) struct GspStaticConfigInfo(bindings::GspStaticConfigInfo_t); 117 112 118 113 impl GspStaticConfigInfo { ··· 129 122 // SAFETY: This struct only contains integer types for which all bit patterns 130 123 // are valid. 131 124 unsafe impl FromBytes for GspStaticConfigInfo {} 132 - 133 - // SAFETY: This struct only contains integer types and fixed-size arrays for which 134 - // all bit patterns are valid. 135 - unsafe impl Zeroable for GspStaticConfigInfo {}

+1

drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs

··· 43 43 pub const GSP_FW_WPR_META_REVISION: u32 = 1; 44 44 pub const GSP_FW_WPR_META_MAGIC: i64 = -2577556379034558285; 45 45 pub const REGISTRY_TABLE_ENTRY_TYPE_DWORD: u32 = 1; 46 + pub const GSP_MSG_QUEUE_ELEMENT_SIZE_MAX: u32 = 65536; 46 47 pub type __u8 = ffi::c_uchar; 47 48 pub type __u16 = ffi::c_ushort; 48 49 pub type __u32 = ffi::c_uint;

+9 -13

drivers/gpu/nova-core/gsp/sequencer.rs

··· 67 67 /// GSP Sequencer Command types with payload data. 68 68 /// Commands have an opcode and an opcode-dependent struct. 69 69 #[allow(clippy::enum_variant_names)] 70 + #[derive(Debug)] 70 71 pub(crate) enum GspSeqCmd { 71 72 RegWrite(fw::RegWritePayload), 72 73 RegModify(fw::RegModifyPayload), ··· 145 144 dev: ARef<device::Device>, 146 145 } 147 146 148 - /// Trait for running sequencer commands. 149 - pub(crate) trait GspSeqCmdRunner { 150 - fn run(&self, sequencer: &GspSequencer<'_>) -> Result; 151 - } 152 - 153 - impl GspSeqCmdRunner for fw::RegWritePayload { 147 + impl fw::RegWritePayload { 154 148 fn run(&self, sequencer: &GspSequencer<'_>) -> Result { 155 149 let addr = usize::from_safe_cast(self.addr()); 156 150 ··· 153 157 } 154 158 } 155 159 156 - impl GspSeqCmdRunner for fw::RegModifyPayload { 160 + impl fw::RegModifyPayload { 157 161 fn run(&self, sequencer: &GspSequencer<'_>) -> Result { 158 162 let addr = usize::from_safe_cast(self.addr()); 159 163 ··· 165 169 } 166 170 } 167 171 168 - impl GspSeqCmdRunner for fw::RegPollPayload { 172 + impl fw::RegPollPayload { 169 173 fn run(&self, sequencer: &GspSequencer<'_>) -> Result { 170 174 let addr = usize::from_safe_cast(self.addr()); 171 175 ··· 190 194 } 191 195 } 192 196 193 - impl GspSeqCmdRunner for fw::DelayUsPayload { 197 + impl fw::DelayUsPayload { 194 198 fn run(&self, _sequencer: &GspSequencer<'_>) -> Result { 195 199 fsleep(Delta::from_micros(i64::from(self.val()))); 196 200 Ok(()) 197 201 } 198 202 } 199 203 200 - impl GspSeqCmdRunner for fw::RegStorePayload { 204 + impl fw::RegStorePayload { 201 205 fn run(&self, sequencer: &GspSequencer<'_>) -> Result { 202 206 let addr = usize::from_safe_cast(self.addr()); 203 207 ··· 205 209 } 206 210 } 207 211 208 - impl GspSeqCmdRunner for GspSeqCmd { 212 + impl GspSeqCmd { 209 213 fn run(&self, seq: &GspSequencer<'_>) -> Result { 210 214 match self { 211 215 GspSeqCmd::RegWrite(cmd) => cmd.run(seq), ··· 356 360 } 357 361 358 362 impl<'a> 
GspSequencer<'a> { 359 - pub(crate) fn run(cmdq: &mut Cmdq, params: GspSequencerParams<'a>) -> Result { 363 + pub(crate) fn run(cmdq: &Cmdq, params: GspSequencerParams<'a>) -> Result { 360 364 let seq_info = loop { 361 - match cmdq.receive_msg::<GspSequence>(Delta::from_secs(10)) { 365 + match cmdq.receive_msg::<GspSequence>(Cmdq::RECEIVE_TIMEOUT) { 362 366 Ok(seq_info) => break seq_info, 363 367 Err(ERANGE) => continue, 364 368 Err(e) => return Err(e),

+50 -4

drivers/gpu/nova-core/nova_core.rs

··· 2 2 3 3 //! Nova Core GPU Driver 4 4 5 + use kernel::{ 6 + debugfs, 7 + driver::Registration, 8 + pci, 9 + prelude::*, 10 + InPlaceModule, // 11 + }; 12 + 5 13 #[macro_use] 6 14 mod bitfield; 7 15 8 - mod dma; 9 16 mod driver; 10 17 mod falcon; 11 18 mod fb; ··· 20 13 mod gfw; 21 14 mod gpu; 22 15 mod gsp; 16 + #[macro_use] 23 17 mod num; 24 18 mod regs; 25 19 mod sbuffer; 26 20 mod vbios; 27 21 28 - pub(crate) const MODULE_NAME: &kernel::str::CStr = <LocalModule as kernel::ModuleMetadata>::NAME; 22 + pub(crate) const MODULE_NAME: &core::ffi::CStr = <LocalModule as kernel::ModuleMetadata>::NAME; 29 23 30 - kernel::module_pci_driver! { 31 - type: driver::NovaCore, 24 + // TODO: Move this into per-module data once that exists. 25 + static mut DEBUGFS_ROOT: Option<debugfs::Dir> = None; 26 + 27 + /// Guard that clears `DEBUGFS_ROOT` when dropped. 28 + struct DebugfsRootGuard; 29 + 30 + impl Drop for DebugfsRootGuard { 31 + fn drop(&mut self) { 32 + // SAFETY: This guard is dropped after `_driver` (due to field order), 33 + // so the driver is unregistered and no probe() can be running. 34 + unsafe { DEBUGFS_ROOT = None }; 35 + } 36 + } 37 + 38 + #[pin_data] 39 + struct NovaCoreModule { 40 + // Fields are dropped in declaration order, so `_driver` is dropped first, 41 + // then `_debugfs_guard` clears `DEBUGFS_ROOT`. 42 + #[pin] 43 + _driver: Registration<pci::Adapter<driver::NovaCore>>, 44 + _debugfs_guard: DebugfsRootGuard, 45 + } 46 + 47 + impl InPlaceModule for NovaCoreModule { 48 + fn init(module: &'static kernel::ThisModule) -> impl PinInit<Self, Error> { 49 + let dir = debugfs::Dir::new(kernel::c_str!("nova_core")); 50 + 51 + // SAFETY: We are the only driver code running during init, so there 52 + // cannot be any concurrent access to `DEBUGFS_ROOT`. 53 + unsafe { DEBUGFS_ROOT = Some(dir) }; 54 + 55 + try_pin_init!(Self { 56 + _driver <- Registration::new(MODULE_NAME, module), 57 + _debugfs_guard: DebugfsRootGuard, 58 + }) 59 + } 60 + } 61 + 62 + module! 
{ 63 + type: NovaCoreModule, 32 64 name: "NovaCore", 33 65 authors: ["Danilo Krummrich"], 34 66 description: "Nova Core GPU driver",

+80

drivers/gpu/nova-core/num.rs

··· 215 215 impl_const_into!(u64 => { u8, u16, u32 }); 216 216 impl_const_into!(u32 => { u8, u16 }); 217 217 impl_const_into!(u16 => { u8 }); 218 + 219 + /// Creates an enum type associated to a [`Bounded`](kernel::num::Bounded), with a [`From`] 220 + /// conversion to the associated `Bounded` and either a [`TryFrom`] or `From` conversion from the 221 + /// associated `Bounded`. 222 + // TODO[FPRI]: This is a temporary solution to be replaced with the corresponding derive macros 223 + // once they land. 224 + #[macro_export] 225 + macro_rules! bounded_enum { 226 + ( 227 + $(#[$enum_meta:meta])* 228 + $vis:vis enum $enum_type:ident with $from_impl:ident<Bounded<$width:ty, $length:literal>> { 229 + $( $(#[doc = $variant_doc:expr])* $variant:ident = $value:expr),* $(,)* 230 + } 231 + ) => { 232 + $(#[$enum_meta])* 233 + $vis enum $enum_type { 234 + $( 235 + $(#[doc = $variant_doc])* 236 + $variant = $value 237 + ),* 238 + } 239 + 240 + impl core::convert::From<$enum_type> for kernel::num::Bounded<$width, $length> { 241 + fn from(value: $enum_type) -> Self { 242 + match value { 243 + $($enum_type::$variant => 244 + kernel::num::Bounded::<$width, _>::new::<{ $value }>()),* 245 + } 246 + } 247 + } 248 + 249 + bounded_enum!(@impl_from $enum_type with $from_impl<Bounded<$width, $length>> { 250 + $($variant = $value),* 251 + }); 252 + }; 253 + 254 + // `TryFrom` implementation from associated `Bounded` to enum type. 
255 + (@impl_from $enum_type:ident with TryFrom<Bounded<$width:ty, $length:literal>> { 256 + $($variant:ident = $value:expr),* $(,)* 257 + }) => { 258 + impl core::convert::TryFrom<kernel::num::Bounded<$width, $length>> for $enum_type { 259 + type Error = kernel::error::Error; 260 + 261 + fn try_from( 262 + value: kernel::num::Bounded<$width, $length> 263 + ) -> kernel::error::Result<Self> { 264 + match value.get() { 265 + $( 266 + $value => Ok($enum_type::$variant), 267 + )* 268 + _ => Err(kernel::error::code::EINVAL), 269 + } 270 + } 271 + } 272 + }; 273 + 274 + // `From` implementation from associated `Bounded` to enum type. Triggers a build-time error if 275 + // all possible values of the `Bounded` are not covered by the enum type. 276 + (@impl_from $enum_type:ident with From<Bounded<$width:ty, $length:literal>> { 277 + $($variant:ident = $value:expr),* $(,)* 278 + }) => { 279 + impl core::convert::From<kernel::num::Bounded<$width, $length>> for $enum_type { 280 + fn from(value: kernel::num::Bounded<$width, $length>) -> Self { 281 + const MAX: $width = 1 << $length; 282 + 283 + // Makes the compiler optimizer aware of the possible range of values. 284 + let value = value.get() & ((1 << $length) - 1); 285 + match value { 286 + $( 287 + $value => $enum_type::$variant, 288 + )* 289 + // PANIC: we cannot reach this arm as all possible variants are handled by the 290 + // match arms above. It is here to make the compiler complain if `$enum_type` 291 + // does not cover all values of the `0..MAX` range. 292 + MAX.. => unreachable!(), 293 + } 294 + } 295 + } 296 + } 297 + }

+330 -241

drivers/gpu/nova-core/regs.rs

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - // Required to retain the original register names used by OpenRM, which are all capital snake case 4 - // but are mapped to types. 5 - #![allow(non_camel_case_types)] 6 - 7 - #[macro_use] 8 - pub(crate) mod macros; 9 - 10 3 use kernel::{ 4 + io::{ 5 + register, 6 + register::WithBase, 7 + Io, // 8 + }, 11 9 prelude::*, 12 10 time, // 13 11 }; ··· 35 37 36 38 // PMC 37 39 38 - register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" { 39 - 3:0 minor_revision as u8, "Minor revision of the chip"; 40 - 7:4 major_revision as u8, "Major revision of the chip"; 41 - 8:8 architecture_1 as u8, "MSB of the architecture"; 42 - 23:20 implementation as u8, "Implementation version of the architecture"; 43 - 28:24 architecture_0 as u8, "Lower bits of the architecture"; 44 - }); 40 + register! { 41 + /// Basic revision information about the GPU. 42 + pub(crate) NV_PMC_BOOT_0(u32) @ 0x00000000 { 43 + /// Lower bits of the architecture. 44 + 28:24 architecture_0; 45 + /// Implementation version of the architecture. 46 + 23:20 implementation; 47 + /// MSB of the architecture. 48 + 8:8 architecture_1; 49 + /// Major revision of the chip. 50 + 7:4 major_revision; 51 + /// Minor revision of the chip. 52 + 3:0 minor_revision; 53 + } 54 + 55 + /// Extended architecture information. 56 + pub(crate) NV_PMC_BOOT_42(u32) @ 0x00000a00 { 57 + /// Architecture value. 58 + 29:24 architecture ?=> Architecture; 59 + /// Implementation version of the architecture. 60 + 23:20 implementation; 61 + /// Major revision of the chip. 62 + 19:16 major_revision; 63 + /// Minor revision of the chip. 
64 + 15:12 minor_revision; 65 + } 66 + } 45 67 46 68 impl NV_PMC_BOOT_0 { 47 69 pub(crate) fn is_older_than_fermi(self) -> bool { 48 70 // From https://github.com/NVIDIA/open-gpu-doc/tree/master/manuals : 49 - const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u8 = 0xc; 71 + const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u32 = 0xc; 50 72 51 73 // Older chips left arch1 zeroed out. That, combined with an arch0 value that is less than 52 74 // GF100, means "older than Fermi". 53 75 self.architecture_1() == 0 && self.architecture_0() < NV_PMC_BOOT_0_ARCHITECTURE_GF100 54 76 } 55 77 } 56 - 57 - register!(NV_PMC_BOOT_42 @ 0x00000a00, "Extended architecture information" { 58 - 15:12 minor_revision as u8, "Minor revision of the chip"; 59 - 19:16 major_revision as u8, "Major revision of the chip"; 60 - 23:20 implementation as u8, "Implementation version of the architecture"; 61 - 29:24 architecture as u8 ?=> Architecture, "Architecture value"; 62 - }); 63 78 64 79 impl NV_PMC_BOOT_42 { 65 80 /// Combines `architecture` and `implementation` to obtain a code unique to the chipset. ··· 87 76 88 77 /// Returns the raw architecture value from the register. 89 78 fn architecture_raw(self) -> u8 { 90 - ((self.0 >> Self::ARCHITECTURE_RANGE.start()) & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1)) 91 - as u8 79 + ((self.into_raw() >> Self::ARCHITECTURE_RANGE.start()) 80 + & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1)) as u8 92 81 } 93 82 } 94 83 ··· 97 86 write!( 98 87 f, 99 88 "boot42 = 0x{:08x} (architecture 0x{:x}, implementation 0x{:x})", 100 - self.0, 89 + self.inner, 101 90 self.architecture_raw(), 102 91 self.implementation() 103 92 ) ··· 106 95 107 96 // PBUS 108 97 109 - register!(NV_PBUS_SW_SCRATCH @ 0x00001400[64] {}); 98 + register! 
{ 99 + pub(crate) NV_PBUS_SW_SCRATCH(u32)[64] @ 0x00001400 {} 110 100 111 - register!(NV_PBUS_SW_SCRATCH_0E_FRTS_ERR => NV_PBUS_SW_SCRATCH[0xe], 112 - "scratch register 0xe used as FRTS firmware error code" { 113 - 31:16 frts_err_code as u16; 114 - }); 101 + /// Scratch register 0xe used as FRTS firmware error code. 102 + pub(crate) NV_PBUS_SW_SCRATCH_0E_FRTS_ERR(u32) => NV_PBUS_SW_SCRATCH[0xe] { 103 + 31:16 frts_err_code; 104 + } 105 + } 115 106 116 107 // PFB 117 108 118 - // The following two registers together hold the physical system memory address that is used by the 119 - // GPU to perform sysmembar operations (see `fb::SysmemFlush`). 109 + register! { 110 + /// Low bits of the physical system memory address used by the GPU to perform sysmembar 111 + /// operations (see [`crate::fb::SysmemFlush`]). 112 + pub(crate) NV_PFB_NISO_FLUSH_SYSMEM_ADDR(u32) @ 0x00100c10 { 113 + 31:0 adr_39_08; 114 + } 120 115 121 - register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR @ 0x00100c10 { 122 - 31:0 adr_39_08 as u32; 123 - }); 116 + /// High bits of the physical system memory address used by the GPU to perform sysmembar 117 + /// operations (see [`crate::fb::SysmemFlush`]). 118 + pub(crate) NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x00100c40 { 119 + 23:0 adr_63_40; 120 + } 124 121 125 - register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI @ 0x00100c40 { 126 - 23:0 adr_63_40 as u32; 127 - }); 122 + pub(crate) NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE(u32) @ 0x00100ce0 { 123 + 30:30 ecc_mode_enabled => bool; 124 + 9:4 lower_mag; 125 + 3:0 lower_scale; 126 + } 128 127 129 - register!(NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE @ 0x00100ce0 { 130 - 3:0 lower_scale as u8; 131 - 9:4 lower_mag as u8; 132 - 30:30 ecc_mode_enabled as bool; 133 - }); 128 + pub(crate) NV_PFB_PRI_MMU_WPR2_ADDR_LO(u32) @ 0x001fa824 { 129 + /// Bits 12..40 of the lower (inclusive) bound of the WPR2 region. 
130 + 31:4 lo_val; 131 + } 134 132 135 - register!(NV_PGSP_QUEUE_HEAD @ 0x00110c00 { 136 - 31:0 address as u32; 137 - }); 133 + pub(crate) NV_PFB_PRI_MMU_WPR2_ADDR_HI(u32) @ 0x001fa828 { 134 + /// Bits 12..40 of the higher (exclusive) bound of the WPR2 region. 135 + 31:4 hi_val; 136 + } 137 + } 138 138 139 139 impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { 140 140 /// Returns the usable framebuffer size, in bytes. ··· 162 140 } 163 141 } 164 142 165 - register!(NV_PFB_PRI_MMU_WPR2_ADDR_LO@0x001fa824 { 166 - 31:4 lo_val as u32, "Bits 12..40 of the lower (inclusive) bound of the WPR2 region"; 167 - }); 168 - 169 143 impl NV_PFB_PRI_MMU_WPR2_ADDR_LO { 170 144 /// Returns the lower (inclusive) bound of the WPR2 region. 171 145 pub(crate) fn lower_bound(self) -> u64 { ··· 169 151 } 170 152 } 171 153 172 - register!(NV_PFB_PRI_MMU_WPR2_ADDR_HI@0x001fa828 { 173 - 31:4 hi_val as u32, "Bits 12..40 of the higher (exclusive) bound of the WPR2 region"; 174 - }); 175 - 176 154 impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { 177 155 /// Returns the higher (exclusive) bound of the WPR2 region. 178 156 /// 179 157 /// A value of zero means the WPR2 region is not set. 180 158 pub(crate) fn higher_bound(self) -> u64 { 181 159 u64::from(self.hi_val()) << 12 160 + } 161 + } 162 + 163 + // PGSP 164 + 165 + register! { 166 + pub(crate) NV_PGSP_QUEUE_HEAD(u32) @ 0x00110c00 { 167 + 31:0 address; 182 168 } 183 169 } 184 170 ··· 195 173 // These scratch registers remain powered on even in a low-power state and have a designated group 196 174 // number. 197 175 198 - // Boot Sequence Interface (BSI) register used to determine 199 - // if GSP reload/resume has completed during the boot process. 200 - register!(NV_PGC6_BSI_SECURE_SCRATCH_14 @ 0x001180f8 { 201 - 26:26 boot_stage_3_handoff as bool; 202 - }); 203 - 204 - // Privilege level mask register. It dictates whether the host CPU has privilege to access the 205 - // `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). 
206 - register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128, 207 - "Privilege level mask register" { 208 - 0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level"; 209 - }); 210 - 211 - // OpenRM defines this as a register array, but doesn't specify its size and only uses its first 212 - // element. Be conservative until we know the actual size or need to use more registers. 213 - register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234[1] {}); 214 - 215 - register!( 216 - NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0], 217 - "Scratch group 05 register 0 used as GFW boot progress indicator" { 218 - 7:0 progress as u8, "Progress of GFW boot (0xff means completed)"; 176 + register! { 177 + /// Boot Sequence Interface (BSI) register used to determine 178 + /// if GSP reload/resume has completed during the boot process. 179 + pub(crate) NV_PGC6_BSI_SECURE_SCRATCH_14(u32) @ 0x001180f8 { 180 + 26:26 boot_stage_3_handoff => bool; 219 181 } 220 - ); 182 + 183 + /// Privilege level mask register. It dictates whether the host CPU has privilege to access the 184 + /// `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). 185 + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK(u32) @ 0x00118128 { 186 + /// Set after FWSEC lowers its protection level. 187 + 0:0 read_protection_level0 => bool; 188 + } 189 + 190 + /// OpenRM defines this as a register array, but doesn't specify its size and only uses its 191 + /// first element. Be conservative until we know the actual size or need to use more registers. 192 + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05(u32)[1] @ 0x00118234 {} 193 + 194 + /// Scratch group 05 register 0 used as GFW boot progress indicator. 195 + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT(u32) 196 + => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0] { 197 + /// Progress of GFW boot (0xff means completed). 
198 + 7:0 progress; 199 + } 200 + 201 + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_42(u32) @ 0x001183a4 { 202 + 31:0 value; 203 + } 204 + 205 + /// Scratch group 42 register used as framebuffer size. 206 + pub(crate) NV_USABLE_FB_SIZE_IN_MB(u32) => NV_PGC6_AON_SECURE_SCRATCH_GROUP_42 { 207 + /// Usable framebuffer size, in megabytes. 208 + 31:0 value; 209 + } 210 + } 221 211 222 212 impl NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT { 223 213 /// Returns `true` if GFW boot is completed. ··· 237 203 self.progress() == 0xff 238 204 } 239 205 } 240 - 241 - register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_42 @ 0x001183a4 { 242 - 31:0 value as u32; 243 - }); 244 - 245 - register!( 246 - NV_USABLE_FB_SIZE_IN_MB => NV_PGC6_AON_SECURE_SCRATCH_GROUP_42, 247 - "Scratch group 42 register used as framebuffer size" { 248 - 31:0 value as u32, "Usable framebuffer size, in megabytes"; 249 - } 250 - ); 251 206 252 207 impl NV_USABLE_FB_SIZE_IN_MB { 253 208 /// Returns the usable framebuffer size, in bytes. ··· 247 224 248 225 // PDISP 249 226 250 - register!(NV_PDISP_VGA_WORKSPACE_BASE @ 0x00625f04 { 251 - 3:3 status_valid as bool, "Set if the `addr` field is valid"; 252 - 31:8 addr as u32, "VGA workspace base address divided by 0x10000"; 253 - }); 227 + register! { 228 + pub(crate) NV_PDISP_VGA_WORKSPACE_BASE(u32) @ 0x00625f04 { 229 + /// VGA workspace base address divided by 0x10000. 230 + 31:8 addr; 231 + /// Set if the `addr` field is valid. 232 + 3:3 status_valid => bool; 233 + } 234 + } 254 235 255 236 impl NV_PDISP_VGA_WORKSPACE_BASE { 256 237 /// Returns the base address of the VGA workspace, or `None` if none exists. ··· 271 244 272 245 pub(crate) const NV_FUSE_OPT_FPF_SIZE: usize = 16; 273 246 274 - register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100[NV_FUSE_OPT_FPF_SIZE] { 275 - 15:0 data as u16; 276 - }); 247 + register! 
{ 248 + pub(crate) NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x00824100 { 249 + 15:0 data => u16; 250 + } 277 251 278 - register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140[NV_FUSE_OPT_FPF_SIZE] { 279 - 15:0 data as u16; 280 - }); 252 + pub(crate) NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x00824140 { 253 + 15:0 data => u16; 254 + } 281 255 282 - register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0[NV_FUSE_OPT_FPF_SIZE] { 283 - 15:0 data as u16; 284 - }); 285 - 286 - // PFALCON 287 - 288 - register!(NV_PFALCON_FALCON_IRQSCLR @ PFalconBase[0x00000004] { 289 - 4:4 halt as bool; 290 - 6:6 swgen0 as bool; 291 - }); 292 - 293 - register!(NV_PFALCON_FALCON_MAILBOX0 @ PFalconBase[0x00000040] { 294 - 31:0 value as u32; 295 - }); 296 - 297 - register!(NV_PFALCON_FALCON_MAILBOX1 @ PFalconBase[0x00000044] { 298 - 31:0 value as u32; 299 - }); 300 - 301 - // Used to store version information about the firmware running 302 - // on the Falcon processor. 303 - register!(NV_PFALCON_FALCON_OS @ PFalconBase[0x00000080] { 304 - 31:0 value as u32; 305 - }); 306 - 307 - register!(NV_PFALCON_FALCON_RM @ PFalconBase[0x00000084] { 308 - 31:0 value as u32; 309 - }); 310 - 311 - register!(NV_PFALCON_FALCON_HWCFG2 @ PFalconBase[0x000000f4] { 312 - 10:10 riscv as bool; 313 - 12:12 mem_scrubbing as bool, "Set to 0 after memory scrubbing is completed"; 314 - 31:31 reset_ready as bool, "Signal indicating that reset is completed (GA102+)"; 315 - }); 316 - 317 - impl NV_PFALCON_FALCON_HWCFG2 { 318 - /// Returns `true` if memory scrubbing is completed. 
319 - pub(crate) fn mem_scrubbing_done(self) -> bool { 320 - !self.mem_scrubbing() 256 + pub(crate) NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x008241c0 { 257 + 15:0 data => u16; 321 258 } 322 259 } 323 260 324 - register!(NV_PFALCON_FALCON_CPUCTL @ PFalconBase[0x00000100] { 325 - 1:1 startcpu as bool; 326 - 4:4 halted as bool; 327 - 6:6 alias_en as bool; 328 - }); 261 + // PFALCON 329 262 330 - register!(NV_PFALCON_FALCON_BOOTVEC @ PFalconBase[0x00000104] { 331 - 31:0 value as u32; 332 - }); 263 + register! { 264 + pub(crate) NV_PFALCON_FALCON_IRQSCLR(u32) @ PFalconBase + 0x00000004 { 265 + 6:6 swgen0 => bool; 266 + 4:4 halt => bool; 267 + } 333 268 334 - register!(NV_PFALCON_FALCON_DMACTL @ PFalconBase[0x0000010c] { 335 - 0:0 require_ctx as bool; 336 - 1:1 dmem_scrubbing as bool; 337 - 2:2 imem_scrubbing as bool; 338 - 6:3 dmaq_num as u8; 339 - 7:7 secure_stat as bool; 340 - }); 269 + pub(crate) NV_PFALCON_FALCON_MAILBOX0(u32) @ PFalconBase + 0x00000040 { 270 + 31:0 value => u32; 271 + } 272 + 273 + pub(crate) NV_PFALCON_FALCON_MAILBOX1(u32) @ PFalconBase + 0x00000044 { 274 + 31:0 value => u32; 275 + } 276 + 277 + /// Used to store version information about the firmware running 278 + /// on the Falcon processor. 279 + pub(crate) NV_PFALCON_FALCON_OS(u32) @ PFalconBase + 0x00000080 { 280 + 31:0 value => u32; 281 + } 282 + 283 + pub(crate) NV_PFALCON_FALCON_RM(u32) @ PFalconBase + 0x00000084 { 284 + 31:0 value => u32; 285 + } 286 + 287 + pub(crate) NV_PFALCON_FALCON_HWCFG2(u32) @ PFalconBase + 0x000000f4 { 288 + /// Signal indicating that reset is completed (GA102+). 289 + 31:31 reset_ready => bool; 290 + /// Set to 0 after memory scrubbing is completed. 
291 + 12:12 mem_scrubbing => bool; 292 + 10:10 riscv => bool; 293 + } 294 + 295 + pub(crate) NV_PFALCON_FALCON_CPUCTL(u32) @ PFalconBase + 0x00000100 { 296 + 6:6 alias_en => bool; 297 + 4:4 halted => bool; 298 + 1:1 startcpu => bool; 299 + } 300 + 301 + pub(crate) NV_PFALCON_FALCON_BOOTVEC(u32) @ PFalconBase + 0x00000104 { 302 + 31:0 value => u32; 303 + } 304 + 305 + pub(crate) NV_PFALCON_FALCON_DMACTL(u32) @ PFalconBase + 0x0000010c { 306 + 7:7 secure_stat => bool; 307 + 6:3 dmaq_num; 308 + 2:2 imem_scrubbing => bool; 309 + 1:1 dmem_scrubbing => bool; 310 + 0:0 require_ctx => bool; 311 + } 312 + 313 + pub(crate) NV_PFALCON_FALCON_DMATRFBASE(u32) @ PFalconBase + 0x00000110 { 314 + 31:0 base => u32; 315 + } 316 + 317 + pub(crate) NV_PFALCON_FALCON_DMATRFMOFFS(u32) @ PFalconBase + 0x00000114 { 318 + 23:0 offs; 319 + } 320 + 321 + pub(crate) NV_PFALCON_FALCON_DMATRFCMD(u32) @ PFalconBase + 0x00000118 { 322 + 16:16 set_dmtag; 323 + 14:12 ctxdma; 324 + 10:8 size ?=> DmaTrfCmdSize; 325 + 5:5 is_write => bool; 326 + 4:4 imem => bool; 327 + 3:2 sec; 328 + 1:1 idle => bool; 329 + 0:0 full => bool; 330 + } 331 + 332 + pub(crate) NV_PFALCON_FALCON_DMATRFFBOFFS(u32) @ PFalconBase + 0x0000011c { 333 + 31:0 offs => u32; 334 + } 335 + 336 + pub(crate) NV_PFALCON_FALCON_DMATRFBASE1(u32) @ PFalconBase + 0x00000128 { 337 + 8:0 base; 338 + } 339 + 340 + pub(crate) NV_PFALCON_FALCON_HWCFG1(u32) @ PFalconBase + 0x0000012c { 341 + /// Core revision subversion. 342 + 7:6 core_rev_subversion => FalconCoreRevSubversion; 343 + /// Security model. 344 + 5:4 security_model ?=> FalconSecurityModel; 345 + /// Core revision. 346 + 3:0 core_rev ?=> FalconCoreRev; 347 + } 348 + 349 + pub(crate) NV_PFALCON_FALCON_CPUCTL_ALIAS(u32) @ PFalconBase + 0x00000130 { 350 + 1:1 startcpu => bool; 351 + } 352 + 353 + /// IMEM access control register. Up to 4 ports are available for IMEM access. 
354 + pub(crate) NV_PFALCON_FALCON_IMEMC(u32)[4, stride = 16] @ PFalconBase + 0x00000180 { 355 + /// Access secure IMEM. 356 + 28:28 secure => bool; 357 + /// Auto-increment on write. 358 + 24:24 aincw => bool; 359 + /// IMEM block and word offset. 360 + 15:0 offs; 361 + } 362 + 363 + /// IMEM data register. Reading/writing this register accesses IMEM at the address 364 + /// specified by the corresponding IMEMC register. 365 + pub(crate) NV_PFALCON_FALCON_IMEMD(u32)[4, stride = 16] @ PFalconBase + 0x00000184 { 366 + 31:0 data; 367 + } 368 + 369 + /// IMEM tag register. Used to set the tag for the current IMEM block. 370 + pub(crate) NV_PFALCON_FALCON_IMEMT(u32)[4, stride = 16] @ PFalconBase + 0x00000188 { 371 + 15:0 tag; 372 + } 373 + 374 + /// DMEM access control register. Up to 8 ports are available for DMEM access. 375 + pub(crate) NV_PFALCON_FALCON_DMEMC(u32)[8, stride = 8] @ PFalconBase + 0x000001c0 { 376 + /// Auto-increment on write. 377 + 24:24 aincw => bool; 378 + /// DMEM block and word offset. 379 + 15:0 offs; 380 + } 381 + 382 + /// DMEM data register. Reading/writing this register accesses DMEM at the address 383 + /// specified by the corresponding DMEMC register. 384 + pub(crate) NV_PFALCON_FALCON_DMEMD(u32)[8, stride = 8] @ PFalconBase + 0x000001c4 { 385 + 31:0 data; 386 + } 387 + 388 + /// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the 389 + /// falcon instance. 390 + pub(crate) NV_PFALCON_FALCON_ENGINE(u32) @ PFalconBase + 0x000003c0 { 391 + 0:0 reset => bool; 392 + } 393 + 394 + pub(crate) NV_PFALCON_FBIF_TRANSCFG(u32)[8] @ PFalconBase + 0x00000600 { 395 + 2:2 mem_type => FalconFbifMemType; 396 + 1:0 target ?=> FalconFbifTarget; 397 + } 398 + 399 + pub(crate) NV_PFALCON_FBIF_CTL(u32) @ PFalconBase + 0x00000624 { 400 + 7:7 allow_phys_no_ctx => bool; 401 + } 402 + } 341 403 342 404 impl NV_PFALCON_FALCON_DMACTL { 343 405 /// Returns `true` if memory scrubbing is completed. 
··· 435 319 } 436 320 } 437 321 438 - register!(NV_PFALCON_FALCON_DMATRFBASE @ PFalconBase[0x00000110] { 439 - 31:0 base as u32; 440 - }); 441 - 442 - register!(NV_PFALCON_FALCON_DMATRFMOFFS @ PFalconBase[0x00000114] { 443 - 23:0 offs as u32; 444 - }); 445 - 446 - register!(NV_PFALCON_FALCON_DMATRFCMD @ PFalconBase[0x00000118] { 447 - 0:0 full as bool; 448 - 1:1 idle as bool; 449 - 3:2 sec as u8; 450 - 4:4 imem as bool; 451 - 5:5 is_write as bool; 452 - 10:8 size as u8 ?=> DmaTrfCmdSize; 453 - 14:12 ctxdma as u8; 454 - 16:16 set_dmtag as u8; 455 - }); 456 - 457 322 impl NV_PFALCON_FALCON_DMATRFCMD { 458 323 /// Programs the `imem` and `sec` fields for the given FalconMem 459 324 pub(crate) fn with_falcon_mem(self, mem: FalconMem) -> Self { 460 - self.set_imem(mem != FalconMem::Dmem) 461 - .set_sec(if mem == FalconMem::ImemSecure { 1 } else { 0 }) 325 + let this = self.with_imem(mem != FalconMem::Dmem); 326 + 327 + match mem { 328 + FalconMem::ImemSecure => this.with_const_sec::<1>(), 329 + _ => this.with_const_sec::<0>(), 330 + } 462 331 } 463 332 } 464 - 465 - register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ PFalconBase[0x0000011c] { 466 - 31:0 offs as u32; 467 - }); 468 - 469 - register!(NV_PFALCON_FALCON_DMATRFBASE1 @ PFalconBase[0x00000128] { 470 - 8:0 base as u16; 471 - }); 472 - 473 - register!(NV_PFALCON_FALCON_HWCFG1 @ PFalconBase[0x0000012c] { 474 - 3:0 core_rev as u8 ?=> FalconCoreRev, "Core revision"; 475 - 5:4 security_model as u8 ?=> FalconSecurityModel, "Security model"; 476 - 7:6 core_rev_subversion as u8 ?=> FalconCoreRevSubversion, "Core revision subversion"; 477 - }); 478 - 479 - register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ PFalconBase[0x00000130] { 480 - 1:1 startcpu as bool; 481 - }); 482 - 483 - // Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon 484 - // instance. 
485 - register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] { 486 - 0:0 reset as bool; 487 - }); 488 333 489 334 impl NV_PFALCON_FALCON_ENGINE { 490 335 /// Resets the falcon 491 336 pub(crate) fn reset_engine<E: FalconEngine>(bar: &Bar0) { 492 - Self::read(bar, &E::ID).set_reset(true).write(bar, &E::ID); 337 + bar.update(Self::of::<E>(), |r| r.with_reset(true)); 493 338 494 339 // TIMEOUT: falcon engine should not take more than 10us to reset. 495 340 time::delay::fsleep(time::Delta::from_micros(10)); 496 341 497 - Self::read(bar, &E::ID).set_reset(false).write(bar, &E::ID); 342 + bar.update(Self::of::<E>(), |r| r.with_reset(false)); 498 343 } 499 344 } 500 345 501 - register!(NV_PFALCON_FBIF_TRANSCFG @ PFalconBase[0x00000600[8]] { 502 - 1:0 target as u8 ?=> FalconFbifTarget; 503 - 2:2 mem_type as bool => FalconFbifMemType; 504 - }); 505 - 506 - register!(NV_PFALCON_FBIF_CTL @ PFalconBase[0x00000624] { 507 - 7:7 allow_phys_no_ctx as bool; 508 - }); 346 + impl NV_PFALCON_FALCON_HWCFG2 { 347 + /// Returns `true` if memory scrubbing is completed. 348 + pub(crate) fn mem_scrubbing_done(self) -> bool { 349 + !self.mem_scrubbing() 350 + } 351 + } 509 352 510 353 /* PFALCON2 */ 511 354 512 - register!(NV_PFALCON2_FALCON_MOD_SEL @ PFalcon2Base[0x00000180] { 513 - 7:0 algo as u8 ?=> FalconModSelAlgo; 514 - }); 355 + register! 
{ 356 + pub(crate) NV_PFALCON2_FALCON_MOD_SEL(u32) @ PFalcon2Base + 0x00000180 { 357 + 7:0 algo ?=> FalconModSelAlgo; 358 + } 515 359 516 - register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ PFalcon2Base[0x00000198] { 517 - 7:0 ucode_id as u8; 518 - }); 360 + pub(crate) NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID(u32) @ PFalcon2Base + 0x00000198 { 361 + 7:0 ucode_id => u8; 362 + } 519 363 520 - register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ PFalcon2Base[0x0000019c] { 521 - 31:0 value as u32; 522 - }); 364 + pub(crate) NV_PFALCON2_FALCON_BROM_ENGIDMASK(u32) @ PFalcon2Base + 0x0000019c { 365 + 31:0 value => u32; 366 + } 523 367 524 - // OpenRM defines this as a register array, but doesn't specify its size and only uses its first 525 - // element. Be conservative until we know the actual size or need to use more registers. 526 - register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210[1]] { 527 - 31:0 value as u32; 528 - }); 368 + /// OpenRM defines this as a register array, but doesn't specify its size and only uses its 369 + /// first element. Be conservative until we know the actual size or need to use more registers. 370 + pub(crate) NV_PFALCON2_FALCON_BROM_PARAADDR(u32)[1] @ PFalcon2Base + 0x00000210 { 371 + 31:0 value => u32; 372 + } 373 + } 529 374 530 375 // PRISCV 531 376 532 - // RISC-V status register for debug (Turing and GA100 only). 533 - // Reflects current RISC-V core status. 534 - register!(NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS @ PFalcon2Base[0x00000240] { 535 - 0:0 active_stat as bool, "RISC-V core active/inactive status"; 536 - }); 377 + register! { 378 + /// RISC-V status register for debug (Turing and GA100 only). 379 + /// Reflects current RISC-V core status. 380 + pub(crate) NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS(u32) @ PFalcon2Base + 0x00000240 { 381 + /// RISC-V core active/inactive status. 
382 + 0:0 active_stat => bool; 383 + } 537 384 538 - // GA102 and later 539 - register!(NV_PRISCV_RISCV_CPUCTL @ PFalcon2Base[0x00000388] { 540 - 0:0 halted as bool; 541 - 7:7 active_stat as bool; 542 - }); 385 + /// GA102 and later. 386 + pub(crate) NV_PRISCV_RISCV_CPUCTL(u32) @ PFalcon2Base + 0x00000388 { 387 + 7:7 active_stat => bool; 388 + 0:0 halted => bool; 389 + } 543 390 544 - register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalcon2Base[0x00000668] { 545 - 0:0 valid as bool; 546 - 4:4 core_select as bool => PeregrineCoreSelect; 547 - 8:8 br_fetch as bool; 548 - }); 391 + /// GA102 and later. 392 + pub(crate) NV_PRISCV_RISCV_BCR_CTRL(u32) @ PFalcon2Base + 0x00000668 { 393 + 8:8 br_fetch => bool; 394 + 4:4 core_select => PeregrineCoreSelect; 395 + 0:0 valid => bool; 396 + } 397 + } 549 398 550 399 // The modules below provide registers that are not identical on all supported chips. They should 551 400 // only be used in HAL modules. 552 401 553 402 pub(crate) mod gm107 { 403 + use kernel::io::register; 404 + 554 405 // FUSE 555 406 556 - register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00021c04 { 557 - 0:0 display_disabled as bool; 558 - }); 407 + register! { 408 + pub(crate) NV_FUSE_STATUS_OPT_DISPLAY(u32) @ 0x00021c04 { 409 + 0:0 display_disabled => bool; 410 + } 411 + } 559 412 } 560 413 561 414 pub(crate) mod ga100 { 415 + use kernel::io::register; 416 + 562 417 // FUSE 563 418 564 - register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00820c04 { 565 - 0:0 display_disabled as bool; 566 - }); 419 + register! { 420 + pub(crate) NV_FUSE_STATUS_OPT_DISPLAY(u32) @ 0x00820c04 { 421 + 0:0 display_disabled => bool; 422 + } 423 + } 567 424 }

-739

drivers/gpu/nova-core/regs/macros.rs

··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - 3 - //! `register!` macro to define register layout and accessors. 4 - //! 5 - //! A single register typically includes several fields, which are accessed through a combination 6 - //! of bit-shift and mask operations that introduce a class of potential mistakes, notably because 7 - //! not all possible field values are necessarily valid. 8 - //! 9 - //! The `register!` macro in this module provides an intuitive and readable syntax for defining a 10 - //! dedicated type for each register. Each such type comes with its own field accessors that can 11 - //! return an error if a field's value is invalid. Please look at the [`bitfield`] macro for the 12 - //! complete syntax of fields definitions. 13 - 14 - /// Trait providing a base address to be added to the offset of a relative register to obtain 15 - /// its actual offset. 16 - /// 17 - /// The `T` generic argument is used to distinguish which base to use, in case a type provides 18 - /// several bases. It is given to the `register!` macro to restrict the use of the register to 19 - /// implementors of this particular variant. 20 - pub(crate) trait RegisterBase<T> { 21 - const BASE: usize; 22 - } 23 - 24 - /// Defines a dedicated type for a register with an absolute offset, including getter and setter 25 - /// methods for its fields and methods to read and write it from an `Io` region. 26 - /// 27 - /// Example: 28 - /// 29 - /// ```no_run 30 - /// register!(BOOT_0 @ 0x00000100, "Basic revision information about the GPU" { 31 - /// 3:0 minor_revision as u8, "Minor revision of the chip"; 32 - /// 7:4 major_revision as u8, "Major revision of the chip"; 33 - /// 28:20 chipset as u32 ?=> Chipset, "Chipset model"; 34 - /// }); 35 - /// ``` 36 - /// 37 - /// This defines a `BOOT_0` type which can be read or written from offset `0x100` of an `Io` 38 - /// region. 
It is composed of 3 fields, for instance `minor_revision` is made of the 4 least 39 - /// significant bits of the register. Each field can be accessed and modified using accessor 40 - /// methods: 41 - /// 42 - /// ```no_run 43 - /// // Read from the register's defined offset (0x100). 44 - /// let boot0 = BOOT_0::read(&bar); 45 - /// pr_info!("chip revision: {}.{}", boot0.major_revision(), boot0.minor_revision()); 46 - /// 47 - /// // `Chipset::try_from` is called with the value of the `chipset` field and returns an 48 - /// // error if it is invalid. 49 - /// let chipset = boot0.chipset()?; 50 - /// 51 - /// // Update some fields and write the value back. 52 - /// boot0.set_major_revision(3).set_minor_revision(10).write(&bar); 53 - /// 54 - /// // Or, just read and update the register in a single step: 55 - /// BOOT_0::update(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); 56 - /// ``` 57 - /// 58 - /// The documentation strings are optional. If present, they will be added to the type's 59 - /// definition, or the field getter and setter methods they are attached to. 60 - /// 61 - /// It is also possible to create a alias register by using the `=> ALIAS` syntax. This is useful 62 - /// for cases where a register's interpretation depends on the context: 63 - /// 64 - /// ```no_run 65 - /// register!(SCRATCH @ 0x00000200, "Scratch register" { 66 - /// 31:0 value as u32, "Raw value"; 67 - /// }); 68 - /// 69 - /// register!(SCRATCH_BOOT_STATUS => SCRATCH, "Boot status of the firmware" { 70 - /// 0:0 completed as bool, "Whether the firmware has completed booting"; 71 - /// }); 72 - /// ``` 73 - /// 74 - /// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while also 75 - /// providing its own `completed` field. 76 - /// 77 - /// ## Relative registers 78 - /// 79 - /// A register can be defined as being accessible from a fixed offset of a provided base. 
For 80 - /// instance, imagine the following I/O space: 81 - /// 82 - /// ```text 83 - /// +-----------------------------+ 84 - /// | ... | 85 - /// | | 86 - /// 0x100--->+------------CPU0-------------+ 87 - /// | | 88 - /// 0x110--->+-----------------------------+ 89 - /// | CPU_CTL | 90 - /// +-----------------------------+ 91 - /// | ... | 92 - /// | | 93 - /// | | 94 - /// 0x200--->+------------CPU1-------------+ 95 - /// | | 96 - /// 0x210--->+-----------------------------+ 97 - /// | CPU_CTL | 98 - /// +-----------------------------+ 99 - /// | ... | 100 - /// +-----------------------------+ 101 - /// ``` 102 - /// 103 - /// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O 104 - /// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define 105 - /// them twice and would prefer a way to select which one to use from a single definition 106 - /// 107 - /// This can be done using the `Base[Offset]` syntax when specifying the register's address. 108 - /// 109 - /// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the 110 - /// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for 111 - /// this register needs to implement `RegisterBase<Base>`. Here is the above example translated 112 - /// into code: 113 - /// 114 - /// ```no_run 115 - /// // Type used to identify the base. 116 - /// pub(crate) struct CpuCtlBase; 117 - /// 118 - /// // ZST describing `CPU0`. 119 - /// struct Cpu0; 120 - /// impl RegisterBase<CpuCtlBase> for Cpu0 { 121 - /// const BASE: usize = 0x100; 122 - /// } 123 - /// // Singleton of `CPU0` used to identify it. 124 - /// const CPU0: Cpu0 = Cpu0; 125 - /// 126 - /// // ZST describing `CPU1`. 127 - /// struct Cpu1; 128 - /// impl RegisterBase<CpuCtlBase> for Cpu1 { 129 - /// const BASE: usize = 0x200; 130 - /// } 131 - /// // Singleton of `CPU1` used to identify it. 
132 - /// const CPU1: Cpu1 = Cpu1; 133 - /// 134 - /// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase<CpuCtlBase>`. 135 - /// register!(CPU_CTL @ CpuCtlBase[0x10], "CPU core control" { 136 - /// 0:0 start as bool, "Start the CPU core"; 137 - /// }); 138 - /// 139 - /// // The `read`, `write` and `update` methods of relative registers take an extra `base` argument 140 - /// // that is used to resolve its final address by adding its `BASE` to the offset of the 141 - /// // register. 142 - /// 143 - /// // Start `CPU0`. 144 - /// CPU_CTL::update(bar, &CPU0, |r| r.set_start(true)); 145 - /// 146 - /// // Start `CPU1`. 147 - /// CPU_CTL::update(bar, &CPU1, |r| r.set_start(true)); 148 - /// 149 - /// // Aliases can also be defined for relative register. 150 - /// register!(CPU_CTL_ALIAS => CpuCtlBase[CPU_CTL], "Alias to CPU core control" { 151 - /// 1:1 alias_start as bool, "Start the aliased CPU core"; 152 - /// }); 153 - /// 154 - /// // Start the aliased `CPU0`. 155 - /// CPU_CTL_ALIAS::update(bar, &CPU0, |r| r.set_alias_start(true)); 156 - /// ``` 157 - /// 158 - /// ## Arrays of registers 159 - /// 160 - /// Some I/O areas contain consecutive values that can be interpreted in the same way. These areas 161 - /// can be defined as an array of identical registers, allowing them to be accessed by index with 162 - /// compile-time or runtime bound checking. Simply define their address as `Address[Size]`, and add 163 - /// an `idx` parameter to their `read`, `write` and `update` methods: 164 - /// 165 - /// ```no_run 166 - /// # fn no_run() -> Result<(), Error> { 167 - /// # fn get_scratch_idx() -> usize { 168 - /// # 0x15 169 - /// # } 170 - /// // Array of 64 consecutive registers with the same layout starting at offset `0x80`. 171 - /// register!(SCRATCH @ 0x00000080[64], "Scratch registers" { 172 - /// 31:0 value as u32; 173 - /// }); 174 - /// 175 - /// // Read scratch register 0, i.e. I/O address `0x80`. 
176 - /// let scratch_0 = SCRATCH::read(bar, 0).value(); 177 - /// // Read scratch register 15, i.e. I/O address `0x80 + (15 * 4)`. 178 - /// let scratch_15 = SCRATCH::read(bar, 15).value(); 179 - /// 180 - /// // This is out of bounds and won't build. 181 - /// // let scratch_128 = SCRATCH::read(bar, 128).value(); 182 - /// 183 - /// // Runtime-obtained array index. 184 - /// let scratch_idx = get_scratch_idx(); 185 - /// // Access on a runtime index returns an error if it is out-of-bounds. 186 - /// let some_scratch = SCRATCH::try_read(bar, scratch_idx)?.value(); 187 - /// 188 - /// // Alias to a particular register in an array. 189 - /// // Here `SCRATCH[8]` is used to convey the firmware exit code. 190 - /// register!(FIRMWARE_STATUS => SCRATCH[8], "Firmware exit status code" { 191 - /// 7:0 status as u8; 192 - /// }); 193 - /// 194 - /// let status = FIRMWARE_STATUS::read(bar).status(); 195 - /// 196 - /// // Non-contiguous register arrays can be defined by adding a stride parameter. 197 - /// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the 198 - /// // registers of the two declarations below are interleaved. 199 - /// register!(SCRATCH_INTERLEAVED_0 @ 0x000000c0[16 ; 8], "Scratch registers bank 0" { 200 - /// 31:0 value as u32; 201 - /// }); 202 - /// register!(SCRATCH_INTERLEAVED_1 @ 0x000000c4[16 ; 8], "Scratch registers bank 1" { 203 - /// 31:0 value as u32; 204 - /// }); 205 - /// # Ok(()) 206 - /// # } 207 - /// ``` 208 - /// 209 - /// ## Relative arrays of registers 210 - /// 211 - /// Combining the two features described in the sections above, arrays of registers accessible from 212 - /// a base can also be defined: 213 - /// 214 - /// ```no_run 215 - /// # fn no_run() -> Result<(), Error> { 216 - /// # fn get_scratch_idx() -> usize { 217 - /// # 0x15 218 - /// # } 219 - /// // Type used as parameter of `RegisterBase` to specify the base. 
220 - /// pub(crate) struct CpuCtlBase; 221 - /// 222 - /// // ZST describing `CPU0`. 223 - /// struct Cpu0; 224 - /// impl RegisterBase<CpuCtlBase> for Cpu0 { 225 - /// const BASE: usize = 0x100; 226 - /// } 227 - /// // Singleton of `CPU0` used to identify it. 228 - /// const CPU0: Cpu0 = Cpu0; 229 - /// 230 - /// // ZST describing `CPU1`. 231 - /// struct Cpu1; 232 - /// impl RegisterBase<CpuCtlBase> for Cpu1 { 233 - /// const BASE: usize = 0x200; 234 - /// } 235 - /// // Singleton of `CPU1` used to identify it. 236 - /// const CPU1: Cpu1 = Cpu1; 237 - /// 238 - /// // 64 per-cpu scratch registers, arranged as a contiguous array. 239 - /// register!(CPU_SCRATCH @ CpuCtlBase[0x00000080[64]], "Per-CPU scratch registers" { 240 - /// 31:0 value as u32; 241 - /// }); 242 - /// 243 - /// let cpu0_scratch_0 = CPU_SCRATCH::read(bar, &Cpu0, 0).value(); 244 - /// let cpu1_scratch_15 = CPU_SCRATCH::read(bar, &Cpu1, 15).value(); 245 - /// 246 - /// // This won't build. 247 - /// // let cpu0_scratch_128 = CPU_SCRATCH::read(bar, &Cpu0, 128).value(); 248 - /// 249 - /// // Runtime-obtained array index. 250 - /// let scratch_idx = get_scratch_idx(); 251 - /// // Access on a runtime value returns an error if it is out-of-bounds. 252 - /// let cpu0_some_scratch = CPU_SCRATCH::try_read(bar, &Cpu0, scratch_idx)?.value(); 253 - /// 254 - /// // `CPU_SCRATCH[8]` is used to convey the firmware exit code. 255 - /// register!(CPU_FIRMWARE_STATUS => CpuCtlBase[CPU_SCRATCH[8]], 256 - /// "Per-CPU firmware exit status code" { 257 - /// 7:0 status as u8; 258 - /// }); 259 - /// 260 - /// let cpu0_status = CPU_FIRMWARE_STATUS::read(bar, &Cpu0).status(); 261 - /// 262 - /// // Non-contiguous register arrays can be defined by adding a stride parameter. 263 - /// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the 264 - /// // registers of the two declarations below are interleaved.
265 - /// register!(CPU_SCRATCH_INTERLEAVED_0 @ CpuCtlBase[0x00000d00[16 ; 8]], 266 - /// "Scratch registers bank 0" { 267 - /// 31:0 value as u32; 268 - /// }); 269 - /// register!(CPU_SCRATCH_INTERLEAVED_1 @ CpuCtlBase[0x00000d04[16 ; 8]], 270 - /// "Scratch registers bank 1" { 271 - /// 31:0 value as u32; 272 - /// }); 273 - /// # Ok(()) 274 - /// # } 275 - /// ``` 276 - macro_rules! register { 277 - // Creates a register at a fixed offset of the MMIO space. 278 - ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { 279 - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); 280 - register!(@io_fixed $name @ $offset); 281 - }; 282 - 283 - // Creates an alias register of fixed offset register `alias` with its own fields. 284 - ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { 285 - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); 286 - register!(@io_fixed $name @ $alias::OFFSET); 287 - }; 288 - 289 - // Creates a register at a relative offset from a base address provider. 290 - ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { 291 - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); 292 - register!(@io_relative $name @ $base [ $offset ]); 293 - }; 294 - 295 - // Creates an alias register of relative offset register `alias` with its own fields. 296 - ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { 297 - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); 298 - register!(@io_relative $name @ $base [ $alias::OFFSET ]); 299 - }; 300 - 301 - // Creates an array of registers at a fixed offset of the MMIO space. 302 - ( 303 - $name:ident @ $offset:literal [ $size:expr ; $stride:expr ] $(, $comment:literal)? 
{ 304 - $($fields:tt)* 305 - } 306 - ) => { 307 - static_assert!(::core::mem::size_of::<u32>() <= $stride); 308 - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); 309 - register!(@io_array $name @ $offset [ $size ; $stride ]); 310 - }; 311 - 312 - // Shortcut for contiguous array of registers (stride == size of element). 313 - ( 314 - $name:ident @ $offset:literal [ $size:expr ] $(, $comment:literal)? { 315 - $($fields:tt)* 316 - } 317 - ) => { 318 - register!($name @ $offset [ $size ; ::core::mem::size_of::<u32>() ] $(, $comment)? { 319 - $($fields)* 320 - } ); 321 - }; 322 - 323 - // Creates an array of registers at a relative offset from a base address provider. 324 - ( 325 - $name:ident @ $base:ty [ $offset:literal [ $size:expr ; $stride:expr ] ] 326 - $(, $comment:literal)? { $($fields:tt)* } 327 - ) => { 328 - static_assert!(::core::mem::size_of::<u32>() <= $stride); 329 - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); 330 - register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); 331 - }; 332 - 333 - // Shortcut for contiguous array of relative registers (stride == size of element). 334 - ( 335 - $name:ident @ $base:ty [ $offset:literal [ $size:expr ] ] $(, $comment:literal)? { 336 - $($fields:tt)* 337 - } 338 - ) => { 339 - register!($name @ $base [ $offset [ $size ; ::core::mem::size_of::<u32>() ] ] 340 - $(, $comment)? { $($fields)* } ); 341 - }; 342 - 343 - // Creates an alias of register `idx` of relative array of registers `alias` with its own 344 - // fields. 345 - ( 346 - $name:ident => $base:ty [ $alias:ident [ $idx:expr ] ] $(, $comment:literal)? { 347 - $($fields:tt)* 348 - } 349 - ) => { 350 - static_assert!($idx < $alias::SIZE); 351 - bitfield!(pub(crate) struct $name(u32) $(, $comment)? 
{ $($fields)* } ); 352 - register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); 353 - }; 354 - 355 - // Creates an alias of register `idx` of array of registers `alias` with its own fields. 356 - // This rule belongs to the (non-relative) register arrays set, but needs to be put last 357 - // to avoid it being interpreted in place of the relative register array alias rule. 358 - ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { 359 - static_assert!($idx < $alias::SIZE); 360 - bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); 361 - register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); 362 - }; 363 - 364 - // Generates the IO accessors for a fixed offset register. 365 - (@io_fixed $name:ident @ $offset:expr) => { 366 - #[allow(dead_code)] 367 - impl $name { 368 - pub(crate) const OFFSET: usize = $offset; 369 - 370 - /// Read the register from its address in `io`. 371 - #[inline(always)] 372 - pub(crate) fn read<T, I>(io: &T) -> Self where 373 - T: ::core::ops::Deref<Target = I>, 374 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 375 - { 376 - Self(io.read32($offset)) 377 - } 378 - 379 - /// Write the value contained in `self` to the register address in `io`. 380 - #[inline(always)] 381 - pub(crate) fn write<T, I>(self, io: &T) where 382 - T: ::core::ops::Deref<Target = I>, 383 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 384 - { 385 - io.write32(self.0, $offset) 386 - } 387 - 388 - /// Read the register from its address in `io` and run `f` on its value to obtain a new 389 - /// value to write back. 
390 - #[inline(always)] 391 - pub(crate) fn update<T, I, F>( 392 - io: &T, 393 - f: F, 394 - ) where 395 - T: ::core::ops::Deref<Target = I>, 396 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 397 - F: ::core::ops::FnOnce(Self) -> Self, 398 - { 399 - let reg = f(Self::read(io)); 400 - reg.write(io); 401 - } 402 - } 403 - }; 404 - 405 - // Generates the IO accessors for a relative offset register. 406 - (@io_relative $name:ident @ $base:ty [ $offset:expr ]) => { 407 - #[allow(dead_code)] 408 - impl $name { 409 - pub(crate) const OFFSET: usize = $offset; 410 - 411 - /// Read the register from `io`, using the base address provided by `base` and adding 412 - /// the register's offset to it. 413 - #[inline(always)] 414 - pub(crate) fn read<T, I, B>( 415 - io: &T, 416 - #[allow(unused_variables)] 417 - base: &B, 418 - ) -> Self where 419 - T: ::core::ops::Deref<Target = I>, 420 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 421 - B: crate::regs::macros::RegisterBase<$base>, 422 - { 423 - const OFFSET: usize = $name::OFFSET; 424 - 425 - let value = io.read32( 426 - <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET 427 - ); 428 - 429 - Self(value) 430 - } 431 - 432 - /// Write the value contained in `self` to `io`, using the base address provided by 433 - /// `base` and adding the register's offset to it. 434 - #[inline(always)] 435 - pub(crate) fn write<T, I, B>( 436 - self, 437 - io: &T, 438 - #[allow(unused_variables)] 439 - base: &B, 440 - ) where 441 - T: ::core::ops::Deref<Target = I>, 442 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 443 - B: crate::regs::macros::RegisterBase<$base>, 444 - { 445 - const OFFSET: usize = $name::OFFSET; 446 - 447 - io.write32( 448 - self.0, 449 - <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET 450 - ); 451 - } 452 - 453 - /// Read the register from `io`, using the base address provided by `base` and adding 454 - /// the register's offset to it, then run `f` on its value to obtain a new value to 455 - /// write back.
456 - #[inline(always)] 457 - pub(crate) fn update<T, I, B, F>( 458 - io: &T, 459 - base: &B, 460 - f: F, 461 - ) where 462 - T: ::core::ops::Deref<Target = I>, 463 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 464 - B: crate::regs::macros::RegisterBase<$base>, 465 - F: ::core::ops::FnOnce(Self) -> Self, 466 - { 467 - let reg = f(Self::read(io, base)); 468 - reg.write(io, base); 469 - } 470 - } 471 - }; 472 - 473 - // Generates the IO accessors for an array of registers. 474 - (@io_array $name:ident @ $offset:literal [ $size:expr ; $stride:expr ]) => { 475 - #[allow(dead_code)] 476 - impl $name { 477 - pub(crate) const OFFSET: usize = $offset; 478 - pub(crate) const SIZE: usize = $size; 479 - pub(crate) const STRIDE: usize = $stride; 480 - 481 - /// Read the array register at index `idx` from its address in `io`. 482 - #[inline(always)] 483 - pub(crate) fn read<T, I>( 484 - io: &T, 485 - idx: usize, 486 - ) -> Self where 487 - T: ::core::ops::Deref<Target = I>, 488 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 489 - { 490 - build_assert!(idx < Self::SIZE); 491 - 492 - let offset = Self::OFFSET + (idx * Self::STRIDE); 493 - let value = io.read32(offset); 494 - 495 - Self(value) 496 - } 497 - 498 - /// Write the value contained in `self` to the array register with index `idx` in `io`. 499 - #[inline(always)] 500 - pub(crate) fn write<T, I>( 501 - self, 502 - io: &T, 503 - idx: usize 504 - ) where 505 - T: ::core::ops::Deref<Target = I>, 506 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 507 - { 508 - build_assert!(idx < Self::SIZE); 509 - 510 - let offset = Self::OFFSET + (idx * Self::STRIDE); 511 - 512 - io.write32(self.0, offset); 513 - } 514 - 515 - /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a 516 - /// new value to write back. 
517 - #[inline(always)] 518 - pub(crate) fn update<T, I, F>( 519 - io: &T, 520 - idx: usize, 521 - f: F, 522 - ) where 523 - T: ::core::ops::Deref<Target = I>, 524 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 525 - F: ::core::ops::FnOnce(Self) -> Self, 526 - { 527 - let reg = f(Self::read(io, idx)); 528 - reg.write(io, idx); 529 - } 530 - 531 - /// Read the array register at index `idx` from its address in `io`. 532 - /// 533 - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned if the 534 - /// access was out-of-bounds. 535 - #[inline(always)] 536 - pub(crate) fn try_read<T, I>( 537 - io: &T, 538 - idx: usize, 539 - ) -> ::kernel::error::Result<Self> where 540 - T: ::core::ops::Deref<Target = I>, 541 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 542 - { 543 - if idx < Self::SIZE { 544 - Ok(Self::read(io, idx)) 545 - } else { 546 - Err(EINVAL) 547 - } 548 - } 549 - 550 - /// Write the value contained in `self` to the array register with index `idx` in `io`. 551 - /// 552 - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned if the 553 - /// access was out-of-bounds. 554 - #[inline(always)] 555 - pub(crate) fn try_write<T, I>( 556 - self, 557 - io: &T, 558 - idx: usize, 559 - ) -> ::kernel::error::Result where 560 - T: ::core::ops::Deref<Target = I>, 561 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 562 - { 563 - if idx < Self::SIZE { 564 - Ok(self.write(io, idx)) 565 - } else { 566 - Err(EINVAL) 567 - } 568 - } 569 - 570 - /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a 571 - /// new value to write back. 572 - /// 573 - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned if the 574 - /// access was out-of-bounds.
575 - #[inline(always)] 576 - pub(crate) fn try_update<T, I, F>( 577 - io: &T, 578 - idx: usize, 579 - f: F, 580 - ) -> ::kernel::error::Result where 581 - T: ::core::ops::Deref<Target = I>, 582 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 583 - F: ::core::ops::FnOnce(Self) -> Self, 584 - { 585 - if idx < Self::SIZE { 586 - Ok(Self::update(io, idx, f)) 587 - } else { 588 - Err(EINVAL) 589 - } 590 - } 591 - } 592 - }; 593 - 594 - // Generates the IO accessors for an array of relative registers. 595 - ( 596 - @io_relative_array $name:ident @ $base:ty 597 - [ $offset:literal [ $size:expr ; $stride:expr ] ] 598 - ) => { 599 - #[allow(dead_code)] 600 - impl $name { 601 - pub(crate) const OFFSET: usize = $offset; 602 - pub(crate) const SIZE: usize = $size; 603 - pub(crate) const STRIDE: usize = $stride; 604 - 605 - /// Read the array register at index `idx` from `io`, using the base address provided 606 - /// by `base` and adding the register's offset to it. 607 - #[inline(always)] 608 - pub(crate) fn read<T, I, B>( 609 - io: &T, 610 - #[allow(unused_variables)] 611 - base: &B, 612 - idx: usize, 613 - ) -> Self where 614 - T: ::core::ops::Deref<Target = I>, 615 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 616 - B: crate::regs::macros::RegisterBase<$base>, 617 - { 618 - build_assert!(idx < Self::SIZE); 619 - 620 - let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE + 621 - Self::OFFSET + (idx * Self::STRIDE); 622 - let value = io.read32(offset); 623 - 624 - Self(value) 625 - } 626 - 627 - /// Write the value contained in `self` to `io`, using the base address provided by 628 - /// `base` and adding the offset of array register `idx` to it.
629 - #[inline(always)] 630 - pub(crate) fn write<T, I, B>( 631 - self, 632 - io: &T, 633 - #[allow(unused_variables)] 634 - base: &B, 635 - idx: usize 636 - ) where 637 - T: ::core::ops::Deref<Target = I>, 638 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 639 - B: crate::regs::macros::RegisterBase<$base>, 640 - { 641 - build_assert!(idx < Self::SIZE); 642 - 643 - let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE + 644 - Self::OFFSET + (idx * Self::STRIDE); 645 - 646 - io.write32(self.0, offset); 647 - } 648 - 649 - /// Read the array register at index `idx` from `io`, using the base address provided 650 - /// by `base` and adding the register's offset to it, then run `f` on its value to 651 - /// obtain a new value to write back. 652 - #[inline(always)] 653 - pub(crate) fn update<T, I, B, F>( 654 - io: &T, 655 - base: &B, 656 - idx: usize, 657 - f: F, 658 - ) where 659 - T: ::core::ops::Deref<Target = I>, 660 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 661 - B: crate::regs::macros::RegisterBase<$base>, 662 - F: ::core::ops::FnOnce(Self) -> Self, 663 - { 664 - let reg = f(Self::read(io, base, idx)); 665 - reg.write(io, base, idx); 666 - } 667 - 668 - /// Read the array register at index `idx` from `io`, using the base address provided 669 - /// by `base` and adding the register's offset to it. 670 - /// 671 - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned if the 672 - /// access was out-of-bounds.
673 - #[inline(always)] 674 - pub(crate) fn try_read<T, I, B>( 675 - io: &T, 676 - base: &B, 677 - idx: usize, 678 - ) -> ::kernel::error::Result<Self> where 679 - T: ::core::ops::Deref<Target = I>, 680 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 681 - B: crate::regs::macros::RegisterBase<$base>, 682 - { 683 - if idx < Self::SIZE { 684 - Ok(Self::read(io, base, idx)) 685 - } else { 686 - Err(EINVAL) 687 - } 688 - } 689 - 690 - /// Write the value contained in `self` to `io`, using the base address provided by 691 - /// `base` and adding the offset of array register `idx` to it. 692 - /// 693 - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned if the 694 - /// access was out-of-bounds. 695 - #[inline(always)] 696 - pub(crate) fn try_write<T, I, B>( 697 - self, 698 - io: &T, 699 - base: &B, 700 - idx: usize, 701 - ) -> ::kernel::error::Result where 702 - T: ::core::ops::Deref<Target = I>, 703 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 704 - B: crate::regs::macros::RegisterBase<$base>, 705 - { 706 - if idx < Self::SIZE { 707 - Ok(self.write(io, base, idx)) 708 - } else { 709 - Err(EINVAL) 710 - } 711 - } 712 - 713 - /// Read the array register at index `idx` from `io`, using the base address provided 714 - /// by `base` and adding the register's offset to it, then run `f` on its value to 715 - /// obtain a new value to write back. 716 - /// 717 - /// The validity of `idx` is checked at run-time, and `EINVAL` is returned if the 718 - /// access was out-of-bounds.
719 - #[inline(always)] 720 - pub(crate) fn try_update<T, I, B, F>( 721 - io: &T, 722 - base: &B, 723 - idx: usize, 724 - f: F, 725 - ) -> ::kernel::error::Result where 726 - T: ::core::ops::Deref<Target = I>, 727 - I: ::kernel::io::IoKnownSize + ::kernel::io::IoCapable<u32>, 728 - B: crate::regs::macros::RegisterBase<$base>, 729 - F: ::core::ops::FnOnce(Self) -> Self, 730 - { 731 - if idx < Self::SIZE { 732 - Ok(Self::update(io, base, idx, f)) 733 - } else { 734 - Err(EINVAL) 735 - } 736 - } 737 - } 738 - }; 739 - }

+14

rust/bindings/bindings_helper.h

··· 29 29 #include <linux/hrtimer_types.h> 30 30 31 31 #include <linux/acpi.h> 32 + #include <linux/gpu_buddy.h> 32 33 #include <drm/drm_device.h> 33 34 #include <drm/drm_drv.h> 34 35 #include <drm/drm_file.h> 35 36 #include <drm/drm_gem.h> 37 + #include <drm/drm_gem_shmem_helper.h> 36 38 #include <drm/drm_ioctl.h> 37 39 #include <kunit/test.h> 38 40 #include <linux/auxiliary_bus.h> ··· 53 51 #include <linux/device/faux.h> 54 52 #include <linux/dma-direction.h> 55 53 #include <linux/dma-mapping.h> 54 + #include <linux/dma-resv.h> 56 55 #include <linux/errname.h> 57 56 #include <linux/ethtool.h> 58 57 #include <linux/fdtable.h> ··· 64 61 #include <linux/interrupt.h> 65 62 #include <linux/io-pgtable.h> 66 63 #include <linux/ioport.h> 64 + #include <linux/iosys-map.h> 67 65 #include <linux/jiffies.h> 68 66 #include <linux/jump_label.h> 69 67 #include <linux/mdio.h> ··· 149 145 const vm_flags_t RUST_CONST_HELPER_VM_MIXEDMAP = VM_MIXEDMAP; 150 146 const vm_flags_t RUST_CONST_HELPER_VM_HUGEPAGE = VM_HUGEPAGE; 151 147 const vm_flags_t RUST_CONST_HELPER_VM_NOHUGEPAGE = VM_NOHUGEPAGE; 148 + 149 + #if IS_ENABLED(CONFIG_GPU_BUDDY) 150 + const unsigned long RUST_CONST_HELPER_GPU_BUDDY_RANGE_ALLOCATION = GPU_BUDDY_RANGE_ALLOCATION; 151 + const unsigned long RUST_CONST_HELPER_GPU_BUDDY_TOPDOWN_ALLOCATION = GPU_BUDDY_TOPDOWN_ALLOCATION; 152 + const unsigned long RUST_CONST_HELPER_GPU_BUDDY_CONTIGUOUS_ALLOCATION = 153 + GPU_BUDDY_CONTIGUOUS_ALLOCATION; 154 + const unsigned long RUST_CONST_HELPER_GPU_BUDDY_CLEAR_ALLOCATION = GPU_BUDDY_CLEAR_ALLOCATION; 155 + const unsigned long RUST_CONST_HELPER_GPU_BUDDY_CLEARED = GPU_BUDDY_CLEARED; 156 + const unsigned long RUST_CONST_HELPER_GPU_BUDDY_TRIM_DISABLE = GPU_BUDDY_TRIM_DISABLE; 157 + #endif 152 158 153 159 #if IS_ENABLED(CONFIG_ANDROID_BINDER_IPC_RUST) 154 160 #include "../../drivers/android/binder/rust_binder.h"

+5

rust/helpers/device.c

··· 25 25 { 26 26 dev_set_drvdata(dev, data); 27 27 } 28 + 29 + __rust_helper const char *rust_helper_dev_name(const struct device *dev) 30 + { 31 + return dev_name(dev); 32 + }

+14

rust/helpers/dma-resv.c

// SPDX-License-Identifier: GPL-2.0

#include <linux/dma-resv.h>

/*
 * Wrapper around dma_resv_lock(): forwards @obj and @ctx unchanged and
 * returns whatever dma_resv_lock() returns.
 */
__rust_helper
int rust_helper_dma_resv_lock(struct dma_resv *obj, struct ww_acquire_ctx *ctx)
{
	return dma_resv_lock(obj, ctx);
}

/* Wrapper around dma_resv_unlock(): forwards @obj unchanged. */
__rust_helper void rust_helper_dma_resv_unlock(struct dma_resv *obj)
{
	dma_resv_unlock(obj);
}

+55 -1

rust/helpers/drm.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include <drm/drm_gem.h> 4 + #include <drm/drm_gem_shmem_helper.h> 4 5 #include <drm/drm_vma_manager.h> 5 6 6 7 #ifdef CONFIG_DRM ··· 22 21 return drm_vma_node_offset_addr(node); 23 22 } 24 23 25 - #endif 24 + #ifdef CONFIG_DRM_GEM_SHMEM_HELPER 25 + __rust_helper void 26 + rust_helper_drm_gem_shmem_object_free(struct drm_gem_object *obj) 27 + { 28 + drm_gem_shmem_object_free(obj); 29 + } 30 + 31 + __rust_helper void 32 + rust_helper_drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent, 33 + const struct drm_gem_object *obj) 34 + { 35 + drm_gem_shmem_object_print_info(p, indent, obj); 36 + } 37 + 38 + __rust_helper int 39 + rust_helper_drm_gem_shmem_object_pin(struct drm_gem_object *obj) 40 + { 41 + return drm_gem_shmem_object_pin(obj); 42 + } 43 + 44 + __rust_helper void 45 + rust_helper_drm_gem_shmem_object_unpin(struct drm_gem_object *obj) 46 + { 47 + drm_gem_shmem_object_unpin(obj); 48 + } 49 + 50 + __rust_helper struct sg_table * 51 + rust_helper_drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj) 52 + { 53 + return drm_gem_shmem_object_get_sg_table(obj); 54 + } 55 + 56 + __rust_helper int 57 + rust_helper_drm_gem_shmem_object_vmap(struct drm_gem_object *obj, 58 + struct iosys_map *map) 59 + { 60 + return drm_gem_shmem_object_vmap(obj, map); 61 + } 62 + 63 + __rust_helper void 64 + rust_helper_drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, 65 + struct iosys_map *map) 66 + { 67 + drm_gem_shmem_object_vunmap(obj, map); 68 + } 69 + 70 + __rust_helper int 71 + rust_helper_drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) 72 + { 73 + return drm_gem_shmem_object_mmap(obj, vma); 74 + } 75 + 76 + #endif /* CONFIG_DRM_GEM_SHMEM_HELPER */ 77 + #endif /* CONFIG_DRM */

+17

rust/helpers/gpu.c

// SPDX-License-Identifier: GPL-2.0

#include <linux/gpu_buddy.h>

#ifdef CONFIG_GPU_BUDDY

/* Wrapper around gpu_buddy_block_offset(): returns its result for @block. */
__rust_helper u64 rust_helper_gpu_buddy_block_offset(const struct gpu_buddy_block *block)
{
	return gpu_buddy_block_offset(block);
}

/* Wrapper around gpu_buddy_block_order(): returns its result for @block. */
__rust_helper unsigned int rust_helper_gpu_buddy_block_order(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_order(block);
}

#endif /* CONFIG_GPU_BUDDY */

+3

rust/helpers/helpers.c

··· 28 28 #include "cred.c" 29 29 #include "device.c" 30 30 #include "dma.c" 31 + #include "dma-resv.c" 31 32 #include "drm.c" 32 33 #include "err.c" 33 34 #include "irq.c" 34 35 #include "fs.c" 36 + #include "gpu.c" 35 37 #include "io.c" 36 38 #include "jump_label.c" 37 39 #include "kunit.c" 40 + #include "list.c" 38 41 #include "maple_tree.c" 39 42 #include "mm.c" 40 43 #include "mutex.c"

+17

rust/helpers/list.c

// SPDX-License-Identifier: GPL-2.0

/*
 * Helpers for C circular doubly linked list implementation.
 */

#include <linux/list.h>

/* Wrapper around INIT_LIST_HEAD(): forwards @list unchanged. */
__rust_helper void rust_helper_INIT_LIST_HEAD(struct list_head *list)
{
	INIT_LIST_HEAD(list);
}

/* Wrapper around list_add_tail(): forwards @new and @head unchanged. */
__rust_helper void rust_helper_list_add_tail(struct list_head *new, struct list_head *head)
{
	list_add_tail(new, head);
}

+13 -2

rust/kernel/device.rs

··· 489 489 // defined as a `#[repr(transparent)]` wrapper around `fwnode_handle`. 490 490 Some(unsafe { &*fwnode_handle.cast() }) 491 491 } 492 + 493 + /// Returns the name of the device. 494 + /// 495 + /// This is the kobject name of the device, or its initial name if the kobject is not yet 496 + /// available. 497 + #[inline] 498 + pub fn name(&self) -> &CStr { 499 + // SAFETY: By its type invariant `self.as_raw()` is a valid pointer to a `struct device`. 500 + // The returned string is valid for the lifetime of the device. 501 + unsafe { CStr::from_char_ptr(bindings::dev_name(self.as_raw())) } 502 + } 492 503 } 493 504 494 505 // SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic ··· 586 575 /// The bound context indicates that for the entire duration of the lifetime of a [`Device<Bound>`] 587 576 /// reference, the [`Device`] is guaranteed to be bound to a driver. 588 577 /// 589 - /// Some APIs, such as [`dma::CoherentAllocation`] or [`Devres`] rely on the [`Device`] to be bound, 578 + /// Some APIs, such as [`dma::Coherent`] or [`Devres`] rely on the [`Device`] to be bound, 590 579 /// which can be proven with the [`Bound`] device context. 591 580 /// 592 581 /// Any abstraction that can guarantee a scope where the corresponding bus device is bound, should ··· 595 584 /// 596 585 /// [`Devres`]: kernel::devres::Devres 597 586 /// [`Devres::access`]: kernel::devres::Devres::access 598 - /// [`dma::CoherentAllocation`]: kernel::dma::CoherentAllocation 587 + /// [`dma::Coherent`]: kernel::dma::Coherent 599 588 pub struct Bound; 600 589 601 590 mod private {

+684 -215

rust/kernel/dma.rs

··· 5 5 //! C header: [`include/linux/dma-mapping.h`](srctree/include/linux/dma-mapping.h) 6 6 7 7 use crate::{ 8 - bindings, build_assert, device, 9 - device::{Bound, Core}, 10 - error::{to_result, Result}, 8 + bindings, 9 + debugfs, 10 + device::{ 11 + self, 12 + Bound, 13 + Core, // 14 + }, 15 + error::to_result, 16 + fs::file, 11 17 prelude::*, 18 + ptr::KnownSize, 12 19 sync::aref::ARef, 13 - transmute::{AsBytes, FromBytes}, 20 + transmute::{ 21 + AsBytes, 22 + FromBytes, // 23 + }, // 24 + uaccess::UserSliceWriter, 14 25 }; 15 - use core::ptr::NonNull; 26 + use core::{ 27 + ops::{ 28 + Deref, 29 + DerefMut, // 30 + }, 31 + ptr::NonNull, // 32 + }; 16 33 17 34 /// DMA address type. 18 35 /// ··· 56 39 /// # Safety 57 40 /// 58 41 /// This method must not be called concurrently with any DMA allocation or mapping primitives, 59 - /// such as [`CoherentAllocation::alloc_attrs`]. 42 + /// such as [`Coherent::zeroed`]. 60 43 unsafe fn dma_set_mask(&self, mask: DmaMask) -> Result { 61 44 // SAFETY: 62 45 // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid. ··· 73 56 /// # Safety 74 57 /// 75 58 /// This method must not be called concurrently with any DMA allocation or mapping primitives, 76 - /// such as [`CoherentAllocation::alloc_attrs`]. 59 + /// such as [`Coherent::zeroed`]. 77 60 unsafe fn dma_set_coherent_mask(&self, mask: DmaMask) -> Result { 78 61 // SAFETY: 79 62 // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid. ··· 92 75 /// # Safety 93 76 /// 94 77 /// This method must not be called concurrently with any DMA allocation or mapping primitives, 95 - /// such as [`CoherentAllocation::alloc_attrs`]. 78 + /// such as [`Coherent::zeroed`]. 96 79 unsafe fn dma_set_mask_and_coherent(&self, mask: DmaMask) -> Result { 97 80 // SAFETY: 98 81 // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid. 
··· 111 94 /// # Safety 112 95 /// 113 96 /// This method must not be called concurrently with any DMA allocation or mapping primitives, 114 - /// such as [`CoherentAllocation::alloc_attrs`]. 97 + /// such as [`Coherent::zeroed`]. 115 98 unsafe fn dma_set_max_seg_size(&self, size: u32) { 116 99 // SAFETY: 117 100 // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid. ··· 211 194 /// 212 195 /// ``` 213 196 /// # use kernel::device::{Bound, Device}; 214 - /// use kernel::dma::{attrs::*, CoherentAllocation}; 197 + /// use kernel::dma::{attrs::*, Coherent}; 215 198 /// 216 199 /// # fn test(dev: &Device<Bound>) -> Result { 217 200 /// let attribs = DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_WARN; 218 - /// let c: CoherentAllocation<u64> = 219 - /// CoherentAllocation::alloc_attrs(dev, 4, GFP_KERNEL, attribs)?; 201 + /// let c: Coherent<[u64]> = 202 + /// Coherent::zeroed_slice_with_attrs(dev, 4, GFP_KERNEL, attribs)?; 220 203 /// # Ok::<(), Error>(()) } 221 204 /// ``` 222 205 #[derive(Clone, Copy, PartialEq)] ··· 266 249 267 250 /// Specifies that writes to the mapping may be buffered to improve performance. 268 251 pub const DMA_ATTR_WRITE_COMBINE: Attrs = Attrs(bindings::DMA_ATTR_WRITE_COMBINE); 269 - 270 - /// Lets the platform to avoid creating a kernel virtual mapping for the allocated buffer. 271 - pub const DMA_ATTR_NO_KERNEL_MAPPING: Attrs = Attrs(bindings::DMA_ATTR_NO_KERNEL_MAPPING); 272 252 273 253 /// Allows platform code to skip synchronization of the CPU cache for the given buffer assuming 274 254 /// that it has been already transferred to 'device' domain. ··· 358 344 } 359 345 } 360 346 347 + /// CPU-owned DMA allocation that can be converted into a device-shared [`Coherent`] object. 348 + /// 349 + /// Unlike [`Coherent`], a [`CoherentBox`] is guaranteed to be fully owned by the CPU -- its DMA 350 + /// address is not exposed and it cannot be accessed by a device. 
This means it can safely be used 351 + /// like a normal boxed allocation (e.g. direct reads, writes, and mutable slices are all safe). 352 + /// 353 + /// A typical use is to allocate a [`CoherentBox`], populate it with normal CPU access, and then 354 + /// convert it into a [`Coherent`] object to share it with the device. 355 + /// 356 + /// # Examples 357 + /// 358 + /// `CoherentBox<T>`: 359 + /// 360 + /// ``` 361 + /// # use kernel::device::{ 362 + /// # Bound, 363 + /// # Device, 364 + /// # }; 365 + /// use kernel::dma::{attrs::*, 366 + /// Coherent, 367 + /// CoherentBox, 368 + /// }; 369 + /// 370 + /// # fn test(dev: &Device<Bound>) -> Result { 371 + /// let mut dmem: CoherentBox<u64> = CoherentBox::zeroed(dev, GFP_KERNEL)?; 372 + /// *dmem = 42; 373 + /// let dmem: Coherent<u64> = dmem.into(); 374 + /// # Ok::<(), Error>(()) } 375 + /// ``` 376 + /// 377 + /// `CoherentBox<[T]>`: 378 + /// 379 + /// 380 + /// ``` 381 + /// # use kernel::device::{ 382 + /// # Bound, 383 + /// # Device, 384 + /// # }; 385 + /// use kernel::dma::{attrs::*, 386 + /// Coherent, 387 + /// CoherentBox, 388 + /// }; 389 + /// 390 + /// # fn test(dev: &Device<Bound>) -> Result { 391 + /// let mut dmem: CoherentBox<[u64]> = CoherentBox::zeroed_slice(dev, 4, GFP_KERNEL)?; 392 + /// dmem.fill(42); 393 + /// let dmem: Coherent<[u64]> = dmem.into(); 394 + /// # Ok::<(), Error>(()) } 395 + /// ``` 396 + pub struct CoherentBox<T: KnownSize + ?Sized>(Coherent<T>); 397 + 398 + impl<T: AsBytes + FromBytes> CoherentBox<[T]> { 399 + /// [`CoherentBox`] variant of [`Coherent::zeroed_slice_with_attrs`]. 400 + #[inline] 401 + pub fn zeroed_slice_with_attrs( 402 + dev: &device::Device<Bound>, 403 + count: usize, 404 + gfp_flags: kernel::alloc::Flags, 405 + dma_attrs: Attrs, 406 + ) -> Result<Self> { 407 + Coherent::zeroed_slice_with_attrs(dev, count, gfp_flags, dma_attrs).map(Self) 408 + } 409 + 410 + /// Same as [`CoherentBox::zeroed_slice_with_attrs`], but with `dma::Attrs(0)`.
411 + #[inline] 412 + pub fn zeroed_slice( 413 + dev: &device::Device<Bound>, 414 + count: usize, 415 + gfp_flags: kernel::alloc::Flags, 416 + ) -> Result<Self> { 417 + Self::zeroed_slice_with_attrs(dev, count, gfp_flags, Attrs(0)) 418 + } 419 + 420 + /// Initializes the element at `i` using the given initializer. 421 + /// 422 + /// Returns `EINVAL` if `i` is out of bounds. 423 + pub fn init_at<E>(&mut self, i: usize, init: impl Init<T, E>) -> Result 424 + where 425 + Error: From<E>, 426 + { 427 + if i >= self.0.len() { 428 + return Err(EINVAL); 429 + } 430 + 431 + let ptr = &raw mut self[i]; 432 + 433 + // SAFETY: 434 + // - `ptr` is valid, properly aligned, and within this allocation. 435 + // - `T: AsBytes + FromBytes` guarantees all bit patterns are valid, so partial writes on 436 + // error cannot leave the element in an invalid state. 437 + // - The DMA address has not been exposed yet, so there is no concurrent device access. 438 + unsafe { init.__init(ptr)? }; 439 + 440 + Ok(()) 441 + } 442 + 443 + /// Allocates a region of coherent memory of the same size as `data` and initializes it with a 444 + /// copy of its contents. 445 + /// 446 + /// This is the [`CoherentBox`] variant of [`Coherent::from_slice_with_attrs`]. 
447 + /// 448 + /// # Examples 449 + /// 450 + /// ``` 451 + /// use core::ops::Deref; 452 + /// 453 + /// # use kernel::device::{Bound, Device}; 454 + /// use kernel::dma::{ 455 + /// attrs::*, 456 + /// CoherentBox 457 + /// }; 458 + /// 459 + /// # fn test(dev: &Device<Bound>) -> Result { 460 + /// let data = [0u8, 1u8, 2u8, 3u8]; 461 + /// let c: CoherentBox<[u8]> = 462 + /// CoherentBox::from_slice_with_attrs(dev, &data, GFP_KERNEL, DMA_ATTR_NO_WARN)?; 463 + /// 464 + /// assert_eq!(c.deref(), &data); 465 + /// # Ok::<(), Error>(()) } 466 + /// ``` 467 + pub fn from_slice_with_attrs( 468 + dev: &device::Device<Bound>, 469 + data: &[T], 470 + gfp_flags: kernel::alloc::Flags, 471 + dma_attrs: Attrs, 472 + ) -> Result<Self> 473 + where 474 + T: Copy, 475 + { 476 + let mut slice = Self(Coherent::<T>::alloc_slice_with_attrs( 477 + dev, 478 + data.len(), 479 + gfp_flags, 480 + dma_attrs, 481 + )?); 482 + 483 + // PANIC: `slice` was created with length `data.len()`. 484 + slice.copy_from_slice(data); 485 + 486 + Ok(slice) 487 + } 488 + 489 + /// Performs the same functionality as [`CoherentBox::from_slice_with_attrs`], except the 490 + /// `dma_attrs` is 0 by default. 491 + #[inline] 492 + pub fn from_slice( 493 + dev: &device::Device<Bound>, 494 + data: &[T], 495 + gfp_flags: kernel::alloc::Flags, 496 + ) -> Result<Self> 497 + where 498 + T: Copy, 499 + { 500 + Self::from_slice_with_attrs(dev, data, gfp_flags, Attrs(0)) 501 + } 502 + } 503 + 504 + impl<T: AsBytes + FromBytes> CoherentBox<T> { 505 + /// Same as [`CoherentBox::zeroed_slice_with_attrs`], but for a single element. 506 + #[inline] 507 + pub fn zeroed_with_attrs( 508 + dev: &device::Device<Bound>, 509 + gfp_flags: kernel::alloc::Flags, 510 + dma_attrs: Attrs, 511 + ) -> Result<Self> { 512 + Coherent::zeroed_with_attrs(dev, gfp_flags, dma_attrs).map(Self) 513 + } 514 + 515 + /// Same as [`CoherentBox::zeroed_slice`], but for a single element. 
516 + #[inline] 517 + pub fn zeroed(dev: &device::Device<Bound>, gfp_flags: kernel::alloc::Flags) -> Result<Self> { 518 + Self::zeroed_with_attrs(dev, gfp_flags, Attrs(0)) 519 + } 520 + } 521 + 522 + impl<T: KnownSize + ?Sized> Deref for CoherentBox<T> { 523 + type Target = T; 524 + 525 + #[inline] 526 + fn deref(&self) -> &Self::Target { 527 + // SAFETY: 528 + // - We have not exposed the DMA address yet, so there can't be any concurrent access by a 529 + // device. 530 + // - We have exclusive access to `self.0`. 531 + unsafe { self.0.as_ref() } 532 + } 533 + } 534 + 535 + impl<T: AsBytes + FromBytes + KnownSize + ?Sized> DerefMut for CoherentBox<T> { 536 + #[inline] 537 + fn deref_mut(&mut self) -> &mut Self::Target { 538 + // SAFETY: 539 + // - We have not exposed the DMA address yet, so there can't be any concurrent access by a 540 + // device. 541 + // - We have exclusive access to `self.0`. 542 + unsafe { self.0.as_mut() } 543 + } 544 + } 545 + 546 + impl<T: AsBytes + FromBytes + KnownSize + ?Sized> From<CoherentBox<T>> for Coherent<T> { 547 + #[inline] 548 + fn from(value: CoherentBox<T>) -> Self { 549 + value.0 550 + } 551 + } 552 + 361 553 /// An abstraction of the `dma_alloc_coherent` API. 362 554 /// 363 555 /// This is an abstraction around the `dma_alloc_coherent` API which is used to allocate and map 364 556 /// large coherent DMA regions. 365 557 /// 366 - /// A [`CoherentAllocation`] instance contains a pointer to the allocated region (in the 558 + /// A [`Coherent`] instance contains a pointer to the allocated region (in the 367 559 /// processor's virtual address space) and the device address which can be given to the device 368 - /// as the DMA address base of the region. The region is released once [`CoherentAllocation`] 560 + /// as the DMA address base of the region. The region is released once [`Coherent`] 369 561 /// is dropped. 
370 562 /// 371 563 /// # Invariants 372 564 /// 373 - /// - For the lifetime of an instance of [`CoherentAllocation`], the `cpu_addr` is a valid pointer 565 + /// - For the lifetime of an instance of [`Coherent`], the `cpu_addr` is a valid pointer 374 566 /// to an allocated region of coherent memory and `dma_handle` is the DMA address base of the 375 567 /// region. 376 - /// - The size in bytes of the allocation is equal to `size_of::<T> * count`. 377 - /// - `size_of::<T> * count` fits into a `usize`. 568 + /// - The size in bytes of the allocation is equal to the size that `T::size` reports for `cpu_addr`. 378 569 // TODO 379 570 // 380 571 // DMA allocations potentially carry device resources (e.g.IOMMU mappings), hence for soundness ··· 590 371 // allocation from surviving device unbind; it would require RCU read side critical sections to 591 372 // access the memory, which may require subsequent unnecessary copies. 592 373 // 593 - // Hence, find a way to revoke the device resources of a `CoherentAllocation`, but not the 594 - // entire `CoherentAllocation` including the allocated memory itself. 595 - pub struct CoherentAllocation<T: AsBytes + FromBytes> { 374 + // Hence, find a way to revoke the device resources of a `Coherent`, but not the 375 + // entire `Coherent` including the allocated memory itself. 376 + pub struct Coherent<T: KnownSize + ?Sized> { 596 377 dev: ARef<device::Device>, 597 378 dma_handle: DmaAddress, 598 - count: usize, 599 379 cpu_addr: NonNull<T>, 600 380 dma_attrs: Attrs, 601 381 } 602 382 603 - impl<T: AsBytes + FromBytes> CoherentAllocation<T> { 604 - /// Allocates a region of `size_of::<T> * count` of coherent memory.
605 - /// 606 - /// # Examples 607 - /// 608 - /// ``` 609 - /// # use kernel::device::{Bound, Device}; 610 - /// use kernel::dma::{attrs::*, CoherentAllocation}; 611 - /// 612 - /// # fn test(dev: &Device<Bound>) -> Result { 613 - /// let c: CoherentAllocation<u64> = 614 - /// CoherentAllocation::alloc_attrs(dev, 4, GFP_KERNEL, DMA_ATTR_NO_WARN)?; 615 - /// # Ok::<(), Error>(()) } 616 - /// ``` 617 - pub fn alloc_attrs( 618 - dev: &device::Device<Bound>, 619 - count: usize, 620 - gfp_flags: kernel::alloc::Flags, 621 - dma_attrs: Attrs, 622 - ) -> Result<CoherentAllocation<T>> { 623 - build_assert!( 624 - core::mem::size_of::<T>() > 0, 625 - "It doesn't make sense for the allocated type to be a ZST" 626 - ); 627 - 628 - let size = count 629 - .checked_mul(core::mem::size_of::<T>()) 630 - .ok_or(EOVERFLOW)?; 631 - let mut dma_handle = 0; 632 - // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`. 633 - let addr = unsafe { 634 - bindings::dma_alloc_attrs( 635 - dev.as_raw(), 636 - size, 637 - &mut dma_handle, 638 - gfp_flags.as_raw(), 639 - dma_attrs.as_raw(), 640 - ) 641 - }; 642 - let addr = NonNull::new(addr).ok_or(ENOMEM)?; 643 - // INVARIANT: 644 - // - We just successfully allocated a coherent region which is accessible for 645 - // `count` elements, hence the cpu address is valid. We also hold a refcounted reference 646 - // to the device. 647 - // - The allocated `size` is equal to `size_of::<T> * count`. 648 - // - The allocated `size` fits into a `usize`. 649 - Ok(Self { 650 - dev: dev.into(), 651 - dma_handle, 652 - count, 653 - cpu_addr: addr.cast(), 654 - dma_attrs, 655 - }) 656 - } 657 - 658 - /// Performs the same functionality as [`CoherentAllocation::alloc_attrs`], except the 659 - /// `dma_attrs` is 0 by default. 
660 - pub fn alloc_coherent( 661 - dev: &device::Device<Bound>, 662 - count: usize, 663 - gfp_flags: kernel::alloc::Flags, 664 - ) -> Result<CoherentAllocation<T>> { 665 - CoherentAllocation::alloc_attrs(dev, count, gfp_flags, Attrs(0)) 666 - } 667 - 668 - /// Returns the number of elements `T` in this allocation. 669 - /// 670 - /// Note that this is not the size of the allocation in bytes, which is provided by 671 - /// [`Self::size`]. 672 - pub fn count(&self) -> usize { 673 - self.count 674 - } 675 - 383 + impl<T: KnownSize + ?Sized> Coherent<T> { 676 384 /// Returns the size in bytes of this allocation. 385 + #[inline] 677 386 pub fn size(&self) -> usize { 678 - // INVARIANT: The type invariant of `Self` guarantees that `size_of::<T> * count` fits into 679 - // a `usize`. 680 - self.count * core::mem::size_of::<T>() 387 + T::size(self.cpu_addr.as_ptr()) 681 388 } 682 389 683 390 /// Returns the raw pointer to the allocated region in the CPU's virtual address space. 684 391 #[inline] 685 - pub fn as_ptr(&self) -> *const [T] { 686 - core::ptr::slice_from_raw_parts(self.cpu_addr.as_ptr(), self.count) 392 + pub fn as_ptr(&self) -> *const T { 393 + self.cpu_addr.as_ptr() 687 394 } 688 395 689 396 /// Returns the raw pointer to the allocated region in the CPU's virtual address space as 690 397 /// a mutable pointer. 691 398 #[inline] 692 - pub fn as_mut_ptr(&self) -> *mut [T] { 693 - core::ptr::slice_from_raw_parts_mut(self.cpu_addr.as_ptr(), self.count) 694 - } 695 - 696 - /// Returns the base address to the allocated region in the CPU's virtual address space. 697 - pub fn start_ptr(&self) -> *const T { 698 - self.cpu_addr.as_ptr() 699 - } 700 - 701 - /// Returns the base address to the allocated region in the CPU's virtual address space as 702 - /// a mutable pointer. 
703 - pub fn start_ptr_mut(&mut self) -> *mut T { 399 + pub fn as_mut_ptr(&self) -> *mut T { 704 400 self.cpu_addr.as_ptr() 705 401 } 706 402 707 403 /// Returns a DMA handle which may be given to the device as the DMA address base of 708 404 /// the region. 405 + #[inline] 709 406 pub fn dma_handle(&self) -> DmaAddress { 710 407 self.dma_handle 711 408 } 712 409 713 - /// Returns a DMA handle starting at `offset` (in units of `T`) which may be given to the 714 - /// device as the DMA address base of the region. 715 - /// 716 - /// Returns `EINVAL` if `offset` is not within the bounds of the allocation. 717 - pub fn dma_handle_with_offset(&self, offset: usize) -> Result<DmaAddress> { 718 - if offset >= self.count { 719 - Err(EINVAL) 720 - } else { 721 - // INVARIANT: The type invariant of `Self` guarantees that `size_of::<T> * count` fits 722 - // into a `usize`, and `offset` is inferior to `count`. 723 - Ok(self.dma_handle + (offset * core::mem::size_of::<T>()) as DmaAddress) 724 - } 725 - } 726 - 727 - /// Common helper to validate a range applied from the allocated region in the CPU's virtual 728 - /// address space. 729 - fn validate_range(&self, offset: usize, count: usize) -> Result { 730 - if offset.checked_add(count).ok_or(EOVERFLOW)? > self.count { 731 - return Err(EINVAL); 732 - } 733 - Ok(()) 734 - } 735 - 736 - /// Returns the data from the region starting from `offset` as a slice. 737 - /// `offset` and `count` are in units of `T`, not the number of bytes. 738 - /// 739 - /// For ringbuffer type of r/w access or use-cases where the pointer to the live data is needed, 740 - /// [`CoherentAllocation::start_ptr`] or [`CoherentAllocation::start_ptr_mut`] could be used 741 - /// instead. 410 + /// Returns a reference to the data in the region. 742 411 /// 743 412 /// # Safety 744 413 /// ··· 634 527 /// slice is live. 635 528 /// * Callers must ensure that this call does not race with a write to the same region while 636 529 /// the returned slice is live. 
637 - pub unsafe fn as_slice(&self, offset: usize, count: usize) -> Result<&[T]> { 638 - self.validate_range(offset, count)?; 639 - // SAFETY: 640 - // - The pointer is valid due to type invariant on `CoherentAllocation`, 641 - // we've just checked that the range and index is within bounds. The immutability of the 642 - // data is also guaranteed by the safety requirements of the function. 643 - // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked 644 - // that `self.count` won't overflow early in the constructor. 645 - Ok(unsafe { core::slice::from_raw_parts(self.start_ptr().add(offset), count) }) 530 + #[inline] 531 + pub unsafe fn as_ref(&self) -> &T { 532 + // SAFETY: per safety requirement. 533 + unsafe { &*self.as_ptr() } 646 534 } 647 535 648 - /// Performs the same functionality as [`CoherentAllocation::as_slice`], except that a mutable 649 - /// slice is returned. 536 + /// Returns a mutable reference to the data in the region. 650 537 /// 651 538 /// # Safety 652 539 /// ··· 648 547 /// slice is live. 649 548 /// * Callers must ensure that this call does not race with a read or write to the same region 650 549 /// while the returned slice is live. 651 - pub unsafe fn as_slice_mut(&mut self, offset: usize, count: usize) -> Result<&mut [T]> { 652 - self.validate_range(offset, count)?; 653 - // SAFETY: 654 - // - The pointer is valid due to type invariant on `CoherentAllocation`, 655 - // we've just checked that the range and index is within bounds. The immutability of the 656 - // data is also guaranteed by the safety requirements of the function. 657 - // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked 658 - // that `self.count` won't overflow early in the constructor. 659 - Ok(unsafe { core::slice::from_raw_parts_mut(self.start_ptr_mut().add(offset), count) }) 660 - } 661 - 662 - /// Writes data to the region starting from `offset`. 
`offset` is in units of `T`, not the 663 - /// number of bytes. 664 - /// 665 - /// # Safety 666 - /// 667 - /// * Callers must ensure that this call does not race with a read or write to the same region 668 - /// that overlaps with this write. 669 - /// 670 - /// # Examples 671 - /// 672 - /// ``` 673 - /// # fn test(alloc: &mut kernel::dma::CoherentAllocation<u8>) -> Result { 674 - /// let somedata: [u8; 4] = [0xf; 4]; 675 - /// let buf: &[u8] = &somedata; 676 - /// // SAFETY: There is no concurrent HW operation on the device and no other R/W access to the 677 - /// // region. 678 - /// unsafe { alloc.write(buf, 0)?; } 679 - /// # Ok::<(), Error>(()) } 680 - /// ``` 681 - pub unsafe fn write(&mut self, src: &[T], offset: usize) -> Result { 682 - self.validate_range(offset, src.len())?; 683 - // SAFETY: 684 - // - The pointer is valid due to type invariant on `CoherentAllocation` 685 - // and we've just checked that the range and index is within bounds. 686 - // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked 687 - // that `self.count` won't overflow early in the constructor. 688 - unsafe { 689 - core::ptr::copy_nonoverlapping( 690 - src.as_ptr(), 691 - self.start_ptr_mut().add(offset), 692 - src.len(), 693 - ) 694 - }; 695 - Ok(()) 550 + #[expect(clippy::mut_from_ref, reason = "unsafe to use API")] 551 + #[inline] 552 + pub unsafe fn as_mut(&self) -> &mut T { 553 + // SAFETY: per safety requirement. 554 + unsafe { &mut *self.as_mut_ptr() } 696 555 } 697 556 698 557 /// Reads the value of `field` and ensures that its type is [`FromBytes`]. ··· 702 641 } 703 642 } 704 643 644 + impl<T: AsBytes + FromBytes> Coherent<T> { 645 + /// Allocates a region of `T` of coherent memory. 
646 + fn alloc_with_attrs( 647 + dev: &device::Device<Bound>, 648 + gfp_flags: kernel::alloc::Flags, 649 + dma_attrs: Attrs, 650 + ) -> Result<Self> { 651 + const { 652 + assert!( 653 + core::mem::size_of::<T>() > 0, 654 + "It doesn't make sense for the allocated type to be a ZST" 655 + ); 656 + } 657 + 658 + let mut dma_handle = 0; 659 + // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`. 660 + let addr = unsafe { 661 + bindings::dma_alloc_attrs( 662 + dev.as_raw(), 663 + core::mem::size_of::<T>(), 664 + &mut dma_handle, 665 + gfp_flags.as_raw(), 666 + dma_attrs.as_raw(), 667 + ) 668 + }; 669 + let cpu_addr = NonNull::new(addr.cast()).ok_or(ENOMEM)?; 670 + // INVARIANT: 671 + // - We just successfully allocated a coherent region which is adequately sized for `T`, 672 + // hence the cpu address is valid. 673 + // - We also hold a refcounted reference to the device. 674 + Ok(Self { 675 + dev: dev.into(), 676 + dma_handle, 677 + cpu_addr, 678 + dma_attrs, 679 + }) 680 + } 681 + 682 + /// Allocates a region of type `T` of coherent memory. 683 + /// 684 + /// # Examples 685 + /// 686 + /// ``` 687 + /// # use kernel::device::{ 688 + /// # Bound, 689 + /// # Device, 690 + /// # }; 691 + /// use kernel::dma::{ 692 + /// attrs::*, 693 + /// Coherent, 694 + /// }; 695 + /// 696 + /// # fn test(dev: &Device<Bound>) -> Result { 697 + /// let c: Coherent<[u64; 4]> = 698 + /// Coherent::zeroed_with_attrs(dev, GFP_KERNEL, DMA_ATTR_NO_WARN)?; 699 + /// # Ok::<(), Error>(()) } 700 + /// ``` 701 + #[inline] 702 + pub fn zeroed_with_attrs( 703 + dev: &device::Device<Bound>, 704 + gfp_flags: kernel::alloc::Flags, 705 + dma_attrs: Attrs, 706 + ) -> Result<Self> { 707 + Self::alloc_with_attrs(dev, gfp_flags | __GFP_ZERO, dma_attrs) 708 + } 709 + 710 + /// Performs the same functionality as [`Coherent::zeroed_with_attrs`], except the 711 + /// `dma_attrs` is 0 by default. 
712 + #[inline] 713 + pub fn zeroed(dev: &device::Device<Bound>, gfp_flags: kernel::alloc::Flags) -> Result<Self> { 714 + Self::zeroed_with_attrs(dev, gfp_flags, Attrs(0)) 715 + } 716 + 717 + /// Same as [`Coherent::zeroed_with_attrs`], but instead of a zero-initialization the memory is 718 + /// initialized with `init`. 719 + pub fn init_with_attrs<E>( 720 + dev: &device::Device<Bound>, 721 + gfp_flags: kernel::alloc::Flags, 722 + dma_attrs: Attrs, 723 + init: impl Init<T, E>, 724 + ) -> Result<Self> 725 + where 726 + Error: From<E>, 727 + { 728 + let dmem = Self::alloc_with_attrs(dev, gfp_flags, dma_attrs)?; 729 + let ptr = dmem.as_mut_ptr(); 730 + 731 + // SAFETY: 732 + // - `ptr` is valid, properly aligned, and points to exclusively owned memory. 733 + // - If `__init` fails, `dmem` is dropped, which safely frees the underlying `Coherent`'s 734 + // DMA memory. `T: AsBytes + FromBytes` ensures there are no complex `Drop` requirements 735 + // we are bypassing. 736 + unsafe { init.__init(ptr)? }; 737 + 738 + Ok(dmem) 739 + } 740 + 741 + /// Same as [`Coherent::zeroed`], but instead of a zero-initialization the memory is initialized 742 + /// with `init`. 743 + #[inline] 744 + pub fn init<E>( 745 + dev: &device::Device<Bound>, 746 + gfp_flags: kernel::alloc::Flags, 747 + init: impl Init<T, E>, 748 + ) -> Result<Self> 749 + where 750 + Error: From<E>, 751 + { 752 + Self::init_with_attrs(dev, gfp_flags, Attrs(0), init) 753 + } 754 + 755 + /// Allocates a region of `[T; len]` of coherent memory. 756 + fn alloc_slice_with_attrs( 757 + dev: &device::Device<Bound>, 758 + len: usize, 759 + gfp_flags: kernel::alloc::Flags, 760 + dma_attrs: Attrs, 761 + ) -> Result<Coherent<[T]>> { 762 + const { 763 + assert!( 764 + core::mem::size_of::<T>() > 0, 765 + "It doesn't make sense for the allocated type to be a ZST" 766 + ); 767 + } 768 + 769 + // `dma_alloc_attrs` cannot handle zero-length allocation, bail early.
770 + if len == 0 { 771 + Err(EINVAL)?; 772 + } 773 + 774 + let size = core::mem::size_of::<T>().checked_mul(len).ok_or(ENOMEM)?; 775 + let mut dma_handle = 0; 776 + // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`. 777 + let addr = unsafe { 778 + bindings::dma_alloc_attrs( 779 + dev.as_raw(), 780 + size, 781 + &mut dma_handle, 782 + gfp_flags.as_raw(), 783 + dma_attrs.as_raw(), 784 + ) 785 + }; 786 + let cpu_addr = NonNull::slice_from_raw_parts(NonNull::new(addr.cast()).ok_or(ENOMEM)?, len); 787 + // INVARIANT: 788 + // - We just successfully allocated a coherent region which is adequately sized for 789 + // `[T; len]`, hence the cpu address is valid. 790 + // - We also hold a refcounted reference to the device. 791 + Ok(Coherent { 792 + dev: dev.into(), 793 + dma_handle, 794 + cpu_addr, 795 + dma_attrs, 796 + }) 797 + } 798 + 799 + /// Allocates a zeroed region of `[T; len]` of coherent memory. 800 + /// 801 + /// Unlike `Coherent::<[T; N]>::zeroed_with_attrs`, `Coherent::<T>::zeroed_slice_with_attrs` supports 802 + /// a runtime length. 803 + /// 804 + /// # Examples 805 + /// 806 + /// ``` 807 + /// # use kernel::device::{ 808 + /// # Bound, 809 + /// # Device, 810 + /// # }; 811 + /// use kernel::dma::{ 812 + /// attrs::*, 813 + /// Coherent, 814 + /// }; 815 + /// 816 + /// # fn test(dev: &Device<Bound>) -> Result { 817 + /// let c: Coherent<[u64]> = 818 + /// Coherent::zeroed_slice_with_attrs(dev, 4, GFP_KERNEL, DMA_ATTR_NO_WARN)?; 819 + /// # Ok::<(), Error>(()) } 820 + /// ``` 821 + #[inline] 822 + pub fn zeroed_slice_with_attrs( 823 + dev: &device::Device<Bound>, 824 + len: usize, 825 + gfp_flags: kernel::alloc::Flags, 826 + dma_attrs: Attrs, 827 + ) -> Result<Coherent<[T]>> { 828 + Coherent::alloc_slice_with_attrs(dev, len, gfp_flags | __GFP_ZERO, dma_attrs) 829 + } 830 + 831 + /// Performs the same functionality as [`Coherent::zeroed_slice_with_attrs`], except the 832 + /// `dma_attrs` is 0 by default.
833 + #[inline] 834 + pub fn zeroed_slice( 835 + dev: &device::Device<Bound>, 836 + len: usize, 837 + gfp_flags: kernel::alloc::Flags, 838 + ) -> Result<Coherent<[T]>> { 839 + Self::zeroed_slice_with_attrs(dev, len, gfp_flags, Attrs(0)) 840 + } 841 + 842 + /// Allocates a region of coherent memory of the same size as `data` and initializes it with a 843 + /// copy of its contents. 844 + /// 845 + /// # Examples 846 + /// 847 + /// ``` 848 + /// # use kernel::device::{Bound, Device}; 849 + /// use kernel::dma::{ 850 + /// attrs::*, 851 + /// Coherent 852 + /// }; 853 + /// 854 + /// # fn test(dev: &Device<Bound>) -> Result { 855 + /// let data = [0u8, 1u8, 2u8, 3u8]; 856 + /// // `c` has the same content as `data`. 857 + /// let c: Coherent<[u8]> = 858 + /// Coherent::from_slice_with_attrs(dev, &data, GFP_KERNEL, DMA_ATTR_NO_WARN)?; 859 + /// 860 + /// # Ok::<(), Error>(()) } 861 + /// ``` 862 + #[inline] 863 + pub fn from_slice_with_attrs( 864 + dev: &device::Device<Bound>, 865 + data: &[T], 866 + gfp_flags: kernel::alloc::Flags, 867 + dma_attrs: Attrs, 868 + ) -> Result<Coherent<[T]>> 869 + where 870 + T: Copy, 871 + { 872 + CoherentBox::from_slice_with_attrs(dev, data, gfp_flags, dma_attrs).map(Into::into) 873 + } 874 + 875 + /// Performs the same functionality as [`Coherent::from_slice_with_attrs`], except the 876 + /// `dma_attrs` is 0 by default. 877 + #[inline] 878 + pub fn from_slice( 879 + dev: &device::Device<Bound>, 880 + data: &[T], 881 + gfp_flags: kernel::alloc::Flags, 882 + ) -> Result<Coherent<[T]>> 883 + where 884 + T: Copy, 885 + { 886 + Self::from_slice_with_attrs(dev, data, gfp_flags, Attrs(0)) 887 + } 888 + } 889 + 890 + impl<T> Coherent<[T]> { 891 + /// Returns the number of elements `T` in this allocation. 892 + /// 893 + /// Note that this is not the size of the allocation in bytes, which is provided by 894 + /// [`Self::size`]. 
895 + #[inline] 896 + #[expect(clippy::len_without_is_empty, reason = "Coherent slice is never empty")] 897 + pub fn len(&self) -> usize { 898 + self.cpu_addr.len() 899 + } 900 + } 901 + 705 902 /// Note that the device configured to do DMA must be halted before this object is dropped. 706 - impl<T: AsBytes + FromBytes> Drop for CoherentAllocation<T> { 903 + impl<T: KnownSize + ?Sized> Drop for Coherent<T> { 707 904 fn drop(&mut self) { 708 - let size = self.count * core::mem::size_of::<T>(); 905 + let size = T::size(self.cpu_addr.as_ptr()); 709 906 // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`. 710 907 // The cpu address, and the dma handle are valid due to the type invariants on 711 - // `CoherentAllocation`. 908 + // `Coherent`. 712 909 unsafe { 713 910 bindings::dma_free_attrs( 714 911 self.dev.as_raw(), 715 912 size, 716 - self.start_ptr_mut().cast(), 913 + self.cpu_addr.as_ptr().cast(), 717 914 self.dma_handle, 718 915 self.dma_attrs.as_raw(), 719 916 ) ··· 979 660 } 980 661 } 981 662 982 - // SAFETY: It is safe to send a `CoherentAllocation` to another thread if `T` 663 + // SAFETY: It is safe to send a `Coherent` to another thread if `T` 983 664 // can be sent to another thread. 984 - unsafe impl<T: AsBytes + FromBytes + Send> Send for CoherentAllocation<T> {} 665 + unsafe impl<T: KnownSize + Send + ?Sized> Send for Coherent<T> {} 666 + 667 + // SAFETY: Sharing `&Coherent` across threads is safe if `T` is `Sync`, because all 668 + // methods that access the buffer contents (`field_read`, `field_write`, `as_ref`, 669 + // `as_mut`) are `unsafe`, and callers are responsible for ensuring no data races occur. 670 + // The safe methods only return metadata or raw pointers whose use requires `unsafe`.
671 + unsafe impl<T: KnownSize + ?Sized + AsBytes + FromBytes + Sync> Sync for Coherent<T> {} 672 + 673 + impl<T: KnownSize + AsBytes + ?Sized> debugfs::BinaryWriter for Coherent<T> { 674 + fn write_to_slice( 675 + &self, 676 + writer: &mut UserSliceWriter, 677 + offset: &mut file::Offset, 678 + ) -> Result<usize> { 679 + if offset.is_negative() { 680 + return Err(EINVAL); 681 + } 682 + 683 + // If the offset is too large for a usize (e.g. on 32-bit platforms), 684 + // then consider that as past EOF and just return 0 bytes. 685 + let Ok(offset_val) = usize::try_from(*offset) else { 686 + return Ok(0); 687 + }; 688 + 689 + let count = self.size().saturating_sub(offset_val).min(writer.len()); 690 + 691 + writer.write_dma(self, offset_val, count)?; 692 + 693 + *offset += count as i64; 694 + Ok(count) 695 + } 696 + } 697 + 698 + /// An opaque DMA allocation without a kernel virtual mapping. 699 + /// 700 + /// Unlike [`Coherent`], a `CoherentHandle` does not provide CPU access to the allocated memory. 701 + /// The allocation is always performed with `DMA_ATTR_NO_KERNEL_MAPPING`, meaning no kernel 702 + /// virtual mapping is created for the buffer. The value returned by the C API as the CPU 703 + /// address is an opaque handle used only to free the allocation. 704 + /// 705 + /// This is useful for buffers that are only ever accessed by hardware. 706 + /// 707 + /// # Invariants 708 + /// 709 + /// - `cpu_handle` holds the opaque handle returned by `dma_alloc_attrs` with 710 + /// `DMA_ATTR_NO_KERNEL_MAPPING` set, and is only valid for passing back to `dma_free_attrs`. 711 + /// - `dma_handle` is the corresponding bus address for device DMA. 712 + /// - `size` is the allocation size in bytes as passed to `dma_alloc_attrs`. 713 + /// - `dma_attrs` contains the attributes used for the allocation, always including 714 + /// `DMA_ATTR_NO_KERNEL_MAPPING`. 
715 + pub struct CoherentHandle { 716 + dev: ARef<device::Device>, 717 + dma_handle: DmaAddress, 718 + cpu_handle: NonNull<c_void>, 719 + size: usize, 720 + dma_attrs: Attrs, 721 + } 722 + 723 + impl CoherentHandle { 724 + /// Allocates `size` bytes of coherent DMA memory without creating a kernel virtual mapping. 725 + /// 726 + /// Additional DMA attributes may be passed via `dma_attrs`; `DMA_ATTR_NO_KERNEL_MAPPING` is 727 + /// always set implicitly. 728 + /// 729 + /// Returns `EINVAL` if `size` is zero, `ENOMEM` if the allocation fails. 730 + pub fn alloc_with_attrs( 731 + dev: &device::Device<Bound>, 732 + size: usize, 733 + gfp_flags: kernel::alloc::Flags, 734 + dma_attrs: Attrs, 735 + ) -> Result<Self> { 736 + if size == 0 { 737 + return Err(EINVAL); 738 + } 739 + 740 + let dma_attrs = dma_attrs | Attrs(bindings::DMA_ATTR_NO_KERNEL_MAPPING); 741 + let mut dma_handle = 0; 742 + // SAFETY: `dev.as_raw()` is valid by the type invariant on `device::Device`. 743 + let cpu_handle = unsafe { 744 + bindings::dma_alloc_attrs( 745 + dev.as_raw(), 746 + size, 747 + &mut dma_handle, 748 + gfp_flags.as_raw(), 749 + dma_attrs.as_raw(), 750 + ) 751 + }; 752 + 753 + let cpu_handle = NonNull::new(cpu_handle).ok_or(ENOMEM)?; 754 + 755 + // INVARIANT: `cpu_handle` is the opaque handle from a successful `dma_alloc_attrs` call 756 + // with `DMA_ATTR_NO_KERNEL_MAPPING`, `dma_handle` is the corresponding DMA address, 757 + // and we hold a refcounted reference to the device. 758 + Ok(Self { 759 + dev: dev.into(), 760 + dma_handle, 761 + cpu_handle, 762 + size, 763 + dma_attrs, 764 + }) 765 + } 766 + 767 + /// Allocates `size` bytes of coherent DMA memory without creating a kernel virtual mapping. 768 + #[inline] 769 + pub fn alloc( 770 + dev: &device::Device<Bound>, 771 + size: usize, 772 + gfp_flags: kernel::alloc::Flags, 773 + ) -> Result<Self> { 774 + Self::alloc_with_attrs(dev, size, gfp_flags, Attrs(0)) 775 + } 776 + 777 + /// Returns the DMA handle for this allocation. 
778 + /// 779 + /// This address can be programmed into device hardware for DMA access. 780 + #[inline] 781 + pub fn dma_handle(&self) -> DmaAddress { 782 + self.dma_handle 783 + } 784 + 785 + /// Returns the size in bytes of this allocation. 786 + #[inline] 787 + pub fn size(&self) -> usize { 788 + self.size 789 + } 790 + } 791 + 792 + impl Drop for CoherentHandle { 793 + fn drop(&mut self) { 794 + // SAFETY: All values are valid by the type invariants on `CoherentHandle`. 795 + // `cpu_handle` is the opaque handle from `dma_alloc_attrs` and is passed back unchanged. 796 + unsafe { 797 + bindings::dma_free_attrs( 798 + self.dev.as_raw(), 799 + self.size, 800 + self.cpu_handle.as_ptr(), 801 + self.dma_handle, 802 + self.dma_attrs.as_raw(), 803 + ) 804 + } 805 + } 806 + } 807 + 808 + // SAFETY: `CoherentHandle` only holds a device reference, a DMA handle, an opaque CPU handle, 809 + // and a size. None of these are tied to a specific thread. 810 + unsafe impl Send for CoherentHandle {} 811 + 812 + // SAFETY: `CoherentHandle` provides no CPU access to the underlying allocation. The only 813 + // operations on `&CoherentHandle` are reading the DMA handle and size, both of which are 814 + // plain `Copy` values. 815 + unsafe impl Sync for CoherentHandle {} 985 816 986 817 /// Reads a field of an item from an allocated region of structs. 987 818 /// 988 819 /// The syntax is of the form `kernel::dma_read!(dma, proj)` where `dma` is an expression evaluating 989 - /// to a [`CoherentAllocation`] and `proj` is a [projection specification](kernel::ptr::project!). 820 + /// to a [`Coherent`] and `proj` is a [projection specification](kernel::ptr::project!). 
990 821 /// 991 822 /// # Examples 992 823 /// 993 824 /// ``` 994 825 /// use kernel::device::Device; 995 - /// use kernel::dma::{attrs::*, CoherentAllocation}; 826 + /// use kernel::dma::{attrs::*, Coherent}; 996 827 /// 997 828 /// struct MyStruct { field: u32, } 998 829 /// ··· 1151 682 /// // SAFETY: Instances of `MyStruct` have no uninitialized portions. 1152 683 /// unsafe impl kernel::transmute::AsBytes for MyStruct{}; 1153 684 /// 1154 - /// # fn test(alloc: &kernel::dma::CoherentAllocation<MyStruct>) -> Result { 685 + /// # fn test(alloc: &kernel::dma::Coherent<[MyStruct]>) -> Result { 1155 686 /// let whole = kernel::dma_read!(alloc, [2]?); 1156 687 /// let field = kernel::dma_read!(alloc, [1]?.field); 1157 688 /// # Ok::<(), Error>(()) } ··· 1161 692 ($dma:expr, $($proj:tt)*) => {{ 1162 693 let dma = &$dma; 1163 694 let ptr = $crate::ptr::project!( 1164 - $crate::dma::CoherentAllocation::as_ptr(dma), $($proj)* 695 + $crate::dma::Coherent::as_ptr(dma), $($proj)* 1165 696 ); 1166 697 // SAFETY: The pointer created by the projection is within the DMA region. 1167 - unsafe { $crate::dma::CoherentAllocation::field_read(dma, ptr) } 698 + unsafe { $crate::dma::Coherent::field_read(dma, ptr) } 1168 699 }}; 1169 700 } 1170 701 1171 702 /// Writes to a field of an item from an allocated region of structs. 1172 703 /// 1173 704 /// The syntax is of the form `kernel::dma_write!(dma, proj, val)` where `dma` is an expression 1174 - /// evaluating to a [`CoherentAllocation`], `proj` is a 705 + /// evaluating to a [`Coherent`], `proj` is a 1175 706 /// [projection specification](kernel::ptr::project!), and `val` is the value to be written to the 1176 707 /// projected location. 
1177 708 /// ··· 1179 710 /// 1180 711 /// ``` 1181 712 /// use kernel::device::Device; 1182 - /// use kernel::dma::{attrs::*, CoherentAllocation}; 713 + /// use kernel::dma::{attrs::*, Coherent}; 1183 714 /// 1184 715 /// struct MyStruct { member: u32, } 1185 716 /// ··· 1188 719 /// // SAFETY: Instances of `MyStruct` have no uninitialized portions. 1189 720 /// unsafe impl kernel::transmute::AsBytes for MyStruct{}; 1190 721 /// 1191 - /// # fn test(alloc: &kernel::dma::CoherentAllocation<MyStruct>) -> Result { 722 + /// # fn test(alloc: &kernel::dma::Coherent<[MyStruct]>) -> Result { 1192 723 /// kernel::dma_write!(alloc, [2]?.member, 0xf); 1193 724 /// kernel::dma_write!(alloc, [1]?, MyStruct { member: 0xf }); 1194 725 /// # Ok::<(), Error>(()) } ··· 1198 729 (@parse [$dma:expr] [$($proj:tt)*] [, $val:expr]) => {{ 1199 730 let dma = &$dma; 1200 731 let ptr = $crate::ptr::project!( 1201 - mut $crate::dma::CoherentAllocation::as_mut_ptr(dma), $($proj)* 732 + mut $crate::dma::Coherent::as_mut_ptr(dma), $($proj)* 1202 733 ); 1203 734 let val = $val; 1204 735 // SAFETY: The pointer created by the projection is within the DMA region. 1205 - unsafe { $crate::dma::CoherentAllocation::field_write(dma, ptr, val) } 736 + unsafe { $crate::dma::Coherent::field_write(dma, ptr, val) } 1206 737 }}; 1207 738 (@parse [$dma:expr] [$($proj:tt)*] [.$field:tt $($rest:tt)*]) => { 1208 739 $crate::dma_write!(@parse [$dma] [$($proj)* .$field] [$($rest)*])

+82 -5

rust/kernel/drm/device.rs

··· 6 6 7 7 use crate::{ 8 8 alloc::allocator::Kmalloc, 9 - bindings, device, drm, 10 - drm::driver::AllocImpl, 9 + bindings, device, 10 + drm::{ 11 + self, 12 + driver::AllocImpl, // 13 + }, 11 14 error::from_err_ptr, 12 - error::Result, 13 15 prelude::*, 14 - sync::aref::{ARef, AlwaysRefCounted}, 16 + sync::aref::{ 17 + ARef, 18 + AlwaysRefCounted, // 19 + }, 15 20 types::Opaque, 21 + workqueue::{ 22 + HasDelayedWork, 23 + HasWork, 24 + Work, 25 + WorkItem, // 26 + }, 16 27 }; 17 - use core::{alloc::Layout, mem, ops::Deref, ptr, ptr::NonNull}; 28 + use core::{ 29 + alloc::Layout, 30 + mem, 31 + ops::Deref, 32 + ptr::{ 33 + self, 34 + NonNull, // 35 + }, 36 + }; 18 37 19 38 #[cfg(CONFIG_DRM_LEGACY)] 20 39 macro_rules! drm_legacy_fields { ··· 246 227 // SAFETY: A `drm::Device` can be shared among threads because all immutable methods are protected 247 228 // by the synchronization in `struct drm_device`. 248 229 unsafe impl<T: drm::Driver> Sync for Device<T> {} 230 + 231 + impl<T, const ID: u64> WorkItem<ID> for Device<T> 232 + where 233 + T: drm::Driver, 234 + T::Data: WorkItem<ID, Pointer = ARef<Device<T>>>, 235 + T::Data: HasWork<Device<T>, ID>, 236 + { 237 + type Pointer = ARef<Device<T>>; 238 + 239 + fn run(ptr: ARef<Device<T>>) { 240 + T::Data::run(ptr); 241 + } 242 + } 243 + 244 + // SAFETY: 245 + // 246 + // - `raw_get_work` and `work_container_of` return valid pointers by relying on 247 + // `T::Data::raw_get_work` and `container_of`. In particular, `T::Data` is 248 + // stored inline in `drm::Device`, so the `container_of` call is valid. 249 + // 250 + // - The two methods are true inverses of each other: given `ptr: *mut 251 + // Device<T>`, `raw_get_work` will return a `*mut Work<Device<T>, ID>` through 252 + // `T::Data::raw_get_work` and given a `ptr: *mut Work<Device<T>, ID>`, 253 + // `work_container_of` will return a `*mut Device<T>` through `container_of`. 
254 + unsafe impl<T, const ID: u64> HasWork<Device<T>, ID> for Device<T> 255 + where 256 + T: drm::Driver, 257 + T::Data: HasWork<Device<T>, ID>, 258 + { 259 + unsafe fn raw_get_work(ptr: *mut Self) -> *mut Work<Device<T>, ID> { 260 + // SAFETY: The caller promises that `ptr` points to a valid `Device<T>`. 261 + let data_ptr = unsafe { &raw mut (*ptr).data }; 262 + 263 + // SAFETY: `data_ptr` is a valid pointer to `T::Data`. 264 + unsafe { T::Data::raw_get_work(data_ptr) } 265 + } 266 + 267 + unsafe fn work_container_of(ptr: *mut Work<Device<T>, ID>) -> *mut Self { 268 + // SAFETY: The caller promises that `ptr` points at a `Work` field in 269 + // `T::Data`. 270 + let data_ptr = unsafe { T::Data::work_container_of(ptr) }; 271 + 272 + // SAFETY: `T::Data` is stored as the `data` field in `Device<T>`. 273 + unsafe { crate::container_of!(data_ptr, Self, data) } 274 + } 275 + } 276 + 277 + // SAFETY: Our `HasWork<T, ID>` implementation returns a `work_struct` that is 278 + // stored in the `work` field of a `delayed_work` with the same access rules as 279 + // the `work_struct` owing to the bound on `T::Data: HasDelayedWork<Device<T>, 280 + // ID>`, which requires that `T::Data::raw_get_work` return a `work_struct` that 281 + // is inside a `delayed_work`. 282 + unsafe impl<T, const ID: u64> HasDelayedWork<Device<T>, ID> for Device<T> 283 + where 284 + T: drm::Driver, 285 + T::Data: HasDelayedWork<Device<T>, ID>, 286 + { 287 + }

+6 -4

rust/kernel/drm/driver.rs

··· 5 5 //! C header: [`include/drm/drm_drv.h`](srctree/include/drm/drm_drv.h) 6 6 7 7 use crate::{ 8 - bindings, device, devres, drm, 9 - error::{to_result, Result}, 8 + bindings, 9 + device, 10 + devres, 11 + drm, 12 + error::to_result, 10 13 prelude::*, 11 - sync::aref::ARef, 14 + sync::aref::ARef, // 12 15 }; 13 - use macros::vtable; 14 16 15 17 /// Driver uses the GEM memory manager. This should be set for all modern drivers. 16 18 pub(crate) const FEAT_GEM: u32 = bindings::drm_driver_feature_DRIVER_GEM;

+6 -2

rust/kernel/drm/file.rs

··· 4 4 //! 5 5 //! C header: [`include/drm/drm_file.h`](srctree/include/drm/drm_file.h) 6 6 7 - use crate::{bindings, drm, error::Result, prelude::*, types::Opaque}; 7 + use crate::{ 8 + bindings, 9 + drm, 10 + prelude::*, 11 + types::Opaque, // 12 + }; 8 13 use core::marker::PhantomData; 9 - use core::pin::Pin; 10 14 11 15 /// Trait that must be implemented by DRM drivers to represent a DRM File (a client instance). 12 16 pub trait DriverFile {

+80 -24

rust/kernel/drm/gem/mod.rs

··· 5 5 //! C header: [`include/drm/drm_gem.h`](srctree/include/drm/drm_gem.h) 6 6 7 7 use crate::{ 8 - alloc::flags::*, 9 - bindings, drm, 10 - drm::driver::{AllocImpl, AllocOps}, 11 - error::{to_result, Result}, 8 + bindings, 9 + drm::{ 10 + self, 11 + driver::{ 12 + AllocImpl, 13 + AllocOps, // 14 + }, 15 + }, 16 + error::to_result, 12 17 prelude::*, 13 - sync::aref::{ARef, AlwaysRefCounted}, 18 + sync::aref::{ 19 + ARef, 20 + AlwaysRefCounted, // 21 + }, 14 22 types::Opaque, 15 23 }; 16 - use core::{ops::Deref, ptr::NonNull}; 24 + use core::{ 25 + ops::Deref, 26 + ptr::NonNull, // 27 + }; 28 + 29 + #[cfg(CONFIG_RUST_DRM_GEM_SHMEM_HELPER)] 30 + pub mod shmem; 31 + 32 + /// A macro for implementing [`AlwaysRefCounted`] for any GEM object type. 33 + /// 34 + /// A single implementation suffices since all GEM objects use the same refcounting scheme. 35 + #[macro_export] 36 + macro_rules! impl_aref_for_gem_obj { 37 + ( 38 + impl $( <$( $tparam_id:ident ),+> )? for $type:ty 39 + $( 40 + where 41 + $( $bind_param:path : $bind_trait:path ),+ 42 + )? 43 + ) => { 44 + // SAFETY: All GEM objects are refcounted. 45 + unsafe impl $( <$( $tparam_id ),+> )? $crate::sync::aref::AlwaysRefCounted for $type 46 + where 47 + Self: IntoGEMObject, 48 + $( $( $bind_param : $bind_trait ),+ )? 49 + { 50 + fn inc_ref(&self) { 51 + // SAFETY: The existence of a shared reference guarantees that the refcount is 52 + // non-zero. 53 + unsafe { bindings::drm_gem_object_get(self.as_raw()) }; 54 + } 55 + 56 + unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) { 57 + // SAFETY: `obj` is a valid pointer to an `Object<T>`. 58 + let obj = unsafe { obj.as_ref() }.as_raw(); 59 + 60 + // SAFETY: The safety requirements guarantee that the refcount is non-zero.
61 + unsafe { bindings::drm_gem_object_put(obj) }; 62 + } 63 + } 64 + }; 65 + } 66 + #[cfg_attr(not(CONFIG_RUST_DRM_GEM_SHMEM_HELPER), allow(unused))] 67 + pub(crate) use impl_aref_for_gem_obj; 17 68 18 69 /// A type alias for retrieving a [`Driver`]s [`DriverFile`] implementation from its 19 70 /// [`DriverObject`] implementation. ··· 78 27 /// Parent `Driver` for this object. 79 28 type Driver: drm::Driver; 80 29 30 + /// The data type to use for passing arguments to [`DriverObject::new`]. 31 + type Args; 32 + 81 33 /// Create a new driver data object for a GEM object of a given size. 82 - fn new(dev: &drm::Device<Self::Driver>, size: usize) -> impl PinInit<Self, Error>; 34 + fn new( 35 + dev: &drm::Device<Self::Driver>, 36 + size: usize, 37 + args: Self::Args, 38 + ) -> impl PinInit<Self, Error>; 83 39 84 40 /// Open a new handle to an existing object, associated with a File. 85 41 fn open(_obj: &<Self::Driver as drm::Driver>::Object, _file: &DriverFile<Self>) -> Result { ··· 220 162 221 163 impl<T: IntoGEMObject> BaseObject for T {} 222 164 165 + /// Crate-private base operations shared by all GEM object classes. 166 + #[cfg_attr(not(CONFIG_RUST_DRM_GEM_SHMEM_HELPER), expect(unused))] 167 + pub(crate) trait BaseObjectPrivate: IntoGEMObject { 168 + /// Return a pointer to this object's dma_resv. 169 + fn raw_dma_resv(&self) -> *mut bindings::dma_resv { 170 + // SAFETY: `self.as_raw()` always returns a valid pointer to the base DRM GEM object. 171 + unsafe { (*self.as_raw()).resv } 172 + } 173 + } 174 + 175 + impl<T: IntoGEMObject> BaseObjectPrivate for T {} 176 + 223 177 /// A base GEM object. 224 178 /// 225 179 /// # Invariants ··· 265 195 }; 266 196 267 197 /// Create a new GEM object. 
268 - pub fn new(dev: &drm::Device<T::Driver>, size: usize) -> Result<ARef<Self>> { 198 + pub fn new(dev: &drm::Device<T::Driver>, size: usize, args: T::Args) -> Result<ARef<Self>> { 269 199 let obj: Pin<KBox<Self>> = KBox::pin_init( 270 200 try_pin_init!(Self { 271 201 obj: Opaque::new(bindings::drm_gem_object::default()), 272 - data <- T::new(dev, size), 202 + data <- T::new(dev, size, args), 273 203 }), 274 204 GFP_KERNEL, 275 205 )?; ··· 322 252 } 323 253 } 324 254 325 - // SAFETY: Instances of `Object<T>` are always reference-counted. 326 - unsafe impl<T: DriverObject> crate::sync::aref::AlwaysRefCounted for Object<T> { 327 - fn inc_ref(&self) { 328 - // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero. 329 - unsafe { bindings::drm_gem_object_get(self.as_raw()) }; 330 - } 331 - 332 - unsafe fn dec_ref(obj: NonNull<Self>) { 333 - // SAFETY: `obj` is a valid pointer to an `Object<T>`. 334 - let obj = unsafe { obj.as_ref() }; 335 - 336 - // SAFETY: The safety requirements guarantee that the refcount is non-zero. 337 - unsafe { bindings::drm_gem_object_put(obj.as_raw()) } 338 - } 339 - } 255 + impl_aref_for_gem_obj!(impl<T> for Object<T> where T: DriverObject); 340 256 341 257 impl<T: DriverObject> super::private::Sealed for Object<T> {} 342 258

+228

rust/kernel/drm/gem/shmem.rs

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! DRM GEM shmem helper objects 4 + //! 5 + //! C header: [`include/drm/drm_gem_shmem_helper.h`](srctree/include/drm/drm_gem_shmem_helper.h) 6 + 7 + // TODO: 8 + // - There are a number of spots here that manually acquire/release the DMA reservation lock using 9 + // dma_resv_(un)lock(). In the future we should add support for ww mutex, expose a method to 10 + // acquire a reference to the WwMutex, and then use that directly instead of the C functions here. 11 + 12 + use crate::{ 13 + container_of, 14 + drm::{ 15 + device, 16 + driver, 17 + gem, 18 + private::Sealed, // 19 + }, 20 + error::to_result, 21 + prelude::*, 22 + types::{ 23 + ARef, 24 + Opaque, // 25 + }, // 26 + }; 27 + use core::{ 28 + ops::{ 29 + Deref, 30 + DerefMut, // 31 + }, 32 + ptr::NonNull, 33 + }; 34 + use gem::{ 35 + BaseObjectPrivate, 36 + DriverObject, 37 + IntoGEMObject, // 38 + }; 39 + 40 + /// A struct for controlling the creation of shmem-backed GEM objects. 41 + /// 42 + /// This is used with [`Object::new()`] to control various properties that can only be set when 43 + /// initially creating a shmem-backed GEM object. 44 + #[derive(Default)] 45 + pub struct ObjectConfig<'a, T: DriverObject> { 46 + /// Whether to set the write-combine map flag. 47 + pub map_wc: bool, 48 + 49 + /// Reuse the DMA reservation from another GEM object. 50 + /// 51 + /// The newly created [`Object`] will hold an owned refcount to `parent_resv_obj` if specified. 52 + pub parent_resv_obj: Option<&'a Object<T>>, 53 + } 54 + 55 + /// A shmem-backed GEM object. 56 + /// 57 + /// # Invariants 58 + /// 59 + /// `obj` contains a valid initialized `struct drm_gem_shmem_object` for the lifetime of this 60 + /// object. 61 + #[repr(C)] 62 + #[pin_data] 63 + pub struct Object<T: DriverObject> { 64 + #[pin] 65 + obj: Opaque<bindings::drm_gem_shmem_object>, 66 + /// Parent object that owns this object's DMA reservation object.
67 + parent_resv_obj: Option<ARef<Object<T>>>, 68 + #[pin] 69 + inner: T, 70 + } 71 + 72 + super::impl_aref_for_gem_obj!(impl<T> for Object<T> where T: DriverObject); 73 + 74 + // SAFETY: All GEM objects are thread-safe. 75 + unsafe impl<T: DriverObject> Send for Object<T> {} 76 + 77 + // SAFETY: All GEM objects are thread-safe. 78 + unsafe impl<T: DriverObject> Sync for Object<T> {} 79 + 80 + impl<T: DriverObject> Object<T> { 81 + /// `drm_gem_object_funcs` vtable suitable for GEM shmem objects. 82 + const VTABLE: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs { 83 + free: Some(Self::free_callback), 84 + open: Some(super::open_callback::<T>), 85 + close: Some(super::close_callback::<T>), 86 + print_info: Some(bindings::drm_gem_shmem_object_print_info), 87 + export: None, 88 + pin: Some(bindings::drm_gem_shmem_object_pin), 89 + unpin: Some(bindings::drm_gem_shmem_object_unpin), 90 + get_sg_table: Some(bindings::drm_gem_shmem_object_get_sg_table), 91 + vmap: Some(bindings::drm_gem_shmem_object_vmap), 92 + vunmap: Some(bindings::drm_gem_shmem_object_vunmap), 93 + mmap: Some(bindings::drm_gem_shmem_object_mmap), 94 + status: None, 95 + rss: None, 96 + #[allow(unused_unsafe, reason = "Safe since Rust 1.82.0")] 97 + // SAFETY: `drm_gem_shmem_vm_ops` is a valid, static const on the C side. 98 + vm_ops: unsafe { &raw const bindings::drm_gem_shmem_vm_ops }, 99 + evict: None, 100 + }; 101 + 102 + /// Return a raw pointer to the embedded drm_gem_shmem_object. 103 + fn as_raw_shmem(&self) -> *mut bindings::drm_gem_shmem_object { 104 + self.obj.get() 105 + } 106 + 107 + /// Create a new shmem-backed DRM object of the given size. 108 + /// 109 + /// Additional config options can be specified using `config`. 
110 + pub fn new( 111 + dev: &device::Device<T::Driver>, 112 + size: usize, 113 + config: ObjectConfig<'_, T>, 114 + args: T::Args, 115 + ) -> Result<ARef<Self>> { 116 + let new: Pin<KBox<Self>> = KBox::try_pin_init( 117 + try_pin_init!(Self { 118 + obj <- Opaque::init_zeroed(), 119 + parent_resv_obj: config.parent_resv_obj.map(|p| p.into()), 120 + inner <- T::new(dev, size, args), 121 + }), 122 + GFP_KERNEL, 123 + )?; 124 + 125 + // SAFETY: `new.as_raw()` is guaranteed to be valid by the initialization above. 126 + unsafe { (*new.as_raw()).funcs = &Self::VTABLE }; 127 + 128 + // SAFETY: The arguments are all valid via the type invariants. 129 + to_result(unsafe { bindings::drm_gem_shmem_init(dev.as_raw(), new.as_raw_shmem(), size) })?; 130 + 131 + // SAFETY: We never move out of `self`. 132 + let new = KBox::into_raw(unsafe { Pin::into_inner_unchecked(new) }); 133 + 134 + // SAFETY: We're taking over the owned refcount from `drm_gem_shmem_init`. 135 + let obj = unsafe { ARef::from_raw(NonNull::new_unchecked(new)) }; 136 + 137 + // Start filling out values from `config` 138 + if let Some(parent_resv) = config.parent_resv_obj { 139 + // SAFETY: We have yet to expose the new gem object outside of this function, so it is 140 + // safe to modify this field. 141 + unsafe { (*obj.obj.get()).base.resv = parent_resv.raw_dma_resv() }; 142 + } 143 + 144 + // SAFETY: We have yet to expose this object outside of this function, so we're guaranteed 145 + // to have exclusive access - thus making this safe to hold a mutable reference to. 146 + let shmem = unsafe { &mut *obj.as_raw_shmem() }; 147 + shmem.set_map_wc(config.map_wc); 148 + 149 + Ok(obj) 150 + } 151 + 152 + /// Returns the `Device` that owns this GEM object. 153 + pub fn dev(&self) -> &device::Device<T::Driver> { 154 + // SAFETY: `dev` will have been initialized in `Self::new()` by `drm_gem_shmem_init()`.
155 + unsafe { device::Device::from_raw((*self.as_raw()).dev) } 156 + } 157 + 158 + extern "C" fn free_callback(obj: *mut bindings::drm_gem_object) { 159 + // SAFETY: 160 + // - DRM always passes a valid gem object here 161 + // - This callback is only installed through `Self::VTABLE` in `Self::new()`, so we know that 162 + // `obj` is contained within a drm_gem_shmem_object 163 + let this = unsafe { container_of!(obj, bindings::drm_gem_shmem_object, base) }; 164 + 165 + // SAFETY: 166 + // - We're in free_callback - so this function is safe to call. 167 + // - We won't be using the gem resources on `this` after this call. 168 + unsafe { bindings::drm_gem_shmem_release(this) }; 169 + 170 + // SAFETY: 171 + // - We verified above that `obj` is valid, which makes `this` valid 172 + // - This function is only set in `Self::VTABLE`, so we know that `this` is contained within a 173 + // `Object<T>` 174 + let this = unsafe { container_of!(Opaque::cast_from(this), Self, obj) }.cast_mut(); 175 + 176 + // SAFETY: We're recovering the KBox<> we created in `Self::new()` 177 + let _ = unsafe { KBox::from_raw(this) }; 178 + } 179 + } 180 + 181 + impl<T: DriverObject> Deref for Object<T> { 182 + type Target = T; 183 + 184 + fn deref(&self) -> &Self::Target { 185 + &self.inner 186 + } 187 + } 188 + 189 + impl<T: DriverObject> DerefMut for Object<T> { 190 + fn deref_mut(&mut self) -> &mut Self::Target { 191 + &mut self.inner 192 + } 193 + } 194 + 195 + impl<T: DriverObject> Sealed for Object<T> {} 196 + 197 + impl<T: DriverObject> gem::IntoGEMObject for Object<T> { 198 + fn as_raw(&self) -> *mut bindings::drm_gem_object { 199 + // SAFETY: 200 + // - Our immutable reference is proof that this is safe to dereference. 201 + // - `obj` is always a valid drm_gem_shmem_object via our type invariants.
202 + unsafe { &raw mut (*self.obj.get()).base } 203 + } 204 + 205 + unsafe fn from_raw<'a>(obj: *mut bindings::drm_gem_object) -> &'a Object<T> { 206 + // SAFETY: The safety contract of from_gem_obj() guarantees that `obj` is contained within 207 + // `Self` 208 + unsafe { 209 + let obj = Opaque::cast_from(container_of!(obj, bindings::drm_gem_shmem_object, base)); 210 + 211 + &*container_of!(obj, Object<T>, obj) 212 + } 213 + } 214 + } 215 + 216 + impl<T: DriverObject> driver::AllocImpl for Object<T> { 217 + type Driver = T::Driver; 218 + 219 + const ALLOC_OPS: driver::AllocOps = driver::AllocOps { 220 + gem_create_object: None, 221 + prime_handle_to_fd: None, 222 + prime_fd_to_handle: None, 223 + gem_prime_import: None, 224 + gem_prime_import_sg_table: Some(bindings::drm_gem_shmem_prime_import_sg_table), 225 + dumb_create: Some(bindings::drm_gem_shmem_dumb_create), 226 + dumb_map_offset: None, 227 + }; 228 + }

+1

rust/kernel/error.rs

··· 67 67 declare_err!(EDOM, "Math argument out of domain of func."); 68 68 declare_err!(ERANGE, "Math result not representable."); 69 69 declare_err!(EOVERFLOW, "Value too large for defined data type."); 70 + declare_err!(EMSGSIZE, "Message too long."); 70 71 declare_err!(ETIMEDOUT, "Connection timed out."); 71 72 declare_err!(ERESTARTSYS, "Restart the system call."); 72 73 declare_err!(ERESTARTNOINTR, "System call was interrupted by a signal and will be restarted.");

+6

rust/kernel/gpu.rs

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! GPU subsystem abstractions. 4 + 5 + #[cfg(CONFIG_GPU_BUDDY = "y")] 6 + pub mod buddy;

+614

rust/kernel/gpu/buddy.rs

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! GPU buddy allocator bindings. 4 + //! 5 + //! C header: [`include/linux/gpu_buddy.h`](srctree/include/linux/gpu_buddy.h) 6 + //! 7 + //! This module provides Rust abstractions over the Linux kernel's GPU buddy 8 + //! allocator, which implements a binary buddy memory allocator. 9 + //! 10 + //! The buddy allocator manages a contiguous address space and allocates blocks 11 + //! in power-of-two sizes, useful for GPU physical memory management. 12 + //! 13 + //! # Examples 14 + //! 15 + //! Create a buddy allocator and perform a basic range allocation: 16 + //! 17 + //! ``` 18 + //! use kernel::{ 19 + //! gpu::buddy::{ 20 + //! GpuBuddy, 21 + //! GpuBuddyAllocFlags, 22 + //! GpuBuddyAllocMode, 23 + //! GpuBuddyParams, // 24 + //! }, 25 + //! prelude::*, 26 + //! ptr::Alignment, 27 + //! sizes::*, // 28 + //! }; 29 + //! 30 + //! // Create a 1GB buddy allocator with 4KB minimum chunk size. 31 + //! let buddy = GpuBuddy::new(GpuBuddyParams { 32 + //! base_offset: 0, 33 + //! size: SZ_1G as u64, 34 + //! chunk_size: Alignment::new::<SZ_4K>(), 35 + //! })?; 36 + //! 37 + //! assert_eq!(buddy.size(), SZ_1G as u64); 38 + //! assert_eq!(buddy.chunk_size(), Alignment::new::<SZ_4K>()); 39 + //! let initial_free = buddy.avail(); 40 + //! 41 + //! // Allocate 16MB. Block lands at the top of the address range. 42 + //! let allocated = KBox::pin_init( 43 + //! buddy.alloc_blocks( 44 + //! GpuBuddyAllocMode::Simple, 45 + //! SZ_16M as u64, 46 + //! Alignment::new::<SZ_16M>(), 47 + //! GpuBuddyAllocFlags::default(), 48 + //! ), 49 + //! GFP_KERNEL, 50 + //! )?; 51 + //! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64); 52 + //! 53 + //! let block = allocated.iter().next().expect("expected one block"); 54 + //! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64); 55 + //! assert_eq!(block.order(), 12); // 2^12 pages = 16MB 56 + //! assert_eq!(block.size(), SZ_16M as u64); 57 + //! 
assert_eq!(allocated.iter().count(), 1); 58 + //! 59 + //! // Dropping the allocation returns the range to the buddy allocator. 60 + //! drop(allocated); 61 + //! assert_eq!(buddy.avail(), initial_free); 62 + //! # Ok::<(), Error>(()) 63 + //! ``` 64 + //! 65 + //! Top-down allocation allocates from the highest addresses: 66 + //! 67 + //! ``` 68 + //! # use kernel::{ 69 + //! # gpu::buddy::{GpuBuddy, GpuBuddyAllocMode, GpuBuddyAllocFlags, GpuBuddyParams}, 70 + //! # prelude::*, 71 + //! # ptr::Alignment, 72 + //! # sizes::*, // 73 + //! # }; 74 + //! # let buddy = GpuBuddy::new(GpuBuddyParams { 75 + //! # base_offset: 0, 76 + //! # size: SZ_1G as u64, 77 + //! # chunk_size: Alignment::new::<SZ_4K>(), 78 + //! # })?; 79 + //! # let initial_free = buddy.avail(); 80 + //! let topdown = KBox::pin_init( 81 + //! buddy.alloc_blocks( 82 + //! GpuBuddyAllocMode::TopDown, 83 + //! SZ_16M as u64, 84 + //! Alignment::new::<SZ_16M>(), 85 + //! GpuBuddyAllocFlags::default(), 86 + //! ), 87 + //! GFP_KERNEL, 88 + //! )?; 89 + //! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64); 90 + //! 91 + //! let block = topdown.iter().next().expect("expected one block"); 92 + //! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64); 93 + //! assert_eq!(block.order(), 12); 94 + //! assert_eq!(block.size(), SZ_16M as u64); 95 + //! 96 + //! // Dropping the allocation returns the range to the buddy allocator. 97 + //! drop(topdown); 98 + //! assert_eq!(buddy.avail(), initial_free); 99 + //! # Ok::<(), Error>(()) 100 + //! ``` 101 + //! 102 + //! Non-contiguous allocation can fill fragmented memory by returning multiple 103 + //! blocks: 104 + //! 105 + //! ``` 106 + //! # use kernel::{ 107 + //! # gpu::buddy::{ 108 + //! # GpuBuddy, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams, 109 + //! # }, 110 + //! # prelude::*, 111 + //! # ptr::Alignment, 112 + //! # sizes::*, // 113 + //! # }; 114 + //! # let buddy = GpuBuddy::new(GpuBuddyParams { 115 + //! # base_offset: 0, 116 + //! 
# size: SZ_1G as u64, 117 + //! # chunk_size: Alignment::new::<SZ_4K>(), 118 + //! # })?; 119 + //! # let initial_free = buddy.avail(); 120 + //! // Create fragmentation by allocating 4MB blocks at [0,4M) and [8M,12M). 121 + //! let frag1 = KBox::pin_init( 122 + //! buddy.alloc_blocks( 123 + //! GpuBuddyAllocMode::Range(0..SZ_4M as u64), 124 + //! SZ_4M as u64, 125 + //! Alignment::new::<SZ_4M>(), 126 + //! GpuBuddyAllocFlags::default(), 127 + //! ), 128 + //! GFP_KERNEL, 129 + //! )?; 130 + //! assert_eq!(buddy.avail(), initial_free - SZ_4M as u64); 131 + //! 132 + //! let frag2 = KBox::pin_init( 133 + //! buddy.alloc_blocks( 134 + //! GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64), 135 + //! SZ_4M as u64, 136 + //! Alignment::new::<SZ_4M>(), 137 + //! GpuBuddyAllocFlags::default(), 138 + //! ), 139 + //! GFP_KERNEL, 140 + //! )?; 141 + //! assert_eq!(buddy.avail(), initial_free - SZ_8M as u64); 142 + //! 143 + //! // Allocate 8MB, this returns 2 blocks from the holes. 144 + //! let fragmented = KBox::pin_init( 145 + //! buddy.alloc_blocks( 146 + //! GpuBuddyAllocMode::Range(0..SZ_16M as u64), 147 + //! SZ_8M as u64, 148 + //! Alignment::new::<SZ_4M>(), 149 + //! GpuBuddyAllocFlags::default(), 150 + //! ), 151 + //! GFP_KERNEL, 152 + //! )?; 153 + //! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64); 154 + //! 155 + //! let (mut count, mut total) = (0u32, 0u64); 156 + //! for block in fragmented.iter() { 157 + //! assert_eq!(block.size(), SZ_4M as u64); 158 + //! total += block.size(); 159 + //! count += 1; 160 + //! } 161 + //! assert_eq!(total, SZ_8M as u64); 162 + //! assert_eq!(count, 2); 163 + //! # Ok::<(), Error>(()) 164 + //! ``` 165 + //! 166 + //! Contiguous allocation fails when only fragmented space is available: 167 + //! 168 + //! ``` 169 + //! # use kernel::{ 170 + //! # gpu::buddy::{ 171 + //! # GpuBuddy, GpuBuddyAllocFlag, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams, 172 + //! # }, 173 + //! # prelude::*, 174 + //! 
//! #     ptr::Alignment,
//! #     sizes::*, //
//! # };
//! // Create a small 16MB buddy allocator with fragmented memory.
//! let small = GpuBuddy::new(GpuBuddyParams {
//!     base_offset: 0,
//!     size: SZ_16M as u64,
//!     chunk_size: Alignment::new::<SZ_4K>(),
//! })?;
//!
//! let _hole1 = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Range(0..SZ_4M as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//!
//! let _hole2 = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//!
//! // 8MB contiguous should fail, only two non-contiguous 4MB holes exist.
//! let result = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Simple,
//!         SZ_8M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlag::Contiguous,
//!     ),
//!     GFP_KERNEL,
//! );
//! assert!(result.is_err());
//! # Ok::<(), Error>(())
//! ```

use core::ops::Range;

use crate::{
    bindings,
    clist_create,
    error::to_result,
    interop::list::CListHead,
    new_mutex,
    prelude::*,
    ptr::Alignment,
    sync::{
        lock::mutex::MutexGuard,
        Arc,
        Mutex, //
    },
    types::Opaque, //
};

/// Allocation mode for the GPU buddy allocator.
///
/// The mode determines the primary allocation strategy. Modes are mutually
/// exclusive: an allocation is either simple, range-constrained, or top-down.
///
/// Orthogonal modifier flags (e.g., contiguous, clear) are specified separately
/// via [`GpuBuddyAllocFlags`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum GpuBuddyAllocMode {
    /// Simple allocation without constraints.
    Simple,
    /// Range-based allocation within the given address range.
    ///
    /// An empty (zero-sized or inverted) range is rejected with `EINVAL` by
    /// [`GpuBuddy::alloc_blocks`].
    Range(Range<u64>),
    /// Allocate from top of address space downward.
    TopDown,
}

impl GpuBuddyAllocMode {
    /// Returns the C flags corresponding to the allocation mode.
    ///
    /// [`Self::Simple`] maps to no flag bits at all.
    fn as_flags(&self) -> usize {
        match self {
            Self::Simple => 0,
            Self::Range(_) => bindings::GPU_BUDDY_RANGE_ALLOCATION,
            Self::TopDown => bindings::GPU_BUDDY_TOPDOWN_ALLOCATION,
        }
    }

    /// Extracts the range start/end, defaulting to `(0, 0)` for non-range modes.
    ///
    /// The returned pair is passed unchanged as the `start`/`end` arguments of
    /// `gpu_buddy_alloc_blocks()` by [`GpuBuddy::alloc_blocks`].
    fn range(&self) -> (u64, u64) {
        match self {
            Self::Range(range) => (range.start, range.end),
            _ => (0, 0),
        }
    }
}

crate::impl_flags!(
    /// Modifier flags for GPU buddy allocation.
    ///
    /// These flags can be combined with any [`GpuBuddyAllocMode`] to control
    /// additional allocation behavior.
    #[derive(Clone, Copy, Default, PartialEq, Eq)]
    pub struct GpuBuddyAllocFlags(usize);

    /// Individual modifier flag for GPU buddy allocation.
    #[derive(Clone, Copy, PartialEq, Eq)]
    pub enum GpuBuddyAllocFlag {
        /// Allocate physically contiguous blocks.
        Contiguous = bindings::GPU_BUDDY_CONTIGUOUS_ALLOCATION,

        /// Request allocation from cleared (zeroed) memory.
        Clear = bindings::GPU_BUDDY_CLEAR_ALLOCATION,

        /// Disable trimming of partially used blocks.
        TrimDisable = bindings::GPU_BUDDY_TRIM_DISABLE,
    }
);

/// Parameters for creating a GPU buddy allocator.
///
/// The parameters are cached by the allocator and exposed again through
/// [`GpuBuddy::base_offset`], [`GpuBuddy::size`] and [`GpuBuddy::chunk_size`].
pub struct GpuBuddyParams {
    /// Base offset (in bytes) where the managed memory region starts.
    /// Allocations will be offset by this value.
    pub base_offset: u64,
    /// Total size (in bytes) of the address space managed by the allocator.
    pub size: u64,
    /// Minimum allocation unit / chunk size; must be >= 4KB.
    pub chunk_size: Alignment,
}

/// Inner structure holding the actual buddy allocator.
///
/// # Synchronization
///
/// The C `gpu_buddy` API requires synchronization (see `include/linux/gpu_buddy.h`).
/// Internal locking ensures all allocator and free operations are properly
/// synchronized, preventing races between concurrent allocations and the
/// freeing that occurs when [`AllocatedBlocks`] is dropped.
///
/// # Invariants
///
/// The inner [`Opaque`] contains an initialized buddy allocator.
#[pin_data(PinnedDrop)]
struct GpuBuddyInner {
    /// The C allocator state; only accessed while `lock` is held.
    #[pin]
    inner: Opaque<bindings::gpu_buddy>,

    // TODO: Replace `Mutex<()>` with `Mutex<Opaque<..>>` once `Mutex::new()`
    // accepts `impl PinInit<T>`.
    /// Serializes every access to `inner` (see [`GpuBuddyInner::lock`]).
    #[pin]
    lock: Mutex<()>,
    /// Cached creation parameters (do not change after init).
    params: GpuBuddyParams,
}

impl GpuBuddyInner {
    /// Create a pin-initializer for the buddy allocator.
    ///
    /// The initializer fails with the error reported by `gpu_buddy_init()` if
    /// the C allocator cannot be set up.
    fn new(params: GpuBuddyParams) -> impl PinInit<Self, Error> {
        // Copy out the values needed by the FFI closure; `params` itself is
        // moved into the struct below.
        let size = params.size;
        let chunk_size = params.chunk_size;

        // INVARIANT: `gpu_buddy_init` returns 0 on success, at which point the
        // `gpu_buddy` structure is initialized and ready for use with all
        // `gpu_buddy_*` APIs. `try_pin_init!` only completes if all fields succeed,
        // so the invariant holds when construction finishes.
        try_pin_init!(Self {
            inner <- Opaque::try_ffi_init(|ptr| {
                // SAFETY: `ptr` points to valid uninitialized memory from the pin-init
                // infrastructure. `gpu_buddy_init` will initialize the structure.
                to_result(unsafe {
                    bindings::gpu_buddy_init(ptr, size, chunk_size.as_usize() as u64)
                })
            }),
            lock <- new_mutex!(()),
            params,
        })
    }

    /// Lock the mutex and return a guard for accessing the allocator.
    fn lock(&self) -> GpuBuddyGuard<'_> {
        GpuBuddyGuard {
            inner: self,
            _guard: self.lock.lock(),
        }
    }
}

#[pinned_drop]
impl PinnedDrop for GpuBuddyInner {
    /// Tears down the C allocator with `gpu_buddy_fini()` while holding the lock.
    fn drop(self: Pin<&mut Self>) {
        let guard = self.lock();

        // SAFETY: Per the type invariant, `inner` contains an initialized
        // allocator. `guard` provides exclusive access.
        unsafe { bindings::gpu_buddy_fini(guard.as_raw()) };
    }
}

// SAFETY: `GpuBuddyInner` can be sent between threads.
unsafe impl Send for GpuBuddyInner {}

// SAFETY: `GpuBuddyInner` is `Sync` because `GpuBuddyInner::lock`
// serializes all access to the C allocator, preventing data races.
unsafe impl Sync for GpuBuddyInner {}

/// Guard that proves the lock is held, enabling access to the allocator.
///
/// The `_guard` holds the lock for the duration of this guard's lifetime.
struct GpuBuddyGuard<'a> {
    /// The allocator whose lock is held.
    inner: &'a GpuBuddyInner,
    /// Keeps the mutex locked until this guard is dropped; never read.
    _guard: MutexGuard<'a, ()>,
}

impl GpuBuddyGuard<'_> {
    /// Get a raw pointer to the underlying C `gpu_buddy` structure.
    fn as_raw(&self) -> *mut bindings::gpu_buddy {
        self.inner.inner.get()
    }
}

/// GPU buddy allocator instance.
///
/// This structure wraps the C `gpu_buddy` allocator using reference counting.
/// The allocator is automatically cleaned up when all references are dropped.
///
/// Refer to the module-level documentation for usage examples.
pub struct GpuBuddy(Arc<GpuBuddyInner>);

impl GpuBuddy {
    /// Create a new buddy allocator.
    ///
    /// The allocator manages a contiguous address space of the given size, with the
    /// specified minimum allocation unit (chunk_size must be at least 4KB).
    ///
    /// # Errors
    ///
    /// Returns an error if allocating the reference-counted state fails or if
    /// `gpu_buddy_init()` reports a failure.
    pub fn new(params: GpuBuddyParams) -> Result<Self> {
        Arc::pin_init(GpuBuddyInner::new(params), GFP_KERNEL).map(Self)
    }

    /// Get the base offset for allocations.
    pub fn base_offset(&self) -> u64 {
        self.0.params.base_offset
    }

    /// Get the chunk size (minimum allocation unit).
    pub fn chunk_size(&self) -> Alignment {
        self.0.params.chunk_size
    }

    /// Get the total managed size.
    pub fn size(&self) -> u64 {
        self.0.params.size
    }

    /// Get the available (free) memory in bytes.
    ///
    /// The value is read under the allocator lock; it may already be stale by
    /// the time the caller uses it if other threads allocate or free blocks.
    pub fn avail(&self) -> u64 {
        let guard = self.0.lock();

        // SAFETY: Per the type invariant, `inner` contains an initialized allocator.
        // `guard` provides exclusive access.
        unsafe { (*guard.as_raw()).avail }
    }

    /// Allocate blocks from the buddy allocator.
    ///
    /// Returns a pin-initializer for [`AllocatedBlocks`].
    ///
    /// The actual allocation happens when the returned initializer is run
    /// (e.g. via `KBox::pin_init`), not when this method is called. The
    /// initializer holds its own reference to the allocator.
    ///
    /// # Errors
    ///
    /// The initializer fails with `EINVAL` if `mode` is
    /// [`GpuBuddyAllocMode::Range`] with an empty range, or with the error
    /// reported by `gpu_buddy_alloc_blocks()`.
    pub fn alloc_blocks(
        &self,
        mode: GpuBuddyAllocMode,
        size: u64,
        min_block_size: Alignment,
        flags: impl Into<GpuBuddyAllocFlags>,
    ) -> impl PinInit<AllocatedBlocks, Error> {
        let buddy_arc = Arc::clone(&self.0);
        let (start, end) = mode.range();
        let mode_flags = mode.as_flags();
        let modifier_flags = flags.into();

        // Create pin-initializer that initializes list and allocates blocks.
        try_pin_init!(AllocatedBlocks {
            buddy: buddy_arc,
            list <- CListHead::new(),
            // Runs after `buddy` and `list` are initialized; both are referred
            // to by field name below.
            _: {
                // Reject zero-sized or inverted ranges.
                if let GpuBuddyAllocMode::Range(range) = &mode {
                    if range.is_empty() {
                        Err::<(), Error>(EINVAL)?;
                    }
                }

                // Lock while allocating to serialize with concurrent frees.
                let guard = buddy.lock();

                // SAFETY: Per the type invariant, `inner` contains an initialized
                // allocator. `guard` provides exclusive access.
                to_result(unsafe {
                    bindings::gpu_buddy_alloc_blocks(
                        guard.as_raw(),
                        start,
                        end,
                        size,
                        min_block_size.as_usize() as u64,
                        list.as_raw(),
                        mode_flags | usize::from(modifier_flags),
                    )
                })?
            }
        })
    }
}

/// Allocated blocks from the buddy allocator with automatic cleanup.
///
/// This structure owns a list of allocated blocks and ensures they are
/// automatically freed when dropped. Use `iter()` to iterate over all
/// allocated blocks.
///
/// # Invariants
///
/// - `list` is an initialized, valid list head containing allocated blocks.
#[pin_data(PinnedDrop)]
pub struct AllocatedBlocks {
    /// Sentinel list head the C allocator links the allocated blocks onto.
    #[pin]
    list: CListHead,
    /// Keeps the owning allocator alive so the blocks can be freed on drop.
    buddy: Arc<GpuBuddyInner>,
}

impl AllocatedBlocks {
    /// Check if the block list is empty.
    pub fn is_empty(&self) -> bool {
        // An empty list head points to itself.
        !self.list.is_linked()
    }

    /// Iterate over allocated blocks.
    ///
    /// Returns an iterator yielding [`AllocatedBlock`] values. Each [`AllocatedBlock`]
    /// borrows `self` and is only valid for the duration of that borrow.
    pub fn iter(&self) -> impl Iterator<Item = AllocatedBlock<'_>> + '_ {
        let head = self.list.as_raw();
        // SAFETY: Per the type invariant, `list` is an initialized sentinel `list_head`
        // and is not concurrently modified (we hold a `&self` borrow). The list contains
        // `gpu_buddy_block` items linked via `__bindgen_anon_1.link`. `Block` is
        // `#[repr(transparent)]` over `gpu_buddy_block`.
        let clist = unsafe {
            clist_create!(
                head,
                Block,
                bindings::gpu_buddy_block,
                __bindgen_anon_1.link
            )
        };

        // Pair every raw block with `self` so offsets and sizes can be computed
        // from the allocator parameters.
        clist
            .iter()
            .map(|this| AllocatedBlock { this, blocks: self })
    }
}

#[pinned_drop]
impl PinnedDrop for AllocatedBlocks {
    /// Returns all blocks on `list` to the allocator, under the allocator lock.
    fn drop(self: Pin<&mut Self>) {
        let guard = self.buddy.lock();

        // SAFETY:
        // - list is valid per the type's invariants.
        // - guard provides exclusive access to the allocator.
        unsafe {
            bindings::gpu_buddy_free_list(guard.as_raw(), self.list.as_raw(), 0);
        }
    }
}

/// A GPU buddy block.
///
/// Transparent wrapper over C `gpu_buddy_block` structure. This type is returned
/// as references during iteration over [`AllocatedBlocks`].
///
/// # Invariants
///
/// The inner [`Opaque`] contains a valid, allocated `gpu_buddy_block`.
#[repr(transparent)]
struct Block(Opaque<bindings::gpu_buddy_block>);

impl Block {
    /// Get a raw pointer to the underlying C block.
    fn as_raw(&self) -> *mut bindings::gpu_buddy_block {
        self.0.get()
    }

    /// Get the block's raw offset in the buddy address space (without base offset).
    fn offset(&self) -> u64 {
        // SAFETY: `self.as_raw()` is valid per the type's invariants.
        unsafe { bindings::gpu_buddy_block_offset(self.as_raw()) }
    }

    /// Get the block order.
    fn order(&self) -> u32 {
        // SAFETY: `self.as_raw()` is valid per the type's invariants.
        unsafe { bindings::gpu_buddy_block_order(self.as_raw()) }
    }
}

// SAFETY: `Block` is a wrapper around `gpu_buddy_block` which can be
// sent across threads safely.
unsafe impl Send for Block {}

// SAFETY: `Block` is only accessed through shared references after
// allocation, and thus safe to access concurrently across threads.
unsafe impl Sync for Block {}

/// A buddy block paired with its owning [`AllocatedBlocks`] context.
///
/// Unlike a raw block, which only knows its offset within the buddy address
/// space, an [`AllocatedBlock`] also has access to the allocator's `base_offset`
/// and `chunk_size`, enabling it to compute absolute offsets and byte sizes.
///
/// Returned by [`AllocatedBlocks::iter()`].
pub struct AllocatedBlock<'a> {
    /// The raw block this value describes.
    this: &'a Block,
    /// The allocation the block belongs to; gives access to the allocator parameters.
    blocks: &'a AllocatedBlocks,
}

impl AllocatedBlock<'_> {
    /// Get the block's offset in the address space.
    ///
    /// Returns the absolute offset including the allocator's base offset.
    /// This is the actual address to use for accessing the allocated memory.
    pub fn offset(&self) -> u64 {
        self.blocks.buddy.params.base_offset + self.this.offset()
    }

    /// Get the block order (size = chunk_size << order).
    pub fn order(&self) -> u32 {
        self.this.order()
    }

    /// Get the block's size in bytes.
    ///
    /// Computed as `chunk_size << order`.
    pub fn size(&self) -> u64 {
        (self.blocks.buddy.params.chunk_size.as_usize() as u64) << self.this.order()
    }
}

+9

rust/kernel/interop.rs

// SPDX-License-Identifier: GPL-2.0

//! Infrastructure for interfacing Rust code with C kernel subsystems.
//!
//! This module is intended for low-level, unsafe Rust infrastructure code
//! that interoperates between Rust and C. It is *not* for use directly in
//! Rust drivers.

// Abstractions over C `list_head`-based lists (`interop/list.rs`).
pub mod list;

+339

rust/kernel/interop/list.rs

// SPDX-License-Identifier: GPL-2.0

//! Rust interface for C doubly circular intrusive linked lists.
//!
//! This module provides Rust abstractions for iterating over C `list_head`-based
//! linked lists. It should only be used for cases where C and Rust code share
//! direct access to the same linked list through a C interop interface.
//!
//! Note: This *must not* be used by Rust components that just need a linked list
//! primitive. Use [`kernel::list::List`] instead.
//!
//! # Examples
//!
//! ```
//! use kernel::{
//!     bindings,
//!     interop::list::clist_create,
//!     types::Opaque,
//! };
//! # // Create test list with values (0, 10, 20) - normally done by C code but it is
//! # // emulated here for doctests using the C bindings.
//! # use core::mem::MaybeUninit;
//! #
//! # /// C struct with embedded `list_head` (typically will be allocated by C code).
//! # #[repr(C)]
//! # pub struct SampleItemC {
//! #     pub value: i32,
//! #     pub link: bindings::list_head,
//! # }
//! #
//! # let mut head = MaybeUninit::<bindings::list_head>::uninit();
//! #
//! # let head = head.as_mut_ptr();
//! # // SAFETY: `head` and all the items are test objects allocated in this scope.
//! # unsafe { bindings::INIT_LIST_HEAD(head) };
//! #
//! # let mut items = [
//! #     MaybeUninit::<SampleItemC>::uninit(),
//! #     MaybeUninit::<SampleItemC>::uninit(),
//! #     MaybeUninit::<SampleItemC>::uninit(),
//! # ];
//! #
//! # for (i, item) in items.iter_mut().enumerate() {
//! #     let ptr = item.as_mut_ptr();
//! #     // SAFETY: `ptr` points to a valid `MaybeUninit<SampleItemC>`.
//! #     unsafe { (*ptr).value = i as i32 * 10 };
//! #     // SAFETY: `&raw mut` creates a pointer valid for `INIT_LIST_HEAD`.
//! #     unsafe { bindings::INIT_LIST_HEAD(&raw mut (*ptr).link) };
//! #     // SAFETY: `link` was just initialized and `head` is a valid list head.
//! #     unsafe { bindings::list_add_tail(&mut (*ptr).link, head) };
//! # }
//!
//! /// Rust wrapper for the C struct.
//! ///
//! /// The list item struct in this example is defined in C code as:
//! ///
//! /// ```c
//! /// struct SampleItemC {
//! ///     int value;
//! ///     struct list_head link;
//! /// };
//! /// ```
//! #[repr(transparent)]
//! pub struct Item(Opaque<SampleItemC>);
//!
//! impl Item {
//!     pub fn value(&self) -> i32 {
//!         // SAFETY: `Item` has the same layout as `SampleItemC`.
//!         unsafe { (*self.0.get()).value }
//!     }
//! }
//!
//! // Create typed [`CList`] from sentinel head.
//! // SAFETY: `head` is valid and initialized, items are `SampleItemC` with
//! // embedded `link` field, and `Item` is `#[repr(transparent)]` over `SampleItemC`.
//! let list = unsafe { clist_create!(head, Item, SampleItemC, link) };
//!
//! // Iterate directly over typed items.
//! let mut found_0 = false;
//! let mut found_10 = false;
//! let mut found_20 = false;
//!
//! for item in list.iter() {
//!     let val = item.value();
//!     if val == 0 { found_0 = true; }
//!     if val == 10 { found_10 = true; }
//!     if val == 20 { found_20 = true; }
//! }
//!
//! assert!(found_0 && found_10 && found_20);
//! ```

use core::{
    iter::FusedIterator,
    marker::PhantomData, //
};

use crate::{
    bindings,
    types::Opaque, //
};

use pin_init::{
    pin_data,
    pin_init,
    PinInit, //
};

/// FFI wrapper for a C `list_head` object used in intrusive linked lists.
///
/// # Invariants
///
/// - The underlying `list_head` is initialized with valid non-`NULL` `next`/`prev` pointers.
#[pin_data]
#[repr(transparent)]
pub struct CListHead {
    /// The wrapped C `list_head`.
    #[pin]
    inner: Opaque<bindings::list_head>,
}

impl CListHead {
    /// Create a `&CListHead` reference from a raw `list_head` pointer.
    ///
    /// # Safety
    ///
    /// - `ptr` must be a valid pointer to an initialized `list_head` (e.g. via
    ///   `INIT_LIST_HEAD()`), with valid non-`NULL` `next`/`prev` pointers.
    /// - `ptr` must remain valid for the lifetime `'a`.
    /// - The list and all linked `list_head` nodes must not be modified from
    ///   anywhere for the lifetime `'a`, unless done so via any [`CListHead`] APIs.
    #[inline]
    pub unsafe fn from_raw<'a>(ptr: *mut bindings::list_head) -> &'a Self {
        // SAFETY:
        // - `CListHead` has the same layout as `list_head`.
        // - `ptr` is valid and unmodified for `'a` per caller guarantees.
        unsafe { &*ptr.cast() }
    }

    /// Get the raw `list_head` pointer.
    #[inline]
    pub fn as_raw(&self) -> *mut bindings::list_head {
        self.inner.get()
    }

    /// Get the next [`CListHead`] in the list.
    ///
    /// For a node that points to itself (an empty sentinel), this returns the
    /// node itself.
    #[inline]
    pub fn next(&self) -> &Self {
        let raw = self.as_raw();
        // SAFETY:
        // - `self.as_raw()` is valid and initialized per type invariants.
        // - The `next` pointer is valid and non-`NULL` per type invariants
        //   (initialized via `INIT_LIST_HEAD()` or equivalent).
        unsafe { Self::from_raw((*raw).next) }
    }

    /// Check if this node is linked in a list (not isolated).
    ///
    /// A node counts as linked only if neither its `next` nor its `prev`
    /// pointer refers back to the node itself.
    #[inline]
    pub fn is_linked(&self) -> bool {
        let raw = self.as_raw();
        // SAFETY: `self.as_raw()` is valid per type invariants.
        unsafe { (*raw).next != raw && (*raw).prev != raw }
    }

    /// Returns a pin-initializer for the list head.
    ///
    /// The resulting head is initialized with `INIT_LIST_HEAD()`, i.e. as an
    /// empty list.
    pub fn new() -> impl PinInit<Self> {
        pin_init!(Self {
            // SAFETY: `INIT_LIST_HEAD` initializes `slot` to a valid empty list.
            inner <- Opaque::ffi_init(|slot| unsafe { bindings::INIT_LIST_HEAD(slot) }),
        })
    }
}

// SAFETY: `list_head` contains no thread-bound state; it only holds
// `next`/`prev` pointers.
unsafe impl Send for CListHead {}

// SAFETY: `CListHead` can be shared among threads as modifications are
// not allowed at the moment.
unsafe impl Sync for CListHead {}

// Two `CListHead`s are equal only if they are the same object in memory;
// the `next`/`prev` contents are not compared.
impl PartialEq for CListHead {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        core::ptr::eq(self, other)
    }
}

impl Eq for CListHead {}

/// Low-level iterator over `list_head` nodes.
///
/// An iterator used to iterate over a C intrusive linked list (`list_head`). The caller has to
/// perform conversion of returned [`CListHead`] to an item (using [`container_of`] or similar).
///
/// # Invariants
///
/// `current` and `sentinel` are valid references into an initialized linked list.
struct CListHeadIter<'a> {
    /// Current position in the list.
    current: &'a CListHead,
    /// The sentinel head (used to detect end of iteration).
    sentinel: &'a CListHead,
}

impl<'a> Iterator for CListHeadIter<'a> {
    type Item = &'a CListHead;

    /// Yields the node at the current position and advances, stopping once the
    /// sentinel is reached. The sentinel itself is never yielded.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // Check if we've reached the sentinel (end of list).
        if self.current == self.sentinel {
            return None;
        }

        let item = self.current;
        self.current = item.next();
        Some(item)
    }
}

// Once `current` equals `sentinel` it is never advanced again, so `next()`
// keeps returning `None`.
impl<'a> FusedIterator for CListHeadIter<'a> {}

/// A typed C linked list with a sentinel head intended for FFI use-cases where
/// a C subsystem manages a linked list that Rust code needs to read. Generally
/// required only for special cases.
///
/// A sentinel head [`CListHead`] represents the entire linked list and can be used
/// for iteration over items of type `T`; it is not associated with a specific item.
///
/// The const generic `OFFSET` specifies the byte offset of the `list_head` field within
/// the struct that `T` wraps.
///
/// # Invariants
///
/// - The sentinel [`CListHead`] has valid non-`NULL` `next`/`prev` pointers.
/// - `OFFSET` is the byte offset of the `list_head` field within the struct that `T` wraps.
/// - All the list's `list_head` nodes have valid non-`NULL` `next`/`prev` pointers.
#[repr(transparent)]
pub struct CList<T, const OFFSET: usize>(CListHead, PhantomData<T>);

impl<T, const OFFSET: usize> CList<T, OFFSET> {
    /// Create a typed [`CList`] reference from a raw sentinel `list_head` pointer.
    ///
    /// # Safety
    ///
    /// - `ptr` must be a valid pointer to an initialized sentinel `list_head` (e.g. via
    ///   `INIT_LIST_HEAD()`), with valid non-`NULL` `next`/`prev` pointers.
    /// - `ptr` must remain valid for the lifetime `'a`.
    /// - The list and all linked nodes must not be concurrently modified for the lifetime `'a`.
    /// - The list must contain items where the `list_head` field is at byte offset `OFFSET`.
    /// - `T` must be `#[repr(transparent)]` over the C struct.
    #[inline]
    pub unsafe fn from_raw<'a>(ptr: *mut bindings::list_head) -> &'a Self {
        // SAFETY:
        // - `CList` has the same layout as `CListHead` due to `#[repr(transparent)]`.
        // - Caller guarantees `ptr` is a valid, sentinel `list_head` object.
        unsafe { &*ptr.cast() }
    }

    /// Check if the list is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        !self.0.is_linked()
    }

    /// Create an iterator over typed items.
    ///
    /// Iteration starts at the node following the sentinel and ends when the
    /// sentinel is reached again.
    #[inline]
    pub fn iter(&self) -> CListIter<'_, T, OFFSET> {
        let head = &self.0;
        CListIter {
            head_iter: CListHeadIter {
                current: head.next(),
                sentinel: head,
            },
            _phantom: PhantomData,
        }
    }
}

/// High-level iterator over typed list items.
pub struct CListIter<'a, T, const OFFSET: usize> {
    /// Underlying iterator over the raw `list_head` nodes.
    head_iter: CListHeadIter<'a>,
    /// Ties the iterator to the item type and lifetime it yields.
    _phantom: PhantomData<&'a T>,
}

impl<'a, T, const OFFSET: usize> Iterator for CListIter<'a, T, OFFSET> {
    type Item = &'a T;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let head = self.head_iter.next()?;

        // Convert to item using `OFFSET`.
        //
        // SAFETY: The pointer calculation is valid because `OFFSET` is derived
        // from `offset_of!` per type invariants.
        Some(unsafe { &*head.as_raw().byte_sub(OFFSET).cast::<T>() })
    }
}

impl<'a, T, const OFFSET: usize> FusedIterator for CListIter<'a, T, OFFSET> {}

/// Create a C doubly-circular linked list interface [`CList`] from a raw `list_head` pointer.
///
/// This macro creates a `CList<T, OFFSET>` that can iterate over items of type `$rust_type`
/// linked via the `$field` field in the underlying C struct `$c_type`.
///
/// # Arguments
///
/// - `$head`: Raw pointer to the sentinel `list_head` object (`*mut bindings::list_head`).
/// - `$rust_type`: Each item's Rust wrapper type.
/// - `$c_type`: Each item's C struct type that contains the embedded `list_head`.
/// - `$field`: The name of the `list_head` field within the C struct.
///
/// # Safety
///
/// The caller must ensure:
///
/// - `$head` is a valid, initialized sentinel `list_head` (e.g. via `INIT_LIST_HEAD()`)
///   pointing to a list that is not concurrently modified for the lifetime of the [`CList`].
/// - The list contains items of type `$c_type` linked via an embedded `$field`.
/// - `$rust_type` is `#[repr(transparent)]` over `$c_type` or has compatible layout.
///
/// # Examples
///
/// Refer to the examples in the [`crate::interop::list`] module documentation.
#[macro_export]
macro_rules! clist_create {
    ($head:expr, $rust_type:ty, $c_type:ty, $($field:tt).+) => {{
        // Compile-time check that field path is a `list_head`.
        // The closure is only type-checked, never called.
        let _: fn(*const $c_type) -> *const $crate::bindings::list_head =
            |p| &raw const (*p).$($field).+;

        // Calculate offset and create `CList`.
        const OFFSET: usize = ::core::mem::offset_of!($c_type, $($field).+);
        $crate::interop::list::CList::<$rust_type, OFFSET>::from_raw($head)
    }};
}
pub use clist_create;

+491 -291

rust/kernel/io.rs

··· 11 11 12 12 pub mod mem; 13 13 pub mod poll; 14 + pub mod register; 14 15 pub mod resource; 15 16 17 + pub use crate::register; 16 18 pub use resource::Resource; 19 + 20 + use register::LocatedRegister; 17 21 18 22 /// Physical address type. 19 23 /// ··· 141 137 #[repr(transparent)] 142 138 pub struct Mmio<const SIZE: usize = 0>(MmioRaw<SIZE>); 143 139 144 - /// Internal helper macros used to invoke C MMIO read functions. 145 - /// 146 - /// This macro is intended to be used by higher-level MMIO access macros (io_define_read) and 147 - /// provides a unified expansion for infallible vs. fallible read semantics. It emits a direct call 148 - /// into the corresponding C helper and performs the required cast to the Rust return type. 149 - /// 150 - /// # Parameters 151 - /// 152 - /// * `$c_fn` – The C function performing the MMIO read. 153 - /// * `$self` – The I/O backend object. 154 - /// * `$ty` – The type of the value to be read. 155 - /// * `$addr` – The MMIO address to read. 156 - /// 157 - /// This macro does not perform any validation; all invariants must be upheld by the higher-level 158 - /// abstraction invoking it. 159 - macro_rules! call_mmio_read { 160 - (infallible, $c_fn:ident, $self:ident, $type:ty, $addr:expr) => { 161 - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. 162 - unsafe { bindings::$c_fn($addr as *const c_void) as $type } 163 - }; 164 - 165 - (fallible, $c_fn:ident, $self:ident, $type:ty, $addr:expr) => {{ 166 - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. 167 - Ok(unsafe { bindings::$c_fn($addr as *const c_void) as $type }) 168 - }}; 169 - } 170 - 171 - /// Internal helper macros used to invoke C MMIO write functions. 172 - /// 173 - /// This macro is intended to be used by higher-level MMIO access macros (io_define_write) and 174 - /// provides a unified expansion for infallible vs. fallible write semantics. 
It emits a direct call 175 - /// into the corresponding C helper and performs the required cast to the Rust return type. 176 - /// 177 - /// # Parameters 178 - /// 179 - /// * `$c_fn` – The C function performing the MMIO write. 180 - /// * `$self` – The I/O backend object. 181 - /// * `$ty` – The type of the written value. 182 - /// * `$addr` – The MMIO address to write. 183 - /// * `$value` – The value to write. 184 - /// 185 - /// This macro does not perform any validation; all invariants must be upheld by the higher-level 186 - /// abstraction invoking it. 187 - macro_rules! call_mmio_write { 188 - (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => { 189 - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. 190 - unsafe { bindings::$c_fn($value, $addr as *mut c_void) } 191 - }; 192 - 193 - (fallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => {{ 194 - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. 195 - unsafe { bindings::$c_fn($value, $addr as *mut c_void) }; 196 - Ok(()) 197 - }}; 198 - } 199 - 200 - /// Generates an accessor method for reading from an I/O backend. 201 - /// 202 - /// This macro reduces boilerplate by automatically generating either compile-time bounds-checked 203 - /// (infallible) or runtime bounds-checked (fallible) read methods. It abstracts the address 204 - /// calculation and bounds checking, and delegates the actual I/O read operation to a specified 205 - /// helper macro, making it generic over different I/O backends. 206 - /// 207 - /// # Parameters 208 - /// 209 - /// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on 210 - /// `IoKnownSize` for compile-time checks and returns the value directly. `fallible` performs 211 - /// runtime checks against `maxsize()` and returns a `Result<T>`. 
212 - /// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g., 213 - /// `#[cfg(CONFIG_64BIT)]` or inline directives). 214 - /// * `$vis:vis` - The visibility of the generated method (e.g., `pub`). 215 - /// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `read32`, 216 - /// `try_read8`). 217 - /// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call 218 - /// (e.g., `call_mmio_read`). 219 - /// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the 220 - /// `$call_macro`. 221 - /// * `$type_name:ty` - The Rust type of the value being read (e.g., `u8`, `u32`). 222 - #[macro_export] 223 - macro_rules! io_define_read { 224 - (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) -> 225 - $type_name:ty) => { 226 - /// Read IO data from a given offset known at compile time. 227 - /// 228 - /// Bound checks are performed on compile time, hence if the offset is not known at compile 229 - /// time, the build will fail. 230 - $(#[$attr])* 231 - // Always inline to optimize out error path of `io_addr_assert`. 232 - #[inline(always)] 233 - $vis fn $name(&self, offset: usize) -> $type_name { 234 - let addr = self.io_addr_assert::<$type_name>(offset); 235 - 236 - // SAFETY: By the type invariant `addr` is a valid address for IO operations. 237 - $call_macro!(infallible, $c_fn, self, $type_name, addr) 238 - } 239 - }; 240 - 241 - (fallible, $(#[$attr:meta])* $vis:vis $try_name:ident, $call_macro:ident($c_fn:ident) -> 242 - $type_name:ty) => { 243 - /// Read IO data from a given offset. 244 - /// 245 - /// Bound checks are performed on runtime, it fails if the offset (plus the type size) is 246 - /// out of bounds. 
247 - $(#[$attr])* 248 - $vis fn $try_name(&self, offset: usize) -> Result<$type_name> { 249 - let addr = self.io_addr::<$type_name>(offset)?; 250 - 251 - // SAFETY: By the type invariant `addr` is a valid address for IO operations. 252 - $call_macro!(fallible, $c_fn, self, $type_name, addr) 253 - } 254 - }; 255 - } 256 - pub use io_define_read; 257 - 258 - /// Generates an accessor method for writing to an I/O backend. 259 - /// 260 - /// This macro reduces boilerplate by automatically generating either compile-time bounds-checked 261 - /// (infallible) or runtime bounds-checked (fallible) write methods. It abstracts the address 262 - /// calculation and bounds checking, and delegates the actual I/O write operation to a specified 263 - /// helper macro, making it generic over different I/O backends. 264 - /// 265 - /// # Parameters 266 - /// 267 - /// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on 268 - /// `IoKnownSize` for compile-time checks and returns `()`. `fallible` performs runtime checks 269 - /// against `maxsize()` and returns a `Result`. 270 - /// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g., 271 - /// `#[cfg(CONFIG_64BIT)]` or inline directives). 272 - /// * `$vis:vis` - The visibility of the generated method (e.g., `pub`). 273 - /// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `write32`, 274 - /// `try_write8`). 275 - /// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call 276 - /// (e.g., `call_mmio_write`). 277 - /// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the 278 - /// `$call_macro`. 279 - /// * `$type_name:ty` - The Rust type of the value being written (e.g., `u8`, `u32`). Note the use 280 - /// of `<-` before the type to denote a write operation. 281 - #[macro_export] 282 - macro_rules! 
io_define_write { 283 - (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) <- 284 - $type_name:ty) => { 285 - /// Write IO data from a given offset known at compile time. 286 - /// 287 - /// Bound checks are performed on compile time, hence if the offset is not known at compile 288 - /// time, the build will fail. 289 - $(#[$attr])* 290 - // Always inline to optimize out error path of `io_addr_assert`. 291 - #[inline(always)] 292 - $vis fn $name(&self, value: $type_name, offset: usize) { 293 - let addr = self.io_addr_assert::<$type_name>(offset); 294 - 295 - $call_macro!(infallible, $c_fn, self, $type_name, addr, value); 296 - } 297 - }; 298 - 299 - (fallible, $(#[$attr:meta])* $vis:vis $try_name:ident, $call_macro:ident($c_fn:ident) <- 300 - $type_name:ty) => { 301 - /// Write IO data from a given offset. 302 - /// 303 - /// Bound checks are performed on runtime, it fails if the offset (plus the type size) is 304 - /// out of bounds. 305 - $(#[$attr])* 306 - $vis fn $try_name(&self, value: $type_name, offset: usize) -> Result { 307 - let addr = self.io_addr::<$type_name>(offset)?; 308 - 309 - $call_macro!(fallible, $c_fn, self, $type_name, addr, value) 310 - } 311 - }; 312 - } 313 - pub use io_define_write; 314 - 315 140 /// Checks whether an access of type `U` at the given `offset` 316 141 /// is valid within this region. 317 142 #[inline] ··· 153 320 } 154 321 } 155 322 156 - /// Marker trait indicating that an I/O backend supports operations of a certain type. 323 + /// Trait indicating that an I/O backend supports operations of a certain type and providing an 324 + /// implementation for these operations. 157 325 /// 158 326 /// Different I/O backends can implement this trait to expose only the operations they support. 
159 327 /// 160 328 /// For example, a PCI configuration space may implement `IoCapable<u8>`, `IoCapable<u16>`, 161 329 /// and `IoCapable<u32>`, but not `IoCapable<u64>`, while an MMIO region on a 64-bit 162 330 /// system might implement all four. 163 - pub trait IoCapable<T> {} 331 + pub trait IoCapable<T> { 332 + /// Performs an I/O read of type `T` at `address` and returns the result. 333 + /// 334 + /// # Safety 335 + /// 336 + /// The range `[address..address + size_of::<T>()]` must be within the bounds of `Self`. 337 + unsafe fn io_read(&self, address: usize) -> T; 338 + 339 + /// Performs an I/O write of `value` at `address`. 340 + /// 341 + /// # Safety 342 + /// 343 + /// The range `[address..address + size_of::<T>()]` must be within the bounds of `Self`. 344 + unsafe fn io_write(&self, value: T, address: usize); 345 + } 346 + 347 + /// Describes a given I/O location: its offset, width, and type to convert the raw value from and 348 + /// into. 349 + /// 350 + /// This trait is the key abstraction allowing [`Io::read`], [`Io::write`], and [`Io::update`] (and 351 + /// their fallible [`try_read`](Io::try_read), [`try_write`](Io::try_write) and 352 + /// [`try_update`](Io::try_update) counterparts) to work uniformly with both raw [`usize`] offsets 353 + /// (for primitive types like [`u32`]) and typed ones (like those generated by the [`register!`] 354 + /// macro). 355 + /// 356 + /// An `IoLoc<T>` carries three pieces of information: 357 + /// 358 + /// - The offset to access (returned by [`IoLoc::offset`]), 359 + /// - The width of the access (determined by [`IoLoc::IoType`]), 360 + /// - The type `T` in which the raw data is returned or provided. 361 + /// 362 + /// `T` and `IoLoc::IoType` may differ: for instance, a typed register has `T` = the register type 363 + /// with its bitfields, and `IoType` = its backing primitive (e.g. `u32`). 
364 + pub trait IoLoc<T> { 365 + /// Size ([`u8`], [`u16`], etc) of the I/O performed on the returned [`offset`](IoLoc::offset). 366 + type IoType: Into<T> + From<T>; 367 + 368 + /// Consumes `self` and returns the offset of this location. 369 + fn offset(self) -> usize; 370 + } 371 + 372 + /// Implements [`IoLoc<$ty>`] for [`usize`], allowing [`usize`] to be used as a parameter of 373 + /// [`Io::read`] and [`Io::write`]. 374 + macro_rules! impl_usize_ioloc { 375 + ($($ty:ty),*) => { 376 + $( 377 + impl IoLoc<$ty> for usize { 378 + type IoType = $ty; 379 + 380 + #[inline(always)] 381 + fn offset(self) -> usize { 382 + self 383 + } 384 + } 385 + )* 386 + } 387 + } 388 + 389 + // Provide the ability to read any primitive type from a [`usize`]. 390 + impl_usize_ioloc!(u8, u16, u32, u64); 164 391 165 392 /// Types implementing this trait (e.g. MMIO BARs or PCI config regions) 166 393 /// can perform I/O operations on regions of memory. ··· 262 369 263 370 /// Fallible 8-bit read with runtime bounds check. 264 371 #[inline(always)] 265 - fn try_read8(&self, _offset: usize) -> Result<u8> 372 + fn try_read8(&self, offset: usize) -> Result<u8> 266 373 where 267 374 Self: IoCapable<u8>, 268 375 { 269 - build_error!("Backend does not support fallible 8-bit read") 376 + self.try_read(offset) 270 377 } 271 378 272 379 /// Fallible 16-bit read with runtime bounds check. 273 380 #[inline(always)] 274 - fn try_read16(&self, _offset: usize) -> Result<u16> 381 + fn try_read16(&self, offset: usize) -> Result<u16> 275 382 where 276 383 Self: IoCapable<u16>, 277 384 { 278 - build_error!("Backend does not support fallible 16-bit read") 385 + self.try_read(offset) 279 386 } 280 387 281 388 /// Fallible 32-bit read with runtime bounds check. 
282 389 #[inline(always)] 283 - fn try_read32(&self, _offset: usize) -> Result<u32> 390 + fn try_read32(&self, offset: usize) -> Result<u32> 284 391 where 285 392 Self: IoCapable<u32>, 286 393 { 287 - build_error!("Backend does not support fallible 32-bit read") 394 + self.try_read(offset) 288 395 } 289 396 290 397 /// Fallible 64-bit read with runtime bounds check. 291 398 #[inline(always)] 292 - fn try_read64(&self, _offset: usize) -> Result<u64> 399 + fn try_read64(&self, offset: usize) -> Result<u64> 293 400 where 294 401 Self: IoCapable<u64>, 295 402 { 296 - build_error!("Backend does not support fallible 64-bit read") 403 + self.try_read(offset) 297 404 } 298 405 299 406 /// Fallible 8-bit write with runtime bounds check. 300 407 #[inline(always)] 301 - fn try_write8(&self, _value: u8, _offset: usize) -> Result 408 + fn try_write8(&self, value: u8, offset: usize) -> Result 302 409 where 303 410 Self: IoCapable<u8>, 304 411 { 305 - build_error!("Backend does not support fallible 8-bit write") 412 + self.try_write(offset, value) 306 413 } 307 414 308 415 /// Fallible 16-bit write with runtime bounds check. 309 416 #[inline(always)] 310 - fn try_write16(&self, _value: u16, _offset: usize) -> Result 417 + fn try_write16(&self, value: u16, offset: usize) -> Result 311 418 where 312 419 Self: IoCapable<u16>, 313 420 { 314 - build_error!("Backend does not support fallible 16-bit write") 421 + self.try_write(offset, value) 315 422 } 316 423 317 424 /// Fallible 32-bit write with runtime bounds check. 318 425 #[inline(always)] 319 - fn try_write32(&self, _value: u32, _offset: usize) -> Result 426 + fn try_write32(&self, value: u32, offset: usize) -> Result 320 427 where 321 428 Self: IoCapable<u32>, 322 429 { 323 - build_error!("Backend does not support fallible 32-bit write") 430 + self.try_write(offset, value) 324 431 } 325 432 326 433 /// Fallible 64-bit write with runtime bounds check. 
327 434 #[inline(always)] 328 - fn try_write64(&self, _value: u64, _offset: usize) -> Result 435 + fn try_write64(&self, value: u64, offset: usize) -> Result 329 436 where 330 437 Self: IoCapable<u64>, 331 438 { 332 - build_error!("Backend does not support fallible 64-bit write") 439 + self.try_write(offset, value) 333 440 } 334 441 335 442 /// Infallible 8-bit read with compile-time bounds check. 336 443 #[inline(always)] 337 - fn read8(&self, _offset: usize) -> u8 444 + fn read8(&self, offset: usize) -> u8 338 445 where 339 446 Self: IoKnownSize + IoCapable<u8>, 340 447 { 341 - build_error!("Backend does not support infallible 8-bit read") 448 + self.read(offset) 342 449 } 343 450 344 451 /// Infallible 16-bit read with compile-time bounds check. 345 452 #[inline(always)] 346 - fn read16(&self, _offset: usize) -> u16 453 + fn read16(&self, offset: usize) -> u16 347 454 where 348 455 Self: IoKnownSize + IoCapable<u16>, 349 456 { 350 - build_error!("Backend does not support infallible 16-bit read") 457 + self.read(offset) 351 458 } 352 459 353 460 /// Infallible 32-bit read with compile-time bounds check. 354 461 #[inline(always)] 355 - fn read32(&self, _offset: usize) -> u32 462 + fn read32(&self, offset: usize) -> u32 356 463 where 357 464 Self: IoKnownSize + IoCapable<u32>, 358 465 { 359 - build_error!("Backend does not support infallible 32-bit read") 466 + self.read(offset) 360 467 } 361 468 362 469 /// Infallible 64-bit read with compile-time bounds check. 363 470 #[inline(always)] 364 - fn read64(&self, _offset: usize) -> u64 471 + fn read64(&self, offset: usize) -> u64 365 472 where 366 473 Self: IoKnownSize + IoCapable<u64>, 367 474 { 368 - build_error!("Backend does not support infallible 64-bit read") 475 + self.read(offset) 369 476 } 370 477 371 478 /// Infallible 8-bit write with compile-time bounds check. 
372 479 #[inline(always)] 373 - fn write8(&self, _value: u8, _offset: usize) 480 + fn write8(&self, value: u8, offset: usize) 374 481 where 375 482 Self: IoKnownSize + IoCapable<u8>, 376 483 { 377 - build_error!("Backend does not support infallible 8-bit write") 484 + self.write(offset, value) 378 485 } 379 486 380 487 /// Infallible 16-bit write with compile-time bounds check. 381 488 #[inline(always)] 382 - fn write16(&self, _value: u16, _offset: usize) 489 + fn write16(&self, value: u16, offset: usize) 383 490 where 384 491 Self: IoKnownSize + IoCapable<u16>, 385 492 { 386 - build_error!("Backend does not support infallible 16-bit write") 493 + self.write(offset, value) 387 494 } 388 495 389 496 /// Infallible 32-bit write with compile-time bounds check. 390 497 #[inline(always)] 391 - fn write32(&self, _value: u32, _offset: usize) 498 + fn write32(&self, value: u32, offset: usize) 392 499 where 393 500 Self: IoKnownSize + IoCapable<u32>, 394 501 { 395 - build_error!("Backend does not support infallible 32-bit write") 502 + self.write(offset, value) 396 503 } 397 504 398 505 /// Infallible 64-bit write with compile-time bounds check. 399 506 #[inline(always)] 400 - fn write64(&self, _value: u64, _offset: usize) 507 + fn write64(&self, value: u64, offset: usize) 401 508 where 402 509 Self: IoKnownSize + IoCapable<u64>, 403 510 { 404 - build_error!("Backend does not support infallible 64-bit write") 511 + self.write(offset, value) 512 + } 513 + 514 + /// Generic fallible read with runtime bounds check. 515 + /// 516 + /// # Examples 517 + /// 518 + /// Read a primitive type from an I/O address: 519 + /// 520 + /// ```no_run 521 + /// use kernel::io::{ 522 + /// Io, 523 + /// Mmio, 524 + /// }; 525 + /// 526 + /// fn do_reads(io: &Mmio) -> Result { 527 + /// // 32-bit read from address `0x10`. 528 + /// let v: u32 = io.try_read(0x10)?; 529 + /// 530 + /// // 8-bit read from address `0xfff`. 
531 + /// let v: u8 = io.try_read(0xfff)?; 532 + /// 533 + /// Ok(()) 534 + /// } 535 + /// ``` 536 + #[inline(always)] 537 + fn try_read<T, L>(&self, location: L) -> Result<T> 538 + where 539 + L: IoLoc<T>, 540 + Self: IoCapable<L::IoType>, 541 + { 542 + let address = self.io_addr::<L::IoType>(location.offset())?; 543 + 544 + // SAFETY: `address` has been validated by `io_addr`. 545 + Ok(unsafe { self.io_read(address) }.into()) 546 + } 547 + 548 + /// Generic fallible write with runtime bounds check. 549 + /// 550 + /// # Examples 551 + /// 552 + /// Write a primitive type to an I/O address: 553 + /// 554 + /// ```no_run 555 + /// use kernel::io::{ 556 + /// Io, 557 + /// Mmio, 558 + /// }; 559 + /// 560 + /// fn do_writes(io: &Mmio) -> Result { 561 + /// // 32-bit write of value `1` at address `0x10`. 562 + /// io.try_write(0x10, 1u32)?; 563 + /// 564 + /// // 8-bit write of value `0xff` at address `0xfff`. 565 + /// io.try_write(0xfff, 0xffu8)?; 566 + /// 567 + /// Ok(()) 568 + /// } 569 + /// ``` 570 + #[inline(always)] 571 + fn try_write<T, L>(&self, location: L, value: T) -> Result 572 + where 573 + L: IoLoc<T>, 574 + Self: IoCapable<L::IoType>, 575 + { 576 + let address = self.io_addr::<L::IoType>(location.offset())?; 577 + let io_value = value.into(); 578 + 579 + // SAFETY: `address` has been validated by `io_addr`. 580 + unsafe { self.io_write(io_value, address) } 581 + 582 + Ok(()) 583 + } 584 + 585 + /// Generic fallible write of a fully-located register value. 586 + /// 587 + /// # Examples 588 + /// 589 + /// Tuples carrying a location and a value can be used with this method: 590 + /// 591 + /// ```no_run 592 + /// use kernel::io::{ 593 + /// register, 594 + /// Io, 595 + /// Mmio, 596 + /// }; 597 + /// 598 + /// register! 
{ 599 + /// VERSION(u32) @ 0x100 { 600 + /// 15:8 major; 601 + /// 7:0 minor; 602 + /// } 603 + /// } 604 + /// 605 + /// impl VERSION { 606 + /// fn new(major: u8, minor: u8) -> Self { 607 + /// VERSION::zeroed().with_major(major).with_minor(minor) 608 + /// } 609 + /// } 610 + /// 611 + /// fn do_write_reg(io: &Mmio) -> Result { 612 + /// 613 + /// io.try_write_reg(VERSION::new(1, 0)) 614 + /// } 615 + /// ``` 616 + #[inline(always)] 617 + fn try_write_reg<T, L, V>(&self, value: V) -> Result 618 + where 619 + L: IoLoc<T>, 620 + V: LocatedRegister<Location = L, Value = T>, 621 + Self: IoCapable<L::IoType>, 622 + { 623 + let (location, value) = value.into_io_op(); 624 + 625 + self.try_write(location, value) 626 + } 627 + 628 + /// Generic fallible update with runtime bounds check. 629 + /// 630 + /// Note: this does not perform any synchronization. The caller is responsible for ensuring 631 + /// exclusive access if required. 632 + /// 633 + /// # Examples 634 + /// 635 + /// Read the u32 value at address `0x10`, increment it, and store the updated value back: 636 + /// 637 + /// ```no_run 638 + /// use kernel::io::{ 639 + /// Io, 640 + /// Mmio, 641 + /// }; 642 + /// 643 + /// fn do_update(io: &Mmio<0x1000>) -> Result { 644 + /// io.try_update(0x10, |v: u32| { 645 + /// v + 1 646 + /// }) 647 + /// } 648 + /// ``` 649 + #[inline(always)] 650 + fn try_update<T, L, F>(&self, location: L, f: F) -> Result 651 + where 652 + L: IoLoc<T>, 653 + Self: IoCapable<L::IoType>, 654 + F: FnOnce(T) -> T, 655 + { 656 + let address = self.io_addr::<L::IoType>(location.offset())?; 657 + 658 + // SAFETY: `address` has been validated by `io_addr`. 659 + let value: T = unsafe { self.io_read(address) }.into(); 660 + let io_value = f(value).into(); 661 + 662 + // SAFETY: `address` has been validated by `io_addr`. 663 + unsafe { self.io_write(io_value, address) } 664 + 665 + Ok(()) 666 + } 667 + 668 + /// Generic infallible read with compile-time bounds check. 
669 + /// 670 + /// # Examples 671 + /// 672 + /// Read a primitive type from an I/O address: 673 + /// 674 + /// ```no_run 675 + /// use kernel::io::{ 676 + /// Io, 677 + /// Mmio, 678 + /// }; 679 + /// 680 + /// fn do_reads(io: &Mmio<0x1000>) { 681 + /// // 32-bit read from address `0x10`. 682 + /// let v: u32 = io.read(0x10); 683 + /// 684 + /// // 8-bit read from the top of the I/O space. 685 + /// let v: u8 = io.read(0xfff); 686 + /// } 687 + /// ``` 688 + #[inline(always)] 689 + fn read<T, L>(&self, location: L) -> T 690 + where 691 + L: IoLoc<T>, 692 + Self: IoKnownSize + IoCapable<L::IoType>, 693 + { 694 + let address = self.io_addr_assert::<L::IoType>(location.offset()); 695 + 696 + // SAFETY: `address` has been validated by `io_addr_assert`. 697 + unsafe { self.io_read(address) }.into() 698 + } 699 + 700 + /// Generic infallible write with compile-time bounds check. 701 + /// 702 + /// # Examples 703 + /// 704 + /// Write a primitive type to an I/O address: 705 + /// 706 + /// ```no_run 707 + /// use kernel::io::{ 708 + /// Io, 709 + /// Mmio, 710 + /// }; 711 + /// 712 + /// fn do_writes(io: &Mmio<0x1000>) { 713 + /// // 32-bit write of value `1` at address `0x10`. 714 + /// io.write(0x10, 1u32); 715 + /// 716 + /// // 8-bit write of value `0xff` at the top of the I/O space. 717 + /// io.write(0xfff, 0xffu8); 718 + /// } 719 + /// ``` 720 + #[inline(always)] 721 + fn write<T, L>(&self, location: L, value: T) 722 + where 723 + L: IoLoc<T>, 724 + Self: IoKnownSize + IoCapable<L::IoType>, 725 + { 726 + let address = self.io_addr_assert::<L::IoType>(location.offset()); 727 + let io_value = value.into(); 728 + 729 + // SAFETY: `address` has been validated by `io_addr_assert`. 730 + unsafe { self.io_write(io_value, address) } 731 + } 732 + 733 + /// Generic infallible write of a fully-located register value. 
734 + /// 735 + /// # Examples 736 + /// 737 + /// Tuples carrying a location and a value can be used with this method: 738 + /// 739 + /// ```no_run 740 + /// use kernel::io::{ 741 + /// register, 742 + /// Io, 743 + /// Mmio, 744 + /// }; 745 + /// 746 + /// register! { 747 + /// VERSION(u32) @ 0x100 { 748 + /// 15:8 major; 749 + /// 7:0 minor; 750 + /// } 751 + /// } 752 + /// 753 + /// impl VERSION { 754 + /// fn new(major: u8, minor: u8) -> Self { 755 + /// VERSION::zeroed().with_major(major).with_minor(minor) 756 + /// } 757 + /// } 758 + /// 759 + /// fn do_write_reg(io: &Mmio<0x1000>) { 760 + /// io.write_reg(VERSION::new(1, 0)); 761 + /// } 762 + /// ``` 763 + #[inline(always)] 764 + fn write_reg<T, L, V>(&self, value: V) 765 + where 766 + L: IoLoc<T>, 767 + V: LocatedRegister<Location = L, Value = T>, 768 + Self: IoKnownSize + IoCapable<L::IoType>, 769 + { 770 + let (location, value) = value.into_io_op(); 771 + 772 + self.write(location, value) 773 + } 774 + 775 + /// Generic infallible update with compile-time bounds check. 776 + /// 777 + /// Note: this does not perform any synchronization. The caller is responsible for ensuring 778 + /// exclusive access if required. 779 + /// 780 + /// # Examples 781 + /// 782 + /// Read the u32 value at address `0x10`, increment it, and store the updated value back: 783 + /// 784 + /// ```no_run 785 + /// use kernel::io::{ 786 + /// Io, 787 + /// Mmio, 788 + /// }; 789 + /// 790 + /// fn do_update(io: &Mmio<0x1000>) { 791 + /// io.update(0x10, |v: u32| { 792 + /// v + 1 793 + /// }) 794 + /// } 795 + /// ``` 796 + #[inline(always)] 797 + fn update<T, L, F>(&self, location: L, f: F) 798 + where 799 + L: IoLoc<T>, 800 + Self: IoKnownSize + IoCapable<L::IoType> + Sized, 801 + F: FnOnce(T) -> T, 802 + { 803 + let address = self.io_addr_assert::<L::IoType>(location.offset()); 804 + 805 + // SAFETY: `address` has been validated by `io_addr_assert`. 
806 + let value: T = unsafe { self.io_read(address) }.into(); 807 + let io_value = f(value).into(); 808 + 809 + // SAFETY: `address` has been validated by `io_addr_assert`. 810 + unsafe { self.io_write(io_value, address) } 405 811 } 406 812 } 407 813 ··· 726 534 } 727 535 } 728 536 729 - // MMIO regions support 8, 16, and 32-bit accesses. 730 - impl<const SIZE: usize> IoCapable<u8> for Mmio<SIZE> {} 731 - impl<const SIZE: usize> IoCapable<u16> for Mmio<SIZE> {} 732 - impl<const SIZE: usize> IoCapable<u32> for Mmio<SIZE> {} 537 + /// Implements [`IoCapable`] on `$mmio` for `$ty` using `$read_fn` and `$write_fn`. 538 + macro_rules! impl_mmio_io_capable { 539 + ($mmio:ident, $(#[$attr:meta])* $ty:ty, $read_fn:ident, $write_fn:ident) => { 540 + $(#[$attr])* 541 + impl<const SIZE: usize> IoCapable<$ty> for $mmio<SIZE> { 542 + unsafe fn io_read(&self, address: usize) -> $ty { 543 + // SAFETY: By the trait invariant `address` is a valid address for MMIO operations. 544 + unsafe { bindings::$read_fn(address as *const c_void) } 545 + } 733 546 547 + unsafe fn io_write(&self, value: $ty, address: usize) { 548 + // SAFETY: By the trait invariant `address` is a valid address for MMIO operations. 549 + unsafe { bindings::$write_fn(value, address as *mut c_void) } 550 + } 551 + } 552 + }; 553 + } 554 + 555 + // MMIO regions support 8, 16, and 32-bit accesses. 556 + impl_mmio_io_capable!(Mmio, u8, readb, writeb); 557 + impl_mmio_io_capable!(Mmio, u16, readw, writew); 558 + impl_mmio_io_capable!(Mmio, u32, readl, writel); 734 559 // MMIO regions on 64-bit systems also support 64-bit accesses. 735 - #[cfg(CONFIG_64BIT)] 736 - impl<const SIZE: usize> IoCapable<u64> for Mmio<SIZE> {} 560 + impl_mmio_io_capable!( 561 + Mmio, 562 + #[cfg(CONFIG_64BIT)] 563 + u64, 564 + readq, 565 + writeq 566 + ); 737 567 738 568 impl<const SIZE: usize> Io for Mmio<SIZE> { 739 569 /// Returns the base address of this mapping. 
··· 769 555 fn maxsize(&self) -> usize { 770 556 self.0.maxsize() 771 557 } 772 - 773 - io_define_read!(fallible, try_read8, call_mmio_read(readb) -> u8); 774 - io_define_read!(fallible, try_read16, call_mmio_read(readw) -> u16); 775 - io_define_read!(fallible, try_read32, call_mmio_read(readl) -> u32); 776 - io_define_read!( 777 - fallible, 778 - #[cfg(CONFIG_64BIT)] 779 - try_read64, 780 - call_mmio_read(readq) -> u64 781 - ); 782 - 783 - io_define_write!(fallible, try_write8, call_mmio_write(writeb) <- u8); 784 - io_define_write!(fallible, try_write16, call_mmio_write(writew) <- u16); 785 - io_define_write!(fallible, try_write32, call_mmio_write(writel) <- u32); 786 - io_define_write!( 787 - fallible, 788 - #[cfg(CONFIG_64BIT)] 789 - try_write64, 790 - call_mmio_write(writeq) <- u64 791 - ); 792 - 793 - io_define_read!(infallible, read8, call_mmio_read(readb) -> u8); 794 - io_define_read!(infallible, read16, call_mmio_read(readw) -> u16); 795 - io_define_read!(infallible, read32, call_mmio_read(readl) -> u32); 796 - io_define_read!( 797 - infallible, 798 - #[cfg(CONFIG_64BIT)] 799 - read64, 800 - call_mmio_read(readq) -> u64 801 - ); 802 - 803 - io_define_write!(infallible, write8, call_mmio_write(writeb) <- u8); 804 - io_define_write!(infallible, write16, call_mmio_write(writew) <- u16); 805 - io_define_write!(infallible, write32, call_mmio_write(writel) <- u32); 806 - io_define_write!( 807 - infallible, 808 - #[cfg(CONFIG_64BIT)] 809 - write64, 810 - call_mmio_write(writeq) <- u64 811 - ); 812 558 } 813 559 814 560 impl<const SIZE: usize> IoKnownSize for Mmio<SIZE> { ··· 786 612 // SAFETY: `Mmio` is a transparent wrapper around `MmioRaw`. 
787 613 unsafe { &*core::ptr::from_ref(raw).cast() } 788 614 } 789 - 790 - io_define_read!(infallible, pub read8_relaxed, call_mmio_read(readb_relaxed) -> u8); 791 - io_define_read!(infallible, pub read16_relaxed, call_mmio_read(readw_relaxed) -> u16); 792 - io_define_read!(infallible, pub read32_relaxed, call_mmio_read(readl_relaxed) -> u32); 793 - io_define_read!( 794 - infallible, 795 - #[cfg(CONFIG_64BIT)] 796 - pub read64_relaxed, 797 - call_mmio_read(readq_relaxed) -> u64 798 - ); 799 - 800 - io_define_read!(fallible, pub try_read8_relaxed, call_mmio_read(readb_relaxed) -> u8); 801 - io_define_read!(fallible, pub try_read16_relaxed, call_mmio_read(readw_relaxed) -> u16); 802 - io_define_read!(fallible, pub try_read32_relaxed, call_mmio_read(readl_relaxed) -> u32); 803 - io_define_read!( 804 - fallible, 805 - #[cfg(CONFIG_64BIT)] 806 - pub try_read64_relaxed, 807 - call_mmio_read(readq_relaxed) -> u64 808 - ); 809 - 810 - io_define_write!(infallible, pub write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); 811 - io_define_write!(infallible, pub write16_relaxed, call_mmio_write(writew_relaxed) <- u16); 812 - io_define_write!(infallible, pub write32_relaxed, call_mmio_write(writel_relaxed) <- u32); 813 - io_define_write!( 814 - infallible, 815 - #[cfg(CONFIG_64BIT)] 816 - pub write64_relaxed, 817 - call_mmio_write(writeq_relaxed) <- u64 818 - ); 819 - 820 - io_define_write!(fallible, pub try_write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); 821 - io_define_write!(fallible, pub try_write16_relaxed, call_mmio_write(writew_relaxed) <- u16); 822 - io_define_write!(fallible, pub try_write32_relaxed, call_mmio_write(writel_relaxed) <- u32); 823 - io_define_write!( 824 - fallible, 825 - #[cfg(CONFIG_64BIT)] 826 - pub try_write64_relaxed, 827 - call_mmio_write(writeq_relaxed) <- u64 828 - ); 829 615 } 616 + 617 + /// [`Mmio`] wrapper using relaxed accessors. 
618 + /// 619 + /// This type provides an implementation of [`Io`] that uses relaxed MMIO accessors instead of 620 + /// the regular ones. 621 + /// 622 + /// See [`Mmio::relaxed`] for a usage example. 623 + #[repr(transparent)] 624 + pub struct RelaxedMmio<const SIZE: usize = 0>(Mmio<SIZE>); 625 + 626 + impl<const SIZE: usize> Io for RelaxedMmio<SIZE> { 627 + #[inline] 628 + fn addr(&self) -> usize { 629 + self.0.addr() 630 + } 631 + 632 + #[inline] 633 + fn maxsize(&self) -> usize { 634 + self.0.maxsize() 635 + } 636 + } 637 + 638 + impl<const SIZE: usize> IoKnownSize for RelaxedMmio<SIZE> { 639 + const MIN_SIZE: usize = SIZE; 640 + } 641 + 642 + impl<const SIZE: usize> Mmio<SIZE> { 643 + /// Returns a [`RelaxedMmio`] reference that performs relaxed I/O operations. 644 + /// 645 + /// Relaxed accessors do not provide ordering guarantees with respect to DMA or memory accesses 646 + /// and can be used when such ordering is not required. 647 + /// 648 + /// # Examples 649 + /// 650 + /// ```no_run 651 + /// use kernel::io::{ 652 + /// Io, 653 + /// Mmio, 654 + /// RelaxedMmio, 655 + /// }; 656 + /// 657 + /// fn do_io(io: &Mmio<0x100>) { 658 + /// // The access is performed using `readl_relaxed` instead of `readl`. 659 + /// let v = io.relaxed().read32(0x10); 660 + /// } 661 + /// 662 + /// ``` 663 + pub fn relaxed(&self) -> &RelaxedMmio<SIZE> { 664 + // SAFETY: `RelaxedMmio` is `#[repr(transparent)]` over `Mmio`, so `Mmio<SIZE>` and 665 + // `RelaxedMmio<SIZE>` have identical layout. 666 + unsafe { core::mem::transmute(self) } 667 + } 668 + } 669 + 670 + // MMIO regions support 8, 16, and 32-bit accesses. 671 + impl_mmio_io_capable!(RelaxedMmio, u8, readb_relaxed, writeb_relaxed); 672 + impl_mmio_io_capable!(RelaxedMmio, u16, readw_relaxed, writew_relaxed); 673 + impl_mmio_io_capable!(RelaxedMmio, u32, readl_relaxed, writel_relaxed); 674 + // MMIO regions on 64-bit systems also support 64-bit accesses. 
675 + impl_mmio_io_capable!( 676 + RelaxedMmio, 677 + #[cfg(CONFIG_64BIT)] 678 + u64, 679 + readq_relaxed, 680 + writeq_relaxed 681 + );

+6 -4

rust/kernel/io/mem.rs

··· 54 54 /// use kernel::{ 55 55 /// bindings, 56 56 /// device::Core, 57 + /// io::Io, 57 58 /// of, 58 59 /// platform, 59 60 /// }; ··· 79 78 /// let io = iomem.access(pdev.as_ref())?; 80 79 /// 81 80 /// // Read and write a 32-bit value at `offset`. 82 - /// let data = io.read32_relaxed(offset); 81 + /// let data = io.read32(offset); 83 82 /// 84 - /// io.write32_relaxed(data, offset); 83 + /// io.write32(data, offset); 85 84 /// 86 85 /// # Ok(SampleDriver) 87 86 /// } ··· 118 117 /// use kernel::{ 119 118 /// bindings, 120 119 /// device::Core, 120 + /// io::Io, 121 121 /// of, 122 122 /// platform, 123 123 /// }; ··· 143 141 /// 144 142 /// let io = iomem.access(pdev.as_ref())?; 145 143 /// 146 - /// let data = io.try_read32_relaxed(offset)?; 144 + /// let data = io.try_read32(offset)?; 147 145 /// 148 - /// io.try_write32_relaxed(data, offset)?; 146 + /// io.try_write32(data, offset)?; 149 147 /// 150 148 /// # Ok(SampleDriver) 151 149 /// }

+1260

rust/kernel/io/register.rs

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! Macro to define register layout and accessors. 4 + //! 5 + //! The [`register!`](kernel::io::register!) macro provides an intuitive and readable syntax for 6 + //! defining a dedicated type for each register and accessing it using [`Io`](super::Io). Each such 7 + //! type comes with its own field accessors that can return an error if a field's value is invalid. 8 + //! 9 + //! Note: most of the items in this module are public so they can be referenced by the macro, but 10 + //! most are not to be used directly by users. Outside of the `register!` macro itself, the only 11 + //! items you might want to import from this module are [`WithBase`] and [`Array`]. 12 + //! 13 + //! # Simple example 14 + //! 15 + //! ```no_run 16 + //! use kernel::io::register; 17 + //! 18 + //! register! { 19 + //! /// Basic information about the chip. 20 + //! pub BOOT_0(u32) @ 0x00000100 { 21 + //! /// Vendor ID. 22 + //! 15:8 vendor_id; 23 + //! /// Major revision of the chip. 24 + //! 7:4 major_revision; 25 + //! /// Minor revision of the chip. 26 + //! 3:0 minor_revision; 27 + //! } 28 + //! } 29 + //! ``` 30 + //! 31 + //! This defines a 32-bit `BOOT_0` type which can be read from or written to offset `0x100` of an 32 + //! `Io` region, with the described bitfields. For instance, `minor_revision` consists of the 4 33 + //! least significant bits of the type. 34 + //! 35 + //! Fields are instances of [`Bounded`](kernel::num::Bounded) and can be read by calling their 36 + //! getter method, which is named after them. They also have setter methods prefixed with `with_` 37 + //! for runtime values and `with_const_` for constant values. All setters return the updated 38 + //! register value. 39 + //! 40 + //! Fields can also be transparently converted from/to an arbitrary type by using the `=>` and 41 + //! `?=>` syntaxes. 42 + //! 43 + //! 
If present, doc comments above register or fields definitions are added to the relevant item 44 + //! they document (the register type itself, or the field's setter and getter methods). 45 + //! 46 + //! Note that multiple registers can be defined in a single `register!` invocation. This can be 47 + //! useful to group related registers together. 48 + //! 49 + //! Here is how the register defined above can be used in code: 50 + //! 51 + //! 52 + //! ```no_run 53 + //! use kernel::{ 54 + //! io::{ 55 + //! register, 56 + //! Io, 57 + //! IoLoc, 58 + //! }, 59 + //! num::Bounded, 60 + //! }; 61 + //! # use kernel::io::Mmio; 62 + //! # register! { 63 + //! # pub BOOT_0(u32) @ 0x00000100 { 64 + //! # 15:8 vendor_id; 65 + //! # 7:4 major_revision; 66 + //! # 3:0 minor_revision; 67 + //! # } 68 + //! # } 69 + //! # fn test(io: &Mmio<0x1000>) { 70 + //! # fn obtain_vendor_id() -> u8 { 0xff } 71 + //! 72 + //! // Read from the register's defined offset (0x100). 73 + //! let boot0 = io.read(BOOT_0); 74 + //! pr_info!("chip revision: {}.{}", boot0.major_revision().get(), boot0.minor_revision().get()); 75 + //! 76 + //! // Update some fields and write the new value back. 77 + //! let new_boot0 = boot0 78 + //! // Constant values. 79 + //! .with_const_major_revision::<3>() 80 + //! .with_const_minor_revision::<10>() 81 + //! // Runtime value. 82 + //! .with_vendor_id(obtain_vendor_id()); 83 + //! io.write_reg(new_boot0); 84 + //! 85 + //! // Or, build a new value from zero and write it: 86 + //! io.write_reg(BOOT_0::zeroed() 87 + //! .with_const_major_revision::<3>() 88 + //! .with_const_minor_revision::<10>() 89 + //! .with_vendor_id(obtain_vendor_id()) 90 + //! ); 91 + //! 92 + //! // Or, read and update the register in a single step. 93 + //! io.update(BOOT_0, |r| r 94 + //! .with_const_major_revision::<3>() 95 + //! .with_const_minor_revision::<10>() 96 + //! .with_vendor_id(obtain_vendor_id()) 97 + //! ); 98 + //! 99 + //! 
// Constant values can also be built using the const setters. 100 + //! const V: BOOT_0 = pin_init::zeroed::<BOOT_0>() 101 + //! .with_const_major_revision::<3>() 102 + //! .with_const_minor_revision::<10>(); 103 + //! # } 104 + //! ``` 105 + //! 106 + //! For more extensive documentation about how to define registers, see the 107 + //! [`register!`](kernel::io::register!) macro. 108 + 109 + use core::marker::PhantomData; 110 + 111 + use crate::io::IoLoc; 112 + 113 + use kernel::build_assert; 114 + 115 + /// Trait implemented by all registers. 116 + pub trait Register: Sized { 117 + /// Backing primitive type of the register. 118 + type Storage: Into<Self> + From<Self>; 119 + 120 + /// Start offset of the register. 121 + /// 122 + /// The interpretation of this offset depends on the type of the register. 123 + const OFFSET: usize; 124 + } 125 + 126 + /// Trait implemented by registers with a fixed offset. 127 + pub trait FixedRegister: Register {} 128 + 129 + /// Allows `()` to be used as the `location` parameter of [`Io::write`](super::Io::write) when 130 + /// passing a [`FixedRegister`] value. 131 + impl<T> IoLoc<T> for () 132 + where 133 + T: FixedRegister, 134 + { 135 + type IoType = T::Storage; 136 + 137 + #[inline(always)] 138 + fn offset(self) -> usize { 139 + T::OFFSET 140 + } 141 + } 142 + 143 + /// A [`FixedRegister`] carries its location in its type. Thus `FixedRegister` values can be used 144 + /// as an [`IoLoc`]. 145 + impl<T> IoLoc<T> for T 146 + where 147 + T: FixedRegister, 148 + { 149 + type IoType = T::Storage; 150 + 151 + #[inline(always)] 152 + fn offset(self) -> usize { 153 + T::OFFSET 154 + } 155 + } 156 + 157 + /// Location of a fixed register. 158 + pub struct FixedRegisterLoc<T: FixedRegister>(PhantomData<T>); 159 + 160 + impl<T: FixedRegister> FixedRegisterLoc<T> { 161 + /// Returns the location of `T`. 162 + #[inline(always)] 163 + // We do not implement `Default` so we can be const. 
164 + #[expect(clippy::new_without_default)] 165 + pub const fn new() -> Self { 166 + Self(PhantomData) 167 + } 168 + } 169 + 170 + impl<T> IoLoc<T> for FixedRegisterLoc<T> 171 + where 172 + T: FixedRegister, 173 + { 174 + type IoType = T::Storage; 175 + 176 + #[inline(always)] 177 + fn offset(self) -> usize { 178 + T::OFFSET 179 + } 180 + } 181 + 182 + /// Trait providing a base address to be added to the offset of a relative register to obtain 183 + /// its actual offset. 184 + /// 185 + /// The `T` generic argument is used to distinguish which base to use, in case a type provides 186 + /// several bases. It is given to the `register!` macro to restrict the use of the register to 187 + /// implementors of this particular variant. 188 + pub trait RegisterBase<T> { 189 + /// Base address to which register offsets are added. 190 + const BASE: usize; 191 + } 192 + 193 + /// Trait implemented by all registers that are relative to a base. 194 + pub trait WithBase { 195 + /// Family of bases applicable to this register. 196 + type BaseFamily; 197 + 198 + /// Returns the absolute location of this type when using `B` as its base. 199 + #[inline(always)] 200 + fn of<B: RegisterBase<Self::BaseFamily>>() -> RelativeRegisterLoc<Self, B> 201 + where 202 + Self: Register, 203 + { 204 + RelativeRegisterLoc::new() 205 + } 206 + } 207 + 208 + /// Trait implemented by relative registers. 209 + pub trait RelativeRegister: Register + WithBase {} 210 + 211 + /// Location of a relative register. 212 + /// 213 + /// This can either be an immediately accessible regular [`RelativeRegister`], or a 214 + /// [`RelativeRegisterArray`] that needs one additional resolution through 215 + /// [`RelativeRegisterLoc::at`]. 
216 + pub struct RelativeRegisterLoc<T: WithBase, B: ?Sized>(PhantomData<T>, PhantomData); 217 + 218 + impl<T, B> RelativeRegisterLoc<T, B> 219 + where 220 + T: Register + WithBase, 221 + B: RegisterBase<T::BaseFamily> + ?Sized, 222 + { 223 + /// Returns the location of a relative register or register array. 224 + #[inline(always)] 225 + // We do not implement `Default` so we can be const. 226 + #[expect(clippy::new_without_default)] 227 + pub const fn new() -> Self { 228 + Self(PhantomData, PhantomData) 229 + } 230 + 231 + // Returns the absolute offset of the relative register using base `B`. 232 + // 233 + // This is implemented as a private const method so it can be reused by the [`IoLoc`] 234 + // implementations of both [`RelativeRegisterLoc`] and [`RelativeRegisterArrayLoc`]. 235 + #[inline] 236 + const fn offset(self) -> usize { 237 + B::BASE + T::OFFSET 238 + } 239 + } 240 + 241 + impl<T, B> IoLoc<T> for RelativeRegisterLoc<T, B> 242 + where 243 + T: RelativeRegister, 244 + B: RegisterBase<T::BaseFamily> + ?Sized, 245 + { 246 + type IoType = T::Storage; 247 + 248 + #[inline(always)] 249 + fn offset(self) -> usize { 250 + RelativeRegisterLoc::offset(self) 251 + } 252 + } 253 + 254 + /// Trait implemented by arrays of registers. 255 + pub trait RegisterArray: Register { 256 + /// Number of elements in the registers array. 257 + const SIZE: usize; 258 + /// Number of bytes between the start of elements in the registers array. 259 + const STRIDE: usize; 260 + } 261 + 262 + /// Location of an array register. 263 + pub struct RegisterArrayLoc<T: RegisterArray>(usize, PhantomData<T>); 264 + 265 + impl<T: RegisterArray> RegisterArrayLoc<T> { 266 + /// Returns the location of register `T` at position `idx`, with build-time validation. 
267 + #[inline(always)] 268 + pub fn new(idx: usize) -> Self { 269 + build_assert!(idx < T::SIZE); 270 + 271 + Self(idx, PhantomData) 272 + } 273 + 274 + /// Attempts to return the location of register `T` at position `idx`, with runtime validation. 275 + #[inline(always)] 276 + pub fn try_new(idx: usize) -> Option<Self> { 277 + if idx < T::SIZE { 278 + Some(Self(idx, PhantomData)) 279 + } else { 280 + None 281 + } 282 + } 283 + } 284 + 285 + impl<T> IoLoc<T> for RegisterArrayLoc<T> 286 + where 287 + T: RegisterArray, 288 + { 289 + type IoType = T::Storage; 290 + 291 + #[inline(always)] 292 + fn offset(self) -> usize { 293 + T::OFFSET + self.0 * T::STRIDE 294 + } 295 + } 296 + 297 + /// Trait providing location builders for [`RegisterArray`]s. 298 + pub trait Array { 299 + /// Returns the location of the register at position `idx`, with build-time validation. 300 + #[inline(always)] 301 + fn at(idx: usize) -> RegisterArrayLoc<Self> 302 + where 303 + Self: RegisterArray, 304 + { 305 + RegisterArrayLoc::new(idx) 306 + } 307 + 308 + /// Returns the location of the register at position `idx`, with runtime validation. 309 + #[inline(always)] 310 + fn try_at(idx: usize) -> Option<RegisterArrayLoc<Self>> 311 + where 312 + Self: RegisterArray, 313 + { 314 + RegisterArrayLoc::try_new(idx) 315 + } 316 + } 317 + 318 + /// Trait implemented by arrays of relative registers. 319 + pub trait RelativeRegisterArray: RegisterArray + WithBase {} 320 + 321 + /// Location of a relative array register. 322 + pub struct RelativeRegisterArrayLoc< 323 + T: RelativeRegisterArray, 324 + B: RegisterBase<T::BaseFamily> + ?Sized, 325 + >(RelativeRegisterLoc<T, B>, usize); 326 + 327 + impl<T, B> RelativeRegisterArrayLoc<T, B> 328 + where 329 + T: RelativeRegisterArray, 330 + B: RegisterBase<T::BaseFamily> + ?Sized, 331 + { 332 + /// Returns the location of register `T` from the base `B` at index `idx`, with build-time 333 + /// validation. 
334 + #[inline(always)] 335 + pub fn new(idx: usize) -> Self { 336 + build_assert!(idx < T::SIZE); 337 + 338 + Self(RelativeRegisterLoc::new(), idx) 339 + } 340 + 341 + /// Attempts to return the location of register `T` from the base `B` at index `idx`, with 342 + /// runtime validation. 343 + #[inline(always)] 344 + pub fn try_new(idx: usize) -> Option<Self> { 345 + if idx < T::SIZE { 346 + Some(Self(RelativeRegisterLoc::new(), idx)) 347 + } else { 348 + None 349 + } 350 + } 351 + } 352 + 353 + /// Methods exclusive to [`RelativeRegisterLoc`]s created with a [`RelativeRegisterArray`]. 354 + impl<T, B> RelativeRegisterLoc<T, B> 355 + where 356 + T: RelativeRegisterArray, 357 + B: RegisterBase<T::BaseFamily> + ?Sized, 358 + { 359 + /// Returns the location of the register at position `idx`, with build-time validation. 360 + #[inline(always)] 361 + pub fn at(self, idx: usize) -> RelativeRegisterArrayLoc<T, B> { 362 + RelativeRegisterArrayLoc::new(idx) 363 + } 364 + 365 + /// Returns the location of the register at position `idx`, with runtime validation. 366 + #[inline(always)] 367 + pub fn try_at(self, idx: usize) -> Option<RelativeRegisterArrayLoc<T, B>> { 368 + RelativeRegisterArrayLoc::try_new(idx) 369 + } 370 + } 371 + 372 + impl<T, B> IoLoc<T> for RelativeRegisterArrayLoc<T, B> 373 + where 374 + T: RelativeRegisterArray, 375 + B: RegisterBase<T::BaseFamily> + ?Sized, 376 + { 377 + type IoType = T::Storage; 378 + 379 + #[inline(always)] 380 + fn offset(self) -> usize { 381 + self.0.offset() + self.1 * T::STRIDE 382 + } 383 + } 384 + 385 + /// Trait implemented by items that contain both a register value and the absolute I/O location at 386 + /// which to write it. 387 + /// 388 + /// Implementors can be used with [`Io::write_reg`](super::Io::write_reg). 389 + pub trait LocatedRegister { 390 + /// Register value to write. 391 + type Value: Register; 392 + /// Full location information at which to write the value. 
393 + type Location: IoLoc<Self::Value>; 394 + 395 + /// Consumes `self` and returns a `(location, value)` tuple describing a valid I/O write 396 + /// operation. 397 + fn into_io_op(self) -> (Self::Location, Self::Value); 398 + } 399 + 400 + impl<T> LocatedRegister for T 401 + where 402 + T: FixedRegister, 403 + { 404 + type Location = FixedRegisterLoc<Self::Value>; 405 + type Value = T; 406 + 407 + #[inline(always)] 408 + fn into_io_op(self) -> (FixedRegisterLoc<T>, T) { 409 + (FixedRegisterLoc::new(), self) 410 + } 411 + } 412 + 413 + /// Defines a dedicated type for a register, including getter and setter methods for its fields and 414 + /// methods to read and write it from an [`Io`](kernel::io::Io) region. 415 + /// 416 + /// This documentation focuses on how to declare registers. See the [module-level 417 + /// documentation](mod@kernel::io::register) for examples of how to access them. 418 + /// 419 + /// There are 4 possible kinds of registers: fixed offset registers, relative registers, arrays of 420 + /// registers, and relative arrays of registers. 421 + /// 422 + /// ## Fixed offset registers 423 + /// 424 + /// These are the simplest kind of registers. Their location is simply an offset inside the I/O 425 + /// region. For instance: 426 + /// 427 + /// ```ignore 428 + /// register! { 429 + /// pub FIXED_REG(u16) @ 0x80 { 430 + /// ... 431 + /// } 432 + /// } 433 + /// ``` 434 + /// 435 + /// This creates a 16-bit register named `FIXED_REG` located at offset `0x80` of an I/O region. 436 + /// 437 + /// These registers' location can be built simply by referencing their name: 438 + /// 439 + /// ```no_run 440 + /// use kernel::{ 441 + /// io::{ 442 + /// register, 443 + /// Io, 444 + /// }, 445 + /// }; 446 + /// # use kernel::io::Mmio; 447 + /// 448 + /// register! 
{ 449 + /// FIXED_REG(u32) @ 0x100 { 450 + /// 16:8 high_byte; 451 + /// 7:0 low_byte; 452 + /// } 453 + /// } 454 + /// 455 + /// # fn test(io: &Mmio<0x1000>) { 456 + /// let val = io.read(FIXED_REG); 457 + /// 458 + /// // Write from an already-existing value. 459 + /// io.write(FIXED_REG, val.with_low_byte(0xff)); 460 + /// 461 + /// // Create a register value from scratch. 462 + /// let val2 = FIXED_REG::zeroed().with_high_byte(0x80); 463 + /// 464 + /// // The location of fixed offset registers is already contained in their type. Thus, the 465 + /// // `location` argument of `Io::write` is technically redundant and can be replaced by `()`. 466 + /// io.write((), val2); 467 + /// 468 + /// // Or, the single-argument `Io::write_reg` can be used. 469 + /// io.write_reg(val2); 470 + /// # } 471 + /// 472 + /// ``` 473 + /// 474 + /// It is possible to create an alias of an existing register with new field definitions by using 475 + /// the `=> ALIAS` syntax. This is useful for cases where a register's interpretation depends on 476 + /// the context: 477 + /// 478 + /// ```no_run 479 + /// use kernel::io::register; 480 + /// 481 + /// register! { 482 + /// /// Scratch register. 483 + /// pub SCRATCH(u32) @ 0x00000200 { 484 + /// 31:0 value; 485 + /// } 486 + /// 487 + /// /// Boot status of the firmware. 488 + /// pub SCRATCH_BOOT_STATUS(u32) => SCRATCH { 489 + /// 0:0 completed; 490 + /// } 491 + /// } 492 + /// ``` 493 + /// 494 + /// In this example, `SCRATCH_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while providing 495 + /// its own `completed` field. 496 + /// 497 + /// ## Relative registers 498 + /// 499 + /// Relative registers can be instantiated several times at a relative offset of a group of bases. 500 + /// For instance, imagine the following I/O space: 501 + /// 502 + /// ```text 503 + /// +-----------------------------+ 504 + /// | ... 
| 505 + /// | | 506 + /// 0x100--->+------------CPU0-------------+ 507 + /// | | 508 + /// 0x110--->+-----------------------------+ 509 + /// | CPU_CTL | 510 + /// +-----------------------------+ 511 + /// | ... | 512 + /// | | 513 + /// | | 514 + /// 0x200--->+------------CPU1-------------+ 515 + /// | | 516 + /// 0x210--->+-----------------------------+ 517 + /// | CPU_CTL | 518 + /// +-----------------------------+ 519 + /// | ... | 520 + /// +-----------------------------+ 521 + /// ``` 522 + /// 523 + /// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O 524 + /// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define 525 + /// them twice and would prefer a way to select which one to use from a single definition. 526 + /// 527 + /// This can be done using the `Base + Offset` syntax when specifying the register's address: 528 + /// 529 + /// ```ignore 530 + /// register! { 531 + /// pub RELATIVE_REG(u32) @ Base + 0x80 { 532 + /// ... 533 + /// } 534 + /// } 535 + /// ``` 536 + /// 537 + /// This creates a register with an offset of `0x80` from a given base. 538 + /// 539 + /// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the 540 + /// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for 541 + /// this register needs to implement `RegisterBase<Base>`. 542 + /// 543 + /// The location of relative registers can be built using the [`WithBase::of`] method to specify 544 + /// its base. All relative registers implement [`WithBase`]. 545 + /// 546 + /// Here is the above layout translated into code: 547 + /// 548 + /// ```no_run 549 + /// use kernel::{ 550 + /// io::{ 551 + /// register, 552 + /// register::{ 553 + /// RegisterBase, 554 + /// WithBase, 555 + /// }, 556 + /// Io, 557 + /// }, 558 + /// }; 559 + /// # use kernel::io::Mmio; 560 + /// 561 + /// // Type used to identify the base. 
562 + /// pub struct CpuCtlBase; 563 + /// 564 + /// // ZST describing `CPU0`. 565 + /// struct Cpu0; 566 + /// impl RegisterBase<CpuCtlBase> for Cpu0 { 567 + /// const BASE: usize = 0x100; 568 + /// } 569 + /// 570 + /// // ZST describing `CPU1`. 571 + /// struct Cpu1; 572 + /// impl RegisterBase<CpuCtlBase> for Cpu1 { 573 + /// const BASE: usize = 0x200; 574 + /// } 575 + /// 576 + /// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase<CpuCtlBase>`. 577 + /// register! { 578 + /// /// CPU core control. 579 + /// pub CPU_CTL(u32) @ CpuCtlBase + 0x10 { 580 + /// 0:0 start; 581 + /// } 582 + /// } 583 + /// 584 + /// # fn test(io: Mmio<0x1000>) { 585 + /// // Read the status of `Cpu0`. 586 + /// let cpu0_started = io.read(CPU_CTL::of::<Cpu0>()); 587 + /// 588 + /// // Stop `Cpu0`. 589 + /// io.write(WithBase::of::<Cpu0>(), CPU_CTL::zeroed()); 590 + /// # } 591 + /// 592 + /// // Aliases can also be defined for relative registers. 593 + /// register! { 594 + /// /// Alias to CPU core control. 595 + /// pub CPU_CTL_ALIAS(u32) => CpuCtlBase + CPU_CTL { 596 + /// /// Start the aliased CPU core. 597 + /// 1:1 alias_start; 598 + /// } 599 + /// } 600 + /// 601 + /// # fn test2(io: Mmio<0x1000>) { 602 + /// // Start the aliased `CPU0`, leaving its other fields untouched. 603 + /// io.update(CPU_CTL_ALIAS::of::<Cpu0>(), |r| r.with_alias_start(true)); 604 + /// # } 605 + /// ``` 606 + /// 607 + /// ## Arrays of registers 608 + /// 609 + /// Some I/O areas contain consecutive registers that share the same field layout. These areas can 610 + /// be defined as an array of identical registers, allowing them to be accessed by index with 611 + /// compile-time or runtime bound checking: 612 + /// 613 + /// ```ignore 614 + /// register! { 615 + /// pub REGISTER_ARRAY(u8)[10, stride = 4] @ 0x100 { 616 + /// ... 617 + /// } 618 + /// } 619 + /// ``` 620 + /// 621 + /// This defines `REGISTER_ARRAY`, an array of 10 byte registers starting at offset `0x100`.
Each 622 + /// register is separated from its neighbor by 4 bytes. 623 + /// 624 + /// The `stride` parameter is optional; if unspecified, the registers are placed consecutively from 625 + /// each other. 626 + /// 627 + /// A location for a register in a register array is built using the [`Array::at`] trait method. 628 + /// All arrays of registers implement [`Array`]. 629 + /// 630 + /// ```no_run 631 + /// use kernel::{ 632 + /// io::{ 633 + /// register, 634 + /// register::Array, 635 + /// Io, 636 + /// }, 637 + /// }; 638 + /// # use kernel::io::Mmio; 639 + /// # fn get_scratch_idx() -> usize { 640 + /// # 0x15 641 + /// # } 642 + /// 643 + /// // Array of 64 consecutive registers with the same layout starting at offset `0x80`. 644 + /// register! { 645 + /// /// Scratch registers. 646 + /// pub SCRATCH(u32)[64] @ 0x00000080 { 647 + /// 31:0 value; 648 + /// } 649 + /// } 650 + /// 651 + /// # fn test(io: &Mmio<0x1000>) 652 + /// # -> Result<(), Error>{ 653 + /// // Read scratch register 0, i.e. I/O address `0x80`. 654 + /// let scratch_0 = io.read(SCRATCH::at(0)).value(); 655 + /// 656 + /// // Write scratch register 15, i.e. I/O address `0x80 + (15 * 4)`. 657 + /// io.write(Array::at(15), SCRATCH::from(0xffeeaabb)); 658 + /// 659 + /// // This is out of bounds and won't build. 660 + /// // let scratch_128 = io.read(SCRATCH::at(128)).value(); 661 + /// 662 + /// // Runtime-obtained array index. 663 + /// let idx = get_scratch_idx(); 664 + /// // Access on a runtime index returns an error if it is out-of-bounds. 665 + /// let some_scratch = io.read(SCRATCH::try_at(idx).ok_or(EINVAL)?).value(); 666 + /// 667 + /// // Alias to a specific register in an array. 668 + /// // Here `SCRATCH[8]` is used to convey the firmware exit code. 669 + /// register! { 670 + /// /// Firmware exit status code. 
671 + /// pub FIRMWARE_STATUS(u32) => SCRATCH[8] { 672 + /// 7:0 status; 673 + /// } 674 + /// } 675 + /// 676 + /// let status = io.read(FIRMWARE_STATUS).status(); 677 + /// 678 + /// // Non-contiguous register arrays can be defined by adding a stride parameter. 679 + /// // Here, each of the 16 registers of the array is separated by 8 bytes, meaning that the 680 + /// // registers of the two declarations below are interleaved. 681 + /// register! { 682 + /// /// Scratch registers bank 0. 683 + /// pub SCRATCH_INTERLEAVED_0(u32)[16, stride = 8] @ 0x000000c0 { 684 + /// 31:0 value; 685 + /// } 686 + /// 687 + /// /// Scratch registers bank 1. 688 + /// pub SCRATCH_INTERLEAVED_1(u32)[16, stride = 8] @ 0x000000c4 { 689 + /// 31:0 value; 690 + /// } 691 + /// } 692 + /// # Ok(()) 693 + /// # } 694 + /// ``` 695 + /// 696 + /// ## Relative arrays of registers 697 + /// 698 + /// Combining the two features described in the sections above, arrays of registers accessible from 699 + /// a base can also be defined: 700 + /// 701 + /// ```ignore 702 + /// register! { 703 + /// pub RELATIVE_REGISTER_ARRAY(u8)[10, stride = 4] @ Base + 0x100 { 704 + /// ... 705 + /// } 706 + /// } 707 + /// ``` 708 + /// 709 + /// Like relative registers, they implement the [`WithBase`] trait. However the return value of 710 + /// [`WithBase::of`] cannot be used directly as a location and must be further specified using the 711 + /// [`at`](RelativeRegisterLoc::at) method. 712 + /// 713 + /// ```no_run 714 + /// use kernel::{ 715 + /// io::{ 716 + /// register, 717 + /// register::{ 718 + /// RegisterBase, 719 + /// WithBase, 720 + /// }, 721 + /// Io, 722 + /// }, 723 + /// }; 724 + /// # use kernel::io::Mmio; 725 + /// # fn get_scratch_idx() -> usize { 726 + /// # 0x15 727 + /// # } 728 + /// 729 + /// // Type used as parameter of `RegisterBase` to specify the base. 730 + /// pub struct CpuCtlBase; 731 + /// 732 + /// // ZST describing `CPU0`. 
733 + /// struct Cpu0; 734 + /// impl RegisterBase<CpuCtlBase> for Cpu0 { 735 + /// const BASE: usize = 0x100; 736 + /// } 737 + /// 738 + /// // ZST describing `CPU1`. 739 + /// struct Cpu1; 740 + /// impl RegisterBase<CpuCtlBase> for Cpu1 { 741 + /// const BASE: usize = 0x200; 742 + /// } 743 + /// 744 + /// // 64 per-cpu scratch registers, arranged as a contiguous array. 745 + /// register! { 746 + /// /// Per-CPU scratch registers. 747 + /// pub CPU_SCRATCH(u32)[64] @ CpuCtlBase + 0x00000080 { 748 + /// 31:0 value; 749 + /// } 750 + /// } 751 + /// 752 + /// # fn test(io: &Mmio<0x1000>) -> Result<(), Error> { 753 + /// // Read scratch register 0 of CPU0. 754 + /// let scratch = io.read(CPU_SCRATCH::of::<Cpu0>().at(0)); 755 + /// 756 + /// // Write the retrieved value into scratch register 15 of CPU1. 757 + /// io.write(WithBase::of::<Cpu1>().at(15), scratch); 758 + /// 759 + /// // This won't build. 760 + /// // let cpu0_scratch_128 = io.read(CPU_SCRATCH::of::<Cpu0>().at(128)).value(); 761 + /// 762 + /// // Runtime-obtained array index. 763 + /// let scratch_idx = get_scratch_idx(); 764 + /// // Access on a runtime index returns an error if it is out-of-bounds. 765 + /// let cpu0_scratch = io.read( 766 + /// CPU_SCRATCH::of::<Cpu0>().try_at(scratch_idx).ok_or(EINVAL)? 767 + /// ).value(); 768 + /// # Ok(()) 769 + /// # } 770 + /// 771 + /// // Alias to `CPU_SCRATCH[8]` used to convey the firmware exit code. 772 + /// register! { 773 + /// /// Per-CPU firmware exit status code. 774 + /// pub CPU_FIRMWARE_STATUS(u32) => CpuCtlBase + CPU_SCRATCH[8] { 775 + /// 7:0 status; 776 + /// } 777 + /// } 778 + /// 779 + /// // Non-contiguous relative register arrays can be defined by adding a stride parameter. 780 + /// // Here, each of the 16 registers of the array is separated by 8 bytes, meaning that the 781 + /// // registers of the two declarations below are interleaved. 782 + /// register! { 783 + /// /// Scratch registers bank 0.
784 + /// pub CPU_SCRATCH_INTERLEAVED_0(u32)[16, stride = 8] @ CpuCtlBase + 0x00000d00 { 785 + /// 31:0 value; 786 + /// } 787 + /// 788 + /// /// Scratch registers bank 1. 789 + /// pub CPU_SCRATCH_INTERLEAVED_1(u32)[16, stride = 8] @ CpuCtlBase + 0x00000d04 { 790 + /// 31:0 value; 791 + /// } 792 + /// } 793 + /// 794 + /// # fn test2(io: &Mmio<0x1000>) -> Result<(), Error> { 795 + /// let cpu0_status = io.read(CPU_FIRMWARE_STATUS::of::<Cpu0>()).status(); 796 + /// # Ok(()) 797 + /// # } 798 + /// ``` 799 + #[macro_export] 800 + macro_rules! register { 801 + // Entry point for the macro, allowing multiple registers to be defined in one call. 802 + // It matches all possible register declaration patterns to dispatch them to corresponding 803 + // `@reg` rule that defines a single register. 804 + ( 805 + $( 806 + $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) 807 + $([ $size:expr $(, stride = $stride:expr)? ])? 808 + $(@ $($base:ident +)? $offset:literal)? 809 + $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )? 810 + { $($fields:tt)* } 811 + )* 812 + ) => { 813 + $( 814 + $crate::register!( 815 + @reg $(#[$attr])* $vis $name ($storage) $([$size $(, stride = $stride)?])? 816 + $(@ $($base +)? $offset)? 817 + $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )? 818 + { $($fields)* } 819 + ); 820 + )* 821 + }; 822 + 823 + // All the rules below are private helpers. 824 + 825 + // Creates a register at a fixed offset of the MMIO space. 826 + ( 827 + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) @ $offset:literal 828 + { $($fields:tt)* } 829 + ) => { 830 + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); 831 + $crate::register!(@io_base $name($storage) @ $offset); 832 + $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage)); 833 + }; 834 + 835 + // Creates an alias register of fixed offset register `alias` with its own fields. 
836 + ( 837 + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $alias:ident 838 + { $($fields:tt)* } 839 + ) => { 840 + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); 841 + $crate::register!( 842 + @io_base $name($storage) @ 843 + <$alias as $crate::io::register::Register>::OFFSET 844 + ); 845 + $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage)); 846 + }; 847 + 848 + // Creates a register at a relative offset from a base address provider. 849 + ( 850 + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) @ $base:ident + $offset:literal 851 + { $($fields:tt)* } 852 + ) => { 853 + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); 854 + $crate::register!(@io_base $name($storage) @ $offset); 855 + $crate::register!(@io_relative $vis $name($storage) @ $base); 856 + }; 857 + 858 + // Creates an alias register of relative offset register `alias` with its own fields. 859 + ( 860 + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $base:ident + $alias:ident 861 + { $($fields:tt)* } 862 + ) => { 863 + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); 864 + $crate::register!( 865 + @io_base $name($storage) @ <$alias as $crate::io::register::Register>::OFFSET 866 + ); 867 + $crate::register!(@io_relative $vis $name($storage) @ $base); 868 + }; 869 + 870 + // Creates an array of registers at a fixed offset of the MMIO space. 
871 + ( 872 + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) 873 + [ $size:expr, stride = $stride:expr ] @ $offset:literal { $($fields:tt)* } 874 + ) => { 875 + ::kernel::static_assert!(::core::mem::size_of::<$storage>() <= $stride); 876 + 877 + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); 878 + $crate::register!(@io_base $name($storage) @ $offset); 879 + $crate::register!(@io_array $vis $name($storage) [ $size, stride = $stride ]); 880 + }; 881 + 882 + // Shortcut for contiguous array of registers (stride == size of element). 883 + ( 884 + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) [ $size:expr ] @ $offset:literal 885 + { $($fields:tt)* } 886 + ) => { 887 + $crate::register!( 888 + $(#[$attr])* $vis $name($storage) [ $size, stride = ::core::mem::size_of::<$storage>() ] 889 + @ $offset { $($fields)* } 890 + ); 891 + }; 892 + 893 + // Creates an alias of register `idx` of array of registers `alias` with its own fields. 894 + ( 895 + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $alias:ident [ $idx:expr ] 896 + { $($fields:tt)* } 897 + ) => { 898 + ::kernel::static_assert!($idx < <$alias as $crate::io::register::RegisterArray>::SIZE); 899 + 900 + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); 901 + $crate::register!( 902 + @io_base $name($storage) @ 903 + <$alias as $crate::io::register::Register>::OFFSET 904 + + $idx * <$alias as $crate::io::register::RegisterArray>::STRIDE 905 + ); 906 + $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage)); 907 + }; 908 + 909 + // Creates an array of registers at a relative offset from a base address provider. 
910 + ( 911 + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) 912 + [ $size:expr, stride = $stride:expr ] 913 + @ $base:ident + $offset:literal { $($fields:tt)* } 914 + ) => { 915 + ::kernel::static_assert!(::core::mem::size_of::<$storage>() <= $stride); 916 + 917 + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); 918 + $crate::register!(@io_base $name($storage) @ $offset); 919 + $crate::register!( 920 + @io_relative_array $vis $name($storage) [ $size, stride = $stride ] @ $base + $offset 921 + ); 922 + }; 923 + 924 + // Shortcut for contiguous array of relative registers (stride == size of element). 925 + ( 926 + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) [ $size:expr ] 927 + @ $base:ident + $offset:literal { $($fields:tt)* } 928 + ) => { 929 + $crate::register!( 930 + $(#[$attr])* $vis $name($storage) [ $size, stride = ::core::mem::size_of::<$storage>() ] 931 + @ $base + $offset { $($fields)* } 932 + ); 933 + }; 934 + 935 + // Creates an alias of register `idx` of relative array of registers `alias` with its own 936 + // fields. 937 + ( 938 + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) 939 + => $base:ident + $alias:ident [ $idx:expr ] { $($fields:tt)* } 940 + ) => { 941 + ::kernel::static_assert!($idx < <$alias as $crate::io::register::RegisterArray>::SIZE); 942 + 943 + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); 944 + $crate::register!( 945 + @io_base $name($storage) @ 946 + <$alias as $crate::io::register::Register>::OFFSET + 947 + $idx * <$alias as $crate::io::register::RegisterArray>::STRIDE 948 + ); 949 + $crate::register!(@io_relative $vis $name($storage) @ $base); 950 + }; 951 + 952 + // Generates the bitfield for the register. 953 + // 954 + // `#[allow(non_camel_case_types)]` is added since register names typically use 955 + // `SCREAMING_CASE`. 
956 + ( 957 + @bitfield $(#[$attr:meta])* $vis:vis struct $name:ident($storage:ty) { $($fields:tt)* } 958 + ) => { 959 + $crate::register!(@bitfield_core 960 + #[allow(non_camel_case_types)] 961 + $(#[$attr])* $vis $name $storage 962 + ); 963 + $crate::register!(@bitfield_fields $vis $name $storage { $($fields)* }); 964 + }; 965 + 966 + // Implementations shared by all registers types. 967 + (@io_base $name:ident($storage:ty) @ $offset:expr) => { 968 + impl $crate::io::register::Register for $name { 969 + type Storage = $storage; 970 + 971 + const OFFSET: usize = $offset; 972 + } 973 + }; 974 + 975 + // Implementations of fixed registers. 976 + (@io_fixed $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty)) => { 977 + impl $crate::io::register::FixedRegister for $name {} 978 + 979 + $(#[$attr])* 980 + $vis const $name: $crate::io::register::FixedRegisterLoc<$name> = 981 + $crate::io::register::FixedRegisterLoc::<$name>::new(); 982 + }; 983 + 984 + // Implementations of relative registers. 985 + (@io_relative $vis:vis $name:ident ($storage:ty) @ $base:ident) => { 986 + impl $crate::io::register::WithBase for $name { 987 + type BaseFamily = $base; 988 + } 989 + 990 + impl $crate::io::register::RelativeRegister for $name {} 991 + }; 992 + 993 + // Implementations of register arrays. 994 + (@io_array $vis:vis $name:ident ($storage:ty) [ $size:expr, stride = $stride:expr ]) => { 995 + impl $crate::io::register::Array for $name {} 996 + 997 + impl $crate::io::register::RegisterArray for $name { 998 + const SIZE: usize = $size; 999 + const STRIDE: usize = $stride; 1000 + } 1001 + }; 1002 + 1003 + // Implementations of relative array registers. 
1004 + ( 1005 + @io_relative_array $vis:vis $name:ident ($storage:ty) [ $size:expr, stride = $stride:expr ] 1006 + @ $base:ident + $offset:literal 1007 + ) => { 1008 + impl $crate::io::register::WithBase for $name { 1009 + type BaseFamily = $base; 1010 + } 1011 + 1012 + impl $crate::io::register::RegisterArray for $name { 1013 + const SIZE: usize = $size; 1014 + const STRIDE: usize = $stride; 1015 + } 1016 + 1017 + impl $crate::io::register::RelativeRegisterArray for $name {} 1018 + }; 1019 + 1020 + // Defines the wrapper `$name` type and its conversions from/to the storage type. 1021 + (@bitfield_core $(#[$attr:meta])* $vis:vis $name:ident $storage:ty) => { 1022 + $(#[$attr])* 1023 + #[repr(transparent)] 1024 + #[derive(Clone, Copy, PartialEq, Eq)] 1025 + $vis struct $name { 1026 + inner: $storage, 1027 + } 1028 + 1029 + #[allow(dead_code)] 1030 + impl $name { 1031 + /// Creates a bitfield from a raw value. 1032 + #[inline(always)] 1033 + $vis const fn from_raw(value: $storage) -> Self { 1034 + Self{ inner: value } 1035 + } 1036 + 1037 + /// Turns this bitfield into its raw value. 1038 + /// 1039 + /// This is similar to the [`From`] implementation, but is shorter to invoke in 1040 + /// most cases. 1041 + #[inline(always)] 1042 + $vis const fn into_raw(self) -> $storage { 1043 + self.inner 1044 + } 1045 + } 1046 + 1047 + // SAFETY: `$storage` is `Zeroable` and `$name` is transparent. 1048 + unsafe impl ::pin_init::Zeroable for $name {} 1049 + 1050 + impl ::core::convert::From<$name> for $storage { 1051 + #[inline(always)] 1052 + fn from(val: $name) -> $storage { 1053 + val.into_raw() 1054 + } 1055 + } 1056 + 1057 + impl ::core::convert::From<$storage> for $name { 1058 + #[inline(always)] 1059 + fn from(val: $storage) -> $name { 1060 + Self::from_raw(val) 1061 + } 1062 + } 1063 + }; 1064 + 1065 + // Definitions requiring knowledge of individual fields: private and public field accessors, 1066 + // and `Debug` implementation. 
1067 + (@bitfield_fields $vis:vis $name:ident $storage:ty { 1068 + $($(#[doc = $doc:expr])* $hi:literal:$lo:literal $field:ident 1069 + $(?=> $try_into_type:ty)? 1070 + $(=> $into_type:ty)? 1071 + ; 1072 + )* 1073 + } 1074 + ) => { 1075 + #[allow(dead_code)] 1076 + impl $name { 1077 + $( 1078 + $crate::register!(@private_field_accessors $vis $name $storage : $hi:$lo $field); 1079 + $crate::register!( 1080 + @public_field_accessors $(#[doc = $doc])* $vis $name $storage : $hi:$lo $field 1081 + $(?=> $try_into_type)? 1082 + $(=> $into_type)? 1083 + ); 1084 + )* 1085 + } 1086 + 1087 + $crate::register!(@debug $name { $($field;)* }); 1088 + }; 1089 + 1090 + // Private field accessors working with the exact `Bounded` type for the field. 1091 + ( 1092 + @private_field_accessors $vis:vis $name:ident $storage:ty : $hi:tt:$lo:tt $field:ident 1093 + ) => { 1094 + ::kernel::macros::paste!( 1095 + $vis const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; 1096 + $vis const [<$field:upper _MASK>]: $storage = 1097 + ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); 1098 + $vis const [<$field:upper _SHIFT>]: u32 = $lo; 1099 + ); 1100 + 1101 + ::kernel::macros::paste!( 1102 + fn [<__ $field>](self) -> 1103 + ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }> { 1104 + // Left shift to align the field's MSB with the storage MSB. 1105 + const ALIGN_TOP: u32 = $storage::BITS - ($hi + 1); 1106 + // Right shift to move the top-aligned field to bit 0 of the storage. 1107 + const ALIGN_BOTTOM: u32 = ALIGN_TOP + $lo; 1108 + 1109 + // Extract the field using two shifts. `Bounded::shr` produces the correctly-sized 1110 + // output type. 
1111 + let val = ::kernel::num::Bounded::<$storage, { $storage::BITS }>::from( 1112 + self.inner << ALIGN_TOP 1113 + ); 1114 + val.shr::<ALIGN_BOTTOM, { $hi + 1 - $lo } >() 1115 + } 1116 + 1117 + const fn [<__with_ $field>]( 1118 + mut self, 1119 + value: ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }>, 1120 + ) -> Self 1121 + { 1122 + const MASK: $storage = <$name>::[<$field:upper _MASK>]; 1123 + const SHIFT: u32 = <$name>::[<$field:upper _SHIFT>]; 1124 + 1125 + let value = value.get() << SHIFT; 1126 + self.inner = (self.inner & !MASK) | value; 1127 + 1128 + self 1129 + } 1130 + ); 1131 + }; 1132 + 1133 + // Public accessors for fields infallibly (`=>`) converted to a type. 1134 + ( 1135 + @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty : 1136 + $hi:literal:$lo:literal $field:ident => $into_type:ty 1137 + ) => { 1138 + ::kernel::macros::paste!( 1139 + 1140 + $(#[doc = $doc])* 1141 + #[doc = "Returns the value of this field."] 1142 + #[inline(always)] 1143 + $vis fn $field(self) -> $into_type 1144 + { 1145 + self.[<__ $field>]().into() 1146 + } 1147 + 1148 + $(#[doc = $doc])* 1149 + #[doc = "Sets this field to the given `value`."] 1150 + #[inline(always)] 1151 + $vis fn [<with_ $field>](self, value: $into_type) -> Self 1152 + { 1153 + self.[<__with_ $field>](value.into()) 1154 + } 1155 + 1156 + ); 1157 + }; 1158 + 1159 + // Public accessors for fields fallibly (`?=>`) converted to a type. 
1160 + ( 1161 + @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty : 1162 + $hi:tt:$lo:tt $field:ident ?=> $try_into_type:ty 1163 + ) => { 1164 + ::kernel::macros::paste!( 1165 + 1166 + $(#[doc = $doc])* 1167 + #[doc = "Returns the value of this field."] 1168 + #[inline(always)] 1169 + $vis fn $field(self) -> 1170 + Result< 1171 + $try_into_type, 1172 + <$try_into_type as ::core::convert::TryFrom< 1173 + ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }> 1174 + >>::Error 1175 + > 1176 + { 1177 + self.[<__ $field>]().try_into() 1178 + } 1179 + 1180 + $(#[doc = $doc])* 1181 + #[doc = "Sets this field to the given `value`."] 1182 + #[inline(always)] 1183 + $vis fn [<with_ $field>](self, value: $try_into_type) -> Self 1184 + { 1185 + self.[<__with_ $field>](value.into()) 1186 + } 1187 + 1188 + ); 1189 + }; 1190 + 1191 + // Public accessors for fields not converted to a type. 1192 + ( 1193 + @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty : 1194 + $hi:tt:$lo:tt $field:ident 1195 + ) => { 1196 + ::kernel::macros::paste!( 1197 + 1198 + $(#[doc = $doc])* 1199 + #[doc = "Returns the value of this field."] 1200 + #[inline(always)] 1201 + $vis fn $field(self) -> 1202 + ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }> 1203 + { 1204 + self.[<__ $field>]() 1205 + } 1206 + 1207 + $(#[doc = $doc])* 1208 + #[doc = "Sets this field to the compile-time constant `VALUE`."] 1209 + #[inline(always)] 1210 + $vis const fn [<with_const_ $field>]<const VALUE: $storage>(self) -> Self { 1211 + self.[<__with_ $field>]( 1212 + ::kernel::num::Bounded::<$storage, { $hi + 1 - $lo }>::new::<VALUE>() 1213 + ) 1214 + } 1215 + 1216 + $(#[doc = $doc])* 1217 + #[doc = "Sets this field to the given `value`."] 1218 + #[inline(always)] 1219 + $vis fn [<with_ $field>]<T>( 1220 + self, 1221 + value: T, 1222 + ) -> Self 1223 + where T: Into<::kernel::num::Bounded<$storage, { $hi + 1 - $lo }>>, 1224 + { 1225 + self.[<__with_ 
$field>](value.into()) 1226 + } 1227 + 1228 + $(#[doc = $doc])* 1229 + #[doc = "Tries to set this field to `value`, returning an error if it is out of range."] 1230 + #[inline(always)] 1231 + $vis fn [<try_with_ $field>]<T>( 1232 + self, 1233 + value: T, 1234 + ) -> ::kernel::error::Result<Self> 1235 + where T: ::kernel::num::TryIntoBounded<$storage, { $hi + 1 - $lo }>, 1236 + { 1237 + Ok( 1238 + self.[<__with_ $field>]( 1239 + value.try_into_bounded().ok_or(::kernel::error::code::EOVERFLOW)? 1240 + ) 1241 + ) 1242 + } 1243 + 1244 + ); 1245 + }; 1246 + 1247 + // `Debug` implementation. 1248 + (@debug $name:ident { $($field:ident;)* }) => { 1249 + impl ::kernel::fmt::Debug for $name { 1250 + fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { 1251 + f.debug_struct(stringify!($name)) 1252 + .field("<raw>", &::kernel::prelude::fmt!("{:#x}", self.inner)) 1253 + $( 1254 + .field(stringify!($field), &self.$field()) 1255 + )* 1256 + .finish() 1257 + } 1258 + } 1259 + }; 1260 + }

+8

rust/kernel/lib.rs

··· 29 29 #![feature(lint_reasons)] 30 30 // 31 31 // Stable since Rust 1.82.0. 32 + #![feature(offset_of_nested)] 32 33 #![feature(raw_ref_op)] 33 34 // 34 35 // Stable since Rust 1.83.0. ··· 38 37 #![feature(const_option)] 39 38 #![feature(const_ptr_write)] 40 39 #![feature(const_refs_to_cell)] 40 + #![feature(const_refs_to_static)] 41 41 // 42 42 // Stable since Rust 1.84.0. 43 43 #![feature(strict_provenance)] 44 + // 45 + // Stable since Rust 1.89.0. 46 + #![feature(generic_arg_infer)] 44 47 // 45 48 // Expected to become stable. 46 49 #![feature(arbitrary_self_types)] ··· 106 101 pub mod firmware; 107 102 pub mod fmt; 108 103 pub mod fs; 104 + #[cfg(CONFIG_GPU_BUDDY = "y")] 105 + pub mod gpu; 109 106 #[cfg(CONFIG_I2C = "y")] 110 107 pub mod i2c; 111 108 pub mod id_pool; 112 109 #[doc(hidden)] 113 110 pub mod impl_flags; 114 111 pub mod init; 112 + pub mod interop; 115 113 pub mod io; 116 114 pub mod ioctl; 117 115 pub mod iommu;

+68 -2

rust/kernel/num/bounded.rs

··· 379 379 380 380 /// Returns the wrapped value as the backing type. 381 381 /// 382 + /// This is similar to the [`Deref`] implementation, but doesn't enforce the size invariant of 383 + /// the [`Bounded`], which might produce slightly less optimal code. 384 + /// 382 385 /// # Examples 383 386 /// 384 387 /// ``` ··· 390 387 /// let v = Bounded::<u32, 4>::new::<7>(); 391 388 /// assert_eq!(v.get(), 7u32); 392 389 /// ``` 393 - pub fn get(self) -> T { 394 - *self.deref() 390 + pub const fn get(self) -> T { 391 + self.0 395 392 } 396 393 397 394 /// Increases the number of bits usable for `self`. ··· 475 472 // SAFETY: Although the backing type has changed, the value is still represented within 476 473 // `N` bits, and with the same signedness. 477 474 unsafe { Bounded::__new(value) } 475 + } 476 + 477 + /// Right-shifts `self` by `SHIFT` and returns the result as a `Bounded<_, RES>`, where `RES >= 478 + /// N - SHIFT`. 479 + /// 480 + /// # Examples 481 + /// 482 + /// ``` 483 + /// use kernel::num::Bounded; 484 + /// 485 + /// let v = Bounded::<u32, 16>::new::<0xff00>(); 486 + /// let v_shifted: Bounded::<u32, 8> = v.shr::<8, _>(); 487 + /// 488 + /// assert_eq!(v_shifted.get(), 0xff); 489 + /// ``` 490 + pub fn shr<const SHIFT: u32, const RES: u32>(self) -> Bounded<T, RES> { 491 + const { assert!(RES + SHIFT >= N) } 492 + 493 + // SAFETY: We shift the value right by `SHIFT`, reducing the number of bits needed to 494 + // represent the shifted value by as much, and just asserted that `RES >= N - SHIFT`. 495 + unsafe { Bounded::__new(self.0 >> SHIFT) } 496 + } 497 + 498 + /// Left-shifts `self` by `SHIFT` and returns the result as a `Bounded<_, RES>`, where `RES >= 499 + /// N + SHIFT`. 
500 + /// 501 + /// # Examples 502 + /// 503 + /// ``` 504 + /// use kernel::num::Bounded; 505 + /// 506 + /// let v = Bounded::<u32, 8>::new::<0xff>(); 507 + /// let v_shifted: Bounded::<u32, 16> = v.shl::<8, _>(); 508 + /// 509 + /// assert_eq!(v_shifted.get(), 0xff00); 510 + /// ``` 511 + pub fn shl<const SHIFT: u32, const RES: u32>(self) -> Bounded<T, RES> { 512 + const { assert!(RES >= N + SHIFT) } 513 + 514 + // SAFETY: We shift the value left by `SHIFT`, augmenting the number of bits needed to 515 + // represent the shifted value by as much, and just asserted that `RES >= N + SHIFT`. 516 + unsafe { Bounded::__new(self.0 << SHIFT) } 478 517 } 479 518 } 480 519 ··· 1102 1057 // SAFETY: A boolean can be represented using a single bit, and thus fits within any 1103 1058 // integer type for any `N` > 0. 1104 1059 unsafe { Self::__new(T::from(value)) } 1060 + } 1061 + } 1062 + 1063 + impl<T> Bounded<T, 1> 1064 + where 1065 + T: Integer + Zeroable, 1066 + { 1067 + /// Converts this [`Bounded`] into a [`bool`]. 1068 + /// 1069 + /// This is a shorter way of writing `bool::from(self)`. 1070 + /// 1071 + /// # Examples 1072 + /// 1073 + /// ``` 1074 + /// use kernel::num::Bounded; 1075 + /// 1076 + /// assert_eq!(Bounded::<u8, 1>::new::<0>().into_bool(), false); 1077 + /// assert_eq!(Bounded::<u8, 1>::new::<1>().into_bool(), true); 1078 + /// ``` 1079 + pub fn into_bool(self) -> bool { 1080 + self.into() 1105 1081 } 1106 1082 }

+30 -69

rust/kernel/pci/io.rs

··· 8 8 device, 9 9 devres::Devres, 10 10 io::{ 11 - io_define_read, 12 - io_define_write, 13 11 Io, 14 12 IoCapable, 15 13 IoKnownSize, ··· 83 85 _marker: PhantomData<S>, 84 86 } 85 87 86 - /// Internal helper macros used to invoke C PCI configuration space read functions. 87 - /// 88 - /// This macro is intended to be used by higher-level PCI configuration space access macros 89 - /// (io_define_read) and provides a unified expansion for infallible vs. fallible read semantics. It 90 - /// emits a direct call into the corresponding C helper and performs the required cast to the Rust 91 - /// return type. 92 - /// 93 - /// # Parameters 94 - /// 95 - /// * `$c_fn` – The C function performing the PCI configuration space write. 96 - /// * `$self` – The I/O backend object. 97 - /// * `$ty` – The type of the value to read. 98 - /// * `$addr` – The PCI configuration space offset to read. 99 - /// 100 - /// This macro does not perform any validation; all invariants must be upheld by the higher-level 101 - /// abstraction invoking it. 102 - macro_rules! call_config_read { 103 - (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr) => {{ 104 - let mut val: $ty = 0; 105 - // SAFETY: By the type invariant `$self.pdev` is a valid address. 106 - // CAST: The offset is cast to `i32` because the C functions expect a 32-bit signed offset 107 - // parameter. PCI configuration space size is at most 4096 bytes, so the value always fits 108 - // within `i32` without truncation or sign change. 109 - // Return value from C function is ignored in infallible accessors. 110 - let _ret = unsafe { bindings::$c_fn($self.pdev.as_raw(), $addr as i32, &mut val) }; 111 - val 112 - }}; 113 - } 88 + /// Implements [`IoCapable`] on [`ConfigSpace`] for `$ty` using `$read_fn` and `$write_fn`. 89 + macro_rules! 
impl_config_space_io_capable { 90 + ($ty:ty, $read_fn:ident, $write_fn:ident) => { 91 + impl<'a, S: ConfigSpaceKind> IoCapable<$ty> for ConfigSpace<'a, S> { 92 + unsafe fn io_read(&self, address: usize) -> $ty { 93 + let mut val: $ty = 0; 114 94 115 - /// Internal helper macros used to invoke C PCI configuration space write functions. 116 - /// 117 - /// This macro is intended to be used by higher-level PCI configuration space access macros 118 - /// (io_define_write) and provides a unified expansion for infallible vs. fallible read semantics. 119 - /// It emits a direct call into the corresponding C helper and performs the required cast to the 120 - /// Rust return type. 121 - /// 122 - /// # Parameters 123 - /// 124 - /// * `$c_fn` – The C function performing the PCI configuration space write. 125 - /// * `$self` – The I/O backend object. 126 - /// * `$ty` – The type of the written value. 127 - /// * `$addr` – The configuration space offset to write. 128 - /// * `$value` – The value to write. 129 - /// 130 - /// This macro does not perform any validation; all invariants must be upheld by the higher-level 131 - /// abstraction invoking it. 132 - macro_rules! call_config_write { 133 - (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => { 134 - // SAFETY: By the type invariant `$self.pdev` is a valid address. 135 - // CAST: The offset is cast to `i32` because the C functions expect a 32-bit signed offset 136 - // parameter. PCI configuration space size is at most 4096 bytes, so the value always fits 137 - // within `i32` without truncation or sign change. 138 - // Return value from C function is ignored in infallible accessors. 139 - let _ret = unsafe { bindings::$c_fn($self.pdev.as_raw(), $addr as i32, $value) }; 95 + // Return value from C function is ignored in infallible accessors. 96 + let _ret = 97 + // SAFETY: By the type invariant `self.pdev` is a valid address. 
98 + // CAST: The offset is cast to `i32` because the C functions expect a 32-bit 99 + // signed offset parameter. PCI configuration space size is at most 4096 bytes, 100 + // so the value always fits within `i32` without truncation or sign change. 101 + unsafe { bindings::$read_fn(self.pdev.as_raw(), address as i32, &mut val) }; 102 + 103 + val 104 + } 105 + 106 + unsafe fn io_write(&self, value: $ty, address: usize) { 107 + // Return value from C function is ignored in infallible accessors. 108 + let _ret = 109 + // SAFETY: By the type invariant `self.pdev` is a valid address. 110 + // CAST: The offset is cast to `i32` because the C functions expect a 32-bit 111 + // signed offset parameter. PCI configuration space size is at most 4096 bytes, 112 + // so the value always fits within `i32` without truncation or sign change. 113 + unsafe { bindings::$write_fn(self.pdev.as_raw(), address as i32, value) }; 114 + } 115 + } 140 116 }; 141 117 } 142 118 143 119 // PCI configuration space supports 8, 16, and 32-bit accesses. 144 - impl<'a, S: ConfigSpaceKind> IoCapable<u8> for ConfigSpace<'a, S> {} 145 - impl<'a, S: ConfigSpaceKind> IoCapable<u16> for ConfigSpace<'a, S> {} 146 - impl<'a, S: ConfigSpaceKind> IoCapable<u32> for ConfigSpace<'a, S> {} 120 + impl_config_space_io_capable!(u8, pci_read_config_byte, pci_write_config_byte); 121 + impl_config_space_io_capable!(u16, pci_read_config_word, pci_write_config_word); 122 + impl_config_space_io_capable!(u32, pci_read_config_dword, pci_write_config_dword); 147 123 148 124 impl<'a, S: ConfigSpaceKind> Io for ConfigSpace<'a, S> { 149 125 /// Returns the base address of the I/O region. It is always 0 for configuration space. ··· 131 159 fn maxsize(&self) -> usize { 132 160 self.pdev.cfg_size().into_raw() 133 161 } 134 - 135 - // PCI configuration space does not support fallible operations. 136 - // The default implementations from the Io trait are not used. 
137 - 138 - io_define_read!(infallible, read8, call_config_read(pci_read_config_byte) -> u8); 139 - io_define_read!(infallible, read16, call_config_read(pci_read_config_word) -> u16); 140 - io_define_read!(infallible, read32, call_config_read(pci_read_config_dword) -> u32); 141 - 142 - io_define_write!(infallible, write8, call_config_write(pci_write_config_byte) <- u8); 143 - io_define_write!(infallible, write16, call_config_write(pci_write_config_word) <- u16); 144 - io_define_write!(infallible, write32, call_config_write(pci_write_config_dword) <- u32); 145 162 } 146 163 147 164 impl<'a, S: ConfigSpaceKind> IoKnownSize for ConfigSpace<'a, S> {

+81 -10

rust/kernel/uaccess.rs

··· 7 7 use crate::{ 8 8 alloc::{Allocator, Flags}, 9 9 bindings, 10 + dma::Coherent, 10 11 error::Result, 11 12 ffi::{c_char, c_void}, 12 13 fs::file, 13 14 prelude::*, 15 + ptr::KnownSize, 14 16 transmute::{AsBytes, FromBytes}, 15 17 }; 16 18 use core::mem::{size_of, MaybeUninit}; ··· 461 459 self.length == 0 462 460 } 463 461 464 - /// Writes raw data to this user pointer from a kernel buffer. 462 + /// Low-level write from a raw pointer. 465 463 /// 466 - /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of 467 - /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even 468 - /// if it returns an error. 469 - pub fn write_slice(&mut self, data: &[u8]) -> Result { 470 - let len = data.len(); 471 - let data_ptr = data.as_ptr().cast::<c_void>(); 464 + /// # Safety 465 + /// 466 + /// The caller must ensure that `from` is valid for reads of `len` bytes. 467 + unsafe fn write_raw(&mut self, from: *const u8, len: usize) -> Result { 472 468 if len > self.length { 473 469 return Err(EFAULT); 474 470 } 475 - // SAFETY: `data_ptr` points into an immutable slice of length `len`, so we may read 476 - // that many bytes from it. 477 - let res = unsafe { bindings::copy_to_user(self.ptr.as_mut_ptr(), data_ptr, len) }; 471 + 472 + // SAFETY: Caller guarantees `from` is valid for `len` bytes (see this function's 473 + // safety contract). 474 + let res = unsafe { bindings::copy_to_user(self.ptr.as_mut_ptr(), from.cast(), len) }; 478 475 if res != 0 { 479 476 return Err(EFAULT); 480 477 } 481 478 self.ptr = self.ptr.wrapping_byte_add(len); 482 479 self.length -= len; 483 480 Ok(()) 481 + } 482 + 483 + /// Writes raw data to this user pointer from a kernel buffer. 484 + /// 485 + /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of 486 + /// bounds of this [`UserSliceWriter`]. 
This call may modify the associated userspace slice even 487 + /// if it returns an error. 488 + pub fn write_slice(&mut self, data: &[u8]) -> Result { 489 + // SAFETY: `data` is a valid slice, so `data.as_ptr()` is valid for 490 + // reading `data.len()` bytes. 491 + unsafe { self.write_raw(data.as_ptr(), data.len()) } 492 + } 493 + 494 + /// Writes raw data to this user pointer from a DMA coherent allocation. 495 + /// 496 + /// Copies `count` bytes from `alloc` starting from `offset` into this userspace slice. 497 + /// 498 + /// # Errors 499 + /// 500 + /// - [`EOVERFLOW`]: `offset + count` overflows. 501 + /// - [`ERANGE`]: `offset + count` exceeds the size of `alloc`, or `count` exceeds the 502 + /// size of the user-space buffer. 503 + /// - [`EFAULT`]: the write hits a bad address or goes out of bounds of this 504 + /// [`UserSliceWriter`]. 505 + /// 506 + /// This call may modify the associated userspace slice even if it returns an error. 507 + /// 508 + /// Note: The memory may be concurrently modified by hardware (e.g., DMA). In such cases, 509 + /// the copied data may be inconsistent, but this does not cause undefined behavior. 510 + /// 511 + /// # Example 512 + /// 513 + /// Copy the first 256 bytes of a DMA coherent allocation into a userspace buffer: 514 + /// 515 + /// ```no_run 516 + /// use kernel::uaccess::UserSliceWriter; 517 + /// use kernel::dma::Coherent; 518 + /// 519 + /// fn copy_dma_to_user( 520 + /// mut writer: UserSliceWriter, 521 + /// alloc: &Coherent<[u8]>, 522 + /// ) -> Result { 523 + /// writer.write_dma(alloc, 0, 256) 524 + /// } 525 + /// ``` 526 + pub fn write_dma<T: KnownSize + AsBytes + ?Sized>( 527 + &mut self, 528 + alloc: &Coherent<T>, 529 + offset: usize, 530 + count: usize, 531 + ) -> Result { 532 + let len = alloc.size(); 533 + if offset.checked_add(count).ok_or(EOVERFLOW)? 
> len { 534 + return Err(ERANGE); 535 + } 536 + 537 + if count > self.len() { 538 + return Err(ERANGE); 539 + } 540 + 541 + // SAFETY: `as_ptr()` returns a valid pointer to a memory region of `count()` bytes, as 542 + // guaranteed by the `Coherent` invariants. The check above ensures `offset + count <= len`. 543 + let src_ptr = unsafe { alloc.as_ptr().cast::<u8>().add(offset) }; 544 + 545 + // Note: Use `write_raw` instead of `write_slice` because the allocation is coherent 546 + // memory that hardware may modify (e.g., DMA); we cannot form a `&[u8]` slice over 547 + // such volatile memory. 548 + // 549 + // SAFETY: `src_ptr` points into the allocation and is valid for `count` bytes (see above). 550 + unsafe { self.write_raw(src_ptr, count) } 484 551 } 485 552 486 553 /// Writes raw data to this user pointer from a kernel buffer partially.

+97 -7

rust/kernel/workqueue.rs

··· 189 189 alloc::{AllocError, Flags}, 190 190 container_of, 191 191 prelude::*, 192 - sync::Arc, 193 - sync::LockClassKey, 192 + sync::{ 193 + aref::{ 194 + ARef, 195 + AlwaysRefCounted, // 196 + }, 197 + Arc, 198 + LockClassKey, // 199 + }, 194 200 time::Jiffies, 195 201 types::Opaque, 196 202 }; 197 - use core::marker::PhantomData; 203 + use core::{marker::PhantomData, ptr::NonNull}; 198 204 199 205 /// Creates a [`Work`] initialiser with the given name and a newly-created lock class. 200 206 #[macro_export] ··· 431 425 432 426 /// Defines the method that should be called directly when a work item is executed. 433 427 /// 434 - /// This trait is implemented by `Pin<KBox<T>>` and [`Arc<T>`], and is mainly intended to be 435 - /// implemented for smart pointer types. For your own structs, you would implement [`WorkItem`] 436 - /// instead. The [`run`] method on this trait will usually just perform the appropriate 437 - /// `container_of` translation and then call into the [`run`][WorkItem::run] method from the 428 + /// This trait is implemented by `Pin<KBox<T>>`, [`Arc<T>`] and [`ARef<T>`], and 429 + /// is mainly intended to be implemented for smart pointer types. For your own 430 + /// structs, you would implement [`WorkItem`] instead. The [`run`] method on 431 + /// this trait will usually just perform the appropriate `container_of` 432 + /// translation and then call into the [`run`][WorkItem::run] method from the 438 433 /// [`WorkItem`] trait. 439 434 /// 440 435 /// This trait is used when the `work_struct` field is defined using the [`Work`] helper. 
··· 938 931 where 939 932 T: WorkItem<ID, Pointer = Self>, 940 933 T: HasDelayedWork<T, ID>, 934 + { 935 + } 936 + 937 + // SAFETY: Like the `Arc<T>` implementation, the `__enqueue` implementation for 938 + // `ARef<T>` obtains a `work_struct` from the `Work` field using 939 + // `T::raw_get_work`, so the same safety reasoning applies: 940 + // 941 + // - `__enqueue` gets the `work_struct` from the `Work` field, using `T::raw_get_work`. 942 + // - The only safe way to create a `Work` object is through `Work::new`. 943 + // - `Work::new` makes sure that `T::Pointer::run` is passed to `init_work_with_key`. 944 + // - Finally `Work` and `RawWorkItem` guarantee that the correct `Work` field 945 + // will be used because of the ID const generic bound. This makes sure that `T::raw_get_work` 946 + // uses the correct offset for the `Work` field, and `Work::new` picks the correct 947 + // implementation of `WorkItemPointer` for `ARef<T>`. 948 + unsafe impl<T, const ID: u64> WorkItemPointer<ID> for ARef<T> 949 + where 950 + T: AlwaysRefCounted, 951 + T: WorkItem<ID, Pointer = Self>, 952 + T: HasWork<T, ID>, 953 + { 954 + unsafe extern "C" fn run(ptr: *mut bindings::work_struct) { 955 + // The `__enqueue` method always uses a `work_struct` stored in a `Work<T, ID>`. 956 + let ptr = ptr.cast::<Work<T, ID>>(); 957 + 958 + // SAFETY: This computes the pointer that `__enqueue` got from 959 + // `ARef::into_raw`. 960 + let ptr = unsafe { T::work_container_of(ptr) }; 961 + 962 + // SAFETY: The safety contract of `work_container_of` ensures that it 963 + // returns a valid non-null pointer. 964 + let ptr = unsafe { NonNull::new_unchecked(ptr) }; 965 + 966 + // SAFETY: This pointer comes from `ARef::into_raw` and we've been given 967 + // back ownership. 
968 + let aref = unsafe { ARef::from_raw(ptr) }; 969 + 970 + T::run(aref) 971 + } 972 + } 973 + 974 + // SAFETY: The `work_struct` raw pointer is guaranteed to be valid for the duration of the call to 975 + // the closure because we get it from an `ARef`, which means that the ref count will be at least 1, 976 + // and we don't drop the `ARef` ourselves. If `queue_work_on` returns true, it is further guaranteed 977 + // to be valid until a call to the function pointer in `work_struct` because we leak the memory it 978 + // points to, and only reclaim it if the closure returns false, or in `WorkItemPointer::run`, which 979 + // is what the function pointer in the `work_struct` must be pointing to, according to the safety 980 + // requirements of `WorkItemPointer`. 981 + unsafe impl<T, const ID: u64> RawWorkItem<ID> for ARef<T> 982 + where 983 + T: AlwaysRefCounted, 984 + T: WorkItem<ID, Pointer = Self>, 985 + T: HasWork<T, ID>, 986 + { 987 + type EnqueueOutput = Result<(), Self>; 988 + 989 + unsafe fn __enqueue<F>(self, queue_work_on: F) -> Self::EnqueueOutput 990 + where 991 + F: FnOnce(*mut bindings::work_struct) -> bool, 992 + { 993 + let ptr = ARef::into_raw(self); 994 + 995 + // SAFETY: Pointers from ARef::into_raw are valid and non-null. 996 + let work_ptr = unsafe { T::raw_get_work(ptr.as_ptr()) }; 997 + // SAFETY: `raw_get_work` returns a pointer to a valid value. 998 + let work_ptr = unsafe { Work::raw_get(work_ptr) }; 999 + 1000 + if queue_work_on(work_ptr) { 1001 + Ok(()) 1002 + } else { 1003 + // SAFETY: The work queue has not taken ownership of the pointer. 1004 + Err(unsafe { ARef::from_raw(ptr) }) 1005 + } 1006 + } 1007 + } 1008 + 1009 + // SAFETY: By the safety requirements of `HasDelayedWork`, the `work_struct` returned by methods in 1010 + // `HasWork` provides a `work_struct` that is the `work` field of a `delayed_work`, and the rest of 1011 + // the `delayed_work` has the same access rules as its `work` field. 
1012 + unsafe impl<T, const ID: u64> RawDelayedWorkItem<ID> for ARef<T> 1013 + where 1014 + T: WorkItem<ID, Pointer = Self>, 1015 + T: HasDelayedWork<T, ID>, 1016 + T: AlwaysRefCounted, 941 1017 { 942 1018 } 943 1019

+9 -4

samples/rust/rust_dma.rs

··· 6 6 7 7 use kernel::{ 8 8 device::Core, 9 - dma::{CoherentAllocation, DataDirection, Device, DmaMask}, 9 + dma::{ 10 + Coherent, 11 + DataDirection, 12 + Device, 13 + DmaMask, // 14 + }, 10 15 page, pci, 11 16 prelude::*, 12 17 scatterlist::{Owned, SGTable}, ··· 21 16 #[pin_data(PinnedDrop)] 22 17 struct DmaSampleDriver { 23 18 pdev: ARef<pci::Device>, 24 - ca: CoherentAllocation<MyStruct>, 19 + ca: Coherent<[MyStruct]>, 25 20 #[pin] 26 21 sgt: SGTable<Owned<VVec<u8>>>, 27 22 } ··· 69 64 // SAFETY: There are no concurrent calls to DMA allocation and mapping primitives. 70 65 unsafe { pdev.dma_set_mask_and_coherent(mask)? }; 71 66 72 - let ca: CoherentAllocation<MyStruct> = 73 - CoherentAllocation::alloc_coherent(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?; 67 + let ca: Coherent<[MyStruct]> = 68 + Coherent::zeroed_slice(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?; 74 69 75 70 for (i, value) in TEST_VALUES.into_iter().enumerate() { 76 71 kernel::dma_write!(ca, [i]?, MyStruct::new(value.0, value.1));

+68 -22

samples/rust/rust_driver_pci.rs

··· 5 5 //! To make this driver probe, QEMU must be run with `-device pci-testdev`. 6 6 7 7 use kernel::{ 8 - device::Bound, 9 - device::Core, 8 + device::{ 9 + Bound, 10 + Core, // 11 + }, 10 12 devres::Devres, 11 - io::Io, 13 + io::{ 14 + register, 15 + register::Array, 16 + Io, // 17 + }, 18 + num::Bounded, 12 19 pci, 13 20 prelude::*, 14 21 sync::aref::ARef, // 15 22 }; 16 23 17 - struct Regs; 24 + mod regs { 25 + use super::*; 18 26 19 - impl Regs { 20 - const TEST: usize = 0x0; 21 - const OFFSET: usize = 0x4; 22 - const DATA: usize = 0x8; 23 - const COUNT: usize = 0xC; 24 - const END: usize = 0x10; 27 + register! { 28 + pub(super) TEST(u8) @ 0x0 { 29 + 7:0 index => TestIndex; 30 + } 31 + 32 + pub(super) OFFSET(u32) @ 0x4 { 33 + 31:0 offset; 34 + } 35 + 36 + pub(super) DATA(u8) @ 0x8 { 37 + 7:0 data; 38 + } 39 + 40 + pub(super) COUNT(u32) @ 0xC { 41 + 31:0 count; 42 + } 43 + } 44 + 45 + pub(super) const END: usize = 0x10; 25 46 } 26 47 27 - type Bar0 = pci::Bar<{ Regs::END }>; 48 + type Bar0 = pci::Bar<{ regs::END }>; 28 49 29 50 #[derive(Copy, Clone, Debug)] 30 51 struct TestIndex(u8); 52 + 53 + impl From<Bounded<u8, 8>> for TestIndex { 54 + fn from(value: Bounded<u8, 8>) -> Self { 55 + Self(value.into()) 56 + } 57 + } 58 + 59 + impl From<TestIndex> for Bounded<u8, 8> { 60 + fn from(value: TestIndex) -> Self { 61 + value.0.into() 62 + } 63 + } 31 64 32 65 impl TestIndex { 33 66 const NO_EVENTFD: Self = Self(0); ··· 87 54 impl SampleDriver { 88 55 fn testdev(index: &TestIndex, bar: &Bar0) -> Result<u32> { 89 56 // Select the test. 90 - bar.write8(index.0, Regs::TEST); 57 + bar.write_reg(regs::TEST::zeroed().with_index(*index)); 91 58 92 - let offset = bar.read32(Regs::OFFSET) as usize; 93 - let data = bar.read8(Regs::DATA); 59 + let offset = bar.read(regs::OFFSET).into_raw() as usize; 60 + let data = bar.read(regs::DATA).into(); 94 61 95 62 // Write `data` to `offset` to increase `count` by one. 
96 63 // 97 64 // Note that we need `try_write8`, since `offset` can't be checked at compile-time. 98 65 bar.try_write8(data, offset)?; 99 66 100 - Ok(bar.read32(Regs::COUNT)) 67 + Ok(bar.read(regs::COUNT).into()) 101 68 } 102 69 103 70 fn config_space(pdev: &pci::Device<Bound>) { 104 71 let config = pdev.config_space(); 105 72 106 - // TODO: use the register!() macro for defining PCI configuration space registers once it 107 - // has been move out of nova-core. 73 + // Some PCI configuration space registers. 74 + register! { 75 + VENDOR_ID(u16) @ 0x0 { 76 + 15:0 vendor_id; 77 + } 78 + 79 + REVISION_ID(u8) @ 0x8 { 80 + 7:0 revision_id; 81 + } 82 + 83 + BAR(u32)[6] @ 0x10 { 84 + 31:0 value; 85 + } 86 + } 87 + 108 88 dev_info!( 109 89 pdev, 110 90 "pci-testdev config space read8 rev ID: {:x}\n", 111 - config.read8(0x8) 91 + config.read(REVISION_ID).revision_id() 112 92 ); 113 93 114 94 dev_info!( 115 95 pdev, 116 96 "pci-testdev config space read16 vendor ID: {:x}\n", 117 - config.read16(0) 97 + config.read(VENDOR_ID).vendor_id() 118 98 ); 119 99 120 100 dev_info!( 121 101 pdev, 122 102 "pci-testdev config space read32 BAR 0: {:x}\n", 123 - config.read32(0x10) 103 + config.read(BAR::at(0)).value() 124 104 ); 125 105 } 126 106 } ··· 157 111 pdev.set_master(); 158 112 159 113 Ok(try_pin_init!(Self { 160 - bar <- pdev.iomap_region_sized::<{ Regs::END }>(0, c"rust_driver_pci"), 114 + bar <- pdev.iomap_region_sized::<{ regs::END }>(0, c"rust_driver_pci"), 161 115 index: *info, 162 116 _: { 163 117 let bar = bar.access(pdev.as_ref())?; ··· 177 131 fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) { 178 132 if let Ok(bar) = this.bar.access(pdev.as_ref()) { 179 133 // Reset pci-testdev by writing a new test index. 180 - bar.write8(this.index.0, Regs::TEST); 134 + bar.write_reg(regs::TEST::zeroed().with_index(this.index)); 181 135 } 182 136 } 183 137 }

+2 -1

scripts/Makefile.build

··· 316 316 # `feature(offset_of_nested)`, `feature(raw_ref_op)`. 317 317 # - Stable since Rust 1.84.0: `feature(strict_provenance)`. 318 318 # - Stable since Rust 1.87.0: `feature(asm_goto)`. 319 + # - Stable since Rust 1.89.0: `feature(generic_arg_infer)`. 319 320 # - Expected to become stable: `feature(arbitrary_self_types)`. 320 321 # - To be determined: `feature(used_with_arg)`. 321 322 # 322 323 # Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on 323 324 # the unstable features in use. 324 - rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,offset_of_nested,raw_ref_op,slice_ptr_len,strict_provenance,used_with_arg 325 + rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,generic_arg_infer,lint_reasons,offset_of_nested,raw_ref_op,slice_ptr_len,strict_provenance,used_with_arg 325 326 326 327 # `--out-dir` is required to avoid temporaries being created by `rustc` in the 327 328 # current working directory, which may be not accessible in the out-of-tree

Configure Feed

Configure Feed