gpu: nova-core: fix stack overflow in GSP memory allocation

The `Cmdq::new` function allocated a `PteArray` struct on the stack,
which caused a stack overflow (8216 bytes).

Modify `PteArray` to calculate the Page Table Entries and write them
one by one directly into the coherent DMA buffer. This significantly
reduces stack usage.

Reported-by: Gary Guo <gary@garyguo.net>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/topic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549
Link: https://lore.kernel.org/rust-for-linux/CANiq72mAQxbRJZDnik3Qmd4phvFwPA01O2jwaaXRh_T+2=L-qA@mail.gmail.com/
Fixes: f38b4f105cfc ("gpu: nova-core: Create initial Gsp")
Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Tim Kovalenko <tim.kovalenko@proton.me>
Link: https://patch.msgid.link/20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me
[ * Use PteArray::entry() in LogBuffer::new(),
* Add TODO comment to use IoView projections once available,
* Add PTE_ARRAY_SIZE constant to avoid duplication.

- Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>

authored by

Tim Kovalenko and committed by

Danilo Krummrich 3 months ago c7940c8b 4da879a0

+29 -17

2 changed files

expand all

drivers

gpu

nova-core

gsp

cmdq.rs

gsp.rs

+17 -15

drivers/gpu/nova-core/gsp.rs

··· 47 47 unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {} 48 48 49 49 impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> { 50 - /// Creates a new page table array mapping `NUM_PAGES` GSP pages starting at address `start`. 51 - fn new(start: DmaAddress) -> Result<Self> { 52 - let mut ptes = [0u64; NUM_PAGES]; 53 - for (i, pte) in ptes.iter_mut().enumerate() { 54 - *pte = start 55 - .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) 56 - .ok_or(EOVERFLOW)?; 57 - } 58 - 59 - Ok(Self(ptes)) 50 + /// Returns the page table entry for `index`, for a mapping starting at `start`. 51 + // TODO: Replace with `IoView` projection once available. 52 + fn entry(start: DmaAddress, index: usize) -> Result<u64> { 53 + start 54 + .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT) 55 + .ok_or(EOVERFLOW) 60 56 } 61 57 } 62 58 ··· 82 86 NUM_PAGES * GSP_PAGE_SIZE, 83 87 GFP_KERNEL | __GFP_ZERO, 84 88 )?); 85 - let ptes = PteArray::<NUM_PAGES>::new(obj.0.dma_handle())?; 89 + 90 + let start_addr = obj.0.dma_handle(); 86 91 87 92 // SAFETY: `obj` has just been created and we are its sole user. 88 - unsafe { 89 - // Copy the self-mapping PTE at the expected location. 93 + let pte_region = unsafe { 90 94 obj.0 91 - .as_slice_mut(size_of::<u64>(), size_of_val(&ptes))? 92 - .copy_from_slice(ptes.as_bytes()) 95 + .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())? 93 96 }; 97 + 98 + // Write values one by one to avoid an on-stack instance of `PteArray`. 99 + for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() { 100 + let pte_value = PteArray::<0>::entry(start_addr, i)?; 101 + 102 + chunk.copy_from_slice(&pte_value.to_ne_bytes()); 103 + } 94 104 95 105 Ok(obj) 96 106 }

+12 -2

drivers/gpu/nova-core/gsp/cmdq.rs

··· 159 159 #[repr(C)] 160 160 struct GspMem { 161 161 /// Self-mapping page table entries. 162 - ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>, 162 + ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, 163 163 /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the 164 164 /// write and read pointers that the CPU updates. 165 165 /// ··· 170 170 /// 171 171 /// This member is read-only for the driver. 172 172 gspq: Msgq, 173 + } 174 + 175 + impl GspMem { 176 + const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::<u64>(); 173 177 } 174 178 175 179 // SAFETY: These structs don't meet the no-padding requirements of AsBytes but ··· 205 201 206 202 let gsp_mem = 207 203 CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; 208 - dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())?); 204 + 205 + let start = gsp_mem.dma_handle(); 206 + // Write values one by one to avoid an on-stack instance of `PteArray`. 207 + for i in 0..GspMem::PTE_ARRAY_SIZE { 208 + dma_write!(gsp_mem, [0]?.ptes.0[i], PteArray::<0>::entry(start, i)?); 209 + } 210 + 209 211 dma_write!( 210 212 gsp_mem, 211 213 [0]?.cpuq.tx,

Configure Feed

Configure Feed