Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

rust: devres: fix race condition due to nesting

Commit f5d3ef25d238 ("rust: devres: get rid of Devres' inner Arc")
attempted to optimize away the internal reference count of Devres.

However, without an internal reference count, we can't support cases
where Devres is indirectly nested; such nesting results in a deadlock.

Such indirect nesting easily happens in the following way:

A registration object (which is guarded by devres) holds a reference
to an object that holds a device resource which is itself guarded by
devres.

For instance, a drm::Registration holds a reference to a drm::Device. The
drm::Device itself holds a device resource in its private data.

When the drm::Registration is dropped by devres and happens to hold the
last reference to the drm::Device, it also drops the device resource,
which is itself guarded by devres.

This results in a deadlock in the Devres destructor of the device
resource, as shown in the following backtrace.

sysrq: Show Blocked State
task:rmmod state:D stack:0 pid:1331 tgid:1331 ppid:1330 task_flags:0x400100 flags:0x00000010
Call trace:
__switch_to+0x190/0x294 (T)
__schedule+0x878/0xf10
schedule+0x4c/0xcc
schedule_timeout+0x44/0x118
wait_for_common+0xc0/0x18c
wait_for_completion+0x18/0x24
_RINvNtCs4gKlGRWyJ5S_4core3ptr13drop_in_placeINtNtNtCsgzhNYVB7wSz_6kernel4sync3arc3ArcINtNtBN_6devres6DevresmEEECsRdyc7Hyps3_15rust_driver_pci+0x68/0xe8 [rust_driver_pci]
_RINvNvNtCsgzhNYVB7wSz_6kernel6devres16register_foreign8callbackINtNtCs4gKlGRWyJ5S_4core3pin3PinINtNtNtB6_5alloc4kbox3BoxINtNtNtB6_4sync3arc3ArcINtB4_6DevresmEENtNtB1A_9allocator7KmallocEEECsRdyc7Hyps3_15rust_driver_pci+0x34/0xc8 [rust_driver_pci]
devm_action_release+0x14/0x20
devres_release_all+0xb8/0x118
device_release_driver_internal+0x1c4/0x28c
driver_detach+0x94/0xd4
bus_remove_driver+0xdc/0x11c
driver_unregister+0x34/0x58
pci_unregister_driver+0x20/0x80
__arm64_sys_delete_module+0x1d8/0x254
invoke_syscall+0x40/0xcc
el0_svc_common+0x8c/0xd8
do_el0_svc+0x1c/0x28
el0_svc+0x54/0x1d4
el0t_64_sync_handler+0x84/0x12c
el0t_64_sync+0x198/0x19c

In order to fix this, re-introduce the internal reference count.

Reported-by: Boris Brezillon <boris.brezillon@collabora.com>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/288089-General/topic/.E2.9C.94.20Deadlock.20caused.20by.20nested.20Devres/with/571242651
Reported-by: Markus Probst <markus.probst@posteo.de>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/288089-General/topic/.E2.9C.94.20Devres.20inside.20Devres.20stuck.20on.20cleanup/with/571239721
Reported-by: Alice Ryhl <aliceryhl@google.com>
Closes: https://gitlab.freedesktop.org/panfrost/linux/-/merge_requests/56#note_3282757
Fixes: f5d3ef25d238 ("rust: devres: get rid of Devres' inner Arc")
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Tested-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patch.msgid.link/20260205222529.91465-1-dakr@kernel.org
[ Call clone() prior to devm_add_action(). - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>

+41 -110
+41 -110
rust/kernel/devres.rs
··· 21 21 sync::{ 22 22 aref::ARef, 23 23 rcu, 24 - Completion, // 24 + Arc, // 25 25 }, 26 - types::{ 27 - ForeignOwnable, 28 - Opaque, 29 - ScopeGuard, // 30 - }, 26 + types::ForeignOwnable, 31 27 }; 32 - 33 - use pin_init::Wrapper; 34 - 35 - /// [`Devres`] inner data accessed from [`Devres::callback`]. 36 - #[pin_data] 37 - struct Inner<T: Send> { 38 - #[pin] 39 - data: Revocable<T>, 40 - /// Tracks whether [`Devres::callback`] has been completed. 41 - #[pin] 42 - devm: Completion, 43 - /// Tracks whether revoking [`Self::data`] has been completed. 44 - #[pin] 45 - revoke: Completion, 46 - } 47 28 48 29 /// This abstraction is meant to be used by subsystems to containerize [`Device`] bound resources to 49 30 /// manage their lifetime. ··· 102 121 /// # fn no_run(dev: &Device<Bound>) -> Result<(), Error> { 103 122 /// // SAFETY: Invalid usage for example purposes. 104 123 /// let iomem = unsafe { IoMem::<{ core::mem::size_of::<u32>() }>::new(0xBAAAAAAD)? }; 105 - /// let devres = KBox::pin_init(Devres::new(dev, iomem), GFP_KERNEL)?; 124 + /// let devres = Devres::new(dev, iomem)?; 106 125 /// 107 126 /// let res = devres.try_access().ok_or(ENXIO)?; 108 127 /// res.write8(0x42, 0x0); 109 128 /// # Ok(()) 110 129 /// # } 111 130 /// ``` 112 - /// 113 - /// # Invariants 114 - /// 115 - /// `Self::inner` is guaranteed to be initialized and is always accessed read-only. 116 - #[pin_data(PinnedDrop)] 117 131 pub struct Devres<T: Send> { 118 132 dev: ARef<Device>, 119 133 /// Pointer to [`Self::devres_callback`]. ··· 116 140 /// Has to be stored, since Rust does not guarantee to always return the same address for a 117 141 /// function. However, the C API uses the address as a key. 118 142 callback: unsafe extern "C" fn(*mut c_void), 119 - /// Contains all the fields shared with [`Self::callback`]. 120 - // TODO: Replace with `UnsafePinned`, once available. 
121 - // 122 - // Subsequently, the `drop_in_place()` in `Devres::drop` and `Devres::new` as well as the 123 - // explicit `Send` and `Sync' impls can be removed. 124 - #[pin] 125 - inner: Opaque<Inner<T>>, 126 - _add_action: (), 143 + data: Arc<Revocable<T>>, 127 144 } 128 145 129 146 impl<T: Send> Devres<T> { ··· 124 155 /// 125 156 /// The `data` encapsulated within the returned `Devres` instance' `data` will be 126 157 /// (revoked)[`Revocable`] once the device is detached. 127 - pub fn new<'a, E>( 128 - dev: &'a Device<Bound>, 129 - data: impl PinInit<T, E> + 'a, 130 - ) -> impl PinInit<Self, Error> + 'a 158 + pub fn new<E>(dev: &Device<Bound>, data: impl PinInit<T, E>) -> Result<Self> 131 159 where 132 - T: 'a, 133 160 Error: From<E>, 134 161 { 135 - try_pin_init!(&this in Self { 162 + let callback = Self::devres_callback; 163 + let data = Arc::pin_init(Revocable::new(data), GFP_KERNEL)?; 164 + let devres_data = data.clone(); 165 + 166 + // SAFETY: 167 + // - `dev.as_raw()` is a pointer to a valid bound device. 168 + // - `data` is guaranteed to be a valid for the duration of the lifetime of `Self`. 169 + // - `devm_add_action()` is guaranteed not to call `callback` for the entire lifetime of 170 + // `dev`. 171 + to_result(unsafe { 172 + bindings::devm_add_action( 173 + dev.as_raw(), 174 + Some(callback), 175 + Arc::as_ptr(&data).cast_mut().cast(), 176 + ) 177 + })?; 178 + 179 + // `devm_add_action()` was successful and has consumed the reference count. 180 + core::mem::forget(devres_data); 181 + 182 + Ok(Self { 136 183 dev: dev.into(), 137 - callback: Self::devres_callback, 138 - // INVARIANT: `inner` is properly initialized. 139 - inner <- Opaque::pin_init(try_pin_init!(Inner { 140 - devm <- Completion::new(), 141 - revoke <- Completion::new(), 142 - data <- Revocable::new(data), 143 - })), 144 - // TODO: Replace with "initializer code blocks" [1] once available. 
145 - // 146 - // [1] https://github.com/Rust-for-Linux/pin-init/pull/69 147 - _add_action: { 148 - // SAFETY: `this` is a valid pointer to uninitialized memory. 149 - let inner = unsafe { &raw mut (*this.as_ptr()).inner }; 150 - 151 - // SAFETY: 152 - // - `dev.as_raw()` is a pointer to a valid bound device. 153 - // - `inner` is guaranteed to be a valid for the duration of the lifetime of `Self`. 154 - // - `devm_add_action()` is guaranteed not to call `callback` until `this` has been 155 - // properly initialized, because we require `dev` (i.e. the *bound* device) to 156 - // live at least as long as the returned `impl PinInit<Self, Error>`. 157 - to_result(unsafe { 158 - bindings::devm_add_action(dev.as_raw(), Some(*callback), inner.cast()) 159 - }).inspect_err(|_| { 160 - let inner = Opaque::cast_into(inner); 161 - 162 - // SAFETY: `inner` is a valid pointer to an `Inner<T>` and valid for both reads 163 - // and writes. 164 - unsafe { core::ptr::drop_in_place(inner) }; 165 - })?; 166 - }, 184 + callback, 185 + data, 167 186 }) 168 187 } 169 188 170 - fn inner(&self) -> &Inner<T> { 171 - // SAFETY: By the type invairants of `Self`, `inner` is properly initialized and always 172 - // accessed read-only. 173 - unsafe { &*self.inner.get() } 174 - } 175 - 176 189 fn data(&self) -> &Revocable<T> { 177 - &self.inner().data 190 + &self.data 178 191 } 179 192 180 193 #[allow(clippy::missing_safety_doc)] 181 194 unsafe extern "C" fn devres_callback(ptr: *mut kernel::ffi::c_void) { 182 - // SAFETY: In `Self::new` we've passed a valid pointer to `Inner` to `devm_add_action()`, 183 - // hence `ptr` must be a valid pointer to `Inner`. 184 - let inner = unsafe { &*ptr.cast::<Inner<T>>() }; 195 + // SAFETY: In `Self::new` we've passed a valid pointer of `Revocable<T>` to 196 + // `devm_add_action()`, hence `ptr` must be a valid pointer to `Revocable<T>`. 
197 + let data = unsafe { Arc::from_raw(ptr.cast::<Revocable<T>>()) }; 185 198 186 - // Ensure that `inner` can't be used anymore after we signal completion of this callback. 187 - let inner = ScopeGuard::new_with_data(inner, |inner| inner.devm.complete_all()); 188 - 189 - if !inner.data.revoke() { 190 - // If `revoke()` returns false, it means that `Devres::drop` already started revoking 191 - // `data` for us. Hence we have to wait until `Devres::drop` signals that it 192 - // completed revoking `data`. 193 - inner.revoke.wait_for_completion(); 194 - } 199 + data.revoke(); 195 200 } 196 201 197 202 fn remove_action(&self) -> bool { ··· 177 234 bindings::devm_remove_action_nowarn( 178 235 self.dev.as_raw(), 179 236 Some(self.callback), 180 - core::ptr::from_ref(self.inner()).cast_mut().cast(), 237 + core::ptr::from_ref(self.data()).cast_mut().cast(), 181 238 ) 182 239 } == 0) 183 240 } ··· 256 313 // SAFETY: `Devres` can be shared with any task, if `T: Sync`. 257 314 unsafe impl<T: Send + Sync> Sync for Devres<T> {} 258 315 259 - #[pinned_drop] 260 - impl<T: Send> PinnedDrop for Devres<T> { 261 - fn drop(self: Pin<&mut Self>) { 316 + impl<T: Send> Drop for Devres<T> { 317 + fn drop(&mut self) { 262 318 // SAFETY: When `drop` runs, it is guaranteed that nobody is accessing the revocable data 263 319 // anymore, hence it is safe not to wait for the grace period to finish. 264 320 if unsafe { self.data().revoke_nosync() } { 265 321 // We revoked `self.data` before the devres action did, hence try to remove it. 266 - if !self.remove_action() { 267 - // We could not remove the devres action, which means that it now runs concurrently, 268 - // hence signal that `self.data` has been revoked by us successfully. 269 - self.inner().revoke.complete_all(); 270 - 271 - // Wait for `Self::devres_callback` to be done using this object. 
272 - self.inner().devm.wait_for_completion(); 322 + if self.remove_action() { 323 + // SAFETY: In `Self::new` we have taken an additional reference count of `self.data` 324 + // for `devm_add_action()`. Since `remove_action()` was successful, we have to drop 325 + // this additional reference count. 326 + drop(unsafe { Arc::from_raw(Arc::as_ptr(&self.data)) }); 273 327 } 274 - } else { 275 - // `Self::devres_callback` revokes `self.data` for us, hence wait for it to be done 276 - // using this object. 277 - self.inner().devm.wait_for_completion(); 278 328 } 279 - 280 - // INVARIANT: At this point it is guaranteed that `inner` can't be accessed any more. 281 - // 282 - // SAFETY: `inner` is valid for dropping. 283 - unsafe { core::ptr::drop_in_place(self.inner.get()) }; 284 329 } 285 330 } 286 331