Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fsnotify: fix inode reference leak in fsnotify_recalc_mask()

fsnotify_recalc_mask() fails to handle the return value of
__fsnotify_recalc_mask(), which may return an inode pointer that needs
to be released via fsnotify_drop_object() when the connector's HAS_IREF
flag transitions from set to cleared.

This manifests as a hung task with the following call trace:

INFO: task umount:1234 blocked for more than 120 seconds.
Call Trace:
__schedule
schedule
fsnotify_sb_delete
generic_shutdown_super
kill_anon_super
cleanup_mnt
task_work_run
do_exit
do_group_exit

The race window that triggers the iref leak:

  Thread A (adding mark)                Thread B (removing mark)
  ──────────────────────                ────────────────────────
  fsnotify_add_mark_locked():
    fsnotify_add_mark_list():
      spin_lock(conn->lock)
      add mark_B(evictable) to list
      spin_unlock(conn->lock)
      return

    /* ---- gap: no lock held ---- */

                                        fsnotify_detach_mark(mark_A):
                                          spin_lock(mark_A->lock)
                                          clear ATTACHED flag on mark_A
                                          spin_unlock(mark_A->lock)
                                        fsnotify_put_mark(mark_A)

    fsnotify_recalc_mask():
      spin_lock(conn->lock)
      __fsnotify_recalc_mask():
        /* mark_A skipped: ATTACHED cleared */
        /* only mark_B(evictable) remains */
        want_iref = false
        has_iref = true /* not yet cleared */
        -> HAS_IREF transitions true -> false
        -> returns inode pointer
      spin_unlock(conn->lock)
      /* BUG: return value discarded!
       * iput() and fsnotify_put_sb_watched_objects()
       * are never called */

Fix this by deferring the true -> false transition of the HAS_IREF flag from
fsnotify_recalc_mask() (Thread A) to fsnotify_put_mark() (Thread B).

Fixes: c3638b5b1374 ("fsnotify: allow adding an inode mark without pinning inode")
Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://patch.msgid.link/CAOQ4uxiPsbHb0o5voUKyPFMvBsDkG914FYDcs4C5UpBMNm0Vcg@mail.gmail.com
Signed-off-by: Jan Kara <jack@suse.cz>

authored by

Amir Goldstein and committed by
Jan Kara
4aca914a ae974ca6

+36 -3
+36 -3
fs/notify/mark.c
··· 238 238 return inode; 239 239 } 240 240 241 - static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) 241 + /* 242 + * Calculate mask of events for a list of marks. 243 + * 244 + * Return true if any of the attached marks want to hold an inode reference. 245 + */ 246 + static bool __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) 242 247 { 243 248 u32 new_mask = 0; 244 249 bool want_iref = false; ··· 266 261 * confusing readers not holding conn->lock with partial updates. 267 262 */ 268 263 WRITE_ONCE(*fsnotify_conn_mask_p(conn), new_mask); 264 + 265 + return want_iref; 266 + } 267 + 268 + /* 269 + * Calculate mask of events for a list of marks after attach/modify mark 270 + * and get an inode reference for the connector if needed. 271 + * 272 + * A concurrent add of evictable mark and detach of non-evictable mark can 273 + * lead to __fsnotify_recalc_mask() returning false want_iref, but in this 274 + * case we defer clearing iref to fsnotify_recalc_mask_clear_iref() called 275 + * from fsnotify_put_mark(). 276 + */ 277 + static void fsnotify_recalc_mask_set_iref(struct fsnotify_mark_connector *conn) 278 + { 279 + bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF; 280 + bool want_iref = __fsnotify_recalc_mask(conn) || has_iref; 281 + 282 + (void) fsnotify_update_iref(conn, want_iref); 283 + } 284 + 285 + /* 286 + * Calculate mask of events for a list of marks after detach mark 287 + * and return the inode object if its reference is no longer needed. 
288 + */ 289 + static void *fsnotify_recalc_mask_clear_iref(struct fsnotify_mark_connector *conn) 290 + { 291 + bool want_iref = __fsnotify_recalc_mask(conn); 269 292 270 293 return fsnotify_update_iref(conn, want_iref); 271 294 } ··· 331 298 332 299 spin_lock(&conn->lock); 333 300 update_children = !fsnotify_conn_watches_children(conn); 334 - __fsnotify_recalc_mask(conn); 301 + fsnotify_recalc_mask_set_iref(conn); 335 302 update_children &= fsnotify_conn_watches_children(conn); 336 303 spin_unlock(&conn->lock); 337 304 /* ··· 452 419 /* Update watched objects after detaching mark */ 453 420 if (sb) 454 421 fsnotify_update_sb_watchers(sb, conn); 455 - objp = __fsnotify_recalc_mask(conn); 422 + objp = fsnotify_recalc_mask_clear_iref(conn); 456 423 type = conn->type; 457 424 } 458 425 WRITE_ONCE(mark->connector, NULL);