Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'fsnotify_for_v6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:

- reduce overhead of fsnotify infrastructure when no permission events
are in use

- a few small cleanups

* tag 'fsnotify_for_v6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
fsnotify: fix UAF from FS_ERROR event on a shutting down filesystem
fsnotify: optimize the case of no permission event watchers
fsnotify: use an enum for group priority constants
fsnotify: move s_fsnotify_connectors into fsnotify_sb_info
fsnotify: lazy attach fsnotify_sb_info state to sb
fsnotify: create helper fsnotify_update_sb_watchers()
fsnotify: pass object pointer and type to fsnotify mark helpers
fanotify: merge two checks regarding add of ignore mark
fsnotify: create a wrapper fsnotify_find_inode_mark()
fsnotify: create helpers to get sb and connp from object
fsnotify: rename fsnotify_{get,put}_sb_connectors()
fsnotify: Avoid -Wflex-array-member-not-at-end warning
fanotify: remove unneeded sub-zero check for unsigned value

+338 -220
+2 -2
fs/nfsd/filecache.c
··· 159 159 160 160 do { 161 161 fsnotify_group_lock(nfsd_file_fsnotify_group); 162 - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, 163 - nfsd_file_fsnotify_group); 162 + mark = fsnotify_find_inode_mark(inode, 163 + nfsd_file_fsnotify_group); 164 164 if (mark) { 165 165 nfm = nfsd_file_mark_get(container_of(mark, 166 166 struct nfsd_file_mark,
+2 -2
fs/notify/dnotify/dnotify.c
··· 162 162 if (!S_ISDIR(inode->i_mode)) 163 163 return; 164 164 165 - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); 165 + fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group); 166 166 if (!fsn_mark) 167 167 return; 168 168 dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); ··· 326 326 fsnotify_group_lock(dnotify_group); 327 327 328 328 /* add the new_fsn_mark or find an old one. */ 329 - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); 329 + fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group); 330 330 if (fsn_mark) { 331 331 dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); 332 332 spin_lock(&fsn_mark->lock);
+43 -100
fs/notify/fanotify/fanotify_user.c
··· 502 502 } 503 503 504 504 /* Pad with 0's */ 505 - WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); 505 + WARN_ON_ONCE(len >= FANOTIFY_EVENT_ALIGN); 506 506 if (len > 0 && clear_user(buf, len)) 507 507 return -EFAULT; 508 508 ··· 1076 1076 } 1077 1077 1078 1078 static int fanotify_remove_mark(struct fsnotify_group *group, 1079 - fsnotify_connp_t *connp, __u32 mask, 1079 + void *obj, unsigned int obj_type, __u32 mask, 1080 1080 unsigned int flags, __u32 umask) 1081 1081 { 1082 1082 struct fsnotify_mark *fsn_mark = NULL; ··· 1084 1084 int destroy_mark; 1085 1085 1086 1086 fsnotify_group_lock(group); 1087 - fsn_mark = fsnotify_find_mark(connp, group); 1087 + fsn_mark = fsnotify_find_mark(obj, obj_type, group); 1088 1088 if (!fsn_mark) { 1089 1089 fsnotify_group_unlock(group); 1090 1090 return -ENOENT; ··· 1103 1103 /* matches the fsnotify_find_mark() */ 1104 1104 fsnotify_put_mark(fsn_mark); 1105 1105 return 0; 1106 - } 1107 - 1108 - static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, 1109 - struct vfsmount *mnt, __u32 mask, 1110 - unsigned int flags, __u32 umask) 1111 - { 1112 - return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, 1113 - mask, flags, umask); 1114 - } 1115 - 1116 - static int fanotify_remove_sb_mark(struct fsnotify_group *group, 1117 - struct super_block *sb, __u32 mask, 1118 - unsigned int flags, __u32 umask) 1119 - { 1120 - return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, 1121 - flags, umask); 1122 - } 1123 - 1124 - static int fanotify_remove_inode_mark(struct fsnotify_group *group, 1125 - struct inode *inode, __u32 mask, 1126 - unsigned int flags, __u32 umask) 1127 - { 1128 - return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask, 1129 - flags, umask); 1130 1106 } 1131 1107 1132 1108 static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark, ··· 1225 1249 } 1226 1250 1227 1251 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, 1228 - fsnotify_connp_t *connp, 1252 + void *obj, 1229 1253 unsigned int obj_type, 1230 1254 unsigned int fan_flags, 1231 1255 struct fan_fsid *fsid) ··· 1264 1288 fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0; 1265 1289 } 1266 1290 1267 - ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0); 1291 + ret = fsnotify_add_mark_locked(mark, obj, obj_type, 0); 1268 1292 if (ret) 1269 1293 goto out_put_mark; 1270 1294 ··· 1320 1344 } 1321 1345 1322 1346 static int fanotify_add_mark(struct fsnotify_group *group, 1323 - fsnotify_connp_t *connp, unsigned int obj_type, 1347 + void *obj, unsigned int obj_type, 1324 1348 __u32 mask, unsigned int fan_flags, 1325 1349 struct fan_fsid *fsid) 1326 1350 { ··· 1329 1353 int ret = 0; 1330 1354 1331 1355 fsnotify_group_lock(group); 1332 - fsn_mark = fsnotify_find_mark(connp, group); 1356 + fsn_mark = fsnotify_find_mark(obj, obj_type, group); 1333 1357 if (!fsn_mark) { 1334 - fsn_mark = fanotify_add_new_mark(group, connp, obj_type, 1358 + fsn_mark = fanotify_add_new_mark(group, obj, obj_type, 1335 1359 fan_flags, fsid); 1336 1360 if (IS_ERR(fsn_mark)) { 1337 1361 fsnotify_group_unlock(group); ··· 1366 1390 1367 1391 fsnotify_put_mark(fsn_mark); 1368 1392 return ret; 1369 - } 1370 - 1371 - static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, 1372 - struct vfsmount *mnt, __u32 mask, 1373 - unsigned int flags, struct fan_fsid *fsid) 1374 - { 1375 - return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, 1376 - FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); 1377 - } 1378 - 1379 - static int fanotify_add_sb_mark(struct fsnotify_group *group, 1380 - struct super_block *sb, __u32 mask, 1381 - unsigned int flags, struct fan_fsid *fsid) 1382 - { 1383 - return fanotify_add_mark(group, &sb->s_fsnotify_marks, 1384 - FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); 1385 - } 1386 - 1387 - static int fanotify_add_inode_mark(struct fsnotify_group *group, 1388 - struct inode *inode, __u32 mask, 1389 - unsigned int flags, struct fan_fsid *fsid) 1390 - { 1391 - pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); 1392 - 1393 - /* 1394 - * If some other task has this inode open for write we should not add 1395 - * an ignore mask, unless that ignore mask is supposed to survive 1396 - * modification changes anyway. 1397 - */ 1398 - if ((flags & FANOTIFY_MARK_IGNORE_BITS) && 1399 - !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && 1400 - inode_is_open_for_write(inode)) 1401 - return 0; 1402 - 1403 - return fanotify_add_mark(group, &inode->i_fsnotify_marks, 1404 - FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); 1405 1393 } 1406 1394 1407 1395 static struct fsnotify_event *fanotify_alloc_overflow_event(void) ··· 1516 1576 INIT_LIST_HEAD(&group->fanotify_data.access_list); 1517 1577 switch (class) { 1518 1578 case FAN_CLASS_NOTIF: 1519 - group->priority = FS_PRIO_0; 1579 + group->priority = FSNOTIFY_PRIO_NORMAL; 1520 1580 break; 1521 1581 case FAN_CLASS_CONTENT: 1522 - group->priority = FS_PRIO_1; 1582 + group->priority = FSNOTIFY_PRIO_CONTENT; 1523 1583 break; 1524 1584 case FAN_CLASS_PRE_CONTENT: 1525 - group->priority = FS_PRIO_2; 1585 + group->priority = FSNOTIFY_PRIO_PRE_CONTENT; 1526 1586 break; 1527 1587 default: 1528 1588 fd = -EINVAL; ··· 1690 1750 unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; 1691 1751 unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; 1692 1752 unsigned int obj_type, fid_mode; 1753 + void *obj; 1693 1754 u32 umask = 0; 1694 1755 int ret; 1695 1756 ··· 1774 1833 goto fput_and_out; 1775 1834 1776 1835 /* 1777 - * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not 1778 - * allowed to set permissions events. 1836 + * Permission events require minimum priority FAN_CLASS_CONTENT. 1779 1837 */ 1780 1838 ret = -EINVAL; 1781 1839 if (mask & FANOTIFY_PERM_EVENTS && 1782 - group->priority == FS_PRIO_0) 1840 + group->priority < FSNOTIFY_PRIO_CONTENT) 1783 1841 goto fput_and_out; 1784 1842 1785 1843 if (mask & FAN_FS_ERROR && ··· 1848 1908 } 1849 1909 1850 1910 /* inode held in place by reference to path; group by fget on fd */ 1851 - if (mark_type == FAN_MARK_INODE) 1911 + if (mark_type == FAN_MARK_INODE) { 1852 1912 inode = path.dentry->d_inode; 1853 - else 1913 + obj = inode; 1914 + } else { 1854 1915 mnt = path.mnt; 1916 + if (mark_type == FAN_MARK_MOUNT) 1917 + obj = mnt; 1918 + else 1919 + obj = mnt->mnt_sb; 1920 + } 1855 1921 1856 - ret = mnt ? -EINVAL : -EISDIR; 1857 - /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ 1858 - if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE && 1859 - (mnt || S_ISDIR(inode->i_mode)) && 1860 - !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) 1861 - goto path_put_and_out; 1922 + /* 1923 + * If some other task has this inode open for write we should not add 1924 + * an ignore mask, unless that ignore mask is supposed to survive 1925 + * modification changes anyway. 1926 + */ 1927 + if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) && 1928 + !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) { 1929 + ret = mnt ? -EINVAL : -EISDIR; 1930 + /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ 1931 + if (ignore == FAN_MARK_IGNORE && 1932 + (mnt || S_ISDIR(inode->i_mode))) 1933 + goto path_put_and_out; 1934 + 1935 + ret = 0; 1936 + if (inode && inode_is_open_for_write(inode)) 1937 + goto path_put_and_out; 1938 + } 1862 1939 1863 1940 /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ 1864 1941 if (mnt || !S_ISDIR(inode->i_mode)) { ··· 1893 1936 /* create/update an inode mark */ 1894 1937 switch (mark_cmd) { 1895 1938 case FAN_MARK_ADD: 1896 - if (mark_type == FAN_MARK_MOUNT) 1897 - ret = fanotify_add_vfsmount_mark(group, mnt, mask, 1898 - flags, fsid); 1899 - else if (mark_type == FAN_MARK_FILESYSTEM) 1900 - ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, 1901 - flags, fsid); 1902 - else 1903 - ret = fanotify_add_inode_mark(group, inode, mask, 1904 - flags, fsid); 1939 + ret = fanotify_add_mark(group, obj, obj_type, mask, flags, 1940 + fsid); 1905 1941 break; 1906 1942 case FAN_MARK_REMOVE: 1907 - if (mark_type == FAN_MARK_MOUNT) 1908 - ret = fanotify_remove_vfsmount_mark(group, mnt, mask, 1909 - flags, umask); 1910 - else if (mark_type == FAN_MARK_FILESYSTEM) 1911 - ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, 1912 - flags, umask); 1913 - else 1914 - ret = fanotify_remove_inode_mark(group, inode, mask, 1915 - flags, umask); 1943 + ret = fanotify_remove_mark(group, obj, obj_type, mask, flags, 1944 + umask); 1916 1945 break; 1917 1946 default: 1918 1947 ret = -EINVAL;
+8 -12
fs/notify/fdinfo.c
··· 41 41 #if defined(CONFIG_EXPORTFS) 42 42 static void show_mark_fhandle(struct seq_file *m, struct inode *inode) 43 43 { 44 - struct { 45 - struct file_handle handle; 46 - u8 pad[MAX_HANDLE_SZ]; 47 - } f; 44 + DEFINE_FLEX(struct file_handle, f, f_handle, handle_bytes, MAX_HANDLE_SZ); 48 45 int size, ret, i; 49 46 50 - f.handle.handle_bytes = sizeof(f.pad); 51 - size = f.handle.handle_bytes >> 2; 47 + size = f->handle_bytes >> 2; 52 48 53 - ret = exportfs_encode_fid(inode, (struct fid *)f.handle.f_handle, &size); 49 + ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size); 54 50 if ((ret == FILEID_INVALID) || (ret < 0)) { 55 51 WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); 56 52 return; 57 53 } 58 54 59 - f.handle.handle_type = ret; 60 - f.handle.handle_bytes = size * sizeof(u32); 55 + f->handle_type = ret; 56 + f->handle_bytes = size * sizeof(u32); 61 57 62 58 seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:", 63 - f.handle.handle_bytes, f.handle.handle_type); 59 + f->handle_bytes, f->handle_type); 64 60 65 - for (i = 0; i < f.handle.handle_bytes; i++) 66 - seq_printf(m, "%02x", (int)f.handle.f_handle[i]); 61 + for (i = 0; i < f->handle_bytes; i++) 62 + seq_printf(m, "%02x", (int)f->f_handle[i]); 67 63 } 68 64 #else 69 65 static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
+22 -5
fs/notify/fsnotify.c
··· 89 89 90 90 void fsnotify_sb_delete(struct super_block *sb) 91 91 { 92 + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); 93 + 94 + /* Were any marks ever added to any object on this sb? */ 95 + if (!sbinfo) 96 + return; 97 + 92 98 fsnotify_unmount_inodes(sb); 93 99 fsnotify_clear_marks_by_sb(sb); 94 100 /* Wait for outstanding object references from connectors */ 95 - wait_var_event(&sb->s_fsnotify_connectors, 96 - !atomic_long_read(&sb->s_fsnotify_connectors)); 101 + wait_var_event(fsnotify_sb_watched_objects(sb), 102 + !atomic_long_read(fsnotify_sb_watched_objects(sb))); 103 + WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT)); 104 + WARN_ON(fsnotify_sb_has_priority_watchers(sb, 105 + FSNOTIFY_PRIO_PRE_CONTENT)); 106 + } 107 + 108 + void fsnotify_sb_free(struct super_block *sb) 109 + { 110 + kfree(sb->s_fsnotify_info); 97 111 } 98 112 99 113 /* ··· 503 489 { 504 490 const struct path *path = fsnotify_data_path(data, data_type); 505 491 struct super_block *sb = fsnotify_data_sb(data, data_type); 492 + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); 506 493 struct fsnotify_iter_info iter_info = {}; 507 494 struct mount *mnt = NULL; 508 495 struct inode *inode2 = NULL; ··· 540 525 * SRCU because we have no references to any objects and do not 541 526 * need SRCU to keep them "alive". 542 527 */ 543 - if (!sb->s_fsnotify_marks && 528 + if ((!sbinfo || !sbinfo->sb_marks) && 544 529 (!mnt || !mnt->mnt_fsnotify_marks) && 545 530 (!inode || !inode->i_fsnotify_marks) && 546 531 (!inode2 || !inode2->i_fsnotify_marks)) ··· 567 552 568 553 iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); 569 554 570 - iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = 571 - fsnotify_first_mark(&sb->s_fsnotify_marks); 555 + if (sbinfo) { 556 + iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = 557 + fsnotify_first_mark(&sbinfo->sb_marks); 558 + } 572 559 if (mnt) { 573 560 iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = 574 561 fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
+33 -14
fs/notify/fsnotify.h
··· 9 9 10 10 #include "../mount.h" 11 11 12 + /* 13 + * fsnotify_connp_t is what we embed in objects which connector can be attached 14 + * to. 15 + */ 16 + typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; 17 + 12 18 static inline struct inode *fsnotify_conn_inode( 13 19 struct fsnotify_mark_connector *conn) 14 20 { 15 - return container_of(conn->obj, struct inode, i_fsnotify_marks); 21 + return conn->obj; 16 22 } 17 23 18 24 static inline struct mount *fsnotify_conn_mount( 19 25 struct fsnotify_mark_connector *conn) 20 26 { 21 - return container_of(conn->obj, struct mount, mnt_fsnotify_marks); 27 + return real_mount(conn->obj); 22 28 } 23 29 24 30 static inline struct super_block *fsnotify_conn_sb( 25 31 struct fsnotify_mark_connector *conn) 26 32 { 27 - return container_of(conn->obj, struct super_block, s_fsnotify_marks); 33 + return conn->obj; 34 + } 35 + 36 + static inline struct super_block *fsnotify_object_sb(void *obj, 37 + enum fsnotify_obj_type obj_type) 38 + { 39 + switch (obj_type) { 40 + case FSNOTIFY_OBJ_TYPE_INODE: 41 + return ((struct inode *)obj)->i_sb; 42 + case FSNOTIFY_OBJ_TYPE_VFSMOUNT: 43 + return ((struct vfsmount *)obj)->mnt_sb; 44 + case FSNOTIFY_OBJ_TYPE_SB: 45 + return (struct super_block *)obj; 46 + default: 47 + return NULL; 48 + } 28 49 } 29 50 30 51 static inline struct super_block *fsnotify_connector_sb( 31 52 struct fsnotify_mark_connector *conn) 32 53 { 33 - switch (conn->type) { 34 - case FSNOTIFY_OBJ_TYPE_INODE: 35 - return fsnotify_conn_inode(conn)->i_sb; 36 - case FSNOTIFY_OBJ_TYPE_VFSMOUNT: 37 - return fsnotify_conn_mount(conn)->mnt.mnt_sb; 38 - case FSNOTIFY_OBJ_TYPE_SB: 39 - return fsnotify_conn_sb(conn); 40 - default: 41 - return NULL; 42 - } 54 + return fsnotify_object_sb(conn->obj, conn->type); 55 + } 56 + 57 + static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb) 58 + { 59 + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); 60 + 61 + return sbinfo ? &sbinfo->sb_marks : NULL; 43 62 } 44 63 45 64 /* destroy all events sitting in this groups notification queue */ ··· 86 67 /* run the list of all marks associated with sb and destroy them */ 87 68 static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) 88 69 { 89 - fsnotify_destroy_marks(&sb->s_fsnotify_marks); 70 + fsnotify_destroy_marks(fsnotify_sb_marks(sb)); 90 71 } 91 72 92 73 /*
+1 -1
fs/notify/inotify/inotify_user.c
··· 544 544 int create = (arg & IN_MASK_CREATE); 545 545 int ret; 546 546 547 - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); 547 + fsn_mark = fsnotify_find_inode_mark(inode, group); 548 548 if (!fsn_mark) 549 549 return -ENOENT; 550 550 else if (create) {
+132 -42
fs/notify/mark.c
··· 97 97 refcount_inc(&mark->refcnt); 98 98 } 99 99 100 + static fsnotify_connp_t *fsnotify_object_connp(void *obj, 101 + enum fsnotify_obj_type obj_type) 102 + { 103 + switch (obj_type) { 104 + case FSNOTIFY_OBJ_TYPE_INODE: 105 + return &((struct inode *)obj)->i_fsnotify_marks; 106 + case FSNOTIFY_OBJ_TYPE_VFSMOUNT: 107 + return &real_mount(obj)->mnt_fsnotify_marks; 108 + case FSNOTIFY_OBJ_TYPE_SB: 109 + return fsnotify_sb_marks(obj); 110 + default: 111 + return NULL; 112 + } 113 + } 114 + 100 115 static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) 101 116 { 102 117 if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) ··· 131 116 return *fsnotify_conn_mask_p(conn); 132 117 } 133 118 119 + static void fsnotify_get_sb_watched_objects(struct super_block *sb) 120 + { 121 + atomic_long_inc(fsnotify_sb_watched_objects(sb)); 122 + } 123 + 124 + static void fsnotify_put_sb_watched_objects(struct super_block *sb) 125 + { 126 + if (atomic_long_dec_and_test(fsnotify_sb_watched_objects(sb))) 127 + wake_up_var(fsnotify_sb_watched_objects(sb)); 128 + } 129 + 134 130 static void fsnotify_get_inode_ref(struct inode *inode) 135 131 { 136 132 ihold(inode); 137 - atomic_long_inc(&inode->i_sb->s_fsnotify_connectors); 133 + fsnotify_get_sb_watched_objects(inode->i_sb); 134 + } 135 + 136 + static void fsnotify_put_inode_ref(struct inode *inode) 137 + { 138 + fsnotify_put_sb_watched_objects(inode->i_sb); 139 + iput(inode); 140 + } 141 + 142 + /* 143 + * Grab or drop watched objects reference depending on whether the connector 144 + * is attached and has any marks attached. 145 + */ 146 + static void fsnotify_update_sb_watchers(struct super_block *sb, 147 + struct fsnotify_mark_connector *conn) 148 + { 149 + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); 150 + bool is_watched = conn->flags & FSNOTIFY_CONN_FLAG_IS_WATCHED; 151 + struct fsnotify_mark *first_mark = NULL; 152 + unsigned int highest_prio = 0; 153 + 154 + if (conn->obj) 155 + first_mark = hlist_entry_safe(conn->list.first, 156 + struct fsnotify_mark, obj_list); 157 + if (first_mark) 158 + highest_prio = first_mark->group->priority; 159 + if (WARN_ON(highest_prio >= __FSNOTIFY_PRIO_NUM)) 160 + highest_prio = 0; 161 + 162 + /* 163 + * If the highest priority of group watching this object is prio, 164 + * then watched object has a reference on counters [0..prio]. 165 + * Update priority >= 1 watched objects counters. 166 + */ 167 + for (unsigned int p = conn->prio + 1; p <= highest_prio; p++) 168 + atomic_long_inc(&sbinfo->watched_objects[p]); 169 + for (unsigned int p = conn->prio; p > highest_prio; p--) 170 + atomic_long_dec(&sbinfo->watched_objects[p]); 171 + conn->prio = highest_prio; 172 + 173 + /* Update priority >= 0 (a.k.a total) watched objects counter */ 174 + BUILD_BUG_ON(FSNOTIFY_PRIO_NORMAL != 0); 175 + if (first_mark && !is_watched) { 176 + conn->flags |= FSNOTIFY_CONN_FLAG_IS_WATCHED; 177 + fsnotify_get_sb_watched_objects(sb); 178 + } else if (!first_mark && is_watched) { 179 + conn->flags &= ~FSNOTIFY_CONN_FLAG_IS_WATCHED; 180 + fsnotify_put_sb_watched_objects(sb); 181 + } 138 182 } 139 183 140 184 /* ··· 287 213 } 288 214 } 289 215 290 - static void fsnotify_put_inode_ref(struct inode *inode) 291 - { 292 - struct super_block *sb = inode->i_sb; 293 - 294 - iput(inode); 295 - if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) 296 - wake_up_var(&sb->s_fsnotify_connectors); 297 - } 298 - 299 - static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn) 300 - { 301 - struct super_block *sb = fsnotify_connector_sb(conn); 302 - 303 - if (sb) 304 - atomic_long_inc(&sb->s_fsnotify_connectors); 305 - } 306 - 307 - static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn) 308 - { 309 - struct super_block *sb = fsnotify_connector_sb(conn); 310 - 311 - if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) 312 - wake_up_var(&sb->s_fsnotify_connectors); 313 - } 314 - 315 216 static void *fsnotify_detach_connector_from_object( 316 217 struct fsnotify_mark_connector *conn, 317 218 unsigned int *type) 318 219 { 220 + fsnotify_connp_t *connp = fsnotify_object_connp(conn->obj, conn->type); 221 + struct super_block *sb = fsnotify_connector_sb(conn); 319 222 struct inode *inode = NULL; 320 223 321 224 *type = conn->type; ··· 312 261 fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; 313 262 } 314 263 315 - fsnotify_put_sb_connectors(conn); 316 - rcu_assign_pointer(*(conn->obj), NULL); 264 + rcu_assign_pointer(*connp, NULL); 317 265 conn->obj = NULL; 318 266 conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; 267 + fsnotify_update_sb_watchers(sb, conn); 319 268 320 269 return inode; 321 270 } ··· 367 316 objp = fsnotify_detach_connector_from_object(conn, &type); 368 317 free_conn = true; 369 318 } else { 319 + struct super_block *sb = fsnotify_connector_sb(conn); 320 + 321 + /* Update watched objects after detaching mark */ 322 + if (sb) 323 + fsnotify_update_sb_watchers(sb, conn); 370 324 objp = __fsnotify_recalc_mask(conn); 371 325 type = conn->type; 372 326 } ··· 592 536 return -1; 593 537 } 594 538 539 + static int fsnotify_attach_info_to_sb(struct super_block *sb) 540 + { 541 + struct fsnotify_sb_info *sbinfo; 542 + 543 + /* sb info is freed on fsnotify_sb_delete() */ 544 + sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL); 545 + if (!sbinfo) 546 + return -ENOMEM; 547 + 548 + /* 549 + * cmpxchg() provides the barrier so that callers of fsnotify_sb_info() 550 + * will observe an initialized structure 551 + */ 552 + if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) { 553 + /* Someone else created sbinfo for us */ 554 + kfree(sbinfo); 555 + } 556 + return 0; 557 + } 558 + 595 559 static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, 596 - unsigned int obj_type) 560 + void *obj, unsigned int obj_type) 597 561 { 598 562 struct fsnotify_mark_connector *conn; 599 563 ··· 623 547 spin_lock_init(&conn->lock); 624 548 INIT_HLIST_HEAD(&conn->list); 625 549 conn->flags = 0; 550 + conn->prio = 0; 626 551 conn->type = obj_type; 627 - conn->obj = connp; 628 - conn->flags = 0; 629 - fsnotify_get_sb_connectors(conn); 552 + conn->obj = obj; 630 553 631 554 /* 632 555 * cmpxchg() provides the barrier so that readers of *connp can see ··· 633 558 */ 634 559 if (cmpxchg(connp, NULL, conn)) { 635 560 /* Someone else created list structure for us */ 636 - fsnotify_put_sb_connectors(conn); 637 561 kmem_cache_free(fsnotify_mark_connector_cachep, conn); 638 562 } 639 - 640 563 return 0; 641 564 } 642 565 ··· 671 598 * to which group and for which inodes. These marks are ordered according to 672 599 * priority, highest number first, and then by the group's location in memory. 673 600 */ 674 - static int fsnotify_add_mark_list(struct fsnotify_mark *mark, 675 - fsnotify_connp_t *connp, 601 + static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj, 676 602 unsigned int obj_type, int add_flags) 677 603 { 604 + struct super_block *sb = fsnotify_object_sb(obj, obj_type); 678 605 struct fsnotify_mark *lmark, *last = NULL; 679 606 struct fsnotify_mark_connector *conn; 607 + fsnotify_connp_t *connp; 680 608 int cmp; 681 609 int err = 0; 682 610 683 611 if (WARN_ON(!fsnotify_valid_obj_type(obj_type))) 684 612 return -EINVAL; 685 613 614 + /* 615 + * Attach the sb info before attaching a connector to any object on sb. 616 + * The sb info will remain attached as long as sb lives. 617 + */ 618 + if (!fsnotify_sb_info(sb)) { 619 + err = fsnotify_attach_info_to_sb(sb); 620 + if (err) 621 + return err; 622 + } 623 + 624 + connp = fsnotify_object_connp(obj, obj_type); 686 625 restart: 687 626 spin_lock(&mark->lock); 688 627 conn = fsnotify_grab_connector(connp); 689 628 if (!conn) { 690 629 spin_unlock(&mark->lock); 691 - err = fsnotify_attach_connector_to_object(connp, obj_type); 630 + err = fsnotify_attach_connector_to_object(connp, obj, obj_type); 692 631 if (err) 693 632 return err; 694 633 goto restart; ··· 734 649 /* mark should be the last entry. last is the current last entry */ 735 650 hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); 736 651 added: 652 + fsnotify_update_sb_watchers(sb, conn); 737 653 /* 738 654 * Since connector is attached to object using cmpxchg() we are 739 655 * guaranteed that connector initialization is fully visible by anyone ··· 753 667 * event types should be delivered to which group. 754 668 */ 755 669 int fsnotify_add_mark_locked(struct fsnotify_mark *mark, 756 - fsnotify_connp_t *connp, unsigned int obj_type, 670 + void *obj, unsigned int obj_type, 757 671 int add_flags) 758 672 { 759 673 struct fsnotify_group *group = mark->group; ··· 774 688 fsnotify_get_mark(mark); /* for g_list */ 775 689 spin_unlock(&mark->lock); 776 690 777 - ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags); 691 + ret = fsnotify_add_mark_list(mark, obj, obj_type, add_flags); 778 692 if (ret) 779 693 goto err; 780 694 ··· 792 706 return ret; 793 707 } 794 708 795 - int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, 709 + int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj, 796 710 unsigned int obj_type, int add_flags) 797 711 { 798 712 int ret; 799 713 struct fsnotify_group *group = mark->group; 800 714 801 715 fsnotify_group_lock(group); 802 - ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags); 716 + ret = fsnotify_add_mark_locked(mark, obj, obj_type, add_flags); 803 717 fsnotify_group_unlock(group); 804 718 return ret; 805 719 } ··· 809 723 * Given a list of marks, find the mark associated with given group. If found 810 724 * take a reference to that mark and return it, else return NULL. 811 725 */ 812 - struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, 726 + struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type, 813 727 struct fsnotify_group *group) 814 728 { 729 + fsnotify_connp_t *connp = fsnotify_object_connp(obj, obj_type); 815 730 struct fsnotify_mark_connector *conn; 816 731 struct fsnotify_mark *mark; 732 + 733 + if (!connp) 734 + return NULL; 817 735 818 736 conn = fsnotify_grab_connector(connp); 819 737 if (!conn)
+1
fs/super.c
··· 274 274 { 275 275 struct super_block *s = container_of(work, struct super_block, 276 276 destroy_work); 277 + fsnotify_sb_free(s); 277 278 security_sb_free(s); 278 279 put_user_ns(s->s_user_ns); 279 280 kfree(s->s_subtype);
+4 -10
include/linux/fs.h
··· 73 73 struct fscrypt_operations; 74 74 struct fsverity_info; 75 75 struct fsverity_operations; 76 + struct fsnotify_mark_connector; 77 + struct fsnotify_sb_info; 76 78 struct fs_context; 77 79 struct fs_parameter_spec; 78 80 struct fileattr; ··· 619 617 #define IOP_NOFOLLOW 0x0004 620 618 #define IOP_XATTR 0x0008 621 619 #define IOP_DEFAULT_READLINK 0x0010 622 - 623 - struct fsnotify_mark_connector; 624 620 625 621 /* 626 622 * Keep mostly read-only and often accessed (especially for ··· 1248 1248 1249 1249 /* 1250 1250 * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and 1251 - * s_fsnotify_marks together for cache efficiency. They are frequently 1251 + * s_fsnotify_info together for cache efficiency. They are frequently 1252 1252 * accessed and rarely modified. 1253 1253 */ 1254 1254 void *s_fs_info; /* Filesystem private info */ ··· 1260 1260 time64_t s_time_max; 1261 1261 #ifdef CONFIG_FSNOTIFY 1262 1262 __u32 s_fsnotify_mask; 1263 - struct fsnotify_mark_connector __rcu *s_fsnotify_marks; 1263 + struct fsnotify_sb_info *s_fsnotify_info; 1264 1264 #endif 1265 1265 1266 1266 /* ··· 1300 1300 1301 1301 /* Number of inodes with nlink == 0 but still referenced */ 1302 1302 atomic_long_t s_remove_count; 1303 - 1304 - /* 1305 - * Number of inode/mount/sb objects that are being watched, note that 1306 - * inodes objects are currently double-accounted. 1307 - */ 1308 - atomic_long_t s_fsnotify_connectors; 1309 1303 1310 1304 /* Read-only state of the superblock is being changed */ 1311 1305 int s_readonly_remount;
+20 -1
include/linux/fsnotify.h
··· 17 17 #include <linux/slab.h> 18 18 #include <linux/bug.h> 19 19 20 + /* Are there any inode/mount/sb objects watched with priority prio or above? */ 21 + static inline bool fsnotify_sb_has_priority_watchers(struct super_block *sb, 22 + int prio) 23 + { 24 + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); 25 + 26 + /* Were any marks ever added to any object on this sb? */ 27 + if (!sbinfo) 28 + return false; 29 + 30 + return atomic_long_read(&sbinfo->watched_objects[prio]); 31 + } 32 + 20 33 /* Are there any inode/mount/sb objects that are being watched at all? */ 21 34 static inline bool fsnotify_sb_has_watchers(struct super_block *sb) 22 35 { 23 - return atomic_long_read(&sb->s_fsnotify_connectors); 36 + return fsnotify_sb_has_priority_watchers(sb, 0); 24 37 } 25 38 26 39 /* ··· 116 103 return 0; 117 104 118 105 path = &file->f_path; 106 + /* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */ 107 + if (mask & ALL_FSNOTIFY_PERM_EVENTS && 108 + !fsnotify_sb_has_priority_watchers(path->dentry->d_sb, 109 + FSNOTIFY_PRIO_CONTENT)) 110 + return 0; 111 + 119 112 return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); 120 113 } 121 114
+68 -29
include/linux/fsnotify_backend.h
··· 177 177 }; 178 178 179 179 /* 180 + * fsnotify group priorities. 181 + * Events are sent in order from highest priority to lowest priority. 182 + */ 183 + enum fsnotify_group_prio { 184 + FSNOTIFY_PRIO_NORMAL = 0, /* normal notifiers, no permissions */ 185 + FSNOTIFY_PRIO_CONTENT, /* fanotify permission events */ 186 + FSNOTIFY_PRIO_PRE_CONTENT, /* fanotify pre-content events */ 187 + __FSNOTIFY_PRIO_NUM 188 + }; 189 + 190 + /* 180 191 * A group is a "thing" that wants to receive notification about filesystem 181 192 * events. The mask holds the subset of event types this group cares about. 182 193 * refcnt on a group is up to the implementor and at any moment if it goes 0 ··· 212 201 wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ 213 202 unsigned int q_len; /* events on the queue */ 214 203 unsigned int max_events; /* maximum events allowed on the list */ 215 - /* 216 - * Valid fsnotify group priorities. Events are send in order from highest 217 - * priority to lowest priority. We default to the lowest priority. 218 - */ 219 - #define FS_PRIO_0 0 /* normal notifiers, no permissions */ 220 - #define FS_PRIO_1 1 /* fanotify content based access control */ 221 - #define FS_PRIO_2 2 /* fanotify pre-content access */ 222 - unsigned int priority; 204 + enum fsnotify_group_prio priority; /* priority for sending events */ 223 205 bool shutdown; /* group is being shut down, don't queue more events */ 224 206 225 207 #define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ ··· 461 457 type++) 462 458 463 459 /* 464 - * fsnotify_connp_t is what we embed in objects which connector can be attached 465 - * to. fsnotify_connp_t * is how we refer from connector back to object. 466 - */ 467 - struct fsnotify_mark_connector; 468 - typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; 469 - 470 - /* 471 460 * Inode/vfsmount/sb point to this structure which tracks all marks attached to 472 461 * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this 473 462 * structure. We destroy this structure when there are no more marks attached ··· 468 471 */ 469 472 struct fsnotify_mark_connector { 470 473 spinlock_t lock; 471 - unsigned short type; /* Type of object [lock] */ 474 + unsigned char type; /* Type of object [lock] */ 475 + unsigned char prio; /* Highest priority group */ 476 + #define FSNOTIFY_CONN_FLAG_IS_WATCHED 0x01 472 477 #define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 473 478 unsigned short flags; /* flags [lock] */ 474 479 union { 475 480 /* Object pointer [lock] */ 476 - fsnotify_connp_t *obj; 481 + void *obj; 477 482 /* Used listing heads to free after srcu period expires */ 478 483 struct fsnotify_mark_connector *destroy_next; 479 484 }; 480 485 struct hlist_head list; 481 486 }; 487 + 488 + /* 489 + * Container for per-sb fsnotify state (sb marks and more). 490 + * Attached lazily on first marked object on the sb and freed when killing sb. 491 + */ 492 + struct fsnotify_sb_info { 493 + struct fsnotify_mark_connector __rcu *sb_marks; 494 + /* 495 + * Number of inode/mount/sb objects that are being watched in this sb. 496 + * Note that inodes objects are currently double-accounted. 497 + * 498 + * The value in watched_objects[prio] is the number of objects that are 499 + * watched by groups of priority >= prio, so watched_objects[0] is the 500 + * total number of watched objects in this sb. 501 + */ 502 + atomic_long_t watched_objects[__FSNOTIFY_PRIO_NUM]; 503 + }; 504 + 505 + static inline struct fsnotify_sb_info *fsnotify_sb_info(struct super_block *sb) 506 + { 507 + #ifdef CONFIG_FSNOTIFY 508 + return READ_ONCE(sb->s_fsnotify_info); 509 + #else 510 + return NULL; 511 + #endif 512 + } 513 + 514 + static inline atomic_long_t *fsnotify_sb_watched_objects(struct super_block *sb) 515 + { 516 + return &fsnotify_sb_info(sb)->watched_objects[0]; 517 + } 482 518 483 519 /* 484 520 * A mark is simply an object attached to an in core inode which allows an ··· 576 546 extern void __fsnotify_inode_delete(struct inode *inode); 577 547 extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); 578 548 extern void fsnotify_sb_delete(struct super_block *sb); 549 + extern void fsnotify_sb_free(struct super_block *sb); 579 550 extern u32 fsnotify_get_cookie(void); 580 551 581 552 static inline __u32 fsnotify_parent_needed_mask(__u32 mask) ··· 789 758 extern void fsnotify_init_mark(struct fsnotify_mark *mark, 790 759 struct fsnotify_group *group); 791 760 /* Find mark belonging to given group in the list of marks */ 792 - extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, 793 - struct fsnotify_group *group); 761 + struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type, 762 + struct fsnotify_group *group); 794 763 /* attach the mark to the object */ 795 - extern int fsnotify_add_mark(struct fsnotify_mark *mark, 796 - fsnotify_connp_t *connp, unsigned int obj_type, 797 - int add_flags); 798 - extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, 799 - fsnotify_connp_t *connp, 800 - unsigned int obj_type, int add_flags); 764 + int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj, 765 + unsigned int obj_type, int add_flags); 766 + int fsnotify_add_mark_locked(struct fsnotify_mark *mark, void *obj, 767 + unsigned int obj_type, int add_flags); 801 768 802 769 /* attach the mark to the inode */ 803 770 static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, 804 771 struct inode *inode, 805 772 int add_flags) 806 773 { 807 - return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, 808 - FSNOTIFY_OBJ_TYPE_INODE, add_flags); 774 + return fsnotify_add_mark(mark, inode, FSNOTIFY_OBJ_TYPE_INODE, 775 + add_flags); 809 776 } 810 777 static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, 811 778 struct inode *inode, 812 779 int add_flags) 813 780 { 814 - return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, 815 - FSNOTIFY_OBJ_TYPE_INODE, add_flags); 781 + return fsnotify_add_mark_locked(mark, inode, FSNOTIFY_OBJ_TYPE_INODE, 782 + add_flags); 783 + } 784 + 785 + static inline struct fsnotify_mark *fsnotify_find_inode_mark( 786 + struct inode *inode, 787 + struct fsnotify_group *group) 788 + { 789 + return fsnotify_find_mark(inode, FSNOTIFY_OBJ_TYPE_INODE, group); 816 790 } 817 791 818 792 /* given a group and a mark, flag mark to be freed when all references are dropped */ ··· 879 843 {} 880 844 881 845 static inline void fsnotify_sb_delete(struct super_block *sb) 846 + {} 847 + 848 + static inline void fsnotify_sb_free(struct super_block *sb) 882 849 {} 883 850 884 851 static inline void fsnotify_update_flags(struct dentry *dentry)
+1 -1
kernel/audit_tree.c
··· 463 463 int n; 464 464 465 465 fsnotify_group_lock(audit_tree_group); 466 - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group); 466 + mark = fsnotify_find_inode_mark(inode, audit_tree_group); 467 467 if (!mark) 468 468 return create_chunk(inode, tree); 469 469
+1 -1
kernel/audit_watch.c
··· 90 90 struct audit_parent *parent = NULL; 91 91 struct fsnotify_mark *entry; 92 92 93 - entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_watch_group); 93 + entry = fsnotify_find_inode_mark(inode, audit_watch_group); 94 94 if (entry) 95 95 parent = container_of(entry, struct audit_parent, mark); 96 96