Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fsnotify: use hash table for faster events merge

To improve event merge performance, hash events by their merge key into a
hash table with 128 buckets.

The fanotify_event size grows by two pointers, but we just reduced its
size by removing the objectid member, so overall its size is increased
by one pointer.

Permission events and the overflow event are never merged, so they are not
hashed either.

Link: https://lore.kernel.org/r/20210304104826.3993892-5-amir73il@gmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>

authored by

Amir Goldstein and committed by
Jan Kara
94e00d28 7e3e5c69

+123 -20
+33 -7
fs/notify/fanotify/fanotify.c
··· 149 149 } 150 150 151 151 /* and the list better be locked by something too! */ 152 - static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) 152 + static int fanotify_merge(struct fsnotify_group *group, 153 + struct fsnotify_event *event) 153 154 { 154 - struct fsnotify_event *test_event; 155 155 struct fanotify_event *old, *new = FANOTIFY_E(event); 156 + unsigned int bucket = fanotify_event_hash_bucket(group, new); 157 + struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; 156 158 157 - pr_debug("%s: list=%p event=%p\n", __func__, list, event); 159 + pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, 160 + group, event, bucket); 158 161 159 162 /* 160 163 * Don't merge a permission event with any other event so that we know ··· 167 164 if (fanotify_is_perm_event(new->mask)) 168 165 return 0; 169 166 170 - list_for_each_entry_reverse(test_event, list, list) { 171 - old = FANOTIFY_E(test_event); 167 + hlist_for_each_entry(old, hlist, merge_list) { 172 168 if (fanotify_should_merge(old, new)) { 173 169 old->mask |= new->mask; 174 170 return 1; ··· 205 203 return ret; 206 204 } 207 205 /* Event not yet reported? Just remove it. */ 208 - if (event->state == FAN_EVENT_INIT) 206 + if (event->state == FAN_EVENT_INIT) { 209 207 fsnotify_remove_queued_event(group, &event->fae.fse); 208 + /* Permission events are not supposed to be hashed */ 209 + WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list)); 210 + } 210 211 /* 211 212 * Event may be also answered in case signal delivery raced 212 213 * with wakeup. In that case we have nothing to do besides ··· 684 679 return fsid; 685 680 } 686 681 682 + /* 683 + * Add an event to hash table for faster merge. 
684 + */ 685 + static void fanotify_insert_event(struct fsnotify_group *group, 686 + struct fsnotify_event *fsn_event) 687 + { 688 + struct fanotify_event *event = FANOTIFY_E(fsn_event); 689 + unsigned int bucket = fanotify_event_hash_bucket(group, event); 690 + struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; 691 + 692 + assert_spin_locked(&group->notification_lock); 693 + 694 + pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, 695 + group, event, bucket); 696 + 697 + hlist_add_head(&event->merge_list, hlist); 698 + } 699 + 687 700 static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, 688 701 const void *data, int data_type, 689 702 struct inode *dir, ··· 772 749 } 773 750 774 751 fsn_event = &event->fse; 775 - ret = fsnotify_add_event(group, fsn_event, fanotify_merge); 752 + ret = fsnotify_add_event(group, fsn_event, fanotify_merge, 753 + fanotify_is_hashed_event(mask) ? 754 + fanotify_insert_event : NULL); 776 755 if (ret) { 777 756 /* Permission events shouldn't be merged */ 778 757 BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); ··· 797 772 { 798 773 struct user_struct *user; 799 774 775 + kfree(group->fanotify_data.merge_hash); 800 776 user = group->fanotify_data.user; 801 777 atomic_dec(&user->fanotify_listeners); 802 778 free_uid(user);
+25
fs/notify/fanotify/fanotify.h
··· 3 3 #include <linux/path.h> 4 4 #include <linux/slab.h> 5 5 #include <linux/exportfs.h> 6 + #include <linux/hashtable.h> 6 7 7 8 extern struct kmem_cache *fanotify_mark_cache; 8 9 extern struct kmem_cache *fanotify_fid_event_cachep; ··· 151 150 152 151 struct fanotify_event { 153 152 struct fsnotify_event fse; 153 + struct hlist_node merge_list; /* List for hashed merge */ 154 154 u32 mask; 155 155 struct { 156 156 unsigned int type : FANOTIFY_EVENT_TYPE_BITS; ··· 164 162 unsigned int hash, u32 mask) 165 163 { 166 164 fsnotify_init_event(&event->fse); 165 + INIT_HLIST_NODE(&event->merge_list); 167 166 event->hash = hash; 168 167 event->mask = mask; 169 168 event->pid = NULL; ··· 301 298 return &FANOTIFY_PERM(event)->path; 302 299 else 303 300 return NULL; 301 + } 302 + 303 + /* 304 + * Use 128 size hash table to speed up events merge. 305 + */ 306 + #define FANOTIFY_HTABLE_BITS (7) 307 + #define FANOTIFY_HTABLE_SIZE (1 << FANOTIFY_HTABLE_BITS) 308 + #define FANOTIFY_HTABLE_MASK (FANOTIFY_HTABLE_SIZE - 1) 309 + 310 + /* 311 + * Permission events and overflow event do not get merged - don't hash them. 312 + */ 313 + static inline bool fanotify_is_hashed_event(u32 mask) 314 + { 315 + return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW); 316 + } 317 + 318 + static inline unsigned int fanotify_event_hash_bucket( 319 + struct fsnotify_group *group, 320 + struct fanotify_event *event) 321 + { 322 + return event->hash & FANOTIFY_HTABLE_MASK; 304 323 }
+39
fs/notify/fanotify/fanotify_user.c
··· 90 90 } 91 91 92 92 /* 93 + * Remove an hashed event from merge hash table. 94 + */ 95 + static void fanotify_unhash_event(struct fsnotify_group *group, 96 + struct fanotify_event *event) 97 + { 98 + assert_spin_locked(&group->notification_lock); 99 + 100 + pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, 101 + group, event, fanotify_event_hash_bucket(group, event)); 102 + 103 + if (WARN_ON_ONCE(hlist_unhashed(&event->merge_list))) 104 + return; 105 + 106 + hlist_del_init(&event->merge_list); 107 + } 108 + 109 + /* 93 110 * Get an fanotify notification event if one exists and is small 94 111 * enough to fit in "count". Return an error pointer if the count 95 112 * is not large enough. When permission event is dequeued, its state is ··· 143 126 fsnotify_remove_first_event(group); 144 127 if (fanotify_is_perm_event(event->mask)) 145 128 FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED; 129 + if (fanotify_is_hashed_event(event->mask)) 130 + fanotify_unhash_event(group, event); 146 131 out: 147 132 spin_unlock(&group->notification_lock); 148 133 return event; ··· 944 925 return &oevent->fse; 945 926 } 946 927 928 + static struct hlist_head *fanotify_alloc_merge_hash(void) 929 + { 930 + struct hlist_head *hash; 931 + 932 + hash = kmalloc(sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS, 933 + GFP_KERNEL_ACCOUNT); 934 + if (!hash) 935 + return NULL; 936 + 937 + __hash_init(hash, FANOTIFY_HTABLE_SIZE); 938 + 939 + return hash; 940 + } 941 + 947 942 /* fanotify syscalls */ 948 943 SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) 949 944 { ··· 1025 992 group->fanotify_data.flags = flags; 1026 993 atomic_inc(&user->fanotify_listeners); 1027 994 group->memcg = get_mem_cgroup_from_mm(current->mm); 995 + 996 + group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); 997 + if (!group->fanotify_data.merge_hash) { 998 + fd = -ENOMEM; 999 + goto out_destroy_group; 1000 + } 1028 1001 1029 1002 group->overflow_event = 
fanotify_alloc_overflow_event(); 1030 1003 if (unlikely(!group->overflow_event)) {
+4 -3
fs/notify/inotify/inotify_fsnotify.c
··· 46 46 return false; 47 47 } 48 48 49 - static int inotify_merge(struct list_head *list, 50 - struct fsnotify_event *event) 49 + static int inotify_merge(struct fsnotify_group *group, 50 + struct fsnotify_event *event) 51 51 { 52 + struct list_head *list = &group->notification_list; 52 53 struct fsnotify_event *last_event; 53 54 54 55 last_event = list_entry(list->prev, struct fsnotify_event, list); ··· 116 115 if (len) 117 116 strcpy(event->name, name->name); 118 117 119 - ret = fsnotify_add_event(group, fsn_event, inotify_merge); 118 + ret = fsnotify_add_event(group, fsn_event, inotify_merge, NULL); 120 119 if (ret) { 121 120 /* Our event wasn't used in the end. Free it. */ 122 121 fsnotify_destroy_event(group, fsn_event);
+15 -7
fs/notify/notification.c
··· 68 68 } 69 69 70 70 /* 71 - * Add an event to the group notification queue. The group can later pull this 72 - * event off the queue to deal with. The function returns 0 if the event was 73 - * added to the queue, 1 if the event was merged with some other queued event, 71 + * Try to add an event to the notification queue. 72 + * The group can later pull this event off the queue to deal with. 73 + * The group can use the @merge hook to merge the event with a queued event. 74 + * The group can use the @insert hook to insert the event into hash table. 75 + * The function returns: 76 + * 0 if the event was added to a queue 77 + * 1 if the event was merged with some other queued event 74 78 * 2 if the event was not queued - either the queue of events has overflown 75 - * or the group is shutting down. 79 + * or the group is shutting down. 76 80 */ 77 81 int fsnotify_add_event(struct fsnotify_group *group, 78 82 struct fsnotify_event *event, 79 - int (*merge)(struct list_head *, 80 - struct fsnotify_event *)) 83 + int (*merge)(struct fsnotify_group *, 84 + struct fsnotify_event *), 85 + void (*insert)(struct fsnotify_group *, 86 + struct fsnotify_event *)) 81 87 { 82 88 int ret = 0; 83 89 struct list_head *list = &group->notification_list; ··· 110 104 } 111 105 112 106 if (!list_empty(list) && merge) { 113 - ret = merge(list, event); 107 + ret = merge(group, event); 114 108 if (ret) { 115 109 spin_unlock(&group->notification_lock); 116 110 return ret; ··· 120 114 queue: 121 115 group->q_len++; 122 116 list_add_tail(&event->list, list); 117 + if (insert) 118 + insert(group, event); 123 119 spin_unlock(&group->notification_lock); 124 120 125 121 wake_up(&group->notification_waitq);
+7 -3
include/linux/fsnotify_backend.h
··· 233 233 #endif 234 234 #ifdef CONFIG_FANOTIFY 235 235 struct fanotify_group_private_data { 236 + /* Hash table of events for merge */ 237 + struct hlist_head *merge_hash; 236 238 /* allows a group to block waiting for a userspace response */ 237 239 struct list_head access_list; 238 240 wait_queue_head_t access_waitq; ··· 488 486 /* attach the event to the group notification queue */ 489 487 extern int fsnotify_add_event(struct fsnotify_group *group, 490 488 struct fsnotify_event *event, 491 - int (*merge)(struct list_head *, 492 - struct fsnotify_event *)); 489 + int (*merge)(struct fsnotify_group *, 490 + struct fsnotify_event *), 491 + void (*insert)(struct fsnotify_group *, 492 + struct fsnotify_event *)); 493 493 /* Queue overflow event to a notification group */ 494 494 static inline void fsnotify_queue_overflow(struct fsnotify_group *group) 495 495 { 496 - fsnotify_add_event(group, group->overflow_event, NULL); 496 + fsnotify_add_event(group, group->overflow_event, NULL, NULL); 497 497 } 498 498 499 499 static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)