Merge tag 'vfs-7.1-rc1.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

+3 -2

fs/aio.c

··· 422 422 423 423 ctx = rcu_dereference(table->table[i]); 424 424 if (ctx && ctx->aio_ring_file == file) { 425 - if (!atomic_read(&ctx->dead)) { 425 + if (!atomic_read(&ctx->dead) && 426 + (ctx->mmap_size == (vma->vm_end - vma->vm_start))) { 426 427 ctx->user_id = ctx->mmap_base = vma->vm_start; 427 428 res = 0; 428 429 } ··· 448 447 449 448 static int aio_ring_mmap_prepare(struct vm_area_desc *desc) 450 449 { 451 - vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT); 450 + vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT, VMA_DONTCOPY_BIT); 452 451 desc->vm_ops = &aio_ring_vm_ops; 453 452 return 0; 454 453 }

+8 -1

fs/buffer.c

··· 719 719 mark_buffer_dirty(bh); 720 720 if (!bh->b_mmb) { 721 721 spin_lock(&mmb->lock); 722 + /* 723 + * For a corrupted filesystem with multiply claimed blocks this 724 + * can fail. Avoid corrupting the linked list in that case. 725 + */ 726 + if (cmpxchg(&bh->b_mmb, NULL, mmb) != NULL) { 727 + spin_unlock(&mmb->lock); 728 + return; 729 + } 722 730 list_move_tail(&bh->b_assoc_buffers, &mmb->list); 723 - bh->b_mmb = mmb; 724 731 spin_unlock(&mmb->lock); 725 732 } 726 733 }

+78 -80

fs/eventpoll.c

··· 148 148 /* The file descriptor information this item refers to */ 149 149 struct epoll_filefd ffd; 150 150 151 - /* 152 - * Protected by file->f_lock, true for to-be-released epitem already 153 - * removed from the "struct file" items list; together with 154 - * eventpoll->refcount orchestrates "struct eventpoll" disposal 155 - */ 156 - bool dying; 157 - 158 151 /* List containing poll wait queues */ 159 152 struct eppoll_entry *pwqlist; 160 153 ··· 213 220 struct hlist_head refs; 214 221 u8 loop_check_depth; 215 222 216 - /* 217 - * usage count, used together with epitem->dying to 218 - * orchestrate the disposal of this struct 219 - */ 223 + /* usage count, orchestrates "struct eventpoll" disposal */ 220 224 refcount_t refcount; 221 225 222 226 /* used to defer freeing past ep_get_upwards_depth_proc() RCU walk */ ··· 817 827 } 818 828 819 829 /* 820 - * Removes a "struct epitem" from the eventpoll RB tree and deallocates 821 - * all the associated resources. Must be called with "mtx" held. 822 - * If the dying flag is set, do the removal only if force is true. 823 - * This prevents ep_clear_and_put() from dropping all the ep references 824 - * while running concurrently with eventpoll_release_file(). 825 - * Returns true if the eventpoll can be disposed. 830 + * The ffd.file pointer may be in the process of being torn down due to 831 + * being closed, but we may not have finished eventpoll_release() yet. 832 + * 833 + * Normally, even with the atomic_long_inc_not_zero, the file may have 834 + * been free'd and then gotten re-allocated to something else (since 835 + * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). 836 + * 837 + * But for epoll, users hold the ep->mtx mutex, and as such any file in 838 + * the process of being free'd will block in eventpoll_release_file() 839 + * and thus the underlying file allocation will not be free'd, and the 840 + * file re-use cannot happen. 
841 + * 842 + * For the same reason we can avoid a rcu_read_lock() around the 843 + * operation - 'ffd.file' cannot go away even if the refcount has 844 + * reached zero (but we must still not call out to ->poll() functions 845 + * etc). 826 846 */ 827 - static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) 847 + static struct file *epi_fget(const struct epitem *epi) 828 848 { 829 - struct file *file = epi->ffd.file; 830 - struct epitems_head *to_free; 849 + struct file *file; 850 + 851 + file = epi->ffd.file; 852 + if (!file_ref_get(&file->f_ref)) 853 + file = NULL; 854 + return file; 855 + } 856 + 857 + /* 858 + * Takes &file->f_lock; returns with it released. 859 + */ 860 + static void ep_remove_file(struct eventpoll *ep, struct epitem *epi, 861 + struct file *file) 862 + { 863 + struct epitems_head *to_free = NULL; 831 864 struct hlist_head *head; 832 865 833 - lockdep_assert_irqs_enabled(); 866 + lockdep_assert_held(&ep->mtx); 834 867 835 - /* 836 - * Removes poll wait queue hooks. 837 - */ 838 - ep_unregister_pollwait(ep, epi); 839 - 840 - /* Remove the current item from the list of epoll hooks */ 841 868 spin_lock(&file->f_lock); 842 - if (epi->dying && !force) { 843 - spin_unlock(&file->f_lock); 844 - return false; 845 - } 846 - 847 - to_free = NULL; 848 869 head = file->f_ep; 849 - if (head->first == &epi->fllink && !epi->fllink.next) { 870 + if (hlist_is_singular_node(&epi->fllink, head)) { 850 871 /* See eventpoll_release() for details. 
*/ 851 872 WRITE_ONCE(file->f_ep, NULL); 852 873 if (!is_file_epoll(file)) { ··· 870 869 hlist_del_rcu(&epi->fllink); 871 870 spin_unlock(&file->f_lock); 872 871 free_ephead(to_free); 872 + } 873 + 874 + static void ep_remove_epi(struct eventpoll *ep, struct epitem *epi) 875 + { 876 + lockdep_assert_held(&ep->mtx); 873 877 874 878 rb_erase_cached(&epi->rbn, &ep->rbr); 875 879 ··· 894 888 kfree_rcu(epi, rcu); 895 889 896 890 percpu_counter_dec(&ep->user->epoll_watches); 897 - return true; 898 891 } 899 892 900 893 /* 901 894 * ep_remove variant for callers owing an additional reference to the ep 902 895 */ 903 - static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) 896 + static void ep_remove(struct eventpoll *ep, struct epitem *epi) 904 897 { 905 - if (__ep_remove(ep, epi, false)) 906 - WARN_ON_ONCE(ep_refcount_dec_and_test(ep)); 898 + struct file *file __free(fput) = NULL; 899 + 900 + lockdep_assert_irqs_enabled(); 901 + lockdep_assert_held(&ep->mtx); 902 + 903 + ep_unregister_pollwait(ep, epi); 904 + 905 + /* 906 + * If we manage to grab a reference it means we're not in 907 + * eventpoll_release_file() and aren't going to be: once @file's 908 + * refcount has reached zero, file_ref_get() cannot bring it back. 909 + */ 910 + file = epi_fget(epi); 911 + if (!file) 912 + return; 913 + 914 + ep_remove_file(ep, epi, file); 915 + ep_remove_epi(ep, epi); 916 + WARN_ON_ONCE(ep_refcount_dec_and_test(ep)); 907 917 } 908 918 909 919 static void ep_clear_and_put(struct eventpoll *ep) ··· 945 923 946 924 /* 947 925 * Walks through the whole tree and try to free each "struct epitem". 948 - * Note that ep_remove_safe() will not remove the epitem in case of a 926 + * Note that ep_remove() will not remove the epitem in case of a 949 927 * racing eventpoll_release_file(); the latter will do the removal. 950 928 * At this point we are sure no poll callbacks will be lingering around. 
951 929 * Since we still own a reference to the eventpoll struct, the loop can't ··· 954 932 for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = next) { 955 933 next = rb_next(rbp); 956 934 epi = rb_entry(rbp, struct epitem, rbn); 957 - ep_remove_safe(ep, epi); 935 + ep_remove(ep, epi); 958 936 cond_resched(); 959 937 } 960 938 ··· 1035 1013 } 1036 1014 1037 1015 /* 1038 - * The ffd.file pointer may be in the process of being torn down due to 1039 - * being closed, but we may not have finished eventpoll_release() yet. 1040 - * 1041 - * Normally, even with the atomic_long_inc_not_zero, the file may have 1042 - * been free'd and then gotten re-allocated to something else (since 1043 - * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). 1044 - * 1045 - * But for epoll, users hold the ep->mtx mutex, and as such any file in 1046 - * the process of being free'd will block in eventpoll_release_file() 1047 - * and thus the underlying file allocation will not be free'd, and the 1048 - * file re-use cannot happen. 1049 - * 1050 - * For the same reason we can avoid a rcu_read_lock() around the 1051 - * operation - 'ffd.file' cannot go away even if the refcount has 1052 - * reached zero (but we must still not call out to ->poll() functions 1053 - * etc). 1054 - */ 1055 - static struct file *epi_fget(const struct epitem *epi) 1056 - { 1057 - struct file *file; 1058 - 1059 - file = epi->ffd.file; 1060 - if (!file_ref_get(&file->f_ref)) 1061 - file = NULL; 1062 - return file; 1063 - } 1064 - 1065 - /* 1066 1016 * Differs from ep_eventpoll_poll() in that internal callers already have 1067 1017 * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() 1068 1018 * is correctly annotated. 
··· 1111 1117 { 1112 1118 struct eventpoll *ep; 1113 1119 struct epitem *epi; 1114 - bool dispose; 1115 1120 1116 1121 /* 1117 - * Use the 'dying' flag to prevent a concurrent ep_clear_and_put() from 1118 - * touching the epitems list before eventpoll_release_file() can access 1119 - * the ep->mtx. 1122 + * A concurrent ep_remove() cannot outrace us: it pins @file via 1123 + * epi_fget(), which fails once __fput() has dropped the refcount 1124 + * to zero -- the path we're on. So any racing ep_remove() bails 1125 + * and leaves the epi for us to clean up here. 1120 1126 */ 1121 1127 again: 1122 1128 spin_lock(&file->f_lock); 1123 1129 if (file->f_ep && file->f_ep->first) { 1124 1130 epi = hlist_entry(file->f_ep->first, struct epitem, fllink); 1125 - epi->dying = true; 1126 1131 spin_unlock(&file->f_lock); 1127 1132 1128 1133 /* ··· 1130 1137 */ 1131 1138 ep = epi->ep; 1132 1139 mutex_lock(&ep->mtx); 1133 - dispose = __ep_remove(ep, epi, true); 1140 + 1141 + ep_unregister_pollwait(ep, epi); 1142 + 1143 + ep_remove_file(ep, epi, file); 1144 + ep_remove_epi(ep, epi); 1145 + 1134 1146 mutex_unlock(&ep->mtx); 1135 1147 1136 - if (dispose && ep_refcount_dec_and_test(ep)) 1148 + if (ep_refcount_dec_and_test(ep)) 1137 1149 ep_free(ep); 1138 1150 goto again; 1139 1151 } ··· 1617 1619 mutex_unlock(&tep->mtx); 1618 1620 1619 1621 /* 1620 - * ep_remove_safe() calls in the later error paths can't lead to 1622 + * ep_remove() calls in the later error paths can't lead to 1621 1623 * ep_free() as the ep file itself still holds an ep reference. 
1622 1624 */ 1623 1625 ep_get(ep); 1624 1626 1625 1627 /* now check if we've created too many backpaths */ 1626 1628 if (unlikely(full_check && reverse_path_check())) { 1627 - ep_remove_safe(ep, epi); 1629 + ep_remove(ep, epi); 1628 1630 return -EINVAL; 1629 1631 } 1630 1632 1631 1633 if (epi->event.events & EPOLLWAKEUP) { 1632 1634 error = ep_create_wakeup_source(epi); 1633 1635 if (error) { 1634 - ep_remove_safe(ep, epi); 1636 + ep_remove(ep, epi); 1635 1637 return error; 1636 1638 } 1637 1639 } ··· 1655 1657 * high memory pressure. 1656 1658 */ 1657 1659 if (unlikely(!epq.epi)) { 1658 - ep_remove_safe(ep, epi); 1660 + ep_remove(ep, epi); 1659 1661 return -ENOMEM; 1660 1662 } 1661 1663 ··· 2350 2352 * The eventpoll itself is still alive: the refcount 2351 2353 * can't go to zero here. 2352 2354 */ 2353 - ep_remove_safe(ep, epi); 2355 + ep_remove(ep, epi); 2354 2356 error = 0; 2355 2357 } else { 2356 2358 error = -ENOENT;

+19 -17

fs/fs-writeback.c

··· 568 568 struct inode_switch_wbs_context *isw, *next_isw; 569 569 struct llist_node *list; 570 570 571 + list = llist_del_all(&new_wb->switch_wbs_ctxs); 571 572 /* 572 - * Grab out reference to wb so that it cannot get freed under us 573 + * Nothing to do? That would be a problem as references held by isw 574 + * items protect wb from freeing... 575 + */ 576 + if (WARN_ON_ONCE(!list)) 577 + return; 578 + 579 + /* 580 + * Grab our reference to wb so that it cannot get freed under us 573 581 * after we process all the isw items. 574 582 */ 575 583 wb_get(new_wb); 576 - while (1) { 577 - list = llist_del_all(&new_wb->switch_wbs_ctxs); 578 - /* Nothing to do? */ 579 - if (!list) 580 - break; 581 - /* 582 - * In addition to synchronizing among switchers, I_WB_SWITCH 583 - * tells the RCU protected stat update paths to grab the i_page 584 - * lock so that stat transfer can synchronize against them. 585 - * Let's continue after I_WB_SWITCH is guaranteed to be 586 - * visible. 587 - */ 588 - synchronize_rcu(); 584 + /* 585 + * In addition to synchronizing among switchers, I_WB_SWITCH 586 + * tells the RCU protected stat update paths to grab the i_page 587 + * lock so that stat transfer can synchronize against them. 588 + * Let's continue after I_WB_SWITCH is guaranteed to be 589 + * visible. 590 + */ 591 + synchronize_rcu(); 589 592 590 - llist_for_each_entry_safe(isw, next_isw, list, list) 591 - process_inode_switch_wbs(new_wb, isw); 592 - } 593 + llist_for_each_entry_safe(isw, next_isw, list, list) 594 + process_inode_switch_wbs(new_wb, isw); 593 595 wb_put(new_wb); 594 596 } 595 597

+4

fs/fuse/readdir.c

··· 41 41 unsigned int offset; 42 42 void *addr; 43 43 44 + /* Dirent doesn't fit in readdir cache page? Skip caching. */ 45 + if (reclen > PAGE_SIZE) 46 + return; 47 + 44 48 spin_lock(&fi->rdc.lock); 45 49 /* 46 50 * Is cache already completed? Or this entry does not go at the end of

+10 -6

include/linux/eventpoll.h

··· 39 39 { 40 40 41 41 /* 42 - * Fast check to avoid the get/release of the semaphore. Since 43 - * we're doing this outside the semaphore lock, it might return 44 - * false negatives, but we don't care. It'll help in 99.99% of cases 45 - * to avoid the semaphore lock. False positives simply cannot happen 46 - * because the file in on the way to be removed and nobody ( but 47 - * eventpoll ) has still a reference to this file. 42 + * Fast check to skip the slow path in the common case where the 43 + * file was never attached to an epoll. Safe without file->f_lock 44 + * because every f_ep writer excludes a concurrent __fput() on 45 + * @file: 46 + * - ep_insert() requires the file alive (refcount > 0); 47 + * - ep_remove() holds @file pinned via epi_fget() across the 48 + * write; 49 + * - eventpoll_release_file() runs from __fput() itself. 50 + * We are in __fput() here, so none of those can race us: a NULL 51 + * observation truly means no epoll path has work left on @file. 48 52 */ 49 53 if (likely(!READ_ONCE(file->f_ep))) 50 54 return;

+3 -3

include/linux/nstree.h

··· 61 61 62 62 /** 63 63 * ns_tree_add_raw - Add a namespace to a namespace 64 - * @ns: Namespace to add 64 + * @__ns: Namespace to add 65 65 * 66 66 * This function adds a namespace to the appropriate namespace tree 67 67 * without assigning a id. ··· 70 70 71 71 /** 72 72 * ns_tree_add - Add a namespace to a namespace tree 73 - * @ns: Namespace to add 73 + * @__ns: Namespace to add 74 74 * 75 75 * This function assigns a new id to the namespace and adds it to the 76 76 * appropriate namespace tree and list. ··· 81 81 82 82 /** 83 83 * ns_tree_remove - Remove a namespace from a namespace tree 84 - * @ns: Namespace to remove 84 + * @__ns: Namespace to remove 85 85 * 86 86 * This function removes a namespace from the appropriate namespace 87 87 * tree and list.

Configure Feed