Merge branch 'akpm' (patches from Andrew)

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
"13 patches.

Subsystems affected by this patch series: resource, squashfs, hfsplus,
modprobe, and mm (hugetlb, slub, userfaultfd, ksm, pagealloc, kasan,
pagemap, and ioremap)"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm/ioremap: fix iomap_max_page_shift
docs: admin-guide: update description for kernel.modprobe sysctl
hfsplus: prevent corruption in shrinking truncate
mm/filemap: fix readahead return types
kasan: fix unit tests with CONFIG_UBSAN_LOCAL_BOUNDS enabled
mm: fix struct page layout on 32-bit systems
ksm: revert "use GET_KSM_PAGE_NOLOCK to get ksm page in remove_rmap_item_from_tree()"
userfaultfd: release page in error path to avoid BUG_ON
squashfs: fix divide error in calculate_skip()
kernel/resource: fix return code check in __request_free_mem_region
mm, slub: move slub_debug static key enabling outside slab_mutex
mm/hugetlb: fix cow where page writable in child
mm/hugetlb: fix F_SEAL_FUTURE_WRITE

Linus Torvalds 5 years ago a4147415 f36edc55

+129 -62

18 changed files

expand all collapse all

Documentation

admin-guide

sysctl

kernel.rst

hfsplus

extents.c

hugetlbfs

inode.c

iomap

buffered-io.c

squashfs

file.c

include

linux

mm.h

mm_types.h

pagemap.h

net

page_pool.h

kernel

resource.c

lib

test_kasan.c

hugetlb.c

ioremap.c

ksm.c

shmem.c

slab_common.c

slub.c

net

core

page_pool.c

+5 -4

Documentation/admin-guide/sysctl/kernel.rst

reviewed

··· 483 483 ======== 484 484 485 485 The full path to the usermode helper for autoloading kernel modules, 486 486 - by default "/sbin/modprobe". This binary is executed when the kernel 487 487 - requests a module. For example, if userspace passes an unknown 488 488 - filesystem type to mount(), then the kernel will automatically request 489 489 - the corresponding filesystem module by executing this usermode helper. 486 486 + by default ``CONFIG_MODPROBE_PATH``, which in turn defaults to 487 487 + "/sbin/modprobe". This binary is executed when the kernel requests a 488 488 + module. For example, if userspace passes an unknown filesystem type 489 489 + to mount(), then the kernel will automatically request the 490 490 + corresponding filesystem module by executing this usermode helper. 490 491 This usermode helper should insert the needed module into the kernel. 491 492 492 493 This sysctl only affects module autoloading. It has no effect on the

+4 -3

fs/hfsplus/extents.c

reviewed

··· 598 598 res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); 599 599 if (res) 600 600 break; 601 601 - hfs_brec_remove(&fd); 602 601 603 603 - mutex_unlock(&fd.tree->tree_lock); 604 602 start = hip->cached_start; 603 603 + if (blk_cnt <= start) 604 604 + hfs_brec_remove(&fd); 605 605 + mutex_unlock(&fd.tree->tree_lock); 605 606 hfsplus_free_extents(sb, hip->cached_extents, 606 607 alloc_cnt - start, alloc_cnt - blk_cnt); 607 608 hfsplus_dump_extent(hip->cached_extents); 609 609 + mutex_lock(&fd.tree->tree_lock); 608 610 if (blk_cnt > start) { 609 611 hip->extent_state |= HFSPLUS_EXT_DIRTY; 610 612 break; ··· 614 612 alloc_cnt = start; 615 613 hip->cached_start = hip->cached_blocks = 0; 616 614 hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); 617 617 - mutex_lock(&fd.tree->tree_lock); 618 615 } 619 616 hfs_find_exit(&fd); 620 617

fs/hugetlbfs/inode.c

reviewed

··· 131 131 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 132 132 { 133 133 struct inode *inode = file_inode(file); 134 134 + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); 134 135 loff_t len, vma_len; 135 136 int ret; 136 137 struct hstate *h = hstate_file(file); ··· 146 145 */ 147 146 vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; 148 147 vma->vm_ops = &hugetlb_vm_ops; 148 148 + 149 149 + ret = seal_check_future_write(info->seals, vma); 150 150 + if (ret) 151 151 + return ret; 149 152 150 153 /* 151 154 * page based offset in vm_pgoff could be sufficiently large to

+2 -2

fs/iomap/buffered-io.c

reviewed

··· 394 394 { 395 395 struct inode *inode = rac->mapping->host; 396 396 loff_t pos = readahead_pos(rac); 397 397 - loff_t length = readahead_length(rac); 397 397 + size_t length = readahead_length(rac); 398 398 struct iomap_readpage_ctx ctx = { 399 399 .rac = rac, 400 400 }; ··· 402 402 trace_iomap_readahead(inode, readahead_count(rac)); 403 403 404 404 while (length > 0) { 405 405 - loff_t ret = iomap_apply(inode, pos, length, 0, ops, 405 405 + ssize_t ret = iomap_apply(inode, pos, length, 0, ops, 406 406 &ctx, iomap_readahead_actor); 407 407 if (ret <= 0) { 408 408 WARN_ON_ONCE(ret == 0);

+3 -3

fs/squashfs/file.c

reviewed

··· 211 211 * If the skip factor is limited in this way then the file will use multiple 212 212 * slots. 213 213 */ 214 214 - static inline int calculate_skip(int blocks) 214 214 + static inline int calculate_skip(u64 blocks) 215 215 { 216 216 - int skip = blocks / ((SQUASHFS_META_ENTRIES + 1) 216 216 + u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1) 217 217 * SQUASHFS_META_INDEXES); 218 218 - return min(SQUASHFS_CACHED_BLKS - 1, skip + 1); 218 218 + return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1); 219 219 } 220 220 221 221

+32

include/linux/mm.h

reviewed

··· 3216 3216 static inline void mem_dump_obj(void *object) {} 3217 3217 #endif 3218 3218 3219 3219 + /** 3220 3220 + * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it 3221 3221 + * @seals: the seals to check 3222 3222 + * @vma: the vma to operate on 3223 3223 + * 3224 3224 + * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on 3225 3225 + * the vma flags. Return 0 if check pass, or <0 for errors. 3226 3226 + */ 3227 3227 + static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) 3228 3228 + { 3229 3229 + if (seals & F_SEAL_FUTURE_WRITE) { 3230 3230 + /* 3231 3231 + * New PROT_WRITE and MAP_SHARED mmaps are not allowed when 3232 3232 + * "future write" seal active. 3233 3233 + */ 3234 3234 + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) 3235 3235 + return -EPERM; 3236 3236 + 3237 3237 + /* 3238 3238 + * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as 3239 3239 + * MAP_SHARED and read-only, take care to not allow mprotect to 3240 3240 + * revert protections on such mappings. Do this only for shared 3241 3241 + * mappings. For private mappings, don't need to mask 3242 3242 + * VM_MAYWRITE as we still want them to be COW-writable. 3243 3243 + */ 3244 3244 + if (vma->vm_flags & VM_SHARED) 3245 3245 + vma->vm_flags &= ~(VM_MAYWRITE); 3246 3246 + } 3247 3247 + 3248 3248 + return 0; 3249 3249 + } 3250 3250 + 3219 3251 #endif /* __KERNEL__ */ 3220 3252 #endif /* _LINUX_MM_H */

+2 -2

include/linux/mm_types.h

reviewed

··· 97 97 }; 98 98 struct { /* page_pool used by netstack */ 99 99 /** 100 100 - * @dma_addr: might require a 64-bit value even on 100 100 + * @dma_addr: might require a 64-bit value on 101 101 * 32-bit architectures. 102 102 */ 103 103 - dma_addr_t dma_addr; 103 103 + unsigned long dma_addr[2]; 104 104 }; 105 105 struct { /* slab, slob and slub */ 106 106 union {

+3 -3

include/linux/pagemap.h

reviewed

··· 997 997 * readahead_length - The number of bytes in this readahead request. 998 998 * @rac: The readahead request. 999 999 */ 1000 1000 - static inline loff_t readahead_length(struct readahead_control *rac) 1000 1000 + static inline size_t readahead_length(struct readahead_control *rac) 1001 1001 { 1002 1002 - return (loff_t)rac->_nr_pages * PAGE_SIZE; 1002 1002 + return rac->_nr_pages * PAGE_SIZE; 1003 1003 } 1004 1004 1005 1005 /** ··· 1024 1024 * readahead_batch_length - The number of bytes in the current batch. 1025 1025 * @rac: The readahead request. 1026 1026 */ 1027 1027 - static inline loff_t readahead_batch_length(struct readahead_control *rac) 1027 1027 + static inline size_t readahead_batch_length(struct readahead_control *rac) 1028 1028 { 1029 1029 return rac->_batch_count * PAGE_SIZE; 1030 1030 }

+11 -1

include/net/page_pool.h

reviewed

··· 198 198 199 199 static inline dma_addr_t page_pool_get_dma_addr(struct page *page) 200 200 { 201 201 - return page->dma_addr; 201 201 + dma_addr_t ret = page->dma_addr[0]; 202 202 + if (sizeof(dma_addr_t) > sizeof(unsigned long)) 203 203 + ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; 204 204 + return ret; 205 205 + } 206 206 + 207 207 + static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) 208 208 + { 209 209 + page->dma_addr[0] = addr; 210 210 + if (sizeof(dma_addr_t) > sizeof(unsigned long)) 211 211 + page->dma_addr[1] = upper_32_bits(addr); 202 212 } 203 213 204 214 static inline bool is_page_pool_compiled_in(void)

+1 -1

kernel/resource.c

reviewed

··· 1805 1805 REGION_DISJOINT) 1806 1806 continue; 1807 1807 1808 1808 - if (!__request_region_locked(res, &iomem_resource, addr, size, 1808 1808 + if (__request_region_locked(res, &iomem_resource, addr, size, 1809 1809 name, 0)) 1810 1810 break; 1811 1811

+23 -6

lib/test_kasan.c

reviewed

··· 654 654 655 655 static void kasan_global_oob(struct kunit *test) 656 656 { 657 657 - volatile int i = 3; 658 658 - char *p = &global_array[ARRAY_SIZE(global_array) + i]; 657 657 + /* 658 658 + * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS 659 659 + * from failing here and panicing the kernel, access the array via a 660 660 + * volatile pointer, which will prevent the compiler from being able to 661 661 + * determine the array bounds. 662 662 + * 663 663 + * This access uses a volatile pointer to char (char *volatile) rather 664 664 + * than the more conventional pointer to volatile char (volatile char *) 665 665 + * because we want to prevent the compiler from making inferences about 666 666 + * the pointer itself (i.e. its array bounds), not the data that it 667 667 + * refers to. 668 668 + */ 669 669 + char *volatile array = global_array; 670 670 + char *p = &array[ARRAY_SIZE(global_array) + 3]; 659 671 660 672 /* Only generic mode instruments globals. */ 661 673 KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); ··· 715 703 static void kasan_stack_oob(struct kunit *test) 716 704 { 717 705 char stack_array[10]; 718 718 - volatile int i = OOB_TAG_OFF; 719 719 - char *p = &stack_array[ARRAY_SIZE(stack_array) + i]; 706 706 + /* See comment in kasan_global_oob. */ 707 707 + char *volatile array = stack_array; 708 708 + char *p = &array[ARRAY_SIZE(stack_array) + OOB_TAG_OFF]; 720 709 721 710 KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK); 722 711 ··· 728 715 { 729 716 volatile int i = 10; 730 717 char alloca_array[i]; 731 731 - char *p = alloca_array - 1; 718 718 + /* See comment in kasan_global_oob. */ 719 719 + char *volatile array = alloca_array; 720 720 + char *p = array - 1; 732 721 733 722 /* Only generic mode instruments dynamic allocas. 
*/ 734 723 KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); ··· 743 728 { 744 729 volatile int i = 10; 745 730 char alloca_array[i]; 746 746 - char *p = alloca_array + i; 731 731 + /* See comment in kasan_global_oob. */ 732 732 + char *volatile array = alloca_array; 733 733 + char *p = array + i; 747 734 748 735 /* Only generic mode instruments dynamic allocas. */ 749 736 KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);

mm/hugetlb.c

reviewed

··· 4056 4056 * See Documentation/vm/mmu_notifier.rst 4057 4057 */ 4058 4058 huge_ptep_set_wrprotect(src, addr, src_pte); 4059 4059 + entry = huge_pte_wrprotect(entry); 4059 4060 } 4060 4061 4061 4062 page_dup_rmap(ptepage, true);

+3 -3

mm/ioremap.c

reviewed

··· 16 16 #include "pgalloc-track.h" 17 17 18 18 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP 19 19 - static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT; 19 19 + static unsigned int __ro_after_init iomap_max_page_shift = BITS_PER_LONG - 1; 20 20 21 21 static int __init set_nohugeiomap(char *str) 22 22 { 23 23 - iomap_max_page_shift = P4D_SHIFT; 23 23 + iomap_max_page_shift = PAGE_SHIFT; 24 24 return 0; 25 25 } 26 26 early_param("nohugeiomap", set_nohugeiomap); 27 27 #else /* CONFIG_HAVE_ARCH_HUGE_VMAP */ 28 28 - static const bool iomap_max_page_shift = PAGE_SHIFT; 28 28 + static const unsigned int iomap_max_page_shift = PAGE_SHIFT; 29 29 #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ 30 30 31 31 int ioremap_page_range(unsigned long addr,

+2 -1

mm/ksm.c

reviewed

··· 776 776 struct page *page; 777 777 778 778 stable_node = rmap_item->head; 779 779 - page = get_ksm_page(stable_node, GET_KSM_PAGE_NOLOCK); 779 779 + page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK); 780 780 if (!page) 781 781 goto out; 782 782 783 783 hlist_del(&rmap_item->hlist); 784 784 + unlock_page(page); 784 785 put_page(page); 785 786 786 787 if (!hlist_empty(&stable_node->hlist))

+15 -19

mm/shmem.c

reviewed

··· 2258 2258 static int shmem_mmap(struct file *file, struct vm_area_struct *vma) 2259 2259 { 2260 2260 struct shmem_inode_info *info = SHMEM_I(file_inode(file)); 2261 2261 + int ret; 2261 2262 2262 2262 - if (info->seals & F_SEAL_FUTURE_WRITE) { 2263 2263 - /* 2264 2264 - * New PROT_WRITE and MAP_SHARED mmaps are not allowed when 2265 2265 - * "future write" seal active. 2266 2266 - */ 2267 2267 - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) 2268 2268 - return -EPERM; 2269 2269 - 2270 2270 - /* 2271 2271 - * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as 2272 2272 - * MAP_SHARED and read-only, take care to not allow mprotect to 2273 2273 - * revert protections on such mappings. Do this only for shared 2274 2274 - * mappings. For private mappings, don't need to mask 2275 2275 - * VM_MAYWRITE as we still want them to be COW-writable. 2276 2276 - */ 2277 2277 - if (vma->vm_flags & VM_SHARED) 2278 2278 - vma->vm_flags &= ~(VM_MAYWRITE); 2279 2279 - } 2263 2263 + ret = seal_check_future_write(info->seals, vma); 2264 2264 + if (ret) 2265 2265 + return ret; 2280 2266 2281 2267 /* arm64 - allow memory tagging on RAM-based files */ 2282 2268 vma->vm_flags |= VM_MTE_ALLOWED; ··· 2361 2375 pgoff_t offset, max_off; 2362 2376 2363 2377 ret = -ENOMEM; 2364 2364 - if (!shmem_inode_acct_block(inode, 1)) 2378 2378 + if (!shmem_inode_acct_block(inode, 1)) { 2379 2379 + /* 2380 2380 + * We may have got a page, returned -ENOENT triggering a retry, 2381 2381 + * and now we find ourselves with -ENOMEM. Release the page, to 2382 2382 + * avoid a BUG_ON in our caller. 2383 2383 + */ 2384 2384 + if (unlikely(*pagep)) { 2385 2385 + put_page(*pagep); 2386 2386 + *pagep = NULL; 2387 2387 + } 2365 2388 goto out; 2389 2389 + } 2366 2390 2367 2391 if (!*pagep) { 2368 2392 page = shmem_alloc_page(gfp, info, pgoff);

+10

mm/slab_common.c

reviewed

··· 318 318 const char *cache_name; 319 319 int err; 320 320 321 321 + #ifdef CONFIG_SLUB_DEBUG 322 322 + /* 323 323 + * If no slub_debug was enabled globally, the static key is not yet 324 324 + * enabled by setup_slub_debug(). Enable it if the cache is being 325 325 + * created with any of the debugging flags passed explicitly. 326 326 + */ 327 327 + if (flags & SLAB_DEBUG_FLAGS) 328 328 + static_branch_enable(&slub_debug_enabled); 329 329 + #endif 330 330 + 321 331 mutex_lock(&slab_mutex); 322 332 323 333 err = kmem_cache_sanity_check(name, size);

-9

mm/slub.c

reviewed

··· 3828 3828 3829 3829 static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) 3830 3830 { 3831 3831 - #ifdef CONFIG_SLUB_DEBUG 3832 3832 - /* 3833 3833 - * If no slub_debug was enabled globally, the static key is not yet 3834 3834 - * enabled by setup_slub_debug(). Enable it if the cache is being 3835 3835 - * created with any of the debugging flags passed explicitly. 3836 3836 - */ 3837 3837 - if (flags & SLAB_DEBUG_FLAGS) 3838 3838 - static_branch_enable(&slub_debug_enabled); 3839 3839 - #endif 3840 3831 s->flags = kmem_cache_flags(s->size, flags, s->name); 3841 3832 #ifdef CONFIG_SLAB_FREELIST_HARDENED 3842 3833 s->random = get_random_long();

+7 -5

net/core/page_pool.c

reviewed

··· 174 174 struct page *page, 175 175 unsigned int dma_sync_size) 176 176 { 177 177 + dma_addr_t dma_addr = page_pool_get_dma_addr(page); 178 178 + 177 179 dma_sync_size = min(dma_sync_size, pool->p.max_len); 178 178 - dma_sync_single_range_for_device(pool->p.dev, page->dma_addr, 180 180 + dma_sync_single_range_for_device(pool->p.dev, dma_addr, 179 181 pool->p.offset, dma_sync_size, 180 182 pool->p.dma_dir); 181 183 } ··· 197 195 if (dma_mapping_error(pool->p.dev, dma)) 198 196 return false; 199 197 200 200 - page->dma_addr = dma; 198 198 + page_pool_set_dma_addr(page, dma); 201 199 202 200 if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) 203 201 page_pool_dma_sync_for_device(pool, page, pool->p.max_len); ··· 333 331 */ 334 332 goto skip_dma_unmap; 335 333 336 336 - dma = page->dma_addr; 334 334 + dma = page_pool_get_dma_addr(page); 337 335 338 338 - /* When page is unmapped, it cannot be returned our pool */ 336 336 + /* When page is unmapped, it cannot be returned to our pool */ 339 337 dma_unmap_page_attrs(pool->p.dev, dma, 340 338 PAGE_SIZE << pool->p.order, pool->p.dma_dir, 341 339 DMA_ATTR_SKIP_CPU_SYNC); 342 342 - page->dma_addr = 0; 340 340 + page_pool_set_dma_addr(page, 0); 343 341 skip_dma_unmap: 344 342 /* This may be the last page returned, releasing the pool, so 345 343 * it is not safe to reference pool afterwards.