Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (patches from Andrew)

Merge misc mm fixes from Andrew Morton:
"15 patches.

VM subsystems affected by this patch series: userfaultfd, kfence,
highmem, pagealloc, memblock, pagecache, secretmem, pagemap, and
hugetlbfs"

* akpm:
hugetlbfs: fix mount mode command line processing
mm: fix the deadlock in finish_fault()
mm: mmap_lock: fix disabling preemption directly
mm/secretmem: wire up ->set_page_dirty
writeback, cgroup: do not reparent dax inodes
writeback, cgroup: remove wb from offline list before releasing refcnt
memblock: make for_each_mem_range() traverse MEMBLOCK_HOTPLUG regions
mm: page_alloc: fix page_poison=1 / INIT_ON_ALLOC_DEFAULT_ON interaction
mm: use kmap_local_page in memzero_page
mm: call flush_dcache_page() in memcpy_to_page() and memzero_page()
kfence: skip all GFP_ZONEMASK allocations
kfence: move the size check to the beginning of __kfence_alloc()
kfence: defer kfence_test_init to ensure that kunit debugfs is created
selftest: use mmap instead of posix_memalign to allocate memory
userfaultfd: do not untag user pointers

+92 -50
+17 -7
Documentation/arm64/tagged-address-abi.rst
···
45 45
46 46 1. User addresses not accessed by the kernel but used for address space
47 47 management (e.g. ``mprotect()``, ``madvise()``). The use of valid
48 - tagged pointers in this context is allowed with the exception of
49 - ``brk()``, ``mmap()`` and the ``new_address`` argument to
50 - ``mremap()`` as these have the potential to alias with existing
51 - user addresses.
48 + tagged pointers in this context is allowed with these exceptions:
52 49
53 - NOTE: This behaviour changed in v5.6 and so some earlier kernels may
54 - incorrectly accept valid tagged pointers for the ``brk()``,
55 - ``mmap()`` and ``mremap()`` system calls.
50 + - ``brk()``, ``mmap()`` and the ``new_address`` argument to
51 + ``mremap()`` as these have the potential to alias with existing
52 + user addresses.
53 +
54 + NOTE: This behaviour changed in v5.6 and so some earlier kernels may
55 + incorrectly accept valid tagged pointers for the ``brk()``,
56 + ``mmap()`` and ``mremap()`` system calls.
57 +
58 + - The ``range.start``, ``start`` and ``dst`` arguments to the
59 + ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from
60 + ``userfaultfd()``, as fault addresses subsequently obtained by reading
61 + the file descriptor will be untagged, which may otherwise confuse
62 + tag-unaware programs.
63 +
64 + NOTE: This behaviour changed in v5.14 and so some earlier kernels may
65 + incorrectly accept valid tagged pointers for this system call.
56 66
57 67 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
58 68 relaxation is disabled by default and the application thread needs to
+3
fs/fs-writeback.c
···
521 521 */
522 522 smp_mb();
523 523
524 + if (IS_DAX(inode))
525 + return false;
526 +
524 527 /* while holding I_WB_SWITCH, no one else can update the association */
525 528 spin_lock(&inode->i_lock);
526 529 if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
+1 -1
fs/hugetlbfs/inode.c
···
77 77 static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
78 78 fsparam_u32 ("gid", Opt_gid),
79 79 fsparam_string("min_size", Opt_min_size),
80 - fsparam_u32 ("mode", Opt_mode),
80 + fsparam_u32oct("mode", Opt_mode),
81 81 fsparam_string("nr_inodes", Opt_nr_inodes),
82 82 fsparam_string("pagesize", Opt_pagesize),
83 83 fsparam_string("size", Opt_size),
+12 -14
fs/userfaultfd.c
···
1236 1236 }
1237 1237
1238 1238 static __always_inline int validate_range(struct mm_struct *mm,
1239 - __u64 *start, __u64 len)
1239 + __u64 start, __u64 len)
1240 1240 {
1241 1241 __u64 task_size = mm->task_size;
1242 1242
1243 - *start = untagged_addr(*start);
1244 -
1245 - if (*start & ~PAGE_MASK)
1243 + if (start & ~PAGE_MASK)
1246 1244 return -EINVAL;
1247 1245 if (len & ~PAGE_MASK)
1248 1246 return -EINVAL;
1249 1247 if (!len)
1250 1248 return -EINVAL;
1251 - if (*start < mmap_min_addr)
1249 + if (start < mmap_min_addr)
1252 1250 return -EINVAL;
1253 - if (*start >= task_size)
1251 + if (start >= task_size)
1254 1252 return -EINVAL;
1255 - if (len > task_size - *start)
1253 + if (len > task_size - start)
1256 1254 return -EINVAL;
1257 1255 return 0;
1258 1256 }
···
1314 1316 vm_flags |= VM_UFFD_MINOR;
1315 1317 }
1316 1318
1317 - ret = validate_range(mm, &uffdio_register.range.start,
1319 + ret = validate_range(mm, uffdio_register.range.start,
1318 1320 uffdio_register.range.len);
1319 1321 if (ret)
1320 1322 goto out;
···
1520 1522 if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
1521 1523 goto out;
1522 1524
1523 - ret = validate_range(mm, &uffdio_unregister.start,
1525 + ret = validate_range(mm, uffdio_unregister.start,
1524 1526 uffdio_unregister.len);
1525 1527 if (ret)
1526 1528 goto out;
···
1669 1671 if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
1670 1672 goto out;
1671 1673
1672 - ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
1674 + ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
1673 1675 if (ret)
1674 1676 goto out;
1675 1677
···
1709 1711 sizeof(uffdio_copy)-sizeof(__s64)))
1710 1712 goto out;
1711 1713
1712 - ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
1714 + ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
1713 1715 if (ret)
1714 1716 goto out;
1715 1717 /*
···
1766 1768 sizeof(uffdio_zeropage)-sizeof(__s64)))
1767 1769 goto out;
1768 1770
1769 - ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
1771 + ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
1770 1772 uffdio_zeropage.range.len);
1771 1773 if (ret)
1772 1774 goto out;
···
1816 1818 sizeof(struct uffdio_writeprotect)))
1817 1819 return -EFAULT;
1818 1820
1819 - ret = validate_range(ctx->mm, &uffdio_wp.range.start,
1821 + ret = validate_range(ctx->mm, uffdio_wp.range.start,
1820 1822 uffdio_wp.range.len);
1821 1823 if (ret)
1822 1824 return ret;
···
1864 1866 sizeof(uffdio_continue) - (sizeof(__s64))))
1865 1867 goto out;
1866 1868
1867 - ret = validate_range(ctx->mm, &uffdio_continue.range.start,
1869 + ret = validate_range(ctx->mm, uffdio_continue.range.start,
1868 1870 uffdio_continue.range.len);
1869 1871 if (ret)
1870 1872 goto out;
+4 -2
include/linux/highmem.h
···
318 318
319 319 VM_BUG_ON(offset + len > PAGE_SIZE);
320 320 memcpy(to + offset, from, len);
321 + flush_dcache_page(page);
321 322 kunmap_local(to);
322 323 }
323 324
324 325 static inline void memzero_page(struct page *page, size_t offset, size_t len)
325 326 {
326 - char *addr = kmap_atomic(page);
327 + char *addr = kmap_local_page(page);
327 328 memset(addr + offset, 0, len);
328 - kunmap_atomic(addr);
329 + flush_dcache_page(page);
330 + kunmap_local(addr);
329 331 }
330 332
331 333 #endif /* _LINUX_HIGHMEM_H */
+2 -2
include/linux/memblock.h
···
209 209 */
210 210 #define for_each_mem_range(i, p_start, p_end) \
211 211 __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \
212 - MEMBLOCK_NONE, p_start, p_end, NULL)
212 + MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
213 213
214 214 /**
215 215 * for_each_mem_range_rev - reverse iterate through memblock areas from
···
220 220 */
221 221 #define for_each_mem_range_rev(i, p_start, p_end) \
222 222 __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \
223 - MEMBLOCK_NONE, p_start, p_end, NULL)
223 + MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
224 224
225 225 /**
226 226 * for_each_reserved_mem_range - iterate over all reserved memblock areas
+1 -1
mm/backing-dev.c
···
398 398 blkcg_unpin_online(blkcg);
399 399
400 400 fprop_local_destroy_percpu(&wb->memcg_completions);
401 - percpu_ref_exit(&wb->refcnt);
402 401
403 402 spin_lock_irq(&cgwb_lock);
404 403 list_del(&wb->offline_node);
405 404 spin_unlock_irq(&cgwb_lock);
406 405
406 + percpu_ref_exit(&wb->refcnt);
407 407 wb_exit(wb);
408 408 WARN_ON_ONCE(!list_empty(&wb->b_attached));
409 409 kfree_rcu(wb, rcu);
+16 -3
mm/kfence/core.c
···
734 734 void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
735 735 {
736 736 /*
737 + * Perform size check before switching kfence_allocation_gate, so that
738 + * we don't disable KFENCE without making an allocation.
739 + */
740 + if (size > PAGE_SIZE)
741 + return NULL;
742 +
743 + /*
744 + * Skip allocations from non-default zones, including DMA. We cannot
745 + * guarantee that pages in the KFENCE pool will have the requested
746 + * properties (e.g. reside in DMAable memory).
747 + */
748 + if ((flags & GFP_ZONEMASK) ||
749 + (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32)))
750 + return NULL;
751 +
752 + /*
737 753 * allocation_gate only needs to become non-zero, so it doesn't make
738 754 * sense to continue writing to it and pay the associated contention
739 755 * cost, in case we have a large number of concurrent allocations.
···
771 755 #endif
772 756
773 757 if (!READ_ONCE(kfence_enabled))
774 - return NULL;
775 -
776 - if (size > PAGE_SIZE)
777 758 return NULL;
778 759
779 760 return kfence_guarded_alloc(s, size, flags);
+1 -1
mm/kfence/kfence_test.c
···
852 852 tracepoint_synchronize_unregister();
853 853 }
854 854
855 - late_initcall(kfence_test_init);
855 + late_initcall_sync(kfence_test_init);
856 856 module_exit(kfence_test_exit);
857 857
858 858 MODULE_LICENSE("GPL v2");
+2 -1
mm/memblock.c
···
947 947 return true;
948 948
949 949 /* skip hotpluggable memory regions if needed */
950 - if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
950 + if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
951 + !(flags & MEMBLOCK_HOTPLUG))
951 952 return true;
952 953
953 954 /* if we want mirror memory skip non-mirror memory regions */
+10 -1
mm/memory.c
···
4026 4026 return ret;
4027 4027 }
4028 4028
4029 - if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
4029 + if (vmf->prealloc_pte) {
4030 + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
4031 + if (likely(pmd_none(*vmf->pmd))) {
4032 + mm_inc_nr_ptes(vma->vm_mm);
4033 + pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
4034 + vmf->prealloc_pte = NULL;
4035 + }
4036 + spin_unlock(vmf->ptl);
4037 + } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
4030 4038 return VM_FAULT_OOM;
4039 + }
4031 4040 }
4032 4041
4033 4042 /* See comment in handle_pte_fault() */
+2 -2
mm/mmap_lock.c
···
156 156 #define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \
157 157 do { \
158 158 const char *memcg_path; \
159 - preempt_disable(); \
159 + local_lock(&memcg_paths.lock); \
160 160 memcg_path = get_mm_memcg_path(mm); \
161 161 trace_mmap_lock_##type(mm, \
162 162 memcg_path != NULL ? memcg_path : "", \
163 163 ##__VA_ARGS__); \
164 164 if (likely(memcg_path != NULL)) \
165 165 put_memcg_path_buf(); \
166 - preempt_enable(); \
166 + local_unlock(&memcg_paths.lock); \
167 167 } while (0)
168 168
169 169 #else /* !CONFIG_MEMCG */
+16 -13
mm/page_alloc.c
···
840 840 }
841 841 #endif
842 842
843 - if (_init_on_alloc_enabled_early) {
844 - if (page_poisoning_requested)
845 - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
846 - "will take precedence over init_on_alloc\n");
847 - else
848 - static_branch_enable(&init_on_alloc);
843 + if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
844 + page_poisoning_requested) {
845 + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
846 + "will take precedence over init_on_alloc and init_on_free\n");
847 + _init_on_alloc_enabled_early = false;
848 + _init_on_free_enabled_early = false;
849 849 }
850 - if (_init_on_free_enabled_early) {
851 - if (page_poisoning_requested)
852 - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
853 - "will take precedence over init_on_free\n");
854 - else
855 - static_branch_enable(&init_on_free);
856 - }
850 +
851 + if (_init_on_alloc_enabled_early)
852 + static_branch_enable(&init_on_alloc);
853 + else
854 + static_branch_disable(&init_on_alloc);
855 +
856 + if (_init_on_free_enabled_early)
857 + static_branch_enable(&init_on_free);
858 + else
859 + static_branch_disable(&init_on_free);
857 860
858 861 #ifdef CONFIG_DEBUG_PAGEALLOC
859 862 if (!debug_pagealloc_enabled())
+1
mm/secretmem.c
···
152 152 }
153 153
154 154 const struct address_space_operations secretmem_aops = {
155 + .set_page_dirty = __set_page_dirty_no_writeback,
155 156 .freepage = secretmem_freepage,
156 157 .migratepage = secretmem_migratepage,
157 158 .isolate_page = secretmem_isolate_page,
+4 -2
tools/testing/selftests/vm/userfaultfd.c
···
210 210
211 211 static void anon_allocate_area(void **alloc_area)
212 212 {
213 - if (posix_memalign(alloc_area, page_size, nr_pages * page_size))
214 - err("posix_memalign() failed");
213 + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
214 + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
215 + if (*alloc_area == MAP_FAILED)
216 + err("mmap of anonymous memory failed");
215 217 }
216 218
217 219 static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)