Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'mm-hotfixes-stable-2023-02-02-19-24-2' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
"25 hotfixes, mainly for MM. 13 are cc:stable"

* tag 'mm-hotfixes-stable-2023-02-02-19-24-2' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (26 commits)
mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath()
Kconfig.debug: fix the help description in SCHED_DEBUG
mm/swapfile: add cond_resched() in get_swap_pages()
mm: use stack_depot_early_init for kmemleak
Squashfs: fix handling and sanity checking of xattr_ids count
sh: define RUNTIME_DISCARD_EXIT
highmem: round down the address passed to kunmap_flush_on_unmap()
migrate: hugetlb: check for hugetlb shared PMD in node migration
mm: hugetlb: proc: check for hugetlb shared PMD in /proc/PID/smaps
mm/MADV_COLLAPSE: catch !none !huge !bad pmd lookups
Revert "mm: kmemleak: alloc gray object for reserved region with direct map"
freevxfs: Kconfig: fix spelling
maple_tree: should get pivots boundary by type
.mailmap: update e-mail address for Eugen Hristev
mm, mremap: fix mremap() expanding for vma's with vm_ops->close()
squashfs: harden sanity check in squashfs_read_xattr_id_table
ia64: fix build error due to switch case label appearing next to declaration
mm: multi-gen LRU: fix crash during cgroup migration
Revert "mm: add nodes= arg to memory.reclaim"
zsmalloc: fix a race with deferred_handles storing
...

+430 -152
+1
.mailmap
··· 130 130 Douglas Gilbert <dougg@torque.net> 131 131 Ed L. Cashin <ecashin@coraid.com> 132 132 Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com> 133 + Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com> 133 134 Evgeniy Polyakov <johnpol@2ka.mipt.ru> 134 135 Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com> 135 136 Felipe W Damasio <felipewd@terra.com.br>
+6 -9
Documentation/admin-guide/cgroup-v2.rst
··· 1245 1245 This is a simple interface to trigger memory reclaim in the 1246 1246 target cgroup. 1247 1247 1248 - This file accepts a string which contains the number of bytes to 1249 - reclaim. 1248 + This file accepts a single key, the number of bytes to reclaim. 1249 + No nested keys are currently supported. 1250 1250 1251 1251 Example:: 1252 1252 1253 1253 echo "1G" > memory.reclaim 1254 + 1255 + The interface can be later extended with nested keys to 1256 + configure the reclaim behavior. For example, specify the 1257 + type of memory to reclaim from (anon, file, ..). 1254 1258 1255 1259 Please note that the kernel can over or under reclaim from 1256 1260 the target cgroup. If less bytes are reclaimed than the ··· 1266 1262 the memory reclaim normally is not exercised in this case. 1267 1263 This means that the networking layer will not adapt based on 1268 1264 reclaim induced by memory.reclaim. 1269 - 1270 - This file also allows the user to specify the nodes to reclaim from, 1271 - via the 'nodes=' key, for example:: 1272 - 1273 - echo "1G nodes=0,1" > memory.reclaim 1274 - 1275 - The above instructs the kernel to reclaim memory from nodes 0,1. 1276 1265 1277 1266 memory.peak 1278 1267 A read-only single value file which exists on non-root
+5 -2
arch/ia64/kernel/sys_ia64.c
··· 170 170 asmlinkage long 171 171 ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *tp) 172 172 { 173 + struct timespec64 rtn_tp; 174 + s64 tick_ns; 175 + 173 176 /* 174 177 * ia64's clock_gettime() syscall is implemented as a vdso call 175 178 * fsys_clock_gettime(). Currently it handles only ··· 188 185 switch (which_clock) { 189 186 case CLOCK_REALTIME: 190 187 case CLOCK_MONOTONIC: 191 - s64 tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq); 192 - struct timespec64 rtn_tp = ns_to_timespec64(tick_ns); 188 + tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq); 189 + rtn_tp = ns_to_timespec64(tick_ns); 193 190 return put_timespec64(&rtn_tp, tp); 194 191 } 195 192
+1
arch/sh/kernel/vmlinux.lds.S
··· 4 4 * Written by Niibe Yutaka and Paul Mundt 5 5 */ 6 6 OUTPUT_ARCH(sh) 7 + #define RUNTIME_DISCARD_EXIT 7 8 #include <asm/thread_info.h> 8 9 #include <asm/cache.h> 9 10 #include <asm/vmlinux.lds.h>
+1 -5
drivers/of/fdt.c
··· 26 26 #include <linux/serial_core.h> 27 27 #include <linux/sysfs.h> 28 28 #include <linux/random.h> 29 - #include <linux/kmemleak.h> 30 29 31 30 #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ 32 31 #include <asm/page.h> ··· 524 525 size = dt_mem_next_cell(dt_root_size_cells, &prop); 525 526 526 527 if (size && 527 - early_init_dt_reserve_memory(base, size, nomap) == 0) { 528 + early_init_dt_reserve_memory(base, size, nomap) == 0) 528 529 pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", 529 530 uname, &base, (unsigned long)(size / SZ_1M)); 530 - if (!nomap) 531 - kmemleak_alloc_phys(base, size, 0); 532 - } 533 531 else 534 532 pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n", 535 533 uname, &base, (unsigned long)(size / SZ_1M));
+1 -1
fs/freevxfs/Kconfig
··· 8 8 of SCO UnixWare (and possibly others) and optionally available 9 9 for Sunsoft Solaris, HP-UX and many other operating systems. However 10 10 these particular OS implementations of vxfs may differ in on-disk 11 - data endianess and/or superblock offset. The vxfs module has been 11 + data endianness and/or superblock offset. The vxfs module has been 12 12 tested with SCO UnixWare and HP-UX B.10.20 (pa-risc 1.1 arch.) 13 13 Currently only readonly access is supported and VxFX versions 14 14 2, 3 and 4. Tests were performed with HP-UX VxFS version 3.
+1 -3
fs/proc/task_mmu.c
··· 745 745 page = pfn_swap_entry_to_page(swpent); 746 746 } 747 747 if (page) { 748 - int mapcount = page_mapcount(page); 749 - 750 - if (mapcount >= 2) 748 + if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte)) 751 749 mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); 752 750 else 753 751 mss->private_hugetlb += huge_page_size(hstate_vma(vma));
+1 -1
fs/squashfs/squashfs_fs.h
··· 183 183 #define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ 184 184 sizeof(u64)) 185 185 /* xattr id lookup table defines */ 186 - #define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id)) 186 + #define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id)) 187 187 188 188 #define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \ 189 189 SQUASHFS_METADATA_SIZE)
+1 -1
fs/squashfs/squashfs_fs_sb.h
··· 63 63 long long bytes_used; 64 64 unsigned int inodes; 65 65 unsigned int fragments; 66 - int xattr_ids; 66 + unsigned int xattr_ids; 67 67 unsigned int ids; 68 68 bool panic_on_errors; 69 69 const struct squashfs_decompressor_thread_ops *thread_ops;
+2 -2
fs/squashfs/xattr.h
··· 10 10 11 11 #ifdef CONFIG_SQUASHFS_XATTR 12 12 extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, 13 - u64 *, int *); 13 + u64 *, unsigned int *); 14 14 extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, 15 15 unsigned int *, unsigned long long *); 16 16 #else 17 17 static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, 18 - u64 start, u64 *xattr_table_start, int *xattr_ids) 18 + u64 start, u64 *xattr_table_start, unsigned int *xattr_ids) 19 19 { 20 20 struct squashfs_xattr_id_table *id_table; 21 21
+2 -2
fs/squashfs/xattr_id.c
··· 56 56 * Read uncompressed xattr id lookup table indexes from disk into memory 57 57 */ 58 58 __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, 59 - u64 *xattr_table_start, int *xattr_ids) 59 + u64 *xattr_table_start, unsigned int *xattr_ids) 60 60 { 61 61 struct squashfs_sb_info *msblk = sb->s_fs_info; 62 62 unsigned int len, indexes; ··· 76 76 /* Sanity check values */ 77 77 78 78 /* there is always at least one xattr id */ 79 - if (*xattr_ids == 0) 79 + if (*xattr_ids <= 0) 80 80 return ERR_PTR(-EINVAL); 81 81 82 82 len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
+2 -2
include/linux/highmem-internal.h
··· 200 200 static inline void __kunmap_local(const void *addr) 201 201 { 202 202 #ifdef ARCH_HAS_FLUSH_ON_KUNMAP 203 - kunmap_flush_on_unmap(addr); 203 + kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); 204 204 #endif 205 205 } 206 206 ··· 227 227 static inline void __kunmap_atomic(const void *addr) 228 228 { 229 229 #ifdef ARCH_HAS_FLUSH_ON_KUNMAP 230 - kunmap_flush_on_unmap(addr); 230 + kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); 231 231 #endif 232 232 pagefault_enable(); 233 233 if (IS_ENABLED(CONFIG_PREEMPT_RT))
+13
include/linux/hugetlb.h
··· 7 7 #include <linux/fs.h> 8 8 #include <linux/hugetlb_inline.h> 9 9 #include <linux/cgroup.h> 10 + #include <linux/page_ref.h> 10 11 #include <linux/list.h> 11 12 #include <linux/kref.h> 12 13 #include <linux/pgtable.h> ··· 1185 1184 #else 1186 1185 static inline __init void hugetlb_cma_reserve(int order) 1187 1186 { 1187 + } 1188 + #endif 1189 + 1190 + #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE 1191 + static inline bool hugetlb_pmd_shared(pte_t *pte) 1192 + { 1193 + return page_count(virt_to_page(pte)) > 1; 1194 + } 1195 + #else 1196 + static inline bool hugetlb_pmd_shared(pte_t *pte) 1197 + { 1198 + return false; 1188 1199 } 1189 1200 #endif 1190 1201
+4 -1
include/linux/memcontrol.h
··· 1666 1666 static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, 1667 1667 struct bdi_writeback *wb) 1668 1668 { 1669 + struct mem_cgroup *memcg; 1670 + 1669 1671 if (mem_cgroup_disabled()) 1670 1672 return; 1671 1673 1672 - if (unlikely(&folio_memcg(folio)->css != wb->memcg_css)) 1674 + memcg = folio_memcg(folio); 1675 + if (unlikely(memcg && &memcg->css != wb->memcg_css)) 1673 1676 mem_cgroup_track_foreign_dirty_slowpath(folio, wb); 1674 1677 } 1675 1678
+1 -2
include/linux/swap.h
··· 418 418 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, 419 419 unsigned long nr_pages, 420 420 gfp_t gfp_mask, 421 - unsigned int reclaim_options, 422 - nodemask_t *nodemask); 421 + unsigned int reclaim_options); 423 422 extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, 424 423 gfp_t gfp_mask, bool noswap, 425 424 pg_data_t *pgdat,
+2 -1
lib/Kconfig.debug
··· 754 754 select KALLSYMS 755 755 select CRC32 756 756 select STACKDEPOT 757 + select STACKDEPOT_ALWAYS_INIT if !DEBUG_KMEMLEAK_DEFAULT_OFF 757 758 help 758 759 Say Y here if you want to enable the memory leak 759 760 detector. The memory allocation/freeing is traced in a way ··· 1208 1207 depends on DEBUG_KERNEL && PROC_FS 1209 1208 default y 1210 1209 help 1211 - If you say Y here, the /proc/sched_debug file will be provided 1210 + If you say Y here, the /sys/kernel/debug/sched file will be provided 1212 1211 that can help debug the scheduler. The runtime overhead of this 1213 1212 option is minimal. 1214 1213
+11 -11
lib/maple_tree.c
··· 670 670 unsigned char piv) 671 671 { 672 672 struct maple_node *node = mte_to_node(mn); 673 + enum maple_type type = mte_node_type(mn); 673 674 674 - if (piv >= mt_pivots[piv]) { 675 + if (piv >= mt_pivots[type]) { 675 676 WARN_ON(1); 676 677 return 0; 677 678 } 678 - switch (mte_node_type(mn)) { 679 + switch (type) { 679 680 case maple_arange_64: 680 681 return node->ma64.pivot[piv]; 681 682 case maple_range_64: ··· 4888 4887 unsigned long *pivots, *gaps; 4889 4888 void __rcu **slots; 4890 4889 unsigned long gap = 0; 4891 - unsigned long max, min, index; 4890 + unsigned long max, min; 4892 4891 unsigned char offset; 4893 4892 4894 4893 if (unlikely(mas_is_err(mas))) ··· 4910 4909 min = mas_safe_min(mas, pivots, --offset); 4911 4910 4912 4911 max = mas_safe_pivot(mas, pivots, offset, type); 4913 - index = mas->index; 4914 - while (index <= max) { 4912 + while (mas->index <= max) { 4915 4913 gap = 0; 4916 4914 if (gaps) 4917 4915 gap = gaps[offset]; ··· 4941 4941 min = mas_safe_min(mas, pivots, offset); 4942 4942 } 4943 4943 4944 - if (unlikely(index > max)) { 4945 - mas_set_err(mas, -EBUSY); 4946 - return false; 4947 - } 4944 + if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4945 + goto no_space; 4948 4946 4949 4947 if (unlikely(ma_is_leaf(type))) { 4950 4948 mas->offset = offset; ··· 4959 4961 return false; 4960 4962 4961 4963 ascend: 4962 - if (mte_is_root(mas->node)) 4963 - mas_set_err(mas, -EBUSY); 4964 + if (!mte_is_root(mas->node)) 4965 + return false; 4964 4966 4967 + no_space: 4968 + mas_set_err(mas, -EBUSY); 4965 4969 return false; 4966 4970 } 4967 4971
+89
lib/test_maple_tree.c
··· 2517 2517 mt_set_non_kernel(0); 2518 2518 } 2519 2519 2520 + static noinline void check_empty_area_window(struct maple_tree *mt) 2521 + { 2522 + unsigned long i, nr_entries = 20; 2523 + MA_STATE(mas, mt, 0, 0); 2524 + 2525 + for (i = 1; i <= nr_entries; i++) 2526 + mtree_store_range(mt, i*10, i*10 + 9, 2527 + xa_mk_value(i), GFP_KERNEL); 2528 + 2529 + /* Create another hole besides the one at 0 */ 2530 + mtree_store_range(mt, 160, 169, NULL, GFP_KERNEL); 2531 + 2532 + /* Check lower bounds that don't fit */ 2533 + rcu_read_lock(); 2534 + MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 90, 10) != -EBUSY); 2535 + 2536 + mas_reset(&mas); 2537 + MT_BUG_ON(mt, mas_empty_area_rev(&mas, 6, 90, 5) != -EBUSY); 2538 + 2539 + /* Check lower bound that does fit */ 2540 + mas_reset(&mas); 2541 + MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 90, 5) != 0); 2542 + MT_BUG_ON(mt, mas.index != 5); 2543 + MT_BUG_ON(mt, mas.last != 9); 2544 + rcu_read_unlock(); 2545 + 2546 + /* Check one gap that doesn't fit and one that does */ 2547 + rcu_read_lock(); 2548 + mas_reset(&mas); 2549 + MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 217, 9) != 0); 2550 + MT_BUG_ON(mt, mas.index != 161); 2551 + MT_BUG_ON(mt, mas.last != 169); 2552 + 2553 + /* Check one gap that does fit above the min */ 2554 + mas_reset(&mas); 2555 + MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 218, 3) != 0); 2556 + MT_BUG_ON(mt, mas.index != 216); 2557 + MT_BUG_ON(mt, mas.last != 218); 2558 + 2559 + /* Check size that doesn't fit any gap */ 2560 + mas_reset(&mas); 2561 + MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 218, 16) != -EBUSY); 2562 + 2563 + /* 2564 + * Check size that doesn't fit the lower end of the window but 2565 + * does fit the gap 2566 + */ 2567 + mas_reset(&mas); 2568 + MT_BUG_ON(mt, mas_empty_area_rev(&mas, 167, 200, 4) != -EBUSY); 2569 + 2570 + /* 2571 + * Check size that doesn't fit the upper end of the window but 2572 + * does fit the gap 2573 + */ 2574 + mas_reset(&mas); 2575 + MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 162, 4) != -EBUSY); 2576 + 2577 + /* Check mas_empty_area forward */ 2578 + mas_reset(&mas); 2579 + MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 9) != 0); 2580 + MT_BUG_ON(mt, mas.index != 0); 2581 + MT_BUG_ON(mt, mas.last != 8); 2582 + 2583 + mas_reset(&mas); 2584 + MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 4) != 0); 2585 + MT_BUG_ON(mt, mas.index != 0); 2586 + MT_BUG_ON(mt, mas.last != 3); 2587 + 2588 + mas_reset(&mas); 2589 + MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 11) != -EBUSY); 2590 + 2591 + mas_reset(&mas); 2592 + MT_BUG_ON(mt, mas_empty_area(&mas, 5, 100, 6) != -EBUSY); 2593 + 2594 + mas_reset(&mas); 2595 + MT_BUG_ON(mt, mas_empty_area(&mas, 0, 8, 10) != -EBUSY); 2596 + 2597 + mas_reset(&mas); 2598 + mas_empty_area(&mas, 100, 165, 3); 2599 + 2600 + mas_reset(&mas); 2601 + MT_BUG_ON(mt, mas_empty_area(&mas, 100, 163, 6) != -EBUSY); 2602 + rcu_read_unlock(); 2603 + } 2604 + 2520 2605 static DEFINE_MTREE(tree); 2521 2606 static int maple_tree_seed(void) 2522 2607 { ··· 2848 2763 2849 2764 mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); 2850 2765 check_bnode_min_spanning(&tree); 2766 + mtree_destroy(&tree); 2767 + 2768 + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); 2769 + check_empty_area_window(&tree); 2851 2770 mtree_destroy(&tree); 2852 2771 2853 2772 #if defined(BENCH)
+3
mm/hugetlb.c
··· 5051 5051 entry = huge_pte_clear_uffd_wp(entry); 5052 5052 set_huge_pte_at(dst, addr, dst_pte, entry); 5053 5053 } else if (unlikely(is_pte_marker(entry))) { 5054 + /* No swap on hugetlb */ 5055 + WARN_ON_ONCE( 5056 + is_swapin_error_entry(pte_to_swp_entry(entry))); 5054 5057 /* 5055 5058 * We copy the pte marker only if the dst vma has 5056 5059 * uffd-wp enabled.
+21 -1
mm/khugepaged.c
··· 847 847 return SCAN_SUCCEED; 848 848 } 849 849 850 + /* 851 + * See pmd_trans_unstable() for how the result may change out from 852 + * underneath us, even if we hold mmap_lock in read. 853 + */ 850 854 static int find_pmd_or_thp_or_none(struct mm_struct *mm, 851 855 unsigned long address, 852 856 pmd_t **pmd) ··· 869 865 #endif 870 866 if (pmd_none(pmde)) 871 867 return SCAN_PMD_NONE; 868 + if (!pmd_present(pmde)) 869 + return SCAN_PMD_NULL; 872 870 if (pmd_trans_huge(pmde)) 873 871 return SCAN_PMD_MAPPED; 872 + if (pmd_devmap(pmde)) 873 + return SCAN_PMD_NULL; 874 874 if (pmd_bad(pmde)) 875 875 return SCAN_PMD_NULL; 876 876 return SCAN_SUCCEED; ··· 1650 1642 * has higher cost too. It would also probably require locking 1651 1643 * the anon_vma. 1652 1644 */ 1653 - if (vma->anon_vma) { 1645 + if (READ_ONCE(vma->anon_vma)) { 1654 1646 result = SCAN_PAGE_ANON; 1655 1647 goto next; 1656 1648 } ··· 1678 1670 result = SCAN_PTE_MAPPED_HUGEPAGE; 1679 1671 if ((cc->is_khugepaged || is_target) && 1680 1672 mmap_write_trylock(mm)) { 1673 + /* 1674 + * Re-check whether we have an ->anon_vma, because 1675 + * collapse_and_free_pmd() requires that either no 1676 + * ->anon_vma exists or the anon_vma is locked. 1677 + * We already checked ->anon_vma above, but that check 1678 + * is racy because ->anon_vma can be populated under the 1679 + * mmap lock in read mode. 1680 + */ 1681 + if (vma->anon_vma) { 1682 + result = SCAN_PAGE_ANON; 1683 + goto unlock_next; 1684 + } 1681 1685 /* 1682 1686 * When a vma is registered with uffd-wp, we can't 1683 1687 * recycle the pmd pgtable because there can be pte
+3 -2
mm/kmemleak.c
··· 2070 2070 return -EINVAL; 2071 2071 if (strcmp(str, "off") == 0) 2072 2072 kmemleak_disable(); 2073 - else if (strcmp(str, "on") == 0) 2073 + else if (strcmp(str, "on") == 0) { 2074 2074 kmemleak_skip_disable = 1; 2075 + stack_depot_want_early_init(); 2076 + } 2075 2077 else 2076 2078 return -EINVAL; 2077 2079 return 0; ··· 2095 2093 if (kmemleak_error) 2096 2094 return; 2097 2095 2098 - stack_depot_init(); 2099 2096 jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); 2100 2097 jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000); 2101 2098
+13 -54
mm/memcontrol.c
··· 63 63 #include <linux/resume_user_mode.h> 64 64 #include <linux/psi.h> 65 65 #include <linux/seq_buf.h> 66 - #include <linux/parser.h> 67 66 #include "internal.h" 68 67 #include <net/sock.h> 69 68 #include <net/ip.h> ··· 2392 2393 psi_memstall_enter(&pflags); 2393 2394 nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages, 2394 2395 gfp_mask, 2395 - MEMCG_RECLAIM_MAY_SWAP, 2396 - NULL); 2396 + MEMCG_RECLAIM_MAY_SWAP); 2397 2397 psi_memstall_leave(&pflags); 2398 2398 } while ((memcg = parent_mem_cgroup(memcg)) && 2399 2399 !mem_cgroup_is_root(memcg)); ··· 2683 2685 2684 2686 psi_memstall_enter(&pflags); 2685 2687 nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, 2686 - gfp_mask, reclaim_options, 2687 - NULL); 2688 + gfp_mask, reclaim_options); 2688 2689 psi_memstall_leave(&pflags); 2689 2690 2690 2691 if (mem_cgroup_margin(mem_over_limit) >= nr_pages) ··· 3503 3506 } 3504 3507 3505 3508 if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, 3506 - memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP, 3507 - NULL)) { 3509 + memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) { 3508 3510 ret = -EBUSY; 3509 3511 break; 3510 3512 } ··· 3614 3618 return -EINTR; 3615 3619 3616 3620 if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, 3617 - MEMCG_RECLAIM_MAY_SWAP, 3618 - NULL)) 3621 + MEMCG_RECLAIM_MAY_SWAP)) 3619 3622 nr_retries--; 3620 3623 } 3621 3624 ··· 6424 6429 } 6425 6430 6426 6431 reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high, 6427 - GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP, 6428 - NULL); 6432 + GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP); 6429 6433 6430 6434 if (!reclaimed && !nr_retries--) 6431 6435 break; ··· 6473 6479 6474 6480 if (nr_reclaims) { 6475 6481 if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max, 6476 - GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP, 6477 - NULL)) 6482 + GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP)) 6478 6483 nr_reclaims--; 6479 6484 continue; 6480 6485 } ··· 6596 6603 return nbytes; 6597 6604 } 6598 6605 6599 - enum { 6600 - MEMORY_RECLAIM_NODES = 0, 6601 - MEMORY_RECLAIM_NULL, 6602 - }; 6603 - 6604 - static const match_table_t if_tokens = { 6605 - { MEMORY_RECLAIM_NODES, "nodes=%s" }, 6606 - { MEMORY_RECLAIM_NULL, NULL }, 6607 - }; 6608 - 6609 6606 static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, 6610 6607 size_t nbytes, loff_t off) 6611 6608 { 6612 6609 struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); 6613 6610 unsigned int nr_retries = MAX_RECLAIM_RETRIES; 6614 6611 unsigned long nr_to_reclaim, nr_reclaimed = 0; 6615 - unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP | 6616 - MEMCG_RECLAIM_PROACTIVE; 6617 - char *old_buf, *start; 6618 - substring_t args[MAX_OPT_ARGS]; 6619 - int token; 6620 - char value[256]; 6621 - nodemask_t nodemask = NODE_MASK_ALL; 6612 + unsigned int reclaim_options; 6613 + int err; 6622 6614 6623 6615 buf = strstrip(buf); 6616 + err = page_counter_memparse(buf, "", &nr_to_reclaim); 6617 + if (err) 6618 + return err; 6624 6619 6625 - old_buf = buf; 6626 - nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE; 6627 - if (buf == old_buf) 6628 - return -EINVAL; 6629 - 6630 - buf = strstrip(buf); 6631 - 6632 - while ((start = strsep(&buf, " ")) != NULL) { 6633 - if (!strlen(start)) 6634 - continue; 6635 - token = match_token(start, if_tokens, args); 6636 - match_strlcpy(value, args, sizeof(value)); 6637 - switch (token) { 6638 - case MEMORY_RECLAIM_NODES: 6639 - if (nodelist_parse(value, nodemask) < 0) 6640 - return -EINVAL; 6641 - break; 6642 - default: 6643 - return -EINVAL; 6644 - } 6645 - } 6646 - 6620 + reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE; 6647 6621 while (nr_reclaimed < nr_to_reclaim) { 6648 6622 unsigned long reclaimed; 6649 6623 ··· 6627 6667 6628 6668 reclaimed = try_to_free_mem_cgroup_pages(memcg, 6629 6669 nr_to_reclaim - nr_reclaimed, 6630 - GFP_KERNEL, reclaim_options, 6631 - &nodemask); 6670 + GFP_KERNEL, reclaim_options); 6632 6671 6633 6672 if (!reclaimed && !nr_retries--) 6634 6673 return -EAGAIN;
+7 -7
mm/memory.c
··· 828 828 return -EBUSY; 829 829 return -ENOENT; 830 830 } else if (is_pte_marker_entry(entry)) { 831 - /* 832 - * We're copying the pgtable should only because dst_vma has 833 - * uffd-wp enabled, do sanity check. 834 - */ 835 - WARN_ON_ONCE(!userfaultfd_wp(dst_vma)); 836 - set_pte_at(dst_mm, addr, dst_pte, pte); 831 + if (is_swapin_error_entry(entry) || userfaultfd_wp(dst_vma)) 832 + set_pte_at(dst_mm, addr, dst_pte, pte); 837 833 return 0; 838 834 } 839 835 if (!userfaultfd_wp(dst_vma)) ··· 3625 3629 /* 3626 3630 * Be careful so that we will only recover a special uffd-wp pte into a 3627 3631 * none pte. Otherwise it means the pte could have changed, so retry. 3632 + * 3633 + * This should also cover the case where e.g. the pte changed 3634 + * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_SWAPIN_ERROR. 3635 + * So is_pte_marker() check is not enough to safely drop the pte. 3628 3636 */ 3629 - if (is_pte_marker(*vmf->pte)) 3637 + if (pte_same(vmf->orig_pte, *vmf->pte)) 3630 3638 pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte); 3631 3639 pte_unmap_unlock(vmf->pte, vmf->ptl); 3632 3640 return 0;
+2 -1
mm/mempolicy.c
··· 600 600 601 601 /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */ 602 602 if (flags & (MPOL_MF_MOVE_ALL) || 603 - (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) { 603 + (flags & MPOL_MF_MOVE && page_mapcount(page) == 1 && 604 + !hugetlb_pmd_shared(pte))) { 604 605 if (isolate_hugetlb(page, qp->pagelist) && 605 606 (flags & MPOL_MF_STRICT)) 606 607 /*
+7 -1
mm/mprotect.c
··· 245 245 newpte = pte_swp_mksoft_dirty(newpte); 246 246 if (pte_swp_uffd_wp(oldpte)) 247 247 newpte = pte_swp_mkuffd_wp(newpte); 248 - } else if (pte_marker_entry_uffd_wp(entry)) { 248 + } else if (is_pte_marker_entry(entry)) { 249 + /* 250 + * Ignore swapin errors unconditionally, 251 + * because any access should sigbus anyway. 252 + */ 253 + if (is_swapin_error_entry(entry)) 254 + continue; 249 255 /* 250 256 * If this is uffd-wp pte marker and we'd like 251 257 * to unprotect it, drop it; the next page
+19 -6
mm/mremap.c
··· 1027 1027 } 1028 1028 1029 1029 /* 1030 - * Function vma_merge() is called on the extension we are adding to 1031 - * the already existing vma, vma_merge() will merge this extension with 1032 - * the already existing vma (expand operation itself) and possibly also 1033 - * with the next vma if it becomes adjacent to the expanded vma and 1034 - * otherwise compatible. 1030 + * Function vma_merge() is called on the extension we 1031 + * are adding to the already existing vma, vma_merge() 1032 + * will merge this extension with the already existing 1033 + * vma (expand operation itself) and possibly also with 1034 + * the next vma if it becomes adjacent to the expanded 1035 + * vma and otherwise compatible. 1036 + * 1037 + * However, vma_merge() can currently fail due to 1038 + * is_mergeable_vma() check for vm_ops->close (see the 1039 + * comment there). Yet this should not prevent vma 1040 + * expanding, so perform a simple expand for such vma. 1041 + * Ideally the check for close op should be only done 1042 + * when a vma would be actually removed due to a merge. 1035 1043 */ 1036 - vma = vma_merge(mm, vma, extension_start, extension_end, 1044 + if (!vma->vm_ops || !vma->vm_ops->close) { 1045 + vma = vma_merge(mm, vma, extension_start, extension_end, 1037 1046 vma->vm_flags, vma->anon_vma, vma->vm_file, 1038 1047 extension_pgoff, vma_policy(vma), 1039 1048 vma->vm_userfaultfd_ctx, anon_vma_name(vma)); 1049 + } else if (vma_adjust(vma, vma->vm_start, addr + new_len, 1050 + vma->vm_pgoff, NULL)) { 1051 + vma = NULL; 1052 + } 1040 1053 if (!vma) { 1041 1054 vm_unacct_memory(pages); 1042 1055 ret = -ENOMEM;
+1
mm/swapfile.c
··· 1100 1100 goto check_out; 1101 1101 pr_debug("scan_swap_map of si %d failed to find offset\n", 1102 1102 si->type); 1103 + cond_resched(); 1103 1104 1104 1105 spin_lock(&swap_avail_lock); 1105 1106 nextsi:
+5 -4
mm/vmscan.c
··· 3323 3323 if (mem_cgroup_disabled()) 3324 3324 return; 3325 3325 3326 + /* migration can happen before addition */ 3327 + if (!mm->lru_gen.memcg) 3328 + return; 3329 + 3326 3330 rcu_read_lock(); 3327 3331 memcg = mem_cgroup_from_task(task); 3328 3332 rcu_read_unlock(); 3329 3333 if (memcg == mm->lru_gen.memcg) 3330 3334 return; 3331 3335 3332 - VM_WARN_ON_ONCE(!mm->lru_gen.memcg); 3333 3336 VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list)); 3334 3337 3335 3338 lru_gen_del_mm(mm); ··· 6757 6754 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, 6758 6755 unsigned long nr_pages, 6759 6756 gfp_t gfp_mask, 6760 - unsigned int reclaim_options, 6761 - nodemask_t *nodemask) 6757 + unsigned int reclaim_options) 6762 6758 { 6763 6759 unsigned long nr_reclaimed; 6764 6760 unsigned int noreclaim_flag; ··· 6772 6770 .may_unmap = 1, 6773 6771 .may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP), 6774 6772 .proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE), 6775 - .nodemask = nodemask, 6776 6773 }; 6777 6774 /* 6778 6775 * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
+205 -32
mm/zsmalloc.c
··· 113 113 * have room for two bit at least. 114 114 */ 115 115 #define OBJ_ALLOCATED_TAG 1 116 - #define OBJ_TAG_BITS 1 116 + 117 + #ifdef CONFIG_ZPOOL 118 + /* 119 + * The second least-significant bit in the object's header identifies if the 120 + * value stored at the header is a deferred handle from the last reclaim 121 + * attempt. 122 + * 123 + * As noted above, this is valid because we have room for two bits. 124 + */ 125 + #define OBJ_DEFERRED_HANDLE_TAG 2 126 + #define OBJ_TAG_BITS 2 127 + #define OBJ_TAG_MASK (OBJ_ALLOCATED_TAG | OBJ_DEFERRED_HANDLE_TAG) 128 + #else 129 + #define OBJ_TAG_BITS 1 130 + #define OBJ_TAG_MASK OBJ_ALLOCATED_TAG 131 + #endif /* CONFIG_ZPOOL */ 132 + 117 133 #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS) 118 134 #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) 119 135 ··· 238 222 * Handle of allocated object. 239 223 */ 240 224 unsigned long handle; 225 + #ifdef CONFIG_ZPOOL 226 + /* 227 + * Deferred handle of a reclaimed object. 228 + */ 229 + unsigned long deferred_handle; 230 + #endif 241 231 }; 242 232 }; 243 233 ··· 294 272 /* links the zspage to the lru list in the pool */ 295 273 struct list_head lru; 296 274 bool under_reclaim; 297 - /* list of unfreed handles whose objects have been reclaimed */ 298 - unsigned long *deferred_handles; 299 275 #endif 300 276 301 277 struct zs_pool *pool; ··· 917 897 return *(unsigned long *)handle; 918 898 } 919 899 920 - static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle) 900 + static bool obj_tagged(struct page *page, void *obj, unsigned long *phandle, 901 + int tag) 921 902 { 922 903 unsigned long handle; 923 904 struct zspage *zspage = get_zspage(page); ··· 929 908 } else 930 909 handle = *(unsigned long *)obj; 931 910 932 - if (!(handle & OBJ_ALLOCATED_TAG)) 911 + if (!(handle & tag)) 933 912 return false; 934 913 935 - *phandle = handle & ~OBJ_ALLOCATED_TAG; 914 + /* Clear all tags before returning the handle */ 915 + *phandle = handle & ~OBJ_TAG_MASK; 936 916 return true; 937 917 } 918 + 919 + static inline bool obj_allocated(struct page *page, void *obj, unsigned long *phandle) 920 + { 921 + return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG); 922 + } 923 + 924 + #ifdef CONFIG_ZPOOL 925 + static bool obj_stores_deferred_handle(struct page *page, void *obj, 926 + unsigned long *phandle) 927 + { 928 + return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG); 929 + } 930 + #endif 938 931 939 932 static void reset_page(struct page *page) 940 933 { ··· 981 946 } 982 947 983 948 #ifdef CONFIG_ZPOOL 949 + static unsigned long find_deferred_handle_obj(struct size_class *class, 950 + struct page *page, int *obj_idx); 951 + 984 952 /* 985 953 * Free all the deferred handles whose objects are freed in zs_free. 986 954 */ 987 - static void free_handles(struct zs_pool *pool, struct zspage *zspage) 955 + static void free_handles(struct zs_pool *pool, struct size_class *class, 956 + struct zspage *zspage) 988 957 { 989 - unsigned long handle = (unsigned long)zspage->deferred_handles; 958 + int obj_idx = 0; 959 + struct page *page = get_first_page(zspage); 960 + unsigned long handle; 990 961 991 - while (handle) { 992 - unsigned long nxt_handle = handle_to_obj(handle); 962 + while (1) { 963 + handle = find_deferred_handle_obj(class, page, &obj_idx); 964 + if (!handle) { 965 + page = get_next_page(page); 966 + if (!page) 967 + break; 968 + obj_idx = 0; 969 + continue; 970 + } 993 971 994 972 cache_free_handle(pool, handle); 995 - handle = nxt_handle; 973 + obj_idx++; 996 974 } 997 975 } 998 976 #else 999 - static inline void free_handles(struct zs_pool *pool, struct zspage *zspage) {} 977 + static inline void free_handles(struct zs_pool *pool, struct size_class *class, 978 + struct zspage *zspage) {} 1000 979 #endif 1001 980 1002 981 static void __free_zspage(struct zs_pool *pool, struct size_class *class, ··· 1028 979 VM_BUG_ON(fg != ZS_EMPTY); 1029 980 1030 981 /* Free all deferred handles from zs_free */ 1031 - free_handles(pool, zspage); 982 + free_handles(pool, class, zspage); 1032 983 1033 984 next = page = get_first_page(zspage); 1034 985 do { ··· 1116 1067 #ifdef CONFIG_ZPOOL 1117 1068 INIT_LIST_HEAD(&zspage->lru); 1118 1069 zspage->under_reclaim = false; 1119 - zspage->deferred_handles = NULL; 1120 1070 #endif 1121 1071 1122 1072 set_freeobj(zspage, 0); ··· 1616 1568 } 1617 1569 EXPORT_SYMBOL_GPL(zs_malloc); 1618 1570 1619 - static void obj_free(int class_size, unsigned long obj) 1571 + static void obj_free(int class_size, unsigned long obj, unsigned long *handle) 1620 1572 { 1621 1573 struct link_free *link; 1622 1574 struct zspage *zspage; ··· 1630 1582 zspage = get_zspage(f_page); 1631 1583 1632 1584 vaddr = kmap_atomic(f_page); 1633 - 1634 - /* Insert this object in containing zspage's freelist */ 1635 1585 link = (struct link_free *)(vaddr + f_offset); 1636 - if (likely(!ZsHugePage(zspage))) 1637 - link->next = get_freeobj(zspage) << OBJ_TAG_BITS; 1638 - else 1639 - f_page->index = 0; 1586 + 1587 + if (handle) { 1588 + #ifdef CONFIG_ZPOOL 1589 + /* Stores the (deferred) handle in the object's header */ 1590 + *handle |= OBJ_DEFERRED_HANDLE_TAG; 1591 + *handle &= ~OBJ_ALLOCATED_TAG; 1592 + 1593 + if (likely(!ZsHugePage(zspage))) 1594 + link->deferred_handle = *handle; 1595 + else 1596 + f_page->index = *handle; 1597 + #endif 1598 + } else { 1599 + /* Insert this object in containing zspage's freelist */ 1600 + if (likely(!ZsHugePage(zspage))) 1601 + link->next = get_freeobj(zspage) << OBJ_TAG_BITS; 1602 + else 1603 + f_page->index = 0; 1604 + set_freeobj(zspage, f_objidx); 1605 + } 1606 + 1640 1607 kunmap_atomic(vaddr); 1641 - set_freeobj(zspage, f_objidx); 1642 1608 mod_zspage_inuse(zspage, -1); 1643 1609 } 1644 1610 ··· 1677 1615 zspage = get_zspage(f_page); 1678 1616 class = zspage_class(pool, zspage); 1679 1617 1680 - obj_free(class->size, obj); 1681 1618 class_stat_dec(class, OBJ_USED, 1); 1682 1619 1683 1620 #ifdef CONFIG_ZPOOL ··· 1685 1624 * Reclaim needs the handles during writeback. It'll free 1686 1625 * them along with the zspage when it's done with them. 1687 1626 * 1688 - * Record current deferred handle at the memory location 1689 - * whose address is given by handle. 1627 + * Record current deferred handle in the object's header. 1690 1628 */ 1691 - record_obj(handle, (unsigned long)zspage->deferred_handles); 1692 - zspage->deferred_handles = (unsigned long *)handle; 1629 + obj_free(class->size, obj, &handle); 1693 1630 spin_unlock(&pool->lock); 1694 1631 return; 1695 1632 } 1696 1633 #endif 1634 + obj_free(class->size, obj, NULL); 1635 + 1697 1636 fullness = fix_fullness_group(class, zspage); 1698 1637 if (fullness == ZS_EMPTY) 1699 1638 free_zspage(pool, class, zspage); ··· 1774 1713 } 1775 1714 1776 1715 /* 1777 - * Find alloced object in zspage from index object and 1716 + * Find object with a certain tag in zspage from index object and 1778 1717 * return handle. 1779 1718 */ 1780 - static unsigned long find_alloced_obj(struct size_class *class, 1781 - struct page *page, int *obj_idx) 1719 + static unsigned long find_tagged_obj(struct size_class *class, 1720 + struct page *page, int *obj_idx, int tag) 1782 1721 { 1783 1722 unsigned int offset; 1784 1723 int index = *obj_idx; ··· 1789 1728 offset += class->size * index; 1790 1729 1791 1730 while (offset < PAGE_SIZE) { 1792 - if (obj_allocated(page, addr + offset, &handle)) 1731 + if (obj_tagged(page, addr + offset, &handle, tag)) 1793 1732 break; 1794 1733 1795 1734 offset += class->size; ··· 1802 1741 1803 1742 return handle; 1804 1743 } 1744 + 1745 + /* 1746 + * Find alloced object in zspage from index object and 1747 + * return handle. 1748 + */ 1749 + static unsigned long find_alloced_obj(struct size_class *class, 1750 + struct page *page, int *obj_idx) 1751 + { 1752 + return find_tagged_obj(class, page, obj_idx, OBJ_ALLOCATED_TAG); 1753 + } 1754 + 1755 + #ifdef CONFIG_ZPOOL 1756 + /* 1757 + * Find object storing a deferred handle in header in zspage from index object 1758 + * and return handle. 1759 + */ 1760 + static unsigned long find_deferred_handle_obj(struct size_class *class, 1761 + struct page *page, int *obj_idx) 1762 + { 1763 + return find_tagged_obj(class, page, obj_idx, OBJ_DEFERRED_HANDLE_TAG); 1764 + } 1765 + #endif 1805 1766 1806 1767 struct zs_compact_control { 1807 1768 /* Source spage for migration which could be a subpage of zspage */ ··· 1867 1784 zs_object_copy(class, free_obj, used_obj); 1868 1785 obj_idx++; 1869 1786 record_obj(handle, free_obj); 1870 - obj_free(class->size, used_obj); 1787 + obj_free(class->size, used_obj, NULL); 1871 1788 } 1872 1789 1873 1790 /* Remember last position in this iteration */ ··· 2561 2478 EXPORT_SYMBOL_GPL(zs_destroy_pool); 2562 2479 2563 2480 #ifdef CONFIG_ZPOOL 2481 + static void restore_freelist(struct zs_pool *pool, struct size_class *class, 2482 + struct zspage *zspage) 2483 + { 2484 + unsigned int obj_idx = 0; 2485 + unsigned long handle, off = 0; /* off is within-page offset */ 2486 + struct page *page = get_first_page(zspage); 2487 + struct link_free *prev_free = NULL; 2488 + void *prev_page_vaddr = NULL; 2489 + 2490 + /* in case no free object found */ 2491 + set_freeobj(zspage, (unsigned int)(-1UL)); 2492 + 2493 + while (page) { 2494 + void *vaddr = kmap_atomic(page); 2495 + struct page *next_page; 2496 + 2497 + while (off < PAGE_SIZE) { 2498 + void *obj_addr = vaddr + off; 2499 + 2500 + /* skip allocated object */ 2501 + if (obj_allocated(page, obj_addr, &handle)) { 2502 + obj_idx++; 2503 + off += class->size; 2504 + continue; 2505 + } 2506 + 2507 + /* free deferred handle from reclaim attempt */ 2508 + if (obj_stores_deferred_handle(page, obj_addr, &handle)) 2509 + cache_free_handle(pool, handle); 2510 + 2511 + if (prev_free) 2512 + prev_free->next = obj_idx << OBJ_TAG_BITS; 2513 + else /* first free object found */ 2514 + set_freeobj(zspage, obj_idx); 2515 + 2516 + prev_free = (struct link_free *)vaddr + off / sizeof(*prev_free); 2517 + /* if last free object in a previous page, need to unmap */ 2518 + if (prev_page_vaddr) { 2519 + kunmap_atomic(prev_page_vaddr); 2520 + prev_page_vaddr = NULL; 2521 + } 2522 + 2523 + obj_idx++; 2524 + off += class->size; 2525 + } 2526 + 2527 + /* 2528 + * Handle the last (full or partial) object on this page. 2529 + */ 2530 + next_page = get_next_page(page); 2531 + if (next_page) { 2532 + if (!prev_free || prev_page_vaddr) { 2533 + /* 2534 + * There is no free object in this page, so we can safely 2535 + * unmap it. 2536 + */ 2537 + kunmap_atomic(vaddr); 2538 + } else { 2539 + /* update prev_page_vaddr since prev_free is on this page */ 2540 + prev_page_vaddr = vaddr; 2541 + } 2542 + } else { /* this is the last page */ 2543 + if (prev_free) { 2544 + /* 2545 + * Reset OBJ_TAG_BITS bit to last link to tell 2546 + * whether it's allocated object or not. 2547 + */ 2548 + prev_free->next = -1UL << OBJ_TAG_BITS; 2549 + } 2550 + 2551 + /* unmap previous page (if not done yet) */ 2552 + if (prev_page_vaddr) { 2553 + kunmap_atomic(prev_page_vaddr); 2554 + prev_page_vaddr = NULL; 2555 + } 2556 + 2557 + kunmap_atomic(vaddr); 2558 + } 2559 + 2560 + page = next_page; 2561 + off %= PAGE_SIZE; 2562 + } 2563 + } 2564 + 2564 2565 static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries) 2565 2566 { 2566 2567 int i, obj_idx, ret = 0; ··· 2728 2561 return 0; 2729 2562 } 2730 2563 2564 + /* 2565 + * Eviction fails on one of the handles, so we need to restore zspage. 2566 + * We need to rebuild its freelist (and free stored deferred handles), 2567 + * put it back to the correct size class, and add it to the LRU list. 2568 + */ 2569 + restore_freelist(pool, class, zspage); 2731 2570 putback_zspage(class, zspage); 2732 2571 list_add(&zspage->lru, &pool->lru); 2733 2572 unlock_zspage(zspage);
tools/testing/selftests/filesystems/fat/run_fat_tests.sh
-1
tools/testing/selftests/vm/hugetlb-madvise.c
··· 17 17 #include <stdio.h> 18 18 #include <unistd.h> 19 19 #include <sys/mman.h> 20 - #define __USE_GNU 21 20 #include <fcntl.h> 22 21 23 22 #define MIN_FREE_PAGES 20