Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm, swap: mark bad slots in swap table directly

In preparation for deprecating swap_map, also mark bad slots in the swap
table when setting SWAP_MAP_BAD in swap_map. Also, refine the swap table
sanity check on freeing to adapt to the bad slots change. For swapoff,
the bad slots count must match the cluster usage count, as nothing should
touch them, and they contribute to the cluster usage count on swapon. For
ordinary swap table freeing, the swap table of clusters with bad slots
should never be freed since the cluster usage count never reaches zero.

Link: https://lkml.kernel.org/r/20260218-swap-table-p3-v3-7-f4e34be021a7@tencent.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: Chris Li <chrisl@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <ryncsn@gmail.com>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Kairui Song and committed by
Andrew Morton
1307442b 62629ae4

+41 -15
+41 -15
mm/swapfile.c
··· 454 454 swap_table_free_folio_rcu_cb); 455 455 } 456 456 457 + /* 458 + * Sanity check to ensure nothing leaked, and the specified range is empty. 459 + * One special case is that bad slots can't be freed, so check the number of 460 + * bad slots for swapoff, and non-swapoff path must never free bad slots. 461 + */ 462 + static void swap_cluster_assert_empty(struct swap_cluster_info *ci, bool swapoff) 463 + { 464 + unsigned int ci_off = 0, ci_end = SWAPFILE_CLUSTER; 465 + unsigned long swp_tb; 466 + int bad_slots = 0; 467 + 468 + if (!IS_ENABLED(CONFIG_DEBUG_VM) && !swapoff) 469 + return; 470 + 471 + do { 472 + swp_tb = __swap_table_get(ci, ci_off); 473 + if (swp_tb_is_bad(swp_tb)) 474 + bad_slots++; 475 + else 476 + WARN_ON_ONCE(!swp_tb_is_null(swp_tb)); 477 + } while (++ci_off < ci_end); 478 + 479 + WARN_ON_ONCE(bad_slots != (swapoff ? ci->count : 0)); 480 + } 481 + 457 482 static void swap_cluster_free_table(struct swap_cluster_info *ci) 458 483 { 459 - unsigned int ci_off; 460 484 struct swap_table *table; 461 485 462 486 /* Only empty cluster's table is allow to be freed */ 463 487 lockdep_assert_held(&ci->lock); 464 - VM_WARN_ON_ONCE(!cluster_is_empty(ci)); 465 - for (ci_off = 0; ci_off < SWAPFILE_CLUSTER; ci_off++) 466 - VM_WARN_ON_ONCE(!swp_tb_is_null(__swap_table_get(ci, ci_off))); 467 488 table = (void *)rcu_dereference_protected(ci->table, true); 468 489 rcu_assign_pointer(ci->table, NULL); 469 490 ··· 588 567 589 568 static void __free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci) 590 569 { 570 + swap_cluster_assert_empty(ci, false); 591 571 swap_cluster_free_table(ci); 592 572 move_cluster(si, ci, &si->free_clusters, CLUSTER_FLAG_FREE); 593 573 ci->order = 0; ··· 769 747 struct swap_cluster_info *cluster_info, 770 748 unsigned int offset, bool mask) 771 749 { 750 + unsigned int ci_off = offset % SWAPFILE_CLUSTER; 772 751 unsigned long idx = offset / SWAPFILE_CLUSTER; 773 - struct swap_table *table; 774 752 struct 
swap_cluster_info *ci; 753 + struct swap_table *table; 754 + int ret = 0; 775 755 776 756 /* si->max may got shrunk by swap swap_activate() */ 777 757 if (offset >= si->max && !mask) { ··· 791 767 pr_warn("Empty swap-file\n"); 792 768 return -EINVAL; 793 769 } 794 - /* Check for duplicated bad swap slots. */ 795 - if (si->swap_map[offset]) { 796 - pr_warn("Duplicated bad slot offset %d\n", offset); 797 - return -EINVAL; 798 - } 799 770 800 - si->swap_map[offset] = SWAP_MAP_BAD; 801 771 ci = cluster_info + idx; 802 772 if (!ci->table) { 803 773 table = swap_table_alloc(GFP_KERNEL); ··· 799 781 return -ENOMEM; 800 782 rcu_assign_pointer(ci->table, table); 801 783 } 802 - 803 - ci->count++; 784 + spin_lock(&ci->lock); 785 + /* Check for duplicated bad swap slots. */ 786 + if (__swap_table_xchg(ci, ci_off, SWP_TB_BAD) != SWP_TB_NULL) { 787 + pr_warn("Duplicated bad slot offset %d\n", offset); 788 + ret = -EINVAL; 789 + } else { 790 + si->swap_map[offset] = SWAP_MAP_BAD; 791 + ci->count++; 792 + } 793 + spin_unlock(&ci->lock); 804 794 805 795 WARN_ON(ci->count > SWAPFILE_CLUSTER); 806 796 WARN_ON(ci->flags); 807 797 808 - return 0; 798 + return ret; 809 799 } 810 800 811 801 /* ··· 2780 2754 /* Cluster with bad marks count will have a remaining table */ 2781 2755 spin_lock(&ci->lock); 2782 2756 if (rcu_dereference_protected(ci->table, true)) { 2783 - ci->count = 0; 2757 + swap_cluster_assert_empty(ci, true); 2784 2758 swap_cluster_free_table(ci); 2785 2759 } 2786 2760 spin_unlock(&ci->lock);