Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm, swap: clean up swapon process and locking

Slightly clean up the swapon process. Add comments about what swap_lock
protects, introduce and rename helpers that wrap swap_map and cluster_info
setup, and perform that setup outside of swap_lock.

This lock protection is not needed for swap_map and cluster_info setup
because all swap users must either hold the percpu ref or hold a stable
allocated swap entry (e.g., by locking a folio in the swap cache) before
accessing them. So before the swap device is exposed by enable_swap_info,
nothing can use the swap device's map or cluster info.

So it is safe to allocate and set up the swap data freely first, and only
then expose the swap device and set the SWP_WRITEOK flag.

Link: https://lkml.kernel.org/r/20260218-swap-table-p3-v3-2-f4e34be021a7@tencent.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: Chris Li <chrisl@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <ryncsn@gmail.com>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Kairui Song and committed by
Andrew Morton
451c6326 eca4d01b

+50 -41
+50 -41
mm/swapfile.c
··· 65 65 struct swap_cluster_info *ci, struct list_head *list, 66 66 enum swap_cluster_flags new_flags); 67 67 68 + /* 69 + * Protects the swap_info array, and the SWP_USED flag. swap_info contains 70 + * lazily allocated & freed swap device info struts, and SWP_USED indicates 71 + * which device is used, ~SWP_USED devices and can be reused. 72 + * 73 + * Also protects swap_active_head total_swap_pages, and the SWP_WRITEOK flag. 74 + */ 68 75 static DEFINE_SPINLOCK(swap_lock); 69 76 static unsigned int nr_swapfiles; 70 77 atomic_long_t nr_swap_pages; ··· 2664 2657 } 2665 2658 2666 2659 static void setup_swap_info(struct swap_info_struct *si, int prio, 2667 - unsigned char *swap_map, 2668 - struct swap_cluster_info *cluster_info, 2669 2660 unsigned long *zeromap) 2670 2661 { 2671 2662 si->prio = prio; ··· 2673 2668 */ 2674 2669 si->list.prio = -si->prio; 2675 2670 si->avail_list.prio = -si->prio; 2676 - si->swap_map = swap_map; 2677 - si->cluster_info = cluster_info; 2678 2671 si->zeromap = zeromap; 2679 2672 } 2680 2673 ··· 2690 2687 } 2691 2688 2692 2689 static void enable_swap_info(struct swap_info_struct *si, int prio, 2693 - unsigned char *swap_map, 2694 - struct swap_cluster_info *cluster_info, 2695 - unsigned long *zeromap) 2690 + unsigned long *zeromap) 2696 2691 { 2697 2692 spin_lock(&swap_lock); 2698 2693 spin_lock(&si->lock); 2699 - setup_swap_info(si, prio, swap_map, cluster_info, zeromap); 2694 + setup_swap_info(si, prio, zeromap); 2700 2695 spin_unlock(&si->lock); 2701 2696 spin_unlock(&swap_lock); 2702 2697 /* ··· 2712 2711 { 2713 2712 spin_lock(&swap_lock); 2714 2713 spin_lock(&si->lock); 2715 - setup_swap_info(si, si->prio, si->swap_map, si->cluster_info, si->zeromap); 2714 + setup_swap_info(si, si->prio, si->zeromap); 2716 2715 _enable_swap_info(si); 2717 2716 spin_unlock(&si->lock); 2718 2717 spin_unlock(&swap_lock); ··· 2736 2735 } 2737 2736 } 2738 2737 2739 - static void free_cluster_info(struct swap_cluster_info *cluster_info, 2740 - unsigned 
long maxpages) 2738 + static void free_swap_cluster_info(struct swap_cluster_info *cluster_info, 2739 + unsigned long maxpages) 2741 2740 { 2742 2741 struct swap_cluster_info *ci; 2743 2742 int i, nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); ··· 2890 2889 p->global_cluster = NULL; 2891 2890 vfree(swap_map); 2892 2891 kvfree(zeromap); 2893 - free_cluster_info(cluster_info, maxpages); 2892 + free_swap_cluster_info(cluster_info, maxpages); 2894 2893 /* Destroy swap account information */ 2895 2894 swap_cgroup_swapoff(p->type); 2896 2895 ··· 3237 3236 3238 3237 static int setup_swap_map(struct swap_info_struct *si, 3239 3238 union swap_header *swap_header, 3240 - unsigned char *swap_map, 3241 3239 unsigned long maxpages) 3242 3240 { 3243 3241 unsigned long i; 3242 + unsigned char *swap_map; 3243 + 3244 + swap_map = vzalloc(maxpages); 3245 + si->swap_map = swap_map; 3246 + if (!swap_map) 3247 + return -ENOMEM; 3244 3248 3245 3249 swap_map[0] = SWAP_MAP_BAD; /* omit header page */ 3246 3250 for (i = 0; i < swap_header->info.nr_badpages; i++) { ··· 3266 3260 return 0; 3267 3261 } 3268 3262 3269 - static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, 3270 - union swap_header *swap_header, 3271 - unsigned long maxpages) 3263 + static int setup_swap_clusters_info(struct swap_info_struct *si, 3264 + union swap_header *swap_header, 3265 + unsigned long maxpages) 3272 3266 { 3273 3267 unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); 3274 3268 struct swap_cluster_info *cluster_info; ··· 3337 3331 } 3338 3332 } 3339 3333 3340 - return cluster_info; 3334 + si->cluster_info = cluster_info; 3335 + return 0; 3341 3336 err: 3342 - free_cluster_info(cluster_info, maxpages); 3343 - return ERR_PTR(err); 3337 + free_swap_cluster_info(cluster_info, maxpages); 3338 + return err; 3344 3339 } 3345 3340 3346 3341 SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) ··· 3356 3349 int nr_extents; 3357 3350 sector_t 
span; 3358 3351 unsigned long maxpages; 3359 - unsigned char *swap_map = NULL; 3360 3352 unsigned long *zeromap = NULL; 3361 - struct swap_cluster_info *cluster_info = NULL; 3362 3353 struct folio *folio = NULL; 3363 3354 struct inode *inode = NULL; 3364 3355 bool inced_nr_rotate_swap = false; ··· 3367 3362 if (!capable(CAP_SYS_ADMIN)) 3368 3363 return -EPERM; 3369 3364 3365 + /* 3366 + * Allocate or reuse existing !SWP_USED swap_info. The returned 3367 + * si will stay in a dying status, so nothing will access its content 3368 + * until enable_swap_info resurrects its percpu ref and expose it. 3369 + */ 3370 3370 si = alloc_swap_info(); 3371 3371 if (IS_ERR(si)) 3372 3372 return PTR_ERR(si); ··· 3449 3439 3450 3440 maxpages = si->max; 3451 3441 3452 - /* OK, set up the swap map and apply the bad block list */ 3453 - swap_map = vzalloc(maxpages); 3454 - if (!swap_map) { 3455 - error = -ENOMEM; 3456 - goto bad_swap_unlock_inode; 3457 - } 3458 - 3459 - error = swap_cgroup_swapon(si->type, maxpages); 3442 + /* Setup the swap map and apply bad block */ 3443 + error = setup_swap_map(si, swap_header, maxpages); 3460 3444 if (error) 3461 3445 goto bad_swap_unlock_inode; 3462 3446 3463 - error = setup_swap_map(si, swap_header, swap_map, maxpages); 3447 + /* Set up the swap cluster info */ 3448 + error = setup_swap_clusters_info(si, swap_header, maxpages); 3449 + if (error) 3450 + goto bad_swap_unlock_inode; 3451 + 3452 + error = swap_cgroup_swapon(si->type, maxpages); 3464 3453 if (error) 3465 3454 goto bad_swap_unlock_inode; 3466 3455 ··· 3485 3476 } else { 3486 3477 atomic_inc(&nr_rotate_swap); 3487 3478 inced_nr_rotate_swap = true; 3488 - } 3489 - 3490 - cluster_info = setup_clusters(si, swap_header, maxpages); 3491 - if (IS_ERR(cluster_info)) { 3492 - error = PTR_ERR(cluster_info); 3493 - cluster_info = NULL; 3494 - goto bad_swap_unlock_inode; 3495 3479 } 3496 3480 3497 3481 if ((swap_flags & SWAP_FLAG_DISCARD) && ··· 3539 3537 prio = swap_flags & SWAP_FLAG_PRIO_MASK; 
3540 3538 3541 3539 si->swap_file = swap_file; 3542 - enable_swap_info(si, prio, swap_map, cluster_info, zeromap); 3540 + 3541 + /* Sets SWP_WRITEOK, resurrect the percpu ref, expose the swap device */ 3542 + enable_swap_info(si, prio, zeromap); 3543 3543 3544 3544 pr_info("Adding %uk swap on %s. Priority:%d extents:%d across:%lluk %s%s%s%s\n", 3545 3545 K(si->pages), name->name, si->prio, nr_extents, ··· 3567 3563 inode = NULL; 3568 3564 destroy_swap_extents(si, swap_file); 3569 3565 swap_cgroup_swapoff(si->type); 3566 + vfree(si->swap_map); 3567 + si->swap_map = NULL; 3568 + free_swap_cluster_info(si->cluster_info, si->max); 3569 + si->cluster_info = NULL; 3570 + /* 3571 + * Clear the SWP_USED flag after all resources are freed so 3572 + * alloc_swap_info can reuse this si safely. 3573 + */ 3570 3574 spin_lock(&swap_lock); 3571 3575 si->flags = 0; 3572 3576 spin_unlock(&swap_lock); 3573 - vfree(swap_map); 3574 3577 kvfree(zeromap); 3575 - if (cluster_info) 3576 - free_cluster_info(cluster_info, maxpages); 3577 3578 if (inced_nr_rotate_swap) 3578 3579 atomic_dec(&nr_rotate_swap); 3579 3580 if (swap_file)