mm, swap: remove contention workaround for swap cache

Swap cluster setup used to shuffle the clusters on initialization. This
was helpful to avoid contention for the swap cache space. The cluster
size (2M) was much smaller than each swap cache space (64M), so shuffling
the clusters meant the allocator would try to allocate swap slots that are
in different swap cache spaces for each CPU, reducing the chance of two
CPUs using the same swap cache space, and hence reducing the contention.

Now that the swap cache is managed by swap clusters, this shuffle is
pointless. Just remove it, and clean up the related macros.

This also improves HDD swap performance, as shuffling IO is a bad idea
for HDD, and now the shuffling is gone. Tests have shown a ~40%
performance gain for HDD [1]:

Doing sequential swap in of 8G data using 8 processes with usemem, average
of 3 test runs:

Before: 1270.91 KB/s per process
After: 1849.54 KB/s per process

Link: https://lore.kernel.org/linux-mm/CAMgjq7AdauQ8=X0zeih2r21QoV=-WWj1hyBxLWRzq74n-C=-Ng@mail.gmail.com/ [1]
Link: https://lkml.kernel.org/r/20250916160100.31545-14-ryncsn@gmail.com
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202504241621.f27743ec-lkp@intel.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: Chris Li <chrisl@kernel.org>
Reviewed-by: Barry Song <baohua@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Suggested-by: Chris Li <chrisl@kernel.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Kairui Song and committed by

Andrew Morton 9 months ago 685a17fb 8b47299a

+13 -30

3 changed files

expand all

swap.h

swapfile.c

zswap.c

-4

mm/swap.h

··· 198 198 void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug); 199 199 200 200 /* linux/mm/swap_state.c */ 201 - /* One swap address space for each 64M swap space */ 202 - #define SWAP_ADDRESS_SPACE_SHIFT 14 203 - #define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) 204 - #define SWAP_ADDRESS_SPACE_MASK (SWAP_ADDRESS_SPACE_PAGES - 1) 205 201 extern struct address_space swap_space __ro_after_init; 206 202 static inline struct address_space *swap_address_space(swp_entry_t entry) 207 203 {

+8 -24

mm/swapfile.c

··· 3204 3204 return 0; 3205 3205 } 3206 3206 3207 - #define SWAP_CLUSTER_INFO_COLS \ 3208 - DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info)) 3209 - #define SWAP_CLUSTER_SPACE_COLS \ 3210 - DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER) 3211 - #define SWAP_CLUSTER_COLS \ 3212 - max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS) 3213 - 3214 3207 static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, 3215 3208 union swap_header *swap_header, 3216 3209 unsigned long maxpages) 3217 3210 { 3218 3211 unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); 3219 3212 struct swap_cluster_info *cluster_info; 3220 - unsigned long i, j, idx; 3221 3213 int err = -ENOMEM; 3214 + unsigned long i; 3222 3215 3223 3216 cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL); 3224 3217 if (!cluster_info) ··· 3260 3267 INIT_LIST_HEAD(&si->frag_clusters[i]); 3261 3268 } 3262 3269 3263 - /* 3264 - * Reduce false cache line sharing between cluster_info and 3265 - * sharing same address space. 3266 - */ 3267 - for (j = 0; j < SWAP_CLUSTER_COLS; j++) { 3268 - for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) { 3269 - struct swap_cluster_info *ci; 3270 - idx = i * SWAP_CLUSTER_COLS + j; 3271 - ci = cluster_info + idx; 3272 - if (idx >= nr_clusters) 3273 - continue; 3274 - if (ci->count) { 3275 - ci->flags = CLUSTER_FLAG_NONFULL; 3276 - list_add_tail(&ci->list, &si->nonfull_clusters[0]); 3277 - continue; 3278 - } 3270 + for (i = 0; i < nr_clusters; i++) { 3271 + struct swap_cluster_info *ci = &cluster_info[i]; 3272 + 3273 + if (ci->count) { 3274 + ci->flags = CLUSTER_FLAG_NONFULL; 3275 + list_add_tail(&ci->list, &si->nonfull_clusters[0]); 3276 + } else { 3279 3277 ci->flags = CLUSTER_FLAG_FREE; 3280 3278 list_add_tail(&ci->list, &si->free_clusters); 3281 3279 }

+5 -2

mm/zswap.c

··· 225 225 * helpers and fwd declarations 226 226 **********************************/ 227 227 228 + /* One swap address space for each 64M swap space */ 229 + #define ZSWAP_ADDRESS_SPACE_SHIFT 14 230 + #define ZSWAP_ADDRESS_SPACE_PAGES (1 << ZSWAP_ADDRESS_SPACE_SHIFT) 228 231 static inline struct xarray *swap_zswap_tree(swp_entry_t swp) 229 232 { 230 233 return &zswap_trees[swp_type(swp)][swp_offset(swp) 231 - >> SWAP_ADDRESS_SPACE_SHIFT]; 234 + >> ZSWAP_ADDRESS_SPACE_SHIFT]; 232 235 } 233 236 234 237 #define zswap_pool_debug(msg, p) \ ··· 1677 1674 struct xarray *trees, *tree; 1678 1675 unsigned int nr, i; 1679 1676 1680 - nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); 1677 + nr = DIV_ROUND_UP(nr_pages, ZSWAP_ADDRESS_SPACE_PAGES); 1681 1678 trees = kvcalloc(nr, sizeof(*tree), GFP_KERNEL); 1682 1679 if (!trees) { 1683 1680 pr_err("alloc failed, zswap disabled for swap type %d\n", type);

Configure Feed

Configure Feed