Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm, swap: use a single page for swap table when the size fits

We have a cluster size of 512 slots. Each slot consumes 8 bytes in the swap
table, so the swap table size of each cluster is exactly one page (4K).

If that condition is true, allocate one page directly and disable the slab
cache to reduce the memory usage of the swap table and avoid fragmentation.

Link: https://lkml.kernel.org/r/20250916160100.31545-16-ryncsn@gmail.com
Co-developed-by: Chris Li <chrisl@kernel.org>
Signed-off-by: Chris Li <chrisl@kernel.org>
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: Chris Li <chrisl@kernel.org>
Suggested-by: Chris Li <chrisl@kernel.org>
Reviewed-by: Barry Song <baohua@kernel.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: kernel test robot <oliver.sang@intel.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Kairui Song and committed by
Andrew Morton
f83938e4 07adc4cf

+43 -10
+2
mm/swap_table.h
··· 11 11 atomic_long_t entries[SWAPFILE_CLUSTER]; 12 12 }; 13 13 14 + #define SWP_TABLE_USE_PAGE (sizeof(struct swap_table) == PAGE_SIZE) 15 + 14 16 /* 15 17 * A swap table entry represents the status of a swap slot on a swap 16 18 * (physical or virtual) device. The swap table in each cluster is a
+41 -10
mm/swapfile.c
··· 434 434 return cluster_index(si, ci) * SWAPFILE_CLUSTER; 435 435 } 436 436 437 + static struct swap_table *swap_table_alloc(gfp_t gfp) 438 + { 439 + struct folio *folio; 440 + 441 + if (!SWP_TABLE_USE_PAGE) 442 + return kmem_cache_zalloc(swap_table_cachep, gfp); 443 + 444 + folio = folio_alloc(gfp | __GFP_ZERO, 0); 445 + if (folio) 446 + return folio_address(folio); 447 + return NULL; 448 + } 449 + 450 + static void swap_table_free_folio_rcu_cb(struct rcu_head *head) 451 + { 452 + struct folio *folio; 453 + 454 + folio = page_folio(container_of(head, struct page, rcu_head)); 455 + folio_put(folio); 456 + } 457 + 458 + static void swap_table_free(struct swap_table *table) 459 + { 460 + if (!SWP_TABLE_USE_PAGE) { 461 + kmem_cache_free(swap_table_cachep, table); 462 + return; 463 + } 464 + 465 + call_rcu(&(folio_page(virt_to_folio(table), 0)->rcu_head), 466 + swap_table_free_folio_rcu_cb); 467 + } 468 + 437 469 static void swap_cluster_free_table(struct swap_cluster_info *ci) 438 470 { 439 471 unsigned int ci_off; ··· 479 447 table = (void *)rcu_dereference_protected(ci->table, true); 480 448 rcu_assign_pointer(ci->table, NULL); 481 449 482 - kmem_cache_free(swap_table_cachep, table); 450 + swap_table_free(table); 483 451 } 484 452 485 453 /* ··· 502 470 /* The cluster must be free and was just isolated from the free list. 
*/ 503 471 VM_WARN_ON_ONCE(ci->flags || !cluster_is_empty(ci)); 504 472 505 - table = kmem_cache_zalloc(swap_table_cachep, 506 - __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); 473 + table = swap_table_alloc(__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); 507 474 if (table) { 508 475 rcu_assign_pointer(ci->table, table); 509 476 return ci; ··· 518 487 spin_unlock(&si->global_cluster_lock); 519 488 local_unlock(&percpu_swap_cluster.lock); 520 489 521 - table = kmem_cache_zalloc(swap_table_cachep, 522 - __GFP_HIGH | __GFP_NOMEMALLOC | GFP_KERNEL); 490 + table = swap_table_alloc(__GFP_HIGH | __GFP_NOMEMALLOC | GFP_KERNEL); 523 491 524 492 /* 525 493 * Back to atomic context. We might have migrated to a new CPU with a ··· 536 506 /* Nothing except this helper should touch a dangling empty cluster. */ 537 507 if (WARN_ON_ONCE(cluster_table_is_alloced(ci))) { 538 508 if (table) 539 - kmem_cache_free(swap_table_cachep, table); 509 + swap_table_free(table); 540 510 return ci; 541 511 } 542 512 ··· 764 734 765 735 ci = cluster_info + idx; 766 736 if (!ci->table) { 767 - table = kmem_cache_zalloc(swap_table_cachep, GFP_KERNEL); 737 + table = swap_table_alloc(GFP_KERNEL); 768 738 if (!table) 769 739 return -ENOMEM; 770 740 rcu_assign_pointer(ci->table, table); ··· 4102 4072 * only, and all swap cache readers (swap_cache_*) verifies 4103 4073 * the content before use. So it's safe to use RCU slab here. 4104 4074 */ 4105 - swap_table_cachep = kmem_cache_create("swap_table", 4106 - sizeof(struct swap_table), 4107 - 0, SLAB_PANIC | SLAB_TYPESAFE_BY_RCU, NULL); 4075 + if (!SWP_TABLE_USE_PAGE) 4076 + swap_table_cachep = kmem_cache_create("swap_table", 4077 + sizeof(struct swap_table), 4078 + 0, SLAB_PANIC | SLAB_TYPESAFE_BY_RCU, NULL); 4108 4079 4109 4080 #ifdef CONFIG_MIGRATION 4110 4081 if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS))