Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm: add PageWaiters indicating tasks are waiting for a page bit

Add a new page flag, PageWaiters, to indicate the page waitqueue has
tasks waiting. This can be tested rather than testing waitqueue_active
which requires another cacheline load.

This bit is always set when the page has tasks on page_waitqueue(page),
and is set and cleared under the waitqueue lock. It may be set when
there are no tasks on the waitqueue, which will cause a harmless extra
wakeup check that will clear the bit.

The generic bit-waitqueue infrastructure is no longer used for pages.
Instead, waitqueues are used directly with a custom key type. The
generic code was not flexible enough to have PageWaiters manipulation
under the waitqueue lock (which simplifies concurrency).

This improves the performance of page lock intensive microbenchmarks by
2-3%.

Putting two bits in the same word opens the opportunity to remove the
memory barrier between clearing the lock bit and testing the waiters
bit, after some work on the arch primitives (e.g., ensuring memory
operand widths match and cover both bits).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Bob Peterson <rpeterso@redhat.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Lutomirski <luto@kernel.org>
Cc: Andreas Gruenbacher <agruenba@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Nicholas Piggin and committed by
Linus Torvalds
62906027 6326fec1

+179 -55
+2
include/linux/mm.h
··· 1758 1758 return ptl; 1759 1759 } 1760 1760 1761 + extern void __init pagecache_init(void); 1762 + 1761 1763 extern void free_area_init(unsigned long * zones_size); 1762 1764 extern void free_area_init_node(int nid, unsigned long * zones_size, 1763 1765 unsigned long zone_start_pfn, unsigned long *zholes_size);
+9
include/linux/page-flags.h
··· 73 73 */ 74 74 enum pageflags { 75 75 PG_locked, /* Page is locked. Don't touch. */ 76 + PG_waiters, /* Page has waiters, check its waitqueue */ 76 77 PG_error, 77 78 PG_referenced, 78 79 PG_uptodate, ··· 170 169 * for compound page all operations related to the page flag applied to 171 170 * head page. 172 171 * 172 + * PF_ONLY_HEAD: 173 + * for compound page, callers only ever operate on the head page. 174 + * 173 175 * PF_NO_TAIL: 174 176 * modifications of the page flag must be done on small or head pages, 175 177 * checks can be done on tail pages too. ··· 182 178 */ 183 179 #define PF_ANY(page, enforce) page 184 180 #define PF_HEAD(page, enforce) compound_head(page) 181 + #define PF_ONLY_HEAD(page, enforce) ({ \ 182 + VM_BUG_ON_PGFLAGS(PageTail(page), page); \ 183 + page;}) 185 184 #define PF_NO_TAIL(page, enforce) ({ \ 186 185 VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ 187 186 compound_head(page);}) ··· 262 255 TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) 263 256 264 257 __PAGEFLAG(Locked, locked, PF_NO_TAIL) 258 + PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) 265 259 PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND) 266 260 PAGEFLAG(Referenced, referenced, PF_HEAD) 267 261 TESTCLEARFLAG(Referenced, referenced, PF_HEAD) ··· 751 743 752 744 #undef PF_ANY 753 745 #undef PF_HEAD 746 + #undef PF_ONLY_HEAD 754 747 #undef PF_NO_TAIL 755 748 #undef PF_NO_COMPOUND 756 749 #endif /* !__GENERATING_BOUNDS_H */
+11 -12
include/linux/pagemap.h
··· 486 486 * and for filesystems which need to wait on PG_private. 487 487 */ 488 488 extern void wait_on_page_bit(struct page *page, int bit_nr); 489 - 490 489 extern int wait_on_page_bit_killable(struct page *page, int bit_nr); 491 - extern int wait_on_page_bit_killable_timeout(struct page *page, 492 - int bit_nr, unsigned long timeout); 490 + extern void wake_up_page_bit(struct page *page, int bit_nr); 493 491 494 - static inline int wait_on_page_locked_killable(struct page *page) 495 - { 496 - if (!PageLocked(page)) 497 - return 0; 498 - return wait_on_page_bit_killable(compound_head(page), PG_locked); 499 - } 500 - 501 - extern wait_queue_head_t *page_waitqueue(struct page *page); 502 492 static inline void wake_up_page(struct page *page, int bit) 503 493 { 504 - __wake_up_bit(page_waitqueue(page), &page->flags, bit); 494 + if (!PageWaiters(page)) 495 + return; 496 + wake_up_page_bit(page, bit); 505 497 } 506 498 507 499 /* ··· 507 515 { 508 516 if (PageLocked(page)) 509 517 wait_on_page_bit(compound_head(page), PG_locked); 518 + } 519 + 520 + static inline int wait_on_page_locked_killable(struct page *page) 521 + { 522 + if (!PageLocked(page)) 523 + return 0; 524 + return wait_on_page_bit_killable(compound_head(page), PG_locked); 510 525 } 511 526 512 527 /*
-1
include/linux/writeback.h
··· 375 375 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); 376 376 377 377 void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); 378 - void page_writeback_init(void); 379 378 void balance_dirty_pages_ratelimited(struct address_space *mapping); 380 379 bool wb_over_bg_thresh(struct bdi_writeback *wb); 381 380
+1
include/trace/events/mmflags.h
··· 81 81 82 82 #define __def_pageflag_names \ 83 83 {1UL << PG_locked, "locked" }, \ 84 + {1UL << PG_waiters, "waiters" }, \ 84 85 {1UL << PG_error, "error" }, \ 85 86 {1UL << PG_referenced, "referenced" }, \ 86 87 {1UL << PG_uptodate, "uptodate" }, \
+1 -2
init/main.c
··· 647 647 security_init(); 648 648 dbg_late_init(); 649 649 vfs_caches_init(); 650 + pagecache_init(); 650 651 signals_init(); 651 - /* rootfs populating might need page-writeback */ 652 - page_writeback_init(); 653 652 proc_root_init(); 654 653 nsfs_init(); 655 654 cpuset_init();
+151 -40
mm/filemap.c
··· 739 739 * at a cost of "thundering herd" phenomena during rare hash 740 740 * collisions. 741 741 */ 742 - wait_queue_head_t *page_waitqueue(struct page *page) 742 + #define PAGE_WAIT_TABLE_BITS 8 743 + #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS) 744 + static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; 745 + 746 + static wait_queue_head_t *page_waitqueue(struct page *page) 743 747 { 744 - return bit_waitqueue(page, 0); 748 + return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)]; 745 749 } 746 - EXPORT_SYMBOL(page_waitqueue); 750 + 751 + void __init pagecache_init(void) 752 + { 753 + int i; 754 + 755 + for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++) 756 + init_waitqueue_head(&page_wait_table[i]); 757 + 758 + page_writeback_init(); 759 + } 760 + 761 + struct wait_page_key { 762 + struct page *page; 763 + int bit_nr; 764 + int page_match; 765 + }; 766 + 767 + struct wait_page_queue { 768 + struct page *page; 769 + int bit_nr; 770 + wait_queue_t wait; 771 + }; 772 + 773 + static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) 774 + { 775 + struct wait_page_key *key = arg; 776 + struct wait_page_queue *wait_page 777 + = container_of(wait, struct wait_page_queue, wait); 778 + 779 + if (wait_page->page != key->page) 780 + return 0; 781 + key->page_match = 1; 782 + 783 + if (wait_page->bit_nr != key->bit_nr) 784 + return 0; 785 + if (test_bit(key->bit_nr, &key->page->flags)) 786 + return 0; 787 + 788 + return autoremove_wake_function(wait, mode, sync, key); 789 + } 790 + 791 + void wake_up_page_bit(struct page *page, int bit_nr) 792 + { 793 + wait_queue_head_t *q = page_waitqueue(page); 794 + struct wait_page_key key; 795 + unsigned long flags; 796 + 797 + key.page = page; 798 + key.bit_nr = bit_nr; 799 + key.page_match = 0; 800 + 801 + spin_lock_irqsave(&q->lock, flags); 802 + __wake_up_locked_key(q, TASK_NORMAL, &key); 803 + /* 804 + * It is possible for other pages to have collided on 
the waitqueue 805 + * hash, so in that case check for a page match. That prevents a long- 806 + * term waiter 807 + * 808 + * It is still possible to miss a case here, when we woke page waiters 809 + * and removed them from the waitqueue, but there are still other 810 + * page waiters. 811 + */ 812 + if (!waitqueue_active(q) || !key.page_match) { 813 + ClearPageWaiters(page); 814 + /* 815 + * It's possible to miss clearing Waiters here, when we woke 816 + * our page waiters, but the hashed waitqueue has waiters for 817 + * other pages on it. 818 + * 819 + * That's okay, it's a rare case. The next waker will clear it. 820 + */ 821 + } 822 + spin_unlock_irqrestore(&q->lock, flags); 823 + } 824 + EXPORT_SYMBOL(wake_up_page_bit); 825 + 826 + static inline int wait_on_page_bit_common(wait_queue_head_t *q, 827 + struct page *page, int bit_nr, int state, bool lock) 828 + { 829 + struct wait_page_queue wait_page; 830 + wait_queue_t *wait = &wait_page.wait; 831 + int ret = 0; 832 + 833 + init_wait(wait); 834 + wait->func = wake_page_function; 835 + wait_page.page = page; 836 + wait_page.bit_nr = bit_nr; 837 + 838 + for (;;) { 839 + spin_lock_irq(&q->lock); 840 + 841 + if (likely(list_empty(&wait->task_list))) { 842 + if (lock) 843 + __add_wait_queue_tail_exclusive(q, wait); 844 + else 845 + __add_wait_queue(q, wait); 846 + SetPageWaiters(page); 847 + } 848 + 849 + set_current_state(state); 850 + 851 + spin_unlock_irq(&q->lock); 852 + 853 + if (likely(test_bit(bit_nr, &page->flags))) { 854 + io_schedule(); 855 + if (unlikely(signal_pending_state(state, current))) { 856 + ret = -EINTR; 857 + break; 858 + } 859 + } 860 + 861 + if (lock) { 862 + if (!test_and_set_bit_lock(bit_nr, &page->flags)) 863 + break; 864 + } else { 865 + if (!test_bit(bit_nr, &page->flags)) 866 + break; 867 + } 868 + } 869 + 870 + finish_wait(q, wait); 871 + 872 + /* 873 + * A signal could leave PageWaiters set. 
Clearing it here if 874 + * !waitqueue_active would be possible (by open-coding finish_wait), 875 + * but still fail to catch it in the case of wait hash collision. We 876 + * already can fail to clear wait hash collision cases, so don't 877 + * bother with signals either. 878 + */ 879 + 880 + return ret; 881 + } 747 882 748 883 void wait_on_page_bit(struct page *page, int bit_nr) 749 884 { 750 - DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); 751 - 752 - if (test_bit(bit_nr, &page->flags)) 753 - __wait_on_bit(page_waitqueue(page), &wait, bit_wait_io, 754 - TASK_UNINTERRUPTIBLE); 885 + wait_queue_head_t *q = page_waitqueue(page); 886 + wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false); 755 887 } 756 888 EXPORT_SYMBOL(wait_on_page_bit); 757 889 758 890 int wait_on_page_bit_killable(struct page *page, int bit_nr) 759 891 { 760 - DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); 761 - 762 - if (!test_bit(bit_nr, &page->flags)) 763 - return 0; 764 - 765 - return __wait_on_bit(page_waitqueue(page), &wait, 766 - bit_wait_io, TASK_KILLABLE); 892 + wait_queue_head_t *q = page_waitqueue(page); 893 + return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false); 767 894 } 768 - 769 - int wait_on_page_bit_killable_timeout(struct page *page, 770 - int bit_nr, unsigned long timeout) 771 - { 772 - DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); 773 - 774 - wait.key.timeout = jiffies + timeout; 775 - if (!test_bit(bit_nr, &page->flags)) 776 - return 0; 777 - return __wait_on_bit(page_waitqueue(page), &wait, 778 - bit_wait_io_timeout, TASK_KILLABLE); 779 - } 780 - EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout); 781 895 782 896 /** 783 897 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue ··· 907 793 908 794 spin_lock_irqsave(&q->lock, flags); 909 795 __add_wait_queue(q, waiter); 796 + SetPageWaiters(page); 910 797 spin_unlock_irqrestore(&q->lock, flags); 911 798 } 912 799 EXPORT_SYMBOL_GPL(add_page_wait_queue); ··· 989 874 * __lock_page 
- get a lock on the page, assuming we need to sleep to get it 990 875 * @page: the page to lock 991 876 */ 992 - void __lock_page(struct page *page) 877 + void __lock_page(struct page *__page) 993 878 { 994 - struct page *page_head = compound_head(page); 995 - DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked); 996 - 997 - __wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io, 998 - TASK_UNINTERRUPTIBLE); 879 + struct page *page = compound_head(__page); 880 + wait_queue_head_t *q = page_waitqueue(page); 881 + wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true); 999 882 } 1000 883 EXPORT_SYMBOL(__lock_page); 1001 884 1002 - int __lock_page_killable(struct page *page) 885 + int __lock_page_killable(struct page *__page) 1003 886 { 1004 - struct page *page_head = compound_head(page); 1005 - DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked); 1006 - 1007 - return __wait_on_bit_lock(page_waitqueue(page_head), &wait, 1008 - bit_wait_io, TASK_KILLABLE); 887 + struct page *page = compound_head(__page); 888 + wait_queue_head_t *q = page_waitqueue(page); 889 + return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true); 1009 890 } 1010 891 EXPORT_SYMBOL_GPL(__lock_page_killable); 1011 892
+2
mm/internal.h
··· 36 36 /* Do not use these with a slab allocator */ 37 37 #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) 38 38 39 + void page_writeback_init(void); 40 + 39 41 int do_swap_page(struct vm_fault *vmf); 40 42 41 43 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
+2
mm/swap.c
··· 69 69 del_page_from_lru_list(page, lruvec, page_off_lru(page)); 70 70 spin_unlock_irqrestore(zone_lru_lock(zone), flags); 71 71 } 72 + __ClearPageWaiters(page); 72 73 mem_cgroup_uncharge(page); 73 74 } 74 75 ··· 785 784 786 785 /* Clear Active bit in case of parallel mark_page_accessed */ 787 786 __ClearPageActive(page); 787 + __ClearPageWaiters(page); 788 788 789 789 list_add(&page->lru, &pages_to_free); 790 790 }