Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Revert "revert "Revert "mm: remove __GFP_NO_KSWAPD""" and associated damage

This reverts commits a50915394f1fc02c2861d3b7ce7014788aa5066e and
d7c3b937bdf45f0b844400b7bf6fd3ed50bac604.

This is a revert of a revert of a revert. In addition, it reverts the
even older i915 change that stopped using the __GFP_NO_KSWAPD flag
because of the original commits in linux-next.

It turns out that the original patch really was bogus, and that the
original revert was the correct thing to do after all. We thought we
had fixed the problem, and then reverted the revert, but the problem
really is fundamental: waking up kswapd simply isn't the right thing to
do, and direct reclaim sometimes simply _is_ the right thing to do.

When certain allocations fail, we simply should try some direct reclaim,
and if that fails, fail the allocation. That's the right thing to do
for THP allocations, which can easily fail, and the GPU allocations want
to do that too.

So starting kswapd is sometimes simply wrong, and removing the flag that
said "don't start kswapd" was a mistake. Let's hope we never revisit
this mistake again - and certainly not this many times ;)

Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

+20 -13
+3 -3
drivers/gpu/drm/i915/i915_gem.c
··· 1796 1796 */ 1797 1797 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 1798 1798 gfp = mapping_gfp_mask(mapping); 1799 - gfp |= __GFP_NORETRY | __GFP_NOWARN; 1799 + gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 1800 1800 gfp &= ~(__GFP_IO | __GFP_WAIT); 1801 1801 for_each_sg(st->sgl, sg, page_count, i) { 1802 1802 page = shmem_read_mapping_page_gfp(mapping, i, gfp); ··· 1809 1809 * our own buffer, now let the real VM do its job and 1810 1810 * go down in flames if truly OOM. 1811 1811 */ 1812 - gfp &= ~(__GFP_NORETRY | __GFP_NOWARN); 1812 + gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD); 1813 1813 gfp |= __GFP_IO | __GFP_WAIT; 1814 1814 1815 1815 i915_gem_shrink_all(dev_priv); ··· 1817 1817 if (IS_ERR(page)) 1818 1818 goto err_pages; 1819 1819 1820 - gfp |= __GFP_NORETRY | __GFP_NOWARN; 1820 + gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 1821 1821 gfp &= ~(__GFP_IO | __GFP_WAIT); 1822 1822 } 1823 1823
+4 -2
drivers/mtd/mtdcore.c
··· 1077 1077 * until the request succeeds or until the allocation size falls below 1078 1078 * the system page size. This attempts to make sure it does not adversely 1079 1079 * impact system performance, so when allocating more than one page, we 1080 - * ask the memory allocator to avoid re-trying. 1080 + * ask the memory allocator to avoid re-trying, swapping, writing back 1081 + * or performing I/O. 1081 1082 * 1082 1083 * Note, this function also makes sure that the allocated buffer is aligned to 1083 1084 * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value. ··· 1092 1091 */ 1093 1092 void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) 1094 1093 { 1095 - gfp_t flags = __GFP_NOWARN | __GFP_WAIT | __GFP_NORETRY; 1094 + gfp_t flags = __GFP_NOWARN | __GFP_WAIT | 1095 + __GFP_NORETRY | __GFP_NO_KSWAPD; 1096 1096 size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); 1097 1097 void *kbuf; 1098 1098
+8 -5
include/linux/gfp.h
··· 30 30 #define ___GFP_HARDWALL 0x20000u 31 31 #define ___GFP_THISNODE 0x40000u 32 32 #define ___GFP_RECLAIMABLE 0x80000u 33 - #define ___GFP_NOTRACK 0x100000u 34 - #define ___GFP_OTHER_NODE 0x200000u 35 - #define ___GFP_WRITE 0x400000u 33 + #define ___GFP_NOTRACK 0x200000u 34 + #define ___GFP_NO_KSWAPD 0x400000u 35 + #define ___GFP_OTHER_NODE 0x800000u 36 + #define ___GFP_WRITE 0x1000000u 36 37 37 38 /* 38 39 * GFP bitmasks.. ··· 86 85 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ 87 86 #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ 88 87 88 + #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) 89 89 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ 90 90 #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ 91 91 ··· 96 94 */ 97 95 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) 98 96 99 - #define __GFP_BITS_SHIFT 23 /* Room for N __GFP_FOO bits */ 97 + #define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */ 100 98 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) 101 99 102 100 /* This equals 0, but use constants in case they ever change */ ··· 116 114 __GFP_MOVABLE) 117 115 #define GFP_IOFS (__GFP_IO | __GFP_FS) 118 116 #define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ 119 - __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) 117 + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ 118 + __GFP_NO_KSWAPD) 120 119 121 120 #ifdef CONFIG_NUMA 122 121 #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+1
include/trace/events/gfpflags.h
··· 36 36 {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ 37 37 {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ 38 38 {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ 39 + {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ 39 40 {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ 40 41 ) : "GFP_NOWAIT" 41 42
+4 -3
mm/page_alloc.c
··· 2416 2416 goto nopage; 2417 2417 2418 2418 restart: 2419 - wake_all_kswapd(order, zonelist, high_zoneidx, 2420 - zone_idx(preferred_zone)); 2419 + if (!(gfp_mask & __GFP_NO_KSWAPD)) 2420 + wake_all_kswapd(order, zonelist, high_zoneidx, 2421 + zone_idx(preferred_zone)); 2421 2422 2422 2423 /* 2423 2424 * OK, we're below the kswapd watermark and have kicked background ··· 2495 2494 * system then fail the allocation instead of entering direct reclaim. 2496 2495 */ 2497 2496 if ((deferred_compaction || contended_compaction) && 2498 - (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE) 2497 + (gfp_mask & __GFP_NO_KSWAPD)) 2499 2498 goto nopage; 2500 2499 2501 2500 /* Try direct reclaim and then allocating */