Merge branch 'akpm' (patches from Andrew)

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
"10 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
dax: move writeback calls into the filesystems
dax: give DAX clearing code correct bdev
ext4: online defrag not supported with DAX
ext2, ext4: only set S_DAX for regular inodes
block: disable block device DAX by default
ocfs2: unlock inode if deleting inode from orphan fails
mm: ASLR: use get_random_long()
drivers: char: random: add get_random_long()
mm: numa: quickly fail allocations for NUMA balancing on full nodes
mm: thp: fix SMP race condition between THP page fault and MADV_DONTNEED

Linus Torvalds 10 years ago 691429e1 1c271479

+132 -46

24 changed files

expand all collapse all

arch

arm

mmap.c

arm64

mmap.c

mips

mmap.c

powerpc

kernel

process.c

mmap.c

sparc

kernel

sys_sparc_64.c

x86

mmap.c

block

Kconfig

drivers

char

random.c

binfmt_elf.c

block_dev.c

dax.c

ext2

inode.c

ext4

inode.c

ioctl.c

ocfs2

aops.c

xfs

xfs_aops.c

xfs_aops.h

xfs_bmap_util.c

include

linux

dax.h

random.h

filemap.c

memory.c

migrate.c

+1 -1

arch/arm/mm/mmap.c

reviewed

··· 173 173 { 174 174 unsigned long rnd; 175 175 176 176 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); 176 176 + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); 177 177 178 178 return rnd << PAGE_SHIFT; 179 179 }

+2 -2

arch/arm64/mm/mmap.c

reviewed

··· 53 53 54 54 #ifdef CONFIG_COMPAT 55 55 if (test_thread_flag(TIF_32BIT)) 56 56 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); 56 56 + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); 57 57 else 58 58 #endif 59 59 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); 59 59 + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); 60 60 return rnd << PAGE_SHIFT; 61 61 } 62 62

+2 -2

arch/mips/mm/mmap.c

reviewed

··· 146 146 { 147 147 unsigned long rnd; 148 148 149 149 - rnd = (unsigned long)get_random_int(); 149 149 + rnd = get_random_long(); 150 150 rnd <<= PAGE_SHIFT; 151 151 if (TASK_IS_32BIT_ADDR) 152 152 rnd &= 0xfffffful; ··· 174 174 175 175 static inline unsigned long brk_rnd(void) 176 176 { 177 177 - unsigned long rnd = get_random_int(); 177 177 + unsigned long rnd = get_random_long(); 178 178 179 179 rnd = rnd << PAGE_SHIFT; 180 180 /* 8MB for 32bit, 256MB for 64bit */

+2 -2

arch/powerpc/kernel/process.c

reviewed

··· 1768 1768 1769 1769 /* 8MB for 32bit, 1GB for 64bit */ 1770 1770 if (is_32bit_task()) 1771 1771 - rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); 1771 1771 + rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT))); 1772 1772 else 1773 1773 - rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); 1773 1773 + rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT))); 1774 1774 1775 1775 return rnd << PAGE_SHIFT; 1776 1776 }

+2 -2

arch/powerpc/mm/mmap.c

reviewed

··· 59 59 60 60 /* 8MB for 32bit, 1GB for 64bit */ 61 61 if (is_32bit_task()) 62 62 - rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT)); 62 62 + rnd = get_random_long() % (1<<(23-PAGE_SHIFT)); 63 63 else 64 64 - rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT)); 64 64 + rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT)); 65 65 66 66 return rnd << PAGE_SHIFT; 67 67 }

+1 -1

arch/sparc/kernel/sys_sparc_64.c

reviewed

··· 264 264 unsigned long rnd = 0UL; 265 265 266 266 if (current->flags & PF_RANDOMIZE) { 267 267 - unsigned long val = get_random_int(); 267 267 + unsigned long val = get_random_long(); 268 268 if (test_thread_flag(TIF_32BIT)) 269 269 rnd = (val % (1UL << (23UL-PAGE_SHIFT))); 270 270 else

+3 -3

arch/x86/mm/mmap.c

reviewed

··· 71 71 72 72 if (mmap_is_ia32()) 73 73 #ifdef CONFIG_COMPAT 74 74 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); 74 74 + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); 75 75 #else 76 76 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); 76 76 + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); 77 77 #endif 78 78 else 79 79 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); 79 79 + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); 80 80 81 81 return rnd << PAGE_SHIFT; 82 82 }

+13

block/Kconfig

reviewed

··· 88 88 T10/SCSI Data Integrity Field or the T13/ATA External Path 89 89 Protection. If in doubt, say N. 90 90 91 91 + config BLK_DEV_DAX 92 92 + bool "Block device DAX support" 93 93 + depends on FS_DAX 94 94 + depends on BROKEN 95 95 + help 96 96 + When DAX support is available (CONFIG_FS_DAX) raw block 97 97 + devices can also support direct userspace access to the 98 98 + storage capacity via MMAP(2) similar to a file on a 99 99 + DAX-enabled filesystem. However, the DAX I/O-path disables 100 100 + some standard I/O-statistics, and the MMAP(2) path has some 101 101 + operational differences due to bypassing the page 102 102 + cache. If in doubt, say N. 103 103 + 91 104 config BLK_DEV_THROTTLING 92 105 bool "Block layer bio throttling support" 93 106 depends on BLK_CGROUP=y

+22

drivers/char/random.c

reviewed

··· 1819 1819 EXPORT_SYMBOL(get_random_int); 1820 1820 1821 1821 /* 1822 1822 + * Same as get_random_int(), but returns unsigned long. 1823 1823 + */ 1824 1824 + unsigned long get_random_long(void) 1825 1825 + { 1826 1826 + __u32 *hash; 1827 1827 + unsigned long ret; 1828 1828 + 1829 1829 + if (arch_get_random_long(&ret)) 1830 1830 + return ret; 1831 1831 + 1832 1832 + hash = get_cpu_var(get_random_int_hash); 1833 1833 + 1834 1834 + hash[0] += current->pid + jiffies + random_get_entropy(); 1835 1835 + md5_transform(hash, random_int_secret); 1836 1836 + ret = *(unsigned long *)hash; 1837 1837 + put_cpu_var(get_random_int_hash); 1838 1838 + 1839 1839 + return ret; 1840 1840 + } 1841 1841 + EXPORT_SYMBOL(get_random_long); 1842 1842 + 1843 1843 + /* 1822 1844 * randomize_range() returns a start address such that 1823 1845 * 1824 1846 * [...... <range> .....]

+1 -1

fs/binfmt_elf.c

reviewed

··· 653 653 654 654 if ((current->flags & PF_RANDOMIZE) && 655 655 !(current->personality & ADDR_NO_RANDOMIZE)) { 656 656 - random_variable = (unsigned long) get_random_int(); 656 656 + random_variable = get_random_long(); 657 657 random_variable &= STACK_RND_MASK; 658 658 random_variable <<= PAGE_SHIFT; 659 659 }

+17 -2

fs/block_dev.c

reviewed

··· 1201 1201 bdev->bd_disk = disk; 1202 1202 bdev->bd_queue = disk->queue; 1203 1203 bdev->bd_contains = bdev; 1204 1204 - bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0; 1204 1204 + if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access) 1205 1205 + bdev->bd_inode->i_flags = S_DAX; 1206 1206 + else 1207 1207 + bdev->bd_inode->i_flags = 0; 1208 1208 + 1205 1209 if (!partno) { 1206 1210 ret = -ENXIO; 1207 1211 bdev->bd_part = disk_get_part(disk, partno); ··· 1697 1693 return try_to_free_buffers(page); 1698 1694 } 1699 1695 1696 1696 + static int blkdev_writepages(struct address_space *mapping, 1697 1697 + struct writeback_control *wbc) 1698 1698 + { 1699 1699 + if (dax_mapping(mapping)) { 1700 1700 + struct block_device *bdev = I_BDEV(mapping->host); 1701 1701 + 1702 1702 + return dax_writeback_mapping_range(mapping, bdev, wbc); 1703 1703 + } 1704 1704 + return generic_writepages(mapping, wbc); 1705 1705 + } 1706 1706 + 1700 1707 static const struct address_space_operations def_blk_aops = { 1701 1708 .readpage = blkdev_readpage, 1702 1709 .readpages = blkdev_readpages, 1703 1710 .writepage = blkdev_writepage, 1704 1711 .write_begin = blkdev_write_begin, 1705 1712 .write_end = blkdev_write_end, 1706 1706 - .writepages = generic_writepages, 1713 1713 + .writepages = blkdev_writepages, 1707 1714 .releasepage = blkdev_releasepage, 1708 1715 .direct_IO = blkdev_direct_IO, 1709 1716 .is_dirty_writeback = buffer_check_dirty_writeback,

+11 -10

fs/dax.c

reviewed

··· 79 79 } 80 80 81 81 /* 82 82 - * dax_clear_blocks() is called from within transaction context from XFS, 82 82 + * dax_clear_sectors() is called from within transaction context from XFS, 83 83 * and hence this means the stack from this point must follow GFP_NOFS 84 84 * semantics for all operations. 85 85 */ 86 86 - int dax_clear_blocks(struct inode *inode, sector_t block, long _size) 86 86 + int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size) 87 87 { 88 88 - struct block_device *bdev = inode->i_sb->s_bdev; 89 88 struct blk_dax_ctl dax = { 90 90 - .sector = block << (inode->i_blkbits - 9), 89 89 + .sector = _sector, 91 90 .size = _size, 92 91 }; 93 92 ··· 108 109 wmb_pmem(); 109 110 return 0; 110 111 } 111 111 - EXPORT_SYMBOL_GPL(dax_clear_blocks); 112 112 + EXPORT_SYMBOL_GPL(dax_clear_sectors); 112 113 113 114 /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */ 114 115 static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first, ··· 484 485 * end]. This is required by data integrity operations to ensure file data is 485 486 * on persistent storage prior to completion of the operation. 486 487 */ 487 487 - int dax_writeback_mapping_range(struct address_space *mapping, loff_t start, 488 488 - loff_t end) 488 488 + int dax_writeback_mapping_range(struct address_space *mapping, 489 489 + struct block_device *bdev, struct writeback_control *wbc) 489 490 { 490 491 struct inode *inode = mapping->host; 491 491 - struct block_device *bdev = inode->i_sb->s_bdev; 492 492 pgoff_t start_index, end_index, pmd_index; 493 493 pgoff_t indices[PAGEVEC_SIZE]; 494 494 struct pagevec pvec; ··· 498 500 if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT)) 499 501 return -EIO; 500 502 501 501 - start_index = start >> PAGE_CACHE_SHIFT; 502 502 - end_index = end >> PAGE_CACHE_SHIFT; 503 503 + if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) 504 504 + return 0; 505 505 + 506 506 + start_index = wbc->range_start >> PAGE_CACHE_SHIFT; 507 507 + end_index = wbc->range_end >> PAGE_CACHE_SHIFT; 503 508 pmd_index = DAX_PMD_INDEX(start_index); 504 509 505 510 rcu_read_lock();

+13 -3

fs/ext2/inode.c

reviewed

··· 737 737 * so that it's not found by another thread before it's 738 738 * initialised 739 739 */ 740 740 - err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key), 741 741 - 1 << inode->i_blkbits); 740 740 + err = dax_clear_sectors(inode->i_sb->s_bdev, 741 741 + le32_to_cpu(chain[depth-1].key) << 742 742 + (inode->i_blkbits - 9), 743 743 + 1 << inode->i_blkbits); 742 744 if (err) { 743 745 mutex_unlock(&ei->truncate_mutex); 744 746 goto cleanup; ··· 876 874 static int 877 875 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) 878 876 { 877 877 + #ifdef CONFIG_FS_DAX 878 878 + if (dax_mapping(mapping)) { 879 879 + return dax_writeback_mapping_range(mapping, 880 880 + mapping->host->i_sb->s_bdev, 881 881 + wbc); 882 882 + } 883 883 + #endif 884 884 + 879 885 return mpage_writepages(mapping, wbc, ext2_get_block); 880 886 } 881 887 ··· 1306 1296 inode->i_flags |= S_NOATIME; 1307 1297 if (flags & EXT2_DIRSYNC_FL) 1308 1298 inode->i_flags |= S_DIRSYNC; 1309 1309 - if (test_opt(inode->i_sb, DAX)) 1299 1299 + if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) 1310 1300 inode->i_flags |= S_DAX; 1311 1301 } 1312 1302

+5 -1

fs/ext4/inode.c

reviewed

··· 2478 2478 2479 2479 trace_ext4_writepages(inode, wbc); 2480 2480 2481 2481 + if (dax_mapping(mapping)) 2482 2482 + return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, 2483 2483 + wbc); 2484 2484 + 2481 2485 /* 2482 2486 * No pages to write? This is mainly a kludge to avoid starting 2483 2487 * a transaction for special inodes like journal inode on last iput() ··· 4159 4155 new_fl |= S_NOATIME; 4160 4156 if (flags & EXT4_DIRSYNC_FL) 4161 4157 new_fl |= S_DIRSYNC; 4162 4162 - if (test_opt(inode->i_sb, DAX)) 4158 4158 + if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) 4163 4159 new_fl |= S_DAX; 4164 4160 inode_set_flags(inode, new_fl, 4165 4161 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);

fs/ext4/ioctl.c

reviewed

··· 583 583 "Online defrag not supported with bigalloc"); 584 584 err = -EOPNOTSUPP; 585 585 goto mext_out; 586 586 + } else if (IS_DAX(inode)) { 587 587 + ext4_msg(sb, KERN_ERR, 588 588 + "Online defrag not supported with DAX"); 589 589 + err = -EOPNOTSUPP; 590 590 + goto mext_out; 586 591 } 587 592 588 593 err = mnt_want_write_file(filp);

fs/ocfs2/aops.c

reviewed

··· 956 956 tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 957 957 update_isize, end); 958 958 if (tmp_ret < 0) { 959 959 + ocfs2_inode_unlock(inode, 1); 959 960 ret = tmp_ret; 960 961 mlog_errno(ret); 961 962 brelse(di_bh);

+5 -1

fs/xfs/xfs_aops.c

reviewed

··· 55 55 } while ((bh = bh->b_this_page) != head); 56 56 } 57 57 58 58 - STATIC struct block_device * 58 58 + struct block_device * 59 59 xfs_find_bdev_for_inode( 60 60 struct inode *inode) 61 61 { ··· 1208 1208 struct writeback_control *wbc) 1209 1209 { 1210 1210 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 1211 1211 + if (dax_mapping(mapping)) 1212 1212 + return dax_writeback_mapping_range(mapping, 1213 1213 + xfs_find_bdev_for_inode(mapping->host), wbc); 1214 1214 + 1211 1215 return generic_writepages(mapping, wbc); 1212 1216 } 1213 1217

fs/xfs/xfs_aops.h

reviewed

··· 62 62 struct buffer_head *map_bh, int create); 63 63 64 64 extern void xfs_count_page_state(struct page *, int *, int *); 65 65 + extern struct block_device *xfs_find_bdev_for_inode(struct inode *); 65 66 66 67 #endif /* __XFS_AOPS_H__ */

+2 -1

fs/xfs/xfs_bmap_util.c

reviewed

··· 75 75 ssize_t size = XFS_FSB_TO_B(mp, count_fsb); 76 76 77 77 if (IS_DAX(VFS_I(ip))) 78 78 - return dax_clear_blocks(VFS_I(ip), block, size); 78 78 + return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)), 79 79 + sector, size); 79 80 80 81 /* 81 82 * let the block layer decide on the fastest method of

+5 -3

include/linux/dax.h

reviewed

··· 7 7 8 8 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, 9 9 get_block_t, dio_iodone_t, int flags); 10 10 - int dax_clear_blocks(struct inode *, sector_t block, long size); 10 10 + int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size); 11 11 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); 12 12 int dax_truncate_page(struct inode *, loff_t from, get_block_t); 13 13 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, ··· 52 52 { 53 53 return mapping->host && IS_DAX(mapping->host); 54 54 } 55 55 - int dax_writeback_mapping_range(struct address_space *mapping, loff_t start, 56 56 - loff_t end); 55 55 + 56 56 + struct writeback_control; 57 57 + int dax_writeback_mapping_range(struct address_space *mapping, 58 58 + struct block_device *bdev, struct writeback_control *wbc); 57 59 #endif

include/linux/random.h

reviewed

··· 34 34 #endif 35 35 36 36 unsigned int get_random_int(void); 37 37 + unsigned long get_random_long(void); 37 38 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); 38 39 39 40 u32 prandom_u32(void);

+4 -8

mm/filemap.c

reviewed

··· 446 446 { 447 447 int err = 0; 448 448 449 449 - if (mapping->nrpages) { 449 449 + if ((!dax_mapping(mapping) && mapping->nrpages) || 450 450 + (dax_mapping(mapping) && mapping->nrexceptional)) { 450 451 err = filemap_fdatawrite(mapping); 451 452 /* 452 453 * Even if the above returned error, the pages may be ··· 483 482 { 484 483 int err = 0; 485 484 486 486 - if (dax_mapping(mapping) && mapping->nrexceptional) { 487 487 - err = dax_writeback_mapping_range(mapping, lstart, lend); 488 488 - if (err) 489 489 - return err; 490 490 - } 491 491 - 492 492 - if (mapping->nrpages) { 485 485 + if ((!dax_mapping(mapping) && mapping->nrpages) || 486 486 + (dax_mapping(mapping) && mapping->nrexceptional)) { 493 487 err = __filemap_fdatawrite_range(mapping, lstart, lend, 494 488 WB_SYNC_ALL); 495 489 /* See comment of filemap_write_and_wait() */

+12 -2

mm/memory.c

reviewed

··· 3404 3404 if (unlikely(pmd_none(*pmd)) && 3405 3405 unlikely(__pte_alloc(mm, vma, pmd, address))) 3406 3406 return VM_FAULT_OOM; 3407 3407 - /* if an huge pmd materialized from under us just retry later */ 3408 3408 - if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd))) 3407 3407 + /* 3408 3408 + * If a huge pmd materialized under us just retry later. Use 3409 3409 + * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd 3410 3410 + * didn't become pmd_trans_huge under us and then back to pmd_none, as 3411 3411 + * a result of MADV_DONTNEED running immediately after a huge pmd fault 3412 3412 + * in a different thread of this mm, in turn leading to a misleading 3413 3413 + * pmd_trans_huge() retval. All we have to ensure is that it is a 3414 3414 + * regular pmd that we can walk with pte_offset_map() and we can do that 3415 3415 + * through an atomic read in C, which is what pmd_trans_unstable() 3416 3416 + * provides. 3417 3417 + */ 3418 3418 + if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd))) 3409 3419 return 0; 3410 3420 /* 3411 3421 * A regular pmd is established and it can't morph into a huge pmd

+1 -1

mm/migrate.c

reviewed

··· 1582 1582 (GFP_HIGHUSER_MOVABLE | 1583 1583 __GFP_THISNODE | __GFP_NOMEMALLOC | 1584 1584 __GFP_NORETRY | __GFP_NOWARN) & 1585 1585 - ~(__GFP_IO | __GFP_FS), 0); 1585 1585 + ~__GFP_RECLAIM, 0); 1586 1586 1587 1587 return newpage; 1588 1588 }