Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
"10 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
dax: move writeback calls into the filesystems
dax: give DAX clearing code correct bdev
ext4: online defrag not supported with DAX
ext2, ext4: only set S_DAX for regular inodes
block: disable block device DAX by default
ocfs2: unlock inode if deleting inode from orphan fails
mm: ASLR: use get_random_long()
drivers: char: random: add get_random_long()
mm: numa: quickly fail allocations for NUMA balancing on full nodes
mm: thp: fix SMP race condition between THP page fault and MADV_DONTNEED

+132 -46
+1 -1
arch/arm/mm/mmap.c
··· 173 173 { 174 174 unsigned long rnd; 175 175 176 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); 176 + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); 177 177 178 178 return rnd << PAGE_SHIFT; 179 179 }
+2 -2
arch/arm64/mm/mmap.c
··· 53 53 54 54 #ifdef CONFIG_COMPAT 55 55 if (test_thread_flag(TIF_32BIT)) 56 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); 56 + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); 57 57 else 58 58 #endif 59 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); 59 + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); 60 60 return rnd << PAGE_SHIFT; 61 61 } 62 62
+2 -2
arch/mips/mm/mmap.c
··· 146 146 { 147 147 unsigned long rnd; 148 148 149 - rnd = (unsigned long)get_random_int(); 149 + rnd = get_random_long(); 150 150 rnd <<= PAGE_SHIFT; 151 151 if (TASK_IS_32BIT_ADDR) 152 152 rnd &= 0xfffffful; ··· 174 174 175 175 static inline unsigned long brk_rnd(void) 176 176 { 177 - unsigned long rnd = get_random_int(); 177 + unsigned long rnd = get_random_long(); 178 178 179 179 rnd = rnd << PAGE_SHIFT; 180 180 /* 8MB for 32bit, 256MB for 64bit */
+2 -2
arch/powerpc/kernel/process.c
··· 1768 1768 1769 1769 /* 8MB for 32bit, 1GB for 64bit */ 1770 1770 if (is_32bit_task()) 1771 - rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); 1771 + rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT))); 1772 1772 else 1773 - rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); 1773 + rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT))); 1774 1774 1775 1775 return rnd << PAGE_SHIFT; 1776 1776 }
+2 -2
arch/powerpc/mm/mmap.c
··· 59 59 60 60 /* 8MB for 32bit, 1GB for 64bit */ 61 61 if (is_32bit_task()) 62 - rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT)); 62 + rnd = get_random_long() % (1<<(23-PAGE_SHIFT)); 63 63 else 64 - rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT)); 64 + rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT)); 65 65 66 66 return rnd << PAGE_SHIFT; 67 67 }
+1 -1
arch/sparc/kernel/sys_sparc_64.c
··· 264 264 unsigned long rnd = 0UL; 265 265 266 266 if (current->flags & PF_RANDOMIZE) { 267 - unsigned long val = get_random_int(); 267 + unsigned long val = get_random_long(); 268 268 if (test_thread_flag(TIF_32BIT)) 269 269 rnd = (val % (1UL << (23UL-PAGE_SHIFT))); 270 270 else
+3 -3
arch/x86/mm/mmap.c
··· 71 71 72 72 if (mmap_is_ia32()) 73 73 #ifdef CONFIG_COMPAT 74 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); 74 + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); 75 75 #else 76 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); 76 + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); 77 77 #endif 78 78 else 79 - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); 79 + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); 80 80 81 81 return rnd << PAGE_SHIFT; 82 82 }
+13
block/Kconfig
··· 88 88 T10/SCSI Data Integrity Field or the T13/ATA External Path 89 89 Protection. If in doubt, say N. 90 90 91 + config BLK_DEV_DAX 92 + bool "Block device DAX support" 93 + depends on FS_DAX 94 + depends on BROKEN 95 + help 96 + When DAX support is available (CONFIG_FS_DAX) raw block 97 + devices can also support direct userspace access to the 98 + storage capacity via MMAP(2) similar to a file on a 99 + DAX-enabled filesystem. However, the DAX I/O-path disables 100 + some standard I/O-statistics, and the MMAP(2) path has some 101 + operational differences due to bypassing the page 102 + cache. If in doubt, say N. 103 + 91 104 config BLK_DEV_THROTTLING 92 105 bool "Block layer bio throttling support" 93 106 depends on BLK_CGROUP=y
+22
drivers/char/random.c
··· 1819 1819 EXPORT_SYMBOL(get_random_int); 1820 1820 1821 1821 /* 1822 + * Same as get_random_int(), but returns unsigned long. 1823 + */ 1824 + unsigned long get_random_long(void) 1825 + { 1826 + __u32 *hash; 1827 + unsigned long ret; 1828 + 1829 + if (arch_get_random_long(&ret)) 1830 + return ret; 1831 + 1832 + hash = get_cpu_var(get_random_int_hash); 1833 + 1834 + hash[0] += current->pid + jiffies + random_get_entropy(); 1835 + md5_transform(hash, random_int_secret); 1836 + ret = *(unsigned long *)hash; 1837 + put_cpu_var(get_random_int_hash); 1838 + 1839 + return ret; 1840 + } 1841 + EXPORT_SYMBOL(get_random_long); 1842 + 1843 + /* 1822 1844 * randomize_range() returns a start address such that 1823 1845 * 1824 1846 * [...... <range> .....]
+1 -1
fs/binfmt_elf.c
··· 653 653 654 654 if ((current->flags & PF_RANDOMIZE) && 655 655 !(current->personality & ADDR_NO_RANDOMIZE)) { 656 - random_variable = (unsigned long) get_random_int(); 656 + random_variable = get_random_long(); 657 657 random_variable &= STACK_RND_MASK; 658 658 random_variable <<= PAGE_SHIFT; 659 659 }
+17 -2
fs/block_dev.c
··· 1201 1201 bdev->bd_disk = disk; 1202 1202 bdev->bd_queue = disk->queue; 1203 1203 bdev->bd_contains = bdev; 1204 - bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0; 1204 + if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access) 1205 + bdev->bd_inode->i_flags = S_DAX; 1206 + else 1207 + bdev->bd_inode->i_flags = 0; 1208 + 1205 1209 if (!partno) { 1206 1210 ret = -ENXIO; 1207 1211 bdev->bd_part = disk_get_part(disk, partno); ··· 1697 1693 return try_to_free_buffers(page); 1698 1694 } 1699 1695 1696 + static int blkdev_writepages(struct address_space *mapping, 1697 + struct writeback_control *wbc) 1698 + { 1699 + if (dax_mapping(mapping)) { 1700 + struct block_device *bdev = I_BDEV(mapping->host); 1701 + 1702 + return dax_writeback_mapping_range(mapping, bdev, wbc); 1703 + } 1704 + return generic_writepages(mapping, wbc); 1705 + } 1706 + 1700 1707 static const struct address_space_operations def_blk_aops = { 1701 1708 .readpage = blkdev_readpage, 1702 1709 .readpages = blkdev_readpages, 1703 1710 .writepage = blkdev_writepage, 1704 1711 .write_begin = blkdev_write_begin, 1705 1712 .write_end = blkdev_write_end, 1706 - .writepages = generic_writepages, 1713 + .writepages = blkdev_writepages, 1707 1714 .releasepage = blkdev_releasepage, 1708 1715 .direct_IO = blkdev_direct_IO, 1709 1716 .is_dirty_writeback = buffer_check_dirty_writeback,
+11 -10
fs/dax.c
··· 79 79 } 80 80 81 81 /* 82 - * dax_clear_blocks() is called from within transaction context from XFS, 82 + * dax_clear_sectors() is called from within transaction context from XFS, 83 83 * and hence this means the stack from this point must follow GFP_NOFS 84 84 * semantics for all operations. 85 85 */ 86 - int dax_clear_blocks(struct inode *inode, sector_t block, long _size) 86 + int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size) 87 87 { 88 - struct block_device *bdev = inode->i_sb->s_bdev; 89 88 struct blk_dax_ctl dax = { 90 - .sector = block << (inode->i_blkbits - 9), 89 + .sector = _sector, 91 90 .size = _size, 92 91 }; 93 92 ··· 108 109 wmb_pmem(); 109 110 return 0; 110 111 } 111 - EXPORT_SYMBOL_GPL(dax_clear_blocks); 112 + EXPORT_SYMBOL_GPL(dax_clear_sectors); 112 113 113 114 /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */ 114 115 static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first, ··· 484 485 * end]. This is required by data integrity operations to ensure file data is 485 486 * on persistent storage prior to completion of the operation. 486 487 */ 487 - int dax_writeback_mapping_range(struct address_space *mapping, loff_t start, 488 - loff_t end) 488 + int dax_writeback_mapping_range(struct address_space *mapping, 489 + struct block_device *bdev, struct writeback_control *wbc) 489 490 { 490 491 struct inode *inode = mapping->host; 491 - struct block_device *bdev = inode->i_sb->s_bdev; 492 492 pgoff_t start_index, end_index, pmd_index; 493 493 pgoff_t indices[PAGEVEC_SIZE]; 494 494 struct pagevec pvec; ··· 498 500 if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT)) 499 501 return -EIO; 500 502 501 - start_index = start >> PAGE_CACHE_SHIFT; 502 - end_index = end >> PAGE_CACHE_SHIFT; 503 + if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) 504 + return 0; 505 + 506 + start_index = wbc->range_start >> PAGE_CACHE_SHIFT; 507 + end_index = wbc->range_end >> PAGE_CACHE_SHIFT; 503 508 pmd_index = DAX_PMD_INDEX(start_index); 504 509 505 510 rcu_read_lock();
+13 -3
fs/ext2/inode.c
··· 737 737 * so that it's not found by another thread before it's 738 738 * initialised 739 739 */ 740 - err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key), 741 - 1 << inode->i_blkbits); 740 + err = dax_clear_sectors(inode->i_sb->s_bdev, 741 + le32_to_cpu(chain[depth-1].key) << 742 + (inode->i_blkbits - 9), 743 + 1 << inode->i_blkbits); 742 744 if (err) { 743 745 mutex_unlock(&ei->truncate_mutex); 744 746 goto cleanup; ··· 876 874 static int 877 875 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) 878 876 { 877 + #ifdef CONFIG_FS_DAX 878 + if (dax_mapping(mapping)) { 879 + return dax_writeback_mapping_range(mapping, 880 + mapping->host->i_sb->s_bdev, 881 + wbc); 882 + } 883 + #endif 884 + 879 885 return mpage_writepages(mapping, wbc, ext2_get_block); 880 886 } 881 887 ··· 1306 1296 inode->i_flags |= S_NOATIME; 1307 1297 if (flags & EXT2_DIRSYNC_FL) 1308 1298 inode->i_flags |= S_DIRSYNC; 1309 - if (test_opt(inode->i_sb, DAX)) 1299 + if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) 1310 1300 inode->i_flags |= S_DAX; 1311 1301 } 1312 1302
+5 -1
fs/ext4/inode.c
··· 2478 2478 2479 2479 trace_ext4_writepages(inode, wbc); 2480 2480 2481 + if (dax_mapping(mapping)) 2482 + return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, 2483 + wbc); 2484 + 2481 2485 /* 2482 2486 * No pages to write? This is mainly a kludge to avoid starting 2483 2487 * a transaction for special inodes like journal inode on last iput() ··· 4159 4155 new_fl |= S_NOATIME; 4160 4156 if (flags & EXT4_DIRSYNC_FL) 4161 4157 new_fl |= S_DIRSYNC; 4162 - if (test_opt(inode->i_sb, DAX)) 4158 + if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) 4163 4159 new_fl |= S_DAX; 4164 4160 inode_set_flags(inode, new_fl, 4165 4161 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
+5
fs/ext4/ioctl.c
··· 583 583 "Online defrag not supported with bigalloc"); 584 584 err = -EOPNOTSUPP; 585 585 goto mext_out; 586 + } else if (IS_DAX(inode)) { 587 + ext4_msg(sb, KERN_ERR, 588 + "Online defrag not supported with DAX"); 589 + err = -EOPNOTSUPP; 590 + goto mext_out; 586 591 } 587 592 588 593 err = mnt_want_write_file(filp);
+1
fs/ocfs2/aops.c
··· 956 956 tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 957 957 update_isize, end); 958 958 if (tmp_ret < 0) { 959 + ocfs2_inode_unlock(inode, 1); 959 960 ret = tmp_ret; 960 961 mlog_errno(ret); 961 962 brelse(di_bh);
+5 -1
fs/xfs/xfs_aops.c
··· 55 55 } while ((bh = bh->b_this_page) != head); 56 56 } 57 57 58 - STATIC struct block_device * 58 + struct block_device * 59 59 xfs_find_bdev_for_inode( 60 60 struct inode *inode) 61 61 { ··· 1208 1208 struct writeback_control *wbc) 1209 1209 { 1210 1210 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 1211 + if (dax_mapping(mapping)) 1212 + return dax_writeback_mapping_range(mapping, 1213 + xfs_find_bdev_for_inode(mapping->host), wbc); 1214 + 1211 1215 return generic_writepages(mapping, wbc); 1212 1216 } 1213 1217
+1
fs/xfs/xfs_aops.h
··· 62 62 struct buffer_head *map_bh, int create); 63 63 64 64 extern void xfs_count_page_state(struct page *, int *, int *); 65 + extern struct block_device *xfs_find_bdev_for_inode(struct inode *); 65 66 66 67 #endif /* __XFS_AOPS_H__ */
+2 -1
fs/xfs/xfs_bmap_util.c
··· 75 75 ssize_t size = XFS_FSB_TO_B(mp, count_fsb); 76 76 77 77 if (IS_DAX(VFS_I(ip))) 78 - return dax_clear_blocks(VFS_I(ip), block, size); 78 + return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)), 79 + sector, size); 79 80 80 81 /* 81 82 * let the block layer decide on the fastest method of
+5 -3
include/linux/dax.h
··· 7 7 8 8 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, 9 9 get_block_t, dio_iodone_t, int flags); 10 - int dax_clear_blocks(struct inode *, sector_t block, long size); 10 + int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size); 11 11 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); 12 12 int dax_truncate_page(struct inode *, loff_t from, get_block_t); 13 13 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, ··· 52 52 { 53 53 return mapping->host && IS_DAX(mapping->host); 54 54 } 55 - int dax_writeback_mapping_range(struct address_space *mapping, loff_t start, 56 - loff_t end); 55 + 56 + struct writeback_control; 57 + int dax_writeback_mapping_range(struct address_space *mapping, 58 + struct block_device *bdev, struct writeback_control *wbc); 57 59 #endif
+1
include/linux/random.h
··· 34 34 #endif 35 35 36 36 unsigned int get_random_int(void); 37 + unsigned long get_random_long(void); 37 38 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); 38 39 39 40 u32 prandom_u32(void);
+4 -8
mm/filemap.c
··· 446 446 { 447 447 int err = 0; 448 448 449 - if (mapping->nrpages) { 449 + if ((!dax_mapping(mapping) && mapping->nrpages) || 450 + (dax_mapping(mapping) && mapping->nrexceptional)) { 450 451 err = filemap_fdatawrite(mapping); 451 452 /* 452 453 * Even if the above returned error, the pages may be ··· 483 482 { 484 483 int err = 0; 485 484 486 - if (dax_mapping(mapping) && mapping->nrexceptional) { 487 - err = dax_writeback_mapping_range(mapping, lstart, lend); 488 - if (err) 489 - return err; 490 - } 491 - 492 - if (mapping->nrpages) { 485 + if ((!dax_mapping(mapping) && mapping->nrpages) || 486 + (dax_mapping(mapping) && mapping->nrexceptional)) { 493 487 err = __filemap_fdatawrite_range(mapping, lstart, lend, 494 488 WB_SYNC_ALL); 495 489 /* See comment of filemap_write_and_wait() */
+12 -2
mm/memory.c
··· 3404 3404 if (unlikely(pmd_none(*pmd)) && 3405 3405 unlikely(__pte_alloc(mm, vma, pmd, address))) 3406 3406 return VM_FAULT_OOM; 3407 - /* if an huge pmd materialized from under us just retry later */ 3408 - if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd))) 3407 + /* 3408 + * If a huge pmd materialized under us just retry later. Use 3409 + * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd 3410 + * didn't become pmd_trans_huge under us and then back to pmd_none, as 3411 + * a result of MADV_DONTNEED running immediately after a huge pmd fault 3412 + * in a different thread of this mm, in turn leading to a misleading 3413 + * pmd_trans_huge() retval. All we have to ensure is that it is a 3414 + * regular pmd that we can walk with pte_offset_map() and we can do that 3415 + * through an atomic read in C, which is what pmd_trans_unstable() 3416 + * provides. 3417 + */ 3418 + if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd))) 3409 3419 return 0; 3410 3420 /* 3411 3421 * A regular pmd is established and it can't morph into a huge pmd
+1 -1
mm/migrate.c
··· 1582 1582 (GFP_HIGHUSER_MOVABLE | 1583 1583 __GFP_THISNODE | __GFP_NOMEMALLOC | 1584 1584 __GFP_NORETRY | __GFP_NOWARN) & 1585 - ~(__GFP_IO | __GFP_FS), 0); 1585 + ~__GFP_RECLAIM, 0); 1586 1586 1587 1587 return newpage; 1588 1588 }