Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (patches from Andrew Morton)

Merge a bunch of fixes from Andrew Morton.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  fs/proc/task_mmu.c: fix buffer overflow in add_page_map()
  arch: *: Kconfig: add "kernel/Kconfig.freezer" to "arch/*/Kconfig"
  ocfs2: fix null pointer dereference in ocfs2_dir_foreach_blk_id()
  x86 get_unmapped_area(): use proper mmap base for bottom-up direction
  ocfs2: fix NULL pointer dereference in ocfs2_duplicate_clusters_by_page
  ocfs2: Revert 40bd62e to avoid regression in extended allocation
  drivers/rtc/rtc-stmp3xxx.c: provide timeout for potentially endless loop polling a HW bit
  hugetlb: fix lockdep splat caused by pmd sharing
  aoe: adjust ref of head for compound page tails
  microblaze: fix clone syscall
  mm: save soft-dirty bits on file pages
  mm: save soft-dirty bits on swapped pages
  memcg: don't initialize kmem-cache destroying work for root caches

+283 -106
+6
arch/Kconfig
··· 407 407 help 408 408 Architecture has the first two arguments of clone(2) swapped. 409 409 410 + config CLONE_BACKWARDS3 411 + bool 412 + help 413 + Architecture has tls passed as the 3rd argument of clone(2), 414 + not the 5th one. 415 + 410 416 config ODD_RT_SIGACTION 411 417 bool 412 418 help
+1
arch/hexagon/Kconfig
··· 158 158 endmenu 159 159 160 160 source "init/Kconfig" 161 + source "kernel/Kconfig.freezer" 161 162 source "drivers/Kconfig" 162 163 source "fs/Kconfig" 163 164
+1 -1
arch/microblaze/Kconfig
··· 28 28 select GENERIC_CLOCKEVENTS 29 29 select GENERIC_IDLE_POLL_SETUP 30 30 select MODULES_USE_ELF_RELA 31 - select CLONE_BACKWARDS 31 + select CLONE_BACKWARDS3 32 32 33 33 config SWAP 34 34 def_bool n
+1
arch/openrisc/Kconfig
··· 55 55 56 56 source "init/Kconfig" 57 57 58 + source "kernel/Kconfig.freezer" 58 59 59 60 menu "Processor type and features" 60 61
+2
arch/score/Kconfig
··· 87 87 88 88 source "init/Kconfig" 89 89 90 + source "kernel/Kconfig.freezer" 91 + 90 92 config MMU 91 93 def_bool y 92 94
+47 -1
arch/x86/include/asm/pgtable-2level.h
··· 55 55 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) 56 56 #endif 57 57 58 + #ifdef CONFIG_MEM_SOFT_DIRTY 59 + 60 + /* 61 + * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and 62 + * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset 63 + * into this range. 64 + */ 65 + #define PTE_FILE_MAX_BITS 28 66 + #define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) 67 + #define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1) 68 + #define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1) 69 + #define PTE_FILE_SHIFT4 (_PAGE_BIT_SOFT_DIRTY + 1) 70 + #define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) 71 + #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) 72 + #define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1) 73 + 74 + #define pte_to_pgoff(pte) \ 75 + ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \ 76 + & ((1U << PTE_FILE_BITS1) - 1))) \ 77 + + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \ 78 + & ((1U << PTE_FILE_BITS2) - 1)) \ 79 + << (PTE_FILE_BITS1)) \ 80 + + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \ 81 + & ((1U << PTE_FILE_BITS3) - 1)) \ 82 + << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ 83 + + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \ 84 + << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)) 85 + 86 + #define pgoff_to_pte(off) \ 87 + ((pte_t) { .pte_low = \ 88 + ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ 89 + + ((((off) >> PTE_FILE_BITS1) \ 90 + & ((1U << PTE_FILE_BITS2) - 1)) \ 91 + << PTE_FILE_SHIFT2) \ 92 + + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ 93 + & ((1U << PTE_FILE_BITS3) - 1)) \ 94 + << PTE_FILE_SHIFT3) \ 95 + + ((((off) >> \ 96 + (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \ 97 + << PTE_FILE_SHIFT4) \ 98 + + _PAGE_FILE }) 99 + 100 + #else /* CONFIG_MEM_SOFT_DIRTY */ 101 + 58 102 /* 59 103 * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, 60 - * split up the 29 bits of offset into this range: 104 + * split up the 29 bits of offset into this range. 
61 105 */ 62 106 #define PTE_FILE_MAX_BITS 29 63 107 #define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) ··· 131 87 + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ 132 88 << PTE_FILE_SHIFT3) \ 133 89 + _PAGE_FILE }) 90 + 91 + #endif /* CONFIG_MEM_SOFT_DIRTY */ 134 92 135 93 /* Encode and de-code a swap entry */ 136 94 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
+3
arch/x86/include/asm/pgtable-3level.h
··· 179 179 /* 180 180 * Bits 0, 6 and 7 are taken in the low part of the pte, 181 181 * put the 32 bits of offset into the high part. 182 + * 183 + * For soft-dirty tracking 11 bit is taken from 184 + * the low part of pte as well. 182 185 */ 183 186 #define pte_to_pgoff(pte) ((pte).pte_high) 184 187 #define pgoff_to_pte(off) \
+30
arch/x86/include/asm/pgtable.h
··· 314 314 return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); 315 315 } 316 316 317 + static inline pte_t pte_swp_mksoft_dirty(pte_t pte) 318 + { 319 + return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); 320 + } 321 + 322 + static inline int pte_swp_soft_dirty(pte_t pte) 323 + { 324 + return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; 325 + } 326 + 327 + static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) 328 + { 329 + return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); 330 + } 331 + 332 + static inline pte_t pte_file_clear_soft_dirty(pte_t pte) 333 + { 334 + return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); 335 + } 336 + 337 + static inline pte_t pte_file_mksoft_dirty(pte_t pte) 338 + { 339 + return pte_set_flags(pte, _PAGE_SOFT_DIRTY); 340 + } 341 + 342 + static inline int pte_file_soft_dirty(pte_t pte) 343 + { 344 + return pte_flags(pte) & _PAGE_SOFT_DIRTY; 345 + } 346 + 317 347 /* 318 348 * Mask out unsupported bits in a present pgprot. Non-present pgprots 319 349 * can use those bits for other purposes, so leave them be.
+16 -1
arch/x86/include/asm/pgtable_types.h
··· 61 61 * they do not conflict with each other. 62 62 */ 63 63 64 + #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_HIDDEN 65 + 64 66 #ifdef CONFIG_MEM_SOFT_DIRTY 65 - #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) 67 + #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY) 66 68 #else 67 69 #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) 70 + #endif 71 + 72 + /* 73 + * Tracking soft dirty bit when a page goes to a swap is tricky. 74 + * We need a bit which can be stored in pte _and_ not conflict 75 + * with swap entry format. On x86 bits 6 and 7 are *not* involved 76 + * into swap entry computation, but bit 6 is used for nonlinear 77 + * file mapping, so we borrow bit 7 for soft dirty tracking. 78 + */ 79 + #ifdef CONFIG_MEM_SOFT_DIRTY 80 + #define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE 81 + #else 82 + #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) 68 83 #endif 69 84 70 85 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+1 -1
arch/x86/kernel/sys_x86_64.c
··· 101 101 *begin = new_begin; 102 102 } 103 103 } else { 104 - *begin = TASK_UNMAPPED_BASE; 104 + *begin = mmap_legacy_base(); 105 105 *end = TASK_SIZE; 106 106 } 107 107 }
+1 -1
arch/x86/mm/mmap.c
··· 98 98 * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 99 99 * does, but not when emulating X86_32 100 100 */ 101 - static unsigned long mmap_legacy_base(void) 101 + unsigned long mmap_legacy_base(void) 102 102 { 103 103 if (mmap_is_ia32()) 104 104 return TASK_UNMAPPED_BASE;
+7 -10
drivers/block/aoe/aoecmd.c
··· 906 906 int i; 907 907 908 908 bio_for_each_segment(bv, bio, i) { 909 - page = bv->bv_page; 910 909 /* Non-zero page count for non-head members of 911 - * compound pages is no longer allowed by the kernel, 912 - * but this has never been seen here. 910 + * compound pages is no longer allowed by the kernel. 913 911 */ 914 - if (unlikely(PageCompound(page))) 915 - if (compound_trans_head(page) != page) { 916 - pr_crit("page tail used for block I/O\n"); 917 - BUG(); 918 - } 912 + page = compound_trans_head(bv->bv_page); 919 913 atomic_inc(&page->_count); 920 914 } 921 915 } ··· 918 924 bio_pagedec(struct bio *bio) 919 925 { 920 926 struct bio_vec *bv; 927 + struct page *page; 921 928 int i; 922 929 923 - bio_for_each_segment(bv, bio, i) 924 - atomic_dec(&bv->bv_page->_count); 930 + bio_for_each_segment(bv, bio, i) { 931 + page = compound_trans_head(bv->bv_page); 932 + atomic_dec(&page->_count); 933 + } 925 934 } 926 935 927 936 static void
+25 -10
drivers/rtc/rtc-stmp3xxx.c
··· 23 23 #include <linux/init.h> 24 24 #include <linux/platform_device.h> 25 25 #include <linux/interrupt.h> 26 + #include <linux/delay.h> 26 27 #include <linux/rtc.h> 27 28 #include <linux/slab.h> 28 29 #include <linux/of_device.h> ··· 120 119 } 121 120 #endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */ 122 121 123 - static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data) 122 + static int stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data) 124 123 { 124 + int timeout = 5000; /* 3ms according to i.MX28 Ref Manual */ 125 125 /* 126 - * The datasheet doesn't say which way round the 127 - * NEW_REGS/STALE_REGS bitfields go. In fact it's 0x1=P0, 128 - * 0x2=P1, .., 0x20=P5, 0x40=ALARM, 0x80=SECONDS 126 + * The i.MX28 Applications Processor Reference Manual, Rev. 1, 2010 127 + * states: 128 + * | The order in which registers are updated is 129 + * | Persistent 0, 1, 2, 3, 4, 5, Alarm, Seconds. 130 + * | (This list is in bitfield order, from LSB to MSB, as they would 131 + * | appear in the STALE_REGS and NEW_REGS bitfields of the HW_RTC_STAT 132 + * | register. For example, the Seconds register corresponds to 133 + * | STALE_REGS or NEW_REGS containing 0x80.) 129 134 */ 130 - while (readl(rtc_data->io + STMP3XXX_RTC_STAT) & 131 - (0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)) 132 - cpu_relax(); 135 + do { 136 + if (!(readl(rtc_data->io + STMP3XXX_RTC_STAT) & 137 + (0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT))) 138 + return 0; 139 + udelay(1); 140 + } while (--timeout > 0); 141 + return (readl(rtc_data->io + STMP3XXX_RTC_STAT) & 142 + (0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)) ? 
-ETIME : 0; 133 143 } 134 144 135 145 /* Time read/write */ 136 146 static int stmp3xxx_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) 137 147 { 148 + int ret; 138 149 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); 139 150 140 - stmp3xxx_wait_time(rtc_data); 151 + ret = stmp3xxx_wait_time(rtc_data); 152 + if (ret) 153 + return ret; 154 + 141 155 rtc_time_to_tm(readl(rtc_data->io + STMP3XXX_RTC_SECONDS), rtc_tm); 142 156 return 0; 143 157 } ··· 162 146 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); 163 147 164 148 writel(t, rtc_data->io + STMP3XXX_RTC_SECONDS); 165 - stmp3xxx_wait_time(rtc_data); 166 - return 0; 149 + return stmp3xxx_wait_time(rtc_data); 167 150 } 168 151 169 152 /* interrupt(s) handler */
+10
fs/hugetlbfs/inode.c
··· 463 463 return inode; 464 464 } 465 465 466 + /* 467 + * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never 468 + * be taken from reclaim -- unlike regular filesystems. This needs an 469 + * annotation because huge_pmd_share() does an allocation under 470 + * i_mmap_mutex. 471 + */ 472 + struct lock_class_key hugetlbfs_i_mmap_mutex_key; 473 + 466 474 static struct inode *hugetlbfs_get_inode(struct super_block *sb, 467 475 struct inode *dir, 468 476 umode_t mode, dev_t dev) ··· 482 474 struct hugetlbfs_inode_info *info; 483 475 inode->i_ino = get_next_ino(); 484 476 inode_init_owner(inode, dir, mode); 477 + lockdep_set_class(&inode->i_mapping->i_mmap_mutex, 478 + &hugetlbfs_i_mmap_mutex_key); 485 479 inode->i_mapping->a_ops = &hugetlbfs_aops; 486 480 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 487 481 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+1 -1
fs/ocfs2/aops.c
··· 1757 1757 goto out; 1758 1758 } else if (ret == 1) { 1759 1759 clusters_need = wc->w_clen; 1760 - ret = ocfs2_refcount_cow(inode, filp, di_bh, 1760 + ret = ocfs2_refcount_cow(inode, di_bh, 1761 1761 wc->w_cpos, wc->w_clen, UINT_MAX); 1762 1762 if (ret) { 1763 1763 mlog_errno(ret);
+1 -3
fs/ocfs2/dir.c
··· 2153 2153 { 2154 2154 int ret; 2155 2155 struct ocfs2_empty_dir_priv priv = { 2156 - .ctx.actor = ocfs2_empty_dir_filldir 2156 + .ctx.actor = ocfs2_empty_dir_filldir, 2157 2157 }; 2158 - 2159 - memset(&priv, 0, sizeof(priv)); 2160 2158 2161 2159 if (ocfs2_dir_indexed(inode)) { 2162 2160 ret = ocfs2_empty_dir_dx(inode, &priv);
+3 -3
fs/ocfs2/file.c
··· 370 370 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 371 371 goto out; 372 372 373 - return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); 373 + return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); 374 374 375 375 out: 376 376 return status; ··· 899 899 zero_clusters = last_cpos - zero_cpos; 900 900 901 901 if (needs_cow) { 902 - rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, 902 + rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, 903 903 zero_clusters, UINT_MAX); 904 904 if (rc) { 905 905 mlog_errno(rc); ··· 2078 2078 2079 2079 *meta_level = 1; 2080 2080 2081 - ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); 2081 + ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); 2082 2082 if (ret) 2083 2083 mlog_errno(ret); 2084 2084 out:
+1 -1
fs/ocfs2/journal.h
··· 537 537 extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); 538 538 539 539 return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + 540 - ocfs2_quota_trans_credits(sb) + bits_wanted; 540 + ocfs2_quota_trans_credits(sb); 541 541 } 542 542 543 543 static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
+1 -1
fs/ocfs2/move_extents.c
··· 69 69 u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); 70 70 u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); 71 71 72 - ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, 72 + ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos, 73 73 p_cpos, new_p_cpos, len); 74 74 if (ret) { 75 75 mlog_errno(ret);
+8 -45
fs/ocfs2/refcounttree.c
··· 49 49 50 50 struct ocfs2_cow_context { 51 51 struct inode *inode; 52 - struct file *file; 53 52 u32 cow_start; 54 53 u32 cow_len; 55 54 struct ocfs2_extent_tree data_et; ··· 65 66 u32 *num_clusters, 66 67 unsigned int *extent_flags); 67 68 int (*cow_duplicate_clusters)(handle_t *handle, 68 - struct file *file, 69 + struct inode *inode, 69 70 u32 cpos, u32 old_cluster, 70 71 u32 new_cluster, u32 new_len); 71 72 }; ··· 2921 2922 } 2922 2923 2923 2924 int ocfs2_duplicate_clusters_by_page(handle_t *handle, 2924 - struct file *file, 2925 + struct inode *inode, 2925 2926 u32 cpos, u32 old_cluster, 2926 2927 u32 new_cluster, u32 new_len) 2927 2928 { 2928 2929 int ret = 0, partial; 2929 - struct inode *inode = file_inode(file); 2930 - struct ocfs2_caching_info *ci = INODE_CACHE(inode); 2931 - struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 2930 + struct super_block *sb = inode->i_sb; 2932 2931 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); 2933 2932 struct page *page; 2934 2933 pgoff_t page_index; ··· 2975 2978 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) 2976 2979 BUG_ON(PageDirty(page)); 2977 2980 2978 - if (PageReadahead(page)) { 2979 - page_cache_async_readahead(mapping, 2980 - &file->f_ra, file, 2981 - page, page_index, 2982 - readahead_pages); 2983 - } 2984 - 2985 2981 if (!PageUptodate(page)) { 2986 2982 ret = block_read_full_page(page, ocfs2_get_block); 2987 2983 if (ret) { ··· 2994 3004 } 2995 3005 } 2996 3006 2997 - ocfs2_map_and_dirty_page(inode, handle, from, to, 3007 + ocfs2_map_and_dirty_page(inode, 3008 + handle, from, to, 2998 3009 page, 0, &new_block); 2999 3010 mark_page_accessed(page); 3000 3011 unlock: ··· 3011 3020 } 3012 3021 3013 3022 int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, 3014 - struct file *file, 3023 + struct inode *inode, 3015 3024 u32 cpos, u32 old_cluster, 3016 3025 u32 new_cluster, u32 new_len) 3017 3026 { 3018 3027 int ret = 0; 3019 - struct inode *inode = file_inode(file); 3020 3028 struct 
super_block *sb = inode->i_sb; 3021 3029 struct ocfs2_caching_info *ci = INODE_CACHE(inode); 3022 3030 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); ··· 3140 3150 3141 3151 /*If the old clusters is unwritten, no need to duplicate. */ 3142 3152 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { 3143 - ret = context->cow_duplicate_clusters(handle, context->file, 3153 + ret = context->cow_duplicate_clusters(handle, context->inode, 3144 3154 cpos, old, new, len); 3145 3155 if (ret) { 3146 3156 mlog_errno(ret); ··· 3418 3428 return ret; 3419 3429 } 3420 3430 3421 - static void ocfs2_readahead_for_cow(struct inode *inode, 3422 - struct file *file, 3423 - u32 start, u32 len) 3424 - { 3425 - struct address_space *mapping; 3426 - pgoff_t index; 3427 - unsigned long num_pages; 3428 - int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; 3429 - 3430 - if (!file) 3431 - return; 3432 - 3433 - mapping = file->f_mapping; 3434 - num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT; 3435 - if (!num_pages) 3436 - num_pages = 1; 3437 - 3438 - index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT; 3439 - page_cache_sync_readahead(mapping, &file->f_ra, file, 3440 - index, num_pages); 3441 - } 3442 - 3443 3431 /* 3444 3432 * Starting at cpos, try to CoW write_len clusters. Don't CoW 3445 3433 * past max_cpos. This will stop when it runs into a hole or an 3446 3434 * unrefcounted extent. 
3447 3435 */ 3448 3436 static int ocfs2_refcount_cow_hunk(struct inode *inode, 3449 - struct file *file, 3450 3437 struct buffer_head *di_bh, 3451 3438 u32 cpos, u32 write_len, u32 max_cpos) 3452 3439 { ··· 3452 3485 3453 3486 BUG_ON(cow_len == 0); 3454 3487 3455 - ocfs2_readahead_for_cow(inode, file, cow_start, cow_len); 3456 - 3457 3488 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); 3458 3489 if (!context) { 3459 3490 ret = -ENOMEM; ··· 3473 3508 context->ref_root_bh = ref_root_bh; 3474 3509 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; 3475 3510 context->get_clusters = ocfs2_di_get_clusters; 3476 - context->file = file; 3477 3511 3478 3512 ocfs2_init_dinode_extent_tree(&context->data_et, 3479 3513 INODE_CACHE(inode), di_bh); ··· 3501 3537 * clusters between cpos and cpos+write_len are safe to modify. 3502 3538 */ 3503 3539 int ocfs2_refcount_cow(struct inode *inode, 3504 - struct file *file, 3505 3540 struct buffer_head *di_bh, 3506 3541 u32 cpos, u32 write_len, u32 max_cpos) 3507 3542 { ··· 3520 3557 num_clusters = write_len; 3521 3558 3522 3559 if (ext_flags & OCFS2_EXT_REFCOUNTED) { 3523 - ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, 3560 + ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, 3524 3561 num_clusters, max_cpos); 3525 3562 if (ret) { 3526 3563 mlog_errno(ret);
+3 -3
fs/ocfs2/refcounttree.h
··· 53 53 int *credits, 54 54 int *ref_blocks); 55 55 int ocfs2_refcount_cow(struct inode *inode, 56 - struct file *filep, struct buffer_head *di_bh, 56 + struct buffer_head *di_bh, 57 57 u32 cpos, u32 write_len, u32 max_cpos); 58 58 59 59 typedef int (ocfs2_post_refcount_func)(struct inode *inode, ··· 85 85 u32 cpos, u32 write_len, 86 86 struct ocfs2_post_refcount *post); 87 87 int ocfs2_duplicate_clusters_by_page(handle_t *handle, 88 - struct file *file, 88 + struct inode *inode, 89 89 u32 cpos, u32 old_cluster, 90 90 u32 new_cluster, u32 new_len); 91 91 int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, 92 - struct file *file, 92 + struct inode *inode, 93 93 u32 cpos, u32 old_cluster, 94 94 u32 new_cluster, u32 new_len); 95 95 int ocfs2_cow_sync_writeback(struct super_block *sb,
+21 -10
fs/proc/task_mmu.c
··· 730 730 * of how soft-dirty works. 731 731 */ 732 732 pte_t ptent = *pte; 733 - ptent = pte_wrprotect(ptent); 734 - ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); 733 + 734 + if (pte_present(ptent)) { 735 + ptent = pte_wrprotect(ptent); 736 + ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); 737 + } else if (is_swap_pte(ptent)) { 738 + ptent = pte_swp_clear_soft_dirty(ptent); 739 + } else if (pte_file(ptent)) { 740 + ptent = pte_file_clear_soft_dirty(ptent); 741 + } 742 + 735 743 set_pte_at(vma->vm_mm, addr, pte, ptent); 736 744 #endif 737 745 } ··· 760 752 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 761 753 for (; addr != end; pte++, addr += PAGE_SIZE) { 762 754 ptent = *pte; 763 - if (!pte_present(ptent)) 764 - continue; 765 755 766 756 if (cp->type == CLEAR_REFS_SOFT_DIRTY) { 767 757 clear_soft_dirty(vma, addr, pte); 768 758 continue; 769 759 } 760 + 761 + if (!pte_present(ptent)) 762 + continue; 770 763 771 764 page = vm_normal_page(vma, addr, ptent); 772 765 if (!page) ··· 868 859 } pagemap_entry_t; 869 860 870 861 struct pagemapread { 871 - int pos, len; 862 + int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ 872 863 pagemap_entry_t *buffer; 873 864 bool v2; 874 865 }; ··· 876 867 #define PAGEMAP_WALK_SIZE (PMD_SIZE) 877 868 #define PAGEMAP_WALK_MASK (PMD_MASK) 878 869 879 - #define PM_ENTRY_BYTES sizeof(u64) 870 + #define PM_ENTRY_BYTES sizeof(pagemap_entry_t) 880 871 #define PM_STATUS_BITS 3 881 872 #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) 882 873 #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) ··· 939 930 flags = PM_PRESENT; 940 931 page = vm_normal_page(vma, addr, pte); 941 932 } else if (is_swap_pte(pte)) { 942 - swp_entry_t entry = pte_to_swp_entry(pte); 943 - 933 + swp_entry_t entry; 934 + if (pte_swp_soft_dirty(pte)) 935 + flags2 |= __PM_SOFT_DIRTY; 936 + entry = pte_to_swp_entry(pte); 944 937 frame = swp_type(entry) | 945 938 (swp_offset(entry) << MAX_SWAPFILES_SHIFT); 946 939 flags = PM_SWAP; 
··· 1127 1116 goto out_task; 1128 1117 1129 1118 pm.v2 = soft_dirty_cleared; 1130 - pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 1131 - pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 1119 + pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 1120 + pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY); 1132 1121 ret = -ENOMEM; 1133 1122 if (!pm.buffer) 1134 1123 goto out_task;
+30
include/asm-generic/pgtable.h
··· 417 417 { 418 418 return pmd; 419 419 } 420 + 421 + static inline pte_t pte_swp_mksoft_dirty(pte_t pte) 422 + { 423 + return pte; 424 + } 425 + 426 + static inline int pte_swp_soft_dirty(pte_t pte) 427 + { 428 + return 0; 429 + } 430 + 431 + static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) 432 + { 433 + return pte; 434 + } 435 + 436 + static inline pte_t pte_file_clear_soft_dirty(pte_t pte) 437 + { 438 + return pte; 439 + } 440 + 441 + static inline pte_t pte_file_mksoft_dirty(pte_t pte) 442 + { 443 + return pte; 444 + } 445 + 446 + static inline int pte_file_soft_dirty(pte_t pte) 447 + { 448 + return 0; 449 + } 420 450 #endif 421 451 422 452 #ifndef __HAVE_PFNMAP_TRACKING
+1
include/linux/sched.h
··· 314 314 struct user_namespace; 315 315 316 316 #ifdef CONFIG_MMU 317 + extern unsigned long mmap_legacy_base(void); 317 318 extern void arch_pick_mmap_layout(struct mm_struct *mm); 318 319 extern unsigned long 319 320 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
+2
include/linux/swapops.h
··· 67 67 swp_entry_t arch_entry; 68 68 69 69 BUG_ON(pte_file(pte)); 70 + if (pte_swp_soft_dirty(pte)) 71 + pte = pte_swp_clear_soft_dirty(pte); 70 72 arch_entry = __pte_to_swp_entry(pte); 71 73 return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); 72 74 }
+5
include/linux/syscalls.h
··· 802 802 asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int, 803 803 int __user *); 804 804 #else 805 + #ifdef CONFIG_CLONE_BACKWARDS3 806 + asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *, 807 + int __user *, int); 808 + #else 805 809 asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, 806 810 int __user *, int); 811 + #endif 807 812 #endif 808 813 809 814 asmlinkage long sys_execve(const char __user *filename,
+6
kernel/fork.c
··· 1679 1679 int __user *, parent_tidptr, 1680 1680 int __user *, child_tidptr, 1681 1681 int, tls_val) 1682 + #elif defined(CONFIG_CLONE_BACKWARDS3) 1683 + SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, 1684 + int, stack_size, 1685 + int __user *, parent_tidptr, 1686 + int __user *, child_tidptr, 1687 + int, tls_val) 1682 1688 #else 1683 1689 SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, 1684 1690 int __user *, parent_tidptr,
+9 -4
mm/fremap.c
··· 57 57 unsigned long addr, unsigned long pgoff, pgprot_t prot) 58 58 { 59 59 int err = -ENOMEM; 60 - pte_t *pte; 60 + pte_t *pte, ptfile; 61 61 spinlock_t *ptl; 62 62 63 63 pte = get_locked_pte(mm, addr, &ptl); 64 64 if (!pte) 65 65 goto out; 66 66 67 - if (!pte_none(*pte)) 68 - zap_pte(mm, vma, addr, pte); 67 + ptfile = pgoff_to_pte(pgoff); 69 68 70 - set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff)); 69 + if (!pte_none(*pte)) { 70 + if (pte_present(*pte) && pte_soft_dirty(*pte)) 71 + pte_file_mksoft_dirty(ptfile); 72 + zap_pte(mm, vma, addr, pte); 73 + } 74 + 75 + set_pte_at(mm, addr, pte, ptfile); 71 76 /* 72 77 * We don't need to run update_mmu_cache() here because the "file pte" 73 78 * being installed by install_file_pte() is not a real pte - it's a
+2 -2
mm/memcontrol.c
··· 3195 3195 if (!s->memcg_params) 3196 3196 return -ENOMEM; 3197 3197 3198 - INIT_WORK(&s->memcg_params->destroy, 3199 - kmem_cache_destroy_work_func); 3200 3198 if (memcg) { 3201 3199 s->memcg_params->memcg = memcg; 3202 3200 s->memcg_params->root_cache = root_cache; 3201 + INIT_WORK(&s->memcg_params->destroy, 3202 + kmem_cache_destroy_work_func); 3203 3203 } else 3204 3204 s->memcg_params->is_root_cache = true; 3205 3205
+10 -3
mm/memory.c
··· 1141 1141 continue; 1142 1142 if (unlikely(details) && details->nonlinear_vma 1143 1143 && linear_page_index(details->nonlinear_vma, 1144 - addr) != page->index) 1145 - set_pte_at(mm, addr, pte, 1146 - pgoff_to_pte(page->index)); 1144 + addr) != page->index) { 1145 + pte_t ptfile = pgoff_to_pte(page->index); 1146 + if (pte_soft_dirty(ptent)) 1147 + pte_file_mksoft_dirty(ptfile); 1148 + set_pte_at(mm, addr, pte, ptfile); 1149 + } 1147 1150 if (PageAnon(page)) 1148 1151 rss[MM_ANONPAGES]--; 1149 1152 else { ··· 3118 3115 exclusive = 1; 3119 3116 } 3120 3117 flush_icache_page(vma, page); 3118 + if (pte_swp_soft_dirty(orig_pte)) 3119 + pte = pte_mksoft_dirty(pte); 3121 3120 set_pte_at(mm, address, page_table, pte); 3122 3121 if (page == swapcache) 3123 3122 do_page_add_anon_rmap(page, vma, address, exclusive); ··· 3413 3408 entry = mk_pte(page, vma->vm_page_prot); 3414 3409 if (flags & FAULT_FLAG_WRITE) 3415 3410 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 3411 + else if (pte_file(orig_pte) && pte_file_soft_dirty(orig_pte)) 3412 + pte_mksoft_dirty(entry); 3416 3413 if (anon) { 3417 3414 inc_mm_counter_fast(mm, MM_ANONPAGES); 3418 3415 page_add_new_anon_rmap(page, vma, address);
+11 -3
mm/rmap.c
··· 1236 1236 swp_entry_to_pte(make_hwpoison_entry(page))); 1237 1237 } else if (PageAnon(page)) { 1238 1238 swp_entry_t entry = { .val = page_private(page) }; 1239 + pte_t swp_pte; 1239 1240 1240 1241 if (PageSwapCache(page)) { 1241 1242 /* ··· 1265 1264 BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION); 1266 1265 entry = make_migration_entry(page, pte_write(pteval)); 1267 1266 } 1268 - set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); 1267 + swp_pte = swp_entry_to_pte(entry); 1268 + if (pte_soft_dirty(pteval)) 1269 + swp_pte = pte_swp_mksoft_dirty(swp_pte); 1270 + set_pte_at(mm, address, pte, swp_pte); 1269 1271 BUG_ON(pte_file(*pte)); 1270 1272 } else if (IS_ENABLED(CONFIG_MIGRATION) && 1271 1273 (TTU_ACTION(flags) == TTU_MIGRATION)) { ··· 1405 1401 pteval = ptep_clear_flush(vma, address, pte); 1406 1402 1407 1403 /* If nonlinear, store the file page offset in the pte. */ 1408 - if (page->index != linear_page_index(vma, address)) 1409 - set_pte_at(mm, address, pte, pgoff_to_pte(page->index)); 1404 + if (page->index != linear_page_index(vma, address)) { 1405 + pte_t ptfile = pgoff_to_pte(page->index); 1406 + if (pte_soft_dirty(pteval)) 1407 + pte_file_mksoft_dirty(ptfile); 1408 + set_pte_at(mm, address, pte, ptfile); 1409 + } 1410 1410 1411 1411 /* Move the dirty bit to the physical page now the pte is gone. */ 1412 1412 if (pte_dirty(pteval))
+17 -2
mm/swapfile.c
··· 866 866 } 867 867 #endif /* CONFIG_HIBERNATION */ 868 868 869 + static inline int maybe_same_pte(pte_t pte, pte_t swp_pte) 870 + { 871 + #ifdef CONFIG_MEM_SOFT_DIRTY 872 + /* 873 + * When pte keeps soft dirty bit the pte generated 874 + * from swap entry does not has it, still it's same 875 + * pte from logical point of view. 876 + */ 877 + pte_t swp_pte_dirty = pte_swp_mksoft_dirty(swp_pte); 878 + return pte_same(pte, swp_pte) || pte_same(pte, swp_pte_dirty); 879 + #else 880 + return pte_same(pte, swp_pte); 881 + #endif 882 + } 883 + 869 884 /* 870 885 * No need to decide whether this PTE shares the swap entry with others, 871 886 * just let do_wp_page work it out if a write is requested later - to ··· 907 892 } 908 893 909 894 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 910 - if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { 895 + if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) { 911 896 mem_cgroup_cancel_charge_swapin(memcg); 912 897 ret = 0; 913 898 goto out; ··· 962 947 * swapoff spends a _lot_ of time in this loop! 963 948 * Test inline before going to call unuse_pte. 964 949 */ 965 - if (unlikely(pte_same(*pte, swp_pte))) { 950 + if (unlikely(maybe_same_pte(*pte, swp_pte))) { 966 951 pte_unmap(pte); 967 952 ret = unuse_pte(vma, pmd, addr, entry, page); 968 953 if (ret)