Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
"13 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
rbtree: include rcu.h
scripts/faddr2line: fix error when addr2line output contains discriminator
ocfs2: take inode cluster lock before moving reflinked inode from orphan dir
mm, oom: fix concurrent munlock and oom reaper unmap, v3
mm: migrate: fix double call of radix_tree_replace_slot()
proc/kcore: don't bounds check against address 0
mm: don't show nr_indirectly_reclaimable in /proc/vmstat
mm: sections are not offlined during memory hotremove
z3fold: fix reclaim lock-ups
init: fix false positives in W+X checking
lib/find_bit_benchmark.c: avoid soft lockup in test_find_first_bit()
KASAN: prohibit KASAN+STRUCTLEAK combination
MAINTAINERS: update Shuah's email address

+164 -87
-3
MAINTAINERS
··· 3691 3691 3692 3692 CPU POWER MONITORING SUBSYSTEM 3693 3693 M: Thomas Renninger <trenn@suse.com> 3694 - M: Shuah Khan <shuahkh@osg.samsung.com> 3695 3694 M: Shuah Khan <shuah@kernel.org> 3696 3695 L: linux-pm@vger.kernel.org 3697 3696 S: Maintained ··· 7695 7696 F: include/uapi/linux/sunrpc/ 7696 7697 7697 7698 KERNEL SELFTEST FRAMEWORK 7698 - M: Shuah Khan <shuahkh@osg.samsung.com> 7699 7699 M: Shuah Khan <shuah@kernel.org> 7700 7700 L: linux-kselftest@vger.kernel.org 7701 7701 T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git ··· 14648 14650 14649 14651 USB OVER IP DRIVER 14650 14652 M: Valentina Manea <valentina.manea.m@gmail.com> 14651 - M: Shuah Khan <shuahkh@osg.samsung.com> 14652 14653 M: Shuah Khan <shuah@kernel.org> 14653 14654 L: linux-usb@vger.kernel.org 14654 14655 S: Maintained
+4
arch/Kconfig
··· 464 464 config GCC_PLUGIN_STRUCTLEAK 465 465 bool "Force initialization of variables containing userspace addresses" 466 466 depends on GCC_PLUGINS 467 + # Currently STRUCTLEAK inserts initialization out of live scope of 468 + # variables from KASAN point of view. This leads to KASAN false 469 + # positive reports. Prohibit this combination for now. 470 + depends on !KASAN_EXTRA 467 471 help 468 472 This plugin zero-initializes any structures containing a 469 473 __user attribute. This can prevent some classes of information
+12 -2
fs/ocfs2/refcounttree.c
··· 4250 4250 static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, 4251 4251 struct dentry *new_dentry, bool preserve) 4252 4252 { 4253 - int error; 4253 + int error, had_lock; 4254 4254 struct inode *inode = d_inode(old_dentry); 4255 4255 struct buffer_head *old_bh = NULL; 4256 4256 struct inode *new_orphan_inode = NULL; 4257 + struct ocfs2_lock_holder oh; 4257 4258 4258 4259 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) 4259 4260 return -EOPNOTSUPP; ··· 4296 4295 goto out; 4297 4296 } 4298 4297 4298 + had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1, 4299 + &oh); 4300 + if (had_lock < 0) { 4301 + error = had_lock; 4302 + mlog_errno(error); 4303 + goto out; 4304 + } 4305 + 4299 4306 /* If the security isn't preserved, we need to re-initialize them. */ 4300 4307 if (!preserve) { 4301 4308 error = ocfs2_init_security_and_acl(dir, new_orphan_inode, ··· 4311 4302 if (error) 4312 4303 mlog_errno(error); 4313 4304 } 4314 - out: 4315 4305 if (!error) { 4316 4306 error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, 4317 4307 new_dentry); 4318 4308 if (error) 4319 4309 mlog_errno(error); 4320 4310 } 4311 + ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock); 4321 4312 4313 + out: 4322 4314 if (new_orphan_inode) { 4323 4315 /* 4324 4316 * We need to open_unlock the inode no matter whether we
+16 -7
fs/proc/kcore.c
··· 209 209 { 210 210 struct list_head *head = (struct list_head *)arg; 211 211 struct kcore_list *ent; 212 + struct page *p; 213 + 214 + if (!pfn_valid(pfn)) 215 + return 1; 216 + 217 + p = pfn_to_page(pfn); 218 + if (!memmap_valid_within(pfn, p, page_zone(p))) 219 + return 1; 212 220 213 221 ent = kmalloc(sizeof(*ent), GFP_KERNEL); 214 222 if (!ent) 215 223 return -ENOMEM; 216 - ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT)); 224 + ent->addr = (unsigned long)page_to_virt(p); 217 225 ent->size = nr_pages << PAGE_SHIFT; 218 226 219 - /* Sanity check: Can happen in 32bit arch...maybe */ 220 - if (ent->addr < (unsigned long) __va(0)) 227 + if (!virt_addr_valid(ent->addr)) 221 228 goto free_out; 222 229 223 230 /* cut not-mapped area. ....from ppc-32 code. */ 224 231 if (ULONG_MAX - ent->addr < ent->size) 225 232 ent->size = ULONG_MAX - ent->addr; 226 233 227 - /* cut when vmalloc() area is higher than direct-map area */ 228 - if (VMALLOC_START > (unsigned long)__va(0)) { 229 - if (ent->addr > VMALLOC_START) 230 - goto free_out; 234 + /* 235 + * We've already checked virt_addr_valid so we know this address 236 + * is a valid pointer, therefore we can check against it to determine 237 + * if we need to trim 238 + */ 239 + if (VMALLOC_START > ent->addr) { 231 240 if (VMALLOC_START - ent->addr < ent->size) 232 241 ent->size = VMALLOC_START - ent->addr; 233 242 }
+2
include/linux/oom.h
··· 95 95 return 0; 96 96 } 97 97 98 + void __oom_reap_task_mm(struct mm_struct *mm); 99 + 98 100 extern unsigned long oom_badness(struct task_struct *p, 99 101 struct mem_cgroup *memcg, const nodemask_t *nodemask, 100 102 unsigned long totalpages);
+1
include/linux/rbtree_augmented.h
··· 26 26 27 27 #include <linux/compiler.h> 28 28 #include <linux/rbtree.h> 29 + #include <linux/rcupdate.h> 29 30 30 31 /* 31 32 * Please note - only struct rb_augment_callbacks and the prototypes for
+1
include/linux/rbtree_latch.h
··· 35 35 36 36 #include <linux/rbtree.h> 37 37 #include <linux/seqlock.h> 38 + #include <linux/rcupdate.h> 38 39 39 40 struct latch_tree_node { 40 41 struct rb_node node[2];
+7
init/main.c
··· 1034 1034 static void mark_readonly(void) 1035 1035 { 1036 1036 if (rodata_enabled) { 1037 + /* 1038 + * load_module() results in W+X mappings, which are cleaned up 1039 + * with call_rcu_sched(). Let's make sure that queued work is 1040 + * flushed so that we don't hit false positives looking for 1041 + * insecure pages which are W+X. 1042 + */ 1043 + rcu_barrier_sched(); 1037 1044 mark_rodata_ro(); 1038 1045 rodata_test(); 1039 1046 } else
+5
kernel/module.c
··· 3517 3517 * walking this with preempt disabled. In all the failure paths, we 3518 3518 * call synchronize_sched(), but we don't want to slow down the success 3519 3519 * path, so use actual RCU here. 3520 + * Note that module_alloc() on most architectures creates W+X page 3521 + * mappings which won't be cleaned up until do_free_init() runs. Any 3522 + * code such as mark_rodata_ro() which depends on those mappings to 3523 + * be cleaned up needs to sync with the queued work - ie 3524 + * rcu_barrier_sched() 3520 3525 */ 3521 3526 call_rcu_sched(&freeinit->rcu, do_free_init); 3522 3527 mutex_unlock(&module_mutex);
+6 -1
lib/find_bit_benchmark.c
··· 132 132 test_find_next_bit(bitmap, BITMAP_LEN); 133 133 test_find_next_zero_bit(bitmap, BITMAP_LEN); 134 134 test_find_last_bit(bitmap, BITMAP_LEN); 135 - test_find_first_bit(bitmap, BITMAP_LEN); 135 + 136 + /* 137 + * test_find_first_bit() may take some time, so 138 + * traverse only part of bitmap to avoid soft lockup. 139 + */ 140 + test_find_first_bit(bitmap, BITMAP_LEN / 10); 136 141 test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); 137 142 138 143 pr_err("\nStart testing find_bit() with sparse bitmap\n");
+1 -3
mm/migrate.c
··· 528 528 int i; 529 529 int index = page_index(page); 530 530 531 - for (i = 0; i < HPAGE_PMD_NR; i++) { 531 + for (i = 1; i < HPAGE_PMD_NR; i++) { 532 532 pslot = radix_tree_lookup_slot(&mapping->i_pages, 533 533 index + i); 534 534 radix_tree_replace_slot(&mapping->i_pages, pslot, 535 535 newpage + i); 536 536 } 537 - } else { 538 - radix_tree_replace_slot(&mapping->i_pages, pslot, newpage); 539 537 } 540 538 541 539 /*
+26 -18
mm/mmap.c
··· 3056 3056 /* mm's last user has gone, and its about to be pulled down */ 3057 3057 mmu_notifier_release(mm); 3058 3058 3059 + if (unlikely(mm_is_oom_victim(mm))) { 3060 + /* 3061 + * Manually reap the mm to free as much memory as possible. 3062 + * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard 3063 + * this mm from further consideration. Taking mm->mmap_sem for 3064 + * write after setting MMF_OOM_SKIP will guarantee that the oom 3065 + * reaper will not run on this mm again after mmap_sem is 3066 + * dropped. 3067 + * 3068 + * Nothing can be holding mm->mmap_sem here and the above call 3069 + * to mmu_notifier_release(mm) ensures mmu notifier callbacks in 3070 + * __oom_reap_task_mm() will not block. 3071 + * 3072 + * This needs to be done before calling munlock_vma_pages_all(), 3073 + * which clears VM_LOCKED, otherwise the oom reaper cannot 3074 + * reliably test it. 3075 + */ 3076 + mutex_lock(&oom_lock); 3077 + __oom_reap_task_mm(mm); 3078 + mutex_unlock(&oom_lock); 3079 + 3080 + set_bit(MMF_OOM_SKIP, &mm->flags); 3081 + down_write(&mm->mmap_sem); 3082 + up_write(&mm->mmap_sem); 3083 + } 3084 + 3059 3085 if (mm->locked_vm) { 3060 3086 vma = mm->mmap; 3061 3087 while (vma) { ··· 3103 3077 /* update_hiwater_rss(mm) here? but nobody should be looking */ 3104 3078 /* Use -1 here to ensure all VMAs in the mm are unmapped */ 3105 3079 unmap_vmas(&tlb, vma, 0, -1); 3106 - 3107 - if (unlikely(mm_is_oom_victim(mm))) { 3108 - /* 3109 - * Wait for oom_reap_task() to stop working on this 3110 - * mm. Because MMF_OOM_SKIP is already set before 3111 - * calling down_read(), oom_reap_task() will not run 3112 - * on this "mm" post up_write(). 3113 - * 3114 - * mm_is_oom_victim() cannot be set from under us 3115 - * either because victim->mm is already set to NULL 3116 - * under task_lock before calling mmput and oom_mm is 3117 - * set not NULL by the OOM killer only if victim->mm 3118 - * is found not NULL while holding the task_lock. 
3119 - */ 3120 - set_bit(MMF_OOM_SKIP, &mm->flags); 3121 - down_write(&mm->mmap_sem); 3122 - up_write(&mm->mmap_sem); 3123 - } 3124 3080 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); 3125 3081 tlb_finish_mmu(&tlb, 0, -1); 3126 3082
+43 -38
mm/oom_kill.c
··· 469 469 return false; 470 470 } 471 471 472 - 473 472 #ifdef CONFIG_MMU 474 473 /* 475 474 * OOM Reaper kernel thread which tries to reap the memory used by the OOM ··· 479 480 static struct task_struct *oom_reaper_list; 480 481 static DEFINE_SPINLOCK(oom_reaper_lock); 481 482 482 - static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) 483 + void __oom_reap_task_mm(struct mm_struct *mm) 483 484 { 484 - struct mmu_gather tlb; 485 485 struct vm_area_struct *vma; 486 + 487 + /* 488 + * Tell all users of get_user/copy_from_user etc... that the content 489 + * is no longer stable. No barriers really needed because unmapping 490 + * should imply barriers already and the reader would hit a page fault 491 + * if it stumbled over a reaped memory. 492 + */ 493 + set_bit(MMF_UNSTABLE, &mm->flags); 494 + 495 + for (vma = mm->mmap ; vma; vma = vma->vm_next) { 496 + if (!can_madv_dontneed_vma(vma)) 497 + continue; 498 + 499 + /* 500 + * Only anonymous pages have a good chance to be dropped 501 + * without additional steps which we cannot afford as we 502 + * are OOM already. 503 + * 504 + * We do not even care about fs backed pages because all 505 + * which are reclaimable have already been reclaimed and 506 + * we do not want to block exit_mmap by keeping mm ref 507 + * count elevated without a good reason. 
508 + */ 509 + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { 510 + const unsigned long start = vma->vm_start; 511 + const unsigned long end = vma->vm_end; 512 + struct mmu_gather tlb; 513 + 514 + tlb_gather_mmu(&tlb, mm, start, end); 515 + mmu_notifier_invalidate_range_start(mm, start, end); 516 + unmap_page_range(&tlb, vma, start, end, NULL); 517 + mmu_notifier_invalidate_range_end(mm, start, end); 518 + tlb_finish_mmu(&tlb, start, end); 519 + } 520 + } 521 + } 522 + 523 + static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) 524 + { 486 525 bool ret = true; 487 526 488 527 /* 489 528 * We have to make sure to not race with the victim exit path 490 529 * and cause premature new oom victim selection: 491 - * __oom_reap_task_mm exit_mm 530 + * oom_reap_task_mm exit_mm 492 531 * mmget_not_zero 493 532 * mmput 494 533 * atomic_dec_and_test ··· 571 534 572 535 trace_start_task_reaping(tsk->pid); 573 536 574 - /* 575 - * Tell all users of get_user/copy_from_user etc... that the content 576 - * is no longer stable. No barriers really needed because unmapping 577 - * should imply barriers already and the reader would hit a page fault 578 - * if it stumbled over a reaped memory. 579 - */ 580 - set_bit(MMF_UNSTABLE, &mm->flags); 537 + __oom_reap_task_mm(mm); 581 538 582 - for (vma = mm->mmap ; vma; vma = vma->vm_next) { 583 - if (!can_madv_dontneed_vma(vma)) 584 - continue; 585 - 586 - /* 587 - * Only anonymous pages have a good chance to be dropped 588 - * without additional steps which we cannot afford as we 589 - * are OOM already. 590 - * 591 - * We do not even care about fs backed pages because all 592 - * which are reclaimable have already been reclaimed and 593 - * we do not want to block exit_mmap by keeping mm ref 594 - * count elevated without a good reason. 
595 - */ 596 - if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { 597 - const unsigned long start = vma->vm_start; 598 - const unsigned long end = vma->vm_end; 599 - 600 - tlb_gather_mmu(&tlb, mm, start, end); 601 - mmu_notifier_invalidate_range_start(mm, start, end); 602 - unmap_page_range(&tlb, vma, start, end, NULL); 603 - mmu_notifier_invalidate_range_end(mm, start, end); 604 - tlb_finish_mmu(&tlb, start, end); 605 - } 606 - } 607 539 pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", 608 540 task_pid_nr(tsk), tsk->comm, 609 541 K(get_mm_counter(mm, MM_ANONPAGES)), ··· 593 587 struct mm_struct *mm = tsk->signal->oom_mm; 594 588 595 589 /* Retry the down_read_trylock(mmap_sem) a few times */ 596 - while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm)) 590 + while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm)) 597 591 schedule_timeout_idle(HZ/10); 598 592 599 593 if (attempts <= MAX_OOM_REAP_RETRIES || 600 594 test_bit(MMF_OOM_SKIP, &mm->flags)) 601 595 goto done; 602 - 603 596 604 597 pr_info("oom_reaper: unable to reap pid:%d (%s)\n", 605 598 task_pid_nr(tsk), tsk->comm);
+1 -1
mm/sparse.c
··· 629 629 unsigned long pfn; 630 630 631 631 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 632 - unsigned long section_nr = pfn_to_section_nr(start_pfn); 632 + unsigned long section_nr = pfn_to_section_nr(pfn); 633 633 struct mem_section *ms; 634 634 635 635 /*
+5 -1
mm/vmstat.c
··· 1161 1161 "nr_vmscan_immediate_reclaim", 1162 1162 "nr_dirtied", 1163 1163 "nr_written", 1164 - "nr_indirectly_reclaimable", 1164 + "", /* nr_indirectly_reclaimable */ 1165 1165 1166 1166 /* enum writeback_stat_item counters */ 1167 1167 "nr_dirty_threshold", ··· 1739 1739 { 1740 1740 unsigned long *l = arg; 1741 1741 unsigned long off = l - (unsigned long *)m->private; 1742 + 1743 + /* Skip hidden vmstat items. */ 1744 + if (*vmstat_text[off] == '\0') 1745 + return 0; 1742 1746 1743 1747 seq_puts(m, vmstat_text[off]); 1744 1748 seq_put_decimal_ull(m, " ", *l);
+30 -12
mm/z3fold.c
··· 144 144 PAGE_HEADLESS = 0, 145 145 MIDDLE_CHUNK_MAPPED, 146 146 NEEDS_COMPACTING, 147 - PAGE_STALE 147 + PAGE_STALE, 148 + UNDER_RECLAIM 148 149 }; 149 150 150 151 /***************** ··· 174 173 clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 175 174 clear_bit(NEEDS_COMPACTING, &page->private); 176 175 clear_bit(PAGE_STALE, &page->private); 176 + clear_bit(UNDER_RECLAIM, &page->private); 177 177 178 178 spin_lock_init(&zhdr->page_lock); 179 179 kref_init(&zhdr->refcount); ··· 758 756 atomic64_dec(&pool->pages_nr); 759 757 return; 760 758 } 759 + if (test_bit(UNDER_RECLAIM, &page->private)) { 760 + z3fold_page_unlock(zhdr); 761 + return; 762 + } 761 763 if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { 762 764 z3fold_page_unlock(zhdr); 763 765 return; ··· 846 840 kref_get(&zhdr->refcount); 847 841 list_del_init(&zhdr->buddy); 848 842 zhdr->cpu = -1; 843 + set_bit(UNDER_RECLAIM, &page->private); 844 + break; 849 845 } 850 846 851 847 list_del_init(&page->lru); ··· 895 887 goto next; 896 888 } 897 889 next: 898 - spin_lock(&pool->lock); 899 890 if (test_bit(PAGE_HEADLESS, &page->private)) { 900 891 if (ret == 0) { 901 - spin_unlock(&pool->lock); 902 892 free_z3fold_page(page); 903 893 return 0; 904 894 } 905 - } else if (kref_put(&zhdr->refcount, release_z3fold_page)) { 906 - atomic64_dec(&pool->pages_nr); 895 + spin_lock(&pool->lock); 896 + list_add(&page->lru, &pool->lru); 907 897 spin_unlock(&pool->lock); 908 - return 0; 898 + } else { 899 + z3fold_page_lock(zhdr); 900 + clear_bit(UNDER_RECLAIM, &page->private); 901 + if (kref_put(&zhdr->refcount, 902 + release_z3fold_page_locked)) { 903 + atomic64_dec(&pool->pages_nr); 904 + return 0; 905 + } 906 + /* 907 + * if we are here, the page is still not completely 908 + * free. 
Take the global pool lock then to be able 909 + * to add it back to the lru list 910 + */ 911 + spin_lock(&pool->lock); 912 + list_add(&page->lru, &pool->lru); 913 + spin_unlock(&pool->lock); 914 + z3fold_page_unlock(zhdr); 909 915 } 910 916 911 - /* 912 - * Add to the beginning of LRU. 913 - * Pool lock has to be kept here to ensure the page has 914 - * not already been released 915 - */ 916 - list_add(&page->lru, &pool->lru); 917 + /* We started off locked so we need to lock the pool back */ 918 + spin_lock(&pool->lock); 917 919 } 918 920 spin_unlock(&pool->lock); 919 921 return -EAGAIN;
+4 -1
scripts/faddr2line
··· 170 170 echo "$file_lines" | while read -r line 171 171 do 172 172 echo $line 173 - eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}') 173 + n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g') 174 + n1=$[$n-5] 175 + n2=$[$n+5] 176 + f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g') 174 177 awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f 175 178 done 176 179