Merge branch 'akpm' (patches from Andrew)

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
"24 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (24 commits)
autofs: fix error return in autofs_fill_super()
autofs: drop dentry reference only when it is never used
fs/drop_caches.c: avoid softlockups in drop_pagecache_sb()
mm: migrate: don't rely on __PageMovable() of newpage after unlocking it
psi: clarify the Kconfig text for the default-disable option
mm, memory_hotplug: __offline_pages fix wrong locking
mm: hwpoison: use do_send_sig_info() instead of force_sig()
kasan: mark file common so ftrace doesn't trace it
init/Kconfig: fix grammar by moving a closing parenthesis
lib/test_kmod.c: potential double free in error handling
mm, oom: fix use-after-free in oom_kill_process
mm/hotplug: invalid PFNs from pfn_to_online_page()
mm,memory_hotplug: fix scan_movable_pages() for gigantic hugepages
psi: fix aggregation idle shut-off
mm, memory_hotplug: test_pages_in_a_zone do not pass the end of zone
mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone
oom, oom_reaper: do not enqueue same task twice
mm: migrate: make buffer_migrate_page_norefs() actually succeed
kernel/exit.c: release ptraced tasks before zap_pid_ns_processes
x86_64: increase stack size for KASAN_EXTRA
...

Linus Torvalds 7 years ago b9de6efe cd984a5b

+304 -74

38 changed files

expand all collapse all

arch

c6x

include

asm

Kbuild

uapi

asm

Kbuild

h8300

include

asm

Kbuild

uapi

asm

Kbuild

hexagon

include

asm

Kbuild

uapi

asm

Kbuild

m68k

include

asm

Kbuild

uapi

asm

Kbuild

microblaze

include

asm

Kbuild

uapi

asm

Kbuild

openrisc

include

asm

Kbuild

uapi

asm

Kbuild

unicore32

include

asm

Kbuild

uapi

asm

Kbuild

x86

include

asm

page_64_types.h

autofs

expire.c

inode.c

drop_caches.c

proc

generic.c

internal.h

proc_net.c

include

linux

memory_hotplug.h

sched

coredump.h

init

Kconfig

kernel

exit.c

sched

psi.c

workqueue.c

workqueue_internal.h

lib

test_kmod.c

hugetlb.c

kasan

Makefile

memory-failure.c

memory_hotplug.c

migrate.c

oom_kill.c

tools

testing

selftests

proc

.gitignore

Makefile

setns-dcache.c

arch/c6x/include/asm/Kbuild

reviewed

··· 30 30 generic-y += preempt.h 31 31 generic-y += segment.h 32 32 generic-y += serial.h 33 33 + generic-y += shmparam.h 33 34 generic-y += tlbflush.h 34 35 generic-y += topology.h 35 36 generic-y += trace_clock.h

-1

arch/c6x/include/uapi/asm/Kbuild

reviewed

··· 1 1 include include/uapi/asm-generic/Kbuild.asm 2 2 3 3 generic-y += kvm_para.h 4 4 - generic-y += shmparam.h 5 4 generic-y += ucontext.h

arch/h8300/include/asm/Kbuild

reviewed

··· 40 40 generic-y += scatterlist.h 41 41 generic-y += sections.h 42 42 generic-y += serial.h 43 43 + generic-y += shmparam.h 43 44 generic-y += sizes.h 44 45 generic-y += spinlock.h 45 46 generic-y += timex.h

-1

arch/h8300/include/uapi/asm/Kbuild

reviewed

··· 1 1 include include/uapi/asm-generic/Kbuild.asm 2 2 3 3 generic-y += kvm_para.h 4 4 - generic-y += shmparam.h 5 4 generic-y += ucontext.h

arch/hexagon/include/asm/Kbuild

reviewed

··· 30 30 generic-y += sections.h 31 31 generic-y += segment.h 32 32 generic-y += serial.h 33 33 + generic-y += shmparam.h 33 34 generic-y += sizes.h 34 35 generic-y += topology.h 35 36 generic-y += trace_clock.h

-1

arch/hexagon/include/uapi/asm/Kbuild

reviewed

··· 1 1 include include/uapi/asm-generic/Kbuild.asm 2 2 3 3 - generic-y += shmparam.h 4 3 generic-y += ucontext.h

arch/m68k/include/asm/Kbuild

reviewed

··· 20 20 generic-y += percpu.h 21 21 generic-y += preempt.h 22 22 generic-y += sections.h 23 23 + generic-y += shmparam.h 23 24 generic-y += spinlock.h 24 25 generic-y += topology.h 25 26 generic-y += trace_clock.h

-1

arch/m68k/include/uapi/asm/Kbuild

reviewed

··· 2 2 3 3 generated-y += unistd_32.h 4 4 generic-y += kvm_para.h 5 5 - generic-y += shmparam.h

arch/microblaze/include/asm/Kbuild

reviewed

··· 26 26 generic-y += percpu.h 27 27 generic-y += preempt.h 28 28 generic-y += serial.h 29 29 + generic-y += shmparam.h 29 30 generic-y += syscalls.h 30 31 generic-y += topology.h 31 32 generic-y += trace_clock.h

-1

arch/microblaze/include/uapi/asm/Kbuild

reviewed

··· 2 2 3 3 generated-y += unistd_32.h 4 4 generic-y += kvm_para.h 5 5 - generic-y += shmparam.h 6 5 generic-y += ucontext.h

arch/openrisc/include/asm/Kbuild

reviewed

··· 34 34 generic-y += qrwlock.h 35 35 generic-y += sections.h 36 36 generic-y += segment.h 37 37 + generic-y += shmparam.h 37 38 generic-y += string.h 38 39 generic-y += switch_to.h 39 40 generic-y += topology.h

-1

arch/openrisc/include/uapi/asm/Kbuild

reviewed

··· 1 1 include include/uapi/asm-generic/Kbuild.asm 2 2 3 3 generic-y += kvm_para.h 4 4 - generic-y += shmparam.h 5 4 generic-y += ucontext.h

arch/unicore32/include/asm/Kbuild

reviewed

··· 28 28 generic-y += sections.h 29 29 generic-y += segment.h 30 30 generic-y += serial.h 31 31 + generic-y += shmparam.h 31 32 generic-y += sizes.h 32 33 generic-y += syscalls.h 33 34 generic-y += topology.h

-1

arch/unicore32/include/uapi/asm/Kbuild

reviewed

··· 1 1 include include/uapi/asm-generic/Kbuild.asm 2 2 3 3 generic-y += kvm_para.h 4 4 - generic-y += shmparam.h 5 4 generic-y += ucontext.h

arch/x86/include/asm/page_64_types.h

reviewed

··· 7 7 #endif 8 8 9 9 #ifdef CONFIG_KASAN 10 10 + #ifdef CONFIG_KASAN_EXTRA 11 11 + #define KASAN_STACK_ORDER 2 12 12 + #else 10 13 #define KASAN_STACK_ORDER 1 14 14 + #endif 11 15 #else 12 16 #define KASAN_STACK_ORDER 0 13 17 #endif

+2 -1

fs/autofs/expire.c

reviewed

··· 596 596 pkt.len = dentry->d_name.len; 597 597 memcpy(pkt.name, dentry->d_name.name, pkt.len); 598 598 pkt.name[pkt.len] = '\0'; 599 599 - dput(dentry); 600 599 601 600 if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) 602 601 ret = -EFAULT; ··· 607 608 ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE); 608 609 complete_all(&ino->expire_complete); 609 610 spin_unlock(&sbi->fs_lock); 611 611 + 612 612 + dput(dentry); 610 613 611 614 return ret; 612 615 }

+3 -1

fs/autofs/inode.c

reviewed

··· 266 266 } 267 267 root_inode = autofs_get_inode(s, S_IFDIR | 0755); 268 268 root = d_make_root(root_inode); 269 269 - if (!root) 269 269 + if (!root) { 270 270 + ret = -ENOMEM; 270 271 goto fail_ino; 272 272 + } 271 273 pipe = NULL; 272 274 273 275 root->d_fsdata = ino;

+7 -1

fs/drop_caches.c

reviewed

··· 21 21 spin_lock(&sb->s_inode_list_lock); 22 22 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 23 23 spin_lock(&inode->i_lock); 24 24 + /* 25 25 + * We must skip inodes in unusual state. We may also skip 26 26 + * inodes without pages but we deliberately won't in case 27 27 + * we need to reschedule to avoid softlockups. 28 28 + */ 24 29 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 25 25 - (inode->i_mapping->nrpages == 0)) { 30 30 + (inode->i_mapping->nrpages == 0 && !need_resched())) { 26 31 spin_unlock(&inode->i_lock); 27 32 continue; 28 33 } ··· 35 30 spin_unlock(&inode->i_lock); 36 31 spin_unlock(&sb->s_inode_list_lock); 37 32 33 33 + cond_resched(); 38 34 invalidate_mapping_pages(inode->i_mapping, 0, -1); 39 35 iput(toput_inode); 40 36 toput_inode = inode;

+3 -1

fs/proc/generic.c

reviewed

··· 256 256 inode = proc_get_inode(dir->i_sb, de); 257 257 if (!inode) 258 258 return ERR_PTR(-ENOMEM); 259 259 - d_set_d_op(dentry, &proc_misc_dentry_ops); 259 259 + d_set_d_op(dentry, de->proc_dops); 260 260 return d_splice_alias(inode, dentry); 261 261 } 262 262 read_unlock(&proc_subdir_lock); ··· 428 428 spin_lock_init(&ent->pde_unload_lock); 429 429 INIT_LIST_HEAD(&ent->pde_openers); 430 430 proc_set_user(ent, (*parent)->uid, (*parent)->gid); 431 431 + 432 432 + ent->proc_dops = &proc_misc_dentry_ops; 431 433 432 434 out: 433 435 return ent;

fs/proc/internal.h

reviewed

··· 44 44 struct completion *pde_unload_completion; 45 45 const struct inode_operations *proc_iops; 46 46 const struct file_operations *proc_fops; 47 47 + const struct dentry_operations *proc_dops; 47 48 union { 48 49 const struct seq_operations *seq_ops; 49 50 int (*single_show)(struct seq_file *, void *);

+20

fs/proc/proc_net.c

reviewed

··· 38 38 return maybe_get_net(PDE_NET(PDE(inode))); 39 39 } 40 40 41 41 + static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags) 42 42 + { 43 43 + return 0; 44 44 + } 45 45 + 46 46 + static const struct dentry_operations proc_net_dentry_ops = { 47 47 + .d_revalidate = proc_net_d_revalidate, 48 48 + .d_delete = always_delete_dentry, 49 49 + }; 50 50 + 51 51 + static void pde_force_lookup(struct proc_dir_entry *pde) 52 52 + { 53 53 + /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ 54 54 + pde->proc_dops = &proc_net_dentry_ops; 55 55 + } 56 56 + 41 57 static int seq_open_net(struct inode *inode, struct file *file) 42 58 { 43 59 unsigned int state_size = PDE(inode)->state_size; ··· 106 90 p = proc_create_reg(name, mode, &parent, data); 107 91 if (!p) 108 92 return NULL; 93 93 + pde_force_lookup(p); 109 94 p->proc_fops = &proc_net_seq_fops; 110 95 p->seq_ops = ops; 111 96 p->state_size = state_size; ··· 150 133 p = proc_create_reg(name, mode, &parent, data); 151 134 if (!p) 152 135 return NULL; 136 136 + pde_force_lookup(p); 153 137 p->proc_fops = &proc_net_seq_fops; 154 138 p->seq_ops = ops; 155 139 p->state_size = state_size; ··· 199 181 p = proc_create_reg(name, mode, &parent, data); 200 182 if (!p) 201 183 return NULL; 184 184 + pde_force_lookup(p); 202 185 p->proc_fops = &proc_net_single_fops; 203 186 p->single_show = show; 204 187 return proc_register(parent, p); ··· 242 223 p = proc_create_reg(name, mode, &parent, data); 243 224 if (!p) 244 225 return NULL; 226 226 + pde_force_lookup(p); 245 227 p->proc_fops = &proc_net_single_fops; 246 228 p->single_show = show; 247 229 p->write = write;

+10 -8

include/linux/memory_hotplug.h

reviewed

··· 21 21 * walkers which rely on the fully initialized page->flags and others 22 22 * should use this rather than pfn_valid && pfn_to_page 23 23 */ 24 24 - #define pfn_to_online_page(pfn) \ 25 25 - ({ \ 26 26 - struct page *___page = NULL; \ 27 27 - unsigned long ___nr = pfn_to_section_nr(pfn); \ 28 28 - \ 29 29 - if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\ 30 30 - ___page = pfn_to_page(pfn); \ 31 31 - ___page; \ 24 24 + #define pfn_to_online_page(pfn) \ 25 25 + ({ \ 26 26 + struct page *___page = NULL; \ 27 27 + unsigned long ___pfn = pfn; \ 28 28 + unsigned long ___nr = pfn_to_section_nr(___pfn); \ 29 29 + \ 30 30 + if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \ 31 31 + pfn_valid_within(___pfn)) \ 32 32 + ___page = pfn_to_page(___pfn); \ 33 33 + ___page; \ 32 34 }) 33 35 34 36 /*

include/linux/sched/coredump.h

reviewed

··· 71 71 #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ 72 72 #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ 73 73 #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ 74 74 + #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ 74 75 #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) 75 76 76 77 #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\

+12 -1

init/Kconfig

reviewed

··· 512 512 per default but can be enabled through passing psi=1 on the 513 513 kernel commandline during boot. 514 514 515 515 + This feature adds some code to the task wakeup and sleep 516 516 + paths of the scheduler. The overhead is too low to affect 517 517 + common scheduling-intense workloads in practice (such as 518 518 + webservers, memcache), but it does show up in artificial 519 519 + scheduler stress tests, such as hackbench. 520 520 + 521 521 + If you are paranoid and not sure what the kernel will be 522 522 + used for, say Y. 523 523 + 524 524 + Say N if unsure. 525 525 + 515 526 endmenu # "CPU/Task time and stats accounting" 516 527 517 528 config CPU_ISOLATION ··· 836 825 PIDs controller is designed to stop this from happening. 837 826 838 827 It should be noted that organisational operations (such as attaching 839 839 - to a cgroup hierarchy will *not* be blocked by the PIDs controller), 828 828 + to a cgroup hierarchy) will *not* be blocked by the PIDs controller, 840 829 since the PIDs limit only affects a process's ability to fork, not to 841 830 attach to a cgroup. 842 831

+10 -2

kernel/exit.c

reviewed

··· 558 558 return NULL; 559 559 } 560 560 561 561 - static struct task_struct *find_child_reaper(struct task_struct *father) 561 561 + static struct task_struct *find_child_reaper(struct task_struct *father, 562 562 + struct list_head *dead) 562 563 __releases(&tasklist_lock) 563 564 __acquires(&tasklist_lock) 564 565 { 565 566 struct pid_namespace *pid_ns = task_active_pid_ns(father); 566 567 struct task_struct *reaper = pid_ns->child_reaper; 568 568 + struct task_struct *p, *n; 567 569 568 570 if (likely(reaper != father)) 569 571 return reaper; ··· 581 579 panic("Attempted to kill init! exitcode=0x%08x\n", 582 580 father->signal->group_exit_code ?: father->exit_code); 583 581 } 582 582 + 583 583 + list_for_each_entry_safe(p, n, dead, ptrace_entry) { 584 584 + list_del_init(&p->ptrace_entry); 585 585 + release_task(p); 586 586 + } 587 587 + 584 588 zap_pid_ns_processes(pid_ns); 585 589 write_lock_irq(&tasklist_lock); 586 590 ··· 676 668 exit_ptrace(father, dead); 677 669 678 670 /* Can drop and reacquire tasklist_lock */ 679 679 - reaper = find_child_reaper(father); 671 671 + reaper = find_child_reaper(father, dead); 680 672 if (list_empty(&father->children)) 681 673 return; 682 674

+17 -4

kernel/sched/psi.c

reviewed

··· 124 124 * sampling of the aggregate task states would be. 125 125 */ 126 126 127 127 + #include "../workqueue_internal.h" 127 128 #include <linux/sched/loadavg.h> 128 129 #include <linux/seq_file.h> 129 130 #include <linux/proc_fs.h> ··· 481 480 groupc->tasks[t]++; 482 481 483 482 write_seqcount_end(&groupc->seq); 484 484 - 485 485 - if (!delayed_work_pending(&group->clock_work)) 486 486 - schedule_delayed_work(&group->clock_work, PSI_FREQ); 487 483 } 488 484 489 485 static struct psi_group *iterate_groups(struct task_struct *task, void **iter) ··· 511 513 { 512 514 int cpu = task_cpu(task); 513 515 struct psi_group *group; 516 516 + bool wake_clock = true; 514 517 void *iter = NULL; 515 518 516 519 if (!task->pid) ··· 529 530 task->psi_flags &= ~clear; 530 531 task->psi_flags |= set; 531 532 532 532 - while ((group = iterate_groups(task, &iter))) 533 533 + /* 534 534 + * Periodic aggregation shuts off if there is a period of no 535 535 + * task changes, so we wake it back up if necessary. However, 536 536 + * don't do this if the task change is the aggregation worker 537 537 + * itself going to sleep, or we'll ping-pong forever. 538 538 + */ 539 539 + if (unlikely((clear & TSK_RUNNING) && 540 540 + (task->flags & PF_WQ_WORKER) && 541 541 + wq_worker_last_func(task) == psi_update_work)) 542 542 + wake_clock = false; 543 543 + 544 544 + while ((group = iterate_groups(task, &iter))) { 533 545 psi_group_change(group, cpu, clear, set); 546 546 + if (wake_clock && !delayed_work_pending(&group->clock_work)) 547 547 + schedule_delayed_work(&group->clock_work, PSI_FREQ); 548 548 + } 534 549 } 535 550 536 551 void psi_memstall_tick(struct task_struct *task, int cpu)

+23

kernel/workqueue.c

reviewed

··· 910 910 } 911 911 912 912 /** 913 913 + * wq_worker_last_func - retrieve worker's last work function 914 914 + * 915 915 + * Determine the last function a worker executed. This is called from 916 916 + * the scheduler to get a worker's last known identity. 917 917 + * 918 918 + * CONTEXT: 919 919 + * spin_lock_irq(rq->lock) 920 920 + * 921 921 + * Return: 922 922 + * The last work function %current executed as a worker, NULL if it 923 923 + * hasn't executed any work yet. 924 924 + */ 925 925 + work_func_t wq_worker_last_func(struct task_struct *task) 926 926 + { 927 927 + struct worker *worker = kthread_data(task); 928 928 + 929 929 + return worker->last_func; 930 930 + } 931 931 + 932 932 + /** 913 933 * worker_set_flags - set worker flags and adjust nr_running accordingly 914 934 * @worker: self 915 935 * @flags: flags to set ··· 2203 2183 /* clear cpu intensive status */ 2204 2184 if (unlikely(cpu_intensive)) 2205 2185 worker_clr_flags(worker, WORKER_CPU_INTENSIVE); 2186 2186 + 2187 2187 + /* tag the worker for identification in schedule() */ 2188 2188 + worker->last_func = worker->current_func; 2206 2189 2207 2190 /* we're done with it, release */ 2208 2191 hash_del(&worker->hentry);

+5 -1

kernel/workqueue_internal.h

reviewed

··· 53 53 54 54 /* used only by rescuers to point to the target workqueue */ 55 55 struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ 56 56 + 57 57 + /* used by the scheduler to determine a worker's last known identity */ 58 58 + work_func_t last_func; 56 59 }; 57 60 58 61 /** ··· 70 67 71 68 /* 72 69 * Scheduler hooks for concurrency managed workqueue. Only to be used from 73 73 - * sched/core.c and workqueue.c. 70 70 + * sched/ and workqueue.c. 74 71 */ 75 72 void wq_worker_waking_up(struct task_struct *task, int cpu); 76 73 struct task_struct *wq_worker_sleeping(struct task_struct *task); 74 74 + work_func_t wq_worker_last_func(struct task_struct *task); 77 75 78 76 #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */

+1 -1

lib/test_kmod.c

reviewed

··· 632 632 config->test_driver = NULL; 633 633 634 634 kfree_const(config->test_fs); 635 635 - config->test_driver = NULL; 635 635 + config->test_fs = NULL; 636 636 } 637 637 638 638 static void kmod_config_free(struct kmod_test_device *test_dev)

+2 -1

mm/hugetlb.c

reviewed

··· 4268 4268 break; 4269 4269 } 4270 4270 if (ret & VM_FAULT_RETRY) { 4271 4271 - if (nonblocking) 4271 4271 + if (nonblocking && 4272 4272 + !(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) 4272 4273 *nonblocking = 0; 4273 4274 *nr_pages = 0; 4274 4275 /*

mm/kasan/Makefile

reviewed

··· 5 5 UBSAN_SANITIZE_tags.o := n 6 6 KCOV_INSTRUMENT := n 7 7 8 8 + CFLAGS_REMOVE_common.o = -pg 8 9 CFLAGS_REMOVE_generic.o = -pg 9 10 # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 10 11 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533

+2 -1

mm/memory-failure.c

reviewed

··· 372 372 if (fail || tk->addr_valid == 0) { 373 373 pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", 374 374 pfn, tk->tsk->comm, tk->tsk->pid); 375 375 - force_sig(SIGKILL, tk->tsk); 375 375 + do_send_sig_info(SIGKILL, SEND_SIG_PRIV, 376 376 + tk->tsk, PIDTYPE_PID); 376 377 } 377 378 378 379 /*

+27 -35

mm/memory_hotplug.c

reviewed

··· 1233 1233 bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) 1234 1234 { 1235 1235 struct page *page = pfn_to_page(start_pfn); 1236 1236 - struct page *end_page = page + nr_pages; 1236 1236 + unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page))); 1237 1237 + struct page *end_page = pfn_to_page(end_pfn); 1237 1238 1238 1239 /* Check the starting page of each pageblock within the range */ 1239 1240 for (; page < end_page; page = next_active_pageblock(page)) { ··· 1274 1273 i++; 1275 1274 if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn) 1276 1275 continue; 1276 1276 + /* Check if we got outside of the zone */ 1277 1277 + if (zone && !zone_spans_pfn(zone, pfn + i)) 1278 1278 + return 0; 1277 1279 page = pfn_to_page(pfn + i); 1278 1280 if (zone && page_zone(page) != zone) 1279 1281 return 0; ··· 1305 1301 static unsigned long scan_movable_pages(unsigned long start, unsigned long end) 1306 1302 { 1307 1303 unsigned long pfn; 1308 1308 - struct page *page; 1304 1304 + 1309 1305 for (pfn = start; pfn < end; pfn++) { 1310 1310 - if (pfn_valid(pfn)) { 1311 1311 - page = pfn_to_page(pfn); 1312 1312 - if (PageLRU(page)) 1313 1313 - return pfn; 1314 1314 - if (__PageMovable(page)) 1315 1315 - return pfn; 1316 1316 - if (PageHuge(page)) { 1317 1317 - if (hugepage_migration_supported(page_hstate(page)) && 1318 1318 - page_huge_active(page)) 1319 1319 - return pfn; 1320 1320 - else 1321 1321 - pfn = round_up(pfn + 1, 1322 1322 - 1 << compound_order(page)) - 1; 1323 1323 - } 1324 1324 - } 1306 1306 + struct page *page, *head; 1307 1307 + unsigned long skip; 1308 1308 + 1309 1309 + if (!pfn_valid(pfn)) 1310 1310 + continue; 1311 1311 + page = pfn_to_page(pfn); 1312 1312 + if (PageLRU(page)) 1313 1313 + return pfn; 1314 1314 + if (__PageMovable(page)) 1315 1315 + return pfn; 1316 1316 + 1317 1317 + if (!PageHuge(page)) 1318 1318 + continue; 1319 1319 + head = compound_head(page); 1320 1320 + if 
(hugepage_migration_supported(page_hstate(head)) && 1321 1321 + page_huge_active(head)) 1322 1322 + return pfn; 1323 1323 + skip = (1 << compound_order(head)) - (page - head); 1324 1324 + pfn += skip - 1; 1325 1325 } 1326 1326 return 0; 1327 1327 } ··· 1352 1344 { 1353 1345 unsigned long pfn; 1354 1346 struct page *page; 1355 1355 - int not_managed = 0; 1356 1347 int ret = 0; 1357 1348 LIST_HEAD(source); 1358 1349 ··· 1399 1392 else 1400 1393 ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE); 1401 1394 if (!ret) { /* Success */ 1402 1402 - put_page(page); 1403 1395 list_add_tail(&page->lru, &source); 1404 1396 if (!__PageMovable(page)) 1405 1397 inc_node_page_state(page, NR_ISOLATED_ANON + ··· 1407 1401 } else { 1408 1402 pr_warn("failed to isolate pfn %lx\n", pfn); 1409 1403 dump_page(page, "isolation failed"); 1410 1410 - put_page(page); 1411 1411 - /* Because we don't have big zone->lock. we should 1412 1412 - check this again here. */ 1413 1413 - if (page_count(page)) { 1414 1414 - not_managed++; 1415 1415 - ret = -EBUSY; 1416 1416 - break; 1417 1417 - } 1418 1404 } 1405 1405 + put_page(page); 1419 1406 } 1420 1407 if (!list_empty(&source)) { 1421 1421 - if (not_managed) { 1422 1422 - putback_movable_pages(&source); 1423 1423 - goto out; 1424 1424 - } 1425 1425 - 1426 1408 /* Allocate a new page from the nearest neighbor node */ 1427 1409 ret = migrate_pages(&source, new_node_page, NULL, 0, 1428 1410 MIGRATE_SYNC, MR_MEMORY_HOTPLUG); ··· 1423 1429 putback_movable_pages(&source); 1424 1430 } 1425 1431 } 1426 1426 - out: 1432 1432 + 1427 1433 return ret; 1428 1434 } 1429 1435 ··· 1570 1576 we assume this for now. 
.*/ 1571 1577 if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, 1572 1578 &valid_end)) { 1573 1573 - mem_hotplug_done(); 1574 1579 ret = -EINVAL; 1575 1580 reason = "multizone range"; 1576 1581 goto failed_removal; ··· 1584 1591 MIGRATE_MOVABLE, 1585 1592 SKIP_HWPOISON | REPORT_FAILURE); 1586 1593 if (ret) { 1587 1587 - mem_hotplug_done(); 1588 1594 reason = "failure to isolate range"; 1589 1595 goto failed_removal; 1590 1596 }

+5 -7

mm/migrate.c

reviewed

··· 709 709 /* Simple case, sync compaction */ 710 710 if (mode != MIGRATE_ASYNC) { 711 711 do { 712 712 - get_bh(bh); 713 712 lock_buffer(bh); 714 713 bh = bh->b_this_page; 715 714 ··· 719 720 720 721 /* async case, we cannot block on lock_buffer so use trylock_buffer */ 721 722 do { 722 722 - get_bh(bh); 723 723 if (!trylock_buffer(bh)) { 724 724 /* 725 725 * We failed to lock the buffer and cannot stall in 726 726 * async migration. Release the taken locks 727 727 */ 728 728 struct buffer_head *failed_bh = bh; 729 729 - put_bh(failed_bh); 730 729 bh = head; 731 730 while (bh != failed_bh) { 732 731 unlock_buffer(bh); 733 733 - put_bh(bh); 734 732 bh = bh->b_this_page; 735 733 } 736 734 return false; ··· 814 818 bh = head; 815 819 do { 816 820 unlock_buffer(bh); 817 817 - put_bh(bh); 818 821 bh = bh->b_this_page; 819 822 820 823 } while (bh != head); ··· 1130 1135 * If migration is successful, decrease refcount of the newpage 1131 1136 * which will not free the page because new page owner increased 1132 1137 * refcounter. As well, if it is LRU page, add the page to LRU 1133 1133 - * list in here. 1138 1138 + * list in here. Use the old state of the isolated source page to 1139 1139 + * determine if we migrated a LRU page. newpage was already unlocked 1140 1140 + * and possibly modified by its owner - don't rely on the page 1141 1141 + * state. 1134 1142 */ 1135 1143 if (rc == MIGRATEPAGE_SUCCESS) { 1136 1136 - if (unlikely(__PageMovable(newpage))) 1144 1144 + if (unlikely(!is_lru)) 1137 1145 put_page(newpage); 1138 1146 else 1139 1147 putback_lru_page(newpage);

+10 -2

mm/oom_kill.c

reviewed

··· 647 647 648 648 static void wake_oom_reaper(struct task_struct *tsk) 649 649 { 650 650 - /* tsk is already queued? */ 651 651 - if (tsk == oom_reaper_list || tsk->oom_reaper_list) 650 650 + /* mm is already queued? */ 651 651 + if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) 652 652 return; 653 653 654 654 get_task_struct(tsk); ··· 975 975 * still freeing memory. 976 976 */ 977 977 read_lock(&tasklist_lock); 978 978 + 979 979 + /* 980 980 + * The task 'p' might have already exited before reaching here. The 981 981 + * put_task_struct() will free task_struct 'p' while the loop still tries 982 982 + * to access the fields of 'p', so get an extra reference. 983 983 + */ 984 984 + get_task_struct(p); 978 985 for_each_thread(p, t) { 979 986 list_for_each_entry(child, &t->children, sibling) { 980 987 unsigned int child_points; ··· 1001 994 } 1002 995 } 1003 996 } 997 997 + put_task_struct(p); 1004 998 read_unlock(&tasklist_lock); 1005 999 1006 1000 /*

tools/testing/selftests/proc/.gitignore

reviewed

··· 10 10 /proc-uptime-002 11 11 /read 12 12 /self 13 13 + /setns-dcache 13 14 /thread-self

tools/testing/selftests/proc/Makefile

reviewed

··· 14 14 TEST_GEN_PROGS += proc-uptime-002 15 15 TEST_GEN_PROGS += read 16 16 TEST_GEN_PROGS += self 17 17 + TEST_GEN_PROGS += setns-dcache 17 18 TEST_GEN_PROGS += thread-self 18 19 19 20 include ../lib.mk

+129

tools/testing/selftests/proc/setns-dcache.c

reviewed

··· 1 1 + /* 2 2 + * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com> 3 3 + * 4 4 + * Permission to use, copy, modify, and distribute this software for any 5 5 + * purpose with or without fee is hereby granted, provided that the above 6 6 + * copyright notice and this permission notice appear in all copies. 7 7 + * 8 8 + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 9 + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 10 + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 11 + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 12 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 13 + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 14 + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 15 + */ 16 16 + /* 17 17 + * Test that setns(CLONE_NEWNET) points to new /proc/net content even 18 18 + * if old one is in dcache. 19 19 + * 20 20 + * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled. 21 21 + */ 22 22 + #undef NDEBUG 23 23 + #include <assert.h> 24 24 + #include <errno.h> 25 25 + #include <sched.h> 26 26 + #include <signal.h> 27 27 + #include <stdio.h> 28 28 + #include <stdlib.h> 29 29 + #include <string.h> 30 30 + #include <unistd.h> 31 31 + #include <sys/types.h> 32 32 + #include <sys/stat.h> 33 33 + #include <fcntl.h> 34 34 + #include <sys/socket.h> 35 35 + 36 36 + static pid_t pid = -1; 37 37 + 38 38 + static void f(void) 39 39 + { 40 40 + if (pid > 0) { 41 41 + kill(pid, SIGTERM); 42 42 + } 43 43 + } 44 44 + 45 45 + int main(void) 46 46 + { 47 47 + int fd[2]; 48 48 + char _ = 0; 49 49 + int nsfd; 50 50 + 51 51 + atexit(f); 52 52 + 53 53 + /* Check for privileges and syscall availability straight away. 
*/ 54 54 + if (unshare(CLONE_NEWNET) == -1) { 55 55 + if (errno == ENOSYS || errno == EPERM) { 56 56 + return 4; 57 57 + } 58 58 + return 1; 59 59 + } 60 60 + /* Distinguisher between two otherwise empty net namespaces. */ 61 61 + if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) { 62 62 + return 1; 63 63 + } 64 64 + 65 65 + if (pipe(fd) == -1) { 66 66 + return 1; 67 67 + } 68 68 + 69 69 + pid = fork(); 70 70 + if (pid == -1) { 71 71 + return 1; 72 72 + } 73 73 + 74 74 + if (pid == 0) { 75 75 + if (unshare(CLONE_NEWNET) == -1) { 76 76 + return 1; 77 77 + } 78 78 + 79 79 + if (write(fd[1], &_, 1) != 1) { 80 80 + return 1; 81 81 + } 82 82 + 83 83 + pause(); 84 84 + 85 85 + return 0; 86 86 + } 87 87 + 88 88 + if (read(fd[0], &_, 1) != 1) { 89 89 + return 1; 90 90 + } 91 91 + 92 92 + { 93 93 + char buf[64]; 94 94 + snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid); 95 95 + nsfd = open(buf, O_RDONLY); 96 96 + if (nsfd == -1) { 97 97 + return 1; 98 98 + } 99 99 + } 100 100 + 101 101 + /* Reliably pin dentry into dcache. */ 102 102 + (void)open("/proc/net/unix", O_RDONLY); 103 103 + 104 104 + if (setns(nsfd, CLONE_NEWNET) == -1) { 105 105 + return 1; 106 106 + } 107 107 + 108 108 + kill(pid, SIGTERM); 109 109 + pid = 0; 110 110 + 111 111 + { 112 112 + char buf[4096]; 113 113 + ssize_t rv; 114 114 + int fd; 115 115 + 116 116 + fd = open("/proc/net/unix", O_RDONLY); 117 117 + if (fd == -1) { 118 118 + return 1; 119 119 + } 120 120 + 121 121 + #define S "Num RefCount Protocol Flags Type St Inode Path\n" 122 122 + rv = read(fd, buf, sizeof(buf)); 123 123 + 124 124 + assert(rv == strlen(S)); 125 125 + assert(memcmp(buf, S, strlen(S)) == 0); 126 126 + } 127 127 + 128 128 + return 0; 129 129 + }