Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched: Make migrate_{en,dis}able() inline

Currently, migrate_enable and migrate_disable are global functions, which
makes them hotspots in some cases. Take BPF for example: the function calls
to migrate_enable and migrate_disable in the BPF trampoline can introduce
significant overhead. The following is the 'perf top' output of the FENTRY
benchmark (./tools/testing/selftests/bpf/bench trig-fentry):

54.63% bpf_prog_2dcccf652aac1793_bench_trigger_fentry [k] bpf_prog_2dcccf652aac1793_bench_trigger_fentry
10.43% [kernel] [k] migrate_enable
10.07% bpf_trampoline_6442517037 [k] bpf_trampoline_6442517037
8.06% [kernel] [k] __bpf_prog_exit_recur
4.11% libc.so.6 [.] syscall
2.15% [kernel] [k] entry_SYSCALL_64
1.48% [kernel] [k] memchr_inv
1.32% [kernel] [k] fput
1.16% [kernel] [k] _copy_to_user
0.73% [kernel] [k] bpf_prog_test_run_raw_tp

So in this commit, we make migrate_enable/migrate_disable inline to obtain
better performance. The struct rq is defined internally in
kernel/sched/sched.h, and the field "nr_pinned" is accessed in
migrate_enable/migrate_disable, which makes it hard to make them inline.

Alexei Starovoitov suggested generating the offset of "nr_pinned" in [1],
so we can define migrate_enable/migrate_disable in
include/linux/sched.h and access "this_rq()->nr_pinned" with
"(void *)this_rq() + RQ_nr_pinned".

The offset of "nr_pinned" is generated in include/generated/rq-offsets.h
by kernel/sched/rq-offsets.c.

Generally speaking, we move the definition of migrate_enable and
migrate_disable to include/linux/sched.h from kernel/sched/core.c. The
call to __set_cpus_allowed_ptr() is left in ___migrate_enable().

The "struct rq" is not available in include/linux/sched.h, so we can't
access the "runqueues" with this_cpu_ptr(), as the compilation will fail
in this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr():
typeof((ptr) + 0)

So we introduce this_rq_raw() and access the runqueues with
arch_raw_cpu_ptr/PERCPU_PTR directly.

The variable "runqueues" is not visible to kernel modules, and exporting
it is not a good idea. As Peter Zijlstra advised in [2], we define and
export migrate_enable/migrate_disable in kernel/sched/core.c too, and use
them for modules.

Before this patch, the performance of BPF FENTRY is:

fentry : 113.030 ± 0.149M/s
fentry : 112.501 ± 0.187M/s
fentry : 112.828 ± 0.267M/s
fentry : 115.287 ± 0.241M/s

After this patch, the performance of BPF FENTRY increases to:

fentry : 143.644 ± 0.670M/s
fentry : 149.764 ± 0.362M/s
fentry : 149.642 ± 0.156M/s
fentry : 145.263 ± 0.221M/s

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/bpf/CAADnVQ+5sEDKHdsJY5ZsfGDO_1SEhhQWHrt2SMBG5SYyQ+jt7w@mail.gmail.com/ [1]
Link: https://lore.kernel.org/all/20250819123214.GH4067720@noisy.programming.kicks-ass.net/ [2]

authored by

Menglong Dong and committed by
Peter Zijlstra
378b7708 88a90315

+152 -53
+12 -1
Kbuild
··· 34 34 $(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE 35 35 $(call filechk,offsets,__ASM_OFFSETS_H__) 36 36 37 + # Generate rq-offsets.h 38 + 39 + rq-offsets-file := include/generated/rq-offsets.h 40 + 41 + targets += kernel/sched/rq-offsets.s 42 + 43 + kernel/sched/rq-offsets.s: $(offsets-file) 44 + 45 + $(rq-offsets-file): kernel/sched/rq-offsets.s FORCE 46 + $(call filechk,offsets,__RQ_OFFSETS_H__) 47 + 37 48 # Check for missing system calls 38 49 39 50 quiet_cmd_syscalls = CALL $< 40 51 cmd_syscalls = $(CONFIG_SHELL) $< $(CC) $(c_flags) $(missing_syscalls_flags) 41 52 42 53 PHONY += missing-syscalls 43 - missing-syscalls: scripts/checksyscalls.sh $(offsets-file) 54 + missing-syscalls: scripts/checksyscalls.sh $(rq-offsets-file) 44 55 $(call cmd,syscalls) 45 56 46 57 # Check the manual modification of atomic headers
-3
include/linux/preempt.h
··· 424 424 * work-conserving schedulers. 425 425 * 426 426 */ 427 - extern void migrate_disable(void); 428 - extern void migrate_enable(void); 429 427 430 428 /** 431 429 * preempt_disable_nested - Disable preemption inside a normally preempt disabled section ··· 469 471 470 472 DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) 471 473 DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) 472 - DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) 473 474 474 475 #ifdef CONFIG_PREEMPT_DYNAMIC 475 476
+113
include/linux/sched.h
··· 49 49 #include <linux/tracepoint-defs.h> 50 50 #include <linux/unwind_deferred_types.h> 51 51 #include <asm/kmap_size.h> 52 + #ifndef COMPILE_OFFSETS 53 + #include <generated/rq-offsets.h> 54 + #endif 52 55 53 56 /* task_struct member predeclarations (sorted alphabetically): */ 54 57 struct audit_context; ··· 2319 2316 #define alloc_tag_save(_tag) NULL 2320 2317 #define alloc_tag_restore(_tag, _old) do {} while (0) 2321 2318 #endif 2319 + 2320 + #ifndef MODULE 2321 + #ifndef COMPILE_OFFSETS 2322 + 2323 + extern void ___migrate_enable(void); 2324 + 2325 + struct rq; 2326 + DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 2327 + 2328 + /* 2329 + * The "struct rq" is not available here, so we can't access the 2330 + * "runqueues" with this_cpu_ptr(), as the compilation will fail in 2331 + * this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr(): 2332 + * typeof((ptr) + 0) 2333 + * 2334 + * So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here. 2335 + */ 2336 + #ifdef CONFIG_SMP 2337 + #define this_rq_raw() arch_raw_cpu_ptr(&runqueues) 2338 + #else 2339 + #define this_rq_raw() PERCPU_PTR(&runqueues) 2340 + #endif 2341 + #define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned)) 2342 + 2343 + static inline void __migrate_enable(void) 2344 + { 2345 + struct task_struct *p = current; 2346 + 2347 + #ifdef CONFIG_DEBUG_PREEMPT 2348 + /* 2349 + * Check both overflow from migrate_disable() and superfluous 2350 + * migrate_enable(). 2351 + */ 2352 + if (WARN_ON_ONCE((s16)p->migration_disabled <= 0)) 2353 + return; 2354 + #endif 2355 + 2356 + if (p->migration_disabled > 1) { 2357 + p->migration_disabled--; 2358 + return; 2359 + } 2360 + 2361 + /* 2362 + * Ensure stop_task runs either before or after this, and that 2363 + * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). 
2364 + */ 2365 + guard(preempt)(); 2366 + if (unlikely(p->cpus_ptr != &p->cpus_mask)) 2367 + ___migrate_enable(); 2368 + /* 2369 + * Mustn't clear migration_disabled() until cpus_ptr points back at the 2370 + * regular cpus_mask, otherwise things that race (eg. 2371 + * select_fallback_rq) get confused. 2372 + */ 2373 + barrier(); 2374 + p->migration_disabled = 0; 2375 + this_rq_pinned()--; 2376 + } 2377 + 2378 + static inline void __migrate_disable(void) 2379 + { 2380 + struct task_struct *p = current; 2381 + 2382 + if (p->migration_disabled) { 2383 + #ifdef CONFIG_DEBUG_PREEMPT 2384 + /* 2385 + *Warn about overflow half-way through the range. 2386 + */ 2387 + WARN_ON_ONCE((s16)p->migration_disabled < 0); 2388 + #endif 2389 + p->migration_disabled++; 2390 + return; 2391 + } 2392 + 2393 + guard(preempt)(); 2394 + this_rq_pinned()++; 2395 + p->migration_disabled = 1; 2396 + } 2397 + #else /* !COMPILE_OFFSETS */ 2398 + static inline void __migrate_disable(void) { } 2399 + static inline void __migrate_enable(void) { } 2400 + #endif /* !COMPILE_OFFSETS */ 2401 + 2402 + /* 2403 + * So that it is possible to not export the runqueues variable, define and 2404 + * export migrate_enable/migrate_disable in kernel/sched/core.c too, and use 2405 + * them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will 2406 + * be defined in kernel/sched/core.c. 
2407 + */ 2408 + #ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE 2409 + static inline void migrate_disable(void) 2410 + { 2411 + __migrate_disable(); 2412 + } 2413 + 2414 + static inline void migrate_enable(void) 2415 + { 2416 + __migrate_enable(); 2417 + } 2418 + #else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */ 2419 + extern void migrate_disable(void); 2420 + extern void migrate_enable(void); 2421 + #endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */ 2422 + 2423 + #else /* MODULE */ 2424 + extern void migrate_disable(void); 2425 + extern void migrate_enable(void); 2426 + #endif /* MODULE */ 2427 + 2428 + DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) 2322 2429 2323 2430 #endif
+1
kernel/bpf/verifier.c
··· 23855 23855 BTF_SET_START(btf_id_deny) 23856 23856 BTF_ID_UNUSED 23857 23857 #ifdef CONFIG_SMP 23858 + BTF_ID(func, ___migrate_enable) 23858 23859 BTF_ID(func, migrate_disable) 23859 23860 BTF_ID(func, migrate_enable) 23860 23861 #endif
+14 -49
kernel/sched/core.c
··· 7 7 * Copyright (C) 1991-2002 Linus Torvalds 8 8 * Copyright (C) 1998-2024 Ingo Molnar, Red Hat 9 9 */ 10 + #define INSTANTIATE_EXPORTED_MIGRATE_DISABLE 11 + #include <linux/sched.h> 10 12 #include <linux/highmem.h> 11 13 #include <linux/hrtimer_api.h> 12 14 #include <linux/ktime_api.h> ··· 2383 2381 __do_set_cpus_allowed(p, &ac); 2384 2382 } 2385 2383 2386 - void migrate_disable(void) 2387 - { 2388 - struct task_struct *p = current; 2389 - 2390 - if (p->migration_disabled) { 2391 - #ifdef CONFIG_DEBUG_PREEMPT 2392 - /* 2393 - *Warn about overflow half-way through the range. 2394 - */ 2395 - WARN_ON_ONCE((s16)p->migration_disabled < 0); 2396 - #endif 2397 - p->migration_disabled++; 2398 - return; 2399 - } 2400 - 2401 - guard(preempt)(); 2402 - this_rq()->nr_pinned++; 2403 - p->migration_disabled = 1; 2404 - } 2405 - EXPORT_SYMBOL_GPL(migrate_disable); 2406 - 2407 - void migrate_enable(void) 2384 + void ___migrate_enable(void) 2408 2385 { 2409 2386 struct task_struct *p = current; 2410 2387 struct affinity_context ac = { ··· 2391 2410 .flags = SCA_MIGRATE_ENABLE, 2392 2411 }; 2393 2412 2394 - #ifdef CONFIG_DEBUG_PREEMPT 2395 - /* 2396 - * Check both overflow from migrate_disable() and superfluous 2397 - * migrate_enable(). 2398 - */ 2399 - if (WARN_ON_ONCE((s16)p->migration_disabled <= 0)) 2400 - return; 2401 - #endif 2413 + __set_cpus_allowed_ptr(p, &ac); 2414 + } 2415 + EXPORT_SYMBOL_GPL(___migrate_enable); 2402 2416 2403 - if (p->migration_disabled > 1) { 2404 - p->migration_disabled--; 2405 - return; 2406 - } 2417 + void migrate_disable(void) 2418 + { 2419 + __migrate_disable(); 2420 + } 2421 + EXPORT_SYMBOL_GPL(migrate_disable); 2407 2422 2408 - /* 2409 - * Ensure stop_task runs either before or after this, and that 2410 - * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). 
2411 - */ 2412 - guard(preempt)(); 2413 - if (p->cpus_ptr != &p->cpus_mask) 2414 - __set_cpus_allowed_ptr(p, &ac); 2415 - /* 2416 - * Mustn't clear migration_disabled() until cpus_ptr points back at the 2417 - * regular cpus_mask, otherwise things that race (eg. 2418 - * select_fallback_rq) get confused. 2419 - */ 2420 - barrier(); 2421 - p->migration_disabled = 0; 2422 - this_rq()->nr_pinned--; 2423 + void migrate_enable(void) 2424 + { 2425 + __migrate_enable(); 2423 2426 } 2424 2427 EXPORT_SYMBOL_GPL(migrate_enable); 2425 2428
+12
kernel/sched/rq-offsets.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #define COMPILE_OFFSETS 3 + #include <linux/kbuild.h> 4 + #include <linux/types.h> 5 + #include "sched.h" 6 + 7 + int main(void) 8 + { 9 + DEFINE(RQ_nr_pinned, offsetof(struct rq, nr_pinned)); 10 + 11 + return 0; 12 + }