Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

bpf: Retire rcu_trace_implies_rcu_gp()

An RCU Tasks Trace grace period implies an RCU grace period, and this
guarantee is expected to hold in the future as well. BPF is the only
user of this predicate, hence retire the API and clean up all in-tree
users.
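
For reference, the retired pattern looked like this at its call sites
(a condensed before/after sketch based on the kernel/bpf/core.c hunk
below; the function name is assumed to match the helper there):

  /* Before: hedge in case the implication ever stopped holding. */
  static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
  {
          struct bpf_prog_array *progs;

          progs = container_of(rcu, struct bpf_prog_array, rcu);
          if (rcu_trace_implies_rcu_gp())
                  kfree(progs);          /* Tasks Trace GP already implied an RCU GP */
          else
                  kfree_rcu(progs, rcu); /* otherwise chain one more RCU GP */
  }

  /* After: the implication is guaranteed, so free directly. */
  static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
  {
          kfree(container_of(rcu, struct bpf_prog_array, rcu));
  }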

RCU Tasks Trace is now implemented on top of SRCU-fast, and its grace
period mechanism always includes at least one call to synchronize_rcu(),
which is required for SRCU-fast's correctness (it replaces the smp_mb()
that SRCU-fast readers skip). Hence, an RCU Tasks Trace grace period
will always imply an RCU grace period.
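
As a rough illustration of why the implication holds (this is a sketch
of the ordering argument only, not the actual Tasks Trace
implementation; the srcu_struct name is made up):

  /* Hypothetical sketch: what a Tasks Trace grace period must contain. */
  static void rcu_tasks_trace_gp_sketch(void)
  {
          /* Wait for all SRCU-fast readers, i.e. all tasks-trace
           * read-side critical sections (e.g. sleepable BPF programs).
           */
          synchronize_srcu(&tasks_trace_srcu);    /* hypothetical name */

          /* Required for SRCU-fast correctness: supplies the ordering
           * of the smp_mb() that SRCU-fast readers skip. Because every
           * Tasks Trace grace period contains this full RCU grace
           * period, a Tasks Trace GP always implies an RCU GP.
           */
          synchronize_rcu();
  }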

Reviewed-by: Puranjay Mohan <puranjay@kernel.org>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260407162234.785270-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

Authored by Kumar Kartikeya Dwivedi, committed by Alexei Starovoitov
57b23c0f a8aa3067

5 changed files: +19 -60
include/linux/rcupdate.h (-12)

@@ -206,18 +206,6 @@
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */

 /**
- * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period?
- *
- * As an accident of implementation, an RCU Tasks Trace grace period also
- * acts as an RCU grace period. However, this could change at any time.
- * Code relying on this accident must call this function to verify that
- * this accident is still happening.
- *
- * You have been warned!
- */
-static inline bool rcu_trace_implies_rcu_gp(void) { return true; }
-
-/**
  * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
  *
  * This macro resembles cond_resched(), except that it is defined to
kernel/bpf/core.c (+4 -6)

@@ -2644,14 +2644,12 @@
 {
	struct bpf_prog_array *progs;

-	/* If RCU Tasks Trace grace period implies RCU grace period, there is
-	 * no need to call kfree_rcu(), just call kfree() directly.
+	/*
+	 * RCU Tasks Trace grace period implies RCU grace period, there is no
+	 * need to call kfree_rcu(), just call kfree() directly.
 	 */
	progs = container_of(rcu, struct bpf_prog_array, rcu);
-	if (rcu_trace_implies_rcu_gp())
-		kfree(progs);
-	else
-		kfree_rcu(progs, rcu);
+	kfree(progs);
 }

 void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
kernel/bpf/helpers.c (+1 -1)

@@ -1272,7 +1272,7 @@
 		return;
 	}

-	/* rcu_trace_implies_rcu_gp() is true and will remain so */
+	/* RCU Tasks Trace grace period implies RCU grace period. */
	bpf_async_cb_rcu_free(rcu);
 }
kernel/bpf/memalloc.c (+10 -23)

@@ -284,17 +284,6 @@
	atomic_set(&c->call_rcu_ttrace_in_progress, 0);
 }

-static void __free_rcu_tasks_trace(struct rcu_head *head)
-{
-	/* If RCU Tasks Trace grace period implies RCU grace period,
-	 * there is no need to invoke call_rcu().
-	 */
-	if (rcu_trace_implies_rcu_gp())
-		__free_rcu(head);
-	else
-		call_rcu(head, __free_rcu);
-}
-
 static void enque_to_free(struct bpf_mem_cache *c, void *obj)
 {
	struct llist_node *llnode = obj;
@@ -315,12 +326,12 @@
 		return;
 	}

-	/* Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
-	 * If RCU Tasks Trace grace period implies RCU grace period, free
-	 * these elements directly, else use call_rcu() to wait for normal
-	 * progs to finish and finally do free_one() on each element.
+	/*
+	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
+	 * RCU Tasks Trace grace period implies RCU grace period, so pass
+	 * __free_rcu directly as the callback.
 	 */
-	call_rcu_tasks_trace(&c->rcu_ttrace, __free_rcu_tasks_trace);
+	call_rcu_tasks_trace(&c->rcu_ttrace, __free_rcu);
 }

 static void free_bulk(struct bpf_mem_cache *c)
@@ -685,20 +696,18 @@

 static void free_mem_alloc(struct bpf_mem_alloc *ma)
 {
-	/* waiting_for_gp[_ttrace] lists were drained, but RCU callbacks
+	/*
+	 * waiting_for_gp[_ttrace] lists were drained, but RCU callbacks
 	 * might still execute. Wait for them.
 	 *
 	 * rcu_barrier_tasks_trace() doesn't imply synchronize_rcu_tasks_trace(),
 	 * but rcu_barrier_tasks_trace() and rcu_barrier() below are only used
-	 * to wait for the pending __free_rcu_tasks_trace() and __free_rcu(),
-	 * so if call_rcu(head, __free_rcu) is skipped due to
-	 * rcu_trace_implies_rcu_gp(), it will be OK to skip rcu_barrier() by
-	 * using rcu_trace_implies_rcu_gp() as well.
+	 * to wait for the pending __free_by_rcu() and __free_rcu(). RCU Tasks
+	 * Trace grace period implies RCU grace period, so __free_rcu() doesn't
+	 * need an extra call_rcu() (and thus no extra rcu_barrier() here).
 	 */
	rcu_barrier();	/* wait for __free_by_rcu */
	rcu_barrier_tasks_trace();	/* wait for __free_rcu */
-	if (!rcu_trace_implies_rcu_gp())
-		rcu_barrier();
	free_mem_alloc_no_barrier(ma);
 }
kernel/bpf/syscall.c (+4 -18)

@@ -941,14 +941,6 @@
	bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu));
 }

-static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu)
-{
-	if (rcu_trace_implies_rcu_gp())
-		bpf_map_free_rcu_gp(rcu);
-	else
-		call_rcu(rcu, bpf_map_free_rcu_gp);
-}
-
 /* decrement map refcnt and schedule it for freeing via workqueue
  * (underlying map implementation ops->map_free() might sleep)
  */
@@ -951,8 +959,9 @@
 		bpf_map_free_id(map);

 		WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
+		/* RCU tasks trace grace period implies RCU grace period. */
 		if (READ_ONCE(map->free_after_mult_rcu_gp))
-			call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
+			call_rcu_tasks_trace(&map->rcu, bpf_map_free_rcu_gp);
 		else if (READ_ONCE(map->free_after_rcu_gp))
 			call_rcu(&map->rcu, bpf_map_free_rcu_gp);
 		else
@@ -3266,13 +3273,5 @@
	       (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP);
 }

-static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
-{
-	if (rcu_trace_implies_rcu_gp())
-		bpf_link_defer_dealloc_rcu_gp(rcu);
-	else
-		call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
-}
-
 /* bpf_link_free is guaranteed to be called from process context */
 static void bpf_link_free(struct bpf_link *link)
@@ -3291,7 +3306,8 @@
 	 * faultable case, since it exclusively uses RCU Tasks Trace.
 	 */
	if (link->sleepable || (link->prog && link->prog->sleepable))
-		call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+		/* RCU Tasks Trace grace period implies RCU grace period. */
+		call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
 	/* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */
	else if (bpf_link_is_tracepoint(link))
 		call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp);