Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

rv: Convert the opid monitor to a hybrid automaton

The opid monitor follows the preemptirq tracepoints to validate that
wakeup and need_resched events only occur with interrupts and
preemption disabled.
As reported in [1], those tracepoints might be inaccurate in some
situations (e.g. NMIs).

Since the monitor doesn't validate other ordering properties, remove the
dependency on preemptirq tracepoints and convert the monitor to a hybrid
automaton to validate the constraint during event handling.
This makes the monitor more robust by also removing the workaround for
interrupts missing the preemption tracepoints, which worked on
PREEMPT_RT only, and allows the monitor to be built on kernels without
the preemptirq tracepoints.

[1] - https://lore.kernel.org/lkml/20250625120823.60600-1-gmonaco@redhat.com

Reviewed-by: Nam Cao <namcao@linutronix.de>
Link: https://lore.kernel.org/r/20260330111010.153663-8-gmonaco@redhat.com
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>

+82 -230
+13 -47
Documentation/trace/rv/monitor_sched.rst
··· 346 346 347 347 The operations with preemption and irq disabled (opid) monitor ensures 348 348 operations like ``wakeup`` and ``need_resched`` occur with interrupts and 349 - preemption disabled or during interrupt context, in such case preemption may 350 - not be disabled explicitly. 349 + preemption disabled. 351 350 ``need_resched`` can be set by some RCU internals functions, in which case it 352 - doesn't match a task wakeup and might occur with only interrupts disabled:: 351 + doesn't match a task wakeup and might occur with only interrupts disabled. 352 + The interrupt and preemption status are validated by the hybrid automaton 353 + constraints when processing the events:: 353 354 354 - | sched_need_resched 355 - | sched_waking 356 - | irq_entry 357 - | +--------------------+ 358 - v v | 359 - +------------------------------------------------------+ 360 - +----------- | disabled | <+ 361 - | +------------------------------------------------------+ | 362 - | | ^ | 363 - | | preempt_disable sched_need_resched | 364 - | preempt_enable | +--------------------+ | 365 - | v | v | | 366 - | +------------------------------------------------------+ | 367 - | | irq_disabled | | 368 - | +------------------------------------------------------+ | 369 - | | | ^ | 370 - | irq_entry irq_entry | | | 371 - | sched_need_resched v | irq_disable | 372 - | sched_waking +--------------+ | | | 373 - | +----- | | irq_enable | | 374 - | | | in_irq | | | | 375 - | +----> | | | | | 376 - | +--------------+ | | irq_disable 377 - | | | | | 378 - | irq_enable | irq_enable | | | 379 - | v v | | 380 - | #======================================================# | 381 - | H enabled H | 382 - | #======================================================# | 383 - | | ^ ^ preempt_enable | | 384 - | preempt_disable preempt_enable +--------------------+ | 385 - | v | | 386 - | +------------------+ | | 387 - +----------> | preempt_disabled | -+ | 388 - +------------------+ | 389 - | | 390 - 
+-------------------------------------------------------+ 391 - 392 - This monitor is designed to work on ``PREEMPT_RT`` kernels, the special case of 393 - events occurring in interrupt context is a shortcut to identify valid scenarios 394 - where the preemption tracepoints might not be visible, during interrupts 395 - preemption is always disabled. On non- ``PREEMPT_RT`` kernels, the interrupts 396 - might invoke a softirq to set ``need_resched`` and wake up a task. This is 397 - another special case that is currently not supported by the monitor. 355 + | 356 + | 357 + v 358 + #=========# sched_need_resched;irq_off == 1 359 + H H sched_waking;irq_off == 1 && preempt_off == 1 360 + H any H ------------------------------------------------+ 361 + H H | 362 + H H <-----------------------------------------------+ 363 + #=========# 398 364 399 365 References 400 366 ----------
+3 -8
kernel/trace/rv/monitors/opid/Kconfig
··· 2 2 # 3 3 config RV_MON_OPID 4 4 depends on RV 5 - depends on TRACE_IRQFLAGS 6 - depends on TRACE_PREEMPT_TOGGLE 7 5 depends on RV_MON_SCHED 8 - default y if PREEMPT_RT 9 - select DA_MON_EVENTS_IMPLICIT 6 + default y 7 + select HA_MON_EVENTS_IMPLICIT 10 8 bool "opid monitor" 11 9 help 12 10 Monitor to ensure operations like wakeup and need resched occur with 13 - interrupts and preemption disabled or during IRQs, where preemption 14 - may not be disabled explicitly. 15 - 16 - This monitor is unstable on !PREEMPT_RT, say N unless you are testing it. 11 + interrupts and preemption disabled. 17 12 18 13 For further information, see: 19 14 Documentation/trace/rv/monitor_sched.rst
+34 -77
kernel/trace/rv/monitors/opid/opid.c
··· 10 10 #define MODULE_NAME "opid" 11 11 12 12 #include <trace/events/sched.h> 13 - #include <trace/events/irq.h> 14 - #include <trace/events/preemptirq.h> 15 13 #include <rv_trace.h> 16 14 #include <monitors/sched/sched.h> 17 15 18 16 #define RV_MON_TYPE RV_MON_PER_CPU 19 17 #include "opid.h" 20 - #include <rv/da_monitor.h> 18 + #include <rv/ha_monitor.h> 21 19 22 - #ifdef CONFIG_X86_LOCAL_APIC 23 - #include <asm/trace/irq_vectors.h> 24 - 25 - static void handle_vector_irq_entry(void *data, int vector) 20 + static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs_opid env, u64 time_ns) 26 21 { 27 - da_handle_event(irq_entry_opid); 28 - } 29 - 30 - static void attach_vector_irq(void) 31 - { 32 - rv_attach_trace_probe("opid", local_timer_entry, handle_vector_irq_entry); 33 - if (IS_ENABLED(CONFIG_IRQ_WORK)) 34 - rv_attach_trace_probe("opid", irq_work_entry, handle_vector_irq_entry); 35 - if (IS_ENABLED(CONFIG_SMP)) { 36 - rv_attach_trace_probe("opid", reschedule_entry, handle_vector_irq_entry); 37 - rv_attach_trace_probe("opid", call_function_entry, handle_vector_irq_entry); 38 - rv_attach_trace_probe("opid", call_function_single_entry, handle_vector_irq_entry); 22 + if (env == irq_off_opid) 23 + return irqs_disabled(); 24 + else if (env == preempt_off_opid) { 25 + /* 26 + * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables 27 + * preemption (adding one to the preempt_count). Since we are 28 + * interested in the preempt_count at the time the tracepoint was 29 + * hit, we consider 1 as still enabled. 
30 + */ 31 + if (IS_ENABLED(CONFIG_PREEMPTION)) 32 + return (preempt_count() & PREEMPT_MASK) > 1; 33 + return true; 39 34 } 35 + return ENV_INVALID_VALUE; 40 36 } 41 37 42 - static void detach_vector_irq(void) 38 + static inline bool ha_verify_guards(struct ha_monitor *ha_mon, 39 + enum states curr_state, enum events event, 40 + enum states next_state, u64 time_ns) 43 41 { 44 - rv_detach_trace_probe("opid", local_timer_entry, handle_vector_irq_entry); 45 - if (IS_ENABLED(CONFIG_IRQ_WORK)) 46 - rv_detach_trace_probe("opid", irq_work_entry, handle_vector_irq_entry); 47 - if (IS_ENABLED(CONFIG_SMP)) { 48 - rv_detach_trace_probe("opid", reschedule_entry, handle_vector_irq_entry); 49 - rv_detach_trace_probe("opid", call_function_entry, handle_vector_irq_entry); 50 - rv_detach_trace_probe("opid", call_function_single_entry, handle_vector_irq_entry); 51 - } 42 + bool res = true; 43 + 44 + if (curr_state == any_opid && event == sched_need_resched_opid) 45 + res = ha_get_env(ha_mon, irq_off_opid, time_ns) == 1ull; 46 + else if (curr_state == any_opid && event == sched_waking_opid) 47 + res = ha_get_env(ha_mon, irq_off_opid, time_ns) == 1ull && 48 + ha_get_env(ha_mon, preempt_off_opid, time_ns) == 1ull; 49 + return res; 52 50 } 53 51 54 - #else 55 - /* We assume irq_entry tracepoints are sufficient on other architectures */ 56 - static void attach_vector_irq(void) { } 57 - static void detach_vector_irq(void) { } 58 - #endif 59 - 60 - static void handle_irq_disable(void *data, unsigned long ip, unsigned long parent_ip) 52 + static bool ha_verify_constraint(struct ha_monitor *ha_mon, 53 + enum states curr_state, enum events event, 54 + enum states next_state, u64 time_ns) 61 55 { 62 - da_handle_event(irq_disable_opid); 63 - } 56 + if (!ha_verify_guards(ha_mon, curr_state, event, next_state, time_ns)) 57 + return false; 64 58 65 - static void handle_irq_enable(void *data, unsigned long ip, unsigned long parent_ip) 66 - { 67 - da_handle_event(irq_enable_opid); 68 - } 69 - 70 - 
static void handle_irq_entry(void *data, int irq, struct irqaction *action) 71 - { 72 - da_handle_event(irq_entry_opid); 73 - } 74 - 75 - static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip) 76 - { 77 - da_handle_event(preempt_disable_opid); 78 - } 79 - 80 - static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip) 81 - { 82 - da_handle_event(preempt_enable_opid); 59 + return true; 83 60 } 84 61 85 62 static void handle_sched_need_resched(void *data, struct task_struct *tsk, int cpu, int tif) 86 63 { 87 - /* The monitor's intitial state is not in_irq */ 88 - if (this_cpu_read(hardirq_context)) 89 - da_handle_event(sched_need_resched_opid); 90 - else 91 - da_handle_start_event(sched_need_resched_opid); 64 + da_handle_start_run_event(sched_need_resched_opid); 92 65 } 93 66 94 67 static void handle_sched_waking(void *data, struct task_struct *p) 95 68 { 96 - /* The monitor's intitial state is not in_irq */ 97 - if (this_cpu_read(hardirq_context)) 98 - da_handle_event(sched_waking_opid); 99 - else 100 - da_handle_start_event(sched_waking_opid); 69 + da_handle_start_run_event(sched_waking_opid); 101 70 } 102 71 103 72 static int enable_opid(void) ··· 77 108 if (retval) 78 109 return retval; 79 110 80 - rv_attach_trace_probe("opid", irq_disable, handle_irq_disable); 81 - rv_attach_trace_probe("opid", irq_enable, handle_irq_enable); 82 - rv_attach_trace_probe("opid", irq_handler_entry, handle_irq_entry); 83 - rv_attach_trace_probe("opid", preempt_disable, handle_preempt_disable); 84 - rv_attach_trace_probe("opid", preempt_enable, handle_preempt_enable); 85 111 rv_attach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched); 86 112 rv_attach_trace_probe("opid", sched_waking, handle_sched_waking); 87 - attach_vector_irq(); 88 113 89 114 return 0; 90 115 } ··· 87 124 { 88 125 rv_this.enabled = 0; 89 126 90 - rv_detach_trace_probe("opid", irq_disable, handle_irq_disable); 91 - 
rv_detach_trace_probe("opid", irq_enable, handle_irq_enable); 92 - rv_detach_trace_probe("opid", irq_handler_entry, handle_irq_entry); 93 - rv_detach_trace_probe("opid", preempt_disable, handle_preempt_disable); 94 - rv_detach_trace_probe("opid", preempt_enable, handle_preempt_enable); 95 127 rv_detach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched); 96 128 rv_detach_trace_probe("opid", sched_waking, handle_sched_waking); 97 - detach_vector_irq(); 98 129 99 130 da_monitor_destroy(); 100 131 }
+20 -68
kernel/trace/rv/monitors/opid/opid.h
··· 8 8 #define MONITOR_NAME opid 9 9 10 10 enum states_opid { 11 - disabled_opid, 12 - enabled_opid, 13 - in_irq_opid, 14 - irq_disabled_opid, 15 - preempt_disabled_opid, 11 + any_opid, 16 12 state_max_opid, 17 13 }; 18 14 19 15 #define INVALID_STATE state_max_opid 20 16 21 17 enum events_opid { 22 - irq_disable_opid, 23 - irq_enable_opid, 24 - irq_entry_opid, 25 - preempt_disable_opid, 26 - preempt_enable_opid, 27 18 sched_need_resched_opid, 28 19 sched_waking_opid, 29 20 event_max_opid, 30 21 }; 31 22 23 + enum envs_opid { 24 + irq_off_opid, 25 + preempt_off_opid, 26 + env_max_opid, 27 + env_max_stored_opid = irq_off_opid, 28 + }; 29 + 30 + _Static_assert(env_max_stored_opid <= MAX_HA_ENV_LEN, "Not enough slots"); 31 + 32 32 struct automaton_opid { 33 33 char *state_names[state_max_opid]; 34 34 char *event_names[event_max_opid]; 35 + char *env_names[env_max_opid]; 35 36 unsigned char function[state_max_opid][event_max_opid]; 36 37 unsigned char initial_state; 37 38 bool final_states[state_max_opid]; ··· 40 39 41 40 static const struct automaton_opid automaton_opid = { 42 41 .state_names = { 43 - "disabled", 44 - "enabled", 45 - "in_irq", 46 - "irq_disabled", 47 - "preempt_disabled", 42 + "any", 48 43 }, 49 44 .event_names = { 50 - "irq_disable", 51 - "irq_enable", 52 - "irq_entry", 53 - "preempt_disable", 54 - "preempt_enable", 55 45 "sched_need_resched", 56 46 "sched_waking", 57 47 }, 58 - .function = { 59 - { 60 - INVALID_STATE, 61 - preempt_disabled_opid, 62 - disabled_opid, 63 - INVALID_STATE, 64 - irq_disabled_opid, 65 - disabled_opid, 66 - disabled_opid, 67 - }, 68 - { 69 - irq_disabled_opid, 70 - INVALID_STATE, 71 - INVALID_STATE, 72 - preempt_disabled_opid, 73 - enabled_opid, 74 - INVALID_STATE, 75 - INVALID_STATE, 76 - }, 77 - { 78 - INVALID_STATE, 79 - enabled_opid, 80 - in_irq_opid, 81 - INVALID_STATE, 82 - INVALID_STATE, 83 - in_irq_opid, 84 - in_irq_opid, 85 - }, 86 - { 87 - INVALID_STATE, 88 - enabled_opid, 89 - in_irq_opid, 90 - disabled_opid, 91 
- INVALID_STATE, 92 - irq_disabled_opid, 93 - INVALID_STATE, 94 - }, 95 - { 96 - disabled_opid, 97 - INVALID_STATE, 98 - INVALID_STATE, 99 - INVALID_STATE, 100 - enabled_opid, 101 - INVALID_STATE, 102 - INVALID_STATE, 103 - }, 48 + .env_names = { 49 + "irq_off", 50 + "preempt_off", 104 51 }, 105 - .initial_state = disabled_opid, 106 - .final_states = { 0, 1, 0, 0, 0 }, 52 + .function = { 53 + { any_opid, any_opid }, 54 + }, 55 + .initial_state = any_opid, 56 + .final_states = { 1 }, 107 57 };
+4
kernel/trace/rv/monitors/opid/opid_trace.h
··· 12 12 DEFINE_EVENT(error_da_monitor, error_opid, 13 13 TP_PROTO(char *state, char *event), 14 14 TP_ARGS(state, event)); 15 + 16 + DEFINE_EVENT(error_env_da_monitor, error_env_opid, 17 + TP_PROTO(char *state, char *event, char *env), 18 + TP_ARGS(state, event, env)); 15 19 #endif /* CONFIG_RV_MON_OPID */
+1 -1
kernel/trace/rv/rv_trace.h
··· 62 62 #include <monitors/scpd/scpd_trace.h> 63 63 #include <monitors/snep/snep_trace.h> 64 64 #include <monitors/sts/sts_trace.h> 65 - #include <monitors/opid/opid_trace.h> 66 65 // Add new monitors based on CONFIG_DA_MON_EVENTS_IMPLICIT here 67 66 68 67 #ifdef CONFIG_HA_MON_EVENTS_IMPLICIT ··· 90 91 __get_str(env)) 91 92 ); 92 93 94 + #include <monitors/opid/opid_trace.h> 93 95 // Add new monitors based on CONFIG_HA_MON_EVENTS_IMPLICIT here 94 96 95 97 #endif
+7 -29
tools/verification/models/sched/opid.dot
··· 1 1 digraph state_automaton { 2 2 center = true; 3 3 size = "7,11"; 4 - {node [shape = plaintext, style=invis, label=""] "__init_disabled"}; 5 - {node [shape = circle] "disabled"}; 6 - {node [shape = doublecircle] "enabled"}; 7 - {node [shape = circle] "enabled"}; 8 - {node [shape = circle] "in_irq"}; 9 - {node [shape = circle] "irq_disabled"}; 10 - {node [shape = circle] "preempt_disabled"}; 11 - "__init_disabled" -> "disabled"; 12 - "disabled" [label = "disabled"]; 13 - "disabled" -> "disabled" [ label = "sched_need_resched\nsched_waking\nirq_entry" ]; 14 - "disabled" -> "irq_disabled" [ label = "preempt_enable" ]; 15 - "disabled" -> "preempt_disabled" [ label = "irq_enable" ]; 16 - "enabled" [label = "enabled", color = green3]; 17 - "enabled" -> "enabled" [ label = "preempt_enable" ]; 18 - "enabled" -> "irq_disabled" [ label = "irq_disable" ]; 19 - "enabled" -> "preempt_disabled" [ label = "preempt_disable" ]; 20 - "in_irq" [label = "in_irq"]; 21 - "in_irq" -> "enabled" [ label = "irq_enable" ]; 22 - "in_irq" -> "in_irq" [ label = "sched_need_resched\nsched_waking\nirq_entry" ]; 23 - "irq_disabled" [label = "irq_disabled"]; 24 - "irq_disabled" -> "disabled" [ label = "preempt_disable" ]; 25 - "irq_disabled" -> "enabled" [ label = "irq_enable" ]; 26 - "irq_disabled" -> "in_irq" [ label = "irq_entry" ]; 27 - "irq_disabled" -> "irq_disabled" [ label = "sched_need_resched" ]; 28 - "preempt_disabled" [label = "preempt_disabled"]; 29 - "preempt_disabled" -> "disabled" [ label = "irq_disable" ]; 30 - "preempt_disabled" -> "enabled" [ label = "preempt_enable" ]; 4 + {node [shape = plaintext, style=invis, label=""] "__init_any"}; 5 + {node [shape = doublecircle] "any"}; 6 + "__init_any" -> "any"; 7 + "any" [label = "any", color = green3]; 8 + "any" -> "any" [ label = "sched_need_resched;irq_off == 1\nsched_waking;irq_off == 1 && preempt_off == 1" ]; 31 9 { rank = min ; 32 - "__init_disabled"; 33 - "disabled"; 10 + "__init_any"; 11 + "any"; 34 12 } 35 13 }