Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'irq-core-2025-07-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull irq updates from Thomas Gleixner:

- Prevent an interrupt migration related live lock in handle_edge_irq()

If the interrupt affinity is moved to a new target CPU and the
interrupt is currently handled on the previous target CPU, then for
edge type interrupts the handler might get stuck on the previous target
for a long time, which causes both involved CPUs to waste cycles and
eventually run into a soft-lockup situation.

Solve this by checking whether the interrupt is redirected to a new
target CPU and if the interrupt is handled on that new target CPU,
busy wait for completion instead of masking it and setting the
pending bit, which would cause the old CPU to re-run the handler and
in the worst case repeat this exercise for a long time.

This only works on architectures which use single CPU interrupt
targets, but those are so far the only ones where this behaviour has
been observed.

- Add a kunit test for interrupt disable depth counts

The nested interrupt disable depth has been an issue in the past
especially vs. free_irq(), interrupt shutdown and CPU hotplug and
their interactions. The test exercises the combinations of these
scenarios and checks for correctness.

* tag 'irq-core-2025-07-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
genirq: Prevent migration live lock in handle_edge_irq()
genirq: Split up irq_pm_check_wakeup()
genirq: Move irq_wait_for_poll() to call site
genirq: Remove pointless local variable
genirq: Add kunit tests for depth counts

+312 -58
+11
kernel/irq/Kconfig
··· 144 144 config GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD 145 145 bool 146 146 147 + config IRQ_KUNIT_TEST 148 + bool "KUnit tests for IRQ management APIs" if !KUNIT_ALL_TESTS 149 + depends on KUNIT=y 150 + default KUNIT_ALL_TESTS 151 + imply SMP 152 + help 153 + This option enables KUnit tests for the IRQ subsystem API. These are 154 + only for development and testing, not for regular kernel use cases. 155 + 156 + If unsure, say N. 157 + 147 158 endmenu 148 159 149 160 config GENERIC_IRQ_MULTI_HANDLER
+1
kernel/irq/Makefile
··· 19 19 obj-$(CONFIG_SMP) += affinity.o 20 20 obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o 21 21 obj-$(CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR) += matrix.o 22 + obj-$(CONFIG_IRQ_KUNIT_TEST) += irq_test.o
+61 -9
kernel/irq/chip.c
··· 457 457 unmask_irq(desc); 458 458 } 459 459 460 - static bool irq_check_poll(struct irq_desc *desc) 460 + /* Busy wait until INPROGRESS is cleared */ 461 + static bool irq_wait_on_inprogress(struct irq_desc *desc) 461 462 { 462 - if (!(desc->istate & IRQS_POLL_INPROGRESS)) 463 - return false; 464 - return irq_wait_for_poll(desc); 463 + if (IS_ENABLED(CONFIG_SMP)) { 464 + do { 465 + raw_spin_unlock(&desc->lock); 466 + while (irqd_irq_inprogress(&desc->irq_data)) 467 + cpu_relax(); 468 + raw_spin_lock(&desc->lock); 469 + } while (irqd_irq_inprogress(&desc->irq_data)); 470 + 471 + /* Might have been disabled in meantime */ 472 + return !irqd_irq_disabled(&desc->irq_data) && desc->action; 473 + } 474 + return false; 465 475 } 466 476 467 477 static bool irq_can_handle_pm(struct irq_desc *desc) 468 478 { 469 - unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED; 479 + struct irq_data *irqd = &desc->irq_data; 480 + const struct cpumask *aff; 470 481 471 482 /* 472 483 * If the interrupt is not in progress and is not an armed 473 484 * wakeup interrupt, proceed. 474 485 */ 475 - if (!irqd_has_set(&desc->irq_data, mask)) 486 + if (!irqd_has_set(irqd, IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED)) 476 487 return true; 477 488 478 489 /* ··· 491 480 * and suspended, disable it and notify the pm core about the 492 481 * event. 
493 482 */ 494 - if (irq_pm_check_wakeup(desc)) 483 + if (unlikely(irqd_has_set(irqd, IRQD_WAKEUP_ARMED))) { 484 + irq_pm_handle_wakeup(desc); 485 + return false; 486 + } 487 + 488 + /* Check whether the interrupt is polled on another CPU */ 489 + if (unlikely(desc->istate & IRQS_POLL_INPROGRESS)) { 490 + if (WARN_ONCE(irq_poll_cpu == smp_processor_id(), 491 + "irq poll in progress on cpu %d for irq %d\n", 492 + smp_processor_id(), desc->irq_data.irq)) 493 + return false; 494 + return irq_wait_on_inprogress(desc); 495 + } 496 + 497 + /* The below works only for single target interrupts */ 498 + if (!IS_ENABLED(CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK) || 499 + !irqd_is_single_target(irqd) || desc->handle_irq != handle_edge_irq) 495 500 return false; 496 501 497 502 /* 498 - * Handle a potential concurrent poll on a different core. 503 + * If the interrupt affinity was moved to this CPU and the 504 + * interrupt is currently handled on the previous target CPU, then 505 + * busy wait for INPROGRESS to be cleared. Otherwise for edge type 506 + * interrupts the handler might get stuck on the previous target: 507 + * 508 + * CPU 0 CPU 1 (new target) 509 + * handle_edge_irq() 510 + * repeat: 511 + * handle_event() handle_edge_irq() 512 + * if (INPROGESS) { 513 + * set(PENDING); 514 + * mask(); 515 + * return; 516 + * } 517 + * if (PENDING) { 518 + * clear(PENDING); 519 + * unmask(); 520 + * goto repeat; 521 + * } 522 + * 523 + * This happens when the device raises interrupts with a high rate 524 + * and always before handle_event() completes and the CPU0 handler 525 + * can clear INPROGRESS. This has been observed in virtual machines. 499 526 */ 500 - return irq_check_poll(desc); 527 + aff = irq_data_get_effective_affinity_mask(irqd); 528 + if (cpumask_first(aff) != smp_processor_id()) 529 + return false; 530 + return irq_wait_on_inprogress(desc); 501 531 } 502 532 503 533 static inline bool irq_can_handle_actions(struct irq_desc *desc)
+3 -3
kernel/irq/internals.h
··· 20 20 #define istate core_internal_state__do_not_mess_with_it 21 21 22 22 extern bool noirqdebug; 23 + extern int irq_poll_cpu; 23 24 24 25 extern struct irqaction chained_action; 25 26 ··· 113 112 int check_irq_resend(struct irq_desc *desc, bool inject); 114 113 void clear_irq_resend(struct irq_desc *desc); 115 114 void irq_resend_init(struct irq_desc *desc); 116 - bool irq_wait_for_poll(struct irq_desc *desc); 117 115 void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action); 118 116 119 117 void wake_threads_waitq(struct irq_desc *desc); ··· 277 277 } 278 278 279 279 #ifdef CONFIG_PM_SLEEP 280 - bool irq_pm_check_wakeup(struct irq_desc *desc); 280 + void irq_pm_handle_wakeup(struct irq_desc *desc); 281 281 void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); 282 282 void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); 283 283 #else 284 - static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; } 284 + static inline void irq_pm_handle_wakeup(struct irq_desc *desc) { } 285 285 static inline void 286 286 irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } 287 287 static inline void
+229
kernel/irq/irq_test.c
··· 1 + // SPDX-License-Identifier: LGPL-2.1+ 2 + 3 + #include <linux/cpu.h> 4 + #include <linux/cpumask.h> 5 + #include <linux/interrupt.h> 6 + #include <linux/irq.h> 7 + #include <linux/irqdesc.h> 8 + #include <linux/irqdomain.h> 9 + #include <linux/nodemask.h> 10 + #include <kunit/test.h> 11 + 12 + #include "internals.h" 13 + 14 + static irqreturn_t noop_handler(int irq, void *data) 15 + { 16 + return IRQ_HANDLED; 17 + } 18 + 19 + static void noop(struct irq_data *data) { } 20 + static unsigned int noop_ret(struct irq_data *data) { return 0; } 21 + 22 + static int noop_affinity(struct irq_data *data, const struct cpumask *dest, 23 + bool force) 24 + { 25 + irq_data_update_effective_affinity(data, dest); 26 + 27 + return 0; 28 + } 29 + 30 + static struct irq_chip fake_irq_chip = { 31 + .name = "fake", 32 + .irq_startup = noop_ret, 33 + .irq_shutdown = noop, 34 + .irq_enable = noop, 35 + .irq_disable = noop, 36 + .irq_ack = noop, 37 + .irq_mask = noop, 38 + .irq_unmask = noop, 39 + .irq_set_affinity = noop_affinity, 40 + .flags = IRQCHIP_SKIP_SET_WAKE, 41 + }; 42 + 43 + static void irq_disable_depth_test(struct kunit *test) 44 + { 45 + struct irq_desc *desc; 46 + int virq, ret; 47 + 48 + virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, NULL); 49 + KUNIT_ASSERT_GE(test, virq, 0); 50 + 51 + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); 52 + 53 + desc = irq_to_desc(virq); 54 + KUNIT_ASSERT_PTR_NE(test, desc, NULL); 55 + 56 + ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); 57 + KUNIT_EXPECT_EQ(test, ret, 0); 58 + 59 + KUNIT_EXPECT_EQ(test, desc->depth, 0); 60 + 61 + disable_irq(virq); 62 + KUNIT_EXPECT_EQ(test, desc->depth, 1); 63 + 64 + enable_irq(virq); 65 + KUNIT_EXPECT_EQ(test, desc->depth, 0); 66 + 67 + free_irq(virq, NULL); 68 + } 69 + 70 + static void irq_free_disabled_test(struct kunit *test) 71 + { 72 + struct irq_desc *desc; 73 + int virq, ret; 74 + 75 + virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, NULL); 76 + 
KUNIT_ASSERT_GE(test, virq, 0); 77 + 78 + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); 79 + 80 + desc = irq_to_desc(virq); 81 + KUNIT_ASSERT_PTR_NE(test, desc, NULL); 82 + 83 + ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); 84 + KUNIT_EXPECT_EQ(test, ret, 0); 85 + 86 + KUNIT_EXPECT_EQ(test, desc->depth, 0); 87 + 88 + disable_irq(virq); 89 + KUNIT_EXPECT_EQ(test, desc->depth, 1); 90 + 91 + free_irq(virq, NULL); 92 + KUNIT_EXPECT_GE(test, desc->depth, 1); 93 + 94 + ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); 95 + KUNIT_EXPECT_EQ(test, ret, 0); 96 + KUNIT_EXPECT_EQ(test, desc->depth, 0); 97 + 98 + free_irq(virq, NULL); 99 + } 100 + 101 + static void irq_shutdown_depth_test(struct kunit *test) 102 + { 103 + struct irq_desc *desc; 104 + struct irq_data *data; 105 + int virq, ret; 106 + struct irq_affinity_desc affinity = { 107 + .is_managed = 1, 108 + .mask = CPU_MASK_ALL, 109 + }; 110 + 111 + if (!IS_ENABLED(CONFIG_SMP)) 112 + kunit_skip(test, "requires CONFIG_SMP for managed shutdown"); 113 + 114 + virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, &affinity); 115 + KUNIT_ASSERT_GE(test, virq, 0); 116 + 117 + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); 118 + 119 + desc = irq_to_desc(virq); 120 + KUNIT_ASSERT_PTR_NE(test, desc, NULL); 121 + 122 + data = irq_desc_get_irq_data(desc); 123 + KUNIT_ASSERT_PTR_NE(test, data, NULL); 124 + 125 + ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); 126 + KUNIT_EXPECT_EQ(test, ret, 0); 127 + 128 + KUNIT_EXPECT_TRUE(test, irqd_is_activated(data)); 129 + KUNIT_EXPECT_TRUE(test, irqd_is_started(data)); 130 + KUNIT_EXPECT_TRUE(test, irqd_affinity_is_managed(data)); 131 + 132 + KUNIT_EXPECT_EQ(test, desc->depth, 0); 133 + 134 + disable_irq(virq); 135 + KUNIT_EXPECT_EQ(test, desc->depth, 1); 136 + 137 + irq_shutdown_and_deactivate(desc); 138 + 139 + KUNIT_EXPECT_FALSE(test, irqd_is_activated(data)); 140 + KUNIT_EXPECT_FALSE(test, 
irqd_is_started(data)); 141 + 142 + KUNIT_EXPECT_EQ(test, irq_activate(desc), 0); 143 + #ifdef CONFIG_SMP 144 + irq_startup_managed(desc); 145 + #endif 146 + 147 + KUNIT_EXPECT_EQ(test, desc->depth, 1); 148 + 149 + enable_irq(virq); 150 + KUNIT_EXPECT_EQ(test, desc->depth, 0); 151 + 152 + free_irq(virq, NULL); 153 + } 154 + 155 + static void irq_cpuhotplug_test(struct kunit *test) 156 + { 157 + struct irq_desc *desc; 158 + struct irq_data *data; 159 + int virq, ret; 160 + struct irq_affinity_desc affinity = { 161 + .is_managed = 1, 162 + }; 163 + 164 + if (!IS_ENABLED(CONFIG_SMP)) 165 + kunit_skip(test, "requires CONFIG_SMP for CPU hotplug"); 166 + if (!get_cpu_device(1)) 167 + kunit_skip(test, "requires more than 1 CPU for CPU hotplug"); 168 + if (!cpu_is_hotpluggable(1)) 169 + kunit_skip(test, "CPU 1 must be hotpluggable"); 170 + 171 + cpumask_copy(&affinity.mask, cpumask_of(1)); 172 + 173 + virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, &affinity); 174 + KUNIT_ASSERT_GE(test, virq, 0); 175 + 176 + irq_set_chip_and_handler(virq, &fake_irq_chip, handle_simple_irq); 177 + 178 + desc = irq_to_desc(virq); 179 + KUNIT_ASSERT_PTR_NE(test, desc, NULL); 180 + 181 + data = irq_desc_get_irq_data(desc); 182 + KUNIT_ASSERT_PTR_NE(test, data, NULL); 183 + 184 + ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); 185 + KUNIT_EXPECT_EQ(test, ret, 0); 186 + 187 + KUNIT_EXPECT_TRUE(test, irqd_is_activated(data)); 188 + KUNIT_EXPECT_TRUE(test, irqd_is_started(data)); 189 + KUNIT_EXPECT_TRUE(test, irqd_affinity_is_managed(data)); 190 + 191 + KUNIT_EXPECT_EQ(test, desc->depth, 0); 192 + 193 + disable_irq(virq); 194 + KUNIT_EXPECT_EQ(test, desc->depth, 1); 195 + 196 + KUNIT_EXPECT_EQ(test, remove_cpu(1), 0); 197 + KUNIT_EXPECT_FALSE(test, irqd_is_activated(data)); 198 + KUNIT_EXPECT_FALSE(test, irqd_is_started(data)); 199 + KUNIT_EXPECT_GE(test, desc->depth, 1); 200 + KUNIT_EXPECT_EQ(test, add_cpu(1), 0); 201 + 202 + KUNIT_EXPECT_FALSE(test, irqd_is_activated(data)); 
203 + KUNIT_EXPECT_FALSE(test, irqd_is_started(data)); 204 + KUNIT_EXPECT_EQ(test, desc->depth, 1); 205 + 206 + enable_irq(virq); 207 + KUNIT_EXPECT_TRUE(test, irqd_is_activated(data)); 208 + KUNIT_EXPECT_TRUE(test, irqd_is_started(data)); 209 + KUNIT_EXPECT_EQ(test, desc->depth, 0); 210 + 211 + free_irq(virq, NULL); 212 + } 213 + 214 + static struct kunit_case irq_test_cases[] = { 215 + KUNIT_CASE(irq_disable_depth_test), 216 + KUNIT_CASE(irq_free_disabled_test), 217 + KUNIT_CASE(irq_shutdown_depth_test), 218 + KUNIT_CASE(irq_cpuhotplug_test), 219 + {} 220 + }; 221 + 222 + static struct kunit_suite irq_test_suite = { 223 + .name = "irq_test_cases", 224 + .test_cases = irq_test_cases, 225 + }; 226 + 227 + kunit_test_suite(irq_test_suite); 228 + MODULE_DESCRIPTION("IRQ unit test suite"); 229 + MODULE_LICENSE("GPL");
+6 -10
kernel/irq/pm.c
··· 13 13 14 14 #include "internals.h" 15 15 16 - bool irq_pm_check_wakeup(struct irq_desc *desc) 16 + void irq_pm_handle_wakeup(struct irq_desc *desc) 17 17 { 18 - if (irqd_is_wakeup_armed(&desc->irq_data)) { 19 - irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); 20 - desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; 21 - desc->depth++; 22 - irq_disable(desc); 23 - pm_system_irq_wakeup(irq_desc_get_irq(desc)); 24 - return true; 25 - } 26 - return false; 18 + irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); 19 + desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; 20 + desc->depth++; 21 + irq_disable(desc); 22 + pm_system_irq_wakeup(irq_desc_get_irq(desc)); 27 23 } 28 24 29 25 /*
+1 -36
kernel/irq/spurious.c
··· 19 19 #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) 20 20 static void poll_spurious_irqs(struct timer_list *unused); 21 21 static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs); 22 - static int irq_poll_cpu; 22 + int irq_poll_cpu; 23 23 static atomic_t irq_poll_active; 24 - 25 - /* 26 - * We wait here for a poller to finish. 27 - * 28 - * If the poll runs on this CPU, then we yell loudly and return 29 - * false. That will leave the interrupt line disabled in the worst 30 - * case, but it should never happen. 31 - * 32 - * We wait until the poller is done and then recheck disabled and 33 - * action (about to be disabled). Only if it's still active, we return 34 - * true and let the handler run. 35 - */ 36 - bool irq_wait_for_poll(struct irq_desc *desc) 37 - { 38 - lockdep_assert_held(&desc->lock); 39 - 40 - if (WARN_ONCE(irq_poll_cpu == smp_processor_id(), 41 - "irq poll in progress on cpu %d for irq %d\n", 42 - smp_processor_id(), desc->irq_data.irq)) 43 - return false; 44 - 45 - #ifdef CONFIG_SMP 46 - do { 47 - raw_spin_unlock(&desc->lock); 48 - while (irqd_irq_inprogress(&desc->irq_data)) 49 - cpu_relax(); 50 - raw_spin_lock(&desc->lock); 51 - } while (irqd_irq_inprogress(&desc->irq_data)); 52 - /* Might have been disabled in meantime */ 53 - return !irqd_irq_disabled(&desc->irq_data) && desc->action; 54 - #else 55 - return false; 56 - #endif 57 - } 58 - 59 24 60 25 /* 61 26 * Recovery handler for misrouted interrupts.