Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

genirq: Add interrupt redirection infrastructure

Add infrastructure to redirect interrupt handler execution to a
different CPU when the current CPU is not part of the interrupt's CPU
affinity mask.

This is primarily aimed at (de)multiplexed interrupts, where the child
interrupt handler runs in the context of the parent interrupt handler,
and therefore CPU affinity control for the child interrupt is typically
not available.

With the new infrastructure, the child interrupt is allowed to freely
change its affinity setting, independently of the parent. If the
interrupt handler happens to be triggered on an "incompatible" CPU (a
CPU that's not part of the child interrupt's affinity mask), the handler
is redirected and runs in IRQ work context on a "compatible" CPU.

No functional change is made to any existing irqchip driver: the new
infrastructure is opt-in, and drivers must be explicitly modified to use
it in order to support interrupt redirection.

Originally-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/linux-pci/878qpg4o4t.ffs@tglx/
Link: https://patch.msgid.link/20251128212055.1409093-2-rrendec@redhat.com
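As a rough illustration of what such an opt-in could look like (not part of this patch: all demux_* names, the register layout, and the handler below are hypothetical; only irq_chip_redirect_set_affinity() and generic_handle_demux_domain_irq() come from this series):

#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/irqdomain.h>

/* Hypothetical driver state */
struct demux_priv {
	struct irq_domain *domain;
	void __iomem *status_reg;
};

/*
 * Child chip: there is no per-child affinity hardware, so let the core
 * record a redirection target CPU instead.
 */
static struct irq_chip demux_child_chip = {
	.name			= "demux-child",
	.irq_set_affinity	= irq_chip_redirect_set_affinity,
};

/*
 * Parent handler: child handlers normally run right here, in the parent's
 * context. generic_handle_demux_domain_irq() transparently defers a child
 * handler to irq_work on a compatible CPU when the current CPU is not in
 * the child's affinity mask.
 */
static irqreturn_t demux_parent_handler(int irq, void *data)
{
	struct demux_priv *priv = data;
	unsigned long pending = readl(priv->status_reg);
	unsigned int bit;

	for_each_set_bit(bit, &pending, 32)
		generic_handle_demux_domain_irq(priv->domain, bit);

	return pending ? IRQ_HANDLED : IRQ_NONE;
}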

Authored by Radu Rendec, committed by Thomas Gleixner
fcc1d0da 0317e0ab

+144 -6
+10 -0
include/linux/irq.h
···
 	 * checks against the supplied affinity mask are not
 	 * required. This is used for CPU hotplug where the
 	 * target CPU is not yet set in the cpu_online_mask.
+	 * @irq_pre_redirect:	Optional function to be invoked before redirecting
+	 *			an interrupt via irq_work. Called only on CONFIG_SMP.
 	 * @irq_retrigger:	resend an IRQ to the CPU
 	 * @irq_set_type:	set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
 	 * @irq_set_wake:	enable/disable power-management wake-on of an IRQ
···
 	void	(*irq_eoi)(struct irq_data *data);

 	int	(*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
+	void	(*irq_pre_redirect)(struct irq_data *data);
 	int	(*irq_retrigger)(struct irq_data *data);
 	int	(*irq_set_type)(struct irq_data *data, unsigned int flow_type);
 	int	(*irq_set_wake)(struct irq_data *data, unsigned int on);
···
 extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type);
 extern int irq_chip_request_resources_parent(struct irq_data *data);
 extern void irq_chip_release_resources_parent(struct irq_data *data);
+#ifdef CONFIG_SMP
+void irq_chip_pre_redirect_parent(struct irq_data *data);
+#endif
+#endif
+
+#ifdef CONFIG_SMP
+int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force);
 #endif

 /* Disable or mask interrupts during a kernel kexec */
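A plausible (purely hypothetical) use of the new irq_pre_redirect() hook: it runs on the originating CPU, under the descriptor lock, so a driver could acknowledge its hardware there before the handler itself is deferred to the target CPU. The demux_* names and the ack_reg field are illustrative assumptions, reusing the sketch above:

static void demux_child_pre_redirect(struct irq_data *d)
{
	struct demux_priv *priv = irq_data_get_irq_chip_data(d);

	/* Ack now: the handler runs later, in irq_work context, on the
	 * redirection target CPU. (ack_reg is a hypothetical register.) */
	writel(BIT(irqd_to_hwirq(d)), priv->ack_reg);
}

static struct irq_chip demux_child_chip = {
	.name			= "demux-child",
	.irq_pre_redirect	= demux_child_pre_redirect,
	.irq_set_affinity	= irq_chip_redirect_set_affinity,
};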
+16 -1
include/linux/irqdesc.h
···
 #ifndef _LINUX_IRQDESC_H
 #define _LINUX_IRQDESC_H

-#include <linux/rcupdate.h>
+#include <linux/irq_work.h>
 #include <linux/kobject.h>
 #include <linux/mutex.h>
+#include <linux/rcupdate.h>

 /*
  * Core internal functions to deal with irq descriptors
···
 };

 /**
+ * struct irq_redirect - interrupt redirection metadata
+ * @work:	Hard irq_work item for handler execution on a different CPU
+ * @target_cpu:	CPU to run irq handler on in case the current CPU is not part
+ *		of the irq affinity mask
+ */
+struct irq_redirect {
+	struct irq_work		work;
+	unsigned int		target_cpu;
+};
+
+/**
  * struct irq_desc - interrupt descriptor
  * @irq_common_data:	per irq and chip data passed down to chip functions
  * @kstat_irqs:		irq stats per cpu
···
  * @threads_handled:	stats field for deferred spurious detection of threaded handlers
  * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
  * @lock:		locking for SMP
+ * @redirect:		Facility for redirecting interrupts via irq_work
  * @affinity_hint:	hint to user space for preferred irq affinity
  * @affinity_notify:	context for notification of affinity changes
  * @pending_mask:	pending rebalanced interrupts
···
 	raw_spinlock_t		lock;
 	struct cpumask		*percpu_enabled;
 #ifdef CONFIG_SMP
+	struct irq_redirect	redirect;
 	const struct cpumask	*affinity_hint;
 	struct irq_affinity_notify *affinity_notify;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
···
 int generic_handle_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
 int generic_handle_domain_irq_safe(struct irq_domain *domain, irq_hw_number_t hwirq);
 int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq);
+bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
 #endif

 /* Test to see if a driver has successfully requested an irq */
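The work item embedded in struct irq_redirect is initialized with IRQ_WORK_INIT_HARD(), which makes the callback run in hard interrupt context on the CPU it is queued on (even on PREEMPT_RT, where plain irq_work is otherwise deferred to a per-CPU thread). A standalone sketch of that primitive, with illustrative names:

#include <linux/irq_work.h>

static void redirect_demo_fn(struct irq_work *work)
{
	/* Runs in hard interrupt context on the CPU the work was queued on */
}

static struct irq_work redirect_demo = IRQ_WORK_INIT_HARD(redirect_demo_fn);

static void redirect_demo_kick(unsigned int target_cpu)
{
	/* Returns false if the item is already pending; a pending item is
	 * not queued twice. */
	irq_work_queue_on(&redirect_demo, target_cpu);
}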
+21 -1
kernel/irq/chip.c
···
 }
 #endif

-#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY

 #ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS
 /**
···
 EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq);

 #endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */
+
+#ifdef CONFIG_SMP
+void irq_chip_pre_redirect_parent(struct irq_data *data)
+{
+	data = data->parent_data;
+	data->chip->irq_pre_redirect(data);
+}
+EXPORT_SYMBOL_GPL(irq_chip_pre_redirect_parent);
+#endif

 /**
  * irq_chip_set_parent_state - set the state of a parent interrupt.
···
 	data->chip->irq_release_resources(data);
 }
 EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent);
+#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+#ifdef CONFIG_SMP
+int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force)
+{
+	struct irq_redirect *redir = &irq_data_to_desc(data)->redirect;
+
+	WRITE_ONCE(redir->target_cpu, cpumask_first(dest));
+	return IRQ_SET_MASK_OK;
+}
+EXPORT_SYMBOL_GPL(irq_chip_redirect_set_affinity);
 #endif

 /**
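In a hierarchical irq domain, irq_chip_pre_redirect_parent() forwards the hook to the parent chip, which must then implement irq_pre_redirect() itself. A hypothetical child chip in such a stack could be wired as follows; only the two new irq_chip_* helpers come from this patch, the rest are existing parent-forwarding helpers:

static struct irq_chip demux_hier_chip = {
	.name			= "demux-hier",
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= irq_chip_eoi_parent,
	/* Forwarded to the parent chip, which must provide the hook */
	.irq_pre_redirect	= irq_chip_pre_redirect_parent,
	/* Track the redirection target CPU in the core */
	.irq_set_affinity	= irq_chip_redirect_set_affinity,
};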
+84 -2
kernel/irq/irqdesc.c
···
 	return 0;
 }

-static void desc_smp_init(struct irq_desc *desc, int node,
-			  const struct cpumask *affinity)
+static void irq_redirect_work(struct irq_work *work)
+{
+	handle_irq_desc(container_of(work, struct irq_desc, redirect.work));
+}
+
+static void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity)
 {
 	if (!affinity)
 		affinity = irq_default_affinity;
···
 #ifdef CONFIG_NUMA
 	desc->irq_common_data.node = node;
 #endif
+	desc->redirect.work = IRQ_WORK_INIT_HARD(irq_redirect_work);
 }

 static void free_masks(struct irq_desc *desc)
···
 	WARN_ON_ONCE(!in_nmi());
 	return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
 }
+
+#ifdef CONFIG_SMP
+static bool demux_redirect_remote(struct irq_desc *desc)
+{
+	guard(raw_spinlock)(&desc->lock);
+	const struct cpumask *m = irq_data_get_effective_affinity_mask(&desc->irq_data);
+	unsigned int target_cpu = READ_ONCE(desc->redirect.target_cpu);
+
+	if (desc->irq_data.chip->irq_pre_redirect)
+		desc->irq_data.chip->irq_pre_redirect(&desc->irq_data);
+
+	/*
+	 * If the interrupt handler is already running on a CPU that's included
+	 * in the interrupt's affinity mask, redirection is not necessary.
+	 */
+	if (cpumask_test_cpu(smp_processor_id(), m))
+		return false;
+
+	/*
+	 * The desc->action check protects against IRQ shutdown: __free_irq() sets
+	 * desc->action to NULL while holding desc->lock, which we also hold.
+	 *
+	 * Calling irq_work_queue_on() here is safe w.r.t. CPU unplugging:
+	 * - takedown_cpu() schedules multi_cpu_stop() on all active CPUs,
+	 *   including the one that's taken down.
+	 * - multi_cpu_stop() acts like a barrier, which means all active
+	 *   CPUs go through MULTI_STOP_DISABLE_IRQ and disable hard IRQs
+	 *   *before* the dying CPU runs take_cpu_down() in MULTI_STOP_RUN.
+	 * - Hard IRQs are re-enabled at the end of multi_cpu_stop(), *after*
+	 *   the dying CPU has run take_cpu_down() in MULTI_STOP_RUN.
+	 * - Since we run in hard IRQ context, we run either before or after
+	 *   take_cpu_down() but never concurrently.
+	 * - If we run before take_cpu_down(), the dying CPU hasn't been marked
+	 *   offline yet (it's marked via take_cpu_down() -> __cpu_disable()),
+	 *   so the WARN in irq_work_queue_on() can't occur.
+	 * - Furthermore, the work item we queue will be flushed later via
+	 *   take_cpu_down() -> cpuhp_invoke_callback_range_nofail() ->
+	 *   smpcfd_dying_cpu() -> irq_work_run().
+	 * - If we run after take_cpu_down(), target_cpu has been already
+	 *   updated via take_cpu_down() -> __cpu_disable(), which eventually
+	 *   calls irq_do_set_affinity() during IRQ migration. So, target_cpu
+	 *   no longer points to the dying CPU in this case.
+	 */
+	if (desc->action)
+		irq_work_queue_on(&desc->redirect.work, target_cpu);
+
+	return true;
+}
+#else /* CONFIG_SMP */
+static bool demux_redirect_remote(struct irq_desc *desc)
+{
+	return false;
+}
+#endif
+
+/**
+ * generic_handle_demux_domain_irq - Invoke the handler for a hardware interrupt
+ *				     of a demultiplexing domain
+ * @domain:	The domain where to perform the lookup
+ * @hwirq:	The hardware interrupt number to convert to a logical one
+ *
+ * Returns: True on success, or false if lookup has failed
+ */
+bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq)
+{
+	struct irq_desc *desc = irq_resolve_mapping(domain, hwirq);
+
+	if (unlikely(!desc))
+		return false;
+
+	if (demux_redirect_remote(desc))
+		return true;
+
+	return !handle_irq_desc(desc);
+}
+EXPORT_SYMBOL_GPL(generic_handle_demux_domain_irq);
+
 #endif

 /* Dynamic interrupt handling */
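Per the kernel-doc above, generic_handle_demux_domain_irq() returns false only when the hwirq cannot be resolved to a descriptor, so a parent handler can use the return value to flag spurious lines, e.g. (hypothetical sketch, reusing priv from the earlier example):

if (!generic_handle_demux_domain_irq(priv->domain, hwirq))
	pr_warn_ratelimited("demux: spurious hwirq %lu\n",
			    (unsigned long)hwirq);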
+13 -2
kernel/irq/manage.c
···
 early_param("threadirqs", setup_forced_irqthreads);
 #endif

+#ifdef CONFIG_SMP
+static inline void synchronize_irqwork(struct irq_desc *desc)
+{
+	/* Synchronize pending or on the fly redirect work */
+	irq_work_sync(&desc->redirect.work);
+}
+#else
+static inline void synchronize_irqwork(struct irq_desc *desc) { }
+#endif
+
 static int __irq_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *state);

 static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
···

 static void __synchronize_irq(struct irq_desc *desc)
 {
+	synchronize_irqwork(desc);
 	__synchronize_hardirq(desc, true);
+
 	/*
 	 * We made sure that no hardirq handler is running. Now verify that no
 	 * threaded handlers are active.
···

 static DEFINE_PER_CPU(struct cpumask, __tmp_mask);

-int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
-			bool force)
+int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 {
 	struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask);
 	struct irq_desc *desc = irq_data_to_desc(data);
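Worth noting for driver authors: __synchronize_irq() now flushes the redirect irq_work before synchronizing the hard IRQ, so, assuming the usual teardown path reaches it, an ordinary free_irq() should already guarantee that no redirected handler is pending or running once it returns. A sketch under that assumption (child_irq and priv are hypothetical):

/* Teardown is unchanged for drivers: the synchronize path now also
 * irq_work_sync()s the redirect work before the action goes away. */
free_irq(child_irq, priv);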