Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'irq-msi-2026-02-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull MSI updates from Thomas Gleixner:
"Updates for the [PCI] MSI subsystem:

- Add interrupt redirection infrastructure

Some PCI controllers use a single demultiplexing interrupt for the
MSI interrupts of subordinate devices.

This prevents setting the interrupt affinity of the device
interrupts, so all of them are delivered to a single CPU.
That obviously is counterproductive for multi-queue devices and
interrupt balancing.

To work around this limitation the new infrastructure installs a
dummy irq_set_affinity() callback which captures the affinity mask
and picks a redirection target CPU out of the mask.

When the PCI controller demultiplexes the interrupts it invokes a
new handling function in the core, which either runs the interrupt
handler in the context of the target CPU or delegates it to
irq_work on the target CPU (a condensed sketch of the driver-side
wiring follows this message).

- Utilize the interrupt redirection mechanism in the PCI DWC host
controller driver.

This allows affinity control for the subordinate device MSI
interrupts, which previously ran on whichever CPU happened to
execute the demultiplex handler.

- Replace the binary 64-bit MSI flag with a DMA mask

Some PCI devices have PCI_MSI_FLAGS_64BIT in the MSI capability,
but implement less than 64 address bits. This breaks on platforms
where such a device is assigned an MSI address higher than what's
supported.

With the binary 64-bit flag there is no choice other than disabling
64-bit MSI support completely, which leaves the device dysfunctional.

By using a DMA mask the address limit of a device can be described
correctly which provides support for the above scenario.

- Make use of the DMA mask based address limit in the hda/intel and
radeon drivers to enable them on affected platforms

- The usual small cleanups and improvements"
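How the driver-side pieces fit together is easiest to see in one place. Below is a condensed, hypothetical sketch modeled on the DesignWare changes further down in this merge; the demux_* names are invented for illustration, while irq_pre_redirect, irq_chip_redirect_set_affinity() and generic_handle_demux_domain_irq() are the interfaces this series introduces:

#include <linux/bitops.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/irqdesc.h>

#ifdef CONFIG_SMP
static void demux_irq_noop(struct irq_data *d) { }
#endif

static void demux_hw_ack(struct irq_data *d)
{
        /* Write the (hypothetical) controller status register to clear the vector */
}

static struct irq_chip demux_msi_bottom_chip = {
        .name                   = "DEMUX-MSI",
#ifdef CONFIG_SMP
        /*
         * The hardware ack must happen on the demultiplexing CPU before the
         * handler may be redirected, so it moves to irq_pre_redirect and the
         * regular ack slot becomes a no-op.
         */
        .irq_ack                = demux_irq_noop,
        .irq_pre_redirect       = demux_hw_ack,
        /* Captures the affinity mask and picks the redirection target CPU */
        .irq_set_affinity       = irq_chip_redirect_set_affinity,
#else
        .irq_ack                = demux_hw_ack,
#endif
};

/* Runs on whichever CPU receives the parent (demultiplexing) interrupt */
static void demux_handle(struct irq_domain *domain, unsigned long status)
{
        unsigned long bit;

        for_each_set_bit(bit, &status, BITS_PER_LONG) {
                /*
                 * Runs the handler in place when this CPU is in the irq
                 * affinity mask, otherwise queues irq_work on the target CPU.
                 */
                generic_handle_demux_domain_irq(domain, bit);
        }
}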

* tag 'irq-msi-2026-02-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
ALSA: hda/intel: Make MSI address limit based on the device DMA limit
drm/radeon: Make MSI address limit based on the device DMA limit
PCI/MSI: Check the device specific address mask in msi_verify_entries()
PCI/MSI: Convert the boolean no_64bit_msi flag to a DMA address mask
genirq/redirect: Prevent writing MSI message on affinity change
PCI/MSI: Unmap MSI-X region on error
genirq: Update effective affinity for redirected interrupts
PCI: dwc: Enable MSI affinity support
PCI: dwc: Code cleanup
genirq: Add interrupt redirection infrastructure
genirq/msi: Correct kernel-doc in <linux/msi.h>

+247 -112
+1 -1
arch/powerpc/platforms/powernv/pci-ioda.c
···
                 return -ENXIO;
 
         /* Force 32-bit MSI on some broken devices */
-        if (dev->no_64bit_msi)
+        if (dev->msi_addr_mask < DMA_BIT_MASK(64))
                 is_64 = 0;
 
         /* Assign XIVE to PE */
+2 -2
arch/powerpc/platforms/pseries/msi.c
···
          */
 again:
         if (type == PCI_CAP_ID_MSI) {
-                if (pdev->no_64bit_msi) {
+                if (pdev->msi_addr_mask < DMA_BIT_MASK(64)) {
                         rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec);
                         if (rc < 0) {
                                 /*
···
                 if (use_32bit_msi_hack && rc > 0)
                         rtas_hack_32bit_msi_gen2(pdev);
         } else {
-                if (pdev->no_64bit_msi)
+                if (pdev->msi_addr_mask < DMA_BIT_MASK(64))
                         rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSIX_FN, nvec);
                 else
                         rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec);
+1
drivers/gpu/drm/radeon/radeon_device.c
···
                 pr_warn("radeon: No suitable DMA available\n");
                 return r;
         }
+        rdev->pdev->msi_addr_mask = DMA_BIT_MASK(dma_bits);
         rdev->need_swiotlb = drm_need_swiotlb(dma_bits);
 
         /* Registers mapping */
-10
drivers/gpu/drm/radeon/radeon_irq_kms.c
···
         if (rdev->flags & RADEON_IS_AGP)
                 return false;
 
-        /*
-         * Older chips have a HW limitation, they can only generate 40 bits
-         * of address for "64-bit" MSIs which breaks on some platforms, notably
-         * IBM POWER servers, so we limit them
-         */
-        if (rdev->family < CHIP_BONAIRE) {
-                dev_info(rdev->dev, "radeon: MSI limited to 32-bit\n");
-                rdev->pdev->no_64bit_msi = 1;
-        }
-
         /* force MSI on */
         if (radeon_msi == 1)
                 return true;
+1 -1
drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
···
 
 #ifdef CONFIG_PPC64
         /* Ensure MSI/MSI-X interrupts lie within addressable physical memory */
-        pdev->no_64bit_msi = 1;
+        pdev->msi_addr_mask = DMA_BIT_MASK(32);
 #endif
 
         err = ionic_setup_one(ionic);
+58 -69
drivers/pci/controller/dwc/pcie-designware-host.c
···
 static struct pci_ops dw_pcie_ecam_ops;
 static struct pci_ops dw_child_pcie_ops;
 
+#ifdef CONFIG_SMP
+static void dw_irq_noop(struct irq_data *d) { }
+#endif
+
+static bool dw_pcie_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+                                      struct irq_domain *real_parent, struct msi_domain_info *info)
+{
+        if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
+                return false;
+
+#ifdef CONFIG_SMP
+        info->chip->irq_ack = dw_irq_noop;
+        info->chip->irq_pre_redirect = irq_chip_pre_redirect_parent;
+#else
+        info->chip->irq_ack = irq_chip_ack_parent;
+#endif
+        return true;
+}
+
 #define DW_PCIE_MSI_FLAGS_REQUIRED      (MSI_FLAG_USE_DEF_DOM_OPS |     \
                                          MSI_FLAG_USE_DEF_CHIP_OPS |    \
-                                         MSI_FLAG_NO_AFFINITY |         \
                                          MSI_FLAG_PCI_MSI_MASK_PARENT)
 #define DW_PCIE_MSI_FLAGS_SUPPORTED     (MSI_FLAG_MULTI_PCI_MSI |       \
                                          MSI_FLAG_PCI_MSIX |            \
···
         .required_flags         = DW_PCIE_MSI_FLAGS_REQUIRED,
         .supported_flags        = DW_PCIE_MSI_FLAGS_SUPPORTED,
         .bus_select_token       = DOMAIN_BUS_PCI_MSI,
-        .chip_flags             = MSI_CHIP_FLAG_SET_ACK,
         .prefix                 = "DW-",
-        .init_dev_msi_info      = msi_lib_init_dev_msi_info,
+        .init_dev_msi_info      = dw_pcie_init_dev_msi_info,
 };
 
 /* MSI int handler */
-irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp)
+void dw_handle_msi_irq(struct dw_pcie_rp *pp)
 {
-        int i, pos;
-        unsigned long val;
-        u32 status, num_ctrls;
-        irqreturn_t ret = IRQ_NONE;
         struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+        unsigned int i, num_ctrls;
 
         num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
 
         for (i = 0; i < num_ctrls; i++) {
-                status = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS +
-                                           (i * MSI_REG_CTRL_BLOCK_SIZE));
+                unsigned int reg_off = i * MSI_REG_CTRL_BLOCK_SIZE;
+                unsigned int irq_off = i * MAX_MSI_IRQS_PER_CTRL;
+                unsigned long status, pos;
+
+                status = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS + reg_off);
                 if (!status)
                         continue;
 
-                ret = IRQ_HANDLED;
-                val = status;
-                pos = 0;
-                while ((pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL,
-                                            pos)) != MAX_MSI_IRQS_PER_CTRL) {
-                        generic_handle_domain_irq(pp->irq_domain,
-                                                  (i * MAX_MSI_IRQS_PER_CTRL) +
-                                                  pos);
-                        pos++;
-                }
+                for_each_set_bit(pos, &status, MAX_MSI_IRQS_PER_CTRL)
+                        generic_handle_demux_domain_irq(pp->irq_domain, irq_off + pos);
         }
-
-        return ret;
 }
 
 /* Chained MSI interrupt service routine */
···
 {
         struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
         struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
-        u64 msi_target;
-
-        msi_target = (u64)pp->msi_data;
+        u64 msi_target = (u64)pp->msi_data;
 
         msg->address_lo = lower_32_bits(msi_target);
         msg->address_hi = upper_32_bits(msi_target);
-
         msg->data = d->hwirq;
 
         dev_dbg(pci->dev, "msi#%d address_hi %#x address_lo %#x\n",
···
         struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
         struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
         unsigned int res, bit, ctrl;
-        unsigned long flags;
 
-        raw_spin_lock_irqsave(&pp->lock, flags);
-
+        guard(raw_spinlock)(&pp->lock);
         ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
         res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
         bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
 
         pp->irq_mask[ctrl] |= BIT(bit);
         dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res, pp->irq_mask[ctrl]);
-
-        raw_spin_unlock_irqrestore(&pp->lock, flags);
 }
 
 static void dw_pci_bottom_unmask(struct irq_data *d)
···
         struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
         struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
         unsigned int res, bit, ctrl;
-        unsigned long flags;
 
-        raw_spin_lock_irqsave(&pp->lock, flags);
-
+        guard(raw_spinlock)(&pp->lock);
         ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
         res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
         bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
 
         pp->irq_mask[ctrl] &= ~BIT(bit);
         dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res, pp->irq_mask[ctrl]);
-
-        raw_spin_unlock_irqrestore(&pp->lock, flags);
 }
 
 static void dw_pci_bottom_ack(struct irq_data *d)
···
 }
 
 static struct irq_chip dw_pci_msi_bottom_irq_chip = {
-        .name = "DWPCI-MSI",
-        .irq_ack = dw_pci_bottom_ack,
-        .irq_compose_msi_msg = dw_pci_setup_msi_msg,
-        .irq_mask = dw_pci_bottom_mask,
-        .irq_unmask = dw_pci_bottom_unmask,
+        .name                   = "DWPCI-MSI",
+        .irq_compose_msi_msg    = dw_pci_setup_msi_msg,
+        .irq_mask               = dw_pci_bottom_mask,
+        .irq_unmask             = dw_pci_bottom_unmask,
+#ifdef CONFIG_SMP
+        .irq_ack                = dw_irq_noop,
+        .irq_pre_redirect       = dw_pci_bottom_ack,
+        .irq_set_affinity       = irq_chip_redirect_set_affinity,
+#else
+        .irq_ack                = dw_pci_bottom_ack,
+#endif
 };
 
-static int dw_pcie_irq_domain_alloc(struct irq_domain *domain,
-                                    unsigned int virq, unsigned int nr_irqs,
-                                    void *args)
+static int dw_pcie_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+                                    unsigned int nr_irqs, void *args)
 {
         struct dw_pcie_rp *pp = domain->host_data;
-        unsigned long flags;
-        u32 i;
         int bit;
 
-        raw_spin_lock_irqsave(&pp->lock, flags);
-
-        bit = bitmap_find_free_region(pp->msi_irq_in_use, pp->num_vectors,
-                                      order_base_2(nr_irqs));
-
-        raw_spin_unlock_irqrestore(&pp->lock, flags);
+        scoped_guard (raw_spinlock_irq, &pp->lock) {
+                bit = bitmap_find_free_region(pp->msi_irq_in_use, pp->num_vectors,
+                                              order_base_2(nr_irqs));
+        }
 
         if (bit < 0)
                 return -ENOSPC;
 
-        for (i = 0; i < nr_irqs; i++)
-                irq_domain_set_info(domain, virq + i, bit + i,
-                                    pp->msi_irq_chip,
-                                    pp, handle_edge_irq,
-                                    NULL, NULL);
-
+        for (unsigned int i = 0; i < nr_irqs; i++) {
+                irq_domain_set_info(domain, virq + i, bit + i, pp->msi_irq_chip,
+                                    pp, handle_edge_irq, NULL, NULL);
+        }
         return 0;
 }
 
-static void dw_pcie_irq_domain_free(struct irq_domain *domain,
-                                    unsigned int virq, unsigned int nr_irqs)
+static void dw_pcie_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+                                    unsigned int nr_irqs)
 {
         struct irq_data *d = irq_domain_get_irq_data(domain, virq);
         struct dw_pcie_rp *pp = domain->host_data;
-        unsigned long flags;
 
-        raw_spin_lock_irqsave(&pp->lock, flags);
-
-        bitmap_release_region(pp->msi_irq_in_use, d->hwirq,
-                              order_base_2(nr_irqs));
-
-        raw_spin_unlock_irqrestore(&pp->lock, flags);
+        guard(raw_spinlock_irq)(&pp->lock);
+        bitmap_release_region(pp->msi_irq_in_use, d->hwirq, order_base_2(nr_irqs));
 }
 
 static const struct irq_domain_ops dw_pcie_msi_domain_ops = {
···
 
         for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++) {
                 if (pp->msi_irq[ctrl] > 0)
-                        irq_set_chained_handler_and_data(pp->msi_irq[ctrl],
-                                                         NULL, NULL);
+                        irq_set_chained_handler_and_data(pp->msi_irq[ctrl], NULL, NULL);
         }
 
         irq_domain_remove(pp->irq_domain);
+2 -5
drivers/pci/controller/dwc/pcie-designware.h
···
 #ifdef CONFIG_PCIE_DW_HOST
 int dw_pcie_suspend_noirq(struct dw_pcie *pci);
 int dw_pcie_resume_noirq(struct dw_pcie *pci);
-irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp);
+void dw_handle_msi_irq(struct dw_pcie_rp *pp);
 void dw_pcie_msi_init(struct dw_pcie_rp *pp);
 int dw_pcie_msi_host_init(struct dw_pcie_rp *pp);
 void dw_pcie_free_msi(struct dw_pcie_rp *pp);
···
         return 0;
 }
 
-static inline irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp)
-{
-        return IRQ_NONE;
-}
+static inline void dw_handle_msi_irq(struct dw_pcie_rp *pp) { }
 
 static inline void dw_pcie_msi_init(struct dw_pcie_rp *pp)
 { }
+9 -5
drivers/pci/msi/msi.c
···
 static int msi_verify_entries(struct pci_dev *dev)
 {
         struct msi_desc *entry;
+        u64 address;
 
-        if (!dev->no_64bit_msi)
+        if (dev->msi_addr_mask == DMA_BIT_MASK(64))
                 return 0;
 
         msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
-                if (entry->msg.address_hi) {
-                        pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
-                                entry->msg.address_hi, entry->msg.address_lo);
+                address = (u64)entry->msg.address_hi << 32 | entry->msg.address_lo;
+                if (address & ~dev->msi_addr_mask) {
+                        pci_err(dev, "arch assigned 64-bit MSI address %#llx above device MSI address mask %#llx\n",
+                                address, dev->msi_addr_mask);
                         break;
                 }
         }
···
 
         ret = msix_setup_interrupts(dev, entries, nvec, affd);
         if (ret)
-                goto out_disable;
+                goto out_unmap;
 
         /* Disable INTX */
         pci_intx_for_msi(dev, 0);
···
         pcibios_free_irq(dev);
         return 0;
 
+out_unmap:
+        iounmap(dev->msix_base);
 out_disable:
         dev->msix_enabled = 0;
         pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);
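The new check is plain bit arithmetic: an arch-assigned address is acceptable iff it sets no bits above the device's mask. Reduced to a standalone, userspace-compilable illustration (DMA_BIT_MASK() reproduced with the kernel's definition; msi_addr_ok() is a made-up name for the test inside the loop above):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same definition as the kernel's DMA_BIT_MASK() */
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

static bool msi_addr_ok(uint64_t address, uint64_t msi_addr_mask)
{
        return !(address & ~msi_addr_mask);
}

int main(void)
{
        uint64_t mask = DMA_BIT_MASK(40);       /* device decodes 40 bits */

        printf("%d\n", msi_addr_ok(0x00ffffffffffULL, mask));   /* 1: fits */
        printf("%d\n", msi_addr_ok(0x010000000000ULL, mask));   /* 0: bit 40 set */
        return 0;
}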
+1 -1
drivers/pci/msi/pcidev_msi.c
···
         }
 
         if (!(ctrl & PCI_MSI_FLAGS_64BIT))
-                dev->no_64bit_msi = 1;
+                dev->msi_addr_mask = DMA_BIT_MASK(32);
 }
 
 void pci_msix_init(struct pci_dev *dev)
+7
drivers/pci/probe.c
···
          */
         dev->dma_mask = 0xffffffff;
 
+        /*
+         * Assume 64-bit addresses for MSI initially. Will be changed to 32-bit
+         * if MSI (rather than MSI-X) capability does not have
+         * PCI_MSI_FLAGS_64BIT. Can also be overridden by driver.
+         */
+        dev->msi_addr_mask = DMA_BIT_MASK(64);
+
         dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
                      dev->bus->number, PCI_SLOT(dev->devfn),
                      PCI_FUNC(dev->devfn));
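With that default in place, a driver that knows its device decodes fewer MSI address bits can simply tighten the mask at probe time. A minimal, hypothetical sketch (foo_probe() is invented; the 40-bit limit mirrors the pre-Bonaire radeon limitation noted in the comment removed above):

#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        /*
         * The device advertises PCI_MSI_FLAGS_64BIT but only decodes 40
         * address bits: describe the real limit instead of the old
         * all-or-nothing pdev->no_64bit_msi = 1.
         */
        pdev->msi_addr_mask = DMA_BIT_MASK(40);

        return pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
}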
+10
include/linux/irq.h
···
  *                              checks against the supplied affinity mask are not
  *                              required. This is used for CPU hotplug where the
  *                              target CPU is not yet set in the cpu_online_mask.
+ * @irq_pre_redirect:   Optional function to be invoked before redirecting
+ *                      an interrupt via irq_work. Called only on CONFIG_SMP.
  * @irq_retrigger:      resend an IRQ to the CPU
  * @irq_set_type:       set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
  * @irq_set_wake:       enable/disable power-management wake-on of an IRQ
···
         void            (*irq_eoi)(struct irq_data *data);
 
         int             (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
+        void            (*irq_pre_redirect)(struct irq_data *data);
         int             (*irq_retrigger)(struct irq_data *data);
         int             (*irq_set_type)(struct irq_data *data, unsigned int flow_type);
         int             (*irq_set_wake)(struct irq_data *data, unsigned int on);
···
 extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type);
 extern int irq_chip_request_resources_parent(struct irq_data *data);
 extern void irq_chip_release_resources_parent(struct irq_data *data);
+#ifdef CONFIG_SMP
+void irq_chip_pre_redirect_parent(struct irq_data *data);
+#endif
+#endif
+
+#ifdef CONFIG_SMP
+int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force);
 #endif
 
 /* Disable or mask interrupts during a kernel kexec */
+16 -1
include/linux/irqdesc.h
···
 #ifndef _LINUX_IRQDESC_H
 #define _LINUX_IRQDESC_H
 
-#include <linux/rcupdate.h>
+#include <linux/irq_work.h>
 #include <linux/kobject.h>
 #include <linux/mutex.h>
+#include <linux/rcupdate.h>
 
 /*
  * Core internal functions to deal with irq descriptors
···
 };
 
 /**
+ * struct irq_redirect - interrupt redirection metadata
+ * @work:               Hard irq_work item for handler execution on a different CPU
+ * @target_cpu:         CPU to run irq handler on in case the current CPU is not part
+ *                      of the irq affinity mask
+ */
+struct irq_redirect {
+        struct irq_work         work;
+        unsigned int            target_cpu;
+};
+
+/**
  * struct irq_desc - interrupt descriptor
  * @irq_common_data:    per irq and chip data passed down to chip functions
  * @kstat_irqs:         irq stats per cpu
···
  * @threads_handled:    stats field for deferred spurious detection of threaded handlers
  * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
  * @lock:               locking for SMP
+ * @redirect:           Facility for redirecting interrupts via irq_work
  * @affinity_hint:      hint to user space for preferred irq affinity
  * @affinity_notify:    context for notification of affinity changes
  * @pending_mask:       pending rebalanced interrupts
···
         raw_spinlock_t          lock;
         struct cpumask          *percpu_enabled;
 #ifdef CONFIG_SMP
+        struct irq_redirect     redirect;
         const struct cpumask    *affinity_hint;
         struct irq_affinity_notify *affinity_notify;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
···
 int generic_handle_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
 int generic_handle_domain_irq_safe(struct irq_domain *domain, irq_hw_number_t hwirq);
 int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq);
+bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
 #endif
 
 /* Test to see if a driver has successfully requested an irq */
+7 -6
include/linux/msi.h
···
 #endif
 
 /**
- * msi_msg - Representation of a MSI message
+ * struct msi_msg - Representation of a MSI message
  * @address_lo:         Low 32 bits of msi message address
- * @arch_addrlo:        Architecture specific shadow of @address_lo
+ * @arch_addr_lo:       Architecture specific shadow of @address_lo
  * @address_hi:         High 32 bits of msi message address
  *                      (only used when device supports it)
- * @arch_addrhi:        Architecture specific shadow of @address_hi
+ * @arch_addr_hi:       Architecture specific shadow of @address_hi
  * @data:               MSI message data (usually 16 bits)
  * @arch_data:          Architecture specific shadow of @data
  */
···
                                struct msi_msg *msg);
 
 /**
- * pci_msi_desc - PCI/MSI specific MSI descriptor data
+ * struct pci_msi_desc - PCI/MSI specific MSI descriptor data
  *
  * @msi_mask:   [PCI MSI]   MSI cached mask bits
  * @msix_ctrl:  [PCI MSI-X] MSI-X cached per vector control bits
···
  * @can_mask:   [PCI MSI/X] Masking supported?
  * @is_64:      [PCI MSI/X] Address size: 0=32bit 1=64bit
  * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq
+ * @msi_attrib: [PCI MSI/X] Compound struct of MSI/X attributes
  * @mask_pos:   [PCI MSI]   Mask register position
  * @mask_base:  [PCI MSI-X] Mask register base address
  */
···
  *                      Only used if iommu_msi_shift != 0
  * @iommu_msi_shift:    Indicates how many bits of the original address should be
  *                      preserved when using iommu_msi_iova.
- * @sysfs_attr:         Pointer to sysfs device attribute
+ * @sysfs_attrs:        Pointer to sysfs device attribute
  *
  * @write_msi_msg:      Callback that may be called when the MSI message
  *                      address or data changes
···
 /**
  * struct msi_dev_domain - The internals of MSI domain info per device
  * @store:      Xarray for storing MSI descriptor pointers
- * @irqdomain:  Pointer to a per device interrupt domain
+ * @domain:     Pointer to a per device interrupt domain
  */
 struct msi_dev_domain {
         struct xarray store;
+7 -1
include/linux/pci.h
···
                                            0xffffffff. You only need to change
                                            this if your device has broken DMA
                                            or supports 64-bit transfers. */
+        u64             msi_addr_mask;  /* Mask of the bits of bus address for
+                                           MSI that this device implements.
+                                           Normally set based on device
+                                           capabilities. You only need to
+                                           change this if your device claims
+                                           to support 64-bit MSI but implements
+                                           fewer than 64 address bits. */
 
         struct device_dma_parameters dma_parms;
 
···
 
         unsigned int    is_busmaster:1;         /* Is busmaster */
         unsigned int    no_msi:1;               /* May not use MSI */
-        unsigned int    no_64bit_msi:1;         /* May only use 32-bit MSIs */
         unsigned int    block_cfg_access:1;     /* Config space access blocked */
         unsigned int    broken_parity_status:1; /* Generates false positive parity */
         unsigned int    irq_reroute_variant:2;  /* Needs IRQ rerouting variant */
+23 -1
kernel/irq/chip.c
···
 }
 #endif
 
-#ifdef  CONFIG_IRQ_DOMAIN_HIERARCHY
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
 
 #ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS
 /**
···
 EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq);
 
 #endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */
+
+#ifdef CONFIG_SMP
+void irq_chip_pre_redirect_parent(struct irq_data *data)
+{
+        data = data->parent_data;
+        data->chip->irq_pre_redirect(data);
+}
+EXPORT_SYMBOL_GPL(irq_chip_pre_redirect_parent);
+#endif
 
 /**
  * irq_chip_set_parent_state - set the state of a parent interrupt.
···
         data->chip->irq_release_resources(data);
 }
 EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent);
+#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+#ifdef CONFIG_SMP
+int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force)
+{
+        struct irq_redirect *redir = &irq_data_to_desc(data)->redirect;
+
+        WRITE_ONCE(redir->target_cpu, cpumask_first(dest));
+        irq_data_update_effective_affinity(data, dest);
+
+        return IRQ_SET_MASK_OK_DONE;
+}
+EXPORT_SYMBOL_GPL(irq_chip_redirect_set_affinity);
 #endif
 
 /**
+84 -2
kernel/irq/irqdesc.c
···
         return 0;
 }
 
-static void desc_smp_init(struct irq_desc *desc, int node,
-                          const struct cpumask *affinity)
+static void irq_redirect_work(struct irq_work *work)
+{
+        handle_irq_desc(container_of(work, struct irq_desc, redirect.work));
+}
+
+static void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity)
 {
         if (!affinity)
                 affinity = irq_default_affinity;
···
 #ifdef CONFIG_NUMA
         desc->irq_common_data.node = node;
 #endif
+        desc->redirect.work = IRQ_WORK_INIT_HARD(irq_redirect_work);
 }
 
 static void free_masks(struct irq_desc *desc)
···
         WARN_ON_ONCE(!in_nmi());
         return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
 }
+
+#ifdef CONFIG_SMP
+static bool demux_redirect_remote(struct irq_desc *desc)
+{
+        guard(raw_spinlock)(&desc->lock);
+        const struct cpumask *m = irq_data_get_effective_affinity_mask(&desc->irq_data);
+        unsigned int target_cpu = READ_ONCE(desc->redirect.target_cpu);
+
+        if (desc->irq_data.chip->irq_pre_redirect)
+                desc->irq_data.chip->irq_pre_redirect(&desc->irq_data);
+
+        /*
+         * If the interrupt handler is already running on a CPU that's included
+         * in the interrupt's affinity mask, redirection is not necessary.
+         */
+        if (cpumask_test_cpu(smp_processor_id(), m))
+                return false;
+
+        /*
+         * The desc->action check protects against IRQ shutdown: __free_irq() sets
+         * desc->action to NULL while holding desc->lock, which we also hold.
+         *
+         * Calling irq_work_queue_on() here is safe w.r.t. CPU unplugging:
+         * - takedown_cpu() schedules multi_cpu_stop() on all active CPUs,
+         *   including the one that's taken down.
+         * - multi_cpu_stop() acts like a barrier, which means all active
+         *   CPUs go through MULTI_STOP_DISABLE_IRQ and disable hard IRQs
+         *   *before* the dying CPU runs take_cpu_down() in MULTI_STOP_RUN.
+         * - Hard IRQs are re-enabled at the end of multi_cpu_stop(), *after*
+         *   the dying CPU has run take_cpu_down() in MULTI_STOP_RUN.
+         * - Since we run in hard IRQ context, we run either before or after
+         *   take_cpu_down() but never concurrently.
+         * - If we run before take_cpu_down(), the dying CPU hasn't been marked
+         *   offline yet (it's marked via take_cpu_down() -> __cpu_disable()),
+         *   so the WARN in irq_work_queue_on() can't occur.
+         * - Furthermore, the work item we queue will be flushed later via
+         *   take_cpu_down() -> cpuhp_invoke_callback_range_nofail() ->
+         *   smpcfd_dying_cpu() -> irq_work_run().
+         * - If we run after take_cpu_down(), target_cpu has been already
+         *   updated via take_cpu_down() -> __cpu_disable(), which eventually
+         *   calls irq_do_set_affinity() during IRQ migration. So, target_cpu
+         *   no longer points to the dying CPU in this case.
+         */
+        if (desc->action)
+                irq_work_queue_on(&desc->redirect.work, target_cpu);
+
+        return true;
+}
+#else /* CONFIG_SMP */
+static bool demux_redirect_remote(struct irq_desc *desc)
+{
+        return false;
+}
+#endif
+
+/**
+ * generic_handle_demux_domain_irq - Invoke the handler for a hardware interrupt
+ *                                   of a demultiplexing domain
+ * @domain:     The domain where to perform the lookup
+ * @hwirq:      The hardware interrupt number to convert to a logical one
+ *
+ * Returns: True on success, or false if lookup has failed
+ */
+bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq)
+{
+        struct irq_desc *desc = irq_resolve_mapping(domain, hwirq);
+
+        if (unlikely(!desc))
+                return false;
+
+        if (demux_redirect_remote(desc))
+                return true;
+
+        return !handle_irq_desc(desc);
+}
+EXPORT_SYMBOL_GPL(generic_handle_demux_domain_irq);
+
 #endif
 
 /* Dynamic interrupt handling */
+13 -2
kernel/irq/manage.c
···
 early_param("threadirqs", setup_forced_irqthreads);
 #endif
 
+#ifdef CONFIG_SMP
+static inline void synchronize_irqwork(struct irq_desc *desc)
+{
+        /* Synchronize pending or on the fly redirect work */
+        irq_work_sync(&desc->redirect.work);
+}
+#else
+static inline void synchronize_irqwork(struct irq_desc *desc) { }
+#endif
+
 static int __irq_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *state);
 
 static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
···
 
 static void __synchronize_irq(struct irq_desc *desc)
 {
+        synchronize_irqwork(desc);
         __synchronize_hardirq(desc, true);
+
         /*
          * We made sure that no hardirq handler is running. Now verify that no
          * threaded handlers are active.
···
 
 static DEFINE_PER_CPU(struct cpumask, __tmp_mask);
 
-int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
-                        bool force)
+int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 {
         struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask);
         struct irq_desc *desc = irq_data_to_desc(data);
+5 -5
sound/hda/controllers/intel.c
···
         chip->gts_present = true;
 #endif
 
-        if (chip->msi && chip->driver_caps & AZX_DCAPS_NO_MSI64) {
-                dev_dbg(card->dev, "Disabling 64bit MSI\n");
-                pci->no_64bit_msi = true;
-        }
-
         pci_set_master(pci);
 
         gcap = azx_readw(chip, GCAP);
···
         if (dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(dma_bits)))
                 dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32));
         dma_set_max_seg_size(&pci->dev, UINT_MAX);
+
+        if (chip->msi && chip->driver_caps & AZX_DCAPS_NO_MSI64) {
+                dev_dbg(card->dev, "Restricting MSI to %u-bit\n", dma_bits);
+                pci->msi_addr_mask = DMA_BIT_MASK(dma_bits);
+        }
 
         /* read number of streams from GCAP register instead of using
          * hardcoded value