Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

PCI: Suspend iommu function prior to resetting a device

PCIe permits a device to ignore ATS invalidation TLPs while processing a
reset. This creates a problem visible to the OS where an ATS invalidation
command will time out: e.g. an SVA domain will have no coordination with a
reset event and can racily issue ATS invalidations to a resetting device.

The PCIe r6.0, sec 10.3.1 IMPLEMENTATION NOTE recommends that SW disable and
block ATS before initiating a Function Level Reset. It also mentions that
other reset methods could have the same vulnerability.

The IOMMU subsystem provides pci_dev_reset_iommu_prepare/done() callback
helpers for this purpose. Use them in all the existing reset functions.

This will attach the device to its iommu_group->blocking_domain during the
device reset, so as to allow the IOMMU driver to:
- invoke pci_disable_ats() and pci_enable_ats(), if necessary
- wait for all ATS invalidations to complete
- stop issuing new ATS invalidations
- fence any incoming ATS queries

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

authored by

Nicolin Chen and committed by
Joerg Roedel
f5b16b80 c279e839

+89 -12
+13 -4
drivers/pci/pci-acpi.c
··· 9 9 10 10 #include <linux/delay.h> 11 11 #include <linux/init.h> 12 + #include <linux/iommu.h> 12 13 #include <linux/irqdomain.h> 13 14 #include <linux/pci.h> 14 15 #include <linux/msi.h> ··· 972 971 int pci_dev_acpi_reset(struct pci_dev *dev, bool probe) 973 972 { 974 973 acpi_handle handle = ACPI_HANDLE(&dev->dev); 974 + int ret; 975 975 976 976 if (!handle || !acpi_has_method(handle, "_RST")) 977 977 return -ENOTTY; ··· 980 978 if (probe) 981 979 return 0; 982 980 983 - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) { 984 - pci_warn(dev, "ACPI _RST failed\n"); 985 - return -ENOTTY; 981 + ret = pci_dev_reset_iommu_prepare(dev); 982 + if (ret) { 983 + pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret); 984 + return ret; 986 985 } 987 986 988 - return 0; 987 + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) { 988 + pci_warn(dev, "ACPI _RST failed\n"); 989 + ret = -ENOTTY; 990 + } 991 + 992 + pci_dev_reset_iommu_done(dev); 993 + return ret; 989 994 } 990 995 991 996 bool acpi_pci_power_manageable(struct pci_dev *dev)
+58 -7
drivers/pci/pci.c
··· 13 13 #include <linux/delay.h> 14 14 #include <linux/dmi.h> 15 15 #include <linux/init.h> 16 + #include <linux/iommu.h> 16 17 #include <linux/msi.h> 17 18 #include <linux/of.h> 18 19 #include <linux/pci.h> ··· 26 25 #include <linux/logic_pio.h> 27 26 #include <linux/device.h> 28 27 #include <linux/pm_runtime.h> 28 + #include <linux/pci-ats.h> 29 29 #include <linux/pci_hotplug.h> 30 30 #include <linux/vmalloc.h> 31 31 #include <asm/dma.h> ··· 4332 4330 */ 4333 4331 int pcie_flr(struct pci_dev *dev) 4334 4332 { 4333 + int ret; 4334 + 4335 4335 if (!pci_wait_for_pending_transaction(dev)) 4336 4336 pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n"); 4337 + 4338 + /* Have to call it after waiting for pending DMA transaction */ 4339 + ret = pci_dev_reset_iommu_prepare(dev); 4340 + if (ret) { 4341 + pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret); 4342 + return ret; 4343 + } 4337 4344 4338 4345 pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); 4339 4346 4340 4347 if (dev->imm_ready) 4341 - return 0; 4348 + goto done; 4342 4349 4343 4350 /* 4344 4351 * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within ··· 4356 4345 */ 4357 4346 msleep(100); 4358 4347 4359 - return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS); 4348 + ret = pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS); 4349 + done: 4350 + pci_dev_reset_iommu_done(dev); 4351 + return ret; 4360 4352 } 4361 4353 EXPORT_SYMBOL_GPL(pcie_flr); 4362 4354 ··· 4387 4373 4388 4374 static int pci_af_flr(struct pci_dev *dev, bool probe) 4389 4375 { 4376 + int ret; 4390 4377 int pos; 4391 4378 u8 cap; 4392 4379 ··· 4414 4399 PCI_AF_STATUS_TP << 8)) 4415 4400 pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n"); 4416 4401 4402 + /* Have to call it after waiting for pending DMA transaction */ 4403 + ret = pci_dev_reset_iommu_prepare(dev); 4404 + if (ret) { 4405 + pci_err(dev, 
"failed to stop IOMMU for a PCI reset: %d\n", ret); 4406 + return ret; 4407 + } 4408 + 4417 4409 pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR); 4418 4410 4419 4411 if (dev->imm_ready) 4420 - return 0; 4412 + goto done; 4421 4413 4422 4414 /* 4423 4415 * Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006, ··· 4434 4412 */ 4435 4413 msleep(100); 4436 4414 4437 - return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS); 4415 + ret = pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS); 4416 + done: 4417 + pci_dev_reset_iommu_done(dev); 4418 + return ret; 4438 4419 } 4439 4420 4440 4421 /** ··· 4458 4433 static int pci_pm_reset(struct pci_dev *dev, bool probe) 4459 4434 { 4460 4435 u16 csr; 4436 + int ret; 4461 4437 4462 4438 if (!dev->pm_cap || dev->dev_flags & PCI_DEV_FLAGS_NO_PM_RESET) 4463 4439 return -ENOTTY; ··· 4473 4447 if (dev->current_state != PCI_D0) 4474 4448 return -EINVAL; 4475 4449 4450 + ret = pci_dev_reset_iommu_prepare(dev); 4451 + if (ret) { 4452 + pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret); 4453 + return ret; 4454 + } 4455 + 4476 4456 csr &= ~PCI_PM_CTRL_STATE_MASK; 4477 4457 csr |= PCI_D3hot; 4478 4458 pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr); ··· 4489 4457 pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr); 4490 4458 pci_dev_d3_sleep(dev); 4491 4459 4492 - return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS); 4460 + ret = pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS); 4461 + pci_dev_reset_iommu_done(dev); 4462 + return ret; 4493 4463 } 4494 4464 4495 4465 /** ··· 4919 4885 return -ENOTTY; 4920 4886 } 4921 4887 4888 + rc = pci_dev_reset_iommu_prepare(dev); 4889 + if (rc) { 4890 + pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", rc); 4891 + return rc; 4892 + } 4893 + 4922 4894 rc = pci_dev_reset_slot_function(dev, probe); 4923 4895 if (rc != -ENOTTY) 4924 - return rc; 4925 - return pci_parent_bus_reset(dev, probe); 4896 + goto 
done; 4897 + 4898 + rc = pci_parent_bus_reset(dev, probe); 4899 + done: 4900 + pci_dev_reset_iommu_done(dev); 4901 + return rc; 4926 4902 } 4927 4903 4928 4904 static int cxl_reset_bus_function(struct pci_dev *dev, bool probe) ··· 4956 4912 if (rc) 4957 4913 return -ENOTTY; 4958 4914 4915 + rc = pci_dev_reset_iommu_prepare(dev); 4916 + if (rc) { 4917 + pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", rc); 4918 + return rc; 4919 + } 4920 + 4959 4921 if (reg & PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR) { 4960 4922 val = reg; 4961 4923 } else { ··· 4976 4926 pci_write_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL, 4977 4927 reg); 4978 4928 4929 + pci_dev_reset_iommu_done(dev); 4979 4930 return rc; 4980 4931 } 4981 4932
+18 -1
drivers/pci/quirks.c
··· 21 21 #include <linux/pci.h> 22 22 #include <linux/isa-dma.h> /* isa_dma_bridge_buggy */ 23 23 #include <linux/init.h> 24 + #include <linux/iommu.h> 24 25 #include <linux/delay.h> 25 26 #include <linux/acpi.h> 26 27 #include <linux/dmi.h> ··· 4229 4228 { 0 } 4230 4229 }; 4231 4230 4231 + static int __pci_dev_specific_reset(struct pci_dev *dev, bool probe, 4232 + const struct pci_dev_reset_methods *i) 4233 + { 4234 + int ret; 4235 + 4236 + ret = pci_dev_reset_iommu_prepare(dev); 4237 + if (ret) { 4238 + pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret); 4239 + return ret; 4240 + } 4241 + 4242 + ret = i->reset(dev, probe); 4243 + pci_dev_reset_iommu_done(dev); 4244 + return ret; 4245 + } 4246 + 4232 4247 /* 4233 4248 * These device-specific reset methods are here rather than in a driver 4234 4249 * because when a host assigns a device to a guest VM, the host may need ··· 4259 4242 i->vendor == (u16)PCI_ANY_ID) && 4260 4243 (i->device == dev->device || 4261 4244 i->device == (u16)PCI_ANY_ID)) 4262 - return i->reset(dev, probe); 4245 + return __pci_dev_specific_reset(dev, probe, i); 4263 4246 } 4264 4247 4265 4248 return -ENOTTY;