Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

iommu/vt-d: Fix race condition during PASID entry replacement

The Intel VT-d PASID table entry is 512 bits (64 bytes). When replacing
an active PASID entry (e.g., during domain replacement), the current
implementation calculates a new entry on the stack and copies it to the
table using a single structure assignment.

struct pasid_entry *pte, new_pte;

pte = intel_pasid_get_entry(dev, pasid);
pasid_pte_config_first_level(iommu, &new_pte, ...);
*pte = new_pte;

Because the hardware may fetch the 512-bit PASID entry in multiple
128-bit chunks, and a C structure assignment of a 64-byte object is not
guaranteed to be performed as a single atomic store, updating the entire
entry while it is active (Present bit set) risks a "torn" read. In this
scenario, the IOMMU hardware could observe an inconsistent state —
partially new data and partially old data — leading to unpredictable
behavior or spurious faults.

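Fix this by removing the unsafe "replace" helpers and following the
"clear-then-update" flow: when an old domain is being replaced, first
tear down the existing entry with intel_pasid_tear_down_entry(), then
call the regular setup helper. This ensures the Present bit is cleared
and the required invalidation handshake is completed before the new
configuration is applied.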

Fixes: 7543ee63e811 ("iommu/vt-d: Add pasid replace helpers")
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20260120061816.2132558-4-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

authored by

Lu Baolu and committed by
Joerg Roedel
c3b1edea c1e4f1dc

+16 -220
+12 -17
drivers/iommu/intel/iommu.c
··· 1252 1252 ioasid_t pasid, u16 did, phys_addr_t fsptptr, 1253 1253 int flags, struct iommu_domain *old) 1254 1254 { 1255 - if (!old) 1256 - return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, 1257 - did, flags); 1258 - return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did, 1259 - iommu_domain_did(old, iommu), 1260 - flags); 1255 + if (old) 1256 + intel_pasid_tear_down_entry(iommu, dev, pasid, false); 1257 + 1258 + return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, did, flags); 1261 1259 } 1262 1260 1263 1261 static int domain_setup_second_level(struct intel_iommu *iommu, ··· 1263 1265 struct device *dev, ioasid_t pasid, 1264 1266 struct iommu_domain *old) 1265 1267 { 1266 - if (!old) 1267 - return intel_pasid_setup_second_level(iommu, domain, 1268 - dev, pasid); 1269 - return intel_pasid_replace_second_level(iommu, domain, dev, 1270 - iommu_domain_did(old, iommu), 1271 - pasid); 1268 + if (old) 1269 + intel_pasid_tear_down_entry(iommu, dev, pasid, false); 1270 + 1271 + return intel_pasid_setup_second_level(iommu, domain, dev, pasid); 1272 1272 } 1273 1273 1274 1274 static int domain_setup_passthrough(struct intel_iommu *iommu, 1275 1275 struct device *dev, ioasid_t pasid, 1276 1276 struct iommu_domain *old) 1277 1277 { 1278 - if (!old) 1279 - return intel_pasid_setup_pass_through(iommu, dev, pasid); 1280 - return intel_pasid_replace_pass_through(iommu, dev, 1281 - iommu_domain_did(old, iommu), 1282 - pasid); 1278 + if (old) 1279 + intel_pasid_tear_down_entry(iommu, dev, pasid, false); 1280 + 1281 + return intel_pasid_setup_pass_through(iommu, dev, pasid); 1283 1282 } 1284 1283 1285 1284 static int domain_setup_first_level(struct intel_iommu *iommu,
+4 -5
drivers/iommu/intel/nested.c
··· 136 136 struct device *dev, ioasid_t pasid, 137 137 struct iommu_domain *old) 138 138 { 139 - if (!old) 140 - return intel_pasid_setup_nested(iommu, dev, pasid, domain); 141 - return intel_pasid_replace_nested(iommu, dev, pasid, 142 - iommu_domain_did(old, iommu), 143 - domain); 139 + if (old) 140 + intel_pasid_tear_down_entry(iommu, dev, pasid, false); 141 + 142 + return intel_pasid_setup_nested(iommu, dev, pasid, domain); 144 143 } 145 144 146 145 static int intel_nested_set_dev_pasid(struct iommu_domain *domain,
-184
drivers/iommu/intel/pasid.c
··· 417 417 return 0; 418 418 } 419 419 420 - int intel_pasid_replace_first_level(struct intel_iommu *iommu, 421 - struct device *dev, phys_addr_t fsptptr, 422 - u32 pasid, u16 did, u16 old_did, 423 - int flags) 424 - { 425 - struct pasid_entry *pte, new_pte; 426 - 427 - if (!ecap_flts(iommu->ecap)) { 428 - pr_err("No first level translation support on %s\n", 429 - iommu->name); 430 - return -EINVAL; 431 - } 432 - 433 - if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) { 434 - pr_err("No 5-level paging support for first-level on %s\n", 435 - iommu->name); 436 - return -EINVAL; 437 - } 438 - 439 - pasid_pte_config_first_level(iommu, &new_pte, fsptptr, did, flags); 440 - 441 - spin_lock(&iommu->lock); 442 - pte = intel_pasid_get_entry(dev, pasid); 443 - if (!pte) { 444 - spin_unlock(&iommu->lock); 445 - return -ENODEV; 446 - } 447 - 448 - if (!pasid_pte_is_present(pte)) { 449 - spin_unlock(&iommu->lock); 450 - return -EINVAL; 451 - } 452 - 453 - WARN_ON(old_did != pasid_get_domain_id(pte)); 454 - 455 - *pte = new_pte; 456 - spin_unlock(&iommu->lock); 457 - 458 - intel_pasid_flush_present(iommu, dev, pasid, old_did, pte); 459 - intel_iommu_drain_pasid_prq(dev, pasid); 460 - 461 - return 0; 462 - } 463 - 464 420 /* 465 421 * Set up the scalable mode pasid entry for second only translation type. 466 422 */ ··· 479 523 spin_unlock(&iommu->lock); 480 524 481 525 pasid_flush_caches(iommu, pte, pasid, did); 482 - 483 - return 0; 484 - } 485 - 486 - int intel_pasid_replace_second_level(struct intel_iommu *iommu, 487 - struct dmar_domain *domain, 488 - struct device *dev, u16 old_did, 489 - u32 pasid) 490 - { 491 - struct pasid_entry *pte, new_pte; 492 - u16 did; 493 - 494 - /* 495 - * If hardware advertises no support for second level 496 - * translation, return directly. 
497 - */ 498 - if (!ecap_slts(iommu->ecap)) { 499 - pr_err("No second level translation support on %s\n", 500 - iommu->name); 501 - return -EINVAL; 502 - } 503 - 504 - did = domain_id_iommu(domain, iommu); 505 - 506 - pasid_pte_config_second_level(iommu, &new_pte, domain, did); 507 - 508 - spin_lock(&iommu->lock); 509 - pte = intel_pasid_get_entry(dev, pasid); 510 - if (!pte) { 511 - spin_unlock(&iommu->lock); 512 - return -ENODEV; 513 - } 514 - 515 - if (!pasid_pte_is_present(pte)) { 516 - spin_unlock(&iommu->lock); 517 - return -EINVAL; 518 - } 519 - 520 - WARN_ON(old_did != pasid_get_domain_id(pte)); 521 - 522 - *pte = new_pte; 523 - spin_unlock(&iommu->lock); 524 - 525 - intel_pasid_flush_present(iommu, dev, pasid, old_did, pte); 526 - intel_iommu_drain_pasid_prq(dev, pasid); 527 526 528 527 return 0; 529 528 } ··· 591 680 spin_unlock(&iommu->lock); 592 681 593 682 pasid_flush_caches(iommu, pte, pasid, did); 594 - 595 - return 0; 596 - } 597 - 598 - int intel_pasid_replace_pass_through(struct intel_iommu *iommu, 599 - struct device *dev, u16 old_did, 600 - u32 pasid) 601 - { 602 - struct pasid_entry *pte, new_pte; 603 - u16 did = FLPT_DEFAULT_DID; 604 - 605 - pasid_pte_config_pass_through(iommu, &new_pte, did); 606 - 607 - spin_lock(&iommu->lock); 608 - pte = intel_pasid_get_entry(dev, pasid); 609 - if (!pte) { 610 - spin_unlock(&iommu->lock); 611 - return -ENODEV; 612 - } 613 - 614 - if (!pasid_pte_is_present(pte)) { 615 - spin_unlock(&iommu->lock); 616 - return -EINVAL; 617 - } 618 - 619 - WARN_ON(old_did != pasid_get_domain_id(pte)); 620 - 621 - *pte = new_pte; 622 - spin_unlock(&iommu->lock); 623 - 624 - intel_pasid_flush_present(iommu, dev, pasid, old_did, pte); 625 - intel_iommu_drain_pasid_prq(dev, pasid); 626 683 627 684 return 0; 628 685 } ··· 724 845 spin_unlock(&iommu->lock); 725 846 726 847 pasid_flush_caches(iommu, pte, pasid, did); 727 - 728 - return 0; 729 - } 730 - 731 - int intel_pasid_replace_nested(struct intel_iommu *iommu, 732 - struct 
device *dev, u32 pasid, 733 - u16 old_did, struct dmar_domain *domain) 734 - { 735 - struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg; 736 - struct dmar_domain *s2_domain = domain->s2_domain; 737 - u16 did = domain_id_iommu(domain, iommu); 738 - struct pasid_entry *pte, new_pte; 739 - 740 - /* Address width should match the address width supported by hardware */ 741 - switch (s1_cfg->addr_width) { 742 - case ADDR_WIDTH_4LEVEL: 743 - break; 744 - case ADDR_WIDTH_5LEVEL: 745 - if (!cap_fl5lp_support(iommu->cap)) { 746 - dev_err_ratelimited(dev, 747 - "5-level paging not supported\n"); 748 - return -EINVAL; 749 - } 750 - break; 751 - default: 752 - dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n", 753 - s1_cfg->addr_width); 754 - return -EINVAL; 755 - } 756 - 757 - if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) { 758 - pr_err_ratelimited("No supervisor request support on %s\n", 759 - iommu->name); 760 - return -EINVAL; 761 - } 762 - 763 - if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) { 764 - pr_err_ratelimited("No extended access flag support on %s\n", 765 - iommu->name); 766 - return -EINVAL; 767 - } 768 - 769 - pasid_pte_config_nestd(iommu, &new_pte, s1_cfg, s2_domain, did); 770 - 771 - spin_lock(&iommu->lock); 772 - pte = intel_pasid_get_entry(dev, pasid); 773 - if (!pte) { 774 - spin_unlock(&iommu->lock); 775 - return -ENODEV; 776 - } 777 - 778 - if (!pasid_pte_is_present(pte)) { 779 - spin_unlock(&iommu->lock); 780 - return -EINVAL; 781 - } 782 - 783 - WARN_ON(old_did != pasid_get_domain_id(pte)); 784 - 785 - *pte = new_pte; 786 - spin_unlock(&iommu->lock); 787 - 788 - intel_pasid_flush_present(iommu, dev, pasid, old_did, pte); 789 - intel_iommu_drain_pasid_prq(dev, pasid); 790 848 791 849 return 0; 792 850 }
-14
drivers/iommu/intel/pasid.h
··· 316 316 struct device *dev, u32 pasid); 317 317 int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, 318 318 u32 pasid, struct dmar_domain *domain); 319 - int intel_pasid_replace_first_level(struct intel_iommu *iommu, 320 - struct device *dev, phys_addr_t fsptptr, 321 - u32 pasid, u16 did, u16 old_did, int flags); 322 - int intel_pasid_replace_second_level(struct intel_iommu *iommu, 323 - struct dmar_domain *domain, 324 - struct device *dev, u16 old_did, 325 - u32 pasid); 326 - int intel_pasid_replace_pass_through(struct intel_iommu *iommu, 327 - struct device *dev, u16 old_did, 328 - u32 pasid); 329 - int intel_pasid_replace_nested(struct intel_iommu *iommu, 330 - struct device *dev, u32 pasid, 331 - u16 old_did, struct dmar_domain *domain); 332 - 333 319 void intel_pasid_tear_down_entry(struct intel_iommu *iommu, 334 320 struct device *dev, u32 pasid, 335 321 bool fault_ignore);