Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge git://git.infradead.org/intel-iommu

Pull intel iommu updates from David Woodhouse:
"This lays a little of the groundwork for upcoming Shared Virtual
Memory support — fixing some bogus #defines for capability bits and
adding the new ones, and starting to use the new wider page tables
where we can, in anticipation of actually filling in the new fields
therein.

It also allows graphics devices to be assigned to VM guests again.
This got broken in 3.17 by disallowing assignment of RMRR-afflicted
devices. Like USB, we do understand why there's an RMRR for graphics
devices — and unlike USB, it's actually sane. So we can make an
exception for graphics devices, just as we do USB controllers.

Finally, tone down the warning about the X2APIC_OPT_OUT bit, due to
persistent requests. X2APIC_OPT_OUT was added to the spec as a nasty
hack to allow broken BIOSes to forbid us from using X2APIC when they
do stupid and invasive things and would break if we did.

Someone noticed that since Windows doesn't have full IOMMU support for
DMA protection, setting the X2APIC_OPT_OUT bit made Windows avoid
initialising the IOMMU on the graphics unit altogether.

This means that it would be available for use in "driver mode", where
the IOMMU registers are made available through a BAR of the graphics
device and the graphics driver can do SVM all for itself.

So they started setting the X2APIC_OPT_OUT bit on *all* platforms with
SVM capabilities — and they even set it on the platforms which *might*,
if the planets had been aligned correctly, possibly have had SVM
capability but which in practice actually don't"

* git://git.infradead.org/intel-iommu:
iommu/vt-d: support extended root and context entries
iommu/vt-d: Add new extended capabilities from v2.3 VT-d specification
iommu/vt-d: Allow RMRR on graphics devices too
iommu/vt-d: Print x2apic opt out info instead of printing a warning
iommu/vt-d: kill bogus ecap_niotlb_iunits()

+82 -83
+66 -76
drivers/iommu/intel-iommu.c
··· 50 50 #define CONTEXT_SIZE VTD_PAGE_SIZE 51 51 52 52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) 53 + #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) 53 54 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) 54 55 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) 55 56 ··· 185 184 * 64-127: Reserved 186 185 */ 187 186 struct root_entry { 188 - u64 val; 189 - u64 rsvd1; 187 + u64 lo; 188 + u64 hi; 190 189 }; 191 190 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 192 - static inline bool root_present(struct root_entry *root) 193 - { 194 - return (root->val & 1); 195 - } 196 - static inline void set_root_present(struct root_entry *root) 197 - { 198 - root->val |= 1; 199 - } 200 - static inline void set_root_value(struct root_entry *root, unsigned long value) 201 - { 202 - root->val &= ~VTD_PAGE_MASK; 203 - root->val |= value & VTD_PAGE_MASK; 204 - } 205 191 206 - static inline struct context_entry * 207 - get_context_addr_from_root(struct root_entry *root) 208 - { 209 - return (struct context_entry *) 210 - (root_present(root)?phys_to_virt( 211 - root->val & VTD_PAGE_MASK) : 212 - NULL); 213 - } 214 192 215 193 /* 216 194 * low 64 bits: ··· 662 682 domain->iommu_superpage = domain_update_iommu_superpage(NULL); 663 683 } 664 684 685 + static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu, 686 + u8 bus, u8 devfn, int alloc) 687 + { 688 + struct root_entry *root = &iommu->root_entry[bus]; 689 + struct context_entry *context; 690 + u64 *entry; 691 + 692 + if (ecap_ecs(iommu->ecap)) { 693 + if (devfn >= 0x80) { 694 + devfn -= 0x80; 695 + entry = &root->hi; 696 + } 697 + devfn *= 2; 698 + } 699 + entry = &root->lo; 700 + if (*entry & 1) 701 + context = phys_to_virt(*entry & VTD_PAGE_MASK); 702 + else { 703 + unsigned long phy_addr; 704 + if (!alloc) 705 + return NULL; 706 + 707 + context = alloc_pgtable_page(iommu->node); 
708 + if (!context) 709 + return NULL; 710 + 711 + __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); 712 + phy_addr = virt_to_phys((void *)context); 713 + *entry = phy_addr | 1; 714 + __iommu_flush_cache(iommu, entry, sizeof(*entry)); 715 + } 716 + return &context[devfn]; 717 + } 718 + 665 719 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) 666 720 { 667 721 struct dmar_drhd_unit *drhd = NULL; ··· 755 741 clflush_cache_range(addr, size); 756 742 } 757 743 758 - /* Gets context entry for a given bus and devfn */ 759 - static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 760 - u8 bus, u8 devfn) 761 - { 762 - struct root_entry *root; 763 - struct context_entry *context; 764 - unsigned long phy_addr; 765 - unsigned long flags; 766 - 767 - spin_lock_irqsave(&iommu->lock, flags); 768 - root = &iommu->root_entry[bus]; 769 - context = get_context_addr_from_root(root); 770 - if (!context) { 771 - context = (struct context_entry *) 772 - alloc_pgtable_page(iommu->node); 773 - if (!context) { 774 - spin_unlock_irqrestore(&iommu->lock, flags); 775 - return NULL; 776 - } 777 - __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); 778 - phy_addr = virt_to_phys((void *)context); 779 - set_root_value(root, phy_addr); 780 - set_root_present(root); 781 - __iommu_flush_cache(iommu, root, sizeof(*root)); 782 - } 783 - spin_unlock_irqrestore(&iommu->lock, flags); 784 - return &context[devfn]; 785 - } 786 - 787 744 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) 788 745 { 789 - struct root_entry *root; 790 746 struct context_entry *context; 791 - int ret; 747 + int ret = 0; 792 748 unsigned long flags; 793 749 794 750 spin_lock_irqsave(&iommu->lock, flags); 795 - root = &iommu->root_entry[bus]; 796 - context = get_context_addr_from_root(root); 797 - if (!context) { 798 - ret = 0; 799 - goto out; 800 - } 801 - ret = context_present(&context[devfn]); 802 - out: 751 + context = 
iommu_context_addr(iommu, bus, devfn, 0); 752 + if (context) 753 + ret = context_present(context); 803 754 spin_unlock_irqrestore(&iommu->lock, flags); 804 755 return ret; 805 756 } 806 757 807 758 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) 808 759 { 809 - struct root_entry *root; 810 760 struct context_entry *context; 811 761 unsigned long flags; 812 762 813 763 spin_lock_irqsave(&iommu->lock, flags); 814 - root = &iommu->root_entry[bus]; 815 - context = get_context_addr_from_root(root); 764 + context = iommu_context_addr(iommu, bus, devfn, 0); 816 765 if (context) { 817 - context_clear_entry(&context[devfn]); 818 - __iommu_flush_cache(iommu, &context[devfn], \ 819 - sizeof(*context)); 766 + context_clear_entry(context); 767 + __iommu_flush_cache(iommu, context, sizeof(*context)); 820 768 } 821 769 spin_unlock_irqrestore(&iommu->lock, flags); 822 770 } 823 771 824 772 static void free_context_table(struct intel_iommu *iommu) 825 773 { 826 - struct root_entry *root; 827 774 int i; 828 775 unsigned long flags; 829 776 struct context_entry *context; ··· 794 819 goto out; 795 820 } 796 821 for (i = 0; i < ROOT_ENTRY_NR; i++) { 797 - root = &iommu->root_entry[i]; 798 - context = get_context_addr_from_root(root); 822 + context = iommu_context_addr(iommu, i, 0, 0); 799 823 if (context) 800 824 free_pgtable_page(context); 825 + 826 + if (!ecap_ecs(iommu->ecap)) 827 + continue; 828 + 829 + context = iommu_context_addr(iommu, i, 0x80, 0); 830 + if (context) 831 + free_pgtable_page(context); 832 + 801 833 } 802 834 free_pgtable_page(iommu->root_entry); 803 835 iommu->root_entry = NULL; ··· 1128 1146 1129 1147 static void iommu_set_root_entry(struct intel_iommu *iommu) 1130 1148 { 1131 - void *addr; 1149 + u64 addr; 1132 1150 u32 sts; 1133 1151 unsigned long flag; 1134 1152 1135 - addr = iommu->root_entry; 1153 + addr = virt_to_phys(iommu->root_entry); 1154 + if (ecap_ecs(iommu->ecap)) 1155 + addr |= DMA_RTADDR_RTT; 1136 1156 1137 1157 
raw_spin_lock_irqsave(&iommu->register_lock, flag); 1138 - dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); 1158 + dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); 1139 1159 1140 1160 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG); 1141 1161 ··· 1784 1800 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH && 1785 1801 translation != CONTEXT_TT_MULTI_LEVEL); 1786 1802 1787 - context = device_to_context_entry(iommu, bus, devfn); 1803 + spin_lock_irqsave(&iommu->lock, flags); 1804 + context = iommu_context_addr(iommu, bus, devfn, 1); 1805 + spin_unlock_irqrestore(&iommu->lock, flags); 1788 1806 if (!context) 1789 1807 return -ENOMEM; 1790 1808 spin_lock_irqsave(&iommu->lock, flags); ··· 2550 2564 * In both cases we assume that PCI USB devices with RMRRs have them largely 2551 2565 * for historical reasons and that the RMRR space is not actively used post 2552 2566 * boot. This exclusion may change if vendors begin to abuse it. 2567 + * 2568 + * The same exception is made for graphics devices, with the requirement that 2569 + * any use of the RMRR regions will be torn down before assigning the device 2570 + * to a guest. 2553 2571 */ 2554 2572 static bool device_is_rmrr_locked(struct device *dev) 2555 2573 { ··· 2563 2573 if (dev_is_pci(dev)) { 2564 2574 struct pci_dev *pdev = to_pci_dev(dev); 2565 2575 2566 - if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) 2576 + if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) 2567 2577 return false; 2568 2578 } 2569 2579
+1 -4
drivers/iommu/intel_irq_remapping.c
··· 637 637 if (x2apic_supported()) { 638 638 eim = !dmar_x2apic_optout(); 639 639 if (!eim) 640 - printk(KERN_WARNING 641 - "Your BIOS is broken and requested that x2apic be disabled.\n" 642 - "This will slightly decrease performance.\n" 643 - "Use 'intremap=no_x2apic_optout' to override BIOS request.\n"); 640 + pr_info("x2apic is disabled because BIOS sets x2apic opt out bit. You can use 'intremap=no_x2apic_optout' to override the BIOS setting.\n"); 644 641 } 645 642 646 643 for_each_iommu(iommu, drhd) {
+15 -3
include/linux/intel-iommu.h
··· 115 115 * Extended Capability Register 116 116 */ 117 117 118 - #define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) 118 + #define ecap_pss(e) ((e >> 35) & 0x1f) 119 + #define ecap_eafs(e) ((e >> 34) & 0x1) 120 + #define ecap_nwfs(e) ((e >> 33) & 0x1) 121 + #define ecap_srs(e) ((e >> 31) & 0x1) 122 + #define ecap_ers(e) ((e >> 30) & 0x1) 123 + #define ecap_prs(e) ((e >> 29) & 0x1) 124 + #define ecap_pasid(e) ((e >> 28) & 0x1) 125 + #define ecap_dis(e) ((e >> 27) & 0x1) 126 + #define ecap_nest(e) ((e >> 26) & 0x1) 127 + #define ecap_mts(e) ((e >> 25) & 0x1) 128 + #define ecap_ecs(e) ((e >> 24) & 0x1) 119 129 #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) 120 - #define ecap_max_iotlb_offset(e) \ 121 - (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) 130 + #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) 122 131 #define ecap_coherent(e) ((e) & 0x1) 123 132 #define ecap_qis(e) ((e) & 0x2) 124 133 #define ecap_pass_through(e) ((e >> 6) & 0x1) ··· 188 179 #define DMA_GSTS_IRTPS (((u32)1) << 24) 189 180 #define DMA_GSTS_IRES (((u32)1) << 25) 190 181 #define DMA_GSTS_CFIS (((u32)1) << 23) 182 + 183 + /* DMA_RTADDR_REG */ 184 + #define DMA_RTADDR_RTT (((u64)1) << 11) 191 185 192 186 /* CCMD_REG */ 193 187 #define DMA_CCMD_ICC (((u64)1) << 63)