Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

iommupt/vtd: Allow VT-d to have a larger table top than the vasz requires

VT-d second stage HW specifies both the maximum IOVA and the supported
table walk starting points. Weirdly, there is HW that only supports a
4-level walk but has a maximum IOVA that only needs 3 levels.

The current code miscalculates this and creates a wrongly sized page table,
which ultimately fails the compatibility check for the number of levels.

This is fixed by allowing the page table to be created with both a vasz
and a top_level input. The vasz will set the aperture for the domain while
the top_level will set the page table geometry.

Add top_level to vtdss and correct the logic in VT-d to generate the right
top_level and vasz from mgaw and sagaw.

Fixes: d373449d8e97 ("iommu/vt-d: Use the generic iommu page table")
Reported-by: Calvin Owens <calvin@wbinvd.org>
Closes: https://lore.kernel.org/r/8f257d2651eb8a4358fcbd47b0145002e5f1d638.1764237717.git.calvin@wbinvd.org
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Calvin Owens <calvin@wbinvd.org>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

Authored by Jason Gunthorpe and committed by Joerg Roedel
d856f9d2 416d9a22

+35 -20
+6 -13
drivers/iommu/generic_pt/fmt/vtdss.h
··· 248 248 const struct pt_iommu_vtdss_cfg *cfg) 249 249 { 250 250 struct pt_vtdss *table = &iommu_table->vtdss_pt; 251 - unsigned int vasz_lg2 = cfg->common.hw_max_vasz_lg2; 252 251 253 - if (vasz_lg2 > PT_MAX_VA_ADDRESS_LG2) 252 + if (cfg->top_level > 4 || cfg->top_level < 2) 254 253 return -EOPNOTSUPP; 255 - else if (vasz_lg2 > 48) 256 - pt_top_set_level(&table->common, 4); 257 - else if (vasz_lg2 > 39) 258 - pt_top_set_level(&table->common, 3); 259 - else if (vasz_lg2 > 30) 260 - pt_top_set_level(&table->common, 2); 261 - else 262 - return -EOPNOTSUPP; 254 + 255 + pt_top_set_level(&table->common, cfg->top_level); 263 256 return 0; 264 257 } 265 258 #define pt_iommu_fmt_init vtdss_pt_iommu_fmt_init ··· 275 282 276 283 #if defined(GENERIC_PT_KUNIT) 277 284 static const struct pt_iommu_vtdss_cfg vtdss_kunit_fmt_cfgs[] = { 278 - [0] = { .common.hw_max_vasz_lg2 = 39 }, 279 - [1] = { .common.hw_max_vasz_lg2 = 48 }, 280 - [2] = { .common.hw_max_vasz_lg2 = 57 }, 285 + [0] = { .common.hw_max_vasz_lg2 = 39, .top_level = 2}, 286 + [1] = { .common.hw_max_vasz_lg2 = 48, .top_level = 3}, 287 + [2] = { .common.hw_max_vasz_lg2 = 57, .top_level = 4}, 281 288 }; 282 289 #define kunit_fmt_cfgs vtdss_kunit_fmt_cfgs 283 290 enum { KUNIT_FMT_FEATURES = BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE) };
+14
drivers/iommu/generic_pt/iommu_pt.h
··· 1128 1128 PT_FORCE_ENABLED_FEATURES)) 1129 1129 return -EOPNOTSUPP; 1130 1130 1131 + /* 1132 + * Check if the top level of the page table is too small to hold the 1133 + * specified maxvasz. 1134 + */ 1135 + if (!pt_feature(common, PT_FEAT_DYNAMIC_TOP) && 1136 + top_range.top_level != PT_MAX_TOP_LEVEL) { 1137 + struct pt_state pts = { .range = &top_range, 1138 + .level = top_range.top_level }; 1139 + 1140 + if (common->max_vasz_lg2 > 1141 + pt_num_items_lg2(&pts) + pt_table_item_lg2sz(&pts)) 1142 + return -EOPNOTSUPP; 1143 + } 1144 + 1131 1145 if (common->max_oasz_lg2 == 0) 1132 1146 common->max_oasz_lg2 = pt_max_oa_lg2(common); 1133 1147 else
+13 -7
drivers/iommu/intel/iommu.c
··· 2858 2858 return &dmar_domain->domain; 2859 2859 } 2860 2860 2861 - static int compute_vasz_lg2_ss(struct intel_iommu *iommu) 2861 + static unsigned int compute_vasz_lg2_ss(struct intel_iommu *iommu, 2862 + unsigned int *top_level) 2862 2863 { 2863 2864 unsigned int sagaw = cap_sagaw(iommu->cap); 2864 2865 unsigned int mgaw = cap_mgaw(iommu->cap); 2865 2866 2866 2867 /* 2867 2868 * Find the largest table size that both the mgaw and sagaw support. 2868 - * This sets both the number of table levels and the valid range of 2869 - * IOVA. 2869 + * This sets the valid range of IOVA and the top starting level. 2870 + * Some HW may only support a 4 or 5 level walk but must limit IOVA to 2871 + * 3 levels. 2870 2872 */ 2871 - if (mgaw >= 48 && (sagaw & BIT(3))) 2873 + if (mgaw > 48 && sagaw >= BIT(3)) { 2874 + *top_level = 4; 2872 2875 return min(57, mgaw); 2873 - else if (mgaw >= 39 && (sagaw & BIT(2))) 2876 + } else if (mgaw > 39 && sagaw >= BIT(2)) { 2877 + *top_level = 3 + ffs(sagaw >> 3); 2874 2878 return min(48, mgaw); 2875 - else if (mgaw >= 30 && (sagaw & BIT(1))) 2879 + } else if (mgaw > 30 && sagaw >= BIT(1)) { 2880 + *top_level = 2 + ffs(sagaw >> 2); 2876 2881 return min(39, mgaw); 2882 + } 2877 2883 return 0; 2878 2884 } 2879 2885 ··· 2916 2910 if (IS_ERR(dmar_domain)) 2917 2911 return ERR_CAST(dmar_domain); 2918 2912 2919 - cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu); 2913 + cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu, &cfg.top_level); 2920 2914 cfg.common.hw_max_oasz_lg2 = 52; 2921 2915 cfg.common.features = BIT(PT_FEAT_FLUSH_RANGE); 2922 2916
+2
include/linux/generic_pt/iommu.h
··· 264 264 265 265 struct pt_iommu_vtdss_cfg { 266 266 struct pt_iommu_cfg common; 267 + /* 4 is a 57 bit 5 level table */ 268 + unsigned int top_level; 267 269 }; 268 270 269 271 struct pt_iommu_vtdss_hw_info {