Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

iommupt: Add the RISC-V page table format

The RISC-V format is a fairly simple 5 level page table not unlike the x86
one. It has optional support for a single contiguous page size of 64k (16
x 4k).

The specification also describes a 32-bit format. The general code can
support it via a #define, but the iommu side implementation has been left
off until a user comes.

Tested-by: Vincent Chen <vincent.chen@sifive.com>
Acked-by: Paul Walmsley <pjw@kernel.org> # arch/riscv
Reviewed-by: Tomasz Jeznach <tjeznach@rivosinc.com>
Tested-by: Tomasz Jeznach <tjeznach@rivosinc.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

authored by

Jason Gunthorpe and committed by
Joerg Roedel
e71e0012 f338e773

+394
+1
drivers/iommu/generic_pt/.kunitconfig
··· 5 5 CONFIG_IOMMU_PT=y 6 6 CONFIG_IOMMU_PT_AMDV1=y 7 7 CONFIG_IOMMU_PT_VTDSS=y 8 + CONFIG_IOMMU_PT_RISCV64=y 8 9 CONFIG_IOMMU_PT_X86_64=y 9 10 CONFIG_IOMMU_PT_KUNIT_TEST=y 10 11
+11
drivers/iommu/generic_pt/Kconfig
··· 52 52 53 53 Selected automatically by an IOMMU driver that uses this format. 54 54 55 + config IOMMU_PT_RISCV64 56 + tristate "IOMMU page table for RISC-V 64 bit Sv57/Sv48/Sv39" 57 + depends on !GENERIC_ATOMIC64 # for cmpxchg64 58 + help 59 + iommu_domain implementation for RISC-V 64 bit 3/4/5 level page table. 60 + It supports 4K/2M/1G/512G/256T page sizes and can decode a sign 61 + extended portion of the 64 bit IOVA space. 62 + 63 + Selected automatically by an IOMMU driver that uses this format. 64 + 55 65 config IOMMU_PT_X86_64 56 66 tristate "IOMMU page table for x86 64-bit, 4/5 levels" 57 67 depends on !GENERIC_ATOMIC64 # for cmpxchg64 ··· 76 66 tristate "IOMMU Page Table KUnit Test" if !KUNIT_ALL_TESTS 77 67 depends on KUNIT 78 68 depends on IOMMU_PT_AMDV1 || !IOMMU_PT_AMDV1 69 + depends on IOMMU_PT_RISCV64 || !IOMMU_PT_RISCV64 79 70 depends on IOMMU_PT_X86_64 || !IOMMU_PT_X86_64 80 71 depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS 81 72 default KUNIT_ALL_TESTS
+2
drivers/iommu/generic_pt/fmt/Makefile
··· 5 5 6 6 iommu_pt_fmt-$(CONFIG_IOMMU_PT_VTDSS) += vtdss 7 7 8 + iommu_pt_fmt-$(CONFIG_IOMMU_PT_RISCV64) += riscv64 9 + 8 10 iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86_64) += x86_64 9 11 10 12 IOMMU_PT_KUNIT_TEST :=
+29
drivers/iommu/generic_pt/fmt/defs_riscv.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES 4 + * 5 + */ 6 + #ifndef __GENERIC_PT_FMT_DEFS_RISCV_H 7 + #define __GENERIC_PT_FMT_DEFS_RISCV_H 8 + 9 + #include <linux/generic_pt/common.h> 10 + #include <linux/types.h> 11 + 12 + #ifdef PT_RISCV_32BIT 13 + typedef u32 pt_riscv_entry_t; 14 + #define riscvpt_write_attrs riscv32pt_write_attrs 15 + #else 16 + typedef u64 pt_riscv_entry_t; 17 + #define riscvpt_write_attrs riscv64pt_write_attrs 18 + #endif 19 + 20 + typedef pt_riscv_entry_t pt_vaddr_t; 21 + typedef u64 pt_oaddr_t; 22 + 23 + struct riscvpt_write_attrs { 24 + pt_riscv_entry_t descriptor_bits; 25 + gfp_t gfp; 26 + }; 27 + #define pt_write_attrs riscvpt_write_attrs 28 + 29 + #endif
+11
drivers/iommu/generic_pt/fmt/iommu_riscv64.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES 4 + */ 5 + #define PT_FMT riscv 6 + #define PT_FMT_VARIANT 64 7 + #define PT_SUPPORTED_FEATURES \ 8 + (BIT(PT_FEAT_SIGN_EXTEND) | BIT(PT_FEAT_FLUSH_RANGE) | \ 9 + BIT(PT_FEAT_RISCV_SVNAPOT_64K)) 10 + 11 + #include "iommu_template.h"
+313
drivers/iommu/generic_pt/fmt/riscv.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES 4 + * 5 + * RISC-V page table 6 + * 7 + * This is described in Sections: 8 + * 12.3. Sv32: Page-Based 32-bit Virtual-Memory Systems 9 + * 12.4. Sv39: Page-Based 39-bit Virtual-Memory System 10 + * 12.5. Sv48: Page-Based 48-bit Virtual-Memory System 11 + * 12.6. Sv57: Page-Based 57-bit Virtual-Memory System 12 + * of the "The RISC-V Instruction Set Manual: Volume II" 13 + * 14 + * This includes the contiguous page extension from: 15 + * Chapter 13. "Svnapot" Extension for NAPOT Translation Contiguity, 16 + * Version 1.0 17 + * 18 + * The table format is sign extended and supports leafs in every level. The spec 19 + * doesn't talk a lot about levels, but level here is the same as i=LEVELS-1 in 20 + * the spec. 21 + */ 22 + #ifndef __GENERIC_PT_FMT_RISCV_H 23 + #define __GENERIC_PT_FMT_RISCV_H 24 + 25 + #include "defs_riscv.h" 26 + #include "../pt_defs.h" 27 + 28 + #include <linux/bitfield.h> 29 + #include <linux/container_of.h> 30 + #include <linux/log2.h> 31 + #include <linux/sizes.h> 32 + 33 + enum { 34 + PT_ITEM_WORD_SIZE = sizeof(pt_riscv_entry_t), 35 + #ifdef PT_RISCV_32BIT 36 + PT_MAX_VA_ADDRESS_LG2 = 32, 37 + PT_MAX_OUTPUT_ADDRESS_LG2 = 34, 38 + PT_MAX_TOP_LEVEL = 1, 39 + #else 40 + PT_MAX_VA_ADDRESS_LG2 = 57, 41 + PT_MAX_OUTPUT_ADDRESS_LG2 = 56, 42 + PT_MAX_TOP_LEVEL = 4, 43 + #endif 44 + PT_GRANULE_LG2SZ = 12, 45 + PT_TABLEMEM_LG2SZ = 12, 46 + 47 + /* fsc.PPN is 44 bits wide, all PPNs are 4k aligned */ 48 + PT_TOP_PHYS_MASK = GENMASK_ULL(55, 12), 49 + }; 50 + 51 + /* PTE bits */ 52 + enum { 53 + RISCVPT_V = BIT(0), 54 + RISCVPT_R = BIT(1), 55 + RISCVPT_W = BIT(2), 56 + RISCVPT_X = BIT(3), 57 + RISCVPT_U = BIT(4), 58 + RISCVPT_G = BIT(5), 59 + RISCVPT_A = BIT(6), 60 + RISCVPT_D = BIT(7), 61 + RISCVPT_RSW = GENMASK(9, 8), 62 + RISCVPT_PPN32 = GENMASK(31, 10), 63 + 64 + RISCVPT_PPN64 = GENMASK_ULL(53, 10), 65 + RISCVPT_PPN64_64K = GENMASK_ULL(53, 
14), 66 + RISCVPT_PBMT = GENMASK_ULL(62, 61), 67 + RISCVPT_N = BIT_ULL(63), 68 + 69 + /* Svnapot encodings for ppn[0] */ 70 + RISCVPT_PPN64_64K_SZ = BIT(13), 71 + }; 72 + 73 + #ifdef PT_RISCV_32BIT 74 + #define RISCVPT_PPN RISCVPT_PPN32 75 + #define pt_riscv pt_riscv_32 76 + #else 77 + #define RISCVPT_PPN RISCVPT_PPN64 78 + #define pt_riscv pt_riscv_64 79 + #endif 80 + 81 + #define common_to_riscvpt(common_ptr) \ 82 + container_of_const(common_ptr, struct pt_riscv, common) 83 + #define to_riscvpt(pts) common_to_riscvpt((pts)->range->common) 84 + 85 + static inline pt_oaddr_t riscvpt_table_pa(const struct pt_state *pts) 86 + { 87 + return oalog2_mul(FIELD_GET(RISCVPT_PPN, pts->entry), PT_GRANULE_LG2SZ); 88 + } 89 + #define pt_table_pa riscvpt_table_pa 90 + 91 + static inline pt_oaddr_t riscvpt_entry_oa(const struct pt_state *pts) 92 + { 93 + if (pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K) && 94 + pts->entry & RISCVPT_N) { 95 + PT_WARN_ON(pts->level != 0); 96 + return oalog2_mul(FIELD_GET(RISCVPT_PPN64_64K, pts->entry), 97 + ilog2(SZ_64K)); 98 + } 99 + return oalog2_mul(FIELD_GET(RISCVPT_PPN, pts->entry), PT_GRANULE_LG2SZ); 100 + } 101 + #define pt_entry_oa riscvpt_entry_oa 102 + 103 + static inline bool riscvpt_can_have_leaf(const struct pt_state *pts) 104 + { 105 + return true; 106 + } 107 + #define pt_can_have_leaf riscvpt_can_have_leaf 108 + 109 + /* Body in pt_fmt_defaults.h */ 110 + static inline unsigned int pt_table_item_lg2sz(const struct pt_state *pts); 111 + 112 + static inline unsigned int 113 + riscvpt_entry_num_contig_lg2(const struct pt_state *pts) 114 + { 115 + if (PT_SUPPORTED_FEATURE(PT_FEAT_RISCV_SVNAPOT_64K) && 116 + pts->entry & RISCVPT_N) { 117 + PT_WARN_ON(!pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K)); 118 + PT_WARN_ON(pts->level); 119 + return ilog2(16); 120 + } 121 + return ilog2(1); 122 + } 123 + #define pt_entry_num_contig_lg2 riscvpt_entry_num_contig_lg2 124 + 125 + static inline unsigned int riscvpt_num_items_lg2(const struct pt_state 
*pts) 126 + { 127 + return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64)); 128 + } 129 + #define pt_num_items_lg2 riscvpt_num_items_lg2 130 + 131 + static inline unsigned short 132 + riscvpt_contig_count_lg2(const struct pt_state *pts) 133 + { 134 + if (pts->level == 0 && pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K)) 135 + return ilog2(16); 136 + return ilog2(1); 137 + } 138 + #define pt_contig_count_lg2 riscvpt_contig_count_lg2 139 + 140 + static inline enum pt_entry_type riscvpt_load_entry_raw(struct pt_state *pts) 141 + { 142 + const pt_riscv_entry_t *tablep = pt_cur_table(pts, pt_riscv_entry_t); 143 + pt_riscv_entry_t entry; 144 + 145 + pts->entry = entry = READ_ONCE(tablep[pts->index]); 146 + if (!(entry & RISCVPT_V)) 147 + return PT_ENTRY_EMPTY; 148 + if (pts->level == 0 || 149 + ((entry & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) != 0)) 150 + return PT_ENTRY_OA; 151 + return PT_ENTRY_TABLE; 152 + } 153 + #define pt_load_entry_raw riscvpt_load_entry_raw 154 + 155 + static inline void 156 + riscvpt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa, 157 + unsigned int oasz_lg2, 158 + const struct pt_write_attrs *attrs) 159 + { 160 + pt_riscv_entry_t *tablep = pt_cur_table(pts, pt_riscv_entry_t); 161 + pt_riscv_entry_t entry; 162 + 163 + if (!pt_check_install_leaf_args(pts, oa, oasz_lg2)) 164 + return; 165 + 166 + entry = RISCVPT_V | 167 + FIELD_PREP(RISCVPT_PPN, log2_div(oa, PT_GRANULE_LG2SZ)) | 168 + attrs->descriptor_bits; 169 + 170 + if (pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K) && pts->level == 0 && 171 + oasz_lg2 != PT_GRANULE_LG2SZ) { 172 + u64 *end; 173 + 174 + entry |= RISCVPT_N | RISCVPT_PPN64_64K_SZ; 175 + tablep += pts->index; 176 + end = tablep + log2_div(SZ_64K, PT_GRANULE_LG2SZ); 177 + for (; tablep != end; tablep++) 178 + WRITE_ONCE(*tablep, entry); 179 + } else { 180 + /* FIXME does riscv need this to be cmpxchg? 
*/ 181 + WRITE_ONCE(tablep[pts->index], entry); 182 + } 183 + pts->entry = entry; 184 + } 185 + #define pt_install_leaf_entry riscvpt_install_leaf_entry 186 + 187 + static inline bool riscvpt_install_table(struct pt_state *pts, 188 + pt_oaddr_t table_pa, 189 + const struct pt_write_attrs *attrs) 190 + { 191 + pt_riscv_entry_t entry; 192 + 193 + entry = RISCVPT_V | 194 + FIELD_PREP(RISCVPT_PPN, log2_div(table_pa, PT_GRANULE_LG2SZ)); 195 + return pt_table_install64(pts, entry); 196 + } 197 + #define pt_install_table riscvpt_install_table 198 + 199 + static inline void riscvpt_attr_from_entry(const struct pt_state *pts, 200 + struct pt_write_attrs *attrs) 201 + { 202 + attrs->descriptor_bits = 203 + pts->entry & (RISCVPT_R | RISCVPT_W | RISCVPT_X | RISCVPT_U | 204 + RISCVPT_G | RISCVPT_A | RISCVPT_D); 205 + } 206 + #define pt_attr_from_entry riscvpt_attr_from_entry 207 + 208 + /* --- iommu */ 209 + #include <linux/generic_pt/iommu.h> 210 + #include <linux/iommu.h> 211 + 212 + #define pt_iommu_table pt_iommu_riscv_64 213 + 214 + /* The common struct is in the per-format common struct */ 215 + static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table) 216 + { 217 + return &container_of(iommu_table, struct pt_iommu_table, iommu) 218 + ->riscv_64pt.common; 219 + } 220 + 221 + static inline struct pt_iommu *iommu_from_common(struct pt_common *common) 222 + { 223 + return &container_of(common, struct pt_iommu_table, riscv_64pt.common) 224 + ->iommu; 225 + } 226 + 227 + static inline int riscvpt_iommu_set_prot(struct pt_common *common, 228 + struct pt_write_attrs *attrs, 229 + unsigned int iommu_prot) 230 + { 231 + u64 pte; 232 + 233 + pte = RISCVPT_A | RISCVPT_U; 234 + if (iommu_prot & IOMMU_WRITE) 235 + pte |= RISCVPT_W | RISCVPT_R | RISCVPT_D; 236 + if (iommu_prot & IOMMU_READ) 237 + pte |= RISCVPT_R; 238 + if (!(iommu_prot & IOMMU_NOEXEC)) 239 + pte |= RISCVPT_X; 240 + 241 + /* Caller must specify a supported combination of flags */ 242 + if 
(unlikely((pte & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) == 0)) 243 + return -EOPNOTSUPP; 244 + 245 + attrs->descriptor_bits = pte; 246 + return 0; 247 + } 248 + #define pt_iommu_set_prot riscvpt_iommu_set_prot 249 + 250 + static inline int 251 + riscvpt_iommu_fmt_init(struct pt_iommu_riscv_64 *iommu_table, 252 + const struct pt_iommu_riscv_64_cfg *cfg) 253 + { 254 + struct pt_riscv *table = &iommu_table->riscv_64pt; 255 + 256 + switch (cfg->common.hw_max_vasz_lg2) { 257 + case 39: 258 + pt_top_set_level(&table->common, 2); 259 + break; 260 + case 48: 261 + pt_top_set_level(&table->common, 3); 262 + break; 263 + case 57: 264 + pt_top_set_level(&table->common, 4); 265 + break; 266 + default: 267 + return -EINVAL; 268 + } 269 + table->common.max_oasz_lg2 = 270 + min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2); 271 + return 0; 272 + } 273 + #define pt_iommu_fmt_init riscvpt_iommu_fmt_init 274 + 275 + static inline void 276 + riscvpt_iommu_fmt_hw_info(struct pt_iommu_riscv_64 *table, 277 + const struct pt_range *top_range, 278 + struct pt_iommu_riscv_64_hw_info *info) 279 + { 280 + phys_addr_t top_phys = virt_to_phys(top_range->top_table); 281 + 282 + info->ppn = oalog2_div(top_phys, PT_GRANULE_LG2SZ); 283 + PT_WARN_ON(top_phys & ~PT_TOP_PHYS_MASK); 284 + 285 + /* 286 + * See Table 3. 
Encodings of iosatp.MODE field" for DC.tx.SXL = 0: 287 + * 8 = Sv39 = top level 2 288 + * 9 = Sv38 = top level 3 289 + * 10 = Sv57 = top level 4 290 + */ 291 + info->fsc_iosatp_mode = top_range->top_level + 6; 292 + } 293 + #define pt_iommu_fmt_hw_info riscvpt_iommu_fmt_hw_info 294 + 295 + #if defined(GENERIC_PT_KUNIT) 296 + static const struct pt_iommu_riscv_64_cfg riscv_64_kunit_fmt_cfgs[] = { 297 + [0] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K), 298 + .common.hw_max_oasz_lg2 = 56, 299 + .common.hw_max_vasz_lg2 = 39 }, 300 + [1] = { .common.features = 0, 301 + .common.hw_max_oasz_lg2 = 56, 302 + .common.hw_max_vasz_lg2 = 48 }, 303 + [2] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K), 304 + .common.hw_max_oasz_lg2 = 56, 305 + .common.hw_max_vasz_lg2 = 57 }, 306 + }; 307 + #define kunit_fmt_cfgs riscv_64_kunit_fmt_cfgs 308 + enum { 309 + KUNIT_FMT_FEATURES = BIT(PT_FEAT_RISCV_SVNAPOT_64K), 310 + }; 311 + #endif 312 + 313 + #endif
+16
include/linux/generic_pt/common.h
··· 175 175 PT_FEAT_VTDSS_FORCE_WRITEABLE, 176 176 }; 177 177 178 + struct pt_riscv_32 { 179 + struct pt_common common; 180 + }; 181 + 182 + struct pt_riscv_64 { 183 + struct pt_common common; 184 + }; 185 + 186 + enum { 187 + /* 188 + * Support the 64k contiguous page size following the Svnapot extension. 189 + */ 190 + PT_FEAT_RISCV_SVNAPOT_64K = PT_FEAT_FMT_START, 191 + 192 + }; 193 + 178 194 struct pt_x86_64 { 179 195 struct pt_common common; 180 196 };
+11
include/linux/generic_pt/iommu.h
··· 275 275 276 276 IOMMU_FORMAT(vtdss, vtdss_pt); 277 277 278 + struct pt_iommu_riscv_64_cfg { 279 + struct pt_iommu_cfg common; 280 + }; 281 + 282 + struct pt_iommu_riscv_64_hw_info { 283 + u64 ppn; 284 + u8 fsc_iosatp_mode; 285 + }; 286 + 287 + IOMMU_FORMAT(riscv_64, riscv_64pt); 288 + 278 289 struct pt_iommu_x86_64_cfg { 279 290 struct pt_iommu_cfg common; 280 291 /* 4 is a 57 bit 5 level table */