Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
"The diffstat is dominated by changes to our TLB invalidation errata
handling and the introduction of a new GCS selftest to catch one of
the issues that is fixed here relating to PROT_NONE mappings.

- Fix cpufreq warning due to attempting a cross-call with interrupts
masked when reading local AMU counters

- Fix DEBUG_PREEMPT warning from the delay loop when it tries to
access per-cpu errata workaround state for the virtual counter

- Re-jig and optimise our TLB invalidation errata workarounds in
preparation for more hardware brokenness

- Fix GCS mappings to interact properly with PROT_NONE and to avoid
corrupting the pte on CPUs with FEAT_LPA2

- Fix ioremap_prot() to extract only the memory attributes from the
user pte and ignore all the other 'prot' bits"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
arm64: topology: Fix false warning in counters_read_on_cpu() for same-CPU reads
arm64: Fix sampling the "stable" virtual counter in preemptible section
arm64: tlb: Optimize ARM64_WORKAROUND_REPEAT_TLBI
arm64: tlb: Allow XZR argument to TLBI ops
kselftest: arm64: Check access to GCS after mprotect(PROT_NONE)
arm64: gcs: Honour mprotect(PROT_NONE) on shadow stack mappings
arm64: gcs: Do not set PTE_SHARED on GCS mappings if FEAT_LPA2 is enabled
arm64: io: Extract user memory type in ioremap_prot()
arm64: io: Rename ioremap_prot() to __ioremap_prot()

+179 -60
+20 -6
arch/arm64/include/asm/io.h
··· 264 264 typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size, 265 265 pgprot_t *prot); 266 266 int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook); 267 + void __iomem *__ioremap_prot(phys_addr_t phys, size_t size, pgprot_t prot); 267 268 269 + static inline void __iomem *ioremap_prot(phys_addr_t phys, size_t size, 270 + pgprot_t user_prot) 271 + { 272 + pgprot_t prot; 273 + ptdesc_t user_prot_val = pgprot_val(user_prot); 274 + 275 + if (WARN_ON_ONCE(!(user_prot_val & PTE_USER))) 276 + return NULL; 277 + 278 + prot = __pgprot_modify(PAGE_KERNEL, PTE_ATTRINDX_MASK, 279 + user_prot_val & PTE_ATTRINDX_MASK); 280 + return __ioremap_prot(phys, size, prot); 281 + } 268 282 #define ioremap_prot ioremap_prot 269 283 270 - #define _PAGE_IOREMAP PROT_DEVICE_nGnRE 271 - 284 + #define ioremap(addr, size) \ 285 + __ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) 272 286 #define ioremap_wc(addr, size) \ 273 - ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC)) 287 + __ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC)) 274 288 #define ioremap_np(addr, size) \ 275 - ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) 289 + __ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) 276 290 277 291 278 292 #define ioremap_encrypted(addr, size) \ 279 - ioremap_prot((addr), (size), PAGE_KERNEL) 293 + __ioremap_prot((addr), (size), PAGE_KERNEL) 280 294 281 295 /* 282 296 * io{read,write}{16,32,64}be() macros ··· 311 297 if (pfn_is_map_memory(__phys_to_pfn(addr))) 312 298 return (void __iomem *)__phys_to_virt(addr); 313 299 314 - return ioremap_prot(addr, size, __pgprot(PROT_NORMAL)); 300 + return __ioremap_prot(addr, size, __pgprot(PROT_NORMAL)); 315 301 } 316 302 317 303 /*
-3
arch/arm64/include/asm/pgtable-prot.h
··· 164 164 #define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER) 165 165 #define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER) 166 166 167 - #define PAGE_GCS __pgprot(_PAGE_GCS) 168 - #define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO) 169 - 170 167 #define PIE_E0 ( \ 171 168 PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \ 172 169 PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
+37 -26
arch/arm64/include/asm/tlbflush.h
··· 31 31 */ 32 32 #define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \ 33 33 "tlbi " #op "\n" \ 34 - ALTERNATIVE("nop\n nop", \ 35 - "dsb ish\n tlbi " #op, \ 36 - ARM64_WORKAROUND_REPEAT_TLBI, \ 37 - CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ 38 34 : : ) 39 35 40 36 #define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \ 41 - "tlbi " #op ", %0\n" \ 42 - ALTERNATIVE("nop\n nop", \ 43 - "dsb ish\n tlbi " #op ", %0", \ 44 - ARM64_WORKAROUND_REPEAT_TLBI, \ 45 - CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ 46 - : : "r" (arg)) 37 + "tlbi " #op ", %x0\n" \ 38 + : : "rZ" (arg)) 47 39 48 40 #define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) 49 41 ··· 173 181 (__pages >> (5 * (scale) + 1)) - 1; \ 174 182 }) 175 183 184 + #define __repeat_tlbi_sync(op, arg...) \ 185 + do { \ 186 + if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) \ 187 + break; \ 188 + __tlbi(op, ##arg); \ 189 + dsb(ish); \ 190 + } while (0) 191 + 192 + /* 193 + * Complete broadcast TLB maintenance issued by the host which invalidates 194 + * stage 1 information in the host's own translation regime. 195 + */ 196 + static inline void __tlbi_sync_s1ish(void) 197 + { 198 + dsb(ish); 199 + __repeat_tlbi_sync(vale1is, 0); 200 + } 201 + 202 + /* 203 + * Complete broadcast TLB maintenance issued by hyp code which invalidates 204 + * stage 1 translation information in any translation regime. 205 + */ 206 + static inline void __tlbi_sync_s1ish_hyp(void) 207 + { 208 + dsb(ish); 209 + __repeat_tlbi_sync(vale2is, 0); 210 + } 211 + 176 212 /* 177 213 * TLB Invalidation 178 214 * ================ ··· 299 279 { 300 280 dsb(ishst); 301 281 __tlbi(vmalle1is); 302 - dsb(ish); 282 + __tlbi_sync_s1ish(); 303 283 isb(); 304 284 } 305 285 ··· 311 291 asid = __TLBI_VADDR(0, ASID(mm)); 312 292 __tlbi(aside1is, asid); 313 293 __tlbi_user(aside1is, asid); 314 - dsb(ish); 294 + __tlbi_sync_s1ish(); 315 295 mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); 316 296 } 317 297 ··· 365 345 unsigned long uaddr) 366 346 { 367 347 flush_tlb_page_nosync(vma, uaddr); 368 - dsb(ish); 348 + __tlbi_sync_s1ish(); 369 349 } 370 350 371 351 static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) 372 352 { 373 - /* 374 - * TLB flush deferral is not required on systems which are affected by 375 - * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation 376 - * will have two consecutive TLBI instructions with a dsb(ish) in between 377 - * defeating the purpose (i.e save overall 'dsb ish' cost). 378 - */ 379 - if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) 380 - return false; 381 - 382 353 return true; 383 354 } 384 355 ··· 385 374 */ 386 375 static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) 387 376 { 388 - dsb(ish); 377 + __tlbi_sync_s1ish(); 389 378 } 390 379 391 380 /* ··· 520 509 { 521 510 __flush_tlb_range_nosync(vma->vm_mm, start, end, stride, 522 511 last_level, tlb_level); 523 - dsb(ish); 512 + __tlbi_sync_s1ish(); 524 513 } 525 514 526 515 static inline void local_flush_tlb_contpte(struct vm_area_struct *vma, ··· 568 557 dsb(ishst); 569 558 __flush_tlb_range_op(vaale1is, start, pages, stride, 0, 570 559 TLBI_TTL_UNKNOWN, false, lpa2_is_enabled()); 571 - dsb(ish); 560 + __tlbi_sync_s1ish(); 572 561 isb(); 573 562 } 574 563 ··· 582 571 583 572 dsb(ishst); 584 573 __tlbi(vaae1is, addr); 585 - dsb(ish); 574 + __tlbi_sync_s1ish(); 586 575 isb(); 587 576 } 588 577
+1 -1
arch/arm64/kernel/acpi.c
··· 377 377 prot = __acpi_get_writethrough_mem_attribute(); 378 378 } 379 379 } 380 - return ioremap_prot(phys, size, prot); 380 + return __ioremap_prot(phys, size, prot); 381 381 } 382 382 383 383 /*
+1 -1
arch/arm64/kernel/sys_compat.c
··· 37 37 * We pick the reserved-ASID to minimise the impact. 38 38 */ 39 39 __tlbi(aside1is, __TLBI_VADDR(0, 0)); 40 - dsb(ish); 40 + __tlbi_sync_s1ish(); 41 41 } 42 42 43 43 ret = caches_clean_inval_user_pou(start, start + chunk);
+15 -6
arch/arm64/kernel/topology.c
··· 400 400 int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val) 401 401 { 402 402 /* 403 - * Abort call on counterless CPU or when interrupts are 404 - * disabled - can lead to deadlock in smp sync call. 403 + * Abort call on counterless CPU. 405 404 */ 406 405 if (!cpu_has_amu_feat(cpu)) 407 406 return -EOPNOTSUPP; 408 407 409 - if (WARN_ON_ONCE(irqs_disabled())) 410 - return -EPERM; 411 - 412 - smp_call_function_single(cpu, func, val, 1); 408 + if (irqs_disabled()) { 409 + /* 410 + * When IRQs are disabled (tick path: sched_tick -> 411 + * topology_scale_freq_tick or cppc_scale_freq_tick), only local 412 + * CPU counter reads are allowed. Remote CPU counter read would 413 + * require smp_call_function_single() which is unsafe with IRQs 414 + * disabled. 415 + */ 416 + if (WARN_ON_ONCE(cpu != smp_processor_id())) 417 + return -EPERM; 418 + func(val); 419 + } else { 420 + smp_call_function_single(cpu, func, val, 1); 421 + } 413 422 414 423 return 0; 415 424 }
+1 -1
arch/arm64/kvm/hyp/nvhe/mm.c
··· 271 271 */ 272 272 dsb(ishst); 273 273 __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level); 274 - dsb(ish); 274 + __tlbi_sync_s1ish_hyp(); 275 275 isb(); 276 276 } 277 277
+4 -4
arch/arm64/kvm/hyp/nvhe/tlb.c
··· 169 169 */ 170 170 dsb(ish); 171 171 __tlbi(vmalle1is); 172 - dsb(ish); 172 + __tlbi_sync_s1ish_hyp(); 173 173 isb(); 174 174 175 175 exit_vmid_context(&cxt); ··· 226 226 227 227 dsb(ish); 228 228 __tlbi(vmalle1is); 229 - dsb(ish); 229 + __tlbi_sync_s1ish_hyp(); 230 230 isb(); 231 231 232 232 exit_vmid_context(&cxt); ··· 240 240 enter_vmid_context(mmu, &cxt, false); 241 241 242 242 __tlbi(vmalls12e1is); 243 - dsb(ish); 243 + __tlbi_sync_s1ish_hyp(); 244 244 isb(); 245 245 246 246 exit_vmid_context(&cxt); ··· 266 266 /* Same remark as in enter_vmid_context() */ 267 267 dsb(ish); 268 268 __tlbi(alle1is); 269 - dsb(ish); 269 + __tlbi_sync_s1ish_hyp(); 270 270 }
+1 -1
arch/arm64/kvm/hyp/pgtable.c
··· 501 501 *unmapped += granule; 502 502 } 503 503 504 - dsb(ish); 504 + __tlbi_sync_s1ish_hyp(); 505 505 isb(); 506 506 mm_ops->put_page(ctx->ptep); 507 507
+5 -5
arch/arm64/kvm/hyp/vhe/tlb.c
··· 115 115 */ 116 116 dsb(ish); 117 117 __tlbi(vmalle1is); 118 - dsb(ish); 118 + __tlbi_sync_s1ish_hyp(); 119 119 isb(); 120 120 121 121 exit_vmid_context(&cxt); ··· 176 176 177 177 dsb(ish); 178 178 __tlbi(vmalle1is); 179 - dsb(ish); 179 + __tlbi_sync_s1ish_hyp(); 180 180 isb(); 181 181 182 182 exit_vmid_context(&cxt); ··· 192 192 enter_vmid_context(mmu, &cxt); 193 193 194 194 __tlbi(vmalls12e1is); 195 - dsb(ish); 195 + __tlbi_sync_s1ish_hyp(); 196 196 isb(); 197 197 198 198 exit_vmid_context(&cxt); ··· 217 217 { 218 218 dsb(ishst); 219 219 __tlbi(alle1is); 220 - dsb(ish); 220 + __tlbi_sync_s1ish_hyp(); 221 221 } 222 222 223 223 /* ··· 358 358 default: 359 359 ret = -EINVAL; 360 360 } 361 - dsb(ish); 361 + __tlbi_sync_s1ish_hyp(); 362 362 isb(); 363 363 364 364 if (mmu)
+5 -1
arch/arm64/lib/delay.c
··· 32 32 * Note that userspace cannot change the offset behind our back either, 33 33 * as the vcpu mutex is held as long as KVM_RUN is in progress. 34 34 */ 35 - #define __delay_cycles() __arch_counter_get_cntvct_stable() 35 + static cycles_t notrace __delay_cycles(void) 36 + { 37 + guard(preempt_notrace)(); 38 + return __arch_counter_get_cntvct_stable(); 39 + } 36 40 37 41 void __delay(unsigned long cycles) 38 42 {
+3 -3
arch/arm64/mm/ioremap.c
··· 14 14 return 0; 15 15 } 16 16 17 - void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, 18 - pgprot_t pgprot) 17 + void __iomem *__ioremap_prot(phys_addr_t phys_addr, size_t size, 18 + pgprot_t pgprot) 19 19 { 20 20 unsigned long last_addr = phys_addr + size - 1; 21 21 ··· 39 39 40 40 return generic_ioremap_prot(phys_addr, size, pgprot); 41 41 } 42 - EXPORT_SYMBOL(ioremap_prot); 42 + EXPORT_SYMBOL(__ioremap_prot); 43 43 44 44 /* 45 45 * Must be called after early_fixmap_init
+10 -2
arch/arm64/mm/mmap.c
··· 34 34 [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC 35 35 }; 36 36 37 + static ptdesc_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO; 38 + 37 39 /* 38 40 * You really shouldn't be using read() or write() on /dev/mem. This might go 39 41 * away in the future. ··· 75 73 protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY; 76 74 } 77 75 78 - if (lpa2_is_enabled()) 76 + if (lpa2_is_enabled()) { 79 77 for (int i = 0; i < ARRAY_SIZE(protection_map); i++) 80 78 pgprot_val(protection_map[i]) &= ~PTE_SHARED; 79 + gcs_page_prot &= ~PTE_SHARED; 80 + } 81 81 82 82 return 0; 83 83 } ··· 91 87 92 88 /* Short circuit GCS to avoid bloating the table. */ 93 89 if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { 94 - prot = _PAGE_GCS_RO; 90 + /* Honour mprotect(PROT_NONE) on shadow stack mappings */ 91 + if (vm_flags & VM_ACCESS_FLAGS) 92 + prot = gcs_page_prot; 93 + else 94 + prot = pgprot_val(protection_map[VM_NONE]); 95 95 } else { 96 96 prot = pgprot_val(protection_map[vm_flags & 97 97 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
+76
tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2026 ARM Limited 4 + */ 5 + 6 + #include <errno.h> 7 + #include <signal.h> 8 + #include <unistd.h> 9 + 10 + #include <sys/mman.h> 11 + #include <sys/prctl.h> 12 + 13 + #include "test_signals_utils.h" 14 + #include "testcases.h" 15 + 16 + static uint64_t *gcs_page; 17 + static bool post_mprotect; 18 + 19 + #ifndef __NR_map_shadow_stack 20 + #define __NR_map_shadow_stack 453 21 + #endif 22 + 23 + static bool alloc_gcs(struct tdescr *td) 24 + { 25 + long page_size = sysconf(_SC_PAGE_SIZE); 26 + 27 + gcs_page = (void *)syscall(__NR_map_shadow_stack, 0, 28 + page_size, 0); 29 + if (gcs_page == MAP_FAILED) { 30 + fprintf(stderr, "Failed to map %ld byte GCS: %d\n", 31 + page_size, errno); 32 + return false; 33 + } 34 + 35 + return true; 36 + } 37 + 38 + static int gcs_prot_none_fault_trigger(struct tdescr *td) 39 + { 40 + /* Verify that the page is readable (ie, not completely unmapped) */ 41 + fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]); 42 + 43 + if (mprotect(gcs_page, sysconf(_SC_PAGE_SIZE), PROT_NONE) != 0) { 44 + fprintf(stderr, "mprotect(PROT_NONE) failed: %d\n", errno); 45 + return 0; 46 + } 47 + post_mprotect = true; 48 + 49 + /* This should trigger a fault if PROT_NONE is honoured for the GCS page */ 50 + fprintf(stderr, "Read value after mprotect(PROT_NONE) 0x%lx\n", gcs_page[0]); 51 + return 0; 52 + } 53 + 54 + static int gcs_prot_none_fault_signal(struct tdescr *td, siginfo_t *si, 55 + ucontext_t *uc) 56 + { 57 + ASSERT_GOOD_CONTEXT(uc); 58 + 59 + /* A fault before mprotect(PROT_NONE) is unexpected. */ 60 + if (!post_mprotect) 61 + return 0; 62 + 63 + return 1; 64 + } 65 + 66 + struct tdescr tde = { 67 + .name = "GCS PROT_NONE fault", 68 + .descr = "Read from GCS after mprotect(PROT_NONE) segfaults", 69 + .feats_required = FEAT_GCS, 70 + .timeout = 3, 71 + .sig_ok = SIGSEGV, 72 + .sanity_disabled = true, 73 + .init = alloc_gcs, 74 + .trigger = gcs_prot_none_fault_trigger, 75 + .run = gcs_prot_none_fault_signal, 76 + };