Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
"There's more here than I would ideally like at this stage, but there's
been a steady trickle of fixes and some of them took a few rounds of
review.

The bulk of the changes are fixing some fallout from the recent BBM
level two support which allows the linear map to be split from block
to page mappings at runtime, but inadvertently led to sleeping in
atomic context on some paths where the linear map was already mapped
with page granularity. The fix is simply to avoid splitting in those
cases but the implementation of that is a little involved.

The other interesting fix is addressing a catastrophic performance
issue with our per-cpu atomics discovered by Paul in the SRCU locking
code but which took some interactions with the hardware folks to
resolve.

Summary:

- Avoid sleeping in atomic context when changing linear map
permissions for DEBUG_PAGEALLOC or KFENCE

- Rework printing of Spectre mitigation status to avoid hardlockup
when enabling per-task mitigations on the context-switch path

- Reject kernel modules when instruction patching fails either due to
the DWARF-based SCS patching or because of an alternatives callback
residing outside of the core kernel text

- Propagate error when updating kernel memory permissions in kprobes

- Drop pointless, incorrect message when enabling the ACPI SPCR
console

- Use value-returning LSE instructions for per-cpu atomics to reduce
latency in SRCU locking routines"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
arm64: Reject modules with internal alternative callbacks
arm64: Fail module loading if dynamic SCS patching fails
arm64: proton-pack: Fix hard lockup due to print in scheduler context
arm64: proton-pack: Drop print when !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
arm64: mm: Tidy up force_pte_mapping()
arm64: mm: Optimize range_split_to_ptes()
arm64: mm: Don't sleep in split_kernel_leaf_mapping() when in atomic context
arm64: kprobes: check the return value of set_memory_rox()
arm64: acpi: Drop message logging SPCR default console
Revert "ACPI: Suppress misleading SPCR console message when SPCR table is absent"
arm64: Use load LSE atomics for the non-return per-CPU atomic operations

+165 -82
+5 -2
arch/arm64/include/asm/alternative.h
··· 26 26 bool alternative_is_applied(u16 cpucap); 27 27 28 28 #ifdef CONFIG_MODULES 29 - void apply_alternatives_module(void *start, size_t length); 29 + int apply_alternatives_module(void *start, size_t length); 30 30 #else 31 - static inline void apply_alternatives_module(void *start, size_t length) { } 31 + static inline int apply_alternatives_module(void *start, size_t length) 32 + { 33 + return 0; 34 + } 32 35 #endif 33 36 34 37 void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
+1 -2
arch/arm64/include/asm/kfence.h
··· 10 10 11 11 #include <asm/set_memory.h> 12 12 13 - static inline bool arch_kfence_init_pool(void) { return true; } 14 - 15 13 static inline bool kfence_protect_page(unsigned long addr, bool protect) 16 14 { 17 15 set_memory_valid(addr, 1, !protect); ··· 23 25 { 24 26 return !kfence_early_init; 25 27 } 28 + bool arch_kfence_init_pool(void); 26 29 #else /* CONFIG_KFENCE */ 27 30 static inline bool arm64_kfence_can_set_direct_map(void) { return false; } 28 31 #endif /* CONFIG_KFENCE */
+11 -4
arch/arm64/include/asm/percpu.h
··· 77 77 " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \ 78 78 " cbnz %w[loop], 1b", \ 79 79 /* LSE atomics */ \ 80 - #op_lse "\t%" #w "[val], %[ptr]\n" \ 80 + #op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n" \ 81 81 __nops(3)) \ 82 82 : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \ 83 83 [ptr] "+Q"(*(u##sz *)ptr) \ ··· 124 124 PERCPU_RW_OPS(16) 125 125 PERCPU_RW_OPS(32) 126 126 PERCPU_RW_OPS(64) 127 - PERCPU_OP(add, add, stadd) 128 - PERCPU_OP(andnot, bic, stclr) 129 - PERCPU_OP(or, orr, stset) 127 + 128 + /* 129 + * Use value-returning atomics for CPU-local ops as they are more likely 130 + * to execute "near" to the CPU (e.g. in L1$). 131 + * 132 + * https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop 133 + */ 134 + PERCPU_OP(add, add, ldadd) 135 + PERCPU_OP(andnot, bic, ldclr) 136 + PERCPU_OP(or, orr, ldset) 130 137 PERCPU_RET_OP(add, add, ldadd) 131 138 132 139 #undef PERCPU_RW_OPS
+1 -1
arch/arm64/include/asm/scs.h
··· 53 53 EDYNSCS_INVALID_CFA_OPCODE = 4, 54 54 }; 55 55 56 - int __pi_scs_patch(const u8 eh_frame[], int size); 56 + int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); 57 57 58 58 #endif /* __ASSEMBLY __ */ 59 59
+1
arch/arm64/include/asm/spectre.h
··· 117 117 __le32 *origptr, __le32 *updptr, int nr_inst); 118 118 void spectre_bhb_patch_clearbhb(struct alt_instr *alt, 119 119 __le32 *origptr, __le32 *updptr, int nr_inst); 120 + void spectre_print_disabled_mitigations(void); 120 121 121 122 #endif /* __ASSEMBLY__ */ 122 123 #endif /* __ASM_SPECTRE_H */
+1 -7
arch/arm64/kernel/acpi.c
··· 197 197 */ 198 198 void __init acpi_boot_table_init(void) 199 199 { 200 - int ret; 201 - 202 200 /* 203 201 * Enable ACPI instead of device tree unless 204 202 * - ACPI has been disabled explicitly (acpi=off), or ··· 250 252 * behaviour, use acpi=nospcr to disable console in ACPI SPCR 251 253 * table as default serial console. 252 254 */ 253 - ret = acpi_parse_spcr(earlycon_acpi_spcr_enable, 255 + acpi_parse_spcr(earlycon_acpi_spcr_enable, 254 256 !param_acpi_nospcr); 255 - if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) 256 - pr_info("Use ACPI SPCR as default console: No\n"); 257 - else 258 - pr_info("Use ACPI SPCR as default console: Yes\n"); 259 257 260 258 if (IS_ENABLED(CONFIG_ACPI_BGRT)) 261 259 acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
+12 -7
arch/arm64/kernel/alternative.c
··· 139 139 } while (cur += d_size, cur < end); 140 140 } 141 141 142 - static void __apply_alternatives(const struct alt_region *region, 143 - bool is_module, 144 - unsigned long *cpucap_mask) 142 + static int __apply_alternatives(const struct alt_region *region, 143 + bool is_module, 144 + unsigned long *cpucap_mask) 145 145 { 146 146 struct alt_instr *alt; 147 147 __le32 *origptr, *updptr; ··· 166 166 updptr = is_module ? origptr : lm_alias(origptr); 167 167 nr_inst = alt->orig_len / AARCH64_INSN_SIZE; 168 168 169 - if (ALT_HAS_CB(alt)) 169 + if (ALT_HAS_CB(alt)) { 170 170 alt_cb = ALT_REPL_PTR(alt); 171 - else 171 + if (is_module && !core_kernel_text((unsigned long)alt_cb)) 172 + return -ENOEXEC; 173 + } else { 172 174 alt_cb = patch_alternative; 175 + } 173 176 174 177 alt_cb(alt, origptr, updptr, nr_inst); 175 178 ··· 196 193 bitmap_and(applied_alternatives, applied_alternatives, 197 194 system_cpucaps, ARM64_NCAPS); 198 195 } 196 + 197 + return 0; 199 198 } 200 199 201 200 static void __init apply_alternatives_vdso(void) ··· 282 277 } 283 278 284 279 #ifdef CONFIG_MODULES 285 - void apply_alternatives_module(void *start, size_t length) 280 + int apply_alternatives_module(void *start, size_t length) 286 281 { 287 282 struct alt_region region = { 288 283 .begin = start, ··· 292 287 293 288 bitmap_fill(all_capabilities, ARM64_NCAPS); 294 289 295 - __apply_alternatives(&region, true, &all_capabilities[0]); 290 + return __apply_alternatives(&region, true, &all_capabilities[0]); 296 291 } 297 292 #endif 298 293
+6
arch/arm64/kernel/cpufeature.c
··· 95 95 #include <asm/vectors.h> 96 96 #include <asm/virt.h> 97 97 98 + #include <asm/spectre.h> 98 99 /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ 99 100 static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; 100 101 ··· 3876 3875 */ 3877 3876 if (system_uses_ttbr0_pan()) 3878 3877 pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); 3878 + 3879 + /* 3880 + * Report Spectre mitigations status. 3881 + */ 3882 + spectre_print_disabled_mitigations(); 3879 3883 } 3880 3884 3881 3885 void __init setup_system_features(void)
+17 -4
arch/arm64/kernel/module.c
··· 489 489 int ret; 490 490 491 491 s = find_section(hdr, sechdrs, ".altinstructions"); 492 - if (s) 493 - apply_alternatives_module((void *)s->sh_addr, s->sh_size); 492 + if (s) { 493 + ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size); 494 + if (ret < 0) { 495 + pr_err("module %s: error occurred when applying alternatives\n", me->name); 496 + return ret; 497 + } 498 + } 494 499 495 500 if (scs_is_dynamic()) { 496 501 s = find_section(hdr, sechdrs, ".init.eh_frame"); 497 502 if (s) { 498 - ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size); 499 - if (ret) 503 + /* 504 + * Because we can reject modules that are malformed 505 + * so SCS patching fails, skip dry run and try to patch 506 + * it in place. If patching fails, the module would not 507 + * be loaded anyway. 508 + */ 509 + ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true); 510 + if (ret) { 500 511 pr_err("module %s: error occurred during dynamic SCS patching (%d)\n", 501 512 me->name, ret); 513 + return -ENOEXEC; 514 + } 502 515 } 503 516 } 504 517
+1 -1
arch/arm64/kernel/pi/map_kernel.c
··· 104 104 105 105 if (enable_scs) { 106 106 scs_patch(__eh_frame_start + va_offset, 107 - __eh_frame_end - __eh_frame_start); 107 + __eh_frame_end - __eh_frame_start, false); 108 108 asm("ic ialluis"); 109 109 110 110 dynamic_scs_is_enabled = true;
+6 -4
arch/arm64/kernel/pi/patch-scs.c
··· 225 225 return 0; 226 226 } 227 227 228 - int scs_patch(const u8 eh_frame[], int size) 228 + int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run) 229 229 { 230 230 int code_alignment_factor = 1; 231 231 bool fde_use_sdata8 = false; ··· 277 277 } 278 278 } else { 279 279 ret = scs_handle_fde_frame(frame, code_alignment_factor, 280 - fde_use_sdata8, true); 280 + fde_use_sdata8, !skip_dry_run); 281 281 if (ret) 282 282 return ret; 283 - scs_handle_fde_frame(frame, code_alignment_factor, 284 - fde_use_sdata8, false); 283 + 284 + if (!skip_dry_run) 285 + scs_handle_fde_frame(frame, code_alignment_factor, 286 + fde_use_sdata8, false); 285 287 } 286 288 287 289 p += sizeof(frame->size) + frame->size;
+1 -1
arch/arm64/kernel/pi/pi.h
··· 27 27 void init_feature_override(u64 boot_status, const void *fdt, int chosen); 28 28 u64 kaslr_early_init(void *fdt, int chosen); 29 29 void relocate_kernel(u64 offset); 30 - int scs_patch(const u8 eh_frame[], int size); 30 + int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); 31 31 32 32 void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa, 33 33 pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
+4 -1
arch/arm64/kernel/probes/kprobes.c
··· 49 49 addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); 50 50 if (!addr) 51 51 return NULL; 52 - set_memory_rox((unsigned long)addr, 1); 52 + if (set_memory_rox((unsigned long)addr, 1)) { 53 + execmem_free(addr); 54 + return NULL; 55 + } 53 56 return addr; 54 57 } 55 58
+18 -17
arch/arm64/kernel/proton-pack.c
··· 91 91 92 92 static bool spectre_v2_mitigations_off(void) 93 93 { 94 - bool ret = __nospectre_v2 || cpu_mitigations_off(); 95 - 96 - if (ret) 97 - pr_info_once("spectre-v2 mitigation disabled by command line option\n"); 98 - 99 - return ret; 94 + return __nospectre_v2 || cpu_mitigations_off(); 100 95 } 101 96 102 97 static const char *get_bhb_affected_string(enum mitigation_state bhb_state) ··· 416 421 */ 417 422 static bool spectre_v4_mitigations_off(void) 418 423 { 419 - bool ret = cpu_mitigations_off() || 420 - __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; 421 - 422 - if (ret) 423 - pr_info_once("spectre-v4 mitigation disabled by command-line option\n"); 424 - 425 - return ret; 424 + return cpu_mitigations_off() || 425 + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; 426 426 } 427 427 428 428 /* Do we need to toggle the mitigation state on entry to/exit from the kernel? */ ··· 1032 1042 1033 1043 if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { 1034 1044 /* No point mitigating Spectre-BHB alone. */ 1035 - } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { 1036 - pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); 1037 - } else if (cpu_mitigations_off() || __nospectre_bhb) { 1038 - pr_info_once("spectre-bhb mitigation disabled by command line option\n"); 1039 1045 } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { 1040 1046 state = SPECTRE_MITIGATED; 1041 1047 set_bit(BHB_HW, &system_bhb_mitigations); ··· 1185 1199 pr_err("WARNING: %s", EBPF_WARN); 1186 1200 } 1187 1201 #endif 1202 + 1203 + void spectre_print_disabled_mitigations(void) 1204 + { 1205 + /* Keep a single copy of the common message suffix to avoid duplication. 
*/ 1206 + const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n"; 1207 + 1208 + if (spectre_v2_mitigations_off()) 1209 + pr_info("spectre-v2 %s", spectre_disabled_suffix); 1210 + 1211 + if (spectre_v4_mitigations_off()) 1212 + pr_info("spectre-v4 %s", spectre_disabled_suffix); 1213 + 1214 + if (__nospectre_bhb || cpu_mitigations_off()) 1215 + pr_info("spectre-bhb %s", spectre_disabled_suffix); 1216 + }
+80 -31
arch/arm64/mm/mmu.c
··· 708 708 return ret; 709 709 } 710 710 711 + static inline bool force_pte_mapping(void) 712 + { 713 + const bool bbml2 = system_capabilities_finalized() ? 714 + system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); 715 + 716 + if (debug_pagealloc_enabled()) 717 + return true; 718 + if (bbml2) 719 + return false; 720 + return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world(); 721 + } 722 + 723 + static inline bool split_leaf_mapping_possible(void) 724 + { 725 + /* 726 + * !BBML2_NOABORT systems should never run into scenarios where we would 727 + * have to split. So exit early and let calling code detect it and raise 728 + * a warning. 729 + */ 730 + if (!system_supports_bbml2_noabort()) 731 + return false; 732 + return !force_pte_mapping(); 733 + } 734 + 711 735 static DEFINE_MUTEX(pgtable_split_lock); 712 736 713 737 int split_kernel_leaf_mapping(unsigned long start, unsigned long end) ··· 739 715 int ret; 740 716 741 717 /* 742 - * !BBML2_NOABORT systems should not be trying to change permissions on 743 - * anything that is not pte-mapped in the first place. Just return early 744 - * and let the permission change code raise a warning if not already 745 - * pte-mapped. 718 + * Exit early if the region is within a pte-mapped area or if we can't 719 + * split. For the latter case, the permission change code will raise a 720 + * warning if not already pte-mapped. 
746 721 */ 747 - if (!system_supports_bbml2_noabort()) 722 + if (!split_leaf_mapping_possible() || is_kfence_address((void *)start)) 748 723 return 0; 749 724 750 725 /* ··· 781 758 return ret; 782 759 } 783 760 784 - static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, 785 - unsigned long next, 786 - struct mm_walk *walk) 761 + static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, 762 + unsigned long next, struct mm_walk *walk) 787 763 { 764 + gfp_t gfp = *(gfp_t *)walk->private; 788 765 pud_t pud = pudp_get(pudp); 789 766 int ret = 0; 790 767 791 768 if (pud_leaf(pud)) 792 - ret = split_pud(pudp, pud, GFP_ATOMIC, false); 769 + ret = split_pud(pudp, pud, gfp, false); 793 770 794 771 return ret; 795 772 } 796 773 797 - static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, 798 - unsigned long next, 799 - struct mm_walk *walk) 774 + static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, 775 + unsigned long next, struct mm_walk *walk) 800 776 { 777 + gfp_t gfp = *(gfp_t *)walk->private; 801 778 pmd_t pmd = pmdp_get(pmdp); 802 779 int ret = 0; 803 780 804 781 if (pmd_leaf(pmd)) { 805 782 if (pmd_cont(pmd)) 806 783 split_contpmd(pmdp); 807 - ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false); 784 + ret = split_pmd(pmdp, pmd, gfp, false); 808 785 809 786 /* 810 787 * We have split the pmd directly to ptes so there is no need to ··· 816 793 return ret; 817 794 } 818 795 819 - static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, 820 - unsigned long next, 821 - struct mm_walk *walk) 796 + static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, 797 + unsigned long next, struct mm_walk *walk) 822 798 { 823 799 pte_t pte = __ptep_get(ptep); 824 800 ··· 827 805 return 0; 828 806 } 829 807 830 - static const struct mm_walk_ops split_to_ptes_ops __initconst = { 808 + static const struct mm_walk_ops split_to_ptes_ops = { 831 809 .pud_entry = split_to_ptes_pud_entry, 832 810 
.pmd_entry = split_to_ptes_pmd_entry, 833 811 .pte_entry = split_to_ptes_pte_entry, 834 812 }; 813 + 814 + static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp) 815 + { 816 + int ret; 817 + 818 + arch_enter_lazy_mmu_mode(); 819 + ret = walk_kernel_page_table_range_lockless(start, end, 820 + &split_to_ptes_ops, NULL, &gfp); 821 + arch_leave_lazy_mmu_mode(); 822 + 823 + return ret; 824 + } 835 825 836 826 static bool linear_map_requires_bbml2 __initdata; 837 827 ··· 881 847 * PTE. The kernel alias remains static throughout runtime so 882 848 * can continue to be safely mapped with large mappings. 883 849 */ 884 - ret = walk_kernel_page_table_range_lockless(lstart, kstart, 885 - &split_to_ptes_ops, NULL, NULL); 850 + ret = range_split_to_ptes(lstart, kstart, GFP_ATOMIC); 886 851 if (!ret) 887 - ret = walk_kernel_page_table_range_lockless(kend, lend, 888 - &split_to_ptes_ops, NULL, NULL); 852 + ret = range_split_to_ptes(kend, lend, GFP_ATOMIC); 889 853 if (ret) 890 854 panic("Failed to split linear map\n"); 891 855 flush_tlb_kernel_range(lstart, lend); ··· 1034 1002 memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); 1035 1003 __kfence_pool = phys_to_virt(kfence_pool); 1036 1004 } 1005 + 1006 + bool arch_kfence_init_pool(void) 1007 + { 1008 + unsigned long start = (unsigned long)__kfence_pool; 1009 + unsigned long end = start + KFENCE_POOL_SIZE; 1010 + int ret; 1011 + 1012 + /* Exit early if we know the linear map is already pte-mapped. */ 1013 + if (!split_leaf_mapping_possible()) 1014 + return true; 1015 + 1016 + /* Kfence pool is already pte-mapped for the early init case. 
*/ 1017 + if (kfence_early_init) 1018 + return true; 1019 + 1020 + mutex_lock(&pgtable_split_lock); 1021 + ret = range_split_to_ptes(start, end, GFP_PGTABLE_KERNEL); 1022 + mutex_unlock(&pgtable_split_lock); 1023 + 1024 + /* 1025 + * Since the system supports bbml2_noabort, tlb invalidation is not 1026 + * required here; the pgtable mappings have been split to pte but larger 1027 + * entries may safely linger in the TLB. 1028 + */ 1029 + 1030 + return !ret; 1031 + } 1037 1032 #else /* CONFIG_KFENCE */ 1038 1033 1039 1034 static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } 1040 1035 static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { } 1041 1036 1042 1037 #endif /* CONFIG_KFENCE */ 1043 - 1044 - static inline bool force_pte_mapping(void) 1045 - { 1046 - bool bbml2 = system_capabilities_finalized() ? 1047 - system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); 1048 - 1049 - return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || 1050 - is_realm_world())) || 1051 - debug_pagealloc_enabled(); 1052 - } 1053 1038 1054 1039 static void __init map_mem(pgd_t *pgdp) 1055 1040 {