Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
"The critical one here is a fix for fpsimd register corruption across
signals which was introduced by the SVE support code (the register
files overlap), but the others are worth having as well.

Summary:

- Fix FP register corruption when SVE is not available or in use

- Fix out-of-tree module build failure when CONFIG_ARM64_MODULE_PLTS=y

- Missing 'const' generating errors with LTO builds

- Remove unsupported events from Cortex-A73 PMU description

- Removal of stale and incorrect comments"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
arm64: context: Fix comments and remove pointless smp_wmb()
arm64: cpu_ops: Add missing 'const' qualifiers
arm64: perf: remove unsupported events for Cortex-A73
arm64: fpsimd: Fix failure to restore FPSIMD state after signals
arm64: pgd: Mark pgd_cache as __ro_after_init
arm64: ftrace: emit ftrace-mod.o contents through code
arm64: module-plts: factor out PLT generation code for ftrace
arm64: mm: cleanup stale AIVIVT references

+92 -93
-3
arch/arm64/Makefile
···

  ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
  KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
- ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
- KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o
- endif
  endif

  # Default value
+1 -1
arch/arm64/include/asm/cacheflush.h
···
   *
   * See Documentation/cachetlb.txt for more information. Please note that
   * the implementation assumes non-aliasing VIPT D-cache and (aliasing)
-  * VIPT or ASID-tagged VIVT I-cache.
+  * VIPT I-cache.
   *
   * flush_cache_mm(mm)
   *
+45 -1
arch/arm64/include/asm/module.h
···
          struct mod_plt_sec      init;

          /* for CONFIG_DYNAMIC_FTRACE */
-         void                    *ftrace_trampoline;
+         struct plt_entry        *ftrace_trampoline;
  };
  #endif

···
  #else
  #define module_alloc_base       ((u64)_etext - MODULES_VSIZE)
  #endif
+
+ struct plt_entry {
+         /*
+          * A program that conforms to the AArch64 Procedure Call Standard
+          * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+          * IP1 (x17) may be inserted at any branch instruction that is
+          * exposed to a relocation that supports long branches. Since that
+          * is exactly what we are dealing with here, we are free to use x16
+          * as a scratch register in the PLT veneers.
+          */
+         __le32  mov0;   /* movn x16, #0x....            */
+         __le32  mov1;   /* movk x16, #0x...., lsl #16   */
+         __le32  mov2;   /* movk x16, #0x...., lsl #32   */
+         __le32  br;     /* br   x16                     */
+ };
+
+ static inline struct plt_entry get_plt_entry(u64 val)
+ {
+         /*
+          * MOVK/MOVN/MOVZ opcode:
+          * +--------+------------+--------+-----------+-------------+---------+
+          * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+          * +--------+------------+--------+-----------+-------------+---------+
+          *
+          * Rd  := 0x10 (x16)
+          * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+          * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+          * sf  := 1 (64-bit variant)
+          */
+         return (struct plt_entry){
+                 cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+                 cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+                 cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+                 cpu_to_le32(0xd61f0200)
+         };
+ }
+
+ static inline bool plt_entries_equal(const struct plt_entry *a,
+                                      const struct plt_entry *b)
+ {
+         return a->mov0 == b->mov0 &&
+                a->mov1 == b->mov1 &&
+                a->mov2 == b->mov2;
+ }

  #endif /* __ASM_MODULE_H */
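The new get_plt_entry() helper open-codes the MOVN/MOVK/BR sequence described in the opcode comment. As a rough standalone illustration of how a target address is split into three 16-bit immediates, here is a userspace sketch that applies the same masks and shifts, using plain uint32_t in place of __le32 (the helper name and the example address are made up for this sketch, not part of the kernel):

#include <stdint.h>
#include <stdio.h>

/*
 * Build the four instruction words of a PLT veneer for 'val', mirroring
 * the constants used by get_plt_entry() above:
 *   movn x16, #~val[15:0]            ; x16 = val[15:0], upper bits all 1s
 *   movk x16, #val[31:16], lsl #16
 *   movk x16, #val[47:32], lsl #32
 *   br   x16
 */
static void encode_plt_words(uint64_t val, uint32_t insn[4])
{
        insn[0] = 0x92800010u | (uint32_t)(((~val      ) & 0xffff) << 5);
        insn[1] = 0xf2a00010u | (uint32_t)((( val >> 16) & 0xffff) << 5);
        insn[2] = 0xf2c00010u | (uint32_t)((( val >> 32) & 0xffff) << 5);
        insn[3] = 0xd61f0200u; /* br x16 */
}

int main(void)
{
        uint32_t insn[4];

        /* Arbitrary kernel-style address, purely for demonstration. */
        encode_plt_words(0xffff000008123456ULL, insn);
        for (int i = 0; i < 4; i++)
                printf("insn[%d] = 0x%08x\n", i, insn[i]);
        return 0;
}

Because MOVN writes the complement of its immediate, bits [63:48] of x16 end up all-ones, which suits the kernel-space addresses these veneers branch to.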
-3
arch/arm64/kernel/Makefile
···
  ifeq ($(CONFIG_DEBUG_EFI),y)
  AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
  endif
-
- # will be included by each individual module but not by the core kernel itself
- extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o
+3 -3
arch/arm64/kernel/cpu_ops.c
···

  const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;

- static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
+ static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
          &smp_spin_table_ops,
          &cpu_psci_ops,
          NULL,
  };

- static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+ static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
  #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
          &acpi_parking_protocol_ops,
  #endif
···

  static const struct cpu_operations * __init cpu_get_ops(const char *name)
  {
-         const struct cpu_operations **ops;
+         const struct cpu_operations *const *ops;

          ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;

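The added const is what lets an __initconst array actually live in a read-only section: without it, only the pointed-to cpu_operations structures are const while the array of pointers itself stays writable, which is the kind of mismatch the summary's "missing 'const'" item refers to. A minimal standalone illustration of the distinction (hypothetical types and names, not kernel code):

#include <stdio.h>

struct ops { const char *name; };

static const struct ops spin_table = { "spin-table" };
static const struct ops psci       = { "psci" };

/*
 * Pointers to const data, but the array itself is writable: it cannot be
 * placed in a read-only section such as the one __initconst selects.
 */
static const struct ops *writable_table[] = { &spin_table, &psci, NULL };

/*
 * Const pointers to const data: the whole array is read-only, so placing
 * it in a read-only section is consistent, which is what the extra
 * 'const' in cpu_ops.c buys.
 */
static const struct ops *const readonly_table[] = { &spin_table, &psci, NULL };

int main(void)
{
        writable_table[0] = &psci;       /* compiles: the array is mutable      */
        /* readonly_table[0] = &psci; */ /* would not compile: array is const   */

        for (const struct ops *const *p = readonly_table; *p; p++)
                printf("%s\n", (*p)->name);
        return 0;
}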
+3 -3
arch/arm64/kernel/fpsimd.c
···

          local_bh_disable();

-         if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
-                 current->thread.fpsimd_state = *state;
+         current->thread.fpsimd_state = *state;
+         if (system_supports_sve() && test_thread_flag(TIF_SVE))
                  fpsimd_to_sve(current);
-         }
+
          task_fpsimd_load();

          if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
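The hunk above makes the copy into current->thread.fpsimd_state unconditional and keeps only the conversion to the SVE view behind the TIF_SVE check. The overlap mentioned in the pull message is that the FPSIMD V registers are architecturally the low 128 bits of the SVE Z registers, so the two layouts describe the same storage rather than separate register files. A toy, userspace-only illustration of that aliasing (the union, sizes and names are made up for this sketch; a 256-bit vector length is assumed):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Toy model of one vector register at an SVE vector length of 256 bits:
 * the architectural V register occupies the low 128 bits of the Z
 * register, so writing one view clobbers (part of) the other.
 */
union vector_reg {
        uint8_t z[32]; /* Z register, 256 bits at this vector length     */
        uint8_t v[16]; /* V register: aliases the first 16 bytes of z    */
};

int main(void)
{
        union vector_reg r;

        memset(r.z, 0xaa, sizeof(r.z));
        memset(r.v, 0x55, sizeof(r.v));  /* overwrites z[0..15] as well  */
        printf("z[0]=0x%02x z[16]=0x%02x\n", r.z[0], r.z[16]); /* 0x55 0xaa */
        return 0;
}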
-18
arch/arm64/kernel/ftrace-mod.S
···
- /*
-  * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
-  *
-  * This program is free software; you can redistribute it and/or modify
-  * it under the terms of the GNU General Public License version 2 as
-  * published by the Free Software Foundation.
-  */
-
- #include <linux/linkage.h>
- #include <asm/assembler.h>
-
-         .section        ".text.ftrace_trampoline", "ax"
-         .align          3
- 0:      .quad           0
- __ftrace_trampoline:
-         ldr             x16, 0b
-         br              x16
- ENDPROC(__ftrace_trampoline)
+8 -6
arch/arm64/kernel/ftrace.c
···

          if (offset < -SZ_128M || offset >= SZ_128M) {
  #ifdef CONFIG_ARM64_MODULE_PLTS
-                 unsigned long *trampoline;
+                 struct plt_entry trampoline;
                  struct module *mod;

                  /*
···
                   * is added in the future, but for now, the pr_err() below
                   * deals with a theoretical issue only.
                   */
-                 trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
-                 if (trampoline[0] != addr) {
-                         if (trampoline[0] != 0) {
+                 trampoline = get_plt_entry(addr);
+                 if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+                                        &trampoline)) {
+                         if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+                                                &(struct plt_entry){})) {
                                  pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
                                  return -EINVAL;
                          }

                          /* point the trampoline to our ftrace entry point */
                          module_disable_ro(mod);
-                         trampoline[0] = addr;
+                         *mod->arch.ftrace_trampoline = trampoline;
                          module_enable_ro(mod, true);

                          /* update trampoline before patching in the branch */
                          smp_wmb();
                  }
-                 addr = (unsigned long)&trampoline[1];
+                 addr = (unsigned long)(void *)mod->arch.ftrace_trampoline;
  #else /* CONFIG_ARM64_MODULE_PLTS */
                  return -EINVAL;
  #endif /* CONFIG_ARM64_MODULE_PLTS */
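The PLT path above is only entered when the target is out of range of a direct branch: the AArch64 b/bl encoding carries a signed 26-bit word offset, giving a reach of +/-128 MiB from the call site. A standalone sketch of that range check (the function name, sign convention and example addresses are illustrative, not taken from the kernel):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_128M (128 * 1024 * 1024)

/*
 * A b/bl instruction encodes a signed 26-bit word offset, i.e. +/-128 MiB
 * from the branch instruction. Module text loaded further than that from
 * the ftrace entry point has to go through the module's PLT trampoline.
 */
static bool branch_in_range(uint64_t pc, uint64_t target)
{
        int64_t offset = (int64_t)(target - pc);

        return offset >= -(int64_t)SZ_128M && offset < (int64_t)SZ_128M;
}

int main(void)
{
        /* A call site roughly 200 MiB away from the branch target. */
        uint64_t target   = 0xffff000008010000ULL;
        uint64_t callsite = target + 200 * 1024 * 1024;

        printf("in range: %d\n", branch_in_range(callsite, target)); /* 0 -> PLT needed */
        return 0;
}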
+14 -36
arch/arm64/kernel/module-plts.c
···
  #include <linux/module.h>
  #include <linux/sort.h>

- struct plt_entry {
-         /*
-          * A program that conforms to the AArch64 Procedure Call Standard
-          * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
-          * IP1 (x17) may be inserted at any branch instruction that is
-          * exposed to a relocation that supports long branches. Since that
-          * is exactly what we are dealing with here, we are free to use x16
-          * as a scratch register in the PLT veneers.
-          */
-         __le32  mov0;   /* movn x16, #0x....            */
-         __le32  mov1;   /* movk x16, #0x...., lsl #16   */
-         __le32  mov2;   /* movk x16, #0x...., lsl #32   */
-         __le32  br;     /* br   x16                     */
- };
-
  static bool in_init(const struct module *mod, void *loc)
  {
          return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
···
          int i = pltsec->plt_num_entries;
          u64 val = sym->st_value + rela->r_addend;

-         /*
-          * MOVK/MOVN/MOVZ opcode:
-          * +--------+------------+--------+-----------+-------------+---------+
-          * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
-          * +--------+------------+--------+-----------+-------------+---------+
-          *
-          * Rd  := 0x10 (x16)
-          * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
-          * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
-          * sf  := 1 (64-bit variant)
-          */
-         plt[i] = (struct plt_entry){
-                 cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
-                 cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
-                 cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
-                 cpu_to_le32(0xd61f0200)
-         };
+         plt[i] = get_plt_entry(val);

          /*
           * Check if the entry we just created is a duplicate. Given that the
           * relocations are sorted, this will be the last entry we allocated.
           * (if one exists).
           */
-         if (i > 0 &&
-             plt[i].mov0 == plt[i - 1].mov0 &&
-             plt[i].mov1 == plt[i - 1].mov1 &&
-             plt[i].mov2 == plt[i - 1].mov2)
+         if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
                  return (u64)&plt[i - 1];

          pltsec->plt_num_entries++;
···
          unsigned long core_plts = 0;
          unsigned long init_plts = 0;
          Elf64_Sym *syms = NULL;
+         Elf_Shdr *tramp = NULL;
          int i;

          /*
···
                          mod->arch.core.plt = sechdrs + i;
                  else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
                          mod->arch.init.plt = sechdrs + i;
+                 else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
+                          !strcmp(secstrings + sechdrs[i].sh_name,
+                                  ".text.ftrace_trampoline"))
+                         tramp = sechdrs + i;
                  else if (sechdrs[i].sh_type == SHT_SYMTAB)
                          syms = (Elf64_Sym *)sechdrs[i].sh_addr;
          }
···
          mod->arch.init.plt->sh_size = (init_plts + 1) * sizeof(struct plt_entry);
          mod->arch.init.plt_num_entries = 0;
          mod->arch.init.plt_max_entries = init_plts;
+
+         if (tramp) {
+                 tramp->sh_type = SHT_NOBITS;
+                 tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+                 tramp->sh_addralign = __alignof__(struct plt_entry);
+                 tramp->sh_size = sizeof(struct plt_entry);
+         }

          return 0;
  }
+1
arch/arm64/kernel/module.lds
···
  SECTIONS {
          .plt (NOLOAD) : { BYTE(0) }
          .init.plt (NOLOAD) : { BYTE(0) }
+         .text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
  }
-6
arch/arm64/kernel/perf_event.c
···

          [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
          [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
-
-         [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-         [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-
-         [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-         [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
  };

  static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+16 -12
arch/arm64/mm/context.c
···

          set_reserved_asid_bits();

-         /*
-          * Ensure the generation bump is observed before we xchg the
-          * active_asids.
-          */
-         smp_wmb();
-
          for_each_possible_cpu(i) {
                  asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
                  /*
···
                  per_cpu(reserved_asids, i) = asid;
          }

-         /* Queue a TLB invalidate and flush the I-cache if necessary. */
+         /*
+          * Queue a TLB invalidation for each CPU to perform on next
+          * context-switch
+          */
          cpumask_setall(&tlb_flush_pending);
  }
···
          asid = atomic64_read(&mm->context.id);

          /*
-          * The memory ordering here is subtle. We rely on the control
-          * dependency between the generation read and the update of
-          * active_asids to ensure that we are synchronised with a
-          * parallel rollover (i.e. this pairs with the smp_wmb() in
-          * flush_context).
+          * The memory ordering here is subtle.
+          * If our ASID matches the current generation, then we update
+          * our active_asids entry with a relaxed xchg. Racing with a
+          * concurrent rollover means that either:
+          *
+          * - We get a zero back from the xchg and end up waiting on the
+          *   lock. Taking the lock synchronises with the rollover and so
+          *   we are forced to see the updated generation.
+          *
+          * - We get a valid ASID back from the xchg, which means the
+          *   relaxed xchg in flush_context will treat us as reserved
+          *   because atomic RmWs are totally ordered for a given location.
           */
          if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
              && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
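The fast-path test near the end of the hunk, !((asid ^ atomic64_read(&asid_generation)) >> asid_bits), works because the context id packs the rollover generation in the bits above asid_bits and the hardware ASID in the bits below. A small standalone sketch of that comparison (the names and the fixed 16-bit width are illustrative; the real asid_bits is probed from the CPU at boot):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ASID_BITS 16 /* illustrative; probed from the ID registers in the kernel */

/*
 * Conceptual context.id layout: | generation | hardware ASID |
 *                                63         ASID_BITS        0
 * XOR-ing with the current generation and shifting out the low ASID bits
 * leaves zero exactly when the stored generation is the current one.
 */
static bool asid_generation_matches(uint64_t ctx_id, uint64_t generation)
{
        return ((ctx_id ^ generation) >> ASID_BITS) == 0;
}

int main(void)
{
        uint64_t generation = 3ULL << ASID_BITS;            /* third rollover          */
        uint64_t ctx_id     = (3ULL << ASID_BITS) | 0x2a;   /* ASID 42, same generation */

        printf("current: %d\n", asid_generation_matches(ctx_id, generation));          /* 1 */
        printf("stale:   %d\n", asid_generation_matches(ctx_id, 4ULL << ASID_BITS));   /* 0 */
        return 0;
}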
+1 -1
arch/arm64/mm/pgd.c
···
  #include <asm/page.h>
  #include <asm/tlbflush.h>

- static struct kmem_cache *pgd_cache;
+ static struct kmem_cache *pgd_cache __ro_after_init;

  pgd_t *pgd_alloc(struct mm_struct *mm)
  {