Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'riscv-for-linus-6.6-mw2-2' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

Pull more RISC-V updates from Palmer Dabbelt:

- The kernel now dynamically probes for misaligned access speed, as
opposed to relying on a table of known implementations.

- Support for non-coherent devices on systems using the Andes AX45MP
core, including the RZ/Five SoCs.

- Support for the V extension in ptrace(), again.

- Support for KASLR.

- Support for the BPF prog pack allocator in RISC-V.

- A handful of bug fixes and cleanups.

* tag 'riscv-for-linus-6.6-mw2-2' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (25 commits)
soc: renesas: Kconfig: For ARCH_R9A07G043 select the required configs if dependencies are met
riscv: Kconfig.errata: Add dependency for RISCV_SBI in ERRATA_ANDES config
riscv: Kconfig.errata: Drop dependency for MMU in ERRATA_ANDES_CMO config
riscv: Kconfig: Select DMA_DIRECT_REMAP only if MMU is enabled
bpf, riscv: use prog pack allocator in the BPF JIT
riscv: implement a memset like function for text
riscv: extend patch_text_nosync() for multiple pages
bpf: make bpf_prog_pack allocator portable
riscv: libstub: Implement KASLR by using generic functions
libstub: Fix compilation warning for rv32
arm64: libstub: Move KASLR handling functions to kaslr.c
riscv: Dump out kernel offset information on panic
riscv: Introduce virtual kernel mapping KASLR
RISC-V: Add ptrace support for vectors
soc: renesas: Kconfig: Select the required configs for RZ/Five SoC
cache: Add L2 cache management for Andes AX45MP RISC-V core
dt-bindings: cache: andestech,ax45mp-cache: Add DT binding documentation for L2 cache controller
riscv: mm: dma-noncoherent: nonstandard cache operations support
riscv: errata: Add Andes alternative ports
riscv: asm: vendorid_list: Add Andes Technology to the vendors list
...

+1440 -216
+81
Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml
··· 1 + # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 + # Copyright (C) 2023 Renesas Electronics Corp. 3 + %YAML 1.2 4 + --- 5 + $id: http://devicetree.org/schemas/cache/andestech,ax45mp-cache.yaml# 6 + $schema: http://devicetree.org/meta-schemas/core.yaml# 7 + 8 + title: Andestech AX45MP L2 Cache Controller 9 + 10 + maintainers: 11 + - Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> 12 + 13 + description: 14 + A level-2 cache (L2C) is used to improve the system performance by providing 15 + a large amount of cache line entries and reasonable access delays. The L2C 16 + is shared between cores, and a non-inclusive non-exclusive policy is used. 17 + 18 + select: 19 + properties: 20 + compatible: 21 + contains: 22 + enum: 23 + - andestech,ax45mp-cache 24 + 25 + required: 26 + - compatible 27 + 28 + properties: 29 + compatible: 30 + items: 31 + - const: andestech,ax45mp-cache 32 + - const: cache 33 + 34 + reg: 35 + maxItems: 1 36 + 37 + interrupts: 38 + maxItems: 1 39 + 40 + cache-line-size: 41 + const: 64 42 + 43 + cache-level: 44 + const: 2 45 + 46 + cache-sets: 47 + const: 1024 48 + 49 + cache-size: 50 + enum: [131072, 262144, 524288, 1048576, 2097152] 51 + 52 + cache-unified: true 53 + 54 + next-level-cache: true 55 + 56 + additionalProperties: false 57 + 58 + required: 59 + - compatible 60 + - reg 61 + - interrupts 62 + - cache-line-size 63 + - cache-level 64 + - cache-sets 65 + - cache-size 66 + - cache-unified 67 + 68 + examples: 69 + - | 70 + #include <dt-bindings/interrupt-controller/irq.h> 71 + 72 + cache-controller@2010000 { 73 + compatible = "andestech,ax45mp-cache", "cache"; 74 + reg = <0x13400000 0x100000>; 75 + interrupts = <508 IRQ_TYPE_LEVEL_HIGH>; 76 + cache-line-size = <64>; 77 + cache-level = <2>; 78 + cache-sets = <1024>; 79 + cache-size = <262144>; 80 + cache-unified; 81 + };
+5 -6
Documentation/riscv/hwprobe.rst
··· 87 87 emulated via software, either in or below the kernel. These accesses are 88 88 always extremely slow. 89 89 90 - * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported 91 - in hardware, but are slower than the corresponding aligned accesses 92 - sequences. 90 + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower 91 + than equivalent byte accesses. Misaligned accesses may be supported 92 + directly in hardware, or trapped and emulated by software. 93 93 94 - * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported 95 - in hardware and are faster than the corresponding aligned accesses 96 - sequences. 94 + * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster 95 + than equivalent byte accesses. 97 96 98 97 * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are 99 98 not supported at all and will generate a misaligned address fault.
+7
MAINTAINERS
··· 20406 20406 T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git 20407 20407 F: drivers/staging/ 20408 20408 20409 + STANDALONE CACHE CONTROLLER DRIVERS 20410 + M: Conor Dooley <conor@kernel.org> 20411 + L: linux-riscv@lists.infradead.org 20412 + S: Maintained 20413 + T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/ 20414 + F: drivers/cache 20415 + 20409 20416 STARFIRE/DURALAN NETWORK DRIVER 20410 20417 M: Ion Badulescu <ionut@badula.org> 20411 20418 S: Odd Fixes
+2
arch/arm64/include/asm/efi.h
··· 156 156 157 157 efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f); 158 158 159 + void efi_icache_sync(unsigned long start, unsigned long end); 160 + 159 161 #endif /* _ASM_EFI_H */
+27 -1
arch/riscv/Kconfig
··· 273 273 select ARCH_HAS_SYNC_DMA_FOR_CPU 274 274 select ARCH_HAS_SYNC_DMA_FOR_DEVICE 275 275 select DMA_BOUNCE_UNALIGNED_KMALLOC if SWIOTLB 276 - select DMA_DIRECT_REMAP 276 + select DMA_DIRECT_REMAP if MMU 277 + 278 + config RISCV_NONSTANDARD_CACHE_OPS 279 + bool 280 + depends on RISCV_DMA_NONCOHERENT 281 + help 282 + This enables function pointer support for non-standard noncoherent 283 + systems to handle cache management. 277 284 278 285 config AS_HAS_INSN 279 286 def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) ··· 717 710 Since RISCV uses the RELA relocation format, this requires a 718 711 relocation pass at runtime even if the kernel is loaded at the 719 712 same address it was linked at. 713 + 714 + If unsure, say N. 715 + 716 + config RANDOMIZE_BASE 717 + bool "Randomize the address of the kernel image" 718 + select RELOCATABLE 719 + depends on MMU && 64BIT && !XIP_KERNEL 720 + help 721 + Randomizes the virtual address at which the kernel image is 722 + loaded, as a security feature that deters exploit attempts 723 + relying on knowledge of the location of kernel internals. 724 + 725 + It is the bootloader's job to provide entropy, by passing a 726 + random u64 value in /chosen/kaslr-seed at kernel entry. 727 + 728 + When booting via the UEFI stub, it will invoke the firmware's 729 + EFI_RNG_PROTOCOL implementation (if available) to supply entropy 730 + to the kernel proper. In addition, it will randomise the physical 731 + location of the kernel Image as well. 720 732 721 733 If unsure, say N. 722 734
+21
arch/riscv/Kconfig.errata
··· 1 1 menu "CPU errata selection" 2 2 3 + config ERRATA_ANDES 4 + bool "Andes AX45MP errata" 5 + depends on RISCV_ALTERNATIVE && RISCV_SBI 6 + help 7 + All Andes errata Kconfig depend on this Kconfig. Disabling 8 + this Kconfig will disable all Andes errata. Please say "Y" 9 + here if your platform uses Andes CPU cores. 10 + 11 + Otherwise, please say "N" here to avoid unnecessary overhead. 12 + 13 + config ERRATA_ANDES_CMO 14 + bool "Apply Andes cache management errata" 15 + depends on ERRATA_ANDES && ARCH_R9A07G043 16 + select RISCV_DMA_NONCOHERENT 17 + default y 18 + help 19 + This will apply the cache management errata to handle the 20 + non-standard handling on non-coherent operations on Andes cores. 21 + 22 + If you don't know what to do here, say "Y". 23 + 3 24 config ERRATA_SIFIVE 4 25 bool "SiFive errata" 5 26 depends on RISCV_ALTERNATIVE
+1
arch/riscv/errata/Makefile
··· 2 2 KBUILD_CFLAGS += -fno-pie 3 3 endif 4 4 5 + obj-$(CONFIG_ERRATA_ANDES) += andes/ 5 6 obj-$(CONFIG_ERRATA_SIFIVE) += sifive/ 6 7 obj-$(CONFIG_ERRATA_THEAD) += thead/
+1
arch/riscv/errata/andes/Makefile
··· 1 + obj-y += errata.o
+66
arch/riscv/errata/andes/errata.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Erratas to be applied for Andes CPU cores 4 + * 5 + * Copyright (C) 2023 Renesas Electronics Corporation. 6 + * 7 + * Author: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> 8 + */ 9 + 10 + #include <linux/memory.h> 11 + #include <linux/module.h> 12 + 13 + #include <asm/alternative.h> 14 + #include <asm/cacheflush.h> 15 + #include <asm/errata_list.h> 16 + #include <asm/patch.h> 17 + #include <asm/processor.h> 18 + #include <asm/sbi.h> 19 + #include <asm/vendorid_list.h> 20 + 21 + #define ANDESTECH_AX45MP_MARCHID 0x8000000000008a45UL 22 + #define ANDESTECH_AX45MP_MIMPID 0x500UL 23 + #define ANDESTECH_SBI_EXT_ANDES 0x0900031E 24 + 25 + #define ANDES_SBI_EXT_IOCP_SW_WORKAROUND 1 26 + 27 + static long ax45mp_iocp_sw_workaround(void) 28 + { 29 + struct sbiret ret; 30 + 31 + /* 32 + * ANDES_SBI_EXT_IOCP_SW_WORKAROUND SBI EXT checks if the IOCP is missing and 33 + * cache is controllable only then CMO will be applied to the platform. 34 + */ 35 + ret = sbi_ecall(ANDESTECH_SBI_EXT_ANDES, ANDES_SBI_EXT_IOCP_SW_WORKAROUND, 36 + 0, 0, 0, 0, 0, 0); 37 + 38 + return ret.error ? 0 : ret.value; 39 + } 40 + 41 + static bool errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) 42 + { 43 + if (!IS_ENABLED(CONFIG_ERRATA_ANDES_CMO)) 44 + return false; 45 + 46 + if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID) 47 + return false; 48 + 49 + if (!ax45mp_iocp_sw_workaround()) 50 + return false; 51 + 52 + /* Set this just to make core cbo code happy */ 53 + riscv_cbom_block_size = 1; 54 + riscv_noncoherent_supported(); 55 + 56 + return true; 57 + } 58 + 59 + void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, 60 + unsigned long archid, unsigned long impid, 61 + unsigned int stage) 62 + { 63 + errata_probe_iocp(stage, archid, impid); 64 + 65 + /* we have nothing to patch here ATM so just return back */ 66 + }
-8
arch/riscv/errata/thead/errata.c
··· 120 120 if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) 121 121 local_flush_icache_all(); 122 122 } 123 - 124 - void thead_feature_probe_func(unsigned int cpu, 125 - unsigned long archid, 126 - unsigned long impid) 127 - { 128 - if ((archid == 0) && (impid == 0)) 129 - per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST; 130 - }
+3 -5
arch/riscv/include/asm/alternative.h
··· 30 30 #define ALT_OLD_PTR(a) __ALT_PTR(a, old_offset) 31 31 #define ALT_ALT_PTR(a) __ALT_PTR(a, alt_offset) 32 32 33 - void probe_vendor_features(unsigned int cpu); 34 33 void __init apply_boot_alternatives(void); 35 34 void __init apply_early_boot_alternatives(void); 36 35 void apply_module_alternatives(void *start, size_t length); ··· 45 46 u32 patch_id; /* The patch ID (erratum ID or cpufeature ID) */ 46 47 }; 47 48 49 + void andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, 50 + unsigned long archid, unsigned long impid, 51 + unsigned int stage); 48 52 void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, 49 53 unsigned long archid, unsigned long impid, 50 54 unsigned int stage); ··· 55 53 unsigned long archid, unsigned long impid, 56 54 unsigned int stage); 57 55 58 - void thead_feature_probe_func(unsigned int cpu, unsigned long archid, 59 - unsigned long impid); 60 - 61 56 void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end, 62 57 unsigned int stage); 63 58 64 59 #else /* CONFIG_RISCV_ALTERNATIVE */ 65 60 66 - static inline void probe_vendor_features(unsigned int cpu) { } 67 61 static inline void apply_boot_alternatives(void) { } 68 62 static inline void apply_early_boot_alternatives(void) { } 69 63 static inline void apply_module_alternatives(void *start, size_t length) { }
+2
arch/riscv/include/asm/cpufeature.h
··· 30 30 /* Per-cpu ISA extensions. */ 31 31 extern struct riscv_isainfo hart_isa[NR_CPUS]; 32 32 33 + void check_unaligned_access(int cpu); 34 + 33 35 #endif
+28
arch/riscv/include/asm/dma-noncoherent.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (C) 2023 Renesas Electronics Corp. 4 + */ 5 + 6 + #ifndef __ASM_DMA_NONCOHERENT_H 7 + #define __ASM_DMA_NONCOHERENT_H 8 + 9 + #include <linux/dma-direct.h> 10 + 11 + /* 12 + * struct riscv_nonstd_cache_ops - Structure for non-standard CMO function pointers 13 + * 14 + * @wback: Function pointer for cache writeback 15 + * @inv: Function pointer for invalidating cache 16 + * @wback_inv: Function pointer for flushing the cache (writeback + invalidating) 17 + */ 18 + struct riscv_nonstd_cache_ops { 19 + void (*wback)(phys_addr_t paddr, size_t size); 20 + void (*inv)(phys_addr_t paddr, size_t size); 21 + void (*wback_inv)(phys_addr_t paddr, size_t size); 22 + }; 23 + 24 + extern struct riscv_nonstd_cache_ops noncoherent_cache_ops; 25 + 26 + void riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops); 27 + 28 + #endif /* __ASM_DMA_NONCOHERENT_H */
+2
arch/riscv/include/asm/efi.h
··· 45 45 46 46 unsigned long stext_offset(void); 47 47 48 + void efi_icache_sync(unsigned long start, unsigned long end); 49 + 48 50 #endif /* _ASM_EFI_H */
+5
arch/riscv/include/asm/errata_list.h
··· 11 11 #include <asm/hwcap.h> 12 12 #include <asm/vendorid_list.h> 13 13 14 + #ifdef CONFIG_ERRATA_ANDES 15 + #define ERRATA_ANDESTECH_NO_IOCP 0 16 + #define ERRATA_ANDESTECH_NUMBER 1 17 + #endif 18 + 14 19 #ifdef CONFIG_ERRATA_SIFIVE 15 20 #define ERRATA_SIFIVE_CIP_453 0 16 21 #define ERRATA_SIFIVE_CIP_1200 1
+3
arch/riscv/include/asm/page.h
··· 106 106 struct kernel_mapping { 107 107 unsigned long page_offset; 108 108 unsigned long virt_addr; 109 + unsigned long virt_offset; 109 110 uintptr_t phys_addr; 110 111 uintptr_t size; 111 112 /* Offset between linear mapping virtual address and kernel load address */ ··· 185 184 #define phys_to_page(paddr) (pfn_to_page(phys_to_pfn(paddr))) 186 185 187 186 #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) 187 + 188 + unsigned long kaslr_offset(void); 188 189 189 190 #endif /* __ASSEMBLY__ */ 190 191
+1
arch/riscv/include/asm/patch.h
··· 7 7 #define _ASM_RISCV_PATCH_H 8 8 9 9 int patch_text_nosync(void *addr, const void *insns, size_t len); 10 + int patch_text_set_nosync(void *addr, u8 c, size_t len); 10 11 int patch_text(void *addr, u32 *insns, int ninsns); 11 12 12 13 extern int riscv_patch_in_stop_machine;
+1
arch/riscv/include/asm/vendorid_list.h
··· 5 5 #ifndef ASM_VENDOR_LIST_H 6 6 #define ASM_VENDOR_LIST_H 7 7 8 + #define ANDESTECH_VENDOR_ID 0x31e 8 9 #define SIFIVE_VENDOR_ID 0x489 9 10 #define THEAD_VENDOR_ID 0x5b7 10 11
+9 -4
arch/riscv/include/uapi/asm/ptrace.h
··· 108 108 * In signal handler, datap will be set a correct user stack offset 109 109 * and vector registers will be copied to the address of datap 110 110 * pointer. 111 - * 112 - * In ptrace syscall, datap will be set to zero and the vector 113 - * registers will be copied to the address right after this 114 - * structure. 115 111 */ 112 + }; 113 + 114 + struct __riscv_v_regset_state { 115 + unsigned long vstart; 116 + unsigned long vl; 117 + unsigned long vtype; 118 + unsigned long vcsr; 119 + unsigned long vlenb; 120 + char vreg[]; 116 121 }; 117 122 118 123 /*
+1
arch/riscv/kernel/Makefile
··· 38 38 obj-y += head.o 39 39 obj-y += soc.o 40 40 obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o 41 + obj-y += copy-unaligned.o 41 42 obj-y += cpu.o 42 43 obj-y += cpufeature.o 43 44 obj-y += entry.o
+5 -19
arch/riscv/kernel/alternative.c
··· 27 27 void (*patch_func)(struct alt_entry *begin, struct alt_entry *end, 28 28 unsigned long archid, unsigned long impid, 29 29 unsigned int stage); 30 - void (*feature_probe_func)(unsigned int cpu, unsigned long archid, 31 - unsigned long impid); 32 30 }; 33 31 34 32 static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info) ··· 41 43 cpu_mfr_info->imp_id = sbi_get_mimpid(); 42 44 #endif 43 45 44 - cpu_mfr_info->feature_probe_func = NULL; 45 46 switch (cpu_mfr_info->vendor_id) { 47 + #ifdef CONFIG_ERRATA_ANDES 48 + case ANDESTECH_VENDOR_ID: 49 + cpu_mfr_info->patch_func = andes_errata_patch_func; 50 + break; 51 + #endif 46 52 #ifdef CONFIG_ERRATA_SIFIVE 47 53 case SIFIVE_VENDOR_ID: 48 54 cpu_mfr_info->patch_func = sifive_errata_patch_func; ··· 55 53 #ifdef CONFIG_ERRATA_THEAD 56 54 case THEAD_VENDOR_ID: 57 55 cpu_mfr_info->patch_func = thead_errata_patch_func; 58 - cpu_mfr_info->feature_probe_func = thead_feature_probe_func; 59 56 break; 60 57 #endif 61 58 default: ··· 144 143 } 145 144 } 146 145 147 - /* Called on each CPU as it starts */ 148 - void probe_vendor_features(unsigned int cpu) 149 - { 150 - struct cpu_manufacturer_info_t cpu_mfr_info; 151 - 152 - riscv_fill_cpu_mfr_info(&cpu_mfr_info); 153 - if (!cpu_mfr_info.feature_probe_func) 154 - return; 155 - 156 - cpu_mfr_info.feature_probe_func(cpu, 157 - cpu_mfr_info.arch_id, 158 - cpu_mfr_info.imp_id); 159 - } 160 - 161 146 /* 162 147 * This is called very early in the boot process (directly after we run 163 148 * a feature detect on the boot CPU). No need to worry about other CPUs ··· 198 211 /* If called on non-boot cpu things could go wrong */ 199 212 WARN_ON(smp_processor_id() != 0); 200 213 201 - probe_vendor_features(0); 202 214 _apply_alternatives((struct alt_entry *)__alt_start, 203 215 (struct alt_entry *)__alt_end, 204 216 RISCV_ALTERNATIVES_BOOT);
+71
arch/riscv/kernel/copy-unaligned.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (C) 2023 Rivos Inc. */ 3 + 4 + #include <linux/linkage.h> 5 + #include <asm/asm.h> 6 + 7 + .text 8 + 9 + /* void __riscv_copy_words_unaligned(void *, const void *, size_t) */ 10 + /* Performs a memcpy without aligning buffers, using word loads and stores. */ 11 + /* Note: The size is truncated to a multiple of 8 * SZREG */ 12 + ENTRY(__riscv_copy_words_unaligned) 13 + andi a4, a2, ~((8*SZREG)-1) 14 + beqz a4, 2f 15 + add a3, a1, a4 16 + 1: 17 + REG_L a4, 0(a1) 18 + REG_L a5, SZREG(a1) 19 + REG_L a6, 2*SZREG(a1) 20 + REG_L a7, 3*SZREG(a1) 21 + REG_L t0, 4*SZREG(a1) 22 + REG_L t1, 5*SZREG(a1) 23 + REG_L t2, 6*SZREG(a1) 24 + REG_L t3, 7*SZREG(a1) 25 + REG_S a4, 0(a0) 26 + REG_S a5, SZREG(a0) 27 + REG_S a6, 2*SZREG(a0) 28 + REG_S a7, 3*SZREG(a0) 29 + REG_S t0, 4*SZREG(a0) 30 + REG_S t1, 5*SZREG(a0) 31 + REG_S t2, 6*SZREG(a0) 32 + REG_S t3, 7*SZREG(a0) 33 + addi a0, a0, 8*SZREG 34 + addi a1, a1, 8*SZREG 35 + bltu a1, a3, 1b 36 + 37 + 2: 38 + ret 39 + END(__riscv_copy_words_unaligned) 40 + 41 + /* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */ 42 + /* Performs a memcpy without aligning buffers, using only byte accesses. */ 43 + /* Note: The size is truncated to a multiple of 8 */ 44 + ENTRY(__riscv_copy_bytes_unaligned) 45 + andi a4, a2, ~(8-1) 46 + beqz a4, 2f 47 + add a3, a1, a4 48 + 1: 49 + lb a4, 0(a1) 50 + lb a5, 1(a1) 51 + lb a6, 2(a1) 52 + lb a7, 3(a1) 53 + lb t0, 4(a1) 54 + lb t1, 5(a1) 55 + lb t2, 6(a1) 56 + lb t3, 7(a1) 57 + sb a4, 0(a0) 58 + sb a5, 1(a0) 59 + sb a6, 2(a0) 60 + sb a7, 3(a0) 61 + sb t0, 4(a0) 62 + sb t1, 5(a0) 63 + sb t2, 6(a0) 64 + sb t3, 7(a0) 65 + addi a0, a0, 8 66 + addi a1, a1, 8 67 + bltu a1, a3, 1b 68 + 69 + 2: 70 + ret 71 + END(__riscv_copy_bytes_unaligned)
+13
arch/riscv/kernel/copy-unaligned.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (C) 2023 Rivos, Inc. 4 + */ 5 + #ifndef __RISCV_KERNEL_COPY_UNALIGNED_H 6 + #define __RISCV_KERNEL_COPY_UNALIGNED_H 7 + 8 + #include <linux/types.h> 9 + 10 + void __riscv_copy_words_unaligned(void *dst, const void *src, size_t size); 11 + void __riscv_copy_bytes_unaligned(void *dst, const void *src, size_t size); 12 + 13 + #endif /* __RISCV_KERNEL_COPY_UNALIGNED_H */
+104
arch/riscv/kernel/cpufeature.c
··· 18 18 #include <asm/cacheflush.h> 19 19 #include <asm/cpufeature.h> 20 20 #include <asm/hwcap.h> 21 + #include <asm/hwprobe.h> 21 22 #include <asm/patch.h> 22 23 #include <asm/processor.h> 23 24 #include <asm/vector.h> 24 25 26 + #include "copy-unaligned.h" 27 + 25 28 #define NUM_ALPHA_EXTS ('z' - 'a' + 1) 29 + 30 + #define MISALIGNED_ACCESS_JIFFIES_LG2 1 31 + #define MISALIGNED_BUFFER_SIZE 0x4000 32 + #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) 26 33 27 34 unsigned long elf_hwcap __read_mostly; 28 35 ··· 555 548 556 549 return hwcap; 557 550 } 551 + 552 + void check_unaligned_access(int cpu) 553 + { 554 + u64 start_cycles, end_cycles; 555 + u64 word_cycles; 556 + u64 byte_cycles; 557 + int ratio; 558 + unsigned long start_jiffies, now; 559 + struct page *page; 560 + void *dst; 561 + void *src; 562 + long speed = RISCV_HWPROBE_MISALIGNED_SLOW; 563 + 564 + page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); 565 + if (!page) { 566 + pr_warn("Can't alloc pages to measure memcpy performance"); 567 + return; 568 + } 569 + 570 + /* Make an unaligned destination buffer. */ 571 + dst = (void *)((unsigned long)page_address(page) | 0x1); 572 + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ 573 + src = dst + (MISALIGNED_BUFFER_SIZE / 2); 574 + src += 2; 575 + word_cycles = -1ULL; 576 + /* Do a warmup. */ 577 + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); 578 + preempt_disable(); 579 + start_jiffies = jiffies; 580 + while ((now = jiffies) == start_jiffies) 581 + cpu_relax(); 582 + 583 + /* 584 + * For a fixed amount of time, repeatedly try the function, and take 585 + * the best time in cycles as the measurement. 586 + */ 587 + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { 588 + start_cycles = get_cycles64(); 589 + /* Ensure the CSR read can't reorder WRT to the copy. */ 590 + mb(); 591 + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); 592 + /* Ensure the copy ends before the end time is snapped. */ 593 + mb(); 594 + end_cycles = get_cycles64(); 595 + if ((end_cycles - start_cycles) < word_cycles) 596 + word_cycles = end_cycles - start_cycles; 597 + } 598 + 599 + byte_cycles = -1ULL; 600 + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); 601 + start_jiffies = jiffies; 602 + while ((now = jiffies) == start_jiffies) 603 + cpu_relax(); 604 + 605 + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { 606 + start_cycles = get_cycles64(); 607 + mb(); 608 + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); 609 + mb(); 610 + end_cycles = get_cycles64(); 611 + if ((end_cycles - start_cycles) < byte_cycles) 612 + byte_cycles = end_cycles - start_cycles; 613 + } 614 + 615 + preempt_enable(); 616 + 617 + /* Don't divide by zero. */ 618 + if (!word_cycles || !byte_cycles) { 619 + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", 620 + cpu); 621 + 622 + goto out; 623 + } 624 + 625 + if (word_cycles < byte_cycles) 626 + speed = RISCV_HWPROBE_MISALIGNED_FAST; 627 + 628 + ratio = div_u64((byte_cycles * 100), word_cycles); 629 + pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", 630 + cpu, 631 + ratio / 100, 632 + ratio % 100, 633 + (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); 634 + 635 + per_cpu(misaligned_access_speed, cpu) = speed; 636 + 637 + out: 638 + __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); 639 + } 640 + 641 + static int check_unaligned_access_boot_cpu(void) 642 + { 643 + check_unaligned_access(0); 644 + return 0; 645 + } 646 + 647 + arch_initcall(check_unaligned_access_boot_cpu); 558 648 559 649 #ifdef CONFIG_RISCV_ALTERNATIVE 560 650 /*
+1
arch/riscv/kernel/image-vars.h
··· 27 27 __efistub__start_kernel = _start_kernel; 28 28 __efistub__end = _end; 29 29 __efistub__edata = _edata; 30 + __efistub___init_text_end = __init_text_end; 30 31 __efistub_screen_info = screen_info; 31 32 32 33 #endif
+109 -5
arch/riscv/kernel/patch.c
··· 6 6 #include <linux/spinlock.h> 7 7 #include <linux/mm.h> 8 8 #include <linux/memory.h> 9 + #include <linux/string.h> 9 10 #include <linux/uaccess.h> 10 11 #include <linux/stop_machine.h> 11 12 #include <asm/kprobes.h> ··· 54 53 } 55 54 NOKPROBE_SYMBOL(patch_unmap); 56 55 57 - static int patch_insn_write(void *addr, const void *insn, size_t len) 56 + static int __patch_insn_set(void *addr, u8 c, size_t len) 57 + { 58 + void *waddr = addr; 59 + bool across_pages = (((uintptr_t)addr & ~PAGE_MASK) + len) > PAGE_SIZE; 60 + 61 + /* 62 + * Only two pages can be mapped at a time for writing. 63 + */ 64 + if (len + offset_in_page(addr) > 2 * PAGE_SIZE) 65 + return -EINVAL; 66 + /* 67 + * Before reaching here, it was expected to lock the text_mutex 68 + * already, so we don't need to give another lock here and could 69 + * ensure that it was safe between each cores. 70 + */ 71 + lockdep_assert_held(&text_mutex); 72 + 73 + if (across_pages) 74 + patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); 75 + 76 + waddr = patch_map(addr, FIX_TEXT_POKE0); 77 + 78 + memset(waddr, c, len); 79 + 80 + patch_unmap(FIX_TEXT_POKE0); 81 + 82 + if (across_pages) 83 + patch_unmap(FIX_TEXT_POKE1); 84 + 85 + return 0; 86 + } 87 + NOKPROBE_SYMBOL(__patch_insn_set); 88 + 89 + static int __patch_insn_write(void *addr, const void *insn, size_t len) 58 90 { 59 91 void *waddr = addr; 60 92 bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; 61 93 int ret; 94 + 95 + /* 96 + * Only two pages can be mapped at a time for writing. 97 + */ 98 + if (len + offset_in_page(addr) > 2 * PAGE_SIZE) 99 + return -EINVAL; 62 100 63 101 /* 64 102 * Before reaching here, it was expected to lock the text_mutex ··· 114 74 lockdep_assert_held(&text_mutex); 115 75 116 76 if (across_pages) 117 - patch_map(addr + len, FIX_TEXT_POKE1); 77 + patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); 118 78 119 79 waddr = patch_map(addr, FIX_TEXT_POKE0); 120 80 ··· 127 87 128 88 return ret; 129 89 } 130 - NOKPROBE_SYMBOL(patch_insn_write); 90 + NOKPROBE_SYMBOL(__patch_insn_write); 131 91 #else 132 - static int patch_insn_write(void *addr, const void *insn, size_t len) 92 + static int __patch_insn_set(void *addr, u8 c, size_t len) 93 + { 94 + memset(addr, c, len); 95 + 96 + return 0; 97 + } 98 + NOKPROBE_SYMBOL(__patch_insn_set); 99 + 100 + static int __patch_insn_write(void *addr, const void *insn, size_t len) 133 101 { 134 102 return copy_to_kernel_nofault(addr, insn, len); 135 103 } 136 - NOKPROBE_SYMBOL(patch_insn_write); 104 + NOKPROBE_SYMBOL(__patch_insn_write); 137 105 #endif /* CONFIG_MMU */ 106 + 107 + static int patch_insn_set(void *addr, u8 c, size_t len) 108 + { 109 + size_t patched = 0; 110 + size_t size; 111 + int ret = 0; 112 + 113 + /* 114 + * __patch_insn_set() can only work on 2 pages at a time so call it in a 115 + * loop with len <= 2 * PAGE_SIZE. 116 + */ 117 + while (patched < len && !ret) { 118 + size = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + patched), len - patched); 119 + ret = __patch_insn_set(addr + patched, c, size); 120 + 121 + patched += size; 122 + } 123 + 124 + return ret; 125 + } 126 + NOKPROBE_SYMBOL(patch_insn_set); 127 + 128 + int patch_text_set_nosync(void *addr, u8 c, size_t len) 129 + { 130 + u32 *tp = addr; 131 + int ret; 132 + 133 + ret = patch_insn_set(tp, c, len); 134 + 135 + if (!ret) 136 + flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len); 137 + 138 + return ret; 139 + } 140 + NOKPROBE_SYMBOL(patch_text_set_nosync); 141 + 142 + static int patch_insn_write(void *addr, const void *insn, size_t len) 143 + { 144 + size_t patched = 0; 145 + size_t size; 146 + int ret = 0; 147 + 148 + /* 149 + * Copy the instructions to the destination address, two pages at a time 150 + * because __patch_insn_write() can only handle len <= 2 * PAGE_SIZE. 151 + */ 152 + while (patched < len && !ret) { 153 + size = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + patched), len - patched); 154 + ret = __patch_insn_write(addr + patched, insn + patched, size); 155 + 156 + patched += size; 157 + } 158 + 159 + return ret; 160 + } 161 + NOKPROBE_SYMBOL(patch_insn_write); 138 162 139 163 int patch_text_nosync(void *addr, const void *insns, size_t len) 140 164 {
+1 -1
arch/riscv/kernel/pi/Makefile
··· 35 35 $(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE 36 36 $(call if_changed_rule,cc_o_c) 37 37 38 - obj-y := cmdline_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o 38 + obj-y := cmdline_early.pi.o fdt_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o 39 39 extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
+13
arch/riscv/kernel/pi/cmdline_early.c
··· 14 14 * LLVM complain because the function is actually unused in this file). 15 15 */ 16 16 u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa); 17 + bool set_nokaslr_from_cmdline(uintptr_t dtb_pa); 17 18 18 19 static char *get_early_cmdline(uintptr_t dtb_pa) 19 20 { ··· 60 59 char *cmdline = get_early_cmdline(dtb_pa); 61 60 62 61 return match_noXlvl(cmdline); 62 + } 63 + 64 + static bool match_nokaslr(char *cmdline) 65 + { 66 + return strstr(cmdline, "nokaslr"); 67 + } 68 + 69 + bool set_nokaslr_from_cmdline(uintptr_t dtb_pa) 70 + { 71 + char *cmdline = get_early_cmdline(dtb_pa); 72 + 73 + return match_nokaslr(cmdline); 63 74 }
+30
arch/riscv/kernel/pi/fdt_early.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #include <linux/types.h> 3 + #include <linux/init.h> 4 + #include <linux/libfdt.h> 5 + 6 + /* 7 + * Declare the functions that are exported (but prefixed) here so that LLVM 8 + * does not complain it lacks the 'static' keyword (which, if added, makes 9 + * LLVM complain because the function is actually unused in this file). 10 + */ 11 + u64 get_kaslr_seed(uintptr_t dtb_pa); 12 + 13 + u64 get_kaslr_seed(uintptr_t dtb_pa) 14 + { 15 + int node, len; 16 + fdt64_t *prop; 17 + u64 ret; 18 + 19 + node = fdt_path_offset((void *)dtb_pa, "/chosen"); 20 + if (node < 0) 21 + return 0; 22 + 23 + prop = fdt_getprop_w((void *)dtb_pa, node, "kaslr-seed", &len); 24 + if (!prop || len != sizeof(u64)) 25 + return 0; 26 + 27 + ret = fdt64_to_cpu(*prop); 28 + *prop = 0; 29 + return ret; 30 + }
+79
arch/riscv/kernel/ptrace.c
··· 25 25 #ifdef CONFIG_FPU 26 26 REGSET_F, 27 27 #endif 28 + #ifdef CONFIG_RISCV_ISA_V 29 + REGSET_V, 30 + #endif 28 31 }; 29 32 30 33 static int riscv_gpr_get(struct task_struct *target, ··· 84 81 } 85 82 #endif 86 83 84 + #ifdef CONFIG_RISCV_ISA_V 85 + static int riscv_vr_get(struct task_struct *target, 86 + const struct user_regset *regset, 87 + struct membuf to) 88 + { 89 + struct __riscv_v_ext_state *vstate = &target->thread.vstate; 90 + struct __riscv_v_regset_state ptrace_vstate; 91 + 92 + if (!riscv_v_vstate_query(task_pt_regs(target))) 93 + return -EINVAL; 94 + 95 + /* 96 + * Ensure the vector registers have been saved to the memory before 97 + * copying them to membuf. 98 + */ 99 + if (target == current) 100 + riscv_v_vstate_save(current, task_pt_regs(current)); 101 + 102 + ptrace_vstate.vstart = vstate->vstart; 103 + ptrace_vstate.vl = vstate->vl; 104 + ptrace_vstate.vtype = vstate->vtype; 105 + ptrace_vstate.vcsr = vstate->vcsr; 106 + ptrace_vstate.vlenb = vstate->vlenb; 107 + 108 + /* Copy vector header from vstate. */ 109 + membuf_write(&to, &ptrace_vstate, sizeof(struct __riscv_v_regset_state)); 110 + 111 + /* Copy all the vector registers from vstate. */ 112 + return membuf_write(&to, vstate->datap, riscv_v_vsize); 113 + } 114 + 115 + static int riscv_vr_set(struct task_struct *target, 116 + const struct user_regset *regset, 117 + unsigned int pos, unsigned int count, 118 + const void *kbuf, const void __user *ubuf) 119 + { 120 + int ret; 121 + struct __riscv_v_ext_state *vstate = &target->thread.vstate; 122 + struct __riscv_v_regset_state ptrace_vstate; 123 + 124 + if (!riscv_v_vstate_query(task_pt_regs(target))) 125 + return -EINVAL; 126 + 127 + /* Copy rest of the vstate except datap */ 128 + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ptrace_vstate, 0, 129 + sizeof(struct __riscv_v_regset_state)); 130 + if (unlikely(ret)) 131 + return ret; 132 + 133 + if (vstate->vlenb != ptrace_vstate.vlenb) 134 + return -EINVAL; 135 + 136 + vstate->vstart = ptrace_vstate.vstart; 137 + vstate->vl = ptrace_vstate.vl; 138 + vstate->vtype = ptrace_vstate.vtype; 139 + vstate->vcsr = ptrace_vstate.vcsr; 140 + 141 + /* Copy all the vector registers. */ 142 + pos = 0; 143 + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate->datap, 144 + 0, riscv_v_vsize); 145 + return ret; 146 + } 147 + #endif 148 + 87 149 static const struct user_regset riscv_user_regset[] = { 88 150 [REGSET_X] = { 89 151 .core_note_type = NT_PRSTATUS, ··· 166 98 .align = sizeof(elf_fpreg_t), 167 99 .regset_get = riscv_fpr_get, 168 100 .set = riscv_fpr_set, 101 + }, 102 + #endif 103 + #ifdef CONFIG_RISCV_ISA_V 104 + [REGSET_V] = { 105 + .core_note_type = NT_RISCV_VECTOR, 106 + .align = 16, 107 + .n = ((32 * RISCV_MAX_VLENB) + 108 + sizeof(struct __riscv_v_regset_state)) / sizeof(__u32), 109 + .size = sizeof(__u32), 110 + .regset_get = riscv_vr_get, 111 + .set = riscv_vr_set, 169 112 }, 170 113 #endif 171 114 };
+25
arch/riscv/kernel/setup.c
··· 21 21 #include <linux/smp.h> 22 22 #include <linux/efi.h> 23 23 #include <linux/crash_dump.h> 24 + #include <linux/panic_notifier.h> 24 25 25 26 #include <asm/acpi.h> 26 27 #include <asm/alternative.h> ··· 348 347 349 348 free_initmem_default(POISON_FREE_INITMEM); 350 349 } 350 + 351 + static int dump_kernel_offset(struct notifier_block *self, 352 + unsigned long v, void *p) 353 + { 354 + pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n", 355 + kernel_map.virt_offset, 356 + KERNEL_LINK_ADDR); 357 + 358 + return 0; 359 + } 360 + 361 + static struct notifier_block kernel_offset_notifier = { 362 + .notifier_call = dump_kernel_offset 363 + }; 364 + 365 + static int __init register_kernel_offset_dumper(void) 366 + { 367 + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) 368 + atomic_notifier_chain_register(&panic_notifier_list, 369 + &kernel_offset_notifier); 370 + 371 + return 0; 372 + } 373 + device_initcall(register_kernel_offset_dumper);
+2 -1
arch/riscv/kernel/smpboot.c
··· 26 26 #include <linux/sched/task_stack.h> 27 27 #include <linux/sched/mm.h> 28 28 #include <asm/cpu_ops.h> 29 + #include <asm/cpufeature.h> 29 30 #include <asm/irq.h> 30 31 #include <asm/mmu_context.h> 31 32 #include <asm/numa.h> ··· 246 245 247 246 numa_add_cpu(curr_cpuid); 248 247 set_cpu_online(curr_cpuid, 1); 249 - probe_vendor_features(curr_cpuid); 248 + check_unaligned_access(curr_cpuid); 250 249 251 250 if (has_vector()) { 252 251 if (riscv_v_setup_vsize())
+94 -9
arch/riscv/mm/dma-noncoherent.c
··· 9 9 #include <linux/dma-map-ops.h> 10 10 #include <linux/mm.h> 11 11 #include <asm/cacheflush.h> 12 + #include <asm/dma-noncoherent.h> 12 13 13 14 static bool noncoherent_supported __ro_after_init; 14 15 int dma_cache_alignment __ro_after_init = ARCH_DMA_MINALIGN; 15 16 EXPORT_SYMBOL_GPL(dma_cache_alignment); 16 17 17 - void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, 18 - enum dma_data_direction dir) 18 + struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init = { 19 + .wback = NULL, 20 + .inv = NULL, 21 + .wback_inv = NULL, 22 + }; 23 + 24 + static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) 19 25 { 20 26 void *vaddr = phys_to_virt(paddr); 21 27 28 + #ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS 29 + if (unlikely(noncoherent_cache_ops.wback)) { 30 + noncoherent_cache_ops.wback(paddr, size); 31 + return; 32 + } 33 + #endif 34 + ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); 35 + } 36 + 37 + static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) 38 + { 39 + void *vaddr = phys_to_virt(paddr); 40 + 41 + #ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS 42 + if (unlikely(noncoherent_cache_ops.inv)) { 43 + noncoherent_cache_ops.inv(paddr, size); 44 + return; 45 + } 46 + #endif 47 + 48 + ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size); 49 + } 50 + 51 + static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size) 52 + { 53 + void *vaddr = phys_to_virt(paddr); 54 + 55 + #ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS 56 + if (unlikely(noncoherent_cache_ops.wback_inv)) { 57 + noncoherent_cache_ops.wback_inv(paddr, size); 58 + return; 59 + } 60 + #endif 61 + 62 + ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); 63 + } 64 + 65 + static inline bool arch_sync_dma_clean_before_fromdevice(void) 66 + { 67 + return true; 68 + } 69 + 70 + static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void) 71 + { 72 + return true; 73 + } 74 + 75 + void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, 76 + enum dma_data_direction dir) 77 + { 22 78 switch (dir) { 23 79 case DMA_TO_DEVICE: 24 - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); 80 + arch_dma_cache_wback(paddr, size); 25 81 break; 82 + 26 83 case DMA_FROM_DEVICE: 27 - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); 28 - break; 84 + if (!arch_sync_dma_clean_before_fromdevice()) { 85 + arch_dma_cache_inv(paddr, size); 86 + break; 87 + } 88 + fallthrough; 89 + 29 90 case DMA_BIDIRECTIONAL: 30 - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); 91 + /* Skip the invalidate here if it's done later */ 92 + if (IS_ENABLED(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) && 93 + arch_sync_dma_cpu_needs_post_dma_flush()) 94 + arch_dma_cache_wback(paddr, size); 95 + else 96 + arch_dma_cache_wback_inv(paddr, size); 31 97 break; 98 + 32 99 default: 33 100 break; 34 101 } ··· 104 37 void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, 105 38 enum dma_data_direction dir) 106 39 { 107 - void *vaddr = phys_to_virt(paddr); 108 - 109 40 switch (dir) { 110 41 case DMA_TO_DEVICE: 111 42 break; 43 + 112 44 case DMA_FROM_DEVICE: 113 45 case DMA_BIDIRECTIONAL: 114 - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); 46 + /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */ 47 + if (arch_sync_dma_cpu_needs_post_dma_flush()) 48 + arch_dma_cache_inv(paddr, size); 115 49 break; 50 + 116 51 default: 117 52 break; 118 53 } ··· 123 54 void arch_dma_prep_coherent(struct page *page, size_t size) 124 55 { 125 56 void *flush_addr = page_address(page); 57 + 58 + #ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS 59 + if (unlikely(noncoherent_cache_ops.wback_inv)) { 60 + noncoherent_cache_ops.wback_inv(page_to_phys(page), size); 61 + return; 62 + } 63 + #endif 126 64 127 65 ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size); 128 66 } ··· 162 86 if (!noncoherent_supported) 163 87 dma_cache_alignment = 1; 164 88 } 89 + 90 + void riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) 91 + { 92 + if (!ops) 93 + return; 94 + 95 + noncoherent_cache_ops = *ops; 96 + } 97 + EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops);
+35 -1
arch/riscv/mm/init.c
··· 1014 1014 #endif 1015 1015 } 1016 1016 1017 + #ifdef CONFIG_RANDOMIZE_BASE 1018 + extern bool __init __pi_set_nokaslr_from_cmdline(uintptr_t dtb_pa); 1019 + extern u64 __init __pi_get_kaslr_seed(uintptr_t dtb_pa); 1020 + 1021 + static int __init print_nokaslr(char *p) 1022 + { 1023 + pr_info("Disabled KASLR"); 1024 + return 0; 1025 + } 1026 + early_param("nokaslr", print_nokaslr); 1027 + 1028 + unsigned long kaslr_offset(void) 1029 + { 1030 + return kernel_map.virt_offset; 1031 + } 1032 + #endif 1033 + 1017 1034 asmlinkage void __init setup_vm(uintptr_t dtb_pa) 1018 1035 { 1019 1036 pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd; 1020 1037 1021 - kernel_map.virt_addr = KERNEL_LINK_ADDR; 1038 + #ifdef CONFIG_RANDOMIZE_BASE 1039 + if (!__pi_set_nokaslr_from_cmdline(dtb_pa)) { 1040 + u64 kaslr_seed = __pi_get_kaslr_seed(dtb_pa); 1041 + u32 kernel_size = (uintptr_t)(&_end) - (uintptr_t)(&_start); 1042 + u32 nr_pos; 1043 + 1044 + /* 1045 + * Compute the number of positions available: we are limited 1046 + * by the early page table that only has one PUD and we must 1047 + * be aligned on PMD_SIZE. 1048 + */ 1049 + nr_pos = (PUD_SIZE - kernel_size) / PMD_SIZE; 1050 + 1051 + kernel_map.virt_offset = (kaslr_seed % nr_pos) * PMD_SIZE; 1052 + } 1053 + #endif 1054 + 1055 + kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset; 1022 1056 kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); 1023 1057 1024 1058 #ifdef CONFIG_XIP_KERNEL
+13
arch/riscv/mm/pmem.c
··· 7 7 #include <linux/libnvdimm.h> 8 8 9 9 #include <asm/cacheflush.h> 10 + #include <asm/dma-noncoherent.h> 10 11 11 12 void arch_wb_cache_pmem(void *addr, size_t size) 12 13 { 14 + #ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS 15 + if (unlikely(noncoherent_cache_ops.wback)) { 16 + noncoherent_cache_ops.wback(virt_to_phys(addr), size); 17 + return; 18 + } 19 + #endif 13 20 ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size); 14 21 } 15 22 EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); 16 23 17 24 void arch_invalidate_pmem(void *addr, size_t size) 18 25 { 26 + #ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS 27 + if (unlikely(noncoherent_cache_ops.inv)) { 28 + noncoherent_cache_ops.inv(virt_to_phys(addr), size); 29 + return; 30 + } 31 + #endif 19 32 ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size); 20 33 } 21 34 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+3
arch/riscv/net/bpf_jit.h
··· 68 68 struct rv_jit_context { 69 69 struct bpf_prog *prog; 70 70 u16 *insns; /* RV insns */ 71 + u16 *ro_insns; 71 72 int ninsns; 72 73 int prologue_len; 73 74 int epilogue_offset; ··· 86 85 87 86 struct rv_jit_data { 88 87 struct bpf_binary_header *header; 88 + struct bpf_binary_header *ro_header; 89 89 u8 *image; 90 + u8 *ro_image; 90 91 struct rv_jit_context ctx; 91 92 }; 92 93
+48 -12
arch/riscv/net/bpf_jit_comp64.c
··· 144 144 /* Emit fixed-length instructions for address */ 145 145 static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx) 146 146 { 147 - u64 ip = (u64)(ctx->insns + ctx->ninsns); 147 + /* 148 + * Use the ro_insns(RX) to calculate the offset as the BPF program will 149 + * finally run from this memory region. 150 + */ 151 + u64 ip = (u64)(ctx->ro_insns + ctx->ninsns); 148 152 s64 off = addr - ip; 149 153 s64 upper = (off + (1 << 11)) >> 12; 150 154 s64 lower = off & 0xfff; ··· 468 464 s64 off = 0; 469 465 u64 ip; 470 466 471 - if (addr && ctx->insns) { 472 - ip = (u64)(long)(ctx->insns + ctx->ninsns); 467 + if (addr && ctx->insns && ctx->ro_insns) { 468 + /* 469 + * Use the ro_insns(RX) to calculate the offset as the BPF 470 + * program will finally run from this memory region. 471 + */ 472 + ip = (u64)(long)(ctx->ro_insns + ctx->ninsns); 473 473 off = addr - ip; 474 474 } 475 475 ··· 586 578 { 587 579 struct exception_table_entry *ex; 588 580 unsigned long pc; 589 - off_t offset; 581 + off_t ins_offset; 582 + off_t fixup_offset; 590 583 591 - if (!ctx->insns || !ctx->prog->aux->extable || 584 + if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable || 592 585 (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX)) 593 586 return 0; 594 587 ··· 603 594 return -EINVAL; 604 595 605 596 ex = &ctx->prog->aux->extable[ctx->nexentries]; 606 - pc = (unsigned long)&ctx->insns[ctx->ninsns - insn_len]; 597 + pc = (unsigned long)&ctx->ro_insns[ctx->ninsns - insn_len]; 607 598 608 - offset = pc - (long)&ex->insn; 609 - if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) 599 + /* 600 + * This is the relative offset of the instruction that may fault from 601 + * the exception table itself. This will be written to the exception 602 + * table and if this instruction faults, the destination register will 603 + * be set to '0' and the execution will jump to the next instruction. 604 + */ 605 + ins_offset = pc - (long)&ex->insn; 606 + if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) 610 607 return -ERANGE; 611 - ex->insn = offset; 612 608 613 609 /* 614 610 * Since the extable follows the program, the fixup offset is always ··· 622 608 * bits. We don't need to worry about buildtime or runtime sort 623 609 * modifying the upper bits because the table is already sorted, and 624 610 * isn't part of the main exception table. 611 + * 612 + * The fixup_offset is set to the next instruction from the instruction 613 + * that may fault. The execution will jump to this after handling the 614 + * fault. 625 615 */ 626 - offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16)); 627 - if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset)) 616 + fixup_offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16)); 617 + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) 628 618 return -ERANGE; 629 619 630 - ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | 620 + /* 621 + * The offsets above have been calculated using the RO buffer but we 622 + * need to use the R/W buffer for writes. 623 + * switch ex to rw buffer for writing. 624 + */ 625 + ex = (void *)ctx->insns + ((void *)ex - (void *)ctx->ro_insns); 626 + 627 + ex->insn = ins_offset; 628 + 629 + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | 631 630 FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); 632 631 ex->type = EX_TYPE_BPF; 633 632 ··· 1034 1007 1035 1008 ctx.ninsns = 0; 1036 1009 ctx.insns = NULL; 1010 + ctx.ro_insns = NULL; 1037 1011 ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx); 1038 1012 if (ret < 0) 1039 1013 return ret; ··· 1043 1015 return -EFBIG; 1044 1016 1045 1017 ctx.ninsns = 0; 1018 + /* 1019 + * The bpf_int_jit_compile() uses a RW buffer (ctx.insns) to write the 1020 + * JITed instructions and later copies it to a RX region (ctx.ro_insns). 1021 + * It also uses ctx.ro_insns to calculate offsets for jumps etc. As the 1022 + * trampoline image uses the same memory area for writing and execution, 1023 + * both ctx.insns and ctx.ro_insns can be set to image. 1024 + */ 1046 1025 ctx.insns = image; 1026 + ctx.ro_insns = image; 1047 1027 ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx); 1048 1028 if (ret < 0) 1049 1029 return ret;
+90 -16
arch/riscv/net/bpf_jit_core.c
··· 8 8 9 9 #include <linux/bpf.h> 10 10 #include <linux/filter.h> 11 + #include <linux/memory.h> 12 + #include <asm/patch.h> 11 13 #include "bpf_jit.h" 12 14 13 15 /* Number of iterations to try until offsets converge. */ ··· 119 117 sizeof(struct exception_table_entry); 120 118 prog_size = sizeof(*ctx->insns) * ctx->ninsns; 121 119 122 - jit_data->header = 123 - bpf_jit_binary_alloc(prog_size + extable_size, 124 - &jit_data->image, 125 - sizeof(u32), 126 - bpf_fill_ill_insns); 127 - if (!jit_data->header) { 120 + jit_data->ro_header = 121 + bpf_jit_binary_pack_alloc(prog_size + extable_size, 122 + &jit_data->ro_image, sizeof(u32), 123 + &jit_data->header, &jit_data->image, 124 + bpf_fill_ill_insns); 125 + if (!jit_data->ro_header) { 128 126 prog = orig_prog; 129 127 goto out_offset; 130 128 } 131 129 130 + /* 131 + * Use the image(RW) for writing the JITed instructions. But also save 132 + * the ro_image(RX) for calculating the offsets in the image. The RW 133 + * image will be later copied to the RX image from where the program 134 + * will run. The bpf_jit_binary_pack_finalize() will do this copy in the 135 + * final step. 136 + */ 137 + ctx->ro_insns = (u16 *)jit_data->ro_image; 132 138 ctx->insns = (u16 *)jit_data->image; 133 139 /* 134 140 * Now, when the image is allocated, the image can ··· 148 138 149 139 if (i == NR_JIT_ITERATIONS) { 150 140 pr_err("bpf-jit: image did not converge in <%d passes!\n", i); 151 - if (jit_data->header) 152 - bpf_jit_binary_free(jit_data->header); 153 141 prog = orig_prog; 154 - goto out_offset; 142 + goto out_free_hdr; 155 143 } 156 144 157 145 if (extable_size) 158 - prog->aux->extable = (void *)ctx->insns + prog_size; 146 + prog->aux->extable = (void *)ctx->ro_insns + prog_size; 159 147 160 148 skip_init_ctx: 161 149 pass++; ··· 162 154 163 155 bpf_jit_build_prologue(ctx); 164 156 if (build_body(ctx, extra_pass, NULL)) { 165 - bpf_jit_binary_free(jit_data->header); 166 157 prog = orig_prog; 167 - goto out_offset; 158 + goto out_free_hdr; 168 159 } 169 160 bpf_jit_build_epilogue(ctx); 170 161 171 162 if (bpf_jit_enable > 1) 172 163 bpf_jit_dump(prog->len, prog_size, pass, ctx->insns); 173 164 174 - prog->bpf_func = (void *)ctx->insns; 165 + prog->bpf_func = (void *)ctx->ro_insns; 175 166 prog->jited = 1; 176 167 prog->jited_len = prog_size; 177 168 178 - bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); 179 - 180 169 if (!prog->is_func || extra_pass) { 181 - bpf_jit_binary_lock_ro(jit_data->header); 170 + if (WARN_ON(bpf_jit_binary_pack_finalize(prog, jit_data->ro_header, 171 + jit_data->header))) { 172 + /* ro_header has been freed */ 173 + jit_data->ro_header = NULL; 174 + prog = orig_prog; 175 + goto out_offset; 176 + } 177 + /* 178 + * The instructions have now been copied to the ROX region from 179 + * where they will execute. 180 + * Write any modified data cache blocks out to memory and 181 + * invalidate the corresponding blocks in the instruction cache. 182 + */ 183 + bpf_flush_icache(jit_data->ro_header, ctx->ro_insns + ctx->ninsns); 182 184 for (i = 0; i < prog->len; i++) 183 185 ctx->offset[i] = ninsns_rvoff(ctx->offset[i]); 184 186 bpf_prog_fill_jited_linfo(prog, ctx->offset); ··· 203 185 bpf_jit_prog_release_other(prog, prog == orig_prog ? 204 186 tmp : orig_prog); 205 187 return prog; 188 + 189 + out_free_hdr: 190 + if (jit_data->header) { 191 + bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size, 192 + sizeof(jit_data->header->size)); 193 + bpf_jit_binary_pack_free(jit_data->ro_header, jit_data->header); 194 + } 195 + goto out_offset; 206 196 } 207 197 208 198 u64 bpf_jit_alloc_exec_limit(void) ··· 229 203 void bpf_jit_free_exec(void *addr) 230 204 { 231 205 return vfree(addr); 206 + } 207 + 208 + void *bpf_arch_text_copy(void *dst, void *src, size_t len) 209 + { 210 + int ret; 211 + 212 + mutex_lock(&text_mutex); 213 + ret = patch_text_nosync(dst, src, len); 214 + mutex_unlock(&text_mutex); 215 + 216 + if (ret) 217 + return ERR_PTR(-EINVAL); 218 + 219 + return dst; 220 + } 221 + 222 + int bpf_arch_text_invalidate(void *dst, size_t len) 223 + { 224 + int ret; 225 + 226 + mutex_lock(&text_mutex); 227 + ret = patch_text_set_nosync(dst, 0, len); 228 + mutex_unlock(&text_mutex); 229 + 230 + return ret; 231 + } 232 + 233 + void bpf_jit_free(struct bpf_prog *prog) 234 + { 235 + if (prog->jited) { 236 + struct rv_jit_data *jit_data = prog->aux->jit_data; 237 + struct bpf_binary_header *hdr; 238 + 239 + /* 240 + * If we fail the final pass of JIT (from jit_subprogs), 241 + * the program may not be finalized yet. Call finalize here 242 + * before freeing it. 243 + */ 244 + if (jit_data) { 245 + bpf_jit_binary_pack_finalize(prog, jit_data->ro_header, jit_data->header); 246 + kfree(jit_data); 247 + } 248 + hdr = bpf_jit_binary_pack_hdr(prog); 249 + bpf_jit_binary_pack_free(hdr, NULL); 250 + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); 251 + } 252 + 253 + bpf_prog_unlock_free(prog); 232 254 }
+2
drivers/Kconfig
··· 15 15 16 16 source "drivers/bus/Kconfig" 17 17 18 + source "drivers/cache/Kconfig" 19 + 18 20 source "drivers/connector/Kconfig" 19 21 20 22 source "drivers/firmware/Kconfig"
+1
drivers/Makefile
··· 11 11 MAKEFLAGS += --include-dir=$(srctree) 12 12 endif 13 13 14 + obj-y += cache/ 14 15 obj-y += irqchip/ 15 16 obj-y += bus/ 16 17
+11
drivers/cache/Kconfig
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + menu "Cache Drivers" 3 + 4 + config AX45MP_L2_CACHE 5 + bool "Andes Technology AX45MP L2 Cache controller" 6 + depends on RISCV_DMA_NONCOHERENT 7 + select RISCV_NONSTANDARD_CACHE_OPS 8 + help 9 + Support for the L2 cache controller on Andes Technology AX45MP platforms. 10 + 11 + endmenu
+3
drivers/cache/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + 3 + obj-$(CONFIG_AX45MP_L2_CACHE) += ax45mp_cache.o
+213
drivers/cache/ax45mp_cache.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * non-coherent cache functions for Andes AX45MP 4 + * 5 + * Copyright (C) 2023 Renesas Electronics Corp. 6 + */ 7 + 8 + #include <linux/cacheflush.h> 9 + #include <linux/cacheinfo.h> 10 + #include <linux/dma-direction.h> 11 + #include <linux/of_address.h> 12 + #include <linux/of_platform.h> 13 + 14 + #include <asm/dma-noncoherent.h> 15 + 16 + /* L2 cache registers */ 17 + #define AX45MP_L2C_REG_CTL_OFFSET 0x8 18 + 19 + #define AX45MP_L2C_REG_C0_CMD_OFFSET 0x40 20 + #define AX45MP_L2C_REG_C0_ACC_OFFSET 0x48 21 + #define AX45MP_L2C_REG_STATUS_OFFSET 0x80 22 + 23 + /* D-cache operation */ 24 + #define AX45MP_CCTL_L1D_VA_INVAL 0 /* Invalidate an L1 cache entry */ 25 + #define AX45MP_CCTL_L1D_VA_WB 1 /* Write-back an L1 cache entry */ 26 + 27 + /* L2 CCTL status */ 28 + #define AX45MP_CCTL_L2_STATUS_IDLE 0 29 + 30 + /* L2 CCTL status cores mask */ 31 + #define AX45MP_CCTL_L2_STATUS_C0_MASK 0xf 32 + 33 + /* L2 cache operation */ 34 + #define AX45MP_CCTL_L2_PA_INVAL 0x8 /* Invalidate an L2 cache entry */ 35 + #define AX45MP_CCTL_L2_PA_WB 0x9 /* Write-back an L2 cache entry */ 36 + 37 + #define AX45MP_L2C_REG_PER_CORE_OFFSET 0x10 38 + #define AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET 4 39 + 40 + #define AX45MP_L2C_REG_CN_CMD_OFFSET(n) \ 41 + (AX45MP_L2C_REG_C0_CMD_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET)) 42 + #define AX45MP_L2C_REG_CN_ACC_OFFSET(n) \ 43 + (AX45MP_L2C_REG_C0_ACC_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET)) 44 + #define AX45MP_CCTL_L2_STATUS_CN_MASK(n) \ 45 + (AX45MP_CCTL_L2_STATUS_C0_MASK << ((n) * AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET)) 46 + 47 + #define AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM 0x80b 48 + #define AX45MP_CCTL_REG_UCCTLCOMMAND_NUM 0x80c 49 + 50 + #define AX45MP_CACHE_LINE_SIZE 64 51 + 52 + struct ax45mp_priv { 53 + void __iomem *l2c_base; 54 + u32 ax45mp_cache_line_size; 55 + }; 56 + 57 + static struct ax45mp_priv ax45mp_priv; 58 + 59 + /* L2 Cache operations */ 60 + static inline uint32_t ax45mp_cpu_l2c_get_cctl_status(void) 61 + { 62 + return readl(ax45mp_priv.l2c_base + AX45MP_L2C_REG_STATUS_OFFSET); 63 + } 64 + 65 + static void ax45mp_cpu_cache_operation(unsigned long start, unsigned long end, 66 + unsigned int l1_op, unsigned int l2_op) 67 + { 68 + unsigned long line_size = ax45mp_priv.ax45mp_cache_line_size; 69 + void __iomem *base = ax45mp_priv.l2c_base; 70 + int mhartid = smp_processor_id(); 71 + unsigned long pa; 72 + 73 + while (end > start) { 74 + csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start); 75 + csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, l1_op); 76 + 77 + pa = virt_to_phys((void *)start); 78 + writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid)); 79 + writel(l2_op, base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid)); 80 + while ((ax45mp_cpu_l2c_get_cctl_status() & 81 + AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) != 82 + AX45MP_CCTL_L2_STATUS_IDLE) 83 + ; 84 + 85 + start += line_size; 86 + } 87 + } 88 + 89 + /* Write-back L1 and L2 cache entry */ 90 + static inline void ax45mp_cpu_dcache_wb_range(unsigned long start, unsigned long end) 91 + { 92 + ax45mp_cpu_cache_operation(start, end, AX45MP_CCTL_L1D_VA_WB, 93 + AX45MP_CCTL_L2_PA_WB); 94 + } 95 + 96 + /* Invalidate the L1 and L2 cache entry */ 97 + static inline void ax45mp_cpu_dcache_inval_range(unsigned long start, unsigned long end) 98 + { 99 + ax45mp_cpu_cache_operation(start, end, AX45MP_CCTL_L1D_VA_INVAL, 100 + AX45MP_CCTL_L2_PA_INVAL); 101 + } 102 + 103 + static void ax45mp_dma_cache_inv(phys_addr_t paddr, size_t size) 104 + { 105 + unsigned long start = (unsigned long)phys_to_virt(paddr); 106 + unsigned long end = start + size; 107 + unsigned long line_size; 108 + unsigned long flags; 109 + 110 + if (unlikely(start == end)) 111 + return; 112 + 113 + line_size = ax45mp_priv.ax45mp_cache_line_size; 114 + 115 + start = start & (~(line_size - 1)); 116 + end = ((end + line_size - 1) & (~(line_size - 1))); 117 + 118 + local_irq_save(flags); 119 + 120 + ax45mp_cpu_dcache_inval_range(start, end); 121 + 122 + local_irq_restore(flags); 123 + } 124 + 125 + static void ax45mp_dma_cache_wback(phys_addr_t paddr, size_t size) 126 + { 127 + unsigned long start = (unsigned long)phys_to_virt(paddr); 128 + unsigned long end = start + size; 129 + unsigned long line_size; 130 + unsigned long flags; 131 + 132 + line_size = ax45mp_priv.ax45mp_cache_line_size; 133 + start = start & (~(line_size - 1)); 134 + local_irq_save(flags); 135 + ax45mp_cpu_dcache_wb_range(start, end); 136 + local_irq_restore(flags); 137 + } 138 + 139 + static void ax45mp_dma_cache_wback_inv(phys_addr_t paddr, size_t size) 140 + { 141 + ax45mp_dma_cache_wback(paddr, size); 142 + ax45mp_dma_cache_inv(paddr, size); 143 + } 144 + 145 + static int ax45mp_get_l2_line_size(struct device_node *np) 146 + { 147 + int ret; 148 + 149 + ret = of_property_read_u32(np, "cache-line-size", &ax45mp_priv.ax45mp_cache_line_size); 150 + if (ret) { 151 + pr_err("Failed to get cache-line-size, defaulting to 64 bytes\n"); 152 + return ret; 153 + } 154 + 155 + if (ax45mp_priv.ax45mp_cache_line_size != AX45MP_CACHE_LINE_SIZE) { 156 + pr_err("Expected cache-line-size to be 64 bytes (found:%u)\n", 157 + ax45mp_priv.ax45mp_cache_line_size); 158 + return -EINVAL; 159 + } 160 + 161 + return 0; 162 + } 163 + 164 + static const struct riscv_nonstd_cache_ops ax45mp_cmo_ops __initdata = { 165 + .wback = &ax45mp_dma_cache_wback, 166 + .inv = &ax45mp_dma_cache_inv, 167 + .wback_inv = &ax45mp_dma_cache_wback_inv, 168 + }; 169 + 170 + static const struct of_device_id ax45mp_cache_ids[] = { 171 + { .compatible = "andestech,ax45mp-cache" }, 172 + { /* sentinel */ } 173 + }; 174 + 175 + static int __init ax45mp_cache_init(void) 176 + { 177 + struct device_node *np; 178 + struct resource res; 179 + int ret; 180 + 181 + np = of_find_matching_node(NULL, ax45mp_cache_ids); 182 + if (!of_device_is_available(np)) 183 + return -ENODEV; 184 + 185 + ret = of_address_to_resource(np, 0, &res); 186 + if (ret) 187 + return ret; 188 + 189 + /* 190 + * If IOCP is present on the Andes AX45MP core riscv_cbom_block_size 191 + * will be 0 for sure, so we can definitely rely on it. If 192 + * riscv_cbom_block_size = 0 we don't need to handle CMO using SW any 193 + * more so we just return success here and only if its being set we 194 + * continue further in the probe path. 195 + */ 196 + if (!riscv_cbom_block_size) 197 + return 0; 198 + 199 + ax45mp_priv.l2c_base = ioremap(res.start, resource_size(&res)); 200 + if (!ax45mp_priv.l2c_base) 201 + return -ENOMEM; 202 + 203 + ret = ax45mp_get_l2_line_size(np); 204 + if (ret) { 205 + iounmap(ax45mp_priv.l2c_base); 206 + return ret; 207 + } 208 + 209 + riscv_noncoherent_register_cache_ops(&ax45mp_cmo_ops); 210 + 211 + return 0; 212 + } 213 + early_initcall(ax45mp_cache_init);
+2 -2
drivers/firmware/efi/libstub/Makefile
··· 86 86 screen_info.o efi-stub-entry.o 87 87 88 88 lib-$(CONFIG_ARM) += arm32-stub.o 89 - lib-$(CONFIG_ARM64) += arm64.o arm64-stub.o smbios.o 89 + lib-$(CONFIG_ARM64) += kaslr.o arm64.o arm64-stub.o smbios.o 90 90 lib-$(CONFIG_X86) += x86-stub.o 91 91 lib-$(CONFIG_X86_64) += x86-5lvl.o 92 - lib-$(CONFIG_RISCV) += riscv.o riscv-stub.o 92 + lib-$(CONFIG_RISCV) += kaslr.o riscv.o riscv-stub.o 93 93 lib-$(CONFIG_LOONGARCH) += loongarch.o loongarch-stub.o 94 94 95 95 CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+13 -104
drivers/firmware/efi/libstub/arm64-stub.c
··· 14 14 15 15 #include "efistub.h" 16 16 17 - /* 18 - * Distro versions of GRUB may ignore the BSS allocation entirely (i.e., fail 19 - * to provide space, and fail to zero it). Check for this condition by double 20 - * checking that the first and the last byte of the image are covered by the 21 - * same EFI memory map entry. 22 - */ 23 - static bool check_image_region(u64 base, u64 size) 24 - { 25 - struct efi_boot_memmap *map; 26 - efi_status_t status; 27 - bool ret = false; 28 - int map_offset; 29 - 30 - status = efi_get_memory_map(&map, false); 31 - if (status != EFI_SUCCESS) 32 - return false; 33 - 34 - for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) { 35 - efi_memory_desc_t *md = (void *)map->map + map_offset; 36 - u64 end = md->phys_addr + md->num_pages * EFI_PAGE_SIZE; 37 - 38 - /* 39 - * Find the region that covers base, and return whether 40 - * it covers base+size bytes. 41 - */ 42 - if (base >= md->phys_addr && base < end) { 43 - ret = (base + size) <= end; 44 - break; 45 - } 46 - } 47 - 48 - efi_bs_call(free_pool, map); 49 - 50 - return ret; 51 - } 52 - 53 17 efi_status_t handle_kernel_image(unsigned long *image_addr, 54 18 unsigned long *image_size, 55 19 unsigned long *reserve_addr, ··· 23 59 { 24 60 efi_status_t status; 25 61 unsigned long kernel_size, kernel_codesize, kernel_memsize; 26 - u32 phys_seed = 0; 27 - u64 min_kimg_align = efi_get_kimg_min_align(); 28 - 29 - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { 30 - efi_guid_t li_fixed_proto = LINUX_EFI_LOADED_IMAGE_FIXED_GUID; 31 - void *p; 32 - 33 - if (efi_nokaslr) { 34 - efi_info("KASLR disabled on kernel command line\n"); 35 - } else if (efi_bs_call(handle_protocol, image_handle, 36 - &li_fixed_proto, &p) == EFI_SUCCESS) { 37 - efi_info("Image placement fixed by loader\n"); 38 - } else { 39 - status = efi_get_random_bytes(sizeof(phys_seed), 40 - (u8 *)&phys_seed); 41 - if (status == EFI_NOT_FOUND) { 42 - efi_info("EFI_RNG_PROTOCOL unavailable\n"); 43 - efi_nokaslr = true; 44 - } else if (status != EFI_SUCCESS) { 45 - efi_err("efi_get_random_bytes() failed (0x%lx)\n", 46 - status); 47 - efi_nokaslr = true; 48 - } 49 - } 50 - } 51 62 52 63 if (image->image_base != _text) { 53 64 efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n"); ··· 37 98 kernel_codesize = __inittext_end - _text; 38 99 kernel_memsize = kernel_size + (_end - _edata); 39 100 *reserve_size = kernel_memsize; 101 + *image_addr = (unsigned long)_text; 40 102 41 - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) { 42 - /* 43 - * If KASLR is enabled, and we have some randomness available, 44 - * locate the kernel at a randomized offset in physical memory. 45 - */ 46 - status = efi_random_alloc(*reserve_size, min_kimg_align, 47 - reserve_addr, phys_seed, 48 - EFI_LOADER_CODE, EFI_ALLOC_LIMIT); 49 - if (status != EFI_SUCCESS) 50 - efi_warn("efi_random_alloc() failed: 0x%lx\n", status); 51 - } else { 52 - status = EFI_OUT_OF_RESOURCES; 53 - } 54 - 55 - if (status != EFI_SUCCESS) { 56 - if (!check_image_region((u64)_text, kernel_memsize)) { 57 - efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n"); 58 - } else if (IS_ALIGNED((u64)_text, min_kimg_align) && 59 - (u64)_end < EFI_ALLOC_LIMIT) { 60 - /* 61 - * Just execute from wherever we were loaded by the 62 - * UEFI PE/COFF loader if the placement is suitable. 63 - */ 64 - *image_addr = (u64)_text; 65 - *reserve_size = 0; 66 - return EFI_SUCCESS; 67 - } 68 - 69 - status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, 70 - ULONG_MAX, min_kimg_align, 71 - EFI_LOADER_CODE); 72 - 73 - if (status != EFI_SUCCESS) { 74 - efi_err("Failed to relocate kernel\n"); 75 - *reserve_size = 0; 76 - return status; 77 - } 78 - } 79 - 80 - *image_addr = *reserve_addr; 81 - memcpy((void *)*image_addr, _text, kernel_size); 82 - caches_clean_inval_pou(*image_addr, *image_addr + kernel_codesize); 83 - efi_remap_image(*image_addr, *reserve_size, kernel_codesize); 103 + status = efi_kaslr_relocate_kernel(image_addr, 104 + reserve_addr, reserve_size, 105 + kernel_size, kernel_codesize, 106 + kernel_memsize, 107 + efi_kaslr_get_phys_seed(image_handle)); 108 + if (status != EFI_SUCCESS) 109 + return status; 84 110 85 111 return EFI_SUCCESS; 86 112 } ··· 62 158 * actual entrypoint in the .text region of the image. 63 159 */ 64 160 return (char *)primary_entry - _text; 161 + } 162 + 163 + void efi_icache_sync(unsigned long start, unsigned long end) 164 + { 165 + caches_clean_inval_pou(start, end); 65 166 }
+8
drivers/firmware/efi/libstub/efistub.h
··· 1133 1133 1134 1134 void efi_remap_image(unsigned long image_base, unsigned alloc_size, 1135 1135 unsigned long code_size); 1136 + efi_status_t efi_kaslr_relocate_kernel(unsigned long *image_addr, 1137 + unsigned long *reserve_addr, 1138 + unsigned long *reserve_size, 1139 + unsigned long kernel_size, 1140 + unsigned long kernel_codesize, 1141 + unsigned long kernel_memsize, 1142 + u32 phys_seed); 1143 + u32 efi_kaslr_get_phys_seed(efi_handle_t image_handle); 1136 1144 1137 1145 asmlinkage efi_status_t __efiapi 1138 1146 efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab);
+159
drivers/firmware/efi/libstub/kaslr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Helper functions used by the EFI stub on multiple 4 + * architectures to deal with physical address space randomization. 5 + */ 6 + #include <linux/efi.h> 7 + 8 + #include "efistub.h" 9 + 10 + /** 11 + * efi_kaslr_get_phys_seed() - Get random seed for physical kernel KASLR 12 + * @image_handle: Handle to the image 13 + * 14 + * If KASLR is not disabled, obtain a random seed using EFI_RNG_PROTOCOL 15 + * that will be used to move the kernel physical mapping. 16 + * 17 + * Return: the random seed 18 + */ 19 + u32 efi_kaslr_get_phys_seed(efi_handle_t image_handle) 20 + { 21 + efi_status_t status; 22 + u32 phys_seed; 23 + efi_guid_t li_fixed_proto = LINUX_EFI_LOADED_IMAGE_FIXED_GUID; 24 + void *p; 25 + 26 + if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) 27 + return 0; 28 + 29 + if (efi_nokaslr) { 30 + efi_info("KASLR disabled on kernel command line\n"); 31 + } else if (efi_bs_call(handle_protocol, image_handle, 32 + &li_fixed_proto, &p) == EFI_SUCCESS) { 33 + efi_info("Image placement fixed by loader\n"); 34 + } else { 35 + status = efi_get_random_bytes(sizeof(phys_seed), 36 + (u8 *)&phys_seed); 37 + if (status == EFI_SUCCESS) { 38 + return phys_seed; 39 + } else if (status == EFI_NOT_FOUND) { 40 + efi_info("EFI_RNG_PROTOCOL unavailable\n"); 41 + efi_nokaslr = true; 42 + } else if (status != EFI_SUCCESS) { 43 + efi_err("efi_get_random_bytes() failed (0x%lx)\n", 44 + status); 45 + efi_nokaslr = true; 46 + } 47 + } 48 + 49 + return 0; 50 + } 51 + 52 + /* 53 + * Distro versions of GRUB may ignore the BSS allocation entirely (i.e., fail 54 + * to provide space, and fail to zero it). Check for this condition by double 55 + * checking that the first and the last byte of the image are covered by the 56 + * same EFI memory map entry. 57 + */ 58 + static bool check_image_region(u64 base, u64 size) 59 + { 60 + struct efi_boot_memmap *map; 61 + efi_status_t status; 62 + bool ret = false; 63 + int map_offset; 64 + 65 + status = efi_get_memory_map(&map, false); 66 + if (status != EFI_SUCCESS) 67 + return false; 68 + 69 + for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) { 70 + efi_memory_desc_t *md = (void *)map->map + map_offset; 71 + u64 end = md->phys_addr + md->num_pages * EFI_PAGE_SIZE; 72 + 73 + /* 74 + * Find the region that covers base, and return whether 75 + * it covers base+size bytes. 76 + */ 77 + if (base >= md->phys_addr && base < end) { 78 + ret = (base + size) <= end; 79 + break; 80 + } 81 + } 82 + 83 + efi_bs_call(free_pool, map); 84 + 85 + return ret; 86 + } 87 + 88 + /** 89 + * efi_kaslr_relocate_kernel() - Relocate the kernel (random if KASLR enabled) 90 + * @image_addr: Pointer to the current kernel location 91 + * @reserve_addr: Pointer to the relocated kernel location 92 + * @reserve_size: Size of the relocated kernel 93 + * @kernel_size: Size of the text + data 94 + * @kernel_codesize: Size of the text 95 + * @kernel_memsize: Size of the text + data + bss 96 + * @phys_seed: Random seed used for the relocation 97 + * 98 + * If KASLR is not enabled, this function relocates the kernel to a fixed 99 + * address (or leave it as its current location). If KASLR is enabled, the 100 + * kernel physical location is randomized using the seed in parameter. 101 + * 102 + * Return: status code, EFI_SUCCESS if relocation is successful 103 + */ 104 + efi_status_t efi_kaslr_relocate_kernel(unsigned long *image_addr, 105 + unsigned long *reserve_addr, 106 + unsigned long *reserve_size, 107 + unsigned long kernel_size, 108 + unsigned long kernel_codesize, 109 + unsigned long kernel_memsize, 110 + u32 phys_seed) 111 + { 112 + efi_status_t status; 113 + u64 min_kimg_align = efi_get_kimg_min_align(); 114 + 115 + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) { 116 + /* 117 + * If KASLR is enabled, and we have some randomness available, 118 + * locate the kernel at a randomized offset in physical memory. 119 + */ 120 + status = efi_random_alloc(*reserve_size, min_kimg_align, 121 + reserve_addr, phys_seed, 122 + EFI_LOADER_CODE, EFI_ALLOC_LIMIT); 123 + if (status != EFI_SUCCESS) 124 + efi_warn("efi_random_alloc() failed: 0x%lx\n", status); 125 + } else { 126 + status = EFI_OUT_OF_RESOURCES; 127 + } 128 + 129 + if (status != EFI_SUCCESS) { 130 + if (!check_image_region(*image_addr, kernel_memsize)) { 131 + efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n"); 132 + } else if (IS_ALIGNED(*image_addr, min_kimg_align) && 133 + (unsigned long)_end < EFI_ALLOC_LIMIT) { 134 + /* 135 + * Just execute from wherever we were loaded by the 136 + * UEFI PE/COFF loader if the placement is suitable. 137 + */ 138 + *reserve_size = 0; 139 + return EFI_SUCCESS; 140 + } 141 + 142 + status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, 143 + ULONG_MAX, min_kimg_align, 144 + EFI_LOADER_CODE); 145 + 146 + if (status != EFI_SUCCESS) { 147 + efi_err("Failed to relocate kernel\n"); 148 + *reserve_size = 0; 149 + return status; 150 + } 151 + } 152 + 153 + memcpy((void *)*reserve_addr, (void *)*image_addr, kernel_size); 154 + *image_addr = *reserve_addr; 155 + efi_icache_sync(*image_addr, *image_addr + kernel_codesize); 156 + efi_remap_image(*image_addr, *reserve_size, kernel_codesize); 157 + 158 + return status; 159 + }
+15 -18
drivers/firmware/efi/libstub/riscv-stub.c
··· 30 30 efi_loaded_image_t *image, 31 31 efi_handle_t image_handle) 32 32 { 33 - unsigned long kernel_size = 0; 34 - unsigned long preferred_addr; 33 + unsigned long kernel_size, kernel_codesize, kernel_memsize; 35 34 efi_status_t status; 36 35 37 36 kernel_size = _edata - _start; 37 + kernel_codesize = __init_text_end - _start; 38 + kernel_memsize = kernel_size + (_end - _edata); 38 39 *image_addr = (unsigned long)_start; 39 - *image_size = kernel_size + (_end - _edata); 40 + *image_size = kernel_memsize; 41 + *reserve_size = *image_size; 40 42 41 - /* 42 - * RISC-V kernel maps PAGE_OFFSET virtual address to the same physical 43 - * address where kernel is booted. That's why kernel should boot from 44 - * as low as possible to avoid wastage of memory. Currently, dram_base 45 - * is occupied by the firmware. So the preferred address for kernel to 46 - * boot is next aligned address. If preferred address is not available, 47 - * relocate_kernel will fall back to efi_low_alloc_above to allocate 48 - * lowest possible memory region as long as the address and size meets 49 - * the alignment constraints. 50 - */ 51 - preferred_addr = EFI_KIMG_PREFERRED_ADDRESS; 52 - status = efi_relocate_kernel(image_addr, kernel_size, *image_size, 53 - preferred_addr, efi_get_kimg_min_align(), 54 - 0x0); 55 - 43 + status = efi_kaslr_relocate_kernel(image_addr, 44 + reserve_addr, reserve_size, 45 + kernel_size, kernel_codesize, kernel_memsize, 46 + efi_kaslr_get_phys_seed(image_handle)); 56 47 if (status != EFI_SUCCESS) { 57 48 efi_err("Failed to relocate kernel\n"); 58 49 *image_size = 0; 59 50 } 51 + 60 52 return status; 53 + } 54 + 55 + void efi_icache_sync(unsigned long start, unsigned long end) 56 + { 57 + asm volatile ("fence.i" ::: "memory"); 61 58 }
+5
drivers/soc/renesas/Kconfig
··· 334 334 config ARCH_R9A07G043 335 335 bool "RISC-V Platform support for RZ/Five" 336 336 select ARCH_RZG2L 337 + select AX45MP_L2_CACHE if RISCV_DMA_NONCOHERENT 338 + select DMA_GLOBAL_POOL 339 + select ERRATA_ANDES if RISCV_SBI 340 + select ERRATA_ANDES_CMO if ERRATA_ANDES 341 + 337 342 help 338 343 This enables support for the Renesas RZ/Five SoC. 339 344
+2
include/uapi/linux/elf.h
··· 445 445 #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ 446 446 #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ 447 447 #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ 448 + #define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ 449 + #define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ 448 450 #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ 449 451 #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ 450 452 #define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */
+4 -4
kernel/bpf/core.c
··· 870 870 GFP_KERNEL); 871 871 if (!pack) 872 872 return NULL; 873 - pack->ptr = module_alloc(BPF_PROG_PACK_SIZE); 873 + pack->ptr = bpf_jit_alloc_exec(BPF_PROG_PACK_SIZE); 874 874 if (!pack->ptr) { 875 875 kfree(pack); 876 876 return NULL; ··· 894 894 mutex_lock(&pack_mutex); 895 895 if (size > BPF_PROG_PACK_SIZE) { 896 896 size = round_up(size, PAGE_SIZE); 897 - ptr = module_alloc(size); 897 + ptr = bpf_jit_alloc_exec(size); 898 898 if (ptr) { 899 899 bpf_fill_ill_insns(ptr, size); 900 900 set_vm_flush_reset_perms(ptr); ··· 932 932 933 933 mutex_lock(&pack_mutex); 934 934 if (hdr->size > BPF_PROG_PACK_SIZE) { 935 - module_memfree(hdr); 935 + bpf_jit_free_exec(hdr); 936 936 goto out; 937 937 } 938 938 ··· 956 956 if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, 957 957 BPF_PROG_CHUNK_COUNT, 0) == 0) { 958 958 list_del(&pack->list); 959 - module_memfree(pack->ptr); 959 + bpf_jit_free_exec(pack->ptr); 960 960 kfree(pack); 961 961 } 962 962 out: