Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86: vdso: Wire up getrandom() vDSO implementation

Hook up the generic vDSO implementation to the x86 vDSO data page. Since
the existing vDSO infrastructure is heavily based on the timekeeping
functionality, which works over arrays of bases, a new macro is
introduced for vvars that are not arrays.

The vDSO function requires a ChaCha20 implementation that does not write
to the stack, yet can still do an entire ChaCha20 permutation, so
provide this using SSE2, since this is userland code that must work on
all x86-64 processors.

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Samuel Neves <sneves@dei.uc.pt> # for vgetrandom-chacha.S
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>

+275 -1
+2
MAINTAINERS
···
18747 18747 F: drivers/virt/vmgenid.c
18748 18748 F: include/vdso/getrandom.h
18749 18749 F: lib/vdso/getrandom.c
18750 + F: arch/x86/entry/vdso/vgetrandom*
18751 + F: arch/x86/include/asm/vdso/getrandom*
18750 18752
18751 18753 RAPIDIO SUBSYSTEM
18752 18754 M: Matt Porter <mporter@kernel.crashing.org>
+1
arch/x86/Kconfig
···
287 287 select HAVE_UNSTABLE_SCHED_CLOCK
288 288 select HAVE_USER_RETURN_NOTIFIER
289 289 select HAVE_GENERIC_VDSO
290 + select VDSO_GETRANDOM if X86_64
290 291 select HOTPLUG_PARALLEL if SMP && X86_64
291 292 select HOTPLUG_SMT if SMP
292 293 select HOTPLUG_SPLIT_STARTUP if SMP && X86_32
+2 -1
arch/x86/entry/vdso/Makefile
···
7 7 include $(srctree)/lib/vdso/Makefile
8 8
9 9 # Files to link into the vDSO:
10 - vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
10 + vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vgetrandom.o vgetrandom-chacha.o
11 11 vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
12 12 vobjs32-y += vdso32/vclock_gettime.o vdso32/vgetcpu.o
13 13 vobjs-$(CONFIG_X86_SGX) += vsgx.o
···
73 73 CFLAGS_REMOVE_vgetcpu.o = -pg
74 74 CFLAGS_REMOVE_vdso32/vgetcpu.o = -pg
75 75 CFLAGS_REMOVE_vsgx.o = -pg
76 + CFLAGS_REMOVE_vgetrandom.o = -pg
76 77
77 78 #
78 79 # X32 processes use x32 vDSO to access 64bit kernel data.
+2
arch/x86/entry/vdso/vdso.lds.S
···
30 30 #ifdef CONFIG_X86_SGX
31 31 __vdso_sgx_enter_enclave;
32 32 #endif
33 + getrandom;
34 + __vdso_getrandom;
33 35 local: *;
34 36 };
35 37 }
+178
arch/x86/entry/vdso/vgetrandom-chacha.S
···
1 + // SPDX-License-Identifier: GPL-2.0
2 + /*
3 + * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
4 + */
5 +
6 + #include <linux/linkage.h>
7 + #include <asm/frame.h>
8 +
9 + .section .rodata, "a"
10 + .align 16
11 + CONSTANTS: .octa 0x6b20657479622d323320646e61707865
12 + .text
13 +
14 + /*
15 + * Very basic SSE2 implementation of ChaCha20. Produces a given positive number
16 + * of blocks of output with a nonce of 0, taking an input key and 8-byte
17 + * counter. Importantly does not spill to the stack. Its arguments are:
18 + *
19 + * rdi: output bytes
20 + * rsi: 32-byte key input
21 + * rdx: 8-byte counter input/output
22 + * rcx: number of 64-byte blocks to write to output
23 + */
24 + SYM_FUNC_START(__arch_chacha20_blocks_nostack)
25 +
26 + .set output, %rdi
27 + .set key, %rsi
28 + .set counter, %rdx
29 + .set nblocks, %rcx
30 + .set i, %al
31 + /* xmm registers are *not* callee-save. */
32 + .set temp, %xmm0
33 + .set state0, %xmm1
34 + .set state1, %xmm2
35 + .set state2, %xmm3
36 + .set state3, %xmm4
37 + .set copy0, %xmm5
38 + .set copy1, %xmm6
39 + .set copy2, %xmm7
40 + .set copy3, %xmm8
41 + .set one, %xmm9
42 +
43 + /* copy0 = "expand 32-byte k" */
44 + movaps CONSTANTS(%rip),copy0
45 + /* copy1,copy2 = key */
46 + movups 0x00(key),copy1
47 + movups 0x10(key),copy2
48 + /* copy3 = counter || zero nonce */
49 + movq 0x00(counter),copy3
50 + /* one = 1 || 0 */
51 + movq $1,%rax
52 + movq %rax,one
53 +
54 + .Lblock:
55 + /* state0,state1,state2,state3 = copy0,copy1,copy2,copy3 */
56 + movdqa copy0,state0
57 + movdqa copy1,state1
58 + movdqa copy2,state2
59 + movdqa copy3,state3
60 +
61 + movb $10,i
62 + .Lpermute:
63 + /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
64 + paddd state1,state0
65 + pxor state0,state3
66 + movdqa state3,temp
67 + pslld $16,temp
68 + psrld $16,state3
69 + por temp,state3
70 +
71 + /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
72 + paddd state3,state2
73 + pxor state2,state1
74 + movdqa state1,temp
75 + pslld $12,temp
76 + psrld $20,state1
77 + por temp,state1
78 +
79 + /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
80 + paddd state1,state0
81 + pxor state0,state3
82 + movdqa state3,temp
83 + pslld $8,temp
84 + psrld $24,state3
85 + por temp,state3
86 +
87 + /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
88 + paddd state3,state2
89 + pxor state2,state1
90 + movdqa state1,temp
91 + pslld $7,temp
92 + psrld $25,state1
93 + por temp,state1
94 +
95 + /* state1[0,1,2,3] = state1[1,2,3,0] */
96 + pshufd $0x39,state1,state1
97 + /* state2[0,1,2,3] = state2[2,3,0,1] */
98 + pshufd $0x4e,state2,state2
99 + /* state3[0,1,2,3] = state3[3,0,1,2] */
100 + pshufd $0x93,state3,state3
101 +
102 + /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
103 + paddd state1,state0
104 + pxor state0,state3
105 + movdqa state3,temp
106 + pslld $16,temp
107 + psrld $16,state3
108 + por temp,state3
109 +
110 + /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
111 + paddd state3,state2
112 + pxor state2,state1
113 + movdqa state1,temp
114 + pslld $12,temp
115 + psrld $20,state1
116 + por temp,state1
117 +
118 + /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
119 + paddd state1,state0
120 + pxor state0,state3
121 + movdqa state3,temp
122 + pslld $8,temp
123 + psrld $24,state3
124 + por temp,state3
125 +
126 + /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
127 + paddd state3,state2
128 + pxor state2,state1
129 + movdqa state1,temp
130 + pslld $7,temp
131 + psrld $25,state1
132 + por temp,state1
133 +
134 + /* state1[0,1,2,3] = state1[3,0,1,2] */
135 + pshufd $0x93,state1,state1
136 + /* state2[0,1,2,3] = state2[2,3,0,1] */
137 + pshufd $0x4e,state2,state2
138 + /* state3[0,1,2,3] = state3[1,2,3,0] */
139 + pshufd $0x39,state3,state3
140 +
141 + decb i
142 + jnz .Lpermute
143 +
144 + /* output0 = state0 + copy0 */
145 + paddd copy0,state0
146 + movups state0,0x00(output)
147 + /* output1 = state1 + copy1 */
148 + paddd copy1,state1
149 + movups state1,0x10(output)
150 + /* output2 = state2 + copy2 */
151 + paddd copy2,state2
152 + movups state2,0x20(output)
153 + /* output3 = state3 + copy3 */
154 + paddd copy3,state3
155 + movups state3,0x30(output)
156 +
157 + /* ++copy3.counter */
158 + paddq one,copy3
159 +
160 + /* output += 64, --nblocks */
161 + addq $64,output
162 + decq nblocks
163 + jnz .Lblock
164 +
165 + /* counter = copy3.counter */
166 + movq copy3,0x00(counter)
167 +
168 + /* Zero out the potentially sensitive regs, in case nothing uses these again. */
169 + pxor state0,state0
170 + pxor state1,state1
171 + pxor state2,state2
172 + pxor state3,state3
173 + pxor copy1,copy1
174 + pxor copy2,copy2
175 + pxor temp,temp
176 +
177 + ret
178 + SYM_FUNC_END(__arch_chacha20_blocks_nostack)
+17
arch/x86/entry/vdso/vgetrandom.c
···
1 + // SPDX-License-Identifier: GPL-2.0-only
2 + /*
3 + * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
4 + */
5 + #include <linux/types.h>
6 +
7 + #include "../../../../lib/vdso/getrandom.c"
8 +
9 + ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
10 +
11 + ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
12 + {
13 + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
14 + }
15 +
16 + ssize_t getrandom(void *, size_t, unsigned int, void *, size_t)
17 + __attribute__((weak, alias("__vdso_getrandom")));
+55
arch/x86/include/asm/vdso/getrandom.h
···
1 + /* SPDX-License-Identifier: GPL-2.0 */
2 + /*
3 + * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
4 + */
5 + #ifndef __ASM_VDSO_GETRANDOM_H
6 + #define __ASM_VDSO_GETRANDOM_H
7 +
8 + #ifndef __ASSEMBLY__
9 +
10 + #include <asm/unistd.h>
11 + #include <asm/vvar.h>
12 +
13 + /**
14 + * getrandom_syscall - Invoke the getrandom() syscall.
15 + * @buffer: Destination buffer to fill with random bytes.
16 + * @len: Size of @buffer in bytes.
17 + * @flags: Zero or more GRND_* flags.
18 + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
19 + */
20 + static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
21 + {
22 + long ret;
23 +
24 + asm ("syscall" : "=a" (ret) :
25 + "0" (__NR_getrandom), "D" (buffer), "S" (len), "d" (flags) :
26 + "rcx", "r11", "memory");
27 +
28 + return ret;
29 + }
30 +
31 + #define __vdso_rng_data (VVAR(_vdso_rng_data))
32 +
33 + static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
34 + {
35 + if (IS_ENABLED(CONFIG_TIME_NS) && __vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS)
36 + return (void *)&__vdso_rng_data + ((void *)&__timens_vdso_data - (void *)&__vdso_data);
37 + return &__vdso_rng_data;
38 + }
39 +
40 + /**
41 + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
42 + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output.
43 + * @key: 32-byte input key.
44 + * @counter: 8-byte counter, read on input and updated on return.
45 + * @nblocks: Number of blocks to generate.
46 + *
47 + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
48 + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
49 + * leaking into forked child processes.
50 + */
51 + extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);
52 +
53 + #endif /* !__ASSEMBLY__ */
54 +
55 + #endif /* __ASM_VDSO_GETRANDOM_H */
+2
arch/x86/include/asm/vdso/vsyscall.h
···
11 11 #include <asm/vvar.h>
12 12
13 13 DEFINE_VVAR(struct vdso_data, _vdso_data);
14 + DEFINE_VVAR_SINGLE(struct vdso_rng_data, _vdso_rng_data);
15 +
14 16 /*
15 17 * Update the vDSO data page to keep in sync with kernel timekeeping.
16 18 */
+16
arch/x86/include/asm/vvar.h
···
26 26 */
27 27 #define DECLARE_VVAR(offset, type, name) \
28 28 EMIT_VVAR(name, offset)
29 + #define DECLARE_VVAR_SINGLE(offset, type, name) \
30 + EMIT_VVAR(name, offset)
29 31
30 32 #else
31 33
···
39 37 extern type timens_ ## name[CS_BASES] \
40 38 __attribute__((visibility("hidden"))); \
41 39
40 + #define DECLARE_VVAR_SINGLE(offset, type, name) \
41 + extern type vvar_ ## name \
42 + __attribute__((visibility("hidden"))); \
43 +
42 44 #define VVAR(name) (vvar_ ## name)
43 45 #define TIMENS(name) (timens_ ## name)
44 46
45 47 #define DEFINE_VVAR(type, name) \
46 48 type name[CS_BASES] \
49 + __attribute__((section(".vvar_" #name), aligned(16))) __visible
50 +
51 + #define DEFINE_VVAR_SINGLE(type, name) \
52 + type name \
47 53 __attribute__((section(".vvar_" #name), aligned(16))) __visible
48 54
49 55 #endif
···
60 50
61 51 DECLARE_VVAR(128, struct vdso_data, _vdso_data)
62 52
53 + #if !defined(_SINGLE_DATA)
54 + #define _SINGLE_DATA
55 + DECLARE_VVAR_SINGLE(640, struct vdso_rng_data, _vdso_rng_data)
56 + #endif
57 +
63 58 #undef DECLARE_VVAR
59 + #undef DECLARE_VVAR_SINGLE
64 60
65 61 #endif