Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

lib/crypto: x86/sha256: Move static_call above kernel-mode FPU section

As I did for sha512_blocks(), reorganize x86's sha256_blocks() to be
just a static_call. To achieve that, for each assembly function add a C
function that handles the kernel-mode FPU section and fallback. While
this increases total code size slightly, the amount of code actually
executed on a given system does not increase, and it is slightly more
efficient since it eliminates the extra static_key. It also lets the
assembly functions be called with standard direct calls instead of
static calls, eliminating the need for ANNOTATE_NOENDBR.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250704023958.73274-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>

+25 -34
-3
lib/crypto/x86/sha256-avx-asm.S
··· 48 48 ######################################################################## 49 49 50 50 #include <linux/linkage.h> 51 - #include <linux/objtool.h> 52 51 53 52 ## assume buffers not aligned 54 53 #define VMOVDQ vmovdqu ··· 345 346 ######################################################################## 346 347 .text 347 348 SYM_FUNC_START(sha256_transform_avx) 348 - ANNOTATE_NOENDBR # since this is called only via static_call 349 - 350 349 pushq %rbx 351 350 pushq %r12 352 351 pushq %r13
-3
lib/crypto/x86/sha256-avx2-asm.S
··· 49 49 ######################################################################## 50 50 51 51 #include <linux/linkage.h> 52 - #include <linux/objtool.h> 53 52 54 53 ## assume buffers not aligned 55 54 #define VMOVDQ vmovdqu ··· 522 523 ######################################################################## 523 524 .text 524 525 SYM_FUNC_START(sha256_transform_rorx) 525 - ANNOTATE_NOENDBR # since this is called only via static_call 526 - 527 526 pushq %rbx 528 527 pushq %r12 529 528 pushq %r13
-2
lib/crypto/x86/sha256-ni-asm.S
··· 54 54 */ 55 55 56 56 #include <linux/linkage.h> 57 - #include <linux/objtool.h> 58 57 59 58 #define STATE_PTR %rdi /* 1st arg */ 60 59 #define DATA_PTR %rsi /* 2nd arg */ ··· 110 111 */ 111 112 .text 112 113 SYM_FUNC_START(sha256_ni_transform) 113 - ANNOTATE_NOENDBR # since this is called only via static_call 114 114 115 115 shl $6, NUM_BLKS /* convert to bytes */ 116 116 jz .Ldone_hash
-3
lib/crypto/x86/sha256-ssse3-asm.S
··· 47 47 ######################################################################## 48 48 49 49 #include <linux/linkage.h> 50 - #include <linux/objtool.h> 51 50 52 51 ## assume buffers not aligned 53 52 #define MOVDQ movdqu ··· 352 353 ######################################################################## 353 354 .text 354 355 SYM_FUNC_START(sha256_transform_ssse3) 355 - ANNOTATE_NOENDBR # since this is called only via static_call 356 - 357 356 pushq %rbx 358 357 pushq %r12 359 358 pushq %r13
+25 -23
lib/crypto/x86/sha256.h
··· 8 8 #include <crypto/internal/simd.h> 9 9 #include <linux/static_call.h> 10 10 11 - asmlinkage void sha256_transform_ssse3(struct sha256_block_state *state, 12 - const u8 *data, size_t nblocks); 13 - asmlinkage void sha256_transform_avx(struct sha256_block_state *state, 14 - const u8 *data, size_t nblocks); 15 - asmlinkage void sha256_transform_rorx(struct sha256_block_state *state, 16 - const u8 *data, size_t nblocks); 17 - asmlinkage void sha256_ni_transform(struct sha256_block_state *state, 18 - const u8 *data, size_t nblocks); 11 + DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic); 19 12 20 - static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha256_x86); 13 + #define DEFINE_X86_SHA256_FN(c_fn, asm_fn) \ 14 + asmlinkage void asm_fn(struct sha256_block_state *state, \ 15 + const u8 *data, size_t nblocks); \ 16 + static void c_fn(struct sha256_block_state *state, const u8 *data, \ 17 + size_t nblocks) \ 18 + { \ 19 + if (likely(crypto_simd_usable())) { \ 20 + kernel_fpu_begin(); \ 21 + asm_fn(state, data, nblocks); \ 22 + kernel_fpu_end(); \ 23 + } else { \ 24 + sha256_blocks_generic(state, data, nblocks); \ 25 + } \ 26 + } 21 27 22 - DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_transform_ssse3); 28 + DEFINE_X86_SHA256_FN(sha256_blocks_ssse3, sha256_transform_ssse3); 29 + DEFINE_X86_SHA256_FN(sha256_blocks_avx, sha256_transform_avx); 30 + DEFINE_X86_SHA256_FN(sha256_blocks_avx2, sha256_transform_rorx); 31 + DEFINE_X86_SHA256_FN(sha256_blocks_ni, sha256_ni_transform); 23 32 24 33 static void sha256_blocks(struct sha256_block_state *state, 25 34 const u8 *data, size_t nblocks) 26 35 { 27 - if (static_branch_likely(&have_sha256_x86) && crypto_simd_usable()) { 28 - kernel_fpu_begin(); 29 - static_call(sha256_blocks_x86)(state, data, nblocks); 30 - kernel_fpu_end(); 31 - } else { 32 - sha256_blocks_generic(state, data, nblocks); 33 - } 36 + static_call(sha256_blocks_x86)(state, data, nblocks); 34 37 } 35 38 36 39 #define sha256_mod_init_arch 
sha256_mod_init_arch 37 40 static inline void sha256_mod_init_arch(void) 38 41 { 39 42 if (boot_cpu_has(X86_FEATURE_SHA_NI)) { 40 - static_call_update(sha256_blocks_x86, sha256_ni_transform); 43 + static_call_update(sha256_blocks_x86, sha256_blocks_ni); 41 44 } else if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, 42 45 NULL) && 43 46 boot_cpu_has(X86_FEATURE_AVX)) { 44 47 if (boot_cpu_has(X86_FEATURE_AVX2) && 45 48 boot_cpu_has(X86_FEATURE_BMI2)) 46 49 static_call_update(sha256_blocks_x86, 47 - sha256_transform_rorx); 50 + sha256_blocks_avx2); 48 51 else 49 52 static_call_update(sha256_blocks_x86, 50 - sha256_transform_avx); 51 - } else if (!boot_cpu_has(X86_FEATURE_SSSE3)) { 52 - return; 53 + sha256_blocks_avx); 54 + } else if (boot_cpu_has(X86_FEATURE_SSSE3)) { 55 + static_call_update(sha256_blocks_x86, sha256_blocks_ssse3); 53 56 } 54 - static_branch_enable(&have_sha256_x86); 55 57 }