Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

lib/crypto: s390/sha3: Add optimized one-shot SHA-3 digest functions

Some z/Architecture processors can compute a SHA-3 digest in a single
instruction. arch/s390/crypto/ already uses this capability to optimize
the SHA-3 crypto_shash algorithms.

Use this capability to implement the sha3_224(), sha3_256(), sha3_384(),
and sha3_512() library functions too.

SHA3-256 benchmark results provided by Harald Freudenberger
(https://lore.kernel.org/r/4188d18bfcc8a64941c5ebd8de10ede2@linux.ibm.com/)
on a z/Architecture machine with "facility 86" (MSA level 12):

    Length (bytes)    Before (MB/s)    After (MB/s)
    ==============    =============    ============
                16              212             225
                64              820             915
               256             1850            3350
              1024             5400            8300
              4096            11200           11300

Note: the original data from Harald was given in the form of a graph for
each length, showing the distribution of throughputs from 500 runs. I
guesstimated the peak of each one.

Harald also reported that the generic SHA-3 code was at most 259 MB/s
(https://lore.kernel.org/r/c39f6b6c110def0095e5da5becc12085@linux.ibm.com/).
So as expected, the earlier commit that optimized sha3_absorb_blocks()
and sha3_keccakf() is the more important one; it optimized the Keccak
permutation which is the most performance-critical part of SHA-3.
Still, this additional commit does notably improve performance further
on some lengths.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Harald Freudenberger <freude@linux.ibm.com>
Link: https://lore.kernel.org/r/20251026055032.1413733-13-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>

+65 -2
+65 -2
lib/crypto/s390/sha3.h
··· 8 8 #include <linux/cpufeature.h> 9 9 10 10 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha3); 11 + static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha3_init_optim); 11 12 12 13 static void sha3_absorb_blocks(struct sha3_state *state, const u8 *data, 13 14 size_t nblocks, size_t block_size) ··· 61 60 } 62 61 } 63 62 63 + static inline bool s390_sha3(int func, const u8 *in, size_t in_len, 64 + u8 *out, size_t out_len) 65 + { 66 + struct sha3_state state; 67 + 68 + if (!static_branch_likely(&have_sha3)) 69 + return false; 70 + 71 + if (static_branch_likely(&have_sha3_init_optim)) 72 + func |= CPACF_KLMD_NIP | CPACF_KLMD_DUFOP; 73 + else 74 + memset(&state, 0, sizeof(state)); 75 + 76 + cpacf_klmd(func, &state, in, in_len); 77 + 78 + if (static_branch_likely(&have_sha3_init_optim)) 79 + kmsan_unpoison_memory(&state, out_len); 80 + 81 + memcpy(out, &state, out_len); 82 + memzero_explicit(&state, sizeof(state)); 83 + return true; 84 + } 85 + 86 + #define sha3_224_arch sha3_224_arch 87 + static bool sha3_224_arch(const u8 *in, size_t in_len, 88 + u8 out[SHA3_224_DIGEST_SIZE]) 89 + { 90 + return s390_sha3(CPACF_KLMD_SHA3_224, in, in_len, 91 + out, SHA3_224_DIGEST_SIZE); 92 + } 93 + 94 + #define sha3_256_arch sha3_256_arch 95 + static bool sha3_256_arch(const u8 *in, size_t in_len, 96 + u8 out[SHA3_256_DIGEST_SIZE]) 97 + { 98 + return s390_sha3(CPACF_KLMD_SHA3_256, in, in_len, 99 + out, SHA3_256_DIGEST_SIZE); 100 + } 101 + 102 + #define sha3_384_arch sha3_384_arch 103 + static bool sha3_384_arch(const u8 *in, size_t in_len, 104 + u8 out[SHA3_384_DIGEST_SIZE]) 105 + { 106 + return s390_sha3(CPACF_KLMD_SHA3_384, in, in_len, 107 + out, SHA3_384_DIGEST_SIZE); 108 + } 109 + 110 + #define sha3_512_arch sha3_512_arch 111 + static bool sha3_512_arch(const u8 *in, size_t in_len, 112 + u8 out[SHA3_512_DIGEST_SIZE]) 113 + { 114 + return s390_sha3(CPACF_KLMD_SHA3_512, in, in_len, 115 + out, SHA3_512_DIGEST_SIZE); 116 + } 117 + 64 118 #define sha3_mod_init_arch 
sha3_mod_init_arch 65 119 static void sha3_mod_init_arch(void) 66 120 { ··· 135 79 QUERY(CPACF_KIMD, CPACF_KIMD_SHA3_256); 136 80 QUERY(CPACF_KIMD, CPACF_KIMD_SHA3_384); 137 81 QUERY(CPACF_KIMD, CPACF_KIMD_SHA3_512); 82 + QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_224); 83 + QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_256); 84 + QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_384); 85 + QUERY(CPACF_KLMD, CPACF_KLMD_SHA3_512); 138 86 #undef QUERY 139 87 140 - if (num_present == num_possible) 88 + if (num_present == num_possible) { 141 89 static_branch_enable(&have_sha3); 142 - else if (num_present != 0) 90 + if (test_facility(86)) 91 + static_branch_enable(&have_sha3_init_optim); 92 + } else if (num_present != 0) { 143 93 pr_warn("Unsupported combination of SHA-3 facilities\n"); 94 + } 144 95 }