Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

lib/crypto: arm64/ghash: Migrate optimized code into library

Remove the "ghash-neon" crypto_shash algorithm. Move the corresponding
assembly code into lib/crypto/, and wire it up to the GHASH library.

This makes the GHASH library optimized on arm64 (though only with
NEON, not PMULL; for now the goal is just parity with crypto_shash). It
greatly reduces the amount of arm64-specific glue code that is needed,
and it fixes the issue where this optimization was disabled by default.

To integrate the assembly code correctly with the library, make the
following tweaks:

- Change the type of 'blocks' from int to size_t.
- Change the types of 'dg' and 'h' to polyval_elem. Note that this
  simply reflects the format that the code was already using.
- Remove the 'head' argument, which is no longer needed.
- Remove the CFI stubs, as indirect calls are no longer used.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260319061723.1140720-10-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>

+86 -165
+2 -3
arch/arm64/crypto/Kconfig
··· 3 3 menu "Accelerated Cryptographic Algorithms for CPU (arm64)" 4 4 5 5 config CRYPTO_GHASH_ARM64_CE 6 - tristate "Hash functions: GHASH (ARMv8 Crypto Extensions)" 6 + tristate "AEAD cipher: AES in GCM mode (ARMv8 Crypto Extensions)" 7 7 depends on KERNEL_MODE_NEON 8 - select CRYPTO_HASH 9 8 select CRYPTO_LIB_AES 10 9 select CRYPTO_LIB_GF128MUL 11 10 select CRYPTO_AEAD 12 11 help 13 - GCM GHASH function (NIST SP800-38D) 12 + AEAD cipher: AES-GCM 14 13 15 14 Architecture: arm64 using: 16 15 - ARMv8 Crypto Extensions
+1 -1
arch/arm64/crypto/Makefile
··· 27 27 sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o 28 28 29 29 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o 30 - ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o ghash-neon-core.o 30 + ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o 31 31 32 32 obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o 33 33 aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
+1 -2
arch/arm64/crypto/ghash-ce-core.S
··· 6 6 */ 7 7 8 8 #include <linux/linkage.h> 9 - #include <linux/cfi_types.h> 10 9 #include <asm/assembler.h> 11 10 12 11 SHASH .req v0 ··· 66 67 * void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src, 67 68 * u64 const h[][2], const char *head) 68 69 */ 69 - SYM_TYPED_FUNC_START(pmull_ghash_update_p64) 70 + SYM_FUNC_START(pmull_ghash_update_p64) 70 71 ld1 {SHASH.2d}, [x3] 71 72 ld1 {XL.2d}, [x1] 72 73
+12 -138
arch/arm64/crypto/ghash-ce-glue.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * Accelerated GHASH implementation with ARMv8 PMULL instructions. 3 + * AES-GCM using ARMv8 Crypto Extensions 4 4 * 5 5 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> 6 6 */ ··· 11 11 #include <crypto/ghash.h> 12 12 #include <crypto/gf128mul.h> 13 13 #include <crypto/internal/aead.h> 14 - #include <crypto/internal/hash.h> 15 14 #include <crypto/internal/skcipher.h> 16 15 #include <crypto/scatterwalk.h> 17 16 #include <linux/cpufeature.h> ··· 22 23 23 24 #include <asm/simd.h> 24 25 25 - MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions"); 26 + MODULE_DESCRIPTION("AES-GCM using ARMv8 Crypto Extensions"); 26 27 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 27 28 MODULE_LICENSE("GPL v2"); 28 - MODULE_ALIAS_CRYPTO("ghash"); 29 + MODULE_ALIAS_CRYPTO("gcm(aes)"); 30 + MODULE_ALIAS_CRYPTO("rfc4106(gcm(aes))"); 29 31 30 32 #define RFC4106_NONCE_SIZE 4 31 33 32 34 struct ghash_key { 33 35 be128 k; 34 36 u64 h[][2]; 35 - }; 36 - 37 - struct arm_ghash_desc_ctx { 38 - u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)]; 39 37 }; 40 38 41 39 struct gcm_aes_ctx { ··· 44 48 asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src, 45 49 u64 const h[][2], const char *head); 46 50 47 - asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, 48 - u64 const h[][2], const char *head); 49 - 50 51 asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[], 51 52 u64 const h[][2], u64 dg[], u8 ctr[], 52 53 u32 const rk[], int rounds, u8 tag[]); ··· 52 59 u32 const rk[], int rounds, const u8 l[], 53 60 const u8 tag[], u64 authsize); 54 61 55 - static int ghash_init(struct shash_desc *desc) 56 - { 57 - struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc); 58 - 59 - *ctx = (struct arm_ghash_desc_ctx){}; 60 - return 0; 61 - } 62 - 63 - static __always_inline 64 - void ghash_do_simd_update(int blocks, u64 dg[], const char *src, 65 - struct 
ghash_key *key, const char *head, 66 - void (*simd_update)(int blocks, u64 dg[], 67 - const char *src, 68 - u64 const h[][2], 69 - const char *head)) 62 + static void ghash_do_simd_update(int blocks, u64 dg[], const char *src, 63 + struct ghash_key *key, const char *head) 70 64 { 71 65 scoped_ksimd() 72 - simd_update(blocks, dg, src, key->h, head); 73 - } 74 - 75 - /* avoid hogging the CPU for too long */ 76 - #define MAX_BLOCKS (SZ_64K / GHASH_BLOCK_SIZE) 77 - 78 - static int ghash_update(struct shash_desc *desc, const u8 *src, 79 - unsigned int len) 80 - { 81 - struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc); 82 - struct ghash_key *key = crypto_shash_ctx(desc->tfm); 83 - int blocks; 84 - 85 - blocks = len / GHASH_BLOCK_SIZE; 86 - len -= blocks * GHASH_BLOCK_SIZE; 87 - 88 - do { 89 - int chunk = min(blocks, MAX_BLOCKS); 90 - 91 - ghash_do_simd_update(chunk, ctx->digest, src, key, NULL, 92 - pmull_ghash_update_p8); 93 - blocks -= chunk; 94 - src += chunk * GHASH_BLOCK_SIZE; 95 - } while (unlikely(blocks > 0)); 96 - return len; 97 - } 98 - 99 - static int ghash_export(struct shash_desc *desc, void *out) 100 - { 101 - struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc); 102 - u8 *dst = out; 103 - 104 - put_unaligned_be64(ctx->digest[1], dst); 105 - put_unaligned_be64(ctx->digest[0], dst + 8); 106 - return 0; 107 - } 108 - 109 - static int ghash_import(struct shash_desc *desc, const void *in) 110 - { 111 - struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc); 112 - const u8 *src = in; 113 - 114 - ctx->digest[1] = get_unaligned_be64(src); 115 - ctx->digest[0] = get_unaligned_be64(src + 8); 116 - return 0; 117 - } 118 - 119 - static int ghash_finup(struct shash_desc *desc, const u8 *src, 120 - unsigned int len, u8 *dst) 121 - { 122 - struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc); 123 - struct ghash_key *key = crypto_shash_ctx(desc->tfm); 124 - 125 - if (len) { 126 - u8 buf[GHASH_BLOCK_SIZE] = {}; 127 - 128 - memcpy(buf, src, len); 129 - 
ghash_do_simd_update(1, ctx->digest, buf, key, NULL, 130 - pmull_ghash_update_p8); 131 - memzero_explicit(buf, sizeof(buf)); 132 - } 133 - return ghash_export(desc, dst); 66 + pmull_ghash_update_p64(blocks, dg, src, key->h, head); 134 67 } 135 68 136 69 static void ghash_reflect(u64 h[], const be128 *k) ··· 69 150 if (carry) 70 151 h[1] ^= 0xc200000000000000UL; 71 152 } 72 - 73 - static int ghash_setkey(struct crypto_shash *tfm, 74 - const u8 *inkey, unsigned int keylen) 75 - { 76 - struct ghash_key *key = crypto_shash_ctx(tfm); 77 - 78 - if (keylen != GHASH_BLOCK_SIZE) 79 - return -EINVAL; 80 - 81 - /* needed for the fallback */ 82 - memcpy(&key->k, inkey, GHASH_BLOCK_SIZE); 83 - 84 - ghash_reflect(key->h[0], &key->k); 85 - return 0; 86 - } 87 - 88 - static struct shash_alg ghash_alg = { 89 - .base.cra_name = "ghash", 90 - .base.cra_driver_name = "ghash-neon", 91 - .base.cra_priority = 150, 92 - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, 93 - .base.cra_blocksize = GHASH_BLOCK_SIZE, 94 - .base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]), 95 - .base.cra_module = THIS_MODULE, 96 - 97 - .digestsize = GHASH_DIGEST_SIZE, 98 - .init = ghash_init, 99 - .update = ghash_update, 100 - .finup = ghash_finup, 101 - .setkey = ghash_setkey, 102 - .export = ghash_export, 103 - .import = ghash_import, 104 - .descsize = sizeof(struct arm_ghash_desc_ctx), 105 - .statesize = sizeof(struct ghash_desc_ctx), 106 - }; 107 153 108 154 static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey, 109 155 unsigned int keylen) ··· 124 240 int blocks = count / GHASH_BLOCK_SIZE; 125 241 126 242 ghash_do_simd_update(blocks, dg, src, &ctx->ghash_key, 127 - *buf_count ? buf : NULL, 128 - pmull_ghash_update_p64); 129 - 243 + *buf_count ? 
buf : NULL); 130 244 src += blocks * GHASH_BLOCK_SIZE; 131 245 count %= GHASH_BLOCK_SIZE; 132 246 *buf_count = 0; ··· 157 275 158 276 if (buf_count) { 159 277 memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count); 160 - ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL, 161 - pmull_ghash_update_p64); 278 + ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL); 162 279 } 163 280 } 164 281 ··· 386 505 387 506 static int __init ghash_ce_mod_init(void) 388 507 { 389 - if (!cpu_have_named_feature(ASIMD)) 508 + if (!cpu_have_named_feature(ASIMD) || !cpu_have_named_feature(PMULL)) 390 509 return -ENODEV; 391 510 392 - if (cpu_have_named_feature(PMULL)) 393 - return crypto_register_aeads(gcm_aes_algs, 394 - ARRAY_SIZE(gcm_aes_algs)); 395 - 396 - return crypto_register_shash(&ghash_alg); 511 + return crypto_register_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs)); 397 512 } 398 513 399 514 static void __exit ghash_ce_mod_exit(void) 400 515 { 401 - if (cpu_have_named_feature(PMULL)) 402 - crypto_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs)); 403 - else 404 - crypto_unregister_shash(&ghash_alg); 516 + crypto_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs)); 405 517 } 406 518 407 519 static const struct cpu_feature __maybe_unused ghash_cpu_feature[] = {
+7 -13
arch/arm64/crypto/ghash-neon-core.S lib/crypto/arm64/ghash-neon-core.S
··· 6 6 */ 7 7 8 8 #include <linux/linkage.h> 9 - #include <linux/cfi_types.h> 10 9 #include <asm/assembler.h> 11 10 12 11 SHASH .req v0 ··· 178 179 .endm 179 180 180 181 /* 181 - * void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, 182 - * u64 const h[][2], const char *head) 182 + * void pmull_ghash_update_p8(size_t blocks, struct polyval_elem *dg, 183 + * const u8 *src, 184 + * const struct polyval_elem *h) 183 185 */ 184 - SYM_TYPED_FUNC_START(pmull_ghash_update_p8) 186 + SYM_FUNC_START(pmull_ghash_update_p8) 185 187 ld1 {SHASH.2d}, [x3] 186 188 ld1 {XL.2d}, [x1] 187 189 188 190 __pmull_pre_p8 189 191 190 - /* do the head block first, if supplied */ 191 - cbz x4, 0f 192 - ld1 {T1.2d}, [x4] 193 - mov x4, xzr 194 - b 3f 195 - 196 192 0: ld1 {T1.2d}, [x2], #16 197 - sub w0, w0, #1 193 + sub x0, x0, #1 198 194 199 - 3: /* multiply XL by SHASH in GF(2^128) */ 195 + /* multiply XL by SHASH in GF(2^128) */ 200 196 CPU_LE( rev64 T1.16b, T1.16b ) 201 197 202 198 ext T2.16b, XL.16b, XL.16b, #8 ··· 213 219 eor T2.16b, T2.16b, XH.16b 214 220 eor XL.16b, XL.16b, T2.16b 215 221 216 - cbnz w0, 0b 222 + cbnz x0, 0b 217 223 218 224 st1 {XL.2d}, [x1] 219 225 ret
+2 -1
lib/crypto/Makefile
··· 159 159 ifeq ($(CONFIG_CRYPTO_LIB_GF128HASH_ARCH),y) 160 160 CFLAGS_gf128hash.o += -I$(src)/$(SRCARCH) 161 161 libgf128hash-$(CONFIG_ARM) += arm/ghash-neon-core.o 162 - libgf128hash-$(CONFIG_ARM64) += arm64/polyval-ce-core.o 162 + libgf128hash-$(CONFIG_ARM64) += arm64/ghash-neon-core.o \ 163 + arm64/polyval-ce-core.o 163 164 libgf128hash-$(CONFIG_X86) += x86/polyval-pclmul-avx.o 164 165 endif 165 166
+61 -7
lib/crypto/arm64/gf128hash.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 2 /* 3 - * POLYVAL library functions, arm64 optimized 3 + * GHASH and POLYVAL, arm64 optimized 4 4 * 5 5 * Copyright 2025 Google LLC 6 6 */ ··· 9 9 10 10 #define NUM_H_POWERS 8 11 11 12 + static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd); 12 13 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); 13 14 15 + asmlinkage void pmull_ghash_update_p8(size_t blocks, struct polyval_elem *dg, 16 + const u8 *src, 17 + const struct polyval_elem *h); 14 18 asmlinkage void polyval_mul_pmull(struct polyval_elem *a, 15 19 const struct polyval_elem *b); 16 20 asmlinkage void polyval_blocks_pmull(struct polyval_elem *acc, ··· 45 41 } 46 42 } 47 43 44 + static void polyval_mul_arm64(struct polyval_elem *a, 45 + const struct polyval_elem *b) 46 + { 47 + if (static_branch_likely(&have_asimd) && may_use_simd()) { 48 + static const u8 zeroes[GHASH_BLOCK_SIZE]; 49 + 50 + scoped_ksimd() { 51 + if (static_branch_likely(&have_pmull)) { 52 + polyval_mul_pmull(a, b); 53 + } else { 54 + /* 55 + * Note that this is indeed equivalent to a 56 + * POLYVAL multiplication, since it takes the 57 + * accumulator and key in POLYVAL format, and 58 + * byte-swapping a block of zeroes is a no-op. 
59 + */ 60 + pmull_ghash_update_p8(1, a, zeroes, b); 61 + } 62 + } 63 + } else { 64 + polyval_mul_generic(a, b); 65 + } 66 + } 67 + 68 + #define ghash_mul_arch ghash_mul_arch 69 + static void ghash_mul_arch(struct polyval_elem *acc, 70 + const struct ghash_key *key) 71 + { 72 + polyval_mul_arm64(acc, &key->h); 73 + } 74 + 48 75 #define polyval_mul_arch polyval_mul_arch 49 76 static void polyval_mul_arch(struct polyval_elem *acc, 50 77 const struct polyval_key *key) 51 78 { 52 - if (static_branch_likely(&have_pmull) && may_use_simd()) { 53 - scoped_ksimd() 54 - polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]); 79 + polyval_mul_arm64(acc, &key->h_powers[NUM_H_POWERS - 1]); 80 + } 81 + 82 + #define ghash_blocks_arch ghash_blocks_arch 83 + static void ghash_blocks_arch(struct polyval_elem *acc, 84 + const struct ghash_key *key, 85 + const u8 *data, size_t nblocks) 86 + { 87 + if (static_branch_likely(&have_asimd) && may_use_simd()) { 88 + do { 89 + /* Allow rescheduling every 4 KiB. */ 90 + size_t n = min_t(size_t, nblocks, 91 + 4096 / GHASH_BLOCK_SIZE); 92 + 93 + scoped_ksimd() 94 + pmull_ghash_update_p8(n, acc, data, &key->h); 95 + data += n * GHASH_BLOCK_SIZE; 96 + nblocks -= n; 97 + } while (nblocks); 55 98 } else { 56 - polyval_mul_generic(acc, &key->h_powers[NUM_H_POWERS - 1]); 99 + ghash_blocks_generic(acc, &key->h, data, nblocks); 57 100 } 58 101 } 59 102 ··· 129 78 #define gf128hash_mod_init_arch gf128hash_mod_init_arch 130 79 static void gf128hash_mod_init_arch(void) 131 80 { 132 - if (cpu_have_named_feature(PMULL)) 133 - static_branch_enable(&have_pmull); 81 + if (cpu_have_named_feature(ASIMD)) { 82 + static_branch_enable(&have_asimd); 83 + if (cpu_have_named_feature(PMULL)) 84 + static_branch_enable(&have_pmull); 85 + } 134 86 }