Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'libcrypto-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux

Pull crypto library fixes from Eric Biggers:
"Fixes for some recent regressions as well as some longstanding issues:

- Fix incorrect output from the arm64 NEON implementation of GHASH

- Merge the ksimd scopes in the arm64 XTS code to reduce stack usage

- Roll up the BLAKE2b round loop on 32-bit kernels to greatly reduce
code size and stack usage

- Add the missing RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS dependency to
the RISC-V vector crypto Kconfig options

- Fix chacha-riscv64-zvkb.S to not use the frame pointer (s0/fp) for data"

* tag 'libcrypto-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
crypto: arm64/ghash - Fix incorrect output from ghash-neon
crypto/arm64: sm4/xts - Merge ksimd scopes to reduce stack bloat
crypto/arm64: aes/xts - Use single ksimd scope to reduce stack bloat
lib/crypto: blake2s: Replace manual unrolling with unrolled_full
lib/crypto: blake2b: Roll up BLAKE2b round loop on 32-bit
lib/crypto: riscv: Depend on RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS
lib/crypto: riscv/chacha: Avoid s0/fp register

+132 -144
+36 -39
arch/arm64/crypto/aes-glue.c
··· 549 549 tail = 0; 550 550 } 551 551 552 - for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) { 553 - int nbytes = walk.nbytes; 552 + scoped_ksimd() { 553 + for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) { 554 + int nbytes = walk.nbytes; 554 555 555 - if (walk.nbytes < walk.total) 556 - nbytes &= ~(AES_BLOCK_SIZE - 1); 556 + if (walk.nbytes < walk.total) 557 + nbytes &= ~(AES_BLOCK_SIZE - 1); 557 558 558 - scoped_ksimd() 559 559 aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 560 560 ctx->key1.key_enc, rounds, nbytes, 561 561 ctx->key2.key_enc, walk.iv, first); 562 - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 563 - } 562 + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 563 + } 564 564 565 - if (err || likely(!tail)) 566 - return err; 565 + if (err || likely(!tail)) 566 + return err; 567 567 568 - dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); 569 - if (req->dst != req->src) 570 - dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); 568 + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); 569 + if (req->dst != req->src) 570 + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); 571 571 572 - skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, 573 - req->iv); 572 + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, 573 + req->iv); 574 574 575 - err = skcipher_walk_virt(&walk, &subreq, false); 576 - if (err) 577 - return err; 575 + err = skcipher_walk_virt(&walk, &subreq, false); 576 + if (err) 577 + return err; 578 578 579 - scoped_ksimd() 580 579 aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 581 580 ctx->key1.key_enc, rounds, walk.nbytes, 582 581 ctx->key2.key_enc, walk.iv, first); 583 - 582 + } 584 583 return skcipher_walk_done(&walk, 0); 585 584 } 586 585 ··· 618 619 tail = 0; 619 620 } 620 621 621 - for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) { 622 - int nbytes = walk.nbytes; 622 + scoped_ksimd() { 623 + for (first = 1; 
walk.nbytes >= AES_BLOCK_SIZE; first = 0) { 624 + int nbytes = walk.nbytes; 623 625 624 - if (walk.nbytes < walk.total) 625 - nbytes &= ~(AES_BLOCK_SIZE - 1); 626 + if (walk.nbytes < walk.total) 627 + nbytes &= ~(AES_BLOCK_SIZE - 1); 626 628 627 - scoped_ksimd() 628 629 aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 629 630 ctx->key1.key_dec, rounds, nbytes, 630 631 ctx->key2.key_enc, walk.iv, first); 631 - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 632 - } 632 + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 633 + } 633 634 634 - if (err || likely(!tail)) 635 - return err; 635 + if (err || likely(!tail)) 636 + return err; 636 637 637 - dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); 638 - if (req->dst != req->src) 639 - dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); 638 + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); 639 + if (req->dst != req->src) 640 + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); 640 641 641 - skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, 642 - req->iv); 642 + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, 643 + req->iv); 643 644 644 - err = skcipher_walk_virt(&walk, &subreq, false); 645 - if (err) 646 - return err; 645 + err = skcipher_walk_virt(&walk, &subreq, false); 646 + if (err) 647 + return err; 647 648 648 - 649 - scoped_ksimd() 650 649 aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 651 650 ctx->key1.key_dec, rounds, walk.nbytes, 652 651 ctx->key2.key_enc, walk.iv, first); 653 - 652 + } 654 653 return skcipher_walk_done(&walk, 0); 655 654 } 656 655
+21 -23
arch/arm64/crypto/aes-neonbs-glue.c
··· 312 312 if (err) 313 313 return err; 314 314 315 - while (walk.nbytes >= AES_BLOCK_SIZE) { 316 - int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7; 317 - out = walk.dst.virt.addr; 318 - in = walk.src.virt.addr; 319 - nbytes = walk.nbytes; 315 + scoped_ksimd() { 316 + while (walk.nbytes >= AES_BLOCK_SIZE) { 317 + int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7; 318 + out = walk.dst.virt.addr; 319 + in = walk.src.virt.addr; 320 + nbytes = walk.nbytes; 320 321 321 - scoped_ksimd() { 322 322 if (blocks >= 8) { 323 323 if (first == 1) 324 324 neon_aes_ecb_encrypt(walk.iv, walk.iv, ··· 344 344 ctx->twkey, walk.iv, first); 345 345 nbytes = first = 0; 346 346 } 347 + err = skcipher_walk_done(&walk, nbytes); 347 348 } 348 - err = skcipher_walk_done(&walk, nbytes); 349 - } 350 349 351 - if (err || likely(!tail)) 352 - return err; 350 + if (err || likely(!tail)) 351 + return err; 353 352 354 - /* handle ciphertext stealing */ 355 - dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); 356 - if (req->dst != req->src) 357 - dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); 353 + /* handle ciphertext stealing */ 354 + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); 355 + if (req->dst != req->src) 356 + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); 358 357 359 - skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, 360 - req->iv); 358 + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, 359 + req->iv); 361 360 362 - err = skcipher_walk_virt(&walk, req, false); 363 - if (err) 364 - return err; 361 + err = skcipher_walk_virt(&walk, req, false); 362 + if (err) 363 + return err; 365 364 366 - out = walk.dst.virt.addr; 367 - in = walk.src.virt.addr; 368 - nbytes = walk.nbytes; 365 + out = walk.dst.virt.addr; 366 + in = walk.src.virt.addr; 367 + nbytes = walk.nbytes; 369 368 370 - scoped_ksimd() { 371 369 if (encrypt) 372 370 neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, 373 371 ctx->key.rounds, nbytes, ctx->twkey,
+1 -1
arch/arm64/crypto/ghash-ce-glue.c
··· 133 133 u8 buf[GHASH_BLOCK_SIZE] = {}; 134 134 135 135 memcpy(buf, src, len); 136 - ghash_do_simd_update(1, ctx->digest, src, key, NULL, 136 + ghash_do_simd_update(1, ctx->digest, buf, key, NULL, 137 137 pmull_ghash_update_p8); 138 138 memzero_explicit(buf, sizeof(buf)); 139 139 }
+22 -24
arch/arm64/crypto/sm4-ce-glue.c
··· 346 346 tail = 0; 347 347 } 348 348 349 - while ((nbytes = walk.nbytes) >= SM4_BLOCK_SIZE) { 350 - if (nbytes < walk.total) 351 - nbytes &= ~(SM4_BLOCK_SIZE - 1); 349 + scoped_ksimd() { 350 + while ((nbytes = walk.nbytes) >= SM4_BLOCK_SIZE) { 351 + if (nbytes < walk.total) 352 + nbytes &= ~(SM4_BLOCK_SIZE - 1); 352 353 353 - scoped_ksimd() { 354 354 if (encrypt) 355 355 sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, 356 356 walk.src.virt.addr, walk.iv, nbytes, ··· 359 359 sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, 360 360 walk.src.virt.addr, walk.iv, nbytes, 361 361 rkey2_enc); 362 + 363 + rkey2_enc = NULL; 364 + 365 + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 366 + if (err) 367 + return err; 362 368 } 363 369 364 - rkey2_enc = NULL; 370 + if (likely(tail == 0)) 371 + return 0; 365 372 366 - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 373 + /* handle ciphertext stealing */ 374 + 375 + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); 376 + if (req->dst != req->src) 377 + dst = scatterwalk_ffwd(sg_dst, req->dst, subreq.cryptlen); 378 + 379 + skcipher_request_set_crypt(&subreq, src, dst, 380 + SM4_BLOCK_SIZE + tail, req->iv); 381 + 382 + err = skcipher_walk_virt(&walk, &subreq, false); 367 383 if (err) 368 384 return err; 369 - } 370 385 371 - if (likely(tail == 0)) 372 - return 0; 373 - 374 - /* handle ciphertext stealing */ 375 - 376 - dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); 377 - if (req->dst != req->src) 378 - dst = scatterwalk_ffwd(sg_dst, req->dst, subreq.cryptlen); 379 - 380 - skcipher_request_set_crypt(&subreq, src, dst, SM4_BLOCK_SIZE + tail, 381 - req->iv); 382 - 383 - err = skcipher_walk_virt(&walk, &subreq, false); 384 - if (err) 385 - return err; 386 - 387 - scoped_ksimd() { 388 386 if (encrypt) 389 387 sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, 390 388 walk.src.virt.addr, walk.iv, walk.nbytes,
+8 -4
arch/riscv/crypto/Kconfig
··· 4 4 5 5 config CRYPTO_AES_RISCV64 6 6 tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XTS" 7 - depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO 7 + depends on 64BIT && TOOLCHAIN_HAS_VECTOR_CRYPTO && \ 8 + RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS 8 9 select CRYPTO_ALGAPI 9 10 select CRYPTO_LIB_AES 10 11 select CRYPTO_SKCIPHER ··· 21 20 22 21 config CRYPTO_GHASH_RISCV64 23 22 tristate "Hash functions: GHASH" 24 - depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO 23 + depends on 64BIT && TOOLCHAIN_HAS_VECTOR_CRYPTO && \ 24 + RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS 25 25 select CRYPTO_GCM 26 26 help 27 27 GCM GHASH function (NIST SP 800-38D) ··· 32 30 33 31 config CRYPTO_SM3_RISCV64 34 32 tristate "Hash functions: SM3 (ShangMi 3)" 35 - depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO 33 + depends on 64BIT && TOOLCHAIN_HAS_VECTOR_CRYPTO && \ 34 + RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS 36 35 select CRYPTO_HASH 37 36 select CRYPTO_LIB_SM3 38 37 help ··· 45 42 46 43 config CRYPTO_SM4_RISCV64 47 44 tristate "Ciphers: SM4 (ShangMi 4)" 48 - depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO 45 + depends on 64BIT && TOOLCHAIN_HAS_VECTOR_CRYPTO && \ 46 + RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS 49 47 select CRYPTO_ALGAPI 50 48 select CRYPTO_SM4 51 49 help
+6 -3
lib/crypto/Kconfig
··· 61 61 default y if ARM64 && KERNEL_MODE_NEON 62 62 default y if MIPS && CPU_MIPS32_R2 63 63 default y if PPC64 && CPU_LITTLE_ENDIAN && VSX 64 - default y if RISCV && 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO 64 + default y if RISCV && 64BIT && TOOLCHAIN_HAS_VECTOR_CRYPTO && \ 65 + RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS 65 66 default y if S390 66 67 default y if X86_64 67 68 ··· 185 184 default y if ARM64 186 185 default y if MIPS && CPU_CAVIUM_OCTEON 187 186 default y if PPC && SPE 188 - default y if RISCV && 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO 187 + default y if RISCV && 64BIT && TOOLCHAIN_HAS_VECTOR_CRYPTO && \ 188 + RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS 189 189 default y if S390 190 190 default y if SPARC64 191 191 default y if X86_64 ··· 204 202 default y if ARM && !CPU_V7M 205 203 default y if ARM64 206 204 default y if MIPS && CPU_CAVIUM_OCTEON 207 - default y if RISCV && 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO 205 + default y if RISCV && 64BIT && TOOLCHAIN_HAS_VECTOR_CRYPTO && \ 206 + RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS 208 207 default y if S390 209 208 default y if SPARC64 210 209 default y if X86_64
-1
lib/crypto/Makefile
··· 33 33 34 34 obj-$(CONFIG_CRYPTO_LIB_BLAKE2B) += libblake2b.o 35 35 libblake2b-y := blake2b.o 36 - CFLAGS_blake2b.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930 37 36 ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2B_ARCH),y) 38 37 CFLAGS_blake2b.o += -I$(src)/$(SRCARCH) 39 38 libblake2b-$(CONFIG_ARM) += arm/blake2b-neon-core.o
+20 -24
lib/crypto/blake2b.c
··· 14 14 #include <linux/kernel.h> 15 15 #include <linux/module.h> 16 16 #include <linux/string.h> 17 + #include <linux/unroll.h> 17 18 #include <linux/types.h> 18 19 19 20 static const u8 blake2b_sigma[12][16] = { ··· 74 73 b = ror64(b ^ c, 63); \ 75 74 } while (0) 76 75 77 - #define ROUND(r) do { \ 78 - G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ 79 - G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ 80 - G(r, 2, v[2], v[ 6], v[10], v[14]); \ 81 - G(r, 3, v[3], v[ 7], v[11], v[15]); \ 82 - G(r, 4, v[0], v[ 5], v[10], v[15]); \ 83 - G(r, 5, v[1], v[ 6], v[11], v[12]); \ 84 - G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ 85 - G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ 86 - } while (0) 87 - ROUND(0); 88 - ROUND(1); 89 - ROUND(2); 90 - ROUND(3); 91 - ROUND(4); 92 - ROUND(5); 93 - ROUND(6); 94 - ROUND(7); 95 - ROUND(8); 96 - ROUND(9); 97 - ROUND(10); 98 - ROUND(11); 99 - 76 + #ifdef CONFIG_64BIT 77 + /* 78 + * Unroll the rounds loop to enable constant-folding of the 79 + * blake2b_sigma values. Seems worthwhile on 64-bit kernels. 80 + * Not worthwhile on 32-bit kernels because the code size is 81 + * already so large there due to BLAKE2b using 64-bit words. 82 + */ 83 + unrolled_full 84 + #endif 85 + for (int r = 0; r < 12; r++) { 86 + G(r, 0, v[0], v[4], v[8], v[12]); 87 + G(r, 1, v[1], v[5], v[9], v[13]); 88 + G(r, 2, v[2], v[6], v[10], v[14]); 89 + G(r, 3, v[3], v[7], v[11], v[15]); 90 + G(r, 4, v[0], v[5], v[10], v[15]); 91 + G(r, 5, v[1], v[6], v[11], v[12]); 92 + G(r, 6, v[2], v[7], v[8], v[13]); 93 + G(r, 7, v[3], v[4], v[9], v[14]); 94 + } 100 95 #undef G 101 - #undef ROUND 102 96 103 97 for (i = 0; i < 8; ++i) 104 98 ctx->h[i] ^= v[i] ^ v[i + 8];
+16 -22
lib/crypto/blake2s.c
··· 14 14 #include <linux/kernel.h> 15 15 #include <linux/module.h> 16 16 #include <linux/string.h> 17 + #include <linux/unroll.h> 17 18 #include <linux/types.h> 18 19 19 20 static const u8 blake2s_sigma[10][16] = { ··· 72 71 b = ror32(b ^ c, 7); \ 73 72 } while (0) 74 73 75 - #define ROUND(r) do { \ 76 - G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ 77 - G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ 78 - G(r, 2, v[2], v[ 6], v[10], v[14]); \ 79 - G(r, 3, v[3], v[ 7], v[11], v[15]); \ 80 - G(r, 4, v[0], v[ 5], v[10], v[15]); \ 81 - G(r, 5, v[1], v[ 6], v[11], v[12]); \ 82 - G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ 83 - G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ 84 - } while (0) 85 - ROUND(0); 86 - ROUND(1); 87 - ROUND(2); 88 - ROUND(3); 89 - ROUND(4); 90 - ROUND(5); 91 - ROUND(6); 92 - ROUND(7); 93 - ROUND(8); 94 - ROUND(9); 95 - 74 + /* 75 + * Unroll the rounds loop to enable constant-folding of the 76 + * blake2s_sigma values. 77 + */ 78 + unrolled_full 79 + for (int r = 0; r < 10; r++) { 80 + G(r, 0, v[0], v[4], v[8], v[12]); 81 + G(r, 1, v[1], v[5], v[9], v[13]); 82 + G(r, 2, v[2], v[6], v[10], v[14]); 83 + G(r, 3, v[3], v[7], v[11], v[15]); 84 + G(r, 4, v[0], v[5], v[10], v[15]); 85 + G(r, 5, v[1], v[6], v[11], v[12]); 86 + G(r, 6, v[2], v[7], v[8], v[13]); 87 + G(r, 7, v[3], v[4], v[9], v[14]); 88 + } 96 89 #undef G 97 - #undef ROUND 98 90 99 91 for (i = 0; i < 8; ++i) 100 92 ctx->h[i] ^= v[i] ^ v[i + 8];
+2 -3
lib/crypto/riscv/chacha-riscv64-zvkb.S
··· 60 60 #define VL t2 61 61 #define STRIDE t3 62 62 #define ROUND_CTR t4 63 - #define KEY0 s0 63 + #define KEY0 t5 64 + // Avoid s0/fp to allow for unwinding 64 65 #define KEY1 s1 65 66 #define KEY2 s2 66 67 #define KEY3 s3 ··· 144 143 // The updated 32-bit counter is written back to state->x[12] before returning. 145 144 SYM_FUNC_START(chacha_zvkb) 146 145 addi sp, sp, -96 147 - sd s0, 0(sp) 148 146 sd s1, 8(sp) 149 147 sd s2, 16(sp) 150 148 sd s3, 24(sp) ··· 280 280 bnez NBLOCKS, .Lblock_loop 281 281 282 282 sw COUNTER, 48(STATEP) 283 - ld s0, 0(sp) 284 283 ld s1, 8(sp) 285 284 ld s2, 16(sp) 286 285 ld s3, 24(sp)