Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto fixes from Herbert Xu:
"This fixes a performance regression in arm64 NEON crypto as well as a
crash in x86 aegis/morus on unsupported CPUs."

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: x86/aegis,morus - Fix and simplify CPUID checks
  crypto: arm64 - revert NEON yield for fast AEAD implementations

+101 -191
+54 -94
arch/arm64/crypto/aes-ce-ccm-core.S
··· 19 19 * u32 *macp, u8 const rk[], u32 rounds); 20 20 */ 21 21 ENTRY(ce_aes_ccm_auth_data) 22 - frame_push 7 23 - 24 - mov x19, x0 25 - mov x20, x1 26 - mov x21, x2 27 - mov x22, x3 28 - mov x23, x4 29 - mov x24, x5 30 - 31 - ldr w25, [x22] /* leftover from prev round? */ 22 + ldr w8, [x3] /* leftover from prev round? */ 32 23 ld1 {v0.16b}, [x0] /* load mac */ 33 - cbz w25, 1f 34 - sub w25, w25, #16 24 + cbz w8, 1f 25 + sub w8, w8, #16 35 26 eor v1.16b, v1.16b, v1.16b 36 - 0: ldrb w7, [x20], #1 /* get 1 byte of input */ 37 - subs w21, w21, #1 38 - add w25, w25, #1 27 + 0: ldrb w7, [x1], #1 /* get 1 byte of input */ 28 + subs w2, w2, #1 29 + add w8, w8, #1 39 30 ins v1.b[0], w7 40 31 ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ 41 32 beq 8f /* out of input? */ 42 - cbnz w25, 0b 33 + cbnz w8, 0b 43 34 eor v0.16b, v0.16b, v1.16b 44 - 1: ld1 {v3.4s}, [x23] /* load first round key */ 45 - prfm pldl1strm, [x20] 46 - cmp w24, #12 /* which key size? */ 47 - add x6, x23, #16 48 - sub w7, w24, #2 /* modified # of rounds */ 35 + 1: ld1 {v3.4s}, [x4] /* load first round key */ 36 + prfm pldl1strm, [x1] 37 + cmp w5, #12 /* which key size? */ 38 + add x6, x4, #16 39 + sub w7, w5, #2 /* modified # of rounds */ 49 40 bmi 2f 50 41 bne 5f 51 42 mov v5.16b, v3.16b ··· 55 64 ld1 {v5.4s}, [x6], #16 /* load next round key */ 56 65 bpl 3b 57 66 aese v0.16b, v4.16b 58 - subs w21, w21, #16 /* last data? */ 67 + subs w2, w2, #16 /* last data? 
*/ 59 68 eor v0.16b, v0.16b, v5.16b /* final round */ 60 69 bmi 6f 61 - ld1 {v1.16b}, [x20], #16 /* load next input block */ 70 + ld1 {v1.16b}, [x1], #16 /* load next input block */ 62 71 eor v0.16b, v0.16b, v1.16b /* xor with mac */ 63 - beq 6f 64 - 65 - if_will_cond_yield_neon 66 - st1 {v0.16b}, [x19] /* store mac */ 67 - do_cond_yield_neon 68 - ld1 {v0.16b}, [x19] /* reload mac */ 69 - endif_yield_neon 70 - 71 - b 1b 72 - 6: st1 {v0.16b}, [x19] /* store mac */ 72 + bne 1b 73 + 6: st1 {v0.16b}, [x0] /* store mac */ 73 74 beq 10f 74 - adds w21, w21, #16 75 + adds w2, w2, #16 75 76 beq 10f 76 - mov w25, w21 77 - 7: ldrb w7, [x20], #1 77 + mov w8, w2 78 + 7: ldrb w7, [x1], #1 78 79 umov w6, v0.b[0] 79 80 eor w6, w6, w7 80 - strb w6, [x19], #1 81 - subs w21, w21, #1 81 + strb w6, [x0], #1 82 + subs w2, w2, #1 82 83 beq 10f 83 84 ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ 84 85 b 7b 85 - 8: mov w7, w25 86 - add w25, w25, #16 86 + 8: mov w7, w8 87 + add w8, w8, #16 87 88 9: ext v1.16b, v1.16b, v1.16b, #1 88 89 adds w7, w7, #1 89 90 bne 9b 90 91 eor v0.16b, v0.16b, v1.16b 91 - st1 {v0.16b}, [x19] 92 - 10: str w25, [x22] 93 - 94 - frame_pop 92 + st1 {v0.16b}, [x0] 93 + 10: str w8, [x3] 95 94 ret 96 95 ENDPROC(ce_aes_ccm_auth_data) 97 96 ··· 126 145 ENDPROC(ce_aes_ccm_final) 127 146 128 147 .macro aes_ccm_do_crypt,enc 129 - frame_push 8 130 - 131 - mov x19, x0 132 - mov x20, x1 133 - mov x21, x2 134 - mov x22, x3 135 - mov x23, x4 136 - mov x24, x5 137 - mov x25, x6 138 - 139 - ldr x26, [x25, #8] /* load lower ctr */ 140 - ld1 {v0.16b}, [x24] /* load mac */ 141 - CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */ 148 + ldr x8, [x6, #8] /* load lower ctr */ 149 + ld1 {v0.16b}, [x5] /* load mac */ 150 + CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ 142 151 0: /* outer loop */ 143 - ld1 {v1.8b}, [x25] /* load upper ctr */ 144 - prfm pldl1strm, [x20] 145 - add x26, x26, #1 146 - rev x9, x26 147 - cmp w23, #12 /* which key size? 
*/ 148 - sub w7, w23, #2 /* get modified # of rounds */ 152 + ld1 {v1.8b}, [x6] /* load upper ctr */ 153 + prfm pldl1strm, [x1] 154 + add x8, x8, #1 155 + rev x9, x8 156 + cmp w4, #12 /* which key size? */ 157 + sub w7, w4, #2 /* get modified # of rounds */ 149 158 ins v1.d[1], x9 /* no carry in lower ctr */ 150 - ld1 {v3.4s}, [x22] /* load first round key */ 151 - add x10, x22, #16 159 + ld1 {v3.4s}, [x3] /* load first round key */ 160 + add x10, x3, #16 152 161 bmi 1f 153 162 bne 4f 154 163 mov v5.16b, v3.16b ··· 165 194 bpl 2b 166 195 aese v0.16b, v4.16b 167 196 aese v1.16b, v4.16b 168 - subs w21, w21, #16 169 - bmi 7f /* partial block? */ 170 - ld1 {v2.16b}, [x20], #16 /* load next input block */ 197 + subs w2, w2, #16 198 + bmi 6f /* partial block? */ 199 + ld1 {v2.16b}, [x1], #16 /* load next input block */ 171 200 .if \enc == 1 172 201 eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ 173 202 eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ ··· 176 205 eor v1.16b, v2.16b, v5.16b /* final round enc */ 177 206 .endif 178 207 eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ 179 - st1 {v1.16b}, [x19], #16 /* write output block */ 180 - beq 5f 208 + st1 {v1.16b}, [x0], #16 /* write output block */ 209 + bne 0b 210 + CPU_LE( rev x8, x8 ) 211 + st1 {v0.16b}, [x5] /* store mac */ 212 + str x8, [x6, #8] /* store lsb end of ctr (BE) */ 213 + 5: ret 181 214 182 - if_will_cond_yield_neon 183 - st1 {v0.16b}, [x24] /* store mac */ 184 - do_cond_yield_neon 185 - ld1 {v0.16b}, [x24] /* reload mac */ 186 - endif_yield_neon 187 - 188 - b 0b 189 - 5: 190 - CPU_LE( rev x26, x26 ) 191 - st1 {v0.16b}, [x24] /* store mac */ 192 - str x26, [x25, #8] /* store lsb end of ctr (BE) */ 193 - 194 - 6: frame_pop 195 - ret 196 - 197 - 7: eor v0.16b, v0.16b, v5.16b /* final round mac */ 215 + 6: eor v0.16b, v0.16b, v5.16b /* final round mac */ 198 216 eor v1.16b, v1.16b, v5.16b /* final round enc */ 199 - st1 {v0.16b}, [x24] /* store mac */ 200 - add w21, w21, #16 /* 
process partial tail block */ 201 - 8: ldrb w9, [x20], #1 /* get 1 byte of input */ 217 + st1 {v0.16b}, [x5] /* store mac */ 218 + add w2, w2, #16 /* process partial tail block */ 219 + 7: ldrb w9, [x1], #1 /* get 1 byte of input */ 202 220 umov w6, v1.b[0] /* get top crypted ctr byte */ 203 221 umov w7, v0.b[0] /* get top mac byte */ 204 222 .if \enc == 1 ··· 197 237 eor w9, w9, w6 198 238 eor w7, w7, w9 199 239 .endif 200 - strb w9, [x19], #1 /* store out byte */ 201 - strb w7, [x24], #1 /* store mac byte */ 202 - subs w21, w21, #1 203 - beq 6b 240 + strb w9, [x0], #1 /* store out byte */ 241 + strb w7, [x5], #1 /* store mac byte */ 242 + subs w2, w2, #1 243 + beq 5b 204 244 ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ 205 245 ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ 206 - b 8b 246 + b 7b 207 247 .endm 208 248 209 249 /*
+26 -52
arch/arm64/crypto/ghash-ce-core.S
··· 322 322 .endm 323 323 324 324 .macro pmull_gcm_do_crypt, enc 325 - frame_push 10 325 + ld1 {SHASH.2d}, [x4] 326 + ld1 {XL.2d}, [x1] 327 + ldr x8, [x5, #8] // load lower counter 326 328 327 - mov x19, x0 328 - mov x20, x1 329 - mov x21, x2 330 - mov x22, x3 331 - mov x23, x4 332 - mov x24, x5 333 - mov x25, x6 334 - mov x26, x7 335 - .if \enc == 1 336 - ldr x27, [sp, #96] // first stacked arg 337 - .endif 338 - 339 - ldr x28, [x24, #8] // load lower counter 340 - CPU_LE( rev x28, x28 ) 341 - 342 - 0: mov x0, x25 343 - load_round_keys w26, x0 344 - ld1 {SHASH.2d}, [x23] 345 - ld1 {XL.2d}, [x20] 329 + load_round_keys w7, x6 346 330 347 331 movi MASK.16b, #0xe1 348 332 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 333 + CPU_LE( rev x8, x8 ) 349 334 shl MASK.2d, MASK.2d, #57 350 335 eor SHASH2.16b, SHASH2.16b, SHASH.16b 351 336 352 337 .if \enc == 1 353 - ld1 {KS.16b}, [x27] 338 + ldr x10, [sp] 339 + ld1 {KS.16b}, [x10] 354 340 .endif 355 341 356 - 1: ld1 {CTR.8b}, [x24] // load upper counter 357 - ld1 {INP.16b}, [x22], #16 358 - rev x9, x28 359 - add x28, x28, #1 360 - sub w19, w19, #1 342 + 0: ld1 {CTR.8b}, [x5] // load upper counter 343 + ld1 {INP.16b}, [x3], #16 344 + rev x9, x8 345 + add x8, x8, #1 346 + sub w0, w0, #1 361 347 ins CTR.d[1], x9 // set lower counter 362 348 363 349 .if \enc == 1 364 350 eor INP.16b, INP.16b, KS.16b // encrypt input 365 - st1 {INP.16b}, [x21], #16 351 + st1 {INP.16b}, [x2], #16 366 352 .endif 367 353 368 354 rev64 T1.16b, INP.16b 369 355 370 - cmp w26, #12 371 - b.ge 4f // AES-192/256? 356 + cmp w7, #12 357 + b.ge 2f // AES-192/256? 
372 358 373 - 2: enc_round CTR, v21 359 + 1: enc_round CTR, v21 374 360 375 361 ext T2.16b, XL.16b, XL.16b, #8 376 362 ext IN1.16b, T1.16b, T1.16b, #8 ··· 411 425 412 426 .if \enc == 0 413 427 eor INP.16b, INP.16b, KS.16b 414 - st1 {INP.16b}, [x21], #16 428 + st1 {INP.16b}, [x2], #16 415 429 .endif 416 430 417 - cbz w19, 3f 431 + cbnz w0, 0b 418 432 419 - if_will_cond_yield_neon 420 - st1 {XL.2d}, [x20] 433 + CPU_LE( rev x8, x8 ) 434 + st1 {XL.2d}, [x1] 435 + str x8, [x5, #8] // store lower counter 436 + 421 437 .if \enc == 1 422 - st1 {KS.16b}, [x27] 423 - .endif 424 - do_cond_yield_neon 425 - b 0b 426 - endif_yield_neon 427 - 428 - b 1b 429 - 430 - 3: st1 {XL.2d}, [x20] 431 - .if \enc == 1 432 - st1 {KS.16b}, [x27] 438 + st1 {KS.16b}, [x10] 433 439 .endif 434 440 435 - CPU_LE( rev x28, x28 ) 436 - str x28, [x24, #8] // store lower counter 437 - 438 - frame_pop 439 441 ret 440 442 441 - 4: b.eq 5f // AES-192? 443 + 2: b.eq 3f // AES-192? 442 444 enc_round CTR, v17 443 445 enc_round CTR, v18 444 - 5: enc_round CTR, v19 446 + 3: enc_round CTR, v19 445 447 enc_round CTR, v20 446 - b 2b 448 + b 1b 447 449 .endm 448 450 449 451 /*
+4 -8
arch/x86/crypto/aegis128-aesni-glue.c
··· 375 375 } 376 376 }; 377 377 378 - static const struct x86_cpu_id aesni_cpu_id[] = { 379 - X86_FEATURE_MATCH(X86_FEATURE_AES), 380 - X86_FEATURE_MATCH(X86_FEATURE_XMM2), 381 - {} 382 - }; 383 - MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); 384 - 385 378 static int __init crypto_aegis128_aesni_module_init(void) 386 379 { 387 - if (!x86_match_cpu(aesni_cpu_id)) 380 + if (!boot_cpu_has(X86_FEATURE_XMM2) || 381 + !boot_cpu_has(X86_FEATURE_AES) || 382 + !boot_cpu_has(X86_FEATURE_OSXSAVE) || 383 + !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) 388 384 return -ENODEV; 389 385 390 386 return crypto_register_aeads(crypto_aegis128_aesni_alg,
+4 -8
arch/x86/crypto/aegis128l-aesni-glue.c
··· 375 375 } 376 376 }; 377 377 378 - static const struct x86_cpu_id aesni_cpu_id[] = { 379 - X86_FEATURE_MATCH(X86_FEATURE_AES), 380 - X86_FEATURE_MATCH(X86_FEATURE_XMM2), 381 - {} 382 - }; 383 - MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); 384 - 385 378 static int __init crypto_aegis128l_aesni_module_init(void) 386 379 { 387 - if (!x86_match_cpu(aesni_cpu_id)) 380 + if (!boot_cpu_has(X86_FEATURE_XMM2) || 381 + !boot_cpu_has(X86_FEATURE_AES) || 382 + !boot_cpu_has(X86_FEATURE_OSXSAVE) || 383 + !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) 388 384 return -ENODEV; 389 385 390 386 return crypto_register_aeads(crypto_aegis128l_aesni_alg,
+4 -8
arch/x86/crypto/aegis256-aesni-glue.c
··· 375 375 } 376 376 }; 377 377 378 - static const struct x86_cpu_id aesni_cpu_id[] = { 379 - X86_FEATURE_MATCH(X86_FEATURE_AES), 380 - X86_FEATURE_MATCH(X86_FEATURE_XMM2), 381 - {} 382 - }; 383 - MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); 384 - 385 378 static int __init crypto_aegis256_aesni_module_init(void) 386 379 { 387 - if (!x86_match_cpu(aesni_cpu_id)) 380 + if (!boot_cpu_has(X86_FEATURE_XMM2) || 381 + !boot_cpu_has(X86_FEATURE_AES) || 382 + !boot_cpu_has(X86_FEATURE_OSXSAVE) || 383 + !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) 388 384 return -ENODEV; 389 385 390 386 return crypto_register_aeads(crypto_aegis256_aesni_alg,
+3 -7
arch/x86/crypto/morus1280-avx2-glue.c
··· 37 37 38 38 MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400); 39 39 40 - static const struct x86_cpu_id avx2_cpu_id[] = { 41 - X86_FEATURE_MATCH(X86_FEATURE_AVX2), 42 - {} 43 - }; 44 - MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id); 45 - 46 40 static int __init crypto_morus1280_avx2_module_init(void) 47 41 { 48 - if (!x86_match_cpu(avx2_cpu_id)) 42 + if (!boot_cpu_has(X86_FEATURE_AVX2) || 43 + !boot_cpu_has(X86_FEATURE_OSXSAVE) || 44 + !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) 49 45 return -ENODEV; 50 46 51 47 return crypto_register_aeads(crypto_morus1280_avx2_algs,
+3 -7
arch/x86/crypto/morus1280-sse2-glue.c
··· 37 37 38 38 MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350); 39 39 40 - static const struct x86_cpu_id sse2_cpu_id[] = { 41 - X86_FEATURE_MATCH(X86_FEATURE_XMM2), 42 - {} 43 - }; 44 - MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id); 45 - 46 40 static int __init crypto_morus1280_sse2_module_init(void) 47 41 { 48 - if (!x86_match_cpu(sse2_cpu_id)) 42 + if (!boot_cpu_has(X86_FEATURE_XMM2) || 43 + !boot_cpu_has(X86_FEATURE_OSXSAVE) || 44 + !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) 49 45 return -ENODEV; 50 46 51 47 return crypto_register_aeads(crypto_morus1280_sse2_algs,
+3 -7
arch/x86/crypto/morus640-sse2-glue.c
··· 37 37 38 38 MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400); 39 39 40 - static const struct x86_cpu_id sse2_cpu_id[] = { 41 - X86_FEATURE_MATCH(X86_FEATURE_XMM2), 42 - {} 43 - }; 44 - MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id); 45 - 46 40 static int __init crypto_morus640_sse2_module_init(void) 47 41 { 48 - if (!x86_match_cpu(sse2_cpu_id)) 42 + if (!boot_cpu_has(X86_FEATURE_XMM2) || 43 + !boot_cpu_has(X86_FEATURE_OSXSAVE) || 44 + !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) 49 45 return -ENODEV; 50 46 51 47 return crypto_register_aeads(crypto_morus640_sse2_algs,