Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

crypto: poly1305 - add new 32 and 64-bit generic versions

These two C implementations from Zinc -- a 32x32 one and a 64x64 one,
depending on the platform -- come from Andrew Moon's public domain
poly1305-donna portable code, modified for usage in the kernel. The
precomputation in the 32-bit version and the use of 64x64 multiplies in
the 64-bit version make these perform better than the code it replaces.
Moon's code is also very widespread and has received many eyeballs of
scrutiny.

There's a bit of interference with the x86 implementation, which
relies on internal details of the old scalar implementation. In the next
commit, the x86 implementation will be replaced with a faster one that
doesn't rely on this, so none of this matters much. But for now, to keep
this passing the tests, we inline the bits of the old implementation
that the x86 implementation relied on. Also, since we now support a
slightly larger key space, via the union, some offsets had to be fixed
up.

Nonce calculation was folded in with the emit function, to take
advantage of 64x64 arithmetic. However, Adiantum appeared to rely on no
nonce handling in emit, so this path was conditionalized. We also
introduced a new struct, poly1305_core_key, to represent the precise
amount of space that particular implementation uses.

Testing with kbench9000, depending on the CPU, the update function for
the 32x32 version has been improved by 4%-7%, and for the 64x64 by
19%-30%. The 32x32 gains are small, but I think there's great value in
having a parallel implementation to the 64x64 one so that the two can be
compared side-by-side as nice stand-alone units.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

authored by

Jason A. Donenfeld and committed by
Herbert Xu
1c08a104 e3419426

+675 -228
+10 -10
arch/x86/crypto/poly1305-avx2-x86_64.S
··· 34 34 #define u2 0x08(%r8) 35 35 #define u3 0x0c(%r8) 36 36 #define u4 0x10(%r8) 37 - #define w0 0x14(%r8) 38 - #define w1 0x18(%r8) 39 - #define w2 0x1c(%r8) 40 - #define w3 0x20(%r8) 41 - #define w4 0x24(%r8) 42 - #define y0 0x28(%r8) 43 - #define y1 0x2c(%r8) 44 - #define y2 0x30(%r8) 45 - #define y3 0x34(%r8) 46 - #define y4 0x38(%r8) 37 + #define w0 0x18(%r8) 38 + #define w1 0x1c(%r8) 39 + #define w2 0x20(%r8) 40 + #define w3 0x24(%r8) 41 + #define w4 0x28(%r8) 42 + #define y0 0x30(%r8) 43 + #define y1 0x34(%r8) 44 + #define y2 0x38(%r8) 45 + #define y3 0x3c(%r8) 46 + #define y4 0x40(%r8) 47 47 #define m %rsi 48 48 #define hc0 %ymm0 49 49 #define hc1 %ymm1
+204 -11
arch/x86/crypto/poly1305_glue.c
··· 25 25 static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd); 26 26 static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); 27 27 28 + static inline u64 mlt(u64 a, u64 b) 29 + { 30 + return a * b; 31 + } 32 + 33 + static inline u32 sr(u64 v, u_char n) 34 + { 35 + return v >> n; 36 + } 37 + 38 + static inline u32 and(u32 v, u32 mask) 39 + { 40 + return v & mask; 41 + } 42 + 28 43 static void poly1305_simd_mult(u32 *a, const u32 *b) 29 44 { 30 45 u8 m[POLY1305_BLOCK_SIZE]; ··· 49 34 * we don't need for key multiplication; compensate for it. */ 50 35 a[4] -= 1 << 24; 51 36 poly1305_block_sse2(a, m, b, 1); 37 + } 38 + 39 + static void poly1305_integer_setkey(struct poly1305_key *key, const u8 *raw_key) 40 + { 41 + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 42 + key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; 43 + key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; 44 + key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; 45 + key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; 46 + key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; 47 + } 48 + 49 + static void poly1305_integer_blocks(struct poly1305_state *state, 50 + const struct poly1305_key *key, 51 + const void *src, 52 + unsigned int nblocks, u32 hibit) 53 + { 54 + u32 r0, r1, r2, r3, r4; 55 + u32 s1, s2, s3, s4; 56 + u32 h0, h1, h2, h3, h4; 57 + u64 d0, d1, d2, d3, d4; 58 + 59 + if (!nblocks) 60 + return; 61 + 62 + r0 = key->r[0]; 63 + r1 = key->r[1]; 64 + r2 = key->r[2]; 65 + r3 = key->r[3]; 66 + r4 = key->r[4]; 67 + 68 + s1 = r1 * 5; 69 + s2 = r2 * 5; 70 + s3 = r3 * 5; 71 + s4 = r4 * 5; 72 + 73 + h0 = state->h[0]; 74 + h1 = state->h[1]; 75 + h2 = state->h[2]; 76 + h3 = state->h[3]; 77 + h4 = state->h[4]; 78 + 79 + do { 80 + /* h += m[i] */ 81 + h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; 82 + h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; 83 + h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; 84 + 
h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; 85 + h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); 86 + 87 + /* h *= r */ 88 + d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + 89 + mlt(h3, s2) + mlt(h4, s1); 90 + d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + 91 + mlt(h3, s3) + mlt(h4, s2); 92 + d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + 93 + mlt(h3, s4) + mlt(h4, s3); 94 + d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + 95 + mlt(h3, r0) + mlt(h4, s4); 96 + d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + 97 + mlt(h3, r1) + mlt(h4, r0); 98 + 99 + /* (partial) h %= p */ 100 + d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); 101 + d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); 102 + d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); 103 + d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); 104 + h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); 105 + h1 += h0 >> 26; h0 = h0 & 0x3ffffff; 106 + 107 + src += POLY1305_BLOCK_SIZE; 108 + } while (--nblocks); 109 + 110 + state->h[0] = h0; 111 + state->h[1] = h1; 112 + state->h[2] = h2; 113 + state->h[3] = h3; 114 + state->h[4] = h4; 115 + } 116 + 117 + static void poly1305_integer_emit(const struct poly1305_state *state, void *dst) 118 + { 119 + u32 h0, h1, h2, h3, h4; 120 + u32 g0, g1, g2, g3, g4; 121 + u32 mask; 122 + 123 + /* fully carry h */ 124 + h0 = state->h[0]; 125 + h1 = state->h[1]; 126 + h2 = state->h[2]; 127 + h3 = state->h[3]; 128 + h4 = state->h[4]; 129 + 130 + h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; 131 + h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; 132 + h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; 133 + h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; 134 + h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; 135 + 136 + /* compute h + -p */ 137 + g0 = h0 + 5; 138 + g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; 139 + g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; 140 + g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; 141 + g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; 142 + 143 + /* select h if h < p, or h + -p if h >= p */ 144 + mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 
1; 145 + g0 &= mask; 146 + g1 &= mask; 147 + g2 &= mask; 148 + g3 &= mask; 149 + g4 &= mask; 150 + mask = ~mask; 151 + h0 = (h0 & mask) | g0; 152 + h1 = (h1 & mask) | g1; 153 + h2 = (h2 & mask) | g2; 154 + h3 = (h3 & mask) | g3; 155 + h4 = (h4 & mask) | g4; 156 + 157 + /* h = h % (2^128) */ 158 + put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); 159 + put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); 160 + put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); 161 + put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); 162 + } 163 + 164 + void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) 165 + { 166 + poly1305_integer_setkey(desc->opaque_r, key); 167 + desc->s[0] = get_unaligned_le32(key + 16); 168 + desc->s[1] = get_unaligned_le32(key + 20); 169 + desc->s[2] = get_unaligned_le32(key + 24); 170 + desc->s[3] = get_unaligned_le32(key + 28); 171 + poly1305_core_init(&desc->h); 172 + desc->buflen = 0; 173 + desc->sset = true; 174 + desc->rset = 1; 175 + } 176 + EXPORT_SYMBOL_GPL(poly1305_init_arch); 177 + 178 + static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, 179 + const u8 *src, unsigned int srclen) 180 + { 181 + if (!dctx->sset) { 182 + if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { 183 + poly1305_integer_setkey(dctx->r, src); 184 + src += POLY1305_BLOCK_SIZE; 185 + srclen -= POLY1305_BLOCK_SIZE; 186 + dctx->rset = 1; 187 + } 188 + if (srclen >= POLY1305_BLOCK_SIZE) { 189 + dctx->s[0] = get_unaligned_le32(src + 0); 190 + dctx->s[1] = get_unaligned_le32(src + 4); 191 + dctx->s[2] = get_unaligned_le32(src + 8); 192 + dctx->s[3] = get_unaligned_le32(src + 12); 193 + src += POLY1305_BLOCK_SIZE; 194 + srclen -= POLY1305_BLOCK_SIZE; 195 + dctx->sset = true; 196 + } 197 + } 198 + return srclen; 52 199 } 53 200 54 201 static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, ··· 224 47 srclen = datalen; 225 48 } 226 49 if (srclen >= POLY1305_BLOCK_SIZE) { 227 - poly1305_core_blocks(&dctx->h, 
dctx->r, src, 228 - srclen / POLY1305_BLOCK_SIZE, 1); 50 + poly1305_integer_blocks(&dctx->h, dctx->opaque_r, src, 51 + srclen / POLY1305_BLOCK_SIZE, 1); 229 52 srclen %= POLY1305_BLOCK_SIZE; 230 53 } 231 54 return srclen; ··· 282 105 return srclen; 283 106 } 284 107 285 - void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) 286 - { 287 - poly1305_init_generic(desc, key); 288 - } 289 - EXPORT_SYMBOL(poly1305_init_arch); 290 - 291 108 void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, 292 109 unsigned int srclen) 293 110 { ··· 329 158 } 330 159 EXPORT_SYMBOL(poly1305_update_arch); 331 160 332 - void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest) 161 + void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *dst) 333 162 { 334 - poly1305_final_generic(desc, digest); 163 + __le32 digest[4]; 164 + u64 f = 0; 165 + 166 + if (unlikely(desc->buflen)) { 167 + desc->buf[desc->buflen++] = 1; 168 + memset(desc->buf + desc->buflen, 0, 169 + POLY1305_BLOCK_SIZE - desc->buflen); 170 + poly1305_integer_blocks(&desc->h, desc->opaque_r, desc->buf, 1, 0); 171 + } 172 + 173 + poly1305_integer_emit(&desc->h, digest); 174 + 175 + /* mac = (h + s) % (2^128) */ 176 + f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0]; 177 + put_unaligned_le32(f, dst + 0); 178 + f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1]; 179 + put_unaligned_le32(f, dst + 4); 180 + f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2]; 181 + put_unaligned_le32(f, dst + 8); 182 + f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3]; 183 + put_unaligned_le32(f, dst + 12); 184 + 185 + *desc = (struct poly1305_desc_ctx){}; 335 186 } 336 187 EXPORT_SYMBOL(poly1305_final_arch); 337 188 ··· 376 183 if (unlikely(!dctx->sset)) 377 184 return -ENOKEY; 378 185 379 - poly1305_final_generic(dctx, dst); 186 + poly1305_final_arch(dctx, dst); 380 187 return 0; 381 188 } 382 189
+2 -2
crypto/adiantum.c
··· 70 70 struct crypto_skcipher *streamcipher; 71 71 struct crypto_cipher *blockcipher; 72 72 struct crypto_shash *hash; 73 - struct poly1305_key header_hash_key; 73 + struct poly1305_core_key header_hash_key; 74 74 }; 75 75 76 76 struct adiantum_request_ctx { ··· 239 239 poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, 240 240 TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1); 241 241 242 - poly1305_core_emit(&state, &rctx->header_hash); 242 + poly1305_core_emit(&state, NULL, &rctx->header_hash); 243 243 } 244 244 245 245 /* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */
+1 -1
crypto/nhpoly1305.c
··· 210 210 if (state->nh_remaining) 211 211 process_nh_hash_value(state, key); 212 212 213 - poly1305_core_emit(&state->poly_state, dst); 213 + poly1305_core_emit(&state->poly_state, NULL, dst); 214 214 return 0; 215 215 } 216 216 EXPORT_SYMBOL(crypto_nhpoly1305_final_helper);
+24 -1
crypto/poly1305_generic.c
··· 31 31 return 0; 32 32 } 33 33 34 + static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, 35 + const u8 *src, unsigned int srclen) 36 + { 37 + if (!dctx->sset) { 38 + if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { 39 + poly1305_core_setkey(&dctx->core_r, src); 40 + src += POLY1305_BLOCK_SIZE; 41 + srclen -= POLY1305_BLOCK_SIZE; 42 + dctx->rset = 2; 43 + } 44 + if (srclen >= POLY1305_BLOCK_SIZE) { 45 + dctx->s[0] = get_unaligned_le32(src + 0); 46 + dctx->s[1] = get_unaligned_le32(src + 4); 47 + dctx->s[2] = get_unaligned_le32(src + 8); 48 + dctx->s[3] = get_unaligned_le32(src + 12); 49 + src += POLY1305_BLOCK_SIZE; 50 + srclen -= POLY1305_BLOCK_SIZE; 51 + dctx->sset = true; 52 + } 53 + } 54 + return srclen; 55 + } 56 + 34 57 static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, 35 58 unsigned int srclen) 36 59 { ··· 65 42 srclen = datalen; 66 43 } 67 44 68 - poly1305_core_blocks(&dctx->h, dctx->r, src, 45 + poly1305_core_blocks(&dctx->h, &dctx->core_r, src, 69 46 srclen / POLY1305_BLOCK_SIZE, 1); 70 47 } 71 48
+10 -35
include/crypto/internal/poly1305.h
··· 11 11 #include <crypto/poly1305.h> 12 12 13 13 /* 14 - * Poly1305 core functions. These implement the ε-almost-∆-universal hash 15 - * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce 16 - * ("s key") at the end. They also only support block-aligned inputs. 14 + * Poly1305 core functions. These only accept whole blocks; the caller must 15 + * handle any needed block buffering and padding. 'hibit' must be 1 for any 16 + * full blocks, or 0 for the final block if it had to be padded. If 'nonce' is 17 + * non-NULL, then it's added at the end to compute the Poly1305 MAC. Otherwise, 18 + * only the ε-almost-∆-universal hash function (not the full MAC) is computed. 17 19 */ 18 - void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); 20 + 21 + void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key); 19 22 static inline void poly1305_core_init(struct poly1305_state *state) 20 23 { 21 24 *state = (struct poly1305_state){}; 22 25 } 23 26 24 27 void poly1305_core_blocks(struct poly1305_state *state, 25 - const struct poly1305_key *key, const void *src, 28 + const struct poly1305_core_key *key, const void *src, 26 29 unsigned int nblocks, u32 hibit); 27 - void poly1305_core_emit(const struct poly1305_state *state, void *dst); 28 - 29 - /* 30 - * Poly1305 requires a unique key for each tag, which implies that we can't set 31 - * it on the tfm that gets accessed by multiple users simultaneously. Instead we 32 - * expect the key as the first 32 bytes in the update() call. 
33 - */ 34 - static inline 35 - unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, 36 - const u8 *src, unsigned int srclen) 37 - { 38 - if (!dctx->sset) { 39 - if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { 40 - poly1305_core_setkey(dctx->r, src); 41 - src += POLY1305_BLOCK_SIZE; 42 - srclen -= POLY1305_BLOCK_SIZE; 43 - dctx->rset = 1; 44 - } 45 - if (srclen >= POLY1305_BLOCK_SIZE) { 46 - dctx->s[0] = get_unaligned_le32(src + 0); 47 - dctx->s[1] = get_unaligned_le32(src + 4); 48 - dctx->s[2] = get_unaligned_le32(src + 8); 49 - dctx->s[3] = get_unaligned_le32(src + 12); 50 - src += POLY1305_BLOCK_SIZE; 51 - srclen -= POLY1305_BLOCK_SIZE; 52 - dctx->sset = true; 53 - } 54 - } 55 - return srclen; 56 - } 30 + void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], 31 + void *dst); 57 32 58 33 #endif
+2 -2
include/crypto/nhpoly1305.h
··· 7 7 #define _NHPOLY1305_H 8 8 9 9 #include <crypto/hash.h> 10 - #include <crypto/poly1305.h> 10 + #include <crypto/internal/poly1305.h> 11 11 12 12 /* NH parameterization: */ 13 13 ··· 33 33 #define NHPOLY1305_KEY_SIZE (POLY1305_BLOCK_SIZE + NH_KEY_BYTES) 34 34 35 35 struct nhpoly1305_key { 36 - struct poly1305_key poly_key; 36 + struct poly1305_core_key poly_key; 37 37 u32 nh_key[NH_KEY_WORDS]; 38 38 }; 39 39
+23 -3
include/crypto/poly1305.h
··· 13 13 #define POLY1305_KEY_SIZE 32 14 14 #define POLY1305_DIGEST_SIZE 16 15 15 16 + /* The poly1305_key and poly1305_state types are mostly opaque and 17 + * implementation-defined. Limbs might be in base 2^64 or base 2^26, or 18 + * different yet. The union type provided keeps these 64-bit aligned for the 19 + * case in which this is implemented using 64x64 multiplies. 20 + */ 21 + 16 22 struct poly1305_key { 17 - u32 r[5]; /* key, base 2^26 */ 23 + union { 24 + u32 r[5]; 25 + u64 r64[3]; 26 + }; 27 + }; 28 + 29 + struct poly1305_core_key { 30 + struct poly1305_key key; 31 + struct poly1305_key precomputed_s; 18 32 }; 19 33 20 34 struct poly1305_state { 21 - u32 h[5]; /* accumulator, base 2^26 */ 35 + union { 36 + u32 h[5]; 37 + u64 h64[3]; 38 + }; 22 39 }; 23 40 24 41 struct poly1305_desc_ctx { ··· 52 35 /* accumulator */ 53 36 struct poly1305_state h; 54 37 /* key */ 55 - struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; 38 + union { 39 + struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; 40 + struct poly1305_core_key core_r; 41 + }; 56 42 }; 57 43 58 44 void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key);
+3 -1
lib/crypto/Makefile
··· 28 28 libdes-y := des.o 29 29 30 30 obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o 31 - libpoly1305-y := poly1305.o 31 + libpoly1305-y := poly1305-donna32.o 32 + libpoly1305-$(CONFIG_ARCH_SUPPORTS_INT128) := poly1305-donna64.o 33 + libpoly1305-y += poly1305.o 32 34 33 35 obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o 34 36 libsha256-y := sha256.o
+204
lib/crypto/poly1305-donna32.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR MIT 2 + /* 3 + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 4 + * 5 + * This is based in part on Andrew Moon's poly1305-donna, which is in the 6 + * public domain. 7 + */ 8 + 9 + #include <linux/kernel.h> 10 + #include <asm/unaligned.h> 11 + #include <crypto/internal/poly1305.h> 12 + 13 + void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) 14 + { 15 + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 16 + key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff; 17 + key->key.r[1] = (get_unaligned_le32(&raw_key[3]) >> 2) & 0x3ffff03; 18 + key->key.r[2] = (get_unaligned_le32(&raw_key[6]) >> 4) & 0x3ffc0ff; 19 + key->key.r[3] = (get_unaligned_le32(&raw_key[9]) >> 6) & 0x3f03fff; 20 + key->key.r[4] = (get_unaligned_le32(&raw_key[12]) >> 8) & 0x00fffff; 21 + 22 + /* s = 5*r */ 23 + key->precomputed_s.r[0] = key->key.r[1] * 5; 24 + key->precomputed_s.r[1] = key->key.r[2] * 5; 25 + key->precomputed_s.r[2] = key->key.r[3] * 5; 26 + key->precomputed_s.r[3] = key->key.r[4] * 5; 27 + } 28 + EXPORT_SYMBOL(poly1305_core_setkey); 29 + 30 + void poly1305_core_blocks(struct poly1305_state *state, 31 + const struct poly1305_core_key *key, const void *src, 32 + unsigned int nblocks, u32 hibit) 33 + { 34 + const u8 *input = src; 35 + u32 r0, r1, r2, r3, r4; 36 + u32 s1, s2, s3, s4; 37 + u32 h0, h1, h2, h3, h4; 38 + u64 d0, d1, d2, d3, d4; 39 + u32 c; 40 + 41 + if (!nblocks) 42 + return; 43 + 44 + hibit <<= 24; 45 + 46 + r0 = key->key.r[0]; 47 + r1 = key->key.r[1]; 48 + r2 = key->key.r[2]; 49 + r3 = key->key.r[3]; 50 + r4 = key->key.r[4]; 51 + 52 + s1 = key->precomputed_s.r[0]; 53 + s2 = key->precomputed_s.r[1]; 54 + s3 = key->precomputed_s.r[2]; 55 + s4 = key->precomputed_s.r[3]; 56 + 57 + h0 = state->h[0]; 58 + h1 = state->h[1]; 59 + h2 = state->h[2]; 60 + h3 = state->h[3]; 61 + h4 = state->h[4]; 62 + 63 + do { 64 + /* h += m[i] */ 65 + h0 += 
(get_unaligned_le32(&input[0])) & 0x3ffffff; 66 + h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff; 67 + h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff; 68 + h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff; 69 + h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit; 70 + 71 + /* h *= r */ 72 + d0 = ((u64)h0 * r0) + ((u64)h1 * s4) + 73 + ((u64)h2 * s3) + ((u64)h3 * s2) + 74 + ((u64)h4 * s1); 75 + d1 = ((u64)h0 * r1) + ((u64)h1 * r0) + 76 + ((u64)h2 * s4) + ((u64)h3 * s3) + 77 + ((u64)h4 * s2); 78 + d2 = ((u64)h0 * r2) + ((u64)h1 * r1) + 79 + ((u64)h2 * r0) + ((u64)h3 * s4) + 80 + ((u64)h4 * s3); 81 + d3 = ((u64)h0 * r3) + ((u64)h1 * r2) + 82 + ((u64)h2 * r1) + ((u64)h3 * r0) + 83 + ((u64)h4 * s4); 84 + d4 = ((u64)h0 * r4) + ((u64)h1 * r3) + 85 + ((u64)h2 * r2) + ((u64)h3 * r1) + 86 + ((u64)h4 * r0); 87 + 88 + /* (partial) h %= p */ 89 + c = (u32)(d0 >> 26); 90 + h0 = (u32)d0 & 0x3ffffff; 91 + d1 += c; 92 + c = (u32)(d1 >> 26); 93 + h1 = (u32)d1 & 0x3ffffff; 94 + d2 += c; 95 + c = (u32)(d2 >> 26); 96 + h2 = (u32)d2 & 0x3ffffff; 97 + d3 += c; 98 + c = (u32)(d3 >> 26); 99 + h3 = (u32)d3 & 0x3ffffff; 100 + d4 += c; 101 + c = (u32)(d4 >> 26); 102 + h4 = (u32)d4 & 0x3ffffff; 103 + h0 += c * 5; 104 + c = (h0 >> 26); 105 + h0 = h0 & 0x3ffffff; 106 + h1 += c; 107 + 108 + input += POLY1305_BLOCK_SIZE; 109 + } while (--nblocks); 110 + 111 + state->h[0] = h0; 112 + state->h[1] = h1; 113 + state->h[2] = h2; 114 + state->h[3] = h3; 115 + state->h[4] = h4; 116 + } 117 + EXPORT_SYMBOL(poly1305_core_blocks); 118 + 119 + void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], 120 + void *dst) 121 + { 122 + u8 *mac = dst; 123 + u32 h0, h1, h2, h3, h4, c; 124 + u32 g0, g1, g2, g3, g4; 125 + u64 f; 126 + u32 mask; 127 + 128 + /* fully carry h */ 129 + h0 = state->h[0]; 130 + h1 = state->h[1]; 131 + h2 = state->h[2]; 132 + h3 = state->h[3]; 133 + h4 = state->h[4]; 134 + 135 + c = h1 >> 26; 136 + h1 = h1 & 0x3ffffff; 137 + h2 += c; 138 + c = h2 
>> 26; 139 + h2 = h2 & 0x3ffffff; 140 + h3 += c; 141 + c = h3 >> 26; 142 + h3 = h3 & 0x3ffffff; 143 + h4 += c; 144 + c = h4 >> 26; 145 + h4 = h4 & 0x3ffffff; 146 + h0 += c * 5; 147 + c = h0 >> 26; 148 + h0 = h0 & 0x3ffffff; 149 + h1 += c; 150 + 151 + /* compute h + -p */ 152 + g0 = h0 + 5; 153 + c = g0 >> 26; 154 + g0 &= 0x3ffffff; 155 + g1 = h1 + c; 156 + c = g1 >> 26; 157 + g1 &= 0x3ffffff; 158 + g2 = h2 + c; 159 + c = g2 >> 26; 160 + g2 &= 0x3ffffff; 161 + g3 = h3 + c; 162 + c = g3 >> 26; 163 + g3 &= 0x3ffffff; 164 + g4 = h4 + c - (1UL << 26); 165 + 166 + /* select h if h < p, or h + -p if h >= p */ 167 + mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; 168 + g0 &= mask; 169 + g1 &= mask; 170 + g2 &= mask; 171 + g3 &= mask; 172 + g4 &= mask; 173 + mask = ~mask; 174 + 175 + h0 = (h0 & mask) | g0; 176 + h1 = (h1 & mask) | g1; 177 + h2 = (h2 & mask) | g2; 178 + h3 = (h3 & mask) | g3; 179 + h4 = (h4 & mask) | g4; 180 + 181 + /* h = h % (2^128) */ 182 + h0 = ((h0) | (h1 << 26)) & 0xffffffff; 183 + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; 184 + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; 185 + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; 186 + 187 + if (likely(nonce)) { 188 + /* mac = (h + nonce) % (2^128) */ 189 + f = (u64)h0 + nonce[0]; 190 + h0 = (u32)f; 191 + f = (u64)h1 + nonce[1] + (f >> 32); 192 + h1 = (u32)f; 193 + f = (u64)h2 + nonce[2] + (f >> 32); 194 + h2 = (u32)f; 195 + f = (u64)h3 + nonce[3] + (f >> 32); 196 + h3 = (u32)f; 197 + } 198 + 199 + put_unaligned_le32(h0, &mac[0]); 200 + put_unaligned_le32(h1, &mac[4]); 201 + put_unaligned_le32(h2, &mac[8]); 202 + put_unaligned_le32(h3, &mac[12]); 203 + } 204 + EXPORT_SYMBOL(poly1305_core_emit);
+185
lib/crypto/poly1305-donna64.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR MIT 2 + /* 3 + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 4 + * 5 + * This is based in part on Andrew Moon's poly1305-donna, which is in the 6 + * public domain. 7 + */ 8 + 9 + #include <linux/kernel.h> 10 + #include <asm/unaligned.h> 11 + #include <crypto/internal/poly1305.h> 12 + 13 + typedef __uint128_t u128; 14 + 15 + void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) 16 + { 17 + u64 t0, t1; 18 + 19 + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 20 + t0 = get_unaligned_le64(&raw_key[0]); 21 + t1 = get_unaligned_le64(&raw_key[8]); 22 + 23 + key->key.r64[0] = t0 & 0xffc0fffffffULL; 24 + key->key.r64[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL; 25 + key->key.r64[2] = ((t1 >> 24)) & 0x00ffffffc0fULL; 26 + 27 + /* s = 20*r */ 28 + key->precomputed_s.r64[0] = key->key.r64[1] * 20; 29 + key->precomputed_s.r64[1] = key->key.r64[2] * 20; 30 + } 31 + EXPORT_SYMBOL(poly1305_core_setkey); 32 + 33 + void poly1305_core_blocks(struct poly1305_state *state, 34 + const struct poly1305_core_key *key, const void *src, 35 + unsigned int nblocks, u32 hibit) 36 + { 37 + const u8 *input = src; 38 + u64 hibit64; 39 + u64 r0, r1, r2; 40 + u64 s1, s2; 41 + u64 h0, h1, h2; 42 + u64 c; 43 + u128 d0, d1, d2, d; 44 + 45 + if (!nblocks) 46 + return; 47 + 48 + hibit64 = ((u64)hibit) << 40; 49 + 50 + r0 = key->key.r64[0]; 51 + r1 = key->key.r64[1]; 52 + r2 = key->key.r64[2]; 53 + 54 + h0 = state->h64[0]; 55 + h1 = state->h64[1]; 56 + h2 = state->h64[2]; 57 + 58 + s1 = key->precomputed_s.r64[0]; 59 + s2 = key->precomputed_s.r64[1]; 60 + 61 + do { 62 + u64 t0, t1; 63 + 64 + /* h += m[i] */ 65 + t0 = get_unaligned_le64(&input[0]); 66 + t1 = get_unaligned_le64(&input[8]); 67 + 68 + h0 += t0 & 0xfffffffffffULL; 69 + h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL; 70 + h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit64; 71 + 72 + /* h *= r */ 73 + d0 = (u128)h0 * r0; 74 + d = 
(u128)h1 * s2; 75 + d0 += d; 76 + d = (u128)h2 * s1; 77 + d0 += d; 78 + d1 = (u128)h0 * r1; 79 + d = (u128)h1 * r0; 80 + d1 += d; 81 + d = (u128)h2 * s2; 82 + d1 += d; 83 + d2 = (u128)h0 * r2; 84 + d = (u128)h1 * r1; 85 + d2 += d; 86 + d = (u128)h2 * r0; 87 + d2 += d; 88 + 89 + /* (partial) h %= p */ 90 + c = (u64)(d0 >> 44); 91 + h0 = (u64)d0 & 0xfffffffffffULL; 92 + d1 += c; 93 + c = (u64)(d1 >> 44); 94 + h1 = (u64)d1 & 0xfffffffffffULL; 95 + d2 += c; 96 + c = (u64)(d2 >> 42); 97 + h2 = (u64)d2 & 0x3ffffffffffULL; 98 + h0 += c * 5; 99 + c = h0 >> 44; 100 + h0 = h0 & 0xfffffffffffULL; 101 + h1 += c; 102 + 103 + input += POLY1305_BLOCK_SIZE; 104 + } while (--nblocks); 105 + 106 + state->h64[0] = h0; 107 + state->h64[1] = h1; 108 + state->h64[2] = h2; 109 + } 110 + EXPORT_SYMBOL(poly1305_core_blocks); 111 + 112 + void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], 113 + void *dst) 114 + { 115 + u8 *mac = dst; 116 + u64 h0, h1, h2, c; 117 + u64 g0, g1, g2; 118 + u64 t0, t1; 119 + 120 + /* fully carry h */ 121 + h0 = state->h64[0]; 122 + h1 = state->h64[1]; 123 + h2 = state->h64[2]; 124 + 125 + c = h1 >> 44; 126 + h1 &= 0xfffffffffffULL; 127 + h2 += c; 128 + c = h2 >> 42; 129 + h2 &= 0x3ffffffffffULL; 130 + h0 += c * 5; 131 + c = h0 >> 44; 132 + h0 &= 0xfffffffffffULL; 133 + h1 += c; 134 + c = h1 >> 44; 135 + h1 &= 0xfffffffffffULL; 136 + h2 += c; 137 + c = h2 >> 42; 138 + h2 &= 0x3ffffffffffULL; 139 + h0 += c * 5; 140 + c = h0 >> 44; 141 + h0 &= 0xfffffffffffULL; 142 + h1 += c; 143 + 144 + /* compute h + -p */ 145 + g0 = h0 + 5; 146 + c = g0 >> 44; 147 + g0 &= 0xfffffffffffULL; 148 + g1 = h1 + c; 149 + c = g1 >> 44; 150 + g1 &= 0xfffffffffffULL; 151 + g2 = h2 + c - (1ULL << 42); 152 + 153 + /* select h if h < p, or h + -p if h >= p */ 154 + c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1; 155 + g0 &= c; 156 + g1 &= c; 157 + g2 &= c; 158 + c = ~c; 159 + h0 = (h0 & c) | g0; 160 + h1 = (h1 & c) | g1; 161 + h2 = (h2 & c) | g2; 162 + 163 + if 
(likely(nonce)) { 164 + /* h = (h + nonce) */ 165 + t0 = ((u64)nonce[1] << 32) | nonce[0]; 166 + t1 = ((u64)nonce[3] << 32) | nonce[2]; 167 + 168 + h0 += t0 & 0xfffffffffffULL; 169 + c = h0 >> 44; 170 + h0 &= 0xfffffffffffULL; 171 + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c; 172 + c = h1 >> 44; 173 + h1 &= 0xfffffffffffULL; 174 + h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c; 175 + h2 &= 0x3ffffffffffULL; 176 + } 177 + 178 + /* mac = h % (2^128) */ 179 + h0 = h0 | (h1 << 44); 180 + h1 = (h1 >> 20) | (h2 << 24); 181 + 182 + put_unaligned_le64(h0, &mac[0]); 183 + put_unaligned_le64(h1, &mac[8]); 184 + } 185 + EXPORT_SYMBOL(poly1305_core_emit);
+7 -162
lib/crypto/poly1305.c
··· 12 12 #include <linux/module.h> 13 13 #include <asm/unaligned.h> 14 14 15 - static inline u64 mlt(u64 a, u64 b) 16 - { 17 - return a * b; 18 - } 19 - 20 - static inline u32 sr(u64 v, u_char n) 21 - { 22 - return v >> n; 23 - } 24 - 25 - static inline u32 and(u32 v, u32 mask) 26 - { 27 - return v & mask; 28 - } 29 - 30 - void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) 31 - { 32 - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 33 - key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; 34 - key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; 35 - key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; 36 - key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; 37 - key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; 38 - } 39 - EXPORT_SYMBOL_GPL(poly1305_core_setkey); 40 - 41 - void poly1305_core_blocks(struct poly1305_state *state, 42 - const struct poly1305_key *key, const void *src, 43 - unsigned int nblocks, u32 hibit) 44 - { 45 - u32 r0, r1, r2, r3, r4; 46 - u32 s1, s2, s3, s4; 47 - u32 h0, h1, h2, h3, h4; 48 - u64 d0, d1, d2, d3, d4; 49 - 50 - if (!nblocks) 51 - return; 52 - 53 - r0 = key->r[0]; 54 - r1 = key->r[1]; 55 - r2 = key->r[2]; 56 - r3 = key->r[3]; 57 - r4 = key->r[4]; 58 - 59 - s1 = r1 * 5; 60 - s2 = r2 * 5; 61 - s3 = r3 * 5; 62 - s4 = r4 * 5; 63 - 64 - h0 = state->h[0]; 65 - h1 = state->h[1]; 66 - h2 = state->h[2]; 67 - h3 = state->h[3]; 68 - h4 = state->h[4]; 69 - 70 - do { 71 - /* h += m[i] */ 72 - h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; 73 - h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; 74 - h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; 75 - h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; 76 - h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); 77 - 78 - /* h *= r */ 79 - d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + 80 - mlt(h3, s2) + mlt(h4, s1); 81 - d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + 82 - mlt(h3, s3) + mlt(h4, 
s2); 83 - d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + 84 - mlt(h3, s4) + mlt(h4, s3); 85 - d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + 86 - mlt(h3, r0) + mlt(h4, s4); 87 - d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + 88 - mlt(h3, r1) + mlt(h4, r0); 89 - 90 - /* (partial) h %= p */ 91 - d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); 92 - d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); 93 - d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); 94 - d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); 95 - h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); 96 - h1 += h0 >> 26; h0 = h0 & 0x3ffffff; 97 - 98 - src += POLY1305_BLOCK_SIZE; 99 - } while (--nblocks); 100 - 101 - state->h[0] = h0; 102 - state->h[1] = h1; 103 - state->h[2] = h2; 104 - state->h[3] = h3; 105 - state->h[4] = h4; 106 - } 107 - EXPORT_SYMBOL_GPL(poly1305_core_blocks); 108 - 109 - void poly1305_core_emit(const struct poly1305_state *state, void *dst) 110 - { 111 - u32 h0, h1, h2, h3, h4; 112 - u32 g0, g1, g2, g3, g4; 113 - u32 mask; 114 - 115 - /* fully carry h */ 116 - h0 = state->h[0]; 117 - h1 = state->h[1]; 118 - h2 = state->h[2]; 119 - h3 = state->h[3]; 120 - h4 = state->h[4]; 121 - 122 - h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; 123 - h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; 124 - h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; 125 - h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; 126 - h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; 127 - 128 - /* compute h + -p */ 129 - g0 = h0 + 5; 130 - g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; 131 - g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; 132 - g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; 133 - g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; 134 - 135 - /* select h if h < p, or h + -p if h >= p */ 136 - mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; 137 - g0 &= mask; 138 - g1 &= mask; 139 - g2 &= mask; 140 - g3 &= mask; 141 - g4 &= mask; 142 - mask = ~mask; 143 - h0 = (h0 & mask) | g0; 144 - h1 = (h1 & mask) | g1; 145 - h2 = (h2 & mask) | g2; 146 - h3 = (h3 & mask) | g3; 147 - h4 = (h4 & mask) | g4; 148 - 149 - /* h = 
h % (2^128) */ 150 - put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); 151 - put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); 152 - put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); 153 - put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); 154 - } 155 - EXPORT_SYMBOL_GPL(poly1305_core_emit); 156 - 157 15 void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key) 158 16 { 159 - poly1305_core_setkey(desc->r, key); 17 + poly1305_core_setkey(&desc->core_r, key); 160 18 desc->s[0] = get_unaligned_le32(key + 16); 161 19 desc->s[1] = get_unaligned_le32(key + 20); 162 20 desc->s[2] = get_unaligned_le32(key + 24); ··· 22 164 poly1305_core_init(&desc->h); 23 165 desc->buflen = 0; 24 166 desc->sset = true; 25 - desc->rset = 1; 167 + desc->rset = 2; 26 168 } 27 169 EXPORT_SYMBOL_GPL(poly1305_init_generic); 28 170 ··· 39 181 desc->buflen += bytes; 40 182 41 183 if (desc->buflen == POLY1305_BLOCK_SIZE) { 42 - poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1); 184 + poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, 185 + 1, 1); 43 186 desc->buflen = 0; 44 187 } 45 188 } 46 189 47 190 if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { 48 - poly1305_core_blocks(&desc->h, desc->r, src, 191 + poly1305_core_blocks(&desc->h, &desc->core_r, src, 49 192 nbytes / POLY1305_BLOCK_SIZE, 1); 50 193 src += nbytes - (nbytes % POLY1305_BLOCK_SIZE); 51 194 nbytes %= POLY1305_BLOCK_SIZE; ··· 61 202 62 203 void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst) 63 204 { 64 - __le32 digest[4]; 65 - u64 f = 0; 66 - 67 205 if (unlikely(desc->buflen)) { 68 206 desc->buf[desc->buflen++] = 1; 69 207 memset(desc->buf + desc->buflen, 0, 70 208 POLY1305_BLOCK_SIZE - desc->buflen); 71 - poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0); 209 + poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, 1, 0); 72 210 } 73 211 74 - poly1305_core_emit(&desc->h, digest); 75 - 76 - /* mac = (h + s) % (2^128) */ 77 - f = (f >> 32) + le32_to_cpu(digest[0]) + 
desc->s[0]; 78 - put_unaligned_le32(f, dst + 0); 79 - f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1]; 80 - put_unaligned_le32(f, dst + 4); 81 - f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2]; 82 - put_unaligned_le32(f, dst + 8); 83 - f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3]; 84 - put_unaligned_le32(f, dst + 12); 85 - 212 + poly1305_core_emit(&desc->h, desc->s, dst); 86 213 *desc = (struct poly1305_desc_ctx){}; 87 214 } 88 215 EXPORT_SYMBOL_GPL(poly1305_final_generic);