Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86/crc32: update prototype for crc_pcl()

- Change the len parameter from unsigned int to size_t, so that the
library function which takes a size_t can safely use this code.

- Rename the function to crc32c_x86_3way(), which is much clearer.

- Move the crc parameter to the front, as this is the usual convention.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20241202010844.144356-12-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>

+35 -35
+3 -4
arch/x86/crypto/crc32c-intel_glue.c
··· 41 41 */ 42 42 #define CRC32C_PCL_BREAKEVEN 512 43 43 44 - asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len, 45 - unsigned int crc_init); 44 + asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); 46 45 #endif /* CONFIG_X86_64 */ 47 46 48 47 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) ··· 158 159 */ 159 160 if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { 160 161 kernel_fpu_begin(); 161 - *crcp = crc_pcl(data, len, *crcp); 162 + *crcp = crc32c_x86_3way(*crcp, data, len); 162 163 kernel_fpu_end(); 163 164 } else 164 165 *crcp = crc32c_intel_le_hw(*crcp, data, len); ··· 170 171 { 171 172 if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { 172 173 kernel_fpu_begin(); 173 - *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); 174 + *(__le32 *)out = ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len)); 174 175 kernel_fpu_end(); 175 176 } else 176 177 *(__le32 *)out =
+32 -31
arch/x86/crypto/crc32c-pcl-intel-asm_64.S
··· 52 52 # regular CRC code that does not interleave the CRC instructions. 53 53 #define SMALL_SIZE 200 54 54 55 - # unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init); 55 + # u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); 56 56 57 57 .text 58 - SYM_FUNC_START(crc_pcl) 59 - #define bufp %rdi 60 - #define bufp_d %edi 61 - #define len %esi 62 - #define crc_init %edx 63 - #define crc_init_q %rdx 58 + SYM_FUNC_START(crc32c_x86_3way) 59 + #define crc0 %edi 60 + #define crc0_q %rdi 61 + #define bufp %rsi 62 + #define bufp_d %esi 63 + #define len %rdx 64 + #define len_dw %edx 64 65 #define n_misaligned %ecx /* overlaps chunk_bytes! */ 65 66 #define n_misaligned_q %rcx 66 67 #define chunk_bytes %ecx /* overlaps n_misaligned! */ ··· 86 85 .Ldo_align: 87 86 movq (bufp), %rax 88 87 add n_misaligned_q, bufp 89 - sub n_misaligned, len 88 + sub n_misaligned_q, len 90 89 .Lalign_loop: 91 - crc32b %al, crc_init # compute crc32 of 1-byte 90 + crc32b %al, crc0 # compute crc32 of 1-byte 92 91 shr $8, %rax # get next byte 93 92 dec n_misaligned 94 93 jne .Lalign_loop ··· 103 102 104 103 .Lpartial_block: 105 104 # Compute floor(len / 24) to get num qwords to process from each lane. 106 - imul $2731, len, %eax # 2731 = ceil(2^16 / 24) 105 + imul $2731, len_dw, %eax # 2731 = ceil(2^16 / 24) 107 106 shr $16, %eax 108 107 jmp .Lcrc_3lanes 109 108 ··· 126 125 # Unroll the loop by a factor of 4 to reduce the overhead of the loop 127 126 # bookkeeping instructions, which can compete with crc32q for the ALUs. 
128 127 .Lcrc_3lanes_4x_loop: 129 - crc32q (bufp), crc_init_q 128 + crc32q (bufp), crc0_q 130 129 crc32q (bufp,chunk_bytes_q), crc1 131 130 crc32q (bufp,chunk_bytes_q,2), crc2 132 - crc32q 8(bufp), crc_init_q 131 + crc32q 8(bufp), crc0_q 133 132 crc32q 8(bufp,chunk_bytes_q), crc1 134 133 crc32q 8(bufp,chunk_bytes_q,2), crc2 135 - crc32q 16(bufp), crc_init_q 134 + crc32q 16(bufp), crc0_q 136 135 crc32q 16(bufp,chunk_bytes_q), crc1 137 136 crc32q 16(bufp,chunk_bytes_q,2), crc2 138 - crc32q 24(bufp), crc_init_q 137 + crc32q 24(bufp), crc0_q 139 138 crc32q 24(bufp,chunk_bytes_q), crc1 140 139 crc32q 24(bufp,chunk_bytes_q,2), crc2 141 140 add $32, bufp ··· 147 146 jz .Lcrc_3lanes_last_qword 148 147 149 148 .Lcrc_3lanes_1x_loop: 150 - crc32q (bufp), crc_init_q 149 + crc32q (bufp), crc0_q 151 150 crc32q (bufp,chunk_bytes_q), crc1 152 151 crc32q (bufp,chunk_bytes_q,2), crc2 153 152 add $8, bufp ··· 155 154 jnz .Lcrc_3lanes_1x_loop 156 155 157 156 .Lcrc_3lanes_last_qword: 158 - crc32q (bufp), crc_init_q 157 + crc32q (bufp), crc0_q 159 158 crc32q (bufp,chunk_bytes_q), crc1 160 159 # SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet 161 160 ··· 166 165 lea (K_table-8)(%rip), %rax # first entry is for idx 1 167 166 pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2 168 167 lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3 169 - sub %eax, len # len -= chunk_bytes * 3 168 + sub %rax, len # len -= chunk_bytes * 3 170 169 171 - movq crc_init_q, %xmm1 # CRC for block 1 170 + movq crc0_q, %xmm1 # CRC for block 1 172 171 pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2 173 172 174 173 movq crc1, %xmm2 # CRC for block 2 ··· 177 176 pxor %xmm2,%xmm1 178 177 movq %xmm1, %rax 179 178 xor (bufp,chunk_bytes_q,2), %rax 180 - mov crc2, crc_init_q 181 - crc32 %rax, crc_init_q 179 + mov crc2, crc0_q 180 + crc32 %rax, crc0_q 182 181 lea 8(bufp,chunk_bytes_q,2), bufp 183 182 184 183 ################################################################ ··· 194 193 ## 6) Process 
any remainder without interleaving: 195 194 ####################################################################### 196 195 .Lsmall: 197 - test len, len 196 + test len_dw, len_dw 198 197 jz .Ldone 199 - mov len, %eax 198 + mov len_dw, %eax 200 199 shr $3, %eax 201 200 jz .Ldo_dword 202 201 .Ldo_qwords: 203 - crc32q (bufp), crc_init_q 202 + crc32q (bufp), crc0_q 204 203 add $8, bufp 205 204 dec %eax 206 205 jnz .Ldo_qwords 207 206 .Ldo_dword: 208 - test $4, len 207 + test $4, len_dw 209 208 jz .Ldo_word 210 - crc32l (bufp), crc_init 209 + crc32l (bufp), crc0 211 210 add $4, bufp 212 211 .Ldo_word: 213 - test $2, len 212 + test $2, len_dw 214 213 jz .Ldo_byte 215 - crc32w (bufp), crc_init 214 + crc32w (bufp), crc0 216 215 add $2, bufp 217 216 .Ldo_byte: 218 - test $1, len 217 + test $1, len_dw 219 218 jz .Ldone 220 - crc32b (bufp), crc_init 219 + crc32b (bufp), crc0 221 220 .Ldone: 222 - mov crc_init, %eax 221 + mov crc0, %eax 223 222 RET 224 - SYM_FUNC_END(crc_pcl) 223 + SYM_FUNC_END(crc32c_x86_3way) 225 224 226 225 .section .rodata, "a", @progbits 227 226 ################################################################