Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

lib/crc32: improve support for arch-specific overrides

Currently the CRC32 library functions are defined as weak symbols, and
the arm64 and riscv architectures override them.

This method of arch-specific overrides has the limitation that it only
works when both the base and arch code are built-in. Also, it causes the
arch-specific code to be silently unused if it is accidentally built with
lib-y instead of obj-y; unfortunately the RISC-V code does this.

This commit reorganizes the code to have explicit *_arch() functions
that are called when they are enabled, similar to how some of the crypto
library code works (e.g. chacha_crypt() calls chacha_crypt_arch()).

Make the existing kconfig choice for the CRC32 implementation also
control whether the arch-optimized implementation (if one is available)
is enabled or not. Make it enabled by default if CRC32 is also enabled.

The result is that arch-optimized CRC32 library functions will be
included automatically when appropriate, but it is now possible to
disable them. They can also now be built as a loadable module if the
CRC32 library functions happen to be used only by loadable modules, in
which case the arch and base CRC32 modules will be automatically loaded
via direct symbol dependency when appropriate.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20241202010844.144356-3-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>

+118 -51
+1
arch/arm64/Kconfig
··· 21 21 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE 22 22 select ARCH_HAS_CACHE_LINE_SIZE 23 23 select ARCH_HAS_CC_PLATFORM 24 + select ARCH_HAS_CRC32 24 25 select ARCH_HAS_CURRENT_STACK_POINTER 25 26 select ARCH_HAS_DEBUG_VIRTUAL 26 27 select ARCH_HAS_DEBUG_VM_PGTABLE
+2 -1
arch/arm64/lib/Makefile
··· 13 13 14 14 lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o 15 15 16 - obj-$(CONFIG_CRC32) += crc32.o crc32-glue.o 16 + obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o 17 + crc32-arm64-y := crc32.o crc32-glue.o 17 18 18 19 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o 19 20
+10 -3
arch/arm64/lib/crc32-glue.c
··· 2 2 3 3 #include <linux/crc32.h> 4 4 #include <linux/linkage.h> 5 + #include <linux/module.h> 5 6 6 7 #include <asm/alternative.h> 7 8 #include <asm/cpufeature.h> ··· 22 21 asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); 23 22 asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len); 24 23 25 - u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) 24 + u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) 26 25 { 27 26 if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) 28 27 return crc32_le_base(crc, p, len); ··· 41 40 42 41 return crc32_le_arm64(crc, p, len); 43 42 } 43 + EXPORT_SYMBOL(crc32_le_arch); 44 44 45 - u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) 45 + u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) 46 46 { 47 47 if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) 48 48 return crc32c_le_base(crc, p, len); ··· 62 60 63 61 return crc32c_le_arm64(crc, p, len); 64 62 } 63 + EXPORT_SYMBOL(crc32c_le_arch); 65 64 66 - u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) 65 + u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) 67 66 { 68 67 if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) 69 68 return crc32_be_base(crc, p, len); ··· 83 80 84 81 return crc32_be_arm64(crc, p, len); 85 82 } 83 + EXPORT_SYMBOL(crc32_be_arch); 84 + 85 + MODULE_LICENSE("GPL"); 86 + MODULE_DESCRIPTION("arm64-optimized CRC32 functions");
+1
arch/riscv/Kconfig
··· 24 24 select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 25 25 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE 26 26 select ARCH_HAS_BINFMT_FLAT 27 + select ARCH_HAS_CRC32 if RISCV_ISA_ZBC 27 28 select ARCH_HAS_CURRENT_STACK_POINTER 28 29 select ARCH_HAS_DEBUG_VIRTUAL if MMU 29 30 select ARCH_HAS_DEBUG_VM_PGTABLE
+1 -2
arch/riscv/lib/Makefile
··· 15 15 lib-$(CONFIG_MMU) += uaccess.o 16 16 lib-$(CONFIG_64BIT) += tishift.o 17 17 lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o 18 - lib-$(CONFIG_RISCV_ISA_ZBC) += crc32.o 19 - 18 + obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o 20 19 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o 21 20 lib-$(CONFIG_RISCV_ISA_V) += xor.o 22 21 lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
+10 -3
arch/riscv/lib/crc32.c arch/riscv/lib/crc32-riscv.c
··· 14 14 #include <linux/crc32poly.h> 15 15 #include <linux/crc32.h> 16 16 #include <linux/byteorder/generic.h> 17 + #include <linux/module.h> 17 18 18 19 /* 19 20 * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for ··· 218 217 return crc_fb(crc, p, len); 219 218 } 220 219 221 - u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) 220 + u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) 222 221 { 223 222 return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE, 224 223 crc32_le_base); 225 224 } 225 + EXPORT_SYMBOL(crc32_le_arch); 226 226 227 - u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) 227 + u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) 228 228 { 229 229 return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, 230 230 CRC32C_POLY_QT_LE, crc32c_le_base); 231 231 } 232 + EXPORT_SYMBOL(crc32c_le_arch); 232 233 233 234 static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, 234 235 size_t len) ··· 256 253 return crc; 257 254 } 258 255 259 - u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) 256 + u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) 260 257 { 261 258 size_t offset, head_len, tail_len; 262 259 unsigned long const *p_ul; ··· 295 292 legacy: 296 293 return crc32_be_base(crc, p, len); 297 294 } 295 + EXPORT_SYMBOL(crc32_be_arch); 296 + 297 + MODULE_LICENSE("GPL"); 298 + MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
+2 -2
crypto/crc32_generic.c
··· 160 160 static int __init crc32_mod_init(void) 161 161 { 162 162 /* register the arch flavor only if it differs from the generic one */ 163 - return crypto_register_shashes(algs, 1 + (&crc32_le != &crc32_le_base)); 163 + return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); 164 164 } 165 165 166 166 static void __exit crc32_mod_fini(void) 167 167 { 168 - crypto_unregister_shashes(algs, 1 + (&crc32_le != &crc32_le_base)); 168 + crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); 169 169 } 170 170 171 171 subsys_initcall(crc32_mod_init);
+2 -2
crypto/crc32c_generic.c
··· 200 200 static int __init crc32c_mod_init(void) 201 201 { 202 202 /* register the arch flavor only if it differs from the generic one */ 203 - return crypto_register_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base)); 203 + return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); 204 204 } 205 205 206 206 static void __exit crc32c_mod_fini(void) 207 207 { 208 - crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base)); 208 + crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); 209 209 } 210 210 211 211 subsys_initcall(crc32c_mod_init);
+28 -7
include/linux/crc32.h
··· 8 8 #include <linux/types.h> 9 9 #include <linux/bitrev.h> 10 10 11 - u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); 12 - u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len); 13 - u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); 14 - u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len); 11 + u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len); 12 + u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len); 13 + u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len); 14 + u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len); 15 + u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len); 16 + u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len); 17 + 18 + static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len) 19 + { 20 + if (IS_ENABLED(CONFIG_CRC32_ARCH)) 21 + return crc32_le_arch(crc, p, len); 22 + return crc32_le_base(crc, p, len); 23 + } 24 + 25 + static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len) 26 + { 27 + if (IS_ENABLED(CONFIG_CRC32_ARCH)) 28 + return crc32_be_arch(crc, p, len); 29 + return crc32_be_base(crc, p, len); 30 + } 31 + 32 + /* TODO: leading underscores should be dropped once callers have been updated */ 33 + static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) 34 + { 35 + if (IS_ENABLED(CONFIG_CRC32_ARCH)) 36 + return crc32c_le_arch(crc, p, len); 37 + return crc32c_le_base(crc, p, len); 38 + } 15 39 16 40 /** 17 41 * crc32_le_combine - Combine two crc32 check values into one. For two ··· 61 37 { 62 38 return crc32_le_shift(crc1, len2) ^ crc2; 63 39 } 64 - 65 - u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); 66 - u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len); 67 40 68 41 /** 69 42 * __crc32c_le_combine - Combine two crc32c check values into one. For two
+54 -16
lib/Kconfig
··· 190 190 the kernel tree does. Such modules that use library CRC32/CRC32c 191 191 functions require M here. 192 192 193 + config ARCH_HAS_CRC32 194 + bool 195 + 193 196 config CRC32_SELFTEST 194 197 tristate "CRC32 perform self test on init" 195 198 depends on CRC32 ··· 205 202 choice 206 203 prompt "CRC32 implementation" 207 204 depends on CRC32 208 - default CRC32_SLICEBY8 205 + default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32 206 + default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32 209 207 help 210 - This option allows a kernel builder to override the default choice 211 - of CRC32 algorithm. Choose the default ("slice by 8") unless you 212 - know that you need one of the others. 208 + This option allows you to override the default choice of CRC32 209 + implementation. Choose the default unless you know that you need one 210 + of the others. 213 211 214 - config CRC32_SLICEBY8 212 + config CRC32_IMPL_ARCH_PLUS_SLICEBY8 213 + bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32 214 + help 215 + Use architecture-optimized implementation of CRC32. Fall back to 216 + slice-by-8 in cases where the arch-optimized implementation cannot be 217 + used, e.g. if the CPU lacks support for the needed instructions. 218 + 219 + This is the default when an arch-optimized implementation exists. 220 + 221 + config CRC32_IMPL_ARCH_PLUS_SLICEBY1 222 + bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32 223 + help 224 + Use architecture-optimized implementation of CRC32, but fall back to 225 + slice-by-1 instead of slice-by-8 in order to reduce the binary size. 226 + 227 + config CRC32_IMPL_SLICEBY8 215 228 bool "Slice by 8 bytes" 216 229 help 217 230 Calculate checksum 8 bytes at a time with a clever slicing algorithm. 218 - This is the fastest algorithm, but comes with a 8KiB lookup table. 219 - Most modern processors have enough cache to hold this table without 220 - thrashing the cache. 
231 + This is much slower than the architecture-optimized implementation of 232 + CRC32 (if the selected arch has one), but it is portable and is the 233 + fastest implementation when no arch-optimized implementation is 234 + available. It uses an 8KiB lookup table. Most modern processors have 235 + enough cache to hold this table without thrashing the cache. 221 236 222 - This is the default implementation choice. Choose this one unless 223 - you have a good reason not to. 224 - 225 - config CRC32_SLICEBY4 237 + config CRC32_IMPL_SLICEBY4 226 238 bool "Slice by 4 bytes" 227 239 help 228 240 Calculate checksum 4 bytes at a time with a clever slicing algorithm. ··· 246 228 247 229 Only choose this option if you know what you are doing. 248 230 249 - config CRC32_SARWATE 250 - bool "Sarwate's Algorithm (one byte at a time)" 231 + config CRC32_IMPL_SLICEBY1 232 + bool "Slice by 1 byte (Sarwate's algorithm)" 251 233 help 252 234 Calculate checksum a byte at a time using Sarwate's algorithm. This 253 - is not particularly fast, but has a small 256 byte lookup table. 235 + is not particularly fast, but has a small 1KiB lookup table. 254 236 255 237 Only choose this option if you know what you are doing. 256 238 257 - config CRC32_BIT 239 + config CRC32_IMPL_BIT 258 240 bool "Classic Algorithm (one bit at a time)" 259 241 help 260 242 Calculate checksum one bit at a time. This is VERY slow, but has ··· 263 245 Only choose this option if you are debugging crc32. 
264 246 265 247 endchoice 248 + 249 + config CRC32_ARCH 250 + tristate 251 + default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY1 252 + 253 + config CRC32_SLICEBY8 254 + bool 255 + default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8 256 + 257 + config CRC32_SLICEBY4 258 + bool 259 + default y if CRC32_IMPL_SLICEBY4 260 + 261 + config CRC32_SARWATE 262 + bool 263 + default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1 264 + 265 + config CRC32_BIT 266 + bool 267 + default y if CRC32_IMPL_BIT 266 268 267 269 config CRC64 268 270 tristate "CRC64 functions"
+7 -15
lib/crc32.c
··· 183 183 } 184 184 185 185 #if CRC_LE_BITS == 1 186 - u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) 186 + u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) 187 187 { 188 188 return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); 189 189 } 190 - u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) 190 + u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) 191 191 { 192 192 return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); 193 193 } 194 194 #else 195 - u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) 195 + u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) 196 196 { 197 197 return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE); 198 198 } 199 - u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) 199 + u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) 200 200 { 201 201 return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE); 202 202 } 203 203 #endif 204 - EXPORT_SYMBOL(crc32_le); 205 - EXPORT_SYMBOL(__crc32c_le); 206 - 207 - u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); 208 204 EXPORT_SYMBOL(crc32_le_base); 209 - 210 - u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); 211 205 EXPORT_SYMBOL(crc32c_le_base); 212 - 213 - u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be); 214 206 215 207 /* 216 208 * This multiplies the polynomials x and y modulo the given modulus. 
··· 327 335 } 328 336 329 337 #if CRC_BE_BITS == 1 330 - u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) 338 + u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) 331 339 { 332 340 return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); 333 341 } 334 342 #else 335 - u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) 343 + u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) 336 344 { 337 345 return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); 338 346 } 339 347 #endif 340 - EXPORT_SYMBOL(crc32_be); 348 + EXPORT_SYMBOL(crc32_be_base);