Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

xor: pass the entire operation to the low-level ops

Currently the high-level xor code chunks up all operations into small
units for only up to 1 + 4 vectors, and passes it to four different
methods. This means the FPU/vector context is entered and left a lot for
wide stripes, and a lot of expensive indirect calls are
performed. Switch to passing the entire gen_xor request to the low-level
ops, and provide a macro to dispatch it to the existing helper.

This reduces the number of indirect calls and FPU/vector context switches
by a factor approaching nr_stripes / 4, and also reduces source and binary
code size.

Link: https://lkml.kernel.org/r/20260327061704.3707577-27-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Tested-by: Eric Biggers <ebiggers@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: David Sterba <dsterba@suse.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Li Nan <linan122@huawei.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Christoph Hellwig and committed by
Andrew Morton
80dcf0a7 0f629e72

+224 -505
-5
include/linux/raid/xor.h
··· 2 2 #ifndef _XOR_H 3 3 #define _XOR_H 4 4 5 - #define MAX_XOR_BLOCKS 4 6 - 7 - extern void xor_blocks(unsigned int count, unsigned int bytes, 8 - void *dest, void **srcs); 9 - 10 5 void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes); 11 6 12 7 #endif /* _XOR_H */
+9 -10
lib/raid/xor/alpha/xor.c
··· 832 832 .end xor_alpha_prefetch_5 \n\ 833 833 "); 834 834 835 + DO_XOR_BLOCKS(alpha, xor_alpha_2, xor_alpha_3, xor_alpha_4, xor_alpha_5); 836 + 835 837 struct xor_block_template xor_block_alpha = { 836 - .name = "alpha", 837 - .do_2 = xor_alpha_2, 838 - .do_3 = xor_alpha_3, 839 - .do_4 = xor_alpha_4, 840 - .do_5 = xor_alpha_5, 838 + .name = "alpha", 839 + .xor_gen = xor_gen_alpha, 841 840 }; 842 841 842 + DO_XOR_BLOCKS(alpha_prefetch, xor_alpha_prefetch_2, xor_alpha_prefetch_3, 843 + xor_alpha_prefetch_4, xor_alpha_prefetch_5); 844 + 843 845 struct xor_block_template xor_block_alpha_prefetch = { 844 - .name = "alpha prefetch", 845 - .do_2 = xor_alpha_prefetch_2, 846 - .do_3 = xor_alpha_prefetch_3, 847 - .do_4 = xor_alpha_prefetch_4, 848 - .do_5 = xor_alpha_prefetch_5, 846 + .name = "alpha prefetch", 847 + .xor_gen = xor_gen_alpha_prefetch, 849 848 };
+5 -44
lib/raid/xor/arm/xor-neon-glue.c
··· 5 5 #include "xor_impl.h" 6 6 #include "xor_arch.h" 7 7 8 - extern struct xor_block_template const xor_block_neon_inner; 9 - 10 - static void 11 - xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, 12 - const unsigned long * __restrict p2) 8 + static void xor_gen_neon(void *dest, void **srcs, unsigned int src_cnt, 9 + unsigned int bytes) 13 10 { 14 11 kernel_neon_begin(); 15 - xor_block_neon_inner.do_2(bytes, p1, p2); 16 - kernel_neon_end(); 17 - } 18 - 19 - static void 20 - xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, 21 - const unsigned long * __restrict p2, 22 - const unsigned long * __restrict p3) 23 - { 24 - kernel_neon_begin(); 25 - xor_block_neon_inner.do_3(bytes, p1, p2, p3); 26 - kernel_neon_end(); 27 - } 28 - 29 - static void 30 - xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, 31 - const unsigned long * __restrict p2, 32 - const unsigned long * __restrict p3, 33 - const unsigned long * __restrict p4) 34 - { 35 - kernel_neon_begin(); 36 - xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); 37 - kernel_neon_end(); 38 - } 39 - 40 - static void 41 - xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, 42 - const unsigned long * __restrict p2, 43 - const unsigned long * __restrict p3, 44 - const unsigned long * __restrict p4, 45 - const unsigned long * __restrict p5) 46 - { 47 - kernel_neon_begin(); 48 - xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); 12 + xor_gen_neon_inner(dest, srcs, src_cnt, bytes); 49 13 kernel_neon_end(); 50 14 } 51 15 52 16 struct xor_block_template xor_block_neon = { 53 - .name = "neon", 54 - .do_2 = xor_neon_2, 55 - .do_3 = xor_neon_3, 56 - .do_4 = xor_neon_4, 57 - .do_5 = xor_neon_5 17 + .name = "neon", 18 + .xor_gen = xor_gen_neon, 58 19 };
+2 -7
lib/raid/xor/arm/xor-neon.c
··· 4 4 */ 5 5 6 6 #include "xor_impl.h" 7 + #include "xor_arch.h" 7 8 8 9 #ifndef __ARM_NEON__ 9 10 #error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon' ··· 23 22 #define NO_TEMPLATE 24 23 #include "../xor-8regs.c" 25 24 26 - struct xor_block_template const xor_block_neon_inner = { 27 - .name = "__inner_neon__", 28 - .do_2 = xor_8regs_2, 29 - .do_3 = xor_8regs_3, 30 - .do_4 = xor_8regs_4, 31 - .do_5 = xor_8regs_5, 32 - }; 25 + __DO_XOR_BLOCKS(neon_inner, xor_8regs_2, xor_8regs_3, xor_8regs_4, xor_8regs_5);
+5 -5
lib/raid/xor/arm/xor.c
··· 127 127 } while (--lines); 128 128 } 129 129 130 + DO_XOR_BLOCKS(arm4regs, xor_arm4regs_2, xor_arm4regs_3, xor_arm4regs_4, 131 + xor_arm4regs_5); 132 + 130 133 struct xor_block_template xor_block_arm4regs = { 131 - .name = "arm4regs", 132 - .do_2 = xor_arm4regs_2, 133 - .do_3 = xor_arm4regs_3, 134 - .do_4 = xor_arm4regs_4, 135 - .do_5 = xor_arm4regs_5, 134 + .name = "arm4regs", 135 + .xor_gen = xor_gen_arm4regs, 136 136 };
+3
lib/raid/xor/arm/xor_arch.h
··· 7 7 extern struct xor_block_template xor_block_arm4regs; 8 8 extern struct xor_block_template xor_block_neon; 9 9 10 + void xor_gen_neon_inner(void *dest, void **srcs, unsigned int src_cnt, 11 + unsigned int bytes); 12 + 10 13 static __always_inline void __init arch_xor_init(void) 11 14 { 12 15 xor_register(&xor_block_arm4regs);
+5 -39
lib/raid/xor/arm64/xor-neon-glue.c
··· 10 10 #include "xor-neon.h" 11 11 12 12 #define XOR_TEMPLATE(_name) \ 13 - static void \ 14 - xor_##_name##_2(unsigned long bytes, unsigned long * __restrict p1, \ 15 - const unsigned long * __restrict p2) \ 13 + static void xor_gen_##_name(void *dest, void **srcs, unsigned int src_cnt, \ 14 + unsigned int bytes) \ 16 15 { \ 17 16 scoped_ksimd() \ 18 - __xor_##_name##_2(bytes, p1, p2); \ 19 - } \ 20 - \ 21 - static void \ 22 - xor_##_name##_3(unsigned long bytes, unsigned long * __restrict p1, \ 23 - const unsigned long * __restrict p2, \ 24 - const unsigned long * __restrict p3) \ 25 - { \ 26 - scoped_ksimd() \ 27 - __xor_##_name##_3(bytes, p1, p2, p3); \ 28 - } \ 29 - \ 30 - static void \ 31 - xor_##_name##_4(unsigned long bytes, unsigned long * __restrict p1, \ 32 - const unsigned long * __restrict p2, \ 33 - const unsigned long * __restrict p3, \ 34 - const unsigned long * __restrict p4) \ 35 - { \ 36 - scoped_ksimd() \ 37 - __xor_##_name##_4(bytes, p1, p2, p3, p4); \ 38 - } \ 39 - \ 40 - static void \ 41 - xor_##_name##_5(unsigned long bytes, unsigned long * __restrict p1, \ 42 - const unsigned long * __restrict p2, \ 43 - const unsigned long * __restrict p3, \ 44 - const unsigned long * __restrict p4, \ 45 - const unsigned long * __restrict p5) \ 46 - { \ 47 - scoped_ksimd() \ 48 - __xor_##_name##_5(bytes, p1, p2, p3, p4, p5); \ 17 + xor_gen_##_name##_inner(dest, srcs, src_cnt, bytes); \ 49 18 } \ 50 19 \ 51 20 struct xor_block_template xor_block_##_name = { \ 52 - .name = __stringify(_name), \ 53 - .do_2 = xor_##_name##_2, \ 54 - .do_3 = xor_##_name##_3, \ 55 - .do_4 = xor_##_name##_4, \ 56 - .do_5 = xor_##_name##_5 \ 21 + .name = __stringify(_name), \ 22 + .xor_gen = xor_gen_##_name, \ 57 23 }; 58 24 59 25 XOR_TEMPLATE(neon);
+13 -7
lib/raid/xor/arm64/xor-neon.c
··· 10 10 #include "xor_arch.h" 11 11 #include "xor-neon.h" 12 12 13 - void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, 13 + static void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, 14 14 const unsigned long * __restrict p2) 15 15 { 16 16 uint64_t *dp1 = (uint64_t *)p1; ··· 37 37 } while (--lines > 0); 38 38 } 39 39 40 - void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, 40 + static void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, 41 41 const unsigned long * __restrict p2, 42 42 const unsigned long * __restrict p3) 43 43 { ··· 73 73 } while (--lines > 0); 74 74 } 75 75 76 - void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, 76 + static void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, 77 77 const unsigned long * __restrict p2, 78 78 const unsigned long * __restrict p3, 79 79 const unsigned long * __restrict p4) ··· 118 118 } while (--lines > 0); 119 119 } 120 120 121 - void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, 121 + static void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, 122 122 const unsigned long * __restrict p2, 123 123 const unsigned long * __restrict p3, 124 124 const unsigned long * __restrict p4, ··· 172 172 } while (--lines > 0); 173 173 } 174 174 175 + __DO_XOR_BLOCKS(neon_inner, __xor_neon_2, __xor_neon_3, __xor_neon_4, 176 + __xor_neon_5); 177 + 175 178 static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r) 176 179 { 177 180 uint64x2_t res; ··· 185 182 return res; 186 183 } 187 184 188 - void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1, 185 + static void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1, 189 186 const unsigned long * __restrict p2, 190 187 const unsigned long * __restrict p3) 191 188 { ··· 219 216 } while (--lines > 0); 220 217 } 221 218 222 - void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1, 219 + 
static void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1, 223 220 const unsigned long * __restrict p2, 224 221 const unsigned long * __restrict p3, 225 222 const unsigned long * __restrict p4) ··· 262 259 } while (--lines > 0); 263 260 } 264 261 265 - void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1, 262 + static void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1, 266 263 const unsigned long * __restrict p2, 267 264 const unsigned long * __restrict p3, 268 265 const unsigned long * __restrict p4, ··· 307 304 dp5 += 8; 308 305 } while (--lines > 0); 309 306 } 307 + 308 + __DO_XOR_BLOCKS(eor3_inner, __xor_neon_2, __xor_eor3_3, __xor_eor3_4, 309 + __xor_eor3_5);
+4 -28
lib/raid/xor/arm64/xor-neon.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 3 - void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, 4 - const unsigned long * __restrict p2); 5 - void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, 6 - const unsigned long * __restrict p2, 7 - const unsigned long * __restrict p3); 8 - void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, 9 - const unsigned long * __restrict p2, 10 - const unsigned long * __restrict p3, 11 - const unsigned long * __restrict p4); 12 - void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, 13 - const unsigned long * __restrict p2, 14 - const unsigned long * __restrict p3, 15 - const unsigned long * __restrict p4, 16 - const unsigned long * __restrict p5); 17 - 18 - #define __xor_eor3_2 __xor_neon_2 19 - void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1, 20 - const unsigned long * __restrict p2, 21 - const unsigned long * __restrict p3); 22 - void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1, 23 - const unsigned long * __restrict p2, 24 - const unsigned long * __restrict p3, 25 - const unsigned long * __restrict p4); 26 - void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1, 27 - const unsigned long * __restrict p2, 28 - const unsigned long * __restrict p3, 29 - const unsigned long * __restrict p4, 30 - const unsigned long * __restrict p5); 3 + void xor_gen_neon_inner(void *dest, void **srcs, unsigned int src_cnt, 4 + unsigned int bytes); 5 + void xor_gen_eor3_inner(void *dest, void **srcs, unsigned int src_cnt, 6 + unsigned int bytes);
+11 -51
lib/raid/xor/loongarch/xor_simd_glue.c
··· 11 11 #include "xor_arch.h" 12 12 #include "xor_simd.h" 13 13 14 - #define MAKE_XOR_GLUE_2(flavor) \ 15 - static void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,\ 16 - const unsigned long * __restrict p2) \ 14 + #define MAKE_XOR_GLUES(flavor) \ 15 + DO_XOR_BLOCKS(flavor##_inner, __xor_##flavor##_2, __xor_##flavor##_3, \ 16 + __xor_##flavor##_4, __xor_##flavor##_5); \ 17 + \ 18 + static void xor_gen_##flavor(void *dest, void **srcs, unsigned int src_cnt, \ 19 + unsigned int bytes) \ 17 20 { \ 18 21 kernel_fpu_begin(); \ 19 - __xor_##flavor##_2(bytes, p1, p2); \ 22 + xor_gen_##flavor##_inner(dest, srcs, src_cnt, bytes); \ 20 23 kernel_fpu_end(); \ 21 24 } \ 22 - 23 - #define MAKE_XOR_GLUE_3(flavor) \ 24 - static void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,\ 25 - const unsigned long * __restrict p2, \ 26 - const unsigned long * __restrict p3) \ 27 - { \ 28 - kernel_fpu_begin(); \ 29 - __xor_##flavor##_3(bytes, p1, p2, p3); \ 30 - kernel_fpu_end(); \ 31 - } \ 32 - 33 - #define MAKE_XOR_GLUE_4(flavor) \ 34 - static void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,\ 35 - const unsigned long * __restrict p2, \ 36 - const unsigned long * __restrict p3, \ 37 - const unsigned long * __restrict p4) \ 38 - { \ 39 - kernel_fpu_begin(); \ 40 - __xor_##flavor##_4(bytes, p1, p2, p3, p4); \ 41 - kernel_fpu_end(); \ 42 - } \ 43 - 44 - #define MAKE_XOR_GLUE_5(flavor) \ 45 - static void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,\ 46 - const unsigned long * __restrict p2, \ 47 - const unsigned long * __restrict p3, \ 48 - const unsigned long * __restrict p4, \ 49 - const unsigned long * __restrict p5) \ 50 - { \ 51 - kernel_fpu_begin(); \ 52 - __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \ 53 - kernel_fpu_end(); \ 54 - } \ 55 - 56 - #define MAKE_XOR_GLUES(flavor) \ 57 - MAKE_XOR_GLUE_2(flavor); \ 58 - MAKE_XOR_GLUE_3(flavor); \ 59 - MAKE_XOR_GLUE_4(flavor); \ 60 - 
MAKE_XOR_GLUE_5(flavor); \ 61 - \ 62 - struct xor_block_template xor_block_##flavor = { \ 63 - .name = __stringify(flavor), \ 64 - .do_2 = xor_##flavor##_2, \ 65 - .do_3 = xor_##flavor##_3, \ 66 - .do_4 = xor_##flavor##_4, \ 67 - .do_5 = xor_##flavor##_5, \ 25 + \ 26 + struct xor_block_template xor_block_##flavor = { \ 27 + .name = __stringify(flavor), \ 28 + .xor_gen = xor_gen_##flavor \ 68 29 } 69 - 70 30 71 31 #ifdef CONFIG_CPU_HAS_LSX 72 32 MAKE_XOR_GLUES(lsx);
+22 -18
lib/raid/xor/powerpc/xor_vmx.c
··· 10 10 * Sparse (as at v0.5.0) gets very, very confused by this file. 11 11 * Make it a bit simpler for it. 12 12 */ 13 + #include "xor_impl.h" 13 14 #if !defined(__CHECKER__) 14 15 #include <altivec.h> 15 16 #else ··· 50 49 V1##_3 = vec_xor(V1##_3, V2##_3); \ 51 50 } while (0) 52 51 53 - void __xor_altivec_2(unsigned long bytes, 54 - unsigned long * __restrict v1_in, 55 - const unsigned long * __restrict v2_in) 52 + static void __xor_altivec_2(unsigned long bytes, 53 + unsigned long * __restrict v1_in, 54 + const unsigned long * __restrict v2_in) 56 55 { 57 56 DEFINE(v1); 58 57 DEFINE(v2); ··· 69 68 } while (--lines > 0); 70 69 } 71 70 72 - void __xor_altivec_3(unsigned long bytes, 73 - unsigned long * __restrict v1_in, 74 - const unsigned long * __restrict v2_in, 75 - const unsigned long * __restrict v3_in) 71 + static void __xor_altivec_3(unsigned long bytes, 72 + unsigned long * __restrict v1_in, 73 + const unsigned long * __restrict v2_in, 74 + const unsigned long * __restrict v3_in) 76 75 { 77 76 DEFINE(v1); 78 77 DEFINE(v2); ··· 93 92 } while (--lines > 0); 94 93 } 95 94 96 - void __xor_altivec_4(unsigned long bytes, 97 - unsigned long * __restrict v1_in, 98 - const unsigned long * __restrict v2_in, 99 - const unsigned long * __restrict v3_in, 100 - const unsigned long * __restrict v4_in) 95 + static void __xor_altivec_4(unsigned long bytes, 96 + unsigned long * __restrict v1_in, 97 + const unsigned long * __restrict v2_in, 98 + const unsigned long * __restrict v3_in, 99 + const unsigned long * __restrict v4_in) 101 100 { 102 101 DEFINE(v1); 103 102 DEFINE(v2); ··· 122 121 } while (--lines > 0); 123 122 } 124 123 125 - void __xor_altivec_5(unsigned long bytes, 126 - unsigned long * __restrict v1_in, 127 - const unsigned long * __restrict v2_in, 128 - const unsigned long * __restrict v3_in, 129 - const unsigned long * __restrict v4_in, 130 - const unsigned long * __restrict v5_in) 124 + static void __xor_altivec_5(unsigned long bytes, 125 + unsigned long * 
__restrict v1_in, 126 + const unsigned long * __restrict v2_in, 127 + const unsigned long * __restrict v3_in, 128 + const unsigned long * __restrict v4_in, 129 + const unsigned long * __restrict v5_in) 131 130 { 132 131 DEFINE(v1); 133 132 DEFINE(v2); ··· 155 154 v5 += 4; 156 155 } while (--lines > 0); 157 156 } 157 + 158 + __DO_XOR_BLOCKS(altivec_inner, __xor_altivec_2, __xor_altivec_3, 159 + __xor_altivec_4, __xor_altivec_5);
+2 -14
lib/raid/xor/powerpc/xor_vmx.h
··· 6 6 * outside of the enable/disable altivec block. 7 7 */ 8 8 9 - void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, 10 - const unsigned long * __restrict p2); 11 - void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, 12 - const unsigned long * __restrict p2, 13 - const unsigned long * __restrict p3); 14 - void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, 15 - const unsigned long * __restrict p2, 16 - const unsigned long * __restrict p3, 17 - const unsigned long * __restrict p4); 18 - void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, 19 - const unsigned long * __restrict p2, 20 - const unsigned long * __restrict p3, 21 - const unsigned long * __restrict p4, 22 - const unsigned long * __restrict p5); 9 + void xor_gen_altivec_inner(void *dest, void **srcs, unsigned int src_cnt, 10 + unsigned int bytes);
+5 -44
lib/raid/xor/powerpc/xor_vmx_glue.c
··· 12 12 #include "xor_arch.h" 13 13 #include "xor_vmx.h" 14 14 15 - static void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, 16 - const unsigned long * __restrict p2) 15 + static void xor_gen_altivec(void *dest, void **srcs, unsigned int src_cnt, 16 + unsigned int bytes) 17 17 { 18 18 preempt_disable(); 19 19 enable_kernel_altivec(); 20 - __xor_altivec_2(bytes, p1, p2); 21 - disable_kernel_altivec(); 22 - preempt_enable(); 23 - } 24 - 25 - static void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, 26 - const unsigned long * __restrict p2, 27 - const unsigned long * __restrict p3) 28 - { 29 - preempt_disable(); 30 - enable_kernel_altivec(); 31 - __xor_altivec_3(bytes, p1, p2, p3); 32 - disable_kernel_altivec(); 33 - preempt_enable(); 34 - } 35 - 36 - static void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, 37 - const unsigned long * __restrict p2, 38 - const unsigned long * __restrict p3, 39 - const unsigned long * __restrict p4) 40 - { 41 - preempt_disable(); 42 - enable_kernel_altivec(); 43 - __xor_altivec_4(bytes, p1, p2, p3, p4); 44 - disable_kernel_altivec(); 45 - preempt_enable(); 46 - } 47 - 48 - static void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, 49 - const unsigned long * __restrict p2, 50 - const unsigned long * __restrict p3, 51 - const unsigned long * __restrict p4, 52 - const unsigned long * __restrict p5) 53 - { 54 - preempt_disable(); 55 - enable_kernel_altivec(); 56 - __xor_altivec_5(bytes, p1, p2, p3, p4, p5); 20 + xor_gen_altivec_inner(dest, srcs, src_cnt, bytes); 57 21 disable_kernel_altivec(); 58 22 preempt_enable(); 59 23 } 60 24 61 25 struct xor_block_template xor_block_altivec = { 62 - .name = "altivec", 63 - .do_2 = xor_altivec_2, 64 - .do_3 = xor_altivec_3, 65 - .do_4 = xor_altivec_4, 66 - .do_5 = xor_altivec_5, 26 + .name = "altivec", 27 + .xor_gen = xor_gen_altivec, 67 28 };
+6 -37
lib/raid/xor/riscv/xor-glue.c
··· 9 9 #include "xor_impl.h" 10 10 #include "xor_arch.h" 11 11 12 - static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1, 13 - const unsigned long *__restrict p2) 14 - { 15 - kernel_vector_begin(); 16 - xor_regs_2_(bytes, p1, p2); 17 - kernel_vector_end(); 18 - } 12 + DO_XOR_BLOCKS(vector_inner, xor_regs_2_, xor_regs_3_, xor_regs_4_, xor_regs_5_); 19 13 20 - static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1, 21 - const unsigned long *__restrict p2, 22 - const unsigned long *__restrict p3) 14 + static void xor_gen_vector(void *dest, void **srcs, unsigned int src_cnt, 15 + unsigned int bytes) 23 16 { 24 17 kernel_vector_begin(); 25 - xor_regs_3_(bytes, p1, p2, p3); 26 - kernel_vector_end(); 27 - } 28 - 29 - static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1, 30 - const unsigned long *__restrict p2, 31 - const unsigned long *__restrict p3, 32 - const unsigned long *__restrict p4) 33 - { 34 - kernel_vector_begin(); 35 - xor_regs_4_(bytes, p1, p2, p3, p4); 36 - kernel_vector_end(); 37 - } 38 - 39 - static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1, 40 - const unsigned long *__restrict p2, 41 - const unsigned long *__restrict p3, 42 - const unsigned long *__restrict p4, 43 - const unsigned long *__restrict p5) 44 - { 45 - kernel_vector_begin(); 46 - xor_regs_5_(bytes, p1, p2, p3, p4, p5); 18 + xor_gen_vector_inner(dest, srcs, src_cnt, bytes); 47 19 kernel_vector_end(); 48 20 } 49 21 50 22 struct xor_block_template xor_block_rvv = { 51 - .name = "rvv", 52 - .do_2 = xor_vector_2, 53 - .do_3 = xor_vector_3, 54 - .do_4 = xor_vector_4, 55 - .do_5 = xor_vector_5 23 + .name = "rvv", 24 + .xor_gen = xor_gen_vector, 56 25 };
+4 -5
lib/raid/xor/s390/xor.c
··· 125 125 : : "0", "cc", "memory"); 126 126 } 127 127 128 + DO_XOR_BLOCKS(xc, xor_xc_2, xor_xc_3, xor_xc_4, xor_xc_5); 129 + 128 130 struct xor_block_template xor_block_xc = { 129 - .name = "xc", 130 - .do_2 = xor_xc_2, 131 - .do_3 = xor_xc_3, 132 - .do_4 = xor_xc_4, 133 - .do_5 = xor_xc_5, 131 + .name = "xc", 132 + .xor_gen = xor_gen_xc, 134 133 };
+4 -5
lib/raid/xor/sparc/xor-sparc32.c
··· 244 244 } while (--lines > 0); 245 245 } 246 246 247 + DO_XOR_BLOCKS(sparc32, sparc_2, sparc_3, sparc_4, sparc_5); 248 + 247 249 struct xor_block_template xor_block_SPARC = { 248 - .name = "SPARC", 249 - .do_2 = sparc_2, 250 - .do_3 = sparc_3, 251 - .do_4 = sparc_4, 252 - .do_5 = sparc_5, 250 + .name = "SPARC", 251 + .xor_gen = xor_gen_sparc32, 253 252 };
+9 -10
lib/raid/xor/sparc/xor-sparc64-glue.c
··· 28 28 29 29 /* XXX Ugh, write cheetah versions... -DaveM */ 30 30 31 + DO_XOR_BLOCKS(vis, xor_vis_2, xor_vis_3, xor_vis_4, xor_vis_5); 32 + 31 33 struct xor_block_template xor_block_VIS = { 32 - .name = "VIS", 33 - .do_2 = xor_vis_2, 34 - .do_3 = xor_vis_3, 35 - .do_4 = xor_vis_4, 36 - .do_5 = xor_vis_5, 34 + .name = "VIS", 35 + .xor_gen = xor_gen_vis, 37 36 }; 38 37 39 38 void xor_niagara_2(unsigned long bytes, unsigned long * __restrict p1, ··· 50 51 const unsigned long * __restrict p4, 51 52 const unsigned long * __restrict p5); 52 53 54 + DO_XOR_BLOCKS(niagara, xor_niagara_2, xor_niagara_3, xor_niagara_4, 55 + xor_niagara_5); 56 + 53 57 struct xor_block_template xor_block_niagara = { 54 - .name = "Niagara", 55 - .do_2 = xor_niagara_2, 56 - .do_3 = xor_niagara_3, 57 - .do_4 = xor_niagara_4, 58 - .do_5 = xor_niagara_5, 58 + .name = "Niagara", 59 + .xor_gen = xor_gen_niagara, 59 60 };
+10 -19
lib/raid/xor/x86/xor-avx.c
··· 29 29 { 30 30 unsigned long lines = bytes >> 9; 31 31 32 - kernel_fpu_begin(); 33 - 34 32 while (lines--) { 35 33 #undef BLOCK 36 34 #define BLOCK(i, reg) \ ··· 45 47 p0 = (unsigned long *)((uintptr_t)p0 + 512); 46 48 p1 = (unsigned long *)((uintptr_t)p1 + 512); 47 49 } 48 - 49 - kernel_fpu_end(); 50 50 } 51 51 52 52 static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0, ··· 52 56 const unsigned long * __restrict p2) 53 57 { 54 58 unsigned long lines = bytes >> 9; 55 - 56 - kernel_fpu_begin(); 57 59 58 60 while (lines--) { 59 61 #undef BLOCK ··· 72 78 p1 = (unsigned long *)((uintptr_t)p1 + 512); 73 79 p2 = (unsigned long *)((uintptr_t)p2 + 512); 74 80 } 75 - 76 - kernel_fpu_end(); 77 81 } 78 82 79 83 static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0, ··· 80 88 const unsigned long * __restrict p3) 81 89 { 82 90 unsigned long lines = bytes >> 9; 83 - 84 - kernel_fpu_begin(); 85 91 86 92 while (lines--) { 87 93 #undef BLOCK ··· 103 113 p2 = (unsigned long *)((uintptr_t)p2 + 512); 104 114 p3 = (unsigned long *)((uintptr_t)p3 + 512); 105 115 } 106 - 107 - kernel_fpu_end(); 108 116 } 109 117 110 118 static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0, ··· 112 124 const unsigned long * __restrict p4) 113 125 { 114 126 unsigned long lines = bytes >> 9; 115 - 116 - kernel_fpu_begin(); 117 127 118 128 while (lines--) { 119 129 #undef BLOCK ··· 138 152 p3 = (unsigned long *)((uintptr_t)p3 + 512); 139 153 p4 = (unsigned long *)((uintptr_t)p4 + 512); 140 154 } 155 + } 141 156 157 + DO_XOR_BLOCKS(avx_inner, xor_avx_2, xor_avx_3, xor_avx_4, xor_avx_5); 158 + 159 + static void xor_gen_avx(void *dest, void **srcs, unsigned int src_cnt, 160 + unsigned int bytes) 161 + { 162 + kernel_fpu_begin(); 163 + xor_gen_avx_inner(dest, srcs, src_cnt, bytes); 142 164 kernel_fpu_end(); 143 165 } 144 166 145 167 struct xor_block_template xor_block_avx = { 146 - .name = "avx", 147 - .do_2 = xor_avx_2, 148 - .do_3 = xor_avx_3, 149 - 
.do_4 = xor_avx_4, 150 - .do_5 = xor_avx_5, 168 + .name = "avx", 169 + .xor_gen = xor_gen_avx, 151 170 };
+24 -40
lib/raid/xor/x86/xor-mmx.c
··· 21 21 { 22 22 unsigned long lines = bytes >> 7; 23 23 24 - kernel_fpu_begin(); 25 - 26 24 asm volatile( 27 25 #undef BLOCK 28 26 #define BLOCK(i) \ ··· 53 55 "+r" (p1), "+r" (p2) 54 56 : 55 57 : "memory"); 56 - 57 - kernel_fpu_end(); 58 58 } 59 59 60 60 static void ··· 61 65 const unsigned long * __restrict p3) 62 66 { 63 67 unsigned long lines = bytes >> 7; 64 - 65 - kernel_fpu_begin(); 66 68 67 69 asm volatile( 68 70 #undef BLOCK ··· 99 105 "+r" (p1), "+r" (p2), "+r" (p3) 100 106 : 101 107 : "memory"); 102 - 103 - kernel_fpu_end(); 104 108 } 105 109 106 110 static void ··· 108 116 const unsigned long * __restrict p4) 109 117 { 110 118 unsigned long lines = bytes >> 7; 111 - 112 - kernel_fpu_begin(); 113 119 114 120 asm volatile( 115 121 #undef BLOCK ··· 151 161 "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) 152 162 : 153 163 : "memory"); 154 - 155 - kernel_fpu_end(); 156 164 } 157 165 158 166 ··· 162 174 const unsigned long * __restrict p5) 163 175 { 164 176 unsigned long lines = bytes >> 7; 165 - 166 - kernel_fpu_begin(); 167 177 168 178 /* Make sure GCC forgets anything it knows about p4 or p5, 169 179 such that it won't pass to the asm volatile below a ··· 223 237 Clobber them just to be sure nobody does something stupid 224 238 like assuming they have some legal value. 
*/ 225 239 asm("" : "=r" (p4), "=r" (p5)); 226 - 227 - kernel_fpu_end(); 228 240 } 229 241 230 242 #undef LD ··· 238 254 const unsigned long * __restrict p2) 239 255 { 240 256 unsigned long lines = bytes >> 6; 241 - 242 - kernel_fpu_begin(); 243 257 244 258 asm volatile( 245 259 " .align 32 ;\n" ··· 275 293 "+r" (p1), "+r" (p2) 276 294 : 277 295 : "memory"); 278 - 279 - kernel_fpu_end(); 280 296 } 281 297 282 298 static void ··· 283 303 const unsigned long * __restrict p3) 284 304 { 285 305 unsigned long lines = bytes >> 6; 286 - 287 - kernel_fpu_begin(); 288 306 289 307 asm volatile( 290 308 " .align 32,0x90 ;\n" ··· 329 351 "+r" (p1), "+r" (p2), "+r" (p3) 330 352 : 331 353 : "memory" ); 332 - 333 - kernel_fpu_end(); 334 354 } 335 355 336 356 static void ··· 338 362 const unsigned long * __restrict p4) 339 363 { 340 364 unsigned long lines = bytes >> 6; 341 - 342 - kernel_fpu_begin(); 343 365 344 366 asm volatile( 345 367 " .align 32,0x90 ;\n" ··· 393 419 "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) 394 420 : 395 421 : "memory"); 396 - 397 - kernel_fpu_end(); 398 422 } 399 423 400 424 static void ··· 403 431 const unsigned long * __restrict p5) 404 432 { 405 433 unsigned long lines = bytes >> 6; 406 - 407 - kernel_fpu_begin(); 408 434 409 435 /* Make sure GCC forgets anything it knows about p4 or p5, 410 436 such that it won't pass to the asm volatile below a ··· 480 510 Clobber them just to be sure nobody does something stupid 481 511 like assuming they have some legal value. 
*/ 482 512 asm("" : "=r" (p4), "=r" (p5)); 513 + } 483 514 515 + DO_XOR_BLOCKS(pII_mmx_inner, xor_pII_mmx_2, xor_pII_mmx_3, xor_pII_mmx_4, 516 + xor_pII_mmx_5); 517 + 518 + static void xor_gen_pII_mmx(void *dest, void **srcs, unsigned int src_cnt, 519 + unsigned int bytes) 520 + { 521 + kernel_fpu_begin(); 522 + xor_gen_pII_mmx_inner(dest, srcs, src_cnt, bytes); 484 523 kernel_fpu_end(); 485 524 } 486 525 487 526 struct xor_block_template xor_block_pII_mmx = { 488 - .name = "pII_mmx", 489 - .do_2 = xor_pII_mmx_2, 490 - .do_3 = xor_pII_mmx_3, 491 - .do_4 = xor_pII_mmx_4, 492 - .do_5 = xor_pII_mmx_5, 527 + .name = "pII_mmx", 528 + .xor_gen = xor_gen_pII_mmx, 493 529 }; 494 530 531 + DO_XOR_BLOCKS(p5_mmx_inner, xor_p5_mmx_2, xor_p5_mmx_3, xor_p5_mmx_4, 532 + xor_p5_mmx_5); 533 + 534 + static void xor_gen_p5_mmx(void *dest, void **srcs, unsigned int src_cnt, 535 + unsigned int bytes) 536 + { 537 + kernel_fpu_begin(); 538 + xor_gen_p5_mmx_inner(dest, srcs, src_cnt, bytes); 539 + kernel_fpu_end(); 540 + } 541 + 495 542 struct xor_block_template xor_block_p5_mmx = { 496 - .name = "p5_mmx", 497 - .do_2 = xor_p5_mmx_2, 498 - .do_3 = xor_p5_mmx_3, 499 - .do_4 = xor_p5_mmx_4, 500 - .do_5 = xor_p5_mmx_5, 543 + .name = "p5_mmx", 544 + .xor_gen = xor_gen_p5_mmx, 501 545 };
+23 -40
lib/raid/xor/x86/xor-sse.c
··· 51 51 { 52 52 unsigned long lines = bytes >> 8; 53 53 54 - kernel_fpu_begin(); 55 - 56 54 asm volatile( 57 55 #undef BLOCK 58 56 #define BLOCK(i) \ ··· 91 93 [p1] "+r" (p1), [p2] "+r" (p2) 92 94 : [inc] XOR_CONSTANT_CONSTRAINT (256UL) 93 95 : "memory"); 94 - 95 - kernel_fpu_end(); 96 96 } 97 97 98 98 static void ··· 98 102 const unsigned long * __restrict p2) 99 103 { 100 104 unsigned long lines = bytes >> 8; 101 - 102 - kernel_fpu_begin(); 103 105 104 106 asm volatile( 105 107 #undef BLOCK ··· 122 128 [p1] "+r" (p1), [p2] "+r" (p2) 123 129 : [inc] XOR_CONSTANT_CONSTRAINT (256UL) 124 130 : "memory"); 125 - 126 - kernel_fpu_end(); 127 131 } 128 132 129 133 static void ··· 130 138 const unsigned long * __restrict p3) 131 139 { 132 140 unsigned long lines = bytes >> 8; 133 - 134 - kernel_fpu_begin(); 135 141 136 142 asm volatile( 137 143 #undef BLOCK ··· 178 188 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) 179 189 : [inc] XOR_CONSTANT_CONSTRAINT (256UL) 180 190 : "memory"); 181 - 182 - kernel_fpu_end(); 183 191 } 184 192 185 193 static void ··· 186 198 const unsigned long * __restrict p3) 187 199 { 188 200 unsigned long lines = bytes >> 8; 189 - 190 - kernel_fpu_begin(); 191 201 192 202 asm volatile( 193 203 #undef BLOCK ··· 212 226 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) 213 227 : [inc] XOR_CONSTANT_CONSTRAINT (256UL) 214 228 : "memory"); 215 - 216 - kernel_fpu_end(); 217 229 } 218 230 219 231 static void ··· 221 237 const unsigned long * __restrict p4) 222 238 { 223 239 unsigned long lines = bytes >> 8; 224 - 225 - kernel_fpu_begin(); 226 240 227 241 asm volatile( 228 242 #undef BLOCK ··· 276 294 [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) 277 295 : [inc] XOR_CONSTANT_CONSTRAINT (256UL) 278 296 : "memory"); 279 - 280 - kernel_fpu_end(); 281 297 } 282 298 283 299 static void ··· 285 305 const unsigned long * __restrict p4) 286 306 { 287 307 unsigned long lines = bytes >> 8; 288 - 289 - kernel_fpu_begin(); 290 308 291 309 asm volatile( 292 310 #undef 
BLOCK ··· 313 335 [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) 314 336 : [inc] XOR_CONSTANT_CONSTRAINT (256UL) 315 337 : "memory"); 316 - 317 - kernel_fpu_end(); 318 338 } 319 339 320 340 static void ··· 323 347 const unsigned long * __restrict p5) 324 348 { 325 349 unsigned long lines = bytes >> 8; 326 - 327 - kernel_fpu_begin(); 328 350 329 351 asm volatile( 330 352 #undef BLOCK ··· 385 411 [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) 386 412 : [inc] XOR_CONSTANT_CONSTRAINT (256UL) 387 413 : "memory"); 388 - 389 - kernel_fpu_end(); 390 414 } 391 415 392 416 static void ··· 395 423 const unsigned long * __restrict p5) 396 424 { 397 425 unsigned long lines = bytes >> 8; 398 - 399 - kernel_fpu_begin(); 400 426 401 427 asm volatile( 402 428 #undef BLOCK ··· 425 455 [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) 426 456 : [inc] XOR_CONSTANT_CONSTRAINT (256UL) 427 457 : "memory"); 458 + } 428 459 460 + DO_XOR_BLOCKS(sse_inner, xor_sse_2, xor_sse_3, xor_sse_4, xor_sse_5); 461 + 462 + static void xor_gen_sse(void *dest, void **srcs, unsigned int src_cnt, 463 + unsigned int bytes) 464 + { 465 + kernel_fpu_begin(); 466 + xor_gen_sse_inner(dest, srcs, src_cnt, bytes); 429 467 kernel_fpu_end(); 430 468 } 431 469 432 470 struct xor_block_template xor_block_sse = { 433 - .name = "sse", 434 - .do_2 = xor_sse_2, 435 - .do_3 = xor_sse_3, 436 - .do_4 = xor_sse_4, 437 - .do_5 = xor_sse_5, 471 + .name = "sse", 472 + .xor_gen = xor_gen_sse, 438 473 }; 439 474 475 + DO_XOR_BLOCKS(sse_pf64_inner, xor_sse_2_pf64, xor_sse_3_pf64, xor_sse_4_pf64, 476 + xor_sse_5_pf64); 477 + 478 + static void xor_gen_sse_pf64(void *dest, void **srcs, unsigned int src_cnt, 479 + unsigned int bytes) 480 + { 481 + kernel_fpu_begin(); 482 + xor_gen_sse_pf64_inner(dest, srcs, src_cnt, bytes); 483 + kernel_fpu_end(); 484 + } 485 + 440 486 struct xor_block_template xor_block_sse_pf64 = { 441 - .name = "prefetch64-sse", 442 - .do_2 = xor_sse_2_pf64, 443 - .do_3 = xor_sse_3_pf64, 444 - .do_4 = xor_sse_4_pf64, 
445 - .do_5 = xor_sse_5_pf64, 487 + .name = "prefetch64-sse", 488 + .xor_gen = xor_gen_sse_pf64, 446 489 };
+5 -5
lib/raid/xor/xor-32regs-prefetch.c
··· 258 258 goto once_more; 259 259 } 260 260 261 + DO_XOR_BLOCKS(32regs_p, xor_32regs_p_2, xor_32regs_p_3, xor_32regs_p_4, 262 + xor_32regs_p_5); 263 + 261 264 struct xor_block_template xor_block_32regs_p = { 262 - .name = "32regs_prefetch", 263 - .do_2 = xor_32regs_p_2, 264 - .do_3 = xor_32regs_p_3, 265 - .do_4 = xor_32regs_p_4, 266 - .do_5 = xor_32regs_p_5, 265 + .name = "32regs_prefetch", 266 + .xor_gen = xor_gen_32regs_p, 267 267 };
+4 -5
lib/raid/xor/xor-32regs.c
··· 209 209 } while (--lines > 0); 210 210 } 211 211 212 + DO_XOR_BLOCKS(32regs, xor_32regs_2, xor_32regs_3, xor_32regs_4, xor_32regs_5); 213 + 212 214 struct xor_block_template xor_block_32regs = { 213 - .name = "32regs", 214 - .do_2 = xor_32regs_2, 215 - .do_3 = xor_32regs_3, 216 - .do_4 = xor_32regs_4, 217 - .do_5 = xor_32regs_5, 215 + .name = "32regs", 216 + .xor_gen = xor_gen_32regs, 218 217 };
+6 -5
lib/raid/xor/xor-8regs-prefetch.c
··· 136 136 goto once_more; 137 137 } 138 138 139 + 140 + DO_XOR_BLOCKS(8regs_p, xor_8regs_p_2, xor_8regs_p_3, xor_8regs_p_4, 141 + xor_8regs_p_5); 142 + 139 143 struct xor_block_template xor_block_8regs_p = { 140 - .name = "8regs_prefetch", 141 - .do_2 = xor_8regs_p_2, 142 - .do_3 = xor_8regs_p_3, 143 - .do_4 = xor_8regs_p_4, 144 - .do_5 = xor_8regs_p_5, 144 + .name = "8regs_prefetch", 145 + .xor_gen = xor_gen_8regs_p, 145 146 };
+4 -5
lib/raid/xor/xor-8regs.c
··· 94 94 } 95 95 96 96 #ifndef NO_TEMPLATE 97 + DO_XOR_BLOCKS(8regs, xor_8regs_2, xor_8regs_3, xor_8regs_4, xor_8regs_5); 98 + 97 99 struct xor_block_template xor_block_8regs = { 98 - .name = "8regs", 99 - .do_2 = xor_8regs_2, 100 - .do_3 = xor_8regs_3, 101 - .do_4 = xor_8regs_4, 102 - .do_5 = xor_8regs_5, 100 + .name = "8regs", 101 + .xor_gen = xor_gen_8regs, 103 102 }; 104 103 #endif /* NO_TEMPLATE */
+5 -43
lib/raid/xor/xor-core.c
··· 13 13 #include <linux/preempt.h> 14 14 #include "xor_impl.h" 15 15 16 - /* The xor routines to use. */ 16 + /* The xor routine to use. */ 17 17 static struct xor_block_template *active_template; 18 - 19 - void 20 - xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs) 21 - { 22 - unsigned long *p1, *p2, *p3, *p4; 23 - 24 - WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count()); 25 - 26 - p1 = (unsigned long *) srcs[0]; 27 - if (src_count == 1) { 28 - active_template->do_2(bytes, dest, p1); 29 - return; 30 - } 31 - 32 - p2 = (unsigned long *) srcs[1]; 33 - if (src_count == 2) { 34 - active_template->do_3(bytes, dest, p1, p2); 35 - return; 36 - } 37 - 38 - p3 = (unsigned long *) srcs[2]; 39 - if (src_count == 3) { 40 - active_template->do_4(bytes, dest, p1, p2, p3); 41 - return; 42 - } 43 - 44 - p4 = (unsigned long *) srcs[3]; 45 - active_template->do_5(bytes, dest, p1, p2, p3, p4); 46 - } 47 - EXPORT_SYMBOL(xor_blocks); 48 18 49 19 /** 50 20 * xor_gen - generate RAID-style XOR information ··· 33 63 */ 34 64 void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes) 35 65 { 36 - unsigned int src_off = 0; 37 - 38 - WARN_ON_ONCE(in_interrupt()); 66 + WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count()); 39 67 WARN_ON_ONCE(bytes == 0); 40 68 WARN_ON_ONCE(bytes & 511); 41 69 42 - while (src_cnt > 0) { 43 - unsigned int this_cnt = min(src_cnt, MAX_XOR_BLOCKS); 44 - 45 - xor_blocks(this_cnt, bytes, dest, srcs + src_off); 46 - 47 - src_cnt -= this_cnt; 48 - src_off += this_cnt; 49 - } 70 + active_template->xor_gen(dest, srcs, src_cnt, bytes); 50 71 } 51 72 EXPORT_SYMBOL(xor_gen); 52 73 ··· 81 120 int speed; 82 121 unsigned long reps; 83 122 ktime_t min, start, t0; 123 + void *srcs[1] = { b2 }; 84 124 85 125 preempt_disable(); 86 126 ··· 92 130 cpu_relax(); 93 131 do { 94 132 mb(); /* prevent loop optimization */ 95 - tmpl->do_2(BENCH_SIZE, b1, b2); 133 + tmpl->xor_gen(b1, srcs, 1, BENCH_SIZE); 96 134 mb(); 
97 135 } while (reps++ < REPS || (t0 = ktime_get()) == start); 98 136 min = ktime_sub(t0, start);
+34 -14
lib/raid/xor/xor_impl.h
··· 3 3 #define _XOR_IMPL_H 4 4 5 5 #include <linux/init.h> 6 + #include <linux/minmax.h> 6 7 7 8 struct xor_block_template { 8 9 struct xor_block_template *next; 9 10 const char *name; 10 11 int speed; 11 - void (*do_2)(unsigned long, unsigned long * __restrict, 12 - const unsigned long * __restrict); 13 - void (*do_3)(unsigned long, unsigned long * __restrict, 14 - const unsigned long * __restrict, 15 - const unsigned long * __restrict); 16 - void (*do_4)(unsigned long, unsigned long * __restrict, 17 - const unsigned long * __restrict, 18 - const unsigned long * __restrict, 19 - const unsigned long * __restrict); 20 - void (*do_5)(unsigned long, unsigned long * __restrict, 21 - const unsigned long * __restrict, 22 - const unsigned long * __restrict, 23 - const unsigned long * __restrict, 24 - const unsigned long * __restrict); 12 + void (*xor_gen)(void *dest, void **srcs, unsigned int src_cnt, 13 + unsigned int bytes); 25 14 }; 15 + 16 + #define __DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4) \ 17 + void \ 18 + xor_gen_##_name(void *dest, void **srcs, unsigned int src_cnt, \ 19 + unsigned int bytes) \ 20 + { \ 21 + unsigned int src_off = 0; \ 22 + \ 23 + while (src_cnt > 0) { \ 24 + unsigned int this_cnt = min(src_cnt, 4); \ 25 + \ 26 + if (this_cnt == 1) \ 27 + _handle1(bytes, dest, srcs[src_off]); \ 28 + else if (this_cnt == 2) \ 29 + _handle2(bytes, dest, srcs[src_off], \ 30 + srcs[src_off + 1]); \ 31 + else if (this_cnt == 3) \ 32 + _handle3(bytes, dest, srcs[src_off], \ 33 + srcs[src_off + 1], srcs[src_off + 2]); \ 34 + else \ 35 + _handle4(bytes, dest, srcs[src_off], \ 36 + srcs[src_off + 1], srcs[src_off + 2], \ 37 + srcs[src_off + 3]); \ 38 + \ 39 + src_cnt -= this_cnt; \ 40 + src_off += this_cnt; \ 41 + } \ 42 + } 43 + 44 + #define DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4) \ 45 + static __DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4) 26 46 27 47 /* generic implementations */ 28 48 extern struct 
xor_block_template xor_block_8regs;