Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

percpu: Wire up cmpxchg128

In order to replace cmpxchg_double() with the newly minted
cmpxchg128() family of functions, wire it up in this_cpu_cmpxchg().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20230531132323.654945124@infradead.org

+240 -39
+20
arch/arm64/include/asm/percpu.h
··· 140 140 * re-enabling preemption for preemptible kernels, but doing that in a way 141 141 * which builds inside a module would mean messing directly with the preempt 142 142 * count. If you do this, peterz and tglx will hunt you down. 143 + * 144 + * Not to mention it'll break the actual preemption model for missing a 145 + * preemption point when TIF_NEED_RESCHED gets set while preemption is 146 + * disabled. 143 147 */ 144 148 #define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ 145 149 ({ \ ··· 243 239 _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) 244 240 #define this_cpu_cmpxchg_8(pcp, o, n) \ 245 241 _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) 242 + 243 + #define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n) 244 + 245 + #define this_cpu_cmpxchg128(pcp, o, n) \ 246 + ({ \ 247 + typedef typeof(pcp) pcp_op_T__; \ 248 + u128 old__, new__, ret__; \ 249 + pcp_op_T__ *ptr__; \ 250 + old__ = o; \ 251 + new__ = n; \ 252 + preempt_disable_notrace(); \ 253 + ptr__ = raw_cpu_ptr(&(pcp)); \ 254 + ret__ = cmpxchg128_local((void *)ptr__, old__, new__); \ 255 + preempt_enable_notrace(); \ 256 + ret__; \ 257 + }) 246 258 247 259 #ifdef __KVM_NVHE_HYPERVISOR__ 248 260 extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
+16
arch/s390/include/asm/percpu.h
··· 148 148 #define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) 149 149 #define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) 150 150 151 + #define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n) 152 + 153 + #define this_cpu_cmpxchg128(pcp, oval, nval) \ 154 + ({ \ 155 + typedef typeof(pcp) pcp_op_T__; \ 156 + u128 old__, new__, ret__; \ 157 + pcp_op_T__ *ptr__; \ 158 + old__ = oval; \ 159 + new__ = nval; \ 160 + preempt_disable_notrace(); \ 161 + ptr__ = raw_cpu_ptr(&(pcp)); \ 162 + ret__ = cmpxchg128((void *)ptr__, old__, new__); \ 163 + preempt_enable_notrace(); \ 164 + ret__; \ 165 + }) 166 + 151 167 #define arch_this_cpu_xchg(pcp, nval) \ 152 168 ({ \ 153 169 typeof(pcp) *ptr__; \
+68 -6
arch/x86/include/asm/percpu.h
··· 210 210 (typeof(_var))(unsigned long) pco_old__; \ 211 211 }) 212 212 213 + #if defined(CONFIG_X86_32) && !defined(CONFIG_UML) 214 + #define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \ 215 + ({ \ 216 + union { \ 217 + u64 var; \ 218 + struct { \ 219 + u32 low, high; \ 220 + }; \ 221 + } old__, new__; \ 222 + \ 223 + old__.var = _oval; \ 224 + new__.var = _nval; \ 225 + \ 226 + asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \ 227 + "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ 228 + : [var] "+m" (_var), \ 229 + "+a" (old__.low), \ 230 + "+d" (old__.high) \ 231 + : "b" (new__.low), \ 232 + "c" (new__.high) \ 233 + : "memory", "esi"); \ 234 + \ 235 + old__.var; \ 236 + }) 237 + 238 + #define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval) 239 + #define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval) 240 + #endif 241 + 242 + #ifdef CONFIG_X86_64 /* 64-bit variants reuse percpu_cmpxchg_op at size 8; no trailing ';' so they expand to a plain expression like the 32-bit and 128-bit variants */ 243 + #define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval) 244 + #define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval) 245 + 246 + #define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \ 247 + ({ \ 248 + union { \ 249 + u128 var; \ 250 + struct { \ 251 + u64 low, high; \ 252 + }; \ 253 + } old__, new__; \ 254 + \ 255 + old__.var = _oval; \ 256 + new__.var = _nval; \ 257 + \ 258 + asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \ 259 + "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ 260 + : [var] "+m" (_var), \ 261 + "+a" (old__.low), \ 262 + "+d" (old__.high) \ 263 + : "b" (new__.low), \ 264 + "c" (new__.high) \ 265 + : "memory", "rsi"); \ 266 + \ 267 + old__.var; \ 268 + }) 269 + 270 + #define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval) 271 + #define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval) 272 + #endif 273 + 213 274 /* 
214 275 * this_cpu_read() makes gcc load the percpu variable every time it is 215 276 * accessed while this_cpu_read_stable() allows the value to be cached. ··· 402 341 bool __ret; \ 403 342 typeof(pcp1) __o1 = (o1), __n1 = (n1); \ 404 343 typeof(pcp2) __o2 = (o2), __n2 = (n2); \ 405 - alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ 406 - "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ 407 - X86_FEATURE_CX16, \ 408 - ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ 409 - "+m" (pcp2), "+d" (__o2)), \ 410 - "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ 344 + asm volatile (ALTERNATIVE("leaq %P1, %%rsi; call this_cpu_cmpxchg16b_emu", \ 345 + "cmpxchg16b " __percpu_arg(1), X86_FEATURE_CX16) \ 346 + "setz %0" \ 347 + : "=a" (__ret), "+m" (pcp1) \ 348 + : "b" (__n1), "c" (__n2), \ 349 + "a" (__o1), "d" (__o2) \ 350 + : "memory", "rsi"); \ 411 351 __ret; \ 412 352 }) 413 353
+2 -1
arch/x86/lib/Makefile
··· 61 61 lib-y += strstr_32.o 62 62 lib-y += string_32.o 63 63 lib-y += memmove_32.o 64 + lib-y += cmpxchg8b_emu.o 64 65 ifneq ($(CONFIG_X86_CMPXCHG64),y) 65 - lib-y += cmpxchg8b_emu.o atomic64_386_32.o 66 + lib-y += atomic64_386_32.o 66 67 endif 67 68 else 68 69 obj-y += iomap_copy_64.o
+25 -18
arch/x86/lib/cmpxchg16b_emu.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 #include <linux/linkage.h> 3 3 #include <asm/percpu.h> 4 + #include <asm/processor-flags.h> 4 5 5 6 .text 6 7 7 8 /* 9 + * Emulate 'cmpxchg16b %gs:(%rsi)' 10 + * 8 11 * Inputs: 9 12 * %rsi : memory location to compare 10 13 * %rax : low 64 bits of old value 11 14 * %rdx : high 64 bits of old value 12 15 * %rbx : low 64 bits of new value 13 16 * %rcx : high 64 bits of new value 14 - * %al : Operation successful 17 + * 18 + * Notably this is not LOCK prefixed and is not safe against NMIs 15 19 */ 16 20 SYM_FUNC_START(this_cpu_cmpxchg16b_emu) 17 21 18 - # 19 - # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not 20 - # via the ZF. Caller will access %al to get result. 21 - # 22 - # Note that this is only useful for a cpuops operation. Meaning that we 23 - # do *not* have a fully atomic operation but just an operation that is 24 - # *atomic* on a single cpu (as provided by the this_cpu_xx class of 25 - # macros). 26 - # 27 22 pushfq 28 23 cli 29 24 30 - cmpq PER_CPU_VAR((%rsi)), %rax 31 - jne .Lnot_same 32 - cmpq PER_CPU_VAR(8(%rsi)), %rdx 33 - jne .Lnot_same 25 + /* if (*ptr == old) */ 26 + cmpq PER_CPU_VAR(0(%rsi)), %rax 27 + jne .Lnot_same 28 + cmpq PER_CPU_VAR(8(%rsi)), %rdx 29 + jne .Lnot_same 34 30 35 - movq %rbx, PER_CPU_VAR((%rsi)) 36 - movq %rcx, PER_CPU_VAR(8(%rsi)) 31 + /* *ptr = new */ 32 + movq %rbx, PER_CPU_VAR(0(%rsi)) 33 + movq %rcx, PER_CPU_VAR(8(%rsi)) 34 + 35 + /* set ZF in EFLAGS to indicate success */ 36 + orl $X86_EFLAGS_ZF, (%rsp) 37 37 38 38 popfq 39 - mov $1, %al 40 39 RET 41 40 42 41 .Lnot_same: 42 + /* *ptr != old */ 43 + 44 + /* old = *ptr */ 45 + movq PER_CPU_VAR(0(%rsi)), %rax 46 + movq PER_CPU_VAR(8(%rsi)), %rdx 47 + 48 + /* clear ZF in EFLAGS to indicate failure */ 49 + andl $(~X86_EFLAGS_ZF), (%rsp) 50 + 43 51 popfq 44 - xor %al,%al 45 52 RET 46 53 47 54 SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
+53 -14
arch/x86/lib/cmpxchg8b_emu.S
··· 2 2 3 3 #include <linux/linkage.h> 4 4 #include <asm/export.h> 5 + #include <asm/percpu.h> 6 + #include <asm/processor-flags.h> 5 7 6 8 .text 7 9 10 + #ifndef CONFIG_X86_CMPXCHG64 11 + 8 12 /* 13 + * Emulate 'cmpxchg8b (%esi)' on UP 14 + * 9 15 * Inputs: 10 16 * %esi : memory location to compare 11 17 * %eax : low 32 bits of old value ··· 21 15 */ 22 16 SYM_FUNC_START(cmpxchg8b_emu) 23 17 24 - # 25 - # Emulate 'cmpxchg8b (%esi)' on UP except we don't 26 - # set the whole ZF thing (caller will just compare 27 - # eax:edx with the expected value) 28 - # 29 18 pushfl 30 19 cli 31 20 32 - cmpl (%esi), %eax 33 - jne .Lnot_same 34 - cmpl 4(%esi), %edx 35 - jne .Lhalf_same 21 + cmpl 0(%esi), %eax 22 + jne .Lnot_same 23 + cmpl 4(%esi), %edx 24 + jne .Lnot_same 36 25 37 - movl %ebx, (%esi) 38 - movl %ecx, 4(%esi) 26 + movl %ebx, 0(%esi) 27 + movl %ecx, 4(%esi) 28 + 29 + orl $X86_EFLAGS_ZF, (%esp) 39 30 40 31 popfl 41 32 RET 42 33 43 34 .Lnot_same: 44 - movl (%esi), %eax 45 - .Lhalf_same: 46 - movl 4(%esi), %edx 35 + movl 0(%esi), %eax 36 + movl 4(%esi), %edx 37 + 38 + andl $(~X86_EFLAGS_ZF), (%esp) 47 39 48 40 popfl 49 41 RET 50 42 51 43 SYM_FUNC_END(cmpxchg8b_emu) 52 44 EXPORT_SYMBOL(cmpxchg8b_emu) 45 + 46 + #endif 47 + 48 + #ifndef CONFIG_UML 49 + 50 + SYM_FUNC_START(this_cpu_cmpxchg8b_emu) 51 + 52 + pushfl 53 + cli 54 + 55 + cmpl PER_CPU_VAR(0(%esi)), %eax 56 + jne .Lnot_same2 57 + cmpl PER_CPU_VAR(4(%esi)), %edx 58 + jne .Lnot_same2 59 + 60 + movl %ebx, PER_CPU_VAR(0(%esi)) 61 + movl %ecx, PER_CPU_VAR(4(%esi)) 62 + 63 + orl $X86_EFLAGS_ZF, (%esp) 64 + 65 + popfl 66 + RET 67 + 68 + .Lnot_same2: 69 + movl PER_CPU_VAR(0(%esi)), %eax 70 + movl PER_CPU_VAR(4(%esi)), %edx 71 + 72 + andl $(~X86_EFLAGS_ZF), (%esp) 73 + 74 + popfl 75 + RET 76 + 77 + SYM_FUNC_END(this_cpu_cmpxchg8b_emu) 78 + 79 + #endif
+56
include/asm-generic/percpu.h
··· 350 350 #endif 351 351 #endif 352 352 353 + #ifndef raw_cpu_try_cmpxchg64 354 + #ifdef raw_cpu_cmpxchg64 355 + #define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) \ 356 + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg64) 357 + #else 358 + #define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) \ 359 + raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) 360 + #endif 361 + #endif 362 + #ifndef raw_cpu_try_cmpxchg128 363 + #ifdef raw_cpu_cmpxchg128 364 + #define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) \ 365 + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg128) 366 + #else 367 + #define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) \ 368 + raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) 369 + #endif 370 + #endif 371 + 353 372 #ifndef raw_cpu_cmpxchg_1 354 373 #define raw_cpu_cmpxchg_1(pcp, oval, nval) \ 355 374 raw_cpu_generic_cmpxchg(pcp, oval, nval) ··· 383 364 #endif 384 365 #ifndef raw_cpu_cmpxchg_8 385 366 #define raw_cpu_cmpxchg_8(pcp, oval, nval) \ 367 + raw_cpu_generic_cmpxchg(pcp, oval, nval) 368 + #endif 369 + 370 + #ifndef raw_cpu_cmpxchg64 371 + #define raw_cpu_cmpxchg64(pcp, oval, nval) \ 372 + raw_cpu_generic_cmpxchg(pcp, oval, nval) 373 + #endif 374 + #ifndef raw_cpu_cmpxchg128 375 + #define raw_cpu_cmpxchg128(pcp, oval, nval) \ 386 376 raw_cpu_generic_cmpxchg(pcp, oval, nval) 387 377 #endif 388 378 ··· 540 512 #endif 541 513 #endif 542 514 515 + #ifndef this_cpu_try_cmpxchg64 516 + #ifdef this_cpu_cmpxchg64 517 + #define this_cpu_try_cmpxchg64(pcp, ovalp, nval) \ 518 + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg64) 519 + #else 520 + #define this_cpu_try_cmpxchg64(pcp, ovalp, nval) \ 521 + this_cpu_generic_try_cmpxchg(pcp, ovalp, nval) 522 + #endif 523 + #endif 524 + #ifndef this_cpu_try_cmpxchg128 525 + #ifdef this_cpu_cmpxchg128 526 + #define this_cpu_try_cmpxchg128(pcp, ovalp, nval) \ 527 + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg128) 528 + #else 529 + #define this_cpu_try_cmpxchg128(pcp, ovalp, nval) \ 
530 + this_cpu_generic_try_cmpxchg(pcp, ovalp, nval) 531 + #endif 532 + #endif 533 + 543 534 #ifndef this_cpu_cmpxchg_1 544 535 #define this_cpu_cmpxchg_1(pcp, oval, nval) \ 545 536 this_cpu_generic_cmpxchg(pcp, oval, nval) ··· 573 526 #endif 574 527 #ifndef this_cpu_cmpxchg_8 575 528 #define this_cpu_cmpxchg_8(pcp, oval, nval) \ 529 + this_cpu_generic_cmpxchg(pcp, oval, nval) 530 + #endif 531 + 532 + #ifndef this_cpu_cmpxchg64 533 + #define this_cpu_cmpxchg64(pcp, oval, nval) \ 534 + this_cpu_generic_cmpxchg(pcp, oval, nval) 535 + #endif 536 + #ifndef this_cpu_cmpxchg128 537 + #define this_cpu_cmpxchg128(pcp, oval, nval) \ 576 538 this_cpu_generic_cmpxchg(pcp, oval, nval) 577 539 #endif 578 540