Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

riscv: Improve zacas fully-ordered cmpxchg()

The current fully-ordered cmpxchgXX() implementation results in:

amocas.X.rl a5,a4,(s1)
fence rw,rw

This provides sufficient synchronization, but we can instead use the
following better mapping, which folds the ordering into the AMO itself
and drops the trailing fence:

amocas.X.aqrl a5,a4,(s1)

Suggested-by: Andrea Parri <andrea@rivosinc.com>
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-7-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

Authored by Alexandre Ghiti and committed by Palmer Dabbelt
6116e22e 1658ef43

+64 -28
+64 -28
arch/riscv/include/asm/cmpxchg.h
··· 107 107 * store NEW in MEM. Return the initial value in MEM. Success is 108 108 * indicated by comparing RETURN with OLD. 109 109 */ 110 - 111 - #define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \ 110 + #define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \ 111 + sc_prepend, sc_append, \ 112 + cas_prepend, cas_append, \ 113 + r, p, o, n) \ 112 114 ({ \ 113 115 if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \ 114 116 IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \ ··· 119 117 r = o; \ 120 118 \ 121 119 __asm__ __volatile__ ( \ 122 - prepend \ 120 + cas_prepend \ 123 121 " amocas" cas_sfx " %0, %z2, %1\n" \ 124 - append \ 122 + cas_append \ 125 123 : "+&r" (r), "+A" (*(p)) \ 126 124 : "rJ" (n) \ 127 125 : "memory"); \ ··· 136 134 ulong __rc; \ 137 135 \ 138 136 __asm__ __volatile__ ( \ 139 - prepend \ 137 + sc_prepend \ 140 138 "0: lr.w %0, %2\n" \ 141 139 " and %1, %0, %z5\n" \ 142 140 " bne %1, %z3, 1f\n" \ ··· 144 142 " or %1, %1, %z4\n" \ 145 143 " sc.w" sc_sfx " %1, %1, %2\n" \ 146 144 " bnez %1, 0b\n" \ 147 - append \ 145 + sc_append \ 148 146 "1:\n" \ 149 147 : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ 150 148 : "rJ" ((long)__oldx), "rJ" (__newx), \ ··· 155 153 } \ 156 154 }) 157 155 158 - #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ 156 + #define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \ 157 + sc_prepend, sc_append, \ 158 + cas_prepend, cas_append, \ 159 + r, p, co, o, n) \ 159 160 ({ \ 160 161 if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \ 161 162 riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \ 162 163 r = o; \ 163 164 \ 164 165 __asm__ __volatile__ ( \ 165 - prepend \ 166 - " amocas" sc_cas_sfx " %0, %z2, %1\n" \ 167 - append \ 166 + cas_prepend \ 167 + " amocas" cas_sfx " %0, %z2, %1\n" \ 168 + cas_append \ 168 169 : "+&r" (r), "+A" (*(p)) \ 169 170 : "rJ" (n) \ 170 171 : "memory"); \ ··· 175 170 register unsigned int __rc; \ 176 171 \ 177 172 __asm__ __volatile__ ( \ 178 - prepend \ 173 + sc_prepend 
\ 179 174 "0: lr" lr_sfx " %0, %2\n" \ 180 175 " bne %0, %z3, 1f\n" \ 181 - " sc" sc_cas_sfx " %1, %z4, %2\n" \ 176 + " sc" sc_sfx " %1, %z4, %2\n" \ 182 177 " bnez %1, 0b\n" \ 183 - append \ 178 + sc_append \ 184 179 "1:\n" \ 185 180 : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ 186 181 : "rJ" (co o), "rJ" (n) \ ··· 188 183 } \ 189 184 }) 190 185 191 - #define _arch_cmpxchg(ptr, old, new, sc_cas_sfx, prepend, append) \ 186 + #define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \ 187 + sc_prepend, sc_append, \ 188 + cas_prepend, cas_append) \ 192 189 ({ \ 193 190 __typeof__(ptr) __ptr = (ptr); \ 194 191 __typeof__(*(__ptr)) __old = (old); \ ··· 199 192 \ 200 193 switch (sizeof(*__ptr)) { \ 201 194 case 1: \ 202 - __arch_cmpxchg_masked(sc_cas_sfx, ".b" sc_cas_sfx, \ 203 - prepend, append, \ 204 - __ret, __ptr, __old, __new); \ 195 + __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \ 196 + sc_prepend, sc_append, \ 197 + cas_prepend, cas_append, \ 198 + __ret, __ptr, __old, __new); \ 205 199 break; \ 206 200 case 2: \ 207 - __arch_cmpxchg_masked(sc_cas_sfx, ".h" sc_cas_sfx, \ 208 - prepend, append, \ 209 - __ret, __ptr, __old, __new); \ 201 + __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \ 202 + sc_prepend, sc_append, \ 203 + cas_prepend, cas_append, \ 204 + __ret, __ptr, __old, __new); \ 210 205 break; \ 211 206 case 4: \ 212 - __arch_cmpxchg(".w", ".w" sc_cas_sfx, prepend, append, \ 213 - __ret, __ptr, (long), __old, __new); \ 207 + __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \ 208 + sc_prepend, sc_append, \ 209 + cas_prepend, cas_append, \ 210 + __ret, __ptr, (long), __old, __new); \ 214 211 break; \ 215 212 case 8: \ 216 - __arch_cmpxchg(".d", ".d" sc_cas_sfx, prepend, append, \ 217 - __ret, __ptr, /**/, __old, __new); \ 213 + __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \ 214 + sc_prepend, sc_append, \ 215 + cas_prepend, cas_append, \ 216 + __ret, __ptr, /**/, __old, __new); \ 218 217 break; \ 219 218 default: \ 220 219 BUILD_BUG(); \ ··· 228 215 
(__typeof__(*(__ptr)))__ret; \ 229 216 }) 230 217 218 + /* 219 + * These macros are here to improve the readability of the arch_cmpxchg_XXX() 220 + * macros. 221 + */ 222 + #define SC_SFX(x) x 223 + #define CAS_SFX(x) x 224 + #define SC_PREPEND(x) x 225 + #define SC_APPEND(x) x 226 + #define CAS_PREPEND(x) x 227 + #define CAS_APPEND(x) x 228 + 231 229 #define arch_cmpxchg_relaxed(ptr, o, n) \ 232 - _arch_cmpxchg((ptr), (o), (n), "", "", "") 230 + _arch_cmpxchg((ptr), (o), (n), \ 231 + SC_SFX(""), CAS_SFX(""), \ 232 + SC_PREPEND(""), SC_APPEND(""), \ 233 + CAS_PREPEND(""), CAS_APPEND("")) 233 234 234 235 #define arch_cmpxchg_acquire(ptr, o, n) \ 235 - _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER) 236 + _arch_cmpxchg((ptr), (o), (n), \ 237 + SC_SFX(""), CAS_SFX(""), \ 238 + SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \ 239 + CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER)) 236 240 237 241 #define arch_cmpxchg_release(ptr, o, n) \ 238 - _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "") 242 + _arch_cmpxchg((ptr), (o), (n), \ 243 + SC_SFX(""), CAS_SFX(""), \ 244 + SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \ 245 + CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND("")) 239 246 240 247 #define arch_cmpxchg(ptr, o, n) \ 241 - _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") 248 + _arch_cmpxchg((ptr), (o), (n), \ 249 + SC_SFX(".rl"), CAS_SFX(".aqrl"), \ 250 + SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \ 251 + CAS_PREPEND(""), CAS_APPEND("")) 242 252 243 253 #define arch_cmpxchg_local(ptr, o, n) \ 244 254 arch_cmpxchg_relaxed((ptr), (o), (n))