Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Merge tag 'arc-v4.2-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC fixes from Vineet Gupta:
"Here's a late pull request for accumulated ARC fixes which came out of
extended testing of the new ARCv2 port with LTP etc. The llock/scond
livelock workaround has been reviewed by PeterZ. The changes look like
a lot, but I've crafted them into finer-grained patches for better
tracking later.

I have some more fixes (ARC Futex backend) ready to go but those will
have to wait for tglx to return from vacation.

Summary:
- Enable a reduced config of HS38 (w/o div-rem, ll64...)
- Add software workaround for LLOCK/SCOND livelock
- Fallout of a recent pt_regs update"

* tag 'arc-v4.2-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
ARCv2: spinlock/rwlock/atomics: reduce 1 instruction in exponential backoff
ARC: Make pt_regs regs unsigned
ARCv2: spinlock/rwlock: Reset retry delay when starting a new spin-wait cycle
ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff
ARC: LLOCK/SCOND based rwlock
ARC: LLOCK/SCOND based spin_lock
ARC: refactor atomic inline asm operands with symbolic names
Revert "ARCv2: STAR 9000837815 workaround hardware exclusive transactions livelock"
ARCv2: [axs103_smp] Reduce clk for Quad FPGA configs
ARCv2: Fix the peripheral address space detection
ARCv2: allow selection of page size for MMUv4
ARCv2: lib: memset: Don't assume 64-bit load/stores
ARCv2: lib: memcpy: Missing PREFETCHW
ARCv2: add knob for DIV_REM in Kconfig
ARC/time: Migrate to new 'set-state' interface
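The centerpiece of the series is the deferred retry of a failed SCOND. A minimal C sketch of the idea, assuming nothing beyond the commit message: the LLOCK/SCOND pair is modelled here with a GCC compare-and-swap builtin, and try_acquire() is a hypothetical helper, not kernel code.

#include <stdbool.h>

/* Hypothetical stand-in for the LLOCK/SCOND pair, modelled with a CAS */
static bool try_acquire(unsigned int *lock, unsigned int locked)
{
    unsigned int unlocked = 0;  /* assumed "unlocked" encoding */

    return __atomic_compare_exchange_n(lock, &unlocked, locked, false,
                                       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}

void spin_lock_backoff(unsigned int *lock, unsigned int locked)
{
    unsigned int delay = 1, tmp;

    while (!try_acquire(lock, locked)) {
        /* exclusive store failed: back off before retrying */
        for (tmp = delay; tmp != 0; tmp--)
            __asm__ __volatile__("" ::: "memory");  /* busy-wait */
        delay *= 2;     /* exponential backoff, as in the asm below */
    }
}

One of the patches listed above additionally resets the delay whenever a new spin-wait cycle starts; this sketch approximates that by reinitializing delay on entry.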

+718 -116
+11 -2
arch/arc/Kconfig
···
  config ARC_PAGE_SIZE_16K
      bool "16KB"
-     depends on ARC_MMU_V3
+     depends on ARC_MMU_V3 || ARC_MMU_V4

  config ARC_PAGE_SIZE_4K
      bool "4KB"
-     depends on ARC_MMU_V3
+     depends on ARC_MMU_V3 || ARC_MMU_V4

  endchoice
···
      default y
      depends on !ARC_CANT_LLSC

+ config ARC_STAR_9000923308
+     bool "Workaround for llock/scond livelock"
+     default y
+     depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC
+
  config ARC_HAS_SWAPE
      bool "Insn: SWAPE (endian-swap)"
      default y
···
        Enable gcc to generate 64-bit load/store instructions
        ISA mandates even/odd registers to allow encoding of two
        dest operands with 2 possible source operands.
+     default y
+
+ config ARC_HAS_DIV_REM
+     bool "Insn: div, divu, rem, remu"
      default y

  config ARC_HAS_RTC
+9 -1
arch/arc/Makefile
···
  cflags-$(CONFIG_ARC_HAS_LLSC)   += -mlock
  cflags-$(CONFIG_ARC_HAS_SWAPE)  += -mswape

+ ifdef CONFIG_ISA_ARCV2
+
  ifndef CONFIG_ARC_HAS_LL64
- cflags-$(CONFIG_ISA_ARCV2)      += -mno-ll64
+ cflags-y                        += -mno-ll64
+ endif
+
+ ifndef CONFIG_ARC_HAS_DIV_REM
+ cflags-y                        += -mno-div-rem
+ endif
+
  endif

  cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables
+3 -4
arch/arc/include/asm/arcregs.h
···
  #define ECR_C_BIT_DTLB_LD_MISS      8
  #define ECR_C_BIT_DTLB_ST_MISS      9

-
  /* Auxiliary registers */
  #define AUX_IDENTITY        4
  #define AUX_INTR_VEC_BASE   0x25
-
+ #define AUX_NON_VOL         0x5e

  /*
   * Floating Pt Registers
···
  struct bcr_perip {
  #ifdef CONFIG_CPU_BIG_ENDIAN
-     unsigned int start:8, pad2:8, sz:8, pad:8;
+     unsigned int start:8, pad2:8, sz:8, ver:8;
  #else
-     unsigned int pad:8, sz:8, pad2:8, start:8;
+     unsigned int ver:8, sz:8, pad2:8, start:8;
  #endif
  };
+55 -23
arch/arc/include/asm/atomic.h
···
  #define atomic_set(v, i) (((v)->counter) = (i))

- #ifdef CONFIG_ISA_ARCV2
- #define PREFETCHW    " prefetchw [%1] \n"
- #else
- #define PREFETCHW
+ #ifdef CONFIG_ARC_STAR_9000923308
+
+ #define SCOND_FAIL_RETRY_VAR_DEF                                \
+     unsigned int delay = 1, tmp;                                \
+
+ #define SCOND_FAIL_RETRY_ASM                                    \
+     " bz 4f                     \n"                             \
+     " ; --- scond fail delay ---\n"                             \
+     " mov %[tmp], %[delay]      \n" /* tmp = delay */           \
+     "2: brne.d %[tmp], 0, 2b    \n" /* while (tmp != 0) */      \
+     " sub %[tmp], %[tmp], 1     \n" /* tmp-- */                 \
+     " rol %[delay], %[delay]    \n" /* delay *= 2 */            \
+     " b 1b                      \n" /* start over */            \
+     "4: ; --- success ---       \n"                             \
+
+ #define SCOND_FAIL_RETRY_VARS                                   \
+     ,[delay] "+&r" (delay), [tmp] "=&r" (tmp)                   \
+
+ #else /* !CONFIG_ARC_STAR_9000923308 */
+
+ #define SCOND_FAIL_RETRY_VAR_DEF
+
+ #define SCOND_FAIL_RETRY_ASM                                    \
+     " bnz 1b                    \n"                             \
+
+ #define SCOND_FAIL_RETRY_VARS
+
  #endif

  #define ATOMIC_OP(op, c_op, asm_op)                             \
  static inline void atomic_##op(int i, atomic_t *v)              \
  {                                                               \
-     unsigned int temp;                                          \
+     unsigned int val;                                           \
+     SCOND_FAIL_RETRY_VAR_DEF                                    \
                                                                  \
      __asm__ __volatile__(                                       \
-     "1:                         \n"                             \
-     PREFETCHW                                                   \
-     " llock %0, [%1]            \n"                             \
-     " " #asm_op " %0, %0, %2    \n"                             \
-     " scond %0, [%1]            \n"                             \
-     " bnz 1b                    \n"                             \
-     : "=&r"(temp)  /* Early clobber, to prevent reg reuse */    \
-     : "r"(&v->counter), "ir"(i)                                 \
+     "1: llock %[val], [%[ctr]]              \n"                 \
+     " " #asm_op " %[val], %[val], %[i]      \n"                 \
+     " scond %[val], [%[ctr]]                \n"                 \
+     "                                       \n"                 \
+     SCOND_FAIL_RETRY_ASM                                        \
+                                                                 \
+     : [val] "=&r" (val) /* Early clobber to prevent reg reuse */\
+       SCOND_FAIL_RETRY_VARS                                     \
+     : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
+       [i] "ir" (i)                                              \
      : "cc");                                                    \
  }                                                               \

  #define ATOMIC_OP_RETURN(op, c_op, asm_op)                      \
  static inline int atomic_##op##_return(int i, atomic_t *v)      \
  {                                                               \
-     unsigned int temp;                                          \
+     unsigned int val;                                           \
+     SCOND_FAIL_RETRY_VAR_DEF                                    \
                                                                  \
      /*                                                          \
       * Explicit full memory barrier needed before/after as      \
···
      smp_mb();                                                   \
                                                                  \
      __asm__ __volatile__(                                       \
-     "1:                         \n"                             \
-     PREFETCHW                                                   \
-     " llock %0, [%1]            \n"                             \
-     " " #asm_op " %0, %0, %2    \n"                             \
-     " scond %0, [%1]            \n"                             \
-     " bnz 1b                    \n"                             \
-     : "=&r"(temp)                                               \
-     : "r"(&v->counter), "ir"(i)                                 \
+     "1: llock %[val], [%[ctr]]              \n"                 \
+     " " #asm_op " %[val], %[val], %[i]      \n"                 \
+     " scond %[val], [%[ctr]]                \n"                 \
+     "                                       \n"                 \
+     SCOND_FAIL_RETRY_ASM                                        \
+                                                                 \
+     : [val] "=&r" (val)                                         \
+       SCOND_FAIL_RETRY_VARS                                     \
+     : [ctr] "r" (&v->counter),                                  \
+       [i] "ir" (i)                                              \
      : "cc");                                                    \
                                                                  \
      smp_mb();                                                   \
                                                                  \
-     return temp;                                                \
+     return val;                                                 \
  }

  #else /* !CONFIG_ARC_HAS_LLSC */
···
  #undef ATOMIC_OPS
  #undef ATOMIC_OP_RETURN
  #undef ATOMIC_OP
+ #undef SCOND_FAIL_RETRY_VAR_DEF
+ #undef SCOND_FAIL_RETRY_ASM
+ #undef SCOND_FAIL_RETRY_VARS

  /**
   * __atomic_add_unless - add unless the number is a given value
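In C terms, each macro-generated atomic op is just a retry loop around the exclusive pair. A rough equivalent of the plain (non-workaround) atomic_add() path, with llock/scond modelled by a GCC builtin; this is an illustrative sketch, not kernel code, and the relaxed ordering mirrors the absence of barriers in the non-_return variants:

static inline void atomic_add_sketch(int i, int *counter)
{
    int old, new;

    do {
        old = __atomic_load_n(counter, __ATOMIC_RELAXED);      /* llock */
        new = old + i;                                         /* add   */
    } while (!__atomic_compare_exchange_n(counter, &old, new,  /* scond */
                                          true, __ATOMIC_RELAXED,
                                          __ATOMIC_RELAXED));  /* bnz 1b */
}

The rename from %0/%1/%2 to [val]/[ctr]/[i] is purely cosmetic; it exists so the workaround macros can splice extra operands in by name.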
+27 -27
arch/arc/include/asm/ptrace.h
···
  struct pt_regs {

      /* Real registers */
-     long bta;       /* bta_l1, bta_l2, erbta */
+     unsigned long bta;      /* bta_l1, bta_l2, erbta */

-     long lp_start, lp_end, lp_count;
+     unsigned long lp_start, lp_end, lp_count;

-     long status32;  /* status32_l1, status32_l2, erstatus */
-     long ret;       /* ilink1, ilink2 or eret */
-     long blink;
-     long fp;
-     long r26;       /* gp */
+     unsigned long status32; /* status32_l1, status32_l2, erstatus */
+     unsigned long ret;      /* ilink1, ilink2 or eret */
+     unsigned long blink;
+     unsigned long fp;
+     unsigned long r26;      /* gp */

-     long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
+     unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;

-     long sp;        /* user/kernel sp depending on where we came from */
-     long orig_r0;
+     unsigned long sp;       /* User/Kernel depending on where we came from */
+     unsigned long orig_r0;

      /*
       * To distinguish bet excp, syscall, irq
···
          unsigned long event;
      };

-     long user_r25;
+     unsigned long user_r25;
  };
  #else

  struct pt_regs {

-     long orig_r0;
+     unsigned long orig_r0;

      union {
          struct {
···
          unsigned long event;
      };

-     long bta;       /* bta_l1, bta_l2, erbta */
+     unsigned long bta;      /* bta_l1, bta_l2, erbta */

-     long user_r25;
+     unsigned long user_r25;

-     long r26;       /* gp */
-     long fp;
-     long sp;        /* user/kernel sp depending on where we came from */
+     unsigned long r26;      /* gp */
+     unsigned long fp;
+     unsigned long sp;       /* user/kernel sp depending on where we came from */

-     long r12;
+     unsigned long r12;

      /*------- Below list auto saved by h/w -----------*/
-     long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
+     unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;

-     long blink;
-     long lp_end, lp_start, lp_count;
+     unsigned long blink;
+     unsigned long lp_end, lp_start, lp_count;

-     long ei, ldi, jli;
+     unsigned long ei, ldi, jli;

-     long ret;
-     long status32;
+     unsigned long ret;
+     unsigned long status32;
  };

  #endif
···
  /* Callee saved registers - need to be saved only when you are scheduled out */

  struct callee_regs {
-     long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
+     unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
  };

- #define instruction_pointer(regs)   (unsigned long)((regs)->ret)
+ #define instruction_pointer(regs)   ((regs)->ret)
  #define profile_pc(regs)            instruction_pointer(regs)

  /* return 1 if user mode or 0 if kernel mode */
···
  static inline long regs_return_value(struct pt_regs *regs)
  {
-     return regs->r0;
+     return (long)regs->r0;
  }

  #endif /* !__ASSEMBLY__ */
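The long to unsigned long conversion matters because saved registers hold addresses and bit patterns, not signed quantities: on a 32-bit target such as ARC, a kernel address has the top bit set and goes negative in a plain long. A contrived user-space illustration of the class of bug (not from the series; arithmetic right shift of a negative value is implementation-defined but sign-extends on common compilers):

#include <stdio.h>

int main(void)
{
    long sret = (int)0x80001000;    /* sign-extended: negative */
    unsigned long uret = 0x80001000u;

    /* the signed shift smears the sign bit into the upper bits */
    printf("signed   >> 12: %#lx\n", (unsigned long)(sret >> 12));
    printf("unsigned >> 12: %#lx\n", uret >> 12);
    return 0;
}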
+523 -15
arch/arc/include/asm/spinlock.h
···
  #define arch_spin_unlock_wait(x) \
      do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)

+ #ifdef CONFIG_ARC_HAS_LLSC
+
+ /*
+  * A normal LLOCK/SCOND based system, w/o need for livelock workaround
+  */
+ #ifndef CONFIG_ARC_STAR_9000923308
+
  static inline void arch_spin_lock(arch_spinlock_t *lock)
  {
-     unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+     unsigned int val;
+
+     smp_mb();
+
+     __asm__ __volatile__(
+     "1: llock %[val], [%[slock]]        \n"
+     "   breq %[val], %[LOCKED], 1b      \n" /* spin while LOCKED */
+     "   scond %[LOCKED], [%[slock]]     \n" /* acquire */
+     "   bnz 1b                          \n"
+     "                                   \n"
+     : [val]    "=&r" (val)
+     : [slock]  "r" (&(lock->slock)),
+       [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
+     : "memory", "cc");
+
+     smp_mb();
+ }
+
+ /* 1 - lock taken successfully */
+ static inline int arch_spin_trylock(arch_spinlock_t *lock)
+ {
+     unsigned int val, got_it = 0;
+
+     smp_mb();
+
+     __asm__ __volatile__(
+     "1: llock %[val], [%[slock]]        \n"
+     "   breq %[val], %[LOCKED], 4f      \n" /* already LOCKED, just bail */
+     "   scond %[LOCKED], [%[slock]]     \n" /* acquire */
+     "   bnz 1b                          \n"
+     "   mov %[got_it], 1                \n"
+     "4:                                 \n"
+     "                                   \n"
+     : [val]    "=&r" (val),
+       [got_it] "+&r" (got_it)
+     : [slock]  "r" (&(lock->slock)),
+       [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
+     : "memory", "cc");
+
+     smp_mb();
+
+     return got_it;
+ }
+
+ static inline void arch_spin_unlock(arch_spinlock_t *lock)
+ {
+     smp_mb();
+
+     lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+     smp_mb();
+ }
+
+ /*
+  * Read-write spinlocks, allowing multiple readers but only one writer.
+  * Unfair locking as Writers could be starved indefinitely by Reader(s)
+  */
+
+ static inline void arch_read_lock(arch_rwlock_t *rw)
+ {
+     unsigned int val;
+
+     smp_mb();
+
+     /*
+      * zero means writer holds the lock exclusively, deny Reader.
+      * Otherwise grant lock to first/subseq reader
+      *
+      *  if (rw->counter > 0) {
+      *      rw->counter--;
+      *      ret = 1;
+      *  }
+      */
+
+     __asm__ __volatile__(
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   brls %[val], %[WR_LOCKED], 1b   \n" /* <= 0: spin while write locked */
+     "   sub %[val], %[val], 1           \n" /* reader lock */
+     "   scond %[val], [%[rwlock]]       \n"
+     "   bnz 1b                          \n"
+     "                                   \n"
+     : [val]       "=&r" (val)
+     : [rwlock]    "r" (&(rw->counter)),
+       [WR_LOCKED] "ir" (0)
+     : "memory", "cc");
+
+     smp_mb();
+ }
+
+ /* 1 - lock taken successfully */
+ static inline int arch_read_trylock(arch_rwlock_t *rw)
+ {
+     unsigned int val, got_it = 0;
+
+     smp_mb();
+
+     __asm__ __volatile__(
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   brls %[val], %[WR_LOCKED], 4f   \n" /* <= 0: already write locked, bail */
+     "   sub %[val], %[val], 1           \n" /* counter-- */
+     "   scond %[val], [%[rwlock]]       \n"
+     "   bnz 1b                          \n" /* retry if collided with someone */
+     "   mov %[got_it], 1                \n"
+     "                                   \n"
+     "4: ; --- done ---                  \n"
+
+     : [val]       "=&r" (val),
+       [got_it]    "+&r" (got_it)
+     : [rwlock]    "r" (&(rw->counter)),
+       [WR_LOCKED] "ir" (0)
+     : "memory", "cc");
+
+     smp_mb();
+
+     return got_it;
+ }
+
+ static inline void arch_write_lock(arch_rwlock_t *rw)
+ {
+     unsigned int val;
+
+     smp_mb();
+
+     /*
+      * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
+      * deny writer. Otherwise if unlocked grant to writer
+      * Hence the claim that Linux rwlocks are unfair to writers.
+      * (can be starved for an indefinite time by readers).
+      *
+      *  if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
+      *      rw->counter = 0;
+      *      ret = 1;
+      *  }
+      */
+
+     __asm__ __volatile__(
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   brne %[val], %[UNLOCKED], 1b    \n" /* while !UNLOCKED spin */
+     "   mov %[val], %[WR_LOCKED]        \n"
+     "   scond %[val], [%[rwlock]]       \n"
+     "   bnz 1b                          \n"
+     "                                   \n"
+     : [val]       "=&r" (val)
+     : [rwlock]    "r" (&(rw->counter)),
+       [UNLOCKED]  "ir" (__ARCH_RW_LOCK_UNLOCKED__),
+       [WR_LOCKED] "ir" (0)
+     : "memory", "cc");
+
+     smp_mb();
+ }
+
+ /* 1 - lock taken successfully */
+ static inline int arch_write_trylock(arch_rwlock_t *rw)
+ {
+     unsigned int val, got_it = 0;
+
+     smp_mb();
+
+     __asm__ __volatile__(
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   brne %[val], %[UNLOCKED], 4f    \n" /* !UNLOCKED, bail */
+     "   mov %[val], %[WR_LOCKED]        \n"
+     "   scond %[val], [%[rwlock]]       \n"
+     "   bnz 1b                          \n" /* retry if collided with someone */
+     "   mov %[got_it], 1                \n"
+     "                                   \n"
+     "4: ; --- done ---                  \n"
+
+     : [val]       "=&r" (val),
+       [got_it]    "+&r" (got_it)
+     : [rwlock]    "r" (&(rw->counter)),
+       [UNLOCKED]  "ir" (__ARCH_RW_LOCK_UNLOCKED__),
+       [WR_LOCKED] "ir" (0)
+     : "memory", "cc");
+
+     smp_mb();
+
+     return got_it;
+ }
+
+ static inline void arch_read_unlock(arch_rwlock_t *rw)
+ {
+     unsigned int val;
+
+     smp_mb();
+
+     /*
+      * rw->counter++;
+      */
+     __asm__ __volatile__(
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   add %[val], %[val], 1           \n"
+     "   scond %[val], [%[rwlock]]       \n"
+     "   bnz 1b                          \n"
+     "                                   \n"
+     : [val]    "=&r" (val)
+     : [rwlock] "r" (&(rw->counter))
+     : "memory", "cc");
+
+     smp_mb();
+ }
+
+ static inline void arch_write_unlock(arch_rwlock_t *rw)
+ {
+     smp_mb();
+
+     rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
+
+     smp_mb();
+ }
+
+ #else /* CONFIG_ARC_STAR_9000923308 */
+
+ /*
+  * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
+  * coherency transactions in the SCU. The exclusive line state keeps rotating
+  * among contenting cores leading to a never ending cycle. So break the cycle
+  * by deferring the retry of failed exclusive access (SCOND). The actual delay
+  * needed is function of number of contending cores as well as the unrelated
+  * coherency traffic from other cores. To keep the code simple, start off with
+  * small delay of 1 which would suffice most cases and in case of contention
+  * double the delay. Eventually the delay is sufficient such that the coherency
+  * pipeline is drained, thus a subsequent exclusive access would succeed.
+  */
+
+ #define SCOND_FAIL_RETRY_VAR_DEF                                \
+     unsigned int delay, tmp;                                    \
+
+ #define SCOND_FAIL_RETRY_ASM                                    \
+     " ; --- scond fail delay ---\n"                             \
+     " mov %[tmp], %[delay]      \n" /* tmp = delay */           \
+     "2: brne.d %[tmp], 0, 2b    \n" /* while (tmp != 0) */      \
+     " sub %[tmp], %[tmp], 1     \n" /* tmp-- */                 \
+     " rol %[delay], %[delay]    \n" /* delay *= 2 */            \
+     " b 1b                      \n" /* start over */            \
+     "                           \n"                             \
+     "4: ; --- done ---          \n"                             \
+
+ #define SCOND_FAIL_RETRY_VARS                                   \
+     ,[delay] "=&r" (delay), [tmp] "=&r" (tmp)                   \
+
+ static inline void arch_spin_lock(arch_spinlock_t *lock)
+ {
+     unsigned int val;
+     SCOND_FAIL_RETRY_VAR_DEF;
+
+     smp_mb();
+
+     __asm__ __volatile__(
+     "0: mov %[delay], 1                 \n"
+     "1: llock %[val], [%[slock]]        \n"
+     "   breq %[val], %[LOCKED], 0b      \n" /* spin while LOCKED */
+     "   scond %[LOCKED], [%[slock]]     \n" /* acquire */
+     "   bz 4f                           \n" /* done */
+     "                                   \n"
+     SCOND_FAIL_RETRY_ASM
+
+     : [val]    "=&r" (val)
+       SCOND_FAIL_RETRY_VARS
+     : [slock]  "r" (&(lock->slock)),
+       [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
+     : "memory", "cc");
+
+     smp_mb();
+ }
+
+ /* 1 - lock taken successfully */
+ static inline int arch_spin_trylock(arch_spinlock_t *lock)
+ {
+     unsigned int val, got_it = 0;
+     SCOND_FAIL_RETRY_VAR_DEF;
+
+     smp_mb();
+
+     __asm__ __volatile__(
+     "0: mov %[delay], 1                 \n"
+     "1: llock %[val], [%[slock]]        \n"
+     "   breq %[val], %[LOCKED], 4f      \n" /* already LOCKED, just bail */
+     "   scond %[LOCKED], [%[slock]]     \n" /* acquire */
+     "   bz.d 4f                         \n"
+     "   mov.z %[got_it], 1              \n" /* got it */
+     "                                   \n"
+     SCOND_FAIL_RETRY_ASM
+
+     : [val]    "=&r" (val),
+       [got_it] "+&r" (got_it)
+       SCOND_FAIL_RETRY_VARS
+     : [slock]  "r" (&(lock->slock)),
+       [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
+     : "memory", "cc");
+
+     smp_mb();
+
+     return got_it;
+ }
+
+ static inline void arch_spin_unlock(arch_spinlock_t *lock)
+ {
+     smp_mb();
+
+     lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+     smp_mb();
+ }
+
+ /*
+  * Read-write spinlocks, allowing multiple readers but only one writer.
+  * Unfair locking as Writers could be starved indefinitely by Reader(s)
+  */
+
+ static inline void arch_read_lock(arch_rwlock_t *rw)
+ {
+     unsigned int val;
+     SCOND_FAIL_RETRY_VAR_DEF;
+
+     smp_mb();
+
+     /*
+      * zero means writer holds the lock exclusively, deny Reader.
+      * Otherwise grant lock to first/subseq reader
+      *
+      *  if (rw->counter > 0) {
+      *      rw->counter--;
+      *      ret = 1;
+      *  }
+      */
+
+     __asm__ __volatile__(
+     "0: mov %[delay], 1                 \n"
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   brls %[val], %[WR_LOCKED], 0b   \n" /* <= 0: spin while write locked */
+     "   sub %[val], %[val], 1           \n" /* reader lock */
+     "   scond %[val], [%[rwlock]]       \n"
+     "   bz 4f                           \n" /* done */
+     "                                   \n"
+     SCOND_FAIL_RETRY_ASM
+
+     : [val]       "=&r" (val)
+       SCOND_FAIL_RETRY_VARS
+     : [rwlock]    "r" (&(rw->counter)),
+       [WR_LOCKED] "ir" (0)
+     : "memory", "cc");
+
+     smp_mb();
+ }
+
+ /* 1 - lock taken successfully */
+ static inline int arch_read_trylock(arch_rwlock_t *rw)
+ {
+     unsigned int val, got_it = 0;
+     SCOND_FAIL_RETRY_VAR_DEF;
+
+     smp_mb();
+
+     __asm__ __volatile__(
+     "0: mov %[delay], 1                 \n"
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   brls %[val], %[WR_LOCKED], 4f   \n" /* <= 0: already write locked, bail */
+     "   sub %[val], %[val], 1           \n" /* counter-- */
+     "   scond %[val], [%[rwlock]]       \n"
+     "   bz.d 4f                         \n"
+     "   mov.z %[got_it], 1              \n" /* got it */
+     "                                   \n"
+     SCOND_FAIL_RETRY_ASM
+
+     : [val]       "=&r" (val),
+       [got_it]    "+&r" (got_it)
+       SCOND_FAIL_RETRY_VARS
+     : [rwlock]    "r" (&(rw->counter)),
+       [WR_LOCKED] "ir" (0)
+     : "memory", "cc");
+
+     smp_mb();
+
+     return got_it;
+ }
+
+ static inline void arch_write_lock(arch_rwlock_t *rw)
+ {
+     unsigned int val;
+     SCOND_FAIL_RETRY_VAR_DEF;
+
+     smp_mb();
+
+     /*
+      * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
+      * deny writer. Otherwise if unlocked grant to writer
+      * Hence the claim that Linux rwlocks are unfair to writers.
+      * (can be starved for an indefinite time by readers).
+      *
+      *  if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
+      *      rw->counter = 0;
+      *      ret = 1;
+      *  }
+      */
+
+     __asm__ __volatile__(
+     "0: mov %[delay], 1                 \n"
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   brne %[val], %[UNLOCKED], 0b    \n" /* while !UNLOCKED spin */
+     "   mov %[val], %[WR_LOCKED]        \n"
+     "   scond %[val], [%[rwlock]]       \n"
+     "   bz 4f                           \n"
+     "                                   \n"
+     SCOND_FAIL_RETRY_ASM
+
+     : [val]       "=&r" (val)
+       SCOND_FAIL_RETRY_VARS
+     : [rwlock]    "r" (&(rw->counter)),
+       [UNLOCKED]  "ir" (__ARCH_RW_LOCK_UNLOCKED__),
+       [WR_LOCKED] "ir" (0)
+     : "memory", "cc");
+
+     smp_mb();
+ }
+
+ /* 1 - lock taken successfully */
+ static inline int arch_write_trylock(arch_rwlock_t *rw)
+ {
+     unsigned int val, got_it = 0;
+     SCOND_FAIL_RETRY_VAR_DEF;
+
+     smp_mb();
+
+     __asm__ __volatile__(
+     "0: mov %[delay], 1                 \n"
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   brne %[val], %[UNLOCKED], 4f    \n" /* !UNLOCKED, bail */
+     "   mov %[val], %[WR_LOCKED]        \n"
+     "   scond %[val], [%[rwlock]]       \n"
+     "   bz.d 4f                         \n"
+     "   mov.z %[got_it], 1              \n" /* got it */
+     "                                   \n"
+     SCOND_FAIL_RETRY_ASM
+
+     : [val]       "=&r" (val),
+       [got_it]    "+&r" (got_it)
+       SCOND_FAIL_RETRY_VARS
+     : [rwlock]    "r" (&(rw->counter)),
+       [UNLOCKED]  "ir" (__ARCH_RW_LOCK_UNLOCKED__),
+       [WR_LOCKED] "ir" (0)
+     : "memory", "cc");
+
+     smp_mb();
+
+     return got_it;
+ }
+
+ static inline void arch_read_unlock(arch_rwlock_t *rw)
+ {
+     unsigned int val;
+
+     smp_mb();
+
+     /*
+      * rw->counter++;
+      */
+     __asm__ __volatile__(
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   add %[val], %[val], 1           \n"
+     "   scond %[val], [%[rwlock]]       \n"
+     "   bnz 1b                          \n"
+     "                                   \n"
+     : [val]    "=&r" (val)
+     : [rwlock] "r" (&(rw->counter))
+     : "memory", "cc");
+
+     smp_mb();
+ }
+
+ static inline void arch_write_unlock(arch_rwlock_t *rw)
+ {
+     unsigned int val;
+
+     smp_mb();
+
+     /*
+      * rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
+      */
+     __asm__ __volatile__(
+     "1: llock %[val], [%[rwlock]]       \n"
+     "   scond %[UNLOCKED], [%[rwlock]]  \n"
+     "   bnz 1b                          \n"
+     "                                   \n"
+     : [val]      "=&r" (val)
+     : [rwlock]   "r" (&(rw->counter)),
+       [UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__)
+     : "memory", "cc");
+
+     smp_mb();
+ }
+
+ #undef SCOND_FAIL_RETRY_VAR_DEF
+ #undef SCOND_FAIL_RETRY_ASM
+ #undef SCOND_FAIL_RETRY_VARS
+
+ #endif /* CONFIG_ARC_STAR_9000923308 */
+
+ #else /* !CONFIG_ARC_HAS_LLSC */
+
+ static inline void arch_spin_lock(arch_spinlock_t *lock)
+ {
+     unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;

      /*
       * This smp_mb() is technically superfluous, we only need the one
···
      __asm__ __volatile__(
      "1: ex  %0, [%1]        \n"
      "   breq  %0, %2, 1b    \n"
-     : "+&r" (tmp)
+     : "+&r" (val)
      : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
      : "memory");
···
      smp_mb();
  }

+ /* 1 - lock taken successfully */
  static inline int arch_spin_trylock(arch_spinlock_t *lock)
  {
-     unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+     unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;

      smp_mb();

      __asm__ __volatile__(
      "1: ex  %0, [%1]        \n"
-     : "+r" (tmp)
+     : "+r" (val)
      : "r"(&(lock->slock))
      : "memory");

      smp_mb();

-     return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
+     return (val == __ARCH_SPIN_LOCK_UNLOCKED__);
  }

  static inline void arch_spin_unlock(arch_spinlock_t *lock)
  {
-     unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
+     unsigned int val = __ARCH_SPIN_LOCK_UNLOCKED__;

      /*
       * RELEASE barrier: given the instructions avail on ARCv2, full barrier
···
      __asm__ __volatile__(
      "   ex  %0, [%1]        \n"
-     : "+r" (tmp)
+     : "+r" (val)
      : "r"(&(lock->slock))
      : "memory");
···

  /*
   * Read-write spinlocks, allowing multiple readers but only one writer.
+  * Unfair locking as Writers could be starved indefinitely by Reader(s)
   *
   * The spinlock itself is contained in @counter and access to it is
   * serialized with @lock_mutex.
-  *
-  * Unfair locking as Writers could be starved indefinitely by Reader(s)
   */
-
- /* Would read_trylock() succeed? */
- #define arch_read_can_lock(x)   ((x)->counter > 0)
-
- /* Would write_trylock() succeed? */
- #define arch_write_can_lock(x)  ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__)

  /* 1 - lock taken successfully */
  static inline int arch_read_trylock(arch_rwlock_t *rw)
···
      rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
      arch_spin_unlock(&(rw->lock_mutex));
  }
+
+ #endif
+
+ #define arch_read_can_lock(x)   ((x)->counter > 0)
+ #define arch_write_can_lock(x)  ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__)

  #define arch_read_lock_flags(lock, flags)   arch_read_lock(lock)
  #define arch_write_lock_flags(lock, flags)  arch_write_lock(lock)
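The protocol encoded by those comment blocks, written out as plain C (conceptual only; this deliberately ignores the atomicity that the llock/scond loops provide): the counter starts at __ARCH_RW_LOCK_UNLOCKED__ (0x01000000), each reader decrements it, and a writer takes it all the way to zero.

#define RW_UNLOCKED 0x01000000  /* mirrors __ARCH_RW_LOCK_UNLOCKED__ */

static int read_trylock_sketch(unsigned int *counter)
{
    if (*counter > 0) {             /* zero means a writer holds it */
        (*counter)--;               /* one more reader */
        return 1;
    }
    return 0;                       /* write locked: deny reader */
}

static int write_trylock_sketch(unsigned int *counter)
{
    if (*counter == RW_UNLOCKED) {  /* no readers, no writer */
        *counter = 0;               /* writer holds it exclusively */
        return 1;
    }
    return 0;                       /* held: deny writer */
}

This also makes the unfairness visible: as long as the counter stays above zero, new readers keep getting in and a waiting writer never observes RW_UNLOCKED.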
+2
arch/arc/include/asm/spinlock_types.h
···
   */
  typedef struct {
      volatile unsigned int counter;
+ #ifndef CONFIG_ARC_HAS_LLSC
      arch_spinlock_t lock_mutex;
+ #endif
  } arch_rwlock_t;

  #define __ARCH_RW_LOCK_UNLOCKED__   0x01000000
+10 -10
arch/arc/include/uapi/asm/ptrace.h
···
   */
  struct user_regs_struct {

-     long pad;
+     unsigned long pad;
      struct {
-         long bta, lp_start, lp_end, lp_count;
-         long status32, ret, blink, fp, gp;
-         long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
-         long sp;
+         unsigned long bta, lp_start, lp_end, lp_count;
+         unsigned long status32, ret, blink, fp, gp;
+         unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
+         unsigned long sp;
      } scratch;
-     long pad2;
+     unsigned long pad2;
      struct {
-         long r25, r24, r23, r22, r21, r20;
-         long r19, r18, r17, r16, r15, r14, r13;
+         unsigned long r25, r24, r23, r22, r21, r20;
+         unsigned long r19, r18, r17, r16, r15, r14, r13;
      } callee;
-     long efa;       /* break pt addr, for break points in delay slots */
-     long stop_pc;   /* give dbg stop_pc after ensuring brkpt trap */
+     unsigned long efa;      /* break pt addr, for break points in delay slots */
+     unsigned long stop_pc;  /* give dbg stop_pc after ensuring brkpt trap */
  };
  #endif /* !__ASSEMBLY__ */
+11 -1
arch/arc/kernel/setup.c
···
      struct bcr_perip uncached_space;
      struct bcr_generic bcr;
      struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+     unsigned long perip_space;
      FIX_PTR(cpu);

      READ_BCR(AUX_IDENTITY, cpu->core);
···
      cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);

      READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
-     BUG_ON((uncached_space.start << 24) != ARC_UNCACHED_ADDR_SPACE);
+     if (uncached_space.ver < 3)
+         perip_space = uncached_space.start << 24;
+     else
+         perip_space = read_aux_reg(AUX_NON_VOL) & 0xF0000000;
+
+     BUG_ON(perip_space != ARC_UNCACHED_ADDR_SPACE);

      READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
···
          pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
      else if (!cpu->extn.fpu_dp && fpu_enabled)
          panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
+
+     if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
+         !IS_ENABLED(CONFIG_ARC_STAR_9000923308))
+         panic("llock/scond livelock workaround missing\n");
  }

  /*
+15 -25
arch/arc/kernel/time.c
···
      return 0;
  }

- static void arc_clkevent_set_mode(enum clock_event_mode mode,
-                                   struct clock_event_device *dev)
+ static int arc_clkevent_set_periodic(struct clock_event_device *dev)
  {
-     switch (mode) {
-     case CLOCK_EVT_MODE_PERIODIC:
-         /*
-          * At X Hz, 1 sec = 1000ms -> X cycles;
-          * 10ms -> X / 100 cycles
-          */
-         arc_timer_event_setup(arc_get_core_freq() / HZ);
-         break;
-     case CLOCK_EVT_MODE_ONESHOT:
-         break;
-     default:
-         break;
-     }
-
-     return;
+     /*
+      * At X Hz, 1 sec = 1000ms -> X cycles;
+      * 10ms -> X / 100 cycles
+      */
+     arc_timer_event_setup(arc_get_core_freq() / HZ);
+     return 0;
  }

  static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = {
-     .name           = "ARC Timer0",
-     .features       = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
-     .mode           = CLOCK_EVT_MODE_UNUSED,
-     .rating         = 300,
-     .irq            = TIMER0_IRQ,   /* hardwired, no need for resources */
-     .set_next_event = arc_clkevent_set_next_event,
-     .set_mode       = arc_clkevent_set_mode,
+     .name                = "ARC Timer0",
+     .features            = CLOCK_EVT_FEAT_ONESHOT |
+                            CLOCK_EVT_FEAT_PERIODIC,
+     .rating              = 300,
+     .irq                 = TIMER0_IRQ, /* hardwired, no need for resources */
+     .set_next_event      = arc_clkevent_set_next_event,
+     .set_state_periodic  = arc_clkevent_set_periodic,
  };

  static irqreturn_t timer_irq_handler(int irq, void *dev_id)
···
       * irq_set_chip_and_handler() asked for handle_percpu_devid_irq()
       */
      struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device);
-     int irq_reenable = evt->mode == CLOCK_EVT_MODE_PERIODIC;
+     int irq_reenable = clockevent_state_periodic(evt);

      /*
       * Any write to CTRL reg ACks the interrupt, we rewrite the
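For context, the new clockevents interface replaces the single multiplexed set_mode() callback with one hook per state. A skeletal driver under the new scheme looks roughly like this (sketch only; "my-timer" and the empty hook body are placeholders, and a real driver programs hardware in each hook):

#include <linux/clockchips.h>

static int my_timer_set_periodic(struct clock_event_device *dev)
{
    /* program the timer for HZ-rate periodic interrupts here */
    return 0;
}

static struct clock_event_device my_timer_evt = {
    .name               = "my-timer",   /* hypothetical device */
    .features           = CLOCK_EVT_FEAT_PERIODIC |
                          CLOCK_EVT_FEAT_ONESHOT,
    .set_state_periodic = my_timer_set_periodic,
    /* .set_state_oneshot / .set_state_shutdown are likewise optional */
};

State queries move with it: instead of inspecting evt->mode, handlers call helpers such as clockevent_state_periodic(), as the interrupt-handler hunk above shows.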
+1 -1
arch/arc/lib/memcpy-archs.S
···
      ld.ab   r6, [r1, 4]
      prefetch [r1, 28]   ;Prefetch the next read location
      ld.ab   r8, [r1,4]
-     prefetch [r3, 32]   ;Prefetch the next write location
+     prefetchw [r3, 32]  ;Prefetch the next write location

      SHIFT_1 (r7, r6, 8)
      or      r7, r7, r5
+36 -7
arch/arc/lib/memset-archs.S
···

  #undef PREALLOC_NOT_AVAIL

- #ifdef PREALLOC_NOT_AVAIL
- #define PREWRITE(A,B)   prefetchw [(A),(B)]
- #else
- #define PREWRITE(A,B)   prealloc [(A),(B)]
- #endif
-
  ENTRY(memset)
      prefetchw [r0]      ; Prefetch the write location
      mov.f   0, r2
···

  ;;; Convert len to Dwords, unfold x8
      lsr.f   lp_count, lp_count, 6
+
      lpnz    @.Lset64bytes
      ;; LOOP START
-     PREWRITE(r3, 64)    ;Prefetch the next write location
+ #ifdef PREALLOC_NOT_AVAIL
+     prefetchw [r3, 64]  ;Prefetch the next write location
+ #else
+     prealloc [r3, 64]
+ #endif
+ #ifdef CONFIG_ARC_HAS_LL64
      std.ab  r4, [r3, 8]
      std.ab  r4, [r3, 8]
      std.ab  r4, [r3, 8]
···
      std.ab  r4, [r3, 8]
      std.ab  r4, [r3, 8]
      std.ab  r4, [r3, 8]
+ #else
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+ #endif
  .Lset64bytes:

      lsr.f   lp_count, r2, 5 ;Last remaining max 124 bytes
      lpnz    .Lset32bytes
      ;; LOOP START
      prefetchw [r3, 32]  ;Prefetch the next write location
+ #ifdef CONFIG_ARC_HAS_LL64
      std.ab  r4, [r3, 8]
      std.ab  r4, [r3, 8]
      std.ab  r4, [r3, 8]
      std.ab  r4, [r3, 8]
+ #else
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+     st.ab   r4, [r3, 4]
+ #endif
  .Lset32bytes:

      and.f   lp_count, r2, 0x1F ;Last remaining 31 bytes
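What the new !CONFIG_ARC_HAS_LL64 path boils down to, in C: the same 64-byte chunks, written as sixteen 32-bit stores instead of eight 64-bit ones. A conceptual equivalent of the unrolled loop (not the kernel code; the function name and signature are illustrative):

void memset_chunks_sketch(unsigned int *dst, unsigned int pattern,
                          unsigned int nchunks)
{
    while (nchunks--) {
        /* 16 x 4-byte stores = one 64-byte chunk per iteration */
        for (int i = 0; i < 16; i++)
            *dst++ = pattern;
    }
}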
+15
arch/arc/plat-axs10x/axs10x.c
···

  static void __init axs103_early_init(void)
  {
+     /*
+      * AXS103 configurations for SMP/QUAD configurations share device tree
+      * which defaults to 90 MHz. However recent failures of Quad config
+      * revealed P&R timing violations so clamp it down to safe 50 MHz
+      * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
+      *
+      * This hack is really hacky as of now. Fix it properly by getting the
+      * number of cores as return value of platform's early SMP callback
+      */
+ #ifdef CONFIG_ARC_MCIP
+     unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
+     if (num_cores > 2)
+         arc_set_core_freq(50 * 1000000);
+ #endif
+
      switch (arc_get_core_freq()/1000000) {
      case 33:
          axs103_set_freq(1, 1, 1);