Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 's390-5.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull more s390 updates from Vasily Gorbik:

- Fix preempt_count initialization.

- Rework call_on_stack() macro to add proper type handling and avoid
possible register corruption.

- More removal of and fixes for error-prone "register asm" usage.

- Fix syscall restarting when multiple signals are coming in. This adds
minimalistic trampolines to the vdso so we can return from a signal
without using the stack, which requires program check handler hacks
when NX is enabled.

- Remove HAVE_IRQ_EXIT_ON_IRQ_STACK since this is no longer true after
the switch to generic entry.

- Fix protected virtualization secure storage access exception
handling.

- Make the machine check C handler always enter with DAT enabled and
move register validation to C code.

- Fix tinyconfig boot problem by avoiding MONITOR CALL without
CONFIG_BUG.

- Increase asm symbols alignment to 16 to make it consistent with
compilers.

- Enable concurrent access to the CPU Measurement Counter Facility.

- Add support for dynamic AP bus size limit and rework ap_dqap to deal
with messages larger than the receive buffer.

* tag 's390-5.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (41 commits)
s390: preempt: Fix preempt_count initialization
s390/linkage: increase asm symbols alignment to 16
s390: rename CALL_ON_STACK_NORETURN() to call_on_stack_noreturn()
s390: add type checking to CALL_ON_STACK_NORETURN() macro
s390: remove old CALL_ON_STACK() macro
s390/softirq: use call_on_stack() macro
s390/lib: use call_on_stack() macro
s390/smp: use call_on_stack() macro
s390/kexec: use call_on_stack() macro
s390/irq: use call_on_stack() macro
s390/mm: use call_on_stack() macro
s390: introduce proper type handling call_on_stack() macro
s390/irq: simplify on_async_stack()
s390/irq: inline do_softirq_own_stack()
s390/irq: simplify do_softirq_own_stack()
s390/ap: get rid of register asm in ap_dqap()
s390: rename PIF_SYSCALL_RESTART to PIF_EXECVE_PGSTE_RESTART
s390: move restart of execve() syscall
s390/signal: remove sigreturn on stack
s390/signal: switch to using vdso for sigreturn and syscall restart
...

+2110 -1721
+1 -1
arch/s390/Kconfig
··· 163 163 select HAVE_GCC_PLUGINS 164 164 select HAVE_GENERIC_VDSO 165 165 select HAVE_IOREMAP_PROT if PCI 166 - select HAVE_IRQ_EXIT_ON_IRQ_STACK 167 166 select HAVE_KERNEL_BZIP2 168 167 select HAVE_KERNEL_GZIP 169 168 select HAVE_KERNEL_LZ4 ··· 437 438 select COMPAT_OLD_SIGACTION 438 439 select HAVE_UID16 439 440 depends on MULTIUSER 441 + depends on !CC_IS_CLANG 440 442 help 441 443 Select this option if you want to enable your system kernel to 442 444 handle system-calls from ELF binaries for 31 bit ESA. This option
+13
arch/s390/Makefile
··· 166 166 archprepare: 167 167 $(Q)$(MAKE) $(build)=$(syscalls) kapi 168 168 $(Q)$(MAKE) $(build)=$(tools) kapi 169 + ifeq ($(KBUILD_EXTMOD),) 170 + # We need to generate vdso-offsets.h before compiling certain files in kernel/. 171 + # In order to do that, we should use the archprepare target, but we can't since 172 + # asm-offsets.h is included in some files used to generate vdso-offsets.h, and 173 + # asm-offsets.h is built in prepare0, for which archprepare is a dependency. 174 + # Therefore we need to generate the header after prepare0 has been made, hence 175 + # this hack. 176 + prepare: vdso_prepare 177 + vdso_prepare: prepare0 178 + $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h 179 + $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ 180 + $(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h) 181 + endif 169 182 170 183 # Don't use tabs in echo arguments 171 184 define archhelp
+1
arch/s390/boot/startup.c
··· 23 23 unsigned long __bootdata_preserved(MODULES_VADDR); 24 24 unsigned long __bootdata_preserved(MODULES_END); 25 25 unsigned long __bootdata(ident_map_size); 26 + int __bootdata(is_full_image) = 1; 26 27 27 28 u64 __bootdata_preserved(stfle_fac_list[16]); 28 29 u64 __bootdata_preserved(alt_stfle_fac_list[16]);
+1
arch/s390/boot/uv.c
··· 36 36 uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE); 37 37 uv_info.max_num_sec_conf = uvcb.max_num_sec_conf; 38 38 uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id; 39 + uv_info.uv_feature_indications = uvcb.uv_feature_indications; 39 40 } 40 41 41 42 #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+141 -77
arch/s390/include/asm/ap.h
··· 53 53 */ 54 54 static inline bool ap_instructions_available(void) 55 55 { 56 - register unsigned long reg0 asm ("0") = AP_MKQID(0, 0); 57 - register unsigned long reg1 asm ("1") = 0; 58 - register unsigned long reg2 asm ("2") = 0; 56 + unsigned long reg0 = AP_MKQID(0, 0); 57 + unsigned long reg1 = 0; 59 58 60 59 asm volatile( 61 - " .long 0xb2af0000\n" /* PQAP(TAPQ) */ 62 - "0: la %0,1\n" 60 + " lgr 0,%[reg0]\n" /* qid into gr0 */ 61 + " lghi 1,0\n" /* 0 into gr1 */ 62 + " lghi 2,0\n" /* 0 into gr2 */ 63 + " .long 0xb2af0000\n" /* PQAP(TAPQ) */ 64 + "0: la %[reg1],1\n" /* 1 into reg1 */ 63 65 "1:\n" 64 66 EX_TABLE(0b, 1b) 65 - : "+d" (reg1), "+d" (reg2) 66 - : "d" (reg0) 67 - : "cc"); 67 + : [reg1] "+&d" (reg1) 68 + : [reg0] "d" (reg0) 69 + : "cc", "0", "1", "2"); 68 70 return reg1 != 0; 69 71 } 70 72 ··· 79 77 */ 80 78 static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) 81 79 { 82 - register unsigned long reg0 asm ("0") = qid; 83 - register struct ap_queue_status reg1 asm ("1"); 84 - register unsigned long reg2 asm ("2"); 80 + struct ap_queue_status reg1; 81 + unsigned long reg2; 85 82 86 - asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ 87 - : "=d" (reg1), "=d" (reg2) 88 - : "d" (reg0) 89 - : "cc"); 83 + asm volatile( 84 + " lgr 0,%[qid]\n" /* qid into gr0 */ 85 + " lghi 2,0\n" /* 0 into gr2 */ 86 + " .long 0xb2af0000\n" /* PQAP(TAPQ) */ 87 + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ 88 + " lgr %[reg2],2\n" /* gr2 into reg2 */ 89 + : [reg1] "=&d" (reg1), [reg2] "=&d" (reg2) 90 + : [qid] "d" (qid) 91 + : "cc", "0", "1", "2"); 90 92 if (info) 91 93 *info = reg2; 92 94 return reg1; ··· 121 115 */ 122 116 static inline struct ap_queue_status ap_rapq(ap_qid_t qid) 123 117 { 124 - register unsigned long reg0 asm ("0") = qid | (1UL << 24); 125 - register struct ap_queue_status reg1 asm ("1"); 118 + unsigned long reg0 = qid | (1UL << 24); /* fc 1UL is RAPQ */ 119 + struct ap_queue_status reg1; 126 120 127 121 asm volatile( 128 - 
".long 0xb2af0000" /* PQAP(RAPQ) */ 129 - : "=d" (reg1) 130 - : "d" (reg0) 131 - : "cc"); 122 + " lgr 0,%[reg0]\n" /* qid arg into gr0 */ 123 + " .long 0xb2af0000\n" /* PQAP(RAPQ) */ 124 + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ 125 + : [reg1] "=&d" (reg1) 126 + : [reg0] "d" (reg0) 127 + : "cc", "0", "1"); 132 128 return reg1; 133 129 } 134 130 ··· 142 134 */ 143 135 static inline struct ap_queue_status ap_zapq(ap_qid_t qid) 144 136 { 145 - register unsigned long reg0 asm ("0") = qid | (2UL << 24); 146 - register struct ap_queue_status reg1 asm ("1"); 137 + unsigned long reg0 = qid | (2UL << 24); /* fc 2UL is ZAPQ */ 138 + struct ap_queue_status reg1; 147 139 148 140 asm volatile( 149 - ".long 0xb2af0000" /* PQAP(ZAPQ) */ 150 - : "=d" (reg1) 151 - : "d" (reg0) 152 - : "cc"); 141 + " lgr 0,%[reg0]\n" /* qid arg into gr0 */ 142 + " .long 0xb2af0000\n" /* PQAP(ZAPQ) */ 143 + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ 144 + : [reg1] "=&d" (reg1) 145 + : [reg0] "d" (reg0) 146 + : "cc", "0", "1"); 153 147 return reg1; 154 148 } 155 149 ··· 182 172 */ 183 173 static inline int ap_qci(struct ap_config_info *config) 184 174 { 185 - register unsigned long reg0 asm ("0") = 4UL << 24; 186 - register unsigned long reg1 asm ("1") = -EOPNOTSUPP; 187 - register struct ap_config_info *reg2 asm ("2") = config; 175 + unsigned long reg0 = 4UL << 24; /* fc 4UL is QCI */ 176 + unsigned long reg1 = -EOPNOTSUPP; 177 + struct ap_config_info *reg2 = config; 188 178 189 179 asm volatile( 190 - ".long 0xb2af0000\n" /* PQAP(QCI) */ 191 - "0: la %0,0\n" 180 + " lgr 0,%[reg0]\n" /* QCI fc into gr0 */ 181 + " lgr 2,%[reg2]\n" /* ptr to config into gr2 */ 182 + " .long 0xb2af0000\n" /* PQAP(QCI) */ 183 + "0: la %[reg1],0\n" /* good case, QCI fc available */ 192 184 "1:\n" 193 185 EX_TABLE(0b, 1b) 194 - : "+d" (reg1) 195 - : "d" (reg0), "d" (reg2) 196 - : "cc", "memory"); 186 + : [reg1] "+&d" (reg1) 187 + : [reg0] "d" (reg0), [reg2] "d" (reg2) 188 + : "cc", "memory", "0", "2"); 197 189 
198 190 return reg1; 199 191 } ··· 232 220 struct ap_qirq_ctrl qirqctrl, 233 221 void *ind) 234 222 { 235 - register unsigned long reg0 asm ("0") = qid | (3UL << 24); 236 - register union { 223 + unsigned long reg0 = qid | (3UL << 24); /* fc 3UL is AQIC */ 224 + union { 237 225 unsigned long value; 238 226 struct ap_qirq_ctrl qirqctrl; 239 227 struct ap_queue_status status; 240 - } reg1 asm ("1"); 241 - register void *reg2 asm ("2") = ind; 228 + } reg1; 229 + void *reg2 = ind; 242 230 243 231 reg1.qirqctrl = qirqctrl; 244 232 245 233 asm volatile( 246 - ".long 0xb2af0000" /* PQAP(AQIC) */ 247 - : "+d" (reg1) 248 - : "d" (reg0), "d" (reg2) 249 - : "cc"); 234 + " lgr 0,%[reg0]\n" /* qid param into gr0 */ 235 + " lgr 1,%[reg1]\n" /* irq ctrl into gr1 */ 236 + " lgr 2,%[reg2]\n" /* ni addr into gr2 */ 237 + " .long 0xb2af0000\n" /* PQAP(AQIC) */ 238 + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ 239 + : [reg1] "+&d" (reg1) 240 + : [reg0] "d" (reg0), [reg2] "d" (reg2) 241 + : "cc", "0", "1", "2"); 250 242 251 243 return reg1.status; 252 244 } ··· 284 268 static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, 285 269 union ap_qact_ap_info *apinfo) 286 270 { 287 - register unsigned long reg0 asm ("0") = qid | (5UL << 24) 288 - | ((ifbit & 0x01) << 22); 289 - register union { 271 + unsigned long reg0 = qid | (5UL << 24) | ((ifbit & 0x01) << 22); 272 + union { 290 273 unsigned long value; 291 274 struct ap_queue_status status; 292 - } reg1 asm ("1"); 293 - register unsigned long reg2 asm ("2"); 275 + } reg1; 276 + unsigned long reg2; 294 277 295 278 reg1.value = apinfo->val; 296 279 297 280 asm volatile( 298 - ".long 0xb2af0000" /* PQAP(QACT) */ 299 - : "+d" (reg1), "=d" (reg2) 300 - : "d" (reg0) 301 - : "cc"); 281 + " lgr 0,%[reg0]\n" /* qid param into gr0 */ 282 + " lgr 1,%[reg1]\n" /* qact in info into gr1 */ 283 + " .long 0xb2af0000\n" /* PQAP(QACT) */ 284 + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ 285 + " lgr %[reg2],2\n" /* qact out info into 
reg2 */ 286 + : [reg1] "+&d" (reg1), [reg2] "=&d" (reg2) 287 + : [reg0] "d" (reg0) 288 + : "cc", "0", "1", "2"); 302 289 apinfo->val = reg2; 303 290 return reg1.status; 304 291 } ··· 322 303 unsigned long long psmid, 323 304 void *msg, size_t length) 324 305 { 325 - register unsigned long reg0 asm ("0") = qid | 0x40000000UL; 326 - register struct ap_queue_status reg1 asm ("1"); 327 - register unsigned long reg2 asm ("2") = (unsigned long) msg; 328 - register unsigned long reg3 asm ("3") = (unsigned long) length; 329 - register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); 330 - register unsigned long reg5 asm ("5") = psmid & 0xffffffff; 306 + unsigned long reg0 = qid | 0x40000000UL; /* 0x4... is last msg part */ 307 + union register_pair nqap_r1, nqap_r2; 308 + struct ap_queue_status reg1; 309 + 310 + nqap_r1.even = (unsigned int)(psmid >> 32); 311 + nqap_r1.odd = psmid & 0xffffffff; 312 + nqap_r2.even = (unsigned long)msg; 313 + nqap_r2.odd = (unsigned long)length; 331 314 332 315 asm volatile ( 333 - "0: .long 0xb2ad0042\n" /* NQAP */ 334 - " brc 2,0b" 335 - : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) 336 - : "d" (reg4), "d" (reg5) 337 - : "cc", "memory"); 316 + " lgr 0,%[reg0]\n" /* qid param in gr0 */ 317 + "0: .insn rre,0xb2ad0000,%[nqap_r1],%[nqap_r2]\n" 318 + " brc 2,0b\n" /* handle partial completion */ 319 + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ 320 + : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1), 321 + [nqap_r2] "+&d" (nqap_r2.pair) 322 + : [nqap_r1] "d" (nqap_r1.pair) 323 + : "cc", "memory", "0", "1"); 338 324 return reg1; 339 325 } 340 326 ··· 349 325 * @psmid: Pointer to program supplied message identifier 350 326 * @msg: The message text 351 327 * @length: The message length 328 + * @reslength: Resitual length on return 329 + * @resgr0: input: gr0 value (only used if != 0), output: resitual gr0 content 352 330 * 353 331 * Returns AP queue status structure. 
354 332 * Condition code 1 on DQAP means the receive has taken place ··· 362 336 * Note that gpr2 is used by the DQAP instruction to keep track of 363 337 * any 'residual' length, in case the instruction gets interrupted. 364 338 * Hence it gets zeroed before the instruction. 339 + * If the message does not fit into the buffer, this function will 340 + * return with a truncated message and the reply in the firmware queue 341 + * is not removed. This is indicated to the caller with an 342 + * ap_queue_status response_code value of all bits on (0xFF) and (if 343 + * the reslength ptr is given) the remaining length is stored in 344 + * *reslength and (if the resgr0 ptr is given) the updated gr0 value 345 + * for further processing of this msg entry is stored in *resgr0. The 346 + * caller needs to detect this situation and should invoke ap_dqap 347 + * with a valid resgr0 ptr and a value in there != 0 to indicate that 348 + * *resgr0 is to be used instead of qid to further process this entry. 365 349 */ 366 350 static inline struct ap_queue_status ap_dqap(ap_qid_t qid, 367 351 unsigned long long *psmid, 368 - void *msg, size_t length) 352 + void *msg, size_t length, 353 + size_t *reslength, 354 + unsigned long *resgr0) 369 355 { 370 - register unsigned long reg0 asm("0") = qid | 0x80000000UL; 371 - register struct ap_queue_status reg1 asm ("1"); 372 - register unsigned long reg2 asm("2") = 0UL; 373 - register unsigned long reg4 asm("4") = (unsigned long) msg; 374 - register unsigned long reg5 asm("5") = (unsigned long) length; 375 - register unsigned long reg6 asm("6") = 0UL; 376 - register unsigned long reg7 asm("7") = 0UL; 356 + unsigned long reg0 = resgr0 && *resgr0 ? 
*resgr0 : qid | 0x80000000UL; 357 + struct ap_queue_status reg1; 358 + unsigned long reg2; 359 + union register_pair rp1, rp2; 377 360 361 + rp1.even = 0UL; 362 + rp1.odd = 0UL; 363 + rp2.even = (unsigned long)msg; 364 + rp2.odd = (unsigned long)length; 378 365 379 366 asm volatile( 380 - "0: .long 0xb2ae0064\n" /* DQAP */ 381 - " brc 6,0b\n" 382 - : "+d" (reg0), "=d" (reg1), "+d" (reg2), 383 - "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7) 384 - : : "cc", "memory"); 385 - *psmid = (((unsigned long long) reg6) << 32) + reg7; 367 + " lgr 0,%[reg0]\n" /* qid param into gr0 */ 368 + " lghi 2,0\n" /* 0 into gr2 (res length) */ 369 + "0: ltgr %N[rp2],%N[rp2]\n" /* check buf len */ 370 + " jz 2f\n" /* go out if buf len is 0 */ 371 + "1: .insn rre,0xb2ae0000,%[rp1],%[rp2]\n" 372 + " brc 6,0b\n" /* handle partial complete */ 373 + "2: lgr %[reg0],0\n" /* gr0 (qid + info) into reg0 */ 374 + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ 375 + " lgr %[reg2],2\n" /* gr2 (res length) into reg2 */ 376 + : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1), [reg2] "=&d" (reg2), 377 + [rp1] "+&d" (rp1.pair), [rp2] "+&d" (rp2.pair) 378 + : 379 + : "cc", "memory", "0", "1", "2"); 380 + 381 + if (reslength) 382 + *reslength = reg2; 383 + if (reg2 != 0 && rp2.odd == 0) { 384 + /* 385 + * Partially complete, status in gr1 is not set. 386 + * Signal the caller that this dqap is only partially received 387 + * with a special status response code 0xFF and *resgr0 updated 388 + */ 389 + reg1.response_code = 0xFF; 390 + if (resgr0) 391 + *resgr0 = reg0; 392 + } else { 393 + *psmid = (((unsigned long long)rp1.even) << 32) + rp1.odd; 394 + if (resgr0) 395 + *resgr0 = 0; 396 + } 397 + 386 398 return reg1; 387 399 } 388 400
+14 -22
arch/s390/include/asm/cpu_mcf.h
··· 32 32 [CPUMF_CTR_SET_MT_DIAG] = 0x20, 33 33 }; 34 34 35 - static inline void ctr_set_enable(u64 *state, int ctr_set) 36 - { 37 - *state |= cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; 38 - } 39 - static inline void ctr_set_disable(u64 *state, int ctr_set) 40 - { 41 - *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); 42 - } 43 - static inline void ctr_set_start(u64 *state, int ctr_set) 44 - { 45 - *state |= cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; 46 - } 47 - static inline void ctr_set_stop(u64 *state, int ctr_set) 48 - { 49 - *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); 50 - } 51 - 52 - static inline void ctr_set_multiple_enable(u64 *state, u64 ctrsets) 35 + static inline void ctr_set_enable(u64 *state, u64 ctrsets) 53 36 { 54 37 *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT; 55 38 } 56 39 57 - static inline void ctr_set_multiple_disable(u64 *state, u64 ctrsets) 40 + static inline void ctr_set_disable(u64 *state, u64 ctrsets) 58 41 { 59 42 *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT); 60 43 } 61 44 62 - static inline void ctr_set_multiple_start(u64 *state, u64 ctrsets) 45 + static inline void ctr_set_start(u64 *state, u64 ctrsets) 63 46 { 64 47 *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT; 65 48 } 66 49 67 - static inline void ctr_set_multiple_stop(u64 *state, u64 ctrsets) 50 + static inline void ctr_set_stop(u64 *state, u64 ctrsets) 68 51 { 69 52 *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT); 70 53 } ··· 75 92 struct cpumf_ctr_info info; 76 93 atomic_t ctr_set[CPUMF_CTR_SET_MAX]; 77 94 atomic64_t alert; 78 - u64 state; 95 + u64 state; /* For perf_event_open SVC */ 96 + u64 dev_state; /* For /dev/hwctr */ 79 97 unsigned int flags; 98 + size_t used; /* Bytes used in data */ 99 + size_t usedss; /* Bytes used in start/stop */ 100 + unsigned char start[PAGE_SIZE]; /* Counter set at event add */ 101 + unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */ 102 + unsigned char data[PAGE_SIZE]; /* 
Counter set at /dev/hwctr */ 103 + unsigned int sets; /* # Counter set saved in memory */ 80 104 }; 81 105 DECLARE_PER_CPU(struct cpu_cf_events, cpu_cf_events); 82 106 ··· 114 124 115 125 size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset, 116 126 struct cpumf_ctr_info *info); 127 + int cfset_online_cpu(unsigned int cpu); 128 + int cfset_offline_cpu(unsigned int cpu); 117 129 #endif /* _ASM_S390_CPU_MCF_H */
-2
arch/s390/include/asm/ctl_reg.h
··· 21 21 #define CR0_INTERRUPT_KEY_SUBMASK BIT(63 - 57) 22 22 #define CR0_MEASUREMENT_ALERT_SUBMASK BIT(63 - 58) 23 23 24 - #define CR2_GUARDED_STORAGE BIT(63 - 59) 25 - 26 24 #define CR14_UNUSED_32 BIT(63 - 32) 27 25 #define CR14_UNUSED_33 BIT(63 - 33) 28 26 #define CR14_CHANNEL_REPORT_SUBMASK BIT(63 - 35)
+5 -10
arch/s390/include/asm/elf.h
··· 144 144 #include <linux/sched/mm.h> /* for task_struct */ 145 145 #include <asm/mmu_context.h> 146 146 147 - #include <asm/vdso.h> 148 - 149 - extern unsigned int vdso_enabled; 150 - 151 147 /* 152 148 * This is used to ensure we don't load something for the wrong architecture. 153 149 */ ··· 172 176 !current->mm->context.alloc_pgste) { \ 173 177 set_thread_flag(TIF_PGSTE); \ 174 178 set_pt_regs_flag(task_pt_regs(current), \ 175 - PIF_SYSCALL_RESTART); \ 179 + PIF_EXECVE_PGSTE_RESTART); \ 176 180 _state->rc = -EAGAIN; \ 177 181 } \ 178 182 _state->rc; \ ··· 264 268 #define STACK_RND_MASK MMAP_RND_MASK 265 269 266 270 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ 267 - #define ARCH_DLINFO \ 268 - do { \ 269 - if (vdso_enabled) \ 270 - NEW_AUX_ENT(AT_SYSINFO_EHDR, \ 271 - (unsigned long)current->mm->context.vdso_base); \ 271 + #define ARCH_DLINFO \ 272 + do { \ 273 + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ 274 + (unsigned long)current->mm->context.vdso_base); \ 272 275 } while (0) 273 276 274 277 struct linux_binprm;
-1
arch/s390/include/asm/entry-common.h
··· 14 14 #define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP) 15 15 16 16 void do_per_trap(struct pt_regs *regs); 17 - void do_syscall(struct pt_regs *regs); 18 17 19 18 #ifdef CONFIG_DEBUG_ENTRY 20 19 static __always_inline void arch_check_user_regs(struct pt_regs *regs)
+1 -1
arch/s390/include/asm/linkage.h
··· 5 5 #include <asm/asm-const.h> 6 6 #include <linux/stringify.h> 7 7 8 - #define __ALIGN .align 4, 0x07 8 + #define __ALIGN .align 16, 0x07 9 9 #define __ALIGN_STR __stringify(__ALIGN) 10 10 11 11 /*
+5 -1
arch/s390/include/asm/nmi.h
··· 23 23 #define MCCK_CODE_SYSTEM_DAMAGE BIT(63) 24 24 #define MCCK_CODE_EXT_DAMAGE BIT(63 - 5) 25 25 #define MCCK_CODE_CP BIT(63 - 9) 26 - #define MCCK_CODE_CPU_TIMER_VALID BIT(63 - 46) 26 + #define MCCK_CODE_STG_ERROR BIT(63 - 16) 27 + #define MCCK_CODE_STG_KEY_ERROR BIT(63 - 18) 28 + #define MCCK_CODE_STG_DEGRAD BIT(63 - 19) 27 29 #define MCCK_CODE_PSW_MWP_VALID BIT(63 - 20) 28 30 #define MCCK_CODE_PSW_IA_VALID BIT(63 - 23) 31 + #define MCCK_CODE_STG_FAIL_ADDR BIT(63 - 24) 29 32 #define MCCK_CODE_CR_VALID BIT(63 - 29) 30 33 #define MCCK_CODE_GS_VALID BIT(63 - 36) 31 34 #define MCCK_CODE_FC_VALID BIT(63 - 43) 35 + #define MCCK_CODE_CPU_TIMER_VALID BIT(63 - 46) 32 36 33 37 #ifndef __ASSEMBLY__ 34 38
+4 -12
arch/s390/include/asm/preempt.h
··· 29 29 old, new) != old); 30 30 } 31 31 32 - #define init_task_preempt_count(p) do { } while (0) 33 - 34 - #define init_idle_preempt_count(p, cpu) do { \ 35 - S390_lowcore.preempt_count = PREEMPT_DISABLED; \ 36 - } while (0) 37 - 38 32 static inline void set_preempt_need_resched(void) 39 33 { 40 34 __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); ··· 82 88 S390_lowcore.preempt_count = pc; 83 89 } 84 90 85 - #define init_task_preempt_count(p) do { } while (0) 86 - 87 - #define init_idle_preempt_count(p, cpu) do { \ 88 - S390_lowcore.preempt_count = PREEMPT_DISABLED; \ 89 - } while (0) 90 - 91 91 static inline void set_preempt_need_resched(void) 92 92 { 93 93 } ··· 117 129 } 118 130 119 131 #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ 132 + 133 + #define init_task_preempt_count(p) do { } while (0) 134 + /* Deferred to CPU bringup time */ 135 + #define init_idle_preempt_count(p, cpu) do { } while (0) 120 136 121 137 #ifdef CONFIG_PREEMPTION 122 138 extern void preempt_schedule(void);
+16 -8
arch/s390/include/asm/ptrace.h
··· 11 11 #include <uapi/asm/ptrace.h> 12 12 #include <asm/tpi.h> 13 13 14 - #define PIF_SYSCALL 0 /* inside a system call */ 15 - #define PIF_SYSCALL_RESTART 1 /* restart the current system call */ 16 - #define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ 17 - #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ 14 + #define PIF_SYSCALL 0 /* inside a system call */ 15 + #define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */ 16 + #define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ 17 + #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ 18 18 19 - #define _PIF_SYSCALL BIT(PIF_SYSCALL) 20 - #define _PIF_SYSCALL_RESTART BIT(PIF_SYSCALL_RESTART) 21 - #define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) 22 - #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) 19 + #define _PIF_SYSCALL BIT(PIF_SYSCALL) 20 + #define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART) 21 + #define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) 22 + #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) 23 23 24 24 #ifndef __ASSEMBLY__ 25 25 ··· 160 160 static inline int test_pt_regs_flag(struct pt_regs *regs, int flag) 161 161 { 162 162 return !!(regs->flags & (1UL << flag)); 163 + } 164 + 165 + static inline int test_and_clear_pt_regs_flag(struct pt_regs *regs, int flag) 166 + { 167 + int ret = test_pt_regs_flag(regs, flag); 168 + 169 + clear_pt_regs_flag(regs, flag); 170 + return ret; 163 171 } 164 172 165 173 /*
+2
arch/s390/include/asm/setup.h
··· 159 159 return __kaslr_offset; 160 160 } 161 161 162 + extern int is_full_image; 163 + 162 164 static inline u32 gen_lpswe(unsigned long addr) 163 165 { 164 166 BUILD_BUG_ON(addr > 0xfff);
+13
arch/s390/include/asm/softirq_stack.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + #ifndef __ASM_S390_SOFTIRQ_STACK_H 3 + #define __ASM_S390_SOFTIRQ_STACK_H 4 + 5 + #include <asm/lowcore.h> 6 + #include <asm/stacktrace.h> 7 + 8 + static inline void do_softirq_own_stack(void) 9 + { 10 + call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq); 11 + } 12 + 13 + #endif /* __ASM_S390_SOFTIRQ_STACK_H */
+89 -27
arch/s390/include/asm/stacktrace.h
··· 74 74 ((unsigned long)__builtin_frame_address(0) - \ 75 75 offsetof(struct stack_frame, back_chain)) 76 76 77 - #define CALL_ARGS_0() \ 78 - register unsigned long r2 asm("2") 79 - #define CALL_ARGS_1(arg1) \ 80 - register unsigned long r2 asm("2") = (unsigned long)(arg1) 81 - #define CALL_ARGS_2(arg1, arg2) \ 82 - CALL_ARGS_1(arg1); \ 83 - register unsigned long r3 asm("3") = (unsigned long)(arg2) 84 - #define CALL_ARGS_3(arg1, arg2, arg3) \ 85 - CALL_ARGS_2(arg1, arg2); \ 86 - register unsigned long r4 asm("4") = (unsigned long)(arg3) 87 - #define CALL_ARGS_4(arg1, arg2, arg3, arg4) \ 88 - CALL_ARGS_3(arg1, arg2, arg3); \ 89 - register unsigned long r4 asm("5") = (unsigned long)(arg4) 90 - #define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \ 91 - CALL_ARGS_4(arg1, arg2, arg3, arg4); \ 92 - register unsigned long r4 asm("6") = (unsigned long)(arg5) 93 - 94 77 /* 95 78 * To keep this simple mark register 2-6 as being changed (volatile) 96 79 * by the called function, even though register 6 is saved/nonvolatile. ··· 92 109 #define CALL_CLOBBER_1 CALL_CLOBBER_2, "3" 93 110 #define CALL_CLOBBER_0 CALL_CLOBBER_1 94 111 95 - #define CALL_ON_STACK(fn, stack, nr, args...) \ 112 + #define CALL_LARGS_0(...) 
\ 113 + long dummy = 0 114 + #define CALL_LARGS_1(t1, a1) \ 115 + long arg1 = (long)(t1)(a1) 116 + #define CALL_LARGS_2(t1, a1, t2, a2) \ 117 + CALL_LARGS_1(t1, a1); \ 118 + long arg2 = (long)(t2)(a2) 119 + #define CALL_LARGS_3(t1, a1, t2, a2, t3, a3) \ 120 + CALL_LARGS_2(t1, a1, t2, a2); \ 121 + long arg3 = (long)(t3)(a3) 122 + #define CALL_LARGS_4(t1, a1, t2, a2, t3, a3, t4, a4) \ 123 + CALL_LARGS_3(t1, a1, t2, a2, t3, a3); \ 124 + long arg4 = (long)(t4)(a4) 125 + #define CALL_LARGS_5(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5) \ 126 + CALL_LARGS_4(t1, a1, t2, a2, t3, a3, t4, a4); \ 127 + long arg5 = (long)(t5)(a5) 128 + 129 + #define CALL_REGS_0 \ 130 + register long r2 asm("2") = dummy 131 + #define CALL_REGS_1 \ 132 + register long r2 asm("2") = arg1 133 + #define CALL_REGS_2 \ 134 + CALL_REGS_1; \ 135 + register long r3 asm("3") = arg2 136 + #define CALL_REGS_3 \ 137 + CALL_REGS_2; \ 138 + register long r4 asm("4") = arg3 139 + #define CALL_REGS_4 \ 140 + CALL_REGS_3; \ 141 + register long r5 asm("5") = arg4 142 + #define CALL_REGS_5 \ 143 + CALL_REGS_4; \ 144 + register long r6 asm("6") = arg5 145 + 146 + #define CALL_TYPECHECK_0(...) 147 + #define CALL_TYPECHECK_1(t, a, ...) \ 148 + typecheck(t, a) 149 + #define CALL_TYPECHECK_2(t, a, ...) \ 150 + CALL_TYPECHECK_1(__VA_ARGS__); \ 151 + typecheck(t, a) 152 + #define CALL_TYPECHECK_3(t, a, ...) \ 153 + CALL_TYPECHECK_2(__VA_ARGS__); \ 154 + typecheck(t, a) 155 + #define CALL_TYPECHECK_4(t, a, ...) \ 156 + CALL_TYPECHECK_3(__VA_ARGS__); \ 157 + typecheck(t, a) 158 + #define CALL_TYPECHECK_5(t, a, ...) \ 159 + CALL_TYPECHECK_4(__VA_ARGS__); \ 160 + typecheck(t, a) 161 + 162 + #define CALL_PARM_0(...) void 163 + #define CALL_PARM_1(t, a, ...) t 164 + #define CALL_PARM_2(t, a, ...) t, CALL_PARM_1(__VA_ARGS__) 165 + #define CALL_PARM_3(t, a, ...) t, CALL_PARM_2(__VA_ARGS__) 166 + #define CALL_PARM_4(t, a, ...) t, CALL_PARM_3(__VA_ARGS__) 167 + #define CALL_PARM_5(t, a, ...) 
t, CALL_PARM_4(__VA_ARGS__) 168 + #define CALL_PARM_6(t, a, ...) t, CALL_PARM_5(__VA_ARGS__) 169 + 170 + /* 171 + * Use call_on_stack() to call a function switching to a specified 172 + * stack. Proper sign and zero extension of function arguments is 173 + * done. Usage: 174 + * 175 + * rc = call_on_stack(nr, stack, rettype, fn, t1, a1, t2, a2, ...) 176 + * 177 + * - nr specifies the number of function arguments of fn. 178 + * - stack specifies the stack to be used. 179 + * - fn is the function to be called. 180 + * - rettype is the return type of fn. 181 + * - t1, a1, ... are pairs, where t1 must match the type of the first 182 + * argument of fn, t2 the second, etc. a1 is the corresponding 183 + * first function argument (not name), etc. 184 + */ 185 + #define call_on_stack(nr, stack, rettype, fn, ...) \ 96 186 ({ \ 187 + rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = fn; \ 97 188 unsigned long frame = current_frame_address(); \ 98 - CALL_ARGS_##nr(args); \ 189 + unsigned long __stack = stack; \ 99 190 unsigned long prev; \ 191 + CALL_LARGS_##nr(__VA_ARGS__); \ 192 + CALL_REGS_##nr; \ 100 193 \ 194 + CALL_TYPECHECK_##nr(__VA_ARGS__); \ 101 195 asm volatile( \ 102 - " la %[_prev],0(15)\n" \ 196 + " lgr %[_prev],15\n" \ 103 197 " lg 15,%[_stack]\n" \ 104 198 " stg %[_frame],%[_bc](15)\n" \ 105 199 " brasl 14,%[_fn]\n" \ 106 - " la 15,0(%[_prev])\n" \ 107 - : [_prev] "=&a" (prev), CALL_FMT_##nr \ 108 - : [_stack] "R" (stack), \ 200 + " lgr 15,%[_prev]\n" \ 201 + : [_prev] "=&d" (prev), CALL_FMT_##nr \ 202 + : [_stack] "R" (__stack), \ 109 203 [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ 110 204 [_frame] "d" (frame), \ 111 - [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ 112 - r2; \ 205 + [_fn] "X" (__fn) : CALL_CLOBBER_##nr); \ 206 + (rettype)r2; \ 113 207 }) 114 208 115 - #define CALL_ON_STACK_NORETURN(fn, stack) \ 209 + #define call_on_stack_noreturn(fn, stack) \ 116 210 ({ \ 211 + void (*__fn)(void) = fn; \ 212 + \ 117 213 asm volatile( \ 118 214 " la 
15,0(%[_stack])\n" \ 119 215 " xc %[_bc](8,15),%[_bc](15)\n" \ 120 216 " brasl 14,%[_fn]\n" \ 121 217 ::[_bc] "i" (offsetof(struct stack_frame, back_chain)), \ 122 - [_stack] "a" (stack), [_fn] "X" (fn)); \ 218 + [_stack] "a" (stack), [_fn] "X" (__fn)); \ 123 219 BUG(); \ 124 220 }) 125 221
+7 -1
arch/s390/include/asm/uv.h
··· 73 73 BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22, 74 74 }; 75 75 76 + enum uv_feat_ind { 77 + BIT_UV_FEAT_MISC = 0, 78 + }; 79 + 76 80 struct uv_cb_header { 77 81 u16 len; 78 82 u16 cmd; /* Command Code */ ··· 101 97 u64 max_guest_stor_addr; 102 98 u8 reserved88[158 - 136]; 103 99 u16 max_guest_cpu_id; 104 - u8 reserveda0[200 - 160]; 100 + u64 uv_feature_indications; 101 + u8 reserveda0[200 - 168]; 105 102 } __packed __aligned(8); 106 103 107 104 /* Initialize Ultravisor */ ··· 279 274 unsigned long max_sec_stor_addr; 280 275 unsigned int max_num_sec_conf; 281 276 unsigned short max_guest_cpu_id; 277 + unsigned long uv_feature_indications; 282 278 }; 283 279 284 280 extern struct uv_info uv_info;
+20 -7
arch/s390/include/asm/vdso.h
··· 4 4 5 5 #include <vdso/datapage.h> 6 6 7 - /* Default link address for the vDSO */ 8 - #define VDSO64_LBASE 0 9 - 10 - #define __VVAR_PAGES 2 11 - 12 - #define VDSO_VERSION_STRING LINUX_2.6.29 13 - 14 7 #ifndef __ASSEMBLY__ 8 + 9 + #include <generated/vdso64-offsets.h> 10 + #ifdef CONFIG_COMPAT 11 + #include <generated/vdso32-offsets.h> 12 + #endif 13 + 14 + #define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) 15 + #ifdef CONFIG_COMPAT 16 + #define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name)) 17 + #else 18 + #define VDSO32_SYMBOL(tsk, name) (-1UL) 19 + #endif 15 20 16 21 extern struct vdso_data *vdso_data; 17 22 18 23 int vdso_getcpu_init(void); 19 24 20 25 #endif /* __ASSEMBLY__ */ 26 + 27 + /* Default link address for the vDSO */ 28 + #define VDSO_LBASE 0 29 + 30 + #define __VVAR_PAGES 2 31 + 32 + #define VDSO_VERSION_STRING LINUX_2.6.29 33 + 21 34 #endif /* __S390_VDSO_H__ */
-1
arch/s390/include/asm/vdso/gettimeofday.h
··· 8 8 9 9 #include <asm/timex.h> 10 10 #include <asm/unistd.h> 11 - #include <asm/vdso.h> 12 11 #include <linux/compiler.h> 13 12 14 13 #define vdso_calc_delta __arch_vdso_calc_delta
+1 -1
arch/s390/kernel/Makefile
··· 71 71 obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o 72 72 obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o 73 73 obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o 74 - obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_diag.o 75 74 76 75 obj-$(CONFIG_TRACEPOINTS) += trace.o 77 76 obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o 78 77 79 78 # vdso 80 79 obj-y += vdso64/ 80 + obj-$(CONFIG_COMPAT) += vdso32/
-6
arch/s390/kernel/asm-offsets.c
··· 14 14 #include <linux/pgtable.h> 15 15 #include <asm/idle.h> 16 16 #include <asm/gmap.h> 17 - #include <asm/nmi.h> 18 - #include <asm/setup.h> 19 17 #include <asm/stacktrace.h> 20 18 21 19 int main(void) ··· 106 108 OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); 107 109 OFFSET(__LC_INT_CLOCK, lowcore, int_clock); 108 110 OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock); 109 - OFFSET(__LC_CLOCK_COMPARATOR, lowcore, clock_comparator); 110 111 OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); 111 112 OFFSET(__LC_CURRENT, lowcore, current_task); 112 113 OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); ··· 141 144 OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area); 142 145 OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area); 143 146 OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb); 144 - BLANK(); 145 - /* extended machine check save area */ 146 - OFFSET(__MCESA_GS_SAVE_AREA, mcesa, guarded_storage_save_area); 147 147 BLANK(); 148 148 /* gmap/sie offsets */ 149 149 OFFSET(__GMAP_ASCE, gmap, asce);
+3 -10
arch/s390/kernel/compat_signal.c
··· 28 28 #include <linux/uaccess.h> 29 29 #include <asm/lowcore.h> 30 30 #include <asm/switch_to.h> 31 + #include <asm/vdso.h> 31 32 #include "compat_linux.h" 32 33 #include "compat_ptrace.h" 33 34 #include "entry.h" ··· 119 118 fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu); 120 119 121 120 clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ 122 - clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); 123 121 return 0; 124 122 } 125 123 ··· 304 304 restorer = (unsigned long __force) 305 305 ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; 306 306 } else { 307 - /* Signal frames without vectors registers are short ! */ 308 - __u16 __user *svc = (void __user *) frame + frame_size - 2; 309 - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) 310 - return -EFAULT; 311 - restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; 307 + restorer = VDSO32_SYMBOL(current, sigreturn); 312 308 } 313 309 314 310 /* Set up registers for signal handler */ ··· 367 371 restorer = (unsigned long __force) 368 372 ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; 369 373 } else { 370 - __u16 __user *svc = &frame->svc_insn; 371 - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) 372 - return -EFAULT; 373 - restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; 374 + restorer = VDSO32_SYMBOL(current, rt_sigreturn); 374 375 } 375 376 376 377 /* Create siginfo on the signal stack */
+3 -1
arch/s390/kernel/early.c
··· 33 33 #include <asm/switch_to.h> 34 34 #include "entry.h" 35 35 36 + int __bootdata(is_full_image); 37 + 36 38 static void __init reset_tod_clock(void) 37 39 { 38 40 union tod_clock clk; ··· 281 279 282 280 static void __init check_image_bootable(void) 283 281 { 284 - if (!memcmp(EP_STRING, (void *)EP_OFFSET, strlen(EP_STRING))) 282 + if (is_full_image) 285 283 return; 286 284 287 285 sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n");
+76 -65
arch/s390/kernel/entry.S
··· 14 14 #include <asm/alternative-asm.h> 15 15 #include <asm/processor.h> 16 16 #include <asm/cache.h> 17 - #include <asm/ctl_reg.h> 18 17 #include <asm/dwarf.h> 19 18 #include <asm/errno.h> 20 19 #include <asm/ptrace.h> ··· 128 129 "jnz .+8; .long 0xb2e8d000", 82 129 130 .endm 130 131 132 + /* 133 + * The CHKSTG macro jumps to the provided label in case the 134 + * machine check interruption code reports one of the unrecoverable 135 + * storage errors: 136 + * - Storage error uncorrected 137 + * - Storage key error uncorrected 138 + * - Storage degradation with Failing-storage-address validity 139 + */ 140 + .macro CHKSTG errlabel 141 + TSTMSK __LC_MCCK_CODE,(MCCK_CODE_STG_ERROR|MCCK_CODE_STG_KEY_ERROR) 142 + jnz \errlabel 143 + TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_DEGRAD 144 + jz oklabel\@ 145 + TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_FAIL_ADDR 146 + jnz \errlabel 147 + oklabel\@: 148 + .endm 149 + 131 150 #if IS_ENABLED(CONFIG_KVM) 132 151 /* 133 152 * The OUTSIDE macro jumps to the provided label in case the value ··· 164 147 lghi %r13,\end - \start 165 148 clgr %r14,%r13 166 149 jhe \outside_label 150 + .endm 151 + 152 + .macro SIEEXIT 153 + lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer 154 + ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE 155 + lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce 156 + larl %r9,sie_exit # skip forward to sie_exit 167 157 .endm 168 158 #endif 169 159 ··· 259 235 # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. 260 236 # Other instructions between sie64a and .Lsie_done should not cause program 261 237 # interrupts. So let's use 3 nops as a landing pad for all possible rewinds. 
262 - # See also .Lcleanup_sie 263 238 .Lrewind_pad6: 264 239 nopr 7 265 240 .Lrewind_pad4: ··· 364 341 #if IS_ENABLED(CONFIG_KVM) 365 342 # cleanup critical section for program checks in sie64a 366 343 OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f 367 - lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer 368 - ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE 369 - lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce 370 - larl %r9,sie_exit # skip forward to sie_exit 344 + SIEEXIT 371 345 lghi %r10,_PIF_GUEST_FAULT 372 346 #endif 373 347 1: tmhh %r8,0x4000 # PER bit set in old PSW ? ··· 430 410 jnz 1f 431 411 #if IS_ENABLED(CONFIG_KVM) 432 412 OUTSIDE %r9,.Lsie_gmap,.Lsie_done,0f 433 - brasl %r14,.Lcleanup_sie 413 + BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) 414 + SIEEXIT 434 415 #endif 435 416 0: CHECK_STACK __LC_SAVE_AREA_ASYNC 436 417 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) ··· 505 484 BPOFF 506 485 la %r1,4095 # validate r1 507 486 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer 508 - sckc __LC_CLOCK_COMPARATOR # validate comparator 509 - lam %a0,%a15,__LC_AREGS_SAVE_AREA-4095(%r1) # validate acrs 510 487 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs 511 488 lg %r12,__LC_CURRENT 512 489 lmg %r8,%r9,__LC_MCK_OLD_PSW ··· 515 496 la %r14,4095 516 497 lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r14) # validate ctl regs 517 498 ptlb 518 - lg %r11,__LC_MCESAD-4095(%r14) # extended machine check save area 519 - nill %r11,0xfc00 # MCESA_ORIGIN_MASK 520 - TSTMSK __LC_CREGS_SAVE_AREA+16-4095(%r14),CR2_GUARDED_STORAGE 521 - jno 0f 522 - TSTMSK __LC_MCCK_CODE,MCCK_CODE_GS_VALID 523 - jno 0f 524 - .insn rxy,0xe3000000004d,0,__MCESA_GS_SAVE_AREA(%r11) # LGSC 525 - 0: l %r14,__LC_FP_CREG_SAVE_AREA-4095(%r14) 526 - TSTMSK __LC_MCCK_CODE,MCCK_CODE_FC_VALID 527 - jo 0f 528 - sr %r14,%r14 529 - 0: sfpc %r14 530 - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX 531 - jo 0f 532 - lghi %r14,__LC_FPREGS_SAVE_AREA 533 - ld %f0,0(%r14) 534 - 
ld %f1,8(%r14) 535 - ld %f2,16(%r14) 536 - ld %f3,24(%r14) 537 - ld %f4,32(%r14) 538 - ld %f5,40(%r14) 539 - ld %f6,48(%r14) 540 - ld %f7,56(%r14) 541 - ld %f8,64(%r14) 542 - ld %f9,72(%r14) 543 - ld %f10,80(%r14) 544 - ld %f11,88(%r14) 545 - ld %f12,96(%r14) 546 - ld %f13,104(%r14) 547 - ld %f14,112(%r14) 548 - ld %f15,120(%r14) 549 - j 1f 550 - 0: VLM %v0,%v15,0,%r11 551 - VLM %v16,%v31,256,%r11 552 - 1: lghi %r14,__LC_CPU_TIMER_SAVE_AREA 499 + lghi %r14,__LC_CPU_TIMER_SAVE_AREA 553 500 mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) 554 501 TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID 555 502 jo 3f ··· 531 546 3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID 532 547 jno .Lmcck_panic 533 548 tmhh %r8,0x0001 # interrupting from user ? 534 - jnz 4f 549 + jnz 6f 535 550 TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID 536 551 jno .Lmcck_panic 537 - 4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off 538 - tmhh %r8,0x0001 # interrupting from user ? 539 - jnz .Lmcck_user 540 552 #if IS_ENABLED(CONFIG_KVM) 541 - OUTSIDE %r9,.Lsie_gmap,.Lsie_done,.Lmcck_stack 542 - OUTSIDE %r9,.Lsie_entry,.Lsie_skip,5f 553 + OUTSIDE %r9,.Lsie_gmap,.Lsie_done,6f 554 + OUTSIDE %r9,.Lsie_entry,.Lsie_skip,4f 543 555 oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST 544 - 5: brasl %r14,.Lcleanup_sie 545 - #endif 556 + j 5f 557 + 4: CHKSTG .Lmcck_panic 558 + 5: larl %r14,.Lstosm_tmp 559 + stosm 0(%r14),0x04 # turn dat on, keep irqs off 560 + BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) 561 + SIEEXIT 546 562 j .Lmcck_stack 547 - .Lmcck_user: 563 + #endif 564 + 6: CHKSTG .Lmcck_panic 565 + larl %r14,.Lstosm_tmp 566 + stosm 0(%r14),0x04 # turn dat on, keep irqs off 567 + tmhh %r8,0x0001 # interrupting from user ? 
568 + jz .Lmcck_stack 548 569 BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP 549 570 .Lmcck_stack: 550 571 lg %r15,__LC_MCCK_STACK 551 - .Lmcck_skip: 552 572 la %r11,STACK_FRAME_OVERHEAD(%r15) 553 573 stctg %c1,%c1,__PT_CR1(%r11) 554 574 lctlg %c1,%c1,__LC_KERNEL_ASCE ··· 595 605 b __LC_RETURN_MCCK_LPSWE 596 606 597 607 .Lmcck_panic: 598 - lg %r15,__LC_NODAT_STACK 599 - j .Lmcck_skip 608 + /* 609 + * Iterate over all possible CPU addresses in the range 0..0xffff 610 + * and stop each CPU using signal processor. Use compare and swap 611 + * to allow just one CPU-stopper and prevent concurrent CPUs from 612 + * stopping each other while leaving the others running. 613 + */ 614 + lhi %r5,0 615 + lhi %r6,1 616 + larl %r7,.Lstop_lock 617 + cs %r5,%r6,0(%r7) # single CPU-stopper only 618 + jnz 4f 619 + larl %r7,.Lthis_cpu 620 + stap 0(%r7) # this CPU address 621 + lh %r4,0(%r7) 622 + nilh %r4,0 623 + lhi %r0,1 624 + sll %r0,16 # CPU counter 625 + lhi %r3,0 # next CPU address 626 + 0: cr %r3,%r4 627 + je 2f 628 + 1: sigp %r1,%r3,SIGP_STOP # stop next CPU 629 + brc SIGP_CC_BUSY,1b 630 + 2: ahi %r3,1 631 + brct %r0,0b 632 + 3: sigp %r1,%r4,SIGP_STOP # stop this CPU 633 + brc SIGP_CC_BUSY,3b 634 + 4: j 4b 600 635 ENDPROC(mcck_int_handler) 601 636 602 637 # ··· 672 657 ENDPROC(stack_overflow) 673 658 #endif 674 659 675 - #if IS_ENABLED(CONFIG_KVM) 676 - .Lcleanup_sie: 677 - BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) 678 - lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer 679 - ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE 680 - lctlg %c1,%c1,__LC_KERNEL_ASCE 681 - larl %r9,sie_exit # skip forward to sie_exit 682 - BR_EX %r14,%r13 683 - #endif 660 + .section .data, "aw" 661 + .align 4 662 + .Lstop_lock: .long 0 663 + .Lthis_cpu: .short 0 664 + .Lstosm_tmp: .byte 0 684 665 .section .rodata, "a" 685 666 #define SYSCALL(esame,emu) .quad __s390x_ ## esame 686 667 .globl sys_call_table
+6 -22
arch/s390/kernel/irq.c
··· 110 110 { 111 111 unsigned long frame = current_frame_address(); 112 112 113 - return !!!((S390_lowcore.async_stack - frame) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)); 113 + return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0; 114 114 } 115 115 116 116 static void do_irq_async(struct pt_regs *regs, int irq) 117 117 { 118 - if (on_async_stack()) 118 + if (on_async_stack()) { 119 119 do_IRQ(regs, irq); 120 - else 121 - CALL_ON_STACK(do_IRQ, S390_lowcore.async_stack, 2, regs, irq); 120 + } else { 121 + call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ, 122 + struct pt_regs *, regs, int, irq); 123 + } 122 124 } 123 125 124 126 static int irq_pending(struct pt_regs *regs) ··· 265 263 unsigned int arch_dynirq_lower_bound(unsigned int from) 266 264 { 267 265 return from < NR_IRQS_BASE ? NR_IRQS_BASE : from; 268 - } 269 - 270 - /* 271 - * Switch to the asynchronous interrupt stack for softirq execution. 272 - */ 273 - void do_softirq_own_stack(void) 274 - { 275 - unsigned long old, new; 276 - 277 - old = current_stack_pointer(); 278 - /* Check against async. stack address range. */ 279 - new = S390_lowcore.async_stack; 280 - if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) { 281 - CALL_ON_STACK(__do_softirq, new, 0); 282 - } else { 283 - /* We are already on the async stack. */ 284 - __do_softirq(); 285 - } 286 266 } 287 267 288 268 /*
+2 -7
arch/s390/kernel/kprobes.c
··· 92 92 } 93 93 NOKPROBE_SYMBOL(copy_instruction); 94 94 95 - static inline int is_kernel_addr(void *addr) 96 - { 97 - return addr < (void *)_end; 98 - } 99 - 100 95 static int s390_get_insn_slot(struct kprobe *p) 101 96 { 102 97 /* ··· 100 105 * field can be patched and executed within the insn slot. 101 106 */ 102 107 p->ainsn.insn = NULL; 103 - if (is_kernel_addr(p->addr)) 108 + if (is_kernel((unsigned long)p->addr)) 104 109 p->ainsn.insn = get_s390_insn_slot(); 105 110 else if (is_module_addr(p->addr)) 106 111 p->ainsn.insn = get_insn_slot(); ··· 112 117 { 113 118 if (!p->ainsn.insn) 114 119 return; 115 - if (is_kernel_addr(p->addr)) 120 + if (is_kernel((unsigned long)p->addr)) 116 121 free_s390_insn_slot(p->ainsn.insn, 0); 117 122 else 118 123 free_insn_slot(p->ainsn.insn, 0);
+2 -1
arch/s390/kernel/machine_kexec.c
··· 132 132 int rc; 133 133 134 134 preempt_disable(); 135 - rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image); 135 + rc = call_on_stack(1, S390_lowcore.nodat_stack, unsigned long, do_start_kdump, 136 + unsigned long, (unsigned long)image); 136 137 preempt_enable(); 137 138 return rc == 0; 138 139 #else
+77 -52
arch/s390/kernel/nmi.c
··· 189 189 * returns 0 if all required registers are available 190 190 * returns 1 otherwise 191 191 */ 192 - static int notrace s390_check_registers(union mci mci, int umode) 192 + static int notrace s390_validate_registers(union mci mci, int umode) 193 193 { 194 + struct mcesa *mcesa; 195 + void *fpt_save_area; 194 196 union ctlreg2 cr2; 195 197 int kill_task; 198 + u64 zero; 196 199 197 200 kill_task = 0; 201 + zero = 0; 198 202 199 203 if (!mci.gr) { 200 204 /* ··· 208 204 if (!umode) 209 205 s390_handle_damage(); 210 206 kill_task = 1; 211 - } 212 - /* Check control registers */ 213 - if (!mci.cr) { 214 - /* 215 - * Control registers have unknown contents. 216 - * Can't recover and therefore stopping machine. 217 - */ 218 - s390_handle_damage(); 219 207 } 220 208 if (!mci.fp) { 221 209 /* ··· 221 225 if (!test_cpu_flag(CIF_FPU)) 222 226 kill_task = 1; 223 227 } 228 + fpt_save_area = &S390_lowcore.floating_pt_save_area; 224 229 if (!mci.fc) { 225 230 /* 226 231 * Floating point control register can't be restored. 227 232 * If the kernel currently uses the floating point 228 233 * registers and needs the FPC register the system is 229 234 * stopped. If the process has its floating point 230 - * registers loaded it is terminated. 235 + * registers loaded it is terminated. Otherwise the 236 + * FPC is just validated. 
231 237 */ 232 238 if (S390_lowcore.fpu_flags & KERNEL_FPC) 233 239 s390_handle_damage(); 240 + asm volatile( 241 + " lfpc %0\n" 242 + : 243 + : "Q" (zero)); 234 244 if (!test_cpu_flag(CIF_FPU)) 235 245 kill_task = 1; 246 + } else { 247 + asm volatile( 248 + " lfpc %0\n" 249 + : 250 + : "Q" (S390_lowcore.fpt_creg_save_area)); 236 251 } 237 252 238 - if (MACHINE_HAS_VX) { 253 + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); 254 + if (!MACHINE_HAS_VX) { 255 + /* Validate floating point registers */ 256 + asm volatile( 257 + " ld 0,0(%0)\n" 258 + " ld 1,8(%0)\n" 259 + " ld 2,16(%0)\n" 260 + " ld 3,24(%0)\n" 261 + " ld 4,32(%0)\n" 262 + " ld 5,40(%0)\n" 263 + " ld 6,48(%0)\n" 264 + " ld 7,56(%0)\n" 265 + " ld 8,64(%0)\n" 266 + " ld 9,72(%0)\n" 267 + " ld 10,80(%0)\n" 268 + " ld 11,88(%0)\n" 269 + " ld 12,96(%0)\n" 270 + " ld 13,104(%0)\n" 271 + " ld 14,112(%0)\n" 272 + " ld 15,120(%0)\n" 273 + : 274 + : "a" (fpt_save_area) 275 + : "memory"); 276 + } else { 277 + /* Validate vector registers */ 278 + union ctlreg0 cr0; 279 + 239 280 if (!mci.vr) { 240 281 /* 241 282 * Vector registers can't be restored. If the kernel 242 283 * currently uses vector registers the system is 243 284 * stopped. If the process has its vector registers 244 - * loaded it is terminated. 285 + * loaded it is terminated. Otherwise just validate 286 + * the registers. 
245 287 */ 246 288 if (S390_lowcore.fpu_flags & KERNEL_VXR) 247 289 s390_handle_damage(); 248 290 if (!test_cpu_flag(CIF_FPU)) 249 291 kill_task = 1; 250 292 } 293 + cr0.val = S390_lowcore.cregs_save_area[0]; 294 + cr0.afp = cr0.vx = 1; 295 + __ctl_load(cr0.val, 0, 0); 296 + asm volatile( 297 + " la 1,%0\n" 298 + " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ 299 + " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ 300 + : 301 + : "Q" (*(struct vx_array *)mcesa->vector_save_area) 302 + : "1"); 303 + __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); 251 304 } 252 - /* Check if access registers are valid */ 305 + /* Validate access registers */ 306 + asm volatile( 307 + " lam 0,15,0(%0)\n" 308 + : 309 + : "a" (&S390_lowcore.access_regs_save_area) 310 + : "memory"); 253 311 if (!mci.ar) { 254 312 /* 255 313 * Access registers have unknown contents. ··· 311 261 */ 312 262 kill_task = 1; 313 263 } 314 - /* Check guarded storage registers */ 264 + /* Validate guarded storage registers */ 315 265 cr2.val = S390_lowcore.cregs_save_area[2]; 316 266 if (cr2.gse) { 317 267 if (!mci.gs) { ··· 321 271 * It has to be terminated. 322 272 */ 323 273 kill_task = 1; 274 + } else { 275 + load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area); 324 276 } 325 277 } 326 - /* Check if old PSW is valid */ 327 - if (!mci.wp) { 328 - /* 329 - * Can't tell if we come from user or kernel mode 330 - * -> stopping machine. 331 - */ 332 - s390_handle_damage(); 333 - } 334 - /* Check for invalid kernel instruction address */ 335 - if (!mci.ia && !umode) { 336 - /* 337 - * The instruction address got lost while running 338 - * in the kernel -> stopping machine. 339 - */ 340 - s390_handle_damage(); 341 - } 278 + /* 279 + * The getcpu vdso syscall reads CPU number from the programmable 280 + * field of the TOD clock. Disregard the TOD programmable register 281 + * validity bit and load the CPU number into the TOD programmable 282 + * field unconditionally. 
283 + */ 284 + set_tod_programmable_field(raw_smp_processor_id()); 285 + /* Validate clock comparator register */ 286 + set_clock_comparator(S390_lowcore.clock_comparator); 342 287 343 288 if (!mci.ms || !mci.pm || !mci.ia) 344 289 kill_task = 1; 345 290 346 291 return kill_task; 347 292 } 348 - NOKPROBE_SYMBOL(s390_check_registers); 293 + NOKPROBE_SYMBOL(s390_validate_registers); 349 294 350 295 /* 351 296 * Backup the guest's machine check info to its description block ··· 398 353 mci.val = S390_lowcore.mcck_interruption_code; 399 354 mcck = this_cpu_ptr(&cpu_mcck); 400 355 401 - if (mci.sd) { 402 - /* System damage -> stopping machine */ 403 - s390_handle_damage(); 404 - } 405 - 406 356 /* 407 357 * Reinject the instruction processing damages' machine checks 408 358 * including Delayed Access Exception into the guest ··· 438 398 s390_handle_damage(); 439 399 } 440 400 } 441 - if (s390_check_registers(mci, user_mode(regs))) { 401 + if (s390_validate_registers(mci, user_mode(regs))) { 442 402 /* 443 403 * Couldn't restore all register contents for the 444 404 * user space process -> mark task for termination. ··· 468 428 mcck_pending = 1; 469 429 } 470 430 471 - /* 472 - * Reinject storage related machine checks into the guest if they 473 - * happen when the guest is running. 474 - */ 475 - if (!test_cpu_flag(CIF_MCCK_GUEST)) { 476 - if (mci.se) 477 - /* Storage error uncorrected */ 478 - s390_handle_damage(); 479 - if (mci.ke) 480 - /* Storage key-error uncorrected */ 481 - s390_handle_damage(); 482 - if (mci.ds && mci.fa) 483 - /* Storage degradation */ 484 - s390_handle_damage(); 485 - } 486 431 if (mci.cp) { 487 432 /* Channel report word pending */ 488 433 mcck->channel_report = 1;
+995 -31
arch/s390/kernel/perf_cpum_cf.c
··· 2 2 /* 3 3 * Performance event support for s390x - CPU-measurement Counter Facility 4 4 * 5 - * Copyright IBM Corp. 2012, 2019 5 + * Copyright IBM Corp. 2012, 2021 6 6 * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> 7 + * Thomas Richter <tmricht@linux.ibm.com> 7 8 */ 8 9 #define KMSG_COMPONENT "cpum_cf" 9 10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt ··· 15 14 #include <linux/notifier.h> 16 15 #include <linux/init.h> 17 16 #include <linux/export.h> 17 + #include <linux/miscdevice.h> 18 + 18 19 #include <asm/cpu_mcf.h> 20 + #include <asm/hwctrset.h> 21 + #include <asm/debug.h> 22 + 23 + static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ 24 + static debug_info_t *cf_dbg; 25 + 26 + #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ 27 + /* interval in seconds */ 28 + 29 + /* Counter sets are stored as a data stream in a page sized memory buffer and 30 + * exported to user space via raw data attached to the event sample data. 31 + * Each counter set starts with an eight byte header consisting of: 32 + * - a two byte eye catcher (0xfeef) 33 + * - a two byte counter set number 34 + * - a two byte counter set size (indicates the number of counters in this set) 35 + * - a two byte reserved value (must be zero) to make the header the same 36 + * size as a counter value. 37 + * All counter values are eight bytes in size. 38 + * 39 + * All counter sets are followed by a 64 byte trailer. 40 + * The trailer consists of a: 41 + * - flag field indicating valid fields when corresponding bit set 42 + * - the counter facility first and second version number 43 + * - the CPU speed if nonzero 44 + * - the time stamp the counter sets have been collected 45 + * - the time of day (TOD) base value 46 + * - the machine type. 47 + * 48 + * The counter sets are saved when the process is prepared to be executed on a 49 + * CPU and saved again when the process is going to be removed from a CPU. 
50 + * The difference between both counter sets is calculated and stored in the event 51 + * sample data area. 52 + */ 53 + struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ 54 + unsigned int def:16; /* 0-15 Data Entry Format */ 55 + unsigned int set:16; /* 16-31 Counter set identifier */ 56 + unsigned int ctr:16; /* 32-47 Number of stored counters */ 57 + unsigned int res1:16; /* 48-63 Reserved */ 58 + }; 59 + 60 + struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */ 61 + /* 0 - 7 */ 62 + union { 63 + struct { 64 + unsigned int clock_base:1; /* TOD clock base set */ 65 + unsigned int speed:1; /* CPU speed set */ 66 + /* Measurement alerts */ 67 + unsigned int mtda:1; /* Loss of MT ctr. data alert */ 68 + unsigned int caca:1; /* Counter auth. change alert */ 69 + unsigned int lcda:1; /* Loss of counter data alert */ 70 + }; 71 + unsigned long flags; /* 0-63 All indicators */ 72 + }; 73 + /* 8 - 15 */ 74 + unsigned int cfvn:16; /* 64-79 Ctr First Version */ 75 + unsigned int csvn:16; /* 80-95 Ctr Second Version */ 76 + unsigned int cpu_speed:32; /* 96-127 CPU speed */ 77 + /* 16 - 23 */ 78 + unsigned long timestamp; /* 128-191 Timestamp (TOD) */ 79 + /* 24 - 55 */ 80 + union { 81 + struct { 82 + unsigned long progusage1; 83 + unsigned long progusage2; 84 + unsigned long progusage3; 85 + unsigned long tod_base; 86 + }; 87 + unsigned long progusage[4]; 88 + }; 89 + /* 56 - 63 */ 90 + unsigned int mach_type:16; /* Machine type */ 91 + unsigned int res1:16; /* Reserved */ 92 + unsigned int res2:32; /* Reserved */ 93 + }; 94 + 95 + /* Create the trailer data at the end of a page. 
*/ 96 + static void cfdiag_trailer(struct cf_trailer_entry *te) 97 + { 98 + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 99 + struct cpuid cpuid; 100 + 101 + te->cfvn = cpuhw->info.cfvn; /* Counter version numbers */ 102 + te->csvn = cpuhw->info.csvn; 103 + 104 + get_cpu_id(&cpuid); /* Machine type */ 105 + te->mach_type = cpuid.machine; 106 + te->cpu_speed = cfdiag_cpu_speed; 107 + if (te->cpu_speed) 108 + te->speed = 1; 109 + te->clock_base = 1; /* Save clock base */ 110 + te->tod_base = tod_clock_base.tod; 111 + te->timestamp = get_tod_clock_fast(); 112 + } 113 + 114 + /* Read a counter set. The counter set number determines the counter set and 115 + * the CPUM-CF first and second version number determine the number of 116 + * available counters in each counter set. 117 + * Each counter set starts with a header containing the counter set number and 118 + * the number of eight byte counters. 119 + * 120 + * The function returns the number of bytes occupied by this counter set 121 + * including the header. 122 + * If there is no counter in the counter set, this counter set is useless and 123 + * zero is returned in this case. 124 + * 125 + * Note that the counter sets may not be enabled or active and the stcctm 126 + * instruction might return error 3. Depending on error_ok value this is ok, 127 + * for example when called from cpumf_pmu_start() call back function. 
128 + */ 129 + static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, 130 + size_t room, bool error_ok) 131 + { 132 + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 133 + size_t ctrset_size, need = 0; 134 + int rc = 3; /* Assume write failure */ 135 + 136 + ctrdata->def = CF_DIAG_CTRSET_DEF; 137 + ctrdata->set = ctrset; 138 + ctrdata->res1 = 0; 139 + ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info); 140 + 141 + if (ctrset_size) { /* Save data */ 142 + need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); 143 + if (need <= room) { 144 + rc = ctr_stcctm(ctrset, ctrset_size, 145 + (u64 *)(ctrdata + 1)); 146 + } 147 + if (rc != 3 || error_ok) 148 + ctrdata->ctr = ctrset_size; 149 + else 150 + need = 0; 151 + } 152 + 153 + debug_sprintf_event(cf_dbg, 3, 154 + "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" 155 + " need %zd rc %d\n", __func__, ctrset, ctrset_size, 156 + cpuhw->info.cfvn, cpuhw->info.csvn, need, rc); 157 + return need; 158 + } 159 + 160 + /* Read out all counter sets and save them in the provided data buffer. 161 + * The last 64 byte host an artificial trailer entry. 162 + */ 163 + static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth, 164 + bool error_ok) 165 + { 166 + struct cf_trailer_entry *trailer; 167 + size_t offset = 0, done; 168 + int i; 169 + 170 + memset(data, 0, sz); 171 + sz -= sizeof(*trailer); /* Always room for trailer */ 172 + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 173 + struct cf_ctrset_entry *ctrdata = data + offset; 174 + 175 + if (!(auth & cpumf_ctr_ctl[i])) 176 + continue; /* Counter set not authorized */ 177 + 178 + done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok); 179 + offset += done; 180 + } 181 + trailer = data + offset; 182 + cfdiag_trailer(trailer); 183 + return offset + sizeof(*trailer); 184 + } 185 + 186 + /* Calculate the difference for each counter in a counter set. 
*/ 187 + static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters) 188 + { 189 + for (; --counters >= 0; ++pstart, ++pstop) 190 + if (*pstop >= *pstart) 191 + *pstop -= *pstart; 192 + else 193 + *pstop = *pstart - *pstop + 1; 194 + } 195 + 196 + /* Scan the counter sets and calculate the difference of each counter 197 + * in each set. The result is the increment of each counter during the 198 + * period the counter set has been activated. 199 + * 200 + * Return true on success. 201 + */ 202 + static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) 203 + { 204 + struct cf_trailer_entry *trailer_start, *trailer_stop; 205 + struct cf_ctrset_entry *ctrstart, *ctrstop; 206 + size_t offset = 0; 207 + 208 + auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; 209 + do { 210 + ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); 211 + ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); 212 + 213 + if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { 214 + pr_err_once("cpum_cf_diag counter set compare error " 215 + "in set %i\n", ctrstart->set); 216 + return 0; 217 + } 218 + auth &= ~cpumf_ctr_ctl[ctrstart->set]; 219 + if (ctrstart->def == CF_DIAG_CTRSET_DEF) { 220 + cfdiag_diffctrset((u64 *)(ctrstart + 1), 221 + (u64 *)(ctrstop + 1), ctrstart->ctr); 222 + offset += ctrstart->ctr * sizeof(u64) + 223 + sizeof(*ctrstart); 224 + } 225 + } while (ctrstart->def && auth); 226 + 227 + /* Save time_stamp from start of event in stop's trailer */ 228 + trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); 229 + trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset); 230 + trailer_stop->progusage[0] = trailer_start->timestamp; 231 + 232 + return 1; 233 + } 19 234 20 235 static enum cpumf_ctr_set get_counter_set(u64 event) 21 236 { ··· 251 34 return set; 252 35 } 253 36 254 - static int validate_ctr_version(const struct hw_perf_event *hwc) 37 + static int validate_ctr_version(const struct hw_perf_event *hwc, 38 + enum 
cpumf_ctr_set set) 255 39 { 256 40 struct cpu_cf_events *cpuhw; 257 41 int err = 0; ··· 261 43 cpuhw = &get_cpu_var(cpu_cf_events); 262 44 263 45 /* check required version for counter sets */ 264 - switch (hwc->config_base) { 46 + switch (set) { 265 47 case CPUMF_CTR_SET_BASIC: 266 48 case CPUMF_CTR_SET_USER: 267 49 if (cpuhw->info.cfvn < 1) ··· 304 86 (cpuhw->info.act_ctl & mtdiag_ctl))) 305 87 err = -EOPNOTSUPP; 306 88 break; 89 + case CPUMF_CTR_SET_MAX: 90 + err = -EOPNOTSUPP; 307 91 } 308 92 309 93 put_cpu_var(cpu_cf_events); ··· 315 95 static int validate_ctr_auth(const struct hw_perf_event *hwc) 316 96 { 317 97 struct cpu_cf_events *cpuhw; 318 - u64 ctrs_state; 319 98 int err = 0; 320 99 321 100 cpuhw = &get_cpu_var(cpu_cf_events); ··· 324 105 * return with -ENOENT in order to fall back to other 325 106 * PMUs that might suffice the event request. 326 107 */ 327 - ctrs_state = cpumf_ctr_ctl[hwc->config_base]; 328 - if (!(ctrs_state & cpuhw->info.auth_ctl)) 108 + if (!(hwc->config_base & cpuhw->info.auth_ctl)) 329 109 err = -ENOENT; 330 110 331 111 put_cpu_var(cpu_cf_events); ··· 344 126 if (cpuhw->flags & PMU_F_ENABLED) 345 127 return; 346 128 347 - err = lcctl(cpuhw->state); 129 + err = lcctl(cpuhw->state | cpuhw->dev_state); 348 130 if (err) { 349 131 pr_err("Enabling the performance measuring unit " 350 132 "failed with rc=%x\n", err); ··· 369 151 return; 370 152 371 153 inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); 154 + inactive |= cpuhw->dev_state; 372 155 err = lcctl(inactive); 373 156 if (err) { 374 157 pr_err("Disabling the performance measuring unit " ··· 417 198 [PERF_COUNT_HW_BRANCH_MISSES] = -1, 418 199 [PERF_COUNT_HW_BUS_CYCLES] = -1, 419 200 }; 201 + 202 + static void cpumf_hw_inuse(void) 203 + { 204 + mutex_lock(&pmc_reserve_mutex); 205 + if (atomic_inc_return(&num_events) == 1) 206 + __kernel_cpumcf_begin(); 207 + mutex_unlock(&pmc_reserve_mutex); 208 + } 420 209 421 210 static int __hw_perf_event_init(struct perf_event 
*event, unsigned int type) 422 211 { ··· 485 258 /* 486 259 * Use the hardware perf event structure to store the 487 260 * counter number in the 'config' member and the counter 488 - * set number in the 'config_base'. The counter set number 489 - * is then later used to enable/disable the counter(s). 261 + * set number in the 'config_base' as bit mask. 262 + * It is later used to enable/disable the counter(s). 490 263 */ 491 264 hwc->config = ev; 492 - hwc->config_base = set; 265 + hwc->config_base = cpumf_ctr_ctl[set]; 493 266 break; 494 267 case CPUMF_CTR_SET_MAX: 495 268 /* The counter could not be associated to a counter set */ ··· 497 270 } 498 271 499 272 /* Initialize for using the CPU-measurement counter facility */ 500 - if (!atomic_inc_not_zero(&num_events)) { 501 - mutex_lock(&pmc_reserve_mutex); 502 - if (atomic_read(&num_events) == 0 && __kernel_cpumcf_begin()) 503 - err = -EBUSY; 504 - else 505 - atomic_inc(&num_events); 506 - mutex_unlock(&pmc_reserve_mutex); 507 - } 508 - if (err) 509 - return err; 273 + cpumf_hw_inuse(); 510 274 event->destroy = hw_perf_event_destroy; 511 275 512 276 /* Finally, validate version and authorization of the counter set */ 513 277 err = validate_ctr_auth(hwc); 514 278 if (!err) 515 - err = validate_ctr_version(hwc); 279 + err = validate_ctr_version(hwc, set); 516 280 517 281 return err; 518 282 } ··· 579 361 { 580 362 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 581 363 struct hw_perf_event *hwc = &event->hw; 364 + int i; 582 365 583 366 if (!(hwc->state & PERF_HES_STOPPED)) 584 367 return; ··· 595 376 * needs to be synchronized. At this point, the counter set can be in 596 377 * the inactive or disabled state. 
597 378 */ 598 - hw_perf_event_reset(event); 379 + if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 380 + cpuhw->usedss = cfdiag_getctr(cpuhw->start, 381 + sizeof(cpuhw->start), 382 + hwc->config_base, true); 383 + } else { 384 + hw_perf_event_reset(event); 385 + } 599 386 600 - /* increment refcount for this counter set */ 601 - atomic_inc(&cpuhw->ctr_set[hwc->config_base]); 387 + /* Increment refcount for counter sets */ 388 + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 389 + if ((hwc->config_base & cpumf_ctr_ctl[i])) 390 + atomic_inc(&cpuhw->ctr_set[i]); 391 + } 392 + 393 + /* Create perf event sample with the counter sets as raw data. The sample 394 + * is then pushed to the event subsystem and the function checks for 395 + * possible event overflows. If an event overflow occurs, the PMU is 396 + * stopped. 397 + * 398 + * Return non-zero if an event overflow occurred. 399 + */ 400 + static int cfdiag_push_sample(struct perf_event *event, 401 + struct cpu_cf_events *cpuhw) 402 + { 403 + struct perf_sample_data data; 404 + struct perf_raw_record raw; 405 + struct pt_regs regs; 406 + int overflow; 407 + 408 + /* Setup perf sample */ 409 + perf_sample_data_init(&data, 0, event->hw.last_period); 410 + memset(&regs, 0, sizeof(regs)); 411 + memset(&raw, 0, sizeof(raw)); 412 + 413 + if (event->attr.sample_type & PERF_SAMPLE_CPU) 414 + data.cpu_entry.cpu = event->cpu; 415 + if (event->attr.sample_type & PERF_SAMPLE_RAW) { 416 + raw.frag.size = cpuhw->usedss; 417 + raw.frag.data = cpuhw->stop; 418 + raw.size = raw.frag.size; 419 + data.raw = &raw; 420 + } 421 + 422 + overflow = perf_event_overflow(event, &data, &regs); 423 + debug_sprintf_event(cf_dbg, 3, 424 + "%s event %#llx sample_type %#llx raw %d ov %d\n", 425 + __func__, event->hw.config, 426 + event->attr.sample_type, raw.size, overflow); 427 + if (overflow) 428 + event->pmu->stop(event, 0); 429 + 430 + perf_event_update_userpage(event); 431 + return overflow; 602 432 } 603 433 604 434 static void 
cpumf_pmu_stop(struct perf_event *event, int flags) 605 435 { 606 436 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 607 437 struct hw_perf_event *hwc = &event->hw; 438 + int i; 608 439 609 440 if (!(hwc->state & PERF_HES_STOPPED)) { 610 441 /* Decrement reference count for this counter set and if this 611 442 * is the last used counter in the set, clear activation 612 443 * control and set the counter set state to inactive. 613 444 */ 614 - if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base])) 615 - ctr_set_stop(&cpuhw->state, hwc->config_base); 445 + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 446 + if (!(hwc->config_base & cpumf_ctr_ctl[i])) 447 + continue; 448 + if (!atomic_dec_return(&cpuhw->ctr_set[i])) 449 + ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]); 450 + } 616 451 hwc->state |= PERF_HES_STOPPED; 617 452 } 618 453 619 454 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 620 - hw_perf_event_update(event); 455 + if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 456 + local64_inc(&event->count); 457 + cpuhw->usedss = cfdiag_getctr(cpuhw->stop, 458 + sizeof(cpuhw->stop), 459 + event->hw.config_base, 460 + false); 461 + if (cfdiag_diffctr(cpuhw, event->hw.config_base)) 462 + cfdiag_push_sample(event, cpuhw); 463 + } else 464 + hw_perf_event_update(event); 621 465 hwc->state |= PERF_HES_UPTODATE; 622 466 } 623 467 } ··· 701 419 static void cpumf_pmu_del(struct perf_event *event, int flags) 702 420 { 703 421 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 422 + int i; 704 423 705 424 cpumf_pmu_stop(event, PERF_EF_UPDATE); 706 425 ··· 713 430 * clear enable control and resets all counters in a set. Therefore, 714 431 * cpumf_pmu_start() always has to reenable a counter set. 
715 432 */ 716 - if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base])) 717 - ctr_set_disable(&cpuhw->state, event->hw.config_base); 433 + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 434 + if (!atomic_read(&cpuhw->ctr_set[i])) 435 + ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]); 718 436 } 719 437 720 438 /* Performance monitoring unit for s390x */ ··· 732 448 .read = cpumf_pmu_read, 733 449 }; 734 450 451 + static int cfset_init(void); 735 452 static int __init cpumf_pmu_init(void) 736 453 { 737 454 int rc; ··· 740 455 if (!kernel_cpumcf_avail()) 741 456 return -ENODEV; 742 457 458 + /* Setup s390dbf facility */ 459 + cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); 460 + if (!cf_dbg) { 461 + pr_err("Registration of s390dbf(cpum_cf) failed\n"); 462 + return -ENOMEM; 463 + }; 464 + debug_register_view(cf_dbg, &debug_sprintf_view); 465 + 743 466 cpumf_pmu.attr_groups = cpumf_cf_event_group(); 744 467 rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); 745 - if (rc) 468 + if (rc) { 469 + debug_unregister_view(cf_dbg, &debug_sprintf_view); 470 + debug_unregister(cf_dbg); 746 471 pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); 472 + } else if (stccm_avail()) { /* Setup counter set device */ 473 + cfset_init(); 474 + } 747 475 return rc; 748 476 } 749 - subsys_initcall(cpumf_pmu_init); 477 + 478 + /* Support for the CPU Measurement Facility counter set extraction using 479 + * device /dev/hwctr. This allows user space programs to extract complete 480 + * counter set via normal file operations. 481 + */ 482 + 483 + static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Excl. 
access */ 484 + static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */ 485 + struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ 486 + unsigned int sets; /* Counter set bit mask */ 487 + atomic_t cpus_ack; /* # CPUs successfully executed func */ 488 + }; 489 + 490 + static struct cfset_request { /* CPUs and counter set bit mask */ 491 + unsigned long ctrset; /* Bit mask of counter set to read */ 492 + cpumask_t mask; /* CPU mask to read from */ 493 + } cfset_request; 494 + 495 + static void cfset_ctrset_clear(void) 496 + { 497 + cpumask_clear(&cfset_request.mask); 498 + cfset_request.ctrset = 0; 499 + } 500 + 501 + /* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access 502 + * path is currently used. 503 + * The cpu_cf_events::dev_state is used to denote counter sets in use by this 504 + * interface. It is always or'ed in. If this interface is not active, its 505 + * value is zero and no additional counter sets will be included. 506 + * 507 + * The cpu_cf_events::state is used by the perf_event_open SVC and remains 508 + * unchanged. 509 + * 510 + * perf_pmu_enable() and perf_pmu_disable() and its call backs 511 + * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the 512 + * performance measurement subsystem to enable per process 513 + * CPU Measurement counter facility. 514 + * The XXX_enable() and XXX_disable functions are used to turn off 515 + * x86 performance monitoring interrupt (PMI) during scheduling. 516 + * s390 uses these calls to temporarily stop and resume the active CPU 517 + * counter sets during scheduling. 518 + * 519 + * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr 520 + * device access. The perf_event_open() SVC interface makes a lot of effort 521 + * to only run the counters while the calling process is actively scheduled 522 + * to run. 
523 + * When /dev/hwctr interface is also used at the same time, the counter sets 524 + * will keep running, even when the process is scheduled off a CPU. 525 + * However this is not a problem and does not lead to wrong counter values 526 + * for the perf_event_open() SVC. The current counter value will be recorded 527 + * during schedule-in. At schedule-out time the current counter value is 528 + * extracted again and the delta is calculated and added to the event. 529 + */ 530 + /* Stop all counter sets via ioctl interface */ 531 + static void cfset_ioctl_off(void *parm) 532 + { 533 + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 534 + struct cfset_call_on_cpu_parm *p = parm; 535 + int rc; 536 + 537 + cpuhw->dev_state = 0; 538 + for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 539 + if ((p->sets & cpumf_ctr_ctl[rc])) 540 + atomic_dec(&cpuhw->ctr_set[rc]); 541 + rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ 542 + if (rc) 543 + pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n", 544 + cpuhw->state, S390_HWCTR_DEVICE, rc); 545 + cpuhw->flags &= ~PMU_F_IN_USE; 546 + debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", 547 + __func__, rc, cpuhw->state, cpuhw->dev_state); 548 + } 549 + 550 + /* Start counter sets on particular CPU */ 551 + static void cfset_ioctl_on(void *parm) 552 + { 553 + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 554 + struct cfset_call_on_cpu_parm *p = parm; 555 + int rc; 556 + 557 + cpuhw->flags |= PMU_F_IN_USE; 558 + ctr_set_enable(&cpuhw->dev_state, p->sets); 559 + ctr_set_start(&cpuhw->dev_state, p->sets); 560 + for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 561 + if ((p->sets & cpumf_ctr_ctl[rc])) 562 + atomic_inc(&cpuhw->ctr_set[rc]); 563 + rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */ 564 + if (!rc) 565 + atomic_inc(&p->cpus_ack); 566 + else 567 + pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n", 568 + 
cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); 569 + debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", 570 + __func__, rc, cpuhw->state, cpuhw->dev_state); 571 + } 572 + 573 + static void cfset_release_cpu(void *p) 574 + { 575 + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 576 + int rc; 577 + 578 + debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", 579 + __func__, cpuhw->state, cpuhw->dev_state); 580 + rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ 581 + if (rc) 582 + pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n", 583 + cpuhw->state, S390_HWCTR_DEVICE, rc); 584 + cpuhw->dev_state = 0; 585 + } 586 + 587 + /* Release function is also called when application gets terminated without 588 + * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. 589 + */ 590 + static int cfset_release(struct inode *inode, struct file *file) 591 + { 592 + on_each_cpu(cfset_release_cpu, NULL, 1); 593 + hw_perf_event_destroy(NULL); 594 + cfset_ctrset_clear(); 595 + atomic_set(&cfset_opencnt, 0); 596 + return 0; 597 + } 598 + 599 + static int cfset_open(struct inode *inode, struct file *file) 600 + { 601 + if (!capable(CAP_SYS_ADMIN)) 602 + return -EPERM; 603 + /* Only one user space program can open /dev/hwctr */ 604 + if (atomic_xchg(&cfset_opencnt, 1)) 605 + return -EBUSY; 606 + 607 + cpumf_hw_inuse(); 608 + file->private_data = NULL; 609 + /* nonseekable_open() never fails */ 610 + return nonseekable_open(inode, file); 611 + } 612 + 613 + static int cfset_all_stop(void) 614 + { 615 + struct cfset_call_on_cpu_parm p = { 616 + .sets = cfset_request.ctrset, 617 + }; 618 + cpumask_var_t mask; 619 + 620 + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 621 + return -ENOMEM; 622 + cpumask_and(mask, &cfset_request.mask, cpu_online_mask); 623 + on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); 624 + free_cpumask_var(mask); 625 + return 0; 626 + } 627 + 628 + static int cfset_all_start(void) 629 + { 630 
+ struct cfset_call_on_cpu_parm p = { 631 + .sets = cfset_request.ctrset, 632 + .cpus_ack = ATOMIC_INIT(0), 633 + }; 634 + cpumask_var_t mask; 635 + int rc = 0; 636 + 637 + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 638 + return -ENOMEM; 639 + cpumask_and(mask, &cfset_request.mask, cpu_online_mask); 640 + on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1); 641 + if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { 642 + on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); 643 + rc = -EIO; 644 + debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__); 645 + } 646 + free_cpumask_var(mask); 647 + return rc; 648 + } 649 + 650 + 651 + /* Return the maximum required space for all possible CPUs in case one 652 + * CPU will be onlined during the START, READ, STOP cycles. 653 + * To find out the size of the counter sets, any one CPU will do. They 654 + * all have the same counter sets. 655 + */ 656 + static size_t cfset_needspace(unsigned int sets) 657 + { 658 + struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events); 659 + size_t bytes = 0; 660 + int i; 661 + 662 + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 663 + if (!(sets & cpumf_ctr_ctl[i])) 664 + continue; 665 + bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) + 666 + sizeof(((struct s390_ctrset_setdata *)0)->set) + 667 + sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); 668 + } 669 + bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * 670 + (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + 671 + sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); 672 + put_cpu_ptr(&cpu_cf_events); 673 + return bytes; 674 + } 675 + 676 + static int cfset_all_copy(unsigned long arg, cpumask_t *mask) 677 + { 678 + struct s390_ctrset_read __user *ctrset_read; 679 + unsigned int cpu, cpus, rc; 680 + void __user *uptr; 681 + 682 + ctrset_read = (struct s390_ctrset_read __user *)arg; 683 + uptr = ctrset_read->data; 684 + for_each_cpu(cpu, mask) { 685 + struct cpu_cf_events *cpuhw = 
per_cpu_ptr(&cpu_cf_events, cpu); 686 + struct s390_ctrset_cpudata __user *ctrset_cpudata; 687 + 688 + ctrset_cpudata = uptr; 689 + rc = put_user(cpu, &ctrset_cpudata->cpu_nr); 690 + rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets); 691 + rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data, 692 + cpuhw->used); 693 + if (rc) 694 + return -EFAULT; 695 + uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used; 696 + cond_resched(); 697 + } 698 + cpus = cpumask_weight(mask); 699 + if (put_user(cpus, &ctrset_read->no_cpus)) 700 + return -EFAULT; 701 + debug_sprintf_event(cf_dbg, 4, "%s copied %ld\n", __func__, 702 + uptr - (void __user *)ctrset_read->data); 703 + return 0; 704 + } 705 + 706 + static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, 707 + int ctrset_size, size_t room) 708 + { 709 + size_t need = 0; 710 + int rc = -1; 711 + 712 + need = sizeof(*p) + sizeof(u64) * ctrset_size; 713 + if (need <= room) { 714 + p->set = cpumf_ctr_ctl[ctrset]; 715 + p->no_cnts = ctrset_size; 716 + rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); 717 + if (rc == 3) /* Nothing stored */ 718 + need = 0; 719 + } 720 + return need; 721 + } 722 + 723 + /* Read all counter sets. 
*/ 724 + static void cfset_cpu_read(void *parm) 725 + { 726 + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 727 + struct cfset_call_on_cpu_parm *p = parm; 728 + int set, set_size; 729 + size_t space; 730 + 731 + /* No data saved yet */ 732 + cpuhw->used = 0; 733 + cpuhw->sets = 0; 734 + memset(cpuhw->data, 0, sizeof(cpuhw->data)); 735 + 736 + /* Scan the counter sets */ 737 + for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { 738 + struct s390_ctrset_setdata *sp = (void *)cpuhw->data + 739 + cpuhw->used; 740 + 741 + if (!(p->sets & cpumf_ctr_ctl[set])) 742 + continue; /* Counter set not in list */ 743 + set_size = cpum_cf_ctrset_size(set, &cpuhw->info); 744 + space = sizeof(cpuhw->data) - cpuhw->used; 745 + space = cfset_cpuset_read(sp, set, set_size, space); 746 + if (space) { 747 + cpuhw->used += space; 748 + cpuhw->sets += 1; 749 + } 750 + } 751 + debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, 752 + cpuhw->sets, cpuhw->used); 753 + } 754 + 755 + static int cfset_all_read(unsigned long arg) 756 + { 757 + struct cfset_call_on_cpu_parm p; 758 + cpumask_var_t mask; 759 + int rc; 760 + 761 + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 762 + return -ENOMEM; 763 + 764 + p.sets = cfset_request.ctrset; 765 + cpumask_and(mask, &cfset_request.mask, cpu_online_mask); 766 + on_each_cpu_mask(mask, cfset_cpu_read, &p, 1); 767 + rc = cfset_all_copy(arg, mask); 768 + free_cpumask_var(mask); 769 + return rc; 770 + } 771 + 772 + static long cfset_ioctl_read(unsigned long arg) 773 + { 774 + struct s390_ctrset_read read; 775 + int ret = 0; 776 + 777 + if (copy_from_user(&read, (char __user *)arg, sizeof(read))) 778 + return -EFAULT; 779 + ret = cfset_all_read(arg); 780 + return ret; 781 + } 782 + 783 + static long cfset_ioctl_stop(void) 784 + { 785 + int ret = ENXIO; 786 + 787 + if (cfset_request.ctrset) { 788 + ret = cfset_all_stop(); 789 + cfset_ctrset_clear(); 790 + } 791 + return ret; 792 + } 793 + 794 + static long 
cfset_ioctl_start(unsigned long arg) 795 + { 796 + struct s390_ctrset_start __user *ustart; 797 + struct s390_ctrset_start start; 798 + void __user *umask; 799 + unsigned int len; 800 + int ret = 0; 801 + size_t need; 802 + 803 + if (cfset_request.ctrset) 804 + return -EBUSY; 805 + ustart = (struct s390_ctrset_start __user *)arg; 806 + if (copy_from_user(&start, ustart, sizeof(start))) 807 + return -EFAULT; 808 + if (start.version != S390_HWCTR_START_VERSION) 809 + return -EINVAL; 810 + if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | 811 + cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | 812 + cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | 813 + cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | 814 + cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) 815 + return -EINVAL; /* Invalid counter set */ 816 + if (!start.counter_sets) 817 + return -EINVAL; /* No counter set at all? */ 818 + cpumask_clear(&cfset_request.mask); 819 + len = min_t(u64, start.cpumask_len, cpumask_size()); 820 + umask = (void __user *)start.cpumask; 821 + if (copy_from_user(&cfset_request.mask, umask, len)) 822 + return -EFAULT; 823 + if (cpumask_empty(&cfset_request.mask)) 824 + return -EINVAL; 825 + need = cfset_needspace(start.counter_sets); 826 + if (put_user(need, &ustart->data_bytes)) 827 + ret = -EFAULT; 828 + if (ret) 829 + goto out; 830 + cfset_request.ctrset = start.counter_sets; 831 + ret = cfset_all_start(); 832 + out: 833 + if (ret) 834 + cfset_ctrset_clear(); 835 + debug_sprintf_event(cf_dbg, 4, "%s sets %#lx need %ld ret %d\n", 836 + __func__, cfset_request.ctrset, need, ret); 837 + return ret; 838 + } 839 + 840 + /* Entry point to the /dev/hwctr device interface. 841 + * The ioctl system call supports three subcommands: 842 + * S390_HWCTR_START: Start the specified counter sets on a CPU list. The 843 + * counter set keeps running until explicitly stopped. Returns the number 844 + * of bytes needed to store the counter values. 
If another S390_HWCTR_START 845 + * ioctl subcommand is called without a previous S390_HWCTR_STOP stop 846 + * command, -EBUSY is returned. 847 + * S390_HWCTR_READ: Read the counter set values from specified CPU list given 848 + * with the S390_HWCTR_START command. 849 + * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the 850 + * previous S390_HWCTR_START subcommand. 851 + */ 852 + static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 853 + { 854 + int ret; 855 + 856 + get_online_cpus(); 857 + mutex_lock(&cfset_ctrset_mutex); 858 + switch (cmd) { 859 + case S390_HWCTR_START: 860 + ret = cfset_ioctl_start(arg); 861 + break; 862 + case S390_HWCTR_STOP: 863 + ret = cfset_ioctl_stop(); 864 + break; 865 + case S390_HWCTR_READ: 866 + ret = cfset_ioctl_read(arg); 867 + break; 868 + default: 869 + ret = -ENOTTY; 870 + break; 871 + } 872 + mutex_unlock(&cfset_ctrset_mutex); 873 + put_online_cpus(); 874 + return ret; 875 + } 876 + 877 + static const struct file_operations cfset_fops = { 878 + .owner = THIS_MODULE, 879 + .open = cfset_open, 880 + .release = cfset_release, 881 + .unlocked_ioctl = cfset_ioctl, 882 + .compat_ioctl = cfset_ioctl, 883 + .llseek = no_llseek 884 + }; 885 + 886 + static struct miscdevice cfset_dev = { 887 + .name = S390_HWCTR_DEVICE, 888 + .minor = MISC_DYNAMIC_MINOR, 889 + .fops = &cfset_fops, 890 + }; 891 + 892 + int cfset_online_cpu(unsigned int cpu) 893 + { 894 + struct cfset_call_on_cpu_parm p; 895 + 896 + mutex_lock(&cfset_ctrset_mutex); 897 + if (cfset_request.ctrset) { 898 + p.sets = cfset_request.ctrset; 899 + cfset_ioctl_on(&p); 900 + cpumask_set_cpu(cpu, &cfset_request.mask); 901 + } 902 + mutex_unlock(&cfset_ctrset_mutex); 903 + return 0; 904 + } 905 + 906 + int cfset_offline_cpu(unsigned int cpu) 907 + { 908 + struct cfset_call_on_cpu_parm p; 909 + 910 + mutex_lock(&cfset_ctrset_mutex); 911 + if (cfset_request.ctrset) { 912 + p.sets = cfset_request.ctrset; 913 + cfset_ioctl_off(&p); 914 + 
cpumask_clear_cpu(cpu, &cfset_request.mask); 915 + } 916 + mutex_unlock(&cfset_ctrset_mutex); 917 + return 0; 918 + } 919 + 920 + static void cfdiag_read(struct perf_event *event) 921 + { 922 + debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__, 923 + event->attr.config, local64_read(&event->count)); 924 + } 925 + 926 + static int get_authctrsets(void) 927 + { 928 + struct cpu_cf_events *cpuhw; 929 + unsigned long auth = 0; 930 + enum cpumf_ctr_set i; 931 + 932 + cpuhw = &get_cpu_var(cpu_cf_events); 933 + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 934 + if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) 935 + auth |= cpumf_ctr_ctl[i]; 936 + } 937 + put_cpu_var(cpu_cf_events); 938 + return auth; 939 + } 940 + 941 + /* Setup the event. Test for authorized counter sets and only include counter 942 + * sets which are authorized at the time of the setup. Including unauthorized 943 + * counter sets results in specification exception (and panic). 944 + */ 945 + static int cfdiag_event_init2(struct perf_event *event) 946 + { 947 + struct perf_event_attr *attr = &event->attr; 948 + int err = 0; 949 + 950 + /* Set sample_period to indicate sampling */ 951 + event->hw.config = attr->config; 952 + event->hw.sample_period = attr->sample_period; 953 + local64_set(&event->hw.period_left, event->hw.sample_period); 954 + local64_set(&event->count, 0); 955 + event->hw.last_period = event->hw.sample_period; 956 + 957 + /* Add all authorized counter sets to config_base. The 958 + * hardware init function is either called per-cpu or just once 959 + * for all CPUs (event->cpu == -1). This depends on whether 960 + * counting is started for all CPUs or on a per workload base where 961 + * the perf event moves from one CPU to another CPU. 962 + * Checking the authorization on any CPU is fine as the hardware 963 + * applies the same authorization settings to all CPUs. 
964 + */ 965 + event->hw.config_base = get_authctrsets(); 966 + 967 + /* No authorized counter sets, nothing to count/sample */ 968 + if (!event->hw.config_base) 969 + err = -EINVAL; 970 + 971 + debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n", 972 + __func__, err, event->hw.config_base); 973 + return err; 974 + } 975 + 976 + static int cfdiag_event_init(struct perf_event *event) 977 + { 978 + struct perf_event_attr *attr = &event->attr; 979 + int err = -ENOENT; 980 + 981 + if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || 982 + event->attr.type != event->pmu->type) 983 + goto out; 984 + 985 + /* Raw events are used to access counters directly, 986 + * hence do not permit excludes. 987 + * This event is useless without PERF_SAMPLE_RAW to return counter set 988 + * values as raw data. 989 + */ 990 + if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || 991 + !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { 992 + err = -EOPNOTSUPP; 993 + goto out; 994 + } 995 + 996 + /* Initialize for using the CPU-measurement counter facility */ 997 + cpumf_hw_inuse(); 998 + event->destroy = hw_perf_event_destroy; 999 + 1000 + err = cfdiag_event_init2(event); 1001 + if (unlikely(err)) 1002 + event->destroy(event); 1003 + out: 1004 + return err; 1005 + } 1006 + 1007 + /* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used 1008 + * to collect the complete counter sets for a scheduled process. Target 1009 + * are complete counter sets attached as raw data to the artificial event. 1010 + * This results in complete counter sets available when a process is 1011 + * scheduled. Contains the delta of every counter while the process was 1012 + * running. 
1013 + */ 1014 + CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); 1015 + 1016 + static struct attribute *cfdiag_events_attr[] = { 1017 + CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), 1018 + NULL, 1019 + }; 1020 + 1021 + PMU_FORMAT_ATTR(event, "config:0-63"); 1022 + 1023 + static struct attribute *cfdiag_format_attr[] = { 1024 + &format_attr_event.attr, 1025 + NULL, 1026 + }; 1027 + 1028 + static struct attribute_group cfdiag_events_group = { 1029 + .name = "events", 1030 + .attrs = cfdiag_events_attr, 1031 + }; 1032 + static struct attribute_group cfdiag_format_group = { 1033 + .name = "format", 1034 + .attrs = cfdiag_format_attr, 1035 + }; 1036 + static const struct attribute_group *cfdiag_attr_groups[] = { 1037 + &cfdiag_events_group, 1038 + &cfdiag_format_group, 1039 + NULL, 1040 + }; 1041 + 1042 + /* Performance monitoring unit for event CF_DIAG. Since this event 1043 + * is also started and stopped via the perf_event_open() system call, use 1044 + * the same event enable/disable call back functions. They do not 1045 + * have a pointer to the perf_event structure as first parameter. 1046 + * 1047 + * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. 1048 + * Reuse them and distinguish the event (always first parameter) via 1049 + * 'config' member. 1050 + */ 1051 + static struct pmu cf_diag = { 1052 + .task_ctx_nr = perf_sw_context, 1053 + .event_init = cfdiag_event_init, 1054 + .pmu_enable = cpumf_pmu_enable, 1055 + .pmu_disable = cpumf_pmu_disable, 1056 + .add = cpumf_pmu_add, 1057 + .del = cpumf_pmu_del, 1058 + .start = cpumf_pmu_start, 1059 + .stop = cpumf_pmu_stop, 1060 + .read = cfdiag_read, 1061 + 1062 + .attr_groups = cfdiag_attr_groups 1063 + }; 1064 + 1065 + /* Calculate memory needed to store all counter sets together with header and 1066 + * trailer data. This is independent of the counter set authorization which 1067 + * can vary depending on the configuration. 
1068 + */ 1069 + static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) 1070 + { 1071 + size_t max_size = sizeof(struct cf_trailer_entry); 1072 + enum cpumf_ctr_set i; 1073 + 1074 + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1075 + size_t size = cpum_cf_ctrset_size(i, info); 1076 + 1077 + if (size) 1078 + max_size += size * sizeof(u64) + 1079 + sizeof(struct cf_ctrset_entry); 1080 + } 1081 + return max_size; 1082 + } 1083 + 1084 + /* Get the CPU speed, try sampling facility first and CPU attributes second. */ 1085 + static void cfdiag_get_cpu_speed(void) 1086 + { 1087 + if (cpum_sf_avail()) { /* Sampling facility first */ 1088 + struct hws_qsi_info_block si; 1089 + 1090 + memset(&si, 0, sizeof(si)); 1091 + if (!qsi(&si)) { 1092 + cfdiag_cpu_speed = si.cpu_speed; 1093 + return; 1094 + } 1095 + } 1096 + 1097 + /* Fallback: CPU speed extract static part. Used in case 1098 + * CPU Measurement Sampling Facility is turned off. 1099 + */ 1100 + if (test_facility(34)) { 1101 + unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); 1102 + 1103 + if (mhz != -1UL) 1104 + cfdiag_cpu_speed = mhz & 0xffffffff; 1105 + } 1106 + } 1107 + 1108 + static int cfset_init(void) 1109 + { 1110 + struct cpumf_ctr_info info; 1111 + size_t need; 1112 + int rc; 1113 + 1114 + if (qctri(&info)) 1115 + return -ENODEV; 1116 + 1117 + cfdiag_get_cpu_speed(); 1118 + /* Make sure the counter set data fits into predefined buffer. 
*/ 1119 + need = cfdiag_maxsize(&info); 1120 + if (need > sizeof(((struct cpu_cf_events *)0)->start)) { 1121 + pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", 1122 + need); 1123 + return -ENOMEM; 1124 + } 1125 + 1126 + rc = misc_register(&cfset_dev); 1127 + if (rc) { 1128 + pr_err("Registration of /dev/%s failed rc=%i\n", 1129 + cfset_dev.name, rc); 1130 + goto out; 1131 + } 1132 + 1133 + rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); 1134 + if (rc) { 1135 + misc_deregister(&cfset_dev); 1136 + pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", 1137 + rc); 1138 + } 1139 + out: 1140 + return rc; 1141 + } 1142 + 1143 + device_initcall(cpumf_pmu_init);
+7 -20
arch/s390/kernel/perf_cpum_cf_common.c
··· 29 29 }, 30 30 .alert = ATOMIC64_INIT(0), 31 31 .state = 0, 32 + .dev_state = 0, 32 33 .flags = 0, 34 + .used = 0, 35 + .usedss = 0, 36 + .sets = 0 33 37 }; 34 38 /* Indicator whether the CPU-Measurement Counter Facility Support is ready */ 35 39 static bool cpum_cf_initalized; ··· 100 96 } 101 97 EXPORT_SYMBOL(kernel_cpumcf_avail); 102 98 103 - 104 - /* Reserve/release functions for sharing perf hardware */ 105 - static DEFINE_SPINLOCK(cpumcf_owner_lock); 106 - static void *cpumcf_owner; 107 - 108 99 /* Initialize the CPU-measurement counter facility */ 109 100 int __kernel_cpumcf_begin(void) 110 101 { 111 102 int flags = PMC_INIT; 112 - int err = 0; 113 - 114 - spin_lock(&cpumcf_owner_lock); 115 - if (cpumcf_owner) 116 - err = -EBUSY; 117 - else 118 - cpumcf_owner = __builtin_return_address(0); 119 - spin_unlock(&cpumcf_owner_lock); 120 - if (err) 121 - return err; 122 103 123 104 on_each_cpu(cpum_cf_setup_cpu, &flags, 1); 124 105 irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); ··· 133 144 134 145 on_each_cpu(cpum_cf_setup_cpu, &flags, 1); 135 146 irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); 136 - 137 - spin_lock(&cpumcf_owner_lock); 138 - cpumcf_owner = NULL; 139 - spin_unlock(&cpumcf_owner_lock); 140 147 } 141 148 EXPORT_SYMBOL(__kernel_cpumcf_end); 142 149 ··· 146 161 147 162 static int cpum_cf_online_cpu(unsigned int cpu) 148 163 { 149 - return cpum_cf_setup(cpu, PMC_INIT); 164 + cpum_cf_setup(cpu, PMC_INIT); 165 + return cfset_online_cpu(cpu); 150 166 } 151 167 152 168 static int cpum_cf_offline_cpu(unsigned int cpu) 153 169 { 170 + cfset_offline_cpu(cpu); 154 171 return cpum_cf_setup(cpu, PMC_RELEASE); 155 172 } 156 173
-1148
arch/s390/kernel/perf_cpum_cf_diag.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * Performance event support for s390x - CPU-measurement Counter Sets 4 - * 5 - * Copyright IBM Corp. 2019, 2021 6 - * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> 7 - * Thomas Richer <tmricht@linux.ibm.com> 8 - */ 9 - #define KMSG_COMPONENT "cpum_cf_diag" 10 - #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 11 - 12 - #include <linux/kernel.h> 13 - #include <linux/kernel_stat.h> 14 - #include <linux/percpu.h> 15 - #include <linux/notifier.h> 16 - #include <linux/init.h> 17 - #include <linux/export.h> 18 - #include <linux/slab.h> 19 - #include <linux/processor.h> 20 - #include <linux/miscdevice.h> 21 - #include <linux/mutex.h> 22 - 23 - #include <asm/ctl_reg.h> 24 - #include <asm/irq.h> 25 - #include <asm/cpu_mcf.h> 26 - #include <asm/timex.h> 27 - #include <asm/debug.h> 28 - 29 - #include <asm/hwctrset.h> 30 - 31 - #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ 32 - /* interval in seconds */ 33 - static unsigned int cf_diag_cpu_speed; 34 - static debug_info_t *cf_diag_dbg; 35 - 36 - struct cf_diag_csd { /* Counter set data per CPU */ 37 - size_t used; /* Bytes used in data/start */ 38 - unsigned char start[PAGE_SIZE]; /* Counter set at event start */ 39 - unsigned char data[PAGE_SIZE]; /* Counter set at event delete */ 40 - unsigned int sets; /* # Counter set saved in data */ 41 - }; 42 - static DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd); 43 - 44 - /* Counter sets are stored as data stream in a page sized memory buffer and 45 - * exported to user space via raw data attached to the event sample data. 46 - * Each counter set starts with an eight byte header consisting of: 47 - * - a two byte eye catcher (0xfeef) 48 - * - a one byte counter set number 49 - * - a two byte counter set size (indicates the number of counters in this set) 50 - * - a three byte reserved value (must be zero) to make the header the same 51 - * size as a counter value. 
52 - * All counter values are eight byte in size. 53 - * 54 - * All counter sets are followed by a 64 byte trailer. 55 - * The trailer consists of a: 56 - * - flag field indicating valid fields when corresponding bit set 57 - * - the counter facility first and second version number 58 - * - the CPU speed if nonzero 59 - * - the time stamp the counter sets have been collected 60 - * - the time of day (TOD) base value 61 - * - the machine type. 62 - * 63 - * The counter sets are saved when the process is prepared to be executed on a 64 - * CPU and saved again when the process is going to be removed from a CPU. 65 - * The difference of both counter sets are calculated and stored in the event 66 - * sample data area. 67 - */ 68 - 69 - struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ 70 - unsigned int def:16; /* 0-15 Data Entry Format */ 71 - unsigned int set:16; /* 16-31 Counter set identifier */ 72 - unsigned int ctr:16; /* 32-47 Number of stored counters */ 73 - unsigned int res1:16; /* 48-63 Reserved */ 74 - }; 75 - 76 - struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */ 77 - /* 0 - 7 */ 78 - union { 79 - struct { 80 - unsigned int clock_base:1; /* TOD clock base set */ 81 - unsigned int speed:1; /* CPU speed set */ 82 - /* Measurement alerts */ 83 - unsigned int mtda:1; /* Loss of MT ctr. data alert */ 84 - unsigned int caca:1; /* Counter auth. 
change alert */ 85 - unsigned int lcda:1; /* Loss of counter data alert */ 86 - }; 87 - unsigned long flags; /* 0-63 All indicators */ 88 - }; 89 - /* 8 - 15 */ 90 - unsigned int cfvn:16; /* 64-79 Ctr First Version */ 91 - unsigned int csvn:16; /* 80-95 Ctr Second Version */ 92 - unsigned int cpu_speed:32; /* 96-127 CPU speed */ 93 - /* 16 - 23 */ 94 - unsigned long timestamp; /* 128-191 Timestamp (TOD) */ 95 - /* 24 - 55 */ 96 - union { 97 - struct { 98 - unsigned long progusage1; 99 - unsigned long progusage2; 100 - unsigned long progusage3; 101 - unsigned long tod_base; 102 - }; 103 - unsigned long progusage[4]; 104 - }; 105 - /* 56 - 63 */ 106 - unsigned int mach_type:16; /* Machine type */ 107 - unsigned int res1:16; /* Reserved */ 108 - unsigned int res2:32; /* Reserved */ 109 - }; 110 - 111 - /* Create the trailer data at the end of a page. */ 112 - static void cf_diag_trailer(struct cf_trailer_entry *te) 113 - { 114 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 115 - struct cpuid cpuid; 116 - 117 - te->cfvn = cpuhw->info.cfvn; /* Counter version numbers */ 118 - te->csvn = cpuhw->info.csvn; 119 - 120 - get_cpu_id(&cpuid); /* Machine type */ 121 - te->mach_type = cpuid.machine; 122 - te->cpu_speed = cf_diag_cpu_speed; 123 - if (te->cpu_speed) 124 - te->speed = 1; 125 - te->clock_base = 1; /* Save clock base */ 126 - te->tod_base = tod_clock_base.tod; 127 - te->timestamp = get_tod_clock_fast(); 128 - } 129 - 130 - /* 131 - * Change the CPUMF state to active. 132 - * Enable and activate the CPU-counter sets according 133 - * to the per-cpu control state. 
134 - */ 135 - static void cf_diag_enable(struct pmu *pmu) 136 - { 137 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 138 - int err; 139 - 140 - debug_sprintf_event(cf_diag_dbg, 5, 141 - "%s pmu %p cpu %d flags %#x state %#llx\n", 142 - __func__, pmu, smp_processor_id(), cpuhw->flags, 143 - cpuhw->state); 144 - if (cpuhw->flags & PMU_F_ENABLED) 145 - return; 146 - 147 - err = lcctl(cpuhw->state); 148 - if (err) { 149 - pr_err("Enabling the performance measuring unit " 150 - "failed with rc=%x\n", err); 151 - return; 152 - } 153 - cpuhw->flags |= PMU_F_ENABLED; 154 - } 155 - 156 - /* 157 - * Change the CPUMF state to inactive. 158 - * Disable and enable (inactive) the CPU-counter sets according 159 - * to the per-cpu control state. 160 - */ 161 - static void cf_diag_disable(struct pmu *pmu) 162 - { 163 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 164 - u64 inactive; 165 - int err; 166 - 167 - debug_sprintf_event(cf_diag_dbg, 5, 168 - "%s pmu %p cpu %d flags %#x state %#llx\n", 169 - __func__, pmu, smp_processor_id(), cpuhw->flags, 170 - cpuhw->state); 171 - if (!(cpuhw->flags & PMU_F_ENABLED)) 172 - return; 173 - 174 - inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); 175 - err = lcctl(inactive); 176 - if (err) { 177 - pr_err("Disabling the performance measuring unit " 178 - "failed with rc=%x\n", err); 179 - return; 180 - } 181 - cpuhw->flags &= ~PMU_F_ENABLED; 182 - } 183 - 184 - /* Number of perf events counting hardware events */ 185 - static atomic_t cf_diag_events = ATOMIC_INIT(0); 186 - /* Used to avoid races in calling reserve/release_cpumf_hardware */ 187 - static DEFINE_MUTEX(cf_diag_reserve_mutex); 188 - 189 - /* Release the PMU if event is the last perf event */ 190 - static void cf_diag_perf_event_destroy(struct perf_event *event) 191 - { 192 - debug_sprintf_event(cf_diag_dbg, 5, 193 - "%s event %p cpu %d cf_diag_events %d\n", 194 - __func__, event, smp_processor_id(), 195 - atomic_read(&cf_diag_events)); 
196 - if (atomic_dec_return(&cf_diag_events) == 0) 197 - __kernel_cpumcf_end(); 198 - } 199 - 200 - static int get_authctrsets(void) 201 - { 202 - struct cpu_cf_events *cpuhw; 203 - unsigned long auth = 0; 204 - enum cpumf_ctr_set i; 205 - 206 - cpuhw = &get_cpu_var(cpu_cf_events); 207 - for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 208 - if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) 209 - auth |= cpumf_ctr_ctl[i]; 210 - } 211 - put_cpu_var(cpu_cf_events); 212 - return auth; 213 - } 214 - 215 - /* Setup the event. Test for authorized counter sets and only include counter 216 - * sets which are authorized at the time of the setup. Including unauthorized 217 - * counter sets result in specification exception (and panic). 218 - */ 219 - static int __hw_perf_event_init(struct perf_event *event) 220 - { 221 - struct perf_event_attr *attr = &event->attr; 222 - int err = 0; 223 - 224 - debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__, 225 - event, event->cpu); 226 - 227 - event->hw.config = attr->config; 228 - 229 - /* Add all authorized counter sets to config_base. The 230 - * the hardware init function is either called per-cpu or just once 231 - * for all CPUS (event->cpu == -1). This depends on the whether 232 - * counting is started for all CPUs or on a per workload base where 233 - * the perf event moves from one CPU to another CPU. 234 - * Checking the authorization on any CPU is fine as the hardware 235 - * applies the same authorization settings to all CPUs. 
236 - */ 237 - event->hw.config_base = get_authctrsets(); 238 - 239 - /* No authorized counter sets, nothing to count/sample */ 240 - if (!event->hw.config_base) { 241 - err = -EINVAL; 242 - goto out; 243 - } 244 - 245 - /* Set sample_period to indicate sampling */ 246 - event->hw.sample_period = attr->sample_period; 247 - local64_set(&event->hw.period_left, event->hw.sample_period); 248 - event->hw.last_period = event->hw.sample_period; 249 - out: 250 - debug_sprintf_event(cf_diag_dbg, 5, "%s err %d config_base %#lx\n", 251 - __func__, err, event->hw.config_base); 252 - return err; 253 - } 254 - 255 - /* Return 0 if the CPU-measurement counter facility is currently free 256 - * and an error otherwise. 257 - */ 258 - static int cf_diag_perf_event_inuse(void) 259 - { 260 - int err = 0; 261 - 262 - if (!atomic_inc_not_zero(&cf_diag_events)) { 263 - mutex_lock(&cf_diag_reserve_mutex); 264 - if (atomic_read(&cf_diag_events) == 0 && 265 - __kernel_cpumcf_begin()) 266 - err = -EBUSY; 267 - else 268 - err = atomic_inc_return(&cf_diag_events); 269 - mutex_unlock(&cf_diag_reserve_mutex); 270 - } 271 - return err; 272 - } 273 - 274 - static int cf_diag_event_init(struct perf_event *event) 275 - { 276 - struct perf_event_attr *attr = &event->attr; 277 - int err = -ENOENT; 278 - 279 - debug_sprintf_event(cf_diag_dbg, 5, 280 - "%s event %p cpu %d config %#llx type:%u " 281 - "sample_type %#llx cf_diag_events %d\n", __func__, 282 - event, event->cpu, attr->config, event->pmu->type, 283 - attr->sample_type, atomic_read(&cf_diag_events)); 284 - 285 - if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || 286 - event->attr.type != event->pmu->type) 287 - goto out; 288 - 289 - /* Raw events are used to access counters directly, 290 - * hence do not permit excludes. 291 - * This event is usesless without PERF_SAMPLE_RAW to return counter set 292 - * values as raw data. 
293 - */ 294 - if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || 295 - !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { 296 - err = -EOPNOTSUPP; 297 - goto out; 298 - } 299 - 300 - /* Initialize for using the CPU-measurement counter facility */ 301 - err = cf_diag_perf_event_inuse(); 302 - if (err < 0) 303 - goto out; 304 - event->destroy = cf_diag_perf_event_destroy; 305 - 306 - err = __hw_perf_event_init(event); 307 - if (unlikely(err)) 308 - event->destroy(event); 309 - out: 310 - debug_sprintf_event(cf_diag_dbg, 5, "%s err %d\n", __func__, err); 311 - return err; 312 - } 313 - 314 - static void cf_diag_read(struct perf_event *event) 315 - { 316 - debug_sprintf_event(cf_diag_dbg, 5, "%s event %p\n", __func__, event); 317 - } 318 - 319 - /* Calculate memory needed to store all counter sets together with header and 320 - * trailer data. This is independend of the counter set authorization which 321 - * can vary depending on the configuration. 322 - */ 323 - static size_t cf_diag_ctrset_maxsize(struct cpumf_ctr_info *info) 324 - { 325 - size_t max_size = sizeof(struct cf_trailer_entry); 326 - enum cpumf_ctr_set i; 327 - 328 - for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 329 - size_t size = cpum_cf_ctrset_size(i, info); 330 - 331 - if (size) 332 - max_size += size * sizeof(u64) + 333 - sizeof(struct cf_ctrset_entry); 334 - } 335 - debug_sprintf_event(cf_diag_dbg, 5, "%s max_size %zu\n", __func__, 336 - max_size); 337 - 338 - return max_size; 339 - } 340 - 341 - /* Read a counter set. The counter set number determines which counter set and 342 - * the CPUM-CF first and second version number determine the number of 343 - * available counters in this counter set. 344 - * Each counter set starts with header containing the counter set number and 345 - * the number of 8 byte counters. 346 - * 347 - * The functions returns the number of bytes occupied by this counter set 348 - * including the header. 
349 - * If there is no counter in the counter set, this counter set is useless and 350 - * zero is returned on this case. 351 - */ 352 - static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, 353 - size_t room) 354 - { 355 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 356 - size_t ctrset_size, need = 0; 357 - int rc = 3; /* Assume write failure */ 358 - 359 - ctrdata->def = CF_DIAG_CTRSET_DEF; 360 - ctrdata->set = ctrset; 361 - ctrdata->res1 = 0; 362 - ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info); 363 - 364 - if (ctrset_size) { /* Save data */ 365 - need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); 366 - if (need <= room) 367 - rc = ctr_stcctm(ctrset, ctrset_size, 368 - (u64 *)(ctrdata + 1)); 369 - if (rc != 3) 370 - ctrdata->ctr = ctrset_size; 371 - else 372 - need = 0; 373 - } 374 - 375 - debug_sprintf_event(cf_diag_dbg, 6, 376 - "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" 377 - " need %zd rc %d\n", 378 - __func__, ctrset, ctrset_size, cpuhw->info.cfvn, 379 - cpuhw->info.csvn, need, rc); 380 - return need; 381 - } 382 - 383 - /* Read out all counter sets and save them in the provided data buffer. 384 - * The last 64 byte host an artificial trailer entry. 
385 - */ 386 - static size_t cf_diag_getctr(void *data, size_t sz, unsigned long auth) 387 - { 388 - struct cf_trailer_entry *trailer; 389 - size_t offset = 0, done; 390 - int i; 391 - 392 - memset(data, 0, sz); 393 - sz -= sizeof(*trailer); /* Always room for trailer */ 394 - for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 395 - struct cf_ctrset_entry *ctrdata = data + offset; 396 - 397 - if (!(auth & cpumf_ctr_ctl[i])) 398 - continue; /* Counter set not authorized */ 399 - 400 - done = cf_diag_getctrset(ctrdata, i, sz - offset); 401 - offset += done; 402 - debug_sprintf_event(cf_diag_dbg, 6, 403 - "%s ctrset %d offset %zu done %zu\n", 404 - __func__, i, offset, done); 405 - } 406 - trailer = data + offset; 407 - cf_diag_trailer(trailer); 408 - return offset + sizeof(*trailer); 409 - } 410 - 411 - /* Calculate the difference for each counter in a counter set. */ 412 - static void cf_diag_diffctrset(u64 *pstart, u64 *pstop, int counters) 413 - { 414 - for (; --counters >= 0; ++pstart, ++pstop) 415 - if (*pstop >= *pstart) 416 - *pstop -= *pstart; 417 - else 418 - *pstop = *pstart - *pstop; 419 - } 420 - 421 - /* Scan the counter sets and calculate the difference of each counter 422 - * in each set. The result is the increment of each counter during the 423 - * period the counter set has been activated. 424 - * 425 - * Return true on success. 
426 - */ 427 - static int cf_diag_diffctr(struct cf_diag_csd *csd, unsigned long auth) 428 - { 429 - struct cf_trailer_entry *trailer_start, *trailer_stop; 430 - struct cf_ctrset_entry *ctrstart, *ctrstop; 431 - size_t offset = 0; 432 - 433 - auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; 434 - do { 435 - ctrstart = (struct cf_ctrset_entry *)(csd->start + offset); 436 - ctrstop = (struct cf_ctrset_entry *)(csd->data + offset); 437 - 438 - if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { 439 - pr_err("cpum_cf_diag counter set compare error " 440 - "in set %i\n", ctrstart->set); 441 - return 0; 442 - } 443 - auth &= ~cpumf_ctr_ctl[ctrstart->set]; 444 - if (ctrstart->def == CF_DIAG_CTRSET_DEF) { 445 - cf_diag_diffctrset((u64 *)(ctrstart + 1), 446 - (u64 *)(ctrstop + 1), ctrstart->ctr); 447 - offset += ctrstart->ctr * sizeof(u64) + 448 - sizeof(*ctrstart); 449 - } 450 - debug_sprintf_event(cf_diag_dbg, 6, 451 - "%s set %d ctr %d offset %zu auth %lx\n", 452 - __func__, ctrstart->set, ctrstart->ctr, 453 - offset, auth); 454 - } while (ctrstart->def && auth); 455 - 456 - /* Save time_stamp from start of event in stop's trailer */ 457 - trailer_start = (struct cf_trailer_entry *)(csd->start + offset); 458 - trailer_stop = (struct cf_trailer_entry *)(csd->data + offset); 459 - trailer_stop->progusage[0] = trailer_start->timestamp; 460 - 461 - return 1; 462 - } 463 - 464 - /* Create perf event sample with the counter sets as raw data. The sample 465 - * is then pushed to the event subsystem and the function checks for 466 - * possible event overflows. If an event overflow occurs, the PMU is 467 - * stopped. 468 - * 469 - * Return non-zero if an event overflow occurred. 
470 - */ 471 - static int cf_diag_push_sample(struct perf_event *event, 472 - struct cf_diag_csd *csd) 473 - { 474 - struct perf_sample_data data; 475 - struct perf_raw_record raw; 476 - struct pt_regs regs; 477 - int overflow; 478 - 479 - /* Setup perf sample */ 480 - perf_sample_data_init(&data, 0, event->hw.last_period); 481 - memset(&regs, 0, sizeof(regs)); 482 - memset(&raw, 0, sizeof(raw)); 483 - 484 - if (event->attr.sample_type & PERF_SAMPLE_CPU) 485 - data.cpu_entry.cpu = event->cpu; 486 - if (event->attr.sample_type & PERF_SAMPLE_RAW) { 487 - raw.frag.size = csd->used; 488 - raw.frag.data = csd->data; 489 - raw.size = csd->used; 490 - data.raw = &raw; 491 - } 492 - 493 - overflow = perf_event_overflow(event, &data, &regs); 494 - debug_sprintf_event(cf_diag_dbg, 6, 495 - "%s event %p cpu %d sample_type %#llx raw %d " 496 - "ov %d\n", __func__, event, event->cpu, 497 - event->attr.sample_type, raw.size, overflow); 498 - if (overflow) 499 - event->pmu->stop(event, 0); 500 - 501 - perf_event_update_userpage(event); 502 - return overflow; 503 - } 504 - 505 - static void cf_diag_start(struct perf_event *event, int flags) 506 - { 507 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 508 - struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd); 509 - struct hw_perf_event *hwc = &event->hw; 510 - 511 - debug_sprintf_event(cf_diag_dbg, 5, 512 - "%s event %p cpu %d flags %#x hwc-state %#x\n", 513 - __func__, event, event->cpu, flags, hwc->state); 514 - if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) 515 - return; 516 - 517 - /* (Re-)enable and activate all counter sets */ 518 - lcctl(0); /* Reset counter sets */ 519 - hwc->state = 0; 520 - ctr_set_multiple_enable(&cpuhw->state, hwc->config_base); 521 - lcctl(cpuhw->state); /* Enable counter sets */ 522 - csd->used = cf_diag_getctr(csd->start, sizeof(csd->start), 523 - event->hw.config_base); 524 - ctr_set_multiple_start(&cpuhw->state, hwc->config_base); 525 - /* Function cf_diag_enable() starts the 
counter sets. */ 526 - } 527 - 528 - static void cf_diag_stop(struct perf_event *event, int flags) 529 - { 530 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 531 - struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd); 532 - struct hw_perf_event *hwc = &event->hw; 533 - 534 - debug_sprintf_event(cf_diag_dbg, 5, 535 - "%s event %p cpu %d flags %#x hwc-state %#x\n", 536 - __func__, event, event->cpu, flags, hwc->state); 537 - 538 - /* Deactivate all counter sets */ 539 - ctr_set_multiple_stop(&cpuhw->state, hwc->config_base); 540 - local64_inc(&event->count); 541 - csd->used = cf_diag_getctr(csd->data, sizeof(csd->data), 542 - event->hw.config_base); 543 - if (cf_diag_diffctr(csd, event->hw.config_base)) 544 - cf_diag_push_sample(event, csd); 545 - hwc->state |= PERF_HES_STOPPED; 546 - } 547 - 548 - static int cf_diag_add(struct perf_event *event, int flags) 549 - { 550 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 551 - int err = 0; 552 - 553 - debug_sprintf_event(cf_diag_dbg, 5, 554 - "%s event %p cpu %d flags %#x cpuhw %p\n", 555 - __func__, event, event->cpu, flags, cpuhw); 556 - 557 - if (cpuhw->flags & PMU_F_IN_USE) { 558 - err = -EAGAIN; 559 - goto out; 560 - } 561 - 562 - event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 563 - 564 - cpuhw->flags |= PMU_F_IN_USE; 565 - if (flags & PERF_EF_START) 566 - cf_diag_start(event, PERF_EF_RELOAD); 567 - out: 568 - debug_sprintf_event(cf_diag_dbg, 5, "%s err %d\n", __func__, err); 569 - return err; 570 - } 571 - 572 - static void cf_diag_del(struct perf_event *event, int flags) 573 - { 574 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 575 - 576 - debug_sprintf_event(cf_diag_dbg, 5, 577 - "%s event %p cpu %d flags %#x\n", 578 - __func__, event, event->cpu, flags); 579 - 580 - cf_diag_stop(event, PERF_EF_UPDATE); 581 - ctr_set_multiple_stop(&cpuhw->state, event->hw.config_base); 582 - ctr_set_multiple_disable(&cpuhw->state, event->hw.config_base); 583 - cpuhw->flags &= 
~PMU_F_IN_USE; 584 - } 585 - 586 - /* Default counter set events and format attribute groups */ 587 - 588 - CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); 589 - 590 - static struct attribute *cf_diag_events_attr[] = { 591 - CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), 592 - NULL, 593 - }; 594 - 595 - PMU_FORMAT_ATTR(event, "config:0-63"); 596 - 597 - static struct attribute *cf_diag_format_attr[] = { 598 - &format_attr_event.attr, 599 - NULL, 600 - }; 601 - 602 - static struct attribute_group cf_diag_events_group = { 603 - .name = "events", 604 - .attrs = cf_diag_events_attr, 605 - }; 606 - static struct attribute_group cf_diag_format_group = { 607 - .name = "format", 608 - .attrs = cf_diag_format_attr, 609 - }; 610 - static const struct attribute_group *cf_diag_attr_groups[] = { 611 - &cf_diag_events_group, 612 - &cf_diag_format_group, 613 - NULL, 614 - }; 615 - 616 - /* Performance monitoring unit for s390x */ 617 - static struct pmu cf_diag = { 618 - .task_ctx_nr = perf_sw_context, 619 - .pmu_enable = cf_diag_enable, 620 - .pmu_disable = cf_diag_disable, 621 - .event_init = cf_diag_event_init, 622 - .add = cf_diag_add, 623 - .del = cf_diag_del, 624 - .start = cf_diag_start, 625 - .stop = cf_diag_stop, 626 - .read = cf_diag_read, 627 - 628 - .attr_groups = cf_diag_attr_groups 629 - }; 630 - 631 - /* Get the CPU speed, try sampling facility first and CPU attributes second. 
*/ 632 - static void cf_diag_get_cpu_speed(void) 633 - { 634 - if (cpum_sf_avail()) { /* Sampling facility first */ 635 - struct hws_qsi_info_block si; 636 - 637 - memset(&si, 0, sizeof(si)); 638 - if (!qsi(&si)) { 639 - cf_diag_cpu_speed = si.cpu_speed; 640 - return; 641 - } 642 - } 643 - 644 - if (test_facility(34)) { /* CPU speed extract static part */ 645 - unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); 646 - 647 - if (mhz != -1UL) 648 - cf_diag_cpu_speed = mhz & 0xffffffff; 649 - } 650 - } 651 - 652 - /* Code to create device and file I/O operations */ 653 - static atomic_t ctrset_opencnt = ATOMIC_INIT(0); /* Excl. access */ 654 - 655 - static int cf_diag_open(struct inode *inode, struct file *file) 656 - { 657 - int err = 0; 658 - 659 - if (!capable(CAP_SYS_ADMIN)) 660 - return -EPERM; 661 - if (atomic_xchg(&ctrset_opencnt, 1)) 662 - return -EBUSY; 663 - 664 - /* Avoid concurrent access with perf_event_open() system call */ 665 - mutex_lock(&cf_diag_reserve_mutex); 666 - if (atomic_read(&cf_diag_events) || __kernel_cpumcf_begin()) 667 - err = -EBUSY; 668 - mutex_unlock(&cf_diag_reserve_mutex); 669 - if (err) { 670 - atomic_set(&ctrset_opencnt, 0); 671 - return err; 672 - } 673 - file->private_data = NULL; 674 - debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__); 675 - /* nonseekable_open() never fails */ 676 - return nonseekable_open(inode, file); 677 - } 678 - 679 - /* Variables for ioctl() interface support */ 680 - static DEFINE_MUTEX(cf_diag_ctrset_mutex); 681 - static struct cf_diag_ctrset { 682 - unsigned long ctrset; /* Bit mask of counter set to read */ 683 - cpumask_t mask; /* CPU mask to read from */ 684 - } cf_diag_ctrset; 685 - 686 - static void cf_diag_ctrset_clear(void) 687 - { 688 - cpumask_clear(&cf_diag_ctrset.mask); 689 - cf_diag_ctrset.ctrset = 0; 690 - } 691 - 692 - static void cf_diag_release_cpu(void *p) 693 - { 694 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 695 - 696 - debug_sprintf_event(cf_diag_dbg, 3, "%s 
cpu %d\n", __func__, 697 - smp_processor_id()); 698 - lcctl(0); /* Reset counter sets */ 699 - cpuhw->state = 0; /* Save state in CPU hardware state */ 700 - } 701 - 702 - /* Release function is also called when application gets terminated without 703 - * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. 704 - * Since only one application is allowed to open the device, simple stop all 705 - * CPU counter sets. 706 - */ 707 - static int cf_diag_release(struct inode *inode, struct file *file) 708 - { 709 - on_each_cpu(cf_diag_release_cpu, NULL, 1); 710 - cf_diag_ctrset_clear(); 711 - atomic_set(&ctrset_opencnt, 0); 712 - __kernel_cpumcf_end(); 713 - debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__); 714 - return 0; 715 - } 716 - 717 - struct cf_diag_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ 718 - unsigned int sets; /* Counter set bit mask */ 719 - atomic_t cpus_ack; /* # CPUs successfully executed func */ 720 - }; 721 - 722 - static int cf_diag_all_copy(unsigned long arg, cpumask_t *mask) 723 - { 724 - struct s390_ctrset_read __user *ctrset_read; 725 - unsigned int cpu, cpus, rc; 726 - void __user *uptr; 727 - 728 - ctrset_read = (struct s390_ctrset_read __user *)arg; 729 - uptr = ctrset_read->data; 730 - for_each_cpu(cpu, mask) { 731 - struct cf_diag_csd *csd = per_cpu_ptr(&cf_diag_csd, cpu); 732 - struct s390_ctrset_cpudata __user *ctrset_cpudata; 733 - 734 - ctrset_cpudata = uptr; 735 - debug_sprintf_event(cf_diag_dbg, 5, "%s cpu %d used %zd\n", 736 - __func__, cpu, csd->used); 737 - rc = put_user(cpu, &ctrset_cpudata->cpu_nr); 738 - rc |= put_user(csd->sets, &ctrset_cpudata->no_sets); 739 - rc |= copy_to_user(ctrset_cpudata->data, csd->data, csd->used); 740 - if (rc) 741 - return -EFAULT; 742 - uptr += sizeof(struct s390_ctrset_cpudata) + csd->used; 743 - cond_resched(); 744 - } 745 - cpus = cpumask_weight(mask); 746 - if (put_user(cpus, &ctrset_read->no_cpus)) 747 - return -EFAULT; 748 - debug_sprintf_event(cf_diag_dbg, 5, "%s copied 
%ld\n", 749 - __func__, uptr - (void __user *)ctrset_read->data); 750 - return 0; 751 - } 752 - 753 - static size_t cf_diag_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, 754 - int ctrset_size, size_t room) 755 - { 756 - size_t need = 0; 757 - int rc = -1; 758 - 759 - need = sizeof(*p) + sizeof(u64) * ctrset_size; 760 - debug_sprintf_event(cf_diag_dbg, 5, 761 - "%s room %zd need %zd set %#x set_size %d\n", 762 - __func__, room, need, ctrset, ctrset_size); 763 - if (need <= room) { 764 - p->set = cpumf_ctr_ctl[ctrset]; 765 - p->no_cnts = ctrset_size; 766 - rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); 767 - if (rc == 3) /* Nothing stored */ 768 - need = 0; 769 - } 770 - debug_sprintf_event(cf_diag_dbg, 5, "%s need %zd rc %d\n", __func__, 771 - need, rc); 772 - return need; 773 - } 774 - 775 - /* Read all counter sets. Since the perf_event_open() system call with 776 - * event cpum_cf_diag/.../ is blocked when this interface is active, reuse 777 - * the perf_event_open() data buffer to store the counter sets. 
778 - */ 779 - static void cf_diag_cpu_read(void *parm) 780 - { 781 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 782 - struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd); 783 - struct cf_diag_call_on_cpu_parm *p = parm; 784 - int set, set_size; 785 - size_t space; 786 - 787 - debug_sprintf_event(cf_diag_dbg, 5, 788 - "%s new %#x flags %#x state %#llx\n", 789 - __func__, p->sets, cpuhw->flags, 790 - cpuhw->state); 791 - /* No data saved yet */ 792 - csd->used = 0; 793 - csd->sets = 0; 794 - memset(csd->data, 0, sizeof(csd->data)); 795 - 796 - /* Scan the counter sets */ 797 - for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { 798 - struct s390_ctrset_setdata *sp = (void *)csd->data + csd->used; 799 - 800 - if (!(p->sets & cpumf_ctr_ctl[set])) 801 - continue; /* Counter set not in list */ 802 - set_size = cpum_cf_ctrset_size(set, &cpuhw->info); 803 - space = sizeof(csd->data) - csd->used; 804 - space = cf_diag_cpuset_read(sp, set, set_size, space); 805 - if (space) { 806 - csd->used += space; 807 - csd->sets += 1; 808 - } 809 - debug_sprintf_event(cf_diag_dbg, 5, "%s sp %px space %zd\n", 810 - __func__, sp, space); 811 - } 812 - debug_sprintf_event(cf_diag_dbg, 5, "%s sets %d used %zd\n", __func__, 813 - csd->sets, csd->used); 814 - } 815 - 816 - static int cf_diag_all_read(unsigned long arg) 817 - { 818 - struct cf_diag_call_on_cpu_parm p; 819 - cpumask_var_t mask; 820 - int rc; 821 - 822 - debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); 823 - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 824 - return -ENOMEM; 825 - 826 - p.sets = cf_diag_ctrset.ctrset; 827 - cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); 828 - on_each_cpu_mask(mask, cf_diag_cpu_read, &p, 1); 829 - rc = cf_diag_all_copy(arg, mask); 830 - free_cpumask_var(mask); 831 - debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d\n", __func__, rc); 832 - return rc; 833 - } 834 - 835 - /* Stop all counter sets via ioctl interface */ 836 - static void 
cf_diag_ioctl_off(void *parm) 837 - { 838 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 839 - struct cf_diag_call_on_cpu_parm *p = parm; 840 - int rc; 841 - 842 - debug_sprintf_event(cf_diag_dbg, 5, 843 - "%s new %#x flags %#x state %#llx\n", 844 - __func__, p->sets, cpuhw->flags, 845 - cpuhw->state); 846 - 847 - ctr_set_multiple_disable(&cpuhw->state, p->sets); 848 - ctr_set_multiple_stop(&cpuhw->state, p->sets); 849 - rc = lcctl(cpuhw->state); /* Stop counter sets */ 850 - if (!cpuhw->state) 851 - cpuhw->flags &= ~PMU_F_IN_USE; 852 - debug_sprintf_event(cf_diag_dbg, 5, 853 - "%s rc %d flags %#x state %#llx\n", __func__, 854 - rc, cpuhw->flags, cpuhw->state); 855 - } 856 - 857 - /* Start counter sets on particular CPU */ 858 - static void cf_diag_ioctl_on(void *parm) 859 - { 860 - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 861 - struct cf_diag_call_on_cpu_parm *p = parm; 862 - int rc; 863 - 864 - debug_sprintf_event(cf_diag_dbg, 5, 865 - "%s new %#x flags %#x state %#llx\n", 866 - __func__, p->sets, cpuhw->flags, 867 - cpuhw->state); 868 - 869 - if (!(cpuhw->flags & PMU_F_IN_USE)) 870 - cpuhw->state = 0; 871 - cpuhw->flags |= PMU_F_IN_USE; 872 - rc = lcctl(cpuhw->state); /* Reset unused counter sets */ 873 - ctr_set_multiple_enable(&cpuhw->state, p->sets); 874 - ctr_set_multiple_start(&cpuhw->state, p->sets); 875 - rc |= lcctl(cpuhw->state); /* Start counter sets */ 876 - if (!rc) 877 - atomic_inc(&p->cpus_ack); 878 - debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d state %#llx\n", 879 - __func__, rc, cpuhw->state); 880 - } 881 - 882 - static int cf_diag_all_stop(void) 883 - { 884 - struct cf_diag_call_on_cpu_parm p = { 885 - .sets = cf_diag_ctrset.ctrset, 886 - }; 887 - cpumask_var_t mask; 888 - 889 - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 890 - return -ENOMEM; 891 - cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); 892 - on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1); 893 - free_cpumask_var(mask); 894 - return 0; 895 - 
} 896 - 897 - static int cf_diag_all_start(void) 898 - { 899 - struct cf_diag_call_on_cpu_parm p = { 900 - .sets = cf_diag_ctrset.ctrset, 901 - .cpus_ack = ATOMIC_INIT(0), 902 - }; 903 - cpumask_var_t mask; 904 - int rc = 0; 905 - 906 - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 907 - return -ENOMEM; 908 - cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); 909 - on_each_cpu_mask(mask, cf_diag_ioctl_on, &p, 1); 910 - if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { 911 - on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1); 912 - rc = -EIO; 913 - } 914 - free_cpumask_var(mask); 915 - return rc; 916 - } 917 - 918 - /* Return the maximum required space for all possible CPUs in case one 919 - * CPU will be onlined during the START, READ, STOP cycles. 920 - * To find out the size of the counter sets, any one CPU will do. They 921 - * all have the same counter sets. 922 - */ 923 - static size_t cf_diag_needspace(unsigned int sets) 924 - { 925 - struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events); 926 - size_t bytes = 0; 927 - int i; 928 - 929 - for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 930 - if (!(sets & cpumf_ctr_ctl[i])) 931 - continue; 932 - bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) + 933 - sizeof(((struct s390_ctrset_setdata *)0)->set) + 934 - sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); 935 - } 936 - bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * 937 - (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + 938 - sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); 939 - debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__, 940 - bytes); 941 - put_cpu_ptr(&cpu_cf_events); 942 - return bytes; 943 - } 944 - 945 - static long cf_diag_ioctl_read(unsigned long arg) 946 - { 947 - struct s390_ctrset_read read; 948 - int ret = 0; 949 - 950 - debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); 951 - if (copy_from_user(&read, (char __user *)arg, sizeof(read))) 952 - return 
-EFAULT; 953 - ret = cf_diag_all_read(arg); 954 - debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret); 955 - return ret; 956 - } 957 - 958 - static long cf_diag_ioctl_stop(void) 959 - { 960 - int ret; 961 - 962 - debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); 963 - ret = cf_diag_all_stop(); 964 - cf_diag_ctrset_clear(); 965 - debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret); 966 - return ret; 967 - } 968 - 969 - static long cf_diag_ioctl_start(unsigned long arg) 970 - { 971 - struct s390_ctrset_start __user *ustart; 972 - struct s390_ctrset_start start; 973 - void __user *umask; 974 - unsigned int len; 975 - int ret = 0; 976 - size_t need; 977 - 978 - if (cf_diag_ctrset.ctrset) 979 - return -EBUSY; 980 - ustart = (struct s390_ctrset_start __user *)arg; 981 - if (copy_from_user(&start, ustart, sizeof(start))) 982 - return -EFAULT; 983 - if (start.version != S390_HWCTR_START_VERSION) 984 - return -EINVAL; 985 - if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | 986 - cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | 987 - cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | 988 - cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | 989 - cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) 990 - return -EINVAL; /* Invalid counter set */ 991 - if (!start.counter_sets) 992 - return -EINVAL; /* No counter set at all? 
*/ 993 - cpumask_clear(&cf_diag_ctrset.mask); 994 - len = min_t(u64, start.cpumask_len, cpumask_size()); 995 - umask = (void __user *)start.cpumask; 996 - if (copy_from_user(&cf_diag_ctrset.mask, umask, len)) 997 - return -EFAULT; 998 - if (cpumask_empty(&cf_diag_ctrset.mask)) 999 - return -EINVAL; 1000 - need = cf_diag_needspace(start.counter_sets); 1001 - if (put_user(need, &ustart->data_bytes)) 1002 - ret = -EFAULT; 1003 - if (ret) 1004 - goto out; 1005 - cf_diag_ctrset.ctrset = start.counter_sets; 1006 - ret = cf_diag_all_start(); 1007 - out: 1008 - if (ret) 1009 - cf_diag_ctrset_clear(); 1010 - debug_sprintf_event(cf_diag_dbg, 2, "%s sets %#lx need %ld ret %d\n", 1011 - __func__, cf_diag_ctrset.ctrset, need, ret); 1012 - return ret; 1013 - } 1014 - 1015 - static long cf_diag_ioctl(struct file *file, unsigned int cmd, 1016 - unsigned long arg) 1017 - { 1018 - int ret; 1019 - 1020 - debug_sprintf_event(cf_diag_dbg, 2, "%s cmd %#x arg %lx\n", __func__, 1021 - cmd, arg); 1022 - get_online_cpus(); 1023 - mutex_lock(&cf_diag_ctrset_mutex); 1024 - switch (cmd) { 1025 - case S390_HWCTR_START: 1026 - ret = cf_diag_ioctl_start(arg); 1027 - break; 1028 - case S390_HWCTR_STOP: 1029 - ret = cf_diag_ioctl_stop(); 1030 - break; 1031 - case S390_HWCTR_READ: 1032 - ret = cf_diag_ioctl_read(arg); 1033 - break; 1034 - default: 1035 - ret = -ENOTTY; 1036 - break; 1037 - } 1038 - mutex_unlock(&cf_diag_ctrset_mutex); 1039 - put_online_cpus(); 1040 - debug_sprintf_event(cf_diag_dbg, 2, "%s ret %d\n", __func__, ret); 1041 - return ret; 1042 - } 1043 - 1044 - static const struct file_operations cf_diag_fops = { 1045 - .owner = THIS_MODULE, 1046 - .open = cf_diag_open, 1047 - .release = cf_diag_release, 1048 - .unlocked_ioctl = cf_diag_ioctl, 1049 - .compat_ioctl = cf_diag_ioctl, 1050 - .llseek = no_llseek 1051 - }; 1052 - 1053 - static struct miscdevice cf_diag_dev = { 1054 - .name = S390_HWCTR_DEVICE, 1055 - .minor = MISC_DYNAMIC_MINOR, 1056 - .fops = &cf_diag_fops, 1057 - }; 1058 - 
1059 - static int cf_diag_online_cpu(unsigned int cpu) 1060 - { 1061 - struct cf_diag_call_on_cpu_parm p; 1062 - 1063 - mutex_lock(&cf_diag_ctrset_mutex); 1064 - if (!cf_diag_ctrset.ctrset) 1065 - goto out; 1066 - p.sets = cf_diag_ctrset.ctrset; 1067 - cf_diag_ioctl_on(&p); 1068 - out: 1069 - mutex_unlock(&cf_diag_ctrset_mutex); 1070 - return 0; 1071 - } 1072 - 1073 - static int cf_diag_offline_cpu(unsigned int cpu) 1074 - { 1075 - struct cf_diag_call_on_cpu_parm p; 1076 - 1077 - mutex_lock(&cf_diag_ctrset_mutex); 1078 - if (!cf_diag_ctrset.ctrset) 1079 - goto out; 1080 - p.sets = cf_diag_ctrset.ctrset; 1081 - cf_diag_ioctl_off(&p); 1082 - out: 1083 - mutex_unlock(&cf_diag_ctrset_mutex); 1084 - return 0; 1085 - } 1086 - 1087 - /* Initialize the counter set PMU to generate complete counter set data as 1088 - * event raw data. This relies on the CPU Measurement Counter Facility device 1089 - * already being loaded and initialized. 1090 - */ 1091 - static int __init cf_diag_init(void) 1092 - { 1093 - struct cpumf_ctr_info info; 1094 - size_t need; 1095 - int rc; 1096 - 1097 - if (!kernel_cpumcf_avail() || !stccm_avail() || qctri(&info)) 1098 - return -ENODEV; 1099 - cf_diag_get_cpu_speed(); 1100 - 1101 - /* Make sure the counter set data fits into predefined buffer. 
*/ 1102 - need = cf_diag_ctrset_maxsize(&info); 1103 - if (need > sizeof(((struct cf_diag_csd *)0)->start)) { 1104 - pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", 1105 - need); 1106 - return -ENOMEM; 1107 - } 1108 - 1109 - rc = misc_register(&cf_diag_dev); 1110 - if (rc) { 1111 - pr_err("Registration of /dev/" S390_HWCTR_DEVICE 1112 - "failed rc=%d\n", rc); 1113 - goto out; 1114 - } 1115 - 1116 - /* Setup s390dbf facility */ 1117 - cf_diag_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); 1118 - if (!cf_diag_dbg) { 1119 - pr_err("Registration of s390dbf(cpum_cf_diag) failed\n"); 1120 - rc = -ENOMEM; 1121 - goto out_dbf; 1122 - } 1123 - debug_register_view(cf_diag_dbg, &debug_sprintf_view); 1124 - 1125 - rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); 1126 - if (rc) { 1127 - pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", 1128 - rc); 1129 - goto out_perf; 1130 - } 1131 - rc = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_S390_CFD_ONLINE, 1132 - "perf/s390/cfd:online", 1133 - cf_diag_online_cpu, cf_diag_offline_cpu); 1134 - if (!rc) 1135 - goto out; 1136 - 1137 - pr_err("Registration of CPUHP_AP_PERF_S390_CFD_ONLINE failed rc=%i\n", 1138 - rc); 1139 - perf_pmu_unregister(&cf_diag); 1140 - out_perf: 1141 - debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); 1142 - debug_unregister(cf_diag_dbg); 1143 - out_dbf: 1144 - misc_deregister(&cf_diag_dev); 1145 - out: 1146 - return rc; 1147 - } 1148 - device_initcall(cf_diag_init);
+6
arch/s390/kernel/process.c
··· 166 166 p->thread.acrs[1] = (unsigned int)tls; 167 167 } 168 168 } 169 + /* 170 + * s390 stores the svc return address in arch_data when calling 171 + * sigreturn()/restart_syscall() via vdso. 1 means no valid address 172 + * stored. 173 + */ 174 + p->restart_block.arch_data = 1; 169 175 return 0; 170 176 } 171 177
+2 -1
arch/s390/kernel/setup.c
··· 354 354 set_task_stack_end_magic(current); 355 355 stack += STACK_INIT_OFFSET; 356 356 S390_lowcore.kernel_stack = stack; 357 - CALL_ON_STACK_NORETURN(rest_init, stack); 357 + call_on_stack_noreturn(rest_init, stack); 358 358 } 359 359 360 360 static void __init setup_lowcore_dat_off(void) ··· 442 442 lc->br_r1_trampoline = 0x07f1; /* br %r1 */ 443 443 lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); 444 444 lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); 445 + lc->preempt_count = PREEMPT_DISABLED; 445 446 446 447 set_prefix((u32)(unsigned long) lc); 447 448 lowcore_ptr[0] = lc;
+18 -21
arch/s390/kernel/signal.c
··· 32 32 #include <linux/uaccess.h> 33 33 #include <asm/lowcore.h> 34 34 #include <asm/switch_to.h> 35 + #include <asm/vdso.h> 35 36 #include "entry.h" 36 37 37 38 /* ··· 172 171 fpregs_load(&user_sregs.fpregs, &current->thread.fpu); 173 172 174 173 clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ 175 - clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); 176 174 return 0; 177 175 } 178 176 ··· 334 334 335 335 /* Set up to return from userspace. If provided, use a stub 336 336 already in userspace. */ 337 - if (ka->sa.sa_flags & SA_RESTORER) { 337 + if (ka->sa.sa_flags & SA_RESTORER) 338 338 restorer = (unsigned long) ka->sa.sa_restorer; 339 - } else { 340 - /* Signal frame without vector registers are short ! */ 341 - __u16 __user *svc = (void __user *) frame + frame_size - 2; 342 - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) 343 - return -EFAULT; 344 - restorer = (unsigned long) svc; 345 - } 339 + else 340 + restorer = VDSO64_SYMBOL(current, sigreturn); 346 341 347 342 /* Set up registers for signal handler */ 348 343 regs->gprs[14] = restorer; ··· 392 397 393 398 /* Set up to return from userspace. If provided, use a stub 394 399 already in userspace. 
*/ 395 - if (ksig->ka.sa.sa_flags & SA_RESTORER) { 400 + if (ksig->ka.sa.sa_flags & SA_RESTORER) 396 401 restorer = (unsigned long) ksig->ka.sa.sa_restorer; 397 - } else { 398 - __u16 __user *svc = &frame->svc_insn; 399 - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) 400 - return -EFAULT; 401 - restorer = (unsigned long) svc; 402 - } 402 + else 403 + restorer = VDSO64_SYMBOL(current, rt_sigreturn); 403 404 404 405 /* Create siginfo on the signal stack */ 405 406 if (copy_siginfo_to_user(&frame->info, &ksig->info)) ··· 492 501 } 493 502 /* No longer in a system call */ 494 503 clear_pt_regs_flag(regs, PIF_SYSCALL); 495 - clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); 504 + 496 505 rseq_signal_deliver(&ksig, regs); 497 506 if (is_compat_task()) 498 507 handle_signal32(&ksig, oldset, regs); ··· 508 517 switch (regs->gprs[2]) { 509 518 case -ERESTART_RESTARTBLOCK: 510 519 /* Restart with sys_restart_syscall */ 511 - regs->int_code = __NR_restart_syscall; 512 - fallthrough; 520 + regs->gprs[2] = regs->orig_gpr2; 521 + current->restart_block.arch_data = regs->psw.addr; 522 + if (is_compat_task()) 523 + regs->psw.addr = VDSO32_SYMBOL(current, restart_syscall); 524 + else 525 + regs->psw.addr = VDSO64_SYMBOL(current, restart_syscall); 526 + if (test_thread_flag(TIF_SINGLE_STEP)) 527 + clear_thread_flag(TIF_PER_TRAP); 528 + break; 513 529 case -ERESTARTNOHAND: 514 530 case -ERESTARTSYS: 515 531 case -ERESTARTNOINTR: 516 - /* Restart system call with magic TIF bit. */ 517 532 regs->gprs[2] = regs->orig_gpr2; 518 - set_pt_regs_flag(regs, PIF_SYSCALL_RESTART); 533 + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); 519 534 if (test_thread_flag(TIF_SINGLE_STEP)) 520 535 clear_thread_flag(TIF_PER_TRAP); 521 536 break;
+10 -5
arch/s390/kernel/smp.c
··· 210 210 lc->br_r1_trampoline = 0x07f1; /* br %r1 */ 211 211 lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); 212 212 lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); 213 + lc->preempt_count = PREEMPT_DISABLED; 213 214 if (nmi_alloc_per_cpu(lc)) 214 215 goto out; 215 216 lowcore_ptr[cpu] = lc; ··· 301 300 pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); 302 301 } 303 302 303 + typedef void (pcpu_delegate_fn)(void *); 304 + 304 305 /* 305 306 * Call function via PSW restart on pcpu and stop the current cpu. 306 307 */ 307 - static void __pcpu_delegate(void (*func)(void*), void *data) 308 + static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) 308 309 { 309 310 func(data); /* should not return */ 310 311 } 311 312 312 313 static void __no_sanitize_address pcpu_delegate(struct pcpu *pcpu, 313 - void (*func)(void *), 314 + pcpu_delegate_fn *func, 314 315 void *data, unsigned long stack) 315 316 { 316 317 struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; 317 318 unsigned long source_cpu = stap(); 318 319 319 320 __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); 320 - if (pcpu->address == source_cpu) 321 - CALL_ON_STACK(__pcpu_delegate, stack, 2, func, data); 321 + if (pcpu->address == source_cpu) { 322 + call_on_stack(2, stack, void, __pcpu_delegate, 323 + pcpu_delegate_fn *, func, void *, data); 324 + } 322 325 /* Stop target cpu (if func returns this stops the current cpu). */ 323 326 pcpu_sigp_retry(pcpu, SIGP_STOP, 0); 324 327 /* Restart func on the target cpu and stop the current cpu. */ ··· 903 898 S390_lowcore.restart_source = -1UL; 904 899 __ctl_load(S390_lowcore.cregs_save_area, 0, 15); 905 900 __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); 906 - CALL_ON_STACK_NORETURN(smp_init_secondary, S390_lowcore.kernel_stack); 901 + call_on_stack_noreturn(smp_init_secondary, S390_lowcore.kernel_stack); 907 902 } 908 903 909 904 /* Upping and downing of CPUs */
+18 -16
arch/s390/kernel/syscall.c
··· 108 108 return -ENOSYS; 109 109 } 110 110 111 - void do_syscall(struct pt_regs *regs) 111 + static void do_syscall(struct pt_regs *regs) 112 112 { 113 113 unsigned long nr; 114 114 ··· 121 121 122 122 regs->gprs[2] = nr; 123 123 124 + if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) { 125 + regs->psw.addr = current->restart_block.arch_data; 126 + current->restart_block.arch_data = 1; 127 + } 124 128 nr = syscall_enter_from_user_mode_work(regs, nr); 125 129 126 130 /* ··· 134 130 * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here 135 131 * and if set, the syscall will be skipped. 136 132 */ 137 - if (!test_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)) { 138 - regs->gprs[2] = -ENOSYS; 139 - if (likely(nr < NR_syscalls)) 140 - regs->gprs[2] = current->thread.sys_call_table[nr](regs); 141 - } else { 142 - clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); 143 - } 133 + 134 + if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) 135 + goto out; 136 + regs->gprs[2] = -ENOSYS; 137 + if (likely(nr >= NR_syscalls)) 138 + goto out; 139 + do { 140 + regs->gprs[2] = current->thread.sys_call_table[nr](regs); 141 + } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART)); 142 + out: 144 143 syscall_exit_to_user_mode_work(regs); 145 144 } 146 145 ··· 161 154 if (per_trap) 162 155 set_thread_flag(TIF_PER_TRAP); 163 156 164 - for (;;) { 165 - regs->flags = 0; 166 - set_pt_regs_flag(regs, PIF_SYSCALL); 167 - do_syscall(regs); 168 - if (!test_pt_regs_flag(regs, PIF_SYSCALL_RESTART)) 169 - break; 170 - local_irq_enable(); 171 - } 157 + regs->flags = 0; 158 + set_pt_regs_flag(regs, PIF_SYSCALL); 159 + do_syscall(regs); 172 160 exit_to_user_mode(); 173 161 }
+3 -11
arch/s390/kernel/traps.c
··· 277 277 { 278 278 int val = 1; 279 279 280 + if (!IS_ENABLED(CONFIG_BUG)) 281 + return; 280 282 asm volatile( 281 283 " mc 0,0\n" 282 284 "0: xgr %0,%0\n" ··· 301 299 void noinstr __do_pgm_check(struct pt_regs *regs) 302 300 { 303 301 unsigned long last_break = S390_lowcore.breaking_event_addr; 304 - unsigned int trapnr, syscall_redirect = 0; 302 + unsigned int trapnr; 305 303 irqentry_state_t state; 306 304 307 - add_random_kstack_offset(); 308 305 regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc; 309 306 regs->int_parm_long = S390_lowcore.trans_exc_code; 310 307 ··· 345 344 trapnr = regs->int_code & PGM_INT_CODE_MASK; 346 345 if (trapnr) 347 346 pgm_check_table[trapnr](regs); 348 - syscall_redirect = user_mode(regs) && test_pt_regs_flag(regs, PIF_SYSCALL); 349 347 out: 350 348 local_irq_disable(); 351 349 irqentry_exit(regs, state); 352 - 353 - if (syscall_redirect) { 354 - enter_from_user_mode(regs); 355 - local_irq_enable(); 356 - regs->orig_gpr2 = regs->gprs[2]; 357 - do_syscall(regs); 358 - exit_to_user_mode(); 359 - } 360 350 } 361 351 362 352 /*
+10
arch/s390/kernel/uv.c
··· 358 358 static struct kobj_attribute uv_query_facilities_attr = 359 359 __ATTR(facilities, 0444, uv_query_facilities, NULL); 360 360 361 + static ssize_t uv_query_feature_indications(struct kobject *kobj, 362 + struct kobj_attribute *attr, char *buf) 363 + { 364 + return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications); 365 + } 366 + 367 + static struct kobj_attribute uv_query_feature_indications_attr = 368 + __ATTR(feature_indications, 0444, uv_query_feature_indications, NULL); 369 + 361 370 static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, 362 371 struct kobj_attribute *attr, char *page) 363 372 { ··· 399 390 400 391 static struct attribute *uv_query_attrs[] = { 401 392 &uv_query_facilities_attr.attr, 393 + &uv_query_feature_indications_attr.attr, 402 394 &uv_query_max_guest_cpus_attr.attr, 403 395 &uv_query_max_guest_vms_attr.attr, 404 396 &uv_query_max_guest_addr_attr.attr,
+33 -30
arch/s390/kernel/vdso.c
··· 20 20 #include <asm/vdso.h> 21 21 22 22 extern char vdso64_start[], vdso64_end[]; 23 - static unsigned int vdso_pages; 23 + extern char vdso32_start[], vdso32_end[]; 24 24 25 25 static struct vm_special_mapping vvar_mapping; 26 26 ··· 36 36 VVAR_TIMENS_PAGE_OFFSET, 37 37 VVAR_NR_PAGES, 38 38 }; 39 - 40 - unsigned int __read_mostly vdso_enabled = 1; 41 - 42 - static int __init vdso_setup(char *str) 43 - { 44 - bool enabled; 45 - 46 - if (!kstrtobool(str, &enabled)) 47 - vdso_enabled = enabled; 48 - return 1; 49 - } 50 - __setup("vdso=", vdso_setup); 51 39 52 40 #ifdef CONFIG_TIME_NS 53 41 struct vdso_data *arch_get_vdso_data(void *vvar_page) ··· 143 155 .fault = vvar_fault, 144 156 }; 145 157 146 - static struct vm_special_mapping vdso_mapping = { 158 + static struct vm_special_mapping vdso64_mapping = { 159 + .name = "[vdso]", 160 + .mremap = vdso_mremap, 161 + }; 162 + 163 + static struct vm_special_mapping vdso32_mapping = { 147 164 .name = "[vdso]", 148 165 .mremap = vdso_mremap, 149 166 }; ··· 164 171 { 165 172 unsigned long vdso_text_len, vdso_mapping_len; 166 173 unsigned long vvar_start, vdso_text_start; 174 + struct vm_special_mapping *vdso_mapping; 167 175 struct mm_struct *mm = current->mm; 168 176 struct vm_area_struct *vma; 169 177 int rc; 170 178 171 179 BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); 172 - if (!vdso_enabled || is_compat_task()) 173 - return 0; 174 180 if (mmap_write_lock_killable(mm)) 175 181 return -EINTR; 176 - vdso_text_len = vdso_pages << PAGE_SHIFT; 182 + 183 + if (is_compat_task()) { 184 + vdso_text_len = vdso32_end - vdso32_start; 185 + vdso_mapping = &vdso32_mapping; 186 + } else { 187 + vdso_text_len = vdso64_end - vdso64_start; 188 + vdso_mapping = &vdso64_mapping; 189 + } 177 190 vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE; 178 191 vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); 179 192 rc = vvar_start; ··· 197 198 vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, 198 199 
VM_READ|VM_EXEC| 199 200 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 200 - &vdso_mapping); 201 + vdso_mapping); 201 202 if (IS_ERR(vma)) { 202 203 do_munmap(mm, vvar_start, PAGE_SIZE, NULL); 203 204 rc = PTR_ERR(vma); ··· 210 211 return rc; 211 212 } 212 213 213 - static int __init vdso_init(void) 214 + static struct page ** __init vdso_setup_pages(void *start, void *end) 214 215 { 215 - struct page **pages; 216 + int pages = (end - start) >> PAGE_SHIFT; 217 + struct page **pagelist; 216 218 int i; 217 219 218 - vdso_pages = (vdso64_end - vdso64_start) >> PAGE_SHIFT; 219 - pages = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); 220 - if (!pages) { 221 - vdso_enabled = 0; 222 - return -ENOMEM; 223 - } 224 - for (i = 0; i < vdso_pages; i++) 225 - pages[i] = virt_to_page(vdso64_start + i * PAGE_SIZE); 226 - pages[vdso_pages] = NULL; 227 - vdso_mapping.pages = pages; 220 + pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); 221 + if (!pagelist) 222 + panic("%s: Cannot allocate page list for VDSO", __func__); 223 + for (i = 0; i < pages; i++) 224 + pagelist[i] = virt_to_page(start + i * PAGE_SIZE); 225 + return pagelist; 226 + } 227 + 228 + static int __init vdso_init(void) 229 + { 230 + vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end); 231 + if (IS_ENABLED(CONFIG_COMPAT)) 232 + vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end); 228 233 return 0; 229 234 } 230 235 arch_initcall(vdso_init);
+2
arch/s390/kernel/vdso32/.gitignore
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + vdso32.lds
+75
arch/s390/kernel/vdso32/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + # List of files in the vdso 3 + 4 + KCOV_INSTRUMENT := n 5 + ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE 6 + ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT 7 + 8 + include $(srctree)/lib/vdso/Makefile 9 + obj-vdso32 = vdso_user_wrapper-32.o note-32.o 10 + 11 + # Build rules 12 + 13 + targets := $(obj-vdso32) vdso32.so vdso32.so.dbg 14 + obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) 15 + 16 + KBUILD_AFLAGS += -DBUILD_VDSO 17 + KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING 18 + 19 + KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) 20 + KBUILD_AFLAGS_32 += -m31 -s 21 + 22 + KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) 23 + KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin 24 + 25 + LDFLAGS_vdso32.so.dbg += -fPIC -shared -nostdlib -soname=linux-vdso32.so.1 \ 26 + --hash-style=both --build-id=sha1 -melf_s390 -T 27 + 28 + $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) 29 + $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) 30 + 31 + obj-y += vdso32_wrapper.o 32 + CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) 33 + 34 + # Disable gcov profiling, ubsan and kasan for VDSO code 35 + GCOV_PROFILE := n 36 + UBSAN_SANITIZE := n 37 + KASAN_SANITIZE := n 38 + 39 + # Force dependency (incbin is bad) 40 + $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so 41 + 42 + $(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE 43 + $(call if_changed,ld) 44 + 45 + # strip rule for the .so file 46 + $(obj)/%.so: OBJCOPYFLAGS := -S 47 + $(obj)/%.so: $(obj)/%.so.dbg FORCE 48 + $(call if_changed,objcopy) 49 + 50 + $(obj-vdso32): %-32.o: %.S FORCE 51 + $(call if_changed_dep,vdso32as) 52 + 53 + # actual build commands 54 + quiet_cmd_vdso32as = VDSO32A $@ 55 + cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< 56 + quiet_cmd_vdso32cc = VDSO32C $@ 57 + cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $< 58 + 59 + # install commands for the unstripped file 60 + 
quiet_cmd_vdso_install = INSTALL $@ 61 + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ 62 + 63 + vdso32.so: $(obj)/vdso32.so.dbg 64 + @mkdir -p $(MODLIB)/vdso 65 + $(call cmd,vdso_install) 66 + 67 + vdso_install: vdso32.so 68 + 69 + # Generate VDSO offsets using helper script 70 + gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh 71 + quiet_cmd_vdsosym = VDSOSYM $@ 72 + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ 73 + 74 + include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE 75 + $(call if_changed,vdsosym)
+15
arch/s390/kernel/vdso32/gen_vdso_offsets.sh
··· 1 + #!/bin/sh 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + # 5 + # Match symbols in the DSO that look like __kernel_compat_*; produce a header file 6 + # of constant offsets into the shared object. 7 + # 8 + # Doing this inside the Makefile will break the $(filter-out) function, 9 + # causing Kbuild to rebuild the vdso-offsets header file every time. 10 + # 11 + # Inspired by arm64 version. 12 + # 13 + 14 + LC_ALL=C 15 + sed -n 's/\([0-9a-f]*\) . __kernel_compat_\(.*\)/\#define vdso32_offset_\2\t0x\1/p'
+13
arch/s390/kernel/vdso32/note.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. 4 + * Here we can supply some information useful to userland. 5 + */ 6 + 7 + #include <linux/uts.h> 8 + #include <linux/version.h> 9 + #include <linux/elfnote.h> 10 + 11 + ELFNOTE_START(Linux, 0, "a") 12 + .long LINUX_VERSION_CODE 13 + ELFNOTE_END
+141
arch/s390/kernel/vdso32/vdso32.lds.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * This is the infamous ld script for the 32 bits vdso 4 + * library 5 + */ 6 + 7 + #include <asm/page.h> 8 + #include <asm/vdso.h> 9 + 10 + OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") 11 + OUTPUT_ARCH(s390:31-bit) 12 + ENTRY(_start) 13 + 14 + SECTIONS 15 + { 16 + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); 17 + #ifdef CONFIG_TIME_NS 18 + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); 19 + #endif 20 + . = VDSO_LBASE + SIZEOF_HEADERS; 21 + 22 + .hash : { *(.hash) } :text 23 + .gnu.hash : { *(.gnu.hash) } 24 + .dynsym : { *(.dynsym) } 25 + .dynstr : { *(.dynstr) } 26 + .gnu.version : { *(.gnu.version) } 27 + .gnu.version_d : { *(.gnu.version_d) } 28 + .gnu.version_r : { *(.gnu.version_r) } 29 + 30 + .note : { *(.note.*) } :text :note 31 + 32 + . = ALIGN(16); 33 + .text : { 34 + *(.text .stub .text.* .gnu.linkonce.t.*) 35 + } :text 36 + PROVIDE(__etext = .); 37 + PROVIDE(_etext = .); 38 + PROVIDE(etext = .); 39 + 40 + /* 41 + * Other stuff is appended to the text segment: 42 + */ 43 + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } 44 + .rodata1 : { *(.rodata1) } 45 + 46 + .dynamic : { *(.dynamic) } :text :dynamic 47 + 48 + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr 49 + .eh_frame : { KEEP (*(.eh_frame)) } :text 50 + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } 51 + 52 + .rela.dyn ALIGN(8) : { *(.rela.dyn) } 53 + .got ALIGN(8) : { *(.got .toc) } 54 + 55 + _end = .; 56 + PROVIDE(end = .); 57 + 58 + /* 59 + * Stabs debugging sections are here too. 60 + */ 61 + .stab 0 : { *(.stab) } 62 + .stabstr 0 : { *(.stabstr) } 63 + .stab.excl 0 : { *(.stab.excl) } 64 + .stab.exclstr 0 : { *(.stab.exclstr) } 65 + .stab.index 0 : { *(.stab.index) } 66 + .stab.indexstr 0 : { *(.stab.indexstr) } 67 + .comment 0 : { *(.comment) } 68 + 69 + /* 70 + 
71 + * Symbols in the DWARF debugging sections are relative to the 72 + * beginning of the section so we begin them at 0. 73 + */ 74 + /* DWARF 1 */ 75 + .debug 0 : { *(.debug) } 76 + .line 0 : { *(.line) } 77 + /* GNU DWARF 1 extensions */ 78 + .debug_srcinfo 0 : { *(.debug_srcinfo) } 79 + .debug_sfnames 0 : { *(.debug_sfnames) } 80 + /* DWARF 1.1 and DWARF 2 */ 81 + .debug_aranges 0 : { *(.debug_aranges) } 82 + .debug_pubnames 0 : { *(.debug_pubnames) } 83 + /* DWARF 2 */ 84 + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } 85 + .debug_abbrev 0 : { *(.debug_abbrev) } 86 + .debug_line 0 : { *(.debug_line) } 87 + .debug_frame 0 : { *(.debug_frame) } 88 + .debug_str 0 : { *(.debug_str) } 89 + .debug_loc 0 : { *(.debug_loc) } 90 + .debug_macinfo 0 : { *(.debug_macinfo) } 91 + /* SGI/MIPS DWARF 2 extensions */ 92 + .debug_weaknames 0 : { *(.debug_weaknames) } 93 + .debug_funcnames 0 : { *(.debug_funcnames) } 94 + .debug_typenames 0 : { *(.debug_typenames) } 95 + .debug_varnames 0 : { *(.debug_varnames) } 96 + /* DWARF 3 */ 97 + .debug_pubtypes 0 : { *(.debug_pubtypes) } 98 + .debug_ranges 0 : { *(.debug_ranges) } 99 + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } 100 + 101 + /DISCARD/ : { 102 + *(.note.GNU-stack) 103 + *(.branch_lt) 104 + *(.data .data.* .gnu.linkonce.d.* .sdata*) 105 + *(.bss .sbss .dynbss .dynsbss) 106 + } 107 + } 108 + 109 + /* 110 + * Very old versions of ld do not recognize this name token; use the constant. 111 + */ 112 + #define PT_GNU_EH_FRAME 0x6474e550 113 + 114 + /* 115 + * We must supply the ELF program headers explicitly to get just one 116 + * PT_LOAD segment, and set the flags explicitly to make segments read-only. 117 + */ 118 + PHDRS 119 + { 120 + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ 121 + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ 122 + note PT_NOTE FLAGS(4); /* PF_R */ 123 + eh_frame_hdr PT_GNU_EH_FRAME; 124 + } 125 + 126 + /* 127 + * This controls what symbols we export from the DSO. 
128 + */ 129 + VERSION 130 + { 131 + VDSO_VERSION_STRING { 132 + global: 133 + /* 134 + * Has to be there for the kernel to find 135 + */ 136 + __kernel_compat_restart_syscall; 137 + __kernel_compat_rt_sigreturn; 138 + __kernel_compat_sigreturn; 139 + local: *; 140 + }; 141 + }
+15
arch/s390/kernel/vdso32/vdso32_wrapper.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #include <linux/init.h> 3 + #include <linux/linkage.h> 4 + #include <asm/page.h> 5 + 6 + __PAGE_ALIGNED_DATA 7 + 8 + .globl vdso32_start, vdso32_end 9 + .balign PAGE_SIZE 10 + vdso32_start: 11 + .incbin "arch/s390/kernel/vdso32/vdso32.so" 12 + .balign PAGE_SIZE 13 + vdso32_end: 14 + 15 + .previous
+21
arch/s390/kernel/vdso32/vdso_user_wrapper.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #include <asm/unistd.h> 4 + #include <asm/dwarf.h> 5 + 6 + .macro vdso_syscall func,syscall 7 + .globl __kernel_compat_\func 8 + .type __kernel_compat_\func,@function 9 + .align 8 10 + __kernel_compat_\func: 11 + CFI_STARTPROC 12 + svc \syscall 13 + /* Make sure we notice when a syscall returns, which shouldn't happen */ 14 + .word 0 15 + CFI_ENDPROC 16 + .size __kernel_compat_\func,.-__kernel_compat_\func 17 + .endm 18 + 19 + vdso_syscall restart_syscall,__NR_restart_syscall 20 + vdso_syscall sigreturn,__NR_sigreturn 21 + vdso_syscall rt_sigreturn,__NR_rt_sigreturn
+8
arch/s390/kernel/vdso64/Makefile
··· 74 74 $(call cmd,vdso_install) 75 75 76 76 vdso_install: vdso64.so 77 + 78 + # Generate VDSO offsets using helper script 79 + gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh 80 + quiet_cmd_vdsosym = VDSOSYM $@ 81 + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ 82 + 83 + include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE 84 + $(call if_changed,vdsosym)
+15
arch/s390/kernel/vdso64/gen_vdso_offsets.sh
··· 1 + #!/bin/sh 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + # 5 + # Match symbols in the DSO that look like __kernel_*; produce a header file 6 + # of constant offsets into the shared object. 7 + # 8 + # Doing this inside the Makefile will break the $(filter-out) function, 9 + # causing Kbuild to rebuild the vdso-offsets header file every time. 10 + # 11 + # Inspired by arm64 version. 12 + # 13 + 14 + LC_ALL=C 15 + sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p'
+4 -1
arch/s390/kernel/vdso64/vdso64.lds.S
··· 17 17 #ifdef CONFIG_TIME_NS 18 18 PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); 19 19 #endif 20 - . = VDSO64_LBASE + SIZEOF_HEADERS; 20 + . = VDSO_LBASE + SIZEOF_HEADERS; 21 21 22 22 .hash : { *(.hash) } :text 23 23 .gnu.hash : { *(.gnu.hash) } ··· 137 137 __kernel_clock_gettime; 138 138 __kernel_clock_getres; 139 139 __kernel_getcpu; 140 + __kernel_restart_syscall; 141 + __kernel_rt_sigreturn; 142 + __kernel_sigreturn; 140 143 local: *; 141 144 }; 142 145 }
+17
arch/s390/kernel/vdso64/vdso_user_wrapper.S
··· 37 37 vdso_func clock_getres 38 38 vdso_func clock_gettime 39 39 vdso_func getcpu 40 + 41 + .macro vdso_syscall func,syscall 42 + .globl __kernel_\func 43 + .type __kernel_\func,@function 44 + .align 8 45 + __kernel_\func: 46 + CFI_STARTPROC 47 + svc \syscall 48 + /* Make sure we notice when a syscall returns, which shouldn't happen */ 49 + .word 0 50 + CFI_ENDPROC 51 + .size __kernel_\func,.-__kernel_\func 52 + .endm 53 + 54 + vdso_syscall restart_syscall,__NR_restart_syscall 55 + vdso_syscall sigreturn,__NR_sigreturn 56 + vdso_syscall rt_sigreturn,__NR_rt_sigreturn
+1 -1
arch/s390/lib/string.c
··· 162 162 " jo 0b\n" 163 163 "1: mvst %[dummy],%[src]\n" 164 164 " jo 1b\n" 165 - : [dummy] "=&a" (dummy), [dest] "+&a" (dest), [src] "+&a" (src) 165 + : [dummy] "+&a" (dummy), [dest] "+&a" (dest), [src] "+&a" (src) 166 166 : 167 167 : "cc", "memory", "0"); 168 168 return ret;
+3 -2
arch/s390/lib/test_unwind.c
··· 120 120 #define UWM_REGS 0x2 /* Pass regs to test_unwind(). */ 121 121 #define UWM_SP 0x4 /* Pass sp to test_unwind(). */ 122 122 #define UWM_CALLER 0x8 /* Unwind starting from caller. */ 123 - #define UWM_SWITCH_STACK 0x10 /* Use CALL_ON_STACK. */ 123 + #define UWM_SWITCH_STACK 0x10 /* Use call_on_stack. */ 124 124 #define UWM_IRQ 0x20 /* Unwind from irq context. */ 125 125 #define UWM_PGM 0x40 /* Unwind from program check handler. */ 126 126 ··· 211 211 if (u->flags & UWM_SWITCH_STACK) { 212 212 local_irq_save(flags); 213 213 local_mcck_disable(); 214 - rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u); 214 + rc = call_on_stack(1, S390_lowcore.nodat_stack, 215 + int, unwindme_func3, struct unwindme *, u); 215 216 local_mcck_enable(); 216 217 local_irq_restore(flags); 217 218 return rc;
+1 -1
arch/s390/lib/uaccess.c
··· 224 224 EX_TABLE(0b,3b) 225 225 : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) 226 226 : [spec] "d" (0x810081UL) 227 - : "cc", "memory"); 227 + : "cc", "memory", "0"); 228 228 return size; 229 229 } 230 230
+26 -23
arch/s390/mm/fault.c
··· 285 285 (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); 286 286 } 287 287 288 - static noinline int signal_return(struct pt_regs *regs) 289 - { 290 - u16 instruction; 291 - int rc; 292 - 293 - rc = __get_user(instruction, (u16 __user *) regs->psw.addr); 294 - if (rc) 295 - return rc; 296 - if (instruction == 0x0a77) { 297 - set_pt_regs_flag(regs, PIF_SYSCALL); 298 - regs->int_code = 0x00040077; 299 - return 0; 300 - } else if (instruction == 0x0aad) { 301 - set_pt_regs_flag(regs, PIF_SYSCALL); 302 - regs->int_code = 0x000400ad; 303 - return 0; 304 - } 305 - return -EACCES; 306 - } 307 - 308 288 static noinline void do_fault_error(struct pt_regs *regs, int access, 309 289 vm_fault_t fault) 310 290 { ··· 292 312 293 313 switch (fault) { 294 314 case VM_FAULT_BADACCESS: 295 - if (access == VM_EXEC && signal_return(regs) == 0) 296 - break; 297 - fallthrough; 298 315 case VM_FAULT_BADMAP: 299 316 /* Bad memory access. Check if it is kernel or user space. */ 300 317 if (user_mode(regs)) { ··· 768 791 struct mm_struct *mm; 769 792 struct page *page; 770 793 int rc; 794 + 795 + /* 796 + * bit 61 tells us if the address is valid, if it's not we 797 + * have a major problem and should stop the kernel or send a 798 + * SIGSEGV to the process. Unfortunately bit 61 is not 799 + * reliable without the misc UV feature so we need to check 800 + * for that as well. 801 + */ 802 + if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) && 803 + !test_bit_inv(61, &regs->int_parm_long)) { 804 + /* 805 + * When this happens, userspace did something that it 806 + * was not supposed to do, e.g. branching into secure 807 + * memory. Trigger a segmentation fault. 808 + */ 809 + if (user_mode(regs)) { 810 + send_sig(SIGSEGV, current, 0); 811 + return; 812 + } 813 + 814 + /* 815 + * The kernel should never run into this case and we 816 + * have no way out of this situation. 
817 + */ 818 + panic("Unexpected PGM 0x3d with TEID bit 61=0"); 819 + } 771 820 772 821 switch (get_fault_type(regs)) { 773 822 case USER_FAULT:
+9 -4
arch/s390/mm/maccess.c
··· 125 125 */ 126 126 int memcpy_real(void *dest, void *src, size_t count) 127 127 { 128 + unsigned long _dest = (unsigned long)dest; 129 + unsigned long _src = (unsigned long)src; 130 + unsigned long _count = (unsigned long)count; 128 131 int rc; 129 132 130 133 if (S390_lowcore.nodat_stack != 0) { 131 134 preempt_disable(); 132 - rc = CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, 3, 133 - dest, src, count); 135 + rc = call_on_stack(3, S390_lowcore.nodat_stack, 136 + unsigned long, _memcpy_real, 137 + unsigned long, _dest, 138 + unsigned long, _src, 139 + unsigned long, _count); 134 140 preempt_enable(); 135 141 return rc; 136 142 } ··· 145 139 * not set up yet. Just call _memcpy_real on the early boot 146 140 * stack 147 141 */ 148 - return _memcpy_real((unsigned long) dest,(unsigned long) src, 149 - (unsigned long) count); 142 + return _memcpy_real(_dest, _src, _count); 150 143 } 151 144 152 145 /*
+37 -13
drivers/s390/crypto/ap_bus.c
··· 61 61 module_param_named(aqmask, aqm_str, charp, 0440); 62 62 MODULE_PARM_DESC(aqmask, "AP bus domain mask."); 63 63 64 + atomic_t ap_max_msg_size = ATOMIC_INIT(AP_DEFAULT_MAX_MSG_SIZE); 65 + EXPORT_SYMBOL(ap_max_msg_size); 66 + 64 67 static struct device *ap_root_device; 65 68 66 69 /* Hashtable of all queue devices on the AP bus */ ··· 319 316 * Returns true if TAPQ succeeded and the info is filled or 320 317 * false otherwise. 321 318 */ 322 - static bool ap_queue_info(ap_qid_t qid, int *q_type, 323 - unsigned int *q_fac, int *q_depth, bool *q_decfg) 319 + static bool ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, 320 + int *q_depth, int *q_ml, bool *q_decfg) 324 321 { 325 322 struct ap_queue_status status; 326 - unsigned long info = 0; 323 + union { 324 + unsigned long value; 325 + struct { 326 + unsigned int fac : 32; /* facility bits */ 327 + unsigned int at : 8; /* ap type */ 328 + unsigned int _res1 : 8; 329 + unsigned int _res2 : 4; 330 + unsigned int ml : 4; /* apxl ml */ 331 + unsigned int _res3 : 4; 332 + unsigned int qd : 4; /* queue depth */ 333 + } tapq_gr2; 334 + } tapq_info; 335 + 336 + tapq_info.value = 0; 327 337 328 338 /* make sure we don't run into a specifiation exception */ 329 339 if (AP_QID_CARD(qid) > ap_max_adapter_id || ··· 344 328 return false; 345 329 346 330 /* call TAPQ on this APQN */ 347 - status = ap_test_queue(qid, ap_apft_available(), &info); 331 + status = ap_test_queue(qid, ap_apft_available(), &tapq_info.value); 348 332 switch (status.response_code) { 349 333 case AP_RESPONSE_NORMAL: 350 334 case AP_RESPONSE_RESET_IN_PROGRESS: ··· 356 340 * info should be filled. All bits 0 is not possible as 357 341 * there is at least one of the mode bits set. 
358 342 */ 359 - if (WARN_ON_ONCE(!info)) 343 + if (WARN_ON_ONCE(!tapq_info.value)) 360 344 return false; 361 - *q_type = (int)((info >> 24) & 0xff); 362 - *q_fac = (unsigned int)(info >> 32); 363 - *q_depth = (int)(info & 0xff); 345 + *q_type = tapq_info.tapq_gr2.at; 346 + *q_fac = tapq_info.tapq_gr2.fac; 347 + *q_depth = tapq_info.tapq_gr2.qd; 348 + *q_ml = tapq_info.tapq_gr2.ml; 364 349 *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; 365 350 switch (*q_type) { 366 351 /* For CEX2 and CEX3 the available functions ··· 1533 1516 unsigned int func; 1534 1517 struct device *dev; 1535 1518 struct ap_queue *aq; 1536 - int rc, dom, depth, type; 1519 + int rc, dom, depth, type, ml; 1537 1520 1538 1521 /* 1539 1522 * Go through the configuration for the domains and compare them ··· 1557 1540 continue; 1558 1541 } 1559 1542 /* domain is valid, get info from this APQN */ 1560 - if (!ap_queue_info(qid, &type, &func, &depth, &decfg)) { 1543 + if (!ap_queue_info(qid, &type, &func, &depth, &ml, &decfg)) { 1561 1544 if (aq) { 1562 1545 AP_DBF_INFO( 1563 1546 "%s(%d,%d) ap_queue_info() not successful, rm queue device\n", ··· 1656 1639 unsigned int func; 1657 1640 struct device *dev; 1658 1641 struct ap_card *ac; 1659 - int rc, dom, depth, type, comp_type; 1642 + int rc, dom, depth, type, comp_type, ml; 1660 1643 1661 1644 /* Is there currently a card device for this adapter ? 
*/ 1662 1645 dev = bus_find_device(&ap_bus_type, NULL, ··· 1685 1668 for (dom = 0; dom <= ap_max_domain_id; dom++) 1686 1669 if (ap_test_config_usage_domain(dom)) { 1687 1670 qid = AP_MKQID(ap, dom); 1688 - if (ap_queue_info(qid, &type, &func, &depth, &decfg)) 1671 + if (ap_queue_info(qid, &type, &func, 1672 + &depth, &ml, &decfg)) 1689 1673 break; 1690 1674 } 1691 1675 if (dom > ap_max_domain_id) { ··· 1755 1737 __func__, ap, type); 1756 1738 return; 1757 1739 } 1758 - ac = ap_card_create(ap, depth, type, comp_type, func); 1740 + ac = ap_card_create(ap, depth, type, comp_type, func, ml); 1759 1741 if (!ac) { 1760 1742 AP_DBF_WARN("%s(%d) ap_card_create() failed\n", 1761 1743 __func__, ap); ··· 1766 1748 dev->bus = &ap_bus_type; 1767 1749 dev->parent = ap_root_device; 1768 1750 dev_set_name(dev, "card%02x", ap); 1751 + /* maybe enlarge ap_max_msg_size to support this card */ 1752 + if (ac->maxmsgsize > atomic_read(&ap_max_msg_size)) { 1753 + atomic_set(&ap_max_msg_size, ac->maxmsgsize); 1754 + AP_DBF_INFO("%s(%d) ap_max_msg_size update to %d byte\n", 1755 + __func__, ap, atomic_read(&ap_max_msg_size)); 1756 + } 1769 1757 /* Register the new card device with AP bus */ 1770 1758 rc = device_register(dev); 1771 1759 if (rc) {
+8 -3
drivers/s390/crypto/ap_bus.h
··· 25 25 #define AP_RESET_TIMEOUT (HZ*0.7) /* Time in ticks for reset timeouts. */ 26 26 #define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. */ 27 27 #define AP_POLL_TIME 1 /* Time in ticks between receive polls. */ 28 + #define AP_DEFAULT_MAX_MSG_SIZE (12 * 1024) 29 + #define AP_TAPQ_ML_FIELD_CHUNK_SIZE (4096) 28 30 29 31 extern int ap_domain_index; 32 + extern atomic_t ap_max_msg_size; 30 33 31 34 extern DECLARE_HASHTABLE(ap_queues, 8); 32 35 extern spinlock_t ap_queues_lock; ··· 170 167 unsigned int functions; /* AP device function bitfield. */ 171 168 int queue_depth; /* AP queue depth.*/ 172 169 int id; /* AP card number. */ 170 + unsigned int maxmsgsize; /* AP msg limit for this card */ 173 171 bool config; /* configured state */ 174 172 atomic64_t total_request_count; /* # requests ever for this AP device.*/ 175 173 }; ··· 232 228 struct list_head list; /* Request queueing. */ 233 229 unsigned long long psmid; /* Message id. */ 234 230 void *msg; /* Pointer to message buffer. */ 235 - unsigned int len; /* Message length. */ 231 + unsigned int len; /* actual msg len in msg buffer */ 232 + unsigned int bufsize; /* allocated msg buffer size */ 236 233 u16 flags; /* Flags, see AP_MSG_FLAG_xxx */ 237 234 struct ap_fi fi; /* Failure Injection cmd */ 238 235 int rc; /* Return code for this message */ ··· 295 290 void ap_queue_remove(struct ap_queue *aq); 296 291 void ap_queue_init_state(struct ap_queue *aq); 297 292 298 - struct ap_card *ap_card_create(int id, int queue_depth, int raw_device_type, 299 - int comp_device_type, unsigned int functions); 293 + struct ap_card *ap_card_create(int id, int queue_depth, int raw_type, 294 + int comp_type, unsigned int functions, int ml); 300 295 301 296 struct ap_perms { 302 297 unsigned long ioctlm[BITS_TO_LONGS(AP_IOCTLS)];
+15 -1
drivers/s390/crypto/ap_card.c
··· 174 174 175 175 static DEVICE_ATTR_RW(config); 176 176 177 + static ssize_t max_msg_size_show(struct device *dev, 178 + struct device_attribute *attr, char *buf) 179 + { 180 + struct ap_card *ac = to_ap_card(dev); 181 + 182 + return scnprintf(buf, PAGE_SIZE, "%u\n", ac->maxmsgsize); 183 + } 184 + 185 + static DEVICE_ATTR_RO(max_msg_size); 186 + 177 187 static struct attribute *ap_card_dev_attrs[] = { 178 188 &dev_attr_hwtype.attr, 179 189 &dev_attr_raw_hwtype.attr, ··· 194 184 &dev_attr_pendingq_count.attr, 195 185 &dev_attr_modalias.attr, 196 186 &dev_attr_config.attr, 187 + &dev_attr_max_msg_size.attr, 197 188 NULL 198 189 }; 199 190 ··· 220 209 } 221 210 222 211 struct ap_card *ap_card_create(int id, int queue_depth, int raw_type, 223 - int comp_type, unsigned int functions) 212 + int comp_type, unsigned int functions, int ml) 224 213 { 225 214 struct ap_card *ac; 226 215 ··· 234 223 ac->queue_depth = queue_depth; 235 224 ac->functions = functions; 236 225 ac->id = id; 226 + ac->maxmsgsize = ml > 0 ? 227 + ml * AP_TAPQ_ML_FIELD_CHUNK_SIZE : AP_DEFAULT_MAX_MSG_SIZE; 228 + 237 229 return ac; 238 230 }
+24 -4
drivers/s390/crypto/ap_queue.c
··· 101 101 102 102 if (msg == NULL) 103 103 return -EINVAL; 104 - status = ap_dqap(qid, psmid, msg, length); 104 + status = ap_dqap(qid, psmid, msg, length, NULL, NULL); 105 105 switch (status.response_code) { 106 106 case AP_RESPONSE_NORMAL: 107 107 return 0; ··· 136 136 struct ap_queue_status status; 137 137 struct ap_message *ap_msg; 138 138 bool found = false; 139 + size_t reslen; 140 + unsigned long resgr0 = 0; 141 + int parts = 0; 139 142 140 - status = ap_dqap(aq->qid, &aq->reply->psmid, 141 - aq->reply->msg, aq->reply->len); 143 + /* 144 + * DQAP loop until response code and resgr0 indicate that 145 + * the msg is totally received. As we use the very same buffer 146 + * the msg is overwritten with each invocation. That's intended 147 + * and the receiver of the msg is informed with a msg rc code 148 + * of EMSGSIZE in such a case. 149 + */ 150 + do { 151 + status = ap_dqap(aq->qid, &aq->reply->psmid, 152 + aq->reply->msg, aq->reply->bufsize, 153 + &reslen, &resgr0); 154 + parts++; 155 + } while (status.response_code == 0xFF && resgr0 != 0); 156 + 142 157 switch (status.response_code) { 143 158 case AP_RESPONSE_NORMAL: 144 159 aq->queue_count = max_t(int, 0, aq->queue_count - 1); ··· 165 150 continue; 166 151 list_del_init(&ap_msg->list); 167 152 aq->pendingq_count--; 168 - ap_msg->receive(aq, ap_msg, aq->reply); 153 + if (parts > 1) { 154 + ap_msg->rc = -EMSGSIZE; 155 + ap_msg->receive(aq, ap_msg, NULL); 156 + } else { 157 + ap_msg->receive(aq, ap_msg, aq->reply); 158 + } 169 159 found = true; 170 160 break; 171 161 }
+6
drivers/s390/crypto/zcrypt_api.c
··· 900 900 if (xcRB->user_defined != AUTOSELECT && 901 901 xcRB->user_defined != zc->card->id) 902 902 continue; 903 + /* check if request size exceeds card max msg size */ 904 + if (ap_msg.len > zc->card->maxmsgsize) 905 + continue; 903 906 /* check if device node has admission for this card */ 904 907 if (!zcrypt_check_card(perms, zc->card->id)) 905 908 continue; ··· 1070 1067 /* Check for user selected EP11 card */ 1071 1068 if (targets && 1072 1069 !is_desired_ep11_card(zc->card->id, target_num, targets)) 1070 + continue; 1071 + /* check if request size exceeds card max msg size */ 1072 + if (ap_msg.len > zc->card->maxmsgsize) 1073 1073 continue; 1074 1074 /* check if device node has admission for this card */ 1075 1075 if (!zcrypt_check_card(perms, zc->card->id))
+3 -6
drivers/s390/crypto/zcrypt_cex4.c
··· 28 28 #define CEX4C_MIN_MOD_SIZE 16 /* 256 bits */ 29 29 #define CEX4C_MAX_MOD_SIZE 512 /* 4096 bits */ 30 30 31 - #define CEX4A_MAX_MESSAGE_SIZE MSGTYPE50_CRB3_MAX_MSG_SIZE 32 - #define CEX4C_MAX_MESSAGE_SIZE MSGTYPE06_MAX_MSG_SIZE 33 - 34 31 /* Waiting time for requests to be processed. 35 32 * Currently there are some types of request which are not deterministic. 36 33 * But the maximum time limit managed by the stomper code is set to 60sec. ··· 602 605 int rc; 603 606 604 607 if (ap_test_bit(&aq->card->functions, AP_FUNC_ACCEL)) { 605 - zq = zcrypt_queue_alloc(CEX4A_MAX_MESSAGE_SIZE); 608 + zq = zcrypt_queue_alloc(aq->card->maxmsgsize); 606 609 if (!zq) 607 610 return -ENOMEM; 608 611 zq->ops = zcrypt_msgtype(MSGTYPE50_NAME, 609 612 MSGTYPE50_VARIANT_DEFAULT); 610 613 } else if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) { 611 - zq = zcrypt_queue_alloc(CEX4C_MAX_MESSAGE_SIZE); 614 + zq = zcrypt_queue_alloc(aq->card->maxmsgsize); 612 615 if (!zq) 613 616 return -ENOMEM; 614 617 zq->ops = zcrypt_msgtype(MSGTYPE06_NAME, 615 618 MSGTYPE06_VARIANT_DEFAULT); 616 619 } else if (ap_test_bit(&aq->card->functions, AP_FUNC_EP11)) { 617 - zq = zcrypt_queue_alloc(CEX4C_MAX_MESSAGE_SIZE); 620 + zq = zcrypt_queue_alloc(aq->card->maxmsgsize); 618 621 if (!zq) 619 622 return -ENOMEM; 620 623 zq->ops = zcrypt_msgtype(MSGTYPE06_NAME,
+13 -13
drivers/s390/crypto/zcrypt_msgtype50.c
··· 442 442 goto out; /* ap_msg->rc indicates the error */ 443 443 t80h = reply->msg; 444 444 if (t80h->type == TYPE80_RSP_CODE) { 445 - if (aq->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A) 446 - len = min_t(int, CEX2A_MAX_RESPONSE_SIZE, t80h->len); 447 - else 448 - len = min_t(int, CEX3A_MAX_RESPONSE_SIZE, t80h->len); 449 - memcpy(msg->msg, reply->msg, len); 445 + len = t80h->len; 446 + if (len > reply->bufsize || len > msg->bufsize) { 447 + msg->rc = -EMSGSIZE; 448 + } else { 449 + memcpy(msg->msg, reply->msg, len); 450 + msg->len = len; 451 + } 450 452 } else 451 453 memcpy(msg->msg, reply->msg, sizeof(error_reply)); 452 454 out: ··· 471 469 struct completion work; 472 470 int rc; 473 471 474 - if (zq->zcard->user_space_type == ZCRYPT_CEX2A) 475 - ap_msg->msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL); 476 - else 477 - ap_msg->msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL); 472 + ap_msg->bufsize = (zq->zcard->user_space_type == ZCRYPT_CEX2A) ? 473 + MSGTYPE50_CRB2_MAX_MSG_SIZE : MSGTYPE50_CRB3_MAX_MSG_SIZE; 474 + ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL); 478 475 if (!ap_msg->msg) 479 476 return -ENOMEM; 480 477 ap_msg->receive = zcrypt_cex2a_receive; ··· 516 515 struct completion work; 517 516 int rc; 518 517 519 - if (zq->zcard->user_space_type == ZCRYPT_CEX2A) 520 - ap_msg->msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL); 521 - else 522 - ap_msg->msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL); 518 + ap_msg->bufsize = (zq->zcard->user_space_type == ZCRYPT_CEX2A) ? 519 + MSGTYPE50_CRB2_MAX_MSG_SIZE : MSGTYPE50_CRB3_MAX_MSG_SIZE; 520 + ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL); 523 521 if (!ap_msg->msg) 524 522 return -ENOMEM; 525 523 ap_msg->receive = zcrypt_cex2a_receive;
+30 -24
drivers/s390/crypto/zcrypt_msgtype6.c
··· 403 403 } __packed * msg = ap_msg->msg; 404 404 405 405 int rcblen = CEIL4(xcRB->request_control_blk_length); 406 - int replylen, req_sumlen, resp_sumlen; 406 + int req_sumlen, resp_sumlen; 407 407 char *req_data = ap_msg->msg + sizeof(struct type6_hdr) + rcblen; 408 408 char *function_code; 409 409 ··· 415 415 ap_msg->len = sizeof(struct type6_hdr) + 416 416 CEIL4(xcRB->request_control_blk_length) + 417 417 xcRB->request_data_length; 418 - if (ap_msg->len > MSGTYPE06_MAX_MSG_SIZE) 418 + if (ap_msg->len > ap_msg->bufsize) 419 419 return -EINVAL; 420 420 421 421 /* ··· 434 434 if (CEIL4(xcRB->reply_control_blk_length) < 435 435 xcRB->reply_control_blk_length) 436 436 return -EINVAL; /* overflow after alignment*/ 437 - 438 - replylen = sizeof(struct type86_fmt2_msg) + 439 - CEIL4(xcRB->reply_control_blk_length) + 440 - xcRB->reply_data_length; 441 - if (replylen > MSGTYPE06_MAX_MSG_SIZE) 442 - return -EINVAL; 443 437 444 438 /* 445 439 * Overflow check ··· 524 530 return -EINVAL; /* overflow after alignment*/ 525 531 526 532 /* length checks */ 527 - ap_msg->len = sizeof(struct type6_hdr) + xcRB->req_len; 528 - if (CEIL4(xcRB->req_len) > MSGTYPE06_MAX_MSG_SIZE - 529 - (sizeof(struct type6_hdr))) 533 + ap_msg->len = sizeof(struct type6_hdr) + CEIL4(xcRB->req_len); 534 + if (ap_msg->len > ap_msg->bufsize) 530 535 return -EINVAL; 531 536 532 537 if (CEIL4(xcRB->resp_len) < xcRB->resp_len) 533 538 return -EINVAL; /* overflow after alignment*/ 534 - 535 - if (CEIL4(xcRB->resp_len) > MSGTYPE06_MAX_MSG_SIZE - 536 - (sizeof(struct type86_fmt2_msg))) 537 - return -EINVAL; 538 539 539 540 /* prepare type6 header */ 540 541 msg->hdr = static_type6_ep11_hdr; ··· 941 952 switch (resp_type->type) { 942 953 case CEXXC_RESPONSE_TYPE_ICA: 943 954 len = sizeof(struct type86x_reply) + t86r->length - 2; 944 - len = min_t(int, CEXXC_MAX_ICA_RESPONSE_SIZE, len); 945 - memcpy(msg->msg, reply->msg, len); 955 + if (len > reply->bufsize || len > msg->bufsize) { 956 + msg->rc = -EMSGSIZE; 
957 + } else { 958 + memcpy(msg->msg, reply->msg, len); 959 + msg->len = len; 960 + } 946 961 break; 947 962 case CEXXC_RESPONSE_TYPE_XCRB: 948 963 len = t86r->fmt2.offset2 + t86r->fmt2.count2; 949 - len = min_t(int, MSGTYPE06_MAX_MSG_SIZE, len); 950 - memcpy(msg->msg, reply->msg, len); 964 + if (len > reply->bufsize || len > msg->bufsize) { 965 + msg->rc = -EMSGSIZE; 966 + } else { 967 + memcpy(msg->msg, reply->msg, len); 968 + msg->len = len; 969 + } 951 970 break; 952 971 default: 953 972 memcpy(msg->msg, &error_reply, sizeof(error_reply)); ··· 996 999 switch (resp_type->type) { 997 1000 case CEXXC_RESPONSE_TYPE_EP11: 998 1001 len = t86r->fmt2.offset1 + t86r->fmt2.count1; 999 - len = min_t(int, MSGTYPE06_MAX_MSG_SIZE, len); 1000 - memcpy(msg->msg, reply->msg, len); 1002 + if (len > reply->bufsize || len > msg->bufsize) { 1003 + msg->rc = -EMSGSIZE; 1004 + } else { 1005 + memcpy(msg->msg, reply->msg, len); 1006 + msg->len = len; 1007 + } 1001 1008 break; 1002 1009 default: 1003 1010 memcpy(msg->msg, &error_reply, sizeof(error_reply)); ··· 1034 1033 ap_msg->msg = (void *) get_zeroed_page(GFP_KERNEL); 1035 1034 if (!ap_msg->msg) 1036 1035 return -ENOMEM; 1036 + ap_msg->bufsize = PAGE_SIZE; 1037 1037 ap_msg->receive = zcrypt_msgtype6_receive; 1038 1038 ap_msg->psmid = (((unsigned long long) current->pid) << 32) + 1039 1039 atomic_inc_return(&zcrypt_step); ··· 1082 1080 ap_msg->msg = (void *) get_zeroed_page(GFP_KERNEL); 1083 1081 if (!ap_msg->msg) 1084 1082 return -ENOMEM; 1083 + ap_msg->bufsize = PAGE_SIZE; 1085 1084 ap_msg->receive = zcrypt_msgtype6_receive; 1086 1085 ap_msg->psmid = (((unsigned long long) current->pid) << 32) + 1087 1086 atomic_inc_return(&zcrypt_step); ··· 1127 1124 .type = CEXXC_RESPONSE_TYPE_XCRB, 1128 1125 }; 1129 1126 1130 - ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); 1127 + ap_msg->bufsize = atomic_read(&ap_max_msg_size); 1128 + ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL); 1131 1129 if (!ap_msg->msg) 1132 1130 return 
-ENOMEM; 1133 1131 ap_msg->receive = zcrypt_msgtype6_receive; ··· 1185 1181 .type = CEXXC_RESPONSE_TYPE_EP11, 1186 1182 }; 1187 1183 1188 - ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); 1184 + ap_msg->bufsize = atomic_read(&ap_max_msg_size); 1185 + ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL); 1189 1186 if (!ap_msg->msg) 1190 1187 return -ENOMEM; 1191 1188 ap_msg->receive = zcrypt_msgtype6_receive_ep11; ··· 1282 1277 .type = CEXXC_RESPONSE_TYPE_XCRB, 1283 1278 }; 1284 1279 1285 - ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); 1280 + ap_msg->bufsize = AP_DEFAULT_MAX_MSG_SIZE; 1281 + ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL); 1286 1282 if (!ap_msg->msg) 1287 1283 return -ENOMEM; 1288 1284 ap_msg->receive = zcrypt_msgtype6_receive;
-2
drivers/s390/crypto/zcrypt_msgtype6.h
··· 19 19 #define MSGTYPE06_VARIANT_NORNG 1 20 20 #define MSGTYPE06_VARIANT_EP11 2 21 21 22 - #define MSGTYPE06_MAX_MSG_SIZE (12*1024) 23 - 24 22 /** 25 23 * The type 6 message family is associated with CEXxC/CEXxP cards. 26 24 *
+3 -3
drivers/s390/crypto/zcrypt_queue.c
··· 111 111 return false; 112 112 } 113 113 114 - struct zcrypt_queue *zcrypt_queue_alloc(size_t max_response_size) 114 + struct zcrypt_queue *zcrypt_queue_alloc(size_t reply_buf_size) 115 115 { 116 116 struct zcrypt_queue *zq; 117 117 118 118 zq = kzalloc(sizeof(struct zcrypt_queue), GFP_KERNEL); 119 119 if (!zq) 120 120 return NULL; 121 - zq->reply.msg = kmalloc(max_response_size, GFP_KERNEL); 121 + zq->reply.msg = kmalloc(reply_buf_size, GFP_KERNEL); 122 122 if (!zq->reply.msg) 123 123 goto out_free; 124 - zq->reply.len = max_response_size; 124 + zq->reply.bufsize = reply_buf_size; 125 125 INIT_LIST_HEAD(&zq->list); 126 126 kref_init(&zq->refcount); 127 127 return zq;
-1
include/linux/cpuhotplug.h
··· 171 171 CPUHP_AP_PERF_X86_CSTATE_ONLINE, 172 172 CPUHP_AP_PERF_X86_IDXD_ONLINE, 173 173 CPUHP_AP_PERF_S390_CF_ONLINE, 174 - CPUHP_AP_PERF_S390_CFD_ONLINE, 175 174 CPUHP_AP_PERF_S390_SF_ONLINE, 176 175 CPUHP_AP_PERF_ARM_CCI_ONLINE, 177 176 CPUHP_AP_PERF_ARM_CCN_ONLINE,