Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 's390-6.1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Vasily Gorbik:

- Make use of the IBM z16 processor activity instrumentation facility
extension to count neural network processor assist operations: add a
new PMU device driver so that perf can make use of this.

- Rework memcpy_real() to avoid DAT-off mode.

- Rework absolute lowcore access code.

- Various small fixes and improvements all over the code.

* tag 's390-6.1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
s390/pci: remove unused bus_next field from struct zpci_dev
s390/cio: remove unused ccw_device_force_console() declaration
s390/pai: Add support for PAI Extension 1 NNPA counters
s390/mm: fix no previous prototype warnings in maccess.c
s390/mm: uninline copy_oldmem_kernel() function
s390/mm,ptdump: add real memory copy page markers
s390/mm: rework memcpy_real() to avoid DAT-off mode
s390/dump: save IPL CPU registers once DAT is available
s390/pci: convert high_memory to physical address
s390/smp,ptdump: add absolute lowcore markers
s390/smp: rework absolute lowcore access
s390/smp: call smp_reinit_ipl_cpu() before scheduler is available
s390/ptdump: add missing amode31 markers
s390/mm: split lowcore pages with set_memory_4k()
s390/mm: remove unused access parameter from do_fault_error()
s390/delay: sync comment within __delay() with reality
s390: move from strlcpy with unused retval to strscpy

+1174 -261
+7 -1
arch/s390/boot/startup.c
··· 10 10 #include <asm/sclp.h> 11 11 #include <asm/diag.h> 12 12 #include <asm/uv.h> 13 + #include <asm/abs_lowcore.h> 13 14 #include "decompressor.h" 14 15 #include "boot.h" 15 16 #include "uv.h" 16 17 17 18 unsigned long __bootdata_preserved(__kaslr_offset); 19 + unsigned long __bootdata_preserved(__abs_lowcore); 20 + unsigned long __bootdata_preserved(__memcpy_real_area); 18 21 unsigned long __bootdata(__amode31_base); 19 22 unsigned long __bootdata_preserved(VMALLOC_START); 20 23 unsigned long __bootdata_preserved(VMALLOC_END); ··· 183 180 /* force vmalloc and modules below kasan shadow */ 184 181 vmax = min(vmax, KASAN_SHADOW_START); 185 182 #endif 186 - MODULES_END = vmax; 183 + __memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE); 184 + __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, 185 + sizeof(struct lowcore)); 186 + MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE); 187 187 MODULES_VADDR = MODULES_END - MODULES_LEN; 188 188 VMALLOC_END = MODULES_VADDR; 189 189
+17
arch/s390/include/asm/abs_lowcore.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _ASM_S390_ABS_LOWCORE_H 3 + #define _ASM_S390_ABS_LOWCORE_H 4 + 5 + #include <asm/lowcore.h> 6 + 7 + #define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) 8 + 9 + extern unsigned long __abs_lowcore; 10 + extern bool abs_lowcore_mapped; 11 + 12 + struct lowcore *get_abs_lowcore(unsigned long *flags); 13 + void put_abs_lowcore(struct lowcore *lc, unsigned long flags); 14 + int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc); 15 + void abs_lowcore_unmap(int cpu); 16 + 17 + #endif /* _ASM_S390_ABS_LOWCORE_H */
-1
arch/s390/include/asm/ccwdev.h
··· 214 214 extern void ccw_device_destroy_console(struct ccw_device *); 215 215 extern int ccw_device_enable_console(struct ccw_device *); 216 216 extern void ccw_device_wait_idle(struct ccw_device *); 217 - extern int ccw_device_force_console(struct ccw_device *); 218 217 219 218 extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size); 220 219 extern void ccw_device_dma_free(struct ccw_device *cdev,
+2 -1
arch/s390/include/asm/ctl_reg.h
··· 95 95 Interruption-Filtering Override */ 96 96 unsigned long : 3; 97 97 unsigned long ccc : 1; /* Cryptography counter control */ 98 - unsigned long : 18; 98 + unsigned long pec : 1; /* PAI extension control */ 99 + unsigned long : 17; 99 100 unsigned long : 3; 100 101 unsigned long lap : 1; /* Low-address-protection control */ 101 102 unsigned long : 4;
+3 -1
arch/s390/include/asm/lowcore.h
··· 203 203 __u8 pad_0x1400[0x1500-0x1400]; /* 0x1400 */ 204 204 /* Cryptography-counter designation */ 205 205 __u64 ccd; /* 0x1500 */ 206 - __u8 pad_0x1508[0x1800-0x1508]; /* 0x1508 */ 206 + /* AI-extension counter designation */ 207 + __u64 aicd; /* 0x1508 */ 208 + __u8 pad_0x1510[0x1800-0x1510]; /* 0x1510 */ 207 209 208 210 /* Transaction abort diagnostic block */ 209 211 struct pgm_tdb pgm_tdb; /* 0x1800 */
+17
arch/s390/include/asm/maccess.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __ASM_S390_MACCESS_H 3 + #define __ASM_S390_MACCESS_H 4 + 5 + #include <linux/types.h> 6 + 7 + struct iov_iter; 8 + 9 + extern unsigned long __memcpy_real_area; 10 + void memcpy_real_init(void); 11 + size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count); 12 + int memcpy_real(void *dest, unsigned long src, size_t count); 13 + #ifdef CONFIG_CRASH_DUMP 14 + int copy_oldmem_kernel(void *dst, unsigned long src, size_t count); 15 + #endif 16 + 17 + #endif /* __ASM_S390_MACCESS_H */
-14
arch/s390/include/asm/os_info.h
··· 41 41 42 42 #ifdef CONFIG_CRASH_DUMP 43 43 void *os_info_old_entry(int nr, unsigned long *size); 44 - size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count); 45 - 46 - static inline int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) 47 - { 48 - struct iov_iter iter; 49 - struct kvec kvec; 50 - 51 - kvec.iov_base = dst; 52 - kvec.iov_len = count; 53 - iov_iter_kvec(&iter, WRITE, &kvec, 1, count); 54 - if (copy_oldmem_iter(&iter, src, count) < count) 55 - return -EFAULT; 56 - return 0; 57 - } 58 44 #else 59 45 static inline void *os_info_old_entry(int nr, unsigned long *size) 60 46 {
+5 -1
arch/s390/include/asm/pai.h
··· 17 17 struct { 18 18 u64 : 8; 19 19 u64 num_cc : 8; /* # of supported crypto counters */ 20 - u64 : 48; 20 + u64 : 9; 21 + u64 num_nnpa : 7; /* # of supported NNPA counters */ 22 + u64 : 32; 21 23 }; 22 24 }; 23 25 ··· 44 42 #define PAI_CRYPTO_BASE 0x1000 /* First event number */ 45 43 #define PAI_CRYPTO_MAXCTR 256 /* Max # of event counters */ 46 44 #define PAI_CRYPTO_KERNEL_OFFSET 2048 45 + #define PAI_NNPA_BASE 0x1800 /* First event number */ 46 + #define PAI_NNPA_MAXCTR 128 /* Max # of event counters */ 47 47 48 48 DECLARE_STATIC_KEY_FALSE(pai_key); 49 49
-1
arch/s390/include/asm/pci.h
··· 117 117 struct zpci_dev { 118 118 struct zpci_bus *zbus; 119 119 struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ 120 - struct list_head bus_next; 121 120 struct kref kref; 122 121 struct hotplug_slot hotplug_slot; 123 122
+4
arch/s390/include/asm/pgtable.h
··· 1777 1777 1778 1778 extern int vmem_add_mapping(unsigned long start, unsigned long size); 1779 1779 extern void vmem_remove_mapping(unsigned long start, unsigned long size); 1780 + extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); 1781 + extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot); 1782 + extern void vmem_unmap_4k_page(unsigned long addr); 1783 + extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc); 1780 1784 extern int s390_enable_sie(void); 1781 1785 extern int s390_enable_skey(void); 1782 1786 extern void s390_reset_cmma(struct mm_struct *mm);
-17
arch/s390/include/asm/processor.h
··· 306 306 307 307 #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL 308 308 309 - extern int memcpy_real(void *, unsigned long, size_t); 310 - extern void memcpy_absolute(void *, void *, size_t); 311 - 312 - #define put_abs_lowcore(member, x) do { \ 313 - unsigned long __abs_address = offsetof(struct lowcore, member); \ 314 - __typeof__(((struct lowcore *)0)->member) __tmp = (x); \ 315 - \ 316 - memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \ 317 - } while (0) 318 - 319 - #define get_abs_lowcore(x, member) do { \ 320 - unsigned long __abs_address = offsetof(struct lowcore, member); \ 321 - __typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \ 322 - \ 323 - memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \ 324 - } while (0) 325 - 326 309 extern int s390_isolate_bp(void); 327 310 extern int s390_isolate_bp_guest(void); 328 311
+3 -1
arch/s390/include/asm/smp.h
··· 30 30 31 31 extern int smp_find_processor_id(u16 address); 32 32 extern int smp_store_status(int cpu); 33 - extern void smp_save_dump_cpus(void); 33 + extern void smp_save_dump_ipl_cpu(void); 34 + extern void smp_save_dump_secondary_cpus(void); 34 35 extern void smp_yield_cpu(int cpu); 35 36 extern void smp_cpu_set_polarization(int cpu, int val); 36 37 extern int smp_cpu_get_polarization(int cpu); ··· 59 58 { 60 59 } 61 60 61 + extern int smp_reinit_ipl_cpu(void); 62 62 extern int smp_rescan_cpus(void); 63 63 extern void __noreturn cpu_die(void); 64 64 extern void __cpu_die(unsigned int cpu);
+2 -2
arch/s390/kernel/Makefile
··· 40 40 obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o 41 41 obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o 42 42 obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o 43 - obj-y += smp.o text_amode31.o stacktrace.o 43 + obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o 44 44 45 45 extra-y += head64.o vmlinux.lds 46 46 ··· 72 72 obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o 73 73 obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o 74 74 obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o 75 - obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o 75 + obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o 76 76 77 77 obj-$(CONFIG_TRACEPOINTS) += trace.o 78 78 obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
+95
arch/s390/kernel/abs_lowcore.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/pgtable.h> 4 + #include <asm/abs_lowcore.h> 5 + 6 + #define ABS_LOWCORE_UNMAPPED 1 7 + #define ABS_LOWCORE_LAP_ON 2 8 + #define ABS_LOWCORE_IRQS_ON 4 9 + 10 + unsigned long __bootdata_preserved(__abs_lowcore); 11 + bool __ro_after_init abs_lowcore_mapped; 12 + 13 + int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) 14 + { 15 + unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); 16 + unsigned long phys = __pa(lc); 17 + int rc, i; 18 + 19 + for (i = 0; i < LC_PAGES; i++) { 20 + rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc); 21 + if (rc) { 22 + /* 23 + * Do not unmap allocated page tables in case the 24 + * allocation was not requested. In such a case the 25 + * request is expected coming from an atomic context, 26 + * while the unmap attempt might sleep. 27 + */ 28 + if (alloc) { 29 + for (--i; i >= 0; i--) { 30 + addr -= PAGE_SIZE; 31 + vmem_unmap_4k_page(addr); 32 + } 33 + } 34 + return rc; 35 + } 36 + addr += PAGE_SIZE; 37 + phys += PAGE_SIZE; 38 + } 39 + return 0; 40 + } 41 + 42 + void abs_lowcore_unmap(int cpu) 43 + { 44 + unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); 45 + int i; 46 + 47 + for (i = 0; i < LC_PAGES; i++) { 48 + vmem_unmap_4k_page(addr); 49 + addr += PAGE_SIZE; 50 + } 51 + } 52 + 53 + struct lowcore *get_abs_lowcore(unsigned long *flags) 54 + { 55 + unsigned long irq_flags; 56 + union ctlreg0 cr0; 57 + int cpu; 58 + 59 + *flags = 0; 60 + cpu = get_cpu(); 61 + if (abs_lowcore_mapped) { 62 + return ((struct lowcore *)__abs_lowcore) + cpu; 63 + } else { 64 + if (cpu != 0) 65 + panic("Invalid unmapped absolute lowcore access\n"); 66 + local_irq_save(irq_flags); 67 + if (!irqs_disabled_flags(irq_flags)) 68 + *flags |= ABS_LOWCORE_IRQS_ON; 69 + __ctl_store(cr0.val, 0, 0); 70 + if (cr0.lap) { 71 + *flags |= ABS_LOWCORE_LAP_ON; 72 + __ctl_clear_bit(0, 28); 73 + } 74 + *flags |= ABS_LOWCORE_UNMAPPED; 75 + return lowcore_ptr[0]; 76 
+ } 77 + } 78 + 79 + void put_abs_lowcore(struct lowcore *lc, unsigned long flags) 80 + { 81 + if (abs_lowcore_mapped) { 82 + if (flags) 83 + panic("Invalid mapped absolute lowcore release\n"); 84 + } else { 85 + if (smp_processor_id() != 0) 86 + panic("Invalid mapped absolute lowcore access\n"); 87 + if (!(flags & ABS_LOWCORE_UNMAPPED)) 88 + panic("Invalid unmapped absolute lowcore release\n"); 89 + if (flags & ABS_LOWCORE_LAP_ON) 90 + __ctl_set_bit(0, 28); 91 + if (flags & ABS_LOWCORE_IRQS_ON) 92 + local_irq_enable(); 93 + } 94 + put_cpu(); 95 + }
+16 -24
arch/s390/kernel/crash_dump.c
··· 21 21 #include <asm/elf.h> 22 22 #include <asm/ipl.h> 23 23 #include <asm/sclp.h> 24 + #include <asm/maccess.h> 24 25 25 26 #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) 26 27 #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) ··· 54 53 }; 55 54 56 55 static LIST_HEAD(dump_save_areas); 57 - static DEFINE_MUTEX(memcpy_real_mutex); 58 - static char memcpy_real_buf[PAGE_SIZE]; 59 56 60 57 /* 61 58 * Allocate a save area ··· 115 116 memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128)); 116 117 } 117 118 118 - static size_t copy_to_iter_real(struct iov_iter *iter, unsigned long src, size_t count) 119 - { 120 - size_t len, copied, res = 0; 121 - 122 - mutex_lock(&memcpy_real_mutex); 123 - while (count) { 124 - len = min(PAGE_SIZE, count); 125 - if (memcpy_real(memcpy_real_buf, src, len)) 126 - break; 127 - copied = copy_to_iter(memcpy_real_buf, len, iter); 128 - count -= copied; 129 - src += copied; 130 - res += copied; 131 - if (copied < len) 132 - break; 133 - } 134 - mutex_unlock(&memcpy_real_mutex); 135 - return res; 136 - } 137 - 138 - size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) 119 + static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) 139 120 { 140 121 size_t len, copied, res = 0; 141 122 ··· 135 156 } else { 136 157 len = count; 137 158 } 138 - copied = copy_to_iter_real(iter, src, len); 159 + copied = memcpy_real_iter(iter, src, len); 139 160 } 140 161 count -= copied; 141 162 src += copied; ··· 144 165 break; 145 166 } 146 167 return res; 168 + } 169 + 170 + int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) 171 + { 172 + struct iov_iter iter; 173 + struct kvec kvec; 174 + 175 + kvec.iov_base = dst; 176 + kvec.iov_len = count; 177 + iov_iter_kvec(&iter, WRITE, &kvec, 1, count); 178 + if (copy_oldmem_iter(&iter, src, count) < count) 179 + return -EFAULT; 180 + return 0; 147 181 } 148 182 149 183 /*
+1 -1
arch/s390/kernel/debug.c
··· 250 250 rc->level = level; 251 251 rc->buf_size = buf_size; 252 252 rc->entry_size = sizeof(debug_entry_t) + buf_size; 253 - strlcpy(rc->name, name, sizeof(rc->name)); 253 + strscpy(rc->name, name, sizeof(rc->name)); 254 254 memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); 255 255 memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *)); 256 256 refcount_set(&(rc->ref_count), 0);
+1 -1
arch/s390/kernel/early.c
··· 267 267 static void __init setup_boot_command_line(void) 268 268 { 269 269 /* copy arch command line */ 270 - strlcpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE); 270 + strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE); 271 271 } 272 272 273 273 static void __init check_image_bootable(void)
+7 -2
arch/s390/kernel/ipl.c
··· 29 29 #include <asm/sclp.h> 30 30 #include <asm/checksum.h> 31 31 #include <asm/debug.h> 32 + #include <asm/abs_lowcore.h> 32 33 #include <asm/os_info.h> 33 34 #include <asm/sections.h> 34 35 #include <asm/boot_data.h> ··· 1643 1642 static void dump_reipl_run(struct shutdown_trigger *trigger) 1644 1643 { 1645 1644 unsigned long ipib = (unsigned long) reipl_block_actual; 1645 + struct lowcore *abs_lc; 1646 + unsigned long flags; 1646 1647 unsigned int csum; 1647 1648 1648 1649 csum = (__force unsigned int) 1649 1650 csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); 1650 - put_abs_lowcore(ipib, ipib); 1651 - put_abs_lowcore(ipib_checksum, csum); 1651 + abs_lc = get_abs_lowcore(&flags); 1652 + abs_lc->ipib = ipib; 1653 + abs_lc->ipib_checksum = csum; 1654 + put_abs_lowcore(abs_lc, flags); 1652 1655 dump_run(trigger); 1653 1656 } 1654 1657
+7 -1
arch/s390/kernel/machine_kexec.c
··· 21 21 #include <asm/elf.h> 22 22 #include <asm/asm-offsets.h> 23 23 #include <asm/cacheflush.h> 24 + #include <asm/abs_lowcore.h> 24 25 #include <asm/os_info.h> 25 26 #include <asm/set_memory.h> 26 27 #include <asm/stacktrace.h> ··· 223 222 224 223 void arch_crash_save_vmcoreinfo(void) 225 224 { 225 + struct lowcore *abs_lc; 226 + unsigned long flags; 227 + 226 228 VMCOREINFO_SYMBOL(lowcore_ptr); 227 229 VMCOREINFO_SYMBOL(high_memory); 228 230 VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); 229 231 vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31); 230 232 vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31); 231 233 vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); 232 - put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note()); 234 + abs_lc = get_abs_lowcore(&flags); 235 + abs_lc->vmcore_info = paddr_vmcoreinfo_note(); 236 + put_abs_lowcore(abs_lc, flags); 233 237 } 234 238 235 239 void machine_shutdown(void)
+7 -3
arch/s390/kernel/os_info.c
··· 13 13 #include <linux/kernel.h> 14 14 #include <linux/slab.h> 15 15 #include <asm/checksum.h> 16 - #include <asm/lowcore.h> 16 + #include <asm/abs_lowcore.h> 17 17 #include <asm/os_info.h> 18 + #include <asm/maccess.h> 18 19 #include <asm/asm-offsets.h> 19 20 20 21 /* ··· 58 57 */ 59 58 void __init os_info_init(void) 60 59 { 61 - void *ptr = &os_info; 60 + struct lowcore *abs_lc; 61 + unsigned long flags; 62 62 63 63 os_info.version_major = OS_INFO_VERSION_MAJOR; 64 64 os_info.version_minor = OS_INFO_VERSION_MINOR; 65 65 os_info.magic = OS_INFO_MAGIC; 66 66 os_info.csum = os_info_csum(&os_info); 67 - put_abs_lowcore(os_info, __pa(ptr)); 67 + abs_lc = get_abs_lowcore(&flags); 68 + abs_lc->os_info = __pa(&os_info); 69 + put_abs_lowcore(abs_lc, flags); 68 70 } 69 71 70 72 #ifdef CONFIG_CRASH_DUMP
+671
arch/s390/kernel/perf_pai_ext.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Performance event support - Processor Activity Instrumentation Extension 4 + * Facility 5 + * 6 + * Copyright IBM Corp. 2022 7 + * Author(s): Thomas Richter <tmricht@linux.ibm.com> 8 + */ 9 + #define KMSG_COMPONENT "pai_ext" 10 + #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 11 + 12 + #include <linux/kernel.h> 13 + #include <linux/kernel_stat.h> 14 + #include <linux/percpu.h> 15 + #include <linux/notifier.h> 16 + #include <linux/init.h> 17 + #include <linux/export.h> 18 + #include <linux/io.h> 19 + 20 + #include <asm/cpu_mcf.h> 21 + #include <asm/ctl_reg.h> 22 + #include <asm/pai.h> 23 + #include <asm/debug.h> 24 + 25 + #define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */ 26 + #define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */ 27 + 28 + static debug_info_t *paiext_dbg; 29 + static unsigned int paiext_cnt; /* Extracted with QPACI instruction */ 30 + 31 + enum paiext_mode { 32 + PAI_MODE_NONE, 33 + PAI_MODE_SAMPLING, 34 + PAI_MODE_COUNTER, 35 + }; 36 + 37 + struct pai_userdata { 38 + u16 num; 39 + u64 value; 40 + } __packed; 41 + 42 + /* Create the PAI extension 1 control block area. 43 + * The PAI extension control block 1 is pointed to by lowcore 44 + * address 0x1508 for each CPU. This control block is 512 bytes in size 45 + * and requires a 512 byte boundary alignment. 
46 + */ 47 + struct paiext_cb { /* PAI extension 1 control block */ 48 + u64 header; /* Not used */ 49 + u64 reserved1; 50 + u64 acc; /* Addr to analytics counter control block */ 51 + u8 reserved2[488]; 52 + } __packed; 53 + 54 + struct paiext_map { 55 + unsigned long *area; /* Area for CPU to store counters */ 56 + struct pai_userdata *save; /* Area to store non-zero counters */ 57 + enum paiext_mode mode; /* Type of event */ 58 + unsigned int active_events; /* # of PAI Extension users */ 59 + unsigned int refcnt; 60 + struct perf_event *event; /* Perf event for sampling */ 61 + struct paiext_cb *paiext_cb; /* PAI extension control block area */ 62 + }; 63 + 64 + struct paiext_mapptr { 65 + struct paiext_map *mapptr; 66 + }; 67 + 68 + static struct paiext_root { /* Anchor to per CPU data */ 69 + int refcnt; /* Overall active events */ 70 + struct paiext_mapptr __percpu *mapptr; 71 + } paiext_root; 72 + 73 + /* Free per CPU data when the last event is removed. */ 74 + static void paiext_root_free(void) 75 + { 76 + if (!--paiext_root.refcnt) { 77 + free_percpu(paiext_root.mapptr); 78 + paiext_root.mapptr = NULL; 79 + } 80 + } 81 + 82 + /* On initialization of first event also allocate per CPU data dynamically. 83 + * Start with an array of pointers, the array size is the maximum number of 84 + * CPUs possible, which might be larger than the number of CPUs currently 85 + * online. 86 + */ 87 + static int paiext_root_alloc(void) 88 + { 89 + if (++paiext_root.refcnt == 1) { 90 + /* The memory is already zeroed. */ 91 + paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); 92 + if (!paiext_root.mapptr) { 93 + /* Returning without refcnt adjustment is ok. The 94 + * error code is handled by paiext_alloc() which 95 + * decrements refcnt when an event can not be 96 + * created. 
97 + */ 98 + return -ENOMEM; 99 + } 100 + } 101 + return 0; 102 + } 103 + 104 + /* Protects against concurrent increment of sampler and counter member 105 + * increments at the same time and prohibits concurrent execution of 106 + * counting and sampling events. 107 + * Ensures that analytics counter block is deallocated only when the 108 + * sampling and counting on that cpu is zero. 109 + * For details see paiext_alloc(). 110 + */ 111 + static DEFINE_MUTEX(paiext_reserve_mutex); 112 + 113 + /* Free all memory allocated for event counting/sampling setup */ 114 + static void paiext_free(struct paiext_mapptr *mp) 115 + { 116 + kfree(mp->mapptr->area); 117 + kfree(mp->mapptr->paiext_cb); 118 + kvfree(mp->mapptr->save); 119 + kfree(mp->mapptr); 120 + mp->mapptr = NULL; 121 + } 122 + 123 + /* Release the PMU if event is the last perf event */ 124 + static void paiext_event_destroy(struct perf_event *event) 125 + { 126 + struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); 127 + struct paiext_map *cpump = mp->mapptr; 128 + 129 + mutex_lock(&paiext_reserve_mutex); 130 + cpump->event = NULL; 131 + if (!--cpump->refcnt) /* Last reference gone */ 132 + paiext_free(mp); 133 + paiext_root_free(); 134 + mutex_unlock(&paiext_reserve_mutex); 135 + debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__, 136 + event->cpu, mp->mapptr); 137 + 138 + } 139 + 140 + /* Used to avoid races in checking concurrent access of counting and 141 + * sampling for pai_extension events. 142 + * 143 + * Only one instance of event pai_ext/NNPA_ALL/ for sampling is 144 + * allowed and when this event is running, no counting event is allowed. 145 + * Several counting events are allowed in parallel, but no sampling event 146 + * is allowed while one (or more) counting events are running. 147 + * 148 + * This function is called in process context and it is safe to block. 149 + * When the event initialization functions fails, no other call back will 150 + * be invoked. 
151 + * 152 + * Allocate the memory for the event. 153 + */ 154 + static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) 155 + { 156 + struct paiext_mapptr *mp; 157 + struct paiext_map *cpump; 158 + int rc; 159 + 160 + mutex_lock(&paiext_reserve_mutex); 161 + 162 + rc = paiext_root_alloc(); 163 + if (rc) 164 + goto unlock; 165 + 166 + mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); 167 + cpump = mp->mapptr; 168 + if (!cpump) { /* Paiext_map allocated? */ 169 + rc = -ENOMEM; 170 + cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); 171 + if (!cpump) 172 + goto unlock; 173 + 174 + /* Allocate memory for counter area and counter extraction. 175 + * These are 176 + * - a 512 byte block and requires 512 byte boundary alignment. 177 + * - a 1KB block and requires 1KB boundary alignment. 178 + * Only the first counting event has to allocate the area. 179 + * 180 + * Note: This works with commit 59bb47985c1d by default. 181 + * Backporting this to kernels without this commit might 182 + * need adjustment. 183 + */ 184 + mp->mapptr = cpump; 185 + cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL); 186 + cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL); 187 + cpump->save = kvmalloc_array(paiext_cnt + 1, 188 + sizeof(struct pai_userdata), 189 + GFP_KERNEL); 190 + if (!cpump->save || !cpump->area || !cpump->paiext_cb) { 191 + paiext_free(mp); 192 + goto unlock; 193 + } 194 + cpump->mode = a->sample_period ? PAI_MODE_SAMPLING 195 + : PAI_MODE_COUNTER; 196 + } else { 197 + /* Multiple invocation, check what's active. 198 + * Supported are multiple counter events or only one sampling 199 + * event concurrently at any one time. 
200 + */ 201 + if (cpump->mode == PAI_MODE_SAMPLING || 202 + (cpump->mode == PAI_MODE_COUNTER && a->sample_period)) { 203 + rc = -EBUSY; 204 + goto unlock; 205 + } 206 + } 207 + 208 + rc = 0; 209 + cpump->event = event; 210 + ++cpump->refcnt; 211 + 212 + unlock: 213 + if (rc) { 214 + /* Error in allocation of event, decrement anchor. Since 215 + * the event is not created, its destroy() function is never 216 + * invoked. Adjust the reference counter for the anchor. 217 + */ 218 + paiext_root_free(); 219 + } 220 + mutex_unlock(&paiext_reserve_mutex); 221 + /* If rc is non-zero, no increment of counter/sampler was done. */ 222 + return rc; 223 + } 224 + 225 + /* The PAI extension 1 control block supports up to 128 entries. Return 226 + * the index within PAIE1_CB given the event number. Also validate event 227 + * number. 228 + */ 229 + static int paiext_event_valid(struct perf_event *event) 230 + { 231 + u64 cfg = event->attr.config; 232 + 233 + if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) { 234 + /* Offset NNPA in paiext_cb */ 235 + event->hw.config_base = offsetof(struct paiext_cb, acc); 236 + return 0; 237 + } 238 + return -EINVAL; 239 + } 240 + 241 + /* Might be called on different CPU than the one the event is intended for. */ 242 + static int paiext_event_init(struct perf_event *event) 243 + { 244 + struct perf_event_attr *a = &event->attr; 245 + int rc; 246 + 247 + /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */ 248 + if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) 249 + return -ENOENT; 250 + /* PAI extension event must be valid and in supported range */ 251 + rc = paiext_event_valid(event); 252 + if (rc) 253 + return rc; 254 + /* Allow only CPU wide operation, no process context for now. */ 255 + if (event->hw.target || event->cpu == -1) 256 + return -ENOENT; 257 + /* Allow only event NNPA_ALL for sampling. 
*/ 258 + if (a->sample_period && a->config != PAI_NNPA_BASE) 259 + return -EINVAL; 260 + /* Prohibit exclude_user event selection */ 261 + if (a->exclude_user) 262 + return -EINVAL; 263 + 264 + rc = paiext_alloc(a, event); 265 + if (rc) 266 + return rc; 267 + event->hw.last_tag = 0; 268 + event->destroy = paiext_event_destroy; 269 + 270 + if (a->sample_period) { 271 + a->sample_period = 1; 272 + a->freq = 0; 273 + /* Register for paiext_sched_task() to be called */ 274 + event->attach_state |= PERF_ATTACH_SCHED_CB; 275 + /* Add raw data which are the memory mapped counters */ 276 + a->sample_type |= PERF_SAMPLE_RAW; 277 + /* Turn off inheritance */ 278 + a->inherit = 0; 279 + } 280 + 281 + return 0; 282 + } 283 + 284 + static u64 paiext_getctr(struct paiext_map *cpump, int nr) 285 + { 286 + return cpump->area[nr]; 287 + } 288 + 289 + /* Read the counter values. Return value from location in buffer. For event 290 + * NNPA_ALL sum up all events. 291 + */ 292 + static u64 paiext_getdata(struct perf_event *event) 293 + { 294 + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 295 + struct paiext_map *cpump = mp->mapptr; 296 + u64 sum = 0; 297 + int i; 298 + 299 + if (event->attr.config != PAI_NNPA_BASE) 300 + return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE); 301 + 302 + for (i = 1; i <= paiext_cnt; i++) 303 + sum += paiext_getctr(cpump, i); 304 + 305 + return sum; 306 + } 307 + 308 + static u64 paiext_getall(struct perf_event *event) 309 + { 310 + return paiext_getdata(event); 311 + } 312 + 313 + static void paiext_read(struct perf_event *event) 314 + { 315 + u64 prev, new, delta; 316 + 317 + prev = local64_read(&event->hw.prev_count); 318 + new = paiext_getall(event); 319 + local64_set(&event->hw.prev_count, new); 320 + delta = new - prev; 321 + local64_add(delta, &event->count); 322 + } 323 + 324 + static void paiext_start(struct perf_event *event, int flags) 325 + { 326 + u64 sum; 327 + 328 + if (event->hw.last_tag) 329 + return; 330 + 
event->hw.last_tag = 1; 331 + sum = paiext_getall(event); /* Get current value */ 332 + local64_set(&event->hw.prev_count, sum); 333 + local64_set(&event->count, 0); 334 + } 335 + 336 + static int paiext_add(struct perf_event *event, int flags) 337 + { 338 + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 339 + struct paiext_map *cpump = mp->mapptr; 340 + struct paiext_cb *pcb = cpump->paiext_cb; 341 + 342 + if (++cpump->active_events == 1) { 343 + S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb); 344 + pcb->acc = virt_to_phys(cpump->area) | 0x1; 345 + /* Enable CPU instruction lookup for PAIE1 control block */ 346 + __ctl_set_bit(0, 49); 347 + debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", 348 + __func__, S390_lowcore.aicd, pcb->acc); 349 + } 350 + if (flags & PERF_EF_START && !event->attr.sample_period) { 351 + /* Only counting needs initial counter value */ 352 + paiext_start(event, PERF_EF_RELOAD); 353 + } 354 + event->hw.state = 0; 355 + if (event->attr.sample_period) { 356 + cpump->event = event; 357 + perf_sched_cb_inc(event->pmu); 358 + } 359 + return 0; 360 + } 361 + 362 + static void paiext_stop(struct perf_event *event, int flags) 363 + { 364 + paiext_read(event); 365 + event->hw.state = PERF_HES_STOPPED; 366 + } 367 + 368 + static void paiext_del(struct perf_event *event, int flags) 369 + { 370 + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 371 + struct paiext_map *cpump = mp->mapptr; 372 + struct paiext_cb *pcb = cpump->paiext_cb; 373 + 374 + if (event->attr.sample_period) 375 + perf_sched_cb_dec(event->pmu); 376 + if (!event->attr.sample_period) { 377 + /* Only counting needs to read counter */ 378 + paiext_stop(event, PERF_EF_UPDATE); 379 + } 380 + if (--cpump->active_events == 0) { 381 + /* Disable CPU instruction lookup for PAIE1 control block */ 382 + __ctl_clear_bit(0, 49); 383 + pcb->acc = 0; 384 + S390_lowcore.aicd = 0; 385 + debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", 386 + 
__func__, S390_lowcore.aicd, pcb->acc); 387 + } 388 + } 389 + 390 + /* Create raw data and save it in buffer. Returns number of bytes copied. 391 + * Saves only positive counter entries of the form 392 + * 2 bytes: Number of counter 393 + * 8 bytes: Value of counter 394 + */ 395 + static size_t paiext_copy(struct paiext_map *cpump) 396 + { 397 + struct pai_userdata *userdata = cpump->save; 398 + int i, outidx = 0; 399 + 400 + for (i = 1; i <= paiext_cnt; i++) { 401 + u64 val = paiext_getctr(cpump, i); 402 + 403 + if (val) { 404 + userdata[outidx].num = i; 405 + userdata[outidx].value = val; 406 + outidx++; 407 + } 408 + } 409 + return outidx * sizeof(*userdata); 410 + } 411 + 412 + /* Write sample when one or more counters values are nonzero. 413 + * 414 + * Note: The function paiext_sched_task() and paiext_push_sample() are not 415 + * invoked after function paiext_del() has been called because of function 416 + * perf_sched_cb_dec(). 417 + * The function paiext_sched_task() and paiext_push_sample() are only 418 + * called when sampling is active. Function perf_sched_cb_inc() 419 + * has been invoked to install function paiext_sched_task() as call back 420 + * to run at context switch time (see paiext_add()). 421 + * 422 + * This causes function perf_event_context_sched_out() and 423 + * perf_event_context_sched_in() to check whether the PMU has installed an 424 + * sched_task() callback. That callback is not active after paiext_del() 425 + * returns and has deleted the event on that CPU. 
426 + */ 427 + static int paiext_push_sample(void) 428 + { 429 + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 430 + struct paiext_map *cpump = mp->mapptr; 431 + struct perf_event *event = cpump->event; 432 + struct perf_sample_data data; 433 + struct perf_raw_record raw; 434 + struct pt_regs regs; 435 + size_t rawsize; 436 + int overflow; 437 + 438 + rawsize = paiext_copy(cpump); 439 + if (!rawsize) /* No incremented counters */ 440 + return 0; 441 + 442 + /* Setup perf sample */ 443 + memset(&regs, 0, sizeof(regs)); 444 + memset(&raw, 0, sizeof(raw)); 445 + memset(&data, 0, sizeof(data)); 446 + perf_sample_data_init(&data, 0, event->hw.last_period); 447 + if (event->attr.sample_type & PERF_SAMPLE_TID) { 448 + data.tid_entry.pid = task_tgid_nr(current); 449 + data.tid_entry.tid = task_pid_nr(current); 450 + } 451 + if (event->attr.sample_type & PERF_SAMPLE_TIME) 452 + data.time = event->clock(); 453 + if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) 454 + data.id = event->id; 455 + if (event->attr.sample_type & PERF_SAMPLE_CPU) 456 + data.cpu_entry.cpu = smp_processor_id(); 457 + if (event->attr.sample_type & PERF_SAMPLE_RAW) { 458 + raw.frag.size = rawsize; 459 + raw.frag.data = cpump->save; 460 + raw.size = raw.frag.size; 461 + data.raw = &raw; 462 + } 463 + 464 + overflow = perf_event_overflow(event, &data, &regs); 465 + perf_event_update_userpage(event); 466 + /* Clear lowcore area after read */ 467 + memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ); 468 + return overflow; 469 + } 470 + 471 + /* Called on schedule-in and schedule-out. No access to event structure, 472 + * but for sampling only event NNPA_ALL is allowed. 473 + */ 474 + static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in) 475 + { 476 + /* We started with a clean page on event installation. So read out 477 + * results on schedule_out and if page was dirty, clear values. 
478 + */ 479 + if (!sched_in) 480 + paiext_push_sample(); 481 + } 482 + 483 + /* Attribute definitions for pai extension1 interface. As with other CPU 484 + * Measurement Facilities, there is one attribute per mapped counter. 485 + * The number of mapped counters may vary per machine generation. Use 486 + * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction 487 + * to determine the number of mapped counters. The instructions returns 488 + * a positive number, which is the highest number of supported counters. 489 + * All counters less than this number are also supported, there are no 490 + * holes. A returned number of zero means no support for mapped counters. 491 + * 492 + * The identification of the counter is a unique number. The chosen range 493 + * is 0x1800 + offset in mapped kernel page. 494 + * All CPU Measurement Facility counters identifiers must be unique and 495 + * the numbers from 0 to 496 are already used for the CPU Measurement 496 + * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography 497 + * counters. 498 + * Numbers 0xb0000, 0xbc000 and 0xbd000 are already 499 + * used for the CPU Measurement Sampling facility. 
500 + */ 501 + PMU_FORMAT_ATTR(event, "config:0-63"); 502 + 503 + static struct attribute *paiext_format_attr[] = { 504 + &format_attr_event.attr, 505 + NULL, 506 + }; 507 + 508 + static struct attribute_group paiext_events_group = { 509 + .name = "events", 510 + .attrs = NULL, /* Filled in attr_event_init() */ 511 + }; 512 + 513 + static struct attribute_group paiext_format_group = { 514 + .name = "format", 515 + .attrs = paiext_format_attr, 516 + }; 517 + 518 + static const struct attribute_group *paiext_attr_groups[] = { 519 + &paiext_events_group, 520 + &paiext_format_group, 521 + NULL, 522 + }; 523 + 524 + /* Performance monitoring unit for mapped counters */ 525 + static struct pmu paiext = { 526 + .task_ctx_nr = perf_invalid_context, 527 + .event_init = paiext_event_init, 528 + .add = paiext_add, 529 + .del = paiext_del, 530 + .start = paiext_start, 531 + .stop = paiext_stop, 532 + .read = paiext_read, 533 + .sched_task = paiext_sched_task, 534 + .attr_groups = paiext_attr_groups, 535 + }; 536 + 537 + /* List of symbolic PAI extension 1 NNPA counter names. 
*/ 538 + static const char * const paiext_ctrnames[] = { 539 + [0] = "NNPA_ALL", 540 + [1] = "NNPA_ADD", 541 + [2] = "NNPA_SUB", 542 + [3] = "NNPA_MUL", 543 + [4] = "NNPA_DIV", 544 + [5] = "NNPA_MIN", 545 + [6] = "NNPA_MAX", 546 + [7] = "NNPA_LOG", 547 + [8] = "NNPA_EXP", 548 + [9] = "NNPA_IBM_RESERVED_9", 549 + [10] = "NNPA_RELU", 550 + [11] = "NNPA_TANH", 551 + [12] = "NNPA_SIGMOID", 552 + [13] = "NNPA_SOFTMAX", 553 + [14] = "NNPA_BATCHNORM", 554 + [15] = "NNPA_MAXPOOL2D", 555 + [16] = "NNPA_AVGPOOL2D", 556 + [17] = "NNPA_LSTMACT", 557 + [18] = "NNPA_GRUACT", 558 + [19] = "NNPA_CONVOLUTION", 559 + [20] = "NNPA_MATMUL_OP", 560 + [21] = "NNPA_MATMUL_OP_BCAST23", 561 + [22] = "NNPA_SMALLBATCH", 562 + [23] = "NNPA_LARGEDIM", 563 + [24] = "NNPA_SMALLTENSOR", 564 + [25] = "NNPA_1MFRAME", 565 + [26] = "NNPA_2GFRAME", 566 + [27] = "NNPA_ACCESSEXCEPT", 567 + }; 568 + 569 + static void __init attr_event_free(struct attribute **attrs, int num) 570 + { 571 + struct perf_pmu_events_attr *pa; 572 + struct device_attribute *dap; 573 + int i; 574 + 575 + for (i = 0; i < num; i++) { 576 + dap = container_of(attrs[i], struct device_attribute, attr); 577 + pa = container_of(dap, struct perf_pmu_events_attr, attr); 578 + kfree(pa); 579 + } 580 + kfree(attrs); 581 + } 582 + 583 + static int __init attr_event_init_one(struct attribute **attrs, int num) 584 + { 585 + struct perf_pmu_events_attr *pa; 586 + 587 + pa = kzalloc(sizeof(*pa), GFP_KERNEL); 588 + if (!pa) 589 + return -ENOMEM; 590 + 591 + sysfs_attr_init(&pa->attr.attr); 592 + pa->id = PAI_NNPA_BASE + num; 593 + pa->attr.attr.name = paiext_ctrnames[num]; 594 + pa->attr.attr.mode = 0444; 595 + pa->attr.show = cpumf_events_sysfs_show; 596 + pa->attr.store = NULL; 597 + attrs[num] = &pa->attr.attr; 598 + return 0; 599 + } 600 + 601 + /* Create PMU sysfs event attributes on the fly. 
*/ 602 + static int __init attr_event_init(void) 603 + { 604 + struct attribute **attrs; 605 + int ret, i; 606 + 607 + attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs), 608 + GFP_KERNEL); 609 + if (!attrs) 610 + return -ENOMEM; 611 + for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) { 612 + ret = attr_event_init_one(attrs, i); 613 + if (ret) { 614 + attr_event_free(attrs, i - 1); 615 + return ret; 616 + } 617 + } 618 + attrs[i] = NULL; 619 + paiext_events_group.attrs = attrs; 620 + return 0; 621 + } 622 + 623 + static int __init paiext_init(void) 624 + { 625 + struct qpaci_info_block ib; 626 + int rc = -ENOMEM; 627 + 628 + if (!test_facility(197)) 629 + return 0; 630 + 631 + qpaci(&ib); 632 + paiext_cnt = ib.num_nnpa; 633 + if (paiext_cnt >= PAI_NNPA_MAXCTR) 634 + paiext_cnt = PAI_NNPA_MAXCTR; 635 + if (!paiext_cnt) 636 + return 0; 637 + 638 + rc = attr_event_init(); 639 + if (rc) { 640 + pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n"); 641 + return rc; 642 + } 643 + 644 + /* Setup s390dbf facility */ 645 + paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); 646 + if (!paiext_dbg) { 647 + pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n"); 648 + rc = -ENOMEM; 649 + goto out_init; 650 + } 651 + debug_register_view(paiext_dbg, &debug_sprintf_view); 652 + 653 + rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1); 654 + if (rc) { 655 + pr_err("Registration of " KMSG_COMPONENT " PMU failed with " 656 + "rc=%i\n", rc); 657 + goto out_pmu; 658 + } 659 + 660 + return 0; 661 + 662 + out_pmu: 663 + debug_unregister_view(paiext_dbg, &debug_sprintf_view); 664 + debug_unregister(paiext_dbg); 665 + out_init: 666 + attr_event_free(paiext_events_group.attrs, 667 + ARRAY_SIZE(paiext_ctrnames) + 1); 668 + return rc; 669 + } 670 + 671 + device_initcall(paiext_init);
+30 -17
arch/s390/kernel/setup.c
··· 58 58 #include <asm/smp.h> 59 59 #include <asm/mmu_context.h> 60 60 #include <asm/cpcmd.h> 61 - #include <asm/lowcore.h> 61 + #include <asm/abs_lowcore.h> 62 62 #include <asm/nmi.h> 63 63 #include <asm/irq.h> 64 64 #include <asm/page.h> ··· 74 74 #include <asm/alternative.h> 75 75 #include <asm/nospec-branch.h> 76 76 #include <asm/mem_detect.h> 77 + #include <asm/maccess.h> 77 78 #include <asm/uv.h> 78 79 #include <asm/asm-offsets.h> 79 80 #include "entry.h" ··· 396 395 { 397 396 unsigned long stack; 398 397 398 + smp_reinit_ipl_cpu(); 399 399 stack = stack_alloc(); 400 400 if (!stack) 401 401 panic("Couldn't allocate kernel stack"); ··· 413 411 static void __init setup_lowcore_dat_off(void) 414 412 { 415 413 unsigned long int_psw_mask = PSW_KERNEL_BITS; 414 + struct lowcore *abs_lc, *lc; 416 415 unsigned long mcck_stack; 417 - struct lowcore *lc; 416 + unsigned long flags; 418 417 419 418 if (IS_ENABLED(CONFIG_KASAN)) 420 419 int_psw_mask |= PSW_MASK_DAT; ··· 477 474 lc->restart_data = 0; 478 475 lc->restart_source = -1U; 479 476 480 - put_abs_lowcore(restart_stack, lc->restart_stack); 481 - put_abs_lowcore(restart_fn, lc->restart_fn); 482 - put_abs_lowcore(restart_data, lc->restart_data); 483 - put_abs_lowcore(restart_source, lc->restart_source); 484 - put_abs_lowcore(restart_psw, lc->restart_psw); 485 - put_abs_lowcore(mcesad, lc->mcesad); 477 + abs_lc = get_abs_lowcore(&flags); 478 + abs_lc->restart_stack = lc->restart_stack; 479 + abs_lc->restart_fn = lc->restart_fn; 480 + abs_lc->restart_data = lc->restart_data; 481 + abs_lc->restart_source = lc->restart_source; 482 + abs_lc->restart_psw = lc->restart_psw; 483 + abs_lc->mcesad = lc->mcesad; 484 + put_abs_lowcore(abs_lc, flags); 486 485 487 486 mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); 488 487 if (!mcck_stack) ··· 505 500 506 501 static void __init setup_lowcore_dat_on(void) 507 502 { 508 - struct lowcore *lc = lowcore_ptr[0]; 509 - int cr; 503 + struct lowcore *abs_lc; 504 + 
unsigned long flags; 510 505 511 506 __ctl_clear_bit(0, 28); 512 507 S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; ··· 515 510 S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; 516 511 __ctl_set_bit(0, 28); 517 512 __ctl_store(S390_lowcore.cregs_save_area, 0, 15); 518 - put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS); 519 - put_abs_lowcore(program_new_psw, lc->program_new_psw); 520 - for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++) 521 - put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]); 513 + if (abs_lowcore_map(0, lowcore_ptr[0], true)) 514 + panic("Couldn't setup absolute lowcore"); 515 + abs_lowcore_mapped = true; 516 + abs_lc = get_abs_lowcore(&flags); 517 + abs_lc->restart_flags = RESTART_FLAG_CTLREGS; 518 + abs_lc->program_new_psw = S390_lowcore.program_new_psw; 519 + memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area, 520 + sizeof(abs_lc->cregs_save_area)); 521 + put_abs_lowcore(abs_lc, flags); 522 522 } 523 523 524 524 static struct resource code_resource = { ··· 1029 1019 reserve_crashkernel(); 1030 1020 #ifdef CONFIG_CRASH_DUMP 1031 1021 /* 1032 - * Be aware that smp_save_dump_cpus() triggers a system reset. 1022 + * Be aware that smp_save_dump_secondary_cpus() triggers a system reset. 1033 1023 * Therefore CPU and device initialization should be done afterwards. 1034 1024 */ 1035 - smp_save_dump_cpus(); 1025 + smp_save_dump_secondary_cpus(); 1036 1026 #endif 1037 1027 1038 1028 setup_resources(); ··· 1051 1041 * Create kernel page tables and switch to virtual addressing. 1052 1042 */ 1053 1043 paging_init(); 1054 - 1044 + memcpy_real_init(); 1055 1045 /* 1056 1046 * After paging_init created the kernel page table, the new PSWs 1057 1047 * in lowcore can now run with DAT enabled. 1058 1048 */ 1059 1049 setup_lowcore_dat_on(); 1050 + #ifdef CONFIG_CRASH_DUMP 1051 + smp_save_dump_ipl_cpu(); 1052 + #endif 1060 1053 1061 1054 /* Setup default console */ 1062 1055 conmode_default();
+54 -43
arch/s390/kernel/smp.c
··· 45 45 #include <asm/irq.h> 46 46 #include <asm/tlbflush.h> 47 47 #include <asm/vtimer.h> 48 - #include <asm/lowcore.h> 48 + #include <asm/abs_lowcore.h> 49 49 #include <asm/sclp.h> 50 50 #include <asm/debug.h> 51 51 #include <asm/os_info.h> ··· 55 55 #include <asm/stacktrace.h> 56 56 #include <asm/topology.h> 57 57 #include <asm/vdso.h> 58 + #include <asm/maccess.h> 58 59 #include "entry.h" 59 60 60 61 enum { ··· 213 212 lc->preempt_count = PREEMPT_DISABLED; 214 213 if (nmi_alloc_mcesa(&lc->mcesad)) 215 214 goto out; 215 + if (abs_lowcore_map(cpu, lc, true)) 216 + goto out_mcesa; 216 217 lowcore_ptr[cpu] = lc; 217 218 pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc)); 218 219 return 0; 219 220 221 + out_mcesa: 222 + nmi_free_mcesa(&lc->mcesad); 220 223 out: 221 224 stack_free(mcck_stack); 222 225 stack_free(async_stack); ··· 242 237 mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET; 243 238 pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); 244 239 lowcore_ptr[cpu] = NULL; 240 + abs_lowcore_unmap(cpu); 245 241 nmi_free_mcesa(&lc->mcesad); 246 242 stack_free(async_stack); 247 243 stack_free(mcck_stack); ··· 321 315 pcpu_delegate_fn *func, 322 316 void *data, unsigned long stack) 323 317 { 324 - struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; 325 - unsigned int source_cpu = stap(); 318 + struct lowcore *lc, *abs_lc; 319 + unsigned int source_cpu; 320 + unsigned long flags; 326 321 322 + lc = lowcore_ptr[pcpu - pcpu_devices]; 323 + source_cpu = stap(); 327 324 __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); 328 325 if (pcpu->address == source_cpu) { 329 326 call_on_stack(2, stack, void, __pcpu_delegate, ··· 341 332 lc->restart_data = (unsigned long)data; 342 333 lc->restart_source = source_cpu; 343 334 } else { 344 - put_abs_lowcore(restart_stack, stack); 345 - put_abs_lowcore(restart_fn, (unsigned long)func); 346 - put_abs_lowcore(restart_data, (unsigned long)data); 347 - put_abs_lowcore(restart_source, source_cpu); 335 + abs_lc = get_abs_lowcore(&flags); 336 + 
abs_lc->restart_stack = stack; 337 + abs_lc->restart_fn = (unsigned long)func; 338 + abs_lc->restart_data = (unsigned long)data; 339 + abs_lc->restart_source = source_cpu; 340 + put_abs_lowcore(abs_lc, flags); 348 341 } 349 342 __bpon(); 350 343 asm volatile( ··· 592 581 void smp_ctl_set_clear_bit(int cr, int bit, bool set) 593 582 { 594 583 struct ec_creg_mask_parms parms = { .cr = cr, }; 584 + struct lowcore *abs_lc; 585 + unsigned long flags; 595 586 u64 ctlreg; 596 587 597 588 if (set) { ··· 604 591 parms.andval = ~(1UL << bit); 605 592 } 606 593 spin_lock(&ctl_lock); 607 - get_abs_lowcore(ctlreg, cregs_save_area[cr]); 594 + abs_lc = get_abs_lowcore(&flags); 595 + ctlreg = abs_lc->cregs_save_area[cr]; 608 596 ctlreg = (ctlreg & parms.andval) | parms.orval; 609 - put_abs_lowcore(cregs_save_area[cr], ctlreg); 597 + abs_lc->cregs_save_area[cr] = ctlreg; 598 + put_abs_lowcore(abs_lc, flags); 610 599 spin_unlock(&ctl_lock); 611 600 on_each_cpu(smp_ctl_bit_callback, &parms, 1); 612 601 } ··· 665 650 * This case does not exist for s390 anymore, setup_arch explicitly 666 651 * deactivates the elfcorehdr= kernel parameter 667 652 */ 668 - static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr, 669 - bool is_boot_cpu, __vector128 *vxrs) 653 + static bool dump_available(void) 670 654 { 671 - if (is_boot_cpu) 672 - vxrs = boot_cpu_vector_save_area; 673 - else 674 - __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(vxrs)); 675 - save_area_add_vxrs(sa, vxrs); 655 + return oldmem_data.start || is_ipl_type_dump(); 676 656 } 677 657 678 - static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr, 679 - bool is_boot_cpu, void *regs) 658 + void __init smp_save_dump_ipl_cpu(void) 680 659 { 681 - if (is_boot_cpu) 682 - copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); 683 - else 684 - __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs)); 660 + struct save_area *sa; 661 + void *regs; 662 + 663 + if (!dump_available()) 664 + return; 
665 + sa = save_area_alloc(true); 666 + regs = memblock_alloc(512, 8); 667 + if (!sa || !regs) 668 + panic("could not allocate memory for boot CPU save area\n"); 669 + copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); 685 670 save_area_add_regs(sa, regs); 671 + memblock_free(regs, 512); 672 + if (MACHINE_HAS_VX) 673 + save_area_add_vxrs(sa, boot_cpu_vector_save_area); 686 674 } 687 675 688 - void __init smp_save_dump_cpus(void) 676 + void __init smp_save_dump_secondary_cpus(void) 689 677 { 690 678 int addr, boot_cpu_addr, max_cpu_addr; 691 679 struct save_area *sa; 692 - bool is_boot_cpu; 693 680 void *page; 694 681 695 - if (!(oldmem_data.start || is_ipl_type_dump())) 696 - /* No previous system present, normal boot. */ 682 + if (!dump_available()) 697 683 return; 698 684 /* Allocate a page as dumping area for the store status sigps */ 699 685 page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); ··· 707 691 boot_cpu_addr = stap(); 708 692 max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; 709 693 for (addr = 0; addr <= max_cpu_addr; addr++) { 694 + if (addr == boot_cpu_addr) 695 + continue; 710 696 if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) == 711 697 SIGP_CC_NOT_OPERATIONAL) 712 698 continue; 713 - is_boot_cpu = (addr == boot_cpu_addr); 714 - /* Allocate save area */ 715 - sa = save_area_alloc(is_boot_cpu); 699 + sa = save_area_alloc(false); 716 700 if (!sa) 717 701 panic("could not allocate memory for save area\n"); 718 - if (MACHINE_HAS_VX) 719 - /* Get the vector registers */ 720 - smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page); 721 - /* 722 - * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers 723 - * of the boot CPU are stored in the HSA. 
To retrieve 724 - * these registers an SCLP request is required which is 725 - * done by drivers/s390/char/zcore.c:init_cpu_info() 726 - */ 727 - if (!is_boot_cpu || oldmem_data.start) 728 - /* Get the CPU registers */ 729 - smp_save_cpu_regs(sa, addr, is_boot_cpu, page); 702 + __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page)); 703 + save_area_add_regs(sa, page); 704 + if (MACHINE_HAS_VX) { 705 + __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page)); 706 + save_area_add_vxrs(sa, page); 707 + } 730 708 } 731 709 memblock_free(page, PAGE_SIZE); 732 710 diag_amode31_ops.diag308_reset(); ··· 1266 1256 : "memory", "cc"); 1267 1257 } 1268 1258 1269 - static int __init smp_reinit_ipl_cpu(void) 1259 + int __init smp_reinit_ipl_cpu(void) 1270 1260 { 1271 1261 unsigned long async_stack, nodat_stack, mcck_stack; 1272 1262 struct lowcore *lc, *lc_ipl; ··· 1291 1281 __ctl_clear_bit(0, 28); /* disable lowcore protection */ 1292 1282 S390_lowcore.mcesad = mcesad; 1293 1283 __ctl_load(cr0, 0, 0); 1284 + if (abs_lowcore_map(0, lc, false)) 1285 + panic("Couldn't remap absolute lowcore"); 1294 1286 lowcore_ptr[0] = lc; 1295 1287 local_mcck_enable(); 1296 1288 local_irq_restore(flags); ··· 1303 1291 1304 1292 return 0; 1305 1293 } 1306 - early_initcall(smp_reinit_ipl_cpu);
+4 -7
arch/s390/lib/delay.c
··· 13 13 14 14 void __delay(unsigned long loops) 15 15 { 16 - /* 17 - * To end the bloody studid and useless discussion about the 18 - * BogoMips number I took the liberty to define the __delay 19 - * function in a way that that resulting BogoMips number will 20 - * yield the megahertz number of the cpu. The important function 21 - * is udelay and that is done using the tod clock. -- martin. 22 - */ 16 + /* 17 + * Loop 'loops' times. Callers must not assume a specific 18 + * amount of time passes before this function returns. 19 + */ 23 20 asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1)); 24 21 } 25 22 EXPORT_SYMBOL(__delay);
+20
arch/s390/mm/dump_pagetables.c
··· 8 8 #include <linux/kasan.h> 9 9 #include <asm/ptdump.h> 10 10 #include <asm/kasan.h> 11 + #include <asm/abs_lowcore.h> 11 12 #include <asm/nospec-branch.h> 12 13 #include <asm/sections.h> 14 + #include <asm/maccess.h> 13 15 14 16 static unsigned long max_addr; 15 17 ··· 23 21 enum address_markers_idx { 24 22 IDENTITY_BEFORE_NR = 0, 25 23 IDENTITY_BEFORE_END_NR, 24 + AMODE31_START_NR, 25 + AMODE31_END_NR, 26 26 KERNEL_START_NR, 27 27 KERNEL_END_NR, 28 28 #ifdef CONFIG_KFENCE ··· 43 39 VMALLOC_END_NR, 44 40 MODULES_NR, 45 41 MODULES_END_NR, 42 + ABS_LOWCORE_NR, 43 + ABS_LOWCORE_END_NR, 44 + MEMCPY_REAL_NR, 45 + MEMCPY_REAL_END_NR, 46 46 }; 47 47 48 48 static struct addr_marker address_markers[] = { 49 49 [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"}, 50 50 [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"}, 51 + [AMODE31_START_NR] = {0, "Amode31 Area Start"}, 52 + [AMODE31_END_NR] = {0, "Amode31 Area End"}, 51 53 [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, 52 54 [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, 53 55 #ifdef CONFIG_KFENCE ··· 72 62 [VMALLOC_END_NR] = {0, "vmalloc Area End"}, 73 63 [MODULES_NR] = {0, "Modules Area Start"}, 74 64 [MODULES_END_NR] = {0, "Modules Area End"}, 65 + [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"}, 66 + [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"}, 67 + [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"}, 68 + [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"}, 75 69 { -1, NULL } 76 70 }; 77 71 ··· 290 276 max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; 291 277 max_addr = 1UL << (max_addr * 11 + 31); 292 278 address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; 279 + address_markers[AMODE31_START_NR].start_address = __samode31; 280 + address_markers[AMODE31_END_NR].start_address = __eamode31; 293 281 address_markers[MODULES_NR].start_address = MODULES_VADDR; 294 282 address_markers[MODULES_END_NR].start_address = 
MODULES_END; 283 + address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore; 284 + address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE; 285 + address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area; 286 + address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE; 295 287 address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; 296 288 address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; 297 289 address_markers[VMALLOC_NR].start_address = VMALLOC_START;
+7 -8
arch/s390/mm/fault.c
··· 268 268 (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); 269 269 } 270 270 271 - static noinline void do_fault_error(struct pt_regs *regs, int access, 272 - vm_fault_t fault) 271 + static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault) 273 272 { 274 273 int si_code; 275 274 ··· 515 516 fault = do_exception(regs, access); 516 517 } 517 518 if (unlikely(fault)) 518 - do_fault_error(regs, access, fault); 519 + do_fault_error(regs, fault); 519 520 } 520 521 NOKPROBE_SYMBOL(do_protection_exception); 521 522 ··· 527 528 access = VM_ACCESS_FLAGS; 528 529 fault = do_exception(regs, access); 529 530 if (unlikely(fault)) 530 - do_fault_error(regs, access, fault); 531 + do_fault_error(regs, fault); 531 532 } 532 533 NOKPROBE_SYMBOL(do_dat_exception); 533 534 ··· 802 803 addr = __gmap_translate(gmap, addr); 803 804 mmap_read_unlock(mm); 804 805 if (IS_ERR_VALUE(addr)) { 805 - do_fault_error(regs, VM_ACCESS_FLAGS, VM_FAULT_BADMAP); 806 + do_fault_error(regs, VM_FAULT_BADMAP); 806 807 break; 807 808 } 808 809 fallthrough; ··· 812 813 vma = find_vma(mm, addr); 813 814 if (!vma) { 814 815 mmap_read_unlock(mm); 815 - do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); 816 + do_fault_error(regs, VM_FAULT_BADMAP); 816 817 break; 817 818 } 818 819 page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET); ··· 835 836 BUG(); 836 837 break; 837 838 default: 838 - do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); 839 + do_fault_error(regs, VM_FAULT_BADMAP); 839 840 WARN_ON_ONCE(1); 840 841 } 841 842 } ··· 847 848 struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; 848 849 849 850 if (get_fault_type(regs) != GMAP_FAULT) { 850 - do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); 851 + do_fault_error(regs, VM_FAULT_BADMAP); 851 852 WARN_ON_ONCE(1); 852 853 return; 853 854 }
+1 -1
arch/s390/mm/init.c
··· 37 37 #include <asm/kfence.h> 38 38 #include <asm/ptdump.h> 39 39 #include <asm/dma.h> 40 - #include <asm/lowcore.h> 40 + #include <asm/abs_lowcore.h> 41 41 #include <asm/tlb.h> 42 42 #include <asm/tlbflush.h> 43 43 #include <asm/sections.h>
+75 -96
arch/s390/mm/maccess.c
··· 12 12 #include <linux/errno.h> 13 13 #include <linux/gfp.h> 14 14 #include <linux/cpu.h> 15 + #include <linux/uio.h> 15 16 #include <asm/asm-extable.h> 16 17 #include <asm/ctl_reg.h> 17 18 #include <asm/io.h> 19 + #include <asm/abs_lowcore.h> 18 20 #include <asm/stacktrace.h> 21 + #include <asm/maccess.h> 22 + 23 + unsigned long __bootdata_preserved(__memcpy_real_area); 24 + static __ro_after_init pte_t *memcpy_real_ptep; 25 + static DEFINE_MUTEX(memcpy_real_mutex); 19 26 20 27 static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size) 21 28 { ··· 83 76 return dst; 84 77 } 85 78 86 - static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) 79 + void __init memcpy_real_init(void) 87 80 { 88 - union register_pair _dst, _src; 89 - int rc = -EFAULT; 90 - 91 - _dst.even = (unsigned long) dest; 92 - _dst.odd = (unsigned long) count; 93 - _src.even = (unsigned long) src; 94 - _src.odd = (unsigned long) count; 95 - asm volatile ( 96 - "0: mvcle %[dst],%[src],0\n" 97 - "1: jo 0b\n" 98 - " lhi %[rc],0\n" 99 - "2:\n" 100 - EX_TABLE(1b,2b) 101 - : [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair) 102 - : : "cc", "memory"); 103 - return rc; 81 + memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true); 82 + if (!memcpy_real_ptep) 83 + panic("Couldn't setup memcpy real area"); 104 84 } 105 85 106 - static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, 107 - unsigned long src, 108 - unsigned long count) 86 + size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count) 109 87 { 110 - int irqs_disabled, rc; 111 - unsigned long flags; 88 + size_t len, copied, res = 0; 89 + unsigned long phys, offset; 90 + void *chunk; 91 + pte_t pte; 112 92 113 - if (!count) 114 - return 0; 115 - flags = arch_local_irq_save(); 116 - irqs_disabled = arch_irqs_disabled_flags(flags); 117 - if (!irqs_disabled) 118 - trace_hardirqs_off(); 119 - __arch_local_irq_stnsm(0xf8); // disable 
DAT 120 - rc = __memcpy_real((void *) dest, (void *) src, (size_t) count); 121 - if (flags & PSW_MASK_DAT) 122 - __arch_local_irq_stosm(0x04); // enable DAT 123 - if (!irqs_disabled) 124 - trace_hardirqs_on(); 125 - __arch_local_irq_ssm(flags); 126 - return rc; 93 + while (count) { 94 + phys = src & PAGE_MASK; 95 + offset = src & ~PAGE_MASK; 96 + chunk = (void *)(__memcpy_real_area + offset); 97 + len = min(count, PAGE_SIZE - offset); 98 + pte = mk_pte_phys(phys, PAGE_KERNEL_RO); 99 + 100 + mutex_lock(&memcpy_real_mutex); 101 + if (pte_val(pte) != pte_val(*memcpy_real_ptep)) { 102 + __ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL); 103 + set_pte(memcpy_real_ptep, pte); 104 + } 105 + copied = copy_to_iter(chunk, len, iter); 106 + mutex_unlock(&memcpy_real_mutex); 107 + 108 + count -= copied; 109 + src += copied; 110 + res += copied; 111 + if (copied < len) 112 + break; 113 + } 114 + return res; 127 115 } 128 116 129 - /* 130 - * Copy memory in real mode (kernel to kernel) 131 - */ 132 117 int memcpy_real(void *dest, unsigned long src, size_t count) 133 118 { 134 - unsigned long _dest = (unsigned long)dest; 135 - unsigned long _src = (unsigned long)src; 136 - unsigned long _count = (unsigned long)count; 137 - int rc; 119 + struct iov_iter iter; 120 + struct kvec kvec; 138 121 139 - if (S390_lowcore.nodat_stack != 0) { 140 - preempt_disable(); 141 - rc = call_on_stack(3, S390_lowcore.nodat_stack, 142 - unsigned long, _memcpy_real, 143 - unsigned long, _dest, 144 - unsigned long, _src, 145 - unsigned long, _count); 146 - preempt_enable(); 147 - return rc; 148 - } 149 - /* 150 - * This is a really early memcpy_real call, the stacks are 151 - * not set up yet. 
Just call _memcpy_real on the early boot 152 - * stack 153 - */ 154 - return _memcpy_real(_dest, _src, _count); 122 + kvec.iov_base = dest; 123 + kvec.iov_len = count; 124 + iov_iter_kvec(&iter, WRITE, &kvec, 1, count); 125 + if (memcpy_real_iter(&iter, src, count) < count) 126 + return -EFAULT; 127 + return 0; 155 128 } 156 129 157 130 /* 158 - * Copy memory in absolute mode (kernel to kernel) 131 + * Find CPU that owns swapped prefix page 159 132 */ 160 - void memcpy_absolute(void *dest, void *src, size_t count) 161 - { 162 - unsigned long cr0, flags, prefix; 163 - 164 - flags = arch_local_irq_save(); 165 - __ctl_store(cr0, 0, 0); 166 - __ctl_clear_bit(0, 28); /* disable lowcore protection */ 167 - prefix = store_prefix(); 168 - if (prefix) { 169 - local_mcck_disable(); 170 - set_prefix(0); 171 - memcpy(dest, src, count); 172 - set_prefix(prefix); 173 - local_mcck_enable(); 174 - } else { 175 - memcpy(dest, src, count); 176 - } 177 - __ctl_load(cr0, 0, 0); 178 - arch_local_irq_restore(flags); 179 - } 180 - 181 - /* 182 - * Check if physical address is within prefix or zero page 183 - */ 184 - static int is_swapped(phys_addr_t addr) 133 + static int get_swapped_owner(phys_addr_t addr) 185 134 { 186 135 phys_addr_t lc; 187 136 int cpu; 188 137 189 - if (addr < sizeof(struct lowcore)) 190 - return 1; 191 138 for_each_online_cpu(cpu) { 192 139 lc = virt_to_phys(lowcore_ptr[cpu]); 193 140 if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) 194 141 continue; 195 - return 1; 142 + return cpu; 196 143 } 197 - return 0; 144 + return -1; 198 145 } 199 146 200 147 /* ··· 161 200 { 162 201 void *ptr = phys_to_virt(addr); 163 202 void *bounce = ptr; 203 + struct lowcore *abs_lc; 204 + unsigned long flags; 164 205 unsigned long size; 206 + int this_cpu, cpu; 165 207 166 208 cpus_read_lock(); 167 - preempt_disable(); 168 - if (is_swapped(addr)) { 169 - size = PAGE_SIZE - (addr & ~PAGE_MASK); 170 - bounce = (void *) __get_free_page(GFP_ATOMIC); 171 - if (bounce) 172 - 
memcpy_absolute(bounce, ptr, size); 209 + this_cpu = get_cpu(); 210 + if (addr >= sizeof(struct lowcore)) { 211 + cpu = get_swapped_owner(addr); 212 + if (cpu < 0) 213 + goto out; 173 214 } 174 - preempt_enable(); 215 + bounce = (void *)__get_free_page(GFP_ATOMIC); 216 + if (!bounce) 217 + goto out; 218 + size = PAGE_SIZE - (addr & ~PAGE_MASK); 219 + if (addr < sizeof(struct lowcore)) { 220 + abs_lc = get_abs_lowcore(&flags); 221 + ptr = (void *)abs_lc + addr; 222 + memcpy(bounce, ptr, size); 223 + put_abs_lowcore(abs_lc, flags); 224 + } else if (cpu == this_cpu) { 225 + ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu])); 226 + memcpy(bounce, ptr, size); 227 + } else { 228 + memcpy(bounce, ptr, size); 229 + } 230 + out: 231 + put_cpu(); 175 232 cpus_read_unlock(); 176 233 return bounce; 177 234 }
+102 -2
arch/s390/mm/vmem.c
··· 240 240 } else if (pmd_none(*pmd)) { 241 241 if (IS_ALIGNED(addr, PMD_SIZE) && 242 242 IS_ALIGNED(next, PMD_SIZE) && 243 - MACHINE_HAS_EDAT1 && addr && direct && 243 + MACHINE_HAS_EDAT1 && direct && 244 244 !debug_pagealloc_enabled()) { 245 245 set_pmd(pmd, __pmd(__pa(addr) | prot)); 246 246 pages++; ··· 336 336 } else if (pud_none(*pud)) { 337 337 if (IS_ALIGNED(addr, PUD_SIZE) && 338 338 IS_ALIGNED(next, PUD_SIZE) && 339 - MACHINE_HAS_EDAT2 && addr && direct && 339 + MACHINE_HAS_EDAT2 && direct && 340 340 !debug_pagealloc_enabled()) { 341 341 set_pud(pud, __pud(__pa(addr) | prot)); 342 342 pages++; ··· 561 561 } 562 562 563 563 /* 564 + * Allocate new or return existing page-table entry, but do not map it 565 + * to any physical address. If missing, allocate segment- and region- 566 + * table entries along. Meeting a large segment- or region-table entry 567 + * while traversing is an error, since the function is expected to be 568 + * called against virtual regions reserverd for 4KB mappings only. 
569 + */ 570 + pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) 571 + { 572 + pte_t *ptep = NULL; 573 + pgd_t *pgd; 574 + p4d_t *p4d; 575 + pud_t *pud; 576 + pmd_t *pmd; 577 + pte_t *pte; 578 + 579 + pgd = pgd_offset_k(addr); 580 + if (pgd_none(*pgd)) { 581 + if (!alloc) 582 + goto out; 583 + p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); 584 + if (!p4d) 585 + goto out; 586 + pgd_populate(&init_mm, pgd, p4d); 587 + } 588 + p4d = p4d_offset(pgd, addr); 589 + if (p4d_none(*p4d)) { 590 + if (!alloc) 591 + goto out; 592 + pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); 593 + if (!pud) 594 + goto out; 595 + p4d_populate(&init_mm, p4d, pud); 596 + } 597 + pud = pud_offset(p4d, addr); 598 + if (pud_none(*pud)) { 599 + if (!alloc) 600 + goto out; 601 + pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); 602 + if (!pmd) 603 + goto out; 604 + pud_populate(&init_mm, pud, pmd); 605 + } else if (WARN_ON_ONCE(pud_large(*pud))) { 606 + goto out; 607 + } 608 + pmd = pmd_offset(pud, addr); 609 + if (pmd_none(*pmd)) { 610 + if (!alloc) 611 + goto out; 612 + pte = vmem_pte_alloc(); 613 + if (!pte) 614 + goto out; 615 + pmd_populate(&init_mm, pmd, pte); 616 + } else if (WARN_ON_ONCE(pmd_large(*pmd))) { 617 + goto out; 618 + } 619 + ptep = pte_offset_kernel(pmd, addr); 620 + out: 621 + return ptep; 622 + } 623 + 624 + int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc) 625 + { 626 + pte_t *ptep, pte; 627 + 628 + if (!IS_ALIGNED(addr, PAGE_SIZE)) 629 + return -EINVAL; 630 + ptep = vmem_get_alloc_pte(addr, alloc); 631 + if (!ptep) 632 + return -ENOMEM; 633 + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); 634 + pte = mk_pte_phys(phys, prot); 635 + set_pte(ptep, pte); 636 + return 0; 637 + } 638 + 639 + int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot) 640 + { 641 + int rc; 642 + 643 + mutex_lock(&vmem_mutex); 644 + rc = __vmem_map_4k_page(addr, phys, prot, true); 645 + mutex_unlock(&vmem_mutex); 646 + return rc; 647 + } 648 + 649 + 
void vmem_unmap_4k_page(unsigned long addr) 650 + { 651 + pte_t *ptep; 652 + 653 + mutex_lock(&vmem_mutex); 654 + ptep = virt_to_kpte(addr); 655 + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); 656 + pte_clear(&init_mm, addr, ptep); 657 + mutex_unlock(&vmem_mutex); 658 + } 659 + 660 + /* 564 661 * map whole physical memory to virtual memory (identity mapping) 565 662 * we reserve enough space in the vmalloc area for vmemmap to hotplug 566 663 * additional memory segments. ··· 680 583 SET_MEMORY_RO | SET_MEMORY_X); 681 584 __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT, 682 585 SET_MEMORY_RO | SET_MEMORY_X); 586 + 587 + /* lowcore requires 4k mapping for real addresses / prefixing */ 588 + set_memory_4k(0, LC_PAGES); 683 589 684 590 /* lowcore must be executable for LPSWE */ 685 591 if (!static_key_enabled(&cpu_has_bear))
+1 -1
arch/s390/pci/pci_dma.c
··· 666 666 667 667 int __init zpci_dma_init(void) 668 668 { 669 - s390_iommu_aperture = (u64)high_memory; 669 + s390_iommu_aperture = (u64)virt_to_phys(high_memory); 670 670 if (!s390_iommu_aperture_factor) 671 671 s390_iommu_aperture = ULONG_MAX; 672 672 else
+1 -1
drivers/s390/block/dasd_devmap.c
··· 426 426 if (!devmap) { 427 427 /* This bus_id is new. */ 428 428 new->devindex = dasd_max_devindex++; 429 - strlcpy(new->bus_id, bus_id, DASD_BUS_ID_SIZE); 429 + strscpy(new->bus_id, bus_id, DASD_BUS_ID_SIZE); 430 430 new->features = features; 431 431 new->device = NULL; 432 432 list_add(&new->list, &dasd_hashlists[hash]);
+2 -2
drivers/s390/block/dasd_eer.c
··· 313 313 ktime_get_real_ts64(&ts); 314 314 header.tv_sec = ts.tv_sec; 315 315 header.tv_usec = ts.tv_nsec / NSEC_PER_USEC; 316 - strlcpy(header.busid, dev_name(&device->cdev->dev), 316 + strscpy(header.busid, dev_name(&device->cdev->dev), 317 317 DASD_EER_BUSID_SIZE); 318 318 319 319 spin_lock_irqsave(&bufferlock, flags); ··· 356 356 ktime_get_real_ts64(&ts); 357 357 header.tv_sec = ts.tv_sec; 358 358 header.tv_usec = ts.tv_nsec / NSEC_PER_USEC; 359 - strlcpy(header.busid, dev_name(&device->cdev->dev), 359 + strscpy(header.busid, dev_name(&device->cdev->dev), 360 360 DASD_EER_BUSID_SIZE); 361 361 362 362 spin_lock_irqsave(&bufferlock, flags);
+1 -1
drivers/s390/block/dcssblk.c
··· 614 614 rc = -ENAMETOOLONG; 615 615 goto seg_list_del; 616 616 } 617 - strlcpy(local_buf, buf, i + 1); 617 + strscpy(local_buf, buf, i + 1); 618 618 dev_info->num_of_segments = num_of_segments; 619 619 rc = dcssblk_is_continuous(dev_info); 620 620 if (rc < 0)
+1 -1
drivers/s390/char/hmcdrv_cache.c
··· 154 154 /* cache some file info (FTP command, file name and file 155 155 * size) unconditionally 156 156 */ 157 - strlcpy(hmcdrv_cache_file.fname, ftp->fname, 157 + strscpy(hmcdrv_cache_file.fname, ftp->fname, 158 158 HMCDRV_FTP_FIDENT_MAX); 159 159 hmcdrv_cache_file.id = ftp->id; 160 160 pr_debug("caching cmd %d, file size %zu for '%s'\n",
+2 -2
drivers/s390/char/tape_class.c
··· 54 54 if (!tcd) 55 55 return ERR_PTR(-ENOMEM); 56 56 57 - strlcpy(tcd->device_name, device_name, TAPECLASS_NAME_LEN); 57 + strscpy(tcd->device_name, device_name, TAPECLASS_NAME_LEN); 58 58 for (s = strchr(tcd->device_name, '/'); s; s = strchr(s, '/')) 59 59 *s = '!'; 60 - strlcpy(tcd->mode_name, mode_name, TAPECLASS_NAME_LEN); 60 + strscpy(tcd->mode_name, mode_name, TAPECLASS_NAME_LEN); 61 61 for (s = strchr(tcd->mode_name, '/'); s; s = strchr(s, '/')) 62 62 *s = '!'; 63 63
+1
drivers/s390/char/zcore.c
··· 30 30 #include <asm/checksum.h> 31 31 #include <asm/os_info.h> 32 32 #include <asm/switch_to.h> 33 + #include <asm/maccess.h> 33 34 #include "sclp.h" 34 35 35 36 #define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x)
+1 -1
drivers/s390/cio/qdio_debug.c
··· 87 87 debug_unregister(irq_ptr->debug_area); 88 88 return -ENOMEM; 89 89 } 90 - strlcpy(new_entry->dbf_name, text, QDIO_DBF_NAME_LEN); 90 + strscpy(new_entry->dbf_name, text, QDIO_DBF_NAME_LEN); 91 91 new_entry->dbf_info = irq_ptr->debug_area; 92 92 mutex_lock(&qdio_dbf_list_mutex); 93 93 list_add(&new_entry->dbf_list, &qdio_dbf_list);
+1 -1
drivers/s390/net/ctcm_main.c
··· 1566 1566 goto out_dev; 1567 1567 } 1568 1568 1569 - strlcpy(priv->fsm->name, dev->name, sizeof(priv->fsm->name)); 1569 + strscpy(priv->fsm->name, dev->name, sizeof(priv->fsm->name)); 1570 1570 1571 1571 dev_info(&dev->dev, 1572 1572 "setup OK : r/w = %s/%s, protocol : %d\n",
+1 -1
drivers/s390/net/fsm.c
··· 28 28 "fsm(%s): init_fsm: Couldn't alloc instance\n", name); 29 29 return NULL; 30 30 } 31 - strlcpy(this->name, name, sizeof(this->name)); 31 + strscpy(this->name, name, sizeof(this->name)); 32 32 init_waitqueue_head(&this->wait_q); 33 33 34 34 f = kzalloc(sizeof(fsm), order);
+2 -2
drivers/s390/net/qeth_ethtool.c
··· 188 188 { 189 189 struct qeth_card *card = dev->ml_priv; 190 190 191 - strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3", 191 + strscpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3", 192 192 sizeof(info->driver)); 193 - strlcpy(info->fw_version, card->info.mcl_level, 193 + strscpy(info->fw_version, card->info.mcl_level, 194 194 sizeof(info->fw_version)); 195 195 snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s", 196 196 CARD_RDEV_ID(card), CARD_WDEV_ID(card), CARD_DDEV_ID(card));
+1 -1
drivers/s390/scsi/zfcp_aux.c
··· 103 103 token = strsep(&str, ","); 104 104 if (!token || strlen(token) >= ZFCP_BUS_ID_SIZE) 105 105 goto err_out; 106 - strlcpy(busid, token, ZFCP_BUS_ID_SIZE); 106 + strscpy(busid, token, ZFCP_BUS_ID_SIZE); 107 107 108 108 token = strsep(&str, ","); 109 109 if (!token || kstrtoull(token, 0, (unsigned long long *) &wwpn))
+1 -1
drivers/s390/scsi/zfcp_fc.c
··· 885 885 dev_name(&adapter->ccw_device->dev), 886 886 init_utsname()->nodename); 887 887 else 888 - strlcpy(fc_host_symbolic_name(adapter->scsi_host), 888 + strscpy(fc_host_symbolic_name(adapter->scsi_host), 889 889 gspn_rsp->gspn.fp_name, FC_SYMBOLIC_NAME_SIZE); 890 890 891 891 return 0;