Merge tag 'powerpc-5.10-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

+25

Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7

··· 1 + What: /sys/bus/event_source/devices/hv_24x7/format 2 + Date: September 2020 3 + Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org> 4 + Description: Read-only. Attribute group to describe the magic bits 5 + that go into perf_event_attr.config for a particular pmu. 6 + (See ABI/testing/sysfs-bus-event_source-devices-format). 7 + 8 + Each attribute under this group defines a bit range of the 9 + perf_event_attr.config. All supported attributes are listed 10 + below. 11 + 12 + chip = "config:16-31" 13 + core = "config:16-31" 14 + domain = "config:0-3" 15 + lpar = "config:0-15" 16 + offset = "config:32-63" 17 + vcpu = "config:16-31" 18 + 19 + For example, 20 + 21 + PM_PB_CYC = "domain=1,offset=0x80,chip=?,lpar=0x0" 22 + 23 + In this event, '?' after chip specifies that 24 + this value will be provided by user while running this event. 25 + 1 26 What: /sys/bus/event_source/devices/hv_24x7/interface/catalog 2 27 Date: February 2014 3 28 Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>

+38

Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci

··· 1 + What: /sys/bus/event_source/devices/hv_gpci/format 2 + Date: September 2020 3 + Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org> 4 + Description: Read-only. Attribute group to describe the magic bits 5 + that go into perf_event_attr.config for a particular pmu. 6 + (See ABI/testing/sysfs-bus-event_source-devices-format). 7 + 8 + Each attribute under this group defines a bit range of the 9 + perf_event_attr.config. All supported attributes are listed 10 + below. 11 + 12 + counter_info_version = "config:16-23" 13 + length = "config:24-31" 14 + partition_id = "config:32-63" 15 + request = "config:0-31" 16 + sibling_part_id = "config:32-63" 17 + hw_chip_id = "config:32-63" 18 + offset = "config:32-63" 19 + phys_processor_idx = "config:32-63" 20 + secondary_index = "config:0-15" 21 + starting_index = "config:32-63" 22 + 23 + For example, 24 + 25 + processor_core_utilization_instructions_completed = "request=0x94, 26 + phys_processor_idx=?,counter_info_version=0x8, 27 + length=8,offset=0x18" 28 + 29 + In this event, '?' after phys_processor_idx specifies that 30 + this value will be provided by user while running this event. 31 + 1 32 What: /sys/bus/event_source/devices/hv_gpci/interface/collect_privileged 2 33 Date: February 2014 3 34 Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org> ··· 72 41 Description: 73 42 A number indicating the latest version of the gpci interface 74 43 that the kernel is aware of. 44 + 45 + What: /sys/devices/hv_gpci/cpumask 46 + Date: October 2020 47 + Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org> 48 + Description: read only 49 + This sysfs file exposes the cpumask which is designated to make 50 + HCALLs to retrieve hv-gpci pmu event counter data.

+4

Documentation/powerpc/isa-versions.rst

··· 7 7 ========= ==================================================================== 8 8 CPU Architecture version 9 9 ========= ==================================================================== 10 + Power10 Power ISA v3.1 10 11 Power9 Power ISA v3.0B 11 12 Power8 Power ISA v2.07 12 13 Power7 Power ISA v2.06 ··· 33 32 ========== ================== 34 33 CPU VMX (aka. Altivec) 35 34 ========== ================== 35 + Power10 Yes 36 36 Power9 Yes 37 37 Power8 Yes 38 38 Power7 Yes ··· 49 47 ========== ==== 50 48 CPU VSX 51 49 ========== ==== 50 + Power10 Yes 52 51 Power9 Yes 53 52 Power8 Yes 54 53 Power7 Yes ··· 65 62 ========== ==================================== 66 63 CPU Transactional Memory 67 64 ========== ==================================== 65 + Power10 No (* see Power ISA v3.1, "Appendix A. Notes on the Removal of Transactional Memory from the Architecture") 68 66 Power9 Yes (* see transactional_memory.txt) 69 67 Power8 Yes 70 68 Power7 No

+1

Documentation/powerpc/ptrace.rst

··· 46 46 #define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x4 47 47 #define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x8 48 48 #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10 49 + #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 49 50 50 51 2. PTRACE_SETHWDEBUG 51 52

+7

arch/Kconfig

··· 420 420 bool 421 421 depends on MMU_GATHER_TABLE_FREE 422 422 423 + config ARCH_WANT_IRQS_OFF_ACTIVATE_MM 424 + bool 425 + help 426 + Temporary select until all architectures can be converted to have 427 + irqs disabled over activate_mm. Architectures that do IPI based TLB 428 + shootdowns should enable this. 429 + 423 430 config ARCH_HAVE_NMI_SAFE_CMPXCHG 424 431 bool 425 432

+19 -2

arch/powerpc/Kconfig

··· 59 59 def_bool PPC64 60 60 61 61 config NEED_PER_CPU_EMBED_FIRST_CHUNK 62 - def_bool PPC64 62 + def_bool y if PPC64 63 + 64 + config NEED_PER_CPU_PAGE_FIRST_CHUNK 65 + def_bool y if PPC64 63 66 64 67 config NR_IRQS 65 68 int "Number of virtual interrupt numbers" ··· 151 148 select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS 152 149 select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS 153 150 select ARCH_WANT_IPC_PARSE_VERSION 151 + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM 154 152 select ARCH_WEAK_RELEASE_ACQUIRE 155 153 select BINFMT_ELF 156 154 select BUILDTIME_TABLE_SORT ··· 968 964 config PPC_SECURE_BOOT 969 965 prompt "Enable secure boot support" 970 966 bool 971 - depends on PPC_POWERNV 967 + depends on PPC_POWERNV || PPC_PSERIES 972 968 depends on IMA_ARCH_POLICY 973 969 imply IMA_SECURE_AND_OR_TRUSTED_BOOT 974 970 help ··· 987 983 These variables are exposed to userspace via sysfs to enable 988 984 read/write operations on these variables. Say Y if you have 989 985 secure boot enabled and want to expose variables to userspace. 986 + 987 + config PPC_RTAS_FILTER 988 + bool "Enable filtering of RTAS syscalls" 989 + default y 990 + depends on PPC_RTAS 991 + help 992 + The RTAS syscall API has security issues that could be used to 993 + compromise system integrity. This option enforces restrictions on the 994 + RTAS calls and arguments passed by userspace programs to mitigate 995 + these issues. 996 + 997 + Say Y unless you know what you are doing and the filter is causing 998 + problems for you. 990 999 991 1000 endmenu 992 1001

+2 -1

arch/powerpc/Makefile

··· 264 264 KBUILD_AFLAGS += $(aflags-y) 265 265 KBUILD_CFLAGS += $(cflags-y) 266 266 267 - head-y := arch/powerpc/kernel/head_$(BITS).o 267 + head-$(CONFIG_PPC64) := arch/powerpc/kernel/head_64.o 268 + head-$(CONFIG_PPC_BOOK3S_32) := arch/powerpc/kernel/head_book3s_32.o 268 269 head-$(CONFIG_PPC_8xx) := arch/powerpc/kernel/head_8xx.o 269 270 head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o 270 271 head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o

+1 -1

arch/powerpc/Makefile.postlink

··· 18 18 ifdef CONFIG_PPC_BOOK3S_64 19 19 cmd_relocs_check = \ 20 20 $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \ 21 - $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@" 21 + $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$(NM)" "$@" 22 22 else 23 23 cmd_relocs_check = \ 24 24 $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"

+1 -1

arch/powerpc/boot/Makefile

··· 7 7 # Based on coffboot by Paul Mackerras 8 8 # Simplified for ppc64 by Todd Inglett 9 9 # 10 - # NOTE: this code is built for 32 bit in ELF32 format even though 10 + # NOTE: this code may be built for 32 bit in ELF32 format even though 11 11 # it packages a 64 bit kernel. We do this to simplify the 12 12 # bootloader and increase compatibility with OpenFirmware. 13 13 #

-1

arch/powerpc/boot/dts/fsl/t1024rdb.dts

··· 161 161 rtc@68 { 162 162 compatible = "dallas,ds1339"; 163 163 reg = <0x68>; 164 - interrupts = <0x1 0x1 0 0>; 165 164 }; 166 165 }; 167 166

-1

arch/powerpc/boot/dts/fsl/t4240rdb.dts

··· 144 144 rtc@68 { 145 145 compatible = "dallas,ds1374"; 146 146 reg = <0x68>; 147 - interrupts = <0x1 0x1 0 0>; 148 147 }; 149 148 }; 150 149

+1 -14

arch/powerpc/boot/util.S

··· 18 18 19 19 .text 20 20 21 - /* udelay (on non-601 processors) needs to know the period of the 21 + /* udelay needs to know the period of the 22 22 * timebase in nanoseconds. This used to be hardcoded to be 60ns 23 23 * (period of 66MHz/4). Now a variable is used that is initialized to 24 24 * 60 for backward compatibility, but it can be overridden as necessary ··· 37 37 */ 38 38 .globl udelay 39 39 udelay: 40 - mfspr r4,SPRN_PVR 41 - srwi r4,r4,16 42 - cmpwi 0,r4,1 /* 601 ? */ 43 - bne .Ludelay_not_601 44 - 00: li r0,86 /* Instructions / microsecond? */ 45 - mtctr r0 46 - 10: addi r0,r0,0 /* NOP */ 47 - bdnz 10b 48 - subic. r3,r3,1 49 - bne 00b 50 - blr 51 - 52 - .Ludelay_not_601: 53 40 mulli r4,r3,1000 /* nanoseconds */ 54 41 /* Change r4 to be the number of ticks using: 55 42 * (nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns

+3 -3

arch/powerpc/configs/85xx/mpc85xx_cds_defconfig

··· 29 29 CONFIG_BLK_DEV_LOOP=y 30 30 CONFIG_BLK_DEV_RAM=y 31 31 CONFIG_BLK_DEV_RAM_SIZE=32768 32 - CONFIG_IDE=y 33 - CONFIG_BLK_DEV_GENERIC=y 34 - CONFIG_BLK_DEV_VIA82CXXX=y 32 + CONFIG_ATA=y 33 + CONFIG_ATA_GENERIC=y 34 + CONFIG_PATA_VIA=y 35 35 CONFIG_NETDEVICES=y 36 36 CONFIG_GIANFAR=y 37 37 CONFIG_E1000=y

+3 -3

arch/powerpc/configs/85xx/tqm8540_defconfig

··· 30 30 CONFIG_BLK_DEV_LOOP=y 31 31 CONFIG_BLK_DEV_RAM=y 32 32 CONFIG_BLK_DEV_RAM_SIZE=32768 33 - CONFIG_IDE=y 34 - CONFIG_BLK_DEV_GENERIC=y 35 - CONFIG_BLK_DEV_VIA82CXXX=y 33 + CONFIG_ATA=y 34 + CONFIG_ATA_GENERIC=y 35 + CONFIG_PATA_VIA=y 36 36 CONFIG_NETDEVICES=y 37 37 CONFIG_GIANFAR=y 38 38 CONFIG_E100=y

+3 -3

arch/powerpc/configs/85xx/tqm8541_defconfig

··· 30 30 CONFIG_BLK_DEV_LOOP=y 31 31 CONFIG_BLK_DEV_RAM=y 32 32 CONFIG_BLK_DEV_RAM_SIZE=32768 33 - CONFIG_IDE=y 34 - CONFIG_BLK_DEV_GENERIC=y 35 - CONFIG_BLK_DEV_VIA82CXXX=y 33 + CONFIG_ATA=y 34 + CONFIG_ATA_GENERIC=y 35 + CONFIG_PATA_VIA=y 36 36 CONFIG_NETDEVICES=y 37 37 CONFIG_GIANFAR=y 38 38 CONFIG_E100=y

+3 -3

arch/powerpc/configs/85xx/tqm8555_defconfig

··· 30 30 CONFIG_BLK_DEV_LOOP=y 31 31 CONFIG_BLK_DEV_RAM=y 32 32 CONFIG_BLK_DEV_RAM_SIZE=32768 33 - CONFIG_IDE=y 34 - CONFIG_BLK_DEV_GENERIC=y 35 - CONFIG_BLK_DEV_VIA82CXXX=y 33 + CONFIG_ATA=y 34 + CONFIG_ATA_GENERIC=y 35 + CONFIG_PATA_VIA=y 36 36 CONFIG_NETDEVICES=y 37 37 CONFIG_GIANFAR=y 38 38 CONFIG_E100=y

+3 -3

arch/powerpc/configs/85xx/tqm8560_defconfig

··· 30 30 CONFIG_BLK_DEV_LOOP=y 31 31 CONFIG_BLK_DEV_RAM=y 32 32 CONFIG_BLK_DEV_RAM_SIZE=32768 33 - CONFIG_IDE=y 34 - CONFIG_BLK_DEV_GENERIC=y 35 - CONFIG_BLK_DEV_VIA82CXXX=y 33 + CONFIG_ATA=y 34 + CONFIG_ATA_GENERIC=y 35 + CONFIG_PATA_VIA=y 36 36 CONFIG_NETDEVICES=y 37 37 CONFIG_GIANFAR=y 38 38 CONFIG_E100=y

+4 -1

arch/powerpc/include/asm/asm-prototypes.h

··· 67 67 void program_check_exception(struct pt_regs *regs); 68 68 void alignment_exception(struct pt_regs *regs); 69 69 void StackOverflow(struct pt_regs *regs); 70 + void stack_overflow_exception(struct pt_regs *regs); 70 71 void kernel_fp_unavailable_exception(struct pt_regs *regs); 71 72 void altivec_unavailable_exception(struct pt_regs *regs); 72 73 void vsx_unavailable_exception(struct pt_regs *regs); ··· 145 144 void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); 146 145 147 146 /* Patch sites */ 148 - extern s32 patch__call_flush_branch_caches; 147 + extern s32 patch__call_flush_branch_caches1; 148 + extern s32 patch__call_flush_branch_caches2; 149 + extern s32 patch__call_flush_branch_caches3; 149 150 extern s32 patch__flush_count_cache_return; 150 151 extern s32 patch__flush_link_stack_return; 151 152 extern s32 patch__call_kvm_flush_link_stack;

+11 -7

arch/powerpc/include/asm/book3s/64/hash-4k.h

··· 13 13 */ 14 14 #define MAX_EA_BITS_PER_CONTEXT 46 15 15 16 - #define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2) 17 16 18 17 /* 19 - * Our page table limit us to 64TB. Hence for the kernel mapping, 20 - * each MAP area is limited to 16 TB. 21 - * The four map areas are: linear mapping, vmap, IO and vmemmap 18 + * Our page table limits us to 64TB. For 64TB physical memory, we only need 64GB 19 + * of vmemmap space. To better support sparse memory layout, we use 61TB 20 + * linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmemmap. 22 21 */ 22 + #define REGION_SHIFT (40) 23 23 #define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) 24 24 25 25 /* 26 - * Define the address range of the kernel non-linear virtual area 27 - * 16TB 26 + * Limits the linear mapping range 28 27 */ 29 - #define H_KERN_VIRT_START ASM_CONST(0xc000100000000000) 28 + #define H_MAX_PHYSMEM_BITS 46 29 + 30 + /* 31 + * Define the address range of the kernel non-linear virtual area (61TB) 32 + */ 33 + #define H_KERN_VIRT_START ASM_CONST(0xc0003d0000000000) 30 34 31 35 #ifndef __ASSEMBLY__ 32 36 #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)

+13

arch/powerpc/include/asm/book3s/64/hash-64k.h

··· 7 7 #define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB 8 8 #define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB 9 9 10 + /* 11 + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS 12 + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and 13 + * page_to_nid does a page->section->node lookup 14 + * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce 15 + * memory requirements with large number of sections. 16 + * 51 bits is the max physical real address on POWER9 17 + */ 18 + #if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) 19 + #define H_MAX_PHYSMEM_BITS 51 20 + #else 21 + #define H_MAX_PHYSMEM_BITS 46 22 + #endif 10 23 11 24 /* 12 25 * Each context is 512TB size. SLB miss for first context/default context

+2 -2

arch/powerpc/include/asm/book3s/64/mmu-hash.h

··· 577 577 * For vmalloc and memmap, we use just one context with 512TB. With 64 byte 578 578 * struct page size, we need only 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)). 579 579 */ 580 - #if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) 581 - #define MAX_KERNEL_CTX_CNT (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT)) 580 + #if (H_MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) 581 + #define MAX_KERNEL_CTX_CNT (1UL << (H_MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT)) 582 582 #else 583 583 #define MAX_KERNEL_CTX_CNT 1 584 584 #endif

+1 -16

arch/powerpc/include/asm/book3s/64/mmu.h

··· 27 27 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; 28 28 #endif /* __ASSEMBLY__ */ 29 29 30 - /* 31 - * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS 32 - * if we increase SECTIONS_WIDTH we will not store node details in page->flags and 33 - * page_to_nid does a page->section->node lookup 34 - * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce 35 - * memory requirements with large number of sections. 36 - * 51 bits is the max physical real address on POWER9 37 - */ 38 - #if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ 39 - defined(CONFIG_PPC_64K_PAGES) 40 - #define MAX_PHYSMEM_BITS 51 41 - #else 42 - #define MAX_PHYSMEM_BITS 46 43 - #endif 44 - 45 30 /* 64-bit classic hash table MMU */ 46 31 #include <asm/book3s/64/mmu-hash.h> 47 32 ··· 70 85 /* 71 86 * memory block size used with radix translation. 72 87 */ 73 - extern unsigned int __ro_after_init radix_mem_block_size; 88 + extern unsigned long __ro_after_init radix_mem_block_size; 74 89 75 90 #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) 76 91 #define PRTB_ENTRIES (1ul << mmu_pid_bits)

+7

arch/powerpc/include/asm/book3s/64/pgtable.h

··· 294 294 #include <asm/book3s/64/hash.h> 295 295 #include <asm/book3s/64/radix.h> 296 296 297 + #if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS 298 + #define MAX_PHYSMEM_BITS H_MAX_PHYSMEM_BITS 299 + #else 300 + #define MAX_PHYSMEM_BITS R_MAX_PHYSMEM_BITS 301 + #endif 302 + 303 + 297 304 #ifdef CONFIG_PPC_64K_PAGES 298 305 #include <asm/book3s/64/pgtable-64k.h> 299 306 #else

+16

arch/powerpc/include/asm/book3s/64/radix.h

··· 91 91 * +------------------------------+ Kernel linear (0xc.....) 92 92 */ 93 93 94 + 95 + /* 96 + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS 97 + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and 98 + * page_to_nid does a page->section->node lookup 99 + * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce 100 + * memory requirements with large number of sections. 101 + * 51 bits is the max physical real address on POWER9 102 + */ 103 + 104 + #if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) 105 + #define R_MAX_PHYSMEM_BITS 51 106 + #else 107 + #define R_MAX_PHYSMEM_BITS 46 108 + #endif 109 + 94 110 #define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) 95 111 /* 96 112 * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick

+10

arch/powerpc/include/asm/cacheflush.h

··· 98 98 mb(); /* sync */ 99 99 } 100 100 101 + #ifdef CONFIG_4xx 102 + static inline void flush_instruction_cache(void) 103 + { 104 + iccci((void *)KERNELBASE); 105 + isync(); 106 + } 107 + #else 108 + void flush_instruction_cache(void); 109 + #endif 110 + 101 111 #include <asm-generic/cacheflush.h> 102 112 103 113 #endif /* _ASM_POWERPC_CACHEFLUSH_H */

+4 -14

arch/powerpc/include/asm/cputable.h

··· 9 9 10 10 #ifndef __ASSEMBLY__ 11 11 12 - /* 13 - * Added to include __machine_check_early_realmode_* functions 14 - */ 15 - #include <asm/mce.h> 16 - 17 12 /* This structure can grow, it's real size is used by head.S code 18 13 * via the mkdefs mechanism. 19 14 */ ··· 165 170 #else /* CONFIG_PPC32 */ 166 171 /* Define these to 0 for the sake of tests in common code */ 167 172 #define CPU_FTR_PPC_LE (0) 173 + #define CPU_FTR_SPE (0) 168 174 #endif 169 175 170 176 /* ··· 295 299 #define CPU_FTR_MAYBE_CAN_NAP 0 296 300 #endif 297 301 298 - #define CPU_FTRS_PPC601 (CPU_FTR_COMMON | \ 299 - CPU_FTR_COHERENT_ICACHE) 300 302 #define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ 301 303 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE) 302 304 #define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE) ··· 510 516 #else 511 517 enum { 512 518 CPU_FTRS_POSSIBLE = 513 - #ifdef CONFIG_PPC_BOOK3S_601 514 - CPU_FTRS_PPC601 | 515 - #elif defined(CONFIG_PPC_BOOK3S_32) 516 - CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | 519 + #ifdef CONFIG_PPC_BOOK3S_32 520 + CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | 517 521 CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | 518 522 CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | 519 523 CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 | ··· 586 594 #else 587 595 enum { 588 596 CPU_FTRS_ALWAYS = 589 - #ifdef CONFIG_PPC_BOOK3S_601 590 - CPU_FTRS_PPC601 & 591 - #elif defined(CONFIG_PPC_BOOK3S_32) 597 + #ifdef CONFIG_PPC_BOOK3S_32 592 598 CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU & 593 599 CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & 594 600 CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &

-1

arch/powerpc/include/asm/cputhreads.h

··· 23 23 extern int threads_per_core; 24 24 extern int threads_per_subcore; 25 25 extern int threads_shift; 26 - extern bool has_big_cores; 27 26 extern cpumask_t threads_core_mask; 28 27 #else 29 28 #define threads_per_core 1

+1 -1

arch/powerpc/include/asm/delay.h

··· 54 54 ({ \ 55 55 typeof(condition) __ret; \ 56 56 unsigned long __loops = tb_ticks_per_usec * timeout; \ 57 - unsigned long __start = get_tbl(); \ 57 + unsigned long __start = mftb(); \ 58 58 \ 59 59 if (delay) { \ 60 60 while (!(__ret = (condition)) && \

+19 -24

arch/powerpc/include/asm/drmem.h

··· 8 8 #ifndef _ASM_POWERPC_LMB_H 9 9 #define _ASM_POWERPC_LMB_H 10 10 11 + #include <linux/sched.h> 12 + 11 13 struct drmem_lmb { 12 14 u64 base_addr; 13 15 u32 drc_index; 14 16 u32 aa_index; 15 17 u32 flags; 16 - #ifdef CONFIG_MEMORY_HOTPLUG 17 - int nid; 18 - #endif 19 18 }; 20 19 21 20 struct drmem_lmb_info { 22 21 struct drmem_lmb *lmbs; 23 22 int n_lmbs; 24 - u32 lmb_size; 23 + u64 lmb_size; 25 24 }; 26 25 27 26 extern struct drmem_lmb_info *drmem_info; 28 27 28 + static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb, 29 + const struct drmem_lmb *start) 30 + { 31 + /* 32 + * DLPAR code paths can take several milliseconds per element 33 + * when interacting with firmware. Ensure that we don't 34 + * unfairly monopolize the CPU. 35 + */ 36 + if (((++lmb - start) % 16) == 0) 37 + cond_resched(); 38 + 39 + return lmb; 40 + } 41 + 29 42 #define for_each_drmem_lmb_in_range(lmb, start, end) \ 30 - for ((lmb) = (start); (lmb) < (end); (lmb)++) 43 + for ((lmb) = (start); (lmb) < (end); lmb = drmem_lmb_next(lmb, start)) 31 44 32 45 #define for_each_drmem_lmb(lmb) \ 33 46 for_each_drmem_lmb_in_range((lmb), \ ··· 80 67 #define DRCONF_MEM_RESERVED 0x00000080 81 68 #define DRCONF_MEM_HOTREMOVABLE 0x00000100 82 69 83 - static inline u32 drmem_lmb_size(void) 70 + static inline u64 drmem_lmb_size(void) 84 71 { 85 72 return drmem_info->lmb_size; 86 73 } ··· 117 104 { 118 105 lmb->aa_index = 0xffffffff; 119 106 } 120 - 121 - #ifdef CONFIG_MEMORY_HOTPLUG 122 - static inline void lmb_set_nid(struct drmem_lmb *lmb) 123 - { 124 - lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); 125 - } 126 - static inline void lmb_clear_nid(struct drmem_lmb *lmb) 127 - { 128 - lmb->nid = -1; 129 - } 130 - #else 131 - static inline void lmb_set_nid(struct drmem_lmb *lmb) 132 - { 133 - } 134 - static inline void lmb_clear_nid(struct drmem_lmb *lmb) 135 - { 136 - } 137 - #endif 138 107 139 108 #endif /* _ASM_POWERPC_LMB_H */

+2 -7

arch/powerpc/include/asm/eeh.h

··· 27 27 #define EEH_FORCE_DISABLED 0x02 /* EEH disabled */ 28 28 #define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ 29 29 #define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ 30 - #define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */ 31 30 #define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */ 32 31 #define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */ 33 32 ··· 73 74 struct eeh_pe { 74 75 int type; /* PE type: PHB/Bus/Device */ 75 76 int state; /* PE EEH dependent mode */ 76 - int config_addr; /* Traditional PCI address */ 77 77 int addr; /* PE configuration address */ 78 78 struct pci_controller *phb; /* Associated PHB */ 79 79 struct pci_bus *bus; /* Top PCI bus for bus PE */ ··· 214 216 215 217 struct eeh_ops { 216 218 char *name; 217 - int (*init)(void); 218 219 struct eeh_dev *(*probe)(struct pci_dev *pdev); 219 220 int (*set_option)(struct eeh_pe *pe, int option); 220 221 int (*get_state)(struct eeh_pe *pe, int *delay); ··· 278 281 int eeh_wait_state(struct eeh_pe *pe, int max_wait); 279 282 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); 280 283 struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root); 281 - struct eeh_pe *eeh_pe_get(struct pci_controller *phb, 282 - int pe_no, int config_addr); 284 + struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no); 283 285 int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent); 284 286 int eeh_pe_tree_remove(struct eeh_dev *edev); 285 287 void eeh_pe_update_time_stamp(struct eeh_pe *pe); ··· 291 295 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); 292 296 293 297 void eeh_show_enabled(void); 294 - int __init eeh_ops_register(struct eeh_ops *ops); 295 - int __exit eeh_ops_unregister(const char *name); 298 + int __init eeh_init(struct eeh_ops *ops); 296 299 int eeh_check_failure(const volatile void __iomem *token); 297 300 int eeh_dev_check_failure(struct eeh_dev *edev); 298 301 void eeh_addr_cache_init(void);

+38

arch/powerpc/include/asm/hvcall.h

··· 375 375 #define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6 376 376 #define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7 377 377 #define H_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) // IBM bit 9 378 + #define H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST (1ull << 52) // IBM bit 11 378 379 379 380 #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 380 381 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 381 382 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 382 383 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 384 + #define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 383 385 384 386 /* Flag values used in H_REGISTER_PROC_TBL hcall */ 385 387 #define PROC_TABLE_OP_MASK 0x18 ··· 561 559 562 560 /* Latest version of hv_guest_state structure */ 563 561 #define HV_GUEST_STATE_VERSION 1 562 + 563 + /* 564 + * From the document "H_GetPerformanceCounterInfo Interface" v1.07 565 + * 566 + * H_GET_PERF_COUNTER_INFO argument 567 + */ 568 + struct hv_get_perf_counter_info_params { 569 + __be32 counter_request; /* I */ 570 + __be32 starting_index; /* IO */ 571 + __be16 secondary_index; /* IO */ 572 + __be16 returned_values; /* O */ 573 + __be32 detail_rc; /* O, only needed when called via *_norets() */ 574 + 575 + /* 576 + * O, size each of counter_value element in bytes, only set for version 577 + * >= 0x3 578 + */ 579 + __be16 cv_element_size; 580 + 581 + /* I, 0 (zero) for versions < 0x3 */ 582 + __u8 counter_info_version_in; 583 + 584 + /* O, 0 (zero) if version < 0x3. 
Must be set to 0 when making hcall */ 585 + __u8 counter_info_version_out; 586 + __u8 reserved[0xC]; 587 + __u8 counter_value[]; 588 + } __packed; 589 + 590 + #define HGPCI_REQ_BUFFER_SIZE 4096 591 + #define HGPCI_MAX_DATA_BYTES \ 592 + (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params)) 593 + 594 + struct hv_gpci_request_buffer { 595 + struct hv_get_perf_counter_info_params params; 596 + uint8_t bytes[HGPCI_MAX_DATA_BYTES]; 597 + } __packed; 564 598 565 599 #endif /* __ASSEMBLY__ */ 566 600 #endif /* __KERNEL__ */

+12

arch/powerpc/include/asm/hw_breakpoint.h

··· 10 10 #define _PPC_BOOK3S_64_HW_BREAKPOINT_H 11 11 12 12 #include <asm/cpu_has_feature.h> 13 + #include <asm/inst.h> 13 14 14 15 #ifdef __KERNEL__ 15 16 struct arch_hw_breakpoint { ··· 18 17 u16 type; 19 18 u16 len; /* length of the target data symbol */ 20 19 u16 hw_len; /* length programmed in hw */ 20 + u8 flags; 21 21 }; 22 22 23 23 /* Note: Don't change the first 6 bits below as they are in the same order ··· 38 36 #define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \ 39 37 HW_BRK_TYPE_HYP) 40 38 39 + #define HW_BRK_FLAG_DISABLED 0x1 40 + 41 41 /* Minimum granularity */ 42 42 #ifdef CONFIG_PPC_8xx 43 43 #define HW_BREAKPOINT_SIZE 0x4 44 44 #else 45 45 #define HW_BREAKPOINT_SIZE 0x8 46 46 #endif 47 + #define HW_BREAKPOINT_SIZE_QUADWORD 0x10 47 48 48 49 #define DABR_MAX_LEN 8 49 50 #define DAWR_MAX_LEN 512 ··· 55 50 { 56 51 return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1; 57 52 } 53 + 54 + bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, 55 + unsigned long ea, int type, int size, 56 + struct arch_hw_breakpoint *info); 57 + 58 + void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, 59 + int *type, int *size, unsigned long *ea); 58 60 59 61 #ifdef CONFIG_HAVE_HW_BREAKPOINT 60 62 #include <linux/kdebug.h>

+2 -9

arch/powerpc/include/asm/hw_irq.h

··· 25 25 #define PACA_IRQ_DBELL 0x02 26 26 #define PACA_IRQ_EE 0x04 27 27 #define PACA_IRQ_DEC 0x08 /* Or FIT */ 28 - #define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ 29 - #define PACA_IRQ_HMI 0x20 30 - #define PACA_IRQ_PMI 0x40 28 + #define PACA_IRQ_HMI 0x10 29 + #define PACA_IRQ_PMI 0x20 31 30 32 31 /* 33 32 * Some soft-masked interrupts must be hard masked until they are replayed ··· 367 368 #endif /* CONFIG_PPC64 */ 368 369 369 370 #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST 370 - 371 - /* 372 - * interrupt-retrigger: should we handle this via lost interrupts and IPIs 373 - * or should we not care like we do now ? --BenH. 374 - */ 375 - struct irq_chip; 376 371 377 372 #endif /* __ASSEMBLY__ */ 378 373 #endif /* __KERNEL__ */

+4 -2

arch/powerpc/include/asm/icswx.h

··· 156 156 u8 reserved[32]; 157 157 158 158 struct coprocessor_status_block csb; 159 - } __packed; 160 - 159 + } __aligned(128); 161 160 162 161 /* RFC02167 Initiate Coprocessor Instructions document 163 162 * Chapter 8.2.1.1.1 RS ··· 186 187 { 187 188 __be64 ccw_reg = ccw; 188 189 u32 cr; 190 + 191 + /* NB: the same structures are used by VAS-NX */ 192 + BUILD_BUG_ON(sizeof(*crb) != 128); 189 193 190 194 __asm__ __volatile__( 191 195 PPC_ICSWX(%1,0,%2) "\n"

-1

arch/powerpc/include/asm/irq.h

··· 35 35 36 36 extern int distribute_irqs; 37 37 38 - struct irqaction; 39 38 struct pt_regs; 40 39 41 40 #define __ARCH_HAS_DO_SOFTIRQ

-3

arch/powerpc/include/asm/machdep.h

··· 65 65 void __noreturn (*restart)(char *cmd); 66 66 void __noreturn (*halt)(void); 67 67 void (*panic)(char *str); 68 - void (*cpu_die)(void); 69 68 70 69 long (*time_init)(void); /* Optional, may be NULL */ 71 70 ··· 221 222 222 223 extern void e500_idle(void); 223 224 extern void power4_idle(void); 224 - extern void power7_idle(void); 225 - extern void power9_idle(void); 226 225 extern void ppc6xx_idle(void); 227 226 extern void book3e_idle(void); 228 227

+1 -1

arch/powerpc/include/asm/mmu_context.h

··· 244 244 */ 245 245 static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) 246 246 { 247 - switch_mm(prev, next, current); 247 + switch_mm_irqs_off(prev, next, current); 248 248 } 249 249 250 250 /* We don't currently use enter_lazy_tlb() for anything */

+14

arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h

··· 65 65 pte_update(mm, addr, ptep, clr, set, 1); 66 66 } 67 67 68 + #ifdef CONFIG_PPC_4K_PAGES 69 + static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, 70 + struct page *page, int writable) 71 + { 72 + size_t size = huge_page_size(hstate_vma(vma)); 73 + 74 + if (size == SZ_16K) 75 + return __pte(pte_val(entry) & ~_PAGE_HUGE); 76 + else 77 + return entry; 78 + } 79 + #define arch_make_huge_pte arch_make_huge_pte 80 + #endif 81 + 68 82 #endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */

+14 -6

arch/powerpc/include/asm/nohash/32/pgtable.h

··· 227 227 */ 228 228 #ifdef CONFIG_PPC_8xx 229 229 static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); 230 + static int hugepd_ok(hugepd_t hpd); 231 + 232 + static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge) 233 + { 234 + if (!huge) 235 + return PAGE_SIZE / SZ_4K; 236 + else if (hugepd_ok(*((hugepd_t *)pmd))) 237 + return 1; 238 + else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE)) 239 + return SZ_16K / SZ_4K; 240 + else 241 + return SZ_512K / SZ_4K; 242 + } 230 243 231 244 static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, 232 245 unsigned long clr, unsigned long set, int huge) ··· 250 237 int num, i; 251 238 pmd_t *pmd = pmd_off(mm, addr); 252 239 253 - if (!huge) 254 - num = PAGE_SIZE / SZ_4K; 255 - else if ((pmd_val(*pmd) & _PMD_PAGE_MASK) != _PMD_PAGE_8M) 256 - num = SZ_512K / SZ_4K; 257 - else 258 - num = 1; 240 + num = number_of_cells_per_pte(pmd, new, huge); 259 241 260 242 for (i = 0; i < num; i++, entry++, new += SZ_4K) 261 243 *entry = new;

-3

arch/powerpc/include/asm/pnv-ocxl.h

··· 28 28 void pnv_ocxl_spa_release(void *platform_data); 29 29 int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); 30 30 31 - int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); 32 - void pnv_ocxl_free_xive_irq(u32 irq); 33 - 34 31 #endif /* _ASM_PNV_OCXL_H */

+1 -12

arch/powerpc/include/asm/ppc_asm.h

··· 382 382 #endif 383 383 384 384 /* various errata or part fixups */ 385 - #ifdef CONFIG_PPC601_SYNC_FIX 386 - #define SYNC sync; isync 387 - #define SYNC_601 sync 388 - #define ISYNC_601 isync 389 - #else 390 - #define SYNC 391 - #define SYNC_601 392 - #define ISYNC_601 393 - #endif 394 - 395 385 #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) 396 386 #define MFTB(dest) \ 397 387 90: mfspr dest, SPRN_TBRL; \ ··· 401 411 #define MFTBU(dest) mfspr dest, SPRN_TBRU 402 412 #endif 403 413 404 - /* tlbsync is not implemented on 601 */ 405 - #if !defined(CONFIG_SMP) || defined(CONFIG_PPC_BOOK3S_601) 414 + #ifndef CONFIG_SMP 406 415 #define TLBSYNC 407 416 #else 408 417 #define TLBSYNC tlbsync; sync

+2 -7

arch/powerpc/include/asm/processor.h

··· 220 220 unsigned long tm_tar; 221 221 unsigned long tm_ppr; 222 222 unsigned long tm_dscr; 223 + unsigned long tm_amr; 223 224 224 225 /* 225 226 * Checkpointed FP and VSX 0-31 register set. ··· 433 432 extern int powersave_nap; /* set if nap mode can be used in idle loop */ 434 433 435 434 extern void power7_idle_type(unsigned long type); 436 - extern void power9_idle_type(unsigned long stop_psscr_val, 435 + extern void arch300_idle_type(unsigned long stop_psscr_val, 437 436 unsigned long stop_psscr_mask); 438 437 439 - extern void flush_instruction_cache(void); 440 - extern void hard_reset_now(void); 441 - extern void poweroff_now(void); 442 438 extern int fix_alignment(struct pt_regs *); 443 - extern void cvt_fd(float *from, double *to); 444 - extern void cvt_df(double *from, float *to); 445 - extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); 446 439 447 440 #ifdef CONFIG_PPC64 448 441 /*

-4

arch/powerpc/include/asm/ptrace.h

··· 243 243 } 244 244 245 245 #define arch_has_single_step() (1) 246 - #ifndef CONFIG_PPC_BOOK3S_601 247 246 #define arch_has_block_step() (true) 248 - #else 249 - #define arch_has_block_step() (false) 250 - #endif 251 247 #define ARCH_HAS_USER_SINGLE_STEP_REPORT 252 248 253 249 /*

+8 -12

arch/powerpc/include/asm/reg.h

··· 521 521 #define SPRN_TSCR 0x399 /* Thread Switch Control Register */ 522 522 523 523 #define SPRN_DEC 0x016 /* Decrement Register */ 524 + #define SPRN_PIT 0x3DB /* Programmable Interval Timer (40x/BOOKE) */ 525 + 524 526 #define SPRN_DER 0x095 /* Debug Enable Register */ 525 527 #define DER_RSTE 0x40000000 /* Reset Interrupt */ 526 528 #define DER_CHSTPE 0x20000000 /* Check Stop */ ··· 819 817 #define THRM1_TIN (1 << 31) 820 818 #define THRM1_TIV (1 << 30) 821 819 #define THRM1_THRES(x) ((x&0x7f)<<23) 822 - #define THRM3_SITV(x) ((x&0x3fff)<<1) 820 + #define THRM3_SITV(x) ((x & 0x1fff) << 1) 823 821 #define THRM1_TID (1<<2) 824 822 #define THRM1_TIE (1<<1) 825 823 #define THRM1_V (1<<0) ··· 1355 1353 #define PVR_POWER8NVL 0x004C 1356 1354 #define PVR_POWER8 0x004D 1357 1355 #define PVR_POWER9 0x004E 1356 + #define PVR_POWER10 0x0080 1358 1357 #define PVR_BE 0x0070 1359 1358 #define PVR_PA6T 0x0090 1360 1359 ··· 1419 1416 __msr_check_and_clear(bits); 1420 1417 } 1421 1418 1422 - #ifdef __powerpc64__ 1423 - #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) 1419 + #if defined(CONFIG_PPC_CELL) || defined(CONFIG_E500) 1424 1420 #define mftb() ({unsigned long rval; \ 1425 1421 asm volatile( \ 1426 1422 "90: mfspr %0, %2;\n" \ ··· 1429 1427 : "=r" (rval) \ 1430 1428 : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ 1431 1429 rval;}) 1430 + #elif defined(CONFIG_PPC_8xx) 1431 + #define mftb() ({unsigned long rval; \ 1432 + asm volatile("mftbl %0" : "=r" (rval)); rval;}) 1432 1433 #else 1433 1434 #define mftb() ({unsigned long rval; \ 1434 1435 asm volatile("mfspr %0, %1" : \ 1435 1436 "=r" (rval) : "i" (SPRN_TBRL)); rval;}) 1436 1437 #endif /* !CONFIG_PPC_CELL */ 1437 1438 1438 - #else /* __powerpc64__ */ 1439 - 1440 1439 #if defined(CONFIG_PPC_8xx) 1441 - #define mftbl() ({unsigned long rval; \ 1442 - asm volatile("mftbl %0" : "=r" (rval)); rval;}) 1443 1440 #define mftbu() ({unsigned long rval; \ 1444 1441 asm volatile("mftbu %0" : "=r" (rval)); 
rval;}) 1445 1442 #else 1446 - #define mftbl() ({unsigned long rval; \ 1447 - asm volatile("mfspr %0, %1" : "=r" (rval) : \ 1448 - "i" (SPRN_TBRL)); rval;}) 1449 1443 #define mftbu() ({unsigned long rval; \ 1450 1444 asm volatile("mfspr %0, %1" : "=r" (rval) : \ 1451 1445 "i" (SPRN_TBRU)); rval;}) 1452 1446 #endif 1453 - #define mftb() mftbl() 1454 - #endif /* !__powerpc64__ */ 1455 1447 1456 1448 #define mttbl(v) asm volatile("mttbl %0":: "r"(v)) 1457 1449 #define mttbu(v) asm volatile("mttbu %0":: "r"(v))

-1

arch/powerpc/include/asm/reg_booke.h

··· 174 174 #define SPRN_L1CSR1 0x3F3 /* L1 Cache Control and Status Register 1 */ 175 175 #define SPRN_MMUCSR0 0x3F4 /* MMU Control and Status Register 0 */ 176 176 #define SPRN_MMUCFG 0x3F7 /* MMU Configuration Register */ 177 - #define SPRN_PIT 0x3DB /* Programmable Interval Timer */ 178 177 #define SPRN_BUCSR 0x3F5 /* Branch Unit Control and Status */ 179 178 #define SPRN_L2CSR0 0x3F9 /* L2 Data Cache Control and Status Register 0 */ 180 179 #define SPRN_L2CSR1 0x3FA /* L2 Data Cache Control and Status Register 1 */

+17 -7

arch/powerpc/include/asm/smp.h

··· 28 28 extern int boot_cpuid; 29 29 extern int spinning_secondaries; 30 30 extern u32 *cpu_to_phys_id; 31 + extern bool coregroup_enabled; 31 32 32 - extern void cpu_die(void); 33 33 extern int cpu_to_chip_id(int cpu); 34 34 35 35 #ifdef CONFIG_SMP ··· 50 50 int (*cpu_disable)(void); 51 51 void (*cpu_die)(unsigned int nr); 52 52 int (*cpu_bootable)(unsigned int nr); 53 + #ifdef CONFIG_HOTPLUG_CPU 54 + void (*cpu_offline_self)(void); 55 + #endif 53 56 }; 54 57 55 58 extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); ··· 121 118 return per_cpu(cpu_sibling_map, cpu); 122 119 } 123 120 124 - static inline struct cpumask *cpu_core_mask(int cpu) 125 - { 126 - return per_cpu(cpu_core_map, cpu); 127 - } 128 - 129 121 static inline struct cpumask *cpu_l2_cache_mask(int cpu) 130 122 { 131 123 return per_cpu(cpu_l2_cache_map, cpu); ··· 132 134 } 133 135 134 136 extern int cpu_to_core_id(int cpu); 137 + 138 + extern bool has_big_cores; 139 + 140 + #define cpu_smt_mask cpu_smt_mask 141 + #ifdef CONFIG_SCHED_SMT 142 + static inline const struct cpumask *cpu_smt_mask(int cpu) 143 + { 144 + if (has_big_cores) 145 + return per_cpu(cpu_smallcore_map, cpu); 146 + 147 + return per_cpu(cpu_sibling_map, cpu); 148 + } 149 + #endif /* CONFIG_SCHED_SMT */ 135 150 136 151 /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. 137 152 * ··· 254 243 * 64-bit but defining them all here doesn't harm 255 244 */ 256 245 extern void generic_secondary_smp_init(void); 257 - extern void generic_secondary_thread_init(void); 258 246 extern unsigned long __secondary_hold_spinloop; 259 247 extern unsigned long __secondary_hold_acknowledge; 260 248 extern char __secondary_hold;

+4

arch/powerpc/include/asm/svm.h

··· 15 15 return mfmsr() & MSR_S; 16 16 } 17 17 18 + void __init svm_swiotlb_init(void); 19 + 18 20 void dtl_cache_ctor(void *addr); 19 21 #define get_dtl_cache_ctor() (is_secure_guest() ? dtl_cache_ctor : NULL) 20 22 ··· 26 24 { 27 25 return false; 28 26 } 27 + 28 + static inline void svm_swiotlb_init(void) {} 29 29 30 30 #define get_dtl_cache_ctor() NULL 31 31

+18 -1

arch/powerpc/include/asm/synch.h

··· 3 3 #define _ASM_POWERPC_SYNCH_H 4 4 #ifdef __KERNEL__ 5 5 6 + #include <asm/cputable.h> 6 7 #include <asm/feature-fixups.h> 7 - #include <asm/asm-const.h> 8 + #include <asm/ppc-opcode.h> 8 9 9 10 #ifndef __ASSEMBLY__ 10 11 extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup; ··· 20 19 static inline void isync(void) 21 20 { 22 21 __asm__ __volatile__ ("isync" : : : "memory"); 22 + } 23 + 24 + static inline void ppc_after_tlbiel_barrier(void) 25 + { 26 + asm volatile("ptesync": : :"memory"); 27 + /* 28 + * POWER9, POWER10 need a cp_abort after tlbiel to ensure the copy is 29 + * invalidated correctly. If this is not done, the paste can take data 30 + * from the physical address that was translated at copy time. 31 + * 32 + * POWER9 in practice does not need this, because address spaces with 33 + * accelerators mapped will use tlbie (which does invalidate the copy) 34 + * to invalidate translations. It's not possible to limit POWER10 this 35 + * way due to local copy-paste. 36 + */ 37 + asm volatile(ASM_FTR_IFSET(PPC_CP_ABORT, "", %0) : : "i" (CPU_FTR_ARCH_31) : "memory"); 23 38 } 24 39 #endif /* __ASSEMBLY__ */ 25 40

+18 -68

arch/powerpc/include/asm/time.h

··· 38 38 u64 result_low; 39 39 }; 40 40 41 - /* Accessor functions for the timebase (RTC on 601) registers. */ 42 - #define __USE_RTC() (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) 43 - 44 - #ifdef CONFIG_PPC64 45 - 46 41 /* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ 47 - #define get_tbl get_tb 48 - 49 - #else 50 - 51 42 static inline unsigned long get_tbl(void) 52 43 { 53 - return mftbl(); 54 - } 55 - 56 - static inline unsigned int get_tbu(void) 57 - { 58 - return mftbu(); 59 - } 60 - #endif /* !CONFIG_PPC64 */ 61 - 62 - static inline unsigned int get_rtcl(void) 63 - { 64 - unsigned int rtcl; 65 - 66 - asm volatile("mfrtcl %0" : "=r" (rtcl)); 67 - return rtcl; 68 - } 69 - 70 - static inline u64 get_rtc(void) 71 - { 72 - unsigned int hi, lo, hi2; 73 - 74 - do { 75 - asm volatile("mfrtcu %0; mfrtcl %1; mfrtcu %2" 76 - : "=r" (hi), "=r" (lo), "=r" (hi2)); 77 - } while (hi2 != hi); 78 - return (u64)hi * 1000000000 + lo; 44 + return mftb(); 79 45 } 80 46 81 47 static inline u64 get_vtb(void) ··· 53 87 return 0; 54 88 } 55 89 56 - #ifdef CONFIG_PPC64 57 - static inline u64 get_tb(void) 58 - { 59 - return mftb(); 60 - } 61 - #else /* CONFIG_PPC64 */ 62 90 static inline u64 get_tb(void) 63 91 { 64 92 unsigned int tbhi, tblo, tbhi2; 65 93 94 + if (IS_ENABLED(CONFIG_PPC64)) 95 + return mftb(); 96 + 66 97 do { 67 - tbhi = get_tbu(); 68 - tblo = get_tbl(); 69 - tbhi2 = get_tbu(); 98 + tbhi = mftbu(); 99 + tblo = mftb(); 100 + tbhi2 = mftbu(); 70 101 } while (tbhi != tbhi2); 71 102 72 103 return ((u64)tbhi << 32) | tblo; 73 - } 74 - #endif /* !CONFIG_PPC64 */ 75 - 76 - static inline u64 get_tb_or_rtc(void) 77 - { 78 - return __USE_RTC() ? 
get_rtc() : get_tb(); 79 104 } 80 105 81 106 static inline void set_tb(unsigned int upper, unsigned int lower) ··· 84 127 */ 85 128 static inline u64 get_dec(void) 86 129 { 87 - #if defined(CONFIG_40x) 88 - return (mfspr(SPRN_PIT)); 89 - #else 90 - return (mfspr(SPRN_DEC)); 91 - #endif 130 + if (IS_ENABLED(CONFIG_40x)) 131 + return mfspr(SPRN_PIT); 132 + 133 + return mfspr(SPRN_DEC); 92 134 } 93 135 94 136 /* ··· 97 141 */ 98 142 static inline void set_dec(u64 val) 99 143 { 100 - #if defined(CONFIG_40x) 101 - mtspr(SPRN_PIT, (u32) val); 102 - #else 103 - #ifndef CONFIG_BOOKE 104 - --val; 105 - #endif 106 - mtspr(SPRN_DEC, val); 107 - #endif /* not 40x */ 144 + if (IS_ENABLED(CONFIG_40x)) 145 + mtspr(SPRN_PIT, (u32)val); 146 + else if (IS_ENABLED(CONFIG_BOOKE)) 147 + mtspr(SPRN_DEC, val); 148 + else 149 + mtspr(SPRN_DEC, val - 1); 108 150 } 109 151 110 152 static inline unsigned long tb_ticks_since(unsigned long tstamp) 111 153 { 112 - if (__USE_RTC()) { 113 - int delta = get_rtcl() - (unsigned int) tstamp; 114 - return delta < 0 ? delta + 1000000000 : delta; 115 - } 116 - return get_tbl() - tstamp; 154 + return mftb() - tstamp; 117 155 } 118 156 119 157 #define mulhwu(x,y) \

-3

arch/powerpc/include/asm/timex.h

··· 17 17 18 18 static inline cycles_t get_cycles(void) 19 19 { 20 - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) 21 - return 0; 22 - 23 20 return mftb(); 24 21 } 25 22

-13

arch/powerpc/include/asm/tlb.h

··· 66 66 return false; 67 67 return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)); 68 68 } 69 - static inline void mm_reset_thread_local(struct mm_struct *mm) 70 - { 71 - WARN_ON(atomic_read(&mm->context.copros) > 0); 72 - /* 73 - * It's possible for mm_access to take a reference on mm_users to 74 - * access the remote mm from another thread, but it's not allowed 75 - * to set mm_cpumask, so mm_users may be > 1 here. 76 - */ 77 - WARN_ON(current->mm != mm); 78 - atomic_set(&mm->context.active_cpus, 1); 79 - cpumask_clear(mm_cpumask(mm)); 80 - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); 81 - } 82 69 #else /* CONFIG_PPC_BOOK3S_64 */ 83 70 static inline int mm_is_thread_local(struct mm_struct *mm) 84 71 {

+14 -6

arch/powerpc/include/asm/topology.h

··· 86 86 87 87 #endif /* CONFIG_NUMA */ 88 88 89 + struct drmem_lmb; 90 + int of_drconf_to_nid_single(struct drmem_lmb *lmb); 91 + 89 92 #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) 90 93 extern int find_and_online_cpu_nid(int cpu); 94 + extern int cpu_to_coregroup_id(int cpu); 91 95 #else 92 96 static inline int find_and_online_cpu_nid(int cpu) 93 97 { 94 98 return 0; 99 + } 100 + 101 + static inline int cpu_to_coregroup_id(int cpu) 102 + { 103 + #ifdef CONFIG_SMP 104 + return cpu_to_core_id(cpu); 105 + #else 106 + return 0; 107 + #endif 95 108 } 96 109 97 110 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ ··· 117 104 #ifdef CONFIG_PPC64 118 105 #include <asm/smp.h> 119 106 120 - #ifdef CONFIG_PPC_SPLPAR 121 - int get_physical_package_id(int cpu); 122 - #define topology_physical_package_id(cpu) (get_physical_package_id(cpu)) 123 - #else 124 107 #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) 125 - #endif 126 108 127 109 #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) 128 - #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) 110 + #define topology_core_cpumask(cpu) (cpu_cpu_mask(cpu)) 129 111 #define topology_core_id(cpu) (cpu_to_core_id(cpu)) 130 112 131 113 #endif

+22 -53

arch/powerpc/include/asm/uaccess.h

··· 151 151 152 152 extern long __put_user_bad(void); 153 153 154 - /* 155 - * We don't tell gcc that we are accessing memory, but this is OK 156 - * because we do not write to any memory gcc knows about, so there 157 - * are no aliasing issues. 158 - */ 159 - #define __put_user_asm(x, addr, err, op) \ 160 - __asm__ __volatile__( \ 161 - "1: " op " %1,0(%2) # put_user\n" \ 162 - "2:\n" \ 163 - ".section .fixup,\"ax\"\n" \ 164 - "3: li %0,%3\n" \ 165 - " b 2b\n" \ 166 - ".previous\n" \ 167 - EX_TABLE(1b, 3b) \ 168 - : "=r" (err) \ 169 - : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) 170 - 171 - #ifdef __powerpc64__ 172 - #define __put_user_asm2(x, ptr, retval) \ 173 - __put_user_asm(x, ptr, retval, "std") 174 - #else /* __powerpc64__ */ 175 - #define __put_user_asm2(x, addr, err) \ 176 - __asm__ __volatile__( \ 177 - "1: stw %1,0(%2)\n" \ 178 - "2: stw %1+1,4(%2)\n" \ 179 - "3:\n" \ 180 - ".section .fixup,\"ax\"\n" \ 181 - "4: li %0,%3\n" \ 182 - " b 3b\n" \ 183 - ".previous\n" \ 184 - EX_TABLE(1b, 4b) \ 185 - EX_TABLE(2b, 4b) \ 186 - : "=r" (err) \ 187 - : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) 188 - #endif /* __powerpc64__ */ 189 - 190 154 #define __put_user_size_allowed(x, ptr, size, retval) \ 191 155 do { \ 156 + __label__ __pu_failed; \ 157 + \ 192 158 retval = 0; \ 193 - switch (size) { \ 194 - case 1: __put_user_asm(x, ptr, retval, "stb"); break; \ 195 - case 2: __put_user_asm(x, ptr, retval, "sth"); break; \ 196 - case 4: __put_user_asm(x, ptr, retval, "stw"); break; \ 197 - case 8: __put_user_asm2(x, ptr, retval); break; \ 198 - default: __put_user_bad(); \ 199 - } \ 159 + __put_user_size_goto(x, ptr, size, __pu_failed); \ 160 + break; \ 161 + \ 162 + __pu_failed: \ 163 + retval = -EFAULT; \ 200 164 } while (0) 201 165 202 166 #define __put_user_size(x, ptr, size, retval) \ ··· 213 249 }) 214 250 215 251 252 + /* 253 + * We don't tell gcc that we are accessing memory, but this is OK 254 + * because we do not write to any memory gcc knows about, so 
there 255 + * are no aliasing issues. 256 + */ 216 257 #define __put_user_asm_goto(x, addr, label, op) \ 217 258 asm volatile goto( \ 218 259 "1: " op "%U1%X1 %0,%1 # put_user\n" \ 219 260 EX_TABLE(1b, %l2) \ 220 261 : \ 221 - : "r" (x), "m" (*addr) \ 262 + : "r" (x), "m<>" (*addr) \ 222 263 : \ 223 264 : label) 224 265 ··· 285 316 286 317 #define __get_user_asm(x, addr, err, op) \ 287 318 __asm__ __volatile__( \ 288 - "1: "op" %1,0(%2) # get_user\n" \ 319 + "1: "op"%U2%X2 %1, %2 # get_user\n" \ 289 320 "2:\n" \ 290 321 ".section .fixup,\"ax\"\n" \ 291 322 "3: li %0,%3\n" \ ··· 294 325 ".previous\n" \ 295 326 EX_TABLE(1b, 3b) \ 296 327 : "=r" (err), "=r" (x) \ 297 - : "b" (addr), "i" (-EFAULT), "0" (err)) 328 + : "m<>" (*addr), "i" (-EFAULT), "0" (err)) 298 329 299 330 #ifdef __powerpc64__ 300 331 #define __get_user_asm2(x, addr, err) \ ··· 302 333 #else /* __powerpc64__ */ 303 334 #define __get_user_asm2(x, addr, err) \ 304 335 __asm__ __volatile__( \ 305 - "1: lwz %1,0(%2)\n" \ 306 - "2: lwz %1+1,4(%2)\n" \ 336 + "1: lwz%X2 %1, %2\n" \ 337 + "2: lwz%X2 %L1, %L2\n" \ 307 338 "3:\n" \ 308 339 ".section .fixup,\"ax\"\n" \ 309 340 "4: li %0,%3\n" \ ··· 314 345 EX_TABLE(1b, 4b) \ 315 346 EX_TABLE(2b, 4b) \ 316 347 : "=r" (err), "=&r" (x) \ 317 - : "b" (addr), "i" (-EFAULT), "0" (err)) 348 + : "m" (*addr), "i" (-EFAULT), "0" (err)) 318 349 #endif /* __powerpc64__ */ 319 350 320 351 #define __get_user_size_allowed(x, ptr, size, retval) \ ··· 324 355 if (size > sizeof(x)) \ 325 356 (x) = __get_user_bad(); \ 326 357 switch (size) { \ 327 - case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \ 328 - case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \ 329 - case 4: __get_user_asm(x, ptr, retval, "lwz"); break; \ 330 - case 8: __get_user_asm2(x, ptr, retval); break; \ 358 + case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \ 359 + case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break; \ 360 + case 4: __get_user_asm(x, (u32 __user *)ptr, 
retval, "lwz"); break; \ 361 + case 8: __get_user_asm2(x, (u64 __user *)ptr, retval); break; \ 331 362 default: (x) = __get_user_bad(); \ 332 363 } \ 333 364 } while (0)

+1

arch/powerpc/include/uapi/asm/ptrace.h

··· 222 222 #define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x0000000000000004 223 223 #define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x0000000000000008 224 224 #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x0000000000000010 225 + #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x0000000000000020 225 226 226 227 #ifndef __ASSEMBLY__ 227 228

+4 -2

arch/powerpc/kernel/Makefile

··· 45 45 signal.o sysfs.o cacheinfo.o time.o \ 46 46 prom.o traps.o setup-common.o \ 47 47 udbg.o misc.o io.o misc_$(BITS).o \ 48 - of_platform.o prom_parse.o firmware.o 48 + of_platform.o prom_parse.o firmware.o \ 49 + hw_breakpoint_constraints.o 49 50 obj-y += ptrace/ 50 51 obj-$(CONFIG_PPC64) += setup_64.o \ 51 52 paca.o nvram_64.o note.o syscall_64.o ··· 95 94 obj-$(CONFIG_PPC_DOORBELL) += dbell.o 96 95 obj-$(CONFIG_JUMP_LABEL) += jump_label.o 97 96 98 - extra-y := head_$(BITS).o 97 + extra-$(CONFIG_PPC64) := head_64.o 98 + extra-$(CONFIG_PPC_BOOK3S_32) := head_book3s_32.o 99 99 extra-$(CONFIG_40x) := head_40x.o 100 100 extra-$(CONFIG_44x) := head_44x.o 101 101 extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o

+1

arch/powerpc/kernel/asm-offsets.c

··· 176 176 OFFSET(THREAD_TM_TAR, thread_struct, tm_tar); 177 177 OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr); 178 178 OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr); 179 + OFFSET(THREAD_TM_AMR, thread_struct, tm_amr); 179 180 OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs); 180 181 OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr); 181 182 OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave);

+4 -13

arch/powerpc/kernel/btext.c

··· 95 95 boot_text_mapped = 0; 96 96 return; 97 97 } 98 - if (PVR_VER(mfspr(SPRN_PVR)) != 1) { 99 - /* 603, 604, G3, G4, ... */ 100 - lowbits = addr & ~0xFF000000UL; 101 - addr &= 0xFF000000UL; 102 - disp_BAT[0] = vaddr | (BL_16M<<2) | 2; 103 - disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW); 104 - } else { 105 - /* 601 */ 106 - lowbits = addr & ~0xFF800000UL; 107 - addr &= 0xFF800000UL; 108 - disp_BAT[0] = vaddr | (_PAGE_NO_CACHE | PP_RWXX) | 4; 109 - disp_BAT[1] = addr | BL_8M | 0x40; 110 - } 98 + lowbits = addr & ~0xFF000000UL; 99 + addr &= 0xFF000000UL; 100 + disp_BAT[0] = vaddr | (BL_16M<<2) | 2; 101 + disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW); 111 102 logicalDisplayBase = (void *) (vaddr + lowbits); 112 103 } 113 104 #endif

+1 -15

arch/powerpc/kernel/cputable.c

··· 16 16 #include <asm/oprofile_impl.h> 17 17 #include <asm/cputable.h> 18 18 #include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */ 19 + #include <asm/mce.h> 19 20 #include <asm/mmu.h> 20 21 #include <asm/setup.h> 21 22 ··· 609 608 #endif /* CONFIG_PPC_BOOK3S_64 */ 610 609 611 610 #ifdef CONFIG_PPC32 612 - #ifdef CONFIG_PPC_BOOK3S_601 613 - { /* 601 */ 614 - .pvr_mask = 0xffff0000, 615 - .pvr_value = 0x00010000, 616 - .cpu_name = "601", 617 - .cpu_features = CPU_FTRS_PPC601, 618 - .cpu_user_features = COMMON_USER | PPC_FEATURE_601_INSTR | 619 - PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB, 620 - .mmu_features = MMU_FTR_HPTE_TABLE, 621 - .icache_bsize = 32, 622 - .dcache_bsize = 32, 623 - .machine_check = machine_check_generic, 624 - .platform = "ppc601", 625 - }, 626 - #endif /* CONFIG_PPC_BOOK3S_601 */ 627 611 #ifdef CONFIG_PPC_BOOK3S_6xx 628 612 { /* 603 */ 629 613 .pvr_mask = 0xffff0000,

+1

arch/powerpc/kernel/dt_cpu_ftrs.c

··· 17 17 18 18 #include <asm/cputable.h> 19 19 #include <asm/dt_cpu_ftrs.h> 20 + #include <asm/mce.h> 20 21 #include <asm/mmu.h> 21 22 #include <asm/oprofile_impl.h> 22 23 #include <asm/prom.h>

+41 -104

arch/powerpc/kernel/eeh.c

··· 466 466 return 0; 467 467 } 468 468 469 - if (!pe->addr && !pe->config_addr) { 469 + if (!pe->addr) { 470 470 eeh_stats.no_cfg_addr++; 471 471 return 0; 472 472 } ··· 929 929 edev->config_space[1] |= PCI_COMMAND_MASTER; 930 930 } 931 931 932 - /** 933 - * eeh_ops_register - Register platform dependent EEH operations 934 - * @ops: platform dependent EEH operations 935 - * 936 - * Register the platform dependent EEH operation callback 937 - * functions. The platform should call this function before 938 - * any other EEH operations. 939 - */ 940 - int __init eeh_ops_register(struct eeh_ops *ops) 941 - { 942 - if (!ops->name) { 943 - pr_warn("%s: Invalid EEH ops name for %p\n", 944 - __func__, ops); 945 - return -EINVAL; 946 - } 947 - 948 - if (eeh_ops && eeh_ops != ops) { 949 - pr_warn("%s: EEH ops of platform %s already existing (%s)\n", 950 - __func__, eeh_ops->name, ops->name); 951 - return -EEXIST; 952 - } 953 - 954 - eeh_ops = ops; 955 - 956 - return 0; 957 - } 958 - 959 - /** 960 - * eeh_ops_unregister - Unreigster platform dependent EEH operations 961 - * @name: name of EEH platform operations 962 - * 963 - * Unregister the platform dependent EEH operation callback 964 - * functions. 965 - */ 966 - int __exit eeh_ops_unregister(const char *name) 967 - { 968 - if (!name || !strlen(name)) { 969 - pr_warn("%s: Invalid EEH ops name\n", 970 - __func__); 971 - return -EINVAL; 972 - } 973 - 974 - if (eeh_ops && !strcmp(eeh_ops->name, name)) { 975 - eeh_ops = NULL; 976 - return 0; 977 - } 978 - 979 - return -EEXIST; 980 - } 981 - 982 932 static int eeh_reboot_notifier(struct notifier_block *nb, 983 933 unsigned long action, void *unused) 984 934 { ··· 939 989 static struct notifier_block eeh_reboot_nb = { 940 990 .notifier_call = eeh_reboot_notifier, 941 991 }; 942 - 943 - /** 944 - * eeh_init - EEH initialization 945 - * 946 - * Initialize EEH by trying to enable it for all of the adapters in the system. 
947 - * As a side effect we can determine here if eeh is supported at all. 948 - * Note that we leave EEH on so failed config cycles won't cause a machine 949 - * check. If a user turns off EEH for a particular adapter they are really 950 - * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't 951 - * grant access to a slot if EEH isn't enabled, and so we always enable 952 - * EEH for all slots/all devices. 953 - * 954 - * The eeh-force-off option disables EEH checking globally, for all slots. 955 - * Even if force-off is set, the EEH hardware is still enabled, so that 956 - * newer systems can boot. 957 - */ 958 - static int eeh_init(void) 959 - { 960 - struct pci_controller *hose, *tmp; 961 - int ret = 0; 962 - 963 - /* Register reboot notifier */ 964 - ret = register_reboot_notifier(&eeh_reboot_nb); 965 - if (ret) { 966 - pr_warn("%s: Failed to register notifier (%d)\n", 967 - __func__, ret); 968 - return ret; 969 - } 970 - 971 - /* call platform initialization function */ 972 - if (!eeh_ops) { 973 - pr_warn("%s: Platform EEH operation not found\n", 974 - __func__); 975 - return -EEXIST; 976 - } else if ((ret = eeh_ops->init())) 977 - return ret; 978 - 979 - /* Initialize PHB PEs */ 980 - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) 981 - eeh_phb_pe_create(hose); 982 - 983 - eeh_addr_cache_init(); 984 - 985 - /* Initialize EEH event */ 986 - return eeh_event_init(); 987 - } 988 - 989 - core_initcall_sync(eeh_init); 990 992 991 993 static int eeh_device_notifier(struct notifier_block *nb, 992 994 unsigned long action, void *data) ··· 964 1062 .notifier_call = eeh_device_notifier, 965 1063 }; 966 1064 967 - static __init int eeh_set_bus_notifier(void) 1065 + /** 1066 + * eeh_init - System wide EEH initialization 1067 + * 1068 + * It's the platform's job to call this from an arch_initcall(). 
1069 + */ 1070 + int eeh_init(struct eeh_ops *ops) 968 1071 { 969 - bus_register_notifier(&pci_bus_type, &eeh_device_nb); 970 - return 0; 1072 + struct pci_controller *hose, *tmp; 1073 + int ret = 0; 1074 + 1075 + /* the platform should only initialise EEH once */ 1076 + if (WARN_ON(eeh_ops)) 1077 + return -EEXIST; 1078 + if (WARN_ON(!ops)) 1079 + return -ENOENT; 1080 + eeh_ops = ops; 1081 + 1082 + /* Register reboot notifier */ 1083 + ret = register_reboot_notifier(&eeh_reboot_nb); 1084 + if (ret) { 1085 + pr_warn("%s: Failed to register reboot notifier (%d)\n", 1086 + __func__, ret); 1087 + return ret; 1088 + } 1089 + 1090 + ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb); 1091 + if (ret) { 1092 + pr_warn("%s: Failed to register bus notifier (%d)\n", 1093 + __func__, ret); 1094 + return ret; 1095 + } 1096 + 1097 + /* Initialize PHB PEs */ 1098 + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) 1099 + eeh_phb_pe_create(hose); 1100 + 1101 + eeh_addr_cache_init(); 1102 + 1103 + /* Initialize EEH event */ 1104 + return eeh_event_init(); 971 1105 } 972 - arch_initcall(eeh_set_bus_notifier); 973 1106 974 1107 /** 975 1108 * eeh_probe_device() - Perform EEH initialization for the indicated pci device ··· 1657 1720 return -ENODEV; 1658 1721 1659 1722 /* Retrieve PE */ 1660 - pe = eeh_pe_get(hose, pe_no, 0); 1723 + pe = eeh_pe_get(hose, pe_no); 1661 1724 if (!pe) 1662 1725 return -ENODEV; 1663 1726

+8 -42

arch/powerpc/kernel/eeh_pe.c

··· 251 251 252 252 /** 253 253 * __eeh_pe_get - Check the PE address 254 - * @data: EEH PE 255 - * @flag: EEH device 256 254 * 257 255 * For one particular PE, it can be identified by PE address 258 256 * or tranditional BDF address. BDF address is composed of 259 257 * Bus/Device/Function number. The extra data referred by flag 260 258 * indicates which type of address should be used. 261 259 */ 262 - struct eeh_pe_get_flag { 263 - int pe_no; 264 - int config_addr; 265 - }; 266 - 267 260 static void *__eeh_pe_get(struct eeh_pe *pe, void *flag) 268 261 { 269 - struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag; 262 + int *target_pe = flag; 270 263 271 - /* Unexpected PHB PE */ 264 + /* PHB PEs are special and should be ignored */ 272 265 if (pe->type & EEH_PE_PHB) 273 266 return NULL; 274 267 275 - /* 276 - * We prefer PE address. For most cases, we should 277 - * have non-zero PE address 278 - */ 279 - if (eeh_has_flag(EEH_VALID_PE_ZERO)) { 280 - if (tmp->pe_no == pe->addr) 281 - return pe; 282 - } else { 283 - if (tmp->pe_no && 284 - (tmp->pe_no == pe->addr)) 285 - return pe; 286 - } 287 - 288 - /* Try BDF address */ 289 - if (tmp->config_addr && 290 - (tmp->config_addr == pe->config_addr)) 268 + if (*target_pe == pe->addr) 291 269 return pe; 292 270 293 271 return NULL; ··· 275 297 * eeh_pe_get - Search PE based on the given address 276 298 * @phb: PCI controller 277 299 * @pe_no: PE number 278 - * @config_addr: Config address 279 300 * 280 301 * Search the corresponding PE based on the specified address which 281 302 * is included in the eeh device. The function is used to check if ··· 283 306 * which is composed of PCI bus/device/function number, or unified 284 307 * PE address. 
285 308 */ 286 - struct eeh_pe *eeh_pe_get(struct pci_controller *phb, 287 - int pe_no, int config_addr) 309 + struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no) 288 310 { 289 311 struct eeh_pe *root = eeh_phb_pe_get(phb); 290 - struct eeh_pe_get_flag tmp = { pe_no, config_addr }; 291 - struct eeh_pe *pe; 292 312 293 - pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp); 294 - 295 - return pe; 313 + return eeh_pe_traverse(root, __eeh_pe_get, &pe_no); 296 314 } 297 315 298 316 /** ··· 308 336 struct pci_controller *hose = edev->controller; 309 337 struct eeh_pe *pe, *parent; 310 338 311 - /* Check if the PE number is valid */ 312 - if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { 313 - eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n"); 314 - return -EINVAL; 315 - } 316 - 317 339 /* 318 340 * Search the PE has been existing or not according 319 341 * to the PE address. If that has been existing, the 320 342 * PE should be composed of PCI bus and its subordinate 321 343 * components. 322 344 */ 323 - pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn); 345 + pe = eeh_pe_get(hose, edev->pe_config_addr); 324 346 if (pe) { 325 347 if (pe->type & EEH_PE_INVALID) { 326 348 list_add_tail(&edev->entry, &pe->edevs); ··· 354 388 pr_err("%s: out of memory!\n", __func__); 355 389 return -ENOMEM; 356 390 } 357 - pe->addr = edev->pe_config_addr; 358 - pe->config_addr = edev->bdfn; 391 + 392 + pe->addr = edev->pe_config_addr; 359 393 360 394 /* 361 395 * Put the new EEH PE into hierarchy tree. If the parent

+1 -34

arch/powerpc/kernel/entry_32.S

··· 234 234 mtspr SPRN_SRR0,r11 235 235 mtspr SPRN_SRR1,r10 236 236 mtlr r9 237 - SYNC 238 237 RFI /* jump to handler, enable MMU */ 239 238 240 239 #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) ··· 263 264 LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) 264 265 mtspr SPRN_SRR0,r12 265 266 mtspr SPRN_SRR1,r0 266 - SYNC 267 267 RFI 268 268 269 269 reenable_mmu: ··· 321 323 #endif 322 324 mtspr SPRN_SRR0,r9 323 325 mtspr SPRN_SRR1,r10 324 - SYNC 325 326 RFI 326 327 _ASM_NOKPROBE_SYMBOL(stack_ovf) 327 328 #endif ··· 408 411 /* disable interrupts so current_thread_info()->flags can't change */ 409 412 LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */ 410 413 /* Note: We don't bother telling lockdep about it */ 411 - SYNC 412 414 mtmsr r10 413 415 lwz r9,TI_FLAGS(r2) 414 416 li r8,-MAX_ERRNO ··· 470 474 #endif 471 475 mtspr SPRN_SRR0,r7 472 476 mtspr SPRN_SRR1,r8 473 - SYNC 474 477 RFI 475 478 _ASM_NOKPROBE_SYMBOL(syscall_exit_finish) 476 479 #ifdef CONFIG_44x ··· 562 567 * lockdep as we are supposed to have IRQs on at this point 563 568 */ 564 569 ori r10,r10,MSR_EE 565 - SYNC 566 570 mtmsr r10 567 571 568 572 /* Save NVGPRS if they're not saved already */ ··· 600 606 #endif 601 607 mtspr SPRN_SRR0, r9 602 608 mtspr SPRN_SRR1, r10 603 - SYNC 604 609 RFI 605 610 _ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) 606 611 ··· 803 810 REST_GPR(9, r11) 804 811 REST_GPR(12, r11) 805 812 lwz r11,GPR11(r11) 806 - SYNC 807 813 RFI 808 814 _ASM_NOKPROBE_SYMBOL(fast_exception_return) 809 815 ··· 811 819 1: lis r3,exc_exit_restart_end@ha 812 820 addi r3,r3,exc_exit_restart_end@l 813 821 cmplw r12,r3 814 - #ifdef CONFIG_PPC_BOOK3S_601 815 - bge 2b 816 - #else 817 822 bge 3f 818 - #endif 819 823 lis r4,exc_exit_restart@ha 820 824 addi r4,r4,exc_exit_restart@l 821 825 cmplw r12,r4 822 - #ifdef CONFIG_PPC_BOOK3S_601 823 - blt 2b 824 - #else 825 826 blt 3f 826 - #endif 827 827 lis r3,fee_restarts@ha 828 828 tophys(r3,r3) 829 829 lwz r5,fee_restarts@l(r3) ··· 832 848 833 849 /* 
aargh, a nonrecoverable interrupt, panic */ 834 850 /* aargh, we don't know which trap this is */ 835 - /* but the 601 doesn't implement the RI bit, so assume it's OK */ 836 851 3: 837 852 li r10,-1 838 853 stw r10,_TRAP(r11) ··· 855 872 * from the interrupt. */ 856 873 /* Note: We don't bother telling lockdep about it */ 857 874 LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) 858 - SYNC /* Some chip revs have problems here... */ 859 875 mtmsr r10 /* disable interrupts */ 860 876 861 877 lwz r3,_MSR(r1) /* Returning to user mode? */ ··· 1017 1035 * exc_exit_restart below. -- paulus 1018 1036 */ 1019 1037 LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI) 1020 - SYNC 1021 1038 mtmsr r10 /* clear the RI bit */ 1022 1039 .globl exc_exit_restart 1023 1040 exc_exit_restart: ··· 1027 1046 lwz r1,GPR1(r1) 1028 1047 .globl exc_exit_restart_end 1029 1048 exc_exit_restart_end: 1030 - SYNC 1031 1049 RFI 1032 1050 _ASM_NOKPROBE_SYMBOL(exc_exit_restart) 1033 1051 _ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) ··· 1254 1274 mfmsr r10 1255 1275 #endif 1256 1276 ori r10,r10,MSR_EE 1257 - SYNC 1258 1277 mtmsr r10 /* hard-enable interrupts */ 1259 1278 bl schedule 1260 1279 recheck: ··· 1262 1283 * TI_FLAGS aren't advertised. 1263 1284 */ 1264 1285 LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) 1265 - SYNC 1266 1286 mtmsr r10 /* disable interrupts */ 1267 1287 lwz r9,TI_FLAGS(r2) 1268 1288 andi. 
r0,r9,_TIF_NEED_RESCHED ··· 1270 1292 beq restore_user 1271 1293 do_user_signal: /* r10 contains MSR_KERNEL here */ 1272 1294 ori r10,r10,MSR_EE 1273 - SYNC 1274 1295 mtmsr r10 /* hard-enable interrupts */ 1275 1296 /* save r13-r31 in the exception frame, if not already done */ 1276 1297 lwz r3,_TRAP(r1) ··· 1293 1316 lis r10,exc_exit_restart_end@ha 1294 1317 addi r10,r10,exc_exit_restart_end@l 1295 1318 cmplw r12,r10 1296 - #ifdef CONFIG_PPC_BOOK3S_601 1297 - bgelr 1298 - #else 1299 1319 bge 3f 1300 - #endif 1301 1320 lis r11,exc_exit_restart@ha 1302 1321 addi r11,r11,exc_exit_restart@l 1303 1322 cmplw r12,r11 1304 - #ifdef CONFIG_PPC_BOOK3S_601 1305 - bltlr 1306 - #else 1307 1323 blt 3f 1308 - #endif 1309 1324 lis r10,ee_restarts@ha 1310 1325 lwz r12,ee_restarts@l(r10) 1311 1326 addi r12,r12,1 ··· 1305 1336 mr r12,r11 /* restart at exc_exit_restart */ 1306 1337 blr 1307 1338 3: /* OK, we can't recover, kill this process */ 1308 - /* but the 601 doesn't implement the RI bit, so assume it's OK */ 1309 1339 lwz r3,_TRAP(r1) 1310 1340 andi. r0,r3,1 1311 1341 beq 5f ··· 1350 1382 mfmsr r9 1351 1383 stw r9,8(r1) 1352 1384 LOAD_REG_IMMEDIATE(r0,MSR_KERNEL) 1353 - SYNC /* disable interrupts so SRR0/1 */ 1354 - mtmsr r0 /* don't get trashed */ 1385 + mtmsr r0 /* disable interrupts so SRR0/1 don't get trashed */ 1355 1386 li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) 1356 1387 mtlr r6 1357 1388 stw r7, THREAD + RTAS_SP(r2)

+6 -2

arch/powerpc/kernel/entry_64.S

··· 430 430 431 431 #define FLUSH_COUNT_CACHE \ 432 432 1: nop; \ 433 - patch_site 1b, patch__call_flush_branch_caches 433 + patch_site 1b, patch__call_flush_branch_caches1; \ 434 + 1: nop; \ 435 + patch_site 1b, patch__call_flush_branch_caches2; \ 436 + 1: nop; \ 437 + patch_site 1b, patch__call_flush_branch_caches3 434 438 435 439 .macro nops number 436 440 .rept \number ··· 516 512 517 513 kuap_check_amr r9, r10 518 514 519 - FLUSH_COUNT_CACHE 515 + FLUSH_COUNT_CACHE /* Clobbers r9, ctr */ 520 516 521 517 /* 522 518 * On SMP kernels, care must be taken because a task may be

-11

arch/powerpc/kernel/exceptions-64e.S

··· 988 988 .endm 989 989 990 990 masked_interrupt_book3e_0x500: 991 - // XXX When adding support for EPR, use PACA_IRQ_EE_EDGE 992 991 masked_interrupt_book3e PACA_IRQ_EE 1 993 992 994 993 masked_interrupt_book3e_0x900: ··· 1301 1302 bne 1f 1302 1303 addi r3,r1,STACK_FRAME_OVERHEAD; 1303 1304 bl do_IRQ 1304 - b ret_from_except 1305 - 1: cmpwi cr0,r3,0xf00 1306 - bne 1f 1307 - addi r3,r1,STACK_FRAME_OVERHEAD; 1308 - bl performance_monitor_exception 1309 - b ret_from_except 1310 - 1: cmpwi cr0,r3,0xe60 1311 - bne 1f 1312 - addi r3,r1,STACK_FRAME_OVERHEAD; 1313 - bl handle_hmi_exception 1314 1305 b ret_from_except 1315 1306 1: cmpwi cr0,r3,0x900 1316 1307 bne 1f

-2

arch/powerpc/kernel/fadump.c

··· 754 754 755 755 void fadump_update_elfcore_header(char *bufp) 756 756 { 757 - struct elfhdr *elf; 758 757 struct elf_phdr *phdr; 759 758 760 - elf = (struct elfhdr *)bufp; 761 759 bufp += sizeof(struct elfhdr); 762 760 763 761 /* First note is a place holder for cpu notes info. */

-16

arch/powerpc/kernel/fpu.S

··· 87 87 oris r5,r5,MSR_VSX@h 88 88 END_FTR_SECTION_IFSET(CPU_FTR_VSX) 89 89 #endif 90 - SYNC 91 90 MTMSRD(r5) /* enable use of fpu now */ 92 91 isync 93 92 /* enable use of FP after return */ ··· 132 133 2: SAVE_32FPVSRS(0, R4, R6) 133 134 mffs fr0 134 135 stfd fr0,FPSTATE_FPSCR(r6) 135 - blr 136 - 137 - /* 138 - * These are used in the alignment trap handler when emulating 139 - * single-precision loads and stores. 140 - */ 141 - 142 - _GLOBAL(cvt_fd) 143 - lfs 0,0(r3) 144 - stfd 0,0(r4) 145 - blr 146 - 147 - _GLOBAL(cvt_df) 148 - lfd 0,0(r3) 149 - stfs 0,0(r4) 150 136 blr

+11 -82

arch/powerpc/kernel/head_32.S arch/powerpc/kernel/head_book3s_32.S

··· 34 34 35 35 #include "head_32.h" 36 36 37 - /* 601 only have IBAT */ 38 - #ifdef CONFIG_PPC_BOOK3S_601 39 - #define LOAD_BAT(n, reg, RA, RB) \ 40 - li RA,0; \ 41 - mtspr SPRN_IBAT##n##U,RA; \ 42 - lwz RA,(n*16)+0(reg); \ 43 - lwz RB,(n*16)+4(reg); \ 44 - mtspr SPRN_IBAT##n##U,RA; \ 45 - mtspr SPRN_IBAT##n##L,RB 46 - #else 47 37 #define LOAD_BAT(n, reg, RA, RB) \ 48 38 /* see the comment for clear_bats() -- Cort */ \ 49 39 li RA,0; \ ··· 47 57 lwz RB,(n*16)+12(reg); \ 48 58 mtspr SPRN_DBAT##n##U,RA; \ 49 59 mtspr SPRN_DBAT##n##L,RB 50 - #endif 51 60 52 61 __HEAD 53 62 .stabs "arch/powerpc/kernel/",N_SO,0,0,0f 54 - .stabs "head_32.S",N_SO,0,0,0f 63 + .stabs "head_book3s_32.S",N_SO,0,0,0f 55 64 0: 56 65 _ENTRY(_stext); 57 66 ··· 155 166 156 167 bl initial_bats 157 168 bl load_segment_registers 158 - #ifdef CONFIG_KASAN 169 + BEGIN_MMU_FTR_SECTION 159 170 bl early_hash_table 160 - #endif 171 + END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) 161 172 #if defined(CONFIG_BOOTX_TEXT) 162 173 bl setup_disp_bat 163 174 #endif ··· 174 185 bl reloc_offset 175 186 li r24,0 /* cpu# */ 176 187 bl call_setup_cpu /* Call setup_cpu for this CPU */ 177 - #ifdef CONFIG_PPC_BOOK3S_32 178 188 bl reloc_offset 179 189 bl init_idle_6xx 180 - #endif /* CONFIG_PPC_BOOK3S_32 */ 181 190 182 191 183 192 /* ··· 206 219 lis r0,start_here@h 207 220 ori r0,r0,start_here@l 208 221 mtspr SPRN_SRR0,r0 209 - SYNC 210 222 RFI /* enables MMU */ 211 223 212 224 /* ··· 260 274 DO_KVM 0x200 261 275 MachineCheck: 262 276 EXCEPTION_PROLOG_0 263 - #ifdef CONFIG_VMAP_STACK 264 - li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ 265 - mtmsr r11 266 - isync 267 - #endif 268 277 #ifdef CONFIG_PPC_CHRP 269 278 mfspr r11, SPRN_SPRG_THREAD 270 - tovirt_vmstack r11, r11 271 279 lwz r11, RTAS_SP(r11) 272 280 cmpwi cr1, r11, 0 273 281 bne cr1, 7f ··· 419 439 SystemCall: 420 440 SYSCALL_ENTRY 0xc00 421 441 422 - /* Single step - not used on 601 */ 423 442 EXCEPTION(0xd00, SingleStep, 
single_step_exception, EXC_XFER_STD) 424 443 EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) 425 444 ··· 769 790 mtcr r11 770 791 lwz r11, THR11(r10) 771 792 mfspr r10, SPRN_SPRG_SCRATCH0 772 - SYNC 773 793 RFI 774 794 775 795 1: /* ISI */ 776 796 mtcr r11 777 797 mfspr r11, SPRN_SPRG_SCRATCH1 778 798 mfspr r10, SPRN_SPRG_SCRATCH0 779 - SYNC 780 799 RFI 781 800 782 801 stack_overflow: ··· 865 888 set to map the 0xf0000000 - 0xffffffff region */ 866 889 mfmsr r0 867 890 rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ 868 - SYNC 869 891 mtmsr r0 870 892 isync 871 893 ··· 876 900 lis r3,-KERNELBASE@h 877 901 mr r4,r24 878 902 bl call_setup_cpu /* Call setup_cpu for this CPU */ 879 - #ifdef CONFIG_PPC_BOOK3S_32 880 903 lis r3,-KERNELBASE@h 881 904 bl init_idle_6xx 882 - #endif /* CONFIG_PPC_BOOK3S_32 */ 883 905 884 906 /* get current's stack and current */ 885 907 lis r2,secondary_current@ha ··· 910 936 ori r3,r3,start_secondary@l 911 937 mtspr SPRN_SRR0,r3 912 938 mtspr SPRN_SRR1,r4 913 - SYNC 914 939 RFI 915 940 #endif /* CONFIG_SMP */ 916 941 ··· 918 945 #endif 919 946 920 947 /* 921 - * Those generic dummy functions are kept for CPUs not 922 - * included in CONFIG_PPC_BOOK3S_32 923 - */ 924 - #if !defined(CONFIG_PPC_BOOK3S_32) 925 - _ENTRY(__save_cpu_setup) 926 - blr 927 - _ENTRY(__restore_cpu_setup) 928 - blr 929 - #endif /* !defined(CONFIG_PPC_BOOK3S_32) */ 930 - 931 - /* 932 948 * Load stuff into the MMU. Intended to be called with 933 949 * IR=0 and DR=0. 
934 950 */ 935 - #ifdef CONFIG_KASAN 936 951 early_hash_table: 937 952 sync /* Force all PTE updates to finish */ 938 953 isync ··· 931 970 lis r6, early_hash - PAGE_OFFSET@h 932 971 ori r6, r6, 3 /* 256kB table */ 933 972 mtspr SPRN_SDR1, r6 973 + lis r6, early_hash@h 974 + lis r3, Hash@ha 975 + stw r6, Hash@l(r3) 934 976 blr 935 - #endif 936 977 937 978 load_up_mmu: 938 979 sync /* Force all PTE updates to finish */ ··· 948 985 lwz r6,_SDR1@l(r6) 949 986 mtspr SPRN_SDR1,r6 950 987 951 - /* Load the BAT registers with the values set up by MMU_init. 952 - MMU_init takes care of whether we're on a 601 or not. */ 988 + /* Load the BAT registers with the values set up by MMU_init. */ 953 989 lis r3,BATS@ha 954 990 addi r3,r3,BATS@l 955 991 tophys(r3,r3) ··· 964 1002 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) 965 1003 blr 966 1004 967 - load_segment_registers: 1005 + _GLOBAL(load_segment_registers) 968 1006 li r0, NUM_USER_SEGMENTS /* load up user segment register values */ 969 1007 mtctr r0 /* for context 0 */ 970 1008 li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ ··· 1023 1061 bl machine_init 1024 1062 bl __save_cpu_setup 1025 1063 bl MMU_init 1026 - #ifdef CONFIG_KASAN 1027 - BEGIN_MMU_FTR_SECTION 1028 1064 bl MMU_init_hw_patch 1029 - END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) 1030 - #endif 1031 1065 1032 1066 /* 1033 1067 * Go back to running unmapped so we can load up new values ··· 1038 1080 .align 4 1039 1081 mtspr SPRN_SRR0,r4 1040 1082 mtspr SPRN_SRR1,r3 1041 - SYNC 1042 1083 RFI 1043 1084 /* Load up the kernel context */ 1044 1085 2: bl load_up_mmu ··· 1049 1092 */ 1050 1093 lis r5, abatron_pteptrs@h 1051 1094 ori r5, r5, abatron_pteptrs@l 1052 - stw r5, 0xf0(r0) /* This much match your Abatron config */ 1095 + stw r5, 0xf0(0) /* This much match your Abatron config */ 1053 1096 lis r6, swapper_pg_dir@h 1054 1097 ori r6, r6, swapper_pg_dir@l 1055 1098 tophys(r5, r5) ··· 1062 1105 ori r3,r3,start_kernel@l 1063 1106 mtspr SPRN_SRR0,r3 1064 1107 mtspr 
SPRN_SRR1,r4 1065 - SYNC 1066 1108 RFI 1067 1109 1068 1110 /* ··· 1121 1165 clear_bats: 1122 1166 li r10,0 1123 1167 1124 - #ifndef CONFIG_PPC_BOOK3S_601 1125 1168 mtspr SPRN_DBAT0U,r10 1126 1169 mtspr SPRN_DBAT0L,r10 1127 1170 mtspr SPRN_DBAT1U,r10 ··· 1129 1174 mtspr SPRN_DBAT2L,r10 1130 1175 mtspr SPRN_DBAT3U,r10 1131 1176 mtspr SPRN_DBAT3L,r10 1132 - #endif 1133 1177 mtspr SPRN_IBAT0U,r10 1134 1178 mtspr SPRN_IBAT0L,r10 1135 1179 mtspr SPRN_IBAT1U,r10 ··· 1177 1223 .align 4 1178 1224 mtspr SPRN_SRR0, r4 1179 1225 mtspr SPRN_SRR1, r3 1180 - SYNC 1181 1226 RFI 1182 1227 1: bl clear_bats 1183 1228 lis r3, BATS@ha ··· 1196 1243 mtmsr r3 1197 1244 mtspr SPRN_SRR0, r7 1198 1245 mtspr SPRN_SRR1, r6 1199 - SYNC 1200 1246 RFI 1201 1247 1202 1248 flush_tlbs: ··· 1219 1267 sync 1220 1268 RFI 1221 1269 1222 - /* 1223 - * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET 1224 - * (we keep one for debugging) and on others, we use one 256M BAT. 1225 - */ 1270 + /* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ 1226 1271 initial_bats: 1227 1272 lis r11,PAGE_OFFSET@h 1228 - #ifdef CONFIG_PPC_BOOK3S_601 1229 - ori r11,r11,4 /* set up BAT registers for 601 */ 1230 - li r8,0x7f /* valid, block length = 8MB */ 1231 - mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */ 1232 - mtspr SPRN_IBAT0L,r8 /* lower BAT register */ 1233 - addis r11,r11,0x800000@h 1234 - addis r8,r8,0x800000@h 1235 - mtspr SPRN_IBAT1U,r11 1236 - mtspr SPRN_IBAT1L,r8 1237 - addis r11,r11,0x800000@h 1238 - addis r8,r8,0x800000@h 1239 - mtspr SPRN_IBAT2U,r11 1240 - mtspr SPRN_IBAT2L,r8 1241 - #else 1242 1273 tophys(r8,r11) 1243 1274 #ifdef CONFIG_SMP 1244 1275 ori r8,r8,0x12 /* R/W access, M=1 */ ··· 1230 1295 #endif /* CONFIG_SMP */ 1231 1296 ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ 1232 1297 1233 - mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ 1298 + mtspr SPRN_DBAT0L,r8 /* N.B. 
6xx have valid */ 1234 1299 mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ 1235 1300 mtspr SPRN_IBAT0L,r8 1236 1301 mtspr SPRN_IBAT0U,r11 1237 - #endif 1238 1302 isync 1239 1303 blr 1240 1304 ··· 1251 1317 beqlr 1252 1318 lwz r11,0(r8) 1253 1319 lwz r8,4(r8) 1254 - #ifndef CONFIG_PPC_BOOK3S_601 1255 1320 mtspr SPRN_DBAT3L,r8 1256 1321 mtspr SPRN_DBAT3U,r11 1257 - #else 1258 - mtspr SPRN_IBAT3L,r8 1259 - mtspr SPRN_IBAT3U,r11 1260 - #endif 1261 1322 blr 1262 1323 #endif /* CONFIG_BOOTX_TEXT */ 1263 1324

+34 -39

arch/powerpc/kernel/head_32.h

··· 40 40 41 41 .macro EXCEPTION_PROLOG_1 for_rtas=0 42 42 #ifdef CONFIG_VMAP_STACK 43 - .ifeq \for_rtas 44 - li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ 45 - mtmsr r11 46 - isync 47 - .endif 48 - subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ 43 + mr r11, r1 44 + subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ 45 + beq 1f 46 + mfspr r1,SPRN_SPRG_THREAD 47 + lwz r1,TASK_STACK-THREAD(r1) 48 + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE 49 49 #else 50 - tophys(r11,r1) /* use tophys(r1) if kernel */ 51 - subi r11, r11, INT_FRAME_SIZE /* alloc exc. frame */ 52 - #endif 50 + subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ 53 51 beq 1f 54 52 mfspr r11,SPRN_SPRG_THREAD 55 - tovirt_vmstack r11, r11 56 53 lwz r11,TASK_STACK-THREAD(r11) 57 54 addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE 58 - tophys_novmstack r11, r11 55 + #endif 59 56 1: 57 + tophys_novmstack r11, r11 60 58 #ifdef CONFIG_VMAP_STACK 61 - mtcrf 0x7f, r11 59 + mtcrf 0x7f, r1 62 60 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow 63 61 #endif 64 62 .endm 65 63 66 64 .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 67 - #if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) 68 - BEGIN_MMU_FTR_SECTION 65 + #ifdef CONFIG_VMAP_STACK 69 66 mtcr r10 70 - FTR_SECTION_ELSE 71 - stw r10, _CCR(r11) 72 - ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) 67 + li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ 68 + mtmsr r10 69 + isync 73 70 #else 74 71 stw r10,_CCR(r11) /* save registers */ 75 72 #endif 76 73 mfspr r10, SPRN_SPRG_SCRATCH0 74 + #ifdef CONFIG_VMAP_STACK 75 + stw r11,GPR1(r1) 76 + stw r11,0(r1) 77 + mr r11, r1 78 + #else 79 + stw r1,GPR1(r11) 80 + stw r1,0(r11) 81 + tovirt(r1, r11) /* set new kernel sp */ 82 + #endif 77 83 stw r12,GPR12(r11) 78 84 stw r9,GPR9(r11) 79 85 stw r10,GPR10(r11) 80 - #if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) 81 - BEGIN_MMU_FTR_SECTION 86 + #ifdef CONFIG_VMAP_STACK 82 87 mfcr r10 83 88 stw r10, _CCR(r11) 84 - 
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) 85 89 #endif 86 90 mfspr r12,SPRN_SPRG_SCRATCH1 87 91 stw r12,GPR11(r11) ··· 101 97 stw r10, _DSISR(r11) 102 98 .endif 103 99 lwz r9, SRR1(r12) 104 - #if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) 105 - BEGIN_MMU_FTR_SECTION 106 100 andi. r10, r9, MSR_PR 107 - END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) 108 - #endif 109 101 lwz r12, SRR0(r12) 110 102 #else 111 103 mfspr r12,SPRN_SRR0 112 104 mfspr r9,SPRN_SRR1 113 105 #endif 114 - stw r1,GPR1(r11) 115 - stw r1,0(r11) 116 - tovirt_novmstack r1, r11 /* set new kernel sp */ 117 106 #ifdef CONFIG_40x 118 107 rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ 119 108 #else ··· 222 225 #endif 223 226 mtspr SPRN_SRR1,r10 224 227 mtspr SPRN_SRR0,r11 225 - SYNC 226 228 RFI /* jump to handler, enable MMU */ 227 229 99: b ret_from_kernel_syscall 228 230 .endm ··· 323 327 .macro vmap_stack_overflow_exception 324 328 #ifdef CONFIG_VMAP_STACK 325 329 #ifdef CONFIG_SMP 326 - mfspr r11, SPRN_SPRG_THREAD 327 - tovirt(r11, r11) 328 - lwz r11, TASK_CPU - THREAD(r11) 329 - slwi r11, r11, 3 330 - addis r11, r11, emergency_ctx@ha 330 + mfspr r1, SPRN_SPRG_THREAD 331 + lwz r1, TASK_CPU - THREAD(r1) 332 + slwi r1, r1, 3 333 + addis r1, r1, emergency_ctx@ha 331 334 #else 332 - lis r11, emergency_ctx@ha 335 + lis r1, emergency_ctx@ha 333 336 #endif 334 - lwz r11, emergency_ctx@l(r11) 335 - cmpwi cr1, r11, 0 337 + lwz r1, emergency_ctx@l(r1) 338 + cmpwi cr1, r1, 0 336 339 bne cr1, 1f 337 - lis r11, init_thread_union@ha 338 - addi r11, r11, init_thread_union@l 339 - 1: addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE 340 + lis r1, init_thread_union@ha 341 + addi r1, r1, init_thread_union@l 342 + 1: addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE 340 343 EXCEPTION_PROLOG_2 341 344 SAVE_NVGPRS(r11) 342 345 addi r3, r1, STACK_FRAME_OVERHEAD

-1

arch/powerpc/kernel/head_40x.S

··· 72 72 lis r0,start_here@h 73 73 ori r0,r0,start_here@l 74 74 mtspr SPRN_SRR0,r0 75 - SYNC 76 75 rfi /* enables MMU */ 77 76 b . /* prevent prefetch past rfi */ 78 77

+2 -5

arch/powerpc/kernel/head_64.S

··· 300 300 rlwimi r3, r3, 30, 2, 30 301 301 mtspr SPRN_PIR, r3 302 302 1: 303 - #endif 304 - 305 - _GLOBAL(generic_secondary_thread_init) 306 303 mr r24,r3 307 304 308 305 /* turn on 64-bit mode */ ··· 309 312 bl relative_toc 310 313 tovirt(r2,r2) 311 314 312 - #ifdef CONFIG_PPC_BOOK3E 313 315 /* Book3E initialization */ 314 316 mr r3,r24 315 317 bl book3e_secondary_thread_init 316 - #endif 317 318 b generic_secondary_common_init 319 + 320 + #endif /* CONFIG_PPC_BOOK3E */ 318 321 319 322 /* 320 323 * On pSeries and most other platforms, secondary processors spin

-1

arch/powerpc/kernel/head_booke.h

··· 176 176 #endif 177 177 mtspr SPRN_SRR1,r10 178 178 mtspr SPRN_SRR0,r11 179 - SYNC 180 179 RFI /* jump to handler, enable MMU */ 181 180 99: b ret_from_kernel_syscall 182 181 .endm

+2 -147

arch/powerpc/kernel/hw_breakpoint.c

··· 494 494 } 495 495 } 496 496 497 - static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info) 498 - { 499 - return ((info->address <= dar) && (dar - info->address < info->len)); 500 - } 501 - 502 - static bool ea_user_range_overlaps(unsigned long ea, int size, 503 - struct arch_hw_breakpoint *info) 504 - { 505 - return ((ea < info->address + info->len) && 506 - (ea + size > info->address)); 507 - } 508 - 509 - static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) 510 - { 511 - unsigned long hw_start_addr, hw_end_addr; 512 - 513 - hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); 514 - hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); 515 - 516 - return ((hw_start_addr <= dar) && (hw_end_addr > dar)); 517 - } 518 - 519 - static bool ea_hw_range_overlaps(unsigned long ea, int size, 520 - struct arch_hw_breakpoint *info) 521 - { 522 - unsigned long hw_start_addr, hw_end_addr; 523 - 524 - hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); 525 - hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); 526 - 527 - return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); 528 - } 529 - 530 - /* 531 - * If hw has multiple DAWR registers, we also need to check all 532 - * dawrx constraint bits to confirm this is _really_ a valid event. 533 - * If type is UNKNOWN, but privilege level matches, consider it as 534 - * a positive match. 535 - */ 536 - static bool check_dawrx_constraints(struct pt_regs *regs, int type, 537 - struct arch_hw_breakpoint *info) 538 - { 539 - if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) 540 - return false; 541 - 542 - /* 543 - * The Cache Management instructions other than dcbz never 544 - * cause a match. i.e. if type is CACHEOP, the instruction 545 - * is dcbz, and dcbz is treated as Store. 
546 - */ 547 - if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE)) 548 - return false; 549 - 550 - if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) 551 - return false; 552 - 553 - if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER)) 554 - return false; 555 - 556 - return true; 557 - } 558 - 559 - /* 560 - * Return true if the event is valid wrt dawr configuration, 561 - * including extraneous exception. Otherwise return false. 562 - */ 563 - static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, 564 - unsigned long ea, int type, int size, 565 - struct arch_hw_breakpoint *info) 566 - { 567 - bool in_user_range = dar_in_user_range(regs->dar, info); 568 - bool dawrx_constraints; 569 - 570 - /* 571 - * 8xx supports only one breakpoint and thus we can 572 - * unconditionally return true. 573 - */ 574 - if (IS_ENABLED(CONFIG_PPC_8xx)) { 575 - if (!in_user_range) 576 - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; 577 - return true; 578 - } 579 - 580 - if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { 581 - if (cpu_has_feature(CPU_FTR_ARCH_31) && 582 - !dar_in_hw_range(regs->dar, info)) 583 - return false; 584 - 585 - return true; 586 - } 587 - 588 - dawrx_constraints = check_dawrx_constraints(regs, type, info); 589 - 590 - if (type == UNKNOWN) { 591 - if (cpu_has_feature(CPU_FTR_ARCH_31) && 592 - !dar_in_hw_range(regs->dar, info)) 593 - return false; 594 - 595 - return dawrx_constraints; 596 - } 597 - 598 - if (ea_user_range_overlaps(ea, size, info)) 599 - return dawrx_constraints; 600 - 601 - if (ea_hw_range_overlaps(ea, size, info)) { 602 - if (dawrx_constraints) { 603 - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; 604 - return true; 605 - } 606 - } 607 - return false; 608 - } 609 - 610 - static int cache_op_size(void) 611 - { 612 - #ifdef __powerpc64__ 613 - return ppc64_caches.l1d.block_size; 614 - #else 615 - return L1_CACHE_BYTES; 616 - #endif 617 - } 618 - 619 - static void get_instr_detail(struct 
pt_regs *regs, struct ppc_inst *instr, 620 - int *type, int *size, unsigned long *ea) 621 - { 622 - struct instruction_op op; 623 - 624 - if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) 625 - return; 626 - 627 - analyse_instr(&op, regs, *instr); 628 - *type = GETTYPE(op.type); 629 - *ea = op.ea; 630 - #ifdef __powerpc64__ 631 - if (!(regs->msr & MSR_64BIT)) 632 - *ea &= 0xffffffffUL; 633 - #endif 634 - 635 - *size = GETSIZE(op.type); 636 - if (*type == CACHEOP) { 637 - *size = cache_op_size(); 638 - *ea &= ~(*size - 1); 639 - } 640 - } 641 - 642 497 static bool is_larx_stcx_instr(int type) 643 498 { 644 499 return type == LARX || type == STCX; ··· 577 722 rcu_read_lock(); 578 723 579 724 if (!IS_ENABLED(CONFIG_PPC_8xx)) 580 - get_instr_detail(regs, &instr, &type, &size, &ea); 725 + wp_get_instr_detail(regs, &instr, &type, &size, &ea); 581 726 582 727 for (i = 0; i < nr_wp_slots(); i++) { 583 728 bp[i] = __this_cpu_read(bp_per_reg[i]); ··· 587 732 info[i] = counter_arch_bp(bp[i]); 588 733 info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; 589 734 590 - if (check_constraints(regs, instr, ea, type, size, info[i])) { 735 + if (wp_check_constraints(regs, instr, ea, type, size, info[i])) { 591 736 if (!IS_ENABLED(CONFIG_PPC_8xx) && 592 737 ppc_inst_equal(instr, ppc_inst(0))) { 593 738 handler_error(bp[i], info[i]);

+162

arch/powerpc/kernel/hw_breakpoint_constraints.c

··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + #include <linux/kernel.h> 3 + #include <linux/uaccess.h> 4 + #include <linux/sched.h> 5 + #include <asm/hw_breakpoint.h> 6 + #include <asm/sstep.h> 7 + #include <asm/cache.h> 8 + 9 + static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info) 10 + { 11 + return ((info->address <= dar) && (dar - info->address < info->len)); 12 + } 13 + 14 + static bool ea_user_range_overlaps(unsigned long ea, int size, 15 + struct arch_hw_breakpoint *info) 16 + { 17 + return ((ea < info->address + info->len) && 18 + (ea + size > info->address)); 19 + } 20 + 21 + static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) 22 + { 23 + unsigned long hw_start_addr, hw_end_addr; 24 + 25 + hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); 26 + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); 27 + 28 + return ((hw_start_addr <= dar) && (hw_end_addr > dar)); 29 + } 30 + 31 + static bool ea_hw_range_overlaps(unsigned long ea, int size, 32 + struct arch_hw_breakpoint *info) 33 + { 34 + unsigned long hw_start_addr, hw_end_addr; 35 + unsigned long align_size = HW_BREAKPOINT_SIZE; 36 + 37 + /* 38 + * On p10 predecessors, quadword is handled differently than 39 + * other instructions. 40 + */ 41 + if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16) 42 + align_size = HW_BREAKPOINT_SIZE_QUADWORD; 43 + 44 + hw_start_addr = ALIGN_DOWN(info->address, align_size); 45 + hw_end_addr = ALIGN(info->address + info->len, align_size); 46 + 47 + return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); 48 + } 49 + 50 + /* 51 + * If hw has multiple DAWR registers, we also need to check all 52 + * dawrx constraint bits to confirm this is _really_ a valid event. 53 + * If type is UNKNOWN, but privilege level matches, consider it as 54 + * a positive match. 
55 + */ 56 + static bool check_dawrx_constraints(struct pt_regs *regs, int type, 57 + struct arch_hw_breakpoint *info) 58 + { 59 + if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) 60 + return false; 61 + 62 + /* 63 + * The Cache Management instructions other than dcbz never 64 + * cause a match. i.e. if type is CACHEOP, the instruction 65 + * is dcbz, and dcbz is treated as Store. 66 + */ 67 + if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE)) 68 + return false; 69 + 70 + if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) 71 + return false; 72 + 73 + if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER)) 74 + return false; 75 + 76 + return true; 77 + } 78 + 79 + /* 80 + * Return true if the event is valid wrt dawr configuration, 81 + * including extraneous exception. Otherwise return false. 82 + */ 83 + bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, 84 + unsigned long ea, int type, int size, 85 + struct arch_hw_breakpoint *info) 86 + { 87 + bool in_user_range = dar_in_user_range(regs->dar, info); 88 + bool dawrx_constraints; 89 + 90 + /* 91 + * 8xx supports only one breakpoint and thus we can 92 + * unconditionally return true. 
93 + */ 94 + if (IS_ENABLED(CONFIG_PPC_8xx)) { 95 + if (!in_user_range) 96 + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; 97 + return true; 98 + } 99 + 100 + if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { 101 + if (cpu_has_feature(CPU_FTR_ARCH_31) && 102 + !dar_in_hw_range(regs->dar, info)) 103 + return false; 104 + 105 + return true; 106 + } 107 + 108 + dawrx_constraints = check_dawrx_constraints(regs, type, info); 109 + 110 + if (type == UNKNOWN) { 111 + if (cpu_has_feature(CPU_FTR_ARCH_31) && 112 + !dar_in_hw_range(regs->dar, info)) 113 + return false; 114 + 115 + return dawrx_constraints; 116 + } 117 + 118 + if (ea_user_range_overlaps(ea, size, info)) 119 + return dawrx_constraints; 120 + 121 + if (ea_hw_range_overlaps(ea, size, info)) { 122 + if (dawrx_constraints) { 123 + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; 124 + return true; 125 + } 126 + } 127 + return false; 128 + } 129 + 130 + static int cache_op_size(void) 131 + { 132 + #ifdef __powerpc64__ 133 + return ppc64_caches.l1d.block_size; 134 + #else 135 + return L1_CACHE_BYTES; 136 + #endif 137 + } 138 + 139 + void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, 140 + int *type, int *size, unsigned long *ea) 141 + { 142 + struct instruction_op op; 143 + 144 + if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) 145 + return; 146 + 147 + analyse_instr(&op, regs, *instr); 148 + *type = GETTYPE(op.type); 149 + *ea = op.ea; 150 + #ifdef __powerpc64__ 151 + if (!(regs->msr & MSR_64BIT)) 152 + *ea &= 0xffffffffUL; 153 + #endif 154 + 155 + *size = GETSIZE(op.type); 156 + if (*type == CACHEOP) { 157 + *size = cache_op_size(); 158 + *ea &= ~(*size - 1); 159 + } else if (*type == LOAD_VMX || *type == STORE_VMX) { 160 + *ea &= ~(*size - 1); 161 + } 162 + }

-8

arch/powerpc/kernel/idle.c

··· 41 41 } 42 42 __setup("powersave=off", powersave_off); 43 43 44 - #ifdef CONFIG_HOTPLUG_CPU 45 - void arch_cpu_idle_dead(void) 46 - { 47 - sched_preempt_enable_no_resched(); 48 - cpu_die(); 49 - } 50 - #endif 51 - 52 44 void arch_cpu_idle(void) 53 45 { 54 46 ppc64_runlatch_off();

+29 -44

arch/powerpc/kernel/irq.c

··· 104 104 105 105 static inline notrace int decrementer_check_overflow(void) 106 106 { 107 - u64 now = get_tb_or_rtc(); 107 + u64 now = get_tb(); 108 108 u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); 109 109 110 110 return now >= *next_tb; ··· 113 113 #ifdef CONFIG_PPC_BOOK3E 114 114 115 115 /* This is called whenever we are re-enabling interrupts 116 - * and returns either 0 (nothing to do) or 500/900/280/a00/e80 if 116 + * and returns either 0 (nothing to do) or 500/900/280 if 117 117 * there's an EE, DEC or DBELL to generate. 118 118 * 119 119 * This is called in two contexts: From arch_local_irq_restore() ··· 181 181 return 0x500; 182 182 } 183 183 184 - /* 185 - * Check if an EPR external interrupt happened this bit is typically 186 - * set if we need to handle another "edge" interrupt from within the 187 - * MPIC "EPR" handler. 188 - */ 189 - if (happened & PACA_IRQ_EE_EDGE) { 190 - local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; 191 - return 0x500; 192 - } 193 - 194 184 if (happened & PACA_IRQ_DBELL) { 195 185 local_paca->irq_happened &= ~PACA_IRQ_DBELL; 196 186 return 0x280; ··· 191 201 192 202 return 0; 193 203 } 204 + 205 + /* 206 + * This is specifically called by assembly code to re-enable interrupts 207 + * if they are currently disabled. This is typically called before 208 + * schedule() or do_signal() when returning to userspace. We do it 209 + * in C to avoid the burden of dealing with lockdep etc... 210 + * 211 + * NOTE: This is called with interrupts hard disabled but not marked 212 + * as such in paca->irq_happened, so we need to resync this. 
213 + */ 214 + void notrace restore_interrupts(void) 215 + { 216 + if (irqs_disabled()) { 217 + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; 218 + local_irq_enable(); 219 + } else 220 + __hard_irq_enable(); 221 + } 222 + 194 223 #endif /* CONFIG_PPC_BOOK3E */ 195 224 196 225 void replay_soft_interrupts(void) ··· 223 214 struct pt_regs regs; 224 215 225 216 ppc_save_regs(&regs); 226 - regs.softe = IRQS_ALL_DISABLED; 217 + regs.softe = IRQS_ENABLED; 227 218 228 219 again: 229 220 if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) ··· 273 264 274 265 if (happened & PACA_IRQ_EE) { 275 266 local_paca->irq_happened &= ~PACA_IRQ_EE; 276 - regs.trap = 0x500; 277 - do_IRQ(&regs); 278 - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) 279 - hard_irq_disable(); 280 - } 281 - 282 - /* 283 - * Check if an EPR external interrupt happened this bit is typically 284 - * set if we need to handle another "edge" interrupt from within the 285 - * MPIC "EPR" handler. 286 - */ 287 - if (IS_ENABLED(CONFIG_PPC_BOOK3E) && (happened & PACA_IRQ_EE_EDGE)) { 288 - local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; 289 267 regs.trap = 0x500; 290 268 do_IRQ(&regs); 291 269 if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) ··· 364 368 } 365 369 } 366 370 371 + /* 372 + * Disable preempt here, so that the below preempt_enable will 373 + * perform resched if required (a replayed interrupt may set 374 + * need_resched). 375 + */ 376 + preempt_disable(); 367 377 irq_soft_mask_set(IRQS_ALL_DISABLED); 368 378 trace_hardirqs_off(); 369 379 ··· 379 377 trace_hardirqs_on(); 380 378 irq_soft_mask_set(IRQS_ENABLED); 381 379 __hard_irq_enable(); 380 + preempt_enable(); 382 381 } 383 382 EXPORT_SYMBOL(arch_local_irq_restore); 384 - 385 - /* 386 - * This is specifically called by assembly code to re-enable interrupts 387 - * if they are currently disabled. This is typically called before 388 - * schedule() or do_signal() when returning to userspace. 
We do it 389 - * in C to avoid the burden of dealing with lockdep etc... 390 - * 391 - * NOTE: This is called with interrupts hard disabled but not marked 392 - * as such in paca->irq_happened, so we need to resync this. 393 - */ 394 - void notrace restore_interrupts(void) 395 - { 396 - if (irqs_disabled()) { 397 - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; 398 - local_irq_enable(); 399 - } else 400 - __hard_irq_enable(); 401 - } 402 383 403 384 /* 404 385 * This is a helper to use when about to go into idle low-power

+1 -2

arch/powerpc/kernel/l2cr_6xx.S

··· 256 256 sync 257 257 258 258 /* Restore MSR (restores EE and DR bits to original state) */ 259 - SYNC 260 259 mtmsr r7 261 260 isync 262 261 ··· 376 377 1: bdnz 1b 377 378 378 379 /* Restore MSR (restores EE and DR bits to original state) */ 379 - 4: SYNC 380 + 4: 380 381 mtmsr r7 381 382 isync 382 383 blr

-48

arch/powerpc/kernel/misc_32.S

··· 215 215 216 216 #endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */ 217 217 218 - /* 219 - * complement mask on the msr then "or" some values on. 220 - * _nmask_and_or_msr(nmask, value_to_or) 221 - */ 222 - _GLOBAL(_nmask_and_or_msr) 223 - mfmsr r0 /* Get current msr */ 224 - andc r0,r0,r3 /* And off the bits set in r3 (first parm) */ 225 - or r0,r0,r4 /* Or on the bits in r4 (second parm) */ 226 - SYNC /* Some chip revs have problems here... */ 227 - mtmsr r0 /* Update machine state */ 228 - isync 229 - blr /* Done */ 230 - 231 218 #ifdef CONFIG_40x 232 219 233 220 /* ··· 254 267 _ASM_NOKPROBE_SYMBOL(real_writeb) 255 268 256 269 #endif /* CONFIG_40x */ 257 - 258 - 259 - /* 260 - * Flush instruction cache. 261 - * This is a no-op on the 601. 262 - */ 263 - #ifndef CONFIG_PPC_8xx 264 - _GLOBAL(flush_instruction_cache) 265 - #if defined(CONFIG_4xx) 266 - lis r3, KERNELBASE@h 267 - iccci 0,r3 268 - #elif defined(CONFIG_FSL_BOOKE) 269 - #ifdef CONFIG_E200 270 - mfspr r3,SPRN_L1CSR0 271 - ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC 272 - /* msync; isync recommended here */ 273 - mtspr SPRN_L1CSR0,r3 274 - isync 275 - blr 276 - #endif 277 - mfspr r3,SPRN_L1CSR1 278 - ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR 279 - mtspr SPRN_L1CSR1,r3 280 - #elif defined(CONFIG_PPC_BOOK3S_601) 281 - blr /* for 601, do nothing */ 282 - #else 283 - /* 603/604 processor - use invalidate-all bit in HID0 */ 284 - mfspr r3,SPRN_HID0 285 - ori r3,r3,HID0_ICFI 286 - mtspr SPRN_HID0,r3 287 - #endif /* CONFIG_4xx */ 288 - isync 289 - blr 290 - EXPORT_SYMBOL(flush_instruction_cache) 291 - #endif /* CONFIG_PPC_8xx */ 292 270 293 271 /* 294 272 * Copy a whole page. We use the dcbz instruction on the destination

-1

arch/powerpc/kernel/misc_64.S

··· 365 365 366 366 li r4,KEXEC_STATE_REAL_MODE 367 367 stb r4,PACAKEXECSTATE(r13) 368 - SYNC 369 368 370 369 b kexec_wait 371 370

+85 -64

arch/powerpc/kernel/process.c

··· 124 124 125 125 newmsr = oldmsr | bits; 126 126 127 - #ifdef CONFIG_VSX 128 127 if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP)) 129 128 newmsr |= MSR_VSX; 130 - #endif 131 129 132 130 if (oldmsr != newmsr) 133 131 mtmsr_isync(newmsr); ··· 142 144 143 145 newmsr = oldmsr & ~bits; 144 146 145 - #ifdef CONFIG_VSX 146 147 if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP)) 147 148 newmsr &= ~MSR_VSX; 148 - #endif 149 149 150 150 if (oldmsr != newmsr) 151 151 mtmsr_isync(newmsr); ··· 158 162 save_fpu(tsk); 159 163 msr = tsk->thread.regs->msr; 160 164 msr &= ~(MSR_FP|MSR_FE0|MSR_FE1); 161 - #ifdef CONFIG_VSX 162 165 if (cpu_has_feature(CPU_FTR_VSX)) 163 166 msr &= ~MSR_VSX; 164 - #endif 165 167 tsk->thread.regs->msr = msr; 166 168 } 167 169 ··· 229 235 } 230 236 } 231 237 EXPORT_SYMBOL(enable_kernel_fp); 238 + #else 239 + static inline void __giveup_fpu(struct task_struct *tsk) { } 232 240 #endif /* CONFIG_PPC_FPU */ 233 241 234 242 #ifdef CONFIG_ALTIVEC ··· 241 245 save_altivec(tsk); 242 246 msr = tsk->thread.regs->msr; 243 247 msr &= ~MSR_VEC; 244 - #ifdef CONFIG_VSX 245 248 if (cpu_has_feature(CPU_FTR_VSX)) 246 249 msr &= ~MSR_VSX; 247 - #endif 248 250 tsk->thread.regs->msr = msr; 249 251 } 250 252 ··· 408 414 409 415 static int __init init_msr_all_available(void) 410 416 { 411 - #ifdef CONFIG_PPC_FPU 412 - msr_all_available |= MSR_FP; 413 - #endif 414 - #ifdef CONFIG_ALTIVEC 417 + if (IS_ENABLED(CONFIG_PPC_FPU)) 418 + msr_all_available |= MSR_FP; 415 419 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 416 420 msr_all_available |= MSR_VEC; 417 - #endif 418 - #ifdef CONFIG_VSX 419 421 if (cpu_has_feature(CPU_FTR_VSX)) 420 422 msr_all_available |= MSR_VSX; 421 - #endif 422 - #ifdef CONFIG_SPE 423 423 if (cpu_has_feature(CPU_FTR_SPE)) 424 424 msr_all_available |= MSR_SPE; 425 - #endif 426 425 427 426 return 0; 428 427 } ··· 439 452 440 453 WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC))); 441 454 442 - #ifdef CONFIG_PPC_FPU 443 455 if (usermsr 
& MSR_FP) 444 456 __giveup_fpu(tsk); 445 - #endif 446 - #ifdef CONFIG_ALTIVEC 447 457 if (usermsr & MSR_VEC) 448 458 __giveup_altivec(tsk); 449 - #endif 450 - #ifdef CONFIG_SPE 451 459 if (usermsr & MSR_SPE) 452 460 __giveup_spe(tsk); 453 - #endif 454 461 455 462 msr_check_and_clear(msr_all_available); 456 463 } ··· 490 509 static void do_restore_altivec(void) { } 491 510 #endif /* CONFIG_ALTIVEC */ 492 511 493 - #ifdef CONFIG_VSX 494 512 static bool should_restore_vsx(void) 495 513 { 496 514 if (cpu_has_feature(CPU_FTR_VSX)) 497 515 return true; 498 516 return false; 499 517 } 518 + #ifdef CONFIG_VSX 500 519 static void do_restore_vsx(void) 501 520 { 502 521 current->thread.used_vsr = 1; 503 522 } 504 523 #else 505 - static bool should_restore_vsx(void) { return false; } 506 524 static void do_restore_vsx(void) { } 507 525 #endif /* CONFIG_VSX */ 508 526 ··· 561 581 regs->msr |= new_msr | fpexc_mode; 562 582 } 563 583 } 564 - #endif 584 + #endif /* CONFIG_PPC_BOOK3S_64 */ 565 585 566 586 static void save_all(struct task_struct *tsk) 567 587 { ··· 622 642 (void __user *)address); 623 643 } 624 644 #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ 645 + 646 + static void do_break_handler(struct pt_regs *regs) 647 + { 648 + struct arch_hw_breakpoint null_brk = {0}; 649 + struct arch_hw_breakpoint *info; 650 + struct ppc_inst instr = ppc_inst(0); 651 + int type = 0; 652 + int size = 0; 653 + unsigned long ea; 654 + int i; 655 + 656 + /* 657 + * If underneath hw supports only one watchpoint, we know it 658 + * caused exception. 8xx also falls into this category. 659 + */ 660 + if (nr_wp_slots() == 1) { 661 + __set_breakpoint(0, &null_brk); 662 + current->thread.hw_brk[0] = null_brk; 663 + current->thread.hw_brk[0].flags |= HW_BRK_FLAG_DISABLED; 664 + return; 665 + } 666 + 667 + /* Otherwise find out which DAWR caused exception and disable it. 
*/ 668 + wp_get_instr_detail(regs, &instr, &type, &size, &ea); 669 + 670 + for (i = 0; i < nr_wp_slots(); i++) { 671 + info = &current->thread.hw_brk[i]; 672 + if (!info->address) 673 + continue; 674 + 675 + if (wp_check_constraints(regs, instr, ea, type, size, info)) { 676 + __set_breakpoint(i, &null_brk); 677 + current->thread.hw_brk[i] = null_brk; 678 + current->thread.hw_brk[i].flags |= HW_BRK_FLAG_DISABLED; 679 + } 680 + } 681 + } 682 + 625 683 void do_break (struct pt_regs *regs, unsigned long address, 626 684 unsigned long error_code) 627 685 { ··· 670 652 671 653 if (debugger_break_match(regs)) 672 654 return; 655 + 656 + /* 657 + * We reach here only when watchpoint exception is generated by ptrace 658 + * event (or hw is buggy!). Now if CONFIG_HAVE_HW_BREAKPOINT is set, 659 + * watchpoint is already handled by hw_breakpoint_handler() so we don't 660 + * have to do anything. But when CONFIG_HAVE_HW_BREAKPOINT is not set, 661 + * we need to manually handle the watchpoint here. 662 + */ 663 + if (!IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT)) 664 + do_break_handler(regs); 673 665 674 666 /* Deliver the signal to userspace */ 675 667 force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address); ··· 811 783 static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) 812 784 { 813 785 mtspr(SPRN_DAC1, dabr); 814 - #ifdef CONFIG_PPC_47x 815 - isync(); 816 - #endif 786 + if (IS_ENABLED(CONFIG_PPC_47x)) 787 + isync(); 817 788 return 0; 818 789 } 819 790 #elif defined(CONFIG_PPC_BOOK3S) ··· 1283 1256 restore_math(current->thread.regs); 1284 1257 1285 1258 /* 1286 - * The copy-paste buffer can only store into foreign real 1287 - * addresses, so unprivileged processes can not see the 1288 - * data or use it in any way unless they have foreign real 1289 - * mappings. If the new process has the foreign real address 1290 - * mappings, we must issue a cp_abort to clear any state and 1291 - * prevent snooping, corruption or a covert channel. 
1259 + * On POWER9 the copy-paste buffer can only paste into 1260 + * foreign real addresses, so unprivileged processes can not 1261 + * see the data or use it in any way unless they have 1262 + * foreign real mappings. If the new process has the foreign 1263 + * real address mappings, we must issue a cp_abort to clear 1264 + * any state and prevent snooping, corruption or a covert 1265 + * channel. ISA v3.1 supports paste into local memory. 1292 1266 */ 1293 1267 if (current->mm && 1294 - atomic_read(&current->mm->context.vas_windows)) 1268 + (cpu_has_feature(CPU_FTR_ARCH_31) || 1269 + atomic_read(&current->mm->context.vas_windows))) 1295 1270 asm volatile(PPC_CP_ABORT); 1296 1271 } 1297 1272 #endif /* CONFIG_PPC_BOOK3S_64 */ ··· 1482 1453 trap = TRAP(regs); 1483 1454 if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR)) 1484 1455 pr_cont("CFAR: "REG" ", regs->orig_gpr3); 1485 - if (trap == 0x200 || trap == 0x300 || trap == 0x600) 1486 - #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) 1487 - pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); 1488 - #else 1489 - pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); 1490 - #endif 1456 + if (trap == 0x200 || trap == 0x300 || trap == 0x600) { 1457 + if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) 1458 + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); 1459 + else 1460 + pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); 1461 + } 1462 + 1491 1463 #ifdef CONFIG_PPC64 1492 1464 pr_cont("IRQMASK: %lx ", regs->softe); 1493 1465 #endif ··· 1505 1475 break; 1506 1476 } 1507 1477 pr_cont("\n"); 1508 - #ifdef CONFIG_KALLSYMS 1509 1478 /* 1510 1479 * Lookup NIP late so we have the best chance of getting the 1511 1480 * above info out without failing 1512 1481 */ 1513 - printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); 1514 - printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); 1515 - #endif 1482 + if (IS_ENABLED(CONFIG_KALLSYMS)) { 1483 + printk("NIP ["REG"] 
%pS\n", regs->nip, (void *)regs->nip); 1484 + printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); 1485 + } 1516 1486 show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT); 1517 1487 if (!user_mode(regs)) 1518 1488 show_instructions(regs); ··· 1761 1731 #ifdef CONFIG_PPC64 1762 1732 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ 1763 1733 1764 - #ifdef CONFIG_PPC_BOOK3S_64 1765 - if (!radix_enabled()) 1734 + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled()) 1766 1735 preload_new_slb_context(start, sp); 1767 - #endif 1768 1736 #endif 1769 1737 1770 1738 /* ··· 1894 1866 * fpexc_mode. fpexc_mode is also used for setting FP exception 1895 1867 * mode (asyn, precise, disabled) for 'Classic' FP. */ 1896 1868 if (val & PR_FP_EXC_SW_ENABLE) { 1897 - #ifdef CONFIG_SPE 1898 1869 if (cpu_has_feature(CPU_FTR_SPE)) { 1899 1870 /* 1900 1871 * When the sticky exception bits are set ··· 1907 1880 * anyway to restore the prctl settings from 1908 1881 * the saved environment. 1909 1882 */ 1883 + #ifdef CONFIG_SPE 1910 1884 tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); 1911 1885 tsk->thread.fpexc_mode = val & 1912 1886 (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); 1887 + #endif 1913 1888 return 0; 1914 1889 } else { 1915 1890 return -EINVAL; 1916 1891 } 1917 - #else 1918 - return -EINVAL; 1919 - #endif 1920 1892 } 1921 1893 1922 1894 /* on a CONFIG_SPE this does not hurt us. The bits that ··· 1934 1908 1935 1909 int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) 1936 1910 { 1937 - unsigned int val; 1911 + unsigned int val = 0; 1938 1912 1939 - if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) 1940 - #ifdef CONFIG_SPE 1913 + if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) { 1941 1914 if (cpu_has_feature(CPU_FTR_SPE)) { 1942 1915 /* 1943 1916 * When the sticky exception bits are set ··· 1950 1925 * anyway to restore the prctl settings from 1951 1926 * the saved environment. 
1952 1927 */ 1928 + #ifdef CONFIG_SPE 1953 1929 tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); 1954 1930 val = tsk->thread.fpexc_mode; 1931 + #endif 1955 1932 } else 1956 1933 return -EINVAL; 1957 - #else 1958 - return -EINVAL; 1959 - #endif 1960 - else 1934 + } else { 1961 1935 val = __unpack_fe01(tsk->thread.fpexc_mode); 1936 + } 1962 1937 return put_user(val, (unsigned int __user *) adr); 1963 1938 } 1964 1939 ··· 2127 2102 unsigned long sp, ip, lr, newsp; 2128 2103 int count = 0; 2129 2104 int firstframe = 1; 2130 - #ifdef CONFIG_FUNCTION_GRAPH_TRACER 2131 2105 unsigned long ret_addr; 2132 2106 int ftrace_idx = 0; 2133 - #endif 2134 2107 2135 2108 if (tsk == NULL) 2136 2109 tsk = current; ··· 2156 2133 if (!firstframe || ip != lr) { 2157 2134 printk("%s["REG"] ["REG"] %pS", 2158 2135 loglvl, sp, ip, (void *)ip); 2159 - #ifdef CONFIG_FUNCTION_GRAPH_TRACER 2160 2136 ret_addr = ftrace_graph_ret_addr(current, 2161 2137 &ftrace_idx, ip, stack); 2162 2138 if (ret_addr != ip) 2163 2139 pr_cont(" (%pS)", (void *)ret_addr); 2164 - #endif 2165 2140 if (firstframe) 2166 2141 pr_cont(" (unreliable)"); 2167 2142 pr_cont("\n");

+5

arch/powerpc/kernel/prom.c

··· 776 776 limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); 777 777 memblock_enforce_memory_limit(limit); 778 778 779 + #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) 780 + if (!early_radix_enabled()) 781 + memblock_cap_memory_range(0, 1UL << (H_MAX_PHYSMEM_BITS)); 782 + #endif 783 + 779 784 memblock_allow_resize(); 780 785 memblock_dump_all(); 781 786

+13 -4

arch/powerpc/kernel/prom_init.c

··· 2422 2422 u32 width, height, pitch, addr; 2423 2423 2424 2424 prom_printf("Setting btext !\n"); 2425 - prom_getprop(node, "width", &width, 4); 2426 - prom_getprop(node, "height", &height, 4); 2427 - prom_getprop(node, "linebytes", &pitch, 4); 2428 - prom_getprop(node, "address", &addr, 4); 2425 + 2426 + if (prom_getprop(node, "width", &width, 4) == PROM_ERROR) 2427 + return; 2428 + 2429 + if (prom_getprop(node, "height", &height, 4) == PROM_ERROR) 2430 + return; 2431 + 2432 + if (prom_getprop(node, "linebytes", &pitch, 4) == PROM_ERROR) 2433 + return; 2434 + 2435 + if (prom_getprop(node, "address", &addr, 4) == PROM_ERROR) 2436 + return; 2437 + 2429 2438 prom_printf("W=%d H=%d LB=%d addr=0x%x\n", 2430 2439 width, height, pitch, addr); 2431 2440 btext_setup_display(width, height, 8, pitch, addr);

+7 -2

arch/powerpc/kernel/ptrace/ptrace-noadv.c

··· 57 57 } else { 58 58 dbginfo->features = 0; 59 59 } 60 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 61 + dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_ARCH_31; 60 62 } 61 63 62 64 int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, ··· 219 217 return -EIO; 220 218 221 219 brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE); 222 - brk.type = HW_BRK_TYPE_TRANSLATE; 220 + brk.type = HW_BRK_TYPE_TRANSLATE | HW_BRK_TYPE_PRIV_ALL; 223 221 brk.len = DABR_MAX_LEN; 222 + brk.hw_len = DABR_MAX_LEN; 224 223 if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) 225 224 brk.type |= HW_BRK_TYPE_READ; 226 225 if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) ··· 289 286 } 290 287 return ret; 291 288 #else /* CONFIG_HAVE_HW_BREAKPOINT */ 292 - if (child->thread.hw_brk[data - 1].address == 0) 289 + if (!(child->thread.hw_brk[data - 1].flags & HW_BRK_FLAG_DISABLED) && 290 + child->thread.hw_brk[data - 1].address == 0) 293 291 return -ENOENT; 294 292 295 293 child->thread.hw_brk[data - 1].address = 0; 296 294 child->thread.hw_brk[data - 1].type = 0; 295 + child->thread.hw_brk[data - 1].flags = 0; 297 296 #endif /* CONFIG_HAVE_HW_BREAKPOINT */ 298 297 299 298 return 0;

+153

arch/powerpc/kernel/rtas.c

··· 992 992 return NULL; 993 993 } 994 994 995 + #ifdef CONFIG_PPC_RTAS_FILTER 996 + 997 + /* 998 + * The sys_rtas syscall, as originally designed, allows root to pass 999 + * arbitrary physical addresses to RTAS calls. A number of RTAS calls 1000 + * can be abused to write to arbitrary memory and do other things that 1001 + * are potentially harmful to system integrity, and thus should only 1002 + * be used inside the kernel and not exposed to userspace. 1003 + * 1004 + * All known legitimate users of the sys_rtas syscall will only ever 1005 + * pass addresses that fall within the RMO buffer, and use a known 1006 + * subset of RTAS calls. 1007 + * 1008 + * Accordingly, we filter RTAS requests to check that the call is 1009 + * permitted, and that provided pointers fall within the RMO buffer. 1010 + * The rtas_filters list contains an entry for each permitted call, 1011 + * with the indexes of the parameters which are expected to contain 1012 + * addresses and sizes of buffers allocated inside the RMO buffer. 
1013 + */ 1014 + struct rtas_filter { 1015 + const char *name; 1016 + int token; 1017 + /* Indexes into the args buffer, -1 if not used */ 1018 + int buf_idx1; 1019 + int size_idx1; 1020 + int buf_idx2; 1021 + int size_idx2; 1022 + 1023 + int fixed_size; 1024 + }; 1025 + 1026 + static struct rtas_filter rtas_filters[] __ro_after_init = { 1027 + { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, 1028 + { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ 1029 + { "display-character", -1, -1, -1, -1, -1 }, 1030 + { "ibm,display-message", -1, 0, -1, -1, -1 }, 1031 + { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, 1032 + { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, 1033 + { "ibm,open-errinct", -1, -1, -1, -1, -1 }, 1034 + { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, 1035 + { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, 1036 + { "ibm,get-indices", -1, 2, 3, -1, -1 }, 1037 + { "get-power-level", -1, -1, -1, -1, -1 }, 1038 + { "get-sensor-state", -1, -1, -1, -1, -1 }, 1039 + { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, 1040 + { "get-time-of-day", -1, -1, -1, -1, -1 }, 1041 + { "ibm,get-vpd", -1, 0, -1, 1, 2 }, 1042 + { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, 1043 + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, 1044 + { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, 1045 + { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, 1046 + { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, 1047 + { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, 1048 + { "set-indicator", -1, -1, -1, -1, -1 }, 1049 + { "set-power-level", -1, -1, -1, -1, -1 }, 1050 + { "set-time-for-power-on", -1, -1, -1, -1, -1 }, 1051 + { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, 1052 + { "set-time-of-day", -1, -1, -1, -1, -1 }, 1053 + { "ibm,suspend-me", -1, -1, -1, -1, -1 }, 1054 + { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, 1055 + { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, 1056 + { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, 1057 + }; 1058 + 1059 + static 
bool in_rmo_buf(u32 base, u32 end) 1060 + { 1061 + return base >= rtas_rmo_buf && 1062 + base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && 1063 + base <= end && 1064 + end >= rtas_rmo_buf && 1065 + end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); 1066 + } 1067 + 1068 + static bool block_rtas_call(int token, int nargs, 1069 + struct rtas_args *args) 1070 + { 1071 + int i; 1072 + 1073 + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { 1074 + struct rtas_filter *f = &rtas_filters[i]; 1075 + u32 base, size, end; 1076 + 1077 + if (token != f->token) 1078 + continue; 1079 + 1080 + if (f->buf_idx1 != -1) { 1081 + base = be32_to_cpu(args->args[f->buf_idx1]); 1082 + if (f->size_idx1 != -1) 1083 + size = be32_to_cpu(args->args[f->size_idx1]); 1084 + else if (f->fixed_size) 1085 + size = f->fixed_size; 1086 + else 1087 + size = 1; 1088 + 1089 + end = base + size - 1; 1090 + if (!in_rmo_buf(base, end)) 1091 + goto err; 1092 + } 1093 + 1094 + if (f->buf_idx2 != -1) { 1095 + base = be32_to_cpu(args->args[f->buf_idx2]); 1096 + if (f->size_idx2 != -1) 1097 + size = be32_to_cpu(args->args[f->size_idx2]); 1098 + else if (f->fixed_size) 1099 + size = f->fixed_size; 1100 + else 1101 + size = 1; 1102 + end = base + size - 1; 1103 + 1104 + /* 1105 + * Special case for ibm,configure-connector where the 1106 + * address can be 0 1107 + */ 1108 + if (!strcmp(f->name, "ibm,configure-connector") && 1109 + base == 0) 1110 + return false; 1111 + 1112 + if (!in_rmo_buf(base, end)) 1113 + goto err; 1114 + } 1115 + 1116 + return false; 1117 + } 1118 + 1119 + err: 1120 + pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); 1121 + pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", 1122 + token, nargs, current->comm); 1123 + return true; 1124 + } 1125 + 1126 + #else 1127 + 1128 + static bool block_rtas_call(int token, int nargs, 1129 + struct rtas_args *args) 1130 + { 1131 + return false; 1132 + } 1133 + 1134 + #endif /* CONFIG_PPC_RTAS_FILTER */ 1135 + 995 1136 /* We assume to 
be passed big endian arguments */ 996 1137 SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) 997 1138 { ··· 1169 1028 1170 1029 args.rets = &args.args[nargs]; 1171 1030 memset(args.rets, 0, nret * sizeof(rtas_arg_t)); 1031 + 1032 + if (block_rtas_call(token, nargs, &args)) 1033 + return -EINVAL; 1172 1034 1173 1035 /* Need to handle ibm,suspend_me call specially */ 1174 1036 if (token == ibm_suspend_me_token) { ··· 1234 1090 unsigned long rtas_region = RTAS_INSTANTIATE_MAX; 1235 1091 u32 base, size, entry; 1236 1092 int no_base, no_size, no_entry; 1093 + #ifdef CONFIG_PPC_RTAS_FILTER 1094 + int i; 1095 + #endif 1237 1096 1238 1097 /* Get RTAS dev node and fill up our "rtas" structure with infos 1239 1098 * about it. ··· 1275 1128 1276 1129 #ifdef CONFIG_RTAS_ERROR_LOGGING 1277 1130 rtas_last_error_token = rtas_token("rtas-last-error"); 1131 + #endif 1132 + 1133 + #ifdef CONFIG_PPC_RTAS_FILTER 1134 + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { 1135 + rtas_filters[i].token = rtas_token(rtas_filters[i].name); 1136 + } 1278 1137 #endif 1279 1138 } 1280 1139

+24 -10

arch/powerpc/kernel/security.c

··· 430 430 431 431 static void update_branch_cache_flush(void) 432 432 { 433 + u32 *site; 434 + 433 435 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 436 + site = &patch__call_kvm_flush_link_stack; 434 437 // This controls the branch from guest_exit_cont to kvm_flush_link_stack 435 438 if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { 436 - patch_instruction_site(&patch__call_kvm_flush_link_stack, 437 - ppc_inst(PPC_INST_NOP)); 439 + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); 438 440 } else { 439 441 // Could use HW flush, but that could also flush count cache 440 - patch_branch_site(&patch__call_kvm_flush_link_stack, 441 - (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); 442 + patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); 442 443 } 443 444 #endif 445 + 446 + // Patch out the bcctr first, then nop the rest 447 + site = &patch__call_flush_branch_caches3; 448 + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); 449 + site = &patch__call_flush_branch_caches2; 450 + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); 451 + site = &patch__call_flush_branch_caches1; 452 + patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); 444 453 445 454 // This controls the branch from _switch to flush_branch_caches 446 455 if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && 447 456 link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { 448 - patch_instruction_site(&patch__call_flush_branch_caches, 449 - ppc_inst(PPC_INST_NOP)); 457 + // Nothing to be done 458 + 450 459 } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW && 451 460 link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) { 452 - patch_instruction_site(&patch__call_flush_branch_caches, 453 - ppc_inst(PPC_INST_BCCTR_FLUSH)); 461 + // Patch in the bcctr last 462 + site = &patch__call_flush_branch_caches1; 463 + patch_instruction_site(site, ppc_inst(0x39207fff)); // li r9,0x7fff 464 + site = &patch__call_flush_branch_caches2; 465 + patch_instruction_site(site, ppc_inst(0x7d2903a6)); // 
mtctr r9 466 + site = &patch__call_flush_branch_caches3; 467 + patch_instruction_site(site, ppc_inst(PPC_INST_BCCTR_FLUSH)); 468 + 454 469 } else { 455 - patch_branch_site(&patch__call_flush_branch_caches, 456 - (u64)&flush_branch_caches, BRANCH_SET_LINK); 470 + patch_branch_site(site, (u64)&flush_branch_caches, BRANCH_SET_LINK); 457 471 458 472 // If we just need to flush the link stack, early return 459 473 if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) {

+1 -1

arch/powerpc/kernel/setup_32.c

··· 223 223 dcache_bsize = cur_cpu_spec->dcache_bsize; 224 224 icache_bsize = cur_cpu_spec->icache_bsize; 225 225 ucache_bsize = 0; 226 - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200)) 226 + if (IS_ENABLED(CONFIG_E200)) 227 227 ucache_bsize = icache_bsize = dcache_bsize; 228 228 }

+95 -10

arch/powerpc/kernel/setup_64.c

··· 66 66 #include <asm/feature-fixups.h> 67 67 #include <asm/kup.h> 68 68 #include <asm/early_ioremap.h> 69 + #include <asm/pgalloc.h> 69 70 70 71 #include "setup.h" 71 72 ··· 757 756 } 758 757 759 758 #ifdef CONFIG_SMP 760 - #define PCPU_DYN_SIZE () 761 - 762 - static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) 759 + /** 760 + * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu 761 + * @cpu: cpu to allocate for 762 + * @size: size allocation in bytes 763 + * @align: alignment 764 + * 765 + * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper 766 + * does the right thing for NUMA regardless of the current 767 + * configuration. 768 + * 769 + * RETURNS: 770 + * Pointer to the allocated area on success, NULL on failure. 771 + */ 772 + static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, 773 + size_t align) 763 774 { 764 - return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), 765 - MEMBLOCK_ALLOC_ACCESSIBLE, 766 - early_cpu_to_node(cpu)); 775 + const unsigned long goal = __pa(MAX_DMA_ADDRESS); 776 + #ifdef CONFIG_NEED_MULTIPLE_NODES 777 + int node = early_cpu_to_node(cpu); 778 + void *ptr; 767 779 780 + if (!node_online(node) || !NODE_DATA(node)) { 781 + ptr = memblock_alloc_from(size, align, goal); 782 + pr_info("cpu %d has no node %d or node-local memory\n", 783 + cpu, node); 784 + pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", 785 + cpu, size, __pa(ptr)); 786 + } else { 787 + ptr = memblock_alloc_try_nid(size, align, goal, 788 + MEMBLOCK_ALLOC_ACCESSIBLE, node); 789 + pr_debug("per cpu data for cpu%d %lu bytes on node%d at " 790 + "%016lx\n", cpu, size, node, __pa(ptr)); 791 + } 792 + return ptr; 793 + #else 794 + return memblock_alloc_from(size, align, goal); 795 + #endif 768 796 } 769 797 770 - static void __init pcpu_fc_free(void *ptr, size_t size) 798 + static void __init pcpu_free_bootmem(void *ptr, size_t size) 771 799 { 772 800 memblock_free(__pa(ptr), 
size); 773 801 } ··· 812 782 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 813 783 EXPORT_SYMBOL(__per_cpu_offset); 814 784 785 + static void __init pcpu_populate_pte(unsigned long addr) 786 + { 787 + pgd_t *pgd = pgd_offset_k(addr); 788 + p4d_t *p4d; 789 + pud_t *pud; 790 + pmd_t *pmd; 791 + 792 + p4d = p4d_offset(pgd, addr); 793 + if (p4d_none(*p4d)) { 794 + pud_t *new; 795 + 796 + new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); 797 + if (!new) 798 + goto err_alloc; 799 + p4d_populate(&init_mm, p4d, new); 800 + } 801 + 802 + pud = pud_offset(p4d, addr); 803 + if (pud_none(*pud)) { 804 + pmd_t *new; 805 + 806 + new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); 807 + if (!new) 808 + goto err_alloc; 809 + pud_populate(&init_mm, pud, new); 810 + } 811 + 812 + pmd = pmd_offset(pud, addr); 813 + if (!pmd_present(*pmd)) { 814 + pte_t *new; 815 + 816 + new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); 817 + if (!new) 818 + goto err_alloc; 819 + pmd_populate_kernel(&init_mm, pmd, new); 820 + } 821 + 822 + return; 823 + 824 + err_alloc: 825 + panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", 826 + __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); 827 + } 828 + 829 + 815 830 void __init setup_per_cpu_areas(void) 816 831 { 817 832 const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; 818 833 size_t atom_size; 819 834 unsigned long delta; 820 835 unsigned int cpu; 821 - int rc; 836 + int rc = -EINVAL; 822 837 823 838 /* 824 839 * Linear mapping is one of 4K, 1M and 16M. 
For 4K, no need ··· 875 800 else 876 801 atom_size = 1 << 20; 877 802 878 - rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, 879 - pcpu_fc_alloc, pcpu_fc_free); 803 + if (pcpu_chosen_fc != PCPU_FC_PAGE) { 804 + rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, 805 + pcpu_alloc_bootmem, pcpu_free_bootmem); 806 + if (rc) 807 + pr_warn("PERCPU: %s allocator failed (%d), " 808 + "falling back to page size\n", 809 + pcpu_fc_names[pcpu_chosen_fc], rc); 810 + } 811 + 812 + if (rc < 0) 813 + rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem, 814 + pcpu_populate_pte); 880 815 if (rc < 0) 881 816 panic("cannot initialize percpu area (err=%d)", rc); 882 817

+248 -134

arch/powerpc/kernel/smp.c

··· 75 75 76 76 struct task_struct *secondary_current; 77 77 bool has_big_cores; 78 + bool coregroup_enabled; 78 79 79 80 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); 80 81 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); 81 82 DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); 82 83 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); 84 + DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map); 83 85 84 86 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); 85 87 EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); 86 88 EXPORT_PER_CPU_SYMBOL(cpu_core_map); 87 89 EXPORT_SYMBOL_GPL(has_big_cores); 90 + 91 + enum { 92 + #ifdef CONFIG_SCHED_SMT 93 + smt_idx, 94 + #endif 95 + cache_idx, 96 + mc_idx, 97 + die_idx, 98 + }; 88 99 89 100 #define MAX_THREAD_LIST_SIZE 8 90 101 #define THREAD_GROUP_SHARE_L1 1 ··· 671 660 #endif 672 661 673 662 /* 663 + * Extends set_cpus_related. Instead of setting one CPU at a time in 664 + * dstmask, set srcmask at oneshot. dstmask should be super set of srcmask. 665 + */ 666 + static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int), 667 + struct cpumask *(*dstmask)(int)) 668 + { 669 + struct cpumask *mask; 670 + int k; 671 + 672 + mask = srcmask(j); 673 + for_each_cpu(k, srcmask(i)) 674 + cpumask_or(dstmask(k), dstmask(k), mask); 675 + 676 + if (i == j) 677 + return; 678 + 679 + mask = srcmask(i); 680 + for_each_cpu(k, srcmask(j)) 681 + cpumask_or(dstmask(k), dstmask(k), mask); 682 + } 683 + 684 + /* 674 685 * parse_thread_groups: Parses the "ibm,thread-groups" device tree 675 686 * property for the CPU device node @dn and stores 676 687 * the parsed output in the thread_groups ··· 822 789 if (err) 823 790 goto out; 824 791 825 - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), 826 - GFP_KERNEL, 827 - cpu_to_node(cpu)); 828 - 829 792 cpu_group_start = get_cpu_thread_group_start(cpu, &tg); 830 793 831 794 if (unlikely(cpu_group_start == -1)) { ··· 829 800 err = -ENODATA; 830 801 goto out; 831 802 } 803 + 804 + 
zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), 805 + GFP_KERNEL, cpu_to_node(cpu)); 832 806 833 807 for (i = first_thread; i < first_thread + threads_per_core; i++) { 834 808 int i_group_start = get_cpu_thread_group_start(i, &tg); ··· 850 818 of_node_put(dn); 851 819 return err; 852 820 } 821 + 822 + static bool shared_caches; 823 + 824 + #ifdef CONFIG_SCHED_SMT 825 + /* cpumask of CPUs with asymmetric SMT dependency */ 826 + static int powerpc_smt_flags(void) 827 + { 828 + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; 829 + 830 + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { 831 + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); 832 + flags |= SD_ASYM_PACKING; 833 + } 834 + return flags; 835 + } 836 + #endif 837 + 838 + /* 839 + * P9 has a slightly odd architecture where pairs of cores share an L2 cache. 840 + * This topology makes it *much* cheaper to migrate tasks between adjacent cores 841 + * since the migrated task remains cache hot. We want to take advantage of this 842 + * at the scheduler level so an extra topology level is required. 843 + */ 844 + static int powerpc_shared_cache_flags(void) 845 + { 846 + return SD_SHARE_PKG_RESOURCES; 847 + } 848 + 849 + /* 850 + * We can't just pass cpu_l2_cache_mask() directly because it 851 + * returns a non-const pointer and the compiler barfs on that. 
852 + */ 853 + static const struct cpumask *shared_cache_mask(int cpu) 854 + { 855 + return per_cpu(cpu_l2_cache_map, cpu); 856 + } 857 + 858 + #ifdef CONFIG_SCHED_SMT 859 + static const struct cpumask *smallcore_smt_mask(int cpu) 860 + { 861 + return cpu_smallcore_mask(cpu); 862 + } 863 + #endif 864 + 865 + static struct cpumask *cpu_coregroup_mask(int cpu) 866 + { 867 + return per_cpu(cpu_coregroup_map, cpu); 868 + } 869 + 870 + static bool has_coregroup_support(void) 871 + { 872 + return coregroup_enabled; 873 + } 874 + 875 + static const struct cpumask *cpu_mc_mask(int cpu) 876 + { 877 + return cpu_coregroup_mask(cpu); 878 + } 879 + 880 + static struct sched_domain_topology_level powerpc_topology[] = { 881 + #ifdef CONFIG_SCHED_SMT 882 + { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, 883 + #endif 884 + { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, 885 + { cpu_mc_mask, SD_INIT_NAME(MC) }, 886 + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, 887 + { NULL, }, 888 + }; 853 889 854 890 static int init_big_cores(void) 855 891 { ··· 961 861 GFP_KERNEL, cpu_to_node(cpu)); 962 862 zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), 963 863 GFP_KERNEL, cpu_to_node(cpu)); 864 + if (has_coregroup_support()) 865 + zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu), 866 + GFP_KERNEL, cpu_to_node(cpu)); 867 + 868 + #ifdef CONFIG_NEED_MULTIPLE_NODES 964 869 /* 965 870 * numa_node_id() works after this. 966 871 */ ··· 974 869 set_cpu_numa_mem(cpu, 975 870 local_memory_node(numa_cpu_lookup_table[cpu])); 976 871 } 872 + #endif 873 + /* 874 + * cpu_core_map is now more updated and exists only since 875 + * its been exported for long. It only will have a snapshot 876 + * of cpu_cpu_mask. 
877 + */ 878 + cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); 977 879 } 978 880 979 881 /* Init the cpumasks so the boot CPU is related to itself */ 980 882 cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); 981 883 cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); 982 - cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); 884 + 885 + if (has_coregroup_support()) 886 + cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid)); 983 887 984 888 init_big_cores(); 985 889 if (has_big_cores) { ··· 1240 1126 return cache; 1241 1127 } 1242 1128 1243 - static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) 1129 + static bool update_mask_by_l2(int cpu) 1244 1130 { 1131 + struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; 1245 1132 struct device_node *l2_cache, *np; 1133 + cpumask_var_t mask; 1246 1134 int i; 1247 1135 1248 1136 l2_cache = cpu_to_l2cache(cpu); 1249 - if (!l2_cache) 1250 - return false; 1137 + if (!l2_cache) { 1138 + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; 1251 1139 1252 - for_each_cpu(i, cpu_online_mask) { 1140 + /* 1141 + * If no l2cache for this CPU, assume all siblings to share 1142 + * cache with this CPU. 
1143 + */ 1144 + if (has_big_cores) 1145 + sibling_mask = cpu_smallcore_mask; 1146 + 1147 + for_each_cpu(i, sibling_mask(cpu)) 1148 + set_cpus_related(cpu, i, cpu_l2_cache_mask); 1149 + 1150 + return false; 1151 + } 1152 + 1153 + alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu)); 1154 + cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu)); 1155 + 1156 + if (has_big_cores) 1157 + submask_fn = cpu_smallcore_mask; 1158 + 1159 + /* Update l2-cache mask with all the CPUs that are part of submask */ 1160 + or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask); 1161 + 1162 + /* Skip all CPUs already part of current CPU l2-cache mask */ 1163 + cpumask_andnot(mask, mask, cpu_l2_cache_mask(cpu)); 1164 + 1165 + for_each_cpu(i, mask) { 1253 1166 /* 1254 1167 * when updating the marks the current CPU has not been marked 1255 1168 * online, but we need to update the cache masks 1256 1169 */ 1257 1170 np = cpu_to_l2cache(i); 1258 - if (!np) 1259 - continue; 1260 1171 1261 - if (np == l2_cache) 1262 - set_cpus_related(cpu, i, mask_fn); 1172 + /* Skip all CPUs already part of current CPU l2-cache */ 1173 + if (np == l2_cache) { 1174 + or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask); 1175 + cpumask_andnot(mask, mask, submask_fn(i)); 1176 + } else { 1177 + cpumask_andnot(mask, mask, cpu_l2_cache_mask(i)); 1178 + } 1263 1179 1264 1180 of_node_put(np); 1265 1181 } 1266 1182 of_node_put(l2_cache); 1183 + free_cpumask_var(mask); 1267 1184 1268 1185 return true; 1269 1186 } ··· 1302 1157 #ifdef CONFIG_HOTPLUG_CPU 1303 1158 static void remove_cpu_from_masks(int cpu) 1304 1159 { 1160 + struct cpumask *(*mask_fn)(int) = cpu_sibling_mask; 1305 1161 int i; 1306 1162 1307 - /* NB: cpu_core_mask is a superset of the others */ 1308 - for_each_cpu(i, cpu_core_mask(cpu)) { 1309 - set_cpus_unrelated(cpu, i, cpu_core_mask); 1163 + if (shared_caches) 1164 + mask_fn = cpu_l2_cache_mask; 1165 + 1166 + for_each_cpu(i, mask_fn(cpu)) { 1310 1167 set_cpus_unrelated(cpu, i, 
cpu_l2_cache_mask); 1311 1168 set_cpus_unrelated(cpu, i, cpu_sibling_mask); 1312 1169 if (has_big_cores) 1313 1170 set_cpus_unrelated(cpu, i, cpu_smallcore_mask); 1171 + } 1172 + 1173 + if (has_coregroup_support()) { 1174 + for_each_cpu(i, cpu_coregroup_mask(cpu)) 1175 + set_cpus_unrelated(cpu, i, cpu_coregroup_mask); 1314 1176 } 1315 1177 } 1316 1178 #endif 1317 1179 1318 1180 static inline void add_cpu_to_smallcore_masks(int cpu) 1319 1181 { 1320 - struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu); 1321 - int i, first_thread = cpu_first_thread_sibling(cpu); 1182 + int i; 1322 1183 1323 1184 if (!has_big_cores) 1324 1185 return; 1325 1186 1326 1187 cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu)); 1327 1188 1328 - for (i = first_thread; i < first_thread + threads_per_core; i++) { 1329 - if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map)) 1189 + for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) { 1190 + if (cpu_online(i)) 1330 1191 set_cpus_related(i, cpu, cpu_smallcore_mask); 1331 1192 } 1332 1193 } 1333 1194 1334 - int get_physical_package_id(int cpu) 1195 + static void update_coregroup_mask(int cpu) 1335 1196 { 1336 - int pkg_id = cpu_to_chip_id(cpu); 1197 + struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; 1198 + cpumask_var_t mask; 1199 + int coregroup_id = cpu_to_coregroup_id(cpu); 1200 + int i; 1337 1201 1338 - /* 1339 - * If the platform is PowerNV or Guest on KVM, ibm,chip-id is 1340 - * defined. Hence we would return the chip-id as the result of 1341 - * get_physical_package_id. 
1342 - */ 1343 - if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR) && 1344 - IS_ENABLED(CONFIG_PPC_SPLPAR)) { 1345 - struct device_node *np = of_get_cpu_node(cpu, NULL); 1346 - pkg_id = of_node_to_nid(np); 1347 - of_node_put(np); 1202 + alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu)); 1203 + cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu)); 1204 + 1205 + if (shared_caches) 1206 + submask_fn = cpu_l2_cache_mask; 1207 + 1208 + /* Update coregroup mask with all the CPUs that are part of submask */ 1209 + or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask); 1210 + 1211 + /* Skip all CPUs already part of coregroup mask */ 1212 + cpumask_andnot(mask, mask, cpu_coregroup_mask(cpu)); 1213 + 1214 + for_each_cpu(i, mask) { 1215 + /* Skip all CPUs not part of this coregroup */ 1216 + if (coregroup_id == cpu_to_coregroup_id(i)) { 1217 + or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask); 1218 + cpumask_andnot(mask, mask, submask_fn(i)); 1219 + } else { 1220 + cpumask_andnot(mask, mask, cpu_coregroup_mask(i)); 1221 + } 1348 1222 } 1349 - 1350 - return pkg_id; 1223 + free_cpumask_var(mask); 1351 1224 } 1352 - EXPORT_SYMBOL_GPL(get_physical_package_id); 1353 1225 1354 1226 static void add_cpu_to_masks(int cpu) 1355 1227 { 1356 1228 int first_thread = cpu_first_thread_sibling(cpu); 1357 - int pkg_id = get_physical_package_id(cpu); 1358 1229 int i; 1359 1230 1360 1231 /* ··· 1384 1223 set_cpus_related(i, cpu, cpu_sibling_mask); 1385 1224 1386 1225 add_cpu_to_smallcore_masks(cpu); 1387 - /* 1388 - * Copy the thread sibling mask into the cache sibling mask 1389 - * and mark any CPUs that share an L2 with this CPU. 1390 - */ 1391 - for_each_cpu(i, cpu_sibling_mask(cpu)) 1392 - set_cpus_related(cpu, i, cpu_l2_cache_mask); 1393 - update_mask_by_l2(cpu, cpu_l2_cache_mask); 1226 + update_mask_by_l2(cpu); 1394 1227 1395 - /* 1396 - * Copy the cache sibling mask into core sibling mask and mark 1397 - * any CPUs on the same chip as this CPU. 
1398 - */ 1399 - for_each_cpu(i, cpu_l2_cache_mask(cpu)) 1400 - set_cpus_related(cpu, i, cpu_core_mask); 1401 - 1402 - if (pkg_id == -1) 1403 - return; 1404 - 1405 - for_each_cpu(i, cpu_online_mask) 1406 - if (get_physical_package_id(i) == pkg_id) 1407 - set_cpus_related(cpu, i, cpu_core_mask); 1228 + if (has_coregroup_support()) 1229 + update_coregroup_mask(cpu); 1408 1230 } 1409 - 1410 - static bool shared_caches; 1411 1231 1412 1232 /* Activate a secondary processor. */ 1413 1233 void start_secondary(void *unused) 1414 1234 { 1415 1235 unsigned int cpu = smp_processor_id(); 1416 - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; 1417 1236 1418 1237 mmgrab(&init_mm); 1419 1238 current->active_mm = &init_mm; ··· 1419 1278 /* Update topology CPU masks */ 1420 1279 add_cpu_to_masks(cpu); 1421 1280 1422 - if (has_big_cores) 1423 - sibling_mask = cpu_smallcore_mask; 1424 1281 /* 1425 1282 * Check for any shared caches. Note that this must be done on a 1426 1283 * per-core basis because one core in the pair might be disabled. 
1427 1284 */ 1428 - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) 1429 - shared_caches = true; 1285 + if (!shared_caches) { 1286 + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; 1287 + struct cpumask *mask = cpu_l2_cache_mask(cpu); 1288 + 1289 + if (has_big_cores) 1290 + sibling_mask = cpu_smallcore_mask; 1291 + 1292 + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu))) 1293 + shared_caches = true; 1294 + } 1430 1295 1431 1296 set_numa_node(numa_cpu_lookup_table[cpu]); 1432 1297 set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); ··· 1458 1311 return 0; 1459 1312 } 1460 1313 1461 - #ifdef CONFIG_SCHED_SMT 1462 - /* cpumask of CPUs with asymetric SMT dependancy */ 1463 - static int powerpc_smt_flags(void) 1314 + static void fixup_topology(void) 1464 1315 { 1465 - int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; 1316 + int i; 1466 1317 1467 - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { 1468 - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); 1469 - flags |= SD_ASYM_PACKING; 1318 + #ifdef CONFIG_SCHED_SMT 1319 + if (has_big_cores) { 1320 + pr_info("Big cores detected but using small core scheduling\n"); 1321 + powerpc_topology[smt_idx].mask = smallcore_smt_mask; 1470 1322 } 1471 - return flags; 1472 - } 1473 1323 #endif 1474 1324 1475 - static struct sched_domain_topology_level powerpc_topology[] = { 1476 - #ifdef CONFIG_SCHED_SMT 1477 - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, 1325 + if (!has_coregroup_support()) 1326 + powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask; 1327 + 1328 + /* 1329 + * Try to consolidate topology levels here instead of 1330 + * allowing scheduler to degenerate. 1331 + * - Dont consolidate if masks are different. 1332 + * - Dont consolidate if sd_flags exists and are different. 
1333 + */ 1334 + for (i = 1; i <= die_idx; i++) { 1335 + if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask) 1336 + continue; 1337 + 1338 + if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags && 1339 + powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags) 1340 + continue; 1341 + 1342 + if (!powerpc_topology[i - 1].sd_flags) 1343 + powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags; 1344 + 1345 + powerpc_topology[i].mask = powerpc_topology[i + 1].mask; 1346 + powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags; 1347 + #ifdef CONFIG_SCHED_DEBUG 1348 + powerpc_topology[i].name = powerpc_topology[i + 1].name; 1478 1349 #endif 1479 - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, 1480 - { NULL, }, 1481 - }; 1482 - 1483 - /* 1484 - * P9 has a slightly odd architecture where pairs of cores share an L2 cache. 1485 - * This topology makes it *much* cheaper to migrate tasks between adjacent cores 1486 - * since the migrated task remains cache hot. We want to take advantage of this 1487 - * at the scheduler level so an extra topology level is required. 1488 - */ 1489 - static int powerpc_shared_cache_flags(void) 1490 - { 1491 - return SD_SHARE_PKG_RESOURCES; 1350 + } 1492 1351 } 1493 - 1494 - /* 1495 - * We can't just pass cpu_l2_cache_mask() directly because 1496 - * returns a non-const pointer and the compiler barfs on that. 
1497 - */ 1498 - static const struct cpumask *shared_cache_mask(int cpu) 1499 - { 1500 - return cpu_l2_cache_mask(cpu); 1501 - } 1502 - 1503 - #ifdef CONFIG_SCHED_SMT 1504 - static const struct cpumask *smallcore_smt_mask(int cpu) 1505 - { 1506 - return cpu_smallcore_mask(cpu); 1507 - } 1508 - #endif 1509 - 1510 - static struct sched_domain_topology_level power9_topology[] = { 1511 - #ifdef CONFIG_SCHED_SMT 1512 - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, 1513 - #endif 1514 - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, 1515 - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, 1516 - { NULL, }, 1517 - }; 1518 1352 1519 1353 void __init smp_cpus_done(unsigned int max_cpus) 1520 1354 { ··· 1510 1382 1511 1383 dump_numa_cpu_topology(); 1512 1384 1513 - #ifdef CONFIG_SCHED_SMT 1514 - if (has_big_cores) { 1515 - pr_info("Big cores detected but using small core scheduling\n"); 1516 - power9_topology[0].mask = smallcore_smt_mask; 1517 - powerpc_topology[0].mask = smallcore_smt_mask; 1518 - } 1519 - #endif 1520 - /* 1521 - * If any CPU detects that it's sharing a cache with another CPU then 1522 - * use the deeper topology that is aware of this sharing. 1523 - */ 1524 - if (shared_caches) { 1525 - pr_info("Using shared cache scheduler topology\n"); 1526 - set_sched_topology(power9_topology); 1527 - } else { 1528 - pr_info("Using standard scheduler topology\n"); 1529 - set_sched_topology(powerpc_topology); 1530 - } 1385 + fixup_topology(); 1386 + set_sched_topology(powerpc_topology); 1531 1387 } 1532 1388 1533 1389 #ifdef CONFIG_HOTPLUG_CPU ··· 1541 1429 smp_ops->cpu_die(cpu); 1542 1430 } 1543 1431 1544 - void cpu_die(void) 1432 + void arch_cpu_idle_dead(void) 1545 1433 { 1434 + sched_preempt_enable_no_resched(); 1435 + 1546 1436 /* 1547 1437 * Disable on the down path. 
This will be re-enabled by 1548 1438 * start_secondary() via start_secondary_resume() below 1549 1439 */ 1550 1440 this_cpu_disable_ftrace(); 1551 1441 1552 - if (ppc_md.cpu_die) 1553 - ppc_md.cpu_die(); 1442 + if (smp_ops->cpu_offline_self) 1443 + smp_ops->cpu_offline_self(); 1554 1444 1555 1445 /* If we return, we re-enter start_secondary */ 1556 1446 start_secondary_resume();

+21 -28

arch/powerpc/kernel/sysfs.c

··· 32 32 33 33 static DEFINE_PER_CPU(struct cpu, cpu_devices); 34 34 35 - /* 36 - * SMT snooze delay stuff, 64-bit only for now 37 - */ 38 - 39 35 #ifdef CONFIG_PPC64 40 36 41 - /* Time in microseconds we delay before sleeping in the idle loop */ 42 - static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; 37 + /* 38 + * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle: 39 + * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in 40 + * 2014: 41 + * 42 + * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean 43 + * up the kernel code." 44 + * 45 + * powerpc-utils stopped using it as of 1.3.8. At some point in the future this 46 + * code should be removed. 47 + */ 43 48 44 49 static ssize_t store_smt_snooze_delay(struct device *dev, 45 50 struct device_attribute *attr, 46 51 const char *buf, 47 52 size_t count) 48 53 { 49 - struct cpu *cpu = container_of(dev, struct cpu, dev); 50 - ssize_t ret; 51 - long snooze; 52 - 53 - ret = sscanf(buf, "%ld", &snooze); 54 - if (ret != 1) 55 - return -EINVAL; 56 - 57 - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; 54 + pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n", 55 + current->comm, current->pid); 58 56 return count; 59 57 } 60 58 ··· 60 62 struct device_attribute *attr, 61 63 char *buf) 62 64 { 63 - struct cpu *cpu = container_of(dev, struct cpu, dev); 64 - 65 - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); 65 + pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n", 66 + current->comm, current->pid); 67 + return sprintf(buf, "100\n"); 66 68 } 67 69 68 70 static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, ··· 70 72 71 73 static int __init setup_smt_snooze_delay(char *str) 72 74 { 73 - unsigned int cpu; 74 - long snooze; 75 - 76 75 if (!cpu_has_feature(CPU_FTR_SMT)) 77 76 return 1; 78 77 79 - snooze = simple_strtol(str, NULL, 10); 80 - 
for_each_possible_cpu(cpu) 81 - per_cpu(smt_snooze_delay, cpu) = snooze; 82 - 78 + pr_warn("smt-snooze-delay command line option has no effect\n"); 83 79 return 1; 84 80 } 85 81 __setup("smt-snooze-delay=", setup_smt_snooze_delay); ··· 217 225 static void sysfs_create_dscr_default(void) 218 226 { 219 227 if (cpu_has_feature(CPU_FTR_DSCR)) { 220 - int err = 0; 221 228 int cpu; 222 229 223 230 dscr_default = spr_default_dscr; 224 231 for_each_possible_cpu(cpu) 225 232 paca_ptrs[cpu]->dscr_default = dscr_default; 226 233 227 - err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); 234 + device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); 228 235 } 229 236 } 230 237 #endif /* CONFIG_PPC64 */ ··· 1159 1168 for_each_possible_cpu(cpu) { 1160 1169 struct cpu *c = &per_cpu(cpu_devices, cpu); 1161 1170 1171 + #ifdef CONFIG_HOTPLUG_CPU 1162 1172 /* 1163 1173 * For now, we just see if the system supports making 1164 1174 * the RTAS calls for CPU hotplug. But, there may be a ··· 1167 1175 * CPU. For instance, the boot cpu might never be valid 1168 1176 * for hotplugging. 1169 1177 */ 1170 - if (ppc_md.cpu_die) 1178 + if (smp_ops->cpu_offline_self) 1171 1179 c->hotpluggable = 1; 1180 + #endif 1172 1181 1173 1182 if (cpu_online(cpu) || c->hotpluggable) { 1174 1183 register_cpu(c, cpu);

+57 -90

arch/powerpc/kernel/tau_6xx.c

··· 13 13 */ 14 14 15 15 #include <linux/errno.h> 16 - #include <linux/jiffies.h> 17 16 #include <linux/kernel.h> 18 17 #include <linux/param.h> 19 18 #include <linux/string.h> 20 19 #include <linux/mm.h> 21 20 #include <linux/interrupt.h> 22 21 #include <linux/init.h> 22 + #include <linux/delay.h> 23 + #include <linux/workqueue.h> 23 24 24 25 #include <asm/io.h> 25 26 #include <asm/reg.h> ··· 40 39 unsigned char grew; 41 40 } tau[NR_CPUS]; 42 41 43 - struct timer_list tau_timer; 44 - 45 - #undef DEBUG 42 + static bool tau_int_enable; 46 43 47 44 /* TODO: put these in a /proc interface, with some sanity checks, and maybe 48 45 * dynamic adjustment to minimize # of interrupts */ ··· 49 50 #define step_size 2 /* step size when temp goes out of range */ 50 51 #define window_expand 1 /* expand the window by this much */ 51 52 /* configurable values for shrinking the window */ 52 - #define shrink_timer 2*HZ /* period between shrinking the window */ 53 + #define shrink_timer 2000 /* period between shrinking the window */ 53 54 #define min_window 2 /* minimum window size, degrees C */ 54 55 55 56 static void set_thresholds(unsigned long cpu) 56 57 { 57 - #ifdef CONFIG_TAU_INT 58 - /* 59 - * setup THRM1, 60 - * threshold, valid bit, enable interrupts, interrupt when below threshold 61 - */ 62 - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); 58 + u32 maybe_tie = tau_int_enable ? 
THRM1_TIE : 0; 63 59 64 - /* setup THRM2, 65 - * threshold, valid bit, enable interrupts, interrupt when above threshold 66 - */ 67 - mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); 68 - #else 69 - /* same thing but don't enable interrupts */ 70 - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID); 71 - mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V); 72 - #endif 60 + /* setup THRM1, threshold, valid bit, interrupt when below threshold */ 61 + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID); 62 + 63 + /* setup THRM2, threshold, valid bit, interrupt when above threshold */ 64 + mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie); 73 65 } 74 66 75 67 static void TAUupdate(int cpu) 76 68 { 77 - unsigned thrm; 78 - 79 - #ifdef DEBUG 80 - printk("TAUupdate "); 81 - #endif 69 + u32 thrm; 70 + u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V; 82 71 83 72 /* if both thresholds are crossed, the step_sizes cancel out 84 73 * and the window winds up getting expanded twice. */ 85 - if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */ 86 - if(thrm & THRM1_TIN){ /* crossed low threshold */ 87 - if (tau[cpu].low >= step_size){ 88 - tau[cpu].low -= step_size; 89 - tau[cpu].high -= (step_size - window_expand); 90 - } 91 - tau[cpu].grew = 1; 92 - #ifdef DEBUG 93 - printk("low threshold crossed "); 94 - #endif 74 + thrm = mfspr(SPRN_THRM1); 75 + if ((thrm & bits) == bits) { 76 + mtspr(SPRN_THRM1, 0); 77 + 78 + if (tau[cpu].low >= step_size) { 79 + tau[cpu].low -= step_size; 80 + tau[cpu].high -= (step_size - window_expand); 95 81 } 82 + tau[cpu].grew = 1; 83 + pr_debug("%s: low threshold crossed\n", __func__); 96 84 } 97 - if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? 
*/ 98 - if(thrm & THRM1_TIN){ /* crossed high threshold */ 99 - if (tau[cpu].high <= 127-step_size){ 100 - tau[cpu].low += (step_size - window_expand); 101 - tau[cpu].high += step_size; 102 - } 103 - tau[cpu].grew = 1; 104 - #ifdef DEBUG 105 - printk("high threshold crossed "); 106 - #endif 85 + thrm = mfspr(SPRN_THRM2); 86 + if ((thrm & bits) == bits) { 87 + mtspr(SPRN_THRM2, 0); 88 + 89 + if (tau[cpu].high <= 127 - step_size) { 90 + tau[cpu].low += (step_size - window_expand); 91 + tau[cpu].high += step_size; 107 92 } 93 + tau[cpu].grew = 1; 94 + pr_debug("%s: high threshold crossed\n", __func__); 108 95 } 109 - 110 - #ifdef DEBUG 111 - printk("grew = %d\n", tau[cpu].grew); 112 - #endif 113 - 114 - #ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */ 115 - set_thresholds(cpu); 116 - #endif 117 - 118 96 } 119 97 120 98 #ifdef CONFIG_TAU_INT ··· 116 140 static void tau_timeout(void * info) 117 141 { 118 142 int cpu; 119 - unsigned long flags; 120 143 int size; 121 144 int shrink; 122 145 123 - /* disabling interrupts *should* be okay */ 124 - local_irq_save(flags); 125 146 cpu = smp_processor_id(); 126 147 127 - #ifndef CONFIG_TAU_INT 128 - TAUupdate(cpu); 129 - #endif 148 + if (!tau_int_enable) 149 + TAUupdate(cpu); 150 + 151 + /* Stop thermal sensor comparisons and interrupts */ 152 + mtspr(SPRN_THRM3, 0); 130 153 131 154 size = tau[cpu].high - tau[cpu].low; 132 155 if (size > min_window && ! tau[cpu].grew) { ··· 148 173 149 174 set_thresholds(cpu); 150 175 151 - /* 152 - * Do the enable every time, since otherwise a bunch of (relatively) 153 - * complex sleep code needs to be added. One mtspr every time 154 - * tau_timeout is called is probably not a big deal. 155 - * 156 - * Enable thermal sensor and set up sample interval timer 157 - * need 20 us to do the compare.. until a nice 'cpu_speed' function 158 - * call is implemented, just assume a 500 mhz clock. 
It doesn't really 159 - * matter if we take too long for a compare since it's all interrupt 160 - * driven anyway. 161 - * 162 - * use a extra long time.. (60 us @ 500 mhz) 176 + /* Restart thermal sensor comparisons and interrupts. 177 + * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet" 178 + * recommends that "the maximum value be set in THRM3 under all 179 + * conditions." 163 180 */ 164 - mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); 165 - 166 - local_irq_restore(flags); 181 + mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E); 167 182 } 168 183 169 - static void tau_timeout_smp(struct timer_list *unused) 184 + static struct workqueue_struct *tau_workq; 185 + 186 + static void tau_work_func(struct work_struct *work) 170 187 { 171 - 172 - /* schedule ourselves to be run again */ 173 - mod_timer(&tau_timer, jiffies + shrink_timer) ; 188 + msleep(shrink_timer); 174 189 on_each_cpu(tau_timeout, NULL, 0); 190 + /* schedule ourselves to be run again */ 191 + queue_work(tau_workq, work); 175 192 } 193 + 194 + DECLARE_WORK(tau_work, tau_work_func); 176 195 177 196 /* 178 197 * setup the TAU ··· 200 231 return 1; 201 232 } 202 233 234 + tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && 235 + !strcmp(cur_cpu_spec->platform, "ppc750"); 203 236 204 - /* first, set up the window shrinking timer */ 205 - timer_setup(&tau_timer, tau_timeout_smp, 0); 206 - tau_timer.expires = jiffies + shrink_timer; 207 - add_timer(&tau_timer); 237 + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); 238 + if (!tau_workq) 239 + return -ENOMEM; 208 240 209 241 on_each_cpu(TAU_init_smp, NULL, 0); 210 242 211 - printk("Thermal assist unit "); 212 - #ifdef CONFIG_TAU_INT 213 - printk("using interrupts, "); 214 - #else 215 - printk("using timers, "); 216 - #endif 217 - printk("shrink_timer: %d jiffies\n", shrink_timer); 243 + queue_work(tau_workq, &tau_work); 244 + 245 + pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n", 246 + tau_int_enable ? 
"interrupts" : "workqueue", shrink_timer); 218 247 tau_initialized = 1; 219 248 220 249 return 0;

+13 -49

arch/powerpc/kernel/time.c

··· 75 75 #include <linux/clockchips.h> 76 76 #include <linux/timekeeper_internal.h> 77 77 78 - static u64 rtc_read(struct clocksource *); 79 - static struct clocksource clocksource_rtc = { 80 - .name = "rtc", 81 - .rating = 400, 82 - .flags = CLOCK_SOURCE_IS_CONTINUOUS, 83 - .mask = CLOCKSOURCE_MASK(64), 84 - .read = rtc_read, 85 - }; 86 - 87 78 static u64 timebase_read(struct clocksource *); 88 79 static struct clocksource clocksource_timebase = { 89 80 .name = "timebase", ··· 438 447 void __delay(unsigned long loops) 439 448 { 440 449 unsigned long start; 441 - int diff; 442 450 443 451 spin_begin(); 444 - if (__USE_RTC()) { 445 - start = get_rtcl(); 446 - do { 447 - /* the RTCL register wraps at 1000000000 */ 448 - diff = get_rtcl() - start; 449 - if (diff < 0) 450 - diff += 1000000000; 451 - spin_cpu_relax(); 452 - } while (diff < loops); 453 - } else if (tb_invalid) { 452 + if (tb_invalid) { 454 453 /* 455 454 * TB is in error state and isn't ticking anymore. 456 455 * HMI handler was unable to recover from TB error. 
··· 448 467 */ 449 468 spin_cpu_relax(); 450 469 } else { 451 - start = get_tbl(); 452 - while (get_tbl() - start < loops) 470 + start = mftb(); 471 + while (mftb() - start < loops) 453 472 spin_cpu_relax(); 454 473 } 455 474 spin_end(); ··· 595 614 irq_work_run(); 596 615 } 597 616 598 - now = get_tb_or_rtc(); 617 + now = get_tb(); 599 618 if (now >= *next_tb) { 600 619 *next_tb = ~(u64)0; 601 620 if (evt->event_handler) ··· 677 696 */ 678 697 notrace unsigned long long sched_clock(void) 679 698 { 680 - if (__USE_RTC()) 681 - return get_rtc(); 682 699 return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; 683 700 } 684 701 ··· 826 847 } 827 848 828 849 /* clocksource code */ 829 - static notrace u64 rtc_read(struct clocksource *cs) 830 - { 831 - return (u64)get_rtc(); 832 - } 833 - 834 850 static notrace u64 timebase_read(struct clocksource *cs) 835 851 { 836 852 return (u64)get_tb(); ··· 922 948 923 949 static void __init clocksource_init(void) 924 950 { 925 - struct clocksource *clock; 926 - 927 - if (__USE_RTC()) 928 - clock = &clocksource_rtc; 929 - else 930 - clock = &clocksource_timebase; 951 + struct clocksource *clock = &clocksource_timebase; 931 952 932 953 if (clocksource_register_hz(clock, tb_ticks_per_sec)) { 933 954 printk(KERN_ERR "clocksource: %s is already registered\n", ··· 937 968 static int decrementer_set_next_event(unsigned long evt, 938 969 struct clock_event_device *dev) 939 970 { 940 - __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt); 971 + __this_cpu_write(decrementers_next_tb, get_tb() + evt); 941 972 set_dec(evt); 942 973 943 974 /* We may have raced with new irq work */ ··· 1040 1071 u64 scale; 1041 1072 unsigned shift; 1042 1073 1043 - if (__USE_RTC()) { 1044 - /* 601 processor: dec counts down by 128 every 128ns */ 1045 - ppc_tb_freq = 1000000000; 1046 - } else { 1047 - /* Normal PowerPC with timebase register */ 1048 - ppc_md.calibrate_decr(); 1049 - printk(KERN_DEBUG "time_init: decrementer frequency = 
%lu.%.6lu MHz\n", 1050 - ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); 1051 - printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", 1052 - ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); 1053 - } 1074 + /* Normal PowerPC with timebase register */ 1075 + ppc_md.calibrate_decr(); 1076 + printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n", 1077 + ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); 1078 + printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", 1079 + ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); 1054 1080 1055 1081 tb_ticks_per_jiffy = ppc_tb_freq / HZ; 1056 1082 tb_ticks_per_sec = ppc_tb_freq; ··· 1071 1107 tb_to_ns_scale = scale; 1072 1108 tb_to_ns_shift = shift; 1073 1109 /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ 1074 - boot_tb = get_tb_or_rtc(); 1110 + boot_tb = get_tb(); 1075 1111 1076 1112 /* If platform provided a timezone (pmac), we correct the time */ 1077 1113 if (timezone_offset) {

+31 -4

arch/powerpc/kernel/tm.S

··· 122 122 std r3, STK_PARAM(R3)(r1) 123 123 SAVE_NVGPRS(r1) 124 124 125 + /* 126 + * Save kernel live AMR since it will be clobbered by treclaim 127 + * but can be used elsewhere later in kernel space. 128 + */ 129 + mfspr r3, SPRN_AMR 130 + std r3, TM_FRAME_L1(r1) 131 + 125 132 /* We need to setup MSR for VSX register save instructions. */ 126 133 mfmsr r14 127 134 mr r15, r14 ··· 252 245 * but is used in signal return to 'wind back' to the abort handler. 253 246 */ 254 247 255 - /* ******************** CR,LR,CCR,MSR ********** */ 248 + /* ***************** CTR, LR, CR, XER ********** */ 256 249 mfctr r3 257 250 mflr r4 258 251 mfcr r5 ··· 263 256 std r5, _CCR(r7) 264 257 std r6, _XER(r7) 265 258 266 - 267 259 /* ******************** TAR, DSCR ********** */ 268 260 mfspr r3, SPRN_TAR 269 261 mfspr r4, SPRN_DSCR 270 262 271 263 std r3, THREAD_TM_TAR(r12) 272 264 std r4, THREAD_TM_DSCR(r12) 265 + 266 + /* ******************** AMR **************** */ 267 + mfspr r3, SPRN_AMR 268 + std r3, THREAD_TM_AMR(r12) 273 269 274 270 /* 275 271 * MSR and flags: We don't change CRs, and we don't need to alter MSR. ··· 318 308 std r3, THREAD_TM_TFHAR(r12) 319 309 std r4, THREAD_TM_TFIAR(r12) 320 310 321 - /* AMR is checkpointed too, but is unsupported by Linux. */ 311 + /* Restore kernel live AMR */ 312 + ld r8, TM_FRAME_L1(r1) 313 + mtspr SPRN_AMR, r8 322 314 323 315 /* Restore original MSR/IRQ state & clear TM mode */ 324 316 ld r14, TM_FRAME_L0(r1) /* Orig MSR */ ··· 366 354 * This is used for backing up the NVGPRs: 367 355 */ 368 356 SAVE_NVGPRS(r1) 357 + 358 + /* 359 + * Save kernel live AMR since it will be clobbered for trechkpt 360 + * but can be used elsewhere later in kernel space. 
361 + */ 362 + mfspr r8, SPRN_AMR 363 + std r8, TM_FRAME_L0(r1) 369 364 370 365 /* Load complete register state from ts_ckpt* registers */ 371 366 ··· 423 404 424 405 restore_gprs: 425 406 426 - /* ******************** CR,LR,CCR,MSR ********** */ 407 + /* ****************** CTR, LR, XER ************* */ 427 408 ld r4, _CTR(r7) 428 409 ld r5, _LINK(r7) 429 410 ld r8, _XER(r7) ··· 435 416 /* ******************** TAR ******************** */ 436 417 ld r4, THREAD_TM_TAR(r3) 437 418 mtspr SPRN_TAR, r4 419 + 420 + /* ******************** AMR ******************** */ 421 + ld r4, THREAD_TM_AMR(r3) 422 + mtspr SPRN_AMR, r4 438 423 439 424 /* Load up the PPR and DSCR in GPRs only at this stage */ 440 425 ld r5, THREAD_TM_DSCR(r3) ··· 531 508 /* R1 is restored, so we are recoverable again. EE is still off */ 532 509 li r4, MSR_RI 533 510 mtmsrd r4, 1 511 + 512 + /* Restore kernel live AMR */ 513 + ld r8, TM_FRAME_L0(r1) 514 + mtspr SPRN_AMR, r8 534 515 535 516 REST_NVGPRS(r1) 536 517

-4

arch/powerpc/kernel/traps.c

··· 529 529 * Check if the NIP corresponds to the address of a sync 530 530 * instruction for which there is an entry in the exception 531 531 * table. 532 - * Note that the 601 only takes a machine check on TEA 533 - * (transfer error ack) signal assertion, and does not 534 - * set any of the top 16 bits of SRR1. 535 532 * -- paulus. 536 533 */ 537 534 static inline int check_io_access(struct pt_regs *regs) ··· 793 796 case 0x80000: 794 797 pr_cont("Machine check signal\n"); 795 798 break; 796 - case 0: /* for 601 */ 797 799 case 0x40000: 798 800 case 0x140000: /* 7450 MSS error and TEA */ 799 801 pr_cont("Transfer error ack signal\n");

-2

arch/powerpc/kernel/vdso32/datapage.S

··· 47 47 * 48 48 * returns the timebase frequency in HZ 49 49 */ 50 - #ifndef CONFIG_PPC_BOOK3S_601 51 50 V_FUNCTION_BEGIN(__kernel_get_tbfreq) 52 51 .cfi_startproc 53 52 mflr r12 ··· 59 60 blr 60 61 .cfi_endproc 61 62 V_FUNCTION_END(__kernel_get_tbfreq) 62 - #endif

-2

arch/powerpc/kernel/vdso32/vdso32.lds.S

··· 144 144 __kernel_datapage_offset; 145 145 146 146 __kernel_get_syscall_map; 147 - #ifndef CONFIG_PPC_BOOK3S_601 148 147 __kernel_gettimeofday; 149 148 __kernel_clock_gettime; 150 149 __kernel_clock_getres; 151 150 __kernel_time; 152 151 __kernel_get_tbfreq; 153 - #endif 154 152 __kernel_sync_dicache; 155 153 __kernel_sync_dicache_p5; 156 154 __kernel_sigtramp32;

+7

arch/powerpc/kvm/book3s_hv.c

··· 3530 3530 */ 3531 3531 asm volatile("eieio; tlbsync; ptesync"); 3532 3532 3533 + /* 3534 + * cp_abort is required if the processor supports local copy-paste 3535 + * to clear the copy buffer that was under control of the guest. 3536 + */ 3537 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 3538 + asm volatile(PPC_CP_ABORT); 3539 + 3533 3540 mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */ 3534 3541 isync(); 3535 3542

+8

arch/powerpc/kvm/book3s_hv_rmhandlers.S

··· 1831 1831 #endif /* CONFIG_PPC_RADIX_MMU */ 1832 1832 1833 1833 /* 1834 + * cp_abort is required if the processor supports local copy-paste 1835 + * to clear the copy buffer that was under control of the guest. 1836 + */ 1837 + BEGIN_FTR_SECTION 1838 + PPC_CP_ABORT 1839 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) 1840 + 1841 + /* 1834 1842 * POWER7/POWER8 guest -> host partition switch code. 1835 1843 * We don't have to lock against tlbies but we do 1836 1844 * have to coordinate the hardware threads.

+7 -10

arch/powerpc/lib/code-patching.c

··· 21 21 static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, 22 22 struct ppc_inst *patch_addr) 23 23 { 24 - int err = 0; 25 - 26 - if (!ppc_inst_prefixed(instr)) { 27 - __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); 28 - } else { 29 - __put_user_asm(ppc_inst_as_u64(instr), patch_addr, err, "std"); 30 - } 31 - 32 - if (err) 33 - return err; 24 + if (!ppc_inst_prefixed(instr)) 25 + __put_user_asm_goto(ppc_inst_val(instr), patch_addr, failed, "stw"); 26 + else 27 + __put_user_asm_goto(ppc_inst_as_u64(instr), patch_addr, failed, "std"); 34 28 35 29 asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), 36 30 "r" (exec_addr)); 37 31 38 32 return 0; 33 + 34 + failed: 35 + return -EFAULT; 39 36 } 40 37 41 38 int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)

+6 -3

arch/powerpc/lib/sstep.c

··· 219 219 ea += regs->gpr[ra]; 220 220 else if (!prefix_r && !ra) 221 221 ; /* Leave ea as is */ 222 - else if (prefix_r && !ra) 222 + else if (prefix_r) 223 223 ea += regs->nip; 224 - else if (prefix_r && ra) 225 - ; /* Invalid form. Should already be checked for by caller! */ 224 + 225 + /* 226 + * (prefix_r && ra) is an invalid form. Should already be 227 + * checked for by caller! 228 + */ 226 229 227 230 return ea; 228 231 }

+7 -14

arch/powerpc/mm/book3s32/hash_low.S

··· 15 15 */ 16 16 17 17 #include <linux/pgtable.h> 18 + #include <linux/init.h> 18 19 #include <asm/reg.h> 19 20 #include <asm/page.h> 20 21 #include <asm/cputable.h> ··· 200 199 * covered by a BAT). -- paulus 201 200 */ 202 201 mfmsr r9 203 - SYNC 204 202 rlwinm r0,r9,0,17,15 /* clear bit 16 (MSR_EE) */ 205 203 rlwinm r0,r0,0,28,26 /* clear MSR_DR */ 206 204 mtmsr r0 207 - SYNC_601 208 205 isync 209 206 210 207 #ifdef CONFIG_SMP ··· 261 262 262 263 /* reenable interrupts and DR */ 263 264 mtmsr r9 264 - SYNC_601 265 265 isync 266 266 267 267 lwz r0,4(r1) ··· 285 287 * 286 288 * For speed, 4 of the instructions get patched once the size and 287 289 * physical address of the hash table are known. These definitions 288 - * of Hash_base and Hash_bits below are just an example. 290 + * of Hash_base and Hash_bits below are for the early hash table. 289 291 */ 290 - Hash_base = 0xc0180000 292 + Hash_base = early_hash 291 293 Hash_bits = 12 /* e.g. 256kB hash table */ 292 294 Hash_msk = (((1 << Hash_bits) - 1) * 64) 293 295 ··· 308 310 #define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) 309 311 #define HASH_RIGHT 31-LG_PTEG_SIZE 310 312 313 + __REF 311 314 _GLOBAL(create_hpte) 312 315 /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ 313 316 rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */ ··· 475 476 476 477 sync /* make sure pte updates get to memory */ 477 478 blr 479 + .previous 478 480 _ASM_NOKPROBE_SYMBOL(create_hpte) 479 481 480 482 .section .bss ··· 496 496 * 497 497 * We assume that there is a hash table in use (Hash != 0). 498 498 */ 499 + __REF 499 500 _GLOBAL(flush_hash_pages) 500 501 /* 501 502 * We disable interrupts here, even on UP, because we want ··· 507 506 * covered by a BAT). 
-- paulus 508 507 */ 509 508 mfmsr r10 510 - SYNC 511 509 rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ 512 510 rlwinm r0,r0,0,28,26 /* clear MSR_DR */ 513 511 mtmsr r0 514 - SYNC_601 515 512 isync 516 513 517 514 /* First find a PTE in the range that has _PAGE_HASHPTE set */ ··· 628 629 #endif 629 630 630 631 19: mtmsr r10 631 - SYNC_601 632 632 isync 633 633 blr 634 + .previous 634 635 EXPORT_SYMBOL(flush_hash_pages) 635 636 _ASM_NOKPROBE_SYMBOL(flush_hash_pages) 636 637 ··· 642 643 lwz r8,TASK_CPU(r2) 643 644 oris r8,r8,11 644 645 mfmsr r10 645 - SYNC 646 646 rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ 647 647 rlwinm r0,r0,0,28,26 /* clear DR */ 648 648 mtmsr r0 649 - SYNC_601 650 649 isync 651 650 lis r9,mmu_hash_lock@h 652 651 ori r9,r9,mmu_hash_lock@l ··· 661 664 li r0,0 662 665 stw r0,0(r9) /* clear mmu_hash_lock */ 663 666 mtmsr r10 664 - SYNC_601 665 667 isync 666 668 #else /* CONFIG_SMP */ 667 669 tlbie r3 ··· 677 681 lwz r8,TASK_CPU(r2) 678 682 oris r8,r8,10 679 683 mfmsr r10 680 - SYNC 681 684 rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ 682 685 rlwinm r0,r0,0,28,26 /* clear DR */ 683 686 mtmsr r0 684 - SYNC_601 685 687 isync 686 688 lis r9,mmu_hash_lock@h 687 689 ori r9,r9,mmu_hash_lock@l ··· 703 709 li r0,0 704 710 stw r0,0(r9) /* clear mmu_hash_lock */ 705 711 mtmsr r10 706 - SYNC_601 707 712 isync 708 713 #endif /* CONFIG_SMP */ 709 714 blr

+28 -66

arch/powerpc/mm/book3s32/mmu.c

··· 31 31 32 32 #include <mm/mmu_decl.h> 33 33 34 + u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0}; 35 + 34 36 struct hash_pte *Hash; 35 37 static unsigned long Hash_size, Hash_mask; 36 38 unsigned long _SDR1; ··· 75 73 static int find_free_bat(void) 76 74 { 77 75 int b; 76 + int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; 78 77 79 - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) { 80 - for (b = 0; b < 4; b++) { 81 - struct ppc_bat *bat = BATS[b]; 78 + for (b = 0; b < n; b++) { 79 + struct ppc_bat *bat = BATS[b]; 82 80 83 - if (!(bat[0].batl & 0x40)) 84 - return b; 85 - } 86 - } else { 87 - int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; 88 - 89 - for (b = 0; b < n; b++) { 90 - struct ppc_bat *bat = BATS[b]; 91 - 92 - if (!(bat[1].batu & 3)) 93 - return b; 94 - } 81 + if (!(bat[1].batu & 3)) 82 + return b; 95 83 } 96 84 return -1; 97 85 } ··· 89 97 /* 90 98 * This function calculates the size of the larger block usable to map the 91 99 * beginning of an area based on the start address and size of that area: 92 - * - max block size is 8M on 601 and 256 on other 6xx. 100 + * - max block size is 256M on 6xx. 93 101 * - base address must be aligned to the block size. So the maximum block size 94 102 * is identified by the lowest bit set to 1 in the base address (for instance 95 103 * if base is 0x16000000, max size is 0x02000000). ··· 98 106 */ 99 107 static unsigned int block_size(unsigned long base, unsigned long top) 100 108 { 101 - unsigned int max_size = IS_ENABLED(CONFIG_PPC_BOOK3S_601) ? SZ_8M : SZ_256M; 109 + unsigned int max_size = SZ_256M; 102 110 unsigned int base_shift = (ffs(base) - 1) & 31; 103 111 unsigned int block_shift = (fls(top - base) - 1) & 31; 104 112 ··· 109 117 * Set up one of the IBAT (block address translation) register pairs. 110 118 * The parameters are not checked; in particular size must be a power 111 119 * of 2 between 128k and 256M. 112 - * Only for 603+ ... 
113 120 */ 114 121 static void setibat(int index, unsigned long virt, phys_addr_t phys, 115 122 unsigned int size, pgprot_t prot) ··· 205 214 unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; 206 215 unsigned long size; 207 216 208 - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) 209 - return; 210 - 211 217 for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) { 212 218 size = block_size(base, top); 213 219 setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); ··· 240 252 { 241 253 int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; 242 254 int i; 243 - 244 - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) 245 - return; 246 255 247 256 for (i = 0; i < nb; i++) { 248 257 struct ppc_bat *bat = BATS[i]; ··· 279 294 flags &= ~_PAGE_COHERENT; 280 295 281 296 bl = (size >> 17) - 1; 282 - if (!IS_ENABLED(CONFIG_PPC_BOOK3S_601)) { 283 - /* 603, 604, etc. */ 284 - /* Do DBAT first */ 285 - wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE 286 - | _PAGE_COHERENT | _PAGE_GUARDED); 287 - wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; 288 - bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ 289 - bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; 290 - if (flags & _PAGE_USER) 291 - bat[1].batu |= 1; /* Vp = 1 */ 292 - if (flags & _PAGE_GUARDED) { 293 - /* G bit must be zero in IBATs */ 294 - flags &= ~_PAGE_EXEC; 295 - } 296 - if (flags & _PAGE_EXEC) 297 - bat[0] = bat[1]; 298 - else 299 - bat[0].batu = bat[0].batl = 0; 300 - } else { 301 - /* 601 cpu */ 302 - if (bl > BL_8M) 303 - bl = BL_8M; 304 - wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE 305 - | _PAGE_COHERENT); 306 - wimgxpp |= (flags & _PAGE_RW)? 307 - ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; 308 - bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ 309 - bat->batl = phys | bl | 0x40; /* V=1 */ 297 + /* Do DBAT first */ 298 + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE 299 + | _PAGE_COHERENT | _PAGE_GUARDED); 300 + wimgxpp |= (flags & _PAGE_RW)? 
BPP_RW: BPP_RX; 301 + bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ 302 + bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; 303 + if (flags & _PAGE_USER) 304 + bat[1].batu |= 1; /* Vp = 1 */ 305 + if (flags & _PAGE_GUARDED) { 306 + /* G bit must be zero in IBATs */ 307 + flags &= ~_PAGE_EXEC; 310 308 } 309 + if (flags & _PAGE_EXEC) 310 + bat[0] = bat[1]; 311 + else 312 + bat[0].batu = bat[0].batl = 0; 311 313 312 314 bat_addrs[index].start = virt; 313 315 bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; ··· 397 425 hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; 398 426 if (lg_n_hpteg > 16) 399 427 hash_mb2 = 16 - LG_HPTEG_SIZE; 400 - 401 - /* 402 - * When KASAN is selected, there is already an early temporary hash 403 - * table and the switch to the final hash table is done later. 404 - */ 405 - if (IS_ENABLED(CONFIG_KASAN)) 406 - return; 407 - 408 - MMU_init_hw_patch(); 409 428 } 410 429 411 430 void __init MMU_init_hw_patch(void) 412 431 { 413 432 unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); 414 433 unsigned int hash = (unsigned int)Hash - PAGE_OFFSET; 434 + 435 + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) 436 + return; 415 437 416 438 if (ppc_md.progress) 417 439 ppc_md.progress("hash:patch", 0x345); ··· 440 474 */ 441 475 BUG_ON(first_memblock_base != 0); 442 476 443 - /* 601 can only access 16MB at the moment */ 444 - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) 445 - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000)); 446 - else /* Anything else has 256M mapped */ 447 - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000)); 477 + memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_256M)); 448 478 } 449 479 450 480 void __init print_system_hash_info(void)

+4 -4

arch/powerpc/mm/book3s64/hash_native.c

··· 82 82 for (set = 0; set < num_sets; set++) 83 83 tlbiel_hash_set_isa206(set, is); 84 84 85 - asm volatile("ptesync": : :"memory"); 85 + ppc_after_tlbiel_barrier(); 86 86 } 87 87 88 88 static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) ··· 110 110 */ 111 111 tlbiel_hash_set_isa300(0, is, 0, 2, 1); 112 112 113 - asm volatile("ptesync": : :"memory"); 113 + ppc_after_tlbiel_barrier(); 114 114 115 115 asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); 116 116 } ··· 303 303 asm volatile("ptesync": : :"memory"); 304 304 if (use_local) { 305 305 __tlbiel(vpn, psize, apsize, ssize); 306 - asm volatile("ptesync": : :"memory"); 306 + ppc_after_tlbiel_barrier(); 307 307 } else { 308 308 __tlbie(vpn, psize, apsize, ssize); 309 309 fixup_tlbie_vpn(vpn, psize, apsize, ssize); ··· 879 879 __tlbiel(vpn, psize, psize, ssize); 880 880 } pte_iterate_hashed_end(); 881 881 } 882 - asm volatile("ptesync":::"memory"); 882 + ppc_after_tlbiel_barrier(); 883 883 } else { 884 884 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); 885 885

+9 -3

arch/powerpc/mm/book3s64/hash_utils.c

··· 260 260 DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n", 261 261 vstart, vend, pstart, prot, psize, ssize); 262 262 263 - for (vaddr = vstart, paddr = pstart; vaddr < vend; 264 - vaddr += step, paddr += step) { 263 + /* Carefully map only the possible range */ 264 + vaddr = ALIGN(vstart, step); 265 + paddr = ALIGN(pstart, step); 266 + vend = ALIGN_DOWN(vend, step); 267 + 268 + for (; vaddr < vend; vaddr += step, paddr += step) { 265 269 unsigned long hash, hpteg; 266 270 unsigned long vsid = get_kernel_vsid(vaddr, ssize); 267 271 unsigned long vpn = hpt_vpn(vaddr, vsid, ssize); ··· 347 343 if (!mmu_hash_ops.hpte_removebolted) 348 344 return -ENODEV; 349 345 350 - for (vaddr = vstart; vaddr < vend; vaddr += step) { 346 + /* Unmap the full range specified */ 347 + vaddr = ALIGN_DOWN(vstart, step); 348 + for (;vaddr < vend; vaddr += step) { 351 349 rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); 352 350 if (rc == -ENOENT) { 353 351 ret = -ENOENT;

+2

arch/powerpc/mm/book3s64/internal.h

··· 13 13 return static_branch_unlikely(&stress_slb_key); 14 14 } 15 15 16 + void slb_setup_new_exec(void); 17 + 16 18 #endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */

+2 -2

arch/powerpc/mm/book3s64/mmu_context.c

··· 21 21 #include <asm/mmu_context.h> 22 22 #include <asm/pgalloc.h> 23 23 24 + #include "internal.h" 25 + 24 26 static DEFINE_IDA(mmu_context_ida); 25 27 26 28 static int alloc_context_id(int min_id, int max_id) ··· 49 47 return alloc_context_id(MIN_USER_CONTEXT, max); 50 48 } 51 49 EXPORT_SYMBOL_GPL(hash__alloc_context_id); 52 - 53 - void slb_setup_new_exec(void); 54 50 55 51 static int realloc_context_ids(mm_context_t *ctx) 56 52 {

+6 -4

arch/powerpc/mm/book3s64/radix_pgtable.c

··· 34 34 35 35 unsigned int mmu_pid_bits; 36 36 unsigned int mmu_base_pid; 37 - unsigned int radix_mem_block_size __ro_after_init; 37 + unsigned long radix_mem_block_size __ro_after_init; 38 38 39 39 static __ref void *early_alloc_pgtable(unsigned long size, int nid, 40 40 unsigned long region_start, unsigned long region_end) ··· 276 276 int psize; 277 277 278 278 start = ALIGN(start, PAGE_SIZE); 279 + end = ALIGN_DOWN(end, PAGE_SIZE); 279 280 for (addr = start; addr < end; addr += mapping_size) { 280 281 unsigned long gap, previous_size; 281 282 int rc; ··· 498 497 depth, void *data) 499 498 { 500 499 unsigned long *mem_block_size = (unsigned long *)data; 501 - const __be64 *prop; 500 + const __be32 *prop; 502 501 int len; 503 502 504 503 if (depth != 1) ··· 508 507 return 0; 509 508 510 509 prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); 511 - if (!prop || len < sizeof(__be64)) 510 + 511 + if (!prop || len < dt_root_size_cells * sizeof(__be32)) 512 512 /* 513 513 * Nothing in the device tree 514 514 */ 515 515 *mem_block_size = MIN_MEMORY_BLOCK_SIZE; 516 516 else 517 - *mem_block_size = be64_to_cpup(prop); 517 + *mem_block_size = of_read_number(prop, dt_root_size_cells); 518 518 return 1; 519 519 } 520 520

+22 -13

arch/powerpc/mm/book3s64/radix_tlb.c

··· 65 65 for (set = 1; set < num_sets; set++) 66 66 tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); 67 67 68 - asm volatile("ptesync": : :"memory"); 68 + ppc_after_tlbiel_barrier(); 69 69 } 70 70 71 71 void radix__tlbiel_all(unsigned int action) ··· 296 296 297 297 /* For PWC, only one flush is needed */ 298 298 if (ric == RIC_FLUSH_PWC) { 299 - asm volatile("ptesync": : :"memory"); 299 + ppc_after_tlbiel_barrier(); 300 300 return; 301 301 } 302 302 ··· 304 304 for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) 305 305 __tlbiel_pid(pid, set, RIC_FLUSH_TLB); 306 306 307 - asm volatile("ptesync": : :"memory"); 307 + ppc_after_tlbiel_barrier(); 308 308 asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); 309 309 } 310 310 ··· 431 431 432 432 asm volatile("ptesync": : :"memory"); 433 433 __tlbiel_va(va, pid, ap, ric); 434 - asm volatile("ptesync": : :"memory"); 434 + ppc_after_tlbiel_barrier(); 435 435 } 436 436 437 437 static inline void _tlbiel_va_range(unsigned long start, unsigned long end, ··· 442 442 if (also_pwc) 443 443 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 444 444 __tlbiel_va_range(start, end, pid, page_size, psize); 445 - asm volatile("ptesync": : :"memory"); 445 + ppc_after_tlbiel_barrier(); 446 446 } 447 447 448 448 static inline void __tlbie_va_range(unsigned long start, unsigned long end, ··· 645 645 struct mm_struct *mm = arg; 646 646 unsigned long pid = mm->context.id; 647 647 648 + /* 649 + * A kthread could have done a mmget_not_zero() after the flushing CPU 650 + * checked mm_is_singlethreaded, and be in the process of 651 + * kthread_use_mm when interrupted here. In that case, current->mm will 652 + * be set to mm, because kthread_use_mm() setting ->mm and switching to 653 + * the mm is done with interrupts off. 654 + */ 648 655 if (current->mm == mm) 649 - return; /* Local CPU */ 656 + goto out_flush; 650 657 651 658 if (current->active_mm == mm) { 652 - /* 653 - * Must be a kernel thread because sender is single-threaded. 
654 - */ 655 - BUG_ON(current->mm); 659 + WARN_ON_ONCE(current->mm != NULL); 660 + /* Is a kernel thread and is using mm as the lazy tlb */ 656 661 mmgrab(&init_mm); 657 - switch_mm(mm, &init_mm, current); 658 662 current->active_mm = &init_mm; 663 + switch_mm_irqs_off(mm, &init_mm, current); 659 664 mmdrop(mm); 660 665 } 666 + 667 + atomic_dec(&mm->context.active_cpus); 668 + cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm)); 669 + 670 + out_flush: 661 671 _tlbiel_pid(pid, RIC_FLUSH_ALL); 662 672 } 663 673 ··· 682 672 */ 683 673 smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, 684 674 (void *)mm, 1); 685 - mm_reset_thread_local(mm); 686 675 } 687 676 688 677 void radix__flush_tlb_mm(struct mm_struct *mm) ··· 949 940 if (hflush) 950 941 __tlbiel_va_range(hstart, hend, pid, 951 942 PMD_SIZE, MMU_PAGE_2M); 952 - asm volatile("ptesync": : :"memory"); 943 + ppc_after_tlbiel_barrier(); 953 944 } else if (cputlb_use_tlbie()) { 954 945 asm volatile("ptesync": : :"memory"); 955 946 __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);

+2 -2

arch/powerpc/mm/book3s64/slb.c

··· 765 765 766 766 if (id == LINEAR_MAP_REGION_ID) { 767 767 768 - /* We only support upto MAX_PHYSMEM_BITS */ 769 - if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS)) 768 + /* We only support upto H_MAX_PHYSMEM_BITS */ 769 + if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS)) 770 770 return -EFAULT; 771 771 772 772 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;

+1 -5

arch/powerpc/mm/drmem.c

··· 389 389 if (!drmem_info->lmbs) 390 390 return; 391 391 392 - for_each_drmem_lmb(lmb) { 392 + for_each_drmem_lmb(lmb) 393 393 read_drconf_v1_cell(lmb, &prop); 394 - lmb_set_nid(lmb); 395 - } 396 394 } 397 395 398 396 static void __init init_drmem_v2_lmbs(const __be32 *prop) ··· 435 437 436 438 lmb->aa_index = dr_cell.aa_index; 437 439 lmb->flags = dr_cell.flags; 438 - 439 - lmb_set_nid(lmb); 440 440 } 441 441 } 442 442 }

+17 -3

arch/powerpc/mm/hugetlbpage.c

··· 180 180 if (!hpdp) 181 181 return NULL; 182 182 183 - if (IS_ENABLED(CONFIG_PPC_8xx) && sz == SZ_512K) 183 + if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT) 184 184 return pte_alloc_map(mm, (pmd_t *)hpdp, addr); 185 185 186 186 BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); ··· 330 330 get_hugepd_cache_index(pdshift - shift)); 331 331 } 332 332 333 - static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) 333 + static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, 334 + unsigned long addr, unsigned long end, 335 + unsigned long floor, unsigned long ceiling) 334 336 { 337 + unsigned long start = addr; 335 338 pgtable_t token = pmd_pgtable(*pmd); 339 + 340 + start &= PMD_MASK; 341 + if (start < floor) 342 + return; 343 + if (ceiling) { 344 + ceiling &= PMD_MASK; 345 + if (!ceiling) 346 + return; 347 + } 348 + if (end - 1 > ceiling - 1) 349 + return; 336 350 337 351 pmd_clear(pmd); 338 352 pte_free_tlb(tlb, token, addr); ··· 377 363 */ 378 364 WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx)); 379 365 380 - hugetlb_free_pte_range(tlb, pmd, addr); 366 + hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling); 381 367 382 368 continue; 383 369 }

+29 -10

arch/powerpc/mm/init_64.c

··· 162 162 return next++; 163 163 } 164 164 165 - static __meminit void vmemmap_list_populate(unsigned long phys, 166 - unsigned long start, 167 - int node) 165 + static __meminit int vmemmap_list_populate(unsigned long phys, 166 + unsigned long start, 167 + int node) 168 168 { 169 169 struct vmemmap_backing *vmem_back; 170 170 171 171 vmem_back = vmemmap_list_alloc(node); 172 172 if (unlikely(!vmem_back)) { 173 - WARN_ON(1); 174 - return; 173 + pr_debug("vmemap list allocation failed\n"); 174 + return -ENOMEM; 175 175 } 176 176 177 177 vmem_back->phys = phys; ··· 179 179 vmem_back->list = vmemmap_list; 180 180 181 181 vmemmap_list = vmem_back; 182 + return 0; 182 183 } 183 184 184 185 static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start, ··· 200 199 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 201 200 struct vmem_altmap *altmap) 202 201 { 202 + bool altmap_alloc; 203 203 unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; 204 204 205 205 /* Align to the page size of the linear mapping. */ ··· 230 228 p = vmemmap_alloc_block_buf(page_size, node, altmap); 231 229 if (!p) 232 230 pr_debug("altmap block allocation failed, falling back to system memory"); 231 + else 232 + altmap_alloc = true; 233 233 } 234 - if (!p) 234 + if (!p) { 235 235 p = vmemmap_alloc_block_buf(page_size, node, NULL); 236 + altmap_alloc = false; 237 + } 236 238 if (!p) 237 239 return -ENOMEM; 238 240 239 - vmemmap_list_populate(__pa(p), start, node); 241 + if (vmemmap_list_populate(__pa(p), start, node)) { 242 + /* 243 + * If we don't populate vmemap list, we don't have 244 + * the ability to free the allocated vmemmap 245 + * pages in section_deactivate. Hence free them 246 + * here. 
247 + */ 248 + int nr_pfns = page_size >> PAGE_SHIFT; 249 + unsigned long page_order = get_order(page_size); 250 + 251 + if (altmap_alloc) 252 + vmem_altmap_free(altmap, nr_pfns); 253 + else 254 + free_pages((unsigned long)p, page_order); 255 + return -ENOMEM; 256 + } 240 257 241 258 pr_debug(" * %016lx..%016lx allocated at %p\n", 242 259 start, start + page_size, p); ··· 285 264 vmem_back_prev = vmem_back; 286 265 } 287 266 288 - if (unlikely(!vmem_back)) { 289 - WARN_ON(1); 267 + if (unlikely(!vmem_back)) 290 268 return 0; 291 - } 292 269 293 270 /* remove it from vmemmap_list */ 294 271 if (vmem_back == vmemmap_list) /* remove head */

+9 -22

arch/powerpc/mm/kasan/kasan_init_32.c

··· 127 127 { 128 128 int ret; 129 129 130 - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || 131 - IS_ENABLED(CONFIG_KASAN_VMALLOC)) { 130 + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) { 132 131 ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); 133 132 134 133 if (ret) ··· 139 140 { 140 141 phys_addr_t base, end; 141 142 u64 i; 143 + int ret; 142 144 143 145 for_each_mem_range(i, &base, &end) { 144 146 phys_addr_t top = min(end, total_lowmem); 145 - int ret; 146 147 147 148 if (base >= top) 148 149 continue; ··· 150 151 ret = kasan_init_region(__va(base), top - base); 151 152 if (ret) 152 153 panic("kasan: kasan_init_region() failed"); 154 + } 155 + 156 + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { 157 + ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); 158 + 159 + if (ret) 160 + panic("kasan: kasan_init_shadow_page_tables() failed"); 153 161 } 154 162 155 163 kasan_remap_early_shadow_ro(); ··· 174 168 kasan_unmap_early_shadow_vmalloc(); 175 169 } 176 170 177 - #ifdef CONFIG_PPC_BOOK3S_32 178 - u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0}; 179 - 180 - static void __init kasan_early_hash_table(void) 181 - { 182 - unsigned int hash = __pa(early_hash); 183 - 184 - modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16); 185 - modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16); 186 - 187 - Hash = (struct hash_pte *)early_hash; 188 - } 189 - #else 190 - static void __init kasan_early_hash_table(void) {} 191 - #endif 192 - 193 171 void __init kasan_early_init(void) 194 172 { 195 173 unsigned long addr = KASAN_SHADOW_START; ··· 189 199 next = pgd_addr_end(addr, end); 190 200 pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte); 191 201 } while (pmd++, addr = next, addr != end); 192 - 193 - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) 194 - kasan_early_hash_table(); 195 202 }

+5 -1

arch/powerpc/mm/mem.c

··· 49 49 #include <asm/swiotlb.h> 50 50 #include <asm/rtas.h> 51 51 #include <asm/kasan.h> 52 + #include <asm/svm.h> 52 53 53 54 #include <mm/mmu_decl.h> 54 55 ··· 284 283 * back to top-down. 285 284 */ 286 285 memblock_set_bottom_up(true); 287 - swiotlb_init(0); 286 + if (is_secure_guest()) 287 + svm_swiotlb_init(); 288 + else 289 + swiotlb_init(0); 288 290 #endif 289 291 290 292 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);

-7

arch/powerpc/mm/nohash/8xx.c

··· 244 244 mb(); 245 245 } 246 246 247 - void flush_instruction_cache(void) 248 - { 249 - isync(); 250 - mtspr(SPRN_IC_CST, IDC_INVALL); 251 - isync(); 252 - } 253 - 254 247 #ifdef CONFIG_PPC_KUEP 255 248 void __init setup_kuep(bool disabled) 256 249 {

+16

arch/powerpc/mm/nohash/fsl_booke.c

··· 219 219 return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; 220 220 } 221 221 222 + void flush_instruction_cache(void) 223 + { 224 + unsigned long tmp; 225 + 226 + if (IS_ENABLED(CONFIG_E200)) { 227 + tmp = mfspr(SPRN_L1CSR0); 228 + tmp |= L1CSR0_CFI | L1CSR0_CLFC; 229 + mtspr(SPRN_L1CSR0, tmp); 230 + } else { 231 + tmp = mfspr(SPRN_L1CSR1); 232 + tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; 233 + mtspr(SPRN_L1CSR1, tmp); 234 + } 235 + isync(); 236 + } 237 + 222 238 /* 223 239 * MMU_init_hw does the chip-specific initialization of the MMU hardware. 224 240 */

-4

arch/powerpc/mm/nohash/tlb.c

··· 83 83 }; 84 84 #elif defined(CONFIG_PPC_8xx) 85 85 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { 86 - /* we only manage 4k and 16k pages as normal pages */ 87 - #ifdef CONFIG_PPC_4K_PAGES 88 86 [MMU_PAGE_4K] = { 89 87 .shift = 12, 90 88 }, 91 - #else 92 89 [MMU_PAGE_16K] = { 93 90 .shift = 14, 94 91 }, 95 - #endif 96 92 [MMU_PAGE_512K] = { 97 93 .shift = 19, 98 94 },

+84 -17

arch/powerpc/mm/numa.c

··· 430 430 * This is like of_node_to_nid_single() for memory represented in the 431 431 * ibm,dynamic-reconfiguration-memory node. 432 432 */ 433 - static int of_drconf_to_nid_single(struct drmem_lmb *lmb) 433 + int of_drconf_to_nid_single(struct drmem_lmb *lmb) 434 434 { 435 435 struct assoc_arrays aa = { .arrays = NULL }; 436 436 int default_nid = NUMA_NO_NODE; ··· 506 506 struct device_node *cpu; 507 507 int fcpu = cpu_first_thread_sibling(lcpu); 508 508 int nid = NUMA_NO_NODE; 509 + 510 + if (!cpu_present(lcpu)) { 511 + set_cpu_numa_node(lcpu, first_online_node); 512 + return first_online_node; 513 + } 509 514 510 515 /* 511 516 * If a valid cpu-to-node mapping is already available, use it ··· 728 723 */ 729 724 for_each_present_cpu(i) { 730 725 struct device_node *cpu; 731 - int nid; 732 - 733 - cpu = of_get_cpu_node(i, NULL); 734 - BUG_ON(!cpu); 735 - nid = of_node_to_nid_single(cpu); 736 - of_node_put(cpu); 726 + int nid = vphn_get_nid(i); 737 727 738 728 /* 739 729 * Don't fall back to default_nid yet -- we will plug 740 730 * cpus into nodes once the memory scan has discovered 741 731 * the topology. 
742 732 */ 743 - if (nid < 0) 744 - continue; 745 - node_set_online(nid); 733 + if (nid == NUMA_NO_NODE) { 734 + cpu = of_get_cpu_node(i, NULL); 735 + BUG_ON(!cpu); 736 + nid = of_node_to_nid_single(cpu); 737 + of_node_put(cpu); 738 + } 739 + 740 + if (likely(nid > 0)) 741 + node_set_online(nid); 746 742 } 747 743 748 744 get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); ··· 894 888 static void __init find_possible_nodes(void) 895 889 { 896 890 struct device_node *rtas; 897 - u32 numnodes, i; 891 + const __be32 *domains; 892 + int prop_length, max_nodes; 893 + u32 i; 898 894 899 895 if (!numa_enabled) 900 896 return; ··· 905 897 if (!rtas) 906 898 return; 907 899 908 - if (of_property_read_u32_index(rtas, 909 - "ibm,max-associativity-domains", 910 - min_common_depth, &numnodes)) 911 - goto out; 900 + /* 901 + * ibm,current-associativity-domains is a fairly recent property. If 902 + * it doesn't exist, then fallback on ibm,max-associativity-domains. 903 + * Current denotes what the platform can support compared to max 904 + * which denotes what the Hypervisor can support. 905 + */ 906 + domains = of_get_property(rtas, "ibm,current-associativity-domains", 907 + &prop_length); 908 + if (!domains) { 909 + domains = of_get_property(rtas, "ibm,max-associativity-domains", 910 + &prop_length); 911 + if (!domains) 912 + goto out; 913 + } 912 914 913 - for (i = 0; i < numnodes; i++) { 915 + max_nodes = of_read_number(&domains[min_common_depth], 1); 916 + for (i = 0; i < max_nodes; i++) { 914 917 if (!node_possible(i)) 915 918 node_set(i, node_possible_map); 916 919 } 920 + 921 + prop_length /= sizeof(int); 922 + if (prop_length > min_common_depth + 2) 923 + coregroup_enabled = 1; 917 924 918 925 out: 919 926 of_node_put(rtas); ··· 937 914 void __init mem_topology_setup(void) 938 915 { 939 916 int cpu; 917 + 918 + /* 919 + * Linux/mm assumes node 0 to be online at boot. 
However this is not 920 + * true on PowerPC, where node 0 is similar to any other node, it 921 + * could be cpuless, memoryless node. So force node 0 to be offline 922 + * for now. This will prevent cpuless, memoryless node 0 showing up 923 + * unnecessarily as online. If a node has cpus or memory that need 924 + * to be online, then node will anyway be marked online. 925 + */ 926 + node_set_offline(0); 940 927 941 928 if (parse_numa_properties()) 942 929 setup_nonnuma(); ··· 965 932 966 933 reset_numa_cpu_lookup_table(); 967 934 968 - for_each_present_cpu(cpu) 935 + for_each_possible_cpu(cpu) { 936 + /* 937 + * Powerpc with CONFIG_NUMA always used to have a node 0, 938 + * even if it was memoryless or cpuless. For all cpus that 939 + * are possible but not present, cpu_to_node() would point 940 + * to node 0. To remove a cpuless, memoryless dummy node, 941 + * powerpc need to make sure all possible but not present 942 + * cpu_to_node are set to a proper node. 943 + */ 969 944 numa_setup_cpu(cpu); 945 + } 970 946 } 971 947 972 948 void __init initmem_init(void) ··· 1240 1198 pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__, 1241 1199 cpu, new_nid); 1242 1200 return new_nid; 1201 + } 1202 + 1203 + int cpu_to_coregroup_id(int cpu) 1204 + { 1205 + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; 1206 + int index; 1207 + 1208 + if (cpu < 0 || cpu > nr_cpu_ids) 1209 + return -1; 1210 + 1211 + if (!coregroup_enabled) 1212 + goto out; 1213 + 1214 + if (!firmware_has_feature(FW_FEATURE_VPHN)) 1215 + goto out; 1216 + 1217 + if (vphn_get_associativity(cpu, associativity)) 1218 + goto out; 1219 + 1220 + index = of_read_number(associativity, 1); 1221 + if (index > min_common_depth + 1) 1222 + return of_read_number(&associativity[index - 1], 1); 1223 + 1224 + out: 1225 + return cpu_to_core_id(cpu); 1243 1226 } 1244 1227 1245 1228 static int topology_update_init(void)

+4 -2

arch/powerpc/mm/pgtable.c

··· 263 263 pmd_t *pmd = pmd_off(mm, addr); 264 264 pte_basic_t val; 265 265 pte_basic_t *entry = &ptep->pte; 266 - int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K; 267 - int i; 266 + int num, i; 268 267 269 268 /* 270 269 * Make sure hardware valid bit is not set. We don't do ··· 274 275 pte = set_pte_filter(pte); 275 276 276 277 val = pte_val(pte); 278 + 279 + num = number_of_cells_per_pte(pmd, val, 1); 280 + 277 281 for (i = 0; i < num; i++, entry++, val += SZ_4K) 278 282 *entry = val; 279 283 }

+5

arch/powerpc/mm/ptdump/8xx.c

··· 11 11 12 12 static const struct flag_info flag_array[] = { 13 13 { 14 + #ifdef CONFIG_PPC_16K_PAGES 14 15 .mask = _PAGE_HUGE, 15 16 .val = _PAGE_HUGE, 17 + #else 18 + .mask = _PAGE_SPS, 19 + .val = _PAGE_SPS, 20 + #endif 16 21 .set = "huge", 17 22 .clear = " ", 18 23 }, {

-59

arch/powerpc/mm/ptdump/bats.c

··· 12 12 13 13 #include "ptdump.h" 14 14 15 - static char *pp_601(int k, int pp) 16 - { 17 - if (pp == 0) 18 - return k ? " " : "rwx"; 19 - if (pp == 1) 20 - return k ? "r x" : "rwx"; 21 - if (pp == 2) 22 - return "rwx"; 23 - return "r x"; 24 - } 25 - 26 - static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) 27 - { 28 - u32 blpi = upper & 0xfffe0000; 29 - u32 k = (upper >> 2) & 3; 30 - u32 pp = upper & 3; 31 - phys_addr_t pbn = PHYS_BAT_ADDR(lower); 32 - u32 bsm = lower & 0x3ff; 33 - u32 size = (bsm + 1) << 17; 34 - 35 - seq_printf(m, "%d: ", idx); 36 - if (!(lower & 0x40)) { 37 - seq_puts(m, " -\n"); 38 - return; 39 - } 40 - 41 - seq_printf(m, "0x%08x-0x%08x ", blpi, blpi + size - 1); 42 - #ifdef CONFIG_PHYS_64BIT 43 - seq_printf(m, "0x%016llx ", pbn); 44 - #else 45 - seq_printf(m, "0x%08x ", pbn); 46 - #endif 47 - pt_dump_size(m, size); 48 - 49 - seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp)); 50 - 51 - seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); 52 - seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); 53 - seq_puts(m, lower & _PAGE_COHERENT ? 
"m " : " "); 54 - seq_puts(m, "\n"); 55 - } 56 - 57 - #define BAT_SHOW_601(_m, _n, _l, _u) bat_show_601(_m, _n, mfspr(_l), mfspr(_u)) 58 - 59 - static int bats_show_601(struct seq_file *m, void *v) 60 - { 61 - seq_puts(m, "---[ Block Address Translation ]---\n"); 62 - 63 - BAT_SHOW_601(m, 0, SPRN_IBAT0L, SPRN_IBAT0U); 64 - BAT_SHOW_601(m, 1, SPRN_IBAT1L, SPRN_IBAT1U); 65 - BAT_SHOW_601(m, 2, SPRN_IBAT2L, SPRN_IBAT2U); 66 - BAT_SHOW_601(m, 3, SPRN_IBAT3L, SPRN_IBAT3U); 67 - 68 - return 0; 69 - } 70 - 71 15 static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool is_d) 72 16 { 73 17 u32 bepi = upper & 0xfffe0000; ··· 90 146 91 147 static int bats_open(struct inode *inode, struct file *file) 92 148 { 93 - if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) 94 - return single_open(file, bats_show_601, NULL); 95 - 96 149 return single_open(file, bats_show_603, NULL); 97 150 } 98 151

+1 -1

arch/powerpc/oprofile/cell/spu_task_sync.c

··· 572 572 * samples are recorded. 573 573 * No big deal -- so we just drop a few samples. 574 574 */ 575 - pr_debug("SPU_PROF: No cached SPU contex " 575 + pr_debug("SPU_PROF: No cached SPU context " 576 576 "for SPU #%d. Dropping samples.\n", spu_num); 577 577 goto out; 578 578 }

+3 -3

arch/powerpc/perf/hv-gpci-requests.h

··· 95 95 96 96 #define REQUEST_NAME system_performance_capabilities 97 97 #define REQUEST_NUM 0x40 98 - #define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" 98 + #define REQUEST_IDX_KIND "starting_index=0xffffffff" 99 99 #include I(REQUEST_BEGIN) 100 100 REQUEST(__field(0, 1, perf_collect_privileged) 101 101 __field(0x1, 1, capability_mask) ··· 223 223 224 224 #define REQUEST_NAME system_hypervisor_times 225 225 #define REQUEST_NUM 0xF0 226 - #define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" 226 + #define REQUEST_IDX_KIND "starting_index=0xffffffff" 227 227 #include I(REQUEST_BEGIN) 228 228 REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) 229 229 __count(0x8, 8, time_spent_processing_virtual_processor_timers) ··· 234 234 235 235 #define REQUEST_NAME system_tlbie_count_and_time 236 236 #define REQUEST_NUM 0xF4 237 - #define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" 237 + #define REQUEST_IDX_KIND "starting_index=0xffffffff" 238 238 #include I(REQUEST_BEGIN) 239 239 REQUEST(__count(0, 8, tlbie_instructions_issued) 240 240 /*

+64 -9

arch/powerpc/perf/hv-gpci.c

··· 48 48 /* u32, byte offset */ 49 49 EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); 50 50 51 + static cpumask_t hv_gpci_cpumask; 52 + 51 53 static struct attribute *format_attrs[] = { 52 54 &format_attr_request.attr, 53 55 &format_attr_starting_index.attr, ··· 96 94 return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); 97 95 } 98 96 97 + static ssize_t cpumask_show(struct device *dev, 98 + struct device_attribute *attr, char *buf) 99 + { 100 + return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); 101 + } 102 + 99 103 static DEVICE_ATTR_RO(kernel_version); 104 + static DEVICE_ATTR_RO(cpumask); 105 + 100 106 HV_CAPS_ATTR(version, "0x%x\n"); 101 107 HV_CAPS_ATTR(ga, "%d\n"); 102 108 HV_CAPS_ATTR(expanded, "%d\n"); ··· 121 111 NULL, 122 112 }; 123 113 114 + static struct attribute *cpumask_attrs[] = { 115 + &dev_attr_cpumask.attr, 116 + NULL, 117 + }; 118 + 119 + static struct attribute_group cpumask_attr_group = { 120 + .attrs = cpumask_attrs, 121 + }; 122 + 124 123 static struct attribute_group interface_group = { 125 124 .name = "interface", 126 125 .attrs = interface_attrs, ··· 139 120 &format_group, 140 121 &event_group, 141 122 &interface_group, 123 + &cpumask_attr_group, 142 124 NULL, 143 125 }; 144 126 145 - #define HGPCI_REQ_BUFFER_SIZE 4096 146 - #define HGPCI_MAX_DATA_BYTES \ 147 - (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params)) 148 - 149 127 static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); 150 - 151 - struct hv_gpci_request_buffer { 152 - struct hv_get_perf_counter_info_params params; 153 - uint8_t bytes[HGPCI_MAX_DATA_BYTES]; 154 - } __packed; 155 128 156 129 static unsigned long single_gpci_request(u32 req, u32 starting_index, 157 130 u16 secondary_index, u8 version_in, u32 offset, u8 length, ··· 286 275 .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 287 276 }; 288 277 278 + static int ppc_hv_gpci_cpu_online(unsigned int cpu) 279 + { 280 + if 
(cpumask_empty(&hv_gpci_cpumask)) 281 + cpumask_set_cpu(cpu, &hv_gpci_cpumask); 282 + 283 + return 0; 284 + } 285 + 286 + static int ppc_hv_gpci_cpu_offline(unsigned int cpu) 287 + { 288 + int target; 289 + 290 + /* Check if exiting cpu is used for collecting gpci events */ 291 + if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask)) 292 + return 0; 293 + 294 + /* Find a new cpu to collect gpci events */ 295 + target = cpumask_last(cpu_active_mask); 296 + 297 + if (target < 0 || target >= nr_cpu_ids) { 298 + pr_err("hv_gpci: CPU hotplug init failed\n"); 299 + return -1; 300 + } 301 + 302 + /* Migrate gpci events to the new target */ 303 + cpumask_set_cpu(target, &hv_gpci_cpumask); 304 + perf_pmu_migrate_context(&h_gpci_pmu, cpu, target); 305 + 306 + return 0; 307 + } 308 + 309 + static int hv_gpci_cpu_hotplug_init(void) 310 + { 311 + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, 312 + "perf/powerpc/hv_gcpi:online", 313 + ppc_hv_gpci_cpu_online, 314 + ppc_hv_gpci_cpu_offline); 315 + } 316 + 289 317 static int hv_gpci_init(void) 290 318 { 291 319 int r; ··· 344 294 hret); 345 295 return -ENODEV; 346 296 } 297 + 298 + /* init cpuhotplug */ 299 + r = hv_gpci_cpu_hotplug_init(); 300 + if (r) 301 + return r; 347 302 348 303 /* sampling not supported */ 349 304 h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

-27

arch/powerpc/perf/hv-gpci.h

··· 2 2 #ifndef LINUX_POWERPC_PERF_HV_GPCI_H_ 3 3 #define LINUX_POWERPC_PERF_HV_GPCI_H_ 4 4 5 - #include <linux/types.h> 6 - 7 - /* From the document "H_GetPerformanceCounterInfo Interface" v1.07 */ 8 - 9 - /* H_GET_PERF_COUNTER_INFO argument */ 10 - struct hv_get_perf_counter_info_params { 11 - __be32 counter_request; /* I */ 12 - __be32 starting_index; /* IO */ 13 - __be16 secondary_index; /* IO */ 14 - __be16 returned_values; /* O */ 15 - __be32 detail_rc; /* O, only needed when called via *_norets() */ 16 - 17 - /* 18 - * O, size each of counter_value element in bytes, only set for version 19 - * >= 0x3 20 - */ 21 - __be16 cv_element_size; 22 - 23 - /* I, 0 (zero) for versions < 0x3 */ 24 - __u8 counter_info_version_in; 25 - 26 - /* O, 0 (zero) if version < 0x3. Must be set to 0 when making hcall */ 27 - __u8 counter_info_version_out; 28 - __u8 reserved[0xC]; 29 - __u8 counter_value[]; 30 - } __packed; 31 - 32 5 /* 33 6 * counter info version => fw version/reference (spec version) 34 7 *

-3

arch/powerpc/perf/imc-pmu.c

··· 1426 1426 1427 1427 static int trace_imc_event_init(struct perf_event *event) 1428 1428 { 1429 - struct task_struct *target; 1430 - 1431 1429 if (event->attr.type != event->pmu->type) 1432 1430 return -ENOENT; 1433 1431 ··· 1456 1458 mutex_unlock(&imc_global_refc.lock); 1457 1459 1458 1460 event->hw.idx = -1; 1459 - target = event->hw.target; 1460 1461 1461 1462 event->pmu->task_ctx_nr = perf_hw_context; 1462 1463 event->destroy = reset_global_refc;

+10

arch/powerpc/perf/isa207-common.c

··· 288 288 289 289 mask |= CNST_PMC_MASK(pmc); 290 290 value |= CNST_PMC_VAL(pmc); 291 + 292 + /* 293 + * PMC5 and PMC6 are used to count cycles and instructions and 294 + * they do not support most of the constraint bits. Add a check 295 + * to exclude PMC5/6 from most of the constraints except for 296 + * EBB/BHRB. 297 + */ 298 + if (pmc >= 5) 299 + goto ebb_bhrb; 291 300 } 292 301 293 302 if (pmc <= 4) { ··· 366 357 } 367 358 } 368 359 360 + ebb_bhrb: 369 361 if (!pmc && ebb) 370 362 /* EBB events must specify the PMC */ 371 363 return -1;

+2

arch/powerpc/perf/isa207-common.h

··· 13 13 #include <asm/firmware.h> 14 14 #include <asm/cputable.h> 15 15 16 + #include "internal.h" 17 + 16 18 #define EVENT_EBB_MASK 1ull 17 19 #define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT 18 20 #define EVENT_BHRB_MASK 1ull

-1

arch/powerpc/perf/power10-pmu.c

··· 9 9 #define pr_fmt(fmt) "power10-pmu: " fmt 10 10 11 11 #include "isa207-common.h" 12 - #include "internal.h" 13 12 14 13 /* 15 14 * Raw event encoding for Power10:

+2

arch/powerpc/perf/power5+-pmu.c

··· 10 10 #include <asm/reg.h> 11 11 #include <asm/cputable.h> 12 12 13 + #include "internal.h" 14 + 13 15 /* 14 16 * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) 15 17 */

+2

arch/powerpc/perf/power5-pmu.c

··· 10 10 #include <asm/reg.h> 11 11 #include <asm/cputable.h> 12 12 13 + #include "internal.h" 14 + 13 15 /* 14 16 * Bits in event code for POWER5 (not POWER5++) 15 17 */

+2

arch/powerpc/perf/power6-pmu.c

··· 10 10 #include <asm/reg.h> 11 11 #include <asm/cputable.h> 12 12 13 + #include "internal.h" 14 + 13 15 /* 14 16 * Bits in event code for POWER6 15 17 */

+2

arch/powerpc/perf/power7-pmu.c

··· 10 10 #include <asm/reg.h> 11 11 #include <asm/cputable.h> 12 12 13 + #include "internal.h" 14 + 13 15 /* 14 16 * Bits in event code for POWER7 15 17 */

+2

arch/powerpc/perf/ppc970-pmu.c

··· 9 9 #include <asm/reg.h> 10 10 #include <asm/cputable.h> 11 11 12 + #include "internal.h" 13 + 12 14 /* 13 15 * Bits in event code for PPC970 14 16 */

+1

arch/powerpc/platforms/44x/machine_check.c

··· 7 7 #include <linux/ptrace.h> 8 8 9 9 #include <asm/reg.h> 10 + #include <asm/cacheflush.h> 10 11 11 12 int machine_check_440A(struct pt_regs *regs) 12 13 {

+2 -3

arch/powerpc/platforms/44x/ppc476.c

··· 86 86 avr_halt_system(AVR_PWRCTL_RESET); 87 87 } 88 88 89 - static int avr_probe(struct i2c_client *client, 90 - const struct i2c_device_id *id) 89 + static int avr_probe(struct i2c_client *client) 91 90 { 92 91 avr_i2c_client = client; 93 92 ppc_md.restart = avr_reset_system; ··· 103 104 .driver = { 104 105 .name = "akebono-avr", 105 106 }, 106 - .probe = avr_probe, 107 + .probe_new = avr_probe, 107 108 .id_table = avr_id, 108 109 }; 109 110

+2 -2

arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c

··· 142 142 return 0; 143 143 } 144 144 145 - static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id) 145 + static int mcu_probe(struct i2c_client *client) 146 146 { 147 147 struct mcu *mcu; 148 148 int ret; ··· 221 221 .name = "mcu-mpc8349emitx", 222 222 .of_match_table = mcu_of_match_table, 223 223 }, 224 - .probe = mcu_probe, 224 + .probe_new = mcu_probe, 225 225 .remove = mcu_remove, 226 226 .id_table = mcu_ids, 227 227 };

+2 -2

arch/powerpc/platforms/85xx/smp.c

··· 112 112 local_irq_restore(flags); 113 113 } 114 114 115 - static void smp_85xx_mach_cpu_die(void) 115 + static void smp_85xx_cpu_offline_self(void) 116 116 { 117 117 unsigned int cpu = smp_processor_id(); 118 118 ··· 506 506 if (qoriq_pm_ops) { 507 507 smp_85xx_ops.give_timebase = mpc85xx_give_timebase; 508 508 smp_85xx_ops.take_timebase = mpc85xx_take_timebase; 509 - ppc_md.cpu_die = smp_85xx_mach_cpu_die; 509 + smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self; 510 510 smp_85xx_ops.cpu_die = qoriq_cpu_kill; 511 511 } 512 512 #endif

+4 -25

arch/powerpc/platforms/Kconfig

··· 199 199 200 200 endmenu 201 201 202 - config PPC601_SYNC_FIX 203 - bool "Workarounds for PPC601 bugs" 204 - depends on PPC_BOOK3S_601 && PPC_PMAC 205 - default y 206 - help 207 - Some versions of the PPC601 (the first PowerPC chip) have bugs which 208 - mean that extra synchronization instructions are required near 209 - certain instructions, typically those that make major changes to the 210 - CPU state. These extra instructions reduce performance slightly. 211 - If you say N here, these extra instructions will not be included, 212 - resulting in a kernel which will run faster but may not run at all 213 - on some systems with the PPC601 chip. 214 - 215 - If in doubt, say Y here. 216 - 217 202 config TAU 218 203 bool "On-chip CPU temperature sensor support" 219 204 depends on PPC_BOOK3S_32 ··· 208 223 temperature within 2-4 degrees Celsius. This option shows the current 209 224 on-die temperature in /proc/cpuinfo if the cpu supports it. 210 225 211 - Unfortunately, on some chip revisions, this sensor is very inaccurate 212 - and in many cases, does not work at all, so don't assume the cpu 213 - temp is actually what /proc/cpuinfo says it is. 226 + Unfortunately, this sensor is very inaccurate when uncalibrated, so 227 + don't assume the cpu temp is actually what /proc/cpuinfo says it is. 214 228 215 229 config TAU_INT 216 - bool "Interrupt driven TAU driver (DANGEROUS)" 230 + bool "Interrupt driven TAU driver (EXPERIMENTAL)" 217 231 depends on TAU 218 232 help 219 233 The TAU supports an interrupt driven mode which causes an interrupt ··· 220 236 to get notified the temp has exceeded a range. With this option off, 221 237 a timer is used to re-check the temperature periodically. 222 238 223 - However, on some cpus it appears that the TAU interrupt hardware 224 - is buggy and can cause a situation which would lead unexplained hard 225 - lockups. 226 - 227 - Unless you are extending the TAU driver, or enjoy kernel/hardware 228 - debugging, leave this option off. 
239 + If in doubt, say N here. 229 240 230 241 config TAU_AVERAGE 231 242 bool "Average high and low temp"

+5 -13

arch/powerpc/platforms/Kconfig.cputype

··· 20 20 depends on PPC32 21 21 help 22 22 There are five families of 32 bit PowerPC chips supported. 23 - The most common ones are the desktop and server CPUs (601, 603, 23 + The most common ones are the desktop and server CPUs (603, 24 24 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their 25 25 embedded 512x/52xx/82xx/83xx/86xx counterparts. 26 26 The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500 ··· 30 30 If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx. 31 31 32 32 config PPC_BOOK3S_6xx 33 - bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx except 601" 33 + bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" 34 34 select PPC_BOOK3S_32 35 35 select PPC_FPU 36 36 select PPC_HAVE_PMU_SUPPORT 37 37 select PPC_HAVE_KUEP 38 38 select PPC_HAVE_KUAP 39 39 select HAVE_ARCH_VMAP_STACK if !ADB_PMU 40 - 41 - config PPC_BOOK3S_601 42 - bool "PowerPC 601" 43 - select PPC_BOOK3S_32 44 - select PPC_FPU 45 - select PPC_HAVE_KUAP 46 - select HAVE_ARCH_VMAP_STACK 47 40 48 41 config PPC_85xx 49 42 bool "Freescale 85xx" ··· 483 490 484 491 config VDSO32 485 492 def_bool y 486 - depends on PPC32 || CPU_BIG_ENDIAN 493 + depends on PPC32 || COMPAT 487 494 help 488 495 This symbol controls whether we build the 32-bit VDSO. We obviously 489 496 want to do that if we're building a 32-bit kernel. If we're building 490 - a 64-bit kernel then we only want a 32-bit VDSO if we're building for 491 - big endian. That is because the only little endian configuration we 492 - support is ppc64le which is 64-bit only. 497 + a 64-bit kernel then we only want a 32-bit VDSO if we're also enabling 498 + COMPAT. 493 499 494 500 choice 495 501 prompt "Endianness selection"

+2 -1

arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c

··· 147 147 local_irq_disable(); 148 148 149 149 /* Set exception prefix high - to the firmware */ 150 - _nmask_and_or_msr(0, MSR_IP); 150 + mtmsr(mfmsr() | MSR_IP); 151 + isync(); 151 152 152 153 for (;;) ; /* Spin until reset happens */ 153 154 }

+2 -1

arch/powerpc/platforms/embedded6xx/storcenter.c

··· 101 101 local_irq_disable(); 102 102 103 103 /* Set exception prefix high - to the firmware */ 104 - _nmask_and_or_msr(0, MSR_IP); 104 + mtmsr(mfmsr() | MSR_IP); 105 + isync(); 105 106 106 107 /* Wait for reset to happen */ 107 108 for (;;) ;

+1 -1

arch/powerpc/platforms/powermac/pmac.h

··· 34 34 35 35 extern void pmac_setup_smp(void); 36 36 extern int psurge_secondary_virq; 37 - extern void low_cpu_die(void) __attribute__((noreturn)); 37 + extern void low_cpu_offline_self(void) __attribute__((noreturn)); 38 38 39 39 extern int pmac_nvram_init(void); 40 40 extern void pmac_pic_init(void);

+1 -1

arch/powerpc/platforms/powermac/setup.c

··· 284 284 /* 604, G3, G4 etc. */ 285 285 loops_per_jiffy = *fp / HZ; 286 286 else 287 - /* 601, 603, etc. */ 287 + /* 603, etc. */ 288 288 loops_per_jiffy = *fp / (2 * HZ); 289 289 of_node_put(cpu); 290 290 break;

+4 -11

arch/powerpc/platforms/powermac/sleep.S

··· 201 201 addi r3,r3,sleep_storage@l 202 202 stw r5,0(r3) 203 203 204 - .globl low_cpu_die 205 - low_cpu_die: 204 + .globl low_cpu_offline_self 205 + low_cpu_offline_self: 206 206 /* Flush & disable all caches */ 207 207 bl flush_disable_caches 208 208 ··· 244 244 mtmsr r2 245 245 isync 246 246 b 1b 247 - _ASM_NOKPROBE_SYMBOL(low_cpu_die) 247 + _ASM_NOKPROBE_SYMBOL(low_cpu_offline_self) 248 248 /* 249 249 * Here is the resume code. 250 250 */ ··· 294 294 * we do any r1 memory access as we are not sure they 295 295 * are in a sane state above the first 256Mb region 296 296 */ 297 - li r0,16 /* load up segment register values */ 298 - mtctr r0 /* for context 0 */ 299 - lis r3,0x2000 /* Ku = 1, VSID = 0 */ 300 - li r4,0 301 - 3: mtsrin r3,r4 302 - addi r3,r3,0x111 /* increment VSID */ 303 - addis r4,r4,0x1000 /* address of next segment */ 304 - bdnz 3b 297 + bl load_segment_registers 305 298 sync 306 299 isync 307 300

+4 -8

arch/powerpc/platforms/powermac/smp.c

··· 270 270 int i, ncpus; 271 271 struct device_node *dn; 272 272 273 - /* We don't do SMP on the PPC601 -- paulus */ 274 - if (PVR_VER(mfspr(SPRN_PVR)) == 1) 275 - return; 276 - 277 273 /* 278 274 * The powersurge cpu board can be used in the generation 279 275 * of powermacs that have a socket for an upgradeable cpu card, ··· 916 920 917 921 #ifdef CONFIG_PPC32 918 922 919 - static void pmac_cpu_die(void) 923 + static void pmac_cpu_offline_self(void) 920 924 { 921 925 int cpu = smp_processor_id(); 922 926 ··· 926 930 generic_set_cpu_dead(cpu); 927 931 smp_wmb(); 928 932 mb(); 929 - low_cpu_die(); 933 + low_cpu_offline_self(); 930 934 } 931 935 932 936 #else /* CONFIG_PPC32 */ 933 937 934 - static void pmac_cpu_die(void) 938 + static void pmac_cpu_offline_self(void) 935 939 { 936 940 int cpu = smp_processor_id(); 937 941 ··· 1016 1020 #endif /* CONFIG_PPC_PMAC32_PSURGE */ 1017 1021 1018 1022 #ifdef CONFIG_HOTPLUG_CPU 1019 - ppc_md.cpu_die = pmac_cpu_die; 1023 + smp_ops->cpu_offline_self = pmac_cpu_offline_self; 1020 1024 #endif 1021 1025 } 1022 1026

+42 -56

arch/powerpc/platforms/powernv/eeh-powernv.c

··· 38 38 39 39 static int eeh_event_irq = -EINVAL; 40 40 41 - void pnv_pcibios_bus_add_device(struct pci_dev *pdev) 41 + static void pnv_pcibios_bus_add_device(struct pci_dev *pdev) 42 42 { 43 43 dev_dbg(&pdev->dev, "EEH: Setting up device\n"); 44 44 eeh_probe_device(pdev); 45 - } 46 - 47 - static int pnv_eeh_init(void) 48 - { 49 - struct pci_controller *hose; 50 - struct pnv_phb *phb; 51 - int max_diag_size = PNV_PCI_DIAG_BUF_SIZE; 52 - 53 - if (!firmware_has_feature(FW_FEATURE_OPAL)) { 54 - pr_warn("%s: OPAL is required !\n", 55 - __func__); 56 - return -EINVAL; 57 - } 58 - 59 - /* Set probe mode */ 60 - eeh_add_flag(EEH_PROBE_MODE_DEV); 61 - 62 - /* 63 - * P7IOC blocks PCI config access to frozen PE, but PHB3 64 - * doesn't do that. So we have to selectively enable I/O 65 - * prior to collecting error log. 66 - */ 67 - list_for_each_entry(hose, &hose_list, list_node) { 68 - phb = hose->private_data; 69 - 70 - if (phb->model == PNV_PHB_MODEL_P7IOC) 71 - eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); 72 - 73 - if (phb->diag_data_size > max_diag_size) 74 - max_diag_size = phb->diag_data_size; 75 - 76 - /* 77 - * PE#0 should be regarded as valid by EEH core 78 - * if it's not the reserved one. Currently, we 79 - * have the reserved PE#255 and PE#127 for PHB3 80 - * and P7IOC separately. So we should regard 81 - * PE#0 as valid for PHB3 and P7IOC. 
82 - */ 83 - if (phb->ioda.reserved_pe_idx != 0) 84 - eeh_add_flag(EEH_VALID_PE_ZERO); 85 - 86 - break; 87 - } 88 - 89 - eeh_set_pe_aux_size(max_diag_size); 90 - ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; 91 - 92 - return 0; 93 45 } 94 46 95 47 static irqreturn_t pnv_eeh_event(int irq, void *data) ··· 87 135 return -EINVAL; 88 136 89 137 /* Retrieve PE */ 90 - pe = eeh_pe_get(hose, pe_no, 0); 138 + pe = eeh_pe_get(hose, pe_no); 91 139 if (!pe) 92 140 return -ENODEV; 93 141 ··· 142 190 143 191 #endif /* CONFIG_DEBUG_FS */ 144 192 145 - void pnv_eeh_enable_phbs(void) 193 + static void pnv_eeh_enable_phbs(void) 146 194 { 147 195 struct pci_controller *hose; 148 196 struct pnv_phb *phb; ··· 306 354 if (parent) { 307 355 struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent); 308 356 309 - return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0); 357 + return eeh_pe_get(phb->hose, ioda_pe->pe_number); 310 358 } 311 359 312 360 return NULL; ··· 1358 1406 } 1359 1407 1360 1408 /* Find the PE according to PE# */ 1361 - dev_pe = eeh_pe_get(hose, pe_no, 0); 1409 + dev_pe = eeh_pe_get(hose, pe_no); 1362 1410 if (!dev_pe) 1363 1411 return -EEXIST; 1364 1412 ··· 1626 1674 1627 1675 static struct eeh_ops pnv_eeh_ops = { 1628 1676 .name = "powernv", 1629 - .init = pnv_eeh_init, 1630 1677 .probe = pnv_eeh_probe, 1631 1678 .set_option = pnv_eeh_set_option, 1632 1679 .get_state = pnv_eeh_get_state, ··· 1666 1715 */ 1667 1716 static int __init eeh_powernv_init(void) 1668 1717 { 1718 + int max_diag_size = PNV_PCI_DIAG_BUF_SIZE; 1719 + struct pci_controller *hose; 1720 + struct pnv_phb *phb; 1669 1721 int ret = -EINVAL; 1670 1722 1671 - ret = eeh_ops_register(&pnv_eeh_ops); 1723 + if (!firmware_has_feature(FW_FEATURE_OPAL)) { 1724 + pr_warn("%s: OPAL is required !\n", __func__); 1725 + return -EINVAL; 1726 + } 1727 + 1728 + /* Set probe mode */ 1729 + eeh_add_flag(EEH_PROBE_MODE_DEV); 1730 + 1731 + /* 1732 + * P7IOC blocks PCI config access to frozen PE, but PHB3 1733 + * 
doesn't do that. So we have to selectively enable I/O 1734 + * prior to collecting error log. 1735 + */ 1736 + list_for_each_entry(hose, &hose_list, list_node) { 1737 + phb = hose->private_data; 1738 + 1739 + if (phb->model == PNV_PHB_MODEL_P7IOC) 1740 + eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); 1741 + 1742 + if (phb->diag_data_size > max_diag_size) 1743 + max_diag_size = phb->diag_data_size; 1744 + 1745 + break; 1746 + } 1747 + 1748 + /* 1749 + * eeh_init() allocates the eeh_pe and its aux data buf so the 1750 + * size needs to be set before calling eeh_init(). 1751 + */ 1752 + eeh_set_pe_aux_size(max_diag_size); 1753 + ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; 1754 + 1755 + ret = eeh_init(&pnv_eeh_ops); 1672 1756 if (!ret) 1673 1757 pr_info("EEH: PowerNV platform initialized\n"); 1674 1758 else ··· 1711 1725 1712 1726 return ret; 1713 1727 } 1714 - machine_early_initcall(powernv, eeh_powernv_init); 1728 + machine_arch_initcall(powernv, eeh_powernv_init);

+209 -93

arch/powerpc/platforms/powernv/idle.c

··· 565 565 irq_set_pending_from_srr1(srr1); 566 566 } 567 567 568 - void power7_idle(void) 568 + static void power7_idle(void) 569 569 { 570 570 if (!powersave_nap) 571 571 return; ··· 659 659 mmcr0 = mfspr(SPRN_MMCR0); 660 660 } 661 661 662 - if (cpu_has_feature(CPU_FTR_ARCH_31)) { 663 - /* 664 - * POWER10 uses MMCRA (BHRBRD) as BHRB disable bit. 665 - * If the user hasn't asked for the BHRB to be 666 - * written, the value of MMCRA[BHRBRD] is 1. 667 - * On wakeup from stop, MMCRA[BHRBD] will be 0, 668 - * since it is previleged resource and will be lost. 669 - * Thus, if we do not save and restore the MMCRA[BHRBD], 670 - * hardware will be needlessly writing to the BHRB 671 - * in problem mode. 672 - */ 673 - mmcra = mfspr(SPRN_MMCRA); 674 - } 675 - 676 662 if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { 677 663 sprs.lpcr = mfspr(SPRN_LPCR); 678 664 sprs.hfscr = mfspr(SPRN_HFSCR); ··· 720 734 asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT); 721 735 mtspr(SPRN_MMCR0, mmcr0); 722 736 } 723 - 724 - /* Reload MMCRA to restore BHRB disable bit for POWER10 */ 725 - if (cpu_has_feature(CPU_FTR_ARCH_31)) 726 - mtspr(SPRN_MMCRA, mmcra); 727 737 728 738 /* 729 739 * DD2.2 and earlier need to set then clear bit 60 in MMCRA ··· 805 823 return srr1; 806 824 } 807 825 808 - #ifdef CONFIG_HOTPLUG_CPU 809 - static unsigned long power9_offline_stop(unsigned long psscr) 810 - { 811 - unsigned long srr1; 812 - 813 - #ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE 814 - __ppc64_runlatch_off(); 815 - srr1 = power9_idle_stop(psscr, true); 816 - __ppc64_runlatch_on(); 817 - #else 818 - /* 819 - * Tell KVM we're entering idle. 820 - * This does not have to be done in real mode because the P9 MMU 821 - * is independent per-thread. Some steppings share radix/hash mode 822 - * between threads, but in that case KVM has a barrier sync in real 823 - * mode before and after switching between radix and hash. 
824 - * 825 - * kvm_start_guest must still be called in real mode though, hence 826 - * the false argument. 827 - */ 828 - local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; 829 - 830 - __ppc64_runlatch_off(); 831 - srr1 = power9_idle_stop(psscr, false); 832 - __ppc64_runlatch_on(); 833 - 834 - local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; 835 - /* Order setting hwthread_state vs. testing hwthread_req */ 836 - smp_mb(); 837 - if (local_paca->kvm_hstate.hwthread_req) 838 - srr1 = idle_kvm_start_guest(srr1); 839 - mtmsr(MSR_KERNEL); 840 - #endif 841 - 842 - return srr1; 843 - } 844 - #endif 845 - 846 - void power9_idle_type(unsigned long stop_psscr_val, 847 - unsigned long stop_psscr_mask) 848 - { 849 - unsigned long psscr; 850 - unsigned long srr1; 851 - 852 - if (!prep_irq_for_idle_irqsoff()) 853 - return; 854 - 855 - psscr = mfspr(SPRN_PSSCR); 856 - psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; 857 - 858 - __ppc64_runlatch_off(); 859 - srr1 = power9_idle_stop(psscr, true); 860 - __ppc64_runlatch_on(); 861 - 862 - fini_irq_for_idle_irqsoff(); 863 - 864 - irq_set_pending_from_srr1(srr1); 865 - } 866 - 867 - /* 868 - * Used for ppc_md.power_save which needs a function with no parameters 869 - */ 870 - void power9_idle(void) 871 - { 872 - power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); 873 - } 874 - 875 826 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 876 827 /* 877 828 * This is used in working around bugs in thread reconfiguration ··· 877 962 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); 878 963 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 879 964 965 + struct p10_sprs { 966 + /* 967 + * SPRs that get lost in shallow states: 968 + * 969 + * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1 970 + * isa300 idle routines restore CR, LR. 
971 + * CTR is volatile 972 + * idle thread doesn't use FP or VEC 973 + * kernel doesn't use TAR 974 + * HSPRG1 is only live in HV interrupt entry 975 + * SPRG2 is only live in KVM guests, KVM handles it. 976 + */ 977 + }; 978 + 979 + static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on) 980 + { 981 + int cpu = raw_smp_processor_id(); 982 + int first = cpu_first_thread_sibling(cpu); 983 + unsigned long *state = &paca_ptrs[first]->idle_state; 984 + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; 985 + unsigned long srr1; 986 + unsigned long pls; 987 + // struct p10_sprs sprs = {}; /* avoid false used-uninitialised */ 988 + bool sprs_saved = false; 989 + 990 + if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { 991 + /* EC=ESL=0 case */ 992 + 993 + BUG_ON(!mmu_on); 994 + 995 + /* 996 + * Wake synchronously. SRESET via xscom may still cause 997 + * a 0x100 powersave wakeup with SRR1 reason! 998 + */ 999 + srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ 1000 + if (likely(!srr1)) 1001 + return 0; 1002 + 1003 + /* 1004 + * Registers not saved, can't recover! 1005 + * This would be a hardware bug 1006 + */ 1007 + BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); 1008 + 1009 + goto out; 1010 + } 1011 + 1012 + /* EC=ESL=1 case */ 1013 + if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { 1014 + /* XXX: save SPRs for deep state loss here. */ 1015 + 1016 + sprs_saved = true; 1017 + 1018 + atomic_start_thread_idle(); 1019 + } 1020 + 1021 + srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ 1022 + 1023 + psscr = mfspr(SPRN_PSSCR); 1024 + 1025 + WARN_ON_ONCE(!srr1); 1026 + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); 1027 + 1028 + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) 1029 + hmi_exception_realmode(NULL); 1030 + 1031 + /* 1032 + * On POWER10, SRR1 bits do not match exactly as expected. 1033 + * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so 1034 + * just always test PSSCR for SPR/TB state loss. 
1035 + */ 1036 + pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; 1037 + if (likely(pls < deep_spr_loss_state)) { 1038 + if (sprs_saved) 1039 + atomic_stop_thread_idle(); 1040 + goto out; 1041 + } 1042 + 1043 + /* HV state loss */ 1044 + BUG_ON(!sprs_saved); 1045 + 1046 + atomic_lock_thread_idle(); 1047 + 1048 + if ((*state & core_thread_mask) != 0) 1049 + goto core_woken; 1050 + 1051 + /* XXX: restore per-core SPRs here */ 1052 + 1053 + if (pls >= pnv_first_tb_loss_level) { 1054 + /* TB loss */ 1055 + if (opal_resync_timebase() != OPAL_SUCCESS) 1056 + BUG(); 1057 + } 1058 + 1059 + /* 1060 + * isync after restoring shared SPRs and before unlocking. Unlock 1061 + * only contains hwsync which does not necessarily do the right 1062 + * thing for SPRs. 1063 + */ 1064 + isync(); 1065 + 1066 + core_woken: 1067 + atomic_unlock_and_stop_thread_idle(); 1068 + 1069 + /* XXX: restore per-thread SPRs here */ 1070 + 1071 + if (!radix_enabled()) 1072 + __slb_restore_bolted_realmode(); 1073 + 1074 + out: 1075 + if (mmu_on) 1076 + mtmsr(MSR_KERNEL); 1077 + 1078 + return srr1; 1079 + } 1080 + 1081 + #ifdef CONFIG_HOTPLUG_CPU 1082 + static unsigned long arch300_offline_stop(unsigned long psscr) 1083 + { 1084 + unsigned long srr1; 1085 + 1086 + #ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1087 + __ppc64_runlatch_off(); 1088 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 1089 + srr1 = power10_idle_stop(psscr, true); 1090 + else 1091 + srr1 = power9_idle_stop(psscr, true); 1092 + __ppc64_runlatch_on(); 1093 + #else 1094 + /* 1095 + * Tell KVM we're entering idle. 1096 + * This does not have to be done in real mode because the P9 MMU 1097 + * is independent per-thread. Some steppings share radix/hash mode 1098 + * between threads, but in that case KVM has a barrier sync in real 1099 + * mode before and after switching between radix and hash. 1100 + * 1101 + * kvm_start_guest must still be called in real mode though, hence 1102 + * the false argument. 
1103 + */ 1104 + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; 1105 + 1106 + __ppc64_runlatch_off(); 1107 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 1108 + srr1 = power10_idle_stop(psscr, false); 1109 + else 1110 + srr1 = power9_idle_stop(psscr, false); 1111 + __ppc64_runlatch_on(); 1112 + 1113 + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; 1114 + /* Order setting hwthread_state vs. testing hwthread_req */ 1115 + smp_mb(); 1116 + if (local_paca->kvm_hstate.hwthread_req) 1117 + srr1 = idle_kvm_start_guest(srr1); 1118 + mtmsr(MSR_KERNEL); 1119 + #endif 1120 + 1121 + return srr1; 1122 + } 1123 + #endif 1124 + 1125 + void arch300_idle_type(unsigned long stop_psscr_val, 1126 + unsigned long stop_psscr_mask) 1127 + { 1128 + unsigned long psscr; 1129 + unsigned long srr1; 1130 + 1131 + if (!prep_irq_for_idle_irqsoff()) 1132 + return; 1133 + 1134 + psscr = mfspr(SPRN_PSSCR); 1135 + psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; 1136 + 1137 + __ppc64_runlatch_off(); 1138 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 1139 + srr1 = power10_idle_stop(psscr, true); 1140 + else 1141 + srr1 = power9_idle_stop(psscr, true); 1142 + __ppc64_runlatch_on(); 1143 + 1144 + fini_irq_for_idle_irqsoff(); 1145 + 1146 + irq_set_pending_from_srr1(srr1); 1147 + } 1148 + 1149 + /* 1150 + * Used for ppc_md.power_save which needs a function with no parameters 1151 + */ 1152 + static void arch300_idle(void) 1153 + { 1154 + arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask); 1155 + } 1156 + 880 1157 #ifdef CONFIG_HOTPLUG_CPU 881 1158 882 1159 void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) ··· 1102 995 psscr = mfspr(SPRN_PSSCR); 1103 996 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | 1104 997 pnv_deepest_stop_psscr_val; 1105 - srr1 = power9_offline_stop(psscr); 998 + srr1 = arch300_offline_stop(psscr); 1106 999 } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) { 1107 1000 srr1 = power7_offline(); 1108 1001 } else { 
··· 1200 1093 * @dt_idle_states: Number of idle state entries 1201 1094 * Returns 0 on success 1202 1095 */ 1203 - static void __init pnv_power9_idle_init(void) 1096 + static void __init pnv_arch300_idle_init(void) 1204 1097 { 1205 1098 u64 max_residency_ns = 0; 1206 1099 int i; 1100 + 1101 + /* stop is not really architected, we only have p9,p10 drivers */ 1102 + if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9)) 1103 + return; 1207 1104 1208 1105 /* 1209 1106 * pnv_deepest_stop_{val,mask} should be set to values corresponding to ··· 1222 1111 int err; 1223 1112 struct pnv_idle_states_t *state = &pnv_idle_states[i]; 1224 1113 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; 1114 + 1115 + /* No deep loss driver implemented for POWER10 yet */ 1116 + if (pvr_version_is(PVR_POWER10) && 1117 + state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)) 1118 + continue; 1225 1119 1226 1120 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1227 1121 (pnv_first_tb_loss_level > psscr_rl)) ··· 1278 1162 if (unlikely(!default_stop_found)) { 1279 1163 pr_warn("cpuidle-powernv: No suitable default stop state found. 
Disabling platform idle.\n"); 1280 1164 } else { 1281 - ppc_md.power_save = power9_idle; 1165 + ppc_md.power_save = arch300_idle; 1282 1166 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", 1283 1167 pnv_default_stop_val, pnv_default_stop_mask); 1284 1168 } ··· 1340 1224 } 1341 1225 1342 1226 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1343 - pnv_power9_idle_init(); 1227 + pnv_arch300_idle_init(); 1344 1228 1345 1229 for (i = 0; i < nr_pnv_idle_states; i++) 1346 1230 supported_cpuidle_states |= pnv_idle_states[i].flags; ··· 1411 1295 for (i = 0; i < nr_idle_states; i++) 1412 1296 pnv_idle_states[i].residency_ns = temp_u32[i]; 1413 1297 1414 - /* For power9 */ 1298 + /* For power9 and later */ 1415 1299 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1416 1300 /* Read pm_crtl_val */ 1417 1301 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", ··· 1474 1358 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1475 1359 /* P7/P8 nap */ 1476 1360 p->thread_idle_state = PNV_THREAD_RUNNING; 1477 - } else { 1478 - /* P9 stop */ 1361 + } else if (pvr_version_is(PVR_POWER9)) { 1362 + /* P9 stop workarounds */ 1479 1363 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1480 1364 p->requested_psscr = 0; 1481 1365 atomic_set(&p->dont_stop, 0);

-30

arch/powerpc/platforms/powernv/ocxl.c

··· 2 2 // Copyright 2017 IBM Corp. 3 3 #include <asm/pnv-ocxl.h> 4 4 #include <asm/opal.h> 5 - #include <asm/xive.h> 6 5 #include <misc/ocxl-config.h> 7 6 #include "pci.h" 8 7 ··· 483 484 return rc; 484 485 } 485 486 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); 486 - 487 - int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr) 488 - { 489 - __be64 flags, trigger_page; 490 - s64 rc; 491 - u32 hwirq; 492 - 493 - hwirq = xive_native_alloc_irq(); 494 - if (!hwirq) 495 - return -ENOENT; 496 - 497 - rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL, 498 - NULL); 499 - if (rc || !trigger_page) { 500 - xive_native_free_irq(hwirq); 501 - return -ENOENT; 502 - } 503 - *irq = hwirq; 504 - *trigger_addr = be64_to_cpu(trigger_page); 505 - return 0; 506 - 507 - } 508 - EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq); 509 - 510 - void pnv_ocxl_free_xive_irq(u32 irq) 511 - { 512 - xive_native_free_irq(irq); 513 - } 514 - EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);

+1 -1

arch/powerpc/platforms/powernv/opal-core.c

··· 510 510 idx = be32_to_cpu(opalc_metadata->region_cnt); 511 511 if (idx > MAX_PT_LOAD_CNT) { 512 512 pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)", 513 - MAX_PT_LOAD_CNT, idx); 513 + idx, MAX_PT_LOAD_CNT); 514 514 idx = MAX_PT_LOAD_CNT; 515 515 } 516 516 for (i = 0; i < idx; i++) {

+26 -7

arch/powerpc/platforms/powernv/opal-elog.c

··· 179 179 return count; 180 180 } 181 181 182 - static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) 182 + static void create_elog_obj(uint64_t id, size_t size, uint64_t type) 183 183 { 184 184 struct elog_obj *elog; 185 185 int rc; 186 186 187 187 elog = kzalloc(sizeof(*elog), GFP_KERNEL); 188 188 if (!elog) 189 - return NULL; 189 + return; 190 190 191 191 elog->kobj.kset = elog_kset; 192 192 ··· 219 219 rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); 220 220 if (rc) { 221 221 kobject_put(&elog->kobj); 222 - return NULL; 222 + return; 223 223 } 224 224 225 + /* 226 + * As soon as the sysfs file for this elog is created/activated there is 227 + * a chance the opal_errd daemon (or any userspace) might read and 228 + * acknowledge the elog before kobject_uevent() is called. If that 229 + * happens then there is a potential race between 230 + * elog_ack_store->kobject_put() and kobject_uevent() which leads to a 231 + * use-after-free of a kernfs object resulting in a kernel crash. 232 + * 233 + * To avoid that, we need to take a reference on behalf of the bin file, 234 + * so that our reference remains valid while we call kobject_uevent(). 235 + * We then drop our reference before exiting the function, leaving the 236 + * bin file to drop the last reference (if it hasn't already). 237 + */ 238 + 239 + /* Take a reference for the bin file */ 240 + kobject_get(&elog->kobj); 225 241 rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); 226 - if (rc) { 242 + if (rc == 0) { 243 + kobject_uevent(&elog->kobj, KOBJ_ADD); 244 + } else { 245 + /* Drop the reference taken for the bin file */ 227 246 kobject_put(&elog->kobj); 228 - return NULL; 229 247 } 230 248 231 - kobject_uevent(&elog->kobj, KOBJ_ADD); 249 + /* Drop our reference */ 250 + kobject_put(&elog->kobj); 232 251 233 - return elog; 252 + return; 234 253 } 235 254 236 255 static irqreturn_t elog_event(int irq, void *data)

+2

arch/powerpc/platforms/powernv/opal-msglog.c

··· 12 12 #include <linux/types.h> 13 13 #include <asm/barrier.h> 14 14 15 + #include "powernv.h" 16 + 15 17 /* OPAL in-memory console. Defined in OPAL source at core/console.c */ 16 18 struct memcons { 17 19 __be64 magic;

+1 -1

arch/powerpc/platforms/powernv/opal-prd.c

··· 24 24 #include <linux/uaccess.h> 25 25 26 26 27 - /** 27 + /* 28 28 * The msg member must be at the end of the struct, as it's followed by the 29 29 * message data. 30 30 */

-8

arch/powerpc/platforms/powernv/pci-ioda.c

··· 894 894 895 895 int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) 896 896 { 897 - struct pci_dev *parent; 898 897 uint8_t bcomp, dcomp, fcomp; 899 898 long rc, rid_end, rid; 900 899 ··· 903 904 904 905 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; 905 906 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; 906 - parent = pe->pbus->self; 907 907 if (pe->flags & PNV_IODA_PE_BUS_ALL) 908 908 count = resource_size(&pe->pbus->busn_res); 909 909 else ··· 923 925 } 924 926 rid_end = pe->rid + (count << 8); 925 927 } else { 926 - #ifdef CONFIG_PCI_IOV 927 - if (pe->flags & PNV_IODA_PE_VF) 928 - parent = pe->parent_dev; 929 - else 930 - #endif /* CONFIG_PCI_IOV */ 931 - parent = pe->pdev->bus->self; 932 928 bcomp = OpalPciBusAll; 933 929 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; 934 930 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;

+7

arch/powerpc/platforms/powernv/powernv.h

··· 2 2 #ifndef _POWERNV_H 3 3 #define _POWERNV_H 4 4 5 + /* 6 + * There's various hacks scattered throughout the generic powerpc arch code 7 + * that needs to call into powernv platform stuff. The prototypes for those 8 + * functions are in asm/powernv.h 9 + */ 10 + #include <asm/powernv.h> 11 + 5 12 #ifdef CONFIG_SMP 6 13 extern void pnv_smp_init(void); 7 14 #else

+1 -1

arch/powerpc/platforms/powernv/rng.c

··· 65 65 return 1; 66 66 } 67 67 68 - int powernv_get_random_darn(unsigned long *v) 68 + static int powernv_get_random_darn(unsigned long *v) 69 69 { 70 70 unsigned long val; 71 71

+24

arch/powerpc/platforms/powernv/setup.c

··· 130 130 setup_count_cache_flush(); 131 131 } 132 132 133 + static void __init pnv_check_guarded_cores(void) 134 + { 135 + struct device_node *dn; 136 + int bad_count = 0; 137 + 138 + for_each_node_by_type(dn, "cpu") { 139 + if (of_property_match_string(dn, "status", "bad") >= 0) 140 + bad_count++; 141 + }; 142 + 143 + if (bad_count) { 144 + printk(" _ _______________\n"); 145 + pr_cont(" | | / \\\n"); 146 + pr_cont(" | | | WARNING! |\n"); 147 + pr_cont(" | | | |\n"); 148 + pr_cont(" | | | It looks like |\n"); 149 + pr_cont(" |_| | you have %*d |\n", 3, bad_count); 150 + pr_cont(" _ | guarded cores |\n"); 151 + pr_cont(" (_) \\_______________/\n"); 152 + } 153 + } 154 + 133 155 static void __init pnv_setup_arch(void) 134 156 { 135 157 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); ··· 171 149 172 150 /* Enable NAP mode */ 173 151 powersave_nap = 1; 152 + 153 + pnv_check_guarded_cores(); 174 154 175 155 /* XXX PMCS */ 176 156 }

+3 -3

arch/powerpc/platforms/powernv/smp.c

··· 43 43 #include <asm/udbg.h> 44 44 #define DBG(fmt...) udbg_printf(fmt) 45 45 #else 46 - #define DBG(fmt...) 46 + #define DBG(fmt...) do { } while (0) 47 47 #endif 48 48 49 49 static void pnv_smp_setup_cpu(int cpu) ··· 158 158 } 159 159 } 160 160 161 - static void pnv_smp_cpu_kill_self(void) 161 + static void pnv_cpu_offline_self(void) 162 162 { 163 163 unsigned long srr1, unexpected_mask, wmask; 164 164 unsigned int cpu; ··· 417 417 #ifdef CONFIG_HOTPLUG_CPU 418 418 .cpu_disable = pnv_smp_cpu_disable, 419 419 .cpu_die = generic_cpu_die, 420 + .cpu_offline_self = pnv_cpu_offline_self, 420 421 #endif /* CONFIG_HOTPLUG_CPU */ 421 422 }; 422 423 ··· 431 430 smp_ops = &pnv_smp_ops; 432 431 433 432 #ifdef CONFIG_HOTPLUG_CPU 434 - ppc_md.cpu_die = pnv_smp_cpu_kill_self; 435 433 #ifdef CONFIG_KEXEC_CORE 436 434 crash_wake_offline = 1; 437 435 #endif

+4 -5

arch/powerpc/platforms/powernv/vas-window.c

··· 186 186 * OS/User Window Context (UWC) MMIO Base Address Region for the given window. 187 187 * Map these bus addresses and save the mapped kernel addresses in @window. 188 188 */ 189 - int map_winctx_mmio_bars(struct vas_window *window) 189 + static int map_winctx_mmio_bars(struct vas_window *window) 190 190 { 191 191 int len; 192 192 u64 start; ··· 214 214 * registers are not sequential. And, we can only write to offsets 215 215 * with valid registers. 216 216 */ 217 - void reset_window_regs(struct vas_window *window) 217 + static void reset_window_regs(struct vas_window *window) 218 218 { 219 219 write_hvwc_reg(window, VREG(LPID), 0ULL); 220 220 write_hvwc_reg(window, VREG(PID), 0ULL); ··· 357 357 * as a one-time task? That could work for NX but what about other 358 358 * receivers? Let the receivers tell us the rx-fifo buffers for now. 359 359 */ 360 - int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx) 360 + static void init_winctx_regs(struct vas_window *window, 361 + struct vas_winctx *winctx) 361 362 { 362 363 u64 val; 363 364 int fifo_size; ··· 500 499 val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win); 501 500 val = SET_FIELD(VAS_WINCTL_OPEN, val, 1); 502 501 write_hvwc_reg(window, VREG(WINCTL), val); 503 - 504 - return 0; 505 502 } 506 503 507 504 static void vas_release_window_id(struct ida *ida, int winid)

+2 -2

arch/powerpc/platforms/ps3/spu.c

··· 448 448 ctx->ops->runcntl_stop(ctx); 449 449 } 450 450 451 - const struct spu_management_ops spu_management_ps3_ops = { 451 + static const struct spu_management_ops spu_management_ps3_ops = { 452 452 .enumerate_spus = ps3_enumerate_spus, 453 453 .create_spu = ps3_create_spu, 454 454 .destroy_spu = ps3_destroy_spu, ··· 589 589 return 0; /* No support. */ 590 590 } 591 591 592 - const struct spu_priv1_ops spu_priv1_ps3_ops = { 592 + static const struct spu_priv1_ops spu_priv1_ps3_ops = { 593 593 .int_mask_and = int_mask_and, 594 594 .int_mask_or = int_mask_or, 595 595 .int_mask_set = int_mask_set,

+133 -247

arch/powerpc/platforms/pseries/eeh_pseries.c

··· 33 33 #include <asm/ppc-pci.h> 34 34 #include <asm/rtas.h> 35 35 36 - static int pseries_eeh_get_pe_addr(struct pci_dn *pdn); 37 - 38 36 /* RTAS tokens */ 39 37 static int ibm_set_eeh_option; 40 38 static int ibm_set_slot_reset; ··· 84 86 85 87 86 88 /** 87 - * pseries_eeh_get_config_addr - Retrieve config address 89 + * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device 90 + * @pdn: pci_dn of the input device 88 91 * 89 - * Retrieve the assocated config address. Actually, there're 2 RTAS 90 - * function calls dedicated for the purpose. We need implement 91 - * it through the new function and then the old one. Besides, 92 - * you should make sure the config address is figured out from 93 - * FDT node before calling the function. 92 + * The EEH RTAS calls use a tuple consisting of: (buid_hi, buid_lo, 93 + * pe_config_addr) as a handle to a given PE. This function finds the 94 + * pe_config_addr based on the device's config addr. 94 95 * 95 - * It's notable that zero'ed return value means invalid PE config 96 - * address. 96 + * Keep in mind that the pe_config_addr *might* be numerically identical to the 97 + * device's config addr, but the two are conceptually distinct. 98 + * 99 + * Returns the pe_config_addr, or a negative error code. 97 100 */ 98 - static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr) 101 + static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) 99 102 { 100 - int ret = 0; 101 - int rets[3]; 103 + int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); 104 + struct pci_controller *phb = pdn->phb; 105 + int ret, rets[3]; 102 106 103 107 if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { 104 108 /* 105 - * First of all, we need to make sure there has one PE 106 - * associated with the device. Otherwise, PE address is 107 - * meaningless. 109 + * First of all, use function 1 to determine if this device is 110 + * part of a PE or not. ret[0] being zero indicates it's not. 
108 111 */ 109 112 ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, 110 113 config_addr, BUID_HI(phb->buid), 111 114 BUID_LO(phb->buid), 1); 112 115 if (ret || (rets[0] == 0)) 113 - return 0; 116 + return -ENOENT; 114 117 115 - /* Retrieve the associated PE config address */ 118 + /* Retrieve the associated PE config address with function 0 */ 116 119 ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, 117 120 config_addr, BUID_HI(phb->buid), 118 121 BUID_LO(phb->buid), 0); 119 122 if (ret) { 120 123 pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", 121 124 __func__, phb->global_number, config_addr); 122 - return 0; 125 + return -ENXIO; 123 126 } 124 127 125 128 return rets[0]; ··· 133 134 if (ret) { 134 135 pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", 135 136 __func__, phb->global_number, config_addr); 136 - return 0; 137 + return -ENXIO; 137 138 } 138 139 139 140 return rets[0]; 140 141 } 141 142 142 - return ret; 143 + /* 144 + * PAPR does describe a process for finding the pe_config_addr that was 145 + * used before the ibm,get-config-addr-info calls were added. However, 146 + * I haven't found *any* systems that don't have that RTAS call 147 + * implemented. If you happen to find one that needs the old DT based 148 + * process, patches are welcome! 
149 + */ 150 + return -ENOENT; 143 151 } 144 152 145 153 /** ··· 167 161 BUID_LO(phb->buid), option); 168 162 169 163 /* If fundamental-reset not supported, try hot-reset */ 170 - if (option == EEH_RESET_FUNDAMENTAL && 171 - ret == -8) { 164 + if (option == EEH_RESET_FUNDAMENTAL && ret == -8) { 172 165 option = EEH_RESET_HOT; 173 166 ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, 174 167 config_addr, BUID_HI(phb->buid), ··· 175 170 } 176 171 177 172 /* We need reset hold or settlement delay */ 178 - if (option == EEH_RESET_FUNDAMENTAL || 179 - option == EEH_RESET_HOT) 173 + if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT) 180 174 msleep(EEH_PE_RST_HOLD_TIME); 181 175 else 182 176 msleep(EEH_PE_RST_SETTLE_TIME); ··· 242 238 static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; 243 239 static DEFINE_SPINLOCK(slot_errbuf_lock); 244 240 static int eeh_error_buf_size; 245 - 246 - /** 247 - * pseries_eeh_init - EEH platform dependent initialization 248 - * 249 - * EEH platform dependent initialization on pseries. 250 - */ 251 - static int pseries_eeh_init(void) 252 - { 253 - struct pci_controller *phb; 254 - struct pci_dn *pdn; 255 - int addr, config_addr; 256 - 257 - /* figure out EEH RTAS function call tokens */ 258 - ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); 259 - ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); 260 - ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); 261 - ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); 262 - ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); 263 - ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); 264 - ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); 265 - ibm_configure_pe = rtas_token("ibm,configure-pe"); 266 - 267 - /* 268 - * ibm,configure-pe and ibm,configure-bridge have the same semantics, 269 - * however ibm,configure-pe can be faster. 
If we can't find 270 - * ibm,configure-pe then fall back to using ibm,configure-bridge. 271 - */ 272 - if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE) 273 - ibm_configure_pe = rtas_token("ibm,configure-bridge"); 274 - 275 - /* 276 - * Necessary sanity check. We needn't check "get-config-addr-info" 277 - * and its variant since the old firmware probably support address 278 - * of domain/bus/slot/function for EEH RTAS operations. 279 - */ 280 - if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || 281 - ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || 282 - (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && 283 - ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || 284 - ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || 285 - ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { 286 - pr_info("EEH functionality not supported\n"); 287 - return -EINVAL; 288 - } 289 - 290 - /* Initialize error log lock and size */ 291 - spin_lock_init(&slot_errbuf_lock); 292 - eeh_error_buf_size = rtas_token("rtas-error-log-max"); 293 - if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { 294 - pr_info("%s: unknown EEH error log size\n", 295 - __func__); 296 - eeh_error_buf_size = 1024; 297 - } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { 298 - pr_info("%s: EEH error log size %d exceeds the maximal %d\n", 299 - __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); 300 - eeh_error_buf_size = RTAS_ERROR_LOG_MAX; 301 - } 302 - 303 - /* Set EEH probe mode */ 304 - eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); 305 - 306 - /* Set EEH machine dependent code */ 307 - ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; 308 - 309 - if (is_kdump_kernel() || reset_devices) { 310 - pr_info("Issue PHB reset ...\n"); 311 - list_for_each_entry(phb, &hose_list, list_node) { 312 - pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); 313 - addr = (pdn->busno << 16) | (pdn->devfn << 8); 314 - config_addr = pseries_eeh_get_config_addr(phb, addr); 315 - /* invalid PE 
config addr */ 316 - if (config_addr == 0) 317 - continue; 318 - 319 - pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); 320 - pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE); 321 - pseries_eeh_phb_configure_bridge(phb, config_addr); 322 - } 323 - } 324 - 325 - return 0; 326 - } 327 241 328 242 static int pseries_eeh_cap_start(struct pci_dn *pdn) 329 243 { ··· 361 439 */ 362 440 void pseries_eeh_init_edev(struct pci_dn *pdn) 363 441 { 442 + struct eeh_pe pe, *parent; 364 443 struct eeh_dev *edev; 365 - struct eeh_pe pe; 366 444 u32 pcie_flags; 367 - int enable = 0; 368 445 int ret; 369 446 370 447 if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))) ··· 420 499 } 421 500 } 422 501 423 - /* Initialize the fake PE */ 502 + /* first up, find the pe_config_addr for the PE containing the device */ 503 + ret = pseries_eeh_get_pe_config_addr(pdn); 504 + if (ret < 0) { 505 + eeh_edev_dbg(edev, "Unable to find pe_config_addr\n"); 506 + goto err; 507 + } 508 + 509 + /* Try enable EEH on the fake PE */ 424 510 memset(&pe, 0, sizeof(struct eeh_pe)); 425 511 pe.phb = pdn->phb; 426 - pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8); 512 + pe.addr = ret; 427 513 428 - /* Enable EEH on the device */ 429 514 eeh_edev_dbg(edev, "Enabling EEH on device\n"); 430 515 ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); 431 516 if (ret) { 432 517 eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret); 433 - } else { 434 - struct eeh_pe *parent; 435 - 436 - /* Retrieve PE address */ 437 - edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn); 438 - pe.addr = edev->pe_config_addr; 439 - 440 - /* Some older systems (Power4) allow the ibm,set-eeh-option 441 - * call to succeed even on nodes where EEH is not supported. 442 - * Verify support explicitly. 
443 - */ 444 - ret = eeh_ops->get_state(&pe, NULL); 445 - if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) 446 - enable = 1; 447 - 448 - /* 449 - * This device doesn't support EEH, but it may have an 450 - * EEH parent. In this case any error on the device will 451 - * freeze the PE of it's upstream bridge, so added it to 452 - * the upstream PE. 453 - */ 454 - parent = pseries_eeh_pe_get_parent(edev); 455 - if (parent && !enable) 456 - edev->pe_config_addr = parent->addr; 457 - 458 - if (enable || parent) { 459 - eeh_add_flag(EEH_ENABLED); 460 - eeh_pe_tree_insert(edev, parent); 461 - } 462 - eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n", 463 - (enable ? "enabled" : "unsupported"), ret); 518 + goto err; 464 519 } 465 520 466 - /* Save memory bars */ 521 + edev->pe_config_addr = pe.addr; 522 + 523 + eeh_add_flag(EEH_ENABLED); 524 + 525 + parent = pseries_eeh_pe_get_parent(edev); 526 + eeh_pe_tree_insert(edev, parent); 467 527 eeh_save_bars(edev); 528 + eeh_edev_dbg(edev, "EEH enabled for device"); 529 + 530 + return; 531 + 532 + err: 533 + eeh_edev_dbg(edev, "EEH is unsupported on device (code = %d)\n", ret); 468 534 } 469 535 470 536 static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev) ··· 508 600 static int pseries_eeh_set_option(struct eeh_pe *pe, int option) 509 601 { 510 602 int ret = 0; 511 - int config_addr; 512 603 513 604 /* 514 605 * When we're enabling or disabling EEH functioality on ··· 520 613 case EEH_OPT_ENABLE: 521 614 case EEH_OPT_THAW_MMIO: 522 615 case EEH_OPT_THAW_DMA: 523 - config_addr = pe->config_addr; 524 - if (pe->addr) 525 - config_addr = pe->addr; 526 616 break; 527 617 case EEH_OPT_FREEZE_PE: 528 618 /* Not support */ 529 619 return 0; 530 620 default: 531 - pr_err("%s: Invalid option %d\n", 532 - __func__, option); 621 + pr_err("%s: Invalid option %d\n", __func__, option); 533 622 return -EINVAL; 534 623 } 535 624 536 625 ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, 537 - config_addr, BUID_HI(pe->phb->buid), 626 + 
pe->addr, BUID_HI(pe->phb->buid), 538 627 BUID_LO(pe->phb->buid), option); 539 - 540 - return ret; 541 - } 542 - 543 - /** 544 - * pseries_eeh_get_pe_addr - Retrieve PE address 545 - * @pe: EEH PE 546 - * 547 - * Retrieve the assocated PE address. Actually, there're 2 RTAS 548 - * function calls dedicated for the purpose. We need implement 549 - * it through the new function and then the old one. Besides, 550 - * you should make sure the config address is figured out from 551 - * FDT node before calling the function. 552 - * 553 - * It's notable that zero'ed return value means invalid PE config 554 - * address. 555 - */ 556 - static int pseries_eeh_get_pe_addr(struct pci_dn *pdn) 557 - { 558 - int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); 559 - unsigned long buid = pdn->phb->buid; 560 - int ret = 0; 561 - int rets[3]; 562 - 563 - if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { 564 - /* 565 - * First of all, we need to make sure there has one PE 566 - * associated with the device. Otherwise, PE address is 567 - * meaningless. 
568 - */ 569 - ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, 570 - config_addr, BUID_HI(buid), BUID_LO(buid), 1); 571 - if (ret || (rets[0] == 0)) 572 - return 0; 573 - 574 - /* Retrieve the associated PE config address */ 575 - ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, 576 - config_addr, BUID_HI(buid), BUID_LO(buid), 0); 577 - if (ret) { 578 - pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", 579 - __func__, pdn->phb->global_number, config_addr); 580 - return 0; 581 - } 582 - 583 - return rets[0]; 584 - } 585 - 586 - if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { 587 - ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, 588 - config_addr, BUID_HI(buid), BUID_LO(buid), 0); 589 - if (ret) { 590 - pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", 591 - __func__, pdn->phb->global_number, config_addr); 592 - return 0; 593 - } 594 - 595 - return rets[0]; 596 - } 597 628 598 629 return ret; 599 630 } ··· 551 706 */ 552 707 static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) 553 708 { 554 - int config_addr; 555 709 int ret; 556 710 int rets[4]; 557 711 int result; 558 712 559 - /* Figure out PE config address if possible */ 560 - config_addr = pe->config_addr; 561 - if (pe->addr) 562 - config_addr = pe->addr; 563 - 564 713 if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { 565 714 ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets, 566 - config_addr, BUID_HI(pe->phb->buid), 715 + pe->addr, BUID_HI(pe->phb->buid), 567 716 BUID_LO(pe->phb->buid)); 568 717 } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) { 569 718 /* Fake PE unavailable info */ 570 719 rets[2] = 0; 571 720 ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, 572 - config_addr, BUID_HI(pe->phb->buid), 721 + pe->addr, BUID_HI(pe->phb->buid), 573 722 BUID_LO(pe->phb->buid)); 574 723 } else { 575 724 return EEH_STATE_NOT_SUPPORT; ··· 617 778 */ 618 779 static int pseries_eeh_reset(struct eeh_pe *pe, int option) 619 780 { 620 - int 
config_addr; 621 - 622 - /* Figure out PE address */ 623 - config_addr = pe->config_addr; 624 - if (pe->addr) 625 - config_addr = pe->addr; 626 - 627 - return pseries_eeh_phb_reset(pe->phb, config_addr, option); 781 + return pseries_eeh_phb_reset(pe->phb, pe->addr, option); 628 782 } 629 783 630 784 /** ··· 633 801 */ 634 802 static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len) 635 803 { 636 - int config_addr; 637 804 unsigned long flags; 638 805 int ret; 639 806 640 807 spin_lock_irqsave(&slot_errbuf_lock, flags); 641 808 memset(slot_errbuf, 0, eeh_error_buf_size); 642 809 643 - /* Figure out the PE address */ 644 - config_addr = pe->config_addr; 645 - if (pe->addr) 646 - config_addr = pe->addr; 647 - 648 - ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr, 810 + ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, pe->addr, 649 811 BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 650 812 virt_to_phys(drv_log), len, 651 813 virt_to_phys(slot_errbuf), eeh_error_buf_size, ··· 658 832 */ 659 833 static int pseries_eeh_configure_bridge(struct eeh_pe *pe) 660 834 { 661 - int config_addr; 662 - 663 - /* Figure out the PE address */ 664 - config_addr = pe->config_addr; 665 - if (pe->addr) 666 - config_addr = pe->addr; 667 - 668 - return pseries_eeh_phb_configure_bridge(pe->phb, config_addr); 835 + return pseries_eeh_phb_configure_bridge(pe->phb, pe->addr); 669 836 } 670 837 671 838 /** ··· 773 954 if (!edev) 774 955 return -EEXIST; 775 956 776 - if (rtas_token("ibm,open-sriov-allow-unfreeze") 777 - == RTAS_UNKNOWN_SERVICE) 957 + if (rtas_token("ibm,open-sriov-allow-unfreeze") == RTAS_UNKNOWN_SERVICE) 778 958 return -EINVAL; 779 959 780 960 if (edev->pdev->is_physfn || edev->pdev->is_virtfn) ··· 785 967 786 968 static struct eeh_ops pseries_eeh_ops = { 787 969 .name = "pseries", 788 - .init = pseries_eeh_init, 789 970 .probe = pseries_eeh_probe, 790 971 .set_option = pseries_eeh_set_option, 791 972 .get_state = 
pseries_eeh_get_state, ··· 809 992 */ 810 993 static int __init eeh_pseries_init(void) 811 994 { 812 - int ret; 995 + struct pci_controller *phb; 996 + struct pci_dn *pdn; 997 + int ret, config_addr; 813 998 814 - ret = eeh_ops_register(&pseries_eeh_ops); 999 + /* figure out EEH RTAS function call tokens */ 1000 + ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); 1001 + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); 1002 + ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); 1003 + ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); 1004 + ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); 1005 + ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); 1006 + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); 1007 + ibm_configure_pe = rtas_token("ibm,configure-pe"); 1008 + 1009 + /* 1010 + * ibm,configure-pe and ibm,configure-bridge have the same semantics, 1011 + * however ibm,configure-pe can be faster. If we can't find 1012 + * ibm,configure-pe then fall back to using ibm,configure-bridge. 1013 + */ 1014 + if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE) 1015 + ibm_configure_pe = rtas_token("ibm,configure-bridge"); 1016 + 1017 + /* 1018 + * Necessary sanity check. We needn't check "get-config-addr-info" 1019 + * and its variant since the old firmware probably support address 1020 + * of domain/bus/slot/function for EEH RTAS operations. 
1021 + */ 1022 + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || 1023 + ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || 1024 + (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && 1025 + ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || 1026 + ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || 1027 + ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { 1028 + pr_info("EEH functionality not supported\n"); 1029 + return -EINVAL; 1030 + } 1031 + 1032 + /* Initialize error log lock and size */ 1033 + spin_lock_init(&slot_errbuf_lock); 1034 + eeh_error_buf_size = rtas_token("rtas-error-log-max"); 1035 + if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { 1036 + pr_info("%s: unknown EEH error log size\n", 1037 + __func__); 1038 + eeh_error_buf_size = 1024; 1039 + } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { 1040 + pr_info("%s: EEH error log size %d exceeds the maximal %d\n", 1041 + __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); 1042 + eeh_error_buf_size = RTAS_ERROR_LOG_MAX; 1043 + } 1044 + 1045 + /* Set EEH probe mode */ 1046 + eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); 1047 + 1048 + /* Set EEH machine dependent code */ 1049 + ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; 1050 + 1051 + if (is_kdump_kernel() || reset_devices) { 1052 + pr_info("Issue PHB reset ...\n"); 1053 + list_for_each_entry(phb, &hose_list, list_node) { 1054 + pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); 1055 + config_addr = pseries_eeh_get_pe_config_addr(pdn); 1056 + 1057 + /* invalid PE config addr */ 1058 + if (config_addr < 0) 1059 + continue; 1060 + 1061 + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); 1062 + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE); 1063 + pseries_eeh_phb_configure_bridge(phb, config_addr); 1064 + } 1065 + } 1066 + 1067 + ret = eeh_init(&pseries_eeh_ops); 815 1068 if (!ret) 816 1069 pr_info("EEH: pSeries platform initialized\n"); 817 1070 else 818 1071 pr_info("EEH: 
pSeries platform initialization failure (%d)\n", 819 1072 ret); 820 - 821 1073 return ret; 822 1074 } 823 - machine_early_initcall(pseries, eeh_pseries_init); 1075 + machine_arch_initcall(pseries, eeh_pseries_init);

+3 -3

arch/powerpc/platforms/pseries/hotplug-cpu.c

··· 55 55 panic("Alas, I survived.\n"); 56 56 } 57 57 58 - static void pseries_mach_cpu_die(void) 58 + static void pseries_cpu_offline_self(void) 59 59 { 60 60 unsigned int hwcpu = hard_smp_processor_id(); 61 61 ··· 102 102 * to self-destroy so that the cpu-offline thread can send the CPU_DEAD 103 103 * notifications. 104 104 * 105 - * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to 105 + * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to 106 106 * self-destruct. 107 107 */ 108 108 static void pseries_cpu_die(unsigned int cpu) ··· 901 901 return 0; 902 902 } 903 903 904 - ppc_md.cpu_die = pseries_mach_cpu_die; 904 + smp_ops->cpu_offline_self = pseries_cpu_offline_self; 905 905 smp_ops->cpu_disable = pseries_cpu_disable; 906 906 smp_ops->cpu_die = pseries_cpu_die; 907 907

+56 -26

arch/powerpc/platforms/pseries/hotplug-memory.c

··· 30 30 31 31 np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 32 32 if (np) { 33 - const __be64 *size; 33 + int len; 34 + int size_cells; 35 + const __be32 *prop; 34 36 35 - size = of_get_property(np, "ibm,lmb-size", NULL); 36 - if (size) 37 - memblock_size = be64_to_cpup(size); 37 + size_cells = of_n_size_cells(np); 38 + 39 + prop = of_get_property(np, "ibm,lmb-size", &len); 40 + if (prop && len >= size_cells * sizeof(__be32)) 41 + memblock_size = of_read_number(prop, size_cells); 38 42 of_node_put(np); 43 + 39 44 } else if (machine_is(pseries)) { 40 45 /* This fallback really only applies to pseries */ 41 46 unsigned int memzero_size = 0; ··· 282 277 return dlpar_change_lmb_state(lmb, false); 283 278 } 284 279 285 - static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) 280 + static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size) 286 281 { 287 282 unsigned long block_sz, start_pfn; 288 283 int sections_per_block; ··· 313 308 314 309 static int pseries_remove_mem_node(struct device_node *np) 315 310 { 316 - const __be32 *regs; 311 + const __be32 *prop; 317 312 unsigned long base; 318 - unsigned int lmb_size; 313 + unsigned long lmb_size; 319 314 int ret = -EINVAL; 315 + int addr_cells, size_cells; 320 316 321 317 /* 322 318 * Check to see if we are actually removing memory ··· 328 322 /* 329 323 * Find the base address and size of the memblock 330 324 */ 331 - regs = of_get_property(np, "reg", NULL); 332 - if (!regs) 325 + prop = of_get_property(np, "reg", NULL); 326 + if (!prop) 333 327 return ret; 334 328 335 - base = be64_to_cpu(*(unsigned long *)regs); 336 - lmb_size = be32_to_cpu(regs[3]); 329 + addr_cells = of_n_addr_cells(np); 330 + size_cells = of_n_size_cells(np); 331 + 332 + /* 333 + * "reg" property represents (addr,size) tuple. 
334 + */ 335 + base = of_read_number(prop, addr_cells); 336 + prop += addr_cells; 337 + lmb_size = of_read_number(prop, size_cells); 337 338 338 339 pseries_remove_memblock(base, lmb_size); 339 340 return 0; ··· 367 354 368 355 static int dlpar_remove_lmb(struct drmem_lmb *lmb) 369 356 { 357 + struct memory_block *mem_block; 370 358 unsigned long block_sz; 371 359 int rc; 372 360 373 361 if (!lmb_is_removable(lmb)) 374 362 return -EINVAL; 375 363 364 + mem_block = lmb_to_memblock(lmb); 365 + if (mem_block == NULL) 366 + return -EINVAL; 367 + 376 368 rc = dlpar_offline_lmb(lmb); 377 - if (rc) 369 + if (rc) { 370 + put_device(&mem_block->dev); 378 371 return rc; 372 + } 379 373 380 374 block_sz = pseries_memory_block_size(); 381 375 382 - __remove_memory(lmb->nid, lmb->base_addr, block_sz); 376 + __remove_memory(mem_block->nid, lmb->base_addr, block_sz); 377 + put_device(&mem_block->dev); 383 378 384 379 /* Update memory regions for memory remove */ 385 380 memblock_remove(lmb->base_addr, block_sz); 386 381 387 382 invalidate_lmb_associativity_index(lmb); 388 - lmb_clear_nid(lmb); 389 383 lmb->flags &= ~DRCONF_MEM_ASSIGNED; 390 384 391 385 return 0; ··· 577 557 578 558 #else 579 559 static inline int pseries_remove_memblock(unsigned long base, 580 - unsigned int memblock_size) 560 + unsigned long memblock_size) 581 561 { 582 562 return -EOPNOTSUPP; 583 563 } ··· 611 591 static int dlpar_add_lmb(struct drmem_lmb *lmb) 612 592 { 613 593 unsigned long block_sz; 614 - int rc; 594 + int nid, rc; 615 595 616 596 if (lmb->flags & DRCONF_MEM_ASSIGNED) 617 597 return -EINVAL; ··· 622 602 return rc; 623 603 } 624 604 625 - lmb_set_nid(lmb); 626 605 block_sz = memory_block_size_bytes(); 627 606 607 + /* Find the node id for this LMB. Fake one if necessary. 
*/ 608 + nid = of_drconf_to_nid_single(lmb); 609 + if (nid < 0 || !node_possible(nid)) 610 + nid = first_online_node; 611 + 628 612 /* Add the memory */ 629 - rc = __add_memory(lmb->nid, lmb->base_addr, block_sz, MHP_NONE); 613 + rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_NONE); 630 614 if (rc) { 631 615 invalidate_lmb_associativity_index(lmb); 632 616 return rc; ··· 638 614 639 615 rc = dlpar_online_lmb(lmb); 640 616 if (rc) { 641 - __remove_memory(lmb->nid, lmb->base_addr, block_sz); 617 + __remove_memory(nid, lmb->base_addr, block_sz); 642 618 invalidate_lmb_associativity_index(lmb); 643 - lmb_clear_nid(lmb); 644 619 } else { 645 620 lmb->flags |= DRCONF_MEM_ASSIGNED; 646 621 } ··· 901 878 902 879 static int pseries_add_mem_node(struct device_node *np) 903 880 { 904 - const __be32 *regs; 881 + const __be32 *prop; 905 882 unsigned long base; 906 - unsigned int lmb_size; 883 + unsigned long lmb_size; 907 884 int ret = -EINVAL; 885 + int addr_cells, size_cells; 908 886 909 887 /* 910 888 * Check to see if we are actually adding memory ··· 916 892 /* 917 893 * Find the base and size of the memblock 918 894 */ 919 - regs = of_get_property(np, "reg", NULL); 920 - if (!regs) 895 + prop = of_get_property(np, "reg", NULL); 896 + if (!prop) 921 897 return ret; 922 898 923 - base = be64_to_cpu(*(unsigned long *)regs); 924 - lmb_size = be32_to_cpu(regs[3]); 899 + addr_cells = of_n_addr_cells(np); 900 + size_cells = of_n_size_cells(np); 901 + /* 902 + * "reg" property represents (addr,size) tuple. 903 + */ 904 + base = of_read_number(prop, addr_cells); 905 + prop += addr_cells; 906 + lmb_size = of_read_number(prop, size_cells); 925 907 926 908 /* 927 909 * Update memory region to represent the memory add

+3 -20

arch/powerpc/platforms/pseries/hvCall_inst.c

··· 70 70 return 0; 71 71 } 72 72 73 - static const struct seq_operations hcall_inst_seq_ops = { 73 + static const struct seq_operations hcall_inst_sops = { 74 74 .start = hc_start, 75 75 .next = hc_next, 76 76 .stop = hc_stop, 77 77 .show = hc_show 78 78 }; 79 79 80 - static int hcall_inst_seq_open(struct inode *inode, struct file *file) 81 - { 82 - int rc; 83 - struct seq_file *seq; 84 - 85 - rc = seq_open(file, &hcall_inst_seq_ops); 86 - seq = file->private_data; 87 - seq->private = file_inode(file)->i_private; 88 - 89 - return rc; 90 - } 91 - 92 - static const struct file_operations hcall_inst_seq_fops = { 93 - .open = hcall_inst_seq_open, 94 - .read = seq_read, 95 - .llseek = seq_lseek, 96 - .release = seq_release, 97 - }; 80 + DEFINE_SEQ_ATTRIBUTE(hcall_inst); 98 81 99 82 #define HCALL_ROOT_DIR "hcall_inst" 100 83 #define CPU_NAME_BUF_SIZE 32 ··· 132 149 snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu); 133 150 debugfs_create_file(cpu_name_buf, 0444, hcall_root, 134 151 per_cpu(hcall_stats, cpu), 135 - &hcall_inst_seq_fops); 152 + &hcall_inst_fops); 136 153 } 137 154 138 155 return 0;

+195 -47

arch/powerpc/platforms/pseries/iommu.c

··· 39 39 40 40 #include "pseries.h" 41 41 42 + enum { 43 + DDW_QUERY_PE_DMA_WIN = 0, 44 + DDW_CREATE_PE_DMA_WIN = 1, 45 + DDW_REMOVE_PE_DMA_WIN = 2, 46 + 47 + DDW_APPLICABLE_SIZE 48 + }; 49 + 50 + enum { 51 + DDW_EXT_SIZE = 0, 52 + DDW_EXT_RESET_DMA_WIN = 1, 53 + DDW_EXT_QUERY_OUT_SIZE = 2 54 + }; 55 + 42 56 static struct iommu_table_group *iommu_pseries_alloc_group(int node) 43 57 { 44 58 struct iommu_table_group *table_group; ··· 348 334 /* Dynamic DMA Window support */ 349 335 struct ddw_query_response { 350 336 u32 windows_available; 351 - u32 largest_available_block; 337 + u64 largest_available_block; 352 338 u32 page_size; 353 339 u32 migration_capable; 354 340 }; ··· 781 767 782 768 early_param("disable_ddw", disable_ddw_setup); 783 769 784 - static void remove_ddw(struct device_node *np, bool remove_prop) 770 + static void remove_dma_window(struct device_node *np, u32 *ddw_avail, 771 + struct property *win) 785 772 { 786 773 struct dynamic_dma_window_prop *dwp; 787 - struct property *win64; 788 - u32 ddw_avail[3]; 789 774 u64 liobn; 790 - int ret = 0; 775 + int ret; 791 776 792 - ret = of_property_read_u32_array(np, "ibm,ddw-applicable", 793 - &ddw_avail[0], 3); 794 - 795 - win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); 796 - if (!win64) 797 - return; 798 - 799 - if (ret || win64->length < sizeof(*dwp)) 800 - goto delprop; 801 - 802 - dwp = win64->value; 777 + dwp = win->value; 803 778 liobn = (u64)be32_to_cpu(dwp->liobn); 804 779 805 780 /* clear the whole window, note the arg is in kernel pages */ ··· 801 798 pr_debug("%pOF successfully cleared tces in window.\n", 802 799 np); 803 800 804 - ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); 801 + ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn); 805 802 if (ret) 806 803 pr_warn("%pOF: failed to remove direct window: rtas returned " 807 804 "%d to ibm,remove-pe-dma-window(%x) %llx\n", 808 - np, ret, ddw_avail[2], liobn); 805 + np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); 
809 806 else 810 807 pr_debug("%pOF: successfully removed direct window: rtas returned " 811 808 "%d to ibm,remove-pe-dma-window(%x) %llx\n", 812 - np, ret, ddw_avail[2], liobn); 809 + np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); 810 + } 813 811 814 - delprop: 815 - if (remove_prop) 816 - ret = of_remove_property(np, win64); 812 + static void remove_ddw(struct device_node *np, bool remove_prop) 813 + { 814 + struct property *win; 815 + u32 ddw_avail[DDW_APPLICABLE_SIZE]; 816 + int ret = 0; 817 + 818 + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", 819 + &ddw_avail[0], DDW_APPLICABLE_SIZE); 820 + if (ret) 821 + return; 822 + 823 + win = of_find_property(np, DIRECT64_PROPNAME, NULL); 824 + if (!win) 825 + return; 826 + 827 + if (win->length >= sizeof(struct dynamic_dma_window_prop)) 828 + remove_dma_window(np, ddw_avail, win); 829 + 830 + if (!remove_prop) 831 + return; 832 + 833 + ret = of_remove_property(np, win); 817 834 if (ret) 818 835 pr_warn("%pOF: failed to remove direct window property: %d\n", 819 836 np, ret); ··· 892 869 } 893 870 machine_arch_initcall(pseries, find_existing_ddw_windows); 894 871 872 + /** 873 + * ddw_read_ext - Get the value of an DDW extension 874 + * @np: device node from which the extension value is to be read. 875 + * @extnum: index number of the extension. 876 + * @value: pointer to return value, modified when extension is available. 877 + * 878 + * Checks if "ibm,ddw-extensions" exists for this node, and get the value 879 + * on index 'extnum'. 880 + * It can be used only to check if a property exists, passing value == NULL. 881 + * 882 + * Returns: 883 + * 0 if extension successfully read 884 + * -EINVAL if the "ibm,ddw-extensions" does not exist, 885 + * -ENODATA if "ibm,ddw-extensions" does not have a value, and 886 + * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension. 
887 + */ 888 + static inline int ddw_read_ext(const struct device_node *np, int extnum, 889 + u32 *value) 890 + { 891 + static const char propname[] = "ibm,ddw-extensions"; 892 + u32 count; 893 + int ret; 894 + 895 + ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count); 896 + if (ret) 897 + return ret; 898 + 899 + if (count < extnum) 900 + return -EOVERFLOW; 901 + 902 + if (!value) 903 + value = &count; 904 + 905 + return of_property_read_u32_index(np, propname, extnum, value); 906 + } 907 + 895 908 static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, 896 - struct ddw_query_response *query) 909 + struct ddw_query_response *query, 910 + struct device_node *parent) 897 911 { 898 912 struct device_node *dn; 899 913 struct pci_dn *pdn; 900 - u32 cfg_addr; 914 + u32 cfg_addr, ext_query, query_out[5]; 901 915 u64 buid; 902 - int ret; 916 + int ret, out_sz; 917 + 918 + /* 919 + * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many 920 + * output parameters ibm,query-pe-dma-windows will have, ranging from 921 + * 5 to 6. 922 + */ 923 + ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query); 924 + if (!ret && ext_query == 1) 925 + out_sz = 6; 926 + else 927 + out_sz = 5; 903 928 904 929 /* 905 930 * Get the config address and phb buid of the PE window. 
··· 960 889 buid = pdn->phb->buid; 961 890 cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); 962 891 963 - ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, 964 - cfg_addr, BUID_HI(buid), BUID_LO(buid)); 965 - dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" 966 - " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid), 967 - BUID_LO(buid), ret); 892 + ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out, 893 + cfg_addr, BUID_HI(buid), BUID_LO(buid)); 894 + dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n", 895 + ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), 896 + BUID_LO(buid), ret); 897 + 898 + switch (out_sz) { 899 + case 5: 900 + query->windows_available = query_out[0]; 901 + query->largest_available_block = query_out[1]; 902 + query->page_size = query_out[2]; 903 + query->migration_capable = query_out[3]; 904 + break; 905 + case 6: 906 + query->windows_available = query_out[0]; 907 + query->largest_available_block = ((u64)query_out[1] << 32) | 908 + query_out[2]; 909 + query->page_size = query_out[3]; 910 + query->migration_capable = query_out[4]; 911 + break; 912 + } 913 + 968 914 return ret; 969 915 } 970 916 ··· 1008 920 1009 921 do { 1010 922 /* extra outputs are LIOBN and dma-addr (hi, lo) */ 1011 - ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, 1012 - cfg_addr, BUID_HI(buid), BUID_LO(buid), 1013 - page_shift, window_shift); 923 + ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4, 924 + (u32 *)create, cfg_addr, BUID_HI(buid), 925 + BUID_LO(buid), page_shift, window_shift); 1014 926 } while (rtas_busy_delay(ret)); 1015 927 dev_info(&dev->dev, 1016 928 "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " 1017 - "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1], 1018 - cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, 1019 - window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); 929 + "(liobn = 0x%x starting addr = %x %x)\n", 930 + 
ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid), 931 + BUID_LO(buid), page_shift, window_shift, ret, create->liobn, 932 + create->addr_hi, create->addr_lo); 1020 933 1021 934 return ret; 1022 935 } ··· 1067 978 } 1068 979 1069 980 /* 981 + * Platforms supporting the DDW option starting with LoPAR level 2.7 implement 982 + * ibm,ddw-extensions, which carries the rtas token for 983 + * ibm,reset-pe-dma-windows. 984 + * That rtas-call can be used to restore the default DMA window for the device. 985 + */ 986 + static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) 987 + { 988 + int ret; 989 + u32 cfg_addr, reset_dma_win; 990 + u64 buid; 991 + struct device_node *dn; 992 + struct pci_dn *pdn; 993 + 994 + ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win); 995 + if (ret) 996 + return; 997 + 998 + dn = pci_device_to_OF_node(dev); 999 + pdn = PCI_DN(dn); 1000 + buid = pdn->phb->buid; 1001 + cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8); 1002 + 1003 + ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid), 1004 + BUID_LO(buid)); 1005 + if (ret) 1006 + dev_info(&dev->dev, 1007 + "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ", 1008 + reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid), 1009 + ret); 1010 + } 1011 + 1012 + /* 1070 1013 * If the PE supports dynamic dma windows, and there is space for a table 1071 1014 * that can map all pages in a linear offset, then setup such a table, 1072 1015 * and record the dma-offset in the struct device. 
··· 1117 996 int page_shift; 1118 997 u64 dma_addr, max_addr; 1119 998 struct device_node *dn; 1120 - u32 ddw_avail[3]; 999 + u32 ddw_avail[DDW_APPLICABLE_SIZE]; 1121 1000 struct direct_window *window; 1122 1001 struct property *win64; 1123 1002 struct dynamic_dma_window_prop *ddwprop; 1124 1003 struct failed_ddw_pdn *fpdn; 1004 + bool default_win_removed = false; 1125 1005 1126 1006 mutex_lock(&direct_window_init_mutex); 1127 1007 ··· 1151 1029 * the property is actually in the parent, not the PE 1152 1030 */ 1153 1031 ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", 1154 - &ddw_avail[0], 3); 1032 + &ddw_avail[0], DDW_APPLICABLE_SIZE); 1155 1033 if (ret) 1156 1034 goto out_failed; 1157 1035 ··· 1162 1040 * of page sizes: supported and supported for migrate-dma. 1163 1041 */ 1164 1042 dn = pci_device_to_OF_node(dev); 1165 - ret = query_ddw(dev, ddw_avail, &query); 1043 + ret = query_ddw(dev, ddw_avail, &query, pdn); 1166 1044 if (ret != 0) 1167 1045 goto out_failed; 1168 1046 1047 + /* 1048 + * If there is no window available, remove the default DMA window, 1049 + * if it's present. This will make all the resources available to the 1050 + * new DDW window. 1051 + * If anything fails after this, we need to restore it, so also check 1052 + * for extensions presence. 1053 + */ 1169 1054 if (query.windows_available == 0) { 1170 - /* 1171 - * no additional windows are available for this device. 1172 - * We might be able to reallocate the existing window, 1173 - * trading in for a larger page size. 
1174 - */ 1175 - dev_dbg(&dev->dev, "no free dynamic windows"); 1176 - goto out_failed; 1055 + struct property *default_win; 1056 + int reset_win_ext; 1057 + 1058 + default_win = of_find_property(pdn, "ibm,dma-window", NULL); 1059 + if (!default_win) 1060 + goto out_failed; 1061 + 1062 + reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL); 1063 + if (reset_win_ext) 1064 + goto out_failed; 1065 + 1066 + remove_dma_window(pdn, ddw_avail, default_win); 1067 + default_win_removed = true; 1068 + 1069 + /* Query again, to check if the window is available */ 1070 + ret = query_ddw(dev, ddw_avail, &query, pdn); 1071 + if (ret != 0) 1072 + goto out_failed; 1073 + 1074 + if (query.windows_available == 0) { 1075 + /* no windows are available for this device. */ 1076 + dev_dbg(&dev->dev, "no free dynamic windows"); 1077 + goto out_failed; 1078 + } 1177 1079 } 1178 1080 if (query.page_size & 4) { 1179 1081 page_shift = 24; /* 16MB */ ··· 1214 1068 /* check largest block * page size > max memory hotplug addr */ 1215 1069 max_addr = ddw_memory_hotplug_max(); 1216 1070 if (query.largest_available_block < (max_addr >> page_shift)) { 1217 - dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u " 1071 + dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu " 1218 1072 "%llu-sized pages\n", max_addr, query.largest_available_block, 1219 1073 1ULL << page_shift); 1220 1074 goto out_failed; ··· 1288 1142 kfree(win64); 1289 1143 1290 1144 out_failed: 1145 + if (default_win_removed) 1146 + reset_dma_window(dev, pdn); 1291 1147 1292 1148 fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); 1293 1149 if (!fpdn)

+2

arch/powerpc/platforms/pseries/lpar.c

··· 1724 1724 pseries_lpar_register_process_table(0, 0, 0); 1725 1725 } 1726 1726 1727 + #ifdef CONFIG_PPC_RADIX_MMU 1727 1728 void radix_init_pseries(void) 1728 1729 { 1729 1730 pr_info("Using radix MMU under hypervisor\n"); ··· 1732 1731 pseries_lpar_register_process_table(__pa(process_tb), 1733 1732 0, PRTB_SIZE_SHIFT - 12); 1734 1733 } 1734 + #endif 1735 1735 1736 1736 #ifdef CONFIG_PPC_SMLPAR 1737 1737 #define CMO_FREE_HINT_DEFAULT 1

+35

arch/powerpc/platforms/pseries/lparcfg.c

··· 136 136 return rc; 137 137 } 138 138 139 + static void show_gpci_data(struct seq_file *m) 140 + { 141 + struct hv_gpci_request_buffer *buf; 142 + unsigned int affinity_score; 143 + long ret; 144 + 145 + buf = kmalloc(sizeof(*buf), GFP_KERNEL); 146 + if (buf == NULL) 147 + return; 148 + 149 + /* 150 + * Show the local LPAR's affinity score. 151 + * 152 + * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall. 153 + * The score is at byte 0xB in the output buffer. 154 + */ 155 + memset(&buf->params, 0, sizeof(buf->params)); 156 + buf->params.counter_request = cpu_to_be32(0xB1); 157 + buf->params.starting_index = cpu_to_be32(-1); /* local LPAR */ 158 + buf->params.counter_info_version_in = 0x5; /* v5+ for score */ 159 + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(buf), 160 + sizeof(*buf)); 161 + if (ret != H_SUCCESS) { 162 + pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n", 163 + ret, be32_to_cpu(buf->params.detail_rc)); 164 + goto out; 165 + } 166 + affinity_score = buf->bytes[0xB]; 167 + seq_printf(m, "partition_affinity_score=%u\n", affinity_score); 168 + out: 169 + kfree(buf); 170 + } 171 + 139 172 static unsigned h_pic(unsigned long *pool_idle_time, 140 173 unsigned long *num_procs) 141 174 { ··· 519 486 seq_printf(m, "partition_entitled_capacity=%d\n", 520 487 partition_active_processors * 100); 521 488 } 489 + 490 + show_gpci_data(m); 522 491 523 492 seq_printf(m, "partition_active_processors=%d\n", 524 493 partition_active_processors);

+7 -3

arch/powerpc/platforms/pseries/papr_scm.c

··· 785 785 static ssize_t perf_stats_show(struct device *dev, 786 786 struct device_attribute *attr, char *buf) 787 787 { 788 - int index, rc; 788 + int index; 789 + ssize_t rc; 789 790 struct seq_buf s; 790 791 struct papr_scm_perf_stat *stat; 791 792 struct papr_scm_perf_stats *stats; ··· 821 820 822 821 free_stats: 823 822 kfree(stats); 824 - return rc ? rc : seq_buf_used(&s); 823 + return rc ? rc : (ssize_t)seq_buf_used(&s); 825 824 } 826 - DEVICE_ATTR_ADMIN_RO(perf_stats); 825 + static DEVICE_ATTR_ADMIN_RO(perf_stats); 827 826 828 827 static ssize_t flags_show(struct device *dev, 829 828 struct device_attribute *attr, char *buf) ··· 897 896 p->bus_desc.module = THIS_MODULE; 898 897 p->bus_desc.of_node = p->pdev->dev.of_node; 899 898 p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); 899 + 900 + /* Set the dimm command family mask to accept PDSMs */ 901 + set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask); 900 902 901 903 if (!p->bus_desc.provider_name) 902 904 return -ENOMEM;

+1

arch/powerpc/platforms/pseries/rng.c

··· 36 36 37 37 ppc_md.get_random_seed = pseries_get_random_long; 38 38 39 + of_node_put(dn); 39 40 return 0; 40 41 } 41 42 machine_subsys_initcall(pseries, rng_init);

+6

arch/powerpc/platforms/pseries/setup.c

··· 519 519 if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) 520 520 security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); 521 521 522 + if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST) 523 + security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST); 524 + 522 525 if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE) 523 526 security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); 527 + 528 + if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK) 529 + security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); 524 530 525 531 /* 526 532 * The features below are enabled by default, so we instead look to see

+26

arch/powerpc/platforms/pseries/svm.c

··· 7 7 */ 8 8 9 9 #include <linux/mm.h> 10 + #include <linux/memblock.h> 10 11 #include <asm/machdep.h> 11 12 #include <asm/svm.h> 12 13 #include <asm/swiotlb.h> ··· 35 34 return 0; 36 35 } 37 36 machine_early_initcall(pseries, init_svm); 37 + 38 + /* 39 + * Initialize SWIOTLB. Essentially the same as swiotlb_init(), except that it 40 + * can allocate the buffer anywhere in memory. Since the hypervisor doesn't have 41 + * any addressing limitation, we don't need to allocate it in low addresses. 42 + */ 43 + void __init svm_swiotlb_init(void) 44 + { 45 + unsigned char *vstart; 46 + unsigned long bytes, io_tlb_nslabs; 47 + 48 + io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT); 49 + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); 50 + 51 + bytes = io_tlb_nslabs << IO_TLB_SHIFT; 52 + 53 + vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE); 54 + if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false)) 55 + return; 56 + 57 + if (io_tlb_start) 58 + memblock_free_early(io_tlb_start, 59 + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); 60 + panic("SVM: Cannot allocate SWIOTLB buffer"); 61 + } 38 62 39 63 int set_memory_encrypted(unsigned long addr, int numpages) 40 64 {

+1

arch/powerpc/sysdev/xics/icp-hv.c

··· 174 174 175 175 icp_ops = &icp_hv_ops; 176 176 177 + of_node_put(np); 177 178 return 0; 178 179 } 179 180

+2 -2

arch/powerpc/sysdev/xive/common.c

··· 1565 1565 } 1566 1566 __setup("xive=off", xive_off); 1567 1567 1568 - void xive_debug_show_cpu(struct seq_file *m, int cpu) 1568 + static void xive_debug_show_cpu(struct seq_file *m, int cpu) 1569 1569 { 1570 1570 struct xive_cpu *xc = per_cpu(xive_cpu, cpu); 1571 1571 ··· 1599 1599 seq_puts(m, "\n"); 1600 1600 } 1601 1601 1602 - void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) 1602 + static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) 1603 1603 { 1604 1604 struct irq_chip *chip = irq_data_get_irq_chip(d); 1605 1605 int rc;

-1

arch/powerpc/tools/checkpatch.sh

··· 9 9 exec $script_base/../../../scripts/checkpatch.pl \ 10 10 --subjective \ 11 11 --no-summary \ 12 - --max-line-length=90 \ 13 12 --show-types \ 14 13 --ignore ARCH_INCLUDE_LINUX \ 15 14 --ignore BIT_MACRO \

+67 -48

arch/powerpc/tools/unrel_branch_check.sh

··· 1 - # Copyright © 2016 IBM Corporation 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0+ 3 + # Copyright © 2016,2020 IBM Corporation 2 4 # 3 - # This program is free software; you can redistribute it and/or 4 - # modify it under the terms of the GNU General Public License 5 - # as published by the Free Software Foundation; either version 6 - # 2 of the License, or (at your option) any later version. 7 - # 8 - # This script checks the relocations of a vmlinux for "suspicious" 9 - # branches from unrelocated code (head_64.S code). 5 + # This script checks the unrelocated code of a vmlinux for "suspicious" 6 + # branches to relocated code (head_64.S code). 10 7 11 - # Turn this on if you want more debug output: 12 - # set -x 13 - 14 - # Have Kbuild supply the path to objdump so we handle cross compilation. 8 + # Have Kbuild supply the path to objdump and nm so we handle cross compilation. 15 9 objdump="$1" 16 - vmlinux="$2" 10 + nm="$2" 11 + vmlinux="$3" 17 12 18 - #__end_interrupts should be located within the first 64K 13 + kstart=0xc000000000000000 19 14 20 - end_intr=0x$( 21 - $objdump -R "$vmlinux" -d --start-address=0xc000000000000000 \ 22 - --stop-address=0xc000000000010000 | 23 - grep '\<__end_interrupts>:' | 24 - awk '{print $1}' 25 - ) 15 + end_intr=0x$($nm -p "$vmlinux" | 16 + sed -E -n '/\s+[[:alpha:]]\s+__end_interrupts\s*$/{s///p;q}') 17 + if [ "$end_intr" = "0x" ]; then 18 + exit 0 19 + fi 26 20 27 - BRANCHES=$( 28 - $objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \ 29 - --stop-address=${end_intr} | 30 - grep -e "^c[0-9a-f]*:[[:space:]]*$[0-9a-f][0-9a-f][[:space:]]$\{4\}[[:space:]]*b" | 31 - grep -v '\<__start_initialization_multiplatform>' | 32 - grep -v -e 'b.\?.\?ctr' | 33 - grep -v -e 'b.\?.\?lr' | 34 - sed -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \ 35 - -e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \ 36 - -e 's/[[:space:]]0x/ /' \ 37 - -e 's/://' | 38 - awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' 39 
- ) 21 + # we know that there is a correct branch to 22 + # __start_initialization_multiplatform, so find its address 23 + # so we can exclude it. 24 + sim=0x$($nm -p "$vmlinux" | 25 + sed -E -n '/\s+[[:alpha:]]\s+__start_initialization_multiplatform\s*$/{s///p;q}') 40 26 41 - for tuple in $BRANCHES 42 - do 43 - from=`echo $tuple | cut -d':' -f1` 44 - branch=`echo $tuple | cut -d':' -f2` 45 - to=`echo $tuple | cut -d':' -f3 | sed 's/cr[0-7],//'` 46 - sym=`echo $tuple | cut -d':' -f4` 27 + $objdump -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" | 28 + sed -E -n ' 29 + # match lines that start with a kernel address 30 + /^c[0-9a-f]*:\s*b/ { 31 + # drop branches via ctr or lr 32 + /\<b.?.?(ct|l)r/d 33 + # cope with some differences between Clang and GNU objdumps 34 + s/\<bt.?\s*[[:digit:]]+,/beq/ 35 + s/\<bf.?\s*[[:digit:]]+,/bne/ 36 + # tidy up 37 + s/\s0x/ / 38 + s/:// 39 + # format for the loop below 40 + s/^(\S+)\s+(\S+)\s+(\S+)\s*(\S*).*$/\1:\2:\3:\4/ 41 + # strip out condition registers 42 + s/:cr[0-7],/:/ 43 + p 44 + }' | { 47 45 48 - if (( $to > $end_intr )) 49 - then 50 - if [ -z "$bad_branches" ]; then 51 - echo "WARNING: Unrelocated relative branches" 52 - bad_branches="yes" 46 + all_good=true 47 + while IFS=: read -r from branch to sym; do 48 + case "$to" in 49 + c*) to="0x$to" 50 + ;; 51 + .+*) 52 + to=${to#.+} 53 + if [ "$branch" = 'b' ]; then 54 + if (( to >= 0x2000000 )); then 55 + to=$(( to - 0x4000000 )) 56 + fi 57 + elif (( to >= 0x8000 )); then 58 + to=$(( to - 0x10000 )) 53 59 fi 54 - echo "$from $branch-> $to $sym" 60 + printf -v to '0x%x' $(( "0x$from" + to )) 61 + ;; 62 + *) printf 'Unkown branch format\n' 63 + ;; 64 + esac 65 + if [ "$to" = "$sim" ]; then 66 + continue 67 + fi 68 + if (( to > end_intr )); then 69 + if $all_good; then 70 + printf '%s\n' 'WARNING: Unrelocated relative branches' 71 + all_good=false 72 + fi 73 + printf '%s %s-> %s %s\n' "$from" "$branch" "$to" "$sym" 55 74 fi 56 75 done 57 76 
58 - if [ -z "$bad_branches" ]; then 59 - exit 0 60 - fi 77 + $all_good 78 + 79 + }

+1

arch/powerpc/xmon/xmon.c

··· 969 969 brk.address = dabr[i].address; 970 970 brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; 971 971 brk.len = 8; 972 + brk.hw_len = 8; 972 973 __set_breakpoint(i, &brk); 973 974 } 974 975 }

+14 -51

arch/sparc/kernel/smp_64.c

··· 1039 1039 * are flush_tlb_*() routines, and these run after flush_cache_*() 1040 1040 * which performs the flushw. 1041 1041 * 1042 - * The SMP TLB coherency scheme we use works as follows: 1043 - * 1044 - * 1) mm->cpu_vm_mask is a bit mask of which cpus an address 1045 - * space has (potentially) executed on, this is the heuristic 1046 - * we use to avoid doing cross calls. 1047 - * 1048 - * Also, for flushing from kswapd and also for clones, we 1049 - * use cpu_vm_mask as the list of cpus to make run the TLB. 1050 - * 1051 - * 2) TLB context numbers are shared globally across all processors 1052 - * in the system, this allows us to play several games to avoid 1053 - * cross calls. 1054 - * 1055 - * One invariant is that when a cpu switches to a process, and 1056 - * that processes tsk->active_mm->cpu_vm_mask does not have the 1057 - * current cpu's bit set, that tlb context is flushed locally. 1058 - * 1059 - * If the address space is non-shared (ie. mm->count == 1) we avoid 1060 - * cross calls when we want to flush the currently running process's 1061 - * tlb state. This is done by clearing all cpu bits except the current 1062 - * processor's in current->mm->cpu_vm_mask and performing the 1063 - * flush locally only. This will force any subsequent cpus which run 1064 - * this task to flush the context from the local tlb if the process 1065 - * migrates to another cpu (again). 1066 - * 1067 - * 3) For shared address spaces (threads) and swapping we bite the 1068 - * bullet for most cases and perform the cross call (but only to 1069 - * the cpus listed in cpu_vm_mask). 1070 - * 1071 - * The performance gain from "optimizing" away the cross call for threads is 1072 - * questionable (in theory the big win for threads is the massive sharing of 1073 - * address space state across processors). 1042 + * mm->cpu_vm_mask is a bit mask of which cpus an address 1043 + * space has (potentially) executed on, this is the heuristic 1044 + * we use to limit cross calls. 
1074 1045 */ 1075 1046 1076 1047 /* This currently is only used by the hugetlb arch pre-fault ··· 1051 1080 void smp_flush_tlb_mm(struct mm_struct *mm) 1052 1081 { 1053 1082 u32 ctx = CTX_HWBITS(mm->context); 1054 - int cpu = get_cpu(); 1055 1083 1056 - if (atomic_read(&mm->mm_users) == 1) { 1057 - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); 1058 - goto local_flush_and_out; 1059 - } 1084 + get_cpu(); 1060 1085 1061 1086 smp_cross_call_masked(&xcall_flush_tlb_mm, 1062 1087 ctx, 0, 0, 1063 1088 mm_cpumask(mm)); 1064 1089 1065 - local_flush_and_out: 1066 1090 __flush_tlb_mm(ctx, SECONDARY_CONTEXT); 1067 1091 1068 1092 put_cpu(); ··· 1080 1114 { 1081 1115 u32 ctx = CTX_HWBITS(mm->context); 1082 1116 struct tlb_pending_info info; 1083 - int cpu = get_cpu(); 1117 + 1118 + get_cpu(); 1084 1119 1085 1120 info.ctx = ctx; 1086 1121 info.nr = nr; 1087 1122 info.vaddrs = vaddrs; 1088 1123 1089 - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) 1090 - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); 1091 - else 1092 - smp_call_function_many(mm_cpumask(mm), tlb_pending_func, 1093 - &info, 1); 1124 + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, 1125 + &info, 1); 1094 1126 1095 1127 __flush_tlb_pending(ctx, nr, vaddrs); 1096 1128 ··· 1098 1134 void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) 1099 1135 { 1100 1136 unsigned long context = CTX_HWBITS(mm->context); 1101 - int cpu = get_cpu(); 1102 1137 1103 - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) 1104 - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); 1105 - else 1106 - smp_cross_call_masked(&xcall_flush_tlb_page, 1107 - context, vaddr, 0, 1108 - mm_cpumask(mm)); 1138 + get_cpu(); 1139 + 1140 + smp_cross_call_masked(&xcall_flush_tlb_page, 1141 + context, vaddr, 0, 1142 + mm_cpumask(mm)); 1143 + 1109 1144 __flush_tlb_page(context, vaddr); 1110 1145 1111 1146 put_cpu();

+6 -3

drivers/cpufreq/powernv-cpufreq.c

··· 885 885 unsigned long action, void *unused) 886 886 { 887 887 int cpu; 888 - struct cpufreq_policy cpu_policy; 888 + struct cpufreq_policy *cpu_policy; 889 889 890 890 rebooting = true; 891 891 for_each_online_cpu(cpu) { 892 - cpufreq_get_policy(&cpu_policy, cpu); 893 - powernv_cpufreq_target_index(&cpu_policy, get_nominal_index()); 892 + cpu_policy = cpufreq_cpu_get(cpu); 893 + if (!cpu_policy) 894 + continue; 895 + powernv_cpufreq_target_index(cpu_policy, get_nominal_index()); 896 + cpufreq_cpu_put(cpu_policy); 894 897 } 895 898 896 899 return NOTIFY_DONE;

+1 -1

drivers/cpuidle/cpuidle-powernv.c

··· 141 141 struct cpuidle_driver *drv, 142 142 int index) 143 143 { 144 - power9_idle_type(stop_psscr_table[index].val, 144 + arch300_idle_type(stop_psscr_table[index].val, 145 145 stop_psscr_table[index].mask); 146 146 return index; 147 147 }

+2 -2

drivers/macintosh/smu.c

··· 638 638 { 639 639 struct device_node *np; 640 640 641 - for (np = NULL; (np = of_get_next_child(smu->of_node, np)) != NULL;) 641 + for_each_child_of_node(smu->of_node, np) 642 642 if (of_device_is_compatible(np, "smu-sensors")) 643 643 of_platform_device_create(np, "smu-sensors", 644 644 &smu->of_dev->dev); ··· 1015 1015 /* Note: Only allowed to return error code in pointers (using ERR_PTR) 1016 1016 * when interruptible is 1 1017 1017 */ 1018 - const struct smu_sdbp_header *__smu_get_sdb_partition(int id, 1018 + static const struct smu_sdbp_header *__smu_get_sdb_partition(int id, 1019 1019 unsigned int *size, int interruptible) 1020 1020 { 1021 1021 char pname[32];

-2

drivers/macintosh/windfarm_lm75_sensor.c

··· 152 152 { 153 153 struct wf_lm75_sensor *lm = i2c_get_clientdata(client); 154 154 155 - DBG("wf_lm75: i2c detatch called for %s\n", lm->sens.name); 156 - 157 155 /* Mark client detached */ 158 156 lm->i2c = NULL; 159 157

-2

drivers/macintosh/windfarm_lm87_sensor.c

··· 149 149 { 150 150 struct wf_lm87_sensor *lm = i2c_get_clientdata(client); 151 151 152 - DBG("wf_lm87: i2c detatch called for %s\n", lm->sens.name); 153 - 154 152 /* Mark client detached */ 155 153 lm->i2c = NULL; 156 154

+1 -2

drivers/macintosh/windfarm_smu_sat.c

··· 216 216 217 217 vsens[0] = vsens[1] = -1; 218 218 isens[0] = isens[1] = -1; 219 - child = NULL; 220 - while ((child = of_get_next_child(dev, child)) != NULL) { 219 + for_each_child_of_node(dev, child) { 221 220 reg = of_get_property(child, "reg", NULL); 222 221 loc = of_get_property(child, "location", NULL); 223 222 if (reg == NULL || loc == NULL)

+1 -2

drivers/macintosh/windfarm_smu_sensors.c

··· 421 421 return -ENODEV; 422 422 423 423 /* Look for sensors subdir */ 424 - for (sensors = NULL; 425 - (sensors = of_get_next_child(smu, sensors)) != NULL;) 424 + for_each_child_of_node(smu, sensors) 426 425 if (of_node_name_eq(sensors, "sensors")) 427 426 break; 428 427

+2 -2

drivers/misc/cxl/pci.c

··· 393 393 *capp_unit_id = get_capp_unit_id(np, *phb_index); 394 394 of_node_put(np); 395 395 if (!*capp_unit_id) { 396 - pr_err("cxl: invalid capp unit id (phb_index: %d)\n", 397 - *phb_index); 396 + pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n", 397 + *chipid, *phb_index); 398 398 return -ENODEV; 399 399 } 400 400

+1 -1

drivers/misc/ocxl/Kconfig

··· 9 9 10 10 config OCXL 11 11 tristate "OpenCAPI coherent accelerator support" 12 - depends on PPC_POWERNV && PCI && EEH && HOTPLUG_PCI_POWERNV 12 + depends on HOTPLUG_PCI_POWERNV 13 13 select OCXL_BASE 14 14 default m 15 15 help

+7 -5

drivers/misc/ocxl/afu_irq.c

··· 2 2 // Copyright 2017 IBM Corp. 3 3 #include <linux/interrupt.h> 4 4 #include <asm/pnv-ocxl.h> 5 + #include <asm/xive.h> 5 6 #include "ocxl_internal.h" 6 7 #include "trace.h" 7 8 ··· 11 10 int hw_irq; 12 11 unsigned int virq; 13 12 char *name; 14 - u64 trigger_page; 15 13 irqreturn_t (*handler)(void *private); 16 14 void (*free_private)(void *private); 17 15 void *private; ··· 124 124 goto err_unlock; 125 125 } 126 126 127 - rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq, 128 - &irq->trigger_page); 127 + rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq); 129 128 if (rc) 130 129 goto err_idr; 131 130 ··· 195 196 196 197 u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id) 197 198 { 199 + struct xive_irq_data *xd; 198 200 struct afu_irq *irq; 199 201 u64 addr = 0; 200 202 201 203 mutex_lock(&ctx->irq_lock); 202 204 irq = idr_find(&ctx->irq_idr, irq_id); 203 - if (irq) 204 - addr = irq->trigger_page; 205 + if (irq) { 206 + xd = irq_get_handler_data(irq->virq); 207 + addr = xd ? xd->trig_page : 0; 208 + } 205 209 mutex_unlock(&ctx->irq_lock); 206 210 return addr; 207 211 }

+7 -8

drivers/misc/ocxl/link.c

··· 6 6 #include <linux/mmu_context.h> 7 7 #include <asm/copro.h> 8 8 #include <asm/pnv-ocxl.h> 9 + #include <asm/xive.h> 9 10 #include <misc/ocxl.h> 10 11 #include "ocxl_internal.h" 11 12 #include "trace.h" ··· 683 682 } 684 683 EXPORT_SYMBOL_GPL(ocxl_link_remove_pe); 685 684 686 - int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr) 685 + int ocxl_link_irq_alloc(void *link_handle, int *hw_irq) 687 686 { 688 687 struct ocxl_link *link = (struct ocxl_link *) link_handle; 689 - int rc, irq; 690 - u64 addr; 688 + int irq; 691 689 692 690 if (atomic_dec_if_positive(&link->irq_available) < 0) 693 691 return -ENOSPC; 694 692 695 - rc = pnv_ocxl_alloc_xive_irq(&irq, &addr); 696 - if (rc) { 693 + irq = xive_native_alloc_irq(); 694 + if (!irq) { 697 695 atomic_inc(&link->irq_available); 698 - return rc; 696 + return -ENXIO; 699 697 } 700 698 701 699 *hw_irq = irq; 702 - *trigger_addr = addr; 703 700 return 0; 704 701 } 705 702 EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc); ··· 706 707 { 707 708 struct ocxl_link *link = (struct ocxl_link *) link_handle; 708 709 709 - pnv_ocxl_free_xive_irq(hw_irq); 710 + xive_native_free_irq(hw_irq); 710 711 atomic_inc(&link->irq_available); 711 712 } 712 713 EXPORT_SYMBOL_GPL(ocxl_link_free_irq);

+9 -12

drivers/scsi/cxlflash/ocxl_hw.c

··· 15 15 #include <linux/pseudo_fs.h> 16 16 #include <linux/poll.h> 17 17 #include <linux/sched/signal.h> 18 - 18 + #include <linux/interrupt.h> 19 + #include <asm/xive.h> 19 20 #include <misc/ocxl.h> 20 21 21 22 #include <uapi/misc/cxl.h> ··· 181 180 struct ocxl_hw_afu *afu = ctx->hw_afu; 182 181 struct device *dev = afu->dev; 183 182 struct ocxlflash_irqs *irq; 184 - void __iomem *vtrig; 183 + struct xive_irq_data *xd; 185 184 u32 virq; 186 185 int rc = 0; 187 186 ··· 205 204 goto err1; 206 205 } 207 206 208 - vtrig = ioremap(irq->ptrig, PAGE_SIZE); 209 - if (unlikely(!vtrig)) { 210 - dev_err(dev, "%s: Trigger page mapping failed\n", __func__); 211 - rc = -ENOMEM; 207 + xd = irq_get_handler_data(virq); 208 + if (unlikely(!xd)) { 209 + dev_err(dev, "%s: Can't get interrupt data\n", __func__); 210 + rc = -ENXIO; 212 211 goto err2; 213 212 } 214 213 215 214 irq->virq = virq; 216 - irq->vtrig = vtrig; 215 + irq->vtrig = xd->trig_mmio; 217 216 out: 218 217 return rc; 219 218 err2: ··· 260 259 } 261 260 262 261 irq = &ctx->irqs[num]; 263 - if (irq->vtrig) 264 - iounmap(irq->vtrig); 265 262 266 263 if (irq_find_mapping(NULL, irq->hwirq)) { 267 264 free_irq(irq->virq, cookie); ··· 614 615 struct ocxl_hw_afu *afu = ctx->hw_afu; 615 616 struct device *dev = afu->dev; 616 617 struct ocxlflash_irqs *irqs; 617 - u64 addr; 618 618 int rc = 0; 619 619 int hwirq; 620 620 int i; ··· 638 640 } 639 641 640 642 for (i = 0; i < num; i++) { 641 - rc = ocxl_link_irq_alloc(afu->link_token, &hwirq, &addr); 643 + rc = ocxl_link_irq_alloc(afu->link_token, &hwirq); 642 644 if (unlikely(rc)) { 643 645 dev_err(dev, "%s: ocxl_link_irq_alloc failed rc=%d\n", 644 646 __func__, rc); ··· 646 648 } 647 649 648 650 irqs[i].hwirq = hwirq; 649 - irqs[i].ptrig = addr; 650 651 } 651 652 652 653 ctx->irqs = irqs;

-1

drivers/scsi/cxlflash/ocxl_hw.h

··· 13 13 struct ocxlflash_irqs { 14 14 int hwirq; 15 15 u32 virq; 16 - u64 ptrig; 17 16 void __iomem *vtrig; 18 17 }; 19 18

+15 -2

fs/exec.c

··· 1001 1001 } 1002 1002 1003 1003 task_lock(tsk); 1004 - active_mm = tsk->active_mm; 1005 1004 membarrier_exec_mmap(mm); 1006 - tsk->mm = mm; 1005 + 1006 + local_irq_disable(); 1007 + active_mm = tsk->active_mm; 1007 1008 tsk->active_mm = mm; 1009 + tsk->mm = mm; 1010 + /* 1011 + * This prevents preemption while active_mm is being loaded and 1012 + * it and mm are being updated, which could cause problems for 1013 + * lazy tlb mm refcounting when these are updated by context 1014 + * switches. Not all architectures can handle irqs off over 1015 + * activate_mm yet. 1016 + */ 1017 + if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) 1018 + local_irq_enable(); 1008 1019 activate_mm(active_mm, mm); 1020 + if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) 1021 + local_irq_enable(); 1009 1022 tsk->mm->vmacache_seqnum = 0; 1010 1023 vmacache_flush(tsk); 1011 1024 task_unlock(tsk);

+1

include/linux/cpuhotplug.h

··· 183 183 CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, 184 184 CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE, 185 185 CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, 186 + CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, 186 187 CPUHP_AP_WATCHDOG_ONLINE, 187 188 CPUHP_AP_WORKQUEUE_ONLINE, 188 189 CPUHP_AP_RCUTREE_ONLINE,

+1 -1

include/linux/topology.h

··· 198 198 #define topology_die_cpumask(cpu) cpumask_of(cpu) 199 199 #endif 200 200 201 - #ifdef CONFIG_SCHED_SMT 201 + #if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask) 202 202 static inline const struct cpumask *cpu_smt_mask(int cpu) 203 203 { 204 204 return topology_sibling_cpumask(cpu);

+1 -7

include/misc/ocxl.h

··· 460 460 * Allocate an AFU interrupt associated to the link. 461 461 * 462 462 * 'hw_irq' is the hardware interrupt number 463 - * 'obj_handle' is the 64-bit object handle to be passed to the AFU to 464 - * trigger the interrupt. 465 - * On P9, 'obj_handle' is an address, which, if written, triggers the 466 - * interrupt. It is an MMIO address which needs to be remapped (one 467 - * page). 468 463 */ 469 - int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, 470 - u64 *obj_handle); 464 + int ocxl_link_irq_alloc(void *link_handle, int *hw_irq); 471 465 472 466 /* 473 467 * Free a previously allocated AFU interrupt

+1

include/uapi/asm-generic/hugetlb_encode.h

··· 20 20 #define HUGETLB_FLAG_ENCODE_SHIFT 26 21 21 #define HUGETLB_FLAG_ENCODE_MASK 0x3f 22 22 23 + #define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT) 23 24 #define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) 24 25 #define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) 25 26 #define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT)

+1

include/uapi/linux/mman.h

··· 27 27 #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT 28 28 #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK 29 29 30 + #define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB 30 31 #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB 31 32 #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB 32 33 #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB

+9 -3

tools/testing/selftests/powerpc/alignment/alignment_handler.c

··· 55 55 #include <setjmp.h> 56 56 #include <signal.h> 57 57 58 - #include <asm/cputable.h> 59 - 60 58 #include "utils.h" 61 59 #include "instructions.h" 62 60 ··· 62 64 int debug; 63 65 int testing; 64 66 volatile int gotsig; 67 + bool prefixes_enabled; 65 68 char *cipath = "/dev/fb0"; 66 69 long cioffset; 67 70 ··· 76 77 } 77 78 gotsig = sig; 78 79 #ifdef __powerpc64__ 79 - ucp->uc_mcontext.gp_regs[PT_NIP] += 4; 80 + if (prefixes_enabled) { 81 + u32 inst = *(u32 *)ucp->uc_mcontext.gp_regs[PT_NIP]; 82 + ucp->uc_mcontext.gp_regs[PT_NIP] += ((inst >> 26 == 1) ? 8 : 4); 83 + } else { 84 + ucp->uc_mcontext.gp_regs[PT_NIP] += 4; 85 + } 80 86 #else 81 87 ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4; 82 88 #endif ··· 651 647 perror("sigaction"); 652 648 exit(1); 653 649 } 650 + 651 + prefixes_enabled = have_hwcap2(PPC_FEATURE2_ARCH_3_1); 654 652 655 653 rc |= test_harness(test_alignment_handler_vsx_206, 656 654 "test_alignment_handler_vsx_206");

+6

tools/testing/selftests/powerpc/benchmarks/context_switch.c

··· 481 481 else 482 482 printf("futex"); 483 483 484 + if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC)) 485 + touch_altivec = 0; 486 + 487 + if (!have_hwcap(PPC_FEATURE_HAS_VSX)) 488 + touch_vector = 0; 489 + 484 490 printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n", 485 491 cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no", 486 492 touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");

+1 -1

tools/testing/selftests/powerpc/dscr/Makefile

··· 10 10 11 11 $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread 12 12 13 - $(TEST_GEN_PROGS): ../harness.c 13 + $(TEST_GEN_PROGS): ../harness.c ../utils.c

+2

tools/testing/selftests/powerpc/dscr/dscr_default_test.c

··· 63 63 unsigned long i, *status[THREADS]; 64 64 unsigned long orig_dscr_default; 65 65 66 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); 67 + 66 68 orig_dscr_default = get_default_dscr(); 67 69 68 70 /* Initial DSCR default */

+2

tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c

··· 21 21 { 22 22 unsigned long i, dscr = 0; 23 23 24 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); 25 + 24 26 srand(getpid()); 25 27 set_dscr(dscr); 26 28

+2

tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c

··· 44 44 unsigned long i, dscr = 0; 45 45 pid_t pid; 46 46 47 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); 48 + 47 49 for (i = 0; i < COUNT; i++) { 48 50 dscr++; 49 51 if (dscr > DSCR_MAX)

+2

tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c

··· 22 22 unsigned long i, dscr = 0; 23 23 pid_t pid; 24 24 25 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); 26 + 25 27 srand(getpid()); 26 28 set_dscr(dscr); 27 29

+2

tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c

··· 77 77 unsigned long orig_dscr_default; 78 78 int i, j; 79 79 80 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); 81 + 80 82 orig_dscr_default = get_default_dscr(); 81 83 for (i = 0; i < COUNT; i++) { 82 84 for (j = 0; j < DSCR_MAX; j++) {

+2

tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c

··· 56 56 unsigned long orig_dscr_default; 57 57 int i, j; 58 58 59 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); 60 + 59 61 orig_dscr_default = get_default_dscr(); 60 62 for (i = 0; i < COUNT; i++) { 61 63 for (j = 0; j < DSCR_MAX; j++) {

+2

tools/testing/selftests/powerpc/dscr/dscr_user_test.c

··· 36 36 { 37 37 int i; 38 38 39 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); 40 + 39 41 check_dscr(""); 40 42 41 43 for (i = 0; i < COUNT; i++) {

+6 -3

tools/testing/selftests/powerpc/eeh/eeh-basic.sh

··· 1 1 #!/bin/sh 2 2 # SPDX-License-Identifier: GPL-2.0-only 3 3 4 + KSELFTESTS_SKIP=4 5 + 4 6 . ./eeh-functions.sh 5 7 6 8 if ! eeh_supported ; then 7 9 echo "EEH not supported on this system, skipping" 8 - exit 0; 10 + exit $KSELFTESTS_SKIP; 9 11 fi 10 12 11 13 if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ 12 14 [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then 13 15 echo "debugfs EEH testing files are missing. Is debugfs mounted?" 14 - exit 1; 16 + exit $KSELFTESTS_SKIP; 15 17 fi 16 18 17 19 pre_lspci=`mktemp` ··· 86 84 lspci | diff -u $pre_lspci - 87 85 rm -f $pre_lspci 88 86 89 - exit $failed 87 + test "$failed" == 0 88 + exit $?

+1 -1

tools/testing/selftests/powerpc/include/utils.h

··· 12 12 #include <stdbool.h> 13 13 #include <linux/auxvec.h> 14 14 #include <linux/perf_event.h> 15 + #include <asm/cputable.h> 15 16 #include "reg.h" 16 17 17 18 /* Avoid headaches with PRI?64 - just use %ll? always */ ··· 36 35 int read_debugfs_file(char *debugfs_file, int *result); 37 36 int write_debugfs_file(char *debugfs_file, int result); 38 37 int read_sysfs_file(char *debugfs_file, char *result, size_t result_size); 39 - void set_dscr(unsigned long val); 40 38 int perf_event_open_counter(unsigned int type, 41 39 unsigned long config, int group_fd); 42 40 int perf_event_enable(int fd);

+1

tools/testing/selftests/powerpc/mm/bad_accesses.c

··· 139 139 140 140 int main(void) 141 141 { 142 + test_harness_set_timeout(300); 142 143 return test_harness(test, "bad_accesses"); 143 144 }

-1

tools/testing/selftests/powerpc/pmu/count_stcx_fail.c

··· 9 9 #include <stdbool.h> 10 10 #include <string.h> 11 11 #include <sys/prctl.h> 12 - #include <asm/cputable.h> 13 12 14 13 #include "event.h" 15 14 #include "utils.h"

+3

tools/testing/selftests/powerpc/pmu/l3_bank_test.c

··· 20 20 char *p; 21 21 int i; 22 22 23 + // The L3 bank logic is only used on Power8 or later 24 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); 25 + 23 26 p = malloc(MALLOC_SIZE); 24 27 FAIL_IF(!p); 25 28

-2

tools/testing/selftests/powerpc/pmu/per_event_excludes.c

··· 12 12 #include <string.h> 13 13 #include <sys/prctl.h> 14 14 15 - #include <asm/cputable.h> 16 - 17 15 #include "event.h" 18 16 #include "lib.h" 19 17 #include "utils.h"

+46 -2

tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c

··· 20 20 #include <signal.h> 21 21 #include <sys/types.h> 22 22 #include <sys/wait.h> 23 + #include <sys/syscall.h> 24 + #include <linux/limits.h> 23 25 #include "ptrace.h" 24 26 25 27 #define SPRN_PVR 0x11F ··· 46 44 }; 47 45 static volatile struct gstruct gstruct __attribute__((aligned(512))); 48 46 47 + static volatile char cwd[PATH_MAX] __attribute__((aligned(8))); 49 48 50 49 static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) 51 50 { ··· 141 138 write_var(len); 142 139 } 143 140 141 + /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */ 142 + syscall(__NR_getcwd, &cwd, PATH_MAX); 143 + 144 144 /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */ 145 145 write_var(1); 146 146 ··· 155 149 write_var(1); 156 150 else 157 151 read_var(1); 152 + 153 + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */ 154 + syscall(__NR_getcwd, &cwd, PATH_MAX); 158 155 159 156 /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */ 160 157 gstruct.a[rand() % A_LEN] = 'a'; ··· 302 293 return 0; 303 294 } 304 295 296 + static int test_set_debugreg_kernel_userspace(pid_t child_pid) 297 + { 298 + unsigned long wp_addr = (unsigned long)cwd; 299 + char *name = "PTRACE_SET_DEBUGREG"; 300 + 301 + /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */ 302 + wp_addr &= ~0x7UL; 303 + wp_addr |= (1Ul << DABR_READ_SHIFT); 304 + wp_addr |= (1UL << DABR_WRITE_SHIFT); 305 + wp_addr |= (1UL << DABR_TRANSLATION_SHIFT); 306 + ptrace_set_debugreg(child_pid, wp_addr); 307 + ptrace(PTRACE_CONT, child_pid, NULL, 0); 308 + check_success(child_pid, name, "Kernel Access Userspace", wp_addr, 8); 309 + 310 + ptrace_set_debugreg(child_pid, 0); 311 + return 0; 312 + } 313 + 305 314 static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, 306 315 unsigned long addr, int len) 307 316 { ··· 362 335 wh = ptrace_sethwdebug(child_pid, &info); 363 336 ptrace(PTRACE_CONT, child_pid, NULL, 0); 364 337 check_success(child_pid, name, "RW", wp_addr, len); 338 
+ ptrace_delhwdebug(child_pid, wh); 339 + } 340 + 341 + static void test_sethwdebug_exact_kernel_userspace(pid_t child_pid) 342 + { 343 + struct ppc_hw_breakpoint info; 344 + unsigned long wp_addr = (unsigned long)&cwd; 345 + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT"; 346 + int len = 1; /* hardcoded in kernel */ 347 + int wh; 348 + 349 + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */ 350 + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0); 351 + wh = ptrace_sethwdebug(child_pid, &info); 352 + ptrace(PTRACE_CONT, child_pid, NULL, 0); 353 + check_success(child_pid, name, "Kernel Access Userspace", wp_addr, len); 365 354 ptrace_delhwdebug(child_pid, wh); 366 355 } 367 356 ··· 495 452 run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) 496 453 { 497 454 test_set_debugreg(child_pid); 455 + test_set_debugreg_kernel_userspace(child_pid); 456 + test_sethwdebug_exact(child_pid); 457 + test_sethwdebug_exact_kernel_userspace(child_pid); 498 458 if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) { 499 - test_sethwdebug_exact(child_pid); 500 - 501 459 test_sethwdebug_range_aligned(child_pid); 502 460 if (dawr || is_8xx) { 503 461 test_sethwdebug_range_unaligned(child_pid);

+38

tools/testing/selftests/powerpc/security/rfi_flush.c

··· 10 10 #include <stdint.h> 11 11 #include <malloc.h> 12 12 #include <unistd.h> 13 + #include <signal.h> 13 14 #include <stdlib.h> 14 15 #include <string.h> 15 16 #include <stdio.h> ··· 42 41 } 43 42 } 44 43 44 + static void sigill_handler(int signr, siginfo_t *info, void *unused) 45 + { 46 + static int warned = 0; 47 + ucontext_t *ctx = (ucontext_t *)unused; 48 + unsigned long *pc = &UCONTEXT_NIA(ctx); 49 + 50 + /* mtspr 3,RS to check for move to DSCR below */ 51 + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { 52 + if (!warned++) 53 + printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); 54 + *pc += 4; 55 + } else { 56 + printf("SIGILL at %p\n", pc); 57 + abort(); 58 + } 59 + } 60 + 61 + static void set_dscr(unsigned long val) 62 + { 63 + static int init = 0; 64 + struct sigaction sa; 65 + 66 + if (!init) { 67 + memset(&sa, 0, sizeof(sa)); 68 + sa.sa_sigaction = sigill_handler; 69 + sa.sa_flags = SA_SIGINFO; 70 + if (sigaction(SIGILL, &sa, NULL)) 71 + perror("sigill_handler"); 72 + init = 1; 73 + } 74 + 75 + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); 76 + } 77 + 45 78 int rfi_flush_test(void) 46 79 { 47 80 char *p; ··· 88 53 int rfi_flush_org, rfi_flush; 89 54 90 55 SKIP_IF(geteuid() != 0); 56 + 57 + // The PMU event we use only works on Power7 or later 58 + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); 91 59 92 60 if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) { 93 61 perror("Unable to read powerpc/rfi_flush debugfs file");

+3

tools/testing/selftests/powerpc/security/spectre_v2.c

··· 134 134 s64 miss_percent; 135 135 bool is_p9; 136 136 137 + // The PMU events we use only work on Power8 or later 138 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); 139 + 137 140 state = get_sysfs_state(); 138 141 if (state == UNKNOWN) { 139 142 printf("Error: couldn't determine spectre_v2 mitigation state?\n");

+1 -1

tools/testing/selftests/powerpc/stringloops/memcmp.c

··· 4 4 #include <string.h> 5 5 #include <sys/mman.h> 6 6 #include <time.h> 7 - #include <asm/cputable.h> 7 + 8 8 #include "utils.h" 9 9 10 10 #define SIZE 256

+19 -4

tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S

··· 3 3 4 4 .data 5 5 .balign 8 6 - message: 6 + success_message: 7 7 .ascii "success: switch_endian_test\n\0" 8 + 9 + .balign 8 10 + failure_message: 11 + .ascii "failure: switch_endian_test\n\0" 8 12 9 13 .section ".toc" 10 14 .balign 8 ··· 68 64 li r0, __NR_switch_endian 69 65 sc 70 66 67 + tdi 0, 0, 0x48 // b +8 if the endian was switched 68 + b .Lfail // exit if endian didn't switch 69 + 71 70 #include "check-reversed.S" 72 71 73 72 /* Flip back, r0 already has the switch syscall number */ ··· 78 71 79 72 #include "check.S" 80 73 74 + ld r4, success_message@got(%r2) 75 + li r5, 28 // strlen(success_message) 76 + li r14, 0 // exit status 77 + .Lout: 81 78 li r0, __NR_write 82 79 li r3, 1 /* stdout */ 83 - ld r4, message@got(%r2) 84 - li r5, 28 /* strlen(message3) */ 85 80 sc 86 81 li r0, __NR_exit 87 - li r3, 0 82 + mr r3, r14 88 83 sc 89 84 b . 85 + 86 + .Lfail: 87 + ld r4, failure_message@got(%r2) 88 + li r5, 28 // strlen(failure_message) 89 + li r14, 1 90 + b .Lout

+1 -1

tools/testing/selftests/powerpc/syscalls/Makefile

··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 - TEST_GEN_PROGS := ipc_unmuxed 2 + TEST_GEN_PROGS := ipc_unmuxed rtas_filter 3 3 4 4 CFLAGS += -I../../../../../usr/include 5 5

+285

tools/testing/selftests/powerpc/syscalls/rtas_filter.c

··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* 3 + * Copyright 2005-2020 IBM Corporation. 4 + * 5 + * Includes code from librtas (https://github.com/ibm-power-utilities/librtas/) 6 + */ 7 + 8 + #include <byteswap.h> 9 + #include <stdint.h> 10 + #include <inttypes.h> 11 + #include <stdio.h> 12 + #include <string.h> 13 + #include <sys/syscall.h> 14 + #include <sys/types.h> 15 + #include <unistd.h> 16 + #include <stdarg.h> 17 + #include <stdlib.h> 18 + #include <fcntl.h> 19 + #include <errno.h> 20 + #include "utils.h" 21 + 22 + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 23 + #define cpu_to_be32(x) bswap_32(x) 24 + #define be32_to_cpu(x) bswap_32(x) 25 + #else 26 + #define cpu_to_be32(x) (x) 27 + #define be32_to_cpu(x) (x) 28 + #endif 29 + 30 + #define RTAS_IO_ASSERT -1098 /* Unexpected I/O Error */ 31 + #define RTAS_UNKNOWN_OP -1099 /* No Firmware Implementation of Function */ 32 + #define BLOCK_SIZE 4096 33 + #define PAGE_SIZE 4096 34 + #define MAX_PAGES 64 35 + 36 + static const char *ofdt_rtas_path = "/proc/device-tree/rtas"; 37 + 38 + typedef __be32 uint32_t; 39 + struct rtas_args { 40 + __be32 token; 41 + __be32 nargs; 42 + __be32 nret; 43 + __be32 args[16]; 44 + __be32 *rets; /* Pointer to return values in args[]. 
*/ 45 + }; 46 + 47 + struct region { 48 + uint64_t addr; 49 + uint32_t size; 50 + struct region *next; 51 + }; 52 + 53 + int read_entire_file(int fd, char **buf, size_t *len) 54 + { 55 + size_t buf_size = 0; 56 + size_t off = 0; 57 + int rc; 58 + 59 + *buf = NULL; 60 + do { 61 + buf_size += BLOCK_SIZE; 62 + if (*buf == NULL) 63 + *buf = malloc(buf_size); 64 + else 65 + *buf = realloc(*buf, buf_size); 66 + 67 + if (*buf == NULL) 68 + return -ENOMEM; 69 + 70 + rc = read(fd, *buf + off, BLOCK_SIZE); 71 + if (rc < 0) 72 + return -EIO; 73 + 74 + off += rc; 75 + } while (rc == BLOCK_SIZE); 76 + 77 + if (len) 78 + *len = off; 79 + 80 + return 0; 81 + } 82 + 83 + static int open_prop_file(const char *prop_path, const char *prop_name, int *fd) 84 + { 85 + char *path; 86 + int len; 87 + 88 + /* allocate enough for two string, a slash and trailing NULL */ 89 + len = strlen(prop_path) + strlen(prop_name) + 1 + 1; 90 + path = malloc(len); 91 + if (path == NULL) 92 + return -ENOMEM; 93 + 94 + snprintf(path, len, "%s/%s", prop_path, prop_name); 95 + 96 + *fd = open(path, O_RDONLY); 97 + free(path); 98 + if (*fd < 0) 99 + return -errno; 100 + 101 + return 0; 102 + } 103 + 104 + static int get_property(const char *prop_path, const char *prop_name, 105 + char **prop_val, size_t *prop_len) 106 + { 107 + int rc, fd; 108 + 109 + rc = open_prop_file(prop_path, prop_name, &fd); 110 + if (rc) 111 + return rc; 112 + 113 + rc = read_entire_file(fd, prop_val, prop_len); 114 + close(fd); 115 + 116 + return rc; 117 + } 118 + 119 + int rtas_token(const char *call_name) 120 + { 121 + char *prop_buf = NULL; 122 + size_t len; 123 + int rc; 124 + 125 + rc = get_property(ofdt_rtas_path, call_name, &prop_buf, &len); 126 + if (rc < 0) { 127 + rc = RTAS_UNKNOWN_OP; 128 + goto err; 129 + } 130 + 131 + rc = be32_to_cpu(*(int *)prop_buf); 132 + 133 + err: 134 + free(prop_buf); 135 + return rc; 136 + } 137 + 138 + static int read_kregion_bounds(struct region *kregion) 139 + { 140 + char *buf; 141 + int fd; 
142 + int rc; 143 + 144 + fd = open("/proc/ppc64/rtas/rmo_buffer", O_RDONLY); 145 + if (fd < 0) { 146 + printf("Could not open rmo_buffer file\n"); 147 + return RTAS_IO_ASSERT; 148 + } 149 + 150 + rc = read_entire_file(fd, &buf, NULL); 151 + close(fd); 152 + if (rc) { 153 + free(buf); 154 + return rc; 155 + } 156 + 157 + sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size); 158 + free(buf); 159 + 160 + if (!(kregion->size && kregion->addr) || 161 + (kregion->size > (PAGE_SIZE * MAX_PAGES))) { 162 + printf("Unexpected kregion bounds\n"); 163 + return RTAS_IO_ASSERT; 164 + } 165 + 166 + return 0; 167 + } 168 + 169 + static int rtas_call(const char *name, int nargs, 170 + int nrets, ...) 171 + { 172 + struct rtas_args args; 173 + __be32 *rets[16]; 174 + int i, rc, token; 175 + va_list ap; 176 + 177 + va_start(ap, nrets); 178 + 179 + token = rtas_token(name); 180 + if (token == RTAS_UNKNOWN_OP) { 181 + // We don't care if the call doesn't exist 182 + printf("call '%s' not available, skipping...", name); 183 + rc = RTAS_UNKNOWN_OP; 184 + goto err; 185 + } 186 + 187 + args.token = cpu_to_be32(token); 188 + args.nargs = cpu_to_be32(nargs); 189 + args.nret = cpu_to_be32(nrets); 190 + 191 + for (i = 0; i < nargs; i++) 192 + args.args[i] = (__be32) va_arg(ap, unsigned long); 193 + 194 + for (i = 0; i < nrets; i++) 195 + rets[i] = (__be32 *) va_arg(ap, unsigned long); 196 + 197 + rc = syscall(__NR_rtas, &args); 198 + if (rc) { 199 + rc = -errno; 200 + goto err; 201 + } 202 + 203 + if (nrets) { 204 + *(rets[0]) = be32_to_cpu(args.args[nargs]); 205 + 206 + for (i = 1; i < nrets; i++) { 207 + *(rets[i]) = args.args[nargs + i]; 208 + } 209 + } 210 + 211 + err: 212 + va_end(ap); 213 + return rc; 214 + } 215 + 216 + static int test(void) 217 + { 218 + struct region rmo_region; 219 + uint32_t rmo_start; 220 + uint32_t rmo_end; 221 + __be32 rets[1]; 222 + int rc; 223 + 224 + // Test a legitimate harmless call 225 + // Expected: call succeeds 226 + printf("Test a permitted 
call, no parameters... "); 227 + rc = rtas_call("get-time-of-day", 0, 1, rets); 228 + printf("rc: %d\n", rc); 229 + FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP); 230 + 231 + // Test a prohibited call 232 + // Expected: call returns -EINVAL 233 + printf("Test a prohibited call... "); 234 + rc = rtas_call("nvram-fetch", 0, 1, rets); 235 + printf("rc: %d\n", rc); 236 + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); 237 + 238 + // Get RMO 239 + rc = read_kregion_bounds(&rmo_region); 240 + if (rc) { 241 + printf("Couldn't read RMO region bounds, skipping remaining cases\n"); 242 + return 0; 243 + } 244 + rmo_start = rmo_region.addr; 245 + rmo_end = rmo_start + rmo_region.size - 1; 246 + printf("RMO range: %08x - %08x\n", rmo_start, rmo_end); 247 + 248 + // Test a permitted call, user-supplied size, buffer inside RMO 249 + // Expected: call succeeds 250 + printf("Test a permitted call, user-supplied size, buffer inside RMO... "); 251 + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start), 252 + cpu_to_be32(rmo_end - rmo_start + 1), rets); 253 + printf("rc: %d\n", rc); 254 + FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP); 255 + 256 + // Test a permitted call, user-supplied size, buffer start outside RMO 257 + // Expected: call returns -EINVAL 258 + printf("Test a permitted call, user-supplied size, buffer start outside RMO... "); 259 + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_end + 1), 260 + cpu_to_be32(4000), rets); 261 + printf("rc: %d\n", rc); 262 + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); 263 + 264 + // Test a permitted call, user-supplied size, buffer end outside RMO 265 + // Expected: call returns -EINVAL 266 + printf("Test a permitted call, user-supplied size, buffer end outside RMO... 
"); 267 + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start), 268 + cpu_to_be32(rmo_end - rmo_start + 2), rets); 269 + printf("rc: %d\n", rc); 270 + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); 271 + 272 + // Test a permitted call, fixed size, buffer end outside RMO 273 + // Expected: call returns -EINVAL 274 + printf("Test a permitted call, fixed size, buffer end outside RMO... "); 275 + rc = rtas_call("ibm,configure-connector", 2, 1, cpu_to_be32(rmo_end - 4000), 0, rets); 276 + printf("rc: %d\n", rc); 277 + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); 278 + 279 + return 0; 280 + } 281 + 282 + int main(int argc, char *argv[]) 283 + { 284 + return test_harness(test, "rtas_filter"); 285 + }

+7 -4

tools/testing/selftests/powerpc/tm/tm-poison.c

··· 26 26 27 27 int tm_poison_test(void) 28 28 { 29 - int pid; 29 + int cpu, pid; 30 30 cpu_set_t cpuset; 31 31 uint64_t poison = 0xdeadbeefc0dec0fe; 32 32 uint64_t unknown = 0; ··· 35 35 36 36 SKIP_IF(!have_htm()); 37 37 38 - /* Attach both Child and Parent to CPU 0 */ 38 + cpu = pick_online_cpu(); 39 + FAIL_IF(cpu < 0); 40 + 41 + // Attach both Child and Parent to the same CPU 39 42 CPU_ZERO(&cpuset); 40 - CPU_SET(0, &cpuset); 41 - sched_setaffinity(0, sizeof(cpuset), &cpuset); 43 + CPU_SET(cpu, &cpuset); 44 + FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0); 42 45 43 46 pid = fork(); 44 47 if (!pid) {

+2 -8

tools/testing/selftests/powerpc/tm/tm-tmspr.c

··· 33 33 #include "utils.h" 34 34 #include "tm.h" 35 35 36 - int num_loops = 10000; 36 + int num_loops = 1000000; 37 37 int passed = 1; 38 38 39 39 void tfiar_tfhar(void *in) 40 40 { 41 - int i, cpu; 42 41 unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd; 43 - cpu_set_t cpuset; 44 - 45 - CPU_ZERO(&cpuset); 46 - cpu = (unsigned long)in >> 1; 47 - CPU_SET(cpu, &cpuset); 48 - sched_setaffinity(0, sizeof(cpuset), &cpuset); 42 + int i; 49 43 50 44 /* TFIAR: Last bit has to be high so userspace can read register */ 51 45 tfiar = ((unsigned long)in) + 1;

+6 -4

tools/testing/selftests/powerpc/tm/tm-trap.c

··· 247 247 int tm_trap_test(void) 248 248 { 249 249 uint16_t k = 1; 250 - 251 - int rc; 250 + int cpu, rc; 252 251 253 252 pthread_attr_t attr; 254 253 cpu_set_t cpuset; ··· 266 267 usr1_sa.sa_sigaction = usr1_signal_handler; 267 268 sigaction(SIGUSR1, &usr1_sa, NULL); 268 269 269 - /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */ 270 + cpu = pick_online_cpu(); 271 + FAIL_IF(cpu < 0); 272 + 273 + // Set only one CPU in the mask. Both threads will be bound to that CPU. 270 274 CPU_ZERO(&cpuset); 271 - CPU_SET(0, &cpuset); 275 + CPU_SET(cpu, &cpuset); 272 276 273 277 /* Init pthread attribute */ 274 278 rc = pthread_attr_init(&attr);

+6 -3

tools/testing/selftests/powerpc/tm/tm-unavailable.c

··· 338 338 339 339 int tm_unavailable_test(void) 340 340 { 341 - int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ 341 + int cpu, rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ 342 342 pthread_t t1; 343 343 pthread_attr_t attr; 344 344 cpu_set_t cpuset; 345 345 346 346 SKIP_IF(!have_htm()); 347 347 348 - /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */ 348 + cpu = pick_online_cpu(); 349 + FAIL_IF(cpu < 0); 350 + 351 + // Set only one CPU in the mask. Both threads will be bound to that CPU. 349 352 CPU_ZERO(&cpuset); 350 - CPU_SET(0, &cpuset); 353 + CPU_SET(cpu, &cpuset); 351 354 352 355 /* Init pthread attribute. */ 353 356 rc = pthread_attr_init(&attr);

+1 -2

tools/testing/selftests/powerpc/tm/tm.h

··· 6 6 #ifndef _SELFTESTS_POWERPC_TM_TM_H 7 7 #define _SELFTESTS_POWERPC_TM_TM_H 8 8 9 - #include <asm/tm.h> 10 - #include <asm/cputable.h> 11 9 #include <stdbool.h> 10 + #include <asm/tm.h> 12 11 13 12 #include "utils.h" 14 13

+3 -36

tools/testing/selftests/powerpc/utils.c

··· 10 10 #include <fcntl.h> 11 11 #include <link.h> 12 12 #include <sched.h> 13 - #include <signal.h> 14 13 #include <stdio.h> 15 14 #include <stdlib.h> 16 15 #include <string.h> ··· 272 273 return 0; 273 274 } 274 275 275 - static void sigill_handler(int signr, siginfo_t *info, void *unused) 276 - { 277 - static int warned = 0; 278 - ucontext_t *ctx = (ucontext_t *)unused; 279 - unsigned long *pc = &UCONTEXT_NIA(ctx); 280 - 281 - /* mtspr 3,RS to check for move to DSCR below */ 282 - if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { 283 - if (!warned++) 284 - printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); 285 - *pc += 4; 286 - } else { 287 - printf("SIGILL at %p\n", pc); 288 - abort(); 289 - } 290 - } 291 - 292 - void set_dscr(unsigned long val) 293 - { 294 - static int init = 0; 295 - struct sigaction sa; 296 - 297 - if (!init) { 298 - memset(&sa, 0, sizeof(sa)); 299 - sa.sa_sigaction = sigill_handler; 300 - sa.sa_flags = SA_SIGINFO; 301 - if (sigaction(SIGILL, &sa, NULL)) 302 - perror("sigill_handler"); 303 - init = 1; 304 - } 305 - 306 - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); 307 - } 308 - 309 276 int using_hash_mmu(bool *using_hash) 310 277 { 311 278 char line[128]; ··· 283 318 284 319 rc = 0; 285 320 while (fgets(line, sizeof(line), f) != NULL) { 286 - if (strcmp(line, "MMU : Hash\n") == 0) { 321 + if (!strcmp(line, "MMU : Hash\n") || 322 + !strcmp(line, "platform : Cell\n") || 323 + !strcmp(line, "platform : PowerMac\n")) { 287 324 *using_hash = true; 288 325 goto out; 289 326 }

Configure Feed

Configure Feed