diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index 12e2bf92783fb..40f7cd2405914 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -80,3 +80,163 @@ Contact: Linux on PowerPC Developer List Description: read only This sysfs file exposes the cpumask which is designated to make HCALLs to retrieve hv-gpci pmu event counter data. + +What: /sys/devices/hv_gpci/interface/processor_bus_topology +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + PROCESSOR_BUS_TOPOLOGY(0xD0). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY" can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer to getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/processor_config +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + PROCESSOR_CONFIG(0x90). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY" can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer to getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/affinity_domain_via_virtual_processor +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + AFFINITY_DOMAIN_INFORMATION_BY_VIRTUAL_PROCESSOR(0xA0). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY" can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer to getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/affinity_domain_via_domain +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + AFFINITY_DOMAIN_INFORMATION_BY_DOMAIN(0xB0). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY" can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer to getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/affinity_domain_via_partition +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + AFFINITY_DOMAIN_INFORMATION_BY_PARTITION(0xB1). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY" can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer to getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 77942eedf4f69..183a07c4f1916 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -556,6 +556,7 @@ Description: Control Symmetric Multi Threading (SMT) ================ ========================================= "on" SMT is enabled "off" SMT is disabled + "" SMT is enabled with N threads per core. "forceoff" SMT is force disabled. Cannot be changed. "notsupported" SMT is not supported by the CPU "notimplemented" SMT runtime toggling is not diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 722b6eca2e938..d8fac551cda49 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3874,10 +3874,10 @@ nosmp [SMP] Tells an SMP kernel to act as a UP kernel, and disable the IO APIC. legacy for "maxcpus=0". - nosmt [KNL,MIPS,S390] Disable symmetric multithreading (SMT). + nosmt [KNL,MIPS,PPC,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. - [KNL,X86] Disable symmetric multithreading (SMT). + [KNL,X86,PPC] Disable symmetric multithreading (SMT). nosmt=force: Force disable SMT, cannot be undone via the sysfs control file. diff --git a/Documentation/powerpc/ptrace.rst b/Documentation/powerpc/ptrace.rst index 77725d69eb4a4..5629edf4d56ec 100644 --- a/Documentation/powerpc/ptrace.rst +++ b/Documentation/powerpc/ptrace.rst @@ -15,7 +15,7 @@ that's extendable and that covers both BookE and server processors, so that GDB doesn't need to special-case each of them. We added the following 3 new ptrace requests. -1. PTRACE_PPC_GETHWDEBUGINFO +1. PPC_PTRACE_GETHWDBGINFO ============================ Query for GDB to discover the hardware debug features. The main info to @@ -48,7 +48,7 @@ features will have bits indicating whether there is support for:: #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10 #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 -2. PTRACE_SETHWDEBUG +2. PPC_PTRACE_SETHWDEBUG Sets a hardware breakpoint or watchpoint, according to the provided structure:: @@ -88,7 +88,7 @@ that the BookE supports. COMEFROM breakpoints available in server processors are not contemplated, but that is out of the scope of this work. ptrace will return an integer (handle) uniquely identifying the breakpoint or -watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG +watchpoint just created. This integer will be used in the PPC_PTRACE_DELHWDEBUG request to ask for its removal. Return -ENOSPC if the requested breakpoint can't be allocated on the registers. @@ -150,7 +150,7 @@ Some examples of using the structure to: p.addr2 = (uint64_t) end_range; p.condition_value = 0; -3. PTRACE_DELHWDEBUG +3. PPC_PTRACE_DELHWDEBUG Takes an integer which identifies an existing breakpoint or watchpoint (i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the diff --git a/arch/Kconfig b/arch/Kconfig index aff2746c8af28..63c5d6a2022bc 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -34,6 +34,9 @@ config ARCH_HAS_SUBPAGE_FAULTS config HOTPLUG_SMT bool +config SMT_NUM_THREADS_DYNAMIC + bool + # Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL config HOTPLUG_CORE_SYNC bool diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 0b1172cbeccb3..b7a4cb5d9409a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -272,6 +272,8 @@ config PPC select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_VIRT_CPU_ACCOUNTING_GEN + select HOTPLUG_SMT if HOTPLUG_CPU + select SMT_NUM_THREADS_DYNAMIC select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE select IOMMU_HELPER if PPC64 select IRQ_DOMAIN @@ -1144,12 +1146,6 @@ config FSL_GTM help Freescale General-purpose Timers support -config PCI_8260 - bool - depends on PCI && 8260 - select PPC_INDIRECT_PCI - default y - config FSL_RIO bool "Freescale Embedded SRIO Controller support" depends on RAPIDIO = y && HAVE_RAPIDIO diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 05ed585764645..a205da9ee5f2f 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -176,8 +176,9 @@ CONFIG_MOUSE_APPLETOUCH=y # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set CONFIG_SERIAL_8250=m -CONFIG_SERIAL_PMACZILOG=m +CONFIG_SERIAL_PMACZILOG=y CONFIG_SERIAL_PMACZILOG_TTYS=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y CONFIG_NVRAM=y CONFIG_I2C_CHARDEV=m CONFIG_APM_POWER=y diff --git a/arch/powerpc/include/asm/8xx_immap.h b/arch/powerpc/include/asm/8xx_immap.h index bdf0563ba4238..f9cac46a95cb3 100644 --- a/arch/powerpc/include/asm/8xx_immap.h +++ b/arch/powerpc/include/asm/8xx_immap.h @@ -560,5 +560,7 @@ typedef struct immap { cpm8xx_t im_cpm; /* Communication processor */ } immap_t; +extern immap_t __iomem *mpc8xx_immr; + #endif /* __IMMAP_8XX__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 419319c4963cc..61a8d5555cd7e 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -3,7 +3,6 @@ generated-y += syscall_table_32.h generated-y += syscall_table_64.h generated-y += syscall_table_spu.h generic-y += agp.h -generic-y += export.h generic-y += kvm_types.h generic-y += mcs_spinlock.h generic-y += qrwlock.h diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index 9ee192a6c5d76..249d43cc64275 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -1080,6 +1080,9 @@ typedef struct im_idma { #define FCC2_MEM_OFFSET FCC_MEM_OFFSET(1) #define FCC3_MEM_OFFSET FCC_MEM_OFFSET(2) +/* Pipeline Maximum Depth */ +#define MPC82XX_BCR_PLDP 0x00800000 + /* Clocks and GRG's */ enum cpm_clk_dir { diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h index 8def56ec05c60..d530f68b4eef6 100644 --- a/arch/powerpc/include/asm/fs_pd.h +++ b/arch/powerpc/include/asm/fs_pd.h @@ -14,28 +14,6 @@ #include #include -#ifdef CONFIG_CPM2 -#include - -#if defined(CONFIG_8260) -#include -#endif - -#define cpm2_map(member) (&cpm2_immr->member) -#define cpm2_map_size(member, size) (&cpm2_immr->member) -#define cpm2_unmap(addr) do {} while(0) -#endif - -#ifdef CONFIG_PPC_8xx -#include - -extern immap_t __iomem *mpc8xx_immr; - -#define immr_map(member) (&mpc8xx_immr->member) -#define immr_map_size(member, size) (&mpc8xx_immr->member) -#define immr_unmap(addr) do {} while (0) -#endif - static inline int uart_baudrate(void) { return get_baudrate(); diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 84d39fd42f711..66db0147d5b42 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -18,6 +18,7 @@ struct arch_hw_breakpoint { u16 len; /* length of the target data symbol */ u16 hw_len; /* length programmed in hw */ u8 flags; + bool perf_single_step; /* temporarily uninstalled for a perf single step */ }; /* Note: Don't change the first 6 bits below as they are in the same order diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h index 6fd2b4d486c52..424ceef82ae61 100644 --- a/arch/powerpc/include/asm/kfence.h +++ b/arch/powerpc/include/asm/kfence.h @@ -23,7 +23,7 @@ static inline bool arch_kfence_init_pool(void) #ifdef CONFIG_PPC64 static inline bool kfence_protect_page(unsigned long addr, bool protect) { - struct page *page = virt_to_page(addr); + struct page *page = virt_to_page((void *)addr); __kernel_map_pages(page, 1, !protect); diff --git a/arch/powerpc/include/asm/mpc8260.h b/arch/powerpc/include/asm/mpc8260.h deleted file mode 100644 index 155114bbd1a26..0000000000000 --- a/arch/powerpc/include/asm/mpc8260.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Since there are many different boards and no standard configuration, - * we have a unique include file for each. Rather than change every - * file that has to include MPC8260 configuration, they all include - * this one and the configuration switching is done here. - */ -#ifdef __KERNEL__ -#ifndef __ASM_POWERPC_MPC8260_H__ -#define __ASM_POWERPC_MPC8260_H__ - -#define MPC82XX_BCR_PLDP 0x00800000 /* Pipeline Maximum Depth */ - -#ifdef CONFIG_8260 - -#ifdef CONFIG_PCI_8260 -#include -#endif - -#endif /* CONFIG_8260 */ -#endif /* !__ASM_POWERPC_MPC8260_H__ */ -#endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index fec56d965f00d..d6201b5096b83 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -355,7 +355,7 @@ static inline int pte_young(pte_t pte) #define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT) #else #define pmd_page_vaddr(pmd) \ - ((unsigned long)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1))) + ((const void *)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1))) #define pmd_pfn(pmd) (__pa(pmd_val(pmd)) >> PAGE_SHIFT) #endif diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 287e25864ffae..81c801880933e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -127,7 +127,7 @@ static inline pte_t pmd_pte(pmd_t pmd) #define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ || (pmd_val(pmd) & PMD_BAD_BITS)) #define pmd_present(pmd) (!pmd_none(pmd)) -#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) +#define pmd_page_vaddr(pmd) ((const void *)(pmd_val(pmd) & ~PMD_MASKED_BITS)) extern struct page *pmd_page(pmd_t pmd); #define pmd_pfn(pmd) (page_to_pfn(pmd_page(pmd))) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index f2b6bf5687d0e..9ee4b6d4a82a0 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -9,6 +9,7 @@ #ifndef __ASSEMBLY__ #include #include +#include #else #include #endif @@ -119,16 +120,6 @@ extern long long virt_phys_offset; #define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT)) #endif -#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) -#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) - -#define virt_addr_valid(vaddr) ({ \ - unsigned long _addr = (unsigned long)vaddr; \ - _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ - pfn_valid(virt_to_pfn(_addr)); \ -}) - /* * On Book-E parts we need __va to parse the device tree and we can't * determine MEMORY_START until then. However we can determine PHYSICAL_START @@ -233,6 +224,25 @@ extern long long virt_phys_offset; #endif #endif +#ifndef __ASSEMBLY__ +static inline unsigned long virt_to_pfn(const void *kaddr) +{ + return __pa(kaddr) >> PAGE_SHIFT; +} + +static inline const void *pfn_to_kaddr(unsigned long pfn) +{ + return (const void *)(((unsigned long)__va(pfn)) << PAGE_SHIFT); +} +#endif + +#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ + pfn_valid(virt_to_pfn((void *)_addr)); \ +}) + /* * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, * and needs to be executable. This means the whole heap ends diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 6a88bfdaa69b6..a9515d3d7831b 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -60,9 +60,9 @@ static inline pgprot_t pte_pgprot(pte_t pte) } #ifndef pmd_page_vaddr -static inline unsigned long pmd_page_vaddr(pmd_t pmd) +static inline const void *pmd_page_vaddr(pmd_t pmd) { - return ((unsigned long)__va(pmd_val(pmd) & ~PMD_MASKED_BITS)); + return (const void *)((unsigned long)__va(pmd_val(pmd) & ~PMD_MASKED_BITS)); } #define pmd_page_vaddr pmd_page_vaddr #endif diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ef6972aa33b92..005601243dda4 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -397,6 +397,7 @@ #define PPC_RAW_RFCI (0x4c000066) #define PPC_RAW_RFDI (0x4c00004e) #define PPC_RAW_RFMCI (0x4c00004c) +#define PPC_RAW_TLBILX_LPID (0x7c000024) #define PPC_RAW_TLBILX(t, a, b) (0x7c000024 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) #define PPC_RAW_WAIT_v203 (0x7c00007c) #define PPC_RAW_WAIT(w, p) (0x7c00003c | __PPC_WC(w) | __PPC_PL(p)) @@ -616,6 +617,7 @@ #define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_RAW_TLBILX(t, a, b)) #define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b) #define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b) +#define PPC_TLBILX_LPID stringify_in_c(.long PPC_RAW_TLBILX_LPID) #define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b) #define PPC_WAIT_v203 stringify_in_c(.long PPC_RAW_WAIT_v203) #define PPC_WAIT(w, p) stringify_in_c(.long PPC_RAW_WAIT(w, p)) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index a6c7069bec5d4..b2c51d337e60c 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -172,11 +172,6 @@ struct thread_struct { unsigned int align_ctl; /* alignment handling control */ #ifdef CONFIG_HAVE_HW_BREAKPOINT struct perf_event *ptrace_bps[HBP_NUM_MAX]; - /* - * Helps identify source of single-step exception and subsequent - * hw-breakpoint enablement - */ - struct perf_event *last_hit_ubp[HBP_NUM_MAX]; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */ unsigned long trap_nr; /* last trap # on this thread */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index bb0121222ee3c..4ae4ab9090a2d 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1414,11 +1414,9 @@ static inline void mtmsr_isync(unsigned long val) #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ : "=r" (rval)); rval;}) -#ifndef mtspr #define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \ : "r" ((unsigned long)(v)) \ : "memory") -#endif #define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",2" : : : "memory") static inline void wrtee(unsigned long val) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 8a4d4f4d97495..f4e6f2dd04b73 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -143,5 +143,20 @@ static inline int cpu_to_coregroup_id(int cpu) #endif #endif +#ifdef CONFIG_HOTPLUG_SMT +#include +#include + +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return cpu == cpu_first_thread_sibling(cpu); +} + +static inline bool topology_smt_thread_allowed(unsigned int cpu) +{ + return cpu_thread_in_core(cpu) < cpu_smt_num_threads; +} +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_TOPOLOGY_H */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index fe27d41f9a3de..9692acb0361fb 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 033116e465d08..1a9b5ae8ccb2f 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -3,6 +3,7 @@ * Copyright (C) 2012 Freescale Semiconductor, Inc. */ +#include #include #include #include @@ -12,7 +13,6 @@ #include #include #include -#include #ifndef CONFIG_PPC64 /* epapr_ev_idle() was derived from e500_idle() */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index f71f2bbd4de64..6a9acfb690c9f 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -9,6 +9,7 @@ * Copyright (C) 1997 Dan Malek (dmalek@jlc.net). */ +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 3f68a16246462..b32e7b2ebdcfd 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -38,7 +38,6 @@ #include #include #include -#include #include "head_32.h" diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 63a85c16fef46..a3197c9f721cd 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -35,7 +35,6 @@ #include #include #include -#include #include #include "head_booke.h" diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 6440b1bb332ac..4690c219bfa4d 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -40,7 +40,6 @@ #include #include #include -#include #include #ifdef CONFIG_PPC_BOOK3S #include diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index fdbee1093e2ba..97e9ea0c72979 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -40,7 +40,6 @@ #include #include #include -#include #include #include "head_booke.h" diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index a79751e05781d..647b0b445e89d 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index c51f28b5abc05..6764b98ca360f 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index e1b4e70c8fd0f..b8513dc3e53ac 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -43,16 +43,6 @@ int hw_breakpoint_slots(int type) return 0; /* no instruction breakpoints available */ } -static bool single_step_pending(void) -{ - int i; - - for (i = 0; i < nr_wp_slots(); i++) { - if (current->thread.last_hit_ubp[i]) - return true; - } - return false; -} /* * Install a perf counter breakpoint. @@ -84,7 +74,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) * Do not install DABR values if the instruction must be single-stepped. * If so, DABR will be populated in single_step_dabr_instruction(). */ - if (!single_step_pending()) + if (!info->perf_single_step) __set_breakpoint(i, info); return 0; @@ -124,275 +114,6 @@ static bool is_ptrace_bp(struct perf_event *bp) return bp->overflow_handler == ptrace_triggered; } -struct breakpoint { - struct list_head list; - struct perf_event *bp; - bool ptrace_bp; -}; - -/* - * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot - * rely on it safely synchronizing internals here; however, we can rely on it - * not requesting more breakpoints than available. - */ -static DEFINE_SPINLOCK(cpu_bps_lock); -static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); -static DEFINE_SPINLOCK(task_bps_lock); -static LIST_HEAD(task_bps); - -static struct breakpoint *alloc_breakpoint(struct perf_event *bp) -{ - struct breakpoint *tmp; - - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return ERR_PTR(-ENOMEM); - tmp->bp = bp; - tmp->ptrace_bp = is_ptrace_bp(bp); - return tmp; -} - -static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2) -{ - __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr; - - bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); - bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE); - bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); - bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE); - - return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr); -} - -static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp) -{ - return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp; -} - -static bool can_co_exist(struct breakpoint *b, struct perf_event *bp) -{ - return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp)); -} - -static int task_bps_add(struct perf_event *bp) -{ - struct breakpoint *tmp; - - tmp = alloc_breakpoint(bp); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - spin_lock(&task_bps_lock); - list_add(&tmp->list, &task_bps); - spin_unlock(&task_bps_lock); - return 0; -} - -static void task_bps_remove(struct perf_event *bp) -{ - struct list_head *pos, *q; - - spin_lock(&task_bps_lock); - list_for_each_safe(pos, q, &task_bps) { - struct breakpoint *tmp = list_entry(pos, struct breakpoint, list); - - if (tmp->bp == bp) { - list_del(&tmp->list); - kfree(tmp); - break; - } - } - spin_unlock(&task_bps_lock); -} - -/* - * If any task has breakpoint from alternate infrastructure, - * return true. Otherwise return false. - */ -static bool all_task_bps_check(struct perf_event *bp) -{ - struct breakpoint *tmp; - bool ret = false; - - spin_lock(&task_bps_lock); - list_for_each_entry(tmp, &task_bps, list) { - if (!can_co_exist(tmp, bp)) { - ret = true; - break; - } - } - spin_unlock(&task_bps_lock); - return ret; -} - -/* - * If same task has breakpoint from alternate infrastructure, - * return true. Otherwise return false. - */ -static bool same_task_bps_check(struct perf_event *bp) -{ - struct breakpoint *tmp; - bool ret = false; - - spin_lock(&task_bps_lock); - list_for_each_entry(tmp, &task_bps, list) { - if (tmp->bp->hw.target == bp->hw.target && - !can_co_exist(tmp, bp)) { - ret = true; - break; - } - } - spin_unlock(&task_bps_lock); - return ret; -} - -static int cpu_bps_add(struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - struct breakpoint *tmp; - int i = 0; - - tmp = alloc_breakpoint(bp); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - spin_lock(&cpu_bps_lock); - cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (!cpu_bp[i]) { - cpu_bp[i] = tmp; - break; - } - } - spin_unlock(&cpu_bps_lock); - return 0; -} - -static void cpu_bps_remove(struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - int i = 0; - - spin_lock(&cpu_bps_lock); - cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (!cpu_bp[i]) - continue; - - if (cpu_bp[i]->bp == bp) { - kfree(cpu_bp[i]); - cpu_bp[i] = NULL; - break; - } - } - spin_unlock(&cpu_bps_lock); -} - -static bool cpu_bps_check(int cpu, struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - bool ret = false; - int i; - - spin_lock(&cpu_bps_lock); - cpu_bp = per_cpu_ptr(cpu_bps, cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) { - ret = true; - break; - } - } - spin_unlock(&cpu_bps_lock); - return ret; -} - -static bool all_cpu_bps_check(struct perf_event *bp) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (cpu_bps_check(cpu, bp)) - return true; - } - return false; -} - -int arch_reserve_bp_slot(struct perf_event *bp) -{ - int ret; - - /* ptrace breakpoint */ - if (is_ptrace_bp(bp)) { - if (all_cpu_bps_check(bp)) - return -ENOSPC; - - if (same_task_bps_check(bp)) - return -ENOSPC; - - return task_bps_add(bp); - } - - /* perf breakpoint */ - if (is_kernel_addr(bp->attr.bp_addr)) - return 0; - - if (bp->hw.target && bp->cpu == -1) { - if (same_task_bps_check(bp)) - return -ENOSPC; - - return task_bps_add(bp); - } else if (!bp->hw.target && bp->cpu != -1) { - if (all_task_bps_check(bp)) - return -ENOSPC; - - return cpu_bps_add(bp); - } - - if (same_task_bps_check(bp)) - return -ENOSPC; - - ret = cpu_bps_add(bp); - if (ret) - return ret; - ret = task_bps_add(bp); - if (ret) - cpu_bps_remove(bp); - - return ret; -} - -void arch_release_bp_slot(struct perf_event *bp) -{ - if (!is_kernel_addr(bp->attr.bp_addr)) { - if (bp->hw.target) - task_bps_remove(bp); - if (bp->cpu != -1) - cpu_bps_remove(bp); - } -} - -/* - * Perform cleanup of arch-specific counters during unregistration - * of the perf-event - */ -void arch_unregister_hw_breakpoint(struct perf_event *bp) -{ - /* - * If the breakpoint is unregistered between a hw_breakpoint_handler() - * and the single_step_dabr_instruction(), then cleanup the breakpoint - * restoration variables to prevent dangling pointers. - * FIXME, this should not be using bp->ctx at all! Sayeth peterz. - */ - if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) { - int i; - - for (i = 0; i < nr_wp_slots(); i++) { - if (bp->ctx->task->thread.last_hit_ubp[i] == bp) - bp->ctx->task->thread.last_hit_ubp[i] = NULL; - } - } -} - /* * Check for virtual address in kernel space. */ @@ -499,6 +220,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, * Restores the breakpoint on the debug registers. * Invoke this function if it is known that the execution context is * about to change to cause loss of MSR_SE settings. + * + * The perf watchpoint will simply re-trigger once the thread is started again, + * and the watchpoint handler will set up MSR_SE and perf_single_step as + * needed. */ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) { @@ -506,7 +231,9 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) int i; for (i = 0; i < nr_wp_slots(); i++) { - if (unlikely(tsk->thread.last_hit_ubp[i])) + struct perf_event *bp = __this_cpu_read(bp_per_reg[i]); + + if (unlikely(bp && counter_arch_bp(bp)->perf_single_step)) goto reset; } return; @@ -516,7 +243,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) for (i = 0; i < nr_wp_slots(); i++) { info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); __set_breakpoint(i, info); - tsk->thread.last_hit_ubp[i] = NULL; + info->perf_single_step = false; } } @@ -534,23 +261,22 @@ static bool is_octword_vsx_instr(int type, int size) * We've failed in reliably handling the hw-breakpoint. Unregister * it and throw a warning message to let the user know about it. */ -static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info) +static void handler_error(struct perf_event *bp) { WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.", - info->address); + counter_arch_bp(bp)->address); perf_event_disable_inatomic(bp); } -static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info) +static void larx_stcx_err(struct perf_event *bp) { printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n", - info->address); + counter_arch_bp(bp)->address); perf_event_disable_inatomic(bp); } static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, - struct arch_hw_breakpoint **info, int *hit, - ppc_inst_t instr) + int *hit, ppc_inst_t instr) { int i; int stepped; @@ -560,8 +286,9 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - current->thread.last_hit_ubp[i] = bp[i]; - info[i] = NULL; + + counter_arch_bp(bp[i])->perf_single_step = true; + bp[i] = NULL; } regs_set_return_msr(regs, regs->msr | MSR_SE); return false; @@ -572,15 +299,15 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - handler_error(bp[i], info[i]); - info[i] = NULL; + handler_error(bp[i]); + bp[i] = NULL; } return false; } return true; } -static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, +static void handle_p10dd1_spurious_exception(struct perf_event **bp, int *hit, unsigned long ea) { int i; @@ -592,10 +319,14 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, * spurious exception. */ for (i = 0; i < nr_wp_slots(); i++) { - if (!info[i]) + struct arch_hw_breakpoint *info; + + if (!bp[i]) continue; - hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE); + info = counter_arch_bp(bp[i]); + + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); /* * Ending address of DAWR range is less than starting @@ -625,9 +356,9 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, return; for (i = 0; i < nr_wp_slots(); i++) { - if (info[i]) { + if (bp[i]) { hit[i] = 1; - info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + counter_arch_bp(bp[i])->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; } } } @@ -638,7 +369,6 @@ int hw_breakpoint_handler(struct die_args *args) int rc = NOTIFY_STOP; struct perf_event *bp[HBP_NUM_MAX] = { NULL }; struct pt_regs *regs = args->regs; - struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL }; int i; int hit[HBP_NUM_MAX] = {0}; int nr_hit = 0; @@ -663,18 +393,20 @@ int hw_breakpoint_handler(struct die_args *args) wp_get_instr_detail(regs, &instr, &type, &size, &ea); for (i = 0; i < nr_wp_slots(); i++) { + struct arch_hw_breakpoint *info; + bp[i] = __this_cpu_read(bp_per_reg[i]); if (!bp[i]) continue; - info[i] = counter_arch_bp(bp[i]); - info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; + info = counter_arch_bp(bp[i]); + info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (wp_check_constraints(regs, instr, ea, type, size, info[i])) { + if (wp_check_constraints(regs, instr, ea, type, size, info)) { if (!IS_ENABLED(CONFIG_PPC_8xx) && ppc_inst_equal(instr, ppc_inst(0))) { - handler_error(bp[i], info[i]); - info[i] = NULL; + handler_error(bp[i]); + bp[i] = NULL; err = 1; continue; } @@ -693,7 +425,7 @@ int hw_breakpoint_handler(struct die_args *args) /* Workaround for Power10 DD1 */ if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 && is_octword_vsx_instr(type, size)) { - handle_p10dd1_spurious_exception(info, hit, ea); + handle_p10dd1_spurious_exception(bp, hit, ea); } else { rc = NOTIFY_DONE; goto out; @@ -708,10 +440,10 @@ int hw_breakpoint_handler(struct die_args *args) */ if (ptrace_bp) { for (i = 0; i < nr_wp_slots(); i++) { - if (!hit[i]) + if (!hit[i] || !is_ptrace_bp(bp[i])) continue; perf_bp_event(bp[i], regs); - info[i] = NULL; + bp[i] = NULL; } rc = NOTIFY_DONE; goto reset; @@ -722,13 +454,13 @@ int hw_breakpoint_handler(struct die_args *args) for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - larx_stcx_err(bp[i], info[i]); - info[i] = NULL; + larx_stcx_err(bp[i]); + bp[i] = NULL; } goto reset; } - if (!stepping_handler(regs, bp, info, hit, instr)) + if (!stepping_handler(regs, bp, hit, instr)) goto reset; } @@ -739,15 +471,15 @@ int hw_breakpoint_handler(struct die_args *args) for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) + if (!(counter_arch_bp(bp[i])->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp[i], regs); } reset: for (i = 0; i < nr_wp_slots(); i++) { - if (!info[i]) + if (!bp[i]) continue; - __set_breakpoint(i, info[i]); + __set_breakpoint(i, counter_arch_bp(bp[i])); } out: @@ -762,24 +494,28 @@ NOKPROBE_SYMBOL(hw_breakpoint_handler); static int single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; - struct perf_event *bp = NULL; - struct arch_hw_breakpoint *info; - int i; bool found = false; /* * Check if we are single-stepping as a result of a * previous HW Breakpoint exception */ - for (i = 0; i < nr_wp_slots(); i++) { - bp = current->thread.last_hit_ubp[i]; + for (int i = 0; i < nr_wp_slots(); i++) { + struct perf_event *bp; + struct arch_hw_breakpoint *info; + + bp = __this_cpu_read(bp_per_reg[i]); if (!bp) continue; - found = true; info = counter_arch_bp(bp); + if (!info->perf_single_step) + continue; + + found = true; + /* * We shall invoke the user-defined callback function in the * single stepping handler to confirm to 'trigger-after-execute' @@ -787,26 +523,16 @@ static int single_step_dabr_instruction(struct die_args *args) */ if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp, regs); - current->thread.last_hit_ubp[i] = NULL; - } - if (!found) - return NOTIFY_DONE; - - for (i = 0; i < nr_wp_slots(); i++) { - bp = __this_cpu_read(bp_per_reg[i]); - if (!bp) - continue; - - info = counter_arch_bp(bp); - __set_breakpoint(i, info); + info->perf_single_step = false; + __set_breakpoint(i, counter_arch_bp(bp)); } /* * If the process was being single-stepped by ptrace, let the * other single-step actions occur (e.g. generate SIGTRAP). */ - if (test_thread_flag(TIF_SINGLESTEP)) + if (!found || test_thread_flag(TIF_SINGLESTEP)) return NOTIFY_DONE; return NOTIFY_STOP; diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index fb7de3543c037..29e1440d14cc1 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -10,11 +10,11 @@ * * setjmp/longjmp code by Paul Mackerras. */ +#include #include #include #include #include -#include .text diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index daf8f87d23728..2eabb15687a64 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -8,6 +8,7 @@ * */ +#include #include #include #include @@ -22,7 +23,6 @@ #include #include #include -#include #include .text diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 2c9ac70aaf0c9..1a8cdafd68e85 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -9,6 +9,7 @@ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) */ +#include #include #include #include @@ -23,7 +24,6 @@ #include #include #include -#include #include .text diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 3910cd7bb2d9b..42abbed452cd9 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -716,73 +716,89 @@ int gpr32_get_common(struct task_struct *target, return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32)); } -int gpr32_set_common(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf, - unsigned long *regs) +static int gpr32_set_common_kernel(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, unsigned long *regs) { const compat_ulong_t *k = kbuf; + + pos /= sizeof(compat_ulong_t); + count /= sizeof(compat_ulong_t); + + for (; count > 0 && pos < PT_MSR; --count) + regs[pos++] = *k++; + + if (count > 0 && pos == PT_MSR) { + set_user_msr(target, *k++); + ++pos; + --count; + } + + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) + regs[pos++] = *k++; + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + ++k; + + if (count > 0 && pos == PT_TRAP) { + set_user_trap(target, *k++); + ++pos; + --count; + } + + kbuf = k; + pos *= sizeof(compat_ulong_t); + count *= sizeof(compat_ulong_t); + user_regset_copyin_ignore(&pos, &count, &kbuf, NULL, + (PT_TRAP + 1) * sizeof(compat_ulong_t), -1); + return 0; +} + +static int gpr32_set_common_user(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void __user *ubuf, unsigned long *regs) +{ const compat_ulong_t __user *u = ubuf; compat_ulong_t reg; - if (!kbuf && !user_read_access_begin(u, count)) + if (!user_read_access_begin(u, count)) return -EFAULT; pos /= sizeof(reg); count /= sizeof(reg); - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - regs[pos++] = *k++; - else - for (; count > 0 && pos < PT_MSR; --count) { - unsafe_get_user(reg, u++, Efault); - regs[pos++] = reg; - } - + for (; count > 0 && pos < PT_MSR; --count) { + unsafe_get_user(reg, u++, Efault); + regs[pos++] = reg; + } if (count > 0 && pos == PT_MSR) { - if (kbuf) - reg = *k++; - else - unsafe_get_user(reg, u++, Efault); + unsafe_get_user(reg, u++, Efault); set_user_msr(target, reg); ++pos; --count; } - if (kbuf) { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) - regs[pos++] = *k++; - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - ++k; - } else { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { - unsafe_get_user(reg, u++, Efault); - regs[pos++] = reg; - } - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - unsafe_get_user(reg, u++, Efault); + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { + unsafe_get_user(reg, u++, Efault); + regs[pos++] = reg; } + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + unsafe_get_user(reg, u++, Efault); if (count > 0 && pos == PT_TRAP) { - if (kbuf) - reg = *k++; - else - unsafe_get_user(reg, u++, Efault); + unsafe_get_user(reg, u++, Efault); set_user_trap(target, reg); ++pos; --count; } - if (!kbuf) - user_read_access_end(); + user_read_access_end(); - kbuf = k; ubuf = u; pos *= sizeof(reg); count *= sizeof(reg); - user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + user_regset_copyin_ignore(&pos, &count, NULL, &ubuf, (PT_TRAP + 1) * sizeof(reg), -1); return 0; @@ -791,6 +807,18 @@ int gpr32_set_common(struct task_struct *target, return -EFAULT; } +int gpr32_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + unsigned long *regs) +{ + if (kbuf) + return gpr32_set_common_kernel(target, regset, pos, count, kbuf, regs); + else + return gpr32_set_common_user(target, regset, pos, count, ubuf, regs); +} + static int gpr32_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8cb6d1c902669..5826f5108a124 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1088,7 +1088,7 @@ static int __init init_big_cores(void) void __init smp_prepare_cpus(unsigned int max_cpus) { - unsigned int cpu; + unsigned int cpu, num_threads; DBG("smp_prepare_cpus\n"); @@ -1155,6 +1155,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (smp_ops && smp_ops->probe) smp_ops->probe(); + + // Initalise the generic SMT topology support + num_threads = 1; + if (smt_enabled_at_boot) + num_threads = smt_enabled_at_boot; + cpu_smt_set_num_threads(num_threads, threads_per_core); } void smp_prepare_boot_cpu(void) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 9feab5e0485bf..a9cd6507163ab 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -6,13 +6,13 @@ * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. */ +#include #include #include #include #include #include #include -#include #include #ifdef CONFIG_VSX diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S index 6708e24db0aba..cdbcb5a0783b3 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.S +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S @@ -8,7 +8,6 @@ #include #include #include -#include _GLOBAL_TOC(ftrace_caller) lbz r3, PACA_FTRACE_ENABLED(r13) diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_low.S index 294d1e05958aa..5e271f87f7990 100644 --- a/arch/powerpc/kernel/trace/ftrace_low.S +++ b/arch/powerpc/kernel/trace/ftrace_low.S @@ -3,12 +3,12 @@ * Split from entry_64.S */ +#include #include #include #include #include #include -#include #ifdef CONFIG_PPC64 .pushsection ".tramp.ftrace.text","aw",@progbits; diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 1f7d86de1538e..600406716d66e 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S index 07296bc391664..80a1f9a4300ae 100644 --- a/arch/powerpc/kernel/ucall.S +++ b/arch/powerpc/kernel/ucall.S @@ -5,8 +5,8 @@ * Copyright 2019, IBM Corporation. * */ +#include #include -#include _GLOBAL(ucall_norets) EXPORT_SYMBOL_GPL(ucall_norets) diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index fcc0ad6d9c7b0..4094e4c4c77a7 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include #include @@ -8,7 +9,6 @@ #include #include #include -#include #include /* diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index 252724ed666a3..ef5c2d25ec397 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -350,7 +350,7 @@ EXPORT_SYMBOL(crash_shutdown_unregister); void default_machine_crash_shutdown(struct pt_regs *regs) { - unsigned int i; + volatile unsigned int i; int (*old_handler)(struct pt_regs *regs); if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S index 6c2b1d17cb636..3b361af873135 100644 --- a/arch/powerpc/kvm/book3s_64_entry.S +++ b/arch/powerpc/kvm/book3s_64_entry.S @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#include #include #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 7f765d5ad4366..efd0ebf70a5e6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -182,7 +182,7 @@ void kvmppc_free_hpt(struct kvm_hpt_info *info) vfree(info->rev); info->rev = NULL; if (info->cma) - kvm_free_hpt_cma(virt_to_page(info->virt), + kvm_free_hpt_cma(virt_to_page((void *)info->virt), 1 << (info->order - PAGE_SHIFT)); else if (info->virt) free_pages(info->virt, info->order - PAGE_SHIFT); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 870110e3d9b1e..ea7ad200b330f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -10,6 +10,7 @@ * Authors: Alexander Graf */ +#include #include #include #include @@ -24,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index d58df71ace584..e476e107a932b 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "booke.h" #include "e500.h" @@ -92,7 +93,11 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) local_irq_save(flags); mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); - asm volatile("tlbilxlpid"); + /* + * clang-17 and older could not assemble tlbilxlpid. + * https://github.com/ClangBuiltLinux/linux/issues/1891 + */ + asm volatile (PPC_TLBILX_LPID); mtspr(SPRN_MAS5, 0); local_irq_restore(flags); } diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S index 2158f61e317fc..b506c4d9a8d90 100644 --- a/arch/powerpc/kvm/tm.S +++ b/arch/powerpc/kvm/tm.S @@ -6,10 +6,10 @@ * Copyright 2011 Paul Mackerras, IBM Corp. */ +#include #include #include #include -#include #include #include diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 4541e8e294679..cd00b9bdd7727 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -8,12 +8,12 @@ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). */ +#include #include #include #include #include #include -#include .text diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 98ff51bd2f7dc..d53d8f09a2c2b 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -8,11 +8,11 @@ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). */ +#include #include #include #include #include -#include /* * Computes the checksum of a memory block at buff, length len, diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 3e9c27c463310..933b685e7ab68 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -4,11 +4,11 @@ * * Copyright (C) 1996-2005 Paul Mackerras. */ +#include #include #include #include #include -#include #include #include diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S index 88d46c471493b..bf1014b28fe83 100644 --- a/arch/powerpc/lib/copy_mc_64.S +++ b/arch/powerpc/lib/copy_mc_64.S @@ -4,9 +4,9 @@ * Derived from copyuser_power7.s by Anton Blanchard * Author - Balbir Singh */ +#include #include #include -#include .macro err1 100: diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 5d09a029b5567..f33a2e6088e51 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -2,11 +2,11 @@ /* * Copyright (C) 2008 Mark Nelson, IBM Corp. */ +#include #include #include #include #include -#include #include _GLOBAL_TOC(copy_page) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index db8719a14846d..9af969d2cc0cb 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -2,9 +2,9 @@ /* * Copyright (C) 2002 Paul Mackerras, IBM Corp. */ +#include #include #include -#include #include #include diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S index 09af29561314e..151875050da9c 100644 --- a/arch/powerpc/lib/hweight_64.S +++ b/arch/powerpc/lib/hweight_64.S @@ -5,9 +5,9 @@ * * Author: Anton Blanchard */ +#include #include #include -#include #include /* Note: This code relies on -mminimal-toc */ diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 9351ffab409cf..6fd06cd20faa0 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -4,10 +4,10 @@ * * Copyright (C) 1996 Paul Mackerras. */ +#include #include #include #include -#include #include #ifndef CONFIG_KASAN diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S index 5010e376f7b83..f6fca5664e91c 100644 --- a/arch/powerpc/lib/memcmp_32.S +++ b/arch/powerpc/lib/memcmp_32.S @@ -7,8 +7,8 @@ * */ +#include #include -#include .text diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S index 0b9b1685a33dc..142c666d38978 100644 --- a/arch/powerpc/lib/memcmp_64.S +++ b/arch/powerpc/lib/memcmp_64.S @@ -3,8 +3,8 @@ * Author: Anton Blanchard * Copyright 2015 IBM Corporation. */ +#include #include -#include #include #define off8 r6 diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 016c91e958d8f..b5a67e20143f8 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -2,9 +2,9 @@ /* * Copyright (C) 2002 Paul Mackerras, IBM Corp. */ +#include #include #include -#include #include #include #include diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 38158b77a8019..a4ab8625061a6 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -485,7 +485,7 @@ write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *r * Copy from a buffer to userspace, using the largest possible * aligned accesses, up to sizeof(long). */ -static nokprobe_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { int c; @@ -1043,7 +1043,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, } #endif /* CONFIG_VSX */ -static int __emulate_dcbz(unsigned long ea) +static __always_inline int __emulate_dcbz(unsigned long ea) { unsigned long i; unsigned long size = l1_dcache_bytes(); diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index 2752b1cc1d45f..daa72061dc0c7 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -4,8 +4,8 @@ * * Copyright (C) 1996 Paul Mackerras. */ +#include #include -#include #include .text diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S index 1ddb26394e8ac..3ee45619a3f82 100644 --- a/arch/powerpc/lib/string_32.S +++ b/arch/powerpc/lib/string_32.S @@ -7,8 +7,8 @@ * */ +#include #include -#include #include .text diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index df41ce06f86bd..a25eb8588434d 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -6,10 +6,10 @@ * Author: Anton Blanchard */ +#include #include #include #include -#include /** * __arch_clear_user: - Zero a block of memory in user space, with less checking. diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S index 0a8d3f64d4935..bbd24feb233f2 100644 --- a/arch/powerpc/lib/strlen_32.S +++ b/arch/powerpc/lib/strlen_32.S @@ -6,8 +6,8 @@ * * Inspired from glibc implementation */ +#include #include -#include #include .text diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index a5a21d444e729..8b804e1a9fa44 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -14,6 +14,7 @@ * hash table, so this file is not used on them.) */ +#include #include #include #include @@ -22,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index dd127087fee8a..6f48fffe8b9c9 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -127,21 +127,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } -static __always_inline void __tlbie_pid_lpid(unsigned long pid, - unsigned long lpid, - unsigned long ric) -{ - unsigned long rb, rs, prs, r; - - rb = PPC_BIT(53); /* IS = 1 */ - rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); - prs = 1; /* process scoped */ - r = 1; /* radix format */ - - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); -} static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -202,23 +187,6 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid, trace_tlbie(0, 0, rb, rs, ric, prs, r); } -static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, - unsigned long lpid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb, rs, prs, r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); - prs = 1; /* process scoped */ - r = 1; /* radix format */ - - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); -} - static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, unsigned long ap, unsigned long ric) { @@ -264,22 +232,6 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, } } -static inline void fixup_tlbie_va_range_lpid(unsigned long va, - unsigned long pid, - unsigned long lpid, - unsigned long ap) -{ - if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { - asm volatile("ptesync" : : : "memory"); - __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); - } - - if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { - asm volatile("ptesync" : : : "memory"); - __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); - } -} - static inline void fixup_tlbie_pid(unsigned long pid) { /* @@ -299,26 +251,6 @@ static inline void fixup_tlbie_pid(unsigned long pid) } } -static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) -{ - /* - * We can use any address for the invalidation, pick one which is - * probably unused as an optimisation. - */ - unsigned long va = ((1UL << 52) - 1); - - if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { - asm volatile("ptesync" : : : "memory"); - __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); - } - - if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { - asm volatile("ptesync" : : : "memory"); - __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), - RIC_FLUSH_TLB); - } -} - static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, unsigned long ap) { @@ -416,31 +348,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, - unsigned long ric) -{ - asm volatile("ptesync" : : : "memory"); - - /* - * Workaround the fact that the "ric" argument to __tlbie_pid - * must be a compile-time contraint to match the "i" constraint - * in the asm statement. - */ - switch (ric) { - case RIC_FLUSH_TLB: - __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); - fixup_tlbie_pid_lpid(pid, lpid); - break; - case RIC_FLUSH_PWC: - __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); - break; - case RIC_FLUSH_ALL: - default: - __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); - fixup_tlbie_pid_lpid(pid, lpid); - } - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); -} struct tlbiel_pid { unsigned long pid; unsigned long ric; @@ -566,20 +473,6 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end, fixup_tlbie_va_range(addr - page_size, pid, ap); } -static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, - unsigned long pid, unsigned long lpid, - unsigned long page_size, - unsigned long psize) -{ - unsigned long addr; - unsigned long ap = mmu_get_ap(psize); - - for (addr = start; addr < end; addr += page_size) - __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); - - fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); -} - static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, unsigned long psize, unsigned long ric) { @@ -660,18 +553,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, - unsigned long pid, unsigned long lpid, - unsigned long page_size, - unsigned long psize, bool also_pwc) -{ - asm volatile("ptesync" : : : "memory"); - if (also_pwc) - __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); - __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); -} - static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, @@ -1514,6 +1395,127 @@ void radix__flush_tlb_all(void) } #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static __always_inline void __tlbie_pid_lpid(unsigned long pid, + unsigned long lpid, + unsigned long ric) +{ + unsigned long rb, rs, prs, r; + + rb = PPC_BIT(53); /* IS = 1 */ + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + +static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, + unsigned long lpid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb, rs, prs, r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + +static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) +{ + /* + * We can use any address for the invalidation, pick one which is + * probably unused as an optimisation. + */ + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), + RIC_FLUSH_TLB); + } +} + +static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, + unsigned long ric) +{ + asm volatile("ptesync" : : : "memory"); + + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); + fixup_tlbie_pid_lpid(pid, lpid); + break; + case RIC_FLUSH_PWC: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); + fixup_tlbie_pid_lpid(pid, lpid); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} + +static inline void fixup_tlbie_va_range_lpid(unsigned long va, + unsigned long pid, + unsigned long lpid, + unsigned long ap) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); + } +} + +static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, + unsigned long pid, unsigned long lpid, + unsigned long page_size, + unsigned long psize) +{ + unsigned long addr; + unsigned long ap = mmu_get_ap(psize); + + for (addr = start; addr < end; addr += page_size) + __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); + + fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); +} + +static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, + unsigned long pid, unsigned long lpid, + unsigned long page_size, + unsigned long psize, bool also_pwc) +{ + asm volatile("ptesync" : : : "memory"); + if (also_pwc) + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); + __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} + /* * Performs process-scoped invalidations for a given LPID * as part of H_RPT_INVALIDATE hcall. diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 7ff8ff3509f5f..f0e63170323c4 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -102,6 +102,511 @@ static ssize_t cpumask_show(struct device *dev, return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); } +/* Interface attribute array index to store system information */ +#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 +#define INTERFACE_PROCESSOR_CONFIG_ATTR 7 +#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 +#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9 +#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10 +#define INTERFACE_NULL_ATTR 11 + +/* Counter request value to retrieve system information */ +enum { + PROCESSOR_BUS_TOPOLOGY, + PROCESSOR_CONFIG, + AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ + AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */ + AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */ +}; + +static int sysinfo_counter_request[] = { + [PROCESSOR_BUS_TOPOLOGY] = 0xD0, + [PROCESSOR_CONFIG] = 0x90, + [AFFINITY_DOMAIN_VIA_VP] = 0xA0, + [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, + [AFFINITY_DOMAIN_VIA_PAR] = 0xB1, +}; + +static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); + +static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index, + u16 secondary_index, char *buf, + size_t *n, struct hv_gpci_request_buffer *arg) +{ + unsigned long ret; + size_t i, j; + + arg->params.counter_request = cpu_to_be32(req); + arg->params.starting_index = cpu_to_be32(starting_index); + arg->params.secondary_index = cpu_to_be16(secondary_index); + + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', + * which means that the current buffer size cannot accommodate + * all the information and a partial buffer returned. + * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. + * + * ret value as H_AUTHORITY implies that partition is not permitted to retrieve + * performance information, and required to set + * "Enable Performance Information Collection" option. + */ + if (ret == H_AUTHORITY) + return -EPERM; + + /* + * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE + * because of invalid buffer-length/address or due to some hardware + * error. + */ + if (ret && (ret != H_PARAMETER)) + return -EIO; + + /* + * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' + * to show the total number of counter_value array elements + * returned via hcall. + * hcall also populates 'cv_element_size' corresponds to individual + * counter_value array element size. Below loop go through all + * counter_value array elements as per their size and add it to + * the output buffer. + */ + for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) { + j = i * be16_to_cpu(arg->params.cv_element_size); + + for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++) + *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[j]); + *n += sprintf(buf + *n, "\n"); + } + + if (*n >= PAGE_SIZE) { + pr_info("System information exceeds PAGE_SIZE\n"); + return -EFBIG; + } + + return ret; +} + +static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * Pass the counter request value 0xD0 corresponds to request + * type 'Processor_bus_topology', to retrieve + * the system topology information. + * starting_index value implies the starting hardware + * chip id. + */ + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], + 0, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which + * implies that buffer can't accommodate all information, and a partial buffer + * returned. To handle that, we need to make subsequent requests + * with next starting index to retrieve additional (missing) data. + * Below loop do subsequent hcalls with next starting index and add it + * to buffer util we get all the information. + */ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* + * Since the starting index value is part of counter_value + * buffer elements, use the starting index value in the last + * element and add 1 to make subsequent hcalls. + */ + u32 starting_index = arg->bytes[last_element + 3] + + (arg->bytes[last_element + 2] << 8) + + (arg->bytes[last_element + 1] << 16) + + (arg->bytes[last_element] << 24) + 1; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], + starting_index, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: + put_cpu_var(hv_gpci_reqb); + return ret; +} + +static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * Pass the counter request value 0x90 corresponds to request + * type 'Processor_config', to retrieve + * the system processor information. + * starting_index value implies the starting hardware + * processor index. + */ + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], + 0, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which + * implies that buffer can't accommodate all information, and a partial buffer + * returned. To handle that, we need to take subsequent requests + * with next starting index to retrieve additional (missing) data. + * Below loop do subsequent hcalls with next starting index and add it + * to buffer util we get all the information. + */ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* + * Since the starting index is part of counter_value + * buffer elements, use the starting index value in the last + * element and add 1 to subsequent hcalls. + */ + u32 starting_index = arg->bytes[last_element + 3] + + (arg->bytes[last_element + 2] << 8) + + (arg->bytes[last_element + 1] << 16) + + (arg->bytes[last_element] << 24) + 1; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], + starting_index, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: + put_cpu_var(hv_gpci_reqb); + return ret; +} + +static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * Pass the counter request 0xA0 corresponds to request + * type 'Affinity_domain_information_by_virutal_processor', + * to retrieve the system affinity domain information. + * starting_index value refers to the starting hardware + * processor index. + */ + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], + 0, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which + * implies that buffer can't accommodate all information, and a partial buffer + * returned. To handle that, we need to take subsequent requests + * with next secondary index to retrieve additional (missing) data. + * Below loop do subsequent hcalls with next secondary index and add it + * to buffer util we get all the information. + */ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* + * Since the starting index and secondary index type is part of the + * counter_value buffer elements, use the starting index value in the + * last array element as subsequent starting index, and use secondary index + * value in the last array element plus 1 as subsequent secondary index. + * For counter request '0xA0', starting index points to partition id + * and secondary index points to corresponding virtual processor index. + */ + u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8); + u16 secondary_index = arg->bytes[last_element + 3] + + (arg->bytes[last_element + 2] << 8) + 1; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], + starting_index, secondary_index, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: + put_cpu_var(hv_gpci_reqb); + return ret; +} + +static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * Pass the counter request 0xB0 corresponds to request + * type 'Affinity_domain_information_by_domain', + * to retrieve the system affinity domain information. + * starting_index value refers to the starting hardware + * processor index. + */ + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], + 0, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which + * implies that buffer can't accommodate all information, and a partial buffer + * returned. To handle that, we need to take subsequent requests + * with next starting index to retrieve additional (missing) data. + * Below loop do subsequent hcalls with next starting index and add it + * to buffer util we get all the information. + */ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* + * Since the starting index value is part of counter_value + * buffer elements, use the starting index value in the last + * element and add 1 to make subsequent hcalls. + */ + u32 starting_index = arg->bytes[last_element + 1] + + (arg->bytes[last_element] << 8) + 1; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], + starting_index, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: + put_cpu_var(hv_gpci_reqb); + return ret; +} + +static void affinity_domain_via_partition_result_parse(int returned_values, + int element_size, char *buf, size_t *last_element, + size_t *n, struct hv_gpci_request_buffer *arg) +{ + size_t i = 0, j = 0; + size_t k, l, m; + uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele; + + /* + * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' + * to show the total number of counter_value array elements + * returned via hcall. + * Unlike other request types, the data structure returned by this + * request is variable-size. For this counter request type, + * hcall populates 'cv_element_size' corresponds to minimum size of + * the structure returned i.e; the size of the structure with no domain + * information. Below loop go through all counter_value array + * to determine the number and size of each domain array element and + * add it to the output buffer. + */ + while (i < returned_values) { + k = j; + for (; k < j + element_size; k++) + *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); + *n += sprintf(buf + *n, "\n"); + + total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3]; + size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1]; + + for (l = 0; l < total_affinity_domain_ele; l++) { + for (m = 0; m < size_of_each_affinity_domain_ele; m++) { + *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); + k++; + } + *n += sprintf(buf + *n, "\n"); + } + + *n += sprintf(buf + *n, "\n"); + i++; + j = k; + } + + *last_element = k; +} + +static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + size_t last_element = 0; + u32 starting_index; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * Pass the counter request value 0xB1 corresponds to counter request + * type 'Affinity_domain_information_by_partition', + * to retrieve the system affinity domain by partition information. + * starting_index value refers to the starting hardware + * processor index. + */ + arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); + arg->params.starting_index = cpu_to_be32(0); + + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + if (!ret) + goto parse_result; + + /* + * ret value as 'H_PARAMETER' implies that the current buffer size + * can't accommodate all the information, and a partial buffer + * returned. To handle that, we need to make subsequent requests + * with next starting index to retrieve additional (missing) data. + * Below loop do subsequent hcalls with next starting index and add it + * to buffer util we get all the information. + */ + while (ret == H_PARAMETER) { + affinity_domain_via_partition_result_parse( + be16_to_cpu(arg->params.returned_values) - 1, + be16_to_cpu(arg->params.cv_element_size), buf, + &last_element, &n, arg); + + if (n >= PAGE_SIZE) { + put_cpu_var(hv_gpci_reqb); + pr_debug("System information exceeds PAGE_SIZE\n"); + return -EFBIG; + } + + /* + * Since the starting index value is part of counter_value + * buffer elements, use the starting_index value in the last + * element and add 1 to make subsequent hcalls. + */ + starting_index = (u8)arg->bytes[last_element] << 8 | + (u8)arg->bytes[last_element + 1]; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + arg->params.counter_request = cpu_to_be32( + sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); + arg->params.starting_index = cpu_to_be32(starting_index); + + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + if (ret && (ret != H_PARAMETER)) + goto out; + } + +parse_result: + affinity_domain_via_partition_result_parse( + be16_to_cpu(arg->params.returned_values), + be16_to_cpu(arg->params.cv_element_size), + buf, &last_element, &n, arg); + + put_cpu_var(hv_gpci_reqb); + return n; + +out: + put_cpu_var(hv_gpci_reqb); + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', + * which means that the current buffer size cannot accommodate + * all the information and a partial buffer returned. + * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. + * + * ret value as H_AUTHORITY implies that partition is not permitted to retrieve + * performance information, and required to set + * "Enable Performance Information Collection" option. + */ + if (ret == H_AUTHORITY) + return -EPERM; + + /* + * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE + * because of invalid buffer-length/address or due to some hardware + * error. + */ + return -EIO; +} + static DEVICE_ATTR_RO(kernel_version); static DEVICE_ATTR_RO(cpumask); @@ -118,6 +623,31 @@ static struct attribute *interface_attrs[] = { &hv_caps_attr_expanded.attr, &hv_caps_attr_lab.attr, &hv_caps_attr_collect_privileged.attr, + /* + * This NULL is a placeholder for the processor_bus_topology + * attribute, set in init function if applicable. + */ + NULL, + /* + * This NULL is a placeholder for the processor_config + * attribute, set in init function if applicable. + */ + NULL, + /* + * This NULL is a placeholder for the affinity_domain_via_virtual_processor + * attribute, set in init function if applicable. + */ + NULL, + /* + * This NULL is a placeholder for the affinity_domain_via_domain + * attribute, set in init function if applicable. + */ + NULL, + /* + * This NULL is a placeholder for the affinity_domain_via_partition + * attribute, set in init function if applicable. + */ + NULL, NULL, }; @@ -143,8 +673,6 @@ static const struct attribute_group *attr_groups[] = { NULL, }; -static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); - static unsigned long single_gpci_request(u32 req, u32 starting_index, u16 secondary_index, u8 version_in, u32 offset, u8 length, u64 *value) @@ -325,6 +853,110 @@ static int hv_gpci_cpu_hotplug_init(void) ppc_hv_gpci_cpu_offline); } +static struct device_attribute *sysinfo_device_attr_create(int + sysinfo_interface_group_index, u32 req) +{ + struct device_attribute *attr = NULL; + unsigned long ret; + struct hv_gpci_request_buffer *arg; + + if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR || + sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) { + pr_info("Wrong interface group index for system information\n"); + return NULL; + } + + /* Check for given counter request value support */ + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + arg->params.counter_request = cpu_to_be32(req); + + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + put_cpu_var(hv_gpci_reqb); + + /* + * Add given counter request value attribute in the interface_attrs + * attribute array, only for valid return types. + */ + if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) { + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + + if (!attr) { + pr_info("Mem allocation failed, for sysinfo interface file"); + return NULL; + } + + sysfs_attr_init(&attr->attr); + attr->attr.mode = 0444; + + switch (sysinfo_interface_group_index) { + case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR: + attr->attr.name = "processor_bus_topology"; + attr->show = processor_bus_topology_show; + break; + case INTERFACE_PROCESSOR_CONFIG_ATTR: + attr->attr.name = "processor_config"; + attr->show = processor_config_show; + break; + case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR: + attr->attr.name = "affinity_domain_via_virtual_processor"; + attr->show = affinity_domain_via_virtual_processor_show; + break; + case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR: + attr->attr.name = "affinity_domain_via_domain"; + attr->show = affinity_domain_via_domain_show; + break; + case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR: + attr->attr.name = "affinity_domain_via_partition"; + attr->show = affinity_domain_via_partition_show; + break; + } + } else + pr_devel("hcall failed, with error: 0x%lx\n", ret); + + return attr; +} + +static void add_sysinfo_interface_files(void) +{ + int sysfs_count; + struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR]; + int i; + + sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR; + + /* Get device attribute for a given counter request value */ + for (i = 0; i < sysfs_count; i++) { + attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR, + sysinfo_counter_request[i]); + + if (!attr[i]) + goto out; + } + + /* Add sysinfo interface attributes in the interface_attrs attribute array */ + for (i = 0; i < sysfs_count; i++) + interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr; + + return; + +out: + /* + * The sysinfo interface attributes will be added, only if hcall passed for + * all the counter request values. Free the device attribute array incase + * of any hcall failure. + */ + if (i > 0) { + while (i >= 0) { + kfree(attr[i]); + i--; + } + } +} + static int hv_gpci_init(void) { int r; @@ -388,6 +1020,10 @@ static int hv_gpci_init(void) if (r) return r; + /* sysinfo interface files are only available for power10 and above platforms */ + if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10) + add_sysinfo_interface_files(); + return 0; } diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig index 4eb372bdab70d..58e58b4f6a12e 100644 --- a/arch/powerpc/platforms/82xx/Kconfig +++ b/arch/powerpc/platforms/82xx/Kconfig @@ -32,6 +32,7 @@ config 8260 bool depends on PPC_BOOK3S_32 select CPM2 + select PPC_INDIRECT_PCI if PCI help The MPC8260 is a typical embedded CPU made by Freescale. Selecting this option means that you wish to build a kernel for a machine with diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 4bfa1a95e1555..3dc65ce1f175d 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index 51c9bfd975925..c86da3f2b74bd 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/82xx/m82xx_pci.h b/arch/powerpc/platforms/82xx/m82xx_pci.h deleted file mode 100644 index d07c4d7606f61..0000000000000 --- a/arch/powerpc/platforms/82xx/m82xx_pci.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _PPC_KERNEL_M82XX_PCI_H -#define _PPC_KERNEL_M82XX_PCI_H - -/* - */ - -#define SIU_INT_IRQ1 ((uint)0x13 + CPM_IRQ_OFFSET) - -#ifndef _IO_BASE -#define _IO_BASE isa_io_base -#endif - -#endif /* _PPC_KERNEL_M8260_PCI_H */ diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c index 7e83eb6746f4a..ae72c574eb7e0 100644 --- a/arch/powerpc/platforms/8xx/adder875.c +++ b/arch/powerpc/platforms/8xx/adder875.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include "mpc8xx.h" diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 34ab29966c8bf..ebb5f6a27dbf3 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -41,7 +41,7 @@ #include #include -#include +#include #ifdef CONFIG_8xx_GPIO #include @@ -54,8 +54,6 @@ immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE; void __init cpm_reset(void) { - sysconf8xx_t __iomem *siu_conf; - cpmp = &mpc8xx_immr->im_cpm; #ifndef CONFIG_PPC_EARLY_DEBUG_CPM @@ -77,12 +75,10 @@ void __init cpm_reset(void) * manual recommends it. * Bit 25, FAM can also be set to use FEC aggressive mode (860T). */ - siu_conf = immr_map(im_siu_conf); if ((mfspr(SPRN_IMMR) & 0xffff) == 0x0900) /* MPC885 */ - out_be32(&siu_conf->sc_sdcr, 0x40); + out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 0x40); else - out_be32(&siu_conf->sc_sdcr, 1); - immr_unmap(siu_conf); + out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 1); } static DEFINE_SPINLOCK(cmd_lock); diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 24f358f86d16c..3c5c4e08b6a99 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -22,7 +22,6 @@ #include #include -#include #include #include "pic.h" @@ -41,14 +40,11 @@ static irqreturn_t timebase_interrupt(int irq, void *dev) void __init __attribute__ ((weak)) init_internal_rtc(void) { - sit8xx_t __iomem *sys_tmr = immr_map(im_sit); - /* Disable the RTC one second and alarm interrupts. */ - clrbits16(&sys_tmr->sit_rtcsc, (RTCSC_SIE | RTCSC_ALE)); + clrbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_SIE | RTCSC_ALE)); /* Enable the RTC */ - setbits16(&sys_tmr->sit_rtcsc, (RTCSC_RTF | RTCSC_RTE)); - immr_unmap(sys_tmr); + setbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_RTF | RTCSC_RTE)); } static int __init get_freq(char *name, unsigned long *val) @@ -80,23 +76,14 @@ static int __init get_freq(char *name, unsigned long *val) void __init mpc8xx_calibrate_decr(void) { struct device_node *cpu; - cark8xx_t __iomem *clk_r1; - car8xx_t __iomem *clk_r2; - sitk8xx_t __iomem *sys_tmr1; - sit8xx_t __iomem *sys_tmr2; int irq, virq; - clk_r1 = immr_map(im_clkrstk); - /* Unlock the SCCR. */ - out_be32(&clk_r1->cark_sccrk, ~KAPWR_KEY); - out_be32(&clk_r1->cark_sccrk, KAPWR_KEY); - immr_unmap(clk_r1); + out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, KAPWR_KEY); /* Force all 8xx processors to use divide by 16 processor clock. */ - clk_r2 = immr_map(im_clkrst); - setbits32(&clk_r2->car_sccr, 0x02000000); - immr_unmap(clk_r2); + setbits32(&mpc8xx_immr->im_clkrst.car_sccr, 0x02000000); /* Processor frequency is MHz. */ @@ -123,14 +110,12 @@ void __init mpc8xx_calibrate_decr(void) * we guarantee the registers are locked, then we unlock them * for our use. */ - sys_tmr1 = immr_map(im_sitk); - out_be32(&sys_tmr1->sitk_tbscrk, ~KAPWR_KEY); - out_be32(&sys_tmr1->sitk_rtcsck, ~KAPWR_KEY); - out_be32(&sys_tmr1->sitk_tbk, ~KAPWR_KEY); - out_be32(&sys_tmr1->sitk_tbscrk, KAPWR_KEY); - out_be32(&sys_tmr1->sitk_rtcsck, KAPWR_KEY); - out_be32(&sys_tmr1->sitk_tbk, KAPWR_KEY); - immr_unmap(sys_tmr1); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, KAPWR_KEY); init_internal_rtc(); @@ -144,10 +129,8 @@ void __init mpc8xx_calibrate_decr(void) of_node_put(cpu); irq = virq_to_hw(virq); - sys_tmr2 = immr_map(im_sit); - out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) | - (TBSCR_TBF | TBSCR_TBE)); - immr_unmap(sys_tmr2); + out_be16(&mpc8xx_immr->im_sit.sit_tbscr, + ((1 << (7 - (irq / 2))) << 8) | (TBSCR_TBF | TBSCR_TBE)); if (request_irq(virq, timebase_interrupt, IRQF_NO_THREAD, "tbint", NULL)) @@ -161,47 +144,36 @@ void __init mpc8xx_calibrate_decr(void) int mpc8xx_set_rtc_time(struct rtc_time *tm) { - sitk8xx_t __iomem *sys_tmr1; - sit8xx_t __iomem *sys_tmr2; time64_t time; - sys_tmr1 = immr_map(im_sitk); - sys_tmr2 = immr_map(im_sit); time = rtc_tm_to_time64(tm); - out_be32(&sys_tmr1->sitk_rtck, KAPWR_KEY); - out_be32(&sys_tmr2->sit_rtc, (u32)time); - out_be32(&sys_tmr1->sitk_rtck, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sit.sit_rtc, (u32)time); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, ~KAPWR_KEY); - immr_unmap(sys_tmr2); - immr_unmap(sys_tmr1); return 0; } void mpc8xx_get_rtc_time(struct rtc_time *tm) { unsigned long data; - sit8xx_t __iomem *sys_tmr = immr_map(im_sit); /* Get time from the RTC. */ - data = in_be32(&sys_tmr->sit_rtc); + data = in_be32(&mpc8xx_immr->im_sit.sit_rtc); rtc_time64_to_tm(data, tm); - immr_unmap(sys_tmr); return; } void __noreturn mpc8xx_restart(char *cmd) { - car8xx_t __iomem *clk_r = immr_map(im_clkrst); - - local_irq_disable(); - setbits32(&clk_r->car_plprcr, 0x00000080); + setbits32(&mpc8xx_immr->im_clkrst.car_plprcr, 0x00000080); /* Clear the ME bit in MSR to cause checkstop on machine check */ mtmsr(mfmsr() & ~0x1000); - in_8(&clk_r->res[0]); + in_8(&mpc8xx_immr->im_clkrst.res[0]); panic("Restart failed\n"); } diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c index 11b3d1116db14..e4192c0a3c0c7 100644 --- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include "mpc86xads.h" diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index 2fc7cacbcd968..eb4e54ba417f4 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include "mpc885ads.h" diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index 7d8eb50bb9cd3..c422262ba27b9 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include "mpc8xx.h" diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 629067781cec0..64a9c7125c294 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -449,7 +449,7 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, if (!data) return -ENOMEM; - bdfn = (dev->bus->number << 8) | dev->devfn; + bdfn = pci_dev_id(dev); rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem), PE_mask); if (rc) { diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 1a3cb313976a4..e62835a12d73f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -398,6 +398,14 @@ static int dlpar_online_cpu(struct device_node *dn) for_each_present_cpu(cpu) { if (get_hard_smp_processor_id(cpu) != thread) continue; + + if (!topology_is_primary_thread(cpu)) { + if (cpu_smt_control != CPU_SMT_ENABLED) + break; + if (!topology_smt_thread_allowed(cpu)) + break; + } + cpu_maps_update_done(); find_and_update_cpu_nid(cpu); rc = device_online(get_cpu_device(cpu)); @@ -845,15 +853,9 @@ static struct notifier_block pseries_smp_nb = { .notifier_call = pseries_smp_notifier, }; -static int __init pseries_cpu_hotplug_init(void) +void __init pseries_cpu_hotplug_init(void) { int qcss_tok; - unsigned int node; - -#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE - ppc_md.cpu_probe = dlpar_cpu_probe; - ppc_md.cpu_release = dlpar_cpu_release; -#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ rtas_stop_self_token = rtas_function_token(RTAS_FN_STOP_SELF); qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE); @@ -862,12 +864,22 @@ static int __init pseries_cpu_hotplug_init(void) qcss_tok == RTAS_UNKNOWN_SERVICE) { printk(KERN_INFO "CPU Hotplug not supported by firmware " "- disabling.\n"); - return 0; + return; } smp_ops->cpu_offline_self = pseries_cpu_offline_self; smp_ops->cpu_disable = pseries_cpu_disable; smp_ops->cpu_die = pseries_cpu_die; +} + +static int __init pseries_dlpar_init(void) +{ + unsigned int node; + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE + ppc_md.cpu_probe = dlpar_cpu_probe; + ppc_md.cpu_release = dlpar_cpu_release; +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ /* Processors can be added/removed only on LPAR */ if (firmware_has_feature(FW_FEATURE_LPAR)) { @@ -886,4 +898,4 @@ static int __init pseries_cpu_hotplug_init(void) return 0; } -machine_arch_initcall(pseries, pseries_cpu_hotplug_init); +machine_arch_initcall(pseries, pseries_dlpar_init); diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index b0658ea3eccbf..2d40304eb6c16 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -194,7 +194,7 @@ static struct plpks_auth *construct_auth(u8 consumer) return auth; } -/** +/* * Label is combination of label attributes + name. * Label attributes are used internally by kernel and not exposed to the user. */ diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index f8bce40ebd0ce..f8893ba46e83a 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -75,11 +75,13 @@ static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) #ifdef CONFIG_HOTPLUG_CPU int dlpar_cpu(struct pseries_hp_errorlog *hp_elog); +void pseries_cpu_hotplug_init(void); #else static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) { return -EOPNOTSUPP; } +static inline void pseries_cpu_hotplug_init(void) { } #endif /* PCI root bridge prepare function override for pseries */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e2a57cfa6c837..41451b76c6e51 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -816,6 +816,8 @@ static void __init pSeries_setup_arch(void) /* Discover PIC type and setup ppc_md accordingly */ smp_init_pseries(); + // Setup CPU hotplug callbacks + pseries_cpu_hotplug_init(); if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE)) if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index 915f4d3991c38..14cc5ea936c0a 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -37,11 +37,9 @@ #include #include -#include #include #include #include -#include #include @@ -119,9 +117,9 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src) /* This is good enough to get SMCs running..... */ if (brg < 4) { - bp = cpm2_map_size(im_brgc1, 16); + bp = &cpm2_immr->im_brgc1; } else { - bp = cpm2_map_size(im_brgc5, 16); + bp = &cpm2_immr->im_brgc5; brg -= 4; } bp += brg; @@ -131,7 +129,6 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src) val |= CPM_BRG_DIV16; out_be32(bp, val); - cpm2_unmap(bp); } EXPORT_SYMBOL(__cpm2_setbrg); @@ -140,7 +137,6 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) int ret = 0; int shift; int i, bits = 0; - cpmux_t __iomem *im_cpmux; u32 __iomem *reg; u32 mask = 7; @@ -203,35 +199,33 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) {CPM_CLK_SCC4, CPM_CLK8, 7}, }; - im_cpmux = cpm2_map(im_cpmux); - switch (target) { case CPM_CLK_SCC1: - reg = &im_cpmux->cmx_scr; + reg = &cpm2_immr->im_cpmux.cmx_scr; shift = 24; break; case CPM_CLK_SCC2: - reg = &im_cpmux->cmx_scr; + reg = &cpm2_immr->im_cpmux.cmx_scr; shift = 16; break; case CPM_CLK_SCC3: - reg = &im_cpmux->cmx_scr; + reg = &cpm2_immr->im_cpmux.cmx_scr; shift = 8; break; case CPM_CLK_SCC4: - reg = &im_cpmux->cmx_scr; + reg = &cpm2_immr->im_cpmux.cmx_scr; shift = 0; break; case CPM_CLK_FCC1: - reg = &im_cpmux->cmx_fcr; + reg = &cpm2_immr->im_cpmux.cmx_fcr; shift = 24; break; case CPM_CLK_FCC2: - reg = &im_cpmux->cmx_fcr; + reg = &cpm2_immr->im_cpmux.cmx_fcr; shift = 16; break; case CPM_CLK_FCC3: - reg = &im_cpmux->cmx_fcr; + reg = &cpm2_immr->im_cpmux.cmx_fcr; shift = 8; break; default: @@ -261,7 +255,6 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) out_be32(reg, (in_be32(reg) & ~mask) | bits); - cpm2_unmap(im_cpmux); return ret; } @@ -270,7 +263,6 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) int ret = 0; int shift; int i, bits = 0; - cpmux_t __iomem *im_cpmux; u8 __iomem *reg; u8 mask = 3; @@ -285,16 +277,14 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) {CPM_CLK_SMC2, CPM_CLK15, 3}, }; - im_cpmux = cpm2_map(im_cpmux); - switch (target) { case CPM_CLK_SMC1: - reg = &im_cpmux->cmx_smr; + reg = &cpm2_immr->im_cpmux.cmx_smr; mask = 3; shift = 4; break; case CPM_CLK_SMC2: - reg = &im_cpmux->cmx_smr; + reg = &cpm2_immr->im_cpmux.cmx_smr; mask = 3; shift = 0; break; @@ -317,7 +307,6 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) out_8(reg, (in_8(reg) & ~mask) | bits); - cpm2_unmap(im_cpmux); return ret; } diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index cb9ba4ef557a3..e14493685fe87 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -33,9 +33,7 @@ #include #include -#include #include -#include #include "cpm2_pic.h" @@ -231,7 +229,7 @@ void cpm2_pic_init(struct device_node *node) { int i; - cpm2_intctl = cpm2_map(im_intctl); + cpm2_intctl = &cpm2_immr->im_intctl; /* Clear the CPM IRQ controller, in case it has any bits set * from the bootloader diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S index 329b9c4ae5429..e8401b205d380 100644 --- a/arch/powerpc/sysdev/dcr-low.S +++ b/arch/powerpc/sysdev/dcr-low.S @@ -5,10 +5,10 @@ * Copyright (c) 2004 Eugene Surovegin */ +#include #include #include #include -#include #define DCR_ACCESS_PROLOG(table) \ cmplwi cr0,r3,1024; \ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index fae747cc57d2d..78453b9b1ba0e 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3303,7 +3303,7 @@ static void show_pte(unsigned long addr) { unsigned long tskv = 0; struct task_struct *volatile tsk = NULL; - struct mm_struct *mm; + struct mm_struct *volatile mm; pgd_t *pgdp; p4d_t *p4dp; pud_t *pudp; @@ -3828,9 +3828,9 @@ static void dump_tlb_44x(void) #ifdef CONFIG_PPC_BOOK3E_64 static void dump_tlb_book3e(void) { - u32 mmucfg, pidmask, lpidmask; + u32 mmucfg; u64 ramask; - int i, tlb, ntlbs, pidsz, lpidsz, rasz, lrat = 0; + int i, tlb, ntlbs, pidsz, lpidsz, rasz; int mmu_version; static const char *pgsz_names[] = { " 1K", @@ -3874,12 +3874,8 @@ static void dump_tlb_book3e(void) pidsz = ((mmucfg >> 6) & 0x1f) + 1; lpidsz = (mmucfg >> 24) & 0xf; rasz = (mmucfg >> 16) & 0x7f; - if ((mmu_version > 1) && (mmucfg & 0x10000)) - lrat = 1; printf("Book3E MMU MAV=%d.0,%d TLBs,%d-bit PID,%d-bit LPID,%d-bit RA\n", mmu_version, ntlbs, pidsz, lpidsz, rasz); - pidmask = (1ul << pidsz) - 1; - lpidmask = (1ul << lpidsz) - 1; ramask = (1ull << rasz) - 1; for (tlb = 0; tlb < ntlbs; tlb++) { diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index caf41c4869a0e..3235ba1e5b06d 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -136,10 +136,11 @@ static inline int topology_max_smt_threads(void) return __max_smt_threads; } +#include + int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); -bool topology_smt_supported(void); extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) @@ -162,7 +163,6 @@ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } -static inline bool topology_smt_supported(void) { return false; } #endif /* !CONFIG_SMP */ static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e3a65e9fc750d..281fc3f6ea6bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2343,7 +2343,7 @@ void __init arch_cpu_finalize_init(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_check_topology(); + cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e1aa2cd7734ba..d4e897b820c41 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -326,14 +326,6 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -/** - * topology_smt_supported - Check whether SMT is supported by the CPUs - */ -bool topology_smt_supported(void) -{ - return smp_num_siblings > 1; -} - /** * topology_phys_to_logical_pkg - Map a physical package id to a logical * @phys_pkg: The physical package id to map diff --git a/drivers/macintosh/ams/ams-core.c b/drivers/macintosh/ams/ams-core.c index 877e8cb231283..c978b4272daa5 100644 --- a/drivers/macintosh/ams/ams-core.c +++ b/drivers/macintosh/ams/ams-core.c @@ -176,7 +176,7 @@ int ams_sensor_attach(void) return result; } -int __init ams_init(void) +static int __init ams_init(void) { struct device_node *np; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 0ff944860dda9..4cf9e7c42a244 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -150,16 +150,7 @@ static inline resource_size_t p2_size(struct pci_dev *dev) static int find_cxl_vsec(struct pci_dev *dev) { - int vsec = 0; - u16 val; - - while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) { - pci_read_config_word(dev, vsec + 0x4, &val); - if (val == CXL_PCI_VSEC_ID) - return vsec; - } - return 0; - + return pci_find_vsec_capability(dev, PCI_VENDOR_ID_IBM, CXL_PCI_VSEC_ID); } static void dump_cxl_config_space(struct pci_dev *dev) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index cb419aef8d1b8..aad96cb2ab4e9 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -10,7 +10,6 @@ #include #include -#include #ifdef CONFIG_CPM1 #include diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index b47490be872cf..c9491b6e8708c 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -38,7 +38,6 @@ #include #include -#include #include #include diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e006c719182ba..04fd7747f3d83 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -18,6 +18,7 @@ #include #include #include +#include struct device; struct device_node; @@ -208,30 +209,6 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -enum cpuhp_smt_control { - CPU_SMT_ENABLED, - CPU_SMT_DISABLED, - CPU_SMT_FORCE_DISABLED, - CPU_SMT_NOT_SUPPORTED, - CPU_SMT_NOT_IMPLEMENTED, -}; - -#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) -extern enum cpuhp_smt_control cpu_smt_control; -extern void cpu_smt_disable(bool force); -extern void cpu_smt_check_topology(void); -extern bool cpu_smt_possible(void); -extern int cpuhp_smt_enable(void); -extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); -#else -# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) -static inline void cpu_smt_disable(bool force) { } -static inline void cpu_smt_check_topology(void) { } -static inline bool cpu_smt_possible(void) { return false; } -static inline int cpuhp_smt_enable(void) { return 0; } -static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } -#endif - extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); diff --git a/include/linux/cpu_smt.h b/include/linux/cpu_smt.h new file mode 100644 index 0000000000000..0c1664294b579 --- /dev/null +++ b/include/linux/cpu_smt.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CPU_SMT_H_ +#define _LINUX_CPU_SMT_H_ + +enum cpuhp_smt_control { + CPU_SMT_ENABLED, + CPU_SMT_DISABLED, + CPU_SMT_FORCE_DISABLED, + CPU_SMT_NOT_SUPPORTED, + CPU_SMT_NOT_IMPLEMENTED, +}; + +#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) +extern enum cpuhp_smt_control cpu_smt_control; +extern unsigned int cpu_smt_num_threads; +extern void cpu_smt_disable(bool force); +extern void cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads); +extern bool cpu_smt_possible(void); +extern int cpuhp_smt_enable(void); +extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); +#else +# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) +# define cpu_smt_num_threads 1 +static inline void cpu_smt_disable(bool force) { } +static inline void cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads) { } +static inline bool cpu_smt_possible(void) { return false; } +static inline int cpuhp_smt_enable(void) { return 0; } +static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } +#endif + +#endif /* _LINUX_CPU_SMT_H_ */ diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 7fbb459112734..db199d653dd1a 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -90,9 +90,6 @@ extern int dbg_reserve_bp_slot(struct perf_event *bp); extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); -int arch_reserve_bp_slot(struct perf_event *bp); -void arch_release_bp_slot(struct perf_event *bp); -void arch_unregister_hw_breakpoint(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); diff --git a/kernel/cpu.c b/kernel/cpu.c index 88a7ede322bd5..7e8f1b044772e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -592,7 +592,10 @@ static void lockdep_release_cpus_lock(void) void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT + enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; +static unsigned int cpu_smt_max_threads __ro_after_init; +unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX; void __init cpu_smt_disable(bool force) { @@ -606,16 +609,33 @@ void __init cpu_smt_disable(bool force) pr_info("SMT: disabled\n"); cpu_smt_control = CPU_SMT_DISABLED; } + cpu_smt_num_threads = 1; } /* * The decision whether SMT is supported can only be done after the full * CPU identification. Called from architecture code. */ -void __init cpu_smt_check_topology(void) +void __init cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads) { - if (!topology_smt_supported()) + WARN_ON(!num_threads || (num_threads > max_threads)); + + if (max_threads == 1) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; + + cpu_smt_max_threads = max_threads; + + /* + * If SMT has been disabled via the kernel command line or SMT is + * not supported, set cpu_smt_num_threads to 1 for consistency. + * If enabled, take the architecture requested number of threads + * to bring up into account. + */ + if (cpu_smt_control != CPU_SMT_ENABLED) + cpu_smt_num_threads = 1; + else if (num_threads < cpu_smt_num_threads) + cpu_smt_num_threads = num_threads; } static int __init smt_cmdline_disable(char *str) @@ -625,9 +645,23 @@ static int __init smt_cmdline_disable(char *str) } early_param("nosmt", smt_cmdline_disable); +/* + * For Archicture supporting partial SMT states check if the thread is allowed. + * Otherwise this has already been checked through cpu_smt_max_threads when + * setting the SMT level. + */ +static inline bool cpu_smt_thread_allowed(unsigned int cpu) +{ +#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC + return topology_smt_thread_allowed(cpu); +#else + return true; +#endif +} + static inline bool cpu_smt_allowed(unsigned int cpu) { - if (cpu_smt_control == CPU_SMT_ENABLED) + if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu)) return true; if (topology_is_primary_thread(cpu)) @@ -650,22 +684,8 @@ bool cpu_smt_possible(void) } EXPORT_SYMBOL_GPL(cpu_smt_possible); -static inline bool cpuhp_smt_aware(void) -{ - return topology_smt_supported(); -} - -static inline const struct cpumask *cpuhp_get_primary_thread_mask(void) -{ - return cpu_primary_thread_mask; -} #else static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } -static inline bool cpuhp_smt_aware(void) { return false; } -static inline const struct cpumask *cpuhp_get_primary_thread_mask(void) -{ - return cpu_present_mask; -} #endif static inline enum cpuhp_state @@ -1793,6 +1813,16 @@ static int __init parallel_bringup_parse_param(char *arg) } early_param("cpuhp.parallel", parallel_bringup_parse_param); +static inline bool cpuhp_smt_aware(void) +{ + return cpu_smt_max_threads > 1; +} + +static inline const struct cpumask *cpuhp_get_primary_thread_mask(void) +{ + return cpu_primary_thread_mask; +} + /* * On architectures which have enabled parallel bringup this invokes all BP * prepare states for each of the to be onlined APs first. The last state @@ -2626,6 +2656,12 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) for_each_online_cpu(cpu) { if (topology_is_primary_thread(cpu)) continue; + /* + * Disable can be called with CPU_SMT_ENABLED when changing + * from a higher to lower number of SMT threads per core. + */ + if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu)) + continue; ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); if (ret) break; @@ -2660,6 +2696,8 @@ int cpuhp_smt_enable(void) /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; + if (!cpu_smt_thread_allowed(cpu)) + continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) break; @@ -2838,20 +2876,19 @@ static const struct attribute_group cpuhp_cpu_root_attr_group = { #ifdef CONFIG_HOTPLUG_SMT +static bool cpu_smt_num_threads_valid(unsigned int threads) +{ + if (IS_ENABLED(CONFIG_SMT_NUM_THREADS_DYNAMIC)) + return threads >= 1 && threads <= cpu_smt_max_threads; + return threads == 1 || threads == cpu_smt_max_threads; +} + static ssize_t __store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int ctrlval, ret; - - if (sysfs_streq(buf, "on")) - ctrlval = CPU_SMT_ENABLED; - else if (sysfs_streq(buf, "off")) - ctrlval = CPU_SMT_DISABLED; - else if (sysfs_streq(buf, "forceoff")) - ctrlval = CPU_SMT_FORCE_DISABLED; - else - return -EINVAL; + int ctrlval, ret, num_threads, orig_threads; + bool force_off; if (cpu_smt_control == CPU_SMT_FORCE_DISABLED) return -EPERM; @@ -2859,21 +2896,39 @@ __store_smt_control(struct device *dev, struct device_attribute *attr, if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) return -ENODEV; + if (sysfs_streq(buf, "on")) { + ctrlval = CPU_SMT_ENABLED; + num_threads = cpu_smt_max_threads; + } else if (sysfs_streq(buf, "off")) { + ctrlval = CPU_SMT_DISABLED; + num_threads = 1; + } else if (sysfs_streq(buf, "forceoff")) { + ctrlval = CPU_SMT_FORCE_DISABLED; + num_threads = 1; + } else if (kstrtoint(buf, 10, &num_threads) == 0) { + if (num_threads == 1) + ctrlval = CPU_SMT_DISABLED; + else if (cpu_smt_num_threads_valid(num_threads)) + ctrlval = CPU_SMT_ENABLED; + else + return -EINVAL; + } else { + return -EINVAL; + } + ret = lock_device_hotplug_sysfs(); if (ret) return ret; - if (ctrlval != cpu_smt_control) { - switch (ctrlval) { - case CPU_SMT_ENABLED: - ret = cpuhp_smt_enable(); - break; - case CPU_SMT_DISABLED: - case CPU_SMT_FORCE_DISABLED: - ret = cpuhp_smt_disable(ctrlval); - break; - } - } + orig_threads = cpu_smt_num_threads; + cpu_smt_num_threads = num_threads; + + force_off = ctrlval != cpu_smt_control && ctrlval == CPU_SMT_FORCE_DISABLED; + + if (num_threads > orig_threads) + ret = cpuhp_smt_enable(); + else if (num_threads < orig_threads || force_off) + ret = cpuhp_smt_disable(ctrlval); unlock_device_hotplug(); return ret ? ret : count; @@ -2901,6 +2956,17 @@ static ssize_t control_show(struct device *dev, { const char *state = smt_states[cpu_smt_control]; +#ifdef CONFIG_HOTPLUG_SMT + /* + * If SMT is enabled but not all threads are enabled then show the + * number of threads. If all threads are enabled show "on". Otherwise + * show the state name. + */ + if (cpu_smt_control == CPU_SMT_ENABLED && + cpu_smt_num_threads != cpu_smt_max_threads) + return sysfs_emit(buf, "%d\n", cpu_smt_num_threads); +#endif + return snprintf(buf, PAGE_SIZE - 2, "%s\n", state); } diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index c3797701339cb..6c2cb4e4f48da 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -523,26 +523,6 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int we return 0; } -__weak int arch_reserve_bp_slot(struct perf_event *bp) -{ - return 0; -} - -__weak void arch_release_bp_slot(struct perf_event *bp) -{ -} - -/* - * Function to perform processor-specific cleanup during unregistration - */ -__weak void arch_unregister_hw_breakpoint(struct perf_event *bp) -{ - /* - * A weak stub function here for those archs that don't define - * it inside arch/.../kernel/hw_breakpoint.c - */ -} - /* * Constraints to check before allowing this new breakpoint counter. * @@ -594,7 +574,6 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type) enum bp_type_idx type; int max_pinned_slots; int weight; - int ret; /* We couldn't initialize breakpoint constraints on boot */ if (!constraints_initialized) @@ -613,10 +592,6 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type) if (max_pinned_slots > hw_breakpoint_slots_cached(type)) return -ENOSPC; - ret = arch_reserve_bp_slot(bp); - if (ret) - return ret; - return toggle_bp_slot(bp, true, type, weight); } @@ -634,8 +609,6 @@ static void __release_bp_slot(struct perf_event *bp, u64 bp_type) enum bp_type_idx type; int weight; - arch_release_bp_slot(bp); - type = find_slot_idx(bp_type); weight = hw_breakpoint_weight(bp); WARN_ON(toggle_bp_slot(bp, false, type, weight)); @@ -645,7 +618,6 @@ void release_bp_slot(struct perf_event *bp) { struct mutex *mtx = bp_constraints_lock(bp); - arch_unregister_hw_breakpoint(bp); __release_bp_slot(bp, bp->attr.bp_type); bp_constraints_unlock(mtx); } diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index cbeeaeae8837a..1b39b86849da5 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -36,6 +36,7 @@ $(TM_TESTS): CFLAGS += -I../tm -mhtm CFLAGS += $(KHDR_INCLUDES) -fno-pie $(OUTPUT)/ptrace-gpr: ptrace-gpr.S +$(OUTPUT)/ptrace-perf-hwbreak: ptrace-perf-asm.S $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread $(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S new file mode 100644 index 0000000000000..9aa2e58f3189b --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include + +.global same_watch_addr_load +.global same_watch_addr_trap + +FUNC_START(same_watch_addr_child) + nop +same_watch_addr_load: + ld 0,0(3) + nop +same_watch_addr_trap: + trap + blr +FUNC_END(same_watch_addr_child) + + +.global perf_then_ptrace_load1 +.global perf_then_ptrace_load2 +.global perf_then_ptrace_trap + +FUNC_START(perf_then_ptrace_child) + nop +perf_then_ptrace_load1: + ld 0,0(3) +perf_then_ptrace_load2: + ld 0,0(4) + nop +perf_then_ptrace_trap: + trap + blr +FUNC_END(perf_then_ptrace_child) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c index d8a9e95fc03de..a0a0b9bb58543 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -1,659 +1,445 @@ // SPDX-License-Identifier: GPL-2.0+ -#include -#include -#include -#include -#include -#include -#include -#include + #include -#include +#include +#include +#include +#include #include -#include "ptrace.h" -char data[16]; +#include "utils.h" -/* Overlapping address range */ -volatile __u64 *ptrace_data1 = (__u64 *)&data[0]; -volatile __u64 *perf_data1 = (__u64 *)&data[4]; +/* + * Child subroutine that performs a load on the address, then traps + */ +void same_watch_addr_child(unsigned long *addr); -/* Non-overlapping address range */ -volatile __u64 *ptrace_data2 = (__u64 *)&data[0]; -volatile __u64 *perf_data2 = (__u64 *)&data[8]; +/* Address of the ld instruction in same_watch_addr_child() */ +extern char same_watch_addr_load[]; -static unsigned long pid_max_addr(void) -{ - FILE *fp; - char *line, *c; - char addr[100]; - size_t len = 0; - - fp = fopen("/proc/kallsyms", "r"); - if (!fp) { - printf("Failed to read /proc/kallsyms. Exiting..\n"); - exit(EXIT_FAILURE); - } +/* Address of the end trap instruction in same_watch_addr_child() */ +extern char same_watch_addr_trap[]; - while (getline(&line, &len, fp) != -1) { - if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") || - strstr(line, "pid_max_min")) - continue; +/* + * Child subroutine that performs a load on the first address, then a load on + * the second address (with no instructions separating this from the first + * load), then traps. + */ +void perf_then_ptrace_child(unsigned long *first_addr, unsigned long *second_addr); - strncpy(addr, line, len < 100 ? len : 100); - c = strchr(addr, ' '); - *c = '\0'; - return strtoul(addr, &c, 16); - } - fclose(fp); - printf("Could not find pid_max. Exiting..\n"); - exit(EXIT_FAILURE); - return -1; -} +/* Address of the first ld instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_load1[]; -static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len) -{ - memset(attr, 0, sizeof(struct perf_event_attr)); - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(struct perf_event_attr); - attr->bp_type = HW_BREAKPOINT_R; - attr->bp_addr = addr; - attr->bp_len = len; - attr->exclude_kernel = 1; - attr->exclude_hv = 1; -} +/* Address of the second ld instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_load2[]; -static void perf_kernel_event_attr_set(struct perf_event_attr *attr) +/* Address of the end trap instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_trap[]; + +static inline long sys_ptrace(long request, pid_t pid, unsigned long addr, unsigned long data) { - memset(attr, 0, sizeof(struct perf_event_attr)); - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(struct perf_event_attr); - attr->bp_type = HW_BREAKPOINT_R; - attr->bp_addr = pid_max_addr(); - attr->bp_len = sizeof(unsigned long); - attr->exclude_user = 1; - attr->exclude_hv = 1; + return syscall(__NR_ptrace, request, pid, addr, data); } -static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len) +static long ptrace_traceme(void) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + return sys_ptrace(PTRACE_TRACEME, 0, 0, 0); } -static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len) +static long ptrace_getregs(pid_t pid, struct pt_regs *result) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); + return sys_ptrace(PTRACE_GETREGS, pid, 0, (unsigned long)result); } -static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len) +static long ptrace_setregs(pid_t pid, struct pt_regs *result) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0); + return sys_ptrace(PTRACE_SETREGS, pid, 0, (unsigned long)result); } -static int perf_thread_kernel_event_open(pid_t child_pid) +static long ptrace_cont(pid_t pid, long signal) { - struct perf_event_attr attr; - - perf_kernel_event_attr_set(&attr); - return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); + return sys_ptrace(PTRACE_CONT, pid, 0, signal); } -static int perf_cpu_kernel_event_open(int cpu) +static long ptrace_singlestep(pid_t pid, long signal) { - struct perf_event_attr attr; - - perf_kernel_event_attr_set(&attr); - return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + return sys_ptrace(PTRACE_SINGLESTEP, pid, 0, signal); } -static int child(void) +static long ppc_ptrace_gethwdbginfo(pid_t pid, struct ppc_debug_info *dbginfo) { - int ret; - - ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); - if (ret) { - printf("Error: PTRACE_TRACEME failed\n"); - return 0; - } - kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */ - - return 0; + return sys_ptrace(PPC_PTRACE_GETHWDBGINFO, pid, 0, (unsigned long)dbginfo); } -static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, - __u64 addr, int len) +static long ppc_ptrace_sethwdbg(pid_t pid, struct ppc_hw_breakpoint *bp_info) { - info->version = 1; - info->trigger_type = type; - info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; - info->addr = addr; - info->addr2 = addr + len; - info->condition_value = 0; - if (!len) - info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; - else - info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; + return sys_ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, (unsigned long)bp_info); } -static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len) +static long ppc_ptrace_delhwdbg(pid_t pid, int bp_id) { - struct ppc_hw_breakpoint info; - - ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); - return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); + return sys_ptrace(PPC_PTRACE_DELHWDEBUG, pid, 0L, bp_id); } -static int test1(pid_t child_pid) +static long ptrace_getreg_pc(pid_t pid, void **pc) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing cpu event by perf) - * if (addr range overlaps) - * fail; - */ + struct pt_regs regs; + long err; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) - return -1; + err = ptrace_getregs(pid, ®s); + if (err) + return err; - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd > 0 || errno != ENOSPC) - ret = -1; + *pc = (void *)regs.nip; - close(perf_fd); - return ret; + return 0; } -static int test2(pid_t child_pid) +static long ptrace_setreg_pc(pid_t pid, void *pc) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing cpu event by perf) - * if (addr range does not overlaps) - * allow; - */ + struct pt_regs regs; + long err; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) - return -1; + err = ptrace_getregs(pid, ®s); + if (err) + return err; - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + regs.nip = (unsigned long)pc; -perf_close: - close(perf_fd); - return ret; -} + err = ptrace_setregs(pid, ®s); + if (err) + return err; -static int test3(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the same thread) - * if (addr range overlaps) - * fail; - */ - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd < 0) - return -1; - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd > 0 || errno != ENOSPC) - ret = -1; - - close(perf_fd); - return ret; + return 0; } -static int test4(pid_t child_pid) +static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, + int group_fd, unsigned long flags) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the same thread) - * if (addr range does not overlaps) - * fail; - */ - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, - sizeof(*perf_data2)); - if (perf_fd < 0) - return -1; - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - -perf_close: - close(perf_fd); - return ret; + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } -static int test5(pid_t child_pid) +static void perf_user_event_attr_set(struct perf_event_attr *attr, void *addr, u64 len) { - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the different thread) - * allow; - */ - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } - - perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } + memset(attr, 0, sizeof(struct perf_event_attr)); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); -perf_close: - close(perf_fd); -kill_child: - kill(cpid, SIGINT); - return ret; + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = (u64)addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; } -static int test6(pid_t child_pid) +static int perf_watchpoint_open(pid_t child_pid, void *addr, u64 len) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread kernel event by perf) - * if (existing thread event by ptrace on the same thread) - * allow; - * -- OR -- - * if (new per cpu kernel event by perf) - * if (existing thread event by ptrace) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - perf_fd = perf_thread_kernel_event_open(child_pid); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); - - perf_fd = perf_cpu_kernel_event_open(0); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); + struct perf_event_attr attr; -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + perf_user_event_attr_set(&attr, addr, len); + return perf_event_open(&attr, child_pid, -1, -1, 0); } -static int test7(pid_t child_pid) +static int perf_read_counter(int perf_fd, u64 *count) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; + /* + * A perf counter is retrieved by the read() syscall. It contains + * the current count as 8 bytes that are interpreted as a u64 */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + ssize_t len = read(perf_fd, count, sizeof(*count)); - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; + if (len != sizeof(*count)) + return -1; - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test8(pid_t child_pid) +static void ppc_ptrace_init_breakpoint(struct ppc_hw_breakpoint *info, + int type, void *addr, int len) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlaps) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; - - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, - sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); - -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = (u64)addr; + info->addr2 = (u64)addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; } -static int test9(pid_t child_pid) +/* + * Checks if we can place at least 2 watchpoints on the child process + */ +static int check_watchpoints(pid_t pid) { - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the other thread) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } + struct ppc_debug_info dbginfo; - perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - close(perf_fd); + FAIL_IF_MSG(ppc_ptrace_gethwdbginfo(pid, &dbginfo), "PPC_PTRACE_GETHWDBGINFO failed"); + SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)"); -kill_child: - kill(cpid, SIGINT); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test10(pid_t child_pid) +/* + * Wrapper around a plain fork() call that sets up the child for + * ptrace-ing. Both the parent and child return from this, though + * the child is stopped until ptrace_cont(pid) is run by the parent. + */ +static int ptrace_fork_child(pid_t *pid) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + int status; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; - - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + *pid = fork(); -static int test11(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlap) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; + if (*pid < 0) + FAIL_IF_MSG(1, "Failed to fork child"); - perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; + if (!*pid) { + FAIL_IF_EXIT_MSG(ptrace_traceme(), "PTRACE_TRACEME failed"); + FAIL_IF_EXIT_MSG(raise(SIGSTOP), "Child failed to raise SIGSTOP"); + } else { + /* Synchronise on child SIGSTOP */ + FAIL_IF_MSG(waitpid(*pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); } - close(perf_fd); -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test12(pid_t child_pid) +/* + * Tests the interaction between ptrace and perf watching the same data. + * + * We expect ptrace to take 'priority', as it is has before-execute + * semantics. + * + * The perf counter should not be incremented yet because perf has after-execute + * semantics. E.g., if ptrace changes the child PC, we don't even execute the + * instruction at all. + * + * When the child is stopped for ptrace, we test both continue and single step. + * Both should increment the perf counter. We also test changing the PC somewhere + * different and stepping, which should not increment the perf counter. + */ +int same_watch_addr_test(void) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */ + int bp_id; /* Breakpoint handle of ptrace watchpoint */ + int perf_fd; /* File descriptor of perf performance counter */ + u64 perf_count; /* Most recently fetched perf performance counter value */ + pid_t pid; /* PID of child process */ + void *pc; /* Most recently fetched child PC value */ + int status; /* Stop status of child after waitpid */ + unsigned long value; /* Dummy value to be read/written to by child */ + int err; + + err = ptrace_fork_child(&pid); + if (err) + return err; + + if (!pid) { + same_watch_addr_child(&value); + exit(1); + } - perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; + err = check_watchpoints(pid); + if (err) + return err; - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + /* Place a perf watchpoint counter on value */ + perf_fd = perf_watchpoint_open(pid, &value, sizeof(value)); + FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter"); -static int test13(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlap) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; + /* Place a ptrace watchpoint on value */ + ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, &value, sizeof(value)); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); - perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); + /* Let the child run. It should stop on the ptrace watchpoint */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction"); -static int test14(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the other thread) - * allow; + /* + * We stopped before executing the load, so perf should not have + * recorded any events yet */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } - - perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - close(perf_fd); - -kill_child: - kill(cpid, SIGINT); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 0, "perf recorded unexpected event"); + + /* Single stepping over the load should increment the perf counter */ + FAIL_IF_MSG(ptrace_singlestep(pid, 0), "Failed to single step child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load + 4, "Failed to single step load instruction"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter did not increment"); + + /* + * Set up a ptrace watchpoint on the value again and trigger it. + * The perf counter should not have incremented because we do not + * execute the load yet. + */ + FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint"); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter should not have changed"); + + /* Continuing over the load should increment the perf counter */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 2, "perf counter did not increment"); + + /* + * If we set the child PC back to the load instruction, then continue, + * we should reach the end trap (because ptrace is one-shot) and have + * another perf event. + */ + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter did not increment"); + + /* + * If we set the child PC back to the load instruction, set a ptrace + * watchpoint on the load, then continue, we should immediately get + * the ptrace trap without incrementing the perf counter + */ + FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint"); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed"); + + /* + * If we change the PC while stopped on the load instruction, we should + * not increment the perf counter (because ptrace is before-execute, + * perf is after-execute). + */ + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load + 4), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); -static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg) -{ - int ret; + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed"); - ret = fun(arg); - if (ret) - printf("%s: Error\n", msg); - else - printf("%s: Ok\n", msg); - return ret; -} + /* Clean up child */ + FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child"); -char *desc[14] = { - "perf cpu event -> ptrace thread event (Overlapping)", - "perf cpu event -> ptrace thread event (Non-overlapping)", - "perf thread event -> ptrace same thread event (Overlapping)", - "perf thread event -> ptrace same thread event (Non-overlapping)", - "perf thread event -> ptrace other thread event", - "ptrace thread event -> perf kernel event", - "ptrace thread event -> perf same thread event (Overlapping)", - "ptrace thread event -> perf same thread event (Non-overlapping)", - "ptrace thread event -> perf other thread event", - "ptrace thread event -> perf cpu event (Overlapping)", - "ptrace thread event -> perf cpu event (Non-overlapping)", - "ptrace thread event -> perf same thread & cpu event (Overlapping)", - "ptrace thread event -> perf same thread & cpu event (Non-overlapping)", - "ptrace thread event -> perf other thread & cpu event", -}; - -static int test(pid_t child_pid) -{ - int ret = TEST_PASS; - - ret |= do_test(desc[0], test1, child_pid); - ret |= do_test(desc[1], test2, child_pid); - ret |= do_test(desc[2], test3, child_pid); - ret |= do_test(desc[3], test4, child_pid); - ret |= do_test(desc[4], test5, child_pid); - ret |= do_test(desc[5], test6, child_pid); - ret |= do_test(desc[6], test7, child_pid); - ret |= do_test(desc[7], test8, child_pid); - ret |= do_test(desc[8], test9, child_pid); - ret |= do_test(desc[9], test10, child_pid); - ret |= do_test(desc[10], test11, child_pid); - ret |= do_test(desc[11], test12, child_pid); - ret |= do_test(desc[12], test13, child_pid); - ret |= do_test(desc[13], test14, child_pid); - - return ret; + return 0; } -static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) +/* + * Tests the interaction between ptrace and perf when: + * 1. perf watches a value + * 2. ptrace watches a different value + * 3. The perf value is read, then the ptrace value is read immediately after + * + * A breakpoint implementation may accidentally misattribute/skip one of + * the ptrace or perf handlers, as interrupt based work is done after perf + * and before ptrace. + * + * We expect the perf counter to increment before the ptrace watchpoint + * triggers. + */ +int perf_then_ptrace_test(void) { - if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) { - perror("Can't get breakpoint info"); - exit(-1); + struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */ + int bp_id; /* Breakpoint handle of ptrace watchpoint */ + int perf_fd; /* File descriptor of perf performance counter */ + u64 perf_count; /* Most recently fetched perf performance counter value */ + pid_t pid; /* PID of child process */ + void *pc; /* Most recently fetched child PC value */ + int status; /* Stop status of child after waitpid */ + unsigned long perf_value; /* Dummy value to be watched by perf */ + unsigned long ptrace_value; /* Dummy value to be watched by ptrace */ + int err; + + err = ptrace_fork_child(&pid); + if (err) + return err; + + /* + * If we are the child, run a subroutine that reads the perf value, + * then reads the ptrace value with consecutive load instructions + */ + if (!pid) { + perf_then_ptrace_child(&perf_value, &ptrace_value); + exit(0); } -} -static int ptrace_perf_hwbreak(void) -{ - int ret; - pid_t child_pid; - struct ppc_debug_info dbginfo; + err = check_watchpoints(pid); + if (err) + return err; - child_pid = fork(); - if (!child_pid) - return child(); + /* Place a perf watchpoint counter */ + perf_fd = perf_watchpoint_open(pid, &perf_value, sizeof(perf_value)); + FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter"); - /* parent */ - wait(NULL); /* <-- child (SIGUSR1) */ + /* Place a ptrace watchpoint */ + ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, + &ptrace_value, sizeof(ptrace_value)); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); - get_dbginfo(child_pid, &dbginfo); - SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)"); + /* Let the child run. It should stop on the ptrace watchpoint */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); - ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - SKIP_IF_MSG(ret < 0, "perf_event_open syscall failed"); - close(ret); + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != perf_then_ptrace_load2, "Child did not stop on ptrace load"); - ret = test(child_pid); + /* perf should have recorded the first load */ + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter did not increment"); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - return ret; + /* Clean up child */ + FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child"); + + return 0; } int main(int argc, char *argv[]) { - return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak"); + int err = 0; + + err |= test_harness(same_watch_addr_test, "same_watch_addr"); + err |= test_harness(perf_then_ptrace_test, "perf_then_ptrace"); + + return err; }