From 5ca096161cdccfa328acf6704a4615528471d309 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V (IBM)" Date: Wed, 3 Apr 2024 14:06:09 +0530 Subject: [PATCH 01/21] powerpc/mm: Align memory_limit value specified using mem= kernel parameter The value specified for the memory limit is used to set a restriction on memory usage. It is important to ensure that this restriction is within the linear map kernel address space range. The hash page table translation uses a 16MB page size to map the kernel linear map address space. htab_bolt_mapping() function aligns down the size of the range while mapping kernel linear address space. Since the memblock limit is enforced very early during boot, before we can detect the type of memory translation (radix vs hash), we align the memory limit value specified as a kernel parameter to 16MB. This alignment value will work for both hash and radix translations. Signed-off-by: Aneesh Kumar K.V (IBM) Acked-by: Joel Savitz Signed-off-by: Michael Ellerman Link: https://msgid.link/20240403083611.172833-1-aneesh.kumar@kernel.org --- arch/powerpc/kernel/prom.c | 7 +++++-- arch/powerpc/kernel/prom_init.c | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index cd8d8883de904..7451bedad1f43 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -846,8 +846,11 @@ void __init early_init_devtree(void *params) reserve_crashkernel(); early_reserve_mem(); - /* Ensure that total memory size is page-aligned. */ - limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); + if (memory_limit > memblock_phys_mem_size()) + memory_limit = 0; + + /* Align down to 16 MB which is large page size with hash page translation */ + limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M); memblock_enforce_memory_limit(limit); #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 0ef3582853374..fbb68fc28ed3a 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -817,8 +817,8 @@ static void __init early_cmdline_parse(void) opt += 4; prom_memory_limit = prom_memparse(opt, (const char **)&opt); #ifdef CONFIG_PPC64 - /* Align to 16 MB == size of ppc64 large page */ - prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); + /* Align down to 16 MB which is large page size with hash page translation */ + prom_memory_limit = ALIGN_DOWN(prom_memory_limit, SZ_16M); #endif } From f94f5ac07983cb53de0c964f5428366c19e81993 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V (IBM)" Date: Wed, 3 Apr 2024 14:06:10 +0530 Subject: [PATCH 02/21] powerpc/fadump: Don't update the user-specified memory limit If the user specifies the memory limit, the kernel should honor it such that all allocation and reservations are made within the memory limit specified. fadump was breaking that rule. Remove the code which updates the memory limit such that fadump reservations are done within the limit specified. Signed-off-by: Aneesh Kumar K.V (IBM) Signed-off-by: Michael Ellerman Link: https://msgid.link/20240403083611.172833-2-aneesh.kumar@kernel.org --- arch/powerpc/kernel/fadump.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index d14eda1e85896..4e768d93c6d42 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -573,22 +573,6 @@ int __init fadump_reserve_mem(void) } } - /* - * Calculate the memory boundary. - * If memory_limit is less than actual memory boundary then reserve - * the memory for fadump beyond the memory_limit and adjust the - * memory_limit accordingly, so that the running kernel can run with - * specified memory_limit. - */ - if (memory_limit && memory_limit < memblock_end_of_DRAM()) { - size = get_fadump_area_size(); - if ((memory_limit + size) < memblock_end_of_DRAM()) - memory_limit += size; - else - memory_limit = memblock_end_of_DRAM(); - printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" - " dump, now %#016llx\n", memory_limit); - } if (memory_limit) mem_boundary = memory_limit; else From 5a799af9522641517f6d871d9f56e2658ee7db58 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V (IBM)" Date: Wed, 3 Apr 2024 14:06:11 +0530 Subject: [PATCH 03/21] powerpc/mm: Update the memory limit based on direct mapping restrictions memory limit value specified by the user are further updated such that the value is 16MB aligned. This is because hash translation mode use 16MB as direct mapping page size. Make sure we update the global variable 'memory_limit' with the 16MB aligned value such that all kernel components will see the new aligned value of the memory limit. Signed-off-by: Aneesh Kumar K.V (IBM) Signed-off-by: Michael Ellerman Link: https://msgid.link/20240403083611.172833-3-aneesh.kumar@kernel.org --- arch/powerpc/kernel/prom.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7451bedad1f43..b8f764453eaa0 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -779,7 +779,6 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { - phys_addr_t limit; DBG(" -> early_init_devtree(%px)\n", params); @@ -850,8 +849,8 @@ void __init early_init_devtree(void *params) memory_limit = 0; /* Align down to 16 MB which is large page size with hash page translation */ - limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M); - memblock_enforce_memory_limit(limit); + memory_limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M); + memblock_enforce_memory_limit(memory_limit); #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) if (!early_radix_enabled()) From 9c74ecfd0fc46e2eaf92c1b6169cc0c8a87f1dc2 Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Fri, 12 Apr 2024 14:50:46 +0530 Subject: [PATCH 04/21] powerpc/pseries: Add pool idle time at LPAR boot When there are no options specified for lparstat, it is expected to give reports since LPAR(Logical Partition) boot. APP(Available Processor Pool) is an indicator of how many cores in the shared pool are free to use in Shared Processor LPAR(SPLPAR). APP is derived using pool_idle_time which is obtained using H_PIC call. The interval based reports show correct APP value while since boot report shows very high APP values. This happens because in that case APP is obtained by dividing pool idle time by LPAR uptime. Since pool idle time is reported by the PowerVM hypervisor since its boot, it need not align with LPAR boot. To fix that export boot pool idle time in lparcfg and powerpc-utils will use this info to derive APP as below for since boot reports. APP = (pool idle time - boot pool idle time) / (uptime * timebase) Results:: Observe APP values. ====================== Shared LPAR ================================ lparstat System Configuration type=Shared mode=Uncapped smt=8 lcpu=12 mem=15573440 kB cpus=37 ent=12.00 reboot stress-ng --cpu=$(nproc) -t 600 sleep 600 So in this case app is expected to close to 37-6=31. ====== 6.9-rc1 and lparstat 1.3.10 ============= %user %sys %wait %idle physc %entc lbusy app vcsw phint ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- 47.48 0.01 0.00 52.51 0.00 0.00 47.49 69099.72 541547 21 === With this patch and powerpc-utils patch to do the above equation === %user %sys %wait %idle physc %entc lbusy app vcsw phint ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- 47.48 0.01 0.00 52.51 5.73 47.75 47.49 31.21 541753 21 ===================================================================== Note: physc, purr/idle purr being inaccurate is being handled in a separate patch in powerpc-utils tree. Signed-off-by: Shrikanth Hegde Signed-off-by: Michael Ellerman Link: https://msgid.link/20240412092047.455483-2-sshegde@linux.ibm.com --- arch/powerpc/platforms/pseries/lparcfg.c | 39 ++++++++++++++++++------ 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index f73c4d1c26af9..679efcf216284 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -170,20 +170,24 @@ static void show_gpci_data(struct seq_file *m) kfree(buf); } -static unsigned h_pic(unsigned long *pool_idle_time, - unsigned long *num_procs) +static long h_pic(unsigned long *pool_idle_time, + unsigned long *num_procs) { - unsigned long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = {0}; rc = plpar_hcall(H_PIC, retbuf); - *pool_idle_time = retbuf[0]; - *num_procs = retbuf[1]; + if (pool_idle_time) + *pool_idle_time = retbuf[0]; + if (num_procs) + *num_procs = retbuf[1]; return rc; } +unsigned long boot_pool_idle_time; + /* * parse_ppp_data * Parse out the data returned from h_get_ppp and h_pic @@ -215,9 +219,15 @@ static void parse_ppp_data(struct seq_file *m) seq_printf(m, "pool_capacity=%d\n", ppp_data.active_procs_in_pool * 100); - h_pic(&pool_idle_time, &pool_procs); - seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); - seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + /* In case h_pic call is not successful, this would result in + * APP values being wrong in tools like lparstat. + */ + + if (h_pic(&pool_idle_time, &pool_procs) == H_SUCCESS) { + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + seq_printf(m, "boot_pool_idle_time=%ld\n", boot_pool_idle_time); + } } seq_printf(m, "unallocated_capacity_weight=%d\n", @@ -792,6 +802,7 @@ static const struct proc_ops lparcfg_proc_ops = { static int __init lparcfg_init(void) { umode_t mode = 0444; + long retval; /* Allow writing if we have FW_FEATURE_SPLPAR */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) @@ -801,6 +812,16 @@ static int __init lparcfg_init(void) printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); return -EIO; } + + /* If this call fails, it would result in APP values + * being wrong for since boot reports of lparstat + */ + retval = h_pic(&boot_pool_idle_time, NULL); + + if (retval != H_SUCCESS) + pr_debug("H_PIC failed during lparcfg init retval: %ld\n", + retval); + return 0; } machine_device_initcall(pseries, lparcfg_init); From 6d4341638516bf97b9a34947e0bd95035a8230a5 Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Fri, 12 Apr 2024 14:50:47 +0530 Subject: [PATCH 05/21] powerpc/pseries: Add failure related checks for h_get_mpp and h_get_ppp Couple of Minor fixes: - hcall return values are long. Fix that for h_get_mpp, h_get_ppp and parse_ppp_data - If hcall fails, values set should be at-least zero. It shouldn't be uninitialized values. Fix that for h_get_mpp and h_get_ppp Signed-off-by: Shrikanth Hegde Signed-off-by: Michael Ellerman Link: https://msgid.link/20240412092047.455483-3-sshegde@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 2 +- arch/powerpc/platforms/pseries/lpar.c | 6 +++--- arch/powerpc/platforms/pseries/lparcfg.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index a41e542ba94dd..51172625fa3a5 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -570,7 +570,7 @@ struct hvcall_mpp_data { unsigned long backing_mem; }; -int h_get_mpp(struct hvcall_mpp_data *); +long h_get_mpp(struct hvcall_mpp_data *mpp_data); struct hvcall_mpp_x_data { unsigned long coalesced_bytes; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 4e9916bb03d71..c1d8bee8f7018 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1886,10 +1886,10 @@ notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) * h_get_mpp * H_GET_MPP hcall returns info in 7 parms */ -int h_get_mpp(struct hvcall_mpp_data *mpp_data) +long h_get_mpp(struct hvcall_mpp_data *mpp_data) { - int rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; rc = plpar_hcall9(H_GET_MPP, retbuf); diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 679efcf216284..6e7029640c0ca 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -113,8 +113,8 @@ struct hvcall_ppp_data { */ static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) { - unsigned long rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; rc = plpar_hcall9(H_GET_PPP, retbuf); @@ -197,7 +197,7 @@ static void parse_ppp_data(struct seq_file *m) struct hvcall_ppp_data ppp_data; struct device_node *root; const __be32 *perf_level; - int rc; + long rc; rc = h_get_ppp(&ppp_data); if (rc) From c6c5b14dac0d1bd0da8b4d1d3b77f18eb9085fcb Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Tue, 23 Apr 2024 01:29:30 +0530 Subject: [PATCH 06/21] powerpc: make fadump resilient with memory add/remove events Due to changes in memory resources caused by either memory hotplug or online/offline events, the elfcorehdr, which describes the CPUs and memory of the crashed kernel to the kernel that collects the dump (known as second/fadump kernel), becomes outdated. Consequently, attempting dump collection with an outdated elfcorehdr can lead to failed or inaccurate dump collection. Memory hotplug or online/offline events is referred as memory add/remove events in reset of the commit message. The current solution to address the aforementioned issue is as follows: Monitor memory add/remove events in userspace using udev rules, and re-register fadump whenever there are changes in memory resources. This leads to the creation of a new elfcorehdr with updated system memory information. There are several notable issues associated with re-registering fadump for every memory add/remove events. 1. Bulk memory add/remove events with udev-based fadump re-registration can lead to race conditions and, more importantly, it creates a wide window during which fadump is inactive until all memory add/remove events are settled. 2. Re-registering fadump for every memory add/remove event is inefficient. 3. The memory for elfcorehdr is allocated based on the memblock regions available during early boot and remains fixed thereafter. However, if elfcorehdr is later recreated with additional memblock regions, its size will increase, potentially leading to memory corruption. Address the aforementioned challenges by shifting the creation of elfcorehdr from the first kernel (also referred as the crashed kernel), where it was created and frequently recreated for every memory add/remove event, to the fadump kernel. As a result, the elfcorehdr only needs to be created once, thus eliminating the necessity to re-register fadump during memory add/remove events. At present, the first kernel prepares fadump header and stores it in the fadump reserved area. The fadump header includes the start address of the elfcorehdr, crashing CPU details, and other relevant information. In the event of a crash in the first kernel, the second/fadump boots and accesses the fadump header prepared by the first kernel. It then performs the following steps in a platform-specific function [rtas|opal]_fadump_process: 1. Sanity check for fadump header 2. Update CPU notes in elfcorehdr Along with the above, update the setup_fadump()/fadump.c to create elfcorehdr and set its address to the global variable elfcorehdr_addr for the vmcore module to process it in the second/fadump kernel. Section below outlines the information required to create the elfcorehdr and the changes made to make it available to the fadump kernel if it's not already. To create elfcorehdr, the following crashed kernel information is required: CPU notes, vmcoreinfo, and memory ranges. At present, the CPU notes are already prepared in the fadump kernel, so no changes are needed in that regard. The fadump kernel has access to all crashed kernel memory regions, including boot memory regions that are relocated by firmware to fadump reserved areas, so no changes for that either. However, it is necessary to add new members to the fadump header, i.e., the 'fadump_crash_info_header' structure, in order to pass the crashed kernel's vmcoreinfo address and its size to fadump kernel. In addition to the vmcoreinfo address and size, there are a few other attributes also added to the fadump_crash_info_header structure. 1. version: It stores the fadump header version, which is currently set to 1. This provides flexibility to update the fadump crash info header in the future without changing the magic number. For each change in the fadump header, the version will be increased. This will help the updated kernel determine how to handle kernel dumps from older kernels. The magic number remains relevant for checking fadump header corruption. 2. pt_regs_sz/cpu_mask_sz: Store size of pt_regs and cpu_mask structure of first kernel. These attributes are used to prevent dump processing if the sizes of pt_regs or cpu_mask structure differ between the first and fadump kernels. Note: if either first/crashed kernel or second/fadump kernel do not have the changes introduced here then kernel fail to collect the dump and prints relevant error message on the console. Signed-off-by: Sourabh Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20240422195932.1583833-2-sourabhjain@linux.ibm.com --- arch/powerpc/include/asm/fadump-internal.h | 31 +- arch/powerpc/kernel/fadump.c | 361 +++++++++++-------- arch/powerpc/platforms/powernv/opal-fadump.c | 22 +- arch/powerpc/platforms/pseries/rtas-fadump.c | 34 +- 4 files changed, 242 insertions(+), 206 deletions(-) diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h index 27f9e11eda28c..5d706a7acc8a4 100644 --- a/arch/powerpc/include/asm/fadump-internal.h +++ b/arch/powerpc/include/asm/fadump-internal.h @@ -42,13 +42,38 @@ static inline u64 fadump_str_to_u64(const char *str) #define FADUMP_CPU_UNKNOWN (~((u32)0)) -#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPINF") +/* + * The introduction of new fields in the fadump crash info header has + * led to a change in the magic key from `FADMPINF` to `FADMPSIG` for + * identifying a kernel crash from an old kernel. + * + * To prevent the need for further changes to the magic number in the + * event of future modifications to the fadump crash info header, a + * version field has been introduced to track the fadump crash info + * header version. + * + * Consider a few points before adding new members to the fadump crash info + * header structure: + * + * - Append new members; avoid adding them in between. + * - Non-primitive members should have a size member as well. + * - For every change in the fadump header, increment the + * fadump header version. This helps the updated kernel decide how to + * handle kernel dumps from older kernels. + */ +#define FADUMP_CRASH_INFO_MAGIC_OLD fadump_str_to_u64("FADMPINF") +#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPSIG") +#define FADUMP_HEADER_VERSION 1 /* fadump crash info structure */ struct fadump_crash_info_header { u64 magic_number; - u64 elfcorehdr_addr; + u32 version; u32 crashing_cpu; + u64 vmcoreinfo_raddr; + u64 vmcoreinfo_size; + u32 pt_regs_sz; + u32 cpu_mask_sz; struct pt_regs regs; struct cpumask cpu_mask; }; @@ -94,6 +119,8 @@ struct fw_dump { u64 boot_mem_regs_cnt; unsigned long fadumphdr_addr; + u64 elfcorehdr_addr; + u64 elfcorehdr_size; unsigned long cpu_notes_buf_vaddr; unsigned long cpu_notes_buf_size; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 4e768d93c6d42..a020597c065a2 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -53,8 +53,6 @@ static struct kobject *fadump_kobj; static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); -static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; - #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ sizeof(struct fadump_memory_range)) @@ -373,12 +371,6 @@ static unsigned long __init get_fadump_area_size(void) size = PAGE_ALIGN(size); size += fw_dump.boot_memory_size; size += sizeof(struct fadump_crash_info_header); - size += sizeof(struct elfhdr); /* ELF core header.*/ - size += sizeof(struct elf_phdr); /* place holder for cpu notes */ - /* Program headers for crash memory regions. */ - size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); - - size = PAGE_ALIGN(size); /* This is to hold kernel metadata on platforms that support it */ size += (fw_dump.ops->fadump_get_metadata_size ? @@ -915,36 +907,6 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, return 0; } -static int fadump_exclude_reserved_area(u64 start, u64 end) -{ - u64 ra_start, ra_end; - int ret = 0; - - ra_start = fw_dump.reserve_dump_area_start; - ra_end = ra_start + fw_dump.reserve_dump_area_size; - - if ((ra_start < end) && (ra_end > start)) { - if ((start < ra_start) && (end > ra_end)) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - if (ret) - return ret; - - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } else if (start < ra_start) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - } else if (ra_end < end) { - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } - } else - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - - return ret; -} - static int fadump_init_elfcore_header(char *bufp) { struct elfhdr *elf; @@ -981,52 +943,6 @@ static int fadump_init_elfcore_header(char *bufp) return 0; } -/* - * Traverse through memblock structure and setup crash memory ranges. These - * ranges will be used create PT_LOAD program headers in elfcore header. - */ -static int fadump_setup_crash_memory_ranges(void) -{ - u64 i, start, end; - int ret; - - pr_debug("Setup crash memory ranges.\n"); - crash_mrange_info.mem_range_cnt = 0; - - /* - * Boot memory region(s) registered with firmware are moved to - * different location at the time of crash. Create separate program - * header(s) for this memory chunk(s) with the correct offset. - */ - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - start = fw_dump.boot_mem_addr[i]; - end = start + fw_dump.boot_mem_sz[i]; - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - if (ret) - return ret; - } - - for_each_mem_range(i, &start, &end) { - /* - * skip the memory chunk that is already added - * (0 through boot_memory_top). - */ - if (start < fw_dump.boot_mem_top) { - if (end > fw_dump.boot_mem_top) - start = fw_dump.boot_mem_top; - else - continue; - } - - /* add this range excluding the reserved dump area. */ - ret = fadump_exclude_reserved_area(start, end); - if (ret) - return ret; - } - - return 0; -} - /* * If the given physical address falls within the boot memory region then * return the relocated address that points to the dump region reserved @@ -1057,36 +973,50 @@ static inline unsigned long fadump_relocate(unsigned long paddr) return raddr; } -static int fadump_create_elfcore_headers(char *bufp) +static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start, + u64 size, unsigned long long offset) { - unsigned long long raddr, offset; - struct elf_phdr *phdr; + phdr->p_align = 0; + phdr->p_memsz = size; + phdr->p_filesz = size; + phdr->p_paddr = start; + phdr->p_offset = offset; + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_vaddr = (unsigned long)__va(start); +} + +static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh) +{ + char *bufp; struct elfhdr *elf; - int i, j; + struct elf_phdr *phdr; + u64 boot_mem_dest_offset; + unsigned long long i, ra_start, ra_end, ra_size, mstart, mend; + bufp = (char *) fw_dump.elfcorehdr_addr; fadump_init_elfcore_header(bufp); elf = (struct elfhdr *)bufp; bufp += sizeof(struct elfhdr); /* - * setup ELF PT_NOTE, place holder for cpu notes info. The notes info - * will be populated during second kernel boot after crash. Hence - * this PT_NOTE will always be the first elf note. + * Set up ELF PT_NOTE, a placeholder for CPU notes information. + * The notes info will be populated later by platform-specific code. + * Hence, this PT_NOTE will always be the first ELF note. * * NOTE: Any new ELF note addition should be placed after this note. */ phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); phdr->p_type = PT_NOTE; - phdr->p_flags = 0; - phdr->p_vaddr = 0; - phdr->p_align = 0; - - phdr->p_offset = 0; - phdr->p_paddr = 0; - phdr->p_filesz = 0; - phdr->p_memsz = 0; - + phdr->p_flags = 0; + phdr->p_vaddr = 0; + phdr->p_align = 0; + phdr->p_offset = 0; + phdr->p_paddr = 0; + phdr->p_filesz = 0; + phdr->p_memsz = 0; + /* Increment number of program headers. */ (elf->e_phnum)++; /* setup ELF PT_NOTE for vmcoreinfo */ @@ -1096,55 +1026,66 @@ static int fadump_create_elfcore_headers(char *bufp) phdr->p_flags = 0; phdr->p_vaddr = 0; phdr->p_align = 0; - - phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); - phdr->p_offset = phdr->p_paddr; - phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE; - + phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr; + phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size; /* Increment number of program headers. */ (elf->e_phnum)++; - /* setup PT_LOAD sections. */ - j = 0; - offset = 0; - raddr = fw_dump.boot_mem_addr[0]; - for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) { - u64 mbase, msize; - - mbase = crash_mrange_info.mem_ranges[i].base; - msize = crash_mrange_info.mem_ranges[i].size; - if (!msize) - continue; - + /* + * Setup PT_LOAD sections. first include boot memory regions + * and then add rest of the memory regions. + */ + boot_mem_dest_offset = fw_dump.boot_mem_dest_addr; + for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R|PF_W|PF_X; - phdr->p_offset = mbase; - - if (mbase == raddr) { - /* - * The entire real memory region will be moved by - * firmware to the specified destination_address. - * Hence set the correct offset. - */ - phdr->p_offset = fw_dump.boot_mem_dest_addr + offset; - if (j < (fw_dump.boot_mem_regs_cnt - 1)) { - offset += fw_dump.boot_mem_sz[j]; - raddr = fw_dump.boot_mem_addr[++j]; - } + populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i], + fw_dump.boot_mem_sz[i], + boot_mem_dest_offset); + /* Increment number of program headers. */ + (elf->e_phnum)++; + boot_mem_dest_offset += fw_dump.boot_mem_sz[i]; + } + + /* Memory reserved for fadump in first kernel */ + ra_start = fw_dump.reserve_dump_area_start; + ra_size = get_fadump_area_size(); + ra_end = ra_start + ra_size; + + phdr = (struct elf_phdr *)bufp; + for_each_mem_range(i, &mstart, &mend) { + /* Boot memory regions already added, skip them now */ + if (mstart < fw_dump.boot_mem_top) { + if (mend > fw_dump.boot_mem_top) + mstart = fw_dump.boot_mem_top; + else + continue; } - phdr->p_paddr = mbase; - phdr->p_vaddr = (unsigned long)__va(mbase); - phdr->p_filesz = msize; - phdr->p_memsz = msize; - phdr->p_align = 0; + /* Handle memblock regions overlaps with fadump reserved area */ + if ((ra_start < mend) && (ra_end > mstart)) { + if ((mstart < ra_start) && (mend > ra_end)) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + /* Increment number of program headers. */ + (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *)bufp; + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } else if (mstart < ra_start) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + } else if (ra_end < mend) { + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } + } else { + /* No overlap with fadump reserved memory region */ + populate_elf_pt_load(phdr, mstart, mend - mstart, mstart); + } /* Increment number of program headers. */ (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *) bufp; } - return 0; } static unsigned long init_fadump_header(unsigned long addr) @@ -1159,14 +1100,25 @@ static unsigned long init_fadump_header(unsigned long addr) memset(fdh, 0, sizeof(struct fadump_crash_info_header)); fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; - fdh->elfcorehdr_addr = addr; + fdh->version = FADUMP_HEADER_VERSION; /* We will set the crashing cpu id in crash_fadump() during crash. */ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN; + + /* + * The physical address and size of vmcoreinfo are required in the + * second kernel to prepare elfcorehdr. + */ + fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note()); + fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE; + + + fdh->pt_regs_sz = sizeof(struct pt_regs); /* * When LPAR is terminated by PYHP, ensure all possible CPUs' * register data is processed while exporting the vmcore. */ fdh->cpu_mask = *cpu_possible_mask; + fdh->cpu_mask_sz = sizeof(struct cpumask); return addr; } @@ -1174,8 +1126,6 @@ static unsigned long init_fadump_header(unsigned long addr) static int register_fadump(void) { unsigned long addr; - void *vaddr; - int ret; /* * If no memory is reserved then we can not register for firmware- @@ -1184,18 +1134,10 @@ static int register_fadump(void) if (!fw_dump.reserve_dump_area_size) return -ENODEV; - ret = fadump_setup_crash_memory_ranges(); - if (ret) - return ret; - addr = fw_dump.fadumphdr_addr; /* Initialize fadump crash info header. */ addr = init_fadump_header(addr); - vaddr = __va(addr); - - pr_debug("Creating ELF core headers at %#016lx\n", addr); - fadump_create_elfcore_headers(vaddr); /* register the future kernel dump with firmware. */ pr_debug("Registering for firmware-assisted kernel dump...\n"); @@ -1214,7 +1156,6 @@ void fadump_cleanup(void) } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered. */ fw_dump.ops->fadump_unregister(&fw_dump); - fadump_free_mem_ranges(&crash_mrange_info); } if (fw_dump.ops->fadump_cleanup) @@ -1400,6 +1341,22 @@ static void fadump_release_memory(u64 begin, u64 end) fadump_release_reserved_area(tstart, end); } +static void fadump_free_elfcorehdr_buf(void) +{ + if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0) + return; + + /* + * Before freeing the memory of `elfcorehdr`, reset the global + * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing + * invalid memory. + */ + elfcorehdr_addr = ELFCORE_ADDR_ERR; + fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size); + fw_dump.elfcorehdr_addr = 0; + fw_dump.elfcorehdr_size = 0; +} + static void fadump_invalidate_release_mem(void) { mutex_lock(&fadump_mutex); @@ -1411,6 +1368,7 @@ static void fadump_invalidate_release_mem(void) fadump_cleanup(); mutex_unlock(&fadump_mutex); + fadump_free_elfcorehdr_buf(); fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM()); fadump_free_cpu_notes_buf(); @@ -1616,6 +1574,102 @@ static void __init fadump_init_files(void) return; } +static int __init fadump_setup_elfcorehdr_buf(void) +{ + int elf_phdr_cnt; + unsigned long elfcorehdr_size; + + /* + * Program header for CPU notes comes first, followed by one for + * vmcoreinfo, and the remaining program headers correspond to + * memory regions. + */ + elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory); + elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr)); + elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size); + + fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size); + if (!fw_dump.elfcorehdr_addr) { + pr_err("Failed to allocate %lu bytes for elfcorehdr\n", + elfcorehdr_size); + return -ENOMEM; + } + fw_dump.elfcorehdr_size = elfcorehdr_size; + return 0; +} + +/* + * Check if the fadump header of crashed kernel is compatible with fadump kernel. + * + * It checks the magic number, endianness, and size of non-primitive type + * members of fadump header to ensure safe dump collection. + */ +static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh) +{ + if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) { + pr_err("Old magic number, can't process the dump.\n"); + return false; + } + + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC)) + pr_err("Endianness mismatch between the crashed and fadump kernels.\n"); + else + pr_err("Fadump header is corrupted.\n"); + + return false; + } + + /* + * Dump collection is not safe if the size of non-primitive type members + * of the fadump header do not match between crashed and fadump kernel. + */ + if (fdh->pt_regs_sz != sizeof(struct pt_regs) || + fdh->cpu_mask_sz != sizeof(struct cpumask)) { + pr_err("Fadump header size mismatch.\n"); + return false; + } + + return true; +} + +static void __init fadump_process(void) +{ + struct fadump_crash_info_header *fdh; + + fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr); + if (!fdh) { + pr_err("Crash info header is empty.\n"); + goto err_out; + } + + /* Avoid processing the dump if fadump header isn't compatible */ + if (!is_fadump_header_compatible(fdh)) + goto err_out; + + /* Allocate buffer for elfcorehdr */ + if (fadump_setup_elfcorehdr_buf()) + goto err_out; + + fadump_populate_elfcorehdr(fdh); + + /* Let platform update the CPU notes in elfcorehdr */ + if (fw_dump.ops->fadump_process(&fw_dump) < 0) + goto err_out; + + /* + * elfcorehdr is now ready to be exported. + * + * set elfcorehdr_addr so that vmcore module will export the + * elfcorehdr through '/proc/vmcore'. + */ + elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr); + return; + +err_out: + fadump_invalidate_release_mem(); +} + /* * Prepare for firmware-assisted dump. */ @@ -1635,12 +1689,7 @@ int __init setup_fadump(void) * saving it to the disk. */ if (fw_dump.dump_active) { - /* - * if dump process fails then invalidate the registration - * and release memory before proceeding for re-registration. - */ - if (fw_dump.ops->fadump_process(&fw_dump) < 0) - fadump_invalidate_release_mem(); + fadump_process(); } /* Initialize the kernel dump memory structure and register with f/w */ else if (fw_dump.reserve_dump_area_size) { diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index 964f464b1b0e3..767a6b19e42a8 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -513,8 +513,8 @@ opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf, final_note(note_buf); pr_debug("Updating elfcore header (%llx) with cpu notes\n", - fdh->elfcorehdr_addr); - fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr)); + fadump_conf->elfcorehdr_addr); + fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr); return 0; } @@ -526,12 +526,7 @@ static int __init opal_fadump_process(struct fw_dump *fadump_conf) if (!opal_fdm_active || !fadump_conf->fadumphdr_addr) return rc; - /* Validate the fadump crash info header */ fdh = __va(fadump_conf->fadumphdr_addr); - if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { - pr_err("Crash info header is not valid.\n"); - return rc; - } #ifdef CONFIG_OPAL_CORE /* @@ -545,18 +540,7 @@ static int __init opal_fadump_process(struct fw_dump *fadump_conf) kernel_initiated = true; #endif - rc = opal_fadump_build_cpu_notes(fadump_conf, fdh); - if (rc) - return rc; - - /* - * We are done validating dump info and elfcore header is now ready - * to be exported. set elfcorehdr_addr so that vmcore module will - * export the elfcore header through '/proc/vmcore'. - */ - elfcorehdr_addr = fdh->elfcorehdr_addr; - - return rc; + return opal_fadump_build_cpu_notes(fadump_conf, fdh); } static void opal_fadump_region_show(struct fw_dump *fadump_conf, diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index b5853e9fcc3c1..214f37788b2dc 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -375,11 +375,8 @@ static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf) } final_note(note_buf); - if (fdh) { - pr_debug("Updating elfcore header (%llx) with cpu notes\n", - fdh->elfcorehdr_addr); - fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr)); - } + pr_debug("Updating elfcore header (%llx) with cpu notes\n", fadump_conf->elfcorehdr_addr); + fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr); return 0; error_out: @@ -389,14 +386,11 @@ static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf) } /* - * Validate and process the dump data stored by firmware before exporting - * it through '/proc/vmcore'. + * Validate and process the dump data stored by the firmware, and update + * the CPU notes of elfcorehdr. */ static int __init rtas_fadump_process(struct fw_dump *fadump_conf) { - struct fadump_crash_info_header *fdh; - int rc = 0; - if (!fdm_active || !fadump_conf->fadumphdr_addr) return -EINVAL; @@ -415,25 +409,7 @@ static int __init rtas_fadump_process(struct fw_dump *fadump_conf) return -EINVAL; } - /* Validate the fadump crash info header */ - fdh = __va(fadump_conf->fadumphdr_addr); - if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { - pr_err("Crash info header is not valid.\n"); - return -EINVAL; - } - - rc = rtas_fadump_build_cpu_notes(fadump_conf); - if (rc) - return rc; - - /* - * We are done validating dump info and elfcore header is now ready - * to be exported. set elfcorehdr_addr so that vmcore module will - * export the elfcore header through '/proc/vmcore'. - */ - elfcorehdr_addr = fdh->elfcorehdr_addr; - - return 0; + return rtas_fadump_build_cpu_notes(fadump_conf); } static void rtas_fadump_region_show(struct fw_dump *fadump_conf, From bc446c5acabadeb38b61b565535401c5dfdd1214 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Tue, 23 Apr 2024 01:29:31 +0530 Subject: [PATCH 07/21] powerpc/fadump: add hotplug_ready sysfs interface The elfcorehdr describes the CPUs and memory of the crashed kernel to the kernel that captures the dump, known as the second or fadump kernel. The elfcorehdr needs to be updated if the system's memory changes due to memory hotplug or online/offline events. Currently, memory hotplug events are monitored in userspace by udev rules, and fadump is re-registered, which recreates the elfcorehdr with the latest available memory in the system. However, the previous patch ("powerpc: make fadump resilient with memory add/remove events") moved the creation of elfcorehdr to the second or fadump kernel. This eliminates the need to regenerate the elfcorehdr during memory hotplug or online/offline events. Create a sysfs entry at /sys/kernel/fadump/hotplug_ready to let userspace know that fadump re-registration is not required for memory add/remove events. Signed-off-by: Sourabh Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20240422195932.1583833-3-sourabhjain@linux.ibm.com --- Documentation/ABI/testing/sysfs-kernel-fadump | 11 +++++++++++ arch/powerpc/kernel/fadump.c | 14 ++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump b/Documentation/ABI/testing/sysfs-kernel-fadump index 8f7a64a81783a..c586054657d63 100644 --- a/Documentation/ABI/testing/sysfs-kernel-fadump +++ b/Documentation/ABI/testing/sysfs-kernel-fadump @@ -38,3 +38,14 @@ Contact: linuxppc-dev@lists.ozlabs.org Description: read only Provide information about the amount of memory reserved by FADump to save the crash dump in bytes. + +What: /sys/kernel/fadump/hotplug_ready +Date: Apr 2024 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Kdump udev rule re-registers fadump on memory add/remove events, + primarily to update the elfcorehdr. This sysfs indicates the + kdump udev rule that fadump re-registration is not required on + memory add/remove events because elfcorehdr is now prepared in + the second/fadump kernel. +User: kexec-tools diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index a020597c065a2..2de7379d0f300 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1426,6 +1426,18 @@ static ssize_t enabled_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.fadump_enabled); } +/* + * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which inidcates + * to usersapce that fadump re-registration is not required on memory + * hotplug events. + */ +static ssize_t hotplug_ready_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", 1); +} + static ssize_t mem_reserved_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -1498,11 +1510,13 @@ static struct kobj_attribute release_attr = __ATTR_WO(release_mem); static struct kobj_attribute enable_attr = __ATTR_RO(enabled); static struct kobj_attribute register_attr = __ATTR_RW(registered); static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved); +static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready); static struct attribute *fadump_attrs[] = { &enable_attr.attr, ®ister_attr.attr, &mem_reserved_attr.attr, + &hotplug_ready_attr.attr, NULL, }; From 57e6700145c5d1f49c52137e9163f73ec5441256 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Tue, 23 Apr 2024 01:29:32 +0530 Subject: [PATCH 08/21] Documentation/powerpc: update fadump implementation details The patch titled ("powerpc: make fadump resilient with memory add/remove events") has made significant changes to the implementation of fadump, particularly on elfcorehdr creation and fadump crash info header structure. Therefore, updating the fadump implementation documentation to reflect those changes. Following updates are done to firmware assisted dump documentation: 1. The elfcorehdr is no longer stored after fadump HDR in the reserved dump area. Instead, the second kernel dynamically allocates memory for the elfcorehdr within the address range from 0 to the boot memory size. Therefore, update figures 1 and 2 of Memory Reservation during the first and second kernels to reflect this change. 2. A version field has been added to the fadump header to manage the future changes to fadump crash info header structure without changing the fadump header magic number in the future. Therefore, remove the corresponding TODO from the document. Signed-off-by: Sourabh Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20240422195932.1583833-4-sourabhjain@linux.ibm.com --- .../arch/powerpc/firmware-assisted-dump.rst | 91 +++++++++---------- 1 file changed, 42 insertions(+), 49 deletions(-) diff --git a/Documentation/arch/powerpc/firmware-assisted-dump.rst b/Documentation/arch/powerpc/firmware-assisted-dump.rst index e363fc48529a0..7e37aadd1f772 100644 --- a/Documentation/arch/powerpc/firmware-assisted-dump.rst +++ b/Documentation/arch/powerpc/firmware-assisted-dump.rst @@ -134,12 +134,12 @@ that are run. If there is dump data, then the memory is held. If there is no waiting dump data, then only the memory required to -hold CPU state, HPTE region, boot memory dump, FADump header and -elfcore header, is usually reserved at an offset greater than boot -memory size (see Fig. 1). This area is *not* released: this region -will be kept permanently reserved, so that it can act as a receptacle -for a copy of the boot memory content in addition to CPU state and -HPTE region, in the case a crash does occur. +hold CPU state, HPTE region, boot memory dump, and FADump header is +usually reserved at an offset greater than boot memory size (see Fig. 1). +This area is *not* released: this region will be kept permanently +reserved, so that it can act as a receptacle for a copy of the boot +memory content in addition to CPU state and HPTE region, in the case +a crash does occur. Since this reserved memory area is used only after the system crash, there is no point in blocking this significant chunk of memory from @@ -153,22 +153,22 @@ that were present in CMA region:: o Memory Reservation during first kernel - Low memory Top of memory - 0 boot memory size |<--- Reserved dump area --->| | - | | | Permanent Reservation | | - V V | | V - +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ - | | |///|////| DUMP | HDR | ELF |////| | - +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ - | ^ ^ ^ ^ ^ - | | | | | | - \ CPU HPTE / | | - ------------------------------ | | - Boot memory content gets transferred | | - to reserved area by firmware at the | | - time of crash. | | - FADump Header | - (meta area) | + Low memory Top of memory + 0 boot memory size |<------ Reserved dump area ----->| | + | | | Permanent Reservation | | + V V | | V + +-----------+-----/ /---+---+----+-----------+-------+----+-----+ + | | |///|////| DUMP | HDR |////| | + +-----------+-----/ /---+---+----+-----------+-------+----+-----+ + | ^ ^ ^ ^ ^ + | | | | | | + \ CPU HPTE / | | + -------------------------------- | | + Boot memory content gets transferred | | + to reserved area by firmware at the | | + time of crash. | | + FADump Header | + (meta area) | | | Metadata: This area holds a metadata structure whose @@ -186,13 +186,20 @@ that were present in CMA region:: 0 boot memory size | | |<------------ Crash preserved area ------------>| V V |<--- Reserved dump area --->| | - +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ - | | |///|////| DUMP | HDR | ELF |////| | - +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ - | | - V V - Used by second /proc/vmcore - kernel to boot + +----+---+--+-----/ /---+---+----+-------+-----+-----+-------+ + | |ELF| | |///|////| DUMP | HDR |/////| | + +----+---+--+-----/ /---+---+----+-------+-----+-----+-------+ + | | | | | | + ----- ------------------------------ --------------- + \ | | + \ | | + \ | | + \ | ---------------------------- + \ | / + \ | / + \ | / + /proc/vmcore + +---+ |///| -> Regions (CPU, HPTE & Metadata) marked like this in the above @@ -200,6 +207,12 @@ that were present in CMA region:: does not have CPU & HPTE regions while Metadata region is not supported on pSeries currently. + +---+ + |ELF| -> elfcorehdr, it is created in second kernel after crash. + +---+ + + Note: Memory from 0 to the boot memory size is used by second kernel + Fig. 2 @@ -353,26 +366,6 @@ TODO: - Need to come up with the better approach to find out more accurate boot memory size that is required for a kernel to boot successfully when booted with restricted memory. - - The FADump implementation introduces a FADump crash info structure - in the scratch area before the ELF core header. The idea of introducing - this structure is to pass some important crash info data to the second - kernel which will help second kernel to populate ELF core header with - correct data before it gets exported through /proc/vmcore. The current - design implementation does not address a possibility of introducing - additional fields (in future) to this structure without affecting - compatibility. Need to come up with the better approach to address this. - - The possible approaches are: - - 1. Introduce version field for version tracking, bump up the version - whenever a new field is added to the structure in future. The version - field can be used to find out what fields are valid for the current - version of the structure. - 2. Reserve the area of predefined size (say PAGE_SIZE) for this - structure and have unused area as reserved (initialized to zero) - for future field additions. - - The advantage of approach 1 over 2 is we don't need to reserve extra space. Author: Mahesh Salgaonkar From d1679b4fa1722e6bb4a17b13aacdc01a130ba362 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Mon, 22 Apr 2024 13:27:37 +0530 Subject: [PATCH 09/21] powerpc/eeh: Permanently disable the removed device When a device is hot removed on powernv, the hotplug driver clears the device's state. However, on pseries, if a device is removed by phyp after reaching the error threshold, the kernel remains unaware, leading to the device not being torn down. This prevents necessary remediation actions like failover. Permanently disable the device if the presence check fails. Also, in eeh_dev_check_failure in we may consider the error as false positive if the device is hotpluged out as the get_state call returns EEH_STATE_NOT_SUPPORT and we may end up not clearing the device state, so log the event if the state is not moved to permanent failure state. Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://msgid.link/20240422075737.1405551-1-ganeshgr@linux.ibm.com --- arch/powerpc/kernel/eeh.c | 11 ++++++++++- arch/powerpc/kernel/eeh_driver.c | 13 +++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index ab316e155ea9f..6670063a7a6c7 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -506,9 +506,18 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * We will punt with the following conditions: Failure to get * PE's state, EEH not support and Permanently unavailable * state, PE is in good state. + * + * On the pSeries, after reaching the threshold, get_state might + * return EEH_STATE_NOT_SUPPORT. However, it's possible that the + * device state remains uncleared if the device is not marked + * pci_channel_io_perm_failure. Therefore, consider logging the + * event to let device removal happen. + * */ if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { + (ret == EEH_STATE_NOT_SUPPORT && + dev->error_state == pci_channel_io_perm_failure) || + eeh_state_active(ret)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 48773d2d9be3c..7efe04c68f0fe 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -865,9 +865,18 @@ void eeh_handle_normal_event(struct eeh_pe *pe) devices++; if (!devices) { - pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n", + pr_warn("EEH: Frozen PHB#%x-PE#%x is empty!\n", pe->phb->global_number, pe->addr); - goto out; /* nothing to recover */ + /* + * The device is removed, tear down its state, on powernv + * hotplug driver would take care of it but not on pseries, + * permanently disable the card as it is hot removed. + * + * In the case of powernv, note that the removal of device + * is covered by pci rescan lock, so no problem even if hotplug + * driver attempts to remove the device. + */ + goto recover_failed; } /* Log the event */ From 4ccae23609f589dd69a593f457f76ee8b0e2d4e0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 19 Apr 2024 21:59:13 +1000 Subject: [PATCH 10/21] powerpc/dart: Drop unnecessary call to kmemleak_no_scan() Erhard reported that kmemleak was showing a warning at boot: kmemleak: Not scanning unknown object at 0xc00000007f000000 CPU: 0 PID: 0 Comm: swapper Not tainted 5.19.0-rc3-PMacG5+ #2 Call Trace: .dump_stack_lvl+0x7c/0xc4 (unreliable) .kmemleak_no_scan+0xe0/0x100 .iommu_init_early_dart+0x2f0/0x924 .pmac_probe+0x1b0/0x20c .setup_arch+0x1b8/0x674 .start_kernel+0xdc/0xb74 start_here_common+0x1c/0x44 DART table allocated at: (____ptrval____) Which he bisected to a change in kmemleak, commit 23c2d497de21 ("mm: kmemleak: take a full lowmem check in kmemleak_*_phys()"). Because pmac_probe() is called before mem_topology_setup(), the min/ max PFN variables are still zero. That causes kmemleak_alloc_phys() to ignore the allocation, because the checks against the PFN fail. Then kmemleak_no_scan() can't find the allocation and prints warning. Given that kmemleak_alloc_phys() is ignoring the allocation to begin with, there's no need to call kmemleak_no_scan() at all, which avoids the warning. Reported-by: Erhard Furtner Closes: https://lore.kernel.org/all/bug-216156-206035@https.bugzilla.kernel.org%2F/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20240419115913.3317575-1-mpe@ellerman.id.au --- arch/powerpc/sysdev/dart_iommu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 98096bbfd62e8..c0d10c149661c 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -243,9 +242,6 @@ static void __init allocate_dart(void) if (!dart_tablebase) panic("Failed to allocate 16MB below 2GB for DART table\n"); - /* There is no point scanning the DART space for leaks*/ - kmemleak_no_scan((void *)dart_tablebase); - /* Allocate a spare page to map all invalid DART pages. We need to do * that to work around what looks like a problem with the HT bridge * prefetching into invalid pages and corrupting data From ff2e185cf73df480ec69675936c4ee75a445c3e4 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 8 Apr 2024 09:08:31 -0500 Subject: [PATCH 11/21] powerpc/pseries: Enforce hcall result buffer validity and size plpar_hcall(), plpar_hcall9(), and related functions expect callers to provide valid result buffers of certain minimum size. Currently this is communicated only through comments in the code and the compiler has no idea. For example, if I write a bug like this: long retbuf[PLPAR_HCALL_BUFSIZE]; // should be PLPAR_HCALL9_BUFSIZE plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, ...); This compiles with no diagnostics emitted, but likely results in stack corruption at runtime when plpar_hcall9() stores results past the end of the array. (To be clear this is a contrived example and I have not found a real instance yet.) To make this class of error less likely, we can use explicitly-sized array parameters instead of pointers in the declarations for the hcall APIs. When compiled with -Warray-bounds[1], the code above now provokes a diagnostic like this: error: array argument is too small; is of size 32, callee requires at least 72 [-Werror,-Warray-bounds] 60 | plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, | ^ ~~~~~~ [1] Enabled for LLVM builds but not GCC for now. See commit 0da6e5fd6c37 ("gcc: disable '-Warray-bounds' for gcc-13 too") and related changes. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20240408-pseries-hvcall-retbuf-v1-1-ebc73d7253cf@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 51172625fa3a5..7a8495660c2f8 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -524,7 +524,7 @@ long plpar_hcall_norets_notrace(unsigned long opcode, ...); * Used for all but the craziest of phyp interfaces (see plpar_hcall9) */ #define PLPAR_HCALL_BUFSIZE 4 -long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...); +long plpar_hcall(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL_BUFSIZE], ...); /** * plpar_hcall_raw: - Make a hypervisor call without calculating hcall stats @@ -538,7 +538,7 @@ long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...); * plpar_hcall, but plpar_hcall_raw works in real mode and does not * calculate hypervisor call statistics. */ -long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...); +long plpar_hcall_raw(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL_BUFSIZE], ...); /** * plpar_hcall9: - Make a pseries hypervisor call with up to 9 return arguments @@ -549,8 +549,8 @@ long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...); * PLPAR_HCALL9_BUFSIZE to size the return argument buffer. */ #define PLPAR_HCALL9_BUFSIZE 9 -long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...); -long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...); +long plpar_hcall9(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL9_BUFSIZE], ...); +long plpar_hcall9_raw(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL9_BUFSIZE], ...); /* pseries hcall tracing */ extern struct static_key hcall_tracepoint_key; From 29247de4ad753771afef95ace8af738d807ca279 Mon Sep 17 00:00:00 2001 From: Lidong Zhong Date: Thu, 11 Apr 2024 10:04:50 +0800 Subject: [PATCH 12/21] powerpc/pseries/vio: Don't return ENODEV if node or compatible missing We noticed the following nuisance messages during boot process: vio vio: uevent: failed to send synthetic uevent vio 4000: uevent: failed to send synthetic uevent vio 4001: uevent: failed to send synthetic uevent vio 4002: uevent: failedto send synthetic uevent vio 4004: uevent: failed to send synthetic uevent It's caused by either vio_register_device_node() failing to set dev->of_node or the node is missing a "compatible" property. To match the definition of modalias in modalias_show(), remove the return of ENODEV in such cases. The failure messages is also suppressed with this change. Signed-off-by: Lidong Zhong Signed-off-by: Michael Ellerman Link: https://msgid.link/20240411020450.12725-1-lidong.zhong@suse.com --- arch/powerpc/platforms/pseries/vio.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 90ff85c879bfe..b2babfdbc40bf 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1592,13 +1592,9 @@ static int vio_hotplug(const struct device *dev, struct kobj_uevent_env *env) const char *cp; dn = dev->of_node; - if (!dn) - return -ENODEV; - cp = of_get_property(dn, "compatible", NULL); - if (!cp) - return -ENODEV; + if (dn && (cp = of_get_property(dn, "compatible", NULL))) + add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp); - add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp); return 0; } From 37496845c812db2a470d51088a59ee38156e8058 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 29 Feb 2024 15:07:09 +0530 Subject: [PATCH 13/21] selftests/powerpc: Re-order *FLAGS to follow lib.mk In some powerpc/ sub-folder Makefiles, CFLAGS are defined before lib.mk include. Clean it up by re-ordering the flags to follow after the mk include. This is needed to support sub-folders in powerpc/ buildable on its own. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://msgid.link/20240229093711.581230-1-maddy@linux.ibm.com --- .../selftests/powerpc/benchmarks/Makefile | 4 ++-- .../selftests/powerpc/copyloops/Makefile | 20 +++++++++---------- .../selftests/powerpc/nx-gzip/Makefile | 4 ++-- .../selftests/powerpc/pmu/ebb/Makefile | 20 +++++++++---------- .../powerpc/pmu/event_code_tests/Makefile | 4 ++-- .../powerpc/pmu/sampling_tests/Makefile | 4 ++-- .../selftests/powerpc/primitives/Makefile | 4 ++-- .../selftests/powerpc/security/Makefile | 4 ++-- .../testing/selftests/powerpc/signal/Makefile | 3 ++- .../selftests/powerpc/stringloops/Makefile | 10 +++++----- .../selftests/powerpc/switch_endian/Makefile | 4 ++-- .../selftests/powerpc/syscalls/Makefile | 4 ++-- tools/testing/selftests/powerpc/vphn/Makefile | 4 ++-- 13 files changed, 45 insertions(+), 44 deletions(-) diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index a32a6ab899144..75f5232c3aeca 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -4,11 +4,11 @@ TEST_GEN_FILES := exec_target TEST_FILES := settings -CFLAGS += -O2 - top_srcdir = ../../../../.. include ../../lib.mk +CFLAGS += -O2 + $(TEST_GEN_PROGS): ../harness.c $(OUTPUT)/context_switch: ../utils.c diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 77594e697f2fa..72684ed589c0a 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -1,14 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -# The loops are all 64-bit code -CFLAGS += -m64 -CFLAGS += -I$(CURDIR) -CFLAGS += -D SELFTEST -CFLAGS += -maltivec -CFLAGS += -mcpu=power4 - -# Use our CFLAGS for the implicit .S rule & set the asm machine type -ASFLAGS = $(CFLAGS) -Wa,-mpower4 - TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ @@ -21,6 +11,16 @@ EXTRA_SOURCES := validate.c ../harness.c stubs.S top_srcdir = ../../../../.. include ../../lib.mk +# The loops are all 64-bit code +CFLAGS += -m64 +CFLAGS += -I$(CURDIR) +CFLAGS += -D SELFTEST +CFLAGS += -maltivec +CFLAGS += -mcpu=power4 + +# Use our CFLAGS for the implicit .S rule & set the asm machine type +ASFLAGS = $(CFLAGS) -Wa,-mpower4 + $(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES) $(CC) $(CPPFLAGS) $(CFLAGS) \ -D COPY_LOOP=test___copy_tofrom_user_base \ diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile index 0785c2e99d407..b40991f902b2c 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/Makefile +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile @@ -1,8 +1,8 @@ -CFLAGS = -O3 -m64 -I./include -I../include - TEST_GEN_FILES := gzfht_test gunz_test TEST_PROGS := nx-gzip-test.sh include ../../lib.mk +CFLAGS = -O3 -m64 -I./include -I../include + $(TEST_GEN_FILES): gzip_vas.c ../utils.c diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 0101606902272..b3946ce17e0c1 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -4,16 +4,6 @@ include ../../../../../build/Build.include noarg: $(MAKE) -C ../../ -# The EBB handler is 64-bit code and everything links against it -CFLAGS += -m64 - -TMPOUT = $(OUTPUT)/TMPDIR/ -# Toolchains may build PIE by default which breaks the assembly -no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) - -LDFLAGS += $(no-pie-option) - TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \ @@ -29,6 +19,16 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ top_srcdir = ../../../../../.. include ../../../lib.mk +# The EBB handler is 64-bit code and everything links against it +CFLAGS += -m64 + +TMPOUT = $(OUTPUT)/TMPDIR/ +# Toolchains may build PIE by default which breaks the assembly +no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +LDFLAGS += $(no-pie-option) + $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \ ebb.c ebb_handler.S trace.c busy_loop.S diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 4e07d70464574..509d4b235b9e5 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -m64 - TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ @@ -12,4 +10,6 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te top_srcdir = ../../../../../.. include ../../../lib.mk +CFLAGS += -m64 + $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c ../sampling_tests/misc.h ../sampling_tests/misc.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 9e67351fb252b..d45892151e05d 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -m64 - TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ @@ -12,4 +10,6 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test top_srcdir = ../../../../../.. include ../../../lib.mk +CFLAGS += -m64 + $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S ../branch_loops.S diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile index 9b9491a632136..6dc5c5a42ca95 100644 --- a/tools/testing/selftests/powerpc/primitives/Makefile +++ b/tools/testing/selftests/powerpc/primitives/Makefile @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -I$(CURDIR) - TEST_GEN_PROGS := load_unaligned_zeropad top_srcdir = ../../../../.. include ../../lib.mk +CFLAGS += -I$(CURDIR) + $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index e0d979ab02040..0a08386be9696 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -5,10 +5,10 @@ TEST_PROGS := mitigation-patching.sh top_srcdir = ../../../../.. -CFLAGS += $(KHDR_INCLUDES) - include ../../lib.mk +CFLAGS += $(KHDR_INCLUDES) + $(TEST_GEN_PROGS): ../harness.c ../utils.c $(OUTPUT)/spectre_v2: CFLAGS += -m64 diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index f679d260afc87..b15d5dbccc24b 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -3,7 +3,6 @@ TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart TEST_GEN_PROGS += sigreturn_kernel TEST_GEN_PROGS += sigreturn_unaligned -CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm $(OUTPUT)/sigfuz: CFLAGS += -pthread -m64 @@ -12,4 +11,6 @@ TEST_FILES := settings top_srcdir = ../../../../.. include ../../lib.mk +CFLAGS += -maltivec + $(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 9c39f55a58ff3..87c8c8f238daa 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -# The loops are all 64-bit code -CFLAGS += -I$(CURDIR) - EXTRA_SOURCES := ../harness.c build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi) @@ -27,9 +24,12 @@ $(OUTPUT)/strlen_32: CFLAGS += -m32 TEST_GEN_PROGS += strlen_32 endif -ASFLAGS = $(CFLAGS) - top_srcdir = ../../../../.. include ../../lib.mk +# The loops are all 64-bit code +CFLAGS += -I$(CURDIR) + +ASFLAGS = $(CFLAGS) + $(TEST_GEN_PROGS): $(EXTRA_SOURCES) diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile index bdc081afedb0f..8f0c2a1d3333b 100644 --- a/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -1,13 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := switch_endian_test -ASFLAGS += -O2 -Wall -g -nostdlib -m64 - EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S top_srcdir = ../../../../.. include ../../lib.mk +ASFLAGS += -O2 -Wall -g -nostdlib -m64 + $(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT) $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index ee1740ddfb0c2..83dc335007732 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only TEST_GEN_PROGS := ipc_unmuxed rtas_filter -CFLAGS += $(KHDR_INCLUDES) - top_srcdir = ../../../../.. include ../../lib.mk +CFLAGS += $(KHDR_INCLUDES) + $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile index cf65cbf33085a..ddc09a20b80fb 100644 --- a/tools/testing/selftests/powerpc/vphn/Makefile +++ b/tools/testing/selftests/powerpc/vphn/Makefile @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only TEST_GEN_PROGS := test-vphn -CFLAGS += -m64 -I$(CURDIR) - top_srcdir = ../../../../.. include ../../lib.mk +CFLAGS += -m64 -I$(CURDIR) + $(TEST_GEN_PROGS): ../harness.c From 5553a79387e92ffd812a49fdcf679f392281f6a9 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 29 Feb 2024 15:07:10 +0530 Subject: [PATCH 14/21] selftests/powerpc: Add flags.mk to support pmu buildable When running `make -C powerpc/pmu run_tests` from top level selftests directory, currently this error is being reported: make: Entering directory '/home/maddy/linux/tools/testing/selftests/powerpc/pmu' Makefile:40: warning: overriding recipe for target 'emit_tests' ../../lib.mk:111: warning: ignoring old recipe for target 'emit_tests' gcc -m64 count_instructions.c ../harness.c event.c lib.c ../utils.c loop.S -o /home/maddy/selftest_output//count_instructions In file included from count_instructions.c:13: event.h:12:10: fatal error: utils.h: No such file or directory 12 | #include "utils.h" | ^~~~~~~~~ compilation terminated. This is due to missing of include path in CFLAGS. That is, CFLAGS and GIT_VERSION macros are defined in the powerpc/ folder Makefile which in this case is not involved. To address the failure in case of executing specific sub-folder test directly, a new rule file has been addded by the patch called "flags.mk" under selftest/powerpc/ folder and is linked to all the Makefile of powerpc/pmu sub-folders. Reported-by: Sachin Sant Signed-off-by: Madhavan Srinivasan Tested-by: Sachin Sant [mpe: Fixup ifeq, make GIT_VERSION simply expanded to avoid re-executing git describe] Signed-off-by: Michael Ellerman Link: https://msgid.link/20240229093711.581230-2-maddy@linux.ibm.com --- tools/testing/selftests/powerpc/flags.mk | 12 ++++++++++++ tools/testing/selftests/powerpc/pmu/Makefile | 1 + tools/testing/selftests/powerpc/pmu/ebb/Makefile | 1 + .../selftests/powerpc/pmu/event_code_tests/Makefile | 1 + .../selftests/powerpc/pmu/sampling_tests/Makefile | 1 + 5 files changed, 16 insertions(+) create mode 100644 tools/testing/selftests/powerpc/flags.mk diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk new file mode 100644 index 0000000000000..b909bee3cb2a3 --- /dev/null +++ b/tools/testing/selftests/powerpc/flags.mk @@ -0,0 +1,12 @@ +#This checks for any ENV variables and add those. + +ifeq ($(GIT_VERSION),) +GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown") +export GIT_VERSION +endif + +ifeq ($(CFLAGS),) +CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS) +export CFLAGS +endif + diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index a284fa874a9f1..1fcacae1b188e 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -7,6 +7,7 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk all: $(TEST_GEN_PROGS) ebb sampling_tests event_code_tests diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index b3946ce17e0c1..1b39af7c10db3 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -18,6 +18,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ top_srcdir = ../../../../../.. include ../../../lib.mk +include ../../flags.mk # The EBB handler is 64-bit code and everything links against it CFLAGS += -m64 diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 509d4b235b9e5..fdb080b3fa654 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -9,6 +9,7 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te top_srcdir = ../../../../../.. include ../../../lib.mk +include ../../flags.mk CFLAGS += -m64 diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index d45892151e05d..9f79bec5fce79 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -9,6 +9,7 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test top_srcdir = ../../../../../.. include ../../../lib.mk +include ../../flags.mk CFLAGS += -m64 From 108e5e683333615023265a9a73a29d4c2fa16c70 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 29 Feb 2024 15:07:11 +0530 Subject: [PATCH 15/21] selftests/powerpc: make sub-folders buildable on their own Build breaks when executing make with run_tests for sub-folders under powerpc. This is because, CFLAGS and GIT_VERSION macros are defined in Makefile of toplevel powerpc folder. make: Entering directory '/home/maddy/linux/tools/testing/selftests/powerpc/mm' gcc hugetlb_vs_thp_test.c ../harness.c ../utils.c -o /home/maddy/selftest_output//hugetlb_vs_thp_test hugetlb_vs_thp_test.c:6:10: fatal error: utils.h: No such file or directory 6 | #include "utils.h" | ^~~~~~~~~ compilation terminated. Fix this by adding the flags.mk in each sub-folder Makefile. Also remove the CFLAGS and GIT_VERSION macros from powerpc/ folder Makefile since the same is definied in flags.mk Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://msgid.link/20240229093711.581230-3-maddy@linux.ibm.com --- tools/testing/selftests/powerpc/Makefile | 7 +------ tools/testing/selftests/powerpc/alignment/Makefile | 1 + tools/testing/selftests/powerpc/benchmarks/Makefile | 1 + tools/testing/selftests/powerpc/cache_shape/Makefile | 1 + tools/testing/selftests/powerpc/copyloops/Makefile | 1 + tools/testing/selftests/powerpc/dexcr/Makefile | 1 + tools/testing/selftests/powerpc/dscr/Makefile | 1 + tools/testing/selftests/powerpc/eeh/Makefile | 1 + tools/testing/selftests/powerpc/math/Makefile | 1 + tools/testing/selftests/powerpc/mce/Makefile | 1 + tools/testing/selftests/powerpc/mm/Makefile | 1 + tools/testing/selftests/powerpc/nx-gzip/Makefile | 1 + tools/testing/selftests/powerpc/papr_attributes/Makefile | 3 ++- tools/testing/selftests/powerpc/papr_sysparm/Makefile | 1 + tools/testing/selftests/powerpc/papr_vpd/Makefile | 1 + tools/testing/selftests/powerpc/primitives/Makefile | 1 + tools/testing/selftests/powerpc/ptrace/Makefile | 1 + tools/testing/selftests/powerpc/security/Makefile | 1 + tools/testing/selftests/powerpc/signal/Makefile | 1 + tools/testing/selftests/powerpc/stringloops/Makefile | 1 + tools/testing/selftests/powerpc/switch_endian/Makefile | 1 + tools/testing/selftests/powerpc/syscalls/Makefile | 1 + tools/testing/selftests/powerpc/tm/Makefile | 1 + tools/testing/selftests/powerpc/vphn/Makefile | 1 + 24 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index c376151982c4d..2f299fd04d2d9 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -7,12 +7,6 @@ ARCH := $(shell echo $(ARCH) | sed -e s/ppc.*/powerpc/) ifeq ($(ARCH),powerpc) -GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") - -CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS) - -export CFLAGS - SUB_DIRS = alignment \ benchmarks \ cache_shape \ @@ -46,6 +40,7 @@ $(SUB_DIRS): BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all include ../lib.mk +include ./flags.mk override define RUN_TESTS +@for TARGET in $(SUB_DIRS); do \ diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 93e9af37449d6..66d5d7aaeb20c 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := copy_first_unaligned alignment_handler top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index 75f5232c3aeca..1321922038d0f 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -6,6 +6,7 @@ TEST_FILES := settings top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk CFLAGS += -O2 diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile index 689f6c8ebcd8d..3a3ca956ac664 100644 --- a/tools/testing/selftests/powerpc/cache_shape/Makefile +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := cache_shape top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 72684ed589c0a..42940f92d8322 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -10,6 +10,7 @@ EXTRA_SOURCES := validate.c ../harness.c stubs.S top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk # The loops are all 64-bit code CFLAGS += -m64 diff --git a/tools/testing/selftests/powerpc/dexcr/Makefile b/tools/testing/selftests/powerpc/dexcr/Makefile index 76210f2bcec3c..523947a38d175 100644 --- a/tools/testing/selftests/powerpc/dexcr/Makefile +++ b/tools/testing/selftests/powerpc/dexcr/Makefile @@ -2,6 +2,7 @@ TEST_GEN_PROGS := hashchk_test TEST_GEN_FILES := lsdexcr include ../../lib.mk +include ../flags.mk $(OUTPUT)/hashchk_test: CFLAGS += -fno-pie $(call cc-option,-mno-rop-protect) diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index 9289d5febe1ed..9fa9cb5bd989a 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -5,6 +5,7 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread $(OUTPUT)/dscr_explicit_test: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile index ae963eb2dc5bf..70797716f2b56 100644 --- a/tools/testing/selftests/powerpc/eeh/Makefile +++ b/tools/testing/selftests/powerpc/eeh/Makefile @@ -7,3 +7,4 @@ TEST_FILES := eeh-functions.sh settings top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index 3948f7c510aa7..b14fd2e0c6a88 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -3,6 +3,7 @@ TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vm top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c $(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile index 2424513982d9b..ce4ed679aaafb 100644 --- a/tools/testing/selftests/powerpc/mce/Makefile +++ b/tools/testing/selftests/powerpc/mce/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := inject-ra-err include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 4a6608beef0e9..aab058ecb3528 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -13,6 +13,7 @@ TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile index b40991f902b2c..480d8ba94cf77 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/Makefile +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile @@ -2,6 +2,7 @@ TEST_GEN_FILES := gzfht_test gunz_test TEST_PROGS := nx-gzip-test.sh include ../../lib.mk +include ../flags.mk CFLAGS = -O3 -m64 -I./include -I../include diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile index e899712d49db1..4064294990224 100644 --- a/tools/testing/selftests/powerpc/papr_attributes/Makefile +++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := attr_test top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk -$(TEST_GEN_PROGS): ../harness.c ../utils.c \ No newline at end of file +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile b/tools/testing/selftests/powerpc/papr_sysparm/Makefile index 7f79e437634a2..fed4f2414dbfb 100644 --- a/tools/testing/selftests/powerpc/papr_sysparm/Makefile +++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile @@ -6,6 +6,7 @@ TEST_GEN_PROGS := papr_sysparm top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile b/tools/testing/selftests/powerpc/papr_vpd/Makefile index 06b719703bfd7..b09852e408822 100644 --- a/tools/testing/selftests/powerpc/papr_vpd/Makefile +++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile @@ -6,6 +6,7 @@ TEST_GEN_PROGS := papr_vpd top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile index 6dc5c5a42ca95..23bd9a7590dde 100644 --- a/tools/testing/selftests/powerpc/primitives/Makefile +++ b/tools/testing/selftests/powerpc/primitives/Makefile @@ -3,6 +3,7 @@ TEST_GEN_PROGS := load_unaligned_zeropad top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk CFLAGS += -I$(CURDIR) diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 1b39b86849da5..59ca01d8567e2 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -26,6 +26,7 @@ LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h)) top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk TM_TESTS := $(patsubst %,$(OUTPUT)/%,$(TM_TESTS)) TESTS_64 := $(patsubst %,$(OUTPUT)/%,$(TESTS_64)) diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 0a08386be9696..33286039724aa 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -6,6 +6,7 @@ TEST_PROGS := mitigation-patching.sh top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk CFLAGS += $(KHDR_INCLUDES) diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index b15d5dbccc24b..ece95bd52be98 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -10,6 +10,7 @@ TEST_FILES := settings top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk CFLAGS += -maltivec diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 87c8c8f238daa..4c9d9a58c9d15 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -26,6 +26,7 @@ endif top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk # The loops are all 64-bit code CFLAGS += -I$(CURDIR) diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile index 8f0c2a1d3333b..0da2e0a742648 100644 --- a/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -5,6 +5,7 @@ EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk ASFLAGS += -O2 -Wall -g -nostdlib -m64 diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index 83dc335007732..3bc07af88f0e1 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -3,6 +3,7 @@ TEST_GEN_PROGS := ipc_unmuxed rtas_filter top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk CFLAGS += $(KHDR_INCLUDES) diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 3876805c2f312..f13f0ab36007c 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -11,6 +11,7 @@ TEST_FILES := settings top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile index ddc09a20b80fb..61d519a076c6f 100644 --- a/tools/testing/selftests/powerpc/vphn/Makefile +++ b/tools/testing/selftests/powerpc/vphn/Makefile @@ -3,6 +3,7 @@ TEST_GEN_PROGS := test-vphn top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk CFLAGS += -m64 -I$(CURDIR) From 822a04957cc5e675570645f506270797a1cf2865 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 22 Apr 2024 23:34:52 +1000 Subject: [PATCH 16/21] selftests/powerpc: Convert pmu Makefile to for loop style The pmu Makefile has grown more sub directories over the years. Rather than open coding the rules for each subdir, use for loops. Signed-off-by: Michael Ellerman Link: https://msgid.link/20240422133453.1793988-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/pmu/Makefile | 43 ++++++++++---------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 1fcacae1b188e..773933e5180e8 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -9,7 +9,9 @@ top_srcdir = ../../../../.. include ../../lib.mk include ../flags.mk -all: $(TEST_GEN_PROGS) ebb sampling_tests event_code_tests +SUB_DIRS := ebb sampling_tests event_code_tests + +all: $(TEST_GEN_PROGS) $(SUB_DIRS) $(TEST_GEN_PROGS): $(EXTRA_SOURCES) @@ -23,12 +25,16 @@ $(OUTPUT)/count_stcx_fail: loop.S $(EXTRA_SOURCES) $(OUTPUT)/per_event_excludes: ../utils.c +$(SUB_DIRS): + BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all + DEFAULT_RUN_TESTS := $(RUN_TESTS) override define RUN_TESTS $(DEFAULT_RUN_TESTS) - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests; \ + done; endef emit_tests: @@ -36,34 +42,29 @@ emit_tests: BASENAME_TEST=`basename $$TEST`; \ echo "$(COLLECTION):$$BASENAME_TEST"; \ done - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests; \ + done; DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install; \ + done; endef DEFAULT_CLEAN := $(CLEAN) override define CLEAN $(DEFAULT_CLEAN) $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \ + done; endef -ebb: - TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all - -sampling_tests: - TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all - -event_code_tests: - TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all .PHONY: all run_tests ebb sampling_tests event_code_tests emit_tests From dda32e37d397f5937cc24a6e98b71d3645f51afa Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 22 Apr 2024 23:34:53 +1000 Subject: [PATCH 17/21] selftests/powerpc: Install tests in sub-directories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sources for the powerpc selftests are arranged into sub-directories. However when the tests are built and installed, the sub-directories are squashed, losing the structure. For example, with the current code the result of installing the selftests is: $ tree tools/testing/selftests/kselftest_install tools/testing/selftests/kselftest_install ├── kselftest │   ├── ktap_helpers.sh │   ├── module.sh │   ├── prefix.pl │   └── runner.sh ├── kselftest-list.txt ├── powerpc │   ├── alignment_handler │   ├── attr_test │   ├── back_to_back_ebbs_test │   ├── bad_accesses │   ├── bhrb_filter_map_test │   ├── bhrb_no_crash_wo_pmu_test │   ├── blacklisted_events_test │   ├── cache_shape │   ├── close_clears_pmcc_test │   ├── context_switch │   ├── copy_first_unaligned ... │   ├── settings ... │   └── wild_bctr └── run_kselftest.sh All the powerpc tests are squashed into the single powerpc directory. In particular, note that there is a single `settings` file, even though there are multiple settings files in the powerpc selftest sources. One of the settings files ends up installed, depending on install order, even if they have different contents. Similarly if there were two tests with the same name in different sub-directories they would clobber each other. Fix it by replicating the directory structure of the source tree into the install directory. The result being for example: $ tree tools/testing/selftests/kselftest_install tools/testing/selftests/kselftest_install ├── kselftest │   ├── ktap_helpers.sh │   ├── module.sh │   ├── prefix.pl │   └── runner.sh ├── kselftest-list.txt ├── powerpc │   ├── alignment │   │   ├── alignment_handler │   │   └── copy_first_unaligned │   ├── benchmarks │   │   ├── context_switch │   │   ├── exec_target │   │   ├── fork │   │   ├── futex_bench │   │   ├── gettimeofday │   │   ├── mmap_bench │   │   ├── null_syscall │   │   └── settings ... │   ├── eeh │   │   ├── eeh-basic.sh │   │   ├── eeh-functions.sh │   │   └── settings ... │   └── vphn │   └── test-vphn └── run_kselftest.sh Note multiple settings files in different sub-directories. This change also has the effect of changing the names of the tests from the point of view of the kselftest runner. Before the tests are named eg: powerpc:copy_first_unaligned powerpc:cache_shape powerpc:reg_access_test After, the test collection names include the sub-directory: powerpc/alignment:copy_first_unaligned powerpc/cache_shape:cache_shape powerpc/pmu/ebb:reg_access_test That means whereas previously all powerpc tests could be run with: $ ./run_kselftest.sh -c powerpc After the change it's necessary to pass a regex that matches all powerpc entries, eg: $ ./run_kselftest.sh -c "powerpc.*" The latter form also works before and after the change. Signed-off-by: Michael Ellerman Link: https://msgid.link/20240422133453.1793988-2-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/Makefile | 4 ++-- tools/testing/selftests/powerpc/pmu/Makefile | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 2f299fd04d2d9..b175e94e1901d 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -52,14 +52,14 @@ endef override define INSTALL_RULE +@for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\ + $(MAKE) OUTPUT=$$BUILD_TARGET INSTALL_PATH=$$INSTALL_PATH/$$TARGET -C $$TARGET install;\ done; endef emit_tests: +@for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET COLLECTION=$(COLLECTION)/$$TARGET -s -C $$TARGET $@;\ done; override define CLEAN diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 773933e5180e8..7e9dbf3d0d092 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -44,7 +44,7 @@ emit_tests: done +@for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests; \ + $(MAKE) OUTPUT=$$BUILD_TARGET COLLECTION=$(COLLECTION)/$$TARGET -s -C $$TARGET emit_tests; \ done; DEFAULT_INSTALL_RULE := $(INSTALL_RULE) @@ -52,7 +52,7 @@ override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) +@for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install; \ + $(MAKE) OUTPUT=$$BUILD_TARGET INSTALL_PATH=$$INSTALL_PATH/$$TARGET -C $$TARGET install; \ done; endef From 84030aacf127d000180fa3cb4b589d8ab1b0d46b Mon Sep 17 00:00:00 2001 From: sundar Date: Wed, 24 Apr 2024 20:37:18 +0530 Subject: [PATCH 18/21] macintosh/macio-adb: replace of_node_put() with __free use the new cleanup magic to replace of_node_put() with __free(device_node) marking to auto release when they get out of scope. Suggested-by: Julia Lawall Signed-off-by: sundar Signed-off-by: Michael Ellerman Link: https://msgid.link/20240424150718.5006-1-prosunofficial@gmail.com --- drivers/macintosh/macio-adb.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c index 779f1268286eb..19c63959ebed9 100644 --- a/drivers/macintosh/macio-adb.c +++ b/drivers/macintosh/macio-adb.c @@ -83,35 +83,32 @@ struct adb_driver macio_adb_driver = { int macio_probe(void) { - struct device_node *np; + struct device_node *np __free(device_node) = + of_find_compatible_node(NULL, "adb", "chrp,adb0"); - np = of_find_compatible_node(NULL, "adb", "chrp,adb0"); - if (np) { - of_node_put(np); + if (np) return 0; - } + return -ENODEV; } int macio_init(void) { - struct device_node *adbs; + struct device_node *adbs __free(device_node) = + of_find_compatible_node(NULL, "adb", "chrp,adb0"); struct resource r; unsigned int irq; - adbs = of_find_compatible_node(NULL, "adb", "chrp,adb0"); if (!adbs) return -ENXIO; - if (of_address_to_resource(adbs, 0, &r)) { - of_node_put(adbs); + if (of_address_to_resource(adbs, 0, &r)) return -ENXIO; - } + adb = ioremap(r.start, sizeof(struct adb_regs)); - if (!adb) { - of_node_put(adbs); + if (!adb) return -ENOMEM; - } + out_8(&adb->ctrl.r, 0); out_8(&adb->intr.r, 0); @@ -121,7 +118,6 @@ int macio_init(void) out_8(&adb->autopoll.r, APE); irq = irq_of_parse_and_map(adbs, 0); - of_node_put(adbs); if (request_irq(irq, macio_adb_interrupt, 0, "ADB", (void *)0)) { iounmap(adb); printk(KERN_ERR "ADB: can't get irq %d\n", irq); From 236a4c63491784ae4814100cca47bc3645c776df Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 22 Apr 2024 21:52:31 +1000 Subject: [PATCH 19/21] powerpc: Mark memory_limit as initdata The `memory_limit` variable should only be used during boot, enforce that by marking it initdata. Signed-off-by: Michael Ellerman Link: https://msgid.link/20240422115231.1769984-1-mpe@ellerman.id.au --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3a440004b97d2..12316ac66e7e0 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -30,7 +30,7 @@ #include -unsigned long long memory_limit; +unsigned long long memory_limit __initdata; unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); From 6a3e640b5dcf56fb44d66d525e01ea08633c6b8b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 30 Apr 2024 14:42:28 +1000 Subject: [PATCH 20/21] MAINTAINERS: powerpc: Remove Aneesh Aneesh is stepping down from powerpc maintenance. Acked-by: Aneesh Kumar K.V (Arm) Signed-off-by: Michael Ellerman Link: https://msgid.link/20240430044228.49015-1-mpe@ellerman.id.au --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7c121493f43d0..93af33ee8541c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12478,7 +12478,6 @@ LINUX FOR POWERPC (32-BIT AND 64-BIT) M: Michael Ellerman R: Nicholas Piggin R: Christophe Leroy -R: Aneesh Kumar K.V R: Naveen N. Rao L: linuxppc-dev@lists.ozlabs.org S: Supported From 1fcd254733371cfa5a3602bab5ae2c9dc4bf69e6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 30 Apr 2024 14:43:27 +1000 Subject: [PATCH 21/21] MAINTAINERS: MMU GATHER: Update Aneesh's address Aneesh's IBM address no longer works, switch to his preferred kernel.org address. Acked-by: Aneesh Kumar K.V (Arm) Signed-off-by: Michael Ellerman Link: https://msgid.link/20240430044327.49363-1-mpe@ellerman.id.au --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 93af33ee8541c..f096c9fff5b3b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14898,7 +14898,7 @@ F: drivers/phy/marvell/phy-pxa-usb.c MMU GATHER AND TLB INVALIDATION M: Will Deacon -M: "Aneesh Kumar K.V" +M: "Aneesh Kumar K.V" M: Andrew Morton M: Nick Piggin M: Peter Zijlstra