// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Firmware-Assisted Dump support on POWER platform (OPAL).
 *
 * Copyright 2019, Hari Bathini, IBM Corporation.
 */
#ifdef CONFIG_PRESERVE_FA_DUMP
/*
 * opal_fadump_dt_scan() - detect a dump left behind by the previous boot.
 *
 * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
 * ensure crash data is preserved in hope that the subsequent memory
 * preserving kernel boot is going to process this crash data.
 *
 * @fadump_conf: FADump configuration to update. It is written only when
 *               every check below passes: boot_mem_top is set and
 *               dump_active is set to 1. On any early return it is left
 *               untouched.
 * @node:        node handed to of_get_flat_dt_subnode_by_name() to look up
 *               the "dump" subnode.
 */
void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
{
	const struct opal_fadump_mem_struct *opal_fdm_active;
	const __be32 *prop;
	unsigned long dn;
	u64 addr = 0;
	s64 ret;

	dn = of_get_flat_dt_subnode_by_name(node, "dump");
	if (dn == -FDT_ERR_NOTFOUND)
		return;

	/*
	 * Check if dump has been initiated on last reboot.
	 */
	prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
	if (!prop)
		return;

	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
	if ((ret != OPAL_SUCCESS) || !addr) {
		pr_debug("Could not get Kernel metadata (%lld)\n", ret);
		return;
	}

	/*
	 * Preserve memory only if kernel memory regions are registered
	 * with f/w for MPIPL.
	 */
	addr = be64_to_cpu(addr);
	pr_debug("Kernel metadata addr: %llx\n", addr);
	opal_fdm_active = (void *)addr;
	if (be16_to_cpu(opal_fdm_active->registered_regions) == 0)
		return;

	/* addr is reused: it now receives the boot memory tag value. */
	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr);
	if ((ret != OPAL_SUCCESS) || !addr) {
		pr_err("Failed to get boot memory tag (%lld)\n", ret);
		return;
	}

	/*
	 * Memory below this address can be used for booting a
	 * capture kernel or petitboot kernel. Preserve everything
	 * above this address for processing crashdump.
	 */
	fadump_conf->boot_mem_top = be64_to_cpu(addr);
	pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top);

	pr_info("Firmware-assisted dump is active.\n");
	fadump_conf->dump_active = 1;
}
/* * The destination address of the first boot memory region is the * destination address of boot memory regions.
*/
fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest);
pr_debug("Destination address of boot memory regions: %#016llx\n",
fadump_conf->boot_mem_dest_addr);
/* * This function is called in the capture kernel to get configuration details * from metadata setup by the first kernel.
*/ staticvoid __init opal_fadump_get_config(struct fw_dump *fadump_conf, conststruct opal_fadump_mem_struct *fdm)
{ unsignedlong base, size, last_end, hole_size; int i;
/* * Start address of reserve dump area (permanent reservation) for * re-registering FADump after dump capture.
*/
fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest);
/* * Rarely, but it can so happen that system crashes before all * boot memory regions are registered for MPIPL. In such * cases, warn that the vmcore may not be accurate and proceed * anyway as that is the best bet considering free pages, cache * pages, user pages, etc are usually filtered out. * * Hope the memory that could not be preserved only has pages * that are usually filtered out while saving the vmcore.
*/ if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) {
pr_warn("Not all memory regions were saved!!!\n");
pr_warn(" Unsaved memory regions:\n");
i = be16_to_cpu(fdm->registered_regions); while (i < be16_to_cpu(fdm->region_cnt)) {
pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n",
i, be64_to_cpu(fdm->rgn[i].src),
be64_to_cpu(fdm->rgn[i].size));
i++;
}
pr_warn("If the unsaved regions only contain pages that are filtered out (eg. free/user pages), the vmcore should still be usable.\n");
pr_warn("WARNING: If the unsaved regions contain kernel pages, the vmcore will be corrupted.\n");
}
/* * Kernel metadata is passed to f/w and retrieved in capture kernel. * So, use it to save fadump header address instead of calculating it.
*/
opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) +
fadump_conf->boot_memory_size);
/* * Use the last page(s) in FADump memory reservation for * kernel metadata.
*/
fadump_conf->kernel_metadata = (fadump_conf->reserve_dump_area_start +
fadump_conf->reserve_dump_area_size -
opal_fadump_get_metadata_size());
pr_info("Kernel metadata addr: %llx\n", fadump_conf->kernel_metadata);
/* Initialize kernel metadata before registering the address with f/w */
opal_fdm = __va(fadump_conf->kernel_metadata);
opal_fadump_init_metadata(opal_fdm);
/* * Register metadata address with f/w. Can be retrieved in * the capture kernel.
*/
ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL,
fadump_conf->kernel_metadata); if (ret != OPAL_SUCCESS) {
pr_err("Failed to set kernel metadata tag!\n");
err = -EPERM;
}
/* * Register boot memory top address with f/w. Should be retrieved * by a kernel that intends to preserve crash'ed kernel's memory.
*/
ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_BOOT_MEM,
fadump_conf->boot_mem_top); if (ret != OPAL_SUCCESS) {
pr_err("Failed to set boot memory tag!\n");
err = -EPERM;
}
switch (rc) { case OPAL_SUCCESS:
pr_info("Registration is successful!\n");
fadump_conf->dump_registered = 1;
err = 0; break; case OPAL_RESOURCE: /* If MAX regions limit in f/w is hit, warn and proceed. */
pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n",
(be16_to_cpu(opal_fdm->region_cnt) -
be16_to_cpu(opal_fdm->registered_regions)));
fadump_conf->dump_registered = 1;
err = 0; break; case OPAL_PARAMETER:
pr_err("Failed to register. Parameter Error(%lld).\n", rc); break; case OPAL_HARDWARE:
pr_err("Support not available.\n");
fadump_conf->fadump_supported = 0;
fadump_conf->fadump_enabled = 0; break; default:
pr_err("Failed to register. Unknown Error(%lld).\n", rc); break;
}
/* * If some regions were registered before OPAL_MPIPL_ADD_RANGE * OPAL call failed, unregister all regions.
*/ if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0))
opal_fadump_unregister(fadump_conf);
ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, 0); if (ret != OPAL_SUCCESS)
pr_warn("Could not reset (%llu) kernel metadata tag!\n", ret);
}
/* * Verify if CPU state data is available. If available, do a bit of sanity * checking before processing this data.
*/ staticbool __init is_opal_fadump_cpu_data_valid(struct fw_dump *fadump_conf)
{ if (!opal_cpu_metadata) returnfalse;
if (fadump_conf->cpu_state_data_version != HDAT_FADUMP_CPU_DATA_VER) {
pr_warn("Supported CPU state data version: %u, found: %d!\n",
HDAT_FADUMP_CPU_DATA_VER,
fadump_conf->cpu_state_data_version);
pr_warn("WARNING: F/W using newer CPU state data format!!\n");
}
if ((fadump_conf->cpu_state_dest_vaddr == 0) ||
(fadump_conf->cpu_state_entry_size == 0) ||
(fadump_conf->cpu_state_entry_size >
fadump_conf->cpu_state_data_size)) {
pr_err("CPU state data is invalid. Ignoring!\n"); returnfalse;
}
returntrue;
}
/* * Convert CPU state data saved at the time of crash into ELF notes. * * While the crashing CPU's register data is saved by the kernel, CPU state * data for all CPUs is saved by f/w. In CPU state data provided by f/w, * each register entry is of 16 bytes, a numerical identifier along with * a GPR/SPR flag in the first 8 bytes and the register value in the next * 8 bytes. For more details refer to F/W documentation. If this data is * missing or in unsupported format, append crashing CPU's register data * saved by the kernel in the PT_NOTE, to have something to work with in * the vmcore file.
*/ staticint __init
opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf, struct fadump_crash_info_header *fdh)
{
u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize; struct hdat_fadump_thread_hdr *thdr; bool is_cpu_data_valid = false;
u32 num_cpus = 1, *note_buf; struct pt_regs regs; char *bufp; int rc, i;
rc = fadump_setup_cpu_notes_buf(num_cpus); if (rc != 0) return rc;
note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr; if (!is_cpu_data_valid) goto out;
/* * Offset for register entries, entry size and registers count is * duplicated in every thread header in keeping with HDAT format. * Use these values from the first thread header.
*/
thdr = (struct hdat_fadump_thread_hdr *)bufp;
regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
be32_to_cpu(thdr->offset));
reg_esize = be32_to_cpu(thdr->esize);
regs_cnt = be32_to_cpu(thdr->ecnt);
/* * If this is kernel initiated crash, crashing_cpu would be set * appropriately and register data of the crashing CPU saved by * crashing kernel. Add this saved register data of crashing CPU * to elf notes and populate the pt_regs for the remaining CPUs * from register state data provided by firmware.
*/ if (fdh->crashing_cpu == thread_pir) {
note_buf = fadump_regs_to_elf_notes(note_buf,
&fdh->regs);
pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
fdh->crashing_cpu, fdh->regs.gpr[1],
fdh->regs.nip); continue;
}
/* * Register state data of MAX cores is provided by firmware, * but some of this cores may not be active. So, while * processing register state data, check core state and * skip threads that belong to inactive cores.
*/ if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE) continue;
out: /* * CPU state data is invalid/unsupported. Try appending crashing CPU's * register data, if it is saved by the kernel.
*/ if (fadump_conf->cpu_notes_buf_vaddr == (u64)note_buf) { if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN) {
fadump_free_cpu_notes_buf(); return -ENODEV;
}
if (!opal_fdm_active || !fadump_conf->fadumphdr_addr) return rc;
fdh = __va(fadump_conf->fadumphdr_addr);
#ifdef CONFIG_OPAL_CORE /* * If this is a kernel initiated crash, crashing_cpu would be set * appropriately and register data of the crashing CPU saved by * crashing kernel. Add this saved register data of crashing CPU * to elf notes and populate the pt_regs for the remaining CPUs * from register state data provided by firmware.
*/ if (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN)
kernel_initiated = true; #endif
if (fadump_conf->dump_active)
fdm_ptr = opal_fdm_active; else
fdm_ptr = opal_fdm;
for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) { /* * Only regions that are registered for MPIPL * would have dump data.
*/ if ((fadump_conf->dump_active) &&
(i < be16_to_cpu(fdm_ptr->registered_regions)))
dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size);
/* Dump is active. Show preserved area start address. */ if (fadump_conf->dump_active) {
seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n",
fadump_conf->boot_mem_top);
}
}
staticvoid opal_fadump_trigger(struct fadump_crash_info_header *fdh, constchar *msg)
{ int rc;
/* * Unlike on pSeries platform, logical CPU number is not provided * with architected register state data. So, store the crashing * CPU's PIR instead to plug the appropriate register data for * crashing CPU in the vmcore file.
*/
fdh->crashing_cpu = (u32)mfspr(SPRN_PIR);
rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg); if (rc == OPAL_UNSUPPORTED) {
pr_emerg("Reboot type %d not supported.\n",
OPAL_REBOOT_MPIPL);
} elseif (rc == OPAL_HARDWARE)
pr_emerg("No backend support for MPIPL!\n");
}
/* * Check if Firmware-Assisted Dump is supported. if yes, check * if dump has been initiated on last reboot.
*/
dn = of_get_flat_dt_subnode_by_name(node, "dump"); if (dn == -FDT_ERR_NOTFOUND) {
pr_debug("FADump support is missing!\n"); return;
}
if (!of_flat_dt_is_compatible(dn, "ibm,opal-dump")) {
pr_err("Support missing for this f/w version!\n"); return;
}
prop = of_get_flat_dt_prop(dn, "fw-load-area", &len); if (prop) { /* * Each f/w load area is an (address,size) pair, * 2 cells each, totalling 4 cells per range.
*/ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
u64 base, end;
base = of_read_number(prop + (i * 4) + 0, 2);
end = base;
end += of_read_number(prop + (i * 4) + 2, 2); if (end > OPAL_FADUMP_MIN_BOOT_MEM) {
pr_err("F/W load area: 0x%llx-0x%llx\n",
base, end);
pr_err("F/W version not supported!\n"); return;
}
}
}
fadump_conf->ops = &opal_fadump_ops;
fadump_conf->fadump_supported = 1; /* TODO: Add support to pass additional parameters */
fadump_conf->param_area_supported = 0;
/* * Firmware supports 32-bit field for size. Align it to PAGE_SIZE * and request firmware to copy multiple kernel boot memory regions.
*/
fadump_conf->max_copy_size = ALIGN_DOWN(U32_MAX, PAGE_SIZE);
/* * Check if dump has been initiated on last reboot.
*/
prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL); if (!prop) return;
ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr); if ((ret != OPAL_SUCCESS) || !be_addr) {
pr_err("Failed to get Kernel metadata (%lld)\n", ret); return;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.