/* SPDX-License-Identifier: GPL-2.0 */ /* * This file contains the 64-bit "server" PowerPC variant * of the low level exception handling including exception * vectors, exception return, part of the slb and stab * handling and other fixed offset specific things. * * This file is meant to be #included from head_64.S due to * position dependent assembly. * * Most of this originates from head_64.S and thus has the same * copyright history. *
*/
/*
 * We're short on space and time in the exception prolog, so we can't
 * use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
 * Instead we get the base of the kernel from paca->kernelbase and or in the low
 * part of label. This requires that the label be within 64KB of kernelbase, and
 * that kernelbase be 64K aligned.
 *
 * reg:   GPR that receives the handler address.
 * label: symbol whose FIXED_SYMBOL_ABS_ADDR() value is or'ed into reg.
 *
 * r13 must already hold the paca pointer, since PACAKBASE is read through it.
 */
#define LOAD_HANDLER(reg, label)					\
	ld	reg,PACAKBASE(r13);	/* get high part of &label */	\
	ori	reg,reg,FIXED_SYMBOL_ABS_ADDR(label)
/* * All interrupts which set HSRR registers, as well as SRESET and MCE and * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken, * so they all generally need to test whether they were taken in guest context. * * Note: SRESET and MCE may also be sent to the guest by the hypervisor, and be * taken with MSR[HV]=0. * * Interrupts which set SRR registers (with the above exceptions) do not * elevate to MSR[HV]=1 mode, though most can be taken when running with * MSR[HV]=1 (e.g., bare metal kernel and userspace). So these interrupts do * not need to test whether a guest is running because they get delivered to * the guest directly, including nested HV KVM guests. * * The exception is PR KVM, where the guest runs with MSR[PR]=1 and the host * runs with MSR[HV]=0, so the host takes all interrupts on behalf of the * guest. PR KVM runs with LPCR[AIL]=0 which causes interrupts to always be * delivered to the real-mode entry point, therefore such interrupts only test * KVM in their real mode handlers, and only when PR KVM is possible. * * Interrupts that are taken in MSR[HV]=0 and escalate to MSR[HV]=1 are always * delivered in real-mode when the MMU is in hash mode because the MMU * registers are not set appropriately to translate host addresses. In nested * radix mode these can be delivered in virt-mode as the host translations are * used implicitly (see: effective LPID, effective PID).
*/
/* * If an interrupt is taken while a guest is running, it is immediately routed * to KVM to handle.
*/
/*
 * KVMTEST name handler
 *
 * Branch to \handler if the HSTATE_IN_GUEST byte read through the paca
 * pointer (r13) is non-zero. On the way, r10 is loaded with the trap number:
 * IVEC for SRR interrupts, IVEC + 0x2 for HSRR interrupts. The choice is made
 * at assembly time from IHSRR, or through a feature section when
 * IHSRR_IF_HVMODE is set.
 *
 * Expands to nothing unless CONFIG_KVM_BOOK3S_64_HANDLER is defined.
 * Overwrites r10 and the condition register field written by cmpwi.
 * \name is not referenced in the body.
 */
.macro KVMTEST name handler
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
/* HSRR variants have the 0x2 bit added to their trap number */
.if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
li r10,(IVEC + 0x2)
FTR_SECTION_ELSE
li r10,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
.elseif IHSRR
li r10,(IVEC + 0x2)
.else
li r10,(IVEC)
.endif
/* Acts on the cmpwi result above; r10 was reloaded with the trap number in between */
bne \handler
#endif
.endm
/* * This is the BOOK3S interrupt entry code macro. * * This can result in one of several things happening: * - Branch to the _common handler, relocated, in virtual mode. * These are normal interrupts (synchronous and asynchronous) handled by * the kernel. * - Branch to KVM, relocated but real mode interrupts remain in real mode. * These occur when HSTATE_IN_GUEST is set. The interrupt may be caused by * / intended for host or guest kernel, but KVM must always be involved * because the machine state is set for guest execution. * - Branch to the masked handler, unrelocated. * These occur when maskable asynchronous interrupts are taken with the * irq_soft_mask set. * - Branch to an "early" handler in real mode but relocated. * This is done if early=1. MCE and HMI use these to handle errors in real * mode. * - Fall through and continue executing in real, unrelocated mode. * This is done if early=2.
*/
/*
 * NOTE(review): the statements below read like the interior of the interrupt
 * entry macro described in the preceding comment, but no enclosing
 * .macro/.endm is visible at this point in the file -- confirm nothing has
 * been lost around them.
 */
/*
 * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
 * because a d-side MCE will clobber those registers so is
 * not recoverable if they are live.
 */
/* Move the SCRATCH0 value into the EX_R13 slot of this interrupt's save area */
GET_SCRATCH0(r10)
std r10,IAREA+EX_R13(r13)
/* Capture (H)DAR when the interrupt defines IDAR and is not instruction-side */
.if IDAR && !IISIDE
.if IHSRR
mfspr r10,SPRN_HDAR
.else
mfspr r10,SPRN_DAR
.endif
std r10,IAREA+EX_DAR(r13)
.endif
/* Capture (H)DSISR likewise; it is stored as a 32-bit word */
.if IDSISR && !IISIDE
.if IHSRR
mfspr r10,SPRN_HDSISR
.else
mfspr r10,SPRN_DSISR
.endif
stw r10,IAREA+EX_DSISR(r13)
.endif
/*
 * Leave the interrupted NIP/MSR in r11/r12, read from HSRR0/1 or SRR0/1.
 * IHSRR_IF_HVMODE selects between the two by feature section; otherwise
 * IHSRR decides at assembly time.
 */
.if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
mfspr r11,SPRN_HSRR0 /* save HSRR0 */
mfspr r12,SPRN_HSRR1 /* and HSRR1 */
FTR_SECTION_ELSE
mfspr r11,SPRN_SRR0 /* save SRR0 */
mfspr r12,SPRN_SRR1 /* and SRR1 */
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
.elseif IHSRR
mfspr r11,SPRN_HSRR0 /* save HSRR0 */
mfspr r12,SPRN_HSRR1 /* and HSRR1 */
.else
mfspr r11,SPRN_SRR0 /* save SRR0 */
mfspr r12,SPRN_SRR1 /* and SRR1 */
.endif
/*
 * __GEN_COMMON_ENTRY is required to receive the branch from interrupt
 * entry, except in the case of the real-mode handlers which require
 * __GEN_REALMODE_COMMON_ENTRY.
 *
 * This switches to virtual mode and sets MSR[RI].
 *
 * Two entry labels are emitted:
 *  \name\()_common_real - reached from the real-mode vector. Runs the KVM
 *                         test when IKVM_REAL is set, then writes the kernel
 *                         MSR taken from PACAKMSR.
 *  \name\()_common_virt - only when IVIRT is set; reached from the virt-mode
 *                         vector. Runs the KVM test when IKVM_VIRT is set.
 *                         The real-mode path branches over that test.
 *
 * r13 must hold the paca pointer. Clobbers r10, plus whatever KVMTEST
 * clobbers when a KVM test is emitted.
 */
.macro __GEN_COMMON_ENTRY name
DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
\name\()_common_real:
	.if IKVM_REAL
		KVMTEST \name kvm_interrupt
	.endif

	ld	r10,PACAKMSR(r13)	/* get MSR value for kernel */
	/* MSR[RI] is clear iff using SRR regs */
	.if IHSRR_IF_HVMODE
	BEGIN_FTR_SECTION
	xori	r10,r10,MSR_RI
	END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
	.elseif ! IHSRR
	xori	r10,r10,MSR_RI
	.endif
	mtmsrd	r10

	.if IVIRT
		.if IKVM_VIRT
		b	1f /* skip the virt test coming from real */
		.endif

	.balign IFETCH_ALIGN_BYTES
DEFINE_FIXED_SYMBOL(\name\()_common_virt, text)
\name\()_common_virt:
		.if IKVM_VIRT
			KVMTEST \name kvm_interrupt
1:
		.endif
	.endif /* IVIRT */
.endm
/*
 * Don't switch to virt mode. Used for early MCE and HMI handlers that
 * want to run in real mode.
 *
 * Defines \name\()_common_real and, when IKVM_REAL is set, runs KVMTEST
 * with kvm_interrupt as the target. No MSR write is performed here.
 */
.macro __GEN_REALMODE_COMMON_ENTRY name
DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
\name\()_common_real:
.if IKVM_REAL
KVMTEST \name kvm_interrupt
.endif
.endm
/*
 * __GEN_COMMON_BODY name
 *
 * Builds the interrupt stack frame for the interrupt described by the
 * current INT_DEFINE parameters (IVEC, IAREA, IMASK, ISTACK, IHSRR, ...).
 *
 * Expected on entry (as left by the entry code):
 *   r13 = paca, r9 = saved CR, r11 = (H)SRR0, r12 = (H)SRR1.
 *   With ISTACK=0 the caller has already set r1 to the new frame and left
 *   the original r1 in r10.
 *
 * Steps:
 *   1. IMASK != 0: for interrupts taken from kernel mode, test the soft-mask
 *      state (or the soft-mask table for addresses below __end_soft_masked)
 *      and divert to the masked handler with the PACA_IRQ_* bit in r10.
 *   2. ISTACK != 0: allocate INT_FRAME_SIZE on the current stack when coming
 *      from the kernel, or on PACAKSAVE when coming from user mode.
 *   3. Store CR, NIP, MSR, back chain, r0-r8, CFAR, CTR, LR, XER, the soft
 *      mask, the trap number and the frame marker into the frame.
 *
 * \name is not referenced in the body.
 */
.macro __GEN_COMMON_BODY name
	.if IMASK
		/* "bne 3f" below targets a label that only exists with ISTACK */
		.if ! ISTACK
		.error "No support for masked interrupt to use custom stack"
		.endif

		/* If coming from user, skip soft-mask tests. */
		andi.	r10,r12,MSR_PR
		bne	3f

		/*
		 * Kernel code running below __end_soft_masked may be
		 * implicitly soft-masked if it is within the regions
		 * in the soft mask table.
		 */
		LOAD_HANDLER(r10, __end_soft_masked)
		cmpld	r11,r10
		bge+	1f

		/* SEARCH_SOFT_MASK_TABLE clobbers r9,r10,r12 */
		mtctr	r12
		stw	r9,PACA_EXGEN+EX_CCR(r13)
		SEARCH_SOFT_MASK_TABLE
		cmpdi	r12,0
		mfctr	r12		/* Restore r12 to SRR1 */
		lwz	r9,PACA_EXGEN+EX_CCR(r13)
		beq	1f		/* Not in soft-mask table */
		li	r10,IMASK
		b	2f		/* In soft-mask table, always mask */

		/* Test the soft mask state against our interrupt's bit */
1:		lbz	r10,PACAIRQSOFTMASK(r13)
2:		andi.	r10,r10,IMASK
		/* Associate vector numbers with bits in paca->irq_happened */
		.if IVEC == 0x500 || IVEC == 0xea0
		li	r10,PACA_IRQ_EE
		.elseif IVEC == 0x900
		li	r10,PACA_IRQ_DEC
		.elseif IVEC == 0xa00 || IVEC == 0xe80
		li	r10,PACA_IRQ_DBELL
		.elseif IVEC == 0xe60
		li	r10,PACA_IRQ_HMI
		.elseif IVEC == 0xf00
		li	r10,PACA_IRQ_PMI
		.else
		.abort "Bad maskable vector"
		.endif

		/*
		 * CR0 still holds the result of the andi. at label 2 (li does
		 * not touch CR): non-zero means this interrupt is soft-masked.
		 */
		.if IHSRR_IF_HVMODE
		BEGIN_FTR_SECTION
		bne	masked_Hinterrupt
		FTR_SECTION_ELSE
		bne	masked_interrupt
		ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
		.elseif IHSRR
		bne	masked_Hinterrupt
		.else
		bne	masked_interrupt
		.endif
	.endif

	.if ISTACK
	andi.	r10,r12,MSR_PR		/* See if coming from user	*/
3:	mr	r10,r1			/* Save r1			*/
	subi	r1,r1,INT_FRAME_SIZE	/* alloc frame on kernel stack	*/
	beq-	100f
	ld	r1,PACAKSAVE(r13)	/* kernel stack to use		*/
100:	tdgei	r1,-INT_FRAME_SIZE	/* trap if r1 is in userspace	*/
	EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
	.endif

	std	r9,_CCR(r1)		/* save CR in stackframe	*/
	std	r11,_NIP(r1)		/* save SRR0 in stackframe	*/
	std	r12,_MSR(r1)		/* save SRR1 in stackframe	*/
	std	r10,0(r1)		/* make stack chain pointer	*/
	std	r0,GPR0(r1)		/* save r0 in stackframe	*/
	std	r10,GPR1(r1)		/* save r1 in stackframe	*/
	SANITIZE_GPR(0)

	/* Mark our [H]SRRs valid for return */
	li	r10,1
	.if IHSRR_IF_HVMODE
	BEGIN_FTR_SECTION
	stb	r10,PACAHSRR_VALID(r13)
	FTR_SECTION_ELSE
	stb	r10,PACASRR_VALID(r13)
	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
	.elseif IHSRR
	stb	r10,PACAHSRR_VALID(r13)
	.else
	stb	r10,PACASRR_VALID(r13)
	.endif

	/*
	 * NOTE(review): r9-r13 are held in the IAREA save area on entry and
	 * EXCEPTION_RESTORE_REGS reloads GPR2-GPR13 from the frame, yet nothing
	 * here copies them into the frame -- confirm that step is not missing.
	 */

BEGIN_FTR_SECTION
	.if ICFAR || ICFAR_IF_HVMODE
	ld	r10,IAREA+EX_CFAR(r13)
	.else
	li	r10,0
	.endif
	std	r10,ORIG_GPR3(r1)	/* CFAR (or 0) is kept in the ORIG_GPR3 slot */
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
	ld	r10,IAREA+EX_CTR(r13)
	std	r10,_CTR(r1)
	SAVE_GPRS(2, 8, r1)		/* save r2 - r8 in stackframe	*/
	SANITIZE_GPRS(2, 8)
	mflr	r9			/* Get LR, later save to stack	*/
	LOAD_PACA_TOC()			/* get kernel TOC into r2	*/
	std	r9,_LINK(r1)
	lbz	r10,PACAIRQSOFTMASK(r13)
	mfspr	r11,SPRN_XER		/* save XER in stackframe	*/
	std	r10,SOFTE(r1)
	std	r11,_XER(r1)
	li	r9,IVEC
	std	r9,_TRAP(r1)		/* set trap number		*/
	li	r10,0
	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
	std	r10,RESULT(r1)		/* clear regs->result		*/
	std	r11,STACK_INT_FRAME_MARKER(r1) /* mark the frame	*/
.endm
/*
 * On entry r13 points to the paca, r9-r13 are saved in the paca,
 * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
 * SRR1, and relocation is on.
 *
 * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
 * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
 *
 * GEN_COMMON expands __GEN_COMMON_ENTRY followed by __GEN_COMMON_BODY,
 * both for \name.
 */
.macro GEN_COMMON name
__GEN_COMMON_ENTRY \name
__GEN_COMMON_BODY \name
.endm
/*
 * Restore all registers including H/SRR0/1 saved in a stack frame of a
 * standard exception.
 *
 * hsrr: 0 (default) reloads SRR0/SRR1 and clears PACASRR_VALID;
 *       non-zero reloads HSRR0/HSRR1 and clears PACAHSRR_VALID.
 *
 * r1 must point at the interrupt frame and r13 at the paca. On exit CTR,
 * XER, LR, CR, r0 and r2-r13 hold the frame's values and r1 is reloaded from
 * the frame's GPR1 slot. r9 and r10 are used as scratch until REST_GPRS
 * overwrites them.
 */
.macro EXCEPTION_RESTORE_REGS hsrr=0
	/* Move original SRR0 and SRR1 into the respective regs */
	ld	r9,_MSR(r1)
	li	r10,0
	.if \hsrr
	mtspr	SPRN_HSRR1,r9
	stb	r10,PACAHSRR_VALID(r13)
	.else
	mtspr	SPRN_SRR1,r9
	stb	r10,PACASRR_VALID(r13)
	.endif
	ld	r9,_NIP(r1)
	.if \hsrr
	mtspr	SPRN_HSRR0,r9
	.else
	mtspr	SPRN_SRR0,r9
	.endif
	ld	r9,_CTR(r1)
	mtctr	r9
	ld	r9,_XER(r1)
	mtxer	r9
	ld	r9,_LINK(r1)
	mtlr	r9
	ld	r9,_CCR(r1)
	mtcr	r9
	SANITIZE_RESTORE_NVGPRS()
	REST_GPRS(2, 13, r1)
	REST_GPR(0, r1)
	/* restore original r1. Must be last: every load above is r1-relative. */
	ld	r1,GPR1(r1)
.endm
/*
 * EARLY_BOOT_FIXUP - Fix real-mode interrupt with wrong endian in early boot.
 *
 * There's a short window during boot where although the kernel is running
 * little endian, any exceptions will cause the CPU to switch back to big
 * endian. For example a WARN() boils down to a trap instruction, which will
 * cause a program check, and we end up here but with the CPU in big endian
 * mode. The first instruction of the program check handler (in GEN_INT_ENTRY
 * below) is an mtsprg, which when executed in the wrong endian is an lhzu with
 * a ~3GB displacement from r3. The content of r3 is random, so that is a load
 * from some random location, and depending on the system can easily lead to a
 * checkstop, or an infinitely recursive page fault.
 *
 * So to handle that case we have a trampoline here that can detect we are in
 * the wrong endian and flip us back to the correct endian. We can't flip
 * MSR[LE] using mtmsr, so we have to use rfid. That requires backing up SRR0/1
 * as well as a GPR. To do that we use SPRG0/2/3, as SPRG1 is already used for
 * the paca. SPRG3 is user readable, but this trampoline is only active very
 * early in boot, and SPRG3 will be reinitialised in vdso_getcpu_init() before
 * userspace starts.
 *
 * The whole sequence sits in a feature section that is nopped out after boot.
 * The .long words are instructions pre-encoded in the opposite byte order, so
 * they only decode as the commented mnemonics when fetched in the wrong endian.
 */
.macro EARLY_BOOT_FIXUP
BEGIN_FTR_SECTION
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	tdi   0,0,0x48    // Trap never, or in reverse endian: b . + 8
	b     2f          // Skip trampoline if endian is correct
	.long 0xa643707d  // mtsprg  0, r11      Backup r11
	.long 0xa6027a7d  // mfsrr0  r11
	.long 0xa643727d  // mtsprg  2, r11      Backup SRR0 in SPRG2
	.long 0xa6027b7d  // mfsrr1  r11
	.long 0xa643737d  // mtsprg  3, r11      Backup SRR1 in SPRG3
	.long 0xa600607d  // mfmsr   r11
	.long 0x01006b69  // xori    r11, r11, 1 Invert MSR[LE]
	.long 0xa6037b7d  // mtsrr1  r11
	/*
	 * This is 'li  r11,1f' where 1f is the absolute address of that
	 * label, byteswapped into the SI field of the instruction.
	 */
	.long 0x00006039 | \
		((ABS_ADDR(1f, real_vectors) & 0x00ff) << 24) | \
		((ABS_ADDR(1f, real_vectors) & 0xff00) << 8)
	.long 0xa6037a7d  // mtsrr0  r11
	.long 0x2400004c  // rfid
1:
	mfsprg r11, 3
	mtsrr1 r11        // Restore SRR1
	mfsprg r11, 2
	mtsrr0 r11        // Restore SRR0
	mfsprg r11, 0     // Restore r11
2:
#endif
	/*
	 * program check could hit at any time, and pseries can not block
	 * MSR[ME] in early boot. So check if there is anything useful in r13
	 * yet, and spin forever if not.
	 */
	mtsprg	0, r11		// r11 is scratch; CR is preserved through it
	mfcr	r11
	cmpdi	r13, 0
	beq	.
	mtcr	r11
	mfsprg	r11, 0
END_FTR_SECTION(0, 1)     // nop out after boot
.endm
/*
 * There are a few constraints to be concerned with.
 * - Real mode exceptions code/data must be located at their physical location.
 * - Virtual mode exceptions must be mapped at their 0xc000... location.
 * - Fixed location code must not call directly beyond the __end_interrupts
 *   area when built with CONFIG_RELOCATABLE. LOAD_HANDLER / bctr sequence
 *   must be used.
 * - LOAD_HANDLER targets must be within first 64K of physical 0 /
 *   virtual 0xc00...
 * - Conditional branch targets must be within +/-32K of caller.
 *
 * "Virtual exceptions" run with relocation on (MSR_IR=1, MSR_DR=1), and
 * therefore don't have to run in physically located code or rfid to
 * virtual mode kernel code. However on relocatable kernels they do have
 * to branch to KERNELBASE offset because the rest of the kernel (outside
 * the exception vectors) may be located elsewhere.
 *
 * Virtual exceptions correspond with physical, except their entry points
 * are offset by 0xc000000000000000 and also tend to get an added 0x4000
 * offset applied. Virtual exceptions are enabled with the Alternate
 * Interrupt Location (AIL) bit set in the LPCR. However this does not
 * guarantee they will be delivered virtually. Some conditions (see the ISA)
 * cause exceptions to be delivered in real mode.
 *
 * The scv instructions are a special case. They get a 0x3000 offset applied.
 * scv exceptions have unique reentrancy properties, see below.
 *
 * It's impossible to receive interrupts below 0x300 via AIL.
 *
 * KVM: None of the virtual exceptions are from the guest. Anything that
 * escalated to HV=1 from HV=0 is delivered via real mode handlers.
 *
 *
 * We lay out physical memory as follows:
 * 0x0000 - 0x00ff : Secondary processor spin code
 * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors
 * 0x1900 - 0x2fff : Real mode trampolines
 * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
 * 0x5900 - 0x6fff : Relon mode trampolines
 * 0x7000 - 0x7fff : FWNMI data area
 * 0x8000 -   .... : Common interrupt handlers, remaining early
 *                   setup code, rest of kernel.
 *
 * We could reclaim 0x4000-0x42ff for real mode trampolines if the space
 * is necessary. Until then it's more consistent to explicitly put VIRT_NONE
 * vectors there.
 */
/* Open the fixed-address sections; each call gives a section name and its start/end address */
OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900)
OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x3000)
OPEN_FIXED_SECTION(virt_vectors, 0x3000, 0x5900)
OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/*
 * Data area reserved for FWNMI option.
 * This address (0x7000) is fixed by the RPA.
 * pseries and powernv need to keep the whole page from
 * 0x7000 to 0x8000 free for use by the firmware
 */
ZERO_FIXED_SECTION(fwnmi_page, 0x7000, 0x8000)
OPEN_TEXT_SECTION(0x8000)
#else
/* No FWNMI page on other platforms, so the text section starts at 0x7000 instead */
OPEN_TEXT_SECTION(0x7000)
#endif
/* Everything that follows is emitted into real_vectors until the section is switched again */
USE_FIXED_SECTION(real_vectors)
/*
 * This is the start of the interrupt handlers for pSeries
 * This code runs with relocation off.
 * Code from here to __end_interrupts gets copied down to real
 * address 0x100 when we are running a relocatable kernel.
 * Therefore any relative branches in this section must only
 * branch to labels in this section.
 */
/* Global marker label for the first byte of that copied region */
.globl __start_interrupts
__start_interrupts:
/**
 * Interrupt 0x3000 - System Call Vectored Interrupt (syscall).
 * This is a synchronous interrupt invoked with the "scv" instruction. The
 * system call does not alter the HV bit, so it is directed to the OS.
 *
 * Handling:
 * scv instructions enter the kernel without changing EE, RI, ME, or HV.
 * In particular, this means we can take a maskable interrupt at any point
 * in the scv handler, which is unlike any other interrupt. This is solved
 * by treating the instruction addresses in the handler as being soft-masked,
 * by adding a SOFT_MASK_TABLE entry for them.
 *
 * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and
 * ensure scv is never executed with relocation off, which means AIL-0
 * should never happen.
 *
 * Before leaving the following inside-__end_soft_masked text, at least one
 * of the following must be true:
 * - MSR[PR]=1 (i.e., return to userspace)
 * - MSR_EE|MSR_RI is clear (no reentrant exceptions)
 * - Standard kernel environment is set up (stack, paca, etc)
 *
 * KVM:
 * These interrupts do not elevate HV 0->1, so HV is not involved. PR KVM
 * ensures that FSCR[SCV] is disabled whenever it has to force AIL off.
 *
 * Call convention:
 *
 * syscall register convention is in Documentation/arch/powerpc/syscall64-abi.rst
 */
/*
 * scv vector block: one entry for scv 0 followed by 127 identical entries
 * for scv 1-127 (generated with .rept).
 *
 * Every entry copies r13 to r9, loads the paca pointer into r13, moves
 * LR to r11 and CTR to r12, and sets the paca soft-mask byte to
 * IRQS_ALL_DISABLED. scv 0 then branches to the common handler; scv 1-127
 * set r0 to -1 and branch to the sigill handler. With CONFIG_RELOCATABLE
 * both branches go through a *_tramp label instead.
 */
EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) /* SCV 0 */
mr r9,r13
GET_PACA(r13)
mflr r11
mfctr r12
li r10,IRQS_ALL_DISABLED
stb r10,PACAIRQSOFTMASK(r13)
#ifdef CONFIG_RELOCATABLE
b system_call_vectored_tramp
#else
b system_call_vectored_common
#endif
/* presumably pads scv 0 to the size of the entries below, which carry an extra li -- TODO confirm */
nop
/* SCV 1 - 127 */
.rept 127
mr r9,r13
GET_PACA(r13)
mflr r11
mfctr r12
li r10,IRQS_ALL_DISABLED
stb r10,PACAIRQSOFTMASK(r13)
li r0,-1 /* cause failure */
#ifdef CONFIG_RELOCATABLE
b system_call_vectored_sigill_tramp
#else
b system_call_vectored_sigill
#endif
.endr
EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
// Treat scv vectors as soft-masked, see comment above.
// Use absolute values rather than labels here, so they don't get relocated,
// because this code runs unrelocated.
SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000)
/* No virt vectors corresponding with 0x0..0x100 */
EXC_VIRT_NONE(0x4000, 0x100)
/** * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI). * This is a non-maskable, asynchronous interrupt always taken in real-mode. * It is caused by: * - Wake from power-saving state, on powernv. * - An NMI from another CPU, triggered by firmware or hypercall. * - As crash/debug signal injected from BMC, firmware or hypervisor. * * Handling: * Power-save wakeup is the only performance critical path, so this is * determined quickly as possible first. In this case volatile registers * can be discarded and SPRs like CFAR don't need to be read. * * If not a powersave wakeup, then it's run as a regular interrupt, however * it uses its own stack and PACA save area to preserve the regular kernel * environment for debugging. * * This interrupt is not maskable, so triggering it when MSR[RI] is clear, * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely * correct to switch to virtual mode to run the regular interrupt handler * because it might be interrupted when the MMU is in a bad state (e.g., SLB * is clear). * * FWNMI: * PAPR specifies a "fwnmi" facility which sends the sreset to a different * entry point with a different register set up. Some hypervisors will * send the sreset to 0x100 in the guest if it is not fwnmi capable. * * KVM: * Unlike most SRR interrupts, this may be taken by the host while executing * in a guest, so a KVM test is required. KVM will pull the CPU out of guest * mode and then raise the sreset.
*/
/* Entry parameters consumed by the generic entry/common macros for system reset */
INT_DEFINE_BEGIN(system_reset)
IVEC=0x100 /* trap number stored in the frame */
IAREA=PACA_EXNMI /* paca save area used for the EX_* slots */
IVIRT=0 /* no virt entry point */
ISTACK=0 /* __GEN_COMMON_BODY does not select a stack; r1 must be set up beforehand */
IKVM_REAL=1 /* run KVMTEST at the real-mode common entry */
INT_DEFINE_END(system_reset)
EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
	/*
	 * If running native on arch 2.06 or later, check if we are waking up
	 * from nap/sleep/winkle, and branch to idle handler. This tests SRR1
	 * bits 46:47. A non-0 value indicates that we are coming from a power
	 * saving state. The idle wakeup handler initially runs in real mode,
	 * but we branch to the 0xc000... address so we can turn on relocation
	 * with mtmsrd later, after SPRs are restored.
	 *
	 * Careful to minimise cost for the fast path (idle wakeup) while
	 * also avoiding clobbering CFAR for the debug path (non-idle).
	 *
	 * For the idle wake case volatile registers can be clobbered, which
	 * is why we use those initially. If it turns out to not be an idle
	 * wake, carefully put everything back the way it was, so we can use
	 * common exception macros to handle it.
	 */
BEGIN_FTR_SECTION
	SET_SCRATCH0(r13)
	GET_PACA(r13)
	/* Park r3-r5 in the first three doublewords of the NMI save area */
	std	r3,PACA_EXNMI+0*8(r13)
	std	r4,PACA_EXNMI+1*8(r13)
	std	r5,PACA_EXNMI+2*8(r13)
	mfspr	r3,SPRN_SRR1
	mfocrf	r4,0x80			/* keep cr0, clobbered by rlwinm. */
	rlwinm.	r5,r3,47-31,30,31	/* r5 = SRR1 bits 46:47 */
	bne+	system_reset_idle_wake
	/* Not powersave wakeup. Restore regs for regular interrupt handler. */
	mtocrf	0x80,r4
	ld	r3,PACA_EXNMI+0*8(r13)
	ld	r4,PACA_EXNMI+1*8(r13)
	ld	r5,PACA_EXNMI+2*8(r13)
	GET_SCRATCH0(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif

	GEN_INT_ENTRY system_reset, virt=0
	/*
	 * In theory, we should not enable relocation here if it was disabled
	 * in SRR1, because the MMU may not be configured to support it (e.g.,
	 * SLB may have been cleared). In practice, there should only be a few
	 * small windows where that's the case, and sreset is considered to
	 * be dangerous anyway.
	 */
EXC_REAL_END(system_reset, 0x100, 0x100)
EXC_VIRT_NONE(0x4100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
/*
 * Reached from the 0x100 vector when the two SRR1 bits it extracted into r5
 * are non-zero; r3 holds SRR1 at that point.
 */
TRAMP_REAL_BEGIN(system_reset_idle_wake)
/* We are waking up from idle, so may clobber any volatile register */
cmpwi cr1,r5,2
bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
/* Otherwise continue in idle_return_gpr_loss, reached indirectly through CTR */
__LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines)
mtctr r12
bctr
#endif
#ifdef CONFIG_PPC_PSERIES
/*
 * Vectors for the FWNMI option. Share common code.
 */
TRAMP_REAL_BEGIN(system_reset_fwnmi)
/* Same entry macro and parameters as the 0x100 vector, without its idle-wake check */
GEN_INT_ENTRY system_reset, virt=0
#endif /* CONFIG_PPC_PSERIES */
EXC_COMMON_BEGIN(system_reset_common)
__GEN_COMMON_ENTRY system_reset
/*
 * Increment paca->in_nmi. When the interrupt entry wrapper later
 * enables MSR_RI, then SLB or MCE will be able to recover, but a nested
 * NMI will notice in_nmi and not recover because of the use of the NMI
 * stack. in_nmi reentrancy is tested in system_reset_exception.
 */
lhz r10,PACA_IN_NMI(r13)
addi r10,r10,1
sth r10,PACA_IN_NMI(r13)
/*
 * NOTE(review): nothing follows the increment -- no stack setup, no
 * __GEN_COMMON_BODY and no call into a C handler are visible here.
 * Confirm the remainder of this handler has not been lost.
 */
/** * Interrupt 0x200 - Machine Check Interrupt (MCE). * This is a non-maskable interrupt always taken in real-mode. It can be * synchronous or asynchronous, caused by hardware or software, and it may be * taken in a power-saving state. * * Handling: * Similarly to system reset, this uses its own stack and PACA save area, * the difference is re-entrancy is allowed on the machine check stack. * * machine_check_early is run in real mode, and carefully decodes the * machine check and tries to handle it (e.g., flush the SLB if there was an * error detected there), determines if it was recoverable and logs the * event. * * This early code does not "reconcile" irq soft-mask state like SRESET or * regular interrupts do, so irqs_disabled() among other things may not work * properly (irq disable/enable already doesn't work because irq tracing can * not work in real mode). * * Then, depending on the execution context when the interrupt is taken, there * are 3 main actions: * - Executing in kernel mode. The event is queued with irq_work, which means * it is handled when it is next safe to do so (i.e., the kernel has enabled * interrupts), which could be immediately when the interrupt returns. This * avoids nasty issues like switching to virtual mode when the MMU is in a * bad state, or when executing OPAL code. (SRESET is exposed to such issues, * but it has different priorities). Check to see if the CPU was in power * save, and return via the wake up code if it was. * * - Executing in user mode. machine_check_exception is run like a normal * interrupt handler, which processes the data generated by the early handler. * * - Executing in guest mode. The interrupt is run with its KVM test, and * branches to KVM to deal with. KVM may queue the event for the host * to report later. * * This interrupt is not maskable, so if it triggers when MSR[RI] is clear, * or SCRATCH0 is in use, it may cause a crash. * * KVM: * See SRESET.
*/
/*
 * Two parameter sets share vector 0x200 and the PACA_EXMC save area:
 * machine_check_early for the real-mode early handler, and machine_check
 * for the later handler that machine_check_common is built on.
 */
INT_DEFINE_BEGIN(machine_check_early)
IVEC=0x200
IAREA=PACA_EXMC
IVIRT=0 /* no virt entry point */
IREALMODE_COMMON=1
ISTACK=0 /* the early handler picks its own stack before __GEN_COMMON_BODY */
IDAR=1 /* DAR is captured at entry */
IDSISR=1 /* DSISR is captured at entry */
IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
INT_DEFINE_END(machine_check_early)
INT_DEFINE_BEGIN(machine_check)
IVEC=0x200
IAREA=PACA_EXMC
IVIRT=0 /* no virt entry point */
IDAR=1
IDSISR=1
IKVM_REAL=1 /* run KVMTEST at the real-mode common entry */
INT_DEFINE_END(machine_check)
#ifdef CONFIG_PPC_PSERIES
/* FWNMI entry point for machine checks; enters with the machine_check_early parameters */
TRAMP_REAL_BEGIN(machine_check_fwnmi)
/* See comment at machine_check exception, don't turn on RI */
GEN_INT_ENTRY machine_check_early, virt=0
#endif
/*
 * Unwind the early machine check handler: write 0 to the MSR bits updated by
 * "mtmsrd ,1" (clearing MSR_RI), decrement paca->in_mce, then restore every
 * register from the frame at r1 with EXCEPTION_RESTORE_REGS (SRR variant).
 * Uses r9 and r12 as scratch before they are restored.
 */
#define MACHINE_CHECK_HANDLER_WINDUP			\
	/* Clear MSR_RI before setting SRR0 and SRR1. */\
	li	r9,0;					\
	mtmsrd	r9,1;		/* Clear MSR_RI */	\
	/* Decrement paca->in_mce now RI is clear. */	\
	lhz	r12,PACA_IN_MCE(r13);			\
	subi	r12,r12,1;				\
	sth	r12,PACA_IN_MCE(r13);			\
	EXCEPTION_RESTORE_REGS
/*
 * NOTE(review): this code has no entry label of its own and directly follows
 * a #define. The entry sequence using the machine_check_early parameters
 * needs a common entry point to branch to -- confirm that the label (and its
 * real-mode common entry macro) has not been lost above this comment.
 */
	/*
	 * Switch to mc_emergency stack and handle re-entrancy (we limit
	 * the nested MCE up to level 4 to avoid stack overflow).
	 * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
	 *
	 * We use paca->in_mce to check whether this is the first entry or
	 * nested machine check. We increment paca->in_mce to track nested
	 * machine checks.
	 *
	 * If this is the first entry then set stack pointer to
	 * paca->mc_emergency_sp, otherwise r1 is already pointing to
	 * stack frame on mc_emergency stack.
	 *
	 * NOTE: We are here with MSR_ME=0 (off), which means we risk a
	 * checkstop if we get another machine check exception before we do
	 * rfid with MSR_ME=1.
	 *
	 * This interrupt can wake directly from idle. If that is the case,
	 * the machine check is handled then the idle wakeup code is called
	 * to restore state.
	 */
	lhz	r10,PACA_IN_MCE(r13)
	cmpwi	r10,0			/* Are we in nested machine check */
	cmpwi	cr1,r10,MAX_MCE_DEPTH	/* Are we at maximum nesting */
	addi	r10,r10,1		/* increment paca->in_mce */
	sth	r10,PACA_IN_MCE(r13)

	mr	r10,r1			/* Save r1 */
	bne	1f
	/* First machine check entry */
	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
1:	/* Limit nested MCE to level 4 to avoid stack overflow */
	bgt	cr1,unrecoverable_mce	/* Check if we hit limit of 4 */
	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */

	__GEN_COMMON_BODY machine_check_early

BEGIN_FTR_SECTION
	bl	enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
	addi	r3,r1,STACK_INT_FRAME_REGS
BEGIN_FTR_SECTION
	bl	CFUNC(machine_check_early_boot)
END_FTR_SECTION(0, 1)     // nop out after boot
	bl	CFUNC(machine_check_early)
	std	r3,RESULT(r1)	/* Save result */
	ld	r12,_MSR(r1)	/* r12 = interrupted MSR, tested below */

#ifdef CONFIG_PPC_P7_NAP
	/*
	 * Check if thread was in power saving mode. We come here when any
	 * of the following is true:
	 * a. thread wasn't in power saving mode
	 * b. thread was in power saving mode with no state loss,
	 *    supervisor state loss or hypervisor state loss.
	 *
	 * Go back to nap/sleep/winkle mode again if (b) is true.
	 */
BEGIN_FTR_SECTION
	rlwinm.	r11,r12,47-31,30,31
	bne	machine_check_idle_common
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif

#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
	/*
	 * Check if we are coming from guest. If yes, then run the normal
	 * exception handler which will take the
	 * machine_check_kvm->kvm_interrupt branch to deliver the MC event
	 * to guest.
	 */
	lbz	r11,HSTATE_IN_GUEST(r13)
	cmpwi	r11,0			/* Check if coming from guest */
	bne	mce_deliver		/* continue if we are. */
#endif

	/*
	 * Check if we are coming from userspace. If yes, then run the normal
	 * exception handler which will deliver the MC event to this kernel.
	 */
	andi.	r11,r12,MSR_PR		/* See if coming from user. */
	bne	mce_deliver		/* continue in V mode if we are. */

	/*
	 * At this point we are coming from kernel context.
	 * Queue up the MCE event and return from the interrupt.
	 * But before that, check if this is an un-recoverable exception.
	 * If yes, then stay on emergency stack and panic.
	 */
	andi.	r11,r12,MSR_RI
	beq	unrecoverable_mce

	/*
	 * Check if we have successfully handled/recovered from error, if not
	 * then stay on emergency stack and panic.
	 */
	ld	r3,RESULT(r1)	/* Load result */
	cmpdi	r3,0		/* see if we handled MCE successfully */
	beq	unrecoverable_mce /* if !handled then panic */

	/*
	 * Return from MC interrupt.
	 * Queue up the MCE event so that we can log it later, while
	 * returning from kernel or opal call.
	 */
	bl	CFUNC(machine_check_queue_event)
	MACHINE_CHECK_HANDLER_WINDUP
	RFI_TO_KERNEL
mce_deliver:
	/*
	 * This is a host user or guest MCE. Restore all registers, then
	 * run the "late" handler. For host user, this will run the
	 * machine_check_exception handler in virtual mode like a normal
	 * interrupt handler. For guest, this will trigger the KVM test
	 * and branch to the KVM interrupt similarly to other interrupts.
	 */
BEGIN_FTR_SECTION
	/* __GEN_COMMON_BODY stored the saved CFAR in the ORIG_GPR3 slot */
	ld	r10,ORIG_GPR3(r1)
	mtspr	SPRN_CFAR,r10
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
	MACHINE_CHECK_HANDLER_WINDUP
	/* Registers are back to their interrupt-time values; enter again as machine_check */
	GEN_INT_ENTRY machine_check, virt=0
EXC_COMMON_BEGIN(machine_check_common)
/*
 * Machine check is different because we use a different
 * save area: PACA_EXMC instead of PACA_EXGEN.
 */
GEN_COMMON machine_check
/* r3 = r1 + STACK_INT_FRAME_REGS, the single argument passed to the C handler */
addi r3,r1,STACK_INT_FRAME_REGS
bl CFUNC(machine_check_exception_async)
b interrupt_return_srr
#ifdef CONFIG_PPC_P7_NAP
/*
 * This is an idle wakeup. Low level machine check has already been
 * done. Queue the event then call the idle code to do the wake up.
 */
EXC_COMMON_BEGIN(machine_check_idle_common)
	bl	CFUNC(machine_check_queue_event)

	/*
	 * GPR-loss wakeups are relatively straightforward, because the
	 * idle sleep code has saved all non-volatile registers on its
	 * own stack, and r1 in PACAR1.
	 *
	 * For no-loss wakeups the r1 and lr registers used by the
	 * early machine check handler have to be restored first. r2 is
	 * the kernel TOC, so no need to restore it.
	 *
	 * Then decrement MCE nesting after finishing with the stack.
	 */
	ld	r3,_MSR(r1)
	ld	r4,_LINK(r1)
	ld	r1,GPR1(r1)

	/* Done with the MCE frame: undo the in_mce increment made on entry */
	lhz	r11,PACA_IN_MCE(r13)
	subi	r11,r11,1
	sth	r11,PACA_IN_MCE(r13)

	mtlr	r4
	rlwinm	r10,r3,47-31,30,31
	cmpwi	cr1,r10,2
	bltlr	cr1	/* no state loss, return to idle caller with r3=SRR1 */
	b	idle_return_gpr_loss
#endif
EXC_COMMON_BEGIN(unrecoverable_mce)
	/*
	 * We are going down. But there are chances that we might get hit by
	 * another MCE during panic path and we may run into unstable state
	 * with no way out. Hence, turn ME bit off while going down, so that
	 * when another MCE is hit during panic path, system will checkstop
	 * and hypervisor will get restarted cleanly by SP.
	 */
BEGIN_FTR_SECTION
	li	r10,0 /* clear MSR_RI */
	mtmsrd	r10,1
	bl	CFUNC(disable_machine_check)
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
	/* Switch to the kernel MSR value with MSR_ME masked out */
	ld	r10,PACAKMSR(r13)
	li	r3,MSR_ME
	andc	r10,r10,r3
	mtmsrd	r10

	/*
	 * Invoke machine_check_exception to print MCE event and panic.
	 * This is the NMI version of the handler because we are called from
	 * the early handler which is a true NMI.
	 */
	addi	r3,r1,STACK_INT_FRAME_REGS
	bl	CFUNC(machine_check_exception)

	/*
	 * We will not reach here. Even if we did, there is no way out.
	 * Call unrecoverable_exception and die.
	 */
	addi	r3,r1,STACK_INT_FRAME_REGS
	bl	CFUNC(unrecoverable_exception)
	b	.
/**
 * Interrupt 0x300 - Data Storage Interrupt (DSI).
 * This is a synchronous interrupt generated due to a data access exception,
 * e.g., a load or store which does not have a valid page table entry with
 * permissions. DAWR matches also fault here, as do RC updates, and minor misc
 * errors e.g., copy/paste, AMO, certain invalid CI accesses, etc.
 *
 * Handling:
 * - Hash MMU
 *   Go to do_hash_fault, which attempts to fill the HPT from an entry in the
 *   Linux page table. Hash faults can hit in kernel mode in a fairly
 *   arbitrary state (e.g., interrupts disabled, locks held) when accessing
 *   "non-bolted" regions, e.g., vmalloc space. However these should always be
 *   backed by Linux page table entries.
 *
 *   If no entry is found the Linux page fault handler is invoked (by
 *   do_hash_fault). Linux page faults can happen in kernel mode due to user
 *   copy operations of course.
 *
 *   KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest
 *   MMU context, which may cause a DSI in the host, which must go to the
 *   KVM handler. MSR[IR] is not enabled, so the real-mode handler will
 *   always be used regardless of AIL setting.
 *
 * - Radix MMU
 *   The hardware loads from the Linux page table directly, so a fault goes
 *   immediately to Linux page fault.
 *
 * Conditions like DAWR match are handled on the way in to Linux page fault.
 */
/* Entry parameters for the data storage interrupt */
INT_DEFINE_BEGIN(data_access)
IVEC=0x300
IDAR=1 /* DAR is captured at entry */
IDSISR=1 /* DSISR is captured at entry */
IKVM_REAL=1 /* run KVMTEST at the real-mode common entry */
INT_DEFINE_END(data_access)
/*
 * NOTE(review): the branch that targets this local label "1:" is not visible
 * before it, and no EXC_COMMON_BEGIN precedes it -- confirm the earlier part
 * of this handler is intact.
 */
1: bl CFUNC(do_break)
/*
 * do_break() may have changed the NV GPRS while handling a breakpoint.
 * If so, we need to restore them with their updated values.
 */
HANDLER_RESTORE_NVGPRS()
b interrupt_return_srr
/** * Interrupt 0x380 - Data Segment Interrupt (DSLB). * This is a synchronous interrupt in response to an MMU fault missing SLB * entry for HPT, or an address outside RPT translation range. * * Handling: * - HPT: * This refills the SLB, or reports an access fault similarly to a bad page * fault. When coming from user-mode, the SLB handler may access any kernel * data, though it may itself take a DSLB. When coming from kernel mode, * recursive faults must be avoided so access is restricted to the kernel * image text/data, kernel stack, and any data allocated below * ppc64_bolted_size (first segment). The kernel handler must avoid stomping * on user-handler data structures. * * KVM: Same as 0x300, DSLB must test for KVM guest.
*/
/* Entry parameters for the data segment interrupt; DSISR is not captured for this vector */
INT_DEFINE_BEGIN(data_access_slb)
IVEC=0x380
IDAR=1 /* DAR is captured at entry */
IKVM_REAL=1 /* run KVMTEST at the real-mode common entry */
INT_DEFINE_END(data_access_slb)
/* Real-mode vector at 0x380 and its virt-mode twin at 0x4380, 0x80 bytes each */
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
GEN_INT_ENTRY data_access_slb, virt=0
EXC_REAL_END(data_access_slb, 0x380, 0x80)
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
GEN_INT_ENTRY data_access_slb, virt=1
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
/*
 * Common handler for the data segment interrupt.
 *
 * With the hash MMU, do_slb_fault() is called with r3 = r1 +
 * STACK_INT_FRAME_REGS. A zero return takes the fast interrupt return;
 * a non-zero return falls into the error path with that value in r3.
 * With radix, or without CONFIG_PPC_64S_HASH_MMU, r3 is set to -EFAULT.
 *
 * The error path stores r3 in the frame's RESULT slot and calls
 * do_bad_segment_interrupt() before the normal interrupt return.
 */
EXC_COMMON_BEGIN(data_access_slb_common)
GEN_COMMON data_access_slb
#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */
addi r3,r1,STACK_INT_FRAME_REGS
bl CFUNC(do_slb_fault)
cmpdi r3,0
bne- 1f
b fast_interrupt_return_srr
1: /* Error case */
MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#else
li r3,-EFAULT
#endif
/* r3 is the error code here: the do_slb_fault return value or -EFAULT */
std r3,RESULT(r1)
addi r3,r1,STACK_INT_FRAME_REGS
bl CFUNC(do_bad_segment_interrupt)
b interrupt_return_srr
/** * Interrupt 0x400 - Instruction Storage Interrupt (ISI). * This is a synchronous interrupt in response to an MMU fault due to an * instruction fetch. * * Handling: * Similar to DSI, though in response to fetch. The faulting address is found * in SRR0 (rather than DAR), and status in SRR1 (rather than DSISR).
*/
/* Parameter block for the 0x400 Instruction Storage Interrupt. */
INT_DEFINE_BEGIN(instruction_access)
/* Vector offset; matches the "Interrupt 0x400" header comment. */
IVEC=0x400
/*
 * Instruction-side fault: per the header comment the faulting address is
 * found in SRR0 (rather than DAR) and the status in SRR1 (rather than
 * DSISR).
 * NOTE(review): IISIDE presumably redirects the IDAR/IDSISR captures to
 * those registers - confirm against the parameter definitions.
 */
IISIDE=1
IDAR=1
IDSISR=1
/*
 * The KVM test on the real-mode entry is only built when PR KVM is
 * possible (see the discussion of PR KVM at the top of this file).
 */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
INT_DEFINE_END(instruction_access)
/** * Interrupt 0x480 - Instruction Segment Interrupt (ISLB). * This is a synchronous interrupt in response to an MMU fault due to an * instruction fetch. * * Handling: * Similar to DSLB, though in response to fetch. The faulting address is found * in SRR0 (rather than DAR).
*/
/*
 * Parameter block, real/virt vector entries and common handler for the
 * 0x480 Instruction Segment Interrupt.
 */
INT_DEFINE_BEGIN(instruction_access_slb)
/* Vector offset; matches the "Interrupt 0x480" header comment. */
IVEC=0x480
/*
 * Instruction-side fault: per the header comment the faulting address is
 * found in SRR0 (rather than DAR).
 * NOTE(review): IISIDE presumably redirects the IDAR capture to SRR0 -
 * confirm against the parameter definitions.
 */
IISIDE=1
IDAR=1
/*
 * The KVM test on the real-mode entry is only built when PR KVM is
 * possible (see the discussion of PR KVM at the top of this file).
 */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
INT_DEFINE_END(instruction_access_slb)
/* Real-mode vector at 0x480. */
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
GEN_INT_ENTRY instruction_access_slb, virt=0
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
/* Virtual-mode vector at 0x4480. */
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
GEN_INT_ENTRY instruction_access_slb, virt=1
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
/* Common handler shared by both entry points. */
EXC_COMMON_BEGIN(instruction_access_slb_common)
GEN_COMMON instruction_access_slb
#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */
/* r3 = r1 + STACK_INT_FRAME_REGS is the argument to do_slb_fault. */
addi r3,r1,STACK_INT_FRAME_REGS
bl CFUNC(do_slb_fault)
/*
 * A zero return takes the fast return path. A non-zero return is kept in
 * r3 and falls through to the error reporting below.
 */
cmpdi r3,0
bne- 1f
b fast_interrupt_return_srr
1: /* Error case */
MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#else
/* Without hash MMU support the interrupt is always reported as -EFAULT. */
li r3,-EFAULT
#endif
/*
 * Store the error code in the RESULT slot of the frame at r1, then call
 * do_bad_segment_interrupt with r3 = r1 + STACK_INT_FRAME_REGS.
 */
std r3,RESULT(r1)
addi r3,r1,STACK_INT_FRAME_REGS
bl CFUNC(do_bad_segment_interrupt)
b interrupt_return_srr
/**
 * Interrupt 0x500 - External Interrupt.
 * This is an asynchronous maskable interrupt in response to an "external
 * exception" from the interrupt controller or hypervisor (e.g., device
 * interrupt). It is maskable in hardware by clearing MSR[EE], and
 * soft-maskable with IRQS_DISABLED mask (i.e., local_irq_disable()).
 *
 * When running in HV mode, Linux sets up the LPCR[LPES] bit such that
 * interrupts are delivered with HSRR registers, guests use SRRs, which
 * requires IHSRR_IF_HVMODE.
 *
 * On bare metal POWER9 and later, Linux sets the LPCR[HVICE] bit such that
 * external interrupts are delivered as Hypervisor Virtualization Interrupts
 * rather than External Interrupts.
 *
 * Handling:
 * This calls into Linux IRQ handler. NVGPRs are not saved to reduce overhead,
 * because registers at the time of the interrupt are not so important as it is
 * asynchronous.
 *
 * If soft masked, the masked handler will note the pending interrupt for
 * replay, and clear MSR[EE] in the interrupted context.
 *
 * CFAR is not required because this is an asynchronous interrupt that in
 * general won't have much bearing on the state of the CPU, with the possible
 * exception of crash/debug IPIs, but those are generally moving to use SRESET
 * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case
 * it may be exiting the guest and need CFAR to be saved.
 */
/* Parameter block for the 0x500 External Interrupt. */
INT_DEFINE_BEGIN(hardware_interrupt)
/* Vector offset; matches the "Interrupt 0x500" header comment. */
IVEC=0x500
/*
 * Per the header comment: in HV mode the interrupt is delivered with HSRR
 * registers, while guests use SRRs.
 */
IHSRR_IF_HVMODE=1
/* Soft-maskable with the IRQS_DISABLED mask (see header comment). */
IMASK=IRQS_DISABLED
/*
 * KVM tests on both entry points.
 * NOTE(review): presumably because this interrupt can arrive while a
 * guest is running in either real or virtual mode - confirm.
 */
IKVM_REAL=1
IKVM_VIRT=1
/*
 * CFAR is not required for this asynchronous interrupt, except when it is
 * an HV interrupt and KVM HV is possible (see header comment).
 */
ICFAR=0
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
ICFAR_IF_HVMODE=1
#endif
INT_DEFINE_END(hardware_interrupt)
/** * Interrupt 0x600 - Alignment Interrupt * This is a synchronous interrupt in response to data alignment fault.
*/
/* Parameter block for the 0x600 Alignment Interrupt. */
INT_DEFINE_BEGIN(alignment)
/* Vector offset; matches the "Interrupt 0x600" header comment. */
IVEC=0x600
/*
 * NOTE(review): IDAR/IDSISR presumably request that DAR and DSISR be
 * captured by the generated entry code - confirm against the parameter
 * definitions elsewhere in this file.
 */
IDAR=1
IDSISR=1
/*
 * The KVM test on the real-mode entry is only built when PR KVM is
 * possible (see the discussion of PR KVM at the top of this file).
 */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
INT_DEFINE_END(alignment)
/** * Interrupt 0x700 - Program Interrupt (program check). * This is a synchronous interrupt in response to various instruction faults: * traps, privilege errors, TM errors, floating point exceptions. * * Handling: * This interrupt may use the "emergency stack" in some cases when being taken * from kernel context, which complicates handling.
*/
/*
 * Parameter block and stack-selection / dispatch code for the 0x700
 * Program Interrupt. The handler calls program_check_exception() with
 * r3 = r1 + STACK_INT_FRAME_REGS and returns via interrupt_return_srr.
 */
INT_DEFINE_BEGIN(program_check)
	/* Vector offset; matches the "Interrupt 0x700" header comment. */
	IVEC=0x700
	/*
	 * The KVM test on the real-mode entry is only built when PR KVM is
	 * possible (see the discussion of PR KVM at the top of this file).
	 */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
	IKVM_REAL=1
#endif
INT_DEFINE_END(program_check)

	/*
	 * NOTE(review): the vector entry points and the opening of the common
	 * handler are not present in this excerpt. In addition, .Lnormal_stack
	 * is branched to below but is not defined anywhere in the visible
	 * code, and the branch to .Ldo_program_check targets the very next
	 * instruction, so the normal-stack path appears to have been lost.
	 * Restore it from the authoritative source before assembling.
	 */

	/*
	 * It's possible to receive a TM Bad Thing type program check with
	 * userspace register values (in particular r1), but with SRR1 reporting
	 * that we came from the kernel. Normally that would confuse the bad
	 * stack logic, and we would report a bad kernel stack pointer. Instead
	 * we switch to the emergency stack if we're taking a TM Bad Thing from
	 * the kernel.
	 */
	andi.	r10,r12,MSR_PR
	bne	.Lnormal_stack		/* If userspace, go normal path */

	andis.	r10,r12,(SRR1_PROGTM)@h
	bne	.Lemergency_stack	/* If TM, emergency */

	cmpdi	r1,-INT_FRAME_SIZE	/* check if r1 is in userspace */
	blt	.Lnormal_stack		/* normal path if not */

	/* Use the emergency stack */
.Lemergency_stack:
	andi.	r10,r12,MSR_PR		/* Set CR0 correctly for label */
					/* 3 in EXCEPTION_PROLOG_COMMON */
	mr	r10,r1			/* Save r1 */
	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack */
	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
	/*
	 * r1 has already been pointed at the emergency stack above.
	 * NOTE(review): __ISTACK=0 presumably stops the generated body from
	 * switching to the regular kernel stack - confirm.
	 */
	__ISTACK(program_check)=0
	__GEN_COMMON_BODY program_check
	b	.Ldo_program_check

.Ldo_program_check:
	addi	r3,r1,STACK_INT_FRAME_REGS
	bl	CFUNC(program_check_exception)
	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
	b	interrupt_return_srr
/* * Interrupt 0x800 - Floating-Point Unavailable Interrupt. * This is a synchronous interrupt in response to executing an fp instruction * with MSR[FP]=0. * * Handling: * This will load FP registers and enable the FP bit if coming from userspace, * otherwise report a bad kernel use of FP.
*/
/*
 * Parameter block, real/virt vector entries and common handler for the
 * 0x800 Floating-Point Unavailable Interrupt.
 */
INT_DEFINE_BEGIN(fp_unavailable)
/* Vector offset; matches the "Interrupt 0x800" header comment. */
IVEC=0x800
/*
 * The KVM test on the real-mode entry is only built when PR KVM is
 * possible (see the discussion of PR KVM at the top of this file).
 */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
/*
 * NOTE(review): IMSR_R12 presumably declares that the handler expects the
 * interrupted MSR in r12; the TM state test below reads r12 that way -
 * confirm against the parameter definitions.
 */
IMSR_R12=1
INT_DEFINE_END(fp_unavailable)
/* Real-mode vector at 0x800. */
EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
GEN_INT_ENTRY fp_unavailable, virt=0
EXC_REAL_END(fp_unavailable, 0x800, 0x100)
/* Virtual-mode vector at 0x4800. */
EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
GEN_INT_ENTRY fp_unavailable, virt=1
EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
/* Common handler shared by both entry points. */
EXC_COMMON_BEGIN(fp_unavailable_common)
GEN_COMMON fp_unavailable
/*
 * NOTE(review): this branch consumes condition flags left by GEN_COMMON;
 * presumably CR0 reflects whether the interrupt came from user mode -
 * confirm against the macro definition.
 */
bne 1f /* if from user, just load it up */
/* Kernel path: report the bad kernel use of FP. */
addi r3,r1,STACK_INT_FRAME_REGS
bl CFUNC(kernel_fp_unavailable_exception)
/* If the call above returns, trap with a bug table entry for this site. */
0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
1:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
/*
 * Test if 2 TM state bits are zero. If non-zero (ie. userspace was in
 * transaction), go do TM stuff
 */
rldicl. r0, r12, (64-MSR_TS_LG), (64-2)
bne- 2f
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
/* Non-transactional user path: load FP state and take the fast return. */
bl CFUNC(load_up_fpu)
b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
addi r3,r1,STACK_INT_FRAME_REGS
bl CFUNC(fp_unavailable_tm)
b interrupt_return_srr
#endif
/** * Interrupt 0x900 - Decrementer Interrupt. * This is an asynchronous interrupt in response to a decrementer exception * (e.g., DEC has wrapped below zero). It is maskable in hardware by clearing * MSR[EE], and soft-maskable with IRQS_DISABLED mask (i.e., * local_irq_disable()). * * Handling: * This calls into Linux timer handler. NVGPRs are not saved (see 0x500). * * If soft masked, the masked handler will note the pending interrupt for * replay, and bump the decrementer to a high value, leaving MSR[EE] enabled * in the interrupted context. * If PPC_WATCHDOG is configured, the soft masked handler will actually set * things back up to run soft_nmi_interrupt as a regular interrupt handler * on the emergency stack. * * CFAR is not required because this is asynchronous (see hardware_interrupt). * A watchdog interrupt may like to have CFAR, but usually the interesting * branch is long gone by that point (e.g., infinite loop).
*/
/* Parameter block for the 0x900 Decrementer Interrupt. */
INT_DEFINE_BEGIN(decrementer)
/* Vector offset; matches the "Interrupt 0x900" header comment. */
IVEC=0x900
/* Soft-maskable with the IRQS_DISABLED mask (see header comment). */
IMASK=IRQS_DISABLED
/*
 * The KVM test on the real-mode entry is only built when PR KVM is
 * possible (see the discussion of PR KVM at the top of this file).
 */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
/* CFAR is not required because this is asynchronous (see header comment). */
ICFAR=0
INT_DEFINE_END(decrementer)
/** * Interrupt 0x980 - Hypervisor Decrementer Interrupt. * This is an asynchronous interrupt, similar to 0x900 but for the HDEC * register. * * Handling: * Linux does not use this outside KVM where it's used to keep a host timer * while the guest is given control of DEC. It should normally be caught by * the KVM test and routed there.
*/
/*
 * Parameter block, real/virt vector entries and common handler for the
 * 0x980 Hypervisor Decrementer Interrupt. The common handler does no work:
 * it restores the registers saved in the PACA EXGEN area and returns with
 * HRFI_TO_KERNEL.
 */
INT_DEFINE_BEGIN(hdecrementer)
	/* Vector offset; matches the "Interrupt 0x980" header comment. */
	IVEC=0x980
	/*
	 * NOTE(review): IHSRR presumably marks this as an interrupt that uses
	 * the HSRR registers; the handler returns with HRFI_TO_KERNEL, which
	 * is consistent with that - confirm against the parameter definitions.
	 */
	IHSRR=1
	/* The handler must avoid touching the kernel stack (see below). */
	ISTACK=0
	/* The header comment expects the KVM test to catch this interrupt. */
	IKVM_REAL=1
	IKVM_VIRT=1
INT_DEFINE_END(hdecrementer)

/* Real-mode vector at 0x980. */
EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
	GEN_INT_ENTRY hdecrementer, virt=0
EXC_REAL_END(hdecrementer, 0x980, 0x80)
/* Virtual-mode vector at 0x4980. */
EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
	GEN_INT_ENTRY hdecrementer, virt=1
EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
EXC_COMMON_BEGIN(hdecrementer_common)
	__GEN_COMMON_ENTRY hdecrementer
	/*
	 * Hypervisor decrementer interrupts not caught by the KVM test
	 * shouldn't occur but are sometimes left pending on exit from a KVM
	 * guest. We don't need to do anything to clear them, as they are
	 * edge-triggered.
	 *
	 * Be careful to avoid touching the kernel stack.
	 */
	li	r10,0
	stb	r10,PACAHSRR_VALID(r13)		/* clear the PACA HSRR-valid byte */
	ld	r10,PACA_EXGEN+EX_CTR(r13)
	mtctr	r10				/* restore CTR from the save area */
	/*
	 * Restore CR field 0 from r9 before r9 itself is reloaded.
	 * NOTE(review): r9 presumably still holds the CR image saved by the
	 * entry code - confirm against __GEN_COMMON_ENTRY.
	 */
	mtcrf	0x80,r9
	ld	r9,PACA_EXGEN+EX_R9(r13)
	ld	r10,PACA_EXGEN+EX_R10(r13)
	ld	r11,PACA_EXGEN+EX_R11(r13)
	ld	r12,PACA_EXGEN+EX_R12(r13)
	/* r13 is the base register for the loads above, so restore it last. */
	ld	r13,PACA_EXGEN+EX_R13(r13)
	HRFI_TO_KERNEL
/** * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt. * This is an asynchronous interrupt in response to a msgsndp doorbell. * It is maskable in hardware by clearing MSR[EE], and soft-maskable with * IRQS_DISABLED mask (i.e., local_irq_disable()). * * Handling: * Guests may use this for IPIs between threads in a core if the * hypervisor supports it. NVGPRS are not saved (see 0x500). * * If soft masked, the masked handler will note the pending interrupt for * replay, leaving MSR[EE] enabled in the interrupted context because the * doorbells are edge triggered. * * CFAR is not required, similarly to hardware_interrupt.
*/
/* Parameter block for the 0xa00 Directed Privileged Doorbell Interrupt. */
INT_DEFINE_BEGIN(doorbell_super)
/* Vector offset; matches the "Interrupt 0xa00" header comment. */
IVEC=0xa00
/* Soft-maskable with the IRQS_DISABLED mask (see header comment). */
IMASK=IRQS_DISABLED
/*
 * The KVM test on the real-mode entry is only built when PR KVM is
 * possible (see the discussion of PR KVM at the top of this file).
 */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
/* CFAR is not required, similarly to hardware_interrupt (header comment). */
ICFAR=0
INT_DEFINE_END(doorbell_super)
/** * Interrupt 0xc00 - System Call Interrupt (syscall, hcall). * This is a synchronous interrupt invoked with the "sc" instruction. The * system call is invoked with "sc 0" and does not alter the HV bit, so it * is directed to the currently running OS. The hypercall is invoked with * "sc 1" and it sets HV=1, so it elevates to hypervisor. * * In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to * 0x4c00 virtual mode. * * Handling: * If the KVM test fires then it was due to a hypercall and is accordingly * routed to KVM. Otherwise this executes a normal Linux system call. * * Call convention: * * syscall and hypercalls register conventions are documented in * Documentation/arch/powerpc/syscall64-abi.rst and * Documentation/arch/powerpc/papr_hcalls.rst respectively. * * The intersection of volatile registers that don't contain possible * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry * without saving, though xer is not a good idea to use, as hardware may * interpret some bits so it may be costly to change them.
*/
/* Parameter block for the 0xc00 System Call Interrupt. */
INT_DEFINE_BEGIN(system_call)
/* Vector offset; matches the "Interrupt 0xc00" header comment. */
IVEC=0xc00
/*
 * Per the header comment, "sc 1" goes to 0xc00 real mode in HPT and can go
 * to 0x4c00 virtual mode in RADIX, so both entry points test for KVM.
 */
IKVM_REAL=1
IKVM_VIRT=1
/*
 * CFAR is not used for this interrupt; the kvm_hcall trampoline stores
 * zero in the EX_CFAR slot.
 */
ICFAR=0
INT_DEFINE_END(system_call)
/*
 * SYSTEM_CALL virt - entry sequence for the system call vector.
 *
 * Loads the PACA pointer into r13 while keeping the interrupted r13 in r9,
 * then reads SRR0/SRR1 into r11/r12. When the KVM handler is configured,
 * KVMTEST may branch to kvm_hcall first.
 *
 * NOTE(review): neither a closing .endm nor any use of the virt argument
 * is visible in this excerpt; the remainder of the macro body appears to be
 * missing here and must be confirmed against the authoritative source.
 */
.macro SYSTEM_CALL virt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
 * There is a little bit of juggling to get syscall and hcall
 * working well. Save r13 in ctr to avoid using SPRG scratch
 * register.
 *
 * Userspace syscalls have already saved the PPR, hcalls must save
 * it before setting HMT_MEDIUM.
 */
/* Park the interrupted r13 in CTR so r13 can take the PACA pointer. */
mtctr r13
GET_PACA(r13)
/* Save r10 in the PACA first, because KVMTEST below uses r10. */
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
/* Not an hcall: move the interrupted r13 from CTR to r9. */
mfctr r9
#else
/* No KVM handler: keep the interrupted r13 in r9 directly. */
mr r9,r13
GET_PACA(r13)
INTERRUPT_TO_KERNEL
#endif
/* We reach here with PACA in r13, r13 in r9. */
mfspr r11,SPRN_SRR0
mfspr r12,SPRN_SRR1
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
 * kvm_hcall - trampoline reached from the KVMTEST in SYSTEM_CALL.
 *
 * Saves r9, r11, r12 and the interrupted r13 in the PACA EXGEN area, zeroes
 * the EX_CFAR and EX_CTR slots, saves the PPR where supported, and then
 * transfers control to kvmppc_hcall.
 */
TRAMP_REAL_BEGIN(kvm_hcall)
std r9,PACA_EXGEN+EX_R9(r13)
std r11,PACA_EXGEN+EX_R11(r13)
std r12,PACA_EXGEN+EX_R12(r13)
/* r9 has been saved above, so it can now carry the CR value. */
mfcr r9
/* SYSTEM_CALL parked the interrupted r13 in CTR; store it as EX_R13. */
mfctr r10
std r10,PACA_EXGEN+EX_R13(r13)
/* Zero the EX_CFAR and EX_CTR save slots. */
li r10,0
std r10,PACA_EXGEN+EX_CFAR(r13)
std r10,PACA_EXGEN+EX_CTR(r13)
/*
 * Save the PPR (on systems that support it) before changing to
 * HMT_MEDIUM. That allows the KVM code to save that value into the
 * guest state (it is the guest's PPR value).
 */
BEGIN_FTR_SECTION
mfspr r10,SPRN_PPR
std r10,PACA_EXGEN+EX_PPR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
#ifdef CONFIG_RELOCATABLE
/*
 * Requires __LOAD_FAR_HANDLER because kvmppc_hcall lives
 * outside the head section.
 */
__LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines)
mtctr r10
bctr
#else
b kvmppc_hcall
#endif
#endif
/** * Interrupt 0xd00 - Trace Interrupt. * This is a synchronous interrupt in response to instruction step or * breakpoint faults.
*/
/* Parameter block for the 0xd00 Trace Interrupt. */
INT_DEFINE_BEGIN(single_step)
/* Vector offset; matches the "Interrupt 0xd00" header comment. */
IVEC=0xd00
/*
 * The KVM test on the real-mode entry is only built when PR KVM is
 * possible (see the discussion of PR KVM at the top of this file).
 */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
INT_DEFINE_END(single_step)
/** * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI). * This is a synchronous interrupt in response to an MMU fault caused by a * guest data access. * * Handling: * This should always get routed to KVM. In radix MMU mode, this is caused * by a guest nested radix access that can't be performed due to the * partition scope page table. In hash mode, this can be caused by guests * running with translation disabled (virtual real mode) or with VPM enabled. * KVM will update the page table structures or disallow the access.
*/
/* Parameter block for the 0xe00 Hypervisor Data Storage Interrupt. */
INT_DEFINE_BEGIN(h_data_storage)
/* Vector offset; matches the "Interrupt 0xe00" header comment. */
IVEC=0xe00
/*
 * NOTE(review): IHSRR presumably marks this hypervisor interrupt as using
 * the HSRR registers - confirm against the parameter definitions.
 */
IHSRR=1
/*
 * NOTE(review): IDAR/IDSISR presumably request that the fault address and
 * status be captured by the generated entry code - confirm.
 */
IDAR=1
IDSISR=1
/* Header comment: "This should always get routed to KVM." */
IKVM_REAL=1
IKVM_VIRT=1
INT_DEFINE_END(h_data_storage)
/** * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI). * This is a synchronous interrupt in response to an MMU fault caused by a * guest instruction fetch, similar to HDSI.
*/
/* Parameter block for the 0xe20 Hypervisor Instruction Storage Interrupt. */
INT_DEFINE_BEGIN(h_instr_storage)
/* Vector offset; matches the "Interrupt 0xe20" header comment. */
IVEC=0xe20
/*
 * NOTE(review): IHSRR presumably marks this hypervisor interrupt as using
 * the HSRR registers - confirm against the parameter definitions.
 */
IHSRR=1
/*
 * KVM tests on both entry points; the header comment describes this as
 * caused by a guest instruction fetch, similar to HDSI.
 */
IKVM_REAL=1
IKVM_VIRT=1
INT_DEFINE_END(h_instr_storage)
/** * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI). * This is an asynchronous interrupt caused by a Hypervisor Maintenance * Exception. It is always taken in real mode but uses HSRR registers * unlike SRESET and MCE. * * It is maskable in hardware by clearing MSR[EE], and partially soft-maskable * with IRQS_DISABLED mask (i.e., local_irq_disable()). * * Handling:
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.24 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.