/* Save the 32-bit only FPSIMD system register state */ staticinlinevoid __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{ if (!vcpu_el1_is_32bit(vcpu)) return;
/* Prepare a 32-bit guest for FP/SIMD trapping to EL2. */
static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
{
	/*
	 * We are about to set CPTR_EL2.TFP to trap all floating point
	 * register accesses to EL2, however, the ARM ARM clearly states that
	 * traps are only taken to EL2 if the operation would not otherwise
	 * trap to EL1. Therefore, always make sure that for 32-bit guests,
	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
	 * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
	 * it will cause an exception.
	 */
	if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
		/* 1 << 30 is FPEXC32_EL2.EN */
		write_sysreg(1 << 30, fpexc32_el2);
		isb();
	}
}
/* * Always trap SME since it's not supported in KVM. * TSM is RES1 if SME isn't implemented.
*/
val |= CPTR_EL2_TSM;
if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
val |= CPTR_EL2_TZ;
if (!guest_owns_fp_regs())
val |= CPTR_EL2_TFP;
write_sysreg(val, cptr_el2);
}
/* Compute and install the CPTR_EL2 (via CPACR_EL1) trap config for VHE. */
static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
{
	/*
	 * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
	 * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
	 * except for some missing controls, such as TAM.
	 * In this case, CPTR_EL2.TAM has the same position with or without
	 * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
	 * shift value for trapping the AMU accesses.
	 */
	u64 val = CPTR_EL2_TAM | CPACR_EL1_TTA;
	u64 cptr;

	if (guest_owns_fp_regs()) {
		val |= CPACR_EL1_FPEN;
		if (vcpu_has_sve(vcpu))
			val |= CPACR_EL1_ZEN;
	}

	if (!vcpu_has_nv(vcpu))
		goto write;

	/*
	 * The architecture is a bit crap (what a surprise): an EL2 guest
	 * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA,
	 * as they are RES0 in the guest's view. To work around it, trap the
	 * sucker using the very same bit it can't set...
	 */
	if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu))
		val |= CPTR_EL2_TCPAC;

	/*
	 * Layer the guest hypervisor's trap configuration on top of our own if
	 * we're in a nested context.
	 */
	if (is_hyp_ctxt(vcpu))
		goto write;

	cptr = vcpu_sanitised_cptr_el2(vcpu);

	/*
	 * Pay attention, there's some interesting detail here.
	 *
	 * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two
	 * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest):
	 *
	 * - CPTR_EL2.xEN = x0, traps are enabled
	 * - CPTR_EL2.xEN = x1, traps are disabled
	 *
	 * In other words, bit[0] determines if guest accesses trap or not. In
	 * the interest of simplicity, clear the entire field if the guest
	 * hypervisor has traps enabled to dispel any illusion of something more
	 * complicated taking place.
	 */
	if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0)))
		val &= ~CPACR_EL1_FPEN;
	if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0)))
		val &= ~CPACR_EL1_ZEN;

	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
		val |= cptr & CPACR_EL1_E0POE;

	val |= cptr & CPTR_EL2_TCPAC;

write:
	write_sysreg(val, cpacr_el1);
}
/*
 * Activate CPTR traps for the vCPU, dispatching to the (h)VHE or nVHE
 * variant. When the host still owns the FP regs, also make sure a 32-bit
 * guest won't take FP traps to EL1 instead of EL2.
 */
static inline void __activate_cptr_traps(struct kvm_vcpu *vcpu)
{
	if (!guest_owns_fp_regs())
		__activate_traps_fpsimd32(vcpu);

	if (has_vhe() || has_hvhe())
		__activate_cptr_traps_vhe(vcpu);
	else
		__activate_cptr_traps_nvhe(vcpu);
}
/*
 * Relax CPTR_EL2 traps for nVHE: keep only the traps required because the
 * CPU lacks the corresponding feature (SVE, SME), on top of the RES1 bits.
 */
static inline void __deactivate_cptr_traps_nvhe(struct kvm_vcpu *vcpu)
{
	u64 val = CPTR_NVHE_EL2_RES1;

	if (!cpus_have_final_cap(ARM64_SVE))
		val |= CPTR_EL2_TZ;
	if (!cpus_have_final_cap(ARM64_SME))
		val |= CPTR_EL2_TSM;

	write_sysreg(val, cptr_el2);
}
staticinlinevoid __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
{
u64 val = CPACR_EL1_FPEN;
if (cpus_have_final_cap(ARM64_SVE))
val |= CPACR_EL1_ZEN; if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN;
/* trap guest access to MPAMIDR_EL1 */ if (system_supports_mpam_hcr()) {
write_sysreg_s(MPAMHCR_EL2_TRAP_MPAMIDR_EL1, SYS_MPAMHCR_EL2);
} else { /* From v1.1 TIDR can trap MPAMIDR, set it unconditionally */
r |= MPAM2_EL2_TIDR;
}
write_sysreg_s(r, SYS_MPAM2_EL2);
}
/* Restore the host's MPAM EL2 state when leaving the guest. */
static inline void __deactivate_traps_mpam(void)
{
	if (!system_supports_mpam())
		return;

	write_sysreg_s(0, SYS_MPAM2_EL2);

	if (system_supports_mpam_hcr())
		write_sysreg_s(MPAMHCR_HOST_FLAGS, SYS_MPAMHCR_EL2);
}
/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
/* * Make sure we trap PMU access from EL0 to EL2. Also sanitize * PMSELR_EL0 to make sure it never contains the cycle * counter, which could make a PMXEVCNTR_EL0 access UNDEF at * EL1 instead of being trapped to EL2.
*/ if (system_supports_pmuv3()) {
write_sysreg(0, pmselr_el0);
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) {
u64 vsesr;
/* * When HCR_EL2.AMO is set, physical SErrors are taken to EL2 * and vSError injection is enabled for EL1. Conveniently, for * NV this means that it is never the case where a 'physical' * SError (injected by KVM or userspace) and vSError are * deliverable to the same context. * * As such, we can trivially select between the host or guest's * VSESR_EL2. Except for the case that FEAT_RAS hasn't been * exposed to the guest, where ESR propagation in hardware * occurs unconditionally. * * Paper over the architectural wart and use an IMPLEMENTATION * DEFINED ESR value in case FEAT_RAS is hidden from the guest.
*/ if (!vserror_state_is_nested(vcpu))
vsesr = vcpu->arch.vsesr_el2; elseif (kvm_has_ras(kern_hyp_va(vcpu->kvm)))
vsesr = __vcpu_sys_reg(vcpu, VSESR_EL2); else
vsesr = ESR_ELx_ISV;
if (vserror_state_is_nested(vcpu))
hcr = __ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2); else
hcr = &vcpu->arch.hcr_el2;
/* * If we pended a virtual abort, preserve it until it gets * cleared. See D1.14.3 (Virtual Interrupts) for details, but * the crucial bit is "On taking a vSError interrupt, * HCR_EL2.VSE is cleared to 0." * * Additionally, when in a nested context we need to propagate the * updated state to the guest hypervisor's HCR_EL2.
*/ if (*hcr & HCR_VSE) {
*hcr &= ~HCR_VSE;
*hcr |= read_sysreg(hcr_el2) & HCR_VSE;
}
}
/* * Finish potential single step before executing the prologue * instruction.
*/
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
returntrue;
}
staticinlinevoid __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{ /* * The vCPU's saved SVE state layout always matches the max VL of the * vCPU. Start off with the max VL so we can load the SVE state.
*/
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
__sve_restore_state(vcpu_sve_pffr(vcpu),
&vcpu->arch.ctxt.fp_regs.fpsr, true);
/* * The effective VL for a VM could differ from the max VL when running a * nested guest, as the guest hypervisor could select a smaller VL. Slap * that into hardware before wrapping up.
*/ if (is_nested_ctxt(vcpu))
sve_cond_update_zcr_vq(__vcpu_sys_reg(vcpu, ZCR_EL2), SYS_ZCR_EL2);
if (vcpu_has_sve(vcpu)) { /* A guest hypervisor may restrict the effective max VL. */ if (is_nested_ctxt(vcpu))
zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2); else
zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
/* * When the guest owns the FP regs, we know that guest+hyp traps for * any FPSIMD/SVE/SME features exposed to the guest have been disabled * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd() * prior to __guest_entry(). As __guest_entry() guarantees a context * synchronization event, we don't need an ISB here to avoid taking * traps for anything that was exposed to the guest.
*/ if (vcpu_has_sve(vcpu)) {
zcr_el1 = read_sysreg_el1(SYS_ZCR);
__vcpu_assign_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu), zcr_el1);
/* * The guest's state is always saved using the guest's max VL. * Ensure that the host has the guest's max VL active such that * the host can save the guest's state lazily, but don't * artificially restrict the host to the guest's max VL.
*/ if (has_vhe()) {
zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
write_sysreg_el2(zcr_el2, SYS_ZCR);
} else {
zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1;
write_sysreg_el2(zcr_el2, SYS_ZCR);
/* Save the host's FP/SIMD (or SVE) state, plus FPMR when exposed. */
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
{
	/*
	 * Non-protected kvm relies on the host restoring its sve state.
	 * Protected kvm restores the host's sve state as not to reveal that
	 * fpsimd was used by a guest nor leak upper sve bits.
	 */
	if (system_supports_sve()) {
		__hyp_sve_save_host();
	} else {
		__fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
	}

	if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
		*host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR);
}
/* * We trap the first access to the FP/SIMD to save the host context and * restore the guest context lazily. * If FP/SIMD is not implemented, handle the trap and inject an undefined * instruction exception to the guest. Similarly for trapped SVE accesses.
*/ staticinlinebool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{ bool sve_guest;
u8 esr_ec;
/* Only handle traps the vCPU can support here: */ switch (esr_ec) { case ESR_ELx_EC_FP_ASIMD: /* Forward traps to the guest hypervisor as required */ if (guest_hyp_fpsimd_traps_enabled(vcpu)) returnfalse; break; case ESR_ELx_EC_SYS64: if (WARN_ON_ONCE(!is_hyp_ctxt(vcpu))) returnfalse;
fallthrough; case ESR_ELx_EC_SVE: if (!sve_guest) returnfalse; if (guest_hyp_sve_traps_enabled(vcpu)) returnfalse; break; default: returnfalse;
}
/* Valid trap. Switch the context: */
/* First disable enough traps to allow us to update the registers */
__deactivate_cptr_traps(vcpu);
isb();
/* Write out the host state if it's in the registers */ if (is_protected_kvm_enabled() && host_owns_fp_regs())
kvm_hyp_save_fpsimd_host(vcpu);
/* Restore the guest state */ if (sve_guest)
__hyp_sve_restore_guest(vcpu); else
__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
write_sysreg_s(__vcpu_sys_reg(vcpu, FPMR), SYS_FPMR);
/* Skip restoring fpexc32 for AArch64 guests */ if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
*host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;
/* * Re-enable traps necessary for the current state of the guest, e.g. * those enabled by a guest hypervisor. The ERET to the guest will * provide the necessary context synchronization.
*/
__activate_cptr_traps(vcpu);
returntrue;
}
staticinlinebool handle_tx2_tvm(struct kvm_vcpu *vcpu)
{
u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); int rt = kvm_vcpu_sys_get_rt(vcpu);
u64 val = vcpu_get_reg(vcpu, rt);
/* * The normal sysreg handling code expects to see the traps, * let's not do anything here.
*/ if (vcpu->arch.hcr_el2 & HCR_TVM) returnfalse;
switch (sysreg) { case SYS_SCTLR_EL1:
write_sysreg_el1(val, SYS_SCTLR); break; case SYS_TTBR0_EL1:
write_sysreg_el1(val, SYS_TTBR0); break; case SYS_TTBR1_EL1:
write_sysreg_el1(val, SYS_TTBR1); break; case SYS_TCR_EL1:
write_sysreg_el1(val, SYS_TCR); break; case SYS_ESR_EL1:
write_sysreg_el1(val, SYS_ESR); break; case SYS_FAR_EL1:
write_sysreg_el1(val, SYS_FAR); break; case SYS_AFSR0_EL1:
write_sysreg_el1(val, SYS_AFSR0); break; case SYS_AFSR1_EL1:
write_sysreg_el1(val, SYS_AFSR1); break; case SYS_MAIR_EL1:
write_sysreg_el1(val, SYS_MAIR); break; case SYS_AMAIR_EL1:
write_sysreg_el1(val, SYS_AMAIR); break; case SYS_CONTEXTIDR_EL1:
write_sysreg_el1(val, SYS_CONTEXTIDR); break; default: returnfalse;
}
__kvm_skip_instr(vcpu); returntrue;
}
/* Open-coded version of timer_get_offset() to allow for kern_hyp_va() */ staticinline u64 hyp_timer_get_offset(struct arch_timer_context *ctxt)
{
u64 offset = 0;
if (ctxt->offset.vm_offset)
offset += *kern_hyp_va(ctxt->offset.vm_offset); if (ctxt->offset.vcpu_offset)
offset += *kern_hyp_va(ctxt->offset.vcpu_offset);
/* * We only get here for 64bit guests, 32bit guests will hit * the long and winding road all the way to the standard * handling. Yes, it sucks to be irrelevant. * * Also, we only deal with non-hypervisor context here (either * an EL1 guest, or a non-HYP context of an EL2 guest).
*/ if (is_hyp_ctxt(vcpu)) returnfalse;
switch (sysreg) { case SYS_CNTPCT_EL0: case SYS_CNTPCTSS_EL0: if (vcpu_has_nv(vcpu)) { /* Check for guest hypervisor trapping */
val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); if (!vcpu_el2_e2h_is_set(vcpu))
val = (val & CNTHCTL_EL1PCTEN) << 10;
if (!(val & (CNTHCTL_EL1PCTEN << 10))) returnfalse;
}
ctxt = vcpu_ptimer(vcpu); break; case SYS_CNTVCT_EL0: case SYS_CNTVCTSS_EL0: if (vcpu_has_nv(vcpu)) { /* Check for guest hypervisor trapping */
val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
staticbool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
{
u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); int rt = kvm_vcpu_sys_get_rt(vcpu);
u64 val = vcpu_get_reg(vcpu, rt);
if (sysreg != SYS_TCR_EL1) returnfalse;
/* * Affected parts do not advertise support for hardware Access Flag / * Dirty state management in ID_AA64MMFR1_EL1.HAFDBS, but the underlying * control bits are still functional. The architecture requires these be * RES0 on systems that do not implement FEAT_HAFDBS. * * Uphold the requirements of the architecture by masking guest writes * to TCR_EL1.{HA,HD} here.
*/
val &= ~(TCR_HD | TCR_HA);
write_sysreg_el1(val, SYS_TCR);
__kvm_skip_instr(vcpu); returntrue;
}
/* * Allow the hypervisor to handle the exit with an exit handler if it has one. * * Returns true if the hypervisor handled the exit, and control should go back * to the guest, or false if it hasn't.
*/ staticinlinebool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code, const exit_handler_fn *handlers)
{
exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; if (fn) return fn(vcpu, exit_code);
returnfalse;
}
staticinlinevoid synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
{ /* * Check for the conditions of Cortex-A510's #2077057. When these occur * SPSR_EL2 can't be trusted, but isn't needed either as it is * unchanged from the value in vcpu_gp_regs(vcpu)->pstate. * Are we single-stepping the guest, and took a PAC exception from the * active-not-pending state?
*/ if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) &&
vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
*vcpu_cpsr(vcpu) & DBG_SPSR_SS &&
ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC)
write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
/* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the * main run loop.
*/ staticinlinebool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code, const exit_handler_fn *handlers)
{ if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
/* * HVC already have an adjusted PC, which we need to * correct in order to return to after having injected * the SError. * * SMC, on the other hand, is *trapped*, meaning its * preferred return address is the SMC itself.
*/ if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64)
write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
}
/* * We're using the raw exception code in order to only process * the trap if no SError is pending. We will come back to the * same PC once the SError has been injected, and replay the * trapping instruction.
*/ if (*exit_code != ARM_EXCEPTION_TRAP) gotoexit;
/* Check if there's an exit handler and allow it to handle the exit. */ if (kvm_hyp_handle_exit(vcpu, exit_code, handlers)) goto guest; exit: /* Return to the host kernel and handle the exit */ returnfalse;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.