/*
 * To use atomic bitmap functions, we have to provide a bitmap address
 * that is u64 aligned. However, the ipm might be u32 aligned.
 * Therefore, we logically start the bitmap at the very beginning of the
 * struct and fixup the bit number.
 */
#define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE)
/**
 * gisa_set_iam - change the GISA interruption alert mask
 *
 * @gisa: gisa to operate on
 * @iam: new IAM value to use
 *
 * Change the IAM atomically with the next alert address and the IPM
 * of the GISA if the GISA is not part of the GIB alert list. All three
 * fields are located in the first long word of the GISA.
 *
 * Returns: 0 on success
 *          -EBUSY in case the gisa is part of the alert list
 */
static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
{
	u64 word, _word;

	word = READ_ONCE(gisa->u64.word[0]);
	do {
		/*
		 * The next alert address (upper 32 bits of word 0) points
		 * back to the gisa itself only while the gisa is NOT on
		 * the alert list; bail out otherwise.
		 */
		if ((u64)gisa != word >> 32)
			return -EBUSY;
		/* replace the IAM (lowest byte) while keeping IPM/address */
		_word = (word & ~0xffUL) | iam;
	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));

	return 0;
}
/**
 * gisa_clear_ipm - clear the GISA interruption pending mask
 *
 * @gisa: gisa to operate on
 *
 * Clear the IPM atomically with the next alert address and the IAM
 * of the GISA unconditionally. All three fields are located in the
 * first long word of the GISA.
 */
static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
{
	u64 word, _word;

	word = READ_ONCE(gisa->u64.word[0]);
	do {
		/* IPM occupies bits 24..31 of the first long word */
		_word = word & ~(0xffUL << 24);
	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
}
/** * gisa_get_ipm_or_restore_iam - return IPM or restore GISA IAM * * @gi: gisa interrupt struct to work on * * Atomically restores the interruption alert mask if none of the * relevant ISCs are pending and return the IPM. * * Returns: the relevant pending ISCs
*/ staticinline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
{
u8 pending_mask, alert_mask;
u64 word, _word;
word = READ_ONCE(gi->origin->u64.word[0]); do {
alert_mask = READ_ONCE(gi->alert.mask);
pending_mask = (u8)(word >> 24) & alert_mask; if (pending_mask) return pending_mask;
_word = (word & ~0xffUL) | alert_mask;
} while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word));
active_mask = pending_irqs(vcpu); if (!active_mask) return 0;
if (psw_extint_disabled(vcpu))
active_mask &= ~IRQ_PEND_EXT_MASK; if (psw_ioint_disabled(vcpu))
active_mask &= ~IRQ_PEND_IO_MASK; else
active_mask = disable_iscs(vcpu, active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_EXTERNAL_CALL_SUBMASK))
__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_EMERGENCY_SIGNAL_SUBMASK))
__clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SUBMASK))
__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK))
__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) {
__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
__clear_bit(IRQ_PEND_EXT_SERVICE_EV, &active_mask);
} if (psw_mchk_disabled(vcpu))
active_mask &= ~IRQ_PEND_MCHK_MASK; /* PV guest cpus can have a single interruption injected at a time. */ if (kvm_s390_pv_cpu_get_handle(vcpu) &&
vcpu->arch.sie_block->iictl != IICTL_CODE_NONE)
active_mask &= ~(IRQ_PEND_EXT_II_MASK |
IRQ_PEND_IO_MASK |
IRQ_PEND_MCHK_MASK); /* * Check both floating and local interrupt's cr14 because * bit IRQ_PEND_MCHK_REP could be set in both cases.
*/ if (!(vcpu->arch.sie_block->gcr[14] &
(vcpu->kvm->arch.float_int.mchk.cr14 |
vcpu->arch.local_int.irq.mchk.cr14)))
__clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
/* * STOP irqs will never be actively delivered. They are triggered via * intercept requests and cleared when the stop intercept is performed.
*/
__clear_bit(IRQ_PEND_SIGP_STOP, &active_mask);
staticint __write_machine_check(struct kvm_vcpu *vcpu, struct kvm_s390_mchk_info *mchk)
{ unsignedlong ext_sa_addr; unsignedlong lc;
freg_t fprs[NUM_FPRS]; union mci mci; int rc;
/* * All other possible payload for a machine check (e.g. the register * contents in the save area) will be handled by the ultravisor, as * the hypervisor does not not have the needed information for * protected guests.
*/ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
vcpu->arch.sie_block->iictl = IICTL_CODE_MCHK;
vcpu->arch.sie_block->mcic = mchk->mcic;
vcpu->arch.sie_block->faddr = mchk->failing_storage_address;
vcpu->arch.sie_block->edc = mchk->ext_damage_code; return 0;
}
mci.val = mchk->mcic; /* take care of lazy register loading */
kvm_s390_fpu_store(vcpu->run);
save_access_regs(vcpu->run->s.regs.acrs); if (cpu_has_gs() && vcpu->arch.gs_enabled)
save_gs_cb(current->thread.gs_cb);
/* Extended save area */
rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr, sizeof(unsignedlong)); /* Only bits 0 through 63-LC are used for address formation */
lc = ext_sa_addr & MCESA_LC_MASK; if (test_kvm_facility(vcpu->kvm, 133)) { switch (lc) { case 0: case 10:
ext_sa_addr &= ~0x3ffUL; break; case 11:
ext_sa_addr &= ~0x7ffUL; break; case 12:
ext_sa_addr &= ~0xfffUL; break; default:
ext_sa_addr = 0; break;
}
} else {
ext_sa_addr &= ~0x3ffUL;
}
spin_lock(&fi->lock);
spin_lock(&li->lock); if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) ||
test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) { /* * If there was an exigent machine check pending, then any * repressible machine checks that might have been pending * are indicated along with it, so always clear bits for * repressible and exigent interrupts
*/
mchk = li->irq.mchk;
clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
memset(&li->irq.mchk, 0, sizeof(mchk));
deliver = 1;
} /* * We indicate floating repressible conditions along with * other pending conditions. Channel Report Pending and Channel * Subsystem damage are the only two and are indicated by * bits in mcic and masked in cr14.
*/ if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
mchk.mcic |= fi->mchk.mcic;
mchk.cr14 |= fi->mchk.cr14;
memset(&fi->mchk, 0, sizeof(mchk));
deliver = 1;
}
spin_unlock(&li->lock);
spin_unlock(&fi->lock);
/* PER is handled by the ultravisor */ if (kvm_s390_pv_cpu_is_protected(vcpu)) return __deliver_prog_pv(vcpu, pgm_info.code & ~PGM_PER);
switch (pgm_info.code & ~PGM_PER) { case PGM_AFX_TRANSLATION: case PGM_ASX_TRANSLATION: case PGM_EX_TRANSLATION: case PGM_LFX_TRANSLATION: case PGM_LSTE_SEQUENCE: case PGM_LSX_TRANSLATION: case PGM_LX_TRANSLATION: case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY:
nullifying = true;
fallthrough; case PGM_SPACE_SWITCH:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE); break; case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: case PGM_ASTE_INSTANCE: case PGM_ASTE_SEQUENCE: case PGM_ASTE_VALIDITY: case PGM_EXTENDED_AUTHORITY:
rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID);
nullifying = true; break; case PGM_ASCE_TYPE: case PGM_PAGE_TRANSLATION: case PGM_REGION_FIRST_TRANS: case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: case PGM_SEGMENT_TRANSLATION:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID);
rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
(u8 *)__LC_OP_ACCESS_ID);
nullifying = true; break; case PGM_MONITOR:
rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
(u16 *)__LC_MON_CLASS_NR);
rc |= put_guest_lc(vcpu, pgm_info.mon_code,
(u64 *)__LC_MON_CODE); break; case PGM_VECTOR_PROCESSING: case PGM_DATA:
rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
(u32 *)__LC_DATA_EXC_CODE); break; case PGM_PROTECTION:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID); break; case PGM_STACK_FULL: case PGM_STACK_EMPTY: case PGM_STACK_SPECIFICATION: case PGM_STACK_TYPE: case PGM_STACK_OPERATION: case PGM_TRACE_TABEL: case PGM_CRYPTO_OPERATION:
nullifying = true; break;
}
/* * The VCPU might not be sleeping but rather executing VSIE. Let's * kick it, so it leaves the SIE to process the request.
*/
kvm_s390_vsie_kick(vcpu);
}
/* * If the monotonic clock runs faster than the tod clock we might be * woken up too early and have to go back to sleep to avoid deadlocks.
*/ if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime))) return HRTIMER_RESTART;
kvm_s390_vcpu_wakeup(vcpu); return HRTIMER_NORESTART;
}
/* pending ckc conditions might have been invalidated */
clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); if (ckc_irq_pending(vcpu))
set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
/* pending cpu timer conditions might have been invalidated */
clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); if (cpu_timer_irq_pending(vcpu))
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
while ((irqs = deliverable_irqs(vcpu)) && !rc) { /* bits are in the reverse order of interrupt priority */
irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT); switch (irq_type) { case IRQ_PEND_IO_ISC_0: case IRQ_PEND_IO_ISC_1: case IRQ_PEND_IO_ISC_2: case IRQ_PEND_IO_ISC_3: case IRQ_PEND_IO_ISC_4: case IRQ_PEND_IO_ISC_5: case IRQ_PEND_IO_ISC_6: case IRQ_PEND_IO_ISC_7:
rc = __deliver_io(vcpu, irq_type); break; case IRQ_PEND_MCHK_EX: case IRQ_PEND_MCHK_REP:
rc = __deliver_machine_check(vcpu); break; case IRQ_PEND_PROG:
rc = __deliver_prog(vcpu); break; case IRQ_PEND_EXT_EMERGENCY:
rc = __deliver_emergency_signal(vcpu); break; case IRQ_PEND_EXT_EXTERNAL:
rc = __deliver_external_call(vcpu); break; case IRQ_PEND_EXT_CLOCK_COMP:
rc = __deliver_ckc(vcpu); break; case IRQ_PEND_EXT_CPU_TIMER:
rc = __deliver_cpu_timer(vcpu); break; case IRQ_PEND_RESTART:
rc = __deliver_restart(vcpu); break; case IRQ_PEND_SET_PREFIX:
rc = __deliver_set_prefix(vcpu); break; case IRQ_PEND_PFAULT_INIT:
rc = __deliver_pfault_init(vcpu); break; case IRQ_PEND_EXT_SERVICE:
rc = __deliver_service(vcpu); break; case IRQ_PEND_EXT_SERVICE_EV:
rc = __deliver_service_ev(vcpu); break; case IRQ_PEND_PFAULT_DONE:
rc = __deliver_pfault_done(vcpu); break; case IRQ_PEND_VIRTIO:
rc = __deliver_virtio(vcpu); break; default:
WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
clear_bit(irq_type, &li->pending_irqs);
}
delivered |= !rc;
}
/* * We delivered at least one interrupt and modified the PC. Force a * singlestep event now.
*/ if (delivered && guestdbg_sstep_enabled(vcpu)) { struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) { /* auto detection if no valid ILC was given */
irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK;
irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu);
irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID;
}
/* * Because repressible machine checks can be indicated along with * exigent machine checks (PoP, Chapter 11, Interruption action) * we need to combine cr14, mcic and external damage code. * Failing storage address and the logout area should not be or'ed * together, we just indicate the last occurrence of the corresponding * machine check
*/
mchk->cr14 |= irq->u.mchk.cr14;
mchk->mcic |= irq->u.mchk.mcic;
mchk->ext_damage_code |= irq->u.mchk.ext_damage_code;
mchk->failing_storage_address = irq->u.mchk.failing_storage_address;
memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout, sizeof(mchk->fixed_logout)); if (mchk->mcic & MCHK_EX_MASK)
set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); elseif (mchk->mcic & MCHK_REP_MASK)
set_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); return 0;
}
/*
 * Dequeue and return an I/O interrupt matching any of the interruption
 * subclasses as designated by the isc mask in cr6 and the schid (if != 0).
 * Take into account the interrupts pending in the interrupt list and in GISA.
 *
 * Note that for a guest that does not enable I/O interrupts
 * but relies on TPI, a flood of classic interrupts may starve
 * out adapter interrupts on the same isc. Linux does not do
 * that, and it is possible to work around the issue by configuring
 * different iscs for classic and adapter interrupts in the guest,
 * but we may want to revisit this in the future.
 */
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
						    u64 isc_mask, u32 schid)
{
	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
	struct kvm_s390_interrupt_info *inti, *tmp_inti;
	int isc;

	inti = get_top_io_int(kvm, isc_mask, schid);

	isc = get_top_gisa_isc(kvm, isc_mask, schid);
	if (isc < 0)
		/* no AI in GISA */
		goto out;

	if (!inti)
		/* AI in GISA but no classical IO int */
		goto gisa_out;

	/* both types of interrupts present */
	if (int_word_to_isc(inti->io.io_int_word) <= isc) {
		/*
		 * Classical IO int with higher priority: re-mark the
		 * adapter interrupt pending in the GISA and deliver the
		 * classical one.
		 */
		gisa_set_ipm_gisc(gi->origin, isc);
		goto out;
	}
gisa_out:
	tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
	if (tmp_inti) {
		tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0);
		tmp_inti->io.io_int_word = isc_to_int_word(isc);
		if (inti)
			/* put the lower-priority classical int back */
			kvm_s390_reinject_io_int(kvm, inti);
		inti = tmp_inti;
	} else
		/* allocation failed: leave the AI pending in the GISA */
		gisa_set_ipm_gisc(gi->origin, isc);
out:
	return inti;
}
/* We always allow events, track them separately from the sccb ints */ if (fi->srv_signal.ext_params & SCCB_EVENT_PENDING)
set_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
/* * Early versions of the QEMU s390 bios will inject several * service interrupts after another without handling a * condition code indicating busy. * We will silently ignore those superfluous sccb values. * A future version of QEMU will take care of serialization * of servc requests
*/ if (fi->srv_signal.ext_params & SCCB_MASK) goto out;
fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK;
set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
out:
spin_unlock(&fi->lock);
kfree(inti); return 0;
}
/* * We do not use the lock checking variant as this is just a * performance optimization and we do not hold the lock here. * This is ok as the code will pick interrupts from both "lists" * for delivery.
*/ if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
gisa_set_ipm_gisc(gi->origin, isc);
kfree(inti); return 0;
}
fi = &kvm->arch.float_int;
spin_lock(&fi->lock); if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
spin_unlock(&fi->lock); return -EBUSY;
}
fi->counters[FIRQ_CNTR_IO] += 1;
/*
 * Find a destination VCPU for a floating irq and kick it.
 */
static void __floating_irq_kick(struct kvm *kvm, u64 type)
{
	struct kvm_vcpu *dst_vcpu;
	int sigcpu, online_vcpus, nr_tries = 0;

	online_vcpus = atomic_read(&kvm->online_vcpus);
	if (!online_vcpus)
		return;

	/* find idle VCPUs first, then round robin */
	sigcpu = find_first_bit(kvm->arch.idle_mask, online_vcpus);
	if (sigcpu == online_vcpus) {
		do {
			sigcpu = kvm->arch.float_int.next_rr_cpu++;
			kvm->arch.float_int.next_rr_cpu %= online_vcpus;
			/* avoid endless loops if all vcpus are stopped */
			if (nr_tries++ >= online_vcpus)
				return;
		} while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
	}
	dst_vcpu = kvm_get_vcpu(kvm, sigcpu);

	/* make the VCPU drop out of the SIE, or wake it up if sleeping */
	switch (type) {
	case KVM_S390_MCHK:
		kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
		break;
	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
		/*
		 * Adapter interrupts with a usable GISA need no IO intercept
		 * request, except for protected (PV) cpus.
		 */
		if (!(type & KVM_S390_INT_IO_AI_MASK &&
		      kvm->arch.gisa_int.origin) ||
		    kvm_s390_pv_cpu_get_handle(dst_vcpu))
			kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
		break;
	default:
		kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
		break;
	}
	kvm_s390_vcpu_wakeup(dst_vcpu);
}
/*
 * Inject a floating interrupt of the given type into the VM by
 * dispatching to the matching __inject_* helper, then kick a
 * destination VCPU so the interrupt gets delivered.
 */
static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
{
	u64 type = READ_ONCE(inti->type);
	int rc;

	switch (type) {
	case KVM_S390_MCHK:
		rc = __inject_float_mchk(kvm, inti);
		break;
	case KVM_S390_INT_VIRTIO:
		rc = __inject_virtio(kvm, inti);
		break;
	case KVM_S390_INT_SERVICE:
		rc = __inject_service(kvm, inti);
		break;
	case KVM_S390_INT_PFAULT_DONE:
		rc = __inject_pfault_done(kvm, inti);
		break;
	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
		rc = __inject_io(kvm, inti);
		break;
	default:
		rc = -EINVAL;
	}
	if (rc)
		return rc;
	__floating_irq_kick(kvm, type);
	return 0;
}
int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int)
{ struct kvm_s390_interrupt_info *inti; int rc;
inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (!inti) return -ENOMEM;
mutex_lock(&kvm->lock); if (!kvm_s390_pv_is_protected(kvm))
fi->masked_irqs = 0;
mutex_unlock(&kvm->lock);
spin_lock(&fi->lock);
fi->pending_irqs = 0;
memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
memset(&fi->mchk, 0, sizeof(fi->mchk)); for (i = 0; i < FIRQ_LIST_COUNT; i++)
clear_irq_list(&fi->lists[i]); for (i = 0; i < FIRQ_MAX_COUNT; i++)
fi->counters[i] = 0;
spin_unlock(&fi->lock);
kvm_s390_gisa_clear(kvm);
};
staticint get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
{ struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_s390_interrupt_info *inti; struct kvm_s390_float_interrupt *fi; struct kvm_s390_irq *buf; struct kvm_s390_irq *irq; int max_irqs; int ret = 0; int n = 0; int i;
if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0) return -EINVAL;
/* * We are already using -ENOMEM to signal * userspace it may retry with a bigger buffer, * so we need to use something else for this case
*/
buf = vzalloc(len); if (!buf) return -ENOBUFS;
max_irqs = len / sizeof(struct kvm_s390_irq);
if (gi->origin && gisa_get_ipm(gi->origin)) { for (i = 0; i <= MAX_ISC; i++) { if (n == max_irqs) { /* signal userspace to try again */
ret = -ENOMEM; goto out_nolock;
} if (gisa_tac_ipm_gisc(gi->origin, i)) {
irq = (struct kvm_s390_irq *) &buf[n];
irq->type = KVM_S390_INT_IO(1, 0, 0, 0);
irq->u.io.io_int_word = isc_to_int_word(i);
n++;
}
}
}
fi = &kvm->arch.float_int;
spin_lock(&fi->lock); for (i = 0; i < FIRQ_LIST_COUNT; i++) {
list_for_each_entry(inti, &fi->lists[i], list) { if (n == max_irqs) { /* signal userspace to try again */
ret = -ENOMEM; goto out;
}
inti_to_irq(inti, &buf[n]);
n++;
}
} if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs) ||
test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs)) { if (n == max_irqs) { /* signal userspace to try again */
ret = -ENOMEM; goto out;
}
/*
 * NOTE(review): the original source was truncated at this point by the
 * extraction tool ("maximum size reached"); the remainder of
 * get_all_floating_irqs() and the rest of the file are missing here.
 */