rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id); if (rc) {
pr_err("Failed to get VP info from OPAL: %d\n", rc); goto bail;
}
if (!kvmppc_xive_check_save_restore(vcpu)) {
pr_err("inconsistent save-restore setup for VCPU %d\n", server_num);
rc = -EIO; goto bail;
}
/* * Enable the VP first as the single escalation mode will * affect escalation interrupts numbering
*/
rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive)); if (rc) {
pr_err("Failed to enable VP in OPAL: %d\n", rc); goto bail;
}
/* Configure VCPU fields for use by assembly push/pull */
vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
/* TODO: reset all queues to a clean state ? */
bail:
mutex_unlock(&xive->lock); if (rc)
kvmppc_xive_native_cleanup_vcpu(vcpu);
/* * Clear the ESB pages of the IRQ number being mapped (or * unmapped) into the guest and let the VM fault handler * repopulate with the appropriate ESB pages (device or IC)
*/
pr_debug("clearing esb pages for girq 0x%lx\n", irq);
mutex_lock(&xive->mapping_lock); if (xive->mapping)
unmap_mapping_range(xive->mapping,
esb_pgoff << PAGE_SHIFT,
2ull << PAGE_SHIFT, 1);
mutex_unlock(&xive->mapping_lock); return 0;
}
/* * Grab the KVM device file address_space to be able to clear * the ESB pages mapping when a device is passed-through into * the guest.
*/
xive->mapping = vma->vm_file->f_mapping; return 0;
}
if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS) return -E2BIG;
sb = kvmppc_xive_find_source(xive, irq, &idx); if (!sb) {
pr_debug("No source, creating source block...\n");
sb = kvmppc_xive_create_src_block(xive, irq); if (!sb) {
pr_err("Failed to create block...\n"); return -ENOMEM;
}
}
state = &sb->irq_state[idx];
if (get_user(val, ubufp)) {
pr_err("fault getting user info !\n"); return -EFAULT;
}
arch_spin_lock(&sb->lock);
/* * If the source doesn't already have an IPI, allocate * one and get the corresponding data
*/ if (!state->ipi_number) {
state->ipi_number = xive_native_alloc_irq(); if (state->ipi_number == 0) {
pr_err("Failed to allocate IRQ !\n");
rc = -ENXIO; goto unlock;
}
xive_native_populate_irq_data(state->ipi_number,
&state->ipi_data);
pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
state->ipi_number, irq);
}
/* Restore LSI state */ if (val & KVM_XIVE_LEVEL_SENSITIVE) {
state->lsi = true; if (val & KVM_XIVE_LEVEL_ASSERTED)
state->asserted = true;
pr_devel(" LSI ! Asserted=%d\n", state->asserted);
}
/* Mask IRQ to start with */
state->act_server = 0;
state->act_priority = MASKED;
xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
/* Increment the number of valid sources and mark this one valid */ if (!state->valid)
xive->src_count++;
state->valid = true;
/*
 * Validate an event queue size given as a shift value.
 *
 * We only support 64K pages for the moment. This is also advertised in
 * the DT property "ibm,xive-eq-sizes". A shift of 0 is accepted as well
 * because it stands for an EQ reset.
 *
 * Returns 0 when qshift is 0 or 16, -EINVAL for every other value.
 */
static int xive_native_validate_queue_size(u32 qshift)
{
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	/*
	 * 12, 21 and 24 are listed explicitly but are rejected exactly
	 * like any other unlisted value.
	 */
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}
/* reset queue and disable queueing */ if (!kvm_eq.qshift) {
q->guest_qaddr = 0;
q->guest_qshift = 0;
rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
NULL, 0, true); if (rc) {
pr_err("Failed to reset queue %d for VCPU %d: %d\n",
priority, xc->server_num, rc); return rc;
}
return 0;
}
/* * sPAPR specifies a "Unconditional Notify (n) flag" for the * H_INT_SET_QUEUE_CONFIG hcall which forces notification * without using the coalescing mechanisms provided by the * XIVE END ESBs. This is required on KVM as notification * using the END ESBs is not supported.
*/ if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
pr_err("invalid flags %d\n", kvm_eq.flags); return -EINVAL;
}
/*
 * Back up the guest address of the queue page so that the EQ page can
 * be marked dirty for migration.
 */
q->guest_qaddr = kvm_eq.qaddr;
q->guest_qshift = kvm_eq.qshift;
/* * Unconditional Notification is forced by default at the * OPAL level because the use of END ESBs is not supported by * Linux.
*/
rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
(__be32 *) qaddr, kvm_eq.qshift, true); if (rc) {
pr_err("Failed to configure queue %d for VCPU %d: %d\n",
priority, xc->server_num, rc);
put_page(page); return rc;
}
/* * Only restore the queue state when needed. When doing the * H_INT_SET_SOURCE_CONFIG hcall, it should not.
*/ if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
rc = xive_native_set_queue_state(xc->vp_id, priority,
kvm_eq.qtoggle,
kvm_eq.qindex); if (rc) goto error;
}
/* * The struct kvmppc_xive_irq_state reflects the state * of the EAS configuration and not the state of the * source. The source is masked setting the PQ bits to * '-Q', which is what is being done before calling * the KVM_DEV_XIVE_EQ_SYNC control. * * If a source EAS is configured, OPAL syncs the XIVE * IC of the source and the XIVE IC of the previous * target if any. * * So it should be fine ignoring MASKED sources as * they have been synced already.
*/ if (state->act_priority == MASKED) continue;
/*
 * Tell whether a (group, attr) pair of a device attribute is accepted.
 *
 * @dev:  the KVM device; not used by this check.
 * @attr: the attribute to test; only attr->group and attr->attr are read.
 *
 * Returns 0 when the pair is recognised, -ENXIO otherwise.
 */
static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
		case KVM_DEV_XIVE_NR_SERVERS:
			return 0;
		}
		/* Unknown control: fall out to the -ENXIO return below */
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		/*
		 * For the source groups attr->attr must lie within
		 * [KVMPPC_XIVE_FIRST_IRQ, KVMPPC_XIVE_NR_IRQS).
		 */
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		/* Any attr value is accepted for this group */
		return 0;
	}
	return -ENXIO;
}
/* * Called when device fd is closed. kvm->lock is held.
*/ staticvoid kvmppc_xive_native_release(struct kvm_device *dev)
{ struct kvmppc_xive *xive = dev->private; struct kvm *kvm = xive->kvm; struct kvm_vcpu *vcpu; unsignedlong i;
pr_devel("Releasing xive native device\n");
/* * Clear the KVM device file address_space which is used to * unmap the ESB pages when a device is passed-through.
*/
mutex_lock(&xive->mapping_lock);
xive->mapping = NULL;
mutex_unlock(&xive->mapping_lock);
/* * Since this is the device release function, we know that * userspace does not have any open fd or mmap referring to * the device. Therefore there can not be any of the * device attribute set/get, mmap, or page fault functions * being executed concurrently, and similarly, the * connect_vcpu and set/clr_mapped functions also cannot * be being executed.
*/
debugfs_remove(xive->dentry);
/* * We should clean up the vCPU interrupt presenters first.
*/
kvm_for_each_vcpu(i, vcpu, kvm) { /* * Take vcpu->mutex to ensure that no one_reg get/set ioctl * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done. * Holding the vcpu->mutex also means that the vcpu cannot * be executing the KVM_RUN ioctl, and therefore it cannot * be executing the XIVE push or pull code or accessing * the XIVE MMIO regions.
*/
mutex_lock(&vcpu->mutex);
kvmppc_xive_native_cleanup_vcpu(vcpu);
mutex_unlock(&vcpu->mutex);
}
/* * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe * against xive code getting called during vcpu execution or * set/get one_reg operations.
*/
kvm->arch.xive = NULL;
for (i = 0; i <= xive->max_sbid; i++) { if (xive->src_blocks[i])
kvmppc_xive_free_sources(xive->src_blocks[i]);
kfree(xive->src_blocks[i]);
xive->src_blocks[i] = NULL;
}
if (xive->vp_base != XIVE_INVALID_VP)
xive_native_free_vp_block(xive->vp_base);
/* * A reference of the kvmppc_xive pointer is now kept under * the xive_devices struct of the machine for reuse. It is * freed when the VM is destroyed for now until we fix all the * execution paths.
*/
/* VP allocation is delayed to the first call to connect_vcpu */
xive->vp_base = XIVE_INVALID_VP; /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets * on a POWER9 system.
*/
xive->nr_servers = KVM_MAX_VCPUS;
if (xive_native_has_single_escalation())
xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
if (xive_native_has_save_restore())
xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
/*
 * kvmppc_xive_native_get_vp() - copy the vCPU's saved XIVE thread context
 * word into val->xive_timaval[0], merged with the IPB bits of the VP state
 * obtained from OPAL.
 *
 * Returns -EPERM if kvmppc_xive_enabled() is false for the vCPU, -ENOENT if
 * the vCPU has no xive_vcpu, the error code of xive_native_get_vp_state()
 * if that call fails, -EBUSY if vcpu->arch.xive_pushed is set, 0 otherwise.
 *
 * NOTE(review): despite the "get" name, the tail of this body also writes
 * val->xive_timaval[0] back into vcpu->arch.xive_saved_state.w01, and its
 * comments speak of "restoring" state. This looks like the end of a setter
 * that was spliced onto the getter - confirm against the upstream file.
 */
int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
u64 opal_state; int rc;
/* Refuse vCPUs for which kvmppc_xive_enabled() reports false */
if (!kvmppc_xive_enabled(vcpu)) return -EPERM;
/* No XIVE vCPU structure attached to this vCPU */
if (!xc) return -ENOENT;
/* Thread context registers. We only care about IPB and CPPR */
val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
/* Get the VP state from OPAL */
rc = xive_native_get_vp_state(xc->vp_id, &opal_state); if (rc) return rc;
/* * Capture the backup of IPB register in the NVT structure and * merge it in our KVM VP state.
 */
val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
/* We can't update the state of a "pushed" VCPU */ if (WARN_ON(vcpu->arch.xive_pushed)) return -EBUSY;
/* * Restore the thread context registers. IPB and CPPR should * be the only ones that matter.
 */
/* NOTE(review): at this point val already holds the merged IPB bits */
vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
/* * There is no need to restore the XIVE internal state (IPB * stored in the NVT) as the IPB register was merged in KVM VP * state when captured.
 */ return 0;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.