// SPDX-License-Identifier: GPL-2.0-only
/*
 * Xen event channels
 *
 * Xen models interrupts with abstract event channels. Because each
 * domain gets 1024 event channels, but NR_IRQ is not that large, we
 * must dynamically map irqs<->event channels. The event channels
 * interface with the rest of the kernel by defining a xen interrupt
 * chip. When an event is received, it is mapped to an irq and sent
 * through the normal interrupt processing path.
 *
 * There are four kinds of events which can be mapped to an event
 * channel:
 *
 * 1. Inter-domain notifications. This includes all the virtual
 *    device events, since they're driven by front-ends in another domain
 *    (typically dom0).
 * 2. VIRQs, typically used for timers. These are per-cpu events.
 * 3. IPIs.
 * 4. PIRQs - Hardware interrupts.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
/*
 * This lock protects updates to the following mapping and reference-count
 * arrays. The lock does not need to be acquired to read the mapping tables.
 */
static DEFINE_MUTEX(irq_mapping_update_lock);
/* * We've prepared an empty row for the mapping. If a different * thread was faster inserting it, we can drop ours.
*/ if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
free_page((unsignedlong) evtchn_row);
}
/*
 * Account one more event channel bound to the vCPU recorded in @info.
 * Skips the accounting (with a one-shot warning) for an out-of-range
 * CPU index or if the per-CPU counter would overflow; is_accounted is
 * set only when the counter was actually incremented, so the matching
 * decrement can be skipped otherwise.
 */
static void channels_on_cpu_inc(struct irq_info *info)
{
	if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
		return;

	/* atomic_add_unless() refuses the increment at INT_MAX. */
	if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
					    INT_MAX)))
		return;

	info->is_accounted = 1;
}
/*
 * Release the Linux irq descriptor backing @irq, unless it is a legacy
 * IRQ whose descriptor is owned by the architecture code.
 */
static void xen_irq_free_desc(unsigned int irq)
{
	/* Legacy IRQ descriptors are managed by the arch. */
	if (irq >= nr_legacy_irqs())
		irq_free_desc(irq);
}
/**
 * notify_remote_via_irq - send event to remote end of event channel via irq
 * @irq: irq of event channel to send event to
 *
 * Unlike notify_remote_via_evtchn(), this is safe to use across
 * save/restore. Notifications on a broken connection are silently
 * dropped.
 */
void notify_remote_via_irq(int irq)
{
	evtchn_port_t evtchn = evtchn_from_irq(irq);

	if (VALID_EVTCHN(evtchn))
		notify_remote_via_evtchn(evtchn);
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
if (irq >= 0) {
info = xen_irq_init(irq); if (!info)
xen_irq_free_desc(irq);
}
return info;
}
/*
 * Allocate an irq (and its irq_info) for the given GSI. Returns the
 * new irq_info, or NULL if either descriptor allocation or the
 * irq_info initialization failed.
 */
static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi)
{
	int irq;
	struct irq_info *info;

	/*
	 * A PV guest has no concept of a GSI (since it has no ACPI
	 * nor access to/knowledge of the physical APICs). Therefore
	 * all IRQs are dynamically allocated from the entire IRQ
	 * space.
	 */
	if (xen_pv_domain() && !xen_initial_domain())
		return xen_allocate_irq_dynamic();

	/* Legacy IRQ descriptors are already allocated by the arch. */
	if (gsi < nr_legacy_irqs())
		irq = gsi;
	else
		irq = irq_alloc_desc_at(gsi, -1);

	info = xen_irq_init(irq);
	if (!info)
		xen_irq_free_desc(irq);

	return info;
}
/*
 * Tear down an irq_info: remove it from the lateeoi and global lists
 * and schedule its memory for freeing. The actual free happens via
 * RCU work so event handlers still inside the RCU-protected handling
 * loop can finish first.
 */
static void xen_free_irq(struct irq_info *info)
{
	if (WARN_ON(!info))
		return;

	if (!list_empty(&info->eoi_list))
		lateeoi_list_del(info);

	list_del(&info->list);

	/* Freeing with outstanding references indicates a refcount bug. */
	WARN_ON(info->refcnt > 0);

	queue_rcu_work(system_wq, &info->rwork);
}
/*
 * Mark event processing for @info as finished: clear the active flag
 * (with release ordering so prior handling is visible first) and clear
 * the pending bit of the event channel.
 *
 * Not called for lateeoi events.
 */
static void event_handler_exit(struct irq_info *info)
{
	smp_store_release(&info->is_active, 0);
	clear_evtchn(info->evtchn);
}
if (info->refcnt > 0) {
info->refcnt--; if (info->refcnt != 0) return;
}
evtchn = info->evtchn;
if (VALID_EVTCHN(evtchn)) { unsignedint cpu = info->cpu; struct xenbus_device *dev;
if (!info->is_static)
close_evtchn = true;
switch (info->type) { case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1; break; case IRQT_IPI:
per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1;
per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0; break; case IRQT_EVTCHN:
dev = info->u.interdomain; if (dev)
atomic_dec(&dev->event_channels); break; default: break;
}
xen_irq_info_cleanup(info);
if (close_evtchn)
xen_evtchn_close(evtchn);
}
xen_free_irq(info);
}
/*
 * Do not make any assumptions regarding the relationship between the
 * IRQ number returned here and the Xen pirq argument.
 *
 * Note: We don't assign an event channel until the irq actually started
 * up. Return an existing irq if we've already got one for the gsi.
 *
 * Shareable implies level triggered, not shareable implies edge
 * triggered here.
 */
int xen_bind_pirq_gsi_to_irq(unsigned gsi, unsigned pirq, int shareable, char *name)
{
	struct irq_info *info;
	struct physdev_irq irq_op;
	int ret;

	mutex_lock(&irq_mapping_update_lock);

	/* Reuse an existing binding for this gsi if there is one. */
	ret = xen_irq_from_gsi(gsi);
	if (ret != -1) {
		pr_info("%s: returning irq %d for gsi %u\n",
			__func__, ret, gsi);
		goto out;
	}

	info = xen_allocate_irq_gsi(gsi);
	if (!info)
		goto out;

	irq_op.irq = info->irq;
	irq_op.vector = 0;

	/* Only the privileged domain can do this. For non-priv, the pcifront
	 * driver provides a PCI bus that does the call to do exactly
	 * this in the priv domain. */
	if (xen_initial_domain() &&
	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
		xen_free_irq(info);
		ret = -ENOSPC;
		goto out;
	}

	pirq_query_unmask(info);

	/* We try to use the handler with the appropriate semantic for the
	 * type of interrupt: if the interrupt is an edge triggered
	 * interrupt we use handle_edge_irq.
	 *
	 * On the other hand if the interrupt is level triggered we use
	 * handle_fasteoi_irq like the native code does for this kind of
	 * interrupts.
	 *
	 * Depending on the Xen version, pirq_needs_eoi might return true
	 * not only for level triggered interrupts but for edge triggered
	 * interrupts too. In any case Xen always honors the eoi mechanism,
	 * not injecting any more pirqs of the same kind if the first one
	 * hasn't received an eoi yet. Therefore using the fasteoi handler
	 * is the right choice either way.
	 */
	if (shareable)
		irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
					      handle_fasteoi_irq, name);
	else
		irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
					      handle_edge_irq, name);

	ret = info->irq;

out:
	mutex_unlock(&irq_mapping_update_lock);

	return ret;
}
/*
 * Ask the hypervisor for a free pirq suitable for backing an MSI
 * interrupt. Returns the pirq number on success, -1 on failure.
 *
 * NOTE(review): 'rc' is tested without any visible assignment — the
 * HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq)
 * call that should set 'rc' and fill 'op_get_free_pirq' appears to
 * have been lost from this excerpt; confirm against the upstream file.
 */
#ifdef CONFIG_PCI_MSI int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
{ int rc; struct physdev_get_free_pirq op_get_free_pirq;
	WARN_ONCE(rc == -ENOSYS, "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
	return rc ? -1 : op_get_free_pirq.pirq;
}
/*
 * Bind 'nvec' MSI vectors starting at 'pirq' to consecutive irqs,
 * owned by domain 'domid'. Returns the first irq on success, a
 * negative errno on failure.
 *
 * NOTE(review): 'irq' is used without a visible assignment, and the
 * mutex_lock(&irq_mapping_update_lock) matching the unlocks below is
 * not visible either — the irq-range allocation at the top of this
 * function appears to have been dropped from this excerpt; confirm
 * against the upstream file. 'constchar' in the prototype is also an
 * extraction artifact of 'const char'.
 */
int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, int pirq, int nvec, constchar *name, domid_t domid)
{ int i, irq, ret; struct irq_info *info;
	/* Vectors after the first are flagged PIRQ_MSI_GROUP so that only
	 * the first one triggers the pirq unmap on teardown. */
	for (i = 0; i < nvec; i++) {
		info = xen_irq_init(irq + i); if (!info) {
			ret = -ENOMEM; goto error_irq;
		}
		irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
		ret = xen_irq_info_pirq_setup(info, 0, pirq + i, 0, domid,
					      i == 0 ? 0 : PIRQ_MSI_GROUP); if (ret < 0) goto error_irq;
	}
	/* The MSI descriptor is attached to the first irq of the group. */
	ret = irq_set_msi_desc(irq, msidesc); if (ret < 0) goto error_irq;
out:
	mutex_unlock(&irq_mapping_update_lock); return irq;
error_irq: while (nvec--) {
		/* Unwind every vector set up so far. */
		info = info_for_irq(irq + nvec);
		__unbind_from_irq(info, irq + nvec);
	}
	mutex_unlock(&irq_mapping_update_lock); return ret;
} #endif
int xen_destroy_irq(int irq)
{ struct physdev_unmap_pirq unmap_irq; struct irq_info *info = info_for_irq(irq); int rc = -ENOENT;
mutex_lock(&irq_mapping_update_lock);
/* * If trying to remove a vector in a MSI group different * than the first one skip the PIRQ unmap unless this vector * is the first one in the group.
*/ if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
unmap_irq.pirq = info->u.pirq.pirq;
unmap_irq.domid = info->u.pirq.domid;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); /* If another domain quits without making the pci_disable_msix * call, the Xen hypervisor takes care of freeing the PIRQs * (free_domain_pirqs).
*/ if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
pr_info("domain %d does not have %d anymore\n",
info->u.pirq.domid, info->u.pirq.pirq); elseif (rc) {
pr_warn("unmap irq failed %d\n", rc); goto out;
}
}
ret = xen_irq_info_evtchn_setup(info, evtchn, dev); if (ret < 0) {
__unbind_from_irq(info, info->irq); goto out;
} /* * New interdomain events are initially bound to vCPU0 This * is required to setup the event channel in the first * place and also important for UP guests because the * affinity setting is not invoked on them so nothing would * bind the channel.
*/
bind_evtchn_to_cpu(info, 0, false);
} elseif (!WARN_ON(info->type != IRQT_EVTCHN)) { if (shared && !WARN_ON(info->refcnt < 0))
info->refcnt++;
}
ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi); if (ret < 0) {
__unbind_from_irq(info, info->irq); goto out;
} /* * Force the affinity mask to the target CPU so proc shows * the correct target.
*/
bind_evtchn_to_cpu(info, cpu, true);
ret = info->irq;
} else {
info = info_for_irq(ret);
WARN_ON(info == NULL || info->type != IRQT_IPI);
}
/**
 * xen_evtchn_nr_channels - number of usable event channel ports
 *
 * This may be less than the maximum supported by the current
 * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
 * supported.
 */
unsigned xen_evtchn_nr_channels(void)
{
	return evtchn_ops->nr_channels();
}
EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
int bind_virq_to_irq(unsignedint virq, unsignedint cpu, bool percpu)
{ struct evtchn_bind_virq bind_virq;
evtchn_port_t evtchn = 0; struct irq_info *info; int ret;
mutex_lock(&irq_mapping_update_lock);
ret = per_cpu(virq_to_irq, cpu)[virq];
if (ret == -1) {
info = xen_allocate_irq_dynamic(); if (!info) goto out;
if (WARN_ON(!info)) return;
free_irq(irq, dev_id);
unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
/** * xen_set_irq_priority() - set an event channel priority. * @irq:irq bound to an event channel. * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
*/ int xen_set_irq_priority(unsigned irq, unsigned priority)
{ struct evtchn_set_priority set_priority;
/* * Check for timeout every 256 events. * We are setting the timeout value only after the first 256 * events in order to not hurt the common case of few loop * iterations. The 256 is basically an arbitrary value. * * In case we are hitting the timeout we need to defer all further * EOIs in order to ensure to leave the event handling loop rather * sooner than later.
*/ if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
ktime_t kt = ktime_get();
int xen_evtchn_do_upcall(void)
{ struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE; int cpu = smp_processor_id(); struct evtchn_loop_ctrl ctrl = { 0 };
/* * When closing an event channel the associated IRQ must not be freed * until all cpus have left the event handling loop. This is ensured * by taking the rcu_read_lock() while handling events, as freeing of * the IRQ is handled via queue_rcu_work() _after_ closing the event * channel.
*/
rcu_read_lock();
do {
vcpu_info->evtchn_upcall_pending = 0;
xen_evtchn_handle_events(cpu, &ctrl);
BUG_ON(!irqs_disabled());
virt_rmb(); /* Hypervisor can set upcall pending. */
} while (vcpu_info->evtchn_upcall_pending);
rcu_read_unlock();
/* * Increment irq_epoch only now to defer EOIs only for * xen_irq_lateeoi() invocations occurring from inside the loop * above.
*/
__this_cpu_inc(irq_epoch);
/*
 * Rebind a new event channel to an existing irq (used after resume,
 * when the irq<->evtchn mappings have been cleared).
 *
 * NOTE(review): the code that actually installs the new mapping
 * (event-channel setup / CPU binding) and the mutex_unlock() matching
 * the lock below are not visible in this excerpt — they appear to have
 * been dropped; confirm against the upstream file.
 */
void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
{ struct irq_info *info = info_for_irq(irq);
	if (WARN_ON(!info)) return;
	/* Make sure the irq is masked, since the new event channel
	   will also be masked. */
	disable_irq(irq);
	mutex_lock(&irq_mapping_update_lock);
	/* After resume the irq<->evtchn mappings are all cleared out */
	BUG_ON(evtchn_to_info(evtchn)); /* Expect irq to have been bound before,
	   so there should be a proper type */
	BUG_ON(info->type == IRQT_UNBOUND);
	/* Unmask the event channel. */
	enable_irq(irq);
}
/* Rebind an evtchn so that it gets delivered to a specific cpu */ staticint xen_rebind_evtchn_to_cpu(struct irq_info *info, unsignedint tcpu)
{ struct evtchn_bind_vcpu bind_vcpu;
evtchn_port_t evtchn = info ? info->evtchn : 0;
if (!VALID_EVTCHN(evtchn)) return -1;
if (!xen_support_evtchn_rebind()) return -1;
/* Send future instances of this interrupt to other vcpu. */
bind_vcpu.port = evtchn;
bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
/* * Mask the event while changing the VCPU binding to prevent * it being delivered on an unexpected VCPU.
*/
do_mask(info, EVT_MASK_REASON_TEMPORARY);
/* * If this fails, it usually just indicates that we're dealing with a * virq or IPI channel, which don't actually need to be rebound. Ignore * it, but don't do the xenlinux-level rebind in that case.
*/ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) { int old_cpu = info->cpu;
bind_evtchn_to_cpu(info, tcpu, false);
if (info->type == IRQT_VIRQ) { int virq = info->u.virq; int irq = per_cpu(virq_to_irq, old_cpu)[virq];
/* * Find the CPU within @dest mask which has the least number of channels * assigned. This is not precise as the per cpu counts can be modified * concurrently.
*/ staticunsignedint select_target_cpu(conststruct cpumask *dest)
{ unsignedint cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
/* * Catch the unlikely case that dest contains no online CPUs. Can't * recurse.
*/ if (best_cpu == UINT_MAX) return select_target_cpu(cpu_online_mask);
if (VALID_EVTCHN(evtchn)) {
do_mask(info, EVT_MASK_REASON_EOI_PENDING); /* * Don't call event_handler_exit(). * Need to keep is_active non-zero in order to ignore re-raised * events after cpu affinity changes while a lateeoi is pending.
*/
clear_evtchn(evtchn);
}
}
for (virq = 0; virq < NR_VIRQS; virq++) { if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) continue;
info = info_for_irq(irq);
BUG_ON(virq_from_irq(info) != virq);
/* Get a new binding from Xen. */
bind_virq.virq = virq;
bind_virq.vcpu = xen_vcpu_nr(cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq) != 0)
BUG();
evtchn = bind_virq.port;
/* Record the new mapping. */
xen_irq_info_virq_setup(info, cpu, evtchn, virq); /* The affinity mask is still valid */
bind_evtchn_to_cpu(info, cpu, false);
}
}
for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) continue;
info = info_for_irq(irq);
BUG_ON(ipi_from_irq(info) != ipi);
/* Get a new binding from Xen. */
bind_ipi.vcpu = xen_vcpu_nr(cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
&bind_ipi) != 0)
BUG();
evtchn = bind_ipi.port;
/* Record the new mapping. */
xen_irq_info_ipi_setup(info, cpu, evtchn, ipi); /* The affinity mask is still valid */
bind_evtchn_to_cpu(info, cpu, false);
}
}
/* Clear an irq's pending state, in preparation for polling on it */
void xen_clear_irq_pending(int irq)
{
	struct irq_info *info = info_for_irq(irq);
	evtchn_port_t evtchn = info ? info->evtchn : 0;

	if (VALID_EVTCHN(evtchn))
		event_handler_exit(info);
}
EXPORT_SYMBOL(xen_clear_irq_pending);
if (VALID_EVTCHN(evtchn))
ret = test_evtchn(evtchn);
return ret;
}
/*
 * Poll waiting for an irq to become pending with timeout. In the usual
 * case, the irq will be disabled so it won't deliver an interrupt.
 *
 * NOTE(review): 'poll' is handed to SCHEDOP_poll without any visible
 * initialization of its fields (ports, nr_ports, timeout), and the
 * 'timeout' parameter is never read in this excerpt — those
 * assignments appear to have been dropped; confirm against the
 * upstream file.
 */ void xen_poll_irq_timeout(int irq, u64 timeout)
{
	evtchn_port_t evtchn = evtchn_from_irq(irq);
	if (VALID_EVTCHN(evtchn)) { struct sched_poll poll;
		if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
			BUG();
	}
}
EXPORT_SYMBOL(xen_poll_irq_timeout);

/* Poll waiting for an irq to become pending. In the usual case, the
 * irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq(int irq)
{
	xen_poll_irq_timeout(irq, 0 /* no timeout */);
}
/* Check whether the IRQ line is shared with other guests. */ int xen_test_irq_shared(int irq)
{ struct irq_info *info = info_for_irq(irq); struct physdev_irq_status_query irq_status;
staticstruct irq_chip xen_lateeoi_chip __read_mostly = { /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
.name = "xen-dyn-lateeoi",
#ifdef CONFIG_X86
#ifdef CONFIG_XEN_PVHVM
/* Vector callbacks are better than PCI interrupts to receive event
 * channel notifications because we can receive vector callbacks on any
 * vcpu and we don't need PCI support or APIC interactions. */
void xen_setup_callback_vector(void)
{
	uint64_t callback_via;

	if (xen_have_vector_callback) {
		callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
		if (xen_set_callback_via(callback_via)) {
			pr_err("Request for Xen HVM callback vector failed\n");
			/* Fall back to non-vector delivery from here on. */
			xen_have_vector_callback = false;
		}
	}
}
/* * Setup per-vCPU vector-type callbacks. If this setup is unavailable, * fallback to the global vector-type callback.
*/ static __init void xen_init_setup_upcall_vector(void)
{ if (!xen_have_vector_callback) return;
/* No event channels are 'live' right now. */ for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
mask_evtchn(evtchn);
pirq_needs_eoi = pirq_needs_eoi_flag;
#ifdef CONFIG_X86 if (xen_pv_domain()) { if (xen_initial_domain())
pci_xen_initial_domain();
}
xen_init_setup_upcall_vector();
xen_alloc_callback_vector();
if (xen_hvm_domain()) {
native_init_IRQ(); /* pci_xen_hvm_init must be called after native_init_IRQ so that
* __acpi_register_gsi can point at the right function */
pci_xen_hvm_init();
} else { int rc; struct physdev_pirq_eoi_gmfn eoi_gmfn;
The information on this web page has been compiled carefully and to the
best of our knowledge. However, no guarantee is given as to the
completeness, correctness, or quality of the information provided.
Note:
The colored syntax rendering and the measurement are still experimental.