 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
 * Author: Timur Tabi <timur@freescale.com>
 *
 * This file is licensed under the terms of the GNU General Public License
 * version 2.  This program is licensed "as is" without any warranty of any
 * kind, whether express or implied.
 *
 * The Freescale hypervisor management driver provides several services to
 * drivers and applications related to the Freescale hypervisor:
 *
 * 1. An ioctl interface for querying and managing partitions.
 *
 * 2. A file interface for reading incoming doorbells.
 *
 * 3. An interrupt handler for shutting down the partition upon receiving the
 *    shutdown doorbell from a manager partition.
 *
 * 4. A kernel interface for receiving callbacks when a managed partition
 *    shuts down.
*/
/* Get the parameters from the user */ if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_stop))) return -EFAULT;
param.ret = fh_partition_stop(param.partition);
if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) return -EFAULT;
return 0;
}
/*
 * Ioctl interface for FSL_HV_IOCTL_MEMCPY
 *
 * The FH_MEMCPY hypercall takes an array of address/address/size structures
 * to represent the data being copied.  As a convenience to the user, this
 * ioctl takes a user-created buffer and a pointer to a guest physically
 * contiguous buffer in the remote partition, and creates the
 * address/address/size array for the hypercall.
*/ staticlong ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p)
{ struct fsl_hv_ioctl_memcpy param;
unsignedint num_pages; unsignedlong lb_offset; /* Offset within a page of the local buffer */
unsignedint i; long ret = 0; int num_pinned = 0; /* return value from get_user_pages_fast() */
phys_addr_t remote_paddr; /* The next address in the remote buffer */
uint32_t count; /* The number of bytes left to copy */
/* Get the parameters from the user */ if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_memcpy))) return -EFAULT;
/* * One partition must be local, the other must be remote. In other * words, if source and target are both -1, or are both not -1, then * return an error.
*/ if ((param.source == -1) == (param.target == -1)) return -EINVAL;
/* * The array of pages returned by get_user_pages_fast() covers only * page-aligned memory. Since the user buffer is probably not * page-aligned, we need to handle the discrepancy. * * We calculate the offset within a page of the S/G list, and make * adjustments accordingly. This will result in a page list that looks * like this: * * ---- <-- first page starts before the buffer * | | * |////|-> ---- * |////| | | * ---- | | * | | * ---- | | * |////| | | * |////| | | * |////| | | * ---- | | * | | * ---- | | * |////| | | * |////| | | * |////| | | * ---- | | * | | * ---- | | * |////| | | * |////|-> ---- * | | <-- last page ends after the buffer * ---- * * The distance between the start of the first page and the start of the * buffer is lb_offset. The hashed (///) areas are the parts of the * page list that contain the actual buffer. * * The advantage of this approach is that the number of pages is * equal to the number of entries in the S/G list that we give to the * hypervisor.
*/
lb_offset = param.local_vaddr & (PAGE_SIZE - 1); if (param.count == 0 ||
param.count > U64_MAX - lb_offset - PAGE_SIZE + 1) return -EINVAL;
num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
/* Allocate the buffers we need */
/* * 'pages' is an array of struct page pointers that's initialized by * get_user_pages_fast().
*/
pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); if (!pages) {
pr_debug("fsl-hv: could not allocate page list\n"); return -ENOMEM;
}
/* * sg_list is the list of fh_sg_list objects that we pass to the * hypervisor.
*/
sg_list_unaligned = kmalloc(num_pages * sizeof(struct fh_sg_list) + sizeof(struct fh_sg_list) - 1, GFP_KERNEL); if (!sg_list_unaligned) {
pr_debug("fsl-hv: could not allocate S/G list\n");
ret = -ENOMEM; goto free_pages;
}
sg_list = PTR_ALIGN(sg_list_unaligned, sizeof(struct fh_sg_list));
/* Get the physical addresses of the source buffer */
num_pinned = get_user_pages_fast(param.local_vaddr - lb_offset,
num_pages, param.source != -1 ? FOLL_WRITE : 0, pages);
if (num_pinned != num_pages) {
pr_debug("fsl-hv: could not lock source buffer\n");
ret = (num_pinned < 0) ? num_pinned : -EFAULT; gotoexit;
}
/* * Build the fh_sg_list[] array. The first page is special * because it's misaligned.
*/ if (param.source == -1) {
sg_list[0].source = page_to_phys(pages[0]) + lb_offset;
sg_list[0].target = param.remote_paddr;
} else {
sg_list[0].source = param.remote_paddr;
sg_list[0].target = page_to_phys(pages[0]) + lb_offset;
}
sg_list[0].size = min_t(uint64_t, param.count, PAGE_SIZE - lb_offset);
/* * Ioctl main entry point
*/ staticlong fsl_hv_ioctl(struct file *file, unsignedint cmd, unsignedlong argaddr)
{ void __user *arg = (void __user *)argaddr; long ret;
switch (cmd) { case FSL_HV_IOCTL_PARTITION_RESTART:
ret = ioctl_restart(arg); break; case FSL_HV_IOCTL_PARTITION_GET_STATUS:
ret = ioctl_status(arg); break; case FSL_HV_IOCTL_PARTITION_START:
ret = ioctl_start(arg); break; case FSL_HV_IOCTL_PARTITION_STOP:
ret = ioctl_stop(arg); break; case FSL_HV_IOCTL_MEMCPY:
ret = ioctl_memcpy(arg); break; case FSL_HV_IOCTL_DOORBELL:
ret = ioctl_doorbell(arg); break; case FSL_HV_IOCTL_GETPROP:
ret = ioctl_dtprop(arg, 0); break; case FSL_HV_IOCTL_SETPROP:
ret = ioctl_dtprop(arg, 1); break; default:
pr_debug("fsl-hv: bad ioctl dir=%u type=%u cmd=%u size=%u\n",
_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd),
_IOC_SIZE(cmd)); return -ENOTTY;
}
return ret;
}
/* Linked list of processes that have us open */ staticstruct list_head db_list;
/* spinlock for db_list */ static DEFINE_SPINLOCK(db_list_lock);
/* The size of the doorbell event queue.  This must be a power of two. */
#define QSIZE 16

/*
 * nextp() wraps by masking with (QSIZE - 1), which only yields a correct
 * modulo when QSIZE is a power of two, so enforce that at build time.
 */
_Static_assert(QSIZE > 0 && (QSIZE & (QSIZE - 1)) == 0,
	       "QSIZE must be a power of two");

/* Returns the next head/tail pointer, wrapping around the queue if necessary */
#define nextp(x) (((x) + 1) & (QSIZE - 1))
/* Linked list of ISRs that we registered */
struct list_head isr_list;

/* Per-ISR data structure */
struct doorbell_isr {
	struct list_head list;	/* Links this entry into isr_list */
	unsigned int irq;	/* The IRQ number handed back to free_irq() */
	uint32_t doorbell;	/* The doorbell handle */
	uint32_t partition;	/* The partition handle, if used */
};
/*
 * Add a doorbell to all of the doorbell queues
 *
 * Walks db_list with db_list_lock held and appends @doorbell to every queue
 * that still has a free slot, then wakes anyone sleeping on that queue's
 * wait queue.  A queue with no free slot is skipped, so the doorbell is not
 * recorded for it.
 */
static void fsl_hv_queue_doorbell(uint32_t doorbell)
{
	struct doorbell_queue *dbq;
	unsigned long flags;

	/* Prevent another core from modifying db_list */
	spin_lock_irqsave(&db_list_lock, flags);

	list_for_each_entry(dbq, &db_list, list) {
		/* Advancing tail onto head would make the queue look empty. */
		if (dbq->head == nextp(dbq->tail))
			continue;

		dbq->q[dbq->tail] = doorbell;
		/*
		 * This memory barrier eliminates the need to grab
		 * the spinlock for dbq.
		 */
		smp_wmb();
		dbq->tail = nextp(dbq->tail);
		wake_up_interruptible(&dbq->wait);
	}

	spin_unlock_irqrestore(&db_list_lock, flags);
}
/* * Interrupt handler for all doorbells * * We use the same interrupt handler for all doorbells. Whenever a doorbell * is rung, and we receive an interrupt, we just put the handle for that * doorbell (passed to us as *data) into all of the queues.
*/ static irqreturn_t fsl_hv_isr(int irq, void *data)
{
fsl_hv_queue_doorbell((uintptr_t) data);
return IRQ_HANDLED;
}
/*
 * State change thread function
 *
 * The state change notification arrives in an interrupt, but we can't call
 * blocking_notifier_call_chain() in an interrupt handler.  We could call
 * atomic_notifier_call_chain(), but that would require the clients' call-back
 * function to run in interrupt context.  Since we don't want to impose that
 * restriction on the clients, we use a threaded IRQ to process the
 * notification in kernel context.
 *
 * @data is used as a struct doorbell_isr pointer.
 *
 * NOTE(review): the body below uses 'ret' and 'status' without declaring
 * them and never calls a notifier chain, although the text above describes
 * one.  It looks as if the bodies of two separate handlers were merged into
 * one; confirm against the authoritative copy of this file before relying
 * on it.
 */
static irqreturn_t fsl_hv_state_change_thread(int irq, void *data)
{
	struct doorbell_isr *dbisr = data;

	/* It's still a doorbell, so add it to all the queues. */
	fsl_hv_queue_doorbell(dbisr->doorbell);

	/*
	 * Determine the new state, and if it's stopped, notify the clients.
	 * IRQ_WAKE_THREAD is returned only when fh_partition_get_status()
	 * returned zero and reported FH_PARTITION_STOPPED; every other
	 * outcome ends with IRQ_HANDLED.
	 */
	ret = fh_partition_get_status(dbisr->partition, &status);
	if (!ret && (status == FH_PARTITION_STOPPED))
		return IRQ_WAKE_THREAD;

	return IRQ_HANDLED;
}
/* * Returns a bitmask indicating whether a read will block
*/ static __poll_t fsl_hv_poll(struct file *filp, struct poll_table_struct *p)
{ struct doorbell_queue *dbq = filp->private_data; unsignedlong flags;
__poll_t mask;
/*
 * Return the handles for any incoming doorbells
 *
 * If there are doorbell handles in the queue for this open instance, then
 * return them to the caller as an array of 32-bit integers.  Otherwise,
 * block until there is at least one handle to return.
 *
 * Returns the number of bytes stored in @buf, which is always a multiple of
 * sizeof(uint32_t) and is zero when @len is too small for a single handle.
 * Returns -EAGAIN if the queue is empty and the file was opened with
 * O_NONBLOCK, -ERESTARTSYS if the wait was interrupted, and -EFAULT if a
 * handle could not be written to user space.
 */
static ssize_t fsl_hv_read(struct file *filp, char __user *buf, size_t len,
			   loff_t *off)
{
	struct doorbell_queue *dbq = filp->private_data;
	uint32_t __user *p = (uint32_t __user *) buf; /* for put_user() */
	unsigned long flags;
	ssize_t count = 0;

	/* Make sure we stop when the user buffer is full. */
	while (len >= sizeof(uint32_t)) {
		uint32_t dbell;	/* Local copy of doorbell queue data */

		spin_lock_irqsave(&dbq->lock, flags);

		/*
		 * If the queue is empty, then either we're done or we need
		 * to block.  If the application specified O_NONBLOCK, then
		 * we return the appropriate error code.
		 */
		if (dbq->head == dbq->tail) {
			spin_unlock_irqrestore(&dbq->lock, flags);
			if (count)
				break;
			if (filp->f_flags & O_NONBLOCK)
				return -EAGAIN;
			if (wait_event_interruptible(dbq->wait,
						     dbq->head != dbq->tail))
				return -ERESTARTSYS;
			continue;
		}

		/*
		 * Even though we have an smp_wmb() in the ISR, the core
		 * might speculatively execute the "dbell = ..." below while
		 * it's evaluating the if-statement above.  In that case, the
		 * value put into dbell could be stale if the core accepts the
		 * speculation.  To prevent that, we need a read memory barrier
		 * here as well.
		 */
		smp_rmb();

		/*
		 * Copy the data to a temporary local buffer, because
		 * we can't call copy_to_user() from inside a spinlock
		 */
		dbell = dbq->q[dbq->head];
		dbq->head = nextp(dbq->head);

		spin_unlock_irqrestore(&dbq->lock, flags);

		if (put_user(dbell, p))
			return -EFAULT;
		p++;
		count += sizeof(uint32_t);
		len -= sizeof(uint32_t);
	}

	return count;
}
/* * Open the driver and prepare for reading doorbells. * * Every time an application opens the driver, we create a doorbell queue * for that file handle. This queue is used for any incoming doorbells.
*/ staticint fsl_hv_open(struct inode *inode, struct file *filp)
{ struct doorbell_queue *dbq; unsignedlong flags;
dbq = kzalloc(sizeof(struct doorbell_queue), GFP_KERNEL); if (!dbq) {
pr_err("fsl-hv: out of memory\n"); return -ENOMEM;
}
/*
 * Returns the handle of the parent of the given node
 *
 * The handle is the value of the 'hv-handle' property, falling back to the
 * 'reg' property when 'hv-handle' is absent.  The property is read as one
 * big-endian 32-bit cell.
 *
 * Returns the handle on success, or -ENODEV if @np has no parent or the
 * parent has neither property with a length of exactly sizeof(uint32_t).
 * The reference taken on the parent node is dropped on every path.
 */
static int get_parent_handle(struct device_node *np)
{
	struct device_node *parent;
	const uint32_t *prop;
	uint32_t handle;
	int len;

	parent = of_get_parent(np);
	if (!parent)
		/* It's not really possible for this to fail */
		return -ENODEV;

	/*
	 * The proper name for the handle property is "hv-handle", but some
	 * older versions of the hypervisor used "reg".
	 */
	prop = of_get_property(parent, "hv-handle", &len);
	if (!prop)
		prop = of_get_property(parent, "reg", &len);

	if (!prop || (len != sizeof(uint32_t))) {
		/* This can happen only if the node is malformed */
		of_node_put(parent);
		return -ENODEV;
	}

	handle = be32_to_cpup(prop);
	of_node_put(parent);

	return handle;
}
/*
 * Register a callback for failover events
 *
 * Device drivers call this to add @nb to the failover_subscribers
 * blocking notifier chain.
 *
 * Returns the result of blocking_notifier_chain_register().
 */
int fsl_hv_failover_register(struct notifier_block *nb)
{
	int rc;

	rc = blocking_notifier_chain_register(&failover_subscribers, nb);

	return rc;
}
EXPORT_SYMBOL(fsl_hv_failover_register);
/*
 * Unregister a callback for failover events
 *
 * Removes @nb from the failover_subscribers blocking notifier chain.
 *
 * Returns the result of blocking_notifier_chain_unregister().
 */
int fsl_hv_failover_unregister(struct notifier_block *nb)
{
	int rc;

	rc = blocking_notifier_chain_unregister(&failover_subscribers, nb);

	return rc;
}
EXPORT_SYMBOL(fsl_hv_failover_unregister);
/*
 * Return TRUE if we're running under FSL hypervisor
 *
 * This function checks to see if we're running under the Freescale
 * hypervisor, and returns zero if we're not, or non-zero if we are.
 *
 * The check is based purely on the device tree: there needs to be a node at
 * the path "/hypervisor" which has a property named "fsl,hv-version".  No
 * processor register is inspected here.
 */
static int has_fsl_hypervisor(void)
{
	struct device_node *node;
	int ret;

	node = of_find_node_by_path("/hypervisor");
	if (!node)
		return 0;

	ret = of_property_present(node, "fsl,hv-version");

	/* Drop the reference obtained by of_find_node_by_path(). */
	of_node_put(node);

	return ret;
}
/* * Freescale hypervisor management driver init * * This function is called when this module is loaded. * * Register ourselves as a miscellaneous driver. This will register the * fops structure and create the right sysfs entries for udev.
*/ staticint __init fsl_hypervisor_init(void)
{ struct device_node *np; struct doorbell_isr *dbisr, *n; int ret;
if (of_device_is_compatible(np, "fsl,hv-shutdown-doorbell")) { /* The shutdown doorbell gets its own ISR */
ret = request_irq(irq, fsl_hv_shutdown_isr, 0,
np->name, NULL);
} elseif (of_device_is_compatible(np, "fsl,hv-state-change-doorbell")) { /* * The state change doorbell triggers a notification if * the state of the managed partition changes to * "stopped". We need a separate interrupt handler for * that, and we also need to know the handle of the * target partition, not just the handle of the * doorbell.
*/
dbisr->partition = ret = get_parent_handle(np); if (ret < 0) {
pr_err("fsl-hv: node %pOF has missing or " "malformed parent\n", np);
kfree(dbisr); continue;
}
ret = request_threaded_irq(irq, fsl_hv_state_change_isr,
fsl_hv_state_change_thread,
0, np->name, dbisr);
} else
ret = request_irq(irq, fsl_hv_isr, 0, np->name, dbisr);
if (ret < 0) {
pr_err("fsl-hv: could not request irq %u for node %pOF\n",
irq, np);
kfree(dbisr); continue;
}
list_add(&dbisr->list, &isr_list);
pr_info("fsl-hv: registered handler for doorbell %u\n",
dbisr->doorbell);
}
/* * Freescale hypervisor management driver termination * * This function is called when this driver is unloaded.
*/ staticvoid __exit fsl_hypervisor_exit(void)
{ struct doorbell_isr *dbisr, *n;
list_for_each_entry_safe(dbisr, n, &isr_list, list) {
free_irq(dbisr->irq, dbisr);
list_del(&dbisr->list);
kfree(dbisr);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.