// SPDX-License-Identifier: GPL-2.0-only /* * vhost transport for vsock * * Copyright (C) 2013-2015 Red Hat, Inc. * Author: Asias He <asias@redhat.com> * Stefan Hajnoczi <stefanha@redhat.com>
*/ #include <linux/miscdevice.h> #include <linux/atomic.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/vmalloc.h> #include <net/sock.h> #include <linux/virtio_vsock.h> #include <linux/vhost.h> #include <linux/hashtable.h>
#include <net/af_vsock.h>
#include "vhost.h"
#define VHOST_VSOCK_DEFAULT_HOST_CID 2 /* Max number of bytes transferred before requeueing the job.
* Using this limit prevents one virtqueue from starving others. */ #define VHOST_VSOCK_WEIGHT 0x80000 /* Max number of packets transferred before requeueing the job. * Using this limit prevents one virtqueue from starving others with * small pkts.
*/ #define VHOST_VSOCK_PKT_WEIGHT 256
/* Used to track all the vhost_vsock instances on the system. */ static DEFINE_MUTEX(vhost_vsock_mutex); static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
/* Callers that dereference the return value must hold vhost_vsock_mutex or the * RCU read lock.
 */
static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
{ struct vhost_vsock *vsock;
if (!skb) {
vhost_enable_notify(&vsock->dev, vq); break;
}
head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
&out, &in, NULL, NULL); if (head < 0) {
virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); break;
}
if (head == vq->num) {
virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); /* We cannot finish yet if more buffers snuck in while * re-enabling notify.
*/ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
vhost_disable_notify(&vsock->dev, vq); continue;
} break;
}
/* If the packet is greater than the space available in the * buffer, we split it using multiple buffers.
*/ if (payload_len > iov_len - sizeof(*hdr)) {
payload_len = iov_len - sizeof(*hdr);
/* As we are copying pieces of large packet's buffer to * small rx buffers, headers of packets in rx queue are * created dynamically and are initialized with header * of current packet(except length). But in case of * SOCK_SEQPACKET, we also must clear message delimeter * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise, * there will be sequence of packets with these * bits set. After initialized header will be copied to * rx buffer, these required bits will be restored.
*/ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;
/* If we didn't send all the payload we can requeue the packet * to send it with the next available buffer.
*/ if (VIRTIO_VSOCK_SKB_CB(skb)->offset < skb->len) {
hdr->flags |= cpu_to_le32(flags_to_restore);
/* We are queueing the same skb to handle * the remaining bytes, and we want to deliver it * to monitoring devices in the next iteration.
*/
virtio_vsock_skb_clear_tap_delivered(skb);
virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
} else { if (virtio_vsock_skb_reply(skb)) { int val;
val = atomic_dec_return(&vsock->queued_replies);
/* Do we have resources to resume tx * processing?
*/ if (val + 1 == tx_vq->num)
restart_tx = true;
}
static int
vhost_transport_send_pkt(struct sk_buff *skb)
{ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); struct vhost_vsock *vsock; int len = skb->len;
rcu_read_lock();
/* Find the vhost_vsock according to guest context id */
vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid)); if (!vsock) {
rcu_read_unlock();
kfree_skb(skb); return -ENODEV;
}
if (virtio_vsock_skb_reply(skb))
atomic_inc(&vsock->queued_replies);
/* The pkt is too big or the length in the header is invalid */ if (payload_len + sizeof(*hdr) > len) {
kfree_skb(skb); return NULL;
}
virtio_vsock_skb_put(skb, payload_len);
if (skb_copy_datagram_from_iter(skb, 0, &iov_iter, payload_len)) {
vq_err(vq, "Failed to copy %zu byte payload\n", payload_len);
kfree_skb(skb); return NULL;
}
return skb;
}
/* Is there space left for replies to rx packets? */
static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
{ struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX]; int val;
smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
val = atomic_read(&vsock->queued_replies);
ret = vhost_dev_check_owner(&vsock->dev); if (ret) goto err;
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
vq = &vsock->vqs[i];
mutex_lock(&vq->mutex);
if (!vhost_vq_access_ok(vq)) {
ret = -EFAULT; goto err_vq;
}
if (!vhost_vq_get_backend(vq)) {
vhost_vq_set_backend(vq, vsock);
ret = vhost_vq_init_access(vq); if (ret) goto err_vq;
}
mutex_unlock(&vq->mutex);
}
/* Some packets may have been queued before the device was started, * let's kick the send worker to send them.
*/
vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
/* This struct is large and allocation could fail, fall back to vmalloc * if there is no other way.
*/
vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!vsock) return -ENOMEM;
vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); if (!vqs) {
ret = -ENOMEM; goto out;
}
mutex_lock(&vhost_vsock_mutex); if (vsock->guest_cid)
hash_del_rcu(&vsock->hash);
mutex_unlock(&vhost_vsock_mutex);
/* Wait for other CPUs to finish using vsock */
synchronize_rcu();
/* Iterating over all connections for all CIDs to find orphans is
* inefficient. Room for improvement here. */
vsock_for_each_connected_socket(&vhost_transport.transport,
vhost_vsock_reset_orphans);
/* Don't check the owner, because we are in the release path, so we * need to stop the vsock device in any case. * vhost_vsock_stop() can not fail in this case, so we don't need to * check the return code.
*/
vhost_vsock_stop(vsock, false);
vhost_vsock_flush(vsock);
vhost_dev_stop(&vsock->dev);
/* 64-bit CIDs are not yet supported */ if (guest_cid > U32_MAX) return -EINVAL;
/* Refuse if CID is assigned to the guest->host transport (i.e. nested * VM), to make the loopback work.
*/ if (vsock_find_cid(guest_cid)) return -EADDRINUSE;
/* Refuse if CID is already in use */
mutex_lock(&vhost_vsock_mutex);
other = vhost_vsock_get(guest_cid); if (other && other != vsock) {
mutex_unlock(&vhost_vsock_mutex); return -EADDRINUSE;
}
/*
 * NOTE(review): The following text is extraneous to this source file and
 * appears to be website boilerplate accidentally appended during extraction.
 * Translated from German and fenced as a comment so the file remains valid C:
 *
 * "The information on this web page was carefully compiled to the best of
 * our knowledge. However, neither completeness, nor correctness, nor the
 * quality of the information provided is guaranteed.
 *
 * Note: the colored syntax highlighting and the measurement are still
 * experimental."
 */