ret = vmbus_sendpacket(dev->channel, init_pkt, sizeof(struct nvsp_message),
(unsignedlong)init_pkt, VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
/* If failed to switch to/from VF, let data_path_is_vf stay false, * so we use synthetic path to send data.
*/ if (ret) { if (ret != -EAGAIN) {
netdev_err(ndev, "Unable to send sw datapath msg, err: %d\n",
ret); return ret;
}
/* Worker to setup sub channels on initial setup * Initial hotplug event occurs in softirq context * and can't wait for channels.
*/ staticvoid netvsc_subchan_work(struct work_struct *w)
{ struct netvsc_device *nvdev =
container_of(w, struct netvsc_device, subchan_work); struct rndis_device *rdev; int i, ret;
/* Avoid deadlock with device removal already under RTNL */ if (!rtnl_trylock()) {
schedule_work(w); return;
}
rdev = nvdev->extension; if (rdev) {
ret = rndis_set_subchannel(rdev->ndev, nvdev, NULL); if (ret == 0) {
netif_device_attach(rdev->ndev);
} else { /* fallback to only primary channel */ for (i = 1; i < nvdev->num_chn; i++)
netif_napi_del(&nvdev->chan_table[i].napi);
if (!nvdev->recv_buf_gpadl_handle.decrypted)
vfree(nvdev->recv_buf); if (!nvdev->send_buf_gpadl_handle.decrypted)
vfree(nvdev->send_buf);
bitmap_free(nvdev->send_section_map);
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
kfree(nvdev->chan_table[i].recv_buf);
vfree(nvdev->chan_table[i].mrc.slots);
}
/* * If we got a section count, it means we received a * SendReceiveBufferComplete msg (ie sent * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need * to send a revoke msg here
*/ if (net_device->recv_section_cnt) { /* Send the revoke receive buffer */
revoke_packet = &net_device->revoke_packet;
memset(revoke_packet, 0, sizeof(struct nvsp_message));
ret = vmbus_sendpacket(device->channel,
revoke_packet, sizeof(struct nvsp_message),
VMBUS_RQST_ID_NO_RESPONSE,
VM_PKT_DATA_INBAND, 0); /* If the failure is because the channel is rescinded; * ignore the failure since we cannot send on a rescinded * channel. This would allow us to properly cleanup * even when the channel is rescinded.
*/ if (device->channel->rescind)
ret = 0; /* * If we failed here, we might as well return and * have a leak rather than continue and a bugchk
*/ if (ret != 0) {
netdev_err(ndev, "unable to send " "revoke receive buffer to netvsp\n"); return;
}
net_device->recv_section_cnt = 0;
}
}
/* Deal with the send buffer we may have setup. * If we got a send section size, it means we received a * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need * to send a revoke msg here
*/ if (net_device->send_section_cnt) { /* Send the revoke receive buffer */
revoke_packet = &net_device->revoke_packet;
memset(revoke_packet, 0, sizeof(struct nvsp_message));
ret = vmbus_sendpacket(device->channel,
revoke_packet, sizeof(struct nvsp_message),
VMBUS_RQST_ID_NO_RESPONSE,
VM_PKT_DATA_INBAND, 0);
/* If the failure is because the channel is rescinded; * ignore the failure since we cannot send on a rescinded * channel. This would allow us to properly cleanup * even when the channel is rescinded.
*/ if (device->channel->rescind)
ret = 0;
/* If we failed here, we might as well return and * have a leak rather than continue and a bugchk
*/ if (ret != 0) {
netdev_err(ndev, "unable to send " "revoke send buffer to netvsp\n"); return;
}
net_device->send_section_cnt = 0;
}
}
if (net_device->recv_buf_gpadl_handle.gpadl_handle) {
ret = vmbus_teardown_gpadl(device->channel,
&net_device->recv_buf_gpadl_handle);
/* If we failed here, we might as well return and have a leak * rather than continue and a bugchk
*/ if (ret != 0) {
netdev_err(ndev, "unable to teardown receive buffer's gpadl\n"); return;
}
}
}
if (net_device->send_buf_gpadl_handle.gpadl_handle) {
ret = vmbus_teardown_gpadl(device->channel,
&net_device->send_buf_gpadl_handle);
/* If we failed here, we might as well return and have a leak * rather than continue and a bugchk
*/ if (ret != 0) {
netdev_err(ndev, "unable to teardown send buffer's gpadl\n"); return;
}
}
}
int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
{ struct netvsc_channel *nvchan = &net_device->chan_table[q_idx]; int node = cpu_to_node(nvchan->channel->target_cpu);
size_t size;
/* Legacy hosts only allow smaller receive buffer */ if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
buf_size = min_t(unsignedint, buf_size,
NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
net_device->recv_buf = vzalloc(buf_size); if (!net_device->recv_buf) {
netdev_err(ndev, "unable to allocate receive buffer of size %u\n",
buf_size);
ret = -ENOMEM; goto cleanup;
}
net_device->recv_buf_size = buf_size;
/* * Establish the gpadl handle for this buffer on this * channel. Note: This call uses the vmbus connection rather * than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
buf_size,
&net_device->recv_buf_gpadl_handle); if (ret != 0) {
netdev_err(ndev, "unable to establish receive buffer's gpadl\n"); goto cleanup;
}
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
init_packet->msg.v1_msg.send_recv_buf.
gpadl_handle = net_device->recv_buf_gpadl_handle.gpadl_handle;
init_packet->msg.v1_msg.
send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
trace_nvsp_send(ndev, init_packet);
/* Send the gpadl notification request */
ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message),
(unsignedlong)init_packet,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) {
netdev_err(ndev, "unable to send receive buffer's gpadl to netvsp\n"); goto cleanup;
}
/* There should only be one section for the entire receive buffer */ if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
ret = -EINVAL; goto cleanup;
}
/* Ensure buffer will not overflow */ if (net_device->recv_section_size < NETVSC_MTU_MIN || (u64)net_device->recv_section_size *
(u64)net_device->recv_section_cnt > (u64)buf_size) {
netdev_err(ndev, "invalid recv_section_size %u\n",
net_device->recv_section_size);
ret = -EINVAL; goto cleanup;
}
for (i = 0; i < VRSS_CHANNEL_MAX; i++) { struct netvsc_channel *nvchan = &net_device->chan_table[i];
nvchan->recv_buf = kzalloc(net_device->recv_section_size, GFP_KERNEL); if (nvchan->recv_buf == NULL) {
ret = -ENOMEM; goto cleanup;
}
}
/* Setup receive completion ring. * Add 1 to the recv_section_cnt because at least one entry in a * ring buffer has to be empty.
*/
net_device->recv_completion_cnt = net_device->recv_section_cnt + 1;
ret = netvsc_alloc_recv_comp_ring(net_device, 0); if (ret) goto cleanup;
/* Now setup the send buffer. */
buf_size = device_info->send_sections * device_info->send_section_size;
buf_size = round_up(buf_size, PAGE_SIZE);
net_device->send_buf = vzalloc(buf_size); if (!net_device->send_buf) {
netdev_err(ndev, "unable to allocate send buffer of size %u\n",
buf_size);
ret = -ENOMEM; goto cleanup;
}
net_device->send_buf_size = buf_size;
/* Establish the gpadl handle for this buffer on this * channel. Note: This call uses the vmbus connection rather * than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
buf_size,
&net_device->send_buf_gpadl_handle); if (ret != 0) {
netdev_err(ndev, "unable to establish send buffer's gpadl\n"); goto cleanup;
}
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
net_device->send_buf_gpadl_handle.gpadl_handle;
init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
trace_nvsp_send(ndev, init_packet);
/* Send the gpadl notification request */
ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message),
(unsignedlong)init_packet,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) {
netdev_err(ndev, "unable to send send buffer's gpadl to netvsp\n"); goto cleanup;
}
/* Setup state for managing the send buffer. */
net_device->send_section_map = bitmap_zalloc(net_device->send_section_cnt,
GFP_KERNEL); if (!net_device->send_section_map) {
ret = -ENOMEM; goto cleanup;
}
if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) { if (hv_is_isolation_supported())
netdev_info(ndev, "SR-IOV not advertised by guests on the host supporting isolation\n"); else
init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
/* Teaming bit is needed to receive link speed updates */
init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
}
if (nvsp_ver >= NVSP_PROTOCOL_VERSION_61)
init_packet->msg.v2_msg.send_ndis_config.capability.rsc = 1;
trace_nvsp_send(ndev, init_packet);
ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message),
VMBUS_RQST_ID_NO_RESPONSE,
VM_PKT_DATA_INBAND, 0);
/* Send the init request */
ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message),
VMBUS_RQST_ID_NO_RESPONSE,
VM_PKT_DATA_INBAND, 0); if (ret != 0) goto cleanup;
ret = netvsc_init_buf(device, net_device, device_info);
cleanup: return ret;
}
/*
 * netvsc_device_remove - Callback when the root bus device is removed.
 *
 * Tears down the netvsc device in host-version-dependent order: revoke the
 * receive/send buffers, tear down GPADLs (before channel close for pre-Win2016
 * hosts, after close for Win2016+), quiesce NAPI, close the VMBus channel and
 * release the netvsc_device via RCU.
 */
void netvsc_device_remove(struct hv_device *device)
{
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct netvsc_device *net_device
		= rtnl_dereference(net_device_ctx->nvdev);
	int i;

	/*
	 * Revoke receive buffer. If host is pre-Win2016 then tear down
	 * receive buffer GPADL. Do the same for send buffer.
	 */
	netvsc_revoke_recv_buf(device, net_device, ndev);
	if (vmbus_proto_version < VERSION_WIN10)
		netvsc_teardown_recv_gpadl(device, net_device, ndev);

	netvsc_revoke_send_buf(device, net_device, ndev);
	if (vmbus_proto_version < VERSION_WIN10)
		netvsc_teardown_send_gpadl(device, net_device, ndev);

	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);

	/* Disable NAPI and disassociate its context from the device. */
	for (i = 0; i < net_device->num_chn; i++) {
		/* See also vmbus_reset_channel_cb(). */
		/* only disable enabled NAPI channel */
		if (i < ndev->real_num_rx_queues) {
			netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_TX,
					     NULL);
			netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_RX,
					     NULL);
			napi_disable(&net_device->chan_table[i].napi);
		}

		/* BUG fix: this call and the loop's closing brace were
		 * missing, leaving the function with unbalanced braces and
		 * the NAPI contexts never deleted.
		 */
		netif_napi_del(&net_device->chan_table[i].napi);
	}

	/*
	 * At this point, no one should be accessing net_device
	 * except in here
	 */
	netdev_dbg(ndev, "net device safe to remove\n");

	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

	/*
	 * If host is Win2016 or higher then we do the GPADL tear down
	 * here after VMBus is closed.
	 */
	if (vmbus_proto_version >= VERSION_WIN10) {
		netvsc_teardown_recv_gpadl(device, net_device, ndev);
		netvsc_teardown_send_gpadl(device, net_device, ndev);
	}

	/* Release all resources */
	free_netvsc_device_rcu(net_device);
}
/* First check if this is a VMBUS completion without data payload */ if (!msglen) {
cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
desc->trans_id); if (cmd_rqst == VMBUS_RQST_ERROR) {
netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id); return;
}
/* Ensure packet is big enough to read header fields */ if (msglen < sizeof(struct nvsp_message_header)) {
netdev_err(ndev, "nvsp_message length too small: %u\n", msglen); return;
}
nvsp_packet = hv_pkt_data(desc); switch (nvsp_packet->hdr.msg_type) { case NVSP_MSG_TYPE_INIT_COMPLETE: if (msglen < sizeof(struct nvsp_message_header) + sizeof(struct nvsp_message_init_complete)) {
netdev_err(ndev, "nvsp_msg length too small: %u\n",
msglen); return;
} break;
case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE: if (msglen < sizeof(struct nvsp_message_header) +
struct_size_t(struct nvsp_1_message_send_receive_buffer_complete,
sections, 1)) {
netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
msglen); return;
} break;
case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE: if (msglen < sizeof(struct nvsp_message_header) + sizeof(struct nvsp_1_message_send_send_buffer_complete)) {
netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
msglen); return;
} break;
case NVSP_MSG5_TYPE_SUBCHANNEL: if (msglen < sizeof(struct nvsp_message_header) + sizeof(struct nvsp_5_subchannel_complete)) {
netdev_err(ndev, "nvsp_msg5 length too small: %u\n",
msglen); return;
} break;
case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE: if (msglen < sizeof(struct nvsp_message_header) + sizeof(struct nvsp_1_message_send_rndis_packet_complete)) { if (net_ratelimit())
netdev_err(ndev, "nvsp_rndis_pkt_complete length too small: %u\n",
msglen); return;
}
/* If status indicates an error, output a message so we know * there's a problem. But process the completion anyway so the * resources are released.
*/
status = nvsp_packet->msg.v1_msg.send_rndis_pkt_complete.status; if (status != NVSP_STAT_SUCCESS && net_ratelimit())
netdev_err(ndev, "nvsp_rndis_pkt_complete error status: %x\n",
status);
for (i = 0; i < page_count; i++) { char *src = phys_to_virt(pb[i].pfn << HV_HYP_PAGE_SHIFT);
u32 offset = pb[i].offset;
u32 len = pb[i].len;
memcpy(dest, (src + offset), len);
dest += len;
}
if (padding)
memset(dest, 0, padding);
}
/* Undo the swiotlb bounce-buffer mappings created by netvsc_dma_map() for
 * one packet, then free the per-packet dma_range bookkeeping array.
 * No-op when the guest is not an isolation VM or when no ranges were mapped.
 */
void netvsc_dma_unmap(struct hv_device *hv_dev, struct hv_netvsc_packet *packet)
{
	int idx;

	if (!hv_is_isolation_supported() || !packet->dma_range)
		return;

	for (idx = 0; idx < packet->page_buf_cnt; idx++) {
		dma_unmap_single(&hv_dev->device, packet->dma_range[idx].dma,
				 packet->dma_range[idx].mapping_size,
				 DMA_TO_DEVICE);
	}

	kfree(packet->dma_range);
}
/* netvsc_dma_map - Map swiotlb bounce buffer with data page of * packet sent by vmbus_sendpacket_pagebuffer() in the Isolation * VM. * * In isolation VM, netvsc send buffer has been marked visible to * host and so the data copied to send buffer doesn't need to use * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer() * may not be copied to send buffer and so these pages need to be * mapped with swiotlb bounce buffer. netvsc_dma_map() is to do * that. The pfns in the struct hv_page_buffer need to be converted * to bounce buffer's pfn. The loop here is necessary because the * entries in the page buffer array are not necessarily full * pages of data. Each entry in the array has a separate offset and * len that may be non-zero, even for entries in the middle of the * array. And the entries are not physically contiguous. So each * entry must be individually mapped rather than as a contiguous unit. * So not use dma_map_sg() here.
*/ staticint netvsc_dma_map(struct hv_device *hv_dev, struct hv_netvsc_packet *packet, struct hv_page_buffer *pb)
{
u32 page_count = packet->page_buf_cnt;
dma_addr_t dma; int i;
if (!hv_is_isolation_supported()) return 0;
packet->dma_range = kcalloc(page_count, sizeof(*packet->dma_range),
GFP_ATOMIC); if (!packet->dma_range) return -ENOMEM;
for (i = 0; i < page_count; i++) { char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
+ pb[i].offset);
u32 len = pb[i].len;
/* pb[].offset and pb[].len are not changed during dma mapping * and so not reassign.
*/
packet->dma_range[i].dma = dma;
packet->dma_range[i].mapping_size = len;
pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
}
return 0;
}
/* Build an "array" of mpb entries describing the data to be transferred * over VMBus. After the desc header fields, each "array" entry is variable * size, and each entry starts after the end of the previous entry. The * "offset" and "len" fields for each entry imply the size of the entry. * * The pfns are in HV_HYP_PAGE_SIZE, because all communication with Hyper-V * uses that granularity, even if the system page size of the guest is larger. * Each entry in the input "pb" array must describe a contiguous range of * guest physical memory so that the pfns are sequential if the range crosses * a page boundary. The offset field must be < HV_HYP_PAGE_SIZE.
*/ staticinlinevoid netvsc_build_mpb_array(struct hv_page_buffer *pb,
u32 page_buffer_count, struct vmbus_packet_mpb_array *desc,
u32 *desc_size)
{ struct hv_mpb_array *mpb_entry = &desc->range; int i, j;
for (i = 0; i < page_buffer_count; i++) {
u32 offset = pb[i].offset;
u32 len = pb[i].len;
exit: if (ret == 0) {
atomic_inc_return(&nvchan->queue_sends);
if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
netif_tx_stop_queue(txq);
ndev_ctx->eth_stats.stop_queue++;
}
} elseif (ret == -EAGAIN) {
netif_tx_stop_queue(txq);
ndev_ctx->eth_stats.stop_queue++;
} else {
netdev_err(ndev, "Unable to send packet pages %u len %u, ret %d\n",
packet->page_buf_cnt, packet->total_data_buflen,
ret);
}
if (netif_tx_queue_stopped(txq) &&
atomic_read(&nvchan->queue_sends) < 1 &&
!net_device->tx_disable) {
netif_tx_wake_queue(txq);
ndev_ctx->eth_stats.wake_queue++; if (ret == -EAGAIN)
ret = -ENOSPC;
}
return ret;
}
/* Hand the pending multi-send packet and skb over to the caller's
 * pointers, leaving the multi_send_data slot empty for reuse.
 */
static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
				struct sk_buff **msd_skb,
				struct multi_send_data *msdp)
{
	*msd_send = msdp->pkt;
	msdp->pkt = NULL;

	*msd_skb = msdp->skb;
	msdp->skb = NULL;

	msdp->count = 0;
}
/* RCU already held by caller */ /* Batching/bouncing logic is designed to attempt to optimize * performance. * * For small, non-LSO packets we copy the packet to a send buffer * which is pre-registered with the Hyper-V side. This enables the * hypervisor to avoid remapping the aperture to access the packet * descriptor and data. * * If we already started using a buffer and the netdev is transmitting * a burst of packets, keep on copying into the buffer until it is * full or we are done collecting a burst. If there is an existing * buffer with space for the RNDIS descriptor but not the packet, copy * the RNDIS descriptor to the buffer, keeping the packet in place. * * If we do batching and send more than one packet using a single * NetVSC message, free the SKBs of the packets copied, except for the * last packet. This is done to streamline the handling of the case * where the last packet only had the RNDIS descriptor copied to the * send buffer, with the data pointers included in the NetVSC message.
*/ int netvsc_send(struct net_device *ndev, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, struct hv_page_buffer *pb, struct sk_buff *skb, bool xdp_tx)
{ struct net_device_context *ndev_ctx = netdev_priv(ndev); struct netvsc_device *net_device
= rcu_dereference_bh(ndev_ctx->nvdev); struct hv_device *device = ndev_ctx->device_ctx; int ret = 0; struct netvsc_channel *nvchan;
u32 pktlen = packet->total_data_buflen, msd_len = 0; unsignedint section_index = NETVSC_INVALID_INDEX; struct multi_send_data *msdp; struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL; struct sk_buff *msd_skb = NULL; bool try_batch, xmit_more;
/* If device is rescinded, return error and packet will get dropped. */ if (unlikely(!net_device || net_device->destroy)) return -ENODEV;
/* Send a control message or XDP packet directly without accessing * msd (Multi-Send Data) field which may be changed during data packet * processing.
*/ if (!skb || xdp_tx) return netvsc_send_pkt(device, packet, net_device, pb, skb);
/* batch packets in send buffer if possible */
msdp = &nvchan->msd; if (msdp->pkt)
msd_len = msdp->pkt->total_data_buflen;
/* Keep aggregating only if stack says more data is coming * and not doing mixed modes send and not flow blocked
*/
xmit_more = netdev_xmit_more() &&
!packet->cp_partial &&
!netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
/* Ensure packet is big enough to read header fields */ if (msglen < sizeof(struct nvsp_message_header)) {
netif_err(net_device_ctx, rx_err, ndev, "invalid nvsp header, length too small: %u\n",
msglen); return 0;
}
/* Make sure this is a valid nvsp packet */ if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
netif_err(net_device_ctx, rx_err, ndev, "Unknown nvsp packet type received %u\n",
nvsp->hdr.msg_type); return 0;
}
if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
netif_err(net_device_ctx, rx_err, ndev, "Invalid xfer page set id - expecting %x got %x\n",
NETVSC_RECEIVE_BUFFER_ID,
vmxferpage_packet->xfer_pageset_id); return 0;
}
count = vmxferpage_packet->range_cnt;
/* Check count for a valid value */ if (NETVSC_XFER_HEADER_SIZE(count) > desc->offset8 << 3) {
netif_err(net_device_ctx, rx_err, ndev, "Range count is not valid: %d\n",
count); return 0;
}
/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ for (i = 0; i < count; i++) {
u32 offset = vmxferpage_packet->ranges[i].byte_offset;
u32 buflen = vmxferpage_packet->ranges[i].byte_count; void *data; int ret;
if (unlikely(offset > net_device->recv_buf_size ||
buflen > net_device->recv_buf_size - offset)) {
nvchan->rsc.cnt = 0;
status = NVSP_STAT_FAIL;
netif_err(net_device_ctx, rx_err, ndev, "Packet offset:%u + len:%u too big\n",
offset, buflen);
continue;
}
/* We're going to copy (sections of) the packet into nvchan->recv_buf; * make sure that nvchan->recv_buf is large enough to hold the packet.
*/ if (unlikely(buflen > net_device->recv_section_size)) {
nvchan->rsc.cnt = 0;
status = NVSP_STAT_FAIL;
netif_err(net_device_ctx, rx_err, ndev, "Packet too big: buflen=%u recv_section_size=%u\n",
buflen, net_device->recv_section_size);
continue;
}
data = recv_buf + offset;
nvchan->rsc.is_last = (i == count - 1);
trace_rndis_recv(ndev, q_idx, data);
/* Pass it to the upper layer */
ret = rndis_filter_receive(ndev, net_device,
nvchan, data, buflen);
if (unlikely(ret != NVSP_STAT_SUCCESS)) { /* Drop incomplete packet */
nvchan->rsc.cnt = 0;
status = NVSP_STAT_FAIL;
}
}
/* If negotiated version <= NVSP_PROTOCOL_VERSION_6, the offset may be * wrong due to a host bug. So fix the offset here.
*/ if (nvscdev->nvsp_version <= NVSP_PROTOCOL_VERSION_6 &&
msglen >= sizeof(struct nvsp_message_header) + sizeof(union nvsp_6_message_uber) + count * sizeof(u32))
offset = sizeof(struct nvsp_message_header) + sizeof(union nvsp_6_message_uber);
/* Boundary check for all versions */ if (msglen < count * sizeof(u32) || offset > msglen - count * sizeof(u32)) {
netdev_err(ndev, "Received send-table offset too big:%u\n",
offset); return;
}
tab = (void *)nvmsg + offset;
for (i = 0; i < count; i++)
net_device_ctx->tx_table[i] = tab[i];
}
/* Ensure packet is big enough to read its fields */ if (msglen < sizeof(struct nvsp_message_header) + sizeof(struct nvsp_4_send_vf_association)) {
netdev_err(ndev, "nvsp_v4_msg length too small: %u\n", msglen); return;
}
/* Ensure packet is big enough to read header fields */ if (msglen < sizeof(struct nvsp_message_header)) {
netdev_err(ndev, "inband nvsp_message length too small: %u\n", msglen); return;
}
switch (nvmsg->hdr.msg_type) { case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
netvsc_send_table(ndev, nvscdev, nvmsg, msglen); break;
case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: if (hv_is_isolation_supported())
netdev_err(ndev, "Ignore VF_ASSOCIATION msg from the host supporting isolation\n"); else
netvsc_send_vf(ndev, nvmsg, msglen); break;
}
}
/* Network processing softirq * Process data in incoming ring buffer from host * Stops when ring is empty or budget is met or exceeded.
*/ int netvsc_poll(struct napi_struct *napi, int budget)
{ struct netvsc_channel *nvchan
= container_of(napi, struct netvsc_channel, napi); struct netvsc_device *net_device = nvchan->net_device; struct vmbus_channel *channel = nvchan->channel; struct hv_device *device = netvsc_channel_to_device(channel); struct net_device *ndev = hv_get_drvdata(device); int work_done = 0; int ret;
/* If starting a new interval */ if (!nvchan->desc)
nvchan->desc = hv_pkt_iter_first(channel);
/* Send any pending receive completions */
ret = send_recv_completions(ndev, net_device, nvchan);
/* If it did not exhaust NAPI budget this time * and not doing busy poll * then re-enable host interrupts * and reschedule if ring is not empty * or sending receive completion failed.
*/ if (work_done < budget &&
napi_complete_done(napi, work_done) &&
(ret || hv_end_read(&channel->inbound)) &&
napi_schedule_prep(napi)) {
hv_begin_read(&channel->inbound);
__napi_schedule(napi);
}
/* Driver may overshoot since multiple packets per descriptor */ return min(work_done, budget);
}
/* VMBus callback invoked when data arrives in the host ring buffer.
 * All real work is deferred to the network softirq via NAPI; here we only
 * mask host interrupts and schedule the poll.
 */
void netvsc_channel_cb(void *context)
{
	struct netvsc_channel *nvchan = context;
	struct vmbus_channel *channel = nvchan->channel;
	struct hv_ring_buffer_info *rbi = &channel->inbound;

	/* preload first vmpacket descriptor */
	prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);

	if (!napi_schedule_prep(&nvchan->napi))
		return;

	/* disable interrupts from host */
	hv_begin_read(rbi);
	__napi_schedule_irqoff(&nvchan->napi);
}
/* * netvsc_device_add - Callback when the device belonging to this * driver is added
*/ struct netvsc_device *netvsc_device_add(struct hv_device *device, conststruct netvsc_device_info *device_info)
{ int i, ret = 0; struct netvsc_device *net_device; struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev);
net_device = alloc_net_device(); if (!net_device) return ERR_PTR(-ENOMEM);
for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
net_device_ctx->tx_table[i] = 0;
/* Because the device uses NAPI, all the interrupt batching and * control is done via Net softirq, not the channel handling
*/
set_channel_read_mode(device->channel, HV_CALL_ISR);
/* If we're reopening the device we may have multiple queues, fill the * chn_table with the default channel to use it before subchannels are * opened. * Initialize the channel state before we open; * we can be interrupted as soon as we open the channel.
*/
for (i = 0; i < VRSS_CHANNEL_MAX; i++) { struct netvsc_channel *nvchan = &net_device->chan_table[i];
/* Enable NAPI handler before init callbacks */
netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll);
napi_enable(&net_device->chan_table[0].napi);
netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX,
&net_device->chan_table[0].napi);
netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX,
&net_device->chan_table[0].napi);
/* Open the channel */
device->channel->next_request_id_callback = vmbus_next_request_id;
device->channel->request_addr_callback = vmbus_request_addr;
device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;
ret = vmbus_open(device->channel, netvsc_ring_bytes,
netvsc_ring_bytes, NULL, 0,
netvsc_channel_cb, net_device->chan_table);
if (ret != 0) {
netdev_err(ndev, "unable to open channel: %d\n", ret); goto cleanup;
}
/* Channel is opened */
netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
/* Connect with the NetVsp */
ret = netvsc_connect_vsp(device, net_device, device_info); if (ret != 0) {
netdev_err(ndev, "unable to connect to NetVSP - %d\n", ret); goto close;
}
/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is * populated.
*/
rcu_assign_pointer(net_device_ctx->nvdev, net_device);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.