#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

/**
 * i40e_fdir - Generate a Flow Director descriptor based on fdata
 * @tx_ring: Tx ring to send buffer on
 * @fdata: Flow director filter data
 * @add: Indicate if we are adding a rule or deleting one
 **/
static void i40e_fdir(struct i40e_ring *tx_ring,
		      struct i40e_fdir_filter *fdata, bool add)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_pf *pf = tx_ring->vsi->back;
	u32 flex_ptype, dtype_cmd, vsi_id;
	u16 i;

	/* grab the next descriptor */
	i = tx_ring->next_to_use;
	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);

	/* advance next_to_use, wrapping to 0 at the end of the ring */
	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* Use LAN VSI Id if not programmed by user */
	vsi_id = fdata->dest_vsi ? : i40e_pf_get_main_vsi(pf)->id;

	/* NOTE(review): flex_ptype is OR'd into here without any prior
	 * initialization, and the function body appears truncated in this
	 * chunk (no closing brace; fdir_desc/dtype_cmd are never used, so
	 * the descriptor fields are never written). The remainder of the
	 * descriptor-building code seems to be missing — TODO: restore it
	 * before relying on this function.
	 */
	flex_ptype |= FIELD_PREP(I40E_TXD_FLTR_QW0_DEST_VSI_MASK, vsi_id);
#define I40E_FD_CLEAN_DELAY 10

/**
 * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: Packet data that will be filter parameters
 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: The PF pointer
 * @add: True for add/update, False for remove
 **/
static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
				    u8 *raw_packet, struct i40e_pf *pf,
				    bool add)
{
	struct i40e_tx_buffer *tx_buf, *first;
	struct i40e_tx_desc *tx_desc;
	struct i40e_ring *tx_ring;
	struct i40e_vsi *vsi;
	struct device *dev;
	dma_addr_t dma;
	u32 td_cmd = 0;
	u16 i;

	/* find existing FDIR VSI */
	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
	if (!vsi)
		return -ENOENT;

	tx_ring = vsi->tx_rings[0];
	dev = tx_ring->dev;

	/* we need two descriptors to add/del a filter and we can wait */
	for (i = I40E_FD_CLEAN_DELAY; I40E_DESC_UNUSED(tx_ring) < 2; i--) {
		if (!i)
			return -EAGAIN;
		msleep_interruptible(1);
	}
	/* NOTE(review): the body appears truncated here — the DMA mapping
	 * and descriptor programming that would use tx_buf/first/tx_desc/
	 * dma/td_cmd/dev is missing and the function never closes.
	 * TODO: restore the remainder.
	 */
/**
 * i40e_create_dummy_udp_packet - helper function to create UDP packet
 * @raw_packet: preallocated space for dummy packet
 * @ipv4: is layer 3 packet of version 4 or 6
 * @l4proto: next level protocol used in data portion of l3
 * @data: filter data
 *
 * Helper function to populate udp fields.
 **/
static void i40e_create_dummy_udp_packet(u8 *raw_packet, bool ipv4,
					 u8 l4proto,
					 struct i40e_fdir_filter *data)
{
	struct udphdr *udp;
	u8 *tmp;
	/* NOTE(review): body truncated in this chunk — the code that lays
	 * out the L2/L3 headers and fills in the udphdr is missing, and
	 * the function never closes. TODO: restore.
	 */
/**
 * i40e_create_dummy_sctp_packet - helper function to create SCTP packet
 * @raw_packet: preallocated space for dummy packet
 * @ipv4: is layer 3 packet of version 4 or 6
 * @l4proto: next level protocol used in data portion of l3
 * @data: filter data
 *
 * Helper function to populate sctp fields.
 **/
static void i40e_create_dummy_sctp_packet(u8 *raw_packet, bool ipv4,
					  u8 l4proto,
					  struct i40e_fdir_filter *data)
{
	struct sctphdr *sctp;
	u8 *tmp;
	/* NOTE(review): body truncated in this chunk — the sctphdr
	 * population code is missing and the function never closes.
	 * TODO: restore.
	 */
/**
 * i40e_prepare_fdir_filter - Prepare and program fdir filter
 * @pf: physical function to attach filter to
 * @fd_data: filter data
 * @add: add or delete filter
 * @packet_addr: address of dummy packet, used in filtering
 * @payload_offset: offset from dummy packet address to user defined data
 * @pctype: Packet type for which filter is used
 *
 * Helper function to offset data of dummy packet, program it and
 * handle errors.
 *
 * On programming failure the packet buffer is freed here (it was never
 * handed to the Tx ring) and -EOPNOTSUPP is returned.
 **/
static int i40e_prepare_fdir_filter(struct i40e_pf *pf,
				    struct i40e_fdir_filter *fd_data,
				    bool add, char *packet_addr,
				    int payload_offset, u8 pctype)
{
	int ret;

	if (fd_data->flex_filter) {
		u8 *payload;
		__be16 pattern = fd_data->flex_word;
		u16 off = fd_data->flex_offset;

		payload = packet_addr + payload_offset;

		/* If user provided vlan, offset payload by vlan header length */
		if (!!fd_data->vlan_tag)
			payload += VLAN_HLEN;

		/* patch the user-supplied flex word into the dummy packet */
		*((__force __be16 *)(payload + off)) = pattern;
	}

	fd_data->pctype = pctype;
	ret = i40e_program_fdir_filter(fd_data, packet_addr, pf, add);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
			 fd_data->pctype, fd_data->fd_id, ret);
		/* Free the packet buffer since it wasn't added to the ring.
		 * BUG FIX: the comment above promised this, but the kfree()
		 * was missing, leaking the raw packet buffer on every
		 * programming failure.
		 */
		kfree(packet_addr);
		return -EOPNOTSUPP;
	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
		if (add)
			dev_info(&pf->pdev->dev,
				 "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
		else
			dev_info(&pf->pdev->dev,
				 "Filter deleted for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
	}

	return ret;
}
/**
 * i40e_change_filter_num - Prepare and program fdir filter
 * @ipv4: is layer 3 packet of version 4 or 6
 * @add: add or delete filter
 * @ipv4_filter_num: field to update
 * @ipv6_filter_num: field to update
 *
 * Update filter number field for pf.
 **/
static void i40e_change_filter_num(bool ipv4, bool add, u16 *ipv4_filter_num,
				   u16 *ipv6_filter_num)
{
	/* pick the counter matching the IP version, then bump it */
	u16 *counter = ipv4 ? ipv4_filter_num : ipv6_filter_num;

	if (add)
		(*counter)++;
	else
		(*counter)--;
}
#define I40E_UDPIP_DUMMY_PACKET_LEN 42
#define I40E_UDPIP6_DUMMY_PACKET_LEN 62

/**
 * i40e_add_del_fdir_udp - Add/Remove UDP filters
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 * @ipv4: true is v4, false is v6
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_udp(struct i40e_vsi *vsi,
				 struct i40e_fdir_filter *fd_data,
				 bool add, bool ipv4)
{
	struct i40e_pf *pf = vsi->back;
	u8 *raw_packet;
	int ret;

	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	/* NOTE(review): body truncated in this chunk — the dummy-packet
	 * creation and the i40e_prepare_fdir_filter() calls that would set
	 * ret are missing, and the function never closes. TODO: restore.
	 */
#define I40E_TCPIP_DUMMY_PACKET_LEN 54 #define I40E_TCPIP6_DUMMY_PACKET_LEN 74 /** * i40e_add_del_fdir_tcp - Add/Remove TCPv4 filters * @vsi: pointer to the targeted VSI * @fd_data: the flow director data required for the FDir descriptor * @add: true adds a filter, false removes it * @ipv4: true is v4, false is v6 * * Returns 0 if the filters were successfully added or removed
**/ staticint i40e_add_del_fdir_tcp(struct i40e_vsi *vsi, struct i40e_fdir_filter *fd_data, bool add, bool ipv4)
{ struct i40e_pf *pf = vsi->back;
u8 *raw_packet; int ret;
raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); if (!raw_packet) return -ENOMEM;
i40e_create_dummy_tcp_packet(raw_packet, ipv4, IPPROTO_TCP, fd_data); if (ipv4)
ret = i40e_prepare_fdir_filter
(pf, fd_data, add, raw_packet,
I40E_TCPIP_DUMMY_PACKET_LEN,
LIBIE_FILTER_PCTYPE_NONF_IPV4_TCP); else
ret = i40e_prepare_fdir_filter
(pf, fd_data, add, raw_packet,
I40E_TCPIP6_DUMMY_PACKET_LEN,
LIBIE_FILTER_PCTYPE_NONF_IPV6_TCP);
if (add) { if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) &&
I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
} return 0;
}
#define I40E_SCTPIP_DUMMY_PACKET_LEN 46
#define I40E_SCTPIP6_DUMMY_PACKET_LEN 66

/**
 * i40e_add_del_fdir_sctp - Add/Remove SCTPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 * @ipv4: true is v4, false is v6
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_sctp(struct i40e_vsi *vsi,
				  struct i40e_fdir_filter *fd_data,
				  bool add, bool ipv4)
{
	struct i40e_pf *pf = vsi->back;
	u8 *raw_packet;
	int ret;

	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
	if (!raw_packet)
		return -ENOMEM;
	/* NOTE(review): body truncated in this chunk — the dummy SCTP
	 * packet creation and filter programming are missing, and the
	 * function never closes. TODO: restore.
	 */
#define I40E_IP_DUMMY_PACKET_LEN 34
#define I40E_IP6_DUMMY_PACKET_LEN 54

/**
 * i40e_add_del_fdir_ip - Add/Remove IPv4 Flow Director filters for
 * a specific flow spec
 * @vsi: pointer to the targeted VSI
 * @fd_data: the flow director data required for the FDir descriptor
 * @add: true adds a filter, false removes it
 * @ipv4: true is v4, false is v6
 *
 * Returns 0 if the filters were successfully added or removed
 **/
static int i40e_add_del_fdir_ip(struct i40e_vsi *vsi,
				struct i40e_fdir_filter *fd_data,
				bool add, bool ipv4)
{
	struct i40e_pf *pf = vsi->back;
	int payload_offset;
	u8 *raw_packet;
	int iter_start;
	int iter_end;
	int ret;
	int i;
	/* NOTE(review): body truncated in this chunk — the loop over
	 * PCTYPEs that would use iter_start/iter_end and program the
	 * filters is missing, and the function never closes. TODO: restore.
	 */
/**
 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
 * @vsi: pointer to the targeted VSI
 * @input: filter to add or delete
 * @add: true adds a filter, false removes it
 *
 * Dispatches on the ethtool flow type (and, for the user-defined flows,
 * on the L4 protocol) to the per-protocol add/del helpers.
 **/
int i40e_add_del_fdir(struct i40e_vsi *vsi,
		      struct i40e_fdir_filter *input, bool add)
{
	struct i40e_pf *pf = vsi->back;

	/* The buffer allocated by the helpers below will normally be freed
	 * by i40e_clean_fdir_tx_irq() as it reclaims resources after
	 * transmit completion. In the event of an error adding the buffer
	 * to the FDIR ring, it will immediately be freed. It may also be
	 * freed by i40e_clean_tx_ring() when closing the VSI.
	 */
	switch (input->flow_type & ~FLOW_EXT) {
	case TCP_V4_FLOW:
		return i40e_add_del_fdir_tcp(vsi, input, add, true);
	case UDP_V4_FLOW:
		return i40e_add_del_fdir_udp(vsi, input, add, true);
	case SCTP_V4_FLOW:
		return i40e_add_del_fdir_sctp(vsi, input, add, true);
	case TCP_V6_FLOW:
		return i40e_add_del_fdir_tcp(vsi, input, add, false);
	case UDP_V6_FLOW:
		return i40e_add_del_fdir_udp(vsi, input, add, false);
	case SCTP_V6_FLOW:
		return i40e_add_del_fdir_sctp(vsi, input, add, false);
	case IP_USER_FLOW:
		switch (input->ipl4_proto) {
		case IPPROTO_TCP:
			return i40e_add_del_fdir_tcp(vsi, input, add, true);
		case IPPROTO_UDP:
			return i40e_add_del_fdir_udp(vsi, input, add, true);
		case IPPROTO_SCTP:
			return i40e_add_del_fdir_sctp(vsi, input, add, true);
		case IPPROTO_IP:
			return i40e_add_del_fdir_ip(vsi, input, add, true);
		default:
			/* We cannot support masking based on protocol */
			dev_info(&pf->pdev->dev, "Unsupported IPv4 protocol 0x%02x\n",
				 input->ipl4_proto);
			return -EINVAL;
		}
	case IPV6_USER_FLOW:
		switch (input->ipl4_proto) {
		case IPPROTO_TCP:
			return i40e_add_del_fdir_tcp(vsi, input, add, false);
		case IPPROTO_UDP:
			return i40e_add_del_fdir_udp(vsi, input, add, false);
		case IPPROTO_SCTP:
			return i40e_add_del_fdir_sctp(vsi, input, add, false);
		case IPPROTO_IP:
			return i40e_add_del_fdir_ip(vsi, input, add, false);
		default:
			/* We cannot support masking based on protocol */
			dev_info(&pf->pdev->dev, "Unsupported IPv6 protocol 0x%02x\n",
				 input->ipl4_proto);
			return -EINVAL;
		}
	default:
		dev_info(&pf->pdev->dev, "Unsupported flow type 0x%02x\n",
			 input->flow_type);
		return -EINVAL;
	}
}
/**
 * i40e_fd_handle_status - check the Programming Status for FD
 * @rx_ring: the Rx ring for this descriptor
 * @qword0_raw: qword0
 * @qword1: qword1 after le_to_cpu
 * @prog_id: the id originally used for programming
 *
 * This is used to verify if the FD programming or invalidation
 * requested by SW to the HW is successful or not and take actions accordingly.
 **/
static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
				  u64 qword1, u8 prog_id)
{
	struct i40e_pf *pf = rx_ring->vsi->back;
	struct pci_dev *pdev = pf->pdev;
	struct i40e_16b_rx_wb_qw0 *qw0;
	u32 fcnt_prog, fcnt_avail;
	u32 error;

	/* BUG FIX: qw0 and error were read without ever being initialized
	 * (undefined behavior). Decode them from the raw descriptor words
	 * before use.
	 */
	qw0 = (struct i40e_16b_rx_wb_qw0 *)&qword0_raw;
	error = FIELD_GET(I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK, qword1);

	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
		pf->fd_inv = le32_to_cpu(qw0->hi_dword.fd_id);
		if (qw0->hi_dword.fd_id != 0 ||
		    (I40E_DEBUG_FD & pf->hw.debug_mask))
			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
				 pf->fd_inv);

		/* Check if the programming error is for ATR.
		 * If so, auto disable ATR and set a state for
		 * flush in progress. Next time we come here if flush is in
		 * progress do nothing, once flush is complete the state will
		 * be cleared.
		 */
		if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
			return;

		pf->fd_add_err++;
		/* store the current atr filter count */
		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);

		if (qw0->hi_dword.fd_id == 0 &&
		    test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
			/* These set_bit() calls aren't atomic with the
			 * test_bit() here, but worse case we potentially
			 * disable ATR and queue a flush right after SB
			 * support is re-enabled. That shouldn't cause an
			 * issue in practice
			 */
			set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
			set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
		}

		/* filter programming failed most likely due to table full */
		fcnt_prog = i40e_get_global_fd_count(pf);
		fcnt_avail = pf->fdir_pf_filter_count;
		/* If ATR is running fcnt_prog can quickly change,
		 * if we are very close to full, it makes sense to disable
		 * FD ATR/SB and then re-enable it when there is room.
		 */
		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
			if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) &&
			    !test_and_set_bit(__I40E_FD_SB_AUTO_DISABLED,
					      pf->state))
				if (I40E_DEBUG_FD & pf->hw.debug_mask)
					dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
		}
	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
		if (I40E_DEBUG_FD & pf->hw.debug_mask)
			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
				 qw0->hi_dword.fd_id);
	}
}
/** * i40e_unmap_and_free_tx_resource - Release a Tx buffer * @ring: the ring that owns the buffer * @tx_buffer: the buffer to free
**/ staticvoid i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer)
{ if (tx_buffer->skb) { if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
kfree(tx_buffer->raw_buf); elseif (ring_is_xdp(ring))
xdp_return_frame(tx_buffer->xdpf); else
dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len))
dma_unmap_single(ring->dev,
dma_unmap_addr(tx_buffer, dma),
dma_unmap_len(tx_buffer, len),
DMA_TO_DEVICE);
} elseif (dma_unmap_len(tx_buffer, len)) {
dma_unmap_page(ring->dev,
dma_unmap_addr(tx_buffer, dma),
dma_unmap_len(tx_buffer, len),
DMA_TO_DEVICE);
}
tx_buffer->next_to_watch = NULL;
tx_buffer->skb = NULL;
dma_unmap_len_set(tx_buffer, len, 0); /* tx_buffer must be completely set up in the transmit path */
}
/**
 * i40e_clean_tx_ring - Free any empty Tx buffers
 * @tx_ring: ring to be cleaned
 **/
void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
{
	unsigned long bi_size;
	u16 i;

	/* AF_XDP zero-copy rings are cleaned by the xsk helper */
	if (ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
		i40e_xsk_clean_tx_ring(tx_ring);
	} else {
		/* ring already cleared, nothing to do */
		if (!tx_ring->tx_bi)
			return;

		/* Free all the Tx ring sk_buffs */
		for (i = 0; i < tx_ring->count; i++)
			i40e_unmap_and_free_tx_resource(tx_ring,
							&tx_ring->tx_bi[i]);
	}
	/* NOTE(review): body truncated in this chunk — bi_size is declared
	 * but never used, and the tail that would zero the buffer array,
	 * reset ring indices and reset the netdev queue (plus the closing
	 * brace) appears to be missing. TODO: restore.
	 */
/**
 * i40e_get_tx_pending - how many tx descriptors not processed
 * @ring: the ring of descriptors
 * @in_sw: use SW variables
 *
 * Since there is no access to the ring head register
 * in XL710, we need to use our local copies
 **/
u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
	u32 head, tail;

	if (in_sw) {
		/* use the driver's cached positions */
		head = ring->next_to_clean;
		tail = ring->next_to_use;
	} else {
		/* read the positions the hardware reports */
		head = i40e_get_head(ring);
		tail = readl(ring->tail);
	}

	if (head == tail)
		return 0;
	if (head < tail)
		return tail - head;

	/* tail has wrapped around the ring */
	return tail + ring->count - head;
}
/**
 * i40e_detect_recover_hung - Function to detect and recover hung_queues
 * @pf: pointer to PF struct
 *
 * LAN VSI has netdev and netdev has TX queues. This function is to check
 * each of those TX queues if they are hung, trigger recovery by issuing
 * SW interrupt.
 **/
void i40e_detect_recover_hung(struct i40e_pf *pf)
{
	struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf);
	struct net_device *netdev;
	unsigned int q;

	if (!vsi || test_bit(__I40E_VSI_DOWN, vsi->state))
		return;

	netdev = vsi->netdev;
	if (!netdev || !netif_carrier_ok(netdev))
		return;

	for (q = 0; q < vsi->num_queue_pairs; q++) {
		struct i40e_ring *tx_ring = vsi->tx_rings[q];
		int pkts;

		if (!tx_ring || !tx_ring->desc)
			continue;

		/* If packet counter has not changed the queue is
		 * likely stalled, so force an interrupt for this
		 * queue.
		 *
		 * prev_pkt_ctr would be negative if there was no
		 * pending work.
		 */
		pkts = tx_ring->stats.packets & INT_MAX;
		if (tx_ring->tx_stats.prev_pkt_ctr == pkts) {
			i40e_force_wb(vsi, tx_ring->q_vector);
			continue;
		}

		/* Memory barrier between read of packet count and call
		 * to i40e_get_tx_pending()
		 */
		smp_rmb();
		tx_ring->tx_stats.prev_pkt_ctr =
			i40e_get_tx_pending(tx_ring, true) ? pkts : -1;
	}
}
/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @vsi: the VSI we care about
 * @tx_ring: Tx ring to clean
 * @napi_budget: Used to determine if we are in netpoll
 * @tx_cleaned: Out parameter set to the number of TXes cleaned
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 **/
static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
			      struct i40e_ring *tx_ring,
			      int napi_budget, unsigned int *tx_cleaned)
{
	int i = tx_ring->next_to_clean;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_head;
	struct i40e_tx_desc *tx_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = vsi->work_limit;

	tx_buf = &tx_ring->tx_bi[i];
	tx_desc = I40E_TX_DESC(tx_ring, i);
	/* i is kept negative (offset from ring end) so wrap detection is a
	 * simple !i test below
	 */
	i -= tx_ring->count;

	/* NOTE(review): the chunk appears truncated here — the `do {`
	 * opener of the cleanup loop, the eop_desc/done check, and the
	 * first-buffer skb/xdp free + unmap section that pairs with the
	 * orphaned closing brace further down seem to be missing.
	 * TODO: restore before use.
	 */
	tx_buf++;
	tx_desc++;
	i++;
	if (unlikely(!i)) {
		/* wrapped: restart at the beginning of the ring */
		i -= tx_ring->count;
		tx_buf = tx_ring->tx_bi;
		tx_desc = I40E_TX_DESC(tx_ring, 0);
	}

	/* unmap any remaining paged data */
	if (dma_unmap_len(tx_buf, len)) {
		dma_unmap_page(tx_ring->dev,
			       dma_unmap_addr(tx_buf, dma),
			       dma_unmap_len(tx_buf, len),
			       DMA_TO_DEVICE);
		dma_unmap_len_set(tx_buf, len, 0);
	}
	}

	/* move us one more past the eop_desc for start of next pkt */
	tx_buf++;
	tx_desc++;
	i++;
	if (unlikely(!i)) {
		i -= tx_ring->count;
		tx_buf = tx_ring->tx_bi;
		tx_desc = I40E_TX_DESC(tx_ring, 0);
	}

	prefetch(tx_desc);

	/* update budget accounting */
	budget--;
	} while (likely(budget));

	/* NOTE(review): the post-loop bookkeeping that stores next_to_clean
	 * and accumulates stats appears to be missing here as well. */

	/* notify netdev of completed buffers */
	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__I40E_VSI_DOWN, vsi->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	*tx_cleaned = total_packets;
	return !!budget;
}
/**
 * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to enable writeback
 **/
static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
				  struct i40e_q_vector *q_vector)
{
	u16 flags = q_vector->tx.ring[0].flags;
	u32 val;

	/* nothing to do unless this ring opted in to WB_ON_ITR */
	if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
		return;

	/* already armed — avoid rewriting the register */
	if (q_vector->arm_wb_state)
		return;

	if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) {
		val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
		      I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */

		wr32(&vsi->back->hw,
		     I40E_PFINT_DYN_CTLN(q_vector->reg_idx),
		     val);
	} else {
		val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
		      I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
		/* NOTE(review): truncated — the non-MSIX wr32() write of
		 * val, the arm_wb_state update and the closing braces
		 * appear to be missing. TODO: restore.
		 */
/**
 * i40e_force_wb - Issue SW Interrupt so HW does a wb
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to force writeback
 **/
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
	if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) {
		u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
			  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
			  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
			  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
			  /* allow 00 to be written to the index */

		wr32(&vsi->back->hw,
		     I40E_PFINT_DYN_CTLN(q_vector->reg_idx), val);
	} else {
		u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
			  I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
			  I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
			  I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
			  /* allow 00 to be written to the index */

	/* NOTE(review): the chunk is spliced here — the non-MSIX wr32()
	 * write and i40e_force_wb()'s closing braces are missing, and the
	 * switch below (which reads link speed and returns `divisor`, a
	 * variable not declared in this scope) clearly belongs to a
	 * different function (an ITR divisor helper) whose header was
	 * lost. TODO: restore both functions.
	 */
	switch (q_vector->vsi->back->hw.phy.link_info.link_speed) {
	case I40E_LINK_SPEED_40GB:
		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
		break;
	case I40E_LINK_SPEED_25GB:
	case I40E_LINK_SPEED_20GB:
		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
		break;
	default:
	case I40E_LINK_SPEED_10GB:
		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
		break;
	case I40E_LINK_SPEED_1GB:
	case I40E_LINK_SPEED_100MB:
		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
		break;
	}

	return divisor;
}
/**
 * i40e_update_itr - update the dynamic ITR value based on statistics
 * @q_vector: structure containing interrupt and ring information
 * @rc: structure containing ring performance data
 *
 * Stores a new ITR value based on packets and byte
 * counts during the last interrupt.  The advantage of per interrupt
 * computation is faster updates and more accurate ITR for the current
 * traffic pattern.  Constants in this function were computed
 * based on theoretical maximum wire speed and thresholds were set based
 * on testing data as well as attempting to minimize response time
 * while increasing bulk throughput.
 **/
static void i40e_update_itr(struct i40e_q_vector *q_vector,
			    struct i40e_ring_container *rc)
{
	unsigned int avg_wire_size, packets, bytes, itr;
	unsigned long next_update = jiffies;

	/* NOTE(review): in this chunk `packets` and `bytes` are never
	 * assigned before being read below (the code loading them from the
	 * ring container's accumulated counters appears to be missing),
	 * and the `clear_counts` label targeted by the gotos is absent —
	 * the function looks truncated/garbled. TODO: restore.
	 */

	/* If we don't have any rings just leave ourselves set for maximum
	 * possible latency so we take ourselves out of the equation.
	 */
	if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
		return;

	/* For Rx we want to push the delay up and default to low latency.
	 * for Tx we want to pull the delay down and default to high latency.
	 */
	itr = i40e_container_is_rx(q_vector, rc) ?
	      I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
	      I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;

	/* If we didn't update within up to 1 - 2 jiffies we can assume
	 * that either packets are coming in so slow there hasn't been
	 * any work, or that there is so much work that NAPI is dealing
	 * with interrupt moderation and we don't need to do anything.
	 */
	if (time_after(next_update, rc->next_update))
		goto clear_counts;

	/* If itr_countdown is set it means we programmed an ITR within
	 * the last 4 interrupt cycles. This has a side effect of us
	 * potentially firing an early interrupt. In order to work around
	 * this we need to throw out any data received for a few
	 * interrupts following the update.
	 */
	if (q_vector->itr_countdown) {
		itr = rc->target_itr;
		goto clear_counts;
	}

	if (i40e_container_is_rx(q_vector, rc)) {
		/* If Rx there are 1 to 4 packets and bytes are less than
		 * 9000 assume insufficient data to use bulk rate limiting
		 * approach unless Tx is already in bulk rate limiting. We
		 * are likely latency driven.
		 */
		if (packets && packets < 4 && bytes < 9000 &&
		    (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
			itr = I40E_ITR_ADAPTIVE_LATENCY;
			goto adjust_by_size;
		}
	} else if (packets < 4) {
		/* If we have Tx and Rx ITR maxed and Tx ITR is running in
		 * bulk mode and we are receiving 4 or fewer packets just
		 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
		 * that the Rx can relax.
		 */
		if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
		    (q_vector->rx.target_itr & I40E_ITR_MASK) ==
		    I40E_ITR_ADAPTIVE_MAX_USECS)
			goto clear_counts;
	} else if (packets > 32) {
		/* If we have processed over 32 packets in a single interrupt
		 * for Tx assume we need to switch over to "bulk" mode.
		 */
		rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
	}

	/* We have no packets to actually measure against. This means
	 * either one of the other queues on this vector is active or
	 * we are a Tx queue doing TSO with too high of an interrupt rate.
	 *
	 * Between 4 and 56 we can assume that our current interrupt delay
	 * is only slightly too low. As such we should increase it by a small
	 * fixed amount.
	 */
	if (packets < 56) {
		itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
		if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
			itr &= I40E_ITR_ADAPTIVE_LATENCY;
			itr += I40E_ITR_ADAPTIVE_MAX_USECS;
		}
		goto clear_counts;
	}

	/* Between 56 and 112 is our "goldilocks" zone where we are
	 * working out "just right". Just report that our current
	 * ITR is good for us.
	 */
	if (packets <= 112)
		goto clear_counts;

	/* If packet count is 128 or greater we are likely looking
	 * at a slight overrun of the delay we want. Try halving
	 * our delay to see if that will cut the number of packets
	 * in half per interrupt.
	 */
		itr /= 2;
		itr &= I40E_ITR_MASK;
		if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
			itr = I40E_ITR_ADAPTIVE_MIN_USECS;
		goto clear_counts;
	}
	/* NOTE(review): the closing brace above has no matching opening
	 * `if` in this chunk — an enclosing `if (packets < 256)` style
	 * guard appears to have been lost. */

	/* The paths below assume we are dealing with a bulk ITR since
	 * number of packets is greater than 256. We are just going to have
	 * to compute a value and try to bring the count under control,
	 * though for smaller packet sizes there isn't much we can do as
	 * NAPI polling will likely be kicking in sooner rather than later.
	 */
	itr = I40E_ITR_ADAPTIVE_BULK;

adjust_by_size:
	/* If packet counts are 256 or greater we can assume we have a gross
	 * overestimation of what the rate should be. Instead of trying to fine
	 * tune it just use the formula below to try and dial in an exact value
	 * give the current packet size of the frame.
	 */
	avg_wire_size = bytes / packets;

	/* The following is a crude approximation of:
	 *  wmem_default / (size + overhead) = desired_pkts_per_int
	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
	 *
	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
	 * formula down to
	 *
	 *  (170 * (size + 24)) / (size + 640) = ITR
	 *
	 * We first do some math on the packet size and then finally bitshift
	 * by 8 after rounding up. We also have to account for PCIe link speed
	 * difference as ITR scales based on this.
	 */
	if (avg_wire_size <= 60) {
		/* Start at 250k ints/sec */
		avg_wire_size = 4096;
	} else if (avg_wire_size <= 380) {
		/* 250K ints/sec to 60K ints/sec */
		avg_wire_size *= 40;
		avg_wire_size += 1696;
	} else if (avg_wire_size <= 1084) {
		/* 60K ints/sec to 36K ints/sec */
		avg_wire_size *= 15;
		avg_wire_size += 11452;
	} else if (avg_wire_size <= 1980) {
		/* 36K ints/sec to 30K ints/sec */
		avg_wire_size *= 5;
		avg_wire_size += 22420;
	} else {
		/* plateau at a limit of 30K ints/sec */
		avg_wire_size = 32256;
	}

	/* If we are in low latency mode halve our delay which doubles the
	 * rate to somewhere between 100K to 16K ints/sec
	 */
	if (itr & I40E_ITR_ADAPTIVE_LATENCY)
		avg_wire_size /= 2;

	/* Resultant value is 256 times larger than it needs to be. This
	 * gives us room to adjust the value as needed to either increase
	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
	 *
	 * Use addition as we have already recorded the new latency flag
	 * for the ITR value.
	 */
	itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
	       I40E_ITR_ADAPTIVE_MIN_INC;
/**
 * i40e_reuse_rx_page - page flip buffer and store it back on the ring
 * @rx_ring: rx descriptor ring to store buffers on
 * @old_buff: donor buffer to have page reused
 *
 * Synchronizes page for reuse by the adapter
 **/
static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
			       struct i40e_rx_buffer *old_buff)
{
	u16 next = rx_ring->next_to_alloc;
	struct i40e_rx_buffer *new_buff = i40e_rx_bi(rx_ring, next);

	/* advance next_to_alloc, wrapping at the end of the ring */
	rx_ring->next_to_alloc = (next + 1 < rx_ring->count) ? next + 1 : 0;

	/* hand the page over from the donor to the new buffer slot */
	new_buff->dma = old_buff->dma;
	new_buff->page = old_buff->page;
	new_buff->page_offset = old_buff->page_offset;
	new_buff->pagecnt_bias = old_buff->pagecnt_bias;

	/* donor no longer owns the page */
	old_buff->page = NULL;
}
/**
 * i40e_clean_programming_status - clean the programming status descriptor
 * @rx_ring: the rx ring that has this descriptor
 * @qword0_raw: qword0
 * @qword1: qword1 representing status_error_len in CPU ordering
 *
 * Flow director should handle FD_FILTER_STATUS to check its filter programming
 * status being successful or not and take actions accordingly. FCoE should
 * handle its context/filter programming/invalidation status and take actions.
 **/
void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
				   u64 qword1)
{
	u8 id = FIELD_GET(I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK, qword1);

	/* only Flow Director programming status is handled here */
	if (id != I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
		return;

	i40e_fd_handle_status(rx_ring, qword0_raw, qword1, id);
}
/**
 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
 * @tx_ring: the tx ring to set up
 *
 * Return 0 on success, negative on error
 **/
int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int bi_size;

	if (!dev)
		return -ENOMEM;

	/* warn if we are about to overwrite the pointer */
	WARN_ON(tx_ring->tx_bi);
	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
	if (!tx_ring->tx_bi)
		goto err;

	u64_stats_init(&tx_ring->syncp);

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add u32 for head writeback, align after this takes care of
	 * guaranteeing this is at least one cache line in size
	 */
	tx_ring->size += sizeof(u32);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->desc) {
		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
			 tx_ring->size);
		goto err;
	}
	/* NOTE(review): truncated — the success path (index reset,
	 * `return 0`) and the `err:` label (freeing tx_bi and returning
	 * -ENOMEM) targeted by the gotos above are missing. TODO: restore.
	 */
/**
 * i40e_clean_rx_ring - Free Rx buffers
 * @rx_ring: ring to be cleaned
 **/
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
	u16 i;

	/* ring already cleared, nothing to do */
	if (!rx_ring->rx_bi)
		return;

	/* AF_XDP zero-copy buffers are cleaned by the xsk helper */
	if (rx_ring->xsk_pool) {
		i40e_xsk_clean_rx_ring(rx_ring);
		goto skip_free;
	}

	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		struct i40e_rx_buffer *rx_bi = i40e_rx_bi(rx_ring, i);

		if (!rx_bi->page)
			continue;

		/* Invalidate cache lines that may have been written to by
		 * device so that we avoid corrupting memory.
		 */
		dma_sync_single_range_for_cpu(rx_ring->dev,
					      rx_bi->dma,
					      rx_bi->page_offset,
					      rx_ring->rx_buf_len,
					      DMA_FROM_DEVICE);
	/* NOTE(review): the chunk is spliced here — the rest of the
	 * per-buffer free loop, the `skip_free:` label and this function's
	 * tail are missing, and the kcalloc/-ENOMEM/return 0 lines below
	 * clearly belong to a different (int-returning) buffer-array
	 * allocator function whose header was lost; they are invalid in a
	 * void function. TODO: restore both functions.
	 */
	rx_ring->rx_bi =
		kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
	if (!rx_ring->rx_bi)
		return -ENOMEM;

	return 0;
}
/**
 * i40e_release_rx_desc - Store the new tail and head values
 * @rx_ring: ring to bump
 * @val: new head index
 **/
void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
	/* next_to_alloc tracks next_to_use now that the ring is refilled */
	rx_ring->next_to_alloc = val;
	rx_ring->next_to_use = val;

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(val, rx_ring->tail);
}
/**
 * i40e_alloc_mapped_page - recycle or make a new page
 * @rx_ring: ring to use
 * @bi: rx_buffer struct to modify
 *
 * Returns true if the page was successfully allocated or
 * reused.
 **/
static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
				   struct i40e_rx_buffer *bi)
{
	struct page *page = bi->page;
	dma_addr_t dma;

	/* since we are recycling buffers we should seldom need to alloc */
	if (likely(page)) {
		rx_ring->rx_stats.page_reuse_count++;
		return true;
	}

	/* alloc new page for storage */
	page = dev_alloc_pages(i40e_rx_pg_order(rx_ring));
	if (unlikely(!page)) {
		rx_ring->rx_stats.alloc_page_failed++;
		return false;
	}
	rx_ring->rx_stats.page_alloc_count++;

	/* map page for use */
	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
				 i40e_rx_pg_size(rx_ring),
				 DMA_FROM_DEVICE,
				 I40E_RX_DMA_ATTR);

	/* if mapping failed free memory back to system since
	 * there isn't much point in holding memory we can't use
	 */
	if (dma_mapping_error(rx_ring->dev, dma)) {
		__free_pages(page, i40e_rx_pg_order(rx_ring));
		rx_ring->rx_stats.alloc_page_failed++;
		return false;
	}
	/* NOTE(review): truncated — the tail that stores dma/page (and
	 * offset/refcount bias) into @bi and returns true is missing, so
	 * a bool function can currently fall off the end (UB).
	 * TODO: restore.
	 */
/**
 * i40e_alloc_rx_buffers - Replace used receive buffers
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 **/
bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
{
	u16 ntu = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct i40e_rx_buffer *bi;

	/* do nothing if no valid netdev defined */
	if (!rx_ring->netdev || !cleaned_count)
		return false;

	rx_desc = I40E_RX_DESC(rx_ring, ntu);
	bi = i40e_rx_bi(rx_ring, ntu);

	do {
		if (!i40e_alloc_mapped_page(rx_ring, bi))
			goto no_buffers;

		/* sync the buffer for use by the device */
		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
						 bi->page_offset,
						 rx_ring->rx_buf_len,
						 DMA_FROM_DEVICE);

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);

		/* advance descriptor, buffer and index together, wrapping
		 * all three at the end of the ring
		 */
		rx_desc++;
		bi++;
		ntu++;
		if (unlikely(ntu == rx_ring->count)) {
			rx_desc = I40E_RX_DESC(rx_ring, 0);
			bi = i40e_rx_bi(rx_ring, 0);
			ntu = 0;
		}

		/* clear the status bits for the next_to_use descriptor */
		rx_desc->wb.qword1.status_error_len = 0;

		cleaned_count--;
	} while (cleaned_count);

	/* only bump the tail register if we actually made progress */
	if (rx_ring->next_to_use != ntu)
		i40e_release_rx_desc(rx_ring, ntu);

	return false;

no_buffers:
	if (rx_ring->next_to_use != ntu)
		i40e_release_rx_desc(rx_ring, ntu);

	/* make sure to come back via polling to try again after
	 * allocation failure
	 */
	return true;
}
/**
 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
 * @vsi: the VSI we care about
 * @skb: skb currently being received and modified
 * @rx_desc: the receive descriptor
 **/ staticinlinevoid i40e_rx_checksum(struct i40e_vsi *vsi, struct sk_buff *skb, union i40e_rx_desc *rx_desc)
{ struct libeth_rx_pt decoded;
u32 rx_error, rx_status; bool ipv4, ipv6;
u8 ptype;
u64 qword;
/* NOTE(review): the code that extracts qword/ptype/rx_error/rx_status from
 * @rx_desc and derives decoded/ipv4/ipv6 is not visible; as shown these
 * locals are read uninitialized below -- text appears truncated by
 * extraction; restore from the original file.
 */
if (ipv4 &&
(rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT)))) goto checksum_fail;
/* likely incorrect csum if alternate IP extension headers found */ if (ipv6 &&
rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) /* don't increment checksum err here, non-fatal err */ return;
/* there was some L4 error, count error and punt packet to the stack */ if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT)) goto checksum_fail;
/* handle packets that were not able to be checksummed due
 * to arrival speed, in this case the stack can compute
 * the csum.
 */ if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return;
/* If there is an outer header present that might contain a checksum
 * we need to bump the checksum level by 1 to reflect the fact that
 * we are indicating we validated the inner checksum.
 */ if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
skb->csum_level = 1;
skb->ip_summed = CHECKSUM_UNNECESSARY; return;
checksum_fail:
vsi->back->hw_csum_rx_error++;
}
/**
 * i40e_rx_hash - set the hash value in the skb
 * @ring: descriptor ring
 * @rx_desc: specific descriptor
 * @skb: skb currently being received and modified
 * @rx_ptype: Rx packet type
 **/ staticinlinevoid i40e_rx_hash(struct i40e_ring *ring, union i40e_rx_desc *rx_desc, struct sk_buff *skb,
u8 rx_ptype)
{ struct libeth_rx_pt decoded;
u32 hash; const __le64 rss_mask =
cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
decoded = libie_rx_pt_parse(rx_ptype); if (!libeth_rx_pt_has_hash(ring->netdev, decoded)) return;
/* NOTE(review): the tail that tests rss_mask against the descriptor and
 * calls skb_set_hash() with the extracted hash (plus the closing brace)
 * is not visible -- 'hash' and 'rss_mask' are otherwise unused; text
 * appears truncated by extraction.
 */
/**
 * i40e_process_skb_fields - Populate skb header fields from Rx descriptor
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being populated
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, protocol, and
 * other fields within the skb.
 **/ void i40e_process_skb_fields(struct i40e_ring *rx_ring, union i40e_rx_desc *rx_desc, struct sk_buff *skb)
{
u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
u32 rx_status = FIELD_GET(I40E_RXD_QW1_STATUS_MASK, qword);
u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK;
u32 tsyn = FIELD_GET(I40E_RXD_QW1_STATUS_TSYNINDX_MASK, rx_status);
u8 rx_ptype = FIELD_GET(I40E_RXD_QW1_PTYPE_MASK, qword);
if (unlikely(tsynvalid))
i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn);
i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
skb_record_rx_queue(skb, rx_ring->queue_index);
if (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
__le16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1;
/* NOTE(review): 'vlan_tag' is read but never inserted into the skb; the
 * expected __vlan_hwaccel_put_tag() call is not visible, eth_type_trans()
 * sits inside the VLAN branch although it should run for every frame, and
 * the function's closing brace is missing -- text appears garbled by
 * extraction; verify against the original file.
 */
/* modifies the skb - consumes the enet header */
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
}
/** * i40e_cleanup_headers - Correct empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being fixed * @rx_desc: pointer to the EOP Rx descriptor * * In addition if skb is not at least 60 bytes we need to pad it so that * it is large enough to qualify as a valid Ethernet frame. * * Returns true if an error was encountered and skb was freed.
**/ staticbool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, union i40e_rx_desc *rx_desc)
{ /* ERR_MASK will only have valid bits if EOP set, and * what we are doing here is actually checking * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in * the error field
*/ if (unlikely(i40e_test_staterr(rx_desc,
BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
dev_kfree_skb_any(skb); returntrue;
}
/* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) returntrue;
returnfalse;
}
/** * i40e_can_reuse_rx_page - Determine if page can be reused for another Rx * @rx_buffer: buffer containing the page * @rx_stats: rx stats structure for the rx ring * * If page is reusable, we have a green light for calling i40e_reuse_rx_page, * which will assign the current buffer to the buffer that next_to_alloc is * pointing to; otherwise, the DMA mapping needs to be destroyed and * page freed. * * rx_stats will be updated to indicate whether the page was waived * or busy if it could not be reused.
*/ staticbool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, struct i40e_rx_queue_stats *rx_stats)
{ unsignedint pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page;
/* Is any reuse possible? */ if (!dev_page_is_reusable(page)) {
rx_stats->page_waive_count++; returnfalse;
}
#if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ if (unlikely((rx_buffer->page_count - pagecnt_bias) > 1)) {
rx_stats->page_busy_count++; returnfalse;
} #else #define I40E_LAST_OFFSET \
(SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048) if (rx_buffer->page_offset > I40E_LAST_OFFSET) {
rx_stats->page_busy_count++; returnfalse;
} #endif
/* If we have drained the page fragment pool we need to update * the pagecnt_bias and page count so that we fully restock the * number of references the driver holds.
*/ if (unlikely(pagecnt_bias == 1)) {
page_ref_add(page, USHRT_MAX - 1);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
returntrue;
}
/** * i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region * @rx_buffer: Rx buffer to adjust * @truesize: Size of adjustment
**/ staticvoid i40e_rx_buffer_flip(struct i40e_rx_buffer *rx_buffer, unsignedint truesize)
{ #if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize; #else
rx_buffer->page_offset += truesize; #endif
}
/**
 * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
 * @rx_ring: rx descriptor ring to transact packets on
 * @size: size of buffer to add to skb
 *
 * This function will pull an Rx buffer from the ring and synchronize it
 * for use by the CPU.
 */ staticstruct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, constunsignedint size)
{ struct i40e_rx_buffer *rx_buffer;
/* NOTE(review): 'rx_buffer' is never assigned in the visible text (the
 * i40e_rx_bi() ring lookup appears lost to extraction), so it is used
 * uninitialized below -- restore the missing assignment from the original
 * file before relying on this function.
 */
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_buffer->dma,
rx_buffer->page_offset,
size,
DMA_FROM_DEVICE);
/* We have pulled a buffer for use, so decrement pagecnt_bias */
rx_buffer->pagecnt_bias--;
return rx_buffer;
}
/**
 * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
 * @rx_ring: rx descriptor ring to transact packets on
 * @rx_buffer: rx buffer to pull data from
 *
 * This function will clean up the contents of the rx_buffer. It will
 * either recycle the buffer or unmap it and free the associated resources.
 */
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
			       struct i40e_rx_buffer *rx_buffer)
{
	if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats)) {
		/* hand second half of page back to the ring */
		i40e_reuse_rx_page(rx_ring, rx_buffer);
	} else {
		/* we are not reusing the buffer so unmap it */
		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
				     i40e_rx_pg_size(rx_ring),
				     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
		__page_frag_cache_drain(rx_buffer->page,
					rx_buffer->pagecnt_bias);
		/* clear contents of buffer_info */
		rx_buffer->page = NULL;
	}
}
/**
 * i40e_process_rx_buffs - Processing of buffers post XDP prog or on error
 * @rx_ring: Rx descriptor ring to transact packets on
 * @xdp_res: Result of the XDP program
 * @xdp: xdp_buff pointing to the data
 **/ staticvoid i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res, struct xdp_buff *xdp)
{
u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
u32 next = rx_ring->next_to_clean, i = 0; struct i40e_rx_buffer *rx_buffer;
xdp->flags = 0;
while (1) {
rx_buffer = i40e_rx_bi(rx_ring, next); if (++next == rx_ring->count)
next = 0;
/* NOTE(review): @xdp_res, 'nr_frags' and 'i' are unused in the visible
 * text; the per-buffer flip/pagecnt_bias handling that should sit here
 * appears to have been lost to extraction -- verify against the original.
 */
/* EOP buffer will be put in i40e_clean_rx_irq() */ if (next == rx_ring->next_to_process) return;
i40e_put_rx_buffer(rx_ring, rx_buffer);
}
}
/**
 * i40e_construct_skb - Allocate skb and populate it
 * @rx_ring: rx descriptor ring to transact packets on
 * @xdp: xdp_buff pointing to the data
 *
 * This function allocates an skb. It then populates it with the page
 * data from the current receive descriptor, taking care to set up the
 * skb correctly.
 */ staticstruct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
{ unsignedint size = xdp->data_end - xdp->data; struct i40e_rx_buffer *rx_buffer; struct skb_shared_info *sinfo; unsignedint headlen; struct sk_buff *skb;
u32 nr_frags = 0;
/* prefetch first cache line of first page */
net_prefetch(xdp->data);
/* Note, we get here by enabling legacy-rx via:
 *
 *    ethtool --set-priv-flags <dev> legacy-rx on
 *
 * In this mode, we currently get 0 extra XDP headroom as
 * opposed to having legacy-rx off, where we process XDP
 * packets going to stack via i40e_build_skb(). The latter
 * provides us currently with 192 bytes of headroom.
 *
 * For i40e_construct_skb() mode it means that the
 * xdp->data_meta will always point to xdp->data, since
 * the helper cannot expand the head. Should this ever
 * change in future for legacy-rx mode on, then lets also
 * add xdp->data_meta handling here.
 */
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, I40E_RX_HDR_SIZE); if (unlikely(!skb)) return NULL;
/* Determine available headroom for copy */
headlen = size; if (headlen > I40E_RX_HDR_SIZE)
headlen = eth_get_headlen(skb->dev, xdp->data,
I40E_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), xdp->data,
ALIGN(headlen, sizeof(long)));
if (unlikely(xdp_buff_has_frags(xdp))) {
sinfo = xdp_get_shared_info_from_buff(xdp);
nr_frags = sinfo->nr_frags;
}
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); /* update all of the pointers */
size -= headlen; if (size) { if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
dev_kfree_skb(skb); return NULL;
}
skb_add_rx_frag(skb, 0, rx_buffer->page,
rx_buffer->page_offset + headlen,
size, xdp->frame_sz); /* buffer is used by skb, update page_offset */
i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
} else { /* buffer is unused, reset bias back to rx_buffer */
rx_buffer->pagecnt_bias++;
}
if (unlikely(xdp_buff_has_frags(xdp))) { struct skb_shared_info *skinfo = skb_shinfo(skb);
/* NOTE(review): the body of this multi-frag branch, the final
 * 'return skb' and the closing brace are not visible -- text appears
 * truncated by extraction; restore from the original file.
 */
/**
 * i40e_build_skb - Build skb around an existing buffer
 * @rx_ring: Rx descriptor ring to transact packets on
 * @xdp: xdp_buff pointing to the data
 *
 * This function builds an skb around an existing Rx buffer, taking care
 * to set up the skb correctly and avoid any memcpy overhead.
 */ staticstruct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
{ unsignedint metasize = xdp->data - xdp->data_meta; struct skb_shared_info *sinfo; struct sk_buff *skb;
u32 nr_frags;
/* Prefetch first cache line of first page. If xdp->data_meta
 * is unused, this points exactly as xdp->data, otherwise we
 * likely have a consumer accessing first few bytes of meta
 * data, and then actual data.
 */
net_prefetch(xdp->data_meta);
if (unlikely(xdp_buff_has_frags(xdp))) {
sinfo = xdp_get_shared_info_from_buff(xdp);
nr_frags = sinfo->nr_frags;
}
/* build an skb around the page buffer */
skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz); if (unlikely(!skb)) return NULL;
/* update pointers within the skb to store the data */
skb_reserve(skb, xdp->data - xdp->data_hard_start);
__skb_put(skb, xdp->data_end - xdp->data); if (metasize)
skb_metadata_set(skb, metasize);
if (unlikely(xdp_buff_has_frags(xdp))) {
xdp_update_skb_shared_info(skb, nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_is_frag_pfmemalloc(xdp));
/* NOTE(review): 'rx_buffer' is not declared anywhere in this function;
 * the next two lines look garbled/misplaced by extraction and will not
 * compile as shown -- verify this branch against the original file.
 */
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); /* buffer is used by skb, update page_offset */
i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
}
return skb;
}
/**
 * i40e_is_non_eop - process handling of non-EOP buffers
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 *
 * If the buffer is an EOP buffer, this function exits returning false,
 * otherwise return true indicating that this is in fact a non-EOP buffer.
 */ bool i40e_is_non_eop(struct i40e_ring *rx_ring, union i40e_rx_desc *rx_desc)
{ /* if we are the last buffer then there is nothing else to do */ #define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT) if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF))) returnfalse;
/* NOTE(review): the non-EOP tail (advancing ring state, 'return true')
 * and the closing brace are not visible -- text appears truncated by
 * extraction; restore from the original file.
 */
/**
 * i40e_run_xdp - run an XDP program
 * @rx_ring: Rx ring being processed
 * @xdp: XDP buffer containing the frame
 * @xdp_prog: XDP program to run
 **/ staticint i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
{ int err, result = I40E_XDP_PASS; struct i40e_ring *xdp_ring;
u32 act;
/* NOTE(review): the 'xdp_out' label below has no matching goto in the
 * visible text; an early bail-out (presumably on !xdp_prog) appears lost
 * to extraction -- as shown, a NULL @xdp_prog would be dereferenced here.
 */
act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: break; case XDP_TX:
xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring); if (result == I40E_XDP_CONSUMED) goto out_failure; break; case XDP_REDIRECT:
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (err) goto out_failure;
result = I40E_XDP_REDIR; break; default:
bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
fallthrough; case XDP_ABORTED:
out_failure:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
fallthrough; /* handle aborts by dropping packet */ case XDP_DROP:
result = I40E_XDP_CONSUMED; break;
}
xdp_out: return result;
}
/**
 * i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register
 * @xdp_ring: XDP Tx ring
 *
 * This function updates the XDP Tx ring tail register.
 **/
void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
{
	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.
	 */
	wmb();

	writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
}
/** * i40e_update_rx_stats - Update Rx ring statistics * @rx_ring: rx descriptor ring * @total_rx_bytes: number of bytes received * @total_rx_packets: number of packets received * * This function updates the Rx ring statistics.
**/ void i40e_update_rx_stats(struct i40e_ring *rx_ring, unsignedint total_rx_bytes, unsignedint total_rx_packets)
{
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->stats.packets += total_rx_packets;
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.25 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.