/************************************************************************* * myri10ge.c: Myricom Myri-10G Ethernet driver. * * Copyright (C) 2005 - 2011 Myricom, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Myricom, Inc. nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * * If the eeprom on your board is not recent enough, you will need to get a * newer firmware image at: * http://www.myri.com/scs/download-Myri10GE.html * * Contact Information: * <help@myri.com> * Myricom, Inc., 325N Santa Anita Avenue, Arcadia, CA 91006
*************************************************************************/
struct myri10ge_rx_buf { struct mcp_kreq_ether_recv __iomem *lanai; /* lanai ptr for recv ring */ struct mcp_kreq_ether_recv *shadow; /* host shadow of recv ring */ struct myri10ge_rx_buffer_state *info; struct page *page;
dma_addr_t bus; int page_offset; int cnt; int fill_cnt; int alloc_fail; int mask; /* number of rx slots -1 */ int watchdog_needed;
};
struct myri10ge_tx_buf { struct mcp_kreq_ether_send __iomem *lanai; /* lanai ptr for sendq */
__be32 __iomem *send_go; /* "go" doorbell ptr */
__be32 __iomem *send_stop; /* "stop" doorbell ptr */ struct mcp_kreq_ether_send *req_list; /* host shadow of sendq */ char *req_bytes; struct myri10ge_tx_buffer_state *info; int mask; /* number of transmit slots -1 */ int req ____cacheline_aligned; /* transmit slots submitted */ int pkt_start; /* packets started */ int stop_queue; int linearized; int done ____cacheline_aligned; /* transmit slots completed */ int pkt_done; /* packets completed */ int wake_queue; int queue_active;
};
struct myri10ge_rx_done { struct mcp_slot *entry;
dma_addr_t bus; int cnt; int idx;
};
/* wait up to 15ms. Longest command is the DMA benchmark, * which is capped at 5ms, but runs from a timeout handler * that runs every 7.8ms. So a 15ms timeout leaves us with * a 2.2ms margin
*/ if (atomic) { /* if atomic is set, do not sleep, * and try to get the completion quickly
* (1ms will be enough for those commands) */ for (sleep_total = 0;
sleep_total < 1000 &&
response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT);
sleep_total += 10) {
udelay(10);
mb();
}
} else { /* use msleep for most command */ for (sleep_total = 0;
sleep_total < 15 &&
response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT);
sleep_total++)
msleep(1);
}
/* * The eeprom strings on the lanaiX have the format * SN=x\0 * MAC=x:x:x:x:x:x\0 * PT:ddd mmm xx xx:xx:xx xx\0 * PV:ddd mmm xx xx:xx:xx xx\0
*/ staticint myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
{ char *ptr, *limit; int i;
/* send a rdma command to the PCIe engine, and wait for the * response in the confirmation address. The firmware should * write a -1 there to indicate it is alive and well
*/
dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);
if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > mgp->sram_size) {
dev_err(dev, "Running firmware has bad header offset (%d)\n",
(int)hdr_offset); return -EIO;
}
/* copy header of running firmware from SRAM to host memory to
* validate firmware */
hdr = kmalloc(bytes, GFP_KERNEL); if (hdr == NULL) return -ENOMEM;
memcpy_fromio(hdr, mgp->sram + hdr_offset, bytes);
status = myri10ge_validate_firmware(mgp, hdr);
kfree(hdr);
/* check to see if adopted firmware has bug where adopting * it will cause broadcasts to be filtered unless the NIC
* is kept in ALLMULTI mode */ if (mgp->fw_ver_major == 1 && mgp->fw_ver_minor == 4 &&
mgp->fw_ver_tiny >= 4 && mgp->fw_ver_tiny <= 11) {
mgp->adopted_rx_filter_bug = 1;
dev_warn(dev, "Adopting fw %d.%d.%d: " "working around rx filter bug\n",
mgp->fw_ver_major, mgp->fw_ver_minor,
mgp->fw_ver_tiny);
} return status;
}
staticint myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp)
{ struct myri10ge_cmd cmd; int status;
/* probe for IPv6 TSO support */
mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
&cmd, 0); if (status == 0) {
mgp->max_tso6 = cmd.data0;
mgp->features |= NETIF_F_TSO6;
}
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0); if (status != 0) {
dev_err(&mgp->pdev->dev, "failed MXGEFW_CMD_GET_RX_RING_SIZE\n"); return -ENXIO;
}
/* send a reload command to the bootstrap MCP, and wait for the * response in the confirmation address. The firmware should * write a -1 there to indicate it is alive and well
*/
dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);
/* FIX: All newest firmware should un-protect the bottom of * the sram before handoff. However, the very first interfaces * do not. Therefore the handoff copy must skip the first 8 bytes
*/
buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
buf[4] = htonl(size - 8); /* length of code */
buf[5] = htonl(8); /* where to copy to */
buf[6] = htonl(0); /* where to jump to */
submit = mgp->sram + MXGEFW_BOOT_HANDOFF;
myri10ge_pio_copy(submit, &buf, sizeof(buf));
mb();
msleep(1);
mb();
i = 0; while (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 9) {
msleep(1 << i);
i++;
} if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) {
dev_err(&mgp->pdev->dev, "handoff failed\n"); return -ENXIO;
}
myri10ge_dummy_rdma(mgp, 1);
status = myri10ge_get_firmware_capabilities(mgp);
/* Run a small DMA test. * The magic multipliers to the length tell the firmware * to do DMA read, write, or read+write tests. The * results are returned in cmd.data0. The upper 16 * bits or the return is the number of transfers completed. * The lower 16 bits is the time in 0.5us ticks that the * transfers took to complete.
*/
len = mgp->tx_boundary;
cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
cmd.data2 = len * 0x10000;
status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); if (status != 0) {
test = "read"; goto abort;
}
mgp->read_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);
cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
cmd.data2 = len * 0x1;
status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); if (status != 0) {
test = "write"; goto abort;
}
mgp->write_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);
cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
cmd.data2 = len * 0x10001;
status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); if (status != 0) {
test = "read/write"; goto abort;
}
mgp->read_write_dma = ((cmd.data0 >> 16) * len * 2 * 2) /
(cmd.data0 & 0xffff);
/* try to send a reset command to the card to see if it
* is alive */
memset(&cmd, 0, sizeof(cmd));
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0); if (status != 0) {
dev_err(&mgp->pdev->dev, "failed reset\n"); return -ENXIO;
}
(void)myri10ge_dma_test(mgp, MXGEFW_DMA_TEST); /* * Use non-ndis mcp_slot (eg, 4 bytes total, * no toeplitz hash value returned. Older firmware will * not understand this command, but will use the correct * sized mcp_slot, so we ignore error returns
*/
cmd.data0 = MXGEFW_RSS_MCP_SLOT_TYPE_MIN;
(void)myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE, &cmd, 0);
/* * Even though we already know how many slices are supported * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES * has magic side effects, and must be called after a reset. * It must be called prior to calling any RSS related cmds, * including assigning an interrupt queue for anything but * slice 0. It must also be called *after* * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by * the firmware to compute offsets.
*/
if (mgp->num_slices > 1) {
/* ask the maximum number of slices it supports */
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
&cmd, 0); if (status != 0) {
dev_err(&mgp->pdev->dev, "failed to get number of slices\n");
}
/* * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior * to setting up the interrupt queue DMA
*/
cmd.data0 = mgp->num_slices;
cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; if (mgp->dev->real_num_tx_queues > 1)
cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0);
/* Firmware older than 1.4.32 only supports multiple * RX queues, so if we get an error, first retry using a
* single TX queue before giving up */ if (status != 0 && mgp->dev->real_num_tx_queues > 1) {
netif_set_real_num_tx_queues(mgp->dev, 1);
cmd.data0 = mgp->num_slices;
cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
status = myri10ge_send_cmd(mgp,
MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0);
}
if (status != 0) {
dev_err(&mgp->pdev->dev, "failed to set number of slices\n");
return status;
}
} for (i = 0; i < mgp->num_slices; i++) {
ss = &mgp->ss[i];
cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->rx_done.bus);
cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->rx_done.bus);
cmd.data2 = i;
status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
&cmd, 0);
}
status |=
myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd, 0); for (i = 0; i < mgp->num_slices; i++) {
ss = &mgp->ss[i];
ss->irq_claim =
(__iomem __be32 *) (mgp->sram + cmd.data0 + 8 * i);
}
status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
&cmd, 0);
mgp->irq_deassert = (__iomem __be32 *) (mgp->sram + cmd.data0);
status |= myri10ge_send_cmd
(mgp, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd, 0);
mgp->intr_coal_delay_ptr = (__iomem __be32 *) (mgp->sram + cmd.data0); if (status != 0) {
dev_err(&mgp->pdev->dev, "failed set interrupt parameters\n"); return status;
}
put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr);
#ifdef CONFIG_MYRI10GE_DCA
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_DCA_OFFSET, &cmd, 0);
dca_tag_off = cmd.data0; for (i = 0; i < mgp->num_slices; i++) {
ss = &mgp->ss[i]; if (status == 0) {
ss->dca_tag = (__iomem __be32 *)
(mgp->sram + dca_tag_off + 4 * i);
} else {
ss->dca_tag = NULL;
}
} #endif/* CONFIG_MYRI10GE_DCA */
/* reset mcp/driver shared state back to 0 */
mgp->link_changes = 0; for (i = 0; i < mgp->num_slices; i++) {
ss = &mgp->ss[i];
staticvoid
myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, int bytes, int watchdog)
{ struct page *page;
dma_addr_t bus; int idx; #if MYRI10GE_ALLOC_SIZE > 4096 int end_offset; #endif
if (unlikely(rx->watchdog_needed && !watchdog)) return;
/* try to refill entire ring */ while (rx->fill_cnt != (rx->cnt + rx->mask + 1)) {
idx = rx->fill_cnt & rx->mask; if (rx->page_offset + bytes <= MYRI10GE_ALLOC_SIZE) { /* we can use part of previous page */
get_page(rx->page);
} else { /* we need a new page */
page =
alloc_pages(GFP_ATOMIC | __GFP_COMP,
MYRI10GE_ALLOC_ORDER); if (unlikely(page == NULL)) { if (rx->fill_cnt - rx->cnt < 16)
rx->watchdog_needed = 1; return;
}
bus = dma_map_page(&mgp->pdev->dev, page, 0,
MYRI10GE_ALLOC_SIZE,
DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&mgp->pdev->dev, bus))) {
__free_pages(page, MYRI10GE_ALLOC_ORDER); if (rx->fill_cnt - rx->cnt < 16)
rx->watchdog_needed = 1; return;
}
}
rx->info[idx].page = rx->page;
rx->info[idx].page_offset = rx->page_offset; /* note that this is the address of the start of the
* page */
dma_unmap_addr_set(&rx->info[idx], bus, rx->bus);
rx->shadow[idx].addr_low =
htonl(MYRI10GE_LOWPART_TO_U32(rx->bus) + rx->page_offset);
rx->shadow[idx].addr_high =
htonl(MYRI10GE_HIGHPART_TO_U32(rx->bus));
/* start next packet on a cacheline boundary */
rx->page_offset += SKB_DATA_ALIGN(bytes);
/* copy 8 descriptors to the firmware at a time */ if ((idx & 7) == 7) {
myri10ge_submit_8rx(&rx->lanai[idx - 7],
&rx->shadow[idx - 7]);
}
}
}
staticinlinevoid
myri10ge_unmap_rx_page(struct pci_dev *pdev, struct myri10ge_rx_buffer_state *info, int bytes)
{ /* unmap the recvd page if we're the only or last user of it */ if (bytes >= MYRI10GE_ALLOC_SIZE / 2 ||
(info->page_offset + 2 * bytes) > MYRI10GE_ALLOC_SIZE) {
dma_unmap_page(&pdev->dev, (dma_unmap_addr(info, bus)
& ~(MYRI10GE_ALLOC_SIZE - 1)),
MYRI10GE_ALLOC_SIZE, DMA_FROM_DEVICE);
}
}
/* * GRO does not support acceleration of tagged vlan frames, and * this NIC does not support vlan tag offload, so we must pop * the tag ourselves to be able to achieve GRO performance that * is comparable to LRO.
*/
/* Mark as free */
tx->info[idx].skb = NULL; if (tx->info[idx].last) {
tx->pkt_done++;
tx->info[idx].last = 0;
}
tx->done++;
len = dma_unmap_len(&tx->info[idx], len);
dma_unmap_len_set(&tx->info[idx], len, 0); if (skb) {
ss->stats.tx_bytes += skb->len;
ss->stats.tx_packets++;
dev_consume_skb_irq(skb); if (len)
dma_unmap_single(&pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
DMA_TO_DEVICE);
} else { if (len)
dma_unmap_page(&pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
DMA_TO_DEVICE);
}
}
dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss); /* * Make a minimal effort to prevent the NIC from polling an * idle tx queue. If we can't get the lock we leave the queue * active. In this case, either a thread was about to start * using the queue anyway, or we lost a race and the NIC will * waste some of its resources polling an inactive queue for a * while.
*/
staticint myri10ge_poll(struct napi_struct *napi, int budget)
{ struct myri10ge_slice_state *ss =
container_of(napi, struct myri10ge_slice_state, napi); int work_done;
#ifdef CONFIG_MYRI10GE_DCA if (ss->mgp->dca_enabled)
myri10ge_update_dca(ss); #endif /* process as many rx events as NAPI will allow */
work_done = myri10ge_clean_rx_done(ss, budget);
/* an interrupt on a non-zero receive-only slice is implicitly
* valid since MSI-X irqs are not shared */ if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
napi_schedule(&ss->napi); return IRQ_HANDLED;
}
/* make sure it is our IRQ, and that the DMA has finished */ if (unlikely(!stats->valid)) return IRQ_NONE;
/* low bit indicates receives are present, so schedule
* napi poll handler */ if (stats->valid & 1)
napi_schedule(&ss->napi);
if (!mgp->msi_enabled && !mgp->msix_enabled) {
put_be32(0, mgp->irq_deassert); if (!myri10ge_deassert_wait)
stats->valid = 0;
mb();
} else
stats->valid = 0;
/* Wait for IRQ line to go low, if using INTx */
i = 0; while (1) {
i++; /* check for transmit completes and receives */
send_done_count = ntohl(stats->send_done_count); if (send_done_count != tx->pkt_done)
myri10ge_tx_done(ss, (int)send_done_count); if (unlikely(i > myri10ge_max_irq_loops)) {
netdev_warn(mgp->dev, "irq stuck?\n");
stats->valid = 0;
schedule_work(&mgp->watchdog_work);
} if (likely(stats->valid == 0)) break;
cpu_relax();
barrier();
}
/* Only slice 0 updates stats */ if (ss == mgp->ss)
myri10ge_check_statblock(mgp);
switch (stringset) { case ETH_SS_STATS:
memcpy(data, *myri10ge_gstrings_main_stats, sizeof(myri10ge_gstrings_main_stats));
data += sizeof(myri10ge_gstrings_main_stats); for (i = 0; i < mgp->num_slices; i++) {
memcpy(data, *myri10ge_gstrings_slice_stats, sizeof(myri10ge_gstrings_slice_stats));
data += sizeof(myri10ge_gstrings_slice_stats);
} break;
}
}
/*
* Use a low-level command to change the LED behavior. Rather than
* blinking (which is the normal case), when identify is used, the
* yellow LED turns solid.
*/
static int myri10ge_led(struct myri10ge_priv *mgp, int on)
{
struct mcp_gen_header *hdr;
struct device *dev = &mgp->pdev->dev;
size_t hdr_off, pattern_off, hdr_len;
u32 pattern = 0xfffffffe;
/* find running firmware header */
hdr_off = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET));
if ((hdr_off & 3) || hdr_off + sizeof(*hdr) > mgp->sram_size) {
dev_err(dev, "Running firmware has bad header offset (%d)\n",
(int)hdr_off);
return -EIO;
}
hdr_len = swab32(readl(mgp->sram + hdr_off +
offsetof(struct mcp_gen_header, header_length)));
pattern_off = hdr_off + offsetof(struct mcp_gen_header, led_pattern);
if (pattern_off >= (hdr_len + hdr_off)) {
dev_info(dev, "Firmware does not support LED identification\n");
return -EINVAL;
}
if (!on)
pattern = swab32(readl(mgp->sram + pattern_off + 4));
writel(swab32(pattern), mgp->sram + pattern_off);
return 0;
}
static int
myri10ge_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
{
struct myri10ge_priv *mgp = netdev_priv(netdev);
int rc;
switch (state) {
case ETHTOOL_ID_ACTIVE:
rc = myri10ge_led(mgp, 1);
break;
case ETHTOOL_ID_INACTIVE:
rc = myri10ge_led(mgp, 0);
break;
if (ss->rx_small.fill_cnt < ss->rx_small.mask + 1) {
netdev_err(dev, "slice-%d: alloced only %d small bufs\n",
slice, ss->rx_small.fill_cnt);
goto abort_with_rx_small_ring;
}
myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0);
if (ss->rx_big.fill_cnt < ss->rx_big.mask + 1) {
netdev_err(dev, "slice-%d: alloced only %d big bufs\n",
slice, ss->rx_big.fill_cnt);
goto abort_with_rx_big_ring;
}
return 0;
abort_with_rx_big_ring:
for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) {
int idx = i & ss->rx_big.mask;
myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx],
mgp->big_bytes);
put_page(ss->rx_big.info[idx].page);
}
abort_with_rx_small_ring:
if (mgp->small_bytes == 0)
ss->rx_small.fill_cnt = ss->rx_small.cnt;
for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) {
int idx = i & ss->rx_small.mask;
myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx],
mgp->small_bytes + MXGEFW_PAD);
put_page(ss->rx_small.info[idx].page);
}
/* If not allocated, skip it */
if (ss->tx.req_list == NULL)
return;
for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) {
idx = i & ss->rx_big.mask;
if (i == ss->rx_big.fill_cnt - 1)
ss->rx_big.info[idx].page_offset = MYRI10GE_ALLOC_SIZE;
myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx],
mgp->big_bytes);
put_page(ss->rx_big.info[idx].page);
}
if (mgp->small_bytes == 0)
ss->rx_small.fill_cnt = ss->rx_small.cnt;
for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) {
idx = i & ss->rx_small.mask;
if (i == ss->rx_small.fill_cnt - 1)
ss->rx_small.info[idx].page_offset =
MYRI10GE_ALLOC_SIZE;
myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx],
mgp->small_bytes + MXGEFW_PAD);
put_page(ss->rx_small.info[idx].page);
}
tx = &ss->tx;
while (tx->done != tx->req) {
idx = tx->done & tx->mask;
skb = tx->info[idx].skb;
/* Mark as free */
tx->info[idx].skb = NULL;
tx->done++;
len = dma_unmap_len(&tx->info[idx], len);
dma_unmap_len_set(&tx->info[idx], len, 0);
if (skb) {
ss->stats.tx_dropped++;
dev_kfree_skb_any(skb);
if (len)
dma_unmap_single(&mgp->pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
DMA_TO_DEVICE);
} else {
if (len)
dma_unmap_page(&mgp->pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
DMA_TO_DEVICE);
}
}
kfree(ss->rx_big.info);
if (mgp->msix_enabled) {
for (i = 0; i < mgp->num_slices; i++)
free_irq(mgp->msix_vectors[i].vector, &mgp->ss[i]);
} else {
free_irq(pdev->irq, &mgp->ss[0]);
}
if (mgp->msi_enabled)
pci_disable_msi(pdev);
if (mgp->msix_enabled)
pci_disable_msix(pdev);
}
static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice)
{
struct myri10ge_cmd cmd;
struct myri10ge_slice_state *ss;
int status;
static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice)
{
struct myri10ge_cmd cmd;
struct myri10ge_slice_state *ss;
int status;
ss = &mgp->ss[slice];
cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus);
cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus);
cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16);
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
if (status == -ENOSYS) {
dma_addr_t bus = ss->fw_stats_bus;
if (slice != 0)
return -EINVAL;
bus += offsetof(struct mcp_irq_data, send_done_count);
cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus);
cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus);
status = myri10ge_send_cmd(mgp,
MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
&cmd, 0);
/* Firmware cannot support multicast without STATS_DMA_V2 */
mgp->fw_multicast_support = 0;
} else {
mgp->fw_multicast_support = 1;
}
return 0;
}
static int myri10ge_open(struct net_device *dev)
{
struct myri10ge_slice_state *ss;
struct myri10ge_priv *mgp = netdev_priv(dev);
struct myri10ge_cmd cmd;
int i, status, big_pow2, slice;
u8 __iomem *itable;
if (mgp->running != MYRI10GE_ETH_STOPPED)
return -EBUSY;
mgp->running = MYRI10GE_ETH_STARTING;
status = myri10ge_reset(mgp);
if (status != 0) {
netdev_err(dev, "failed reset\n");
goto abort_with_nothing;
}
if (mgp->num_slices > 1) {
cmd.data0 = mgp->num_slices;
cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
if (mgp->dev->real_num_tx_queues > 1)
cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0);
if (status != 0) {
netdev_err(dev, "failed to set number of slices\n");
goto abort_with_nothing;
}
/* setup the indirection table */
cmd.data0 = mgp->num_slices;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
&cmd, 0);
status |= myri10ge_send_cmd(mgp,
MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
&cmd, 0);
if (status != 0) {
netdev_err(dev, "failed to setup rss tables\n");
goto abort_with_nothing;
}
/* just enable an identity mapping */
itable = mgp->sram + cmd.data0;
for (i = 0; i < mgp->num_slices; i++)
__raw_writeb(i, &itable[i]);
cmd.data0 = 1;
cmd.data1 = myri10ge_rss_hash;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
&cmd, 0);
if (status != 0) {
netdev_err(dev, "failed to enable slices\n");
goto abort_with_nothing;
}
}
status = myri10ge_request_irq(mgp);
if (status != 0)
goto abort_with_nothing;
/* decide what small buffer size to use. For good TCP rx
* performance, it is important to not receive 1514 byte
* frames into jumbo buffers, as it confuses the socket buffer
* accounting code, leading to drops and erratic performance.
*/
if (dev->mtu <= ETH_DATA_LEN)
/* enough for a TCP header */
mgp->small_bytes = (128 > SMP_CACHE_BYTES)
? (128 - MXGEFW_PAD)
: (SMP_CACHE_BYTES - MXGEFW_PAD);
else
/* enough for a vlan encapsulated ETH_DATA_LEN frame */
mgp->small_bytes = VLAN_ETH_FRAME_LEN;
/* Override the small buffer size? */
if (myri10ge_small_bytes >= 0)
mgp->small_bytes = myri10ge_small_bytes;
/* Firmware needs the big buff size as a power of 2. Lie and
* tell him the buffer is larger, because we only use 1
* buffer/pkt, and the mtu will prevent overruns.
*/
big_pow2 = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD;
if (big_pow2 < MYRI10GE_ALLOC_SIZE / 2) {
while (!is_power_of_2(big_pow2))
big_pow2++;
mgp->big_bytes = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD;
} else {
big_pow2 = MYRI10GE_ALLOC_SIZE;
mgp->big_bytes = big_pow2;
}
/* setup the per-slice data structures */
for (slice = 0; slice < mgp->num_slices; slice++) {
ss = &mgp->ss[slice];
status = myri10ge_get_txrx(mgp, slice);
if (status != 0) {
netdev_err(dev, "failed to get ring sizes or locations\n");
goto abort_with_rings;
}
status = myri10ge_allocate_rings(ss);
if (status != 0)
goto abort_with_rings;
/* only firmware which supports multiple TX queues
* supports setting up the tx stats on non-zero
* slices */
if (slice == 0 || mgp->dev->real_num_tx_queues > 1)
status = myri10ge_set_stats(mgp, slice);
if (status) {
netdev_err(dev, "Couldn't set stats DMA\n");
goto abort_with_rings;
}
/* must happen prior to any irq */
napi_enable(&(ss)->napi);
}
/* now give firmware buffers sizes, and MTU */
cmd.data0 = dev->mtu + ETH_HLEN + VLAN_HLEN;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd, 0);
cmd.data0 = mgp->small_bytes;
status |=
myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd, 0);
cmd.data0 = big_pow2;
status |=
myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd, 0);
if (status) {
netdev_err(dev, "Couldn't set buffer sizes\n");
goto abort_with_rings;
}
/*
* Set Linux style TSO mode; this is needed only on newer
* firmware versions. Older versions default to Linux
* style TSO
*/
cmd.data0 = 0;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd, 0);
if (status && status != -ENOSYS) {
netdev_err(dev, "Couldn't set TSO mode\n");
goto abort_with_rings;
}
abort_with_rings:
while (slice) {
slice--;
napi_disable(&mgp->ss[slice].napi);
}
for (i = 0; i < mgp->num_slices; i++)
myri10ge_free_rings(&mgp->ss[i]);
/*
* copy an array of struct mcp_kreq_ether_send's to the mcp. Copy
* at most 32 bytes at a time, so as to avoid involving the software
* pio handler in the nic. We re-write the first segment's flags
* to mark them valid only after writing the entire chain.
*/
if ((idx + cnt) < tx->mask) {
for (i = 0; i < (cnt - 1); i += 2) {
myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src));
mb(); /* force write every 32 bytes */
srcp += 2;
dstp += 2;
}
} else {
/* submit all but the first request, and ensure
* that it is submitted below */
myri10ge_submit_req_backwards(tx, src, cnt);
i = 0;
}
if (i < cnt) {
/* submit the first request */
myri10ge_pio_copy(dstp, srcp, sizeof(*src));
mb(); /* barrier before setting valid flag */
}
/* re-write the last 32-bits with the valid flags */
src->flags = last_flags;
put_be32(*((__be32 *) src + 3), (__be32 __iomem *) dst + 3);
tx->req += cnt;
mb();
}
static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp,
struct myri10ge_tx_buf *tx, int idx)
{
unsigned int len;
int last_idx;
/* Free any DMA resources we've alloced and clear out the skb slot */
last_idx = (idx + 1) & tx->mask;
idx = tx->req & tx->mask;
do {
len = dma_unmap_len(&tx->info[idx], len);
if (len) {
if (tx->info[idx].skb != NULL)
dma_unmap_single(&mgp->pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
DMA_TO_DEVICE);
else
dma_unmap_page(&mgp->pdev->dev,
dma_unmap_addr(&tx->info[idx],
bus), len,
DMA_TO_DEVICE);
dma_unmap_len_set(&tx->info[idx], len, 0);
tx->info[idx].skb = NULL;
}
idx = (idx + 1) & tx->mask;
} while (idx != last_idx);
}
/*
* Transmit a packet. We need to split the packet so that a single
* segment does not cross myri10ge->tx_boundary, so this makes segment
* counting tricky. So rather than try to count segments up front, we
* just give up if there are too few segments to hold a reasonably
* fragmented packet currently available. If we run
* out of segments while preparing a packet for DMA, we just linearize
* it and try again.
*/
if (skb_is_gso(skb)) {
mss = skb_shinfo(skb)->gso_size;
max_segments = MYRI10GE_MAX_SEND_DESC_TSO;
}
if ((unlikely(avail < max_segments))) {
/* we are out of transmit resources */
tx->stop_queue++;
netif_tx_stop_queue(netdev_queue);
return NETDEV_TX_BUSY;
}
/* Setup checksum offloading, if needed */
cksum_offset = 0;
pseudo_hdr_offset = 0;
odd_flag = 0;
flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
cksum_offset = skb_checksum_start_offset(skb);
pseudo_hdr_offset = cksum_offset + skb->csum_offset;
/* If the headers are excessively large, then we must
* fall back to a software checksum */
if (unlikely(!mss && (cksum_offset > 255 ||
pseudo_hdr_offset > 127))) {
if (skb_checksum_help(skb))
goto drop;
cksum_offset = 0;
pseudo_hdr_offset = 0;
} else {
odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
flags |= MXGEFW_FLAGS_CKSUM;
}
}
cum_len = 0;
if (mss) { /* TSO */
/* this removes any CKSUM flag from before */
flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
/* negative cum_len signifies to the
* send loop that we are still in the
* header portion of the TSO packet.
* TSO header can be at most 1KB long */
cum_len = -skb_tcp_all_headers(skb);
/* for IPv6 TSO, the checksum offset stores the
* TCP header length, to save the firmware from
* the need to parse the headers */
if (skb_is_gso_v6(skb)) {
cksum_offset = tcp_hdrlen(skb);
/* Can only handle headers <= max_tso6 long */
if (unlikely(-cum_len > mgp->max_tso6))
return myri10ge_sw_tso(skb, dev);
}
/* for TSO, pseudo_hdr_offset holds mss.
* The firmware figures out where to put
* the checksum by parsing the header. */
pseudo_hdr_offset = mss;
} else
/* Mark small packets, and pad out tiny packets */
if (skb->len <= MXGEFW_SEND_SMALL_SIZE) {
flags |= MXGEFW_FLAGS_SMALL;
/* pad frames to at least ETH_ZLEN bytes */
if (eth_skb_pad(skb)) {
/* The packet is gone, so we must
* return 0 */
ss->stats.tx_dropped += 1;
return NETDEV_TX_OK;
}
}
/* map the skb for DMA */
len = skb_headlen(skb);
bus = dma_map_single(&mgp->pdev->dev, skb->data, len, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&mgp->pdev->dev, bus)))
goto drop;
/* "rdma_count" is the number of RDMAs belonging to the
* current packet BEFORE the current send request. For
* non-TSO packets, this is equal to "count".
* For TSO packets, rdma_count needs to be reset
* to 0 after a segment cut.
*
* The rdma_count field of the send request is
* the number of RDMAs of the packet starting at
* that request. For TSO send requests with one ore more cuts
* in the middle, this is the number of RDMAs starting
* after the last cut in the request. All previous
* segments before the last cut implicitly have 1 RDMA.
*
* Since the number of RDMAs is not known beforehand,
* it must be filled-in retroactively - after each
* segmentation cut or at the end of the entire packet.
*/
while (1) {
/* Break the SKB or Fragment up into pieces which
* do not cross mgp->tx_boundary */
low = MYRI10GE_LOWPART_TO_U32(bus);
high_swapped = htonl(MYRI10GE_HIGHPART_TO_U32(bus));
while (len) {
u8 flags_next;
int cum_len_next;
if (unlikely(count == max_segments))
goto abort_linearize;
/* can be called from atomic contexts,
* pass 1 to force atomicity in myri10ge_send_cmd() */
myri10ge_change_promisc(mgp, dev->flags & IFF_PROMISC, 1);
/* This firmware is known to not support multicast */
if (!mgp->fw_multicast_support)
return;
static int myri10ge_set_mac_address(struct net_device *dev, void *addr)
{
struct sockaddr *sa = addr;
struct myri10ge_priv *mgp = netdev_priv(dev);
int status;
if (!is_valid_ether_addr(sa->sa_data))
return -EADDRNOTAVAIL;
status = myri10ge_update_mac_address(mgp, sa->sa_data);
if (status != 0) {
netdev_err(dev, "changing mac address failed with %d\n",
status);
return status;
}
/* change the dev structure */
eth_hw_addr_set(dev, sa->sa_data);
return 0;
}
static int myri10ge_change_mtu(struct net_device *dev, int new_mtu)
{
struct myri10ge_priv *mgp = netdev_priv(dev);
netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu);
if (mgp->running) {
/* if we change the mtu on an active device, we must
* reset the device so the firmware sees the change */
myri10ge_close(dev);
WRITE_ONCE(dev->mtu, new_mtu);
myri10ge_open(dev);
} else {
WRITE_ONCE(dev->mtu, new_mtu);
}
return 0;
}
/*
* Enable ECRC to align PCI-E Completion packets on an 8-byte boundary.
* Only do it if the bridge is a root port since we don't want to disturb
* any other device, except if forced with myri10ge_ecrc_enable > 1.
*/
/* check that the bridge is a root port */
if (pci_pcie_type(bridge) != PCI_EXP_TYPE_ROOT_PORT) {
if (myri10ge_ecrc_enable > 1) {
struct pci_dev *prev_bridge, *old_bridge = bridge;
/* Walk the hierarchy up to the root port
* where ECRC has to be enabled */
do {
prev_bridge = bridge;
bridge = bridge->bus->self;
if (!bridge || prev_bridge == bridge) {
dev_err(dev,
"Failed to find root port"
" to force ECRC\n");
return;
}
} while (pci_pcie_type(bridge) !=
PCI_EXP_TYPE_ROOT_PORT);
dev_info(dev,
"Forcing ECRC on non-root port %s"
" (enabling on root port %s)\n",
pci_name(old_bridge), pci_name(bridge));
} else {
dev_err(dev,
"Not enabling ECRC on non-root port %s\n",
pci_name(bridge));
return;
}
}
cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR);
if (!cap)
return;
ret = pci_read_config_dword(bridge, cap + PCI_ERR_CAP, &err_cap);
if (ret) {
dev_err(dev, "failed reading ext-conf-space of %s\n",
pci_name(bridge));
dev_err(dev, "\t pci=nommconf in use? "
"or buggy/incomplete/absent ACPI MCFG attr?\n");
return;
}
if (!(err_cap & PCI_ERR_CAP_ECRC_GENC))
return;
err_cap |= PCI_ERR_CAP_ECRC_GENE;
pci_write_config_dword(bridge, cap + PCI_ERR_CAP, err_cap);
dev_info(dev, "Enabled ECRC on upstream bridge %s\n", pci_name(bridge));
}
/*
* The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
* when the PCI-E Completion packets are aligned on an 8-byte
* boundary. Some PCI-E chip sets always align Completion packets; on
* the ones that do not, the alignment can be enforced by enabling
* ECRC generation (if supported).
*
* When PCI-E Completion packets are not aligned, it is actually more
* efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
*
* If the driver can neither enable ECRC nor verify that it has
* already been enabled, then it must use a firmware image which works
* around unaligned completion packets (myri10ge_rss_ethp_z8e.dat), and it
* should also ensure that it never gives the device a Read-DMA which is
* larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
* enabled, then the driver should use the aligned (myri10ge_rss_eth_z8e.dat)
* firmware image, and set tx_boundary to 4KB.
*/
mgp->tx_boundary = 4096;
/*
* Verify the max read request size was set to 4KB
* before trying the test with 4KB.
*/
status = pcie_get_readrq(pdev);
if (status < 0) {
dev_err(dev, "Couldn't read max read req size: %d\n", status);
goto abort;
}
if (status != 4096) {
dev_warn(dev, "Max Read Request size != 4096 (%d)\n", status);
mgp->tx_boundary = 2048;
}
/*
* load the optimized firmware (which assumes aligned PCIe
* completions) in order to see if it works on this host.
*/
set_fw_name(mgp, myri10ge_fw_aligned, false);
status = myri10ge_load_firmware(mgp, 1);
if (status != 0) {
goto abort;
}
/*
* Enable ECRC if possible
*/
myri10ge_enable_ecrc(mgp);
/*
* Run a DMA test which watches for unaligned completions and
* aborts on the first one seen.
*/
status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
if (status == 0)
return; /* keep the aligned firmware */
if (status != -E2BIG)
dev_warn(dev, "DMA test failed: %d\n", status);
if (status == -ENOSYS)
dev_warn(dev, "Falling back to ethp! "
"Please install up to date fw\n");
abort:
/* fall back to using the unaligned firmware */
mgp->tx_boundary = 2048;
set_fw_name(mgp, myri10ge_fw_unaligned, false);
}
static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
{
int overridden = 0;
if (myri10ge_force_firmware == 0) {
int link_width;
u16 lnk;
/* Check to see if Link is less than 8 or if the
* upstream bridge is known to provide aligned
* completions */
if (link_width < 8) {
dev_info(&mgp->pdev->dev, "PCIE x%d Link\n",
link_width);
mgp->tx_boundary = 4096;
set_fw_name(mgp, myri10ge_fw_aligned, false);
} else {
myri10ge_firmware_probe(mgp);
}
} else {
if (myri10ge_force_firmware == 1) {
dev_info(&mgp->pdev->dev,
"Assuming aligned completions (forced)\n");
mgp->tx_boundary = 4096;
set_fw_name(mgp, myri10ge_fw_aligned, false);
} else {
dev_info(&mgp->pdev->dev,
"Assuming unaligned completions (forced)\n");
mgp->tx_boundary = 2048;
set_fw_name(mgp, myri10ge_fw_unaligned, false);
}
}
cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR);
if (cap) {
/* a sram parity error can cause a surprise link
* down; since we expect and can recover from sram
* parity errors, mask surprise link down events */
pci_read_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, &mask);
mask |= 0x20;
pci_write_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, mask);
}
}
/*
* This watchdog is used to check whether the board has suffered
* from a parity error and needs to be recovered.
*/
static void myri10ge_watchdog(struct work_struct *work)
{
struct myri10ge_priv *mgp =
container_of(work, struct myri10ge_priv, watchdog_work);
struct myri10ge_slice_state *ss;
u32 reboot, rx_pause_cnt;
int status, rebooted;
int i;
int reset_needed = 0;
int busy_slice_cnt = 0;
u16 cmd, vendor;
mgp->watchdog_resets++;
pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd);
rebooted = 0;
if ((cmd & PCI_COMMAND_MASTER) == 0) {
/* Bus master DMA disabled? Check to see
* if the card rebooted due to a parity error
* For now, just report it */
reboot = myri10ge_read_reboot(mgp);
netdev_err(mgp->dev, "NIC rebooted (0x%x),%s resetting\n",
reboot, myri10ge_reset_recover ? "" : " not");
if (myri10ge_reset_recover == 0)
return;
rtnl_lock();
mgp->rebooted = 1;
rebooted = 1;
myri10ge_close(mgp->dev);
myri10ge_reset_recover--;
mgp->rebooted = 0;
/*
* A rebooted nic will come back with config space as
* it was after power was applied to PCIe bus.
* Attempt to restore config space which was saved
* when the driver was loaded, or the last time the
* nic was resumed from power saving mode.
*/
pci_restore_state(mgp->pdev);
/* save state again for accounting reasons */
pci_save_state(mgp->pdev);
} else {
/* if we get back -1's from our slot, perhaps somebody
* powered off our card. Don't try to reset it in
* this case */
if (cmd == 0xffff) {
pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor);
if (vendor == 0xffff) {
netdev_err(mgp->dev, "device disappeared!\n");
return;
}
}
/* Perhaps it is a software error. See if stuck slice
* has recovered, reset if not */
rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause);
for (i = 0; i < mgp->num_slices; i++) {
ss = mgp->ss;
if (ss->stuck) {
myri10ge_check_slice(ss, &reset_needed,
&busy_slice_cnt,
rx_pause_cnt);
ss->stuck = 0;
}
}
if (!reset_needed) {
netdev_dbg(mgp->dev, "not resetting\n");
return;
}
if (!rebooted) {
rtnl_lock();
myri10ge_close(mgp->dev);
}
status = myri10ge_load_firmware(mgp, 1);
if (status != 0)
netdev_err(mgp->dev, "failed to load firmware\n");
else
myri10ge_open(mgp->dev);
rtnl_unlock();
}
/*
* We use our own timer routine rather than relying upon
* netdev->tx_timeout because we have a very large hardware transmit
* queue. Due to the large queue, the netdev->tx_timeout function
* cannot detect a NIC with a parity error in a timely fashion if the
* NIC is lightly loaded.
*/
static void myri10ge_watchdog_timer(struct timer_list *t)
{
struct myri10ge_priv *mgp;
struct myri10ge_slice_state *ss;
int i, reset_needed, busy_slice_cnt;
u32 rx_pause_cnt;
u16 cmd;
mgp = timer_container_of(mgp, t, watchdog_timer);
rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause);
busy_slice_cnt = 0;
for (i = 0, reset_needed = 0;
i < mgp->num_slices && reset_needed == 0; ++i) {
ss = &mgp->ss[i];
if (ss->rx_small.watchdog_needed) {
myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
mgp->small_bytes + MXGEFW_PAD,
1);
if (ss->rx_small.fill_cnt - ss->rx_small.cnt >=
myri10ge_fill_thresh)
ss->rx_small.watchdog_needed = 0;
}
if (ss->rx_big.watchdog_needed) {
myri10ge_alloc_rx_pages(mgp, &ss->rx_big,
mgp->big_bytes, 1);
if (ss->rx_big.fill_cnt - ss->rx_big.cnt >=
myri10ge_fill_thresh)
ss->rx_big.watchdog_needed = 0;
}
myri10ge_check_slice(ss, &reset_needed, &busy_slice_cnt,
rx_pause_cnt);
}
/* if we've sent or received no traffic, poll the NIC to
* ensure it is still there. Otherwise, we risk not noticing
* an error in a timely fashion */
if (busy_slice_cnt == 0) {
pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd);
if ((cmd & PCI_COMMAND_MASTER) == 0) {
reset_needed = 1;
}
}
mgp->watchdog_pause = rx_pause_cnt;
/*
* This function determines the number of slices supported.
* The number slices is the minimum of the number of CPUS,
* the number of MSI-X irqs supported, the number of slices
* supported by the firmware
*/
static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
{
struct myri10ge_cmd cmd;
struct pci_dev *pdev = mgp->pdev;
char *old_fw;
bool old_allocated;
int i, status, ncpus;
/* try to load the slice aware rss firmware */
old_fw = mgp->fw_name;
old_allocated = mgp->fw_name_allocated;
/* don't free old_fw if we override it. */
mgp->fw_name_allocated = false;
if (myri10ge_fw_name != NULL) {
dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n",
myri10ge_fw_name);
set_fw_name(mgp, myri10ge_fw_name, false);
} else if (old_fw == myri10ge_fw_aligned)
set_fw_name(mgp, myri10ge_fw_rss_aligned, false);
else
set_fw_name(mgp, myri10ge_fw_rss_unaligned, false);
status = myri10ge_load_firmware(mgp, 0);
if (status != 0) {
dev_info(&pdev->dev, "Rss firmware not found\n");
if (old_allocated)
kfree(old_fw);
return;
}
/* hit the board with a reset to ensure it is alive */
memset(&cmd, 0, sizeof(cmd));
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0);
if (status != 0) {
dev_err(&mgp->pdev->dev, "failed reset\n");
goto abort_with_fw;
}
/* tell it the size of the interrupt queues */
cmd.data0 = mgp->max_intr_slots * sizeof(struct mcp_slot);
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0);
if (status != 0) {
dev_err(&mgp->pdev->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
goto abort_with_fw;
}
/* ask the maximum number of slices it supports */
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd, 0);
if (status != 0)
goto abort_with_fw;
else
mgp->num_slices = cmd.data0;
/* Only allow multiple slices if MSI-X is usable */
if (!myri10ge_msi) {
goto abort_with_fw;
}
/* if the admin did not specify a limit to how many
* slices we should use, cap it automatically to the
* number of CPUs currently online */
if (myri10ge_max_slices == -1)
myri10ge_max_slices = ncpus;
if (mgp->num_slices > myri10ge_max_slices)
mgp->num_slices = myri10ge_max_slices;
/* Now try to allocate as many MSI-X vectors as we have
* slices. We give up on MSI-X if we can only get a single
* vector. */
mgp->msix_vectors = kcalloc(mgp->num_slices, sizeof(*mgp->msix_vectors),
GFP_KERNEL);
if (mgp->msix_vectors == NULL)
goto no_msix;
for (i = 0; i < mgp->num_slices; i++) {
mgp->msix_vectors[i].entry = i;
}
while (mgp->num_slices > 1) {
mgp->num_slices = rounddown_pow_of_two(mgp->num_slices);
if (mgp->num_slices == 1)
goto no_msix;
status = pci_enable_msix_range(pdev,
mgp->msix_vectors,
mgp->num_slices,
mgp->num_slices);
if (status < 0)
goto no_msix;
pci_disable_msix(pdev);
if (status == mgp->num_slices) {
if (old_allocated)
kfree(old_fw);
return;
} else {
mgp->num_slices = status;
}
}
if (pci_enable_device(pdev)) {
dev_err(&pdev->dev, "pci_enable_device call failed\n");
status = -ENODEV;
goto abort_with_netdev;
}
/* Find the vendor-specific cap so we can check
* the reboot register later on */
mgp->vendor_specific_offset
= pci_find_capability(pdev, PCI_CAP_ID_VNDR);
/* Set our max read request to 4KB */
status = pcie_set_readrq(pdev, 4096);
if (status != 0) {
dev_err(&pdev->dev, "Error %d writing PCI_EXP_DEVCTL\n",
status);
goto abort_with_enabled;
}
myri10ge_mask_surprise_down(pdev);
pci_set_master(pdev);
status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (status != 0) {
dev_err(&pdev->dev, "Error %d setting DMA mask\n", status);
goto abort_with_enabled;
}
mgp->cmd = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->cmd),
&mgp->cmd_bus, GFP_KERNEL);
if (!mgp->cmd) {
status = -ENOMEM;
goto abort_with_enabled;
}
netdev->vlan_features |= mgp->features;
if (mgp->fw_ver_tiny < 37)
netdev->vlan_features &= ~NETIF_F_TSO6;
if (mgp->fw_ver_tiny < 32)
netdev->vlan_features &= ~NETIF_F_TSO;
/* make sure we can get an irq, and that MSI can be
* setup (if available). */
status = myri10ge_request_irq(mgp);
if (status != 0)
goto abort_with_slices;
myri10ge_free_irq(mgp);
/* Save configuration space to be restored if the
* nic resets due to a parity error */
pci_save_state(pdev);
/* Setup the watchdog timer */
timer_setup(&mgp->watchdog_timer, myri10ge_watchdog_timer, 0);
/*
* myri10ge_remove
*
* Does what is necessary to shutdown one Myrinet device. Called
* once for each Myrinet card by the kernel when a module is
* unloaded.
*/
static void myri10ge_remove(struct pci_dev *pdev)
{
struct myri10ge_priv *mgp;
struct net_device *netdev;
mgp = pci_get_drvdata(pdev);
if (mgp == NULL)
return;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.