/* * Copyright (c) 2003-2008 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
/*
 * Register dflt_msg_enable as an integer module parameter with permission
 * bits 0644 and attach its description string.
 */
module_param(dflt_msg_enable, int, 0644);
MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T3 default message enable bitmap");
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi = 2;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI or MSI-X");
/*
 * The driver enables offload as a default.
 * To disable it, use ofld_disable = 1.
 */
static int ofld_disable = 0;

module_param(ofld_disable, int, 0644);
MODULE_PARM_DESC(ofld_disable, "whether to enable offload at init time or not");
/*
 * We have work elements that we need to cancel when an interface is taken
 * down.  Normally the work elements would be executed by keventd but that
 * can deadlock because of linkwatch.  If our close method takes the rtnl
 * lock and linkwatch is ahead of our work elements in keventd, linkwatch
 * will block keventd as it needs the rtnl lock, and we'll deadlock waiting
 * for our work to complete.  Get our own work queue to solve this.
 */
struct workqueue_struct *cxgb3_wq;
/** * link_report - show link status and link speed/duplex * @dev: the port whose settings are to be reported * * Shows the link status, speed, and duplex of a port.
*/ staticvoid link_report(struct net_device *dev)
{ if (!netif_carrier_ok(dev))
netdev_info(dev, "link down\n"); else { constchar *s = "10Mbps"; conststruct port_info *p = netdev_priv(dev);
switch (p->link_config.speed) { case SPEED_10000:
s = "10Gbps"; break; case SPEED_1000:
s = "1000Mbps"; break; case SPEED_100:
s = "100Mbps"; break;
}
/** * t3_os_link_changed - handle link status changes * @adapter: the adapter associated with the link change * @port_id: the port index whose link status has changed * @link_stat: the new status of the link * @speed: the new speed setting * @duplex: the new duplex setting * @pause: the new flow-control setting * * This is the OS-dependent handler for link status changes. The OS * neutral handler takes care of most of the processing for these events, * then calls this handler for any OS-specific processing.
*/ void t3_os_link_changed(struct adapter *adapter, int port_id, int link_stat, int speed, int duplex, int pause)
{ struct net_device *dev = adapter->port[port_id]; struct port_info *pi = netdev_priv(dev); struct cmac *mac = &pi->mac;
/* Skip changes from disabled ports. */ if (!netif_running(dev)) return;
if (link_stat != netif_carrier_ok(dev)) { if (link_stat) {
disable_tx_fifo_drain(adapter, pi);
/** * t3_os_phymod_changed - handle PHY module changes * @adap: the adapter associated with the link change * @port_id: the port index whose link status has changed * * This is the OS-dependent handler for PHY module changes. It is * invoked when a PHY module is removed or inserted for any OS-specific * processing.
*/ void t3_os_phymod_changed(struct adapter *adap, int port_id)
{ staticconstchar *mod_str[] = {
NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
};
/** * link_start - enable a port * @dev: the device to enable * * Performs the MAC and PHY actions needed to enable a port.
*/ staticvoid link_start(struct net_device *dev)
{ struct port_info *pi = netdev_priv(dev); struct cmac *mac = &pi->mac;
/** * setup_rss - configure RSS * @adap: the adapter * * Sets up RSS to distribute packets to multiple receive queues. We * configure the RSS CPU lookup table to distribute to the number of HW * receive queues, and the response queue lookup table to narrow that * down to the response queues actually configured for each port. * We always configure the RSS mapping for two ports since the mapping * table has plenty of entries.
*/ staticvoid setup_rss(struct adapter *adap)
{ int i; unsignedint nq0 = adap2pinfo(adap, 0)->nqsets; unsignedint nq1 = adap->port[1] ? adap2pinfo(adap, 1)->nqsets : 1;
u8 cpus[SGE_QSETS + 1];
u16 rspq_map[RSS_TABLE_SIZE + 1];
for (i = 0; i < SGE_QSETS; ++i)
cpus[i] = i;
cpus[SGE_QSETS] = 0xff; /* terminator */
for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
rspq_map[i] = i % nq0;
rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
}
rspq_map[RSS_TABLE_SIZE] = 0xffff; /* terminator */
/**
 *	ring_dbs - write the doorbell register for every Tx queue in use
 *	@adap: the adapter
 *
 *	Walks all SGE_QSETS queue sets.  For each one whose adap field is set,
 *	writes A_SG_KDOORBELL once per Tx queue (SGE_TXQ_PER_SET of them),
 *	passing F_SELEGRCNTX together with that queue's cntxt_id.
 */
static void ring_dbs(struct adapter *adap)
{
	int i, j;

	for (i = 0; i < SGE_QSETS; i++) {
		struct sge_qset *qs = &adap->sge.qs[i];

		/* Queue sets without an adapter are skipped. */
		if (!qs->adap)
			continue;

		for (j = 0; j < SGE_TXQ_PER_SET; j++)
			t3_write_reg(adap, A_SG_KDOORBELL,
				     F_SELEGRCNTX |
				     V_EGRCNTX(qs->txq[j].cntxt_id));
	}
}
/**
 *	init_napi - register the NAPI contexts of all queue sets in use
 *	@adap: the adapter
 *
 *	Calls netif_napi_add() for every queue set whose adap field is set,
 *	using the poll handler already stored in the queue set's napi
 *	structure, and then records that this was done by setting NAPI_INIT
 *	in the adapter flags.
 */
static void init_napi(struct adapter *adap)
{
	int i;

	for (i = 0; i < SGE_QSETS; i++) {
		struct sge_qset *qs = &adap->sge.qs[i];

		if (qs->adap)
			netif_napi_add(qs->netdev, &qs->napi, qs->napi.poll);
	}

	/*
	 * netif_napi_add() can be called only once per napi_struct because it
	 * adds each new napi_struct to a list.  Be careful not to call it a
	 * second time, e.g., during EEH recovery, by making a note of it.
	 */
	adap->flags |= NAPI_INIT;
}
/*
 * Wait until all NAPI handlers are descheduled.  This includes the handlers of
 * both netdevices representing interfaces and the dummy ones for the extra
 * queues.
 *
 * Implemented by calling napi_disable() on every queue set whose adap field
 * is set.
 */
static void quiesce_rx(struct adapter *adap)
{
	int i;

	for (i = 0; i < SGE_QSETS; i++) {
		if (adap->sge.qs[i].adap)
			napi_disable(&adap->sge.qs[i].napi);
	}
}
/*
 * Call napi_enable() on the NAPI context of every queue set whose adap field
 * is set.  This is the counterpart of the napi_disable() loop in quiesce_rx().
 */
static void enable_all_napi(struct adapter *adap)
{
	int i;

	for (i = 0; i < SGE_QSETS; i++) {
		if (adap->sge.qs[i].adap)
			napi_enable(&adap->sge.qs[i].napi);
	}
}
/** * setup_sge_qsets - configure SGE Tx/Rx/response queues * @adap: the adapter * * Determines how many sets of SGE queues to use and initializes them. * We support multiple queue sets per port if we have MSI-X, otherwise * just one queue set per port.
*/ staticint setup_sge_qsets(struct adapter *adap)
{ int i, j, err, irq_idx = 0, qset_idx = 0; unsignedint ntxq = SGE_TXQ_PER_SET;
/*
 * Sends an sk_buff to an offload queue driver
 * after dealing with any active network taps.
 *
 * The call to t3_offload_tx() is made with bottom halves disabled; its
 * return value is passed back to the caller unchanged.
 */
static inline int offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
{
	int ret;

	local_bh_disable();
	ret = t3_offload_tx(tdev, skb);
	local_bh_enable();

	return ret;
}
staticint send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo, int hi, int port)
{ struct sk_buff *skb; struct mngt_pktsched_wr *req; int ret;
skb = alloc_skb(sizeof(*req), GFP_KERNEL); if (!skb)
skb = adap->nofail_skb; if (!skb) return -ENOMEM;
ret = request_firmware(&tpsram, buf, dev); if (ret < 0) {
dev_err(dev, "could not load TP SRAM: unable to load %s\n",
buf); return ret;
}
ret = t3_check_tpsram(adap, tpsram->data, tpsram->size); if (ret) goto release_tpsram;
ret = t3_set_proto_sram(adap, tpsram->data); if (ret == 0)
dev_info(dev, "successful update of protocol engine " "to %d.%d.%d\n",
TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO); else
dev_err(dev, "failed to update of protocol engine %d.%d.%d\n",
TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO); if (ret)
dev_err(dev, "loading protocol SRAM failed\n");
release_tpsram:
release_firmware(tpsram);
return ret;
}
/** * t3_synchronize_rx - wait for current Rx processing on a port to complete * @adap: the adapter * @p: the port * * Ensures that current Rx processing on any of the queues associated with * the given port completes before returning. We do this by acquiring and * releasing the locks of the response queues associated with the port.
*/ staticvoid t3_synchronize_rx(struct adapter *adap, conststruct port_info *p)
{ int i;
for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) { struct sge_rspq *q = &adap->sge.qs[i].rspq;
if (adapter->params.rev > 0) {
t3_set_vlan_accel(adapter, 1 << pi->port_id,
features & NETIF_F_HW_VLAN_CTAG_RX);
} else { /* single control for all ports */ unsignedint i, have_vlans = features & NETIF_F_HW_VLAN_CTAG_RX;
/** * cxgb_up - enable the adapter * @adap: adapter being enabled * * Called when the first port is enabled, this function performs the * actions necessary to make an adapter operational, such as completing * the initialization of HW modules, and enabling interrupts. * * Must be called with the rtnl lock held.
*/ staticint cxgb_up(struct adapter *adap)
{ int i, err;
/* * Clear interrupts now to catch errors if t3_init_hw fails. * We clear them again later as initialization may trigger * conditions that can interrupt.
*/
t3_intr_clear(adap);
/*
 * Release resources when all the ports and offloading have been stopped.
 *
 * @adapter: the adapter being shut down
 * @on_wq:   non-zero skips the flush of cxgb3_wq at the end
 *           (presumably because the caller is itself running on that
 *           workqueue - TODO confirm against callers)
 *
 * NOTE(review): t3_sge_stop() is called both before interrupts are disabled
 * and again after Rx has been quiesced; this looks intentional but should be
 * confirmed.
 */
static void cxgb_down(struct adapter *adapter, int on_wq)
{
	t3_sge_stop(adapter);

	spin_lock_irq(&adapter->work_lock);	/* sync with PHY intr task */
	t3_intr_disable(adapter);
	spin_unlock_irq(&adapter->work_lock);

	free_irq_resources(adapter);
	quiesce_rx(adapter);
	t3_sge_stop(adapter);

	if (!on_wq)
		flush_workqueue(cxgb3_wq);	/* wait for external IRQ handler */
}
if (sysfs_create_group(&tdev->lldev->dev.kobj, &offload_attr_group))
dev_dbg(&dev->dev, "cannot create sysfs group\n");
/* Call back all registered clients */
cxgb3_add_clients(tdev);
out: /* restore them in case the offload module has changed them */ if (err) {
t3_tp_set_offload_mode(adapter, 0);
clear_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map);
cxgb3_set_dummy_ops(tdev);
} return err;
}
/* * Version scheme: * bits 0..9: chip version * bits 10..15: chip revision * bit 31: set for PCIe cards
*/
regs->version = 3 | (ap->params.rev << 10) | (is_pcie(ap) << 31);
/* * We skip the MAC statistics registers because they are clear-on-read. * Also reading multi-register stats would need to synchronize with the * periodic mac stats accumulation. Hard to justify the complexity.
*/
memset(buf, 0, T3_REGMAP_SIZE);
reg_block_dump(ap, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
reg_block_dump(ap, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
reg_block_dump(ap, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
reg_block_dump(ap, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
reg_block_dump(ap, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
reg_block_dump(ap, buf, A_XGM_SERDES_STATUS0,
XGM_REG(A_XGM_SERDES_STAT3, 1));
reg_block_dump(ap, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
}
/**
 *	speed_duplex_to_caps - map a speed/duplex pair to a SUPPORTED_* bit
 *	@speed: one of SPEED_10, SPEED_100, SPEED_1000 or SPEED_10000
 *	@duplex: DUPLEX_FULL selects the full-duplex capability; for 10/100/1000
 *		 any other value selects the half-duplex capability
 *
 *	Returns the matching SUPPORTED_*baseT_{Full,Half} value, or 0 when the
 *	speed is not one of the four above or when SPEED_10000 is requested
 *	with anything other than full duplex.
 */
static int speed_duplex_to_caps(int speed, int duplex)
{
	int cap = 0;

	switch (speed) {
	case SPEED_10:
		if (duplex == DUPLEX_FULL)
			cap = SUPPORTED_10baseT_Full;
		else
			cap = SUPPORTED_10baseT_Half;
		break;
	case SPEED_100:
		if (duplex == DUPLEX_FULL)
			cap = SUPPORTED_100baseT_Full;
		else
			cap = SUPPORTED_100baseT_Half;
		break;
	case SPEED_1000:
		if (duplex == DUPLEX_FULL)
			cap = SUPPORTED_1000baseT_Full;
		else
			cap = SUPPORTED_1000baseT_Half;
		break;
	case SPEED_10000:
		/* Only the full-duplex capability is mapped at 10G. */
		if (duplex == DUPLEX_FULL)
			cap = SUPPORTED_10000baseT_Full;
		break;
	default:
		/* Unknown speed: report no capability. */
		break;
	}

	return cap;
}
if (!(lc->supported & SUPPORTED_Autoneg)) { /* * PHY offers a single speed/duplex. See if that's what's * being requested.
*/ if (cmd->base.autoneg == AUTONEG_DISABLE) {
u32 speed = cmd->base.speed; int cap = speed_duplex_to_caps(speed, cmd->base.duplex); if (lc->supported & cap) return0;
} return -EINVAL;
}
if (cmd->base.autoneg == AUTONEG_DISABLE) {
u32 speed = cmd->base.speed; int cap = speed_duplex_to_caps(speed, cmd->base.duplex);
if (!is_offload(adapter)) return -EOPNOTSUPP; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (adapter->flags & FULL_INIT_DONE) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; if (m.cmd != CHELSIO_SET_PM) return -EINVAL; if (!is_power_of_2(m.rx_pg_sz) ||
!is_power_of_2(m.tx_pg_sz)) return -EINVAL; /* not power of 2 */ if (!(m.rx_pg_sz & 0x14000)) return -EINVAL; /* not 16KB or 64KB */ if (!(m.tx_pg_sz & 0x1554000)) return -EINVAL; if (m.tx_num_pg == -1)
m.tx_num_pg = p->tx_num_pgs; if (m.rx_num_pg == -1)
m.rx_num_pg = p->rx_num_pgs; if (m.tx_num_pg % 24 || m.rx_num_pg % 24) return -EINVAL; if (m.rx_num_pg * m.rx_pg_sz > p->chan_rx_size ||
m.tx_num_pg * m.tx_pg_sz > p->chan_tx_size) return -EINVAL;
p->rx_pg_size = m.rx_pg_sz;
p->tx_pg_size = m.tx_pg_sz;
p->rx_num_pgs = m.rx_num_pg;
p->tx_num_pgs = m.tx_num_pg; break;
} case CHELSIO_GET_MEM:{ struct ch_mem_range t; struct mc7 *mem;
u64 buf[32];
if (!is_offload(adapter)) return -EOPNOTSUPP; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (!(adapter->flags & FULL_INIT_DONE)) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; if (t.cmd != CHELSIO_GET_MEM) return -EINVAL; if ((t.addr & 7) || (t.len & 7)) return -EINVAL; if (t.mem_id == MEM_CM)
mem = &adapter->cm; elseif (t.mem_id == MEM_PMRX)
mem = &adapter->pmrx; elseif (t.mem_id == MEM_PMTX)
mem = &adapter->pmtx; else return -EINVAL;
/* * Version scheme: * bits 0..9: chip version * bits 10..15: chip revision
*/
t.version = 3 | (adapter->params.rev << 10); if (copy_to_user(useraddr, &t, sizeof(t))) return -EFAULT;
/* * Read 256 bytes at a time as len can be large and we don't * want to use huge intermediate buffers.
*/
useraddr += sizeof(t); /* advance to start of buffer */ while (t.len) { unsignedint chunk =
min_t(unsignedint, t.len, sizeof(buf));
ret =
t3_mc7_bd_read(mem, t.addr / 8, chunk / 8,
buf); if (ret) return ret; if (copy_to_user(useraddr, buf, chunk)) return -EFAULT;
useraddr += chunk;
t.addr += chunk;
t.len -= chunk;
} break;
} case CHELSIO_SET_TRACE_FILTER:{ struct ch_trace t; conststruct trace_params *tp;
if (!capable(CAP_NET_ADMIN)) return -EPERM; if (!offload_running(adapter)) return -EAGAIN; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; if (t.cmd != CHELSIO_SET_TRACE_FILTER) return -EINVAL;
static netdev_features_t cxgb_fix_features(struct net_device *dev,
netdev_features_t features)
{ /* * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx.
*/ if (features & NETIF_F_HW_VLAN_CTAG_RX)
features |= NETIF_F_HW_VLAN_CTAG_TX; else
features &= ~NETIF_F_HW_VLAN_CTAG_TX;
/* Accumulate MAC stats if needed */ if (!p->linkpoll_period ||
(adapter->check_task_cnt * p->linkpoll_period) / 10 >=
p->stats_update_period) {
mac_stats_update(adapter);
adapter->check_task_cnt = 0;
}
if (p->rev == T3_REV_B2)
check_t3b2_mac(adapter);
/* * Scan the XGMAC's to check for various conditions which we want to * monitor in a periodic polling manner rather than via an interrupt * condition. This is used for conditions which would otherwise flood * the system with interrupts and we only really need to know that the * conditions are "happening" ... For each condition we count the * detection of the condition and reset it for the next polling loop.
*/
for_each_port(adapter, port) { struct cmac *mac = &adap2pinfo(adapter, port)->mac;
u32 cause;
cause = t3_read_reg(adapter, A_XGM_INT_CAUSE + mac->offset);
reset = 0; if (cause & F_RXFIFO_OVERFLOW) {
mac->stats.rx_fifo_ovfl++;
reset |= F_RXFIFO_OVERFLOW;
}
/* * We do the same as above for FL_EMPTY interrupts.
*/
status = t3_read_reg(adapter, A_SG_INT_CAUSE);
reset = 0;
if (status & F_FLEMPTY) { struct sge_qset *qs = &adapter->sge.qs[0]; int i = 0;
reset |= F_FLEMPTY;
v = (t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS) >> S_FL0EMPTY) & 0xffff;
while (v) {
qs->fl[i].empty += (v & 1); if (i)
qs++;
i ^= 1;
v >>= 1;
}
}
t3_write_reg(adapter, A_SG_INT_CAUSE, reset);
/* Schedule the next check update if any port is active. */
spin_lock_irq(&adapter->work_lock); if (adapter->open_device_map & PORT_MASK)
schedule_chk_task(adapter);
spin_unlock_irq(&adapter->work_lock);
}
/* * Sleep a while before ringing the driver qset dbs. * The delay is between 1000-2023 usecs.
*/
get_random_bytes(&r, 2);
delay += r & 1023;
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(usecs_to_jiffies(delay));
ring_dbs(adapter);
}
/* * Processes external (PHY) interrupts in process context.
*/ staticvoid ext_intr_task(struct work_struct *work)
{ struct adapter *adapter = container_of(work, struct adapter,
ext_intr_handler_task); int i;
/*
 * Interrupt-context handler for external (PHY) interrupts.
 *
 * Handling these interrupts may be slow and the MDIO registers are protected
 * by a mutex, so the real work is deferred to ext_intr_handler_task on
 * cxgb3_wq.  Until that task runs, PHY interrupts are masked by clearing
 * F_T3DBG in the slow interrupt mask; the task re-enables them when done.
 * Nothing is done if the slow interrupt mask is already zero.
 */
void t3_os_ext_intr_handler(struct adapter *adapter)
{
	spin_lock(&adapter->work_lock);

	if (!adapter->slow_intr_mask)
		goto unlock;

	adapter->slow_intr_mask &= ~F_T3DBG;
	t3_write_reg(adapter, A_PL_INT_ENABLE0, adapter->slow_intr_mask);
	queue_work(cxgb3_wq, &adapter->ext_intr_handler_task);

unlock:
	spin_unlock(&adapter->work_lock);
}
/**
 * t3_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * Invoked after a PCI bus error affecting this device has been detected.
 * A permanent failure is answered with PCI_ERS_RESULT_DISCONNECT; for any
 * other state the adapter error path is run and a slot reset is requested
 * via PCI_ERS_RESULT_NEED_RESET.
 */
static pci_ers_result_t t3_io_error_detected(struct pci_dev *pdev,
					     pci_channel_state_t state)
{
	struct adapter *adap = pci_get_drvdata(pdev);

	if (state != pci_channel_io_perm_failure) {
		t3_adapter_error(adap, 0, 0);

		/* Ask the PCI core to reset the slot. */
		return PCI_ERS_RESULT_NEED_RESET;
	}

	return PCI_ERS_RESULT_DISCONNECT;
}
/**
 * t3_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot.  Reports
 * PCI_ERS_RESULT_RECOVERED when t3_reenable_adapter() returns zero and
 * PCI_ERS_RESULT_DISCONNECT otherwise.
 */
static pci_ers_result_t t3_io_slot_reset(struct pci_dev *pdev)
{
	struct adapter *adap = pci_get_drvdata(pdev);

	return t3_reenable_adapter(adap) ? PCI_ERS_RESULT_DISCONNECT
					 : PCI_ERS_RESULT_RECOVERED;
}
/** * t3_io_resume - called when traffic can start flowing again. * @pdev: Pointer to PCI device * * This callback is called when the error recovery driver tells us that * it's OK to resume normal operation.
*/ staticvoid t3_io_resume(struct pci_dev *pdev)
{ struct adapter *adapter = pci_get_drvdata(pdev);
/* * Set the number of qsets based on the number of CPUs and the number of ports, * not to exceed the number of available qsets, assuming there are enough qsets * per port in HW.
*/ staticvoid set_nqsets(struct adapter *adap)
{ int i, j = 0; int num_cpus = netif_get_num_default_rss_queues(); int hwports = adap->params.nports; int nqsets = adap->msix_nvectors - 1;
err = pci_request_regions(pdev, DRV_NAME); if (err) { /* Just info, some other driver may have claimed the device. */
dev_info(&pdev->dev, "cannot obtain PCI resources\n"); goto out_disable_device;
}
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) {
dev_err(&pdev->dev, "no usable DMA configuration\n"); goto out_release_regions;
}
/* * The card is now ready to go. If any errors occur during device * registration we do not fail the whole card but rather proceed only * with the ports we manage to register successfully. However we must * register at least one net device.
*/
for_each_port(adapter, i) {
err = register_netdev(adapter->port[i]); if (err)
dev_warn(&pdev->dev, "cannot register net device %s, skipping\n",
adapter->port[i]->name); else { /* * Change the name we use for messages to the name of * the first successfully registered interface.
*/ if (!adapter->registered_device_map)
adapter->name = adapter->port[i]->name;
__set_bit(i, &adapter->registered_device_map);
}
} if (!adapter->registered_device_map) {
dev_err(&pdev->dev, "could not register any net devices\n");
err = -ENODEV; goto out_free_dev;
}
if (is_offload(adapter)) {
cxgb3_adapter_unofld(adapter); if (test_bit(OFFLOAD_DEVMAP_BIT,
&adapter->open_device_map))
offload_close(&adapter->tdev);
}
for_each_port(adapter, i) if (test_bit(i, &adapter->registered_device_map))
unregister_netdev(adapter->port[i]);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.