/* starfire.c: Linux device driver for the Adaptec Starfire network adapter. */ /* Written 1998-2000 by Donald Becker.
Current maintainer is Ion Badulescu <ionut ta badula tod org>. Please send all bug reports to me, and not to Donald Becker, as this code has been heavily modified from Donald's original version.
This software may be used and distributed according to the terms of the GNU General Public License (GPL), incorporated herein by reference. Drivers based on or derived from this code fall under the GPL and must retain the authorship, copyright and license notice. This file is not a complete program and may only be used when the entire operating system is licensed under the GPL.
The information below comes from Donald Becker's original driver:
The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation 410 Severn Ave., Suite 210 Annapolis MD 21403
/*
 * The current frame processor firmware fails to checksum a fragment
 * of length 1. If and when this is fixed, the #define below can be removed.
 */
#define HAS_BROKEN_FIRMWARE

/*
 * If using the broken firmware, data must be padded to the next 32-bit
 * boundary.  PADDING_MASK is the low-bit mask for that alignment and is
 * only defined while HAS_BROKEN_FIRMWARE is.
 */
#ifdef HAS_BROKEN_FIRMWARE
#define PADDING_MASK 3
#endif

/*
 * Define this if using the driver with the zero-copy patch
 */
#define ZEROCOPY
/* The user-configurable values.
   These may be modified when a driver module is loaded.*/

/* Used for tuning interrupt latency vs. overhead. */
static int intr_latency;
static int small_frames;

static int debug = 1;			/* 1 normal messages, 0 quiet .. 7 verbose. */
static int max_interrupt_work = 20;
static int mtu;
/* Maximum number of multicast addresses to filter (vs. rx-all-multicast).
   The Starfire has a 512 element hash table based on the Ethernet CRC. */
static const int multicast_filter_limit = 512;
/* Whether to do TCP/UDP checksums in hardware */
static int enable_hw_cksum = 1;
#define PKT_BUF_SZ	1536		/* Size of each temporary Rx buffer.*/

/*
 * Set the copy breakpoint for the copy-only-tiny-frames scheme.
 * Setting to > 1518 effectively disables this feature.
 *
 * NOTE:
 * The ia64 doesn't allow for unaligned loads even of integers being
 * misaligned on a 2 byte boundary. Thus always force copying of
 * packets as the starfire doesn't allow for misaligned DMAs ;-(
 * 23/10/2000 - Jes
 *
 * The Alpha and the Sparc don't like unaligned loads, either. On Sparc64,
 * at least, having unaligned frames leads to a rather serious performance
 * penalty. -Ion
 */
#if defined(__ia64__) || defined(__alpha__) || defined(__sparc__)
static int rx_copybreak = PKT_BUF_SZ;
#else
static int rx_copybreak /* = 0 */;
#endif
/* PCI DMA burst size -- on sparc64 we want to force it to 64 bytes, on the
   others the default of 128 is fine. */
#ifdef __sparc__
#define DMA_BURST_SIZE 64
#else
#define DMA_BURST_SIZE 128
#endif
/* Operational parameters that are set at compile time. */

/* The "native" ring sizes are either 256 or 2048.
   However in some modes a descriptor may be marked to wrap the ring earlier.
*/
#define RX_RING_SIZE	256
#define TX_RING_SIZE	32
/* The completion queues are fixed at 1024 entries i.e. 4K or 8KB. */
#define DONE_Q_SIZE	1024
/* All queues must be aligned on a 256-byte boundary */
#define QUEUE_ALIGN	256

/* Operational parameters that usually are not changed. */
/* Time in jiffies before concluding the transmitter is hung. */
#define TX_TIMEOUT	(2 * HZ)
This driver is for the Adaptec 6915 "Starfire" 64 bit PCI Ethernet adapter.
II. Board-specific settings
III. Driver operation
IIIa. Ring buffers
The Starfire hardware uses multiple fixed-size descriptor queues/rings. The ring sizes are fixed by the hardware, but may optionally be wrapped earlier by the END bit in the descriptor. This driver uses that hardware queue size for the Rx ring, where a large number of entries has no ill effect beyond increasing the potential backlog. The Tx ring is wrapped with the END bit, since a large hardware Tx queue disables the queue layer priority ordering and we have no mechanism to utilize the hardware two-level priority queue. When modifying the RX/TX_RING_SIZE pay close attention to page sizes and the ring-empty warning levels.
IIIb/c. Transmit/Receive Structure
See the Adaptec manual for the many possible structures, and options for each structure. There are far too many to document all of them here.
For transmit this driver uses type 0/1 transmit descriptors (depending on the 32/64 bitness of the architecture), and relies on automatic minimum-length padding. It does not use the completion queue consumer index, but instead checks for non-zero status entries.
For receive this driver uses type 2/3 receive descriptors. The driver allocates full frame size skbuffs for the Rx ring buffers, so all frames should fit in a single descriptor. The driver does not use the completion queue consumer index, but instead checks for non-zero status entries.
When an incoming frame is less than RX_COPYBREAK bytes long, a fresh skbuff is allocated and the frame is copied to the new skbuff. When the incoming frame is larger, the skbuff is passed directly up the protocol stack. Buffers consumed this way are replaced by newly allocated skbuffs in a later phase of receive.
A notable aspect of operation is that unaligned buffers are not permitted by the Starfire hardware. Thus the IP header at offset 14 in an ethernet frame isn't longword aligned, which may cause problems on some machines, e.g. Alphas and IA64. For these architectures, the driver is forced to copy the frame into a new skbuff unconditionally. Copied frames are put into the skbuff at an offset of "+2", thus 16-byte aligning the IP header.
IIId. Synchronization
The driver runs as two independent, single-threaded flows of control. One is the send-packet routine, which enforces single-threaded use by the dev->tbusy flag. The other thread is the interrupt handler, which is single threaded by the hardware and interrupt handling software.
The send packet thread has partial control over the Tx ring and the netif_queue status. If the number of free Tx slots in the ring falls below a certain number (currently hardcoded to 4), it signals the upper layer to stop the queue.
The interrupt handler has exclusive control over the Rx ring and records stats from the Tx ring. After reaping the stats, it marks the Tx queue entry as empty by incrementing the dirty_tx mark. Iff the netif_queue is stopped and the number of free Tx slots is above the threshold, it signals the upper layer to restart the queue.
- StopOnPerr is broken, don't enable - Hardware ethernet padding exposes random data, perform software padding instead (unverified -- works correctly for all the hardware I have)
/* A chip capabilities table, matching the CH_xxx entries in xxx_pci_tbl[] above. */
static const struct chip_info {
	const char *name;	/* Printable adapter name. */
	int drv_flags;		/* Capability flags, e.g. CanHaveMII. */
} netdrv_tbl[] = {
	{ "Adaptec Starfire 6915", CanHaveMII },
};
/*
 * Offsets to the device registers; the driver adds these to the mapped
 * register base when calling readl()/writel() and friends.
 *
 * Unlike software-only systems, device drivers interact with complex
 * hardware, and giving every register bit a symbolic name is not useful:
 * a name can only partially document the semantics, and it makes the
 * driver longer and harder to read.  In general, only the important
 * configuration values, or bits changed multiple times, are defined
 * symbolically.
 */
enum register_offsets {
	PCIDeviceConfig		= 0x50040,
	GenCtrl			= 0x50070,
	IntrTimerCtrl		= 0x50074,
	IntrClear		= 0x50080,
	IntrStatus		= 0x50084,
	IntrEnable		= 0x50088,
	MIICtrl			= 0x52000,
	TxStationAddr		= 0x50120,
	EEPROMCtrl		= 0x51000,
	GPIOCtrl		= 0x5008C,
	TxDescCtrl		= 0x50090,

	/* Low and High priority. */
	TxRingPtr		= 0x50098,
	HiPriTxRingPtr		= 0x50094,

	/* 64 bit address extension. */
	TxRingHiAddr		= 0x5009C,

	TxProducerIdx		= 0x500A0,
	TxConsumerIdx		= 0x500A4,
	TxThreshold		= 0x500B0,
	CompletionHiAddr	= 0x500B4,
	TxCompletionAddr	= 0x500B8,
	RxCompletionAddr	= 0x500BC,
	RxCompletionQ2Addr	= 0x500C0,
	CompletionQConsumerIdx	= 0x500C4,
	RxDMACtrl		= 0x500D0,
	RxDescQCtrl		= 0x500D4,
	RxDescQHiAddr		= 0x500DC,
	RxDescQAddr		= 0x500E0,
	RxDescQIdx		= 0x500E8,
	RxDMAStatus		= 0x500F0,
	RxFilterMode		= 0x500F4,
	TxMode			= 0x55000,
	VlanType		= 0x55064,
	PerfFilterTable		= 0x56000,
	HashTable		= 0x56100,
	TxGfpMem		= 0x58000,
	RxGfpMem		= 0x5a000,
};
/*
 * Bits in the interrupt status/mask registers.
 *
 * Warning: setting Intr[Ab]NormalSummary in the IntrEnable register
 * enables all the interrupt sources that are or'ed into those status bits.
 */
enum intr_status_bits {
	IntrLinkChange		= 0xf0000000,
	IntrStatsMax		= 0x08000000,
	IntrAbnormalSummary	= 0x02000000,
	IntrGeneralTimer	= 0x01000000,
	IntrSoftware		= 0x800000,
	IntrRxComplQ1Low	= 0x400000,
	IntrTxComplQLow		= 0x200000,
	IntrPCI			= 0x100000,
	IntrDMAErr		= 0x080000,
	IntrTxDataLow		= 0x040000,
	IntrRxComplQ2Low	= 0x020000,
	IntrRxDescQ1Low		= 0x010000,
	IntrNormalSummary	= 0x8000,
	IntrTxDone		= 0x4000,
	IntrTxDMADone		= 0x2000,
	IntrTxEmpty		= 0x1000,
	IntrEarlyRxQ2		= 0x0800,
	IntrEarlyRxQ1		= 0x0400,
	IntrRxQ2Done		= 0x0200,
	IntrRxQ1Done		= 0x0100,
	IntrRxGFPDead		= 0x80,
	IntrRxDescQ2Low		= 0x40,
	IntrNoTxCsum		= 0x20,
	IntrTxBadID		= 0x10,
	IntrHiPriTxBadID	= 0x08,
	IntrRxGfp		= 0x04,
	IntrTxGfp		= 0x02,
	IntrPCIPad		= 0x01,

	/* not quite bits: combinations and masks built from the above */
	IntrRxDone		= IntrRxQ2Done | IntrRxQ1Done,
	IntrRxEmpty		= IntrRxDescQ1Low | IntrRxDescQ2Low,
	IntrNormalMask		= 0xff00,
	IntrAbnormalMask	= 0x3ff00fe,
};
/* Frequently used values: keep some adjacent for cache effect. */
spinlock_t lock; unsignedint cur_rx, dirty_rx; /* Producer/consumer ring indices */ unsignedint cur_tx, dirty_tx, reap_tx; unsignedint rx_buf_sz; /* Based on MTU+slack. */ /* These values keep track of the transceiver/media in use. */ int speed100; /* Set if speed == 100MBit. */
u32 tx_mode;
u32 intr_timer_ctrl;
u8 tx_threshold; /* MII transceiver section. */ struct mii_if_info mii_if; /* MII lib hooks/info */ int phy_cnt; /* MII device addresses. */ unsignedchar phys[PHY_CNT]; /* MII device addresses. */ void __iomem *base;
};
if (debug > 1)
printk(KERN_DEBUG "%s: Filling in the station address.\n", dev->name);
/* Fill both the Tx SA register and the Rx perfect filter. */ for (i = 0; i < 6; i++)
writeb(dev->dev_addr[i], ioaddr + TxStationAddr + 5 - i); /* The first entry is special because it bypasses the VLAN filter.
Don't use it. */
writew(0, ioaddr + PerfFilterTable);
writew(0, ioaddr + PerfFilterTable + 4);
writew(0, ioaddr + PerfFilterTable + 8); for (i = 1; i < 16; i++) { const __be16 *eaddrs = (const __be16 *)dev->dev_addr; void __iomem *setup_frm = ioaddr + PerfFilterTable + i * 16;
writew(be16_to_cpu(eaddrs[2]), setup_frm); setup_frm += 4;
writew(be16_to_cpu(eaddrs[1]), setup_frm); setup_frm += 4;
writew(be16_to_cpu(eaddrs[0]), setup_frm); setup_frm += 8;
}
/* Initialize other registers. */ /* Configure the PCI bus bursts and FIFO thresholds. */
np->tx_mode = TxFlowEnable|RxFlowEnable|PadEnable; /* modified when link is up. */
writel(MiiSoftReset | np->tx_mode, ioaddr + TxMode);
udelay(1000);
writel(np->tx_mode, ioaddr + TxMode);
np->tx_threshold = 4;
writel(np->tx_threshold, ioaddr + TxThreshold);
/* Load Rx/Tx firmware into the frame processors */ for (i = 0; i < rx_size; i++)
writel(be32_to_cpup(&fw_rx_data[i]), ioaddr + RxGfpMem + i * 4); for (i = 0; i < tx_size; i++)
writel(be32_to_cpup(&fw_tx_data[i]), ioaddr + TxGfpMem + i * 4); if (enable_hw_cksum) /* Enable the Rx and Tx units, and the Rx/Tx frame processors. */
writel(TxEnable|TxGFPEnable|RxEnable|RxGFPEnable, ioaddr + GenCtrl); else /* Enable the Rx and Tx units only. */
writel(TxEnable|RxEnable, ioaddr + GenCtrl);
if (debug > 1)
printk(KERN_DEBUG "%s: Done netdev_open().\n",
dev->name);
/*
 * Initialize the Rx and Tx rings, along with various 'dev' bits.
 *
 * Allocates and DMA-maps one receive skbuff per Rx descriptor, publishes
 * the index of the last filled descriptor to the chip, then zeroes the
 * unfilled Rx slots, both completion queues and the Tx bookkeeping array.
 *
 * @dev: network device whose private area holds the rings.
 *
 * Allocation or mapping failure is not reported to the caller: filling
 * simply stops at the first failure and the remaining Rx slots are left
 * empty.
 */
static void init_ring(struct net_device *dev)
{
	struct netdev_private *np = netdev_priv(dev);
	int i;

	/* Fill in the Rx buffers.  Handle allocation failure gracefully. */
	for (i = 0; i < RX_RING_SIZE; i++) {
		struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz);

		np->rx_info[i].skb = skb;
		if (skb == NULL)
			break;
		np->rx_info[i].mapping = dma_map_single(&np->pci_dev->dev,
							skb->data,
							np->rx_buf_sz,
							DMA_FROM_DEVICE);
		if (dma_mapping_error(&np->pci_dev->dev, np->rx_info[i].mapping)) {
			/* Mapping failed: drop the buffer and stop filling. */
			dev_kfree_skb(skb);
			np->rx_info[i].skb = NULL;
			break;
		}
		/* Grrr, we cannot offset to correctly align the IP header. */
		np->rx_ring[i].rxaddr = cpu_to_dma(np->rx_info[i].mapping | RxDescValid);
	}
	/*
	 * Tell the chip the index of the last descriptor that was filled.
	 * NOTE(review): if the very first allocation fails, i is 0 and this
	 * writes (u16)-1 -- confirm that is acceptable to the hardware.
	 */
	writew(i - 1, np->base + RxDescQIdx);
	/*
	 * Zero when the whole ring was filled; otherwise minus the number of
	 * unfilled entries, wrapped into an unsigned value.
	 */
	np->dirty_rx = (unsigned int)(i - RX_RING_SIZE);

	/* Clear the remainder of the Rx buffer ring. */
	for ( ; i < RX_RING_SIZE; i++) {
		np->rx_ring[i].rxaddr = 0;
		np->rx_info[i].skb = NULL;
		np->rx_info[i].mapping = 0;
	}
	/* Mark the last entry as wrapping the ring. */
	np->rx_ring[RX_RING_SIZE - 1].rxaddr |= cpu_to_dma(RxDescEndRing);

	/* Clear the completion rings. */
	for (i = 0; i < DONE_Q_SIZE; i++) {
		np->rx_done_q[i].status = 0;
		np->tx_done_q[i].status = 0;
	}

	/* No Tx buffers are outstanding yet. */
	for (i = 0; i < TX_RING_SIZE; i++)
		memset(&np->tx_info[i], 0, sizeof(np->tx_info[i]));
}
/* * be cautious here, wrapping the queue has weird semantics * and we may not have enough slots even when it seems we do.
*/ if ((np->cur_tx - np->dirty_tx) + skb_num_frags(skb) * 2 > TX_RING_SIZE) {
netif_stop_queue(dev); return NETDEV_TX_BUSY;
}
/* Non-x86: explicitly flush descriptor cache lines here. */ /* Ensure all descriptors are written back before the transmit is
initiated. - Jes */
wmb();
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */ static irqreturn_t intr_handler(int irq, void *dev_instance)
{ struct net_device *dev = dev_instance; struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base; int boguscnt = max_interrupt_work; int consumer; int tx_status; int handled = 0;
do {
u32 intr_status = readl(ioaddr + IntrClear);
if (debug > 4)
printk(KERN_DEBUG "%s: Interrupt status %#8.8x.\n",
dev->name, intr_status);
if (intr_status == 0 || intr_status == (u32) -1) break;
handled = 1;
if (intr_status & (IntrRxDone | IntrRxEmpty)) {
u32 enable;
/* Scavenge the skbuff list based on the Tx-done queue. There are redundant checks here that may be cleaned up
after the driver has proven to be reliable. */
consumer = readl(ioaddr + TxConsumerIdx); if (debug > 3)
printk(KERN_DEBUG "%s: Tx Consumer index is %d.\n",
dev->name, consumer);
if (netif_queue_stopped(dev) &&
(np->cur_tx - np->dirty_tx + 4 < TX_RING_SIZE)) { /* The ring is no longer full, wake the queue. */
netif_wake_queue(dev);
}
/* Stats overflow */ if (intr_status & IntrStatsMax)
get_stats(dev);
/* Media change interrupt. */ if (intr_status & IntrLinkChange)
netdev_media_change(dev);
if (--boguscnt < 0) { if (debug > 1)
printk(KERN_WARNING "%s: Too much work at interrupt, " "status=%#8.8x.\n",
dev->name, intr_status); break;
}
} while (1);
/* * This routine is logically part of the interrupt/poll handler, but separated * for clarity and better register allocation.
*/ staticint __netdev_rx(struct net_device *dev, int *quota)
{ struct netdev_private *np = netdev_priv(dev);
u32 desc_status; int retcode = 0;
/* If EOP is set on the next entry, it's a new packet. Send it up. */ while ((desc_status = le32_to_cpu(np->rx_done_q[np->rx_done].status)) != 0) { struct sk_buff *skb;
u16 pkt_len; int entry;
rx_done_desc *desc = &np->rx_done_q[np->rx_done];
if (debug > 4)
printk(KERN_DEBUG " netdev_rx() status of %d was %#8.8x.\n", np->rx_done, desc_status); if (!(desc_status & RxOK)) { /* There was an error. */ if (debug > 2)
printk(KERN_DEBUG " netdev_rx() Rx error was %#8.8x.\n", desc_status);
dev->stats.rx_errors++; if (desc_status & RxFIFOErr)
dev->stats.rx_fifo_errors++; goto next_rx;
}
if (*quota <= 0) { /* out of rx quota */
retcode = 1; goto out;
}
(*quota)--;
if (debug > 4)
printk(KERN_DEBUG " netdev_rx() normal Rx pkt length %d, quota %d.\n", pkt_len, *quota); /* Check if the packet is long enough to accept without copying
to a minimally-sized skbuff. */ if (pkt_len < rx_copybreak &&
(skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
skb_reserve(skb, 2); /* 16 byte align the IP header */
dma_sync_single_for_cpu(&np->pci_dev->dev,
np->rx_info[entry].mapping,
pkt_len, DMA_FROM_DEVICE);
skb_copy_to_linear_data(skb, np->rx_info[entry].skb->data, pkt_len);
dma_sync_single_for_device(&np->pci_dev->dev,
np->rx_info[entry].mapping,
pkt_len, DMA_FROM_DEVICE);
skb_put(skb, pkt_len);
} else {
dma_unmap_single(&np->pci_dev->dev,
np->rx_info[entry].mapping,
np->rx_buf_sz, DMA_FROM_DEVICE);
skb = np->rx_info[entry].skb;
skb_put(skb, pkt_len);
np->rx_info[entry].skb = NULL;
np->rx_info[entry].mapping = 0;
} #ifndef final_version /* Remove after testing. */ /* You will want this info for the initial debug. */ if (debug > 5) {
printk(KERN_DEBUG " Rx data %pM %pM %2.2x%2.2x.\n",
skb->data, skb->data + 6,
skb->data[12], skb->data[13]);
} #endif
skb->protocol = eth_type_trans(skb, dev); #ifdef VLAN_SUPPORT if (debug > 4)
printk(KERN_DEBUG " netdev_rx() status2 of %d was %#4.4x.\n", np->rx_done, le16_to_cpu(desc->status2)); #endif if (le16_to_cpu(desc->status2) & 0x0100) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
dev->stats.rx_compressed++;
} /* * This feature doesn't seem to be working, at least * with the two firmware versions I have. If the GFP sees * an IP fragment, it either ignores it completely, or reports * "bad checksum" on it. * * Maybe I missed something -- corrections are welcome. * Until then, the printk stays. :-) -Ion
*/ elseif (le16_to_cpu(desc->status2) & 0x0040) {
skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum = le16_to_cpu(desc->csum);
printk(KERN_DEBUG "%s: checksum_hw, status2 = %#x\n", dev->name, le16_to_cpu(desc->status2));
} #ifdef VLAN_SUPPORT if (le16_to_cpu(desc->status2) & 0x0200) {
u16 vlid = le16_to_cpu(desc->vlanid);
if (debug > 1) {
printk(KERN_DEBUG "%s: Shutting down ethercard, Intr status %#8.8x.\n",
dev->name, (int) readl(ioaddr + IntrStatus));
printk(KERN_DEBUG "%s: Queue pointers were Tx %d / %d, Rx %d / %d.\n",
dev->name, np->cur_tx, np->dirty_tx,
np->cur_rx, np->dirty_rx);
}
/* Disable interrupts by clearing the interrupt mask. */
writel(0, ioaddr + IntrEnable);
/* Stop the chip's Tx and Rx processes. */
writel(0, ioaddr + GenCtrl);
readl(ioaddr + GenCtrl);
if (debug > 5) {
printk(KERN_DEBUG" Tx ring at %#llx:\n",
(longlong) np->tx_ring_dma); for (i = 0; i < 8/* TX_RING_SIZE is huge! */; i++)
printk(KERN_DEBUG " #%d desc. %#8.8x %#llx -> %#8.8x.\n",
i, le32_to_cpu(np->tx_ring[i].status),
(longlong) dma_to_cpu(np->tx_ring[i].addr),
le32_to_cpu(np->tx_done_q[i].status));
printk(KERN_DEBUG " Rx ring at %#llx -> %p:\n",
(longlong) np->rx_ring_dma, np->rx_done_q); if (np->rx_done_q) for (i = 0; i < 8/* RX_RING_SIZE */; i++) {
printk(KERN_DEBUG " #%d desc. %#llx -> %#8.8x\n",
i, (longlong) dma_to_cpu(np->rx_ring[i].rxaddr), le32_to_cpu(np->rx_done_q[i].status));
}
}
free_irq(np->pci_dev->irq, dev);
/* Free all the skbuffs in the Rx queue. */ for (i = 0; i < RX_RING_SIZE; i++) {
np->rx_ring[i].rxaddr = cpu_to_dma(0xBADF00D0); /* An invalid address. */ if (np->rx_info[i].skb != NULL) {
dma_unmap_single(&np->pci_dev->dev,
np->rx_info[i].mapping,
np->rx_buf_sz, DMA_FROM_DEVICE);
dev_kfree_skb(np->rx_info[i].skb);
}
np->rx_info[i].skb = NULL;
np->rx_info[i].mapping = 0;
} for (i = 0; i < TX_RING_SIZE; i++) { struct sk_buff *skb = np->tx_info[i].skb; if (skb == NULL) continue;
dma_unmap_single(&np->pci_dev->dev, np->tx_info[i].mapping,
skb_first_frag_len(skb), DMA_TO_DEVICE);
np->tx_info[i].mapping = 0;
dev_kfree_skb(skb);
np->tx_info[i].skb = NULL;
}
staticint __init starfire_init (void)
{ /* when a module, this is printed whether or not devices are found in probe */ #ifdef MODULE
printk(KERN_INFO DRV_NAME ": polling (NAPI) enabled\n"); #endif
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.