Quelle cassini.c Sprache: C

// SPDX-License-Identifier: GPL-2.0+
/* cassini.c: Sun Microsystems Cassini(+) ethernet driver.
*
* Copyright (C) 2004 Sun Microsystems Inc.
* Copyright (C) 2003 Adrian Sun (asun@darksunrising.com)
*
* This driver uses the sungem driver (c) David Miller
* (davem@redhat.com) as its basis.
*
* The cassini chip has a number of features that distinguish it from
* the gem chip:
*  4 transmit descriptor rings that are used for either QoS (VLAN) or
*      load balancing (non-VLAN mode)
*  batching of multiple packets
*  multiple CPU dispatching
*  page-based RX descriptor engine with separate completion rings
*  Gigabit support (GMII and PCS interface)
*  MIF link up/down detection works
*
* RX is handled by page sized buffers that are attached as fragments to
* the skb. here's what's done:
*  -- driver allocates pages at a time and keeps reference counts
*     on them.
*  -- the upper protocol layers assume that the header is in the skb
*     itself. as a result, cassini will copy a small amount (64 bytes)
*     to make them happy.
*  -- driver appends the rest of the data pages as frags to skbuffs
*     and increments the reference count
*  -- on page reclamation, the driver swaps the page with a spare page.
*     if that page is still in use, it frees its reference to that page,
*     and allocates a new page for use. otherwise, it just recycles the
*     page.
*
* NOTE: cassini can parse the header. however, it's not worth it
*       as long as the network stack requires a header copy.
*
* TX has 4 queues. currently these queues are used in a round-robin
* fashion for load balancing. They can also be used for QoS. for that
* to work, however, QoS information needs to be exposed down to the driver
* level so that subqueues get targeted to particular transmit rings.
* alternatively, the queues can be configured via use of the all-purpose
* ioctl.
*
* RX DATA: the rx completion ring has all the info, but the rx desc
* ring has all of the data. RX can conceivably come in under multiple
* interrupts, but the INT# assignment needs to be set up properly by
* the BIOS and conveyed to the driver. PCI BIOSes don't know how to do
* that. also, the two descriptor rings are designed to distinguish between
* encrypted and non-encrypted packets, but we use them for buffering
* instead.
*
* by default, the selective clear mask is set up to process rx packets.
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/vmalloc.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/dma-mapping.h>

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/skbuff_ref.h>
#include <linux/ethtool.h>
#include <linux/crc32.h>
#include <linux/random.h>
#include <linux/mii.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/mutex.h>
#include <linux/firmware.h>

#include <net/checksum.h>

#include <linux/atomic.h>
#include <asm/io.h>
#include <asm/byteorder.h>
#include <linux/uaccess.h>
#include <linux/jiffies.h>

/* Online CPU count. This expands to a num_online_cpus() call at every use,
 * so the value is re-evaluated each time rather than cached.
 */
#define CAS_NCPUS            num_online_cpus()

/* Hand a received skb to the network stack through netif_rx(). */
#define cas_skb_release(x)  netif_rx(x)

/* select which firmware to use. These are defined ahead of the cassini.h
 * include that follows them; presumably that header consults them (confirm
 * against cassini.h).
 */
#define USE_HP_WORKAROUND
#define HP_WORKAROUND_DEFAULT /* select which firmware to use as default */
#define CAS_HP_ALT_FIRMWARE   cas_prog_null /* alternate firmware */

#include "cassini.h"

/* Compile-time feature switches. A #define turns the feature on, an #undef
 * turns it off; code elsewhere in this file tests them with #ifdef.
 */
#define USE_TX_COMPWB      /* use completion writeback registers */
#define USE_CSMA_CD_PROTO  /* standard CSMA/CD */
#define USE_RX_BLANK       /* hw interrupt mitigation */
#undef USE_ENTROPY_DEV     /* don't test for entropy device */

/* NOTE: these aren't usable unless PCI interrupts can be assigned.
 * also, we need to make cp->lock finer-grained.
 */
#undef  USE_PCI_INTB
#undef  USE_PCI_INTC
#undef  USE_PCI_INTD
#undef  USE_QOS

#undef  USE_VPD_DEBUG       /* debug vpd information if defined */

/* rx processing options */
#define USE_PAGE_ORDER      /* specify to allocate large rx pages */
#define RX_DONT_BATCH  0    /* if 1, don't batch flows */
#define RX_COPY_ALWAYS 0    /* if 0, use frags */
#define RX_COPY_MIN    64   /* copy a little to make upper layers happy */
#undef  RX_COUNT_BUFFERS    /* define to calculate RX buffer stats */

/* Driver identification strings; concatenated into version[] below. */
#define DRV_MODULE_NAME  "cassini"
#define DRV_MODULE_VERSION "1.6"
#define DRV_MODULE_RELDATE "21 May 2008"

/* Default NETIF_MSG_* bitmap; per the cassini_debug comment below it is
 * the value used when cassini_debug is left at -1.
 */
#define CAS_DEF_MSG_ENABLE   \
(NETIF_MSG_DRV  | \
  NETIF_MSG_PROBE | \
  NETIF_MSG_LINK  | \
  NETIF_MSG_TIMER | \
  NETIF_MSG_IFDOWN | \
  NETIF_MSG_IFUP  | \
  NETIF_MSG_RX_ERR | \
  NETIF_MSG_TX_ERR)

/* length of time before we decide the hardware is borked,
 * and dev->tx_timeout() should be called to fix the problem.
 * All three values below are expressed in jiffies.
 */
#define CAS_TX_TIMEOUT   (HZ)
#define CAS_LINK_TIMEOUT                (22*HZ/10)
#define CAS_LINK_FAST_TIMEOUT           (1)

/* timeout values for state changing. these specify the number
 * of 10us delays to be used before giving up.
 */
#define STOP_TRIES_PHY 1000
#define STOP_TRIES     5000

/* specify a minimum frame size to deal with some fifo issues
 * max mtu == 2 * page size - ethernet header - 64 - swivel =
 *            2 * page_size - 0x50
 *
 * NOTE: CAS_MAX_MTU expands to an expression that dereferences a variable
 * named cp, so it can only be used where such a variable is in scope. The
 * result is capped at 9000.
 */
#define CAS_MIN_FRAME   97
#define CAS_1000MB_MIN_FRAME            255
#define CAS_MIN_MTU                     60
#define CAS_MAX_MTU                     min(((cp->page_size << 1) - 0x50), 9000)

#if 1
/*
 * The single-valued reset codes in the #else branch are compiled out.
 * They were eliminated in favour of separate atomic counters for each
 * kind of reset, to avoid a race condition.
 */
#else
#define CAS_RESET_MTU                   1
#define CAS_RESET_ALL                   2
#define CAS_RESET_SPARE                 3
#endif

/* Human-readable driver banner built from the DRV_MODULE_* strings. */
static char version[] =
DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";

static int cassini_debug = -1; /* -1 == use CAS_DEF_MSG_ENABLE as value */
/* "default link mode" module parameter; presumably an index into
 * link_modes[] below (confirm against the probe code).
 */
static int link_mode;

MODULE_AUTHOR("Adrian Sun ");
MODULE_DESCRIPTION("Sun Cassini(+) ethernet driver");
MODULE_LICENSE("GPL");
MODULE_FIRMWARE("sun/cassini.bin");
/* Permission 0: the parameters are not exposed through sysfs. */
module_param(cassini_debug, int, 0);
MODULE_PARM_DESC(cassini_debug, "Cassini bitmapped debugging message enable value");
module_param(link_mode, int, 0);
MODULE_PARM_DESC(link_mode, "default link mode");

/*
 * Work around for a PCS bug in which the link goes down due to the chip
 * being confused and never showing a link status of "up."
 */
#define DEFAULT_LINKDOWN_TIMEOUT 5
/*
 * Value in seconds, for user input.
 */
static int linkdown_timeout = DEFAULT_LINKDOWN_TIMEOUT;
module_param(linkdown_timeout, int, 0);
MODULE_PARM_DESC(linkdown_timeout,
"min reset interval in sec. for PCS linkdown issue; disabled if not positive");

/*
 * value in 'ticks' (units used by jiffies). Set when we init the
 * module because 'HZ' is actually a function call on some flavors of
 * Linux.  This will default to DEFAULT_LINKDOWN_TIMEOUT * HZ.
 * A value of 0 disables the forced-reset workaround in
 * cas_pcs_link_check().
 */
static int link_transition_timeout;

/* BMCR control bits for each supported link mode, indexed by mode number. */
static u16 link_modes[] = {
	[0] = BMCR_ANENABLE,				/* autoneg */
	[1] = 0,					/* 10bt half duplex */
	[2] = BMCR_SPEED100,				/* 100bt half duplex */
	[3] = BMCR_FULLDPLX,				/* 10bt full duplex */
	[4] = BMCR_SPEED100 | BMCR_FULLDPLX,		/* 100bt full duplex */
	[5] = CAS_BMCR_SPEED1000 | BMCR_FULLDPLX,	/* 1000bt full duplex */
};

/* PCI IDs claimed by this driver: Sun Cassini and National Semiconductor
 * Saturn, with any subsystem vendor/device. The empty entry terminates
 * the table.
 */
static const struct pci_device_id cas_pci_tbl[] = {
	{
		.vendor		= PCI_VENDOR_ID_SUN,
		.device		= PCI_DEVICE_ID_SUN_CASSINI,
		.subvendor	= PCI_ANY_ID,
		.subdevice	= PCI_ANY_ID,
	},
	{
		.vendor		= PCI_VENDOR_ID_NS,
		.device		= PCI_DEVICE_ID_NS_SATURN,
		.subvendor	= PCI_ANY_ID,
		.subdevice	= PCI_ANY_ID,
	},
	{ }
};

MODULE_DEVICE_TABLE(pci, cas_pci_tbl);

static void cas_set_link_modes(struct cas *cp);

/* Take every TX ring lock in ascending ring order. The ring index is also
 * passed as the nesting subclass of each acquisition.
 */
static inline void cas_lock_tx(struct cas *cp)
{
	int ring = 0;

	while (ring < N_TX_RINGS) {
		spin_lock_nested(&cp->tx_lock[ring], ring);
		ring++;
	}
}

/* WTZ: QA was finding deadlock problems with the previous
 * versions after long test runs with multiple cards per machine.
 * See if replacing cas_lock_all with safer versions helps. The
 * symptoms QA is reporting match those we'd expect if interrupts
 * aren't being properly restored, and we fixed a previous deadlock
 * with similar symptoms by using save/restore versions in other
 * places.
 *
 * cas_lock_all_save(cp, flags): take cp->lock with spin_lock_irqsave()
 * (the saved interrupt state goes into flags), then take every TX ring
 * lock through cas_lock_tx(). The cp argument is evaluated exactly once.
 */
#define cas_lock_all_save(cp, flags) \
do { \
struct cas *xxxcp = (cp); \
spin_lock_irqsave(&xxxcp->lock, flags); \
cas_lock_tx(xxxcp); \
} while (0)

/* Release every TX ring lock, highest ring first (the reverse of the
 * order in which cas_lock_tx() acquires them).
 */
static inline void cas_unlock_tx(struct cas *cp)
{
	int ring;

	for (ring = N_TX_RINGS - 1; ring >= 0; ring--)
		spin_unlock(&cp->tx_lock[ring]);
}

/* cas_unlock_all_restore(cp, flags): counterpart of cas_lock_all_save().
 * Drops the TX ring locks through cas_unlock_tx(), then releases cp->lock
 * with spin_unlock_irqrestore() using the interrupt state held in flags.
 * The cp argument is evaluated exactly once.
 */
#define cas_unlock_all_restore(cp, flags) \
do { \
struct cas *xxxcp = (cp); \
cas_unlock_tx(xxxcp); \
spin_unlock_irqrestore(&xxxcp->lock, flags); \
} while (0)

/* Mask the interrupts belonging to one ring.
 *
 * Ring 0 is masked by writing all ones to REG_INTR_MASK. Any other ring is
 * touched only when CAS_FLAG_REG_PLUS is set in cp->cas_flags, in which
 * case its REG_PLUS_INTRN_MASK register is written.
 */
static void cas_disable_irq(struct cas *cp, const int ring)
{
	if (!ring) {
		/* Make sure we won't get any more interrupts */
		writel(0xFFFFFFFF, cp->regs + REG_INTR_MASK);
		return;
	}

	if ((cp->cas_flags & CAS_FLAG_REG_PLUS) == 0)
		return;

	/* disable completion interrupts and selectively mask */
	switch (ring) {
#if defined (USE_PCI_INTB) || defined(USE_PCI_INTC) || defined(USE_PCI_INTD)
#ifdef USE_PCI_INTB
	case 1:
#endif
#ifdef USE_PCI_INTC
	case 2:
#endif
#ifdef USE_PCI_INTD
	case 3:
#endif
		writel(INTRN_MASK_CLEAR_ALL | INTRN_MASK_RX_EN,
		       cp->regs + REG_PLUS_INTRN_MASK(ring));
		break;
#endif
	default:
		writel(INTRN_MASK_CLEAR_ALL,
		       cp->regs + REG_PLUS_INTRN_MASK(ring));
		break;
	}
}

/* Mask interrupts on every RX completion ring, starting with ring 0. */
static inline void cas_mask_intr(struct cas *cp)
{
	int ring = 0;

	while (ring < N_RX_COMP_RINGS) {
		cas_disable_irq(cp, ring);
		ring++;
	}
}

/* Unmask the interrupts belonging to one ring.
 *
 * Ring 0 gets INTR_TX_DONE written to REG_INTR_MASK, leaving everything
 * except TX_DONE enabled. Other rings are written only when
 * CAS_FLAG_REG_PLUS is set and the matching USE_PCI_INTx option is
 * compiled in; otherwise nothing is done for them.
 */
static void cas_enable_irq(struct cas *cp, const int ring)
{
	if (!ring) {
		/* all but TX_DONE */
		writel(INTR_TX_DONE, cp->regs + REG_INTR_MASK);
		return;
	}

	if ((cp->cas_flags & CAS_FLAG_REG_PLUS) == 0)
		return;

	switch (ring) {
#if defined (USE_PCI_INTB) || defined(USE_PCI_INTC) || defined(USE_PCI_INTD)
#ifdef USE_PCI_INTB
	case 1:
#endif
#ifdef USE_PCI_INTC
	case 2:
#endif
#ifdef USE_PCI_INTD
	case 3:
#endif
		writel(INTRN_MASK_RX_EN,
		       cp->regs + REG_PLUS_INTRN_MASK(ring));
		break;
#endif
	default:
		break;
	}
}

/* Unmask interrupts on every RX completion ring, starting with ring 0. */
static inline void cas_unmask_intr(struct cas *cp)
{
	int ring = 0;

	while (ring < N_RX_COMP_RINGS) {
		cas_enable_irq(cp, ring);
		ring++;
	}
}

/* Feed two reads of REG_ENTROPY_IV to batch_entropy_store() when the
 * entropy device flag is set. Compiles to an empty function unless
 * USE_ENTROPY_DEV is defined.
 */
static inline void cas_entropy_gather(struct cas *cp)
{
#ifdef USE_ENTROPY_DEV
	if (cp->cas_flags & CAS_FLAG_ENTROPY_DEV)
		batch_entropy_store(readl(cp->regs + REG_ENTROPY_IV),
				    readl(cp->regs + REG_ENTROPY_IV),
				    sizeof(uint64_t)*8);
#endif
}

/* Reset the entropy device and probe for its presence: write 0x55 to the
 * random register and clear CAS_FLAG_ENTROPY_DEV if it reads back as zero.
 * Compiles to an empty function unless USE_ENTROPY_DEV is defined.
 */
static inline void cas_entropy_reset(struct cas *cp)
{
#ifdef USE_ENTROPY_DEV
	if (!(cp->cas_flags & CAS_FLAG_ENTROPY_DEV))
		return;

	writel(BIM_LOCAL_DEV_PAD | BIM_LOCAL_DEV_PROM | BIM_LOCAL_DEV_EXT,
	       cp->regs + REG_BIM_LOCAL_DEV_EN);
	writeb(ENTROPY_RESET_STC_MODE, cp->regs + REG_ENTROPY_RESET);
	writeb(0x55, cp->regs + REG_ENTROPY_RAND_REG);

	/* a read-back of 0x0 means there is no entropy device */
	if (!readb(cp->regs + REG_ENTROPY_RAND_REG))
		cp->cas_flags &= ~CAS_FLAG_ENTROPY_DEV;
#endif
}

/* PHY register access. Both accessors assume the MIF has been set up for
 * frame mode rather than bit-bang mode.
 *
 * cas_phy_read() issues a read frame for register @reg of the PHY at
 * cp->phy_addr and polls, in 10us steps up to STOP_TRIES_PHY times, for
 * the turn-around LSB. Returns the data field of the frame, or 0xFFFF if
 * the poll times out.
 */
static u16 cas_phy_read(struct cas *cp, int reg)
{
	int tries;
	u32 frame;

	frame = MIF_FRAME_ST | MIF_FRAME_OP_READ;
	frame |= CAS_BASE(MIF_FRAME_PHY_ADDR, cp->phy_addr);
	frame |= CAS_BASE(MIF_FRAME_REG_ADDR, reg);
	frame |= MIF_FRAME_TURN_AROUND_MSB;
	writel(frame, cp->regs + REG_MIF_FRAME);

	/* poll for completion */
	for (tries = STOP_TRIES_PHY; tries > 0; tries--) {
		udelay(10);
		frame = readl(cp->regs + REG_MIF_FRAME);
		if (frame & MIF_FRAME_TURN_AROUND_LSB)
			return frame & MIF_FRAME_DATA_MASK;
	}

	return 0xFFFF; /* -1 */
}

/* Write @val to register @reg of the PHY at cp->phy_addr with a MIF write
 * frame, then poll in 10us steps, up to STOP_TRIES_PHY times, for the
 * turn-around LSB. Returns 0 on completion and -1 if the poll times out.
 */
static int cas_phy_write(struct cas *cp, int reg, u16 val)
{
	int tries;
	u32 frame;

	frame = MIF_FRAME_ST | MIF_FRAME_OP_WRITE;
	frame |= CAS_BASE(MIF_FRAME_PHY_ADDR, cp->phy_addr);
	frame |= CAS_BASE(MIF_FRAME_REG_ADDR, reg);
	frame |= MIF_FRAME_TURN_AROUND_MSB;
	frame |= val & MIF_FRAME_DATA_MASK;
	writel(frame, cp->regs + REG_MIF_FRAME);

	/* poll for completion */
	for (tries = STOP_TRIES_PHY; tries > 0; tries--) {
		udelay(10);
		frame = readl(cp->regs + REG_MIF_FRAME);
		if (frame & MIF_FRAME_TURN_AROUND_LSB)
			return 0;
	}

	return -1;
}

/* Clear BMCR_PDOWN in the PHY control register if it is currently set;
 * no write is issued when the bit is already clear.
 */
static void cas_phy_powerup(struct cas *cp)
{
	u16 bmcr = cas_phy_read(cp, MII_BMCR);

	if (bmcr & BMCR_PDOWN) {
		bmcr &= ~BMCR_PDOWN;
		cas_phy_write(cp, MII_BMCR, bmcr);
	}
}

/* Set BMCR_PDOWN in the PHY control register if it is currently clear;
 * no write is issued when the bit is already set.
 */
static void cas_phy_powerdown(struct cas *cp)
{
	u16 bmcr = cas_phy_read(cp, MII_BMCR);

	if ((bmcr & BMCR_PDOWN) == 0) {
		bmcr |= BMCR_PDOWN;
		cas_phy_write(cp, MII_BMCR, bmcr);
	}
}

/* Tear down one RX page descriptor. Called with cp->lock held.
 *
 * The DMA mapping is removed first, then the driver's reference on the
 * page buffer is dropped (the last put_page will free the buffer), and
 * finally the descriptor itself is freed. Always returns 0.
 */
static int cas_page_free(struct cas *cp, cas_page_t *page)
{
	dma_unmap_page(&cp->pdev->dev, page->dma_addr,
		       cp->page_size, DMA_FROM_DEVICE);

	__free_pages(page->buffer, cp->page_order);

	kfree(page);

	return 0;
}

/* Per-page usage accounting. With RX_COUNT_BUFFERS defined these update the
 * used member of the page descriptor; otherwise they expand to an empty
 * statement and the arguments are not evaluated.
 */
#ifdef RX_COUNT_BUFFERS
#define RX_USED_ADD(x, y)       ((x)->used += (y))
#define RX_USED_SET(x, y)       ((x)->used  = (y))
#else
#define RX_USED_ADD(x, y) do { } while(0)
#define RX_USED_SET(x, y) do { } while(0)
#endif

/* local page allocation routines for the receive buffers. jumbo pages
 * require at least 8K contiguous and 8K aligned buffers.
 *
 * cas_page_alloc() allocates one page descriptor plus a buffer of order
 * cp->page_order using the caller's @flags, and maps cp->page_size bytes
 * of it for device-to-memory DMA.
 *
 * Returns the new descriptor, or NULL if the descriptor allocation, the
 * page allocation or the DMA mapping fails. On failure nothing is leaked.
 */
static cas_page_t *cas_page_alloc(struct cas *cp, const gfp_t flags)
{
	cas_page_t *page;

	page = kmalloc(sizeof(cas_page_t), flags);
	if (!page)
		return NULL;

	INIT_LIST_HEAD(&page->list);
	RX_USED_SET(page, 0);
	page->buffer = alloc_pages(flags, cp->page_order);
	if (!page->buffer)
		goto page_err;

	page->dma_addr = dma_map_page(&cp->pdev->dev, page->buffer, 0,
				      cp->page_size, DMA_FROM_DEVICE);
	/* A failed mapping must never reach the hardware: the returned
	 * address is not usable for DMA, so give the page back and fail.
	 */
	if (dma_mapping_error(&cp->pdev->dev, page->dma_addr))
		goto map_err;

	return page;

map_err:
	__free_pages(page->buffer, cp->page_order);
page_err:
	kfree(page);
	return NULL;
}

/* Reset the RX page bookkeeping: empty the in-use and spare lists and
 * record that RX_SPARE_COUNT spares are wanted. No pages are allocated
 * here; that happens during open.
 */
static void cas_spare_init(struct cas *cp)
{
	/* in-use list, under its own lock */
	spin_lock(&cp->rx_inuse_lock);
	INIT_LIST_HEAD(&cp->rx_inuse_list);
	spin_unlock(&cp->rx_inuse_lock);

	/* spare list and the outstanding-spares counter */
	spin_lock(&cp->rx_spare_lock);
	INIT_LIST_HEAD(&cp->rx_spare_list);
	cp->rx_spares_needed = RX_SPARE_COUNT;
	spin_unlock(&cp->rx_spare_lock);
}

/* used on close. free all the spare buffers. */
static void cas_spare_free(struct cas *cp)
{
struct list_head list, *elem, *tmp;

/* free spare buffers */
INIT_LIST_HEAD(&list);
spin_lock(&cp->rx_spare_lock);
list_splice_init(&cp->rx_spare_list, &list);
spin_unlock(&cp->rx_spare_lock);
list_for_each_safe(elem, tmp, &list) {
  cas_page_free(cp, list_entry(elem, cas_page_t, list));
}

INIT_LIST_HEAD(&list);
#if 1
/*
* Looks like Adrian had protected this with a different
* lock than used everywhere else to manipulate this list.
*/
spin_lock(&cp->rx_inuse_lock);
list_splice_init(&cp->rx_inuse_list, &list);
spin_unlock(&cp->rx_inuse_lock);
#else
spin_lock(&cp->rx_spare_lock);
list_splice_init(&cp->rx_inuse_list, &list);
spin_unlock(&cp->rx_spare_lock);
#endif
list_for_each_safe(elem, tmp, &list) {
  cas_page_free(cp, list_entry(elem, cas_page_t, list));
}
}

/* replenish spares if needed
 *
 * Two phases:
 *  1) Reclaim: detach the in-use list and walk it. A page whose refcount
 *     is no greater than 1 is either moved to the spare list (while
 *     rx_spares_needed is positive) or freed. Pages that are still
 *     referenced are spliced back onto the in-use list.
 *  2) Allocate: if spares are still needed, try to allocate that many new
 *     pages with @flags and add whatever could be allocated to the spare
 *     list.
 *
 * @flags is the GFP mask handed to cas_page_alloc().
 */
static void cas_spare_recover(struct cas *cp, const gfp_t flags)
{
struct list_head list, *elem, *tmp;
int needed, i;

/* check inuse list. if we don't need any more free buffers,
 * just free it
 */

/* make a local copy of the list */
INIT_LIST_HEAD(&list);
spin_lock(&cp->rx_inuse_lock);
list_splice_init(&cp->rx_inuse_list, &list);
spin_unlock(&cp->rx_inuse_lock);

list_for_each_safe(elem, tmp, &list) {
  cas_page_t *page = list_entry(elem, cas_page_t, list);

  /*
   * With the lockless pagecache, cassini buffering scheme gets
   * slightly less accurate: we might find that a page has an
   * elevated reference count here, due to a speculative ref,
   * and skip it as in-use. Ideally we would be able to reclaim
   * it. However this would be such a rare case, it doesn't
   * matter too much as we should pick it up the next time round.
   *
   * Importantly, if we find that the page has a refcount of 1
   * here (our refcount), then we know it is definitely not inuse
   * so we can reuse it.
   */
  if (page_count(page->buffer) > 1)
   continue;

  /* unused page: take it off the private list before re-homing it */
  list_del(elem);
  spin_lock(&cp->rx_spare_lock);
  if (cp->rx_spares_needed > 0) {
   list_add(elem, &cp->rx_spare_list);
   cp->rx_spares_needed--;
   spin_unlock(&cp->rx_spare_lock);
  } else {
   /* enough spares already: free it, after dropping the lock */
   spin_unlock(&cp->rx_spare_lock);
   cas_page_free(cp, page);
  }
}

/* put any inuse buffers back on the list */
if (!list_empty(&list)) {
  spin_lock(&cp->rx_inuse_lock);
  list_splice(&list, &cp->rx_inuse_list);
  spin_unlock(&cp->rx_inuse_lock);
}

/* sample the shortfall under the lock; allocation happens unlocked */
spin_lock(&cp->rx_spare_lock);
needed = cp->rx_spares_needed;
spin_unlock(&cp->rx_spare_lock);
if (!needed)
  return;

/* we still need spares, so try to allocate some */
INIT_LIST_HEAD(&list);
i = 0;
while (i < needed) {
  cas_page_t *spare = cas_page_alloc(cp, flags);
  if (!spare)
   break;
  list_add(&spare->list, &list);
  i++;
}

/* publish the i pages that were actually allocated */
spin_lock(&cp->rx_spare_lock);
list_splice(&list, &cp->rx_spare_list);
cp->rx_spares_needed -= i;
spin_unlock(&cp->rx_spare_lock);
}

/* Take one page off the spare list.
 *
 * If the list is empty a quick cas_spare_recover() with GFP_ATOMIC is
 * attempted first; if it is still empty afterwards an error is logged and
 * NULL is returned. Each successful dequeue increments rx_spares_needed,
 * and whenever the new count is a multiple of RX_SPARE_RECOVER_VAL the
 * reset task is scheduled with the "spare" counter raised so that the
 * pool gets replenished.
 */
static cas_page_t *cas_page_dequeue(struct cas *cp)
{
	struct list_head *first;
	int needed;

	spin_lock(&cp->rx_spare_lock);
	if (list_empty(&cp->rx_spare_list)) {
		/* pool is dry: drop the lock and try a quick recovery */
		spin_unlock(&cp->rx_spare_lock);
		cas_spare_recover(cp, GFP_ATOMIC);
		spin_lock(&cp->rx_spare_lock);

		if (list_empty(&cp->rx_spare_list)) {
			netif_err(cp, rx_err, cp->dev,
				  "no spare buffers available\n");
			spin_unlock(&cp->rx_spare_lock);
			return NULL;
		}
	}

	first = cp->rx_spare_list.next;
	list_del(first);
	needed = ++cp->rx_spares_needed;
	spin_unlock(&cp->rx_spare_lock);

	/* kick the reset task to do the recovery */
	if (!(needed & (RX_SPARE_RECOVER_VAL - 1))) {
		atomic_inc(&cp->reset_task_pending);
		atomic_inc(&cp->reset_task_pending_spare);
		schedule_work(&cp->reset_task);
	}

	return list_entry(first, cas_page_t, list);
}

/* Enable or disable MIF polling of the PHY status register.
 *
 * Only the MDIO bits of the current MIF configuration are kept; the PHY
 * select bit is added for an MDIO1 PHY. With @enable set, polling of
 * MII_BMSR on cp->phy_addr is switched on and the mask register is
 * written so that link status and autoneg-complete changes are left
 * unmasked. With @enable clear, the mask register is written with 0xFFFF.
 */
static void cas_mif_poll(struct cas *cp, const int enable)
{
	u32 mif_cfg, mif_mask;

	mif_cfg = readl(cp->regs + REG_MIF_CFG);
	mif_cfg &= (MIF_CFG_MDIO_0 | MIF_CFG_MDIO_1);
	if (cp->phy_type & CAS_PHY_MII_MDIO1)
		mif_cfg |= MIF_CFG_PHY_SELECT;

	if (enable) {
		/* poll and interrupt on link status change. */
		mif_cfg |= MIF_CFG_POLL_EN;
		mif_cfg |= CAS_BASE(MIF_CFG_POLL_REG, MII_BMSR);
		mif_cfg |= CAS_BASE(MIF_CFG_POLL_PHY, cp->phy_addr);
		mif_mask = ~(BMSR_LSTATUS | BMSR_ANEGCOMPLETE);
	} else {
		mif_mask = 0xFFFF;
	}

	writel(mif_mask, cp->regs + REG_MIF_MASK);
	writel(mif_cfg, cp->regs + REG_MIF_CFG);
}

/* Must be invoked under cp->lock
 *
 * (Re)start link configuration.
 *
 * @ep: requested link settings, or NULL to keep the current cp->link_cntl.
 *      When given, cp->link_cntl is rebuilt from its autoneg, speed and
 *      duplex fields.
 *
 * The link state is always forced to link_down first. If the hardware is
 * not running nothing further happens. If the settings changed while the
 * link was not down, a full reset is scheduled instead of reprogramming
 * the PHY directly. Otherwise the PCS (SERDES) or the MII PHY is
 * programmed with the new control bits. In both of the latter cases the
 * link timer is re-armed.
 */
static void cas_begin_auto_negotiation(struct cas *cp,
           const struct ethtool_link_ksettings *ep)
{
u16 ctl;
#if 1
int lcntl;
int changed = 0;
/* snapshot of the state on entry; cp->lstate is overwritten below */
int oldstate = cp->lstate;
int link_was_not_down = !(oldstate == link_down);
#endif
/* Setup link parameters */
if (!ep)
  goto start_aneg;
lcntl = cp->link_cntl;
if (ep->base.autoneg == AUTONEG_ENABLE) {
  cp->link_cntl = BMCR_ANENABLE;
} else {
  /* forced mode: a speed other than 100 or 1000 leaves both speed bits clear */
  u32 speed = ep->base.speed;
  cp->link_cntl = 0;
  if (speed == SPEED_100)
   cp->link_cntl |= BMCR_SPEED100;
  else if (speed == SPEED_1000)
   cp->link_cntl |= CAS_BMCR_SPEED1000;
  if (ep->base.duplex == DUPLEX_FULL)
   cp->link_cntl |= BMCR_FULLDPLX;
}
#if 1
changed = (lcntl != cp->link_cntl);
#endif
start_aneg:
if (cp->lstate == link_up) {
  netdev_info(cp->dev, "PCS link down\n");
} else {
  if (changed) {
   netdev_info(cp->dev, "link configuration changed\n");
  }
}
cp->lstate = link_down;
cp->link_transition = LINK_TRANSITION_LINK_DOWN;
if (!cp->hw_running)
  return;
#if 1
/*
 * WTZ: If the old state was link_up, we turn off the carrier
 * to replicate everything we do elsewhere on a link-down
 * event when we were already in a link-up state..
 */
if (oldstate == link_up)
  netif_carrier_off(cp->dev);
if (changed  && link_was_not_down) {
  /*
   * WTZ: This branch will simply schedule a full reset after
   * we explicitly changed link modes in an ioctl. See if this
   * fixes the link-problems we were having for forced mode.
   */
  atomic_inc(&cp->reset_task_pending);
  atomic_inc(&cp->reset_task_pending_all);
  schedule_work(&cp->reset_task);
  cp->timer_ticks = 0;
  mod_timer(&cp->link_timer, jiffies + CAS_LINK_TIMEOUT);
  return;
}
#endif
if (cp->phy_type & CAS_PHY_SERDES) {
  /* SERDES: program the PCS MII control register directly */
  u32 val = readl(cp->regs + REG_PCS_MII_CTRL);

  if (cp->link_cntl & BMCR_ANENABLE) {
   val |= (PCS_MII_RESTART_AUTONEG | PCS_MII_AUTONEG_EN);
   cp->lstate = link_aneg;
  } else {
   if (cp->link_cntl & BMCR_FULLDPLX)
    val |= PCS_MII_CTRL_DUPLEX;
   val &= ~PCS_MII_AUTONEG_EN;
   cp->lstate = link_force_ok;
  }
  cp->link_transition = LINK_TRANSITION_LINK_CONFIG;
  writel(val, cp->regs + REG_PCS_MII_CTRL);

} else {
  /* MII PHY: MIF polling is turned off while BMCR is rewritten */
  cas_mif_poll(cp, 0);
  ctl = cas_phy_read(cp, MII_BMCR);
  ctl &= ~(BMCR_FULLDPLX | BMCR_SPEED100 |
    CAS_BMCR_SPEED1000 | BMCR_ANENABLE);
  ctl |= cp->link_cntl;
  if (ctl & BMCR_ANENABLE) {
   ctl |= BMCR_ANRESTART;
   cp->lstate = link_aneg;
  } else {
   cp->lstate = link_force_ok;
  }
  cp->link_transition = LINK_TRANSITION_LINK_CONFIG;
  cas_phy_write(cp, MII_BMCR, ctl);
  cas_mif_poll(cp, 1);
}

/* restart the link timer from a clean tick count */
cp->timer_ticks = 0;
mod_timer(&cp->link_timer, jiffies + CAS_LINK_TIMEOUT);
}

/* Must be invoked under cp->lock.
 *
 * Soft-reset the MII PHY by writing BMCR_RESET, wait 100us, then poll
 * MII_BMCR (10us between polls) until the reset bit clears or the
 * STOP_TRIES_PHY budget is used up.
 *
 * Returns 0 once the reset bit has cleared, non-zero if it never did.
 */
static int cas_reset_mii_phy(struct cas *cp)
{
	int remaining;
	u16 bmcr;

	cas_phy_write(cp, MII_BMCR, BMCR_RESET);
	udelay(100);

	for (remaining = STOP_TRIES_PHY - 1; remaining; remaining--) {
		bmcr = cas_phy_read(cp, MII_BMCR);
		if (!(bmcr & BMCR_RESET))
			break;
		udelay(10);
	}

	return remaining <= 0;
}

/* Fetch the Saturn PHY firmware image and keep a private copy of it.
 *
 * Does nothing unless the PHY is a PHY_NS_DP83065. The first two bytes of
 * the image hold the little-endian load address; the rest is the payload,
 * which is copied into a vmalloc'ed buffer at cp->fw_data. Failures are
 * logged (request failure, image shorter than two bytes) or silent
 * (vmalloc failure), and in each case cp->fw_data is not assigned a
 * buffer. The firmware handle is released before returning.
 */
static void cas_saturn_firmware_init(struct cas *cp)
{
	const char fw_name[] = "sun/cassini.bin";
	const struct firmware *fw;

	if (cp->phy_id != PHY_NS_DP83065)
		return;

	if (request_firmware(&fw, fw_name, &cp->pdev->dev)) {
		pr_err("Failed to load firmware \"%s\"\n",
		       fw_name);
		return;
	}

	if (fw->size >= 2) {
		cp->fw_load_addr = fw->data[1] << 8 | fw->data[0];
		cp->fw_size = fw->size - 2;
		cp->fw_data = vmalloc(cp->fw_size);
		if (cp->fw_data)
			memcpy(cp->fw_data, &fw->data[2], cp->fw_size);
	} else {
		pr_err("bogus length %zu in \"%s\"\n",
		       fw->size, fw_name);
	}

	release_firmware(fw);
}

static void cas_saturn_firmware_load(struct cas *cp)
{
int i;

if (!cp->fw_data)
  return;

cas_phy_powerdown(cp);

/* expanded memory access mode */
cas_phy_write(cp, DP83065_MII_MEM, 0x0);

/* pointer configuration for new firmware */
cas_phy_write(cp, DP83065_MII_REGE, 0x8ff9);
cas_phy_write(cp, DP83065_MII_REGD, 0xbd);
cas_phy_write(cp, DP83065_MII_REGE, 0x8ffa);
cas_phy_write(cp, DP83065_MII_REGD, 0x82);
cas_phy_write(cp, DP83065_MII_REGE, 0x8ffb);
cas_phy_write(cp, DP83065_MII_REGD, 0x0);
cas_phy_write(cp, DP83065_MII_REGE, 0x8ffc);
cas_phy_write(cp, DP83065_MII_REGD, 0x39);

/* download new firmware */
cas_phy_write(cp, DP83065_MII_MEM, 0x1);
cas_phy_write(cp, DP83065_MII_REGE, cp->fw_load_addr);
for (i = 0; i < cp->fw_size; i++)
  cas_phy_write(cp, DP83065_MII_REGD, cp->fw_data[i]);

/* enable firmware */
cas_phy_write(cp, DP83065_MII_REGE, 0x8ff8);
cas_phy_write(cp, DP83065_MII_REGD, 0x1);
}

/* phy initialization
 *
 * Two paths, chosen by cp->phy_type:
 *  - MII/GMII: select the MII datapath, stop MIF polling, reset the PHY,
 *    apply the PHY-specific workaround selected by cp->phy_id or the
 *    Saturn flag, then turn autonegotiation off in BMCR and program the
 *    advertisement registers.
 *  - SERDES: select the SERDES datapath, reset the PCS, program the PCS
 *    advertisement with the PCS disabled, then enable the PCS and sync
 *    detect.
 */
static void cas_phy_init(struct cas *cp)
{
u16 val;

/* if we're in MII/GMII mode, set up phy */
if (CAS_PHY_MII(cp->phy_type)) {
  writel(PCS_DATAPATH_MODE_MII,
         cp->regs + REG_PCS_DATAPATH_MODE);

  cas_mif_poll(cp, 0);
  cas_reset_mii_phy(cp); /* take out of isolate mode */

  if (PHY_LUCENT_B0 == cp->phy_id) {
   /* workaround link up/down issue with lucent */
   cas_phy_write(cp, LUCENT_MII_REG, 0x8000);
   cas_phy_write(cp, MII_BMCR, 0x00f1);
   cas_phy_write(cp, LUCENT_MII_REG, 0x0);

  } else if (PHY_BROADCOM_B0 == (cp->phy_id & 0xFFFFFFFC)) {
   /* workarounds for broadcom phy; the low two bits of the id are
    * ignored by the match above
    */
   cas_phy_write(cp, BROADCOM_MII_REG8, 0x0C20);
   cas_phy_write(cp, BROADCOM_MII_REG7, 0x0012);
   cas_phy_write(cp, BROADCOM_MII_REG5, 0x1804);
   cas_phy_write(cp, BROADCOM_MII_REG7, 0x0013);
   cas_phy_write(cp, BROADCOM_MII_REG5, 0x1204);
   cas_phy_write(cp, BROADCOM_MII_REG7, 0x8006);
   cas_phy_write(cp, BROADCOM_MII_REG5, 0x0132);
   cas_phy_write(cp, BROADCOM_MII_REG7, 0x8006);
   cas_phy_write(cp, BROADCOM_MII_REG5, 0x0232);
   cas_phy_write(cp, BROADCOM_MII_REG7, 0x201F);
   cas_phy_write(cp, BROADCOM_MII_REG5, 0x0A20);

  } else if (PHY_BROADCOM_5411 == cp->phy_id) {
   /* NOTE(review): the register is read twice and only the second
    * value is used - presumably deliberate; confirm before removing
    */
   val = cas_phy_read(cp, BROADCOM_MII_REG4);
   val = cas_phy_read(cp, BROADCOM_MII_REG4);
   if (val & 0x0080) {
    /* link workaround */
    cas_phy_write(cp, BROADCOM_MII_REG4,
           val & ~0x0080);
   }

  } else if (cp->cas_flags & CAS_FLAG_SATURN) {
   writel((cp->phy_type & CAS_PHY_MII_MDIO0) ?
          SATURN_PCFG_FSI : 0x0,
          cp->regs + REG_SATURN_PCFG);

   /* load firmware to address 10Mbps auto-negotiation
    * issue. NOTE: this will need to be changed if the
    * default firmware gets fixed.
    */
   if (PHY_NS_DP83065 == cp->phy_id) {
    cas_saturn_firmware_load(cp);
   }
   cas_phy_powerup(cp);
  }

  /* advertise capabilities; autoneg is switched off in BMCR first */
  val = cas_phy_read(cp, MII_BMCR);
  val &= ~BMCR_ANENABLE;
  cas_phy_write(cp, MII_BMCR, val);
  udelay(10);

  cas_phy_write(cp, MII_ADVERTISE,
         cas_phy_read(cp, MII_ADVERTISE) |
         (ADVERTISE_10HALF | ADVERTISE_10FULL |
          ADVERTISE_100HALF | ADVERTISE_100FULL |
          CAS_ADVERTISE_PAUSE |
          CAS_ADVERTISE_ASYM_PAUSE));

  if (cp->cas_flags & CAS_FLAG_1000MB_CAP) {
   /* make sure that we don't advertise half
    * duplex to avoid a chip issue
    */
   val  = cas_phy_read(cp, CAS_MII_1000_CTRL);
   val &= ~CAS_ADVERTISE_1000HALF;
   val |= CAS_ADVERTISE_1000FULL;
   cas_phy_write(cp, CAS_MII_1000_CTRL, val);
  }

} else {
  /* reset pcs for serdes */
  /* this 32-bit val shadows the function-level u16 val */
  u32 val;
  int limit;

  writel(PCS_DATAPATH_MODE_SERDES,
         cp->regs + REG_PCS_DATAPATH_MODE);

  /* enable serdes pins on saturn */
  if (cp->cas_flags & CAS_FLAG_SATURN)
   writel(0, cp->regs + REG_SATURN_PCFG);

  /* Reset PCS unit. */
  val = readl(cp->regs + REG_PCS_MII_CTRL);
  val |= PCS_MII_RESET;
  writel(val, cp->regs + REG_PCS_MII_CTRL);

  /* wait, in 10us steps, for the reset bit to self-clear */
  limit = STOP_TRIES;
  while (--limit > 0) {
   udelay(10);
   if ((readl(cp->regs + REG_PCS_MII_CTRL) &
        PCS_MII_RESET) == 0)
    break;
  }
  /* a timeout is only warned about; initialization carries on */
  if (limit <= 0)
   netdev_warn(cp->dev, "PCS reset bit would not clear [%08x]\n",
        readl(cp->regs + REG_PCS_STATE_MACHINE));

  /* Make sure PCS is disabled while changing advertisement
   * configuration.
   */
  writel(0x0, cp->regs + REG_PCS_CFG);

  /* Advertise all capabilities except half-duplex. */
  val  = readl(cp->regs + REG_PCS_MII_ADVERT);
  val &= ~PCS_MII_ADVERT_HD;
  val |= (PCS_MII_ADVERT_FD | PCS_MII_ADVERT_SYM_PAUSE |
   PCS_MII_ADVERT_ASYM_PAUSE);
  writel(val, cp->regs + REG_PCS_MII_ADVERT);

  /* enable PCS */
  writel(PCS_CFG_EN, cp->regs + REG_PCS_CFG);

  /* pcs workaround: enable sync detect */
  writel(PCS_SERDES_CTRL_SYNCD_EN,
         cp->regs + REG_PCS_SERDES_CTRL);
}
}

/* Sample the PCS link status and update the driver's link state.
 *
 * The MII status bit is corrected using the PCS state machine register,
 * then:
 *  - link now up, state not yet link_up: if the device is opened, mark
 *    the link up, apply the link modes and turn the carrier on;
 *  - link now down, state was link_up: mark it down, turn the carrier off
 *    and possibly request a reset;
 *  - link still down: possibly request a reset.
 *
 * Returns 1 when a reset is being requested, 0 otherwise (presumably the
 * caller performs the reset - confirm at the call sites).
 */
static int cas_pcs_link_check(struct cas *cp)
{
u32 stat, state_machine;
int retval = 0;

/* The link status bit latches on zero, so you must
 * read it twice in such a case to see a transition
 * to the link being up.
 */
stat = readl(cp->regs + REG_PCS_MII_STATUS);
if ((stat & PCS_MII_STATUS_LINK_STATUS) == 0)
  stat = readl(cp->regs + REG_PCS_MII_STATUS);

/* The remote-fault indication is only valid
 * when autoneg has completed.
 */
if ((stat & (PCS_MII_STATUS_AUTONEG_COMP |
       PCS_MII_STATUS_REMOTE_FAULT)) ==
     (PCS_MII_STATUS_AUTONEG_COMP | PCS_MII_STATUS_REMOTE_FAULT))
  netif_info(cp, link, cp->dev, "PCS RemoteFault\n");

/* work around link detection issue by querying the PCS state
 * machine directly. The state machine overrides the status bit:
 * not in the UP state forces "down"; UP with word sync forces "up".
 */
state_machine = readl(cp->regs + REG_PCS_STATE_MACHINE);
if ((state_machine & PCS_SM_LINK_STATE_MASK) != SM_LINK_STATE_UP) {
  stat &= ~PCS_MII_STATUS_LINK_STATUS;
} else if (state_machine & PCS_SM_WORD_SYNC_STATE_MASK) {
  stat |= PCS_MII_STATUS_LINK_STATUS;
}

if (stat & PCS_MII_STATUS_LINK_STATUS) {
  if (cp->lstate != link_up) {
   /* the up transition is only recorded while the device is opened */
   if (cp->opened) {
    cp->lstate = link_up;
    cp->link_transition = LINK_TRANSITION_LINK_UP;

    cas_set_link_modes(cp);
    netif_carrier_on(cp->dev);
   }
  }
} else if (cp->lstate == link_up) {
  cp->lstate = link_down;
  if (link_transition_timeout != 0 &&
      cp->link_transition != LINK_TRANSITION_REQUESTED_RESET &&
      !cp->link_transition_jiffies_valid) {
   /*
    * force a reset, as a workaround for the
    * link-failure problem. May want to move this to a
    * point a bit earlier in the sequence. If we had
    * generated a reset a short time ago, we'll wait for
    * the link timer to check the status until a
    * timer expires (link_transition_jiffies_valid is
    * true when the timer is running.)  Instead of using
    * a system timer, we just do a check whenever the
    * link timer is running - this clears the flag after
    * a suitable delay.
    */
   retval = 1;
   cp->link_transition = LINK_TRANSITION_REQUESTED_RESET;
   cp->link_transition_jiffies = jiffies;
   cp->link_transition_jiffies_valid = 1;
  } else {
   cp->link_transition = LINK_TRANSITION_ON_FAILURE;
  }
  netif_carrier_off(cp->dev);
  if (cp->opened)
   netif_info(cp, link, cp->dev, "PCS link down\n");

  /* Cassini only: if you force a mode, there can be
   * sync problems on link down. to fix that, the following
   * things need to be checked:
   * 1) read serialink state register
   * 2) read pcs status register to verify link down.
   * 3) if link down and serial link == 0x03, then you need
   *    to global reset the chip.
   */
  if ((cp->cas_flags & CAS_FLAG_REG_PLUS) == 0) {
   /* should check to see if we're in a forced mode */
   stat = readl(cp->regs + REG_PCS_SERDES_STATE);
   if (stat == 0x03)
    return 1;
  }
} else if (cp->lstate == link_down) {
  if (link_transition_timeout != 0 &&
      cp->link_transition != LINK_TRANSITION_REQUESTED_RESET &&
      !cp->link_transition_jiffies_valid) {
   /* force a reset, as a workaround for the
    * link-failure problem.  May want to move
    * this to a point a bit earlier in the
    * sequence.
    */
   retval = 1;
   cp->link_transition = LINK_TRANSITION_REQUESTED_RESET;
   cp->link_transition_jiffies = jiffies;
   cp->link_transition_jiffies_valid = 1;
  } else {
   cp->link_transition = LINK_TRANSITION_STILL_FAILED;
  }
}

return retval;
}

/* PCS interrupt handler: read the PCS interrupt status and, when it
 * reports a link change, run cas_pcs_link_check() and return its result.
 * Returns 0 when no link change is flagged. @dev and @status are unused.
 */
static int cas_pcs_interrupt(struct net_device *dev,
			     struct cas *cp, u32 status)
{
	u32 pcs_stat = readl(cp->regs + REG_PCS_INTR_STATUS);

	if (pcs_stat & PCS_INTR_STATUS_LINK_CHANGE)
		return cas_pcs_link_check(cp);

	return 0;
}

/* TX MAC interrupt handler.
 *
 * Reads the TX MAC status register and folds the reported events into
 * cp->net_stats[0] under cp->stat_lock[0]. A status of zero, or a status
 * in which only the defer timer bit is set, is ignored. Always returns 0.
 * @status is unused.
 */
static int cas_txmac_interrupt(struct net_device *dev,
			       struct cas *cp, u32 status)
{
	u32 tx_stat = readl(cp->regs + REG_MAC_TX_STATUS);

	if (tx_stat == 0)
		return 0;

	netif_printk(cp, intr, KERN_DEBUG, cp->dev,
		     "txmac interrupt, txmac_stat: 0x%x\n", tx_stat);

	/* Defer timer expiration is quite normal,
	 * don't even log the event.
	 */
	if ((tx_stat & MAC_TX_DEFER_TIMER) != 0 &&
	    (tx_stat & ~MAC_TX_DEFER_TIMER) == 0)
		return 0;

	spin_lock(&cp->stat_lock[0]);

	if (tx_stat & MAC_TX_UNDERRUN) {
		netdev_err(dev, "TX MAC xmit underrun\n");
		cp->net_stats[0].tx_fifo_errors++;
	}

	if (tx_stat & MAC_TX_MAX_PACKET_ERR) {
		netdev_err(dev, "TX MAC max packet size error\n");
		cp->net_stats[0].tx_errors++;
	}

	/* Everything below reports a 16-bit hardware TX counter wrapping,
	 * so each event accounts for 0x10000 occurrences.
	 */
	if (tx_stat & MAC_TX_COLL_NORMAL)
		cp->net_stats[0].collisions += 0x10000;

	if (tx_stat & MAC_TX_COLL_EXCESS) {
		cp->net_stats[0].tx_aborted_errors += 0x10000;
		cp->net_stats[0].collisions += 0x10000;
	}

	if (tx_stat & MAC_TX_COLL_LATE) {
		cp->net_stats[0].tx_aborted_errors += 0x10000;
		cp->net_stats[0].collisions += 0x10000;
	}

	spin_unlock(&cp->stat_lock[0]);

	/* We do not keep track of MAC_TX_COLL_FIRST and
	 * MAC_TX_PEAK_ATTEMPTS events.
	 */
	return 0;
}

/* Write a header-parser program into the chip's instruction RAM.
 *
 * @firmware points to an array of instructions terminated by an entry
 * whose note field is NULL; a NULL @firmware loads nothing. Instruction n
 * is written to RAM address n as three words (HI, MID, LOW), in that
 * order, after the address register has been set.
 */
static void cas_load_firmware(struct cas *cp, cas_hp_inst_t *firmware)
{
	cas_hp_inst_t *insn;
	u32 word;
	int addr;

	for (addr = 0, insn = firmware; insn && insn->note; insn++, addr++) {
		writel(addr, cp->regs + REG_HP_INSTR_RAM_ADDR);

		/* high word: match value and mask */
		word = CAS_BASE(HP_INSTR_RAM_HI_VAL, insn->val);
		word |= CAS_BASE(HP_INSTR_RAM_HI_MASK, insn->mask);
		writel(word, cp->regs + REG_HP_INSTR_RAM_DATA_HI);

		/* middle word: upper bits of outarg plus the next/offset
		 * fields and opcodes
		 */
		word = CAS_BASE(HP_INSTR_RAM_MID_OUTARG, insn->outarg >> 10);
		word |= CAS_BASE(HP_INSTR_RAM_MID_OUTOP, insn->outop);
		word |= CAS_BASE(HP_INSTR_RAM_MID_FNEXT, insn->fnext);
		word |= CAS_BASE(HP_INSTR_RAM_MID_FOFF, insn->foff);
		word |= CAS_BASE(HP_INSTR_RAM_MID_SNEXT, insn->snext);
		word |= CAS_BASE(HP_INSTR_RAM_MID_SOFF, insn->soff);
		word |= CAS_BASE(HP_INSTR_RAM_MID_OP, insn->op);
		writel(word, cp->regs + REG_HP_INSTR_RAM_DATA_MID);

		/* low word: output mask, shift, enable and outarg */
		word = CAS_BASE(HP_INSTR_RAM_LOW_OUTMASK, insn->outmask);
		word |= CAS_BASE(HP_INSTR_RAM_LOW_OUTSHIFT, insn->outshift);
		word |= CAS_BASE(HP_INSTR_RAM_LOW_OUTEN, insn->outenab);
		word |= CAS_BASE(HP_INSTR_RAM_LOW_OUTARG, insn->outarg);
		writel(word, cp->regs + REG_HP_INSTR_RAM_DATA_LOW);
	}
}

/* Program the RX DMA engine registers.
 *
 * In order: RX configuration (swivel offset and ring size indices),
 * base addresses and initial kick values of the free-descriptor
 * ring(s), base addresses of the completion ring(s), pause thresholds,
 * the reassembly table, interrupt blanking and almost-empty
 * thresholds, RED, the page size / MTU stride layout and finally,
 * unless the null header-parser program is selected, the header parser
 * configuration.
 *
 * Ring addresses are derived from cp->block_dvma plus the offset of
 * the ring inside cp->init_block. Also updates cp->mtu_stride.
 */
static void cas_init_rx_dma(struct cas *cp)
{
u64 desc_dma = cp->block_dvma;
u32 val;
int i, size;

/* rx free descriptors */
val = CAS_BASE(RX_CFG_SWIVEL, RX_SWIVEL_OFF_VAL);
val |= CAS_BASE(RX_CFG_DESC_RING, RX_DESC_RINGN_INDEX(0));
val |= CAS_BASE(RX_CFG_COMP_RING, RX_COMP_RINGN_INDEX(0));
if ((N_RX_DESC_RINGS > 1) &&
     (cp->cas_flags & CAS_FLAG_REG_PLUS))  /* do desc 2 */
  val |= CAS_BASE(RX_CFG_DESC_RING1, RX_DESC_RINGN_INDEX(1));
writel(val, cp->regs + REG_RX_CFG);

/* val = byte offset of descriptor ring 0 within the init block */
val = (unsigned long) cp->init_rxds[0] -
  (unsigned long) cp->init_block;
writel((desc_dma + val) >> 32, cp->regs + REG_RX_DB_HI);
writel((desc_dma + val) & 0xffffffff, cp->regs + REG_RX_DB_LOW);
writel(RX_DESC_RINGN_SIZE(0) - 4, cp->regs + REG_RX_KICK);

if (cp->cas_flags & CAS_FLAG_REG_PLUS) {
  /* rx desc 2 is for IPSEC packets. however,
   * we don't use it for that purpose.
   */
  val = (unsigned long) cp->init_rxds[1] -
   (unsigned long) cp->init_block;
  writel((desc_dma + val) >> 32, cp->regs + REG_PLUS_RX_DB1_HI);
  writel((desc_dma + val) & 0xffffffff, cp->regs +
         REG_PLUS_RX_DB1_LOW);
  writel(RX_DESC_RINGN_SIZE(1) - 4, cp->regs +
         REG_PLUS_RX_KICK1);
}

/* rx completion registers */
val = (unsigned long) cp->init_rxcs[0] -
  (unsigned long) cp->init_block;
writel((desc_dma + val) >> 32, cp->regs + REG_RX_CB_HI);
writel((desc_dma + val) & 0xffffffff, cp->regs + REG_RX_CB_LOW);

if (cp->cas_flags & CAS_FLAG_REG_PLUS) {
  /* rx comp 2-4 */
  for (i = 1; i < MAX_RX_COMP_RINGS; i++) {
   val = (unsigned long) cp->init_rxcs[i] -
    (unsigned long) cp->init_block;
   writel((desc_dma + val) >> 32, cp->regs +
          REG_PLUS_RX_CBN_HI(i));
   writel((desc_dma + val) & 0xffffffff, cp->regs +
          REG_PLUS_RX_CBN_LOW(i));
  }
}

/* read selective clear regs to prevent spurious interrupts
 * on reset because complete == kick.
 * selective clear set up to prevent interrupts on resets
 */
readl(cp->regs + REG_INTR_STATUS_ALIAS);
writel(INTR_RX_DONE | INTR_RX_BUF_UNAVAIL, cp->regs + REG_ALIAS_CLEAR);

/* set up pause thresholds (register takes multiples of
 * RX_PAUSE_THRESH_QUANTUM)
 */
val  = CAS_BASE(RX_PAUSE_THRESH_OFF,
   cp->rx_pause_off / RX_PAUSE_THRESH_QUANTUM);
val |= CAS_BASE(RX_PAUSE_THRESH_ON,
   cp->rx_pause_on / RX_PAUSE_THRESH_QUANTUM);
writel(val, cp->regs + REG_RX_PAUSE_THRESH);

/* zero out dma reassembly buffers: 64 table entries, each cleared
 * through the address register plus three data registers
 */
for (i = 0; i < 64; i++) {
  writel(i, cp->regs + REG_RX_TABLE_ADDR);
  writel(0x0, cp->regs + REG_RX_TABLE_DATA_LOW);
  writel(0x0, cp->regs + REG_RX_TABLE_DATA_MID);
  writel(0x0, cp->regs + REG_RX_TABLE_DATA_HI);
}

/* make sure address register is 0 for normal operation */
writel(0x0, cp->regs + REG_RX_CTRL_FIFO_ADDR);
writel(0x0, cp->regs + REG_RX_IPP_FIFO_ADDR);

/* interrupt mitigation */
#ifdef USE_RX_BLANK
val = CAS_BASE(RX_BLANK_INTR_TIME, RX_BLANK_INTR_TIME_VAL);
val |= CAS_BASE(RX_BLANK_INTR_PKT, RX_BLANK_INTR_PKT_VAL);
writel(val, cp->regs + REG_RX_BLANK);
#else
writel(0x0, cp->regs + REG_RX_BLANK);
#endif

/* interrupt generation as a function of low water marks for
 * free desc and completion entries. these are used to trigger
 * housekeeping for rx descs. we don't use the free interrupt
 * as it's not very useful
 */
/* val = CAS_BASE(RX_AE_THRESH_FREE, RX_AE_FREEN_VAL(0)); */
val = CAS_BASE(RX_AE_THRESH_COMP, RX_AE_COMP_VAL);
writel(val, cp->regs + REG_RX_AE_THRESH);
if (cp->cas_flags & CAS_FLAG_REG_PLUS) {
  val = CAS_BASE(RX_AE1_THRESH_FREE, RX_AE_FREEN_VAL(1));
  writel(val, cp->regs + REG_PLUS_RX_AE1_THRESH);
}

/* Random early detect registers. useful for congestion avoidance.
 * this should be tunable.
 */
writel(0x0, cp->regs + REG_RX_RED);

/* receive page sizes. default == 2K (0x800); val encodes
 * 4K/8K/16K as 1/2/3 and anything else as 0
 */
val = 0;
if (cp->page_size == 0x1000)
  val = 0x1;
else if (cp->page_size == 0x2000)
  val = 0x2;
else if (cp->page_size == 0x4000)
  val = 0x3;

/* round mtu + offset. constrain to page size. */
size = cp->dev->mtu + 64;
if (size > cp->page_size)
  size = cp->page_size;

/* i = stride code: smallest of 1K/2K/4K/8K that holds size */
if (size <= 0x400)
  i = 0x0;
else if (size <= 0x800)
  i = 0x1;
else if (size <= 0x1000)
  i = 0x2;
else
  i = 0x3;

/* stride in bytes is 1K << i; the count is how many strides fit
 * in one page
 */
cp->mtu_stride = 1 << (i + 10);
val  = CAS_BASE(RX_PAGE_SIZE, val);
val |= CAS_BASE(RX_PAGE_SIZE_MTU_STRIDE, i);
val |= CAS_BASE(RX_PAGE_SIZE_MTU_COUNT, cp->page_size >> (i + 10));
val |= CAS_BASE(RX_PAGE_SIZE_MTU_OFF, 0x1);
writel(val, cp->regs + REG_RX_PAGE_SIZE);

/* enable the header parser if desired: nothing more to do when the
 * selected firmware is the null program
 */
if (&CAS_HP_FIRMWARE[0] == &cas_prog_null[0])
  return;

val = CAS_BASE(HP_CFG_NUM_CPU, CAS_NCPUS > 63 ? 0 : CAS_NCPUS);
val |= HP_CFG_PARSE_EN | HP_CFG_SYN_INC_MASK;
val |= CAS_BASE(HP_CFG_TCP_THRESH, HP_TCP_THRESH_VAL);
writel(val, cp->regs + REG_HP_CFG);
}

/* Reset one RX completion descriptor: wipe the whole entry, then set
 * RX_COMP4_ZERO in word4.
 */
static inline void cas_rxc_init(struct cas_rx_comp *rxc)
{
	memset(rxc, 0, sizeof(struct cas_rx_comp));
	rxc->word4 = cpu_to_le64(RX_COMP4_ZERO);
}

/* NOTE: we use the ENC RX DESC ring for spares. the rx_page[0,1]
* flipping is protected by the fact that the chip will not
* hand back the same page index while it's being processed.
*/
static inline cas_page_t *cas_page_spare(struct cas *cp, const int index)
{
cas_page_t *page = cp->rx_pages[1][index];
cas_page_t *new;

if (page_count(page->buffer) == 1)
  return page;

new = cas_page_dequeue(cp);
if (new) {
  spin_lock(&cp->rx_inuse_lock);
  list_add(&page->list, &cp->rx_inuse_list);
  spin_unlock(&cp->rx_inuse_lock);
}
return new;
}

/* Pick the page to post for slot @index and reset its usage counter.
 *
 * When the ring-0 page at @index has a page count above 1, try to
 * exchange it with a spare: on success the busy page moves to
 * rx_pages[1] and the replacement takes its place in rx_pages[0]. If
 * no spare is available the busy page stays where it is.
 *
 * @ring is not consulted; this needs to be changed if the ENC RX DESC
 * ring is ever actually used.
 *
 * Returns the page now sitting in rx_pages[0][@index].
 */
static cas_page_t *cas_page_swap(struct cas *cp, const int ring,
				 const int index)
{
	cas_page_t **active = cp->rx_pages[0];
	cas_page_t **spares = cp->rx_pages[1];
	cas_page_t *replacement;

	if (page_count(active[index]->buffer) > 1) {
		replacement = cas_page_spare(cp, index);
		if (replacement) {
			spares[index] = active[index];
			active[index] = replacement;
		}
	}

	RX_USED_SET(active[index], 0);
	return active[index];
}

static void cas_clean_rxds(struct cas *cp)
{
/* only clean ring 0 as ring 1 is used for spare buffers */
        struct cas_rx_desc *rxd = cp->init_rxds[0];
int i, size;

/* release all rx flows */
for (i = 0; i < N_RX_FLOWS; i++) {
  struct sk_buff *skb;
  while ((skb = __skb_dequeue(&cp->rx_flows[i]))) {
   cas_skb_release(skb);
  }
}

/* initialize descriptors */
size = RX_DESC_RINGN_SIZE(0);
for (i = 0; i < size; i++) {
  cas_page_t *page = cas_page_swap(cp, 0, i);
  rxd[i].buffer = cpu_to_le64(page->dma_addr);
  rxd[i].index  = cpu_to_le64(CAS_BASE(RX_INDEX_NUM, i) |
         CAS_BASE(RX_INDEX_RING, 0));
}

cp->rx_old[0]  = RX_DESC_RINGN_SIZE(0) - 4;
cp->rx_last[0] = 0;
cp->cas_flags &= ~CAS_FLAG_RXD_POST(0);
}

/* Take ownership of the RX completion rings: zero the per-ring rx_cur
 * and rx_new indices and re-initialize every completion descriptor of
 * every ring with cas_rxc_init().
 */
static void cas_clean_rxcs(struct cas *cp)
{
	int ring, slot;

	memset(cp->rx_cur, 0, N_RX_COMP_RINGS * sizeof(cp->rx_cur[0]));
	memset(cp->rx_new, 0, N_RX_COMP_RINGS * sizeof(cp->rx_new[0]));

	for (ring = 0; ring < N_RX_COMP_RINGS; ring++) {
		struct cas_rx_comp *base = cp->init_rxcs[ring];

		for (slot = 0; slot < RX_COMP_RINGN_SIZE(ring); slot++)
			cas_rxc_init(&base[slot]);
	}
}

#if 0
/* NOTE: this function is currently compiled out by the surrounding
 * "#if 0".
 *
 * When we get a RX fifo overflow, the RX unit is probably hung
 * so we do the following: disable the RX MAC, disable RX DMA, issue
 * the RX software reset, rebuild the driver's RX descriptor and
 * completion state, reprogram the RX DMA registers and re-enable
 * RX DMA and the RX MAC.
 *
 * Each hardware step is polled up to STOP_TRIES times with a 10us
 * delay between polls.
 *
 * If any part of the reset goes wrong, we return 1 and that causes the
 * whole chip to be reset. Returns 0 on success.
 */
static int cas_rxmac_reset(struct cas *cp)
{
struct net_device *dev = cp->dev;
int limit;
u32 val;

/* First, reset MAC RX. */
writel(cp->mac_rx_cfg & ~MAC_RX_CFG_EN, cp->regs + REG_MAC_RX_CFG);
for (limit = 0; limit < STOP_TRIES; limit++) {
  if (!(readl(cp->regs + REG_MAC_RX_CFG) & MAC_RX_CFG_EN))
   break;
  udelay(10);
}
if (limit == STOP_TRIES) {
  netdev_err(dev, "RX MAC will not disable, resetting whole chip\n");
  return 1;
}

/* Second, disable RX DMA. */
writel(0, cp->regs + REG_RX_CFG);
for (limit = 0; limit < STOP_TRIES; limit++) {
  if (!(readl(cp->regs + REG_RX_CFG) & RX_CFG_DMA_EN))
   break;
  udelay(10);
}
if (limit == STOP_TRIES) {
  netdev_err(dev, "RX DMA will not disable, resetting whole chip\n");
  return 1;
}

mdelay(5);

/* Execute RX reset command and wait for the bit to clear. */
writel(SW_RESET_RX, cp->regs + REG_SW_RESET);
for (limit = 0; limit < STOP_TRIES; limit++) {
  if (!(readl(cp->regs + REG_SW_RESET) & SW_RESET_RX))
   break;
  udelay(10);
}
if (limit == STOP_TRIES) {
  netdev_err(dev, "RX reset command will not execute, resetting whole chip\n");
  return 1;
}

/* reset driver rx state */
cas_clean_rxds(cp);
cas_clean_rxcs(cp);

/* Now, reprogram the rest of RX unit. */
cas_init_rx_dma(cp);

/* re-enable: DMA first, then the RX MAC interrupt mask, then the MAC */
val = readl(cp->regs + REG_RX_CFG);
writel(val | RX_CFG_DMA_EN, cp->regs + REG_RX_CFG);
writel(MAC_RX_FRAME_RECV, cp->regs + REG_MAC_RX_MASK);
val = readl(cp->regs + REG_MAC_RX_CFG);
writel(val | MAC_RX_CFG_EN, cp->regs + REG_MAC_RX_CFG);
return 0;
}
#endif

/* RX MAC interrupt service.
 *
 * Reads REG_MAC_RX_STATUS and updates the ring-0 statistics under
 * stat_lock[0]. Alignment, CRC and length error bits are accounted as
 * 0x10000 events each; an overflow bumps rx_over_errors and
 * rx_fifo_errors by one.
 *
 * Always returns 0.
 */
static int cas_rxmac_interrupt(struct net_device *dev, struct cas *cp,
			       u32 status)
{
	const u32 events = readl(cp->regs + REG_MAC_RX_STATUS);

	if (events == 0)
		return 0;

	netif_dbg(cp, intr, cp->dev, "rxmac interrupt, stat: 0x%x\n", events);

	spin_lock(&cp->stat_lock[0]);

	/* counter rollovers */
	if (events & MAC_RX_ALIGN_ERR)
		cp->net_stats[0].rx_frame_errors += 0x10000;

	if (events & MAC_RX_CRC_ERR)
		cp->net_stats[0].rx_crc_errors += 0x10000;

	if (events & MAC_RX_LEN_ERR)
		cp->net_stats[0].rx_length_errors += 0x10000;

	if (events & MAC_RX_OVERFLOW) {
		cp->net_stats[0].rx_over_errors++;
		cp->net_stats[0].rx_fifo_errors++;
	}

	/* MAC_RX_FRAME_COUNT and MAC_RX_VIOL_ERR events are not tracked. */
	spin_unlock(&cp->stat_lock[0]);

	return 0;
}

/* MAC control interrupt service.
 *
 * This interrupt only reports pause-frame activity. It is useful for
 * diagnostics and debug, but probably these events will be masked by
 * default. Counts entries into pause state and records the upper 16
 * bits of the status word when a pause frame was received.
 *
 * Always returns 0.
 */
static int cas_mac_interrupt(struct net_device *dev, struct cas *cp,
			     u32 status)
{
	const u32 ctrl = readl(cp->regs + REG_MAC_CTRL_STATUS);

	if (ctrl == 0)
		return 0;

	netif_printk(cp, intr, KERN_DEBUG, cp->dev,
		     "mac interrupt, stat: 0x%x\n", ctrl);

	if (ctrl & MAC_CTRL_PAUSE_STATE)
		cp->pause_entered++;

	if (ctrl & MAC_CTRL_PAUSE_RECEIVED)
		cp->pause_last_time_recvd = (ctrl >> 16);

	return 0;
}

/* Advance the MII link state machine while the link is still down.
 * Must be invoked under cp->lock.
 *
 * Depending on cp->lstate:
 *  - link_aneg:      give up on autonegotiation and force a mode.
 *  - link_force_try: step down to the next slower forced mode.
 *  - link_force_ret: the autoneg retry failed; restore the forced
 *                    mode saved in cp->link_fcntl.
 * Forced modes are tried in the order
 *   1000 full -> 100 full/half -> 10 half.
 *
 * Always returns 0.
 */
static inline int cas_mdio_link_not_up(struct cas *cp)
{
	u16 bmcr;

	switch (cp->lstate) {
	case link_aneg:
		bmcr = cas_phy_read(cp, MII_BMCR);

		/* autoneg off, full duplex, fastest speed we support */
		bmcr &= ~(BMCR_ANRESTART | BMCR_ANENABLE);
		bmcr |= BMCR_FULLDPLX;
		if (cp->cas_flags & CAS_FLAG_1000MB_CAP)
			bmcr |= CAS_BMCR_SPEED1000;
		else
			bmcr |= BMCR_SPEED100;
		cas_phy_write(cp, MII_BMCR, bmcr);

		cp->timer_ticks = 5;
		cp->lstate = link_force_try;
		cp->link_transition = LINK_TRANSITION_LINK_CONFIG;
		break;

	case link_force_try:
		/* Downgrade from 1000 to 100 to 10 Mbps if necessary. */
		bmcr = cas_phy_read(cp, MII_BMCR);
		cp->timer_ticks = 5;

		if (bmcr & CAS_BMCR_SPEED1000) {
			/* gigabit failed: fall back to 100 full */
			bmcr &= ~CAS_BMCR_SPEED1000;
			bmcr |= (BMCR_SPEED100 | BMCR_FULLDPLX);
		} else if (!(bmcr & BMCR_SPEED100)) {
			/* neither speed bit is set: nothing left to try */
			break;
		} else if (bmcr & BMCR_FULLDPLX) {
			/* 100 full failed: try half duplex */
			bmcr &= ~BMCR_FULLDPLX;
		} else {
			/* 100Mbps failed altogether */
			bmcr &= ~BMCR_SPEED100;
		}
		cas_phy_write(cp, MII_BMCR, bmcr);
		break;

	case link_force_ret:
		netif_info(cp, link, cp->dev, "Autoneg failed again, keeping forced mode\n");
		cas_phy_write(cp, MII_BMCR, cp->link_fcntl);
		cp->timer_ticks = 5;
		cp->lstate = link_force_ok;
		cp->link_transition = LINK_TRANSITION_LINK_CONFIG;
		break;

	default:
		break;
	}

	return 0;
}

/* must be invoked with cp->lock held */
static int cas_mii_link_check(struct cas *cp, const u16 bmsr)
{
int restart;

if (bmsr & BMSR_LSTATUS) {
  /* Ok, here we got a link. If we had it due to a forced
* fallback, and we were configured for autoneg, we
* retry a short autoneg pass. If you know your hub is
* broken, use ethtool ;)
*/
  if ((cp->lstate == link_force_try) &&
      (cp->link_cntl & BMCR_ANENABLE)) {
   cp->lstate = link_force_ret;
   cp->link_transition = LINK_TRANSITION_LINK_CONFIG;
   cas_mif_poll(cp, 0);
   cp->link_fcntl = cas_phy_read(cp, MII_BMCR);
   cp->timer_ticks = 5;
   if (cp->opened)
    netif_info(cp, link, cp->dev,
        "Got link after fallback, retrying autoneg once...\n");
   cas_phy_write(cp, MII_BMCR,
          cp->link_fcntl | BMCR_ANENABLE |
          BMCR_ANRESTART);
   cas_mif_poll(cp, 1);

  } else if (cp->lstate != link_up) {
   cp->lstate = link_up;
   cp->link_transition = LINK_TRANSITION_LINK_UP;

   if (cp->opened) {
    cas_set_link_modes(cp);
    netif_carrier_on(cp->dev);
   }
  }
  return 0;
}

/* link not up. if the link was previously up, we restart the
* whole process
*/
restart = 0;
if (cp->lstate == link_up) {
  cp->lstate = link_down;
  cp->link_transition = LINK_TRANSITION_LINK_DOWN;

  netif_carrier_off(cp->dev);
  if (cp->opened)
   netif_info(cp, link, cp->dev, "Link down\n");
  restart = 1;

} else if (++cp->timer_ticks > 10)
  cas_mdio_link_not_up(cp);

return restart;
}

/* MIF interrupt service.
 *
 * Reads REG_MIF_STATUS; when its poll-status field is non-zero (a link
 * change), the polled data field is passed to cas_mii_link_check() as
 * the BMSR value and that function's result is returned. Otherwise
 * returns 0.
 */
static int cas_mif_interrupt(struct net_device *dev, struct cas *cp,
			     u32 status)
{
	const u32 mif_status = readl(cp->regs + REG_MIF_STATUS);
	u16 bmsr;

	if (CAS_VAL(MIF_STATUS_POLL_STATUS, mif_status) != 0) {
		bmsr = CAS_VAL(MIF_STATUS_POLL_DATA, mif_status);
		return cas_mii_link_check(cp, bmsr);
	}

	return 0;
}

/* PCI error interrupt service.
 *
 * Reads REG_PCI_ERR_STATUS and logs a single line that names every
 * error cause flagged in it, each as its own "<...>" tag appended with
 * pr_cont(). For PCI_ERR_OTHER the PCI status register in config space
 * is additionally read (and its error bits cleared) to report the
 * underlying cause.
 *
 * Returns 0 when no error is flagged, otherwise 1: for all PCI errors
 * the chip should be reset.
 */
static int cas_pci_interrupt(struct net_device *dev, struct cas *cp,
			     u32 status)
{
	u32 stat = readl(cp->regs + REG_PCI_ERR_STATUS);

	if (!stat)
		return 0;

	/* no trailing newline: the cause tags below continue this line */
	netdev_err(dev, "PCI error [%04x:%04x]",
		   stat, readl(cp->regs + REG_BIM_DIAG));

	/* cassini+ has this reserved */
	if ((stat & PCI_ERR_BADACK) &&
	    ((cp->cas_flags & CAS_FLAG_REG_PLUS) == 0))
		pr_cont(" <No ACK64# during ABS64 cycle>");

	if (stat & PCI_ERR_DTRTO)
		pr_cont(" <Delayed transaction timeout>");
	if (stat & PCI_ERR_OTHER)
		pr_cont(" <other>");
	if (stat & PCI_ERR_BIM_DMA_WRITE)
		pr_cont(" <BIM DMA 0 write req>");
	if (stat & PCI_ERR_BIM_DMA_READ)
		pr_cont(" <BIM DMA 0 read req>");
	pr_cont("\n");

	if (stat & PCI_ERR_OTHER) {
		int pci_errs;

		/* Interrogate PCI config space for the
		 * true cause.
		 */
		pci_errs = pci_status_get_and_clear_errors(cp->pdev);

		netdev_err(dev, "PCI status errors[%04x]\n", pci_errs);
		if (pci_errs & PCI_STATUS_PARITY)
			netdev_err(dev, "PCI parity error detected\n");
		if (pci_errs & PCI_STATUS_SIG_TARGET_ABORT)
			netdev_err(dev, "PCI target abort\n");
		if (pci_errs & PCI_STATUS_REC_TARGET_ABORT)
			netdev_err(dev, "PCI master acks target abort\n");
		if (pci_errs & PCI_STATUS_REC_MASTER_ABORT)
			netdev_err(dev, "PCI master abort\n");
		if (pci_errs & PCI_STATUS_SIG_SYSTEM_ERROR)
			netdev_err(dev, "PCI system error SERR#\n");
		if (pci_errs & PCI_STATUS_DETECTED_PARITY)
			netdev_err(dev, "PCI parity error\n");
	}

	/* For all PCI errors, we should reset the chip. */
	return 1;
}

/* All non-normal interrupt conditions get serviced here.
 *
 * RX tag and RX length-mismatch errors are counted as rx_errors on
 * ring 0 and always trigger a reset. The remaining status bits are
 * dispatched, in a fixed order, to their sub-handlers; the first
 * handler that returns non-zero triggers a reset and the later ones
 * are not run.
 *
 * Returns non-zero if we should just exit the interrupt handler right
 * now (ie. if we scheduled a reset of the card, which invalidates all
 * of the other original irq status bits), 0 otherwise.
 */
static int cas_abnormal_irq(struct net_device *dev, struct cas *cp,
			    u32 status)
{
	if (status & (INTR_RX_TAG_ERROR | INTR_RX_LEN_MISMATCH)) {
		/* the tag error takes precedence when both are set */
		if (status & INTR_RX_TAG_ERROR)
			netif_printk(cp, rx_err, KERN_DEBUG, cp->dev,
				     "corrupt rx tag framing\n");
		else
			netif_printk(cp, rx_err, KERN_DEBUG, cp->dev,
				     "length mismatch for rx frame\n");

		spin_lock(&cp->stat_lock[0]);
		cp->net_stats[0].rx_errors++;
		spin_unlock(&cp->stat_lock[0]);
		goto do_reset;
	}

	if ((status & INTR_PCS_STATUS) &&
	    cas_pcs_interrupt(dev, cp, status))
		goto do_reset;

	if ((status & INTR_TX_MAC_STATUS) &&
	    cas_txmac_interrupt(dev, cp, status))
		goto do_reset;

	if ((status & INTR_RX_MAC_STATUS) &&
	    cas_rxmac_interrupt(dev, cp, status))
		goto do_reset;

	if ((status & INTR_MAC_CTRL_STATUS) &&
	    cas_mac_interrupt(dev, cp, status))
		goto do_reset;

	if ((status & INTR_MIF_STATUS) &&
	    cas_mif_interrupt(dev, cp, status))
		goto do_reset;

	if ((status & INTR_PCI_ERROR_STATUS) &&
	    cas_pci_interrupt(dev, cp, status))
		goto do_reset;

	return 0;

do_reset:
#if 1
	/* mark a full reset as pending and hand it to the reset work */
	atomic_inc(&cp->reset_task_pending);
	atomic_inc(&cp->reset_task_pending_all);
	netdev_err(dev, "reset called in cas_abnormal_irq [0x%x]\n", status);
	schedule_work(&cp->reset_task);
#else
	atomic_set(&cp->reset_task_pending, CAS_RESET_ALL);
	netdev_err(dev, "reset called in cas_abnormal_irq\n");
	schedule_work(&cp->reset_task);
#endif
	return 1;
}

/* NOTE: CAS_TABORT returns 1 or 2 so that it can be used when
 *       determining whether to do a netif_stop/wakeup
 */
#define CAS_TABORT(x)      (((x)->cas_flags & CAS_FLAG_TARGET_ABORT) ? 2 : 1)
#define CAS_ROUND_PAGE(x)  (((x) + PAGE_SIZE - 1) & PAGE_MASK)

/* Compute the target-abort length for a buffer of @len bytes at @addr.
 *
 * Returns TX_TARGET_ABORT_LEN when CAS_FLAG_TARGET_ABORT is set and the
 * end of the buffer lies within TX_TARGET_ABORT_LEN bytes of the next
 * page boundary; returns 0 otherwise.
 */
static inline int cas_calc_tabort(struct cas *cp, const unsigned long addr,
				  const int len)
{
	const unsigned long end = addr + len;
	unsigned long to_boundary;

	if (CAS_TABORT(cp) == 1)
		return 0;

	/* bytes between the buffer end and the next page boundary */
	to_boundary = CAS_ROUND_PAGE(end) - end;

	return (to_boundary > TX_TARGET_ABORT_LEN) ? 0 : TX_TARGET_ABORT_LEN;
}

/* Reclaim completed TX descriptors of one ring.
 *
 * Walks the ring from cp->tx_old[@ring] up to (not including) @limit
 * under tx_lock[@ring]. For every completed skb it unmaps each of its
 * descriptors, skips over any tiny buffer that follows one, updates
 * the per-ring TX statistics and frees the skb. Finally the queue is
 * woken if it was stopped and enough descriptors are free again.
 */
static inline void cas_tx_ringN(struct cas *cp, int ring, int limit)
{
	struct net_device *dev = cp->dev;
	struct cas_tx_desc *txds;
	struct sk_buff **skbs;
	int entry, count;

	spin_lock(&cp->tx_lock[ring]);
	txds = cp->init_txds[ring];
	skbs = cp->tx_skbs[ring];
	entry = cp->tx_old[ring];

	/* number of descriptors between entry and limit */
	count = TX_BUFF_COUNT(ring, entry, limit);
	while (entry != limit) {
		struct sk_buff *skb = skbs[entry];
		int nfrags, frag;

		if (!skb) {
			/* this should never occur */
			entry = TX_DESC_NEXT(ring, entry);
			continue;
		}

		/* however, we might get only a partial skb release:
		 * stop if this skb needs more descriptors than remain.
		 */
		nfrags = skb_shinfo(skb)->nr_frags;
		count -= nfrags + cp->tx_tiny_use[ring][entry].nbufs + 1;
		if (count < 0)
			break;

		netif_printk(cp, tx_done, KERN_DEBUG, cp->dev,
			     "tx[%d] done, slot %d\n", ring, entry);

		skbs[entry] = NULL;
		cp->tx_tiny_use[ring][entry].nbufs = 0;

		/* one descriptor for the head plus one per fragment */
		for (frag = 0; frag <= nfrags; frag++) {
			struct cas_tx_desc *txd = &txds[entry];
			dma_addr_t daddr = le64_to_cpu(txd->buffer);
			u32 dlen = CAS_VAL(TX_DESC_BUFLEN,
					   le64_to_cpu(txd->control));

			dma_unmap_page(&cp->pdev->dev, daddr, dlen,
				       DMA_TO_DEVICE);
			entry = TX_DESC_NEXT(ring, entry);

			/* tiny buffer may follow */
			if (cp->tx_tiny_use[ring][entry].used) {
				cp->tx_tiny_use[ring][entry].used = 0;
				entry = TX_DESC_NEXT(ring, entry);
			}
		}

		spin_lock(&cp->stat_lock[ring]);
		cp->net_stats[ring].tx_packets++;
		cp->net_stats[ring].tx_bytes += skb->len;
		spin_unlock(&cp->stat_lock[ring]);
		dev_consume_skb_irq(skb);
	}
	cp->tx_old[ring] = entry;

	/* this is wrong for multiple tx rings. the net device needs
	 * multiple queues for this to do the right thing.  we wait
	 * for 2*packets to be available when using tiny buffers
	 */
	if (netif_queue_stopped(dev) &&
	    (TX_BUFFS_AVAIL(cp, ring) > CAS_TABORT(cp)*(MAX_SKB_FRAGS + 1)))
		netif_wake_queue(dev);
	spin_unlock(&cp->tx_lock[ring]);
}

/* TX completion service: reclaim finished descriptors on every TX ring.
 *
 * For each ring the completion index ("limit") is taken either from the
 * completion writeback word in the init block (USE_TX_COMPWB) or from
 * the per-ring REG_TX_COMPN register. A ring is only processed when its
 * limit differs from cp->tx_old[ring].
 *
 * The debug message is emitted separately for the two configurations
 * because compwb exists only when USE_TX_COMPWB is defined; referencing
 * it unconditionally would not compile without that option.
 */
static void cas_tx(struct net_device *dev, struct cas *cp,
		   u32 status)
{
	int limit, ring;
#ifdef USE_TX_COMPWB
	u64 compwb = le64_to_cpu(cp->init_block->tx_compwb);

	netif_printk(cp, intr, KERN_DEBUG, cp->dev,
		     "tx interrupt, status: 0x%x, %llx\n",
		     status, (unsigned long long)compwb);
#else
	netif_printk(cp, intr, KERN_DEBUG, cp->dev,
		     "tx interrupt, status: 0x%x\n", status);
#endif

	/* process all the rings */
	for (ring = 0; ring < N_TX_RINGS; ring++) {
#ifdef USE_TX_COMPWB
		/* use the completion writeback registers: the index is
		 * split into an MSB and an LSB field, and the word is
		 * advanced to the next ring's fields afterwards
		 */
		limit = (CAS_VAL(TX_COMPWB_MSB, compwb) << 8) |
			CAS_VAL(TX_COMPWB_LSB, compwb);
		compwb = TX_COMPWB_NEXT(compwb);
#else
		limit = readl(cp->regs + REG_TX_COMPN(ring));
#endif
		if (cp->tx_old[ring] != limit)
			cas_tx_ringN(cp, ring, limit);
	}
}

/* Build an skb for one received packet described by a completion entry.
 *
 * @words holds the four completion words already converted to CPU
 * byte order (words[0] = word1 ... words[3] = word4). @rxc and @entry
 * are not referenced in this function.
 *
 * The header (if the chip split one off) is always copied into the
 * skb's linear area. The data is then either copied as well (small
 * packets, or RX_COPY_ALWAYS) or attached as page fragments with an
 * extra page reference; in the latter case at least RX_COPY_MIN bytes
 * are copied when no separate header was present. A packet may
 * continue in a second ("next") page.
 *
 * On success *skbref is the new skb and the return value is the
 * packet length (header + data). Returns -1 when the skb cannot be
 * allocated or the data offset lies beyond the page. NOTE: on the page
 * overflow path the skb is freed although *skbref was already set, so
 * callers must not touch *skbref after a negative return.
 */
static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
         int entry, const u64 *words,
         struct sk_buff **skbref)
{
int dlen, hlen, len, i, alloclen;
int off, swivel = RX_SWIVEL_OFF_VAL;
struct cas_page *page;
struct sk_buff *skb;
void *crcaddr;
__sum16 csum;
char *p;

hlen = CAS_VAL(RX_COMP2_HDR_SIZE, words[1]);
dlen = CAS_VAL(RX_COMP1_DATA_SIZE, words[0]);
len  = hlen + dlen;

/* linear area: whole packet when copying, otherwise just the header
 * (but never less than RX_COPY_MIN)
 */
if (RX_COPY_ALWAYS || (words[2] & RX_COMP3_SMALL_PKT))
  alloclen = len;
else
  alloclen = max(hlen, RX_COPY_MIN);

skb = netdev_alloc_skb(cp->dev, alloclen + swivel + cp->crc_size);
if (skb == NULL)
  return -1;

*skbref = skb;
skb_reserve(skb, swivel);

/* p is the write cursor into the linear area; it stays equal to
 * skb->data unless a header is copied below
 */
p = skb->data;
crcaddr = NULL;
if (hlen) { /* always copy header pages */
  i = CAS_VAL(RX_COMP2_HDR_INDEX, words[1]);
  page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)];
  off = CAS_VAL(RX_COMP2_HDR_OFF, words[1]) * 0x100 +
   swivel;

  i = hlen;
  if (!dlen) /* attach FCS */
   i += cp->crc_size;
  dma_sync_single_for_cpu(&cp->pdev->dev, page->dma_addr + off,
     i, DMA_FROM_DEVICE);
  memcpy(p, page_address(page->buffer) + off, i);
  dma_sync_single_for_device(&cp->pdev->dev,
        page->dma_addr + off, i,
        DMA_FROM_DEVICE);
  RX_USED_ADD(page, 0x100);
  p += hlen;
  /* the swivel offset is applied only once: clear it so the data
   * offset below is not shifted again
   */
  swivel = 0;
}

if (alloclen < (hlen + dlen)) {
  skb_frag_t *frag = skb_shinfo(skb)->frags;

  /* normal or jumbo packets. we use frags */
  i = CAS_VAL(RX_COMP1_DATA_INDEX, words[0]);
  page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)];
  off = CAS_VAL(RX_COMP1_DATA_OFF, words[0]) + swivel;

  /* from here on hlen is reused as the number of data bytes that
   * live in this (first) data page
   */
  hlen = min(cp->page_size - off, dlen);
  if (hlen < 0) {
   netif_printk(cp, rx_err, KERN_DEBUG, cp->dev,
         "rx page overflow: %d\n", hlen);
   dev_kfree_skb_irq(skb);
   return -1;
  }
  i = hlen;
  if (i == dlen)  /* attach FCS */
   i += cp->crc_size;
  dma_sync_single_for_cpu(&cp->pdev->dev, page->dma_addr + off,
     i, DMA_FROM_DEVICE);

  /* make sure we always copy a header; swivel is reused below as
   * the number of bytes already copied out of this page
   */
  swivel = 0;
  if (p == (char *) skb->data) { /* not split */
   memcpy(p, page_address(page->buffer) + off,
          RX_COPY_MIN);
   dma_sync_single_for_device(&cp->pdev->dev,
         page->dma_addr + off, i,
         DMA_FROM_DEVICE);
   off += RX_COPY_MIN;
   swivel = RX_COPY_MIN;
   RX_USED_ADD(page, cp->mtu_stride);
  } else {
   RX_USED_ADD(page, hlen);
  }
  skb_put(skb, alloclen);

  /* attach the remainder of this page as the first fragment */
  skb_shinfo(skb)->nr_frags++;
  skb->data_len += hlen - swivel;
  skb->truesize += hlen - swivel;
  skb->len      += hlen - swivel;

  skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel);
  __skb_frag_ref(frag);

  /* any more data? the rest sits at the start of the next page */
  if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) {
   hlen = dlen;
   off = 0;

   i = CAS_VAL(RX_COMP2_NEXT_INDEX, words[1]);
   page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)];
   dma_sync_single_for_cpu(&cp->pdev->dev,
      page->dma_addr,
      hlen + cp->crc_size,
      DMA_FROM_DEVICE);
   dma_sync_single_for_device(&cp->pdev->dev,
         page->dma_addr,
         hlen + cp->crc_size,
         DMA_FROM_DEVICE);

   skb_shinfo(skb)->nr_frags++;
   skb->data_len += hlen;
   skb->len      += hlen;
   frag++;

   skb_frag_fill_page_desc(frag, page->buffer, 0, hlen);
   __skb_frag_ref(frag);
   RX_USED_ADD(page, hlen + cp->crc_size);
  }

  /* the FCS follows the last data byte in the last page used */
  if (cp->crc_size)
   crcaddr = page_address(page->buffer) + off + hlen;

} else {
  /* copying packet */
  if (!dlen)
   goto end_copy_pkt;

  i = CAS_VAL(RX_COMP1_DATA_INDEX, words[0]);
  page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)];
  off = CAS_VAL(RX_COMP1_DATA_OFF, words[0]) + swivel;
  /* hlen is reused as the number of data bytes in this page */
  hlen = min(cp->page_size - off, dlen);
  if (hlen < 0) {
   netif_printk(cp, rx_err, KERN_DEBUG, cp->dev,
         "rx page overflow: %d\n", hlen);
   dev_kfree_skb_irq(skb);
   return -1;
  }
  i = hlen;
  if (i == dlen) /* attach FCS */
   i += cp->crc_size;
  dma_sync_single_for_cpu(&cp->pdev->dev, page->dma_addr + off,
     i, DMA_FROM_DEVICE);
  memcpy(p, page_address(page->buffer) + off, i);
  dma_sync_single_for_device(&cp->pdev->dev,
        page->dma_addr + off, i,
        DMA_FROM_DEVICE);
  if (p == (char *) skb->data) /* not split */
   RX_USED_ADD(page, cp->mtu_stride);
  else
   RX_USED_ADD(page, i);

  /* any more data? copy the rest (plus FCS) from the next page */
  if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) {
   p += hlen;
   i = CAS_VAL(RX_COMP2_NEXT_INDEX, words[1]);
   page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)];
   dma_sync_single_for_cpu(&cp->pdev->dev,
      page->dma_addr,
      dlen + cp->crc_size,
      DMA_FROM_DEVICE);
   memcpy(p, page_address(page->buffer), dlen + cp->crc_size);
   dma_sync_single_for_device(&cp->pdev->dev,
         page->dma_addr,
         dlen + cp->crc_size,
         DMA_FROM_DEVICE);
   RX_USED_ADD(page, dlen + cp->crc_size);
  }
end_copy_pkt:
  /* the FCS was copied right behind the packet in the linear area */
  if (cp->crc_size)
   crcaddr = skb->data + alloclen;

  skb_put(skb, alloclen);
}

csum = (__force __sum16)htons(CAS_VAL(RX_COMP4_TCP_CSUM, words[3]));
if (cp->crc_size) {
  /* checksum includes FCS. strip it out. */
  csum = csum_fold(csum_partial(crcaddr, cp->crc_size,
           csum_unfold(csum)));
}
skb->protocol = eth_type_trans(skb, cp->dev);
/* only IPv4 packets are handed up with the hardware checksum */
if (skb->protocol == htons(ETH_P_IP)) {
  skb->csum = csum_unfold(~csum);
  skb->ip_summed = CHECKSUM_COMPLETE;
} else
  skb_checksum_none_assert(skb);
return len;
}

/* Queue a packet on its RX flow and release the flow when asked to.
 *
 * we can handle up to 64 rx flows at a time. we do the same thing
 * as nonreassm except that we batch up the buffers.
 * NOTE: we currently just treat each flow as a bunch of packets that
 *       we pass up. a better way would be to coalesce the packets
 *       into a jumbo packet. to do that, we need to do the following:
 *       1) the first packet will have a clean split between header and
 *          data. save both.
 *       2) each time the next flow packet comes in, extend the
 *          data length and merge the checksums.
 *       3) on flow release, fix up the header.
 *       4) make sure the higher layer doesn't care.
 * because packets get coalesced, we shouldn't run into fragment count
 * issues.
 */
static inline void cas_rx_flow_pkt(struct cas *cp, const u64 *words,
				   struct sk_buff *skb)
{
	struct sk_buff_head *flow;
	int flowid;

	/* the flow id from the completion word selects the queue */
	flowid = CAS_VAL(RX_COMP3_FLOWID, words[2]) & (N_RX_FLOWS - 1);
	flow = &cp->rx_flows[flowid];

	/* this is protected at a higher layer, so no need to
	 * do any additional locking here. stick the buffer
	 * at the end.
	 */
	__skb_queue_tail(flow, skb);

	if (!(words[0] & RX_COMP1_RELEASE_FLOW))
		return;

	/* release everything queued on this flow, oldest first */
	for (skb = __skb_dequeue(flow); skb; skb = __skb_dequeue(flow))
		cas_skb_release(skb);
}

/* put rx descriptor back on ring. if a buffer is in use by a higher
 * layer, this will need to put in a replacement.
 *
 * The page for @index (possibly swapped for a spare) is written into
 * the descriptor at cp->rx_old[@ring], which is then advanced. The
 * hardware is only kicked when the new position is a multiple of 4.
 */
static void cas_post_page(struct cas *cp, const int ring, const int index)
{
	const int slot = cp->rx_old[ring];
	cas_page_t *page = cas_page_swap(cp, ring, index);
	int next;

	cp->init_rxds[ring][slot].buffer = cpu_to_le64(page->dma_addr);
	cp->init_rxds[ring][slot].index =
		cpu_to_le64(CAS_BASE(RX_INDEX_NUM, index) |
			    CAS_BASE(RX_INDEX_RING, ring));

	next = RX_DESC_ENTRY(ring, slot + 1);
	cp->rx_old[ring] = next;

	if ((next % 4) != 0)
		return;

	if (ring == 0) {
		writel(next, cp->regs + REG_RX_KICK);
		return;
	}

	/* the second ring has its own kick register on cassini+ only */
	if ((N_RX_DESC_RINGS > 1) &&
	    (cp->cas_flags & CAS_FLAG_REG_PLUS))
		writel(next, cp->regs + REG_PLUS_RX_KICK1);
}

/* Repost RX descriptors of @ring in bulk; only used when things are
 * bad.
 *
 * Starting at cp->rx_old[@ring], walks @num descriptors less four (or,
 * for @num == 0, the whole ring less four). Every page that is still
 * referenced elsewhere is replaced by a freshly dequeued one and parked
 * on cp->rx_inuse_list. The hardware is kicked with the last entry
 * that completed a group of four.
 *
 * Returns 0 on success. If no replacement page is available, the
 * current position and the remaining count are saved, the RXD_POST
 * flag for the ring is set, the link timer is armed if it is not
 * already pending, and -ENOMEM is returned.
 */
static int cas_post_rxds_ringN(struct cas *cp, int ring, int num)
{
	cas_page_t **pages = cp->rx_pages[ring];
	unsigned int entry, last, count, released;
	int cluster = -1;

	entry = cp->rx_old[ring];

	netif_printk(cp, intr, KERN_DEBUG, cp->dev,
		     "rxd[%d] interrupt, done: %d\n", ring, entry);

	/* position of entry inside its group of four */
	count = entry & 0x3;
	last = RX_DESC_ENTRY(ring, num ? entry + num - 4: entry - 4);

	for (released = 0; entry != last;
	     released++, entry = RX_DESC_ENTRY(ring, entry + 1)) {
		/* make a new buffer if it's still in use */
		if (page_count(pages[entry]->buffer) > 1) {
			cas_page_t *fresh = cas_page_dequeue(cp);

			if (!fresh) {
				/* let the timer know that we need to
				 * do this again
				 */
				cp->cas_flags |= CAS_FLAG_RXD_POST(ring);
				if (!timer_pending(&cp->link_timer))
					mod_timer(&cp->link_timer, jiffies +
						  CAS_LINK_FAST_TIMEOUT);
				cp->rx_old[ring] = entry;
				cp->rx_last[ring] = num ? num - released : 0;
				return -ENOMEM;
			}

			spin_lock(&cp->rx_inuse_lock);
			list_add(&pages[entry]->list, &cp->rx_inuse_list);
			spin_unlock(&cp->rx_inuse_lock);

			cp->init_rxds[ring][entry].buffer =
				cpu_to_le64(fresh->dma_addr);
			pages[entry] = fresh;
		}

		/* remember the entry that completes a group of four */
		if (++count == 4) {
			cluster = entry;
			count = 0;
		}
	}
	cp->rx_old[ring] = entry;

	if (cluster >= 0) {
		if (ring == 0)
			writel(cluster, cp->regs + REG_RX_KICK);
		else if ((N_RX_DESC_RINGS > 1) &&
			 (cp->cas_flags & CAS_FLAG_REG_PLUS))
			writel(cluster, cp->regs + REG_PLUS_RX_KICK1);
	}

	return 0;
}

/* process a completion ring. packets are set up in three basic ways:
 * small packets: should be copied header + data in single buffer.
 * large packets: header and data in a single buffer.
 * split packets: header in a separate buffer from data.
 *                data may be in multiple pages. data may be > 256
 *                bytes but in a single page.
 *
 * NOTE: RX page posting is done in this routine as well. while there's
 *       the capability of using multiple RX completion rings, it isn't
 *       really worthwhile due to the fact that the page posting will
 *       force serialization on the single descriptor ring.
 *
 * Processing starts at cp->rx_new[ring] and stops at the first entry
 * still owned by the hardware or, when USE_NAPI is defined and budget
 * is non-zero, once budget entries have been handled. The position
 * reached is stored back in cp->rx_new[ring].
 *
 * Returns the number of completion entries handled; entries that were
 * dropped are included in that count.
 */
static int cas_rx_ringN(struct cas *cp, int ring, int budget)
{
struct cas_rx_comp *rxcs = cp->init_rxcs[ring];
int entry, drops;
int npackets = 0;

netif_printk(cp, intr, KERN_DEBUG, cp->dev,
       "rx[%d] interrupt, done: %d/%d\n",
       ring,
       readl(cp->regs + REG_RX_COMP_HEAD), cp->rx_new[ring]);

entry = cp->rx_new[ring];
/* drops counts only failures of cas_rx_process_pkt() */
drops = 0;
while (1) {
  struct cas_rx_comp *rxc = rxcs + entry;
  struct sk_buff *skb;
  int type, len;
  u64 words[4];
  int i, dring;

  /* snapshot the entry in CPU byte order */
  words[0] = le64_to_cpu(rxc->word1);
  words[1] = le64_to_cpu(rxc->word2);
  words[2] = le64_to_cpu(rxc->word3);
  words[3] = le64_to_cpu(rxc->word4);

  /* don't touch if still owned by hw */
  type = CAS_VAL(RX_COMP1_TYPE, words[0]);
  if (type == 0)
   break;

  /* hw hasn't cleared the zero bit yet */
  if (words[3] & RX_COMP4_ZERO) {
   break;
  }

  /* get info on the packet */
  if (words[3] & (RX_COMP4_LEN_MISMATCH | RX_COMP4_BAD)) {
   spin_lock(&cp->stat_lock[ring]);
   cp->net_stats[ring].rx_errors++;
   if (words[3] & RX_COMP4_LEN_MISMATCH)
    cp->net_stats[ring].rx_length_errors++;
   if (words[3] & RX_COMP4_BAD)
    cp->net_stats[ring].rx_crc_errors++;
   spin_unlock(&cp->stat_lock[ring]);

   /* We'll just return it to Cassini. */
   /* drop_it is also the target of the goto further down, taken
    * when cas_rx_process_pkt() fails
    */
  drop_it:
   spin_lock(&cp->stat_lock[ring]);
   ++cp->net_stats[ring].rx_dropped;
   spin_unlock(&cp->stat_lock[ring]);
   goto next;
  }

  len = cas_rx_process_pkt(cp, rxc, entry, words, &skb);
  if (len < 0) {
   ++drops;
   goto drop_it;
  }

  /* see if it's a flow re-assembly or not. the driver
   * itself handles release back up.
   */
  if (RX_DONT_BATCH || (type == 0x2)) {
   /* non-reassm: these always get released */
   cas_skb_release(skb);
  } else {
   cas_rx_flow_pkt(cp, words, skb);
  }

  spin_lock(&cp->stat_lock[ring]);
  cp->net_stats[ring].rx_packets++;
  cp->net_stats[ring].rx_bytes += len;
  spin_unlock(&cp->stat_lock[ring]);

next:
  npackets++;

  /* should it be released? each RELEASE flag returns the page named
   * by the matching index field (header, data, next) to its
   * descriptor ring
   */
  if (words[0] & RX_COMP1_RELEASE_HDR) {
   i = CAS_VAL(RX_COMP2_HDR_INDEX, words[1]);
   dring = CAS_VAL(RX_INDEX_RING, i);
   i = CAS_VAL(RX_INDEX_NUM, i);
   cas_post_page(cp, dring, i);
  }

  if (words[0] & RX_COMP1_RELEASE_DATA) {
   i = CAS_VAL(RX_COMP1_DATA_INDEX, words[0]);
   dring = CAS_VAL(RX_INDEX_RING, i);
   i = CAS_VAL(RX_INDEX_NUM, i);
   cas_post_page(cp, dring, i);
  }

  if (words[0] & RX_COMP1_RELEASE_NEXT) {
   i = CAS_VAL(RX_COMP2_NEXT_INDEX, words[1]);
   dring = CAS_VAL(RX_INDEX_RING, i);
   i = CAS_VAL(RX_INDEX_NUM, i);
   cas_post_page(cp, dring, i);
  }

  /* skip to the next entry; the SKIP field adds extra entries to
   * step over
   */
  entry = RX_COMP_ENTRY(ring, entry + 1 +
          CAS_VAL(RX_COMP1_SKIP, words[0]));
#ifdef USE_NAPI
  if (budget && (npackets >= budget))
   break;
#endif
}
cp->rx_new[ring] = entry;

if (drops)
  netdev_info(cp->dev, "Memory squeeze, deferring packet\n");
return npackets;
}

--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5

¤ Dauer der Verarbeitung: 0.27 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.