Quelle ice_main.c Sprache: C

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2023, Intel Corporation. */

/* Intel(R) Ethernet Connection E800 Series Linux Driver */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <generated/utsrelease.h>
#include <linux/crash_dump.h>
#include "ice.h"
#include "ice_base.h"
#include "ice_lib.h"
#include "ice_fltr.h"
#include "ice_dcb_lib.h"
#include "ice_dcb_nl.h"
#include "devlink/devlink.h"
#include "devlink/port.h"
#include "ice_sf_eth.h"
#include "ice_hwmon.h"
/* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
* ice tracepoint functions. This must be done exactly once across the
* ice driver.
*/
#define CREATE_TRACE_POINTS
#include "ice_trace.h"
#include "ice_eswitch.h"
#include "ice_tc_lib.h"
#include "ice_vsi_vlan_ops.h"
#include <net/xdp_sock_drv.h>

#define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver"
static const char ice_driver_string[] = DRV_SUMMARY;
static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";

/* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
#define ICE_DDP_PKG_PATH "intel/ice/ddp/"
#define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg"

MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_IMPORT_NS("LIBIE");
MODULE_IMPORT_NS("LIBIE_ADMINQ");
MODULE_LICENSE("GPL v2");
MODULE_FIRMWARE(ICE_DDP_PKG_FILE);

static int debug = -1;
module_param(debug, int, 0644);
#ifndef CONFIG_DYNAMIC_DEBUG
MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
#else
MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
#endif /* !CONFIG_DYNAMIC_DEBUG */

DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
EXPORT_SYMBOL(ice_xdp_locking_key);

/**
* ice_hw_to_dev - Get device pointer from the hardware structure
* @hw: pointer to the device HW structure
*
* Used to access the device pointer from compilation units which can't easily
* include the definition of struct ice_pf without leading to circular header
* dependencies.
*/
struct device *ice_hw_to_dev(struct ice_hw *hw)
{
struct ice_pf *pf = container_of(hw, struct ice_pf, hw);

return &pf->pdev->dev;
}

static struct workqueue_struct *ice_wq;
struct workqueue_struct *ice_lag_wq;
static const struct net_device_ops ice_netdev_safe_mode_ops;
static const struct net_device_ops ice_netdev_ops;

static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);

static void ice_vsi_release_all(struct ice_pf *pf);

static int ice_rebuild_channels(struct ice_pf *pf);
static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);

static int
ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
       void *cb_priv, enum tc_setup_type type, void *type_data,
       void *data,
       void (*cleanup)(struct flow_block_cb *block_cb));

bool netif_is_ice(const struct net_device *dev)
{
return dev && (dev->netdev_ops == &ice_netdev_ops ||
         dev->netdev_ops == &ice_netdev_safe_mode_ops);
}

/**
* ice_get_tx_pending - returns number of Tx descriptors not processed
* @ring: the ring of descriptors
*/
static u16 ice_get_tx_pending(struct ice_tx_ring *ring)
{
u16 head, tail;

head = ring->next_to_clean;
tail = ring->next_to_use;

if (head != tail)
  return (head < tail) ?
   tail - head : (tail + ring->count - head);
return 0;
}

/**
* ice_check_for_hang_subtask - check for and recover hung queues
* @pf: pointer to PF struct
*/
static void ice_check_for_hang_subtask(struct ice_pf *pf)
{
struct ice_vsi *vsi = NULL;
struct ice_hw *hw;
unsigned int i;
int packets;
u32 v;

ice_for_each_vsi(pf, v)
  if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
   vsi = pf->vsi[v];
   break;
  }

if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state))
  return;

if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
  return;

hw = &vsi->back->hw;

ice_for_each_txq(vsi, i) {
  struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
  struct ice_ring_stats *ring_stats;

  if (!tx_ring)
   continue;
  if (ice_ring_ch_enabled(tx_ring))
   continue;

  ring_stats = tx_ring->ring_stats;
  if (!ring_stats)
   continue;

  if (tx_ring->desc) {
   /* If packet counter has not changed the queue is
* likely stalled, so force an interrupt for this
* queue.
*
* prev_pkt would be negative if there was no
* pending work.
*/
   packets = ring_stats->stats.pkts & INT_MAX;
   if (ring_stats->tx_stats.prev_pkt == packets) {
    /* Trigger sw interrupt to revive the queue */
    ice_trigger_sw_intr(hw, tx_ring->q_vector);
    continue;
   }

   /* Memory barrier between read of packet count and call
* to ice_get_tx_pending()
*/
   smp_rmb();
   ring_stats->tx_stats.prev_pkt =
       ice_get_tx_pending(tx_ring) ? packets : -1;
  }
}
}

/**
* ice_init_mac_fltr - Set initial MAC filters
* @pf: board private structure
*
* Set initial set of MAC filters for PF VSI; configure filters for permanent
* address and broadcast address. If an error is encountered, netdevice will be
* unregistered.
*/
static int ice_init_mac_fltr(struct ice_pf *pf)
{
struct ice_vsi *vsi;
u8 *perm_addr;

vsi = ice_get_main_vsi(pf);
if (!vsi)
  return -EINVAL;

perm_addr = vsi->port_info->mac.perm_addr;
return ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
}

/**
* ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
* @netdev: the net device on which the sync is happening
* @addr: MAC address to sync
*
* This is a callback function which is called by the in kernel device sync
* functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
* populates the tmp_sync_list, which is later used by ice_add_mac to add the
* MAC filters from the hardware.
*/
static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;

if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr,
         ICE_FWD_TO_VSI))
  return -EINVAL;

return 0;
}

/**
* ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
* @netdev: the net device on which the unsync is happening
* @addr: MAC address to unsync
*
* This is a callback function which is called by the in kernel device unsync
* functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
* populates the tmp_unsync_list, which is later used by ice_remove_mac to
* delete the MAC filters from the hardware.
*/
static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;

/* Under some circumstances, we might receive a request to delete our
* own device address from our uc list. Because we store the device
* address in the VSI's MAC filter list, we need to ignore such
* requests and not delete our device address from this list.
*/
if (ether_addr_equal(addr, netdev->dev_addr))
  return 0;

if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
         ICE_FWD_TO_VSI))
  return -EINVAL;

return 0;
}

/**
* ice_vsi_fltr_changed - check if filter state changed
* @vsi: VSI to be checked
*
* returns true if filter state has changed, false otherwise.
*/
static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
{
return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
        test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
}

/**
* ice_set_promisc - Enable promiscuous mode for a given PF
* @vsi: the VSI being configured
* @promisc_m: mask of promiscuous config bits
*
*/
static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
{
int status;

if (vsi->type != ICE_VSI_PF)
  return 0;

if (ice_vsi_has_non_zero_vlans(vsi)) {
  promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
  status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi,
             promisc_m);
} else {
  status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
        promisc_m, 0);
}
if (status && status != -EEXIST)
  return status;

netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n",
     vsi->vsi_num, promisc_m);
return 0;
}

/**
* ice_clear_promisc - Disable promiscuous mode for a given PF
* @vsi: the VSI being configured
* @promisc_m: mask of promiscuous config bits
*
*/
static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
{
int status;

if (vsi->type != ICE_VSI_PF)
  return 0;

if (ice_vsi_has_non_zero_vlans(vsi)) {
  promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
  status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi,
        promisc_m);
} else {
  status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
          promisc_m, 0);
}

netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n",
     vsi->vsi_num, promisc_m);
return status;
}

/**
* ice_vsi_sync_fltr - Update the VSI filter list to the HW
* @vsi: ptr to the VSI
*
* Push any outstanding VSI filter changes through the AdminQ.
*/
static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
{
struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
struct device *dev = ice_pf_to_dev(vsi->back);
struct net_device *netdev = vsi->netdev;
bool promisc_forced_on = false;
struct ice_pf *pf = vsi->back;
struct ice_hw *hw = &pf->hw;
u32 changed_flags = 0;
int err;

if (!vsi->netdev)
  return -EINVAL;

while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
  usleep_range(1000, 2000);

changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
vsi->current_netdev_flags = vsi->netdev->flags;

INIT_LIST_HEAD(&vsi->tmp_sync_list);
INIT_LIST_HEAD(&vsi->tmp_unsync_list);

if (ice_vsi_fltr_changed(vsi)) {
  clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
  clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);

  /* grab the netdev's addr_list_lock */
  netif_addr_lock_bh(netdev);
  __dev_uc_sync(netdev, ice_add_mac_to_sync_list,
         ice_add_mac_to_unsync_list);
  __dev_mc_sync(netdev, ice_add_mac_to_sync_list,
         ice_add_mac_to_unsync_list);
  /* our temp lists are populated. release lock */
  netif_addr_unlock_bh(netdev);
}

/* Remove MAC addresses in the unsync list */
err = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list);
ice_fltr_free_list(dev, &vsi->tmp_unsync_list);
if (err) {
  netdev_err(netdev, "Failed to delete MAC filters\n");
  /* if we failed because of alloc failures, just bail */
  if (err == -ENOMEM)
   goto out;
}

/* Add MAC addresses in the sync list */
err = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list);
ice_fltr_free_list(dev, &vsi->tmp_sync_list);
/* If filter is added successfully or already exists, do not go into
* 'if' condition and report it as error. Instead continue processing
* rest of the function.
*/
if (err && err != -EEXIST) {
  netdev_err(netdev, "Failed to add MAC filters\n");
  /* If there is no more space for new umac filters, VSI
* should go into promiscuous mode. There should be some
* space reserved for promiscuous filters.
*/
  if (hw->adminq.sq_last_status == LIBIE_AQ_RC_ENOSPC &&
      !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC,
          vsi->state)) {
   promisc_forced_on = true;
   netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
        vsi->vsi_num);
  } else {
   goto out;
  }
}
err = 0;
/* check for changes in promiscuous modes */
if (changed_flags & IFF_ALLMULTI) {
  if (vsi->current_netdev_flags & IFF_ALLMULTI) {
   err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS);
   if (err) {
    vsi->current_netdev_flags &= ~IFF_ALLMULTI;
    goto out_promisc;
   }
  } else {
   /* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
   err = ice_clear_promisc(vsi, ICE_MCAST_PROMISC_BITS);
   if (err) {
    vsi->current_netdev_flags |= IFF_ALLMULTI;
    goto out_promisc;
   }
  }
}

if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
     test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
  clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
  if (vsi->current_netdev_flags & IFF_PROMISC) {
   /* Apply Rx filter rule to get traffic from wire */
   if (!ice_is_dflt_vsi_in_use(vsi->port_info)) {
    err = ice_set_dflt_vsi(vsi);
    if (err && err != -EEXIST) {
     netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n",
         err, vsi->vsi_num);
     vsi->current_netdev_flags &=
      ~IFF_PROMISC;
     goto out_promisc;
    }
    err = 0;
    vlan_ops->dis_rx_filtering(vsi);

    /* promiscuous mode implies allmulticast so
* that VSIs that are in promiscuous mode are
* subscribed to multicast packets coming to
* the port
*/
    err = ice_set_promisc(vsi,
            ICE_MCAST_PROMISC_BITS);
    if (err)
     goto out_promisc;
   }
  } else {
   /* Clear Rx filter to remove traffic from wire */
   if (ice_is_vsi_dflt_vsi(vsi)) {
    err = ice_clear_dflt_vsi(vsi);
    if (err) {
     netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
         err, vsi->vsi_num);
     vsi->current_netdev_flags |=
      IFF_PROMISC;
     goto out_promisc;
    }
    if (vsi->netdev->features &
        NETIF_F_HW_VLAN_CTAG_FILTER)
     vlan_ops->ena_rx_filtering(vsi);
   }

   /* disable allmulti here, but only if allmulti is not
* still enabled for the netdev
*/
   if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
    err = ice_clear_promisc(vsi,
       ICE_MCAST_PROMISC_BITS);
    if (err) {
     netdev_err(netdev, "Error %d clearing multicast promiscuous on VSI %i\n",
         err, vsi->vsi_num);
    }
   }
  }
}
goto exit;

out_promisc:
set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
goto exit;
out:
/* if something went wrong then set the changed flag so we try again */
set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
exit:
clear_bit(ICE_CFG_BUSY, vsi->state);
return err;
}

/**
* ice_sync_fltr_subtask - Sync the VSI filter list with HW
* @pf: board private structure
*/
static void ice_sync_fltr_subtask(struct ice_pf *pf)
{
int v;

if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
  return;

clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);

ice_for_each_vsi(pf, v)
  if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
      ice_vsi_sync_fltr(pf->vsi[v])) {
   /* come back and try again later */
   set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
   break;
  }
}

/**
* ice_pf_dis_all_vsi - Pause all VSIs on a PF
* @pf: the PF
* @locked: is the rtnl_lock already held
*/
static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
{
int node;
int v;

ice_for_each_vsi(pf, v)
  if (pf->vsi[v])
   ice_dis_vsi(pf->vsi[v], locked);

for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++)
  pf->pf_agg_node[node].num_vsis = 0;

for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
  pf->vf_agg_node[node].num_vsis = 0;
}

/**
* ice_prepare_for_reset - prep for reset
* @pf: board private structure
* @reset_type: reset type requested
*
* Inform or close all dependent features in prep for reset.
*/
static void
ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
struct ice_hw *hw = &pf->hw;
struct ice_vsi *vsi;
struct ice_vf *vf;
unsigned int bkt;

dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);

/* already prepared for reset */
if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
  return;

synchronize_irq(pf->oicr_irq.virq);

ice_unplug_aux_dev(pf);

/* Notify VFs of impending reset */
if (ice_check_sq_alive(hw, &hw->mailboxq))
  ice_vc_notify_reset(pf);

/* Disable VFs until reset is completed */
mutex_lock(&pf->vfs.table_lock);
ice_for_each_vf(pf, bkt, vf)
  ice_set_vf_state_dis(vf);
mutex_unlock(&pf->vfs.table_lock);

if (ice_is_eswitch_mode_switchdev(pf)) {
  rtnl_lock();
  ice_eswitch_br_fdb_flush(pf->eswitch.br_offloads->bridge);
  rtnl_unlock();
}

/* release ADQ specific HW and SW resources */
vsi = ice_get_main_vsi(pf);
if (!vsi)
  goto skip;

/* to be on safe side, reset orig_rss_size so that normal flow
* of deciding rss_size can take precedence
*/
vsi->orig_rss_size = 0;

if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
  if (reset_type == ICE_RESET_PFR) {
   vsi->old_ena_tc = vsi->all_enatc;
   vsi->old_numtc = vsi->all_numtc;
  } else {
   ice_remove_q_channels(vsi, true);

   /* for other reset type, do not support channel rebuild
* hence reset needed info
*/
   vsi->old_ena_tc = 0;
   vsi->all_enatc = 0;
   vsi->old_numtc = 0;
   vsi->all_numtc = 0;
   vsi->req_txq = 0;
   vsi->req_rxq = 0;
   clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
   memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt));
  }
}

if (vsi->netdev)
  netif_device_detach(vsi->netdev);
skip:

/* clear SW filtering DB */
ice_clear_hw_tbls(hw);
/* disable the VSIs and their queues that are not already DOWN */
set_bit(ICE_VSI_REBUILD_PENDING, ice_get_main_vsi(pf)->state);
ice_pf_dis_all_vsi(pf, false);

if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
  ice_ptp_prepare_for_reset(pf, reset_type);

if (ice_is_feature_supported(pf, ICE_F_GNSS))
  ice_gnss_exit(pf);

if (hw->port_info)
  ice_sched_clear_port(hw->port_info);

ice_shutdown_all_ctrlq(hw, false);

set_bit(ICE_PREPARED_FOR_RESET, pf->state);
}

/**
* ice_do_reset - Initiate one of many types of resets
* @pf: board private structure
* @reset_type: reset type requested before this function was called.
*/
static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;

dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);

if (pf->lag && pf->lag->bonded && reset_type == ICE_RESET_PFR) {
  dev_dbg(dev, "PFR on a bonded interface, promoting to CORER\n");
  reset_type = ICE_RESET_CORER;
}

ice_prepare_for_reset(pf, reset_type);

/* trigger the reset */
if (ice_reset(hw, reset_type)) {
  dev_err(dev, "reset %d failed\n", reset_type);
  set_bit(ICE_RESET_FAILED, pf->state);
  clear_bit(ICE_RESET_OICR_RECV, pf->state);
  clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
  clear_bit(ICE_PFR_REQ, pf->state);
  clear_bit(ICE_CORER_REQ, pf->state);
  clear_bit(ICE_GLOBR_REQ, pf->state);
  wake_up(&pf->reset_wait_queue);
  return;
}

/* PFR is a bit of a special case because it doesn't result in an OICR
* interrupt. So for PFR, rebuild after the reset and clear the reset-
* associated state bits.
*/
if (reset_type == ICE_RESET_PFR) {
  pf->pfr_count++;
  ice_rebuild(pf, reset_type);
  clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
  clear_bit(ICE_PFR_REQ, pf->state);
  wake_up(&pf->reset_wait_queue);
  ice_reset_all_vfs(pf);
}
}

/**
* ice_reset_subtask - Set up for resetting the device and driver
* @pf: board private structure
*/
static void ice_reset_subtask(struct ice_pf *pf)
{
enum ice_reset_req reset_type = ICE_RESET_INVAL;

/* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
* OICR interrupt. The OICR handler (ice_misc_intr) determines what type
* of reset is pending and sets bits in pf->state indicating the reset
* type and ICE_RESET_OICR_RECV. So, if the latter bit is set
* prepare for pending reset if not already (for PF software-initiated
* global resets the software should already be prepared for it as
* indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
* by firmware or software on other PFs, that bit is not set so prepare
* for the reset now), poll for reset done, rebuild and return.
*/
if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
  /* Perform the largest reset requested */
  if (test_and_clear_bit(ICE_CORER_RECV, pf->state))
   reset_type = ICE_RESET_CORER;
  if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state))
   reset_type = ICE_RESET_GLOBR;
  if (test_and_clear_bit(ICE_EMPR_RECV, pf->state))
   reset_type = ICE_RESET_EMPR;
  /* return if no valid reset type requested */
  if (reset_type == ICE_RESET_INVAL)
   return;
  ice_prepare_for_reset(pf, reset_type);

  /* make sure we are ready to rebuild */
  if (ice_check_reset(&pf->hw)) {
   set_bit(ICE_RESET_FAILED, pf->state);
  } else {
   /* done with reset. start rebuild */
   pf->hw.reset_ongoing = false;
   ice_rebuild(pf, reset_type);
   /* clear bit to resume normal operations, but
* ICE_NEEDS_RESTART bit is set in case rebuild failed
*/
   clear_bit(ICE_RESET_OICR_RECV, pf->state);
   clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
   clear_bit(ICE_PFR_REQ, pf->state);
   clear_bit(ICE_CORER_REQ, pf->state);
   clear_bit(ICE_GLOBR_REQ, pf->state);
   wake_up(&pf->reset_wait_queue);
   ice_reset_all_vfs(pf);
  }

  return;
}

/* No pending resets to finish processing. Check for new resets */
if (test_bit(ICE_PFR_REQ, pf->state)) {
  reset_type = ICE_RESET_PFR;
  if (pf->lag && pf->lag->bonded) {
   dev_dbg(ice_pf_to_dev(pf), "PFR on a bonded interface, promoting to CORER\n");
   reset_type = ICE_RESET_CORER;
  }
}
if (test_bit(ICE_CORER_REQ, pf->state))
  reset_type = ICE_RESET_CORER;
if (test_bit(ICE_GLOBR_REQ, pf->state))
  reset_type = ICE_RESET_GLOBR;
/* If no valid reset type requested just return */
if (reset_type == ICE_RESET_INVAL)
  return;

/* reset if not already down or busy */
if (!test_bit(ICE_DOWN, pf->state) &&
     !test_bit(ICE_CFG_BUSY, pf->state)) {
  ice_do_reset(pf, reset_type);
}
}

/**
* ice_print_topo_conflict - print topology conflict message
* @vsi: the VSI whose topology status is being checked
*/
static void ice_print_topo_conflict(struct ice_vsi *vsi)
{
switch (vsi->port_info->phy.link_info.topo_media_conflict) {
case ICE_AQ_LINK_TOPO_CONFLICT:
case ICE_AQ_LINK_MEDIA_CONFLICT:
case ICE_AQ_LINK_TOPO_UNREACH_PRT:
case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
  netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
  break;
case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
  if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, vsi->back->flags))
   netdev_warn(vsi->netdev, "An unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules\n");
  else
   netdev_err(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
  break;
default:
  break;
}
}

/**
* ice_print_link_msg - print link up or down message
* @vsi: the VSI whose link status is being queried
* @isup: boolean for if the link is now up or down
*/
void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
{
struct ice_aqc_get_phy_caps_data *caps;
const char *an_advertised;
const char *fec_req;
const char *speed;
const char *fec;
const char *fc;
const char *an;
int status;

if (!vsi)
  return;

if (vsi->current_isup == isup)
  return;

vsi->current_isup = isup;

if (!isup) {
  netdev_info(vsi->netdev, "NIC Link is Down\n");
  return;
}

switch (vsi->port_info->phy.link_info.link_speed) {
case ICE_AQ_LINK_SPEED_200GB:
  speed = "200 G";
  break;
case ICE_AQ_LINK_SPEED_100GB:
  speed = "100 G";
  break;
case ICE_AQ_LINK_SPEED_50GB:
  speed = "50 G";
  break;
case ICE_AQ_LINK_SPEED_40GB:
  speed = "40 G";
  break;
case ICE_AQ_LINK_SPEED_25GB:
  speed = "25 G";
  break;
case ICE_AQ_LINK_SPEED_20GB:
  speed = "20 G";
  break;
case ICE_AQ_LINK_SPEED_10GB:
  speed = "10 G";
  break;
case ICE_AQ_LINK_SPEED_5GB:
  speed = "5 G";
  break;
case ICE_AQ_LINK_SPEED_2500MB:
  speed = "2.5 G";
  break;
case ICE_AQ_LINK_SPEED_1000MB:
  speed = "1 G";
  break;
case ICE_AQ_LINK_SPEED_100MB:
  speed = "100 M";
  break;
default:
  speed = "Unknown ";
  break;
}

switch (vsi->port_info->fc.current_mode) {
case ICE_FC_FULL:
  fc = "Rx/Tx";
  break;
case ICE_FC_TX_PAUSE:
  fc = "Tx";
  break;
case ICE_FC_RX_PAUSE:
  fc = "Rx";
  break;
case ICE_FC_NONE:
  fc = "None";
  break;
default:
  fc = "Unknown";
  break;
}

/* Get FEC mode based on negotiated link info */
switch (vsi->port_info->phy.link_info.fec_info) {
case ICE_AQ_LINK_25G_RS_528_FEC_EN:
case ICE_AQ_LINK_25G_RS_544_FEC_EN:
  fec = "RS-FEC";
  break;
case ICE_AQ_LINK_25G_KR_FEC_EN:
  fec = "FC-FEC/BASE-R";
  break;
default:
  fec = "NONE";
  break;
}

/* check if autoneg completed, might be false due to not supported */
if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
  an = "True";
else
  an = "False";

/* Get FEC mode requested based on PHY caps last SW configuration */
caps = kzalloc(sizeof(*caps), GFP_KERNEL);
if (!caps) {
  fec_req = "Unknown";
  an_advertised = "Unknown";
  goto done;
}

status = ice_aq_get_phy_caps(vsi->port_info, false,
         ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
if (status)
  netdev_info(vsi->netdev, "Get phy capability failed.\n");

an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";

if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
     caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
  fec_req = "RS-FEC";
else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
   caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
  fec_req = "FC-FEC/BASE-R";
else
  fec_req = "NONE";

kfree(caps);

done:
netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
      speed, fec_req, fec, an_advertised, an, fc);
ice_print_topo_conflict(vsi);
}

/**
* ice_vsi_link_event - update the VSI's netdev
* @vsi: the VSI on which the link event occurred
* @link_up: whether or not the VSI needs to be set up or down
*/
static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
{
if (!vsi)
  return;

if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev)
  return;

if (vsi->type == ICE_VSI_PF) {
  if (link_up == netif_carrier_ok(vsi->netdev))
   return;

  if (link_up) {
   netif_carrier_on(vsi->netdev);
   netif_tx_wake_all_queues(vsi->netdev);
  } else {
   netif_carrier_off(vsi->netdev);
   netif_tx_stop_all_queues(vsi->netdev);
  }
}
}

/**
* ice_set_dflt_mib - send a default config MIB to the FW
* @pf: private PF struct
*
* This function sends a default configuration MIB to the FW.
*
* If this function errors out at any point, the driver is still able to
* function.  The main impact is that LFC may not operate as expected.
* Therefore an error state in this function should be treated with a DBG
* message and continue on with driver rebuild/reenable.
*/
static void ice_set_dflt_mib(struct ice_pf *pf)
{
struct device *dev = ice_pf_to_dev(pf);
u8 mib_type, *buf, *lldpmib = NULL;
u16 len, typelen, offset = 0;
struct ice_lldp_org_tlv *tlv;
struct ice_hw *hw = &pf->hw;
u32 ouisubtype;

mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
if (!lldpmib) {
  dev_dbg(dev, "%s Failed to allocate MIB memory\n",
   __func__);
  return;
}

/* Add ETS CFG TLV */
tlv = (struct ice_lldp_org_tlv *)lldpmib;
typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
     ICE_IEEE_ETS_TLV_LEN);
tlv->typelen = htons(typelen);
ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
        ICE_IEEE_SUBTYPE_ETS_CFG);
tlv->ouisubtype = htonl(ouisubtype);

buf = tlv->tlvinfo;
buf[0] = 0;

/* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
* Octets 5 - 12 are BW values, set octet 5 to 100% BW.
* Octets 13 - 20 are TSA values - leave as zeros
*/
buf[5] = 0x64;
len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen);
offset += len + 2;
tlv = (struct ice_lldp_org_tlv *)
  ((char *)tlv + sizeof(tlv->typelen) + len);

/* Add ETS REC TLV */
buf = tlv->tlvinfo;
tlv->typelen = htons(typelen);

ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
        ICE_IEEE_SUBTYPE_ETS_REC);
tlv->ouisubtype = htonl(ouisubtype);

/* First octet of buf is reserved
* Octets 1 - 4 map UP to TC - all UPs map to zero
* Octets 5 - 12 are BW values - set TC 0 to 100%.
* Octets 13 - 20 are TSA value - leave as zeros
*/
buf[5] = 0x64;
offset += len + 2;
tlv = (struct ice_lldp_org_tlv *)
  ((char *)tlv + sizeof(tlv->typelen) + len);

/* Add PFC CFG TLV */
typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
     ICE_IEEE_PFC_TLV_LEN);
tlv->typelen = htons(typelen);

ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
        ICE_IEEE_SUBTYPE_PFC_CFG);
tlv->ouisubtype = htonl(ouisubtype);

/* Octet 1 left as all zeros - PFC disabled */
buf[0] = 0x08;
len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen);
offset += len + 2;

if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
  dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);

kfree(lldpmib);
}

/**
* ice_check_phy_fw_load - check if PHY FW load failed
* @pf: pointer to PF struct
* @link_cfg_err: bitmap from the link info structure
*
* check if external PHY FW load failed and print an error message if it did
*/
static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err)
{
if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) {
  clear_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
  return;
}

if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags))
  return;

if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) {
  dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
  set_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
}
}

/**
* ice_check_module_power
* @pf: pointer to PF struct
* @link_cfg_err: bitmap from the link info structure
*
* check module power level returned by a previous call to aq_get_link_info
* and print error messages if module power level is not supported
*/
static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
{
/* if module power level is supported, clear the flag */
if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT |
         ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) {
  clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
  return;
}

/* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the
* above block didn't clear this bit, there's nothing to do
*/
if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags))
  return;

if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) {
  dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
  set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
} else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) {
  dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n");
  set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
}
}

/**
* ice_check_link_cfg_err - check if link configuration failed
* @pf: pointer to the PF struct
* @link_cfg_err: bitmap from the link info structure
*
* print if any link configuration failure happens due to the value in the
* link_cfg_err parameter in the link info structure
*/
static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err)
{
ice_check_module_power(pf, link_cfg_err);
ice_check_phy_fw_load(pf, link_cfg_err);
}

/**
* ice_link_event - process the link event
* @pf: PF that the link event is associated with
* @pi: port_info for the port that the link event is associated with
* @link_up: true if the physical link is up and false if it is down
* @link_speed: current link speed received from the link event
*
* Returns 0 on success and negative on failure
*/
static int
ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
        u16 link_speed)
{
struct device *dev = ice_pf_to_dev(pf);
struct ice_phy_info *phy_info;
struct ice_vsi *vsi;
u16 old_link_speed;
bool old_link;
int status;

phy_info = &pi->phy;
phy_info->link_info_old = phy_info->link_info;

old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
old_link_speed = phy_info->link_info_old.link_speed;

/* update the link info structures and re-enable link events,
* don't bail on failure due to other book keeping needed
*/
status = ice_update_link_info(pi);
if (status)
  dev_dbg(dev, "Failed to update link status on port %d, err %d aq_err %s\n",
   pi->lport, status,
   libie_aq_str(pi->hw->adminq.sq_last_status));

ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);

/* Check if the link state is up after updating link info, and treat
* this event as an UP event since the link is actually UP now.
*/
if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
  link_up = true;

vsi = ice_get_main_vsi(pf);
if (!vsi || !vsi->port_info)
  return -EINVAL;

/* turn off PHY if media was removed */
if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
     !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
  set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
  ice_set_link(vsi, false);
}

/* if the old link up/down and speed is the same as the new */
if (link_up == old_link && link_speed == old_link_speed)
  return 0;

if (!link_up && old_link)
  pf->link_down_events++;

ice_ptp_link_change(pf, link_up);

if (ice_is_dcb_active(pf)) {
  if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
   ice_dcb_rebuild(pf);
} else {
  if (link_up)
   ice_set_dflt_mib(pf);
}
ice_vsi_link_event(vsi, link_up);
ice_print_link_msg(vsi, link_up);

ice_vc_notify_link_state(pf);

return 0;
}

/**
* ice_watchdog_subtask - periodic tasks not using event driven scheduling
* @pf: board private structure
*/
static void ice_watchdog_subtask(struct ice_pf *pf)
{
int i;

/* if interface is down do nothing */
if (test_bit(ICE_DOWN, pf->state) ||
     test_bit(ICE_CFG_BUSY, pf->state))
  return;

/* make sure we don't do these things too often */
if (time_before(jiffies,
   pf->serv_tmr_prev + pf->serv_tmr_period))
  return;

pf->serv_tmr_prev = jiffies;

/* Update the stats for active netdevs so the network stack
* can look at updated numbers whenever it cares to
*/
ice_update_pf_stats(pf);
ice_for_each_vsi(pf, i)
  if (pf->vsi[i] && pf->vsi[i]->netdev)
   ice_update_vsi_stats(pf->vsi[i]);
}

/**
* ice_init_link_events - enable/initialize link events
* @pi: pointer to the port_info instance
*
* Returns -EIO on failure, 0 on success
*/
static int ice_init_link_events(struct ice_port_info *pi)
{
u16 mask;

mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
         ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL |
         ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL));

if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
  dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
   pi->lport);
  return -EIO;
}

if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
  dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
   pi->lport);
  return -EIO;
}

return 0;
}

/**
* ice_handle_link_event - handle link event via ARQ
* @pf: PF that the link event is associated with
* @event: event structure containing link status info
*/
static int
ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
{
struct ice_aqc_get_link_status_data *link_data;
struct ice_port_info *port_info;
int status;

link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
port_info = pf->hw.port_info;
if (!port_info)
  return -EINVAL;

status = ice_link_event(pf, port_info,
    !!(link_data->link_info & ICE_AQ_LINK_UP),
    le16_to_cpu(link_data->link_speed));
if (status)
  dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
   status);

return status;
}

/**
* ice_get_fwlog_data - copy the FW log data from ARQ event
* @pf: PF that the FW log event is associated with
* @event: event structure containing FW log data
*/
static void
ice_get_fwlog_data(struct ice_pf *pf, struct ice_rq_event_info *event)
{
struct ice_fwlog_data *fwlog;
struct ice_hw *hw = &pf->hw;

fwlog = &hw->fwlog_ring.rings[hw->fwlog_ring.tail];

memset(fwlog->data, 0, PAGE_SIZE);
fwlog->data_size = le16_to_cpu(event->desc.datalen);

memcpy(fwlog->data, event->msg_buf, fwlog->data_size);
ice_fwlog_ring_increment(&hw->fwlog_ring.tail, hw->fwlog_ring.size);

if (ice_fwlog_ring_full(&hw->fwlog_ring)) {
  /* the rings are full so bump the head to create room */
  ice_fwlog_ring_increment(&hw->fwlog_ring.head,
      hw->fwlog_ring.size);
}
}

/**
* ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware
* @pf: pointer to the PF private structure
* @task: intermediate helper storage and identifier for waiting
* @opcode: the opcode to wait for
*
* Prepares to wait for a specific AdminQ completion event on the ARQ for
* a given PF. Actual wait would be done by a call to ice_aq_wait_for_event().
*
* Calls are separated to allow caller registering for event before sending
* the command, which mitigates a race between registering and FW responding.
*
* To obtain only the descriptor contents, pass an task->event with null
* msg_buf. If the complete data buffer is desired, allocate the
* task->event.msg_buf with enough space ahead of time.
*/
void ice_aq_prep_for_event(struct ice_pf *pf, struct ice_aq_task *task,
      u16 opcode)
{
INIT_HLIST_NODE(&task->entry);
task->opcode = opcode;
task->state = ICE_AQ_TASK_WAITING;

spin_lock_bh(&pf->aq_wait_lock);
hlist_add_head(&task->entry, &pf->aq_wait_list);
spin_unlock_bh(&pf->aq_wait_lock);
}

/**
* ice_aq_wait_for_event - Wait for an AdminQ event from firmware
* @pf: pointer to the PF private structure
* @task: ptr prepared by ice_aq_prep_for_event()
* @timeout: how long to wait, in jiffies
*
* Waits for a specific AdminQ completion event on the ARQ for a given PF. The
* current thread will be put to sleep until the specified event occurs or
* until the given timeout is reached.
*
* Returns: zero on success, or a negative error code on failure.
*/
int ice_aq_wait_for_event(struct ice_pf *pf, struct ice_aq_task *task,
     unsigned long timeout)
{
enum ice_aq_task_state *state = &task->state;
struct device *dev = ice_pf_to_dev(pf);
unsigned long start = jiffies;
long ret;
int err;

ret = wait_event_interruptible_timeout(pf->aq_wait_queue,
            *state != ICE_AQ_TASK_WAITING,
            timeout);
switch (*state) {
case ICE_AQ_TASK_NOT_PREPARED:
  WARN(1, "call to %s without ice_aq_prep_for_event()", __func__);
  err = -EINVAL;
  break;
case ICE_AQ_TASK_WAITING:
  err = ret < 0 ? ret : -ETIMEDOUT;
  break;
case ICE_AQ_TASK_CANCELED:
  err = ret < 0 ? ret : -ECANCELED;
  break;
case ICE_AQ_TASK_COMPLETE:
  err = ret < 0 ? ret : 0;
  break;
default:
  WARN(1, "Unexpected AdminQ wait task state %u", *state);
  err = -EINVAL;
  break;
}

dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
  jiffies_to_msecs(jiffies - start),
  jiffies_to_msecs(timeout),
  task->opcode);

spin_lock_bh(&pf->aq_wait_lock);
hlist_del(&task->entry);
spin_unlock_bh(&pf->aq_wait_lock);

return err;
}

/**
* ice_aq_check_events - Check if any thread is waiting for an AdminQ event
* @pf: pointer to the PF private structure
* @opcode: the opcode of the event
* @event: the event to check
*
* Loops over the current list of pending threads waiting for an AdminQ event.
* For each matching task, copy the contents of the event into the task
* structure and wake up the thread.
*
* If multiple threads wait for the same opcode, they will all be woken up.
*
* Note that event->msg_buf will only be duplicated if the event has a buffer
* with enough space already allocated. Otherwise, only the descriptor and
* message length will be copied.
*
* Returns: true if an event was found, false otherwise
*/
static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
    struct ice_rq_event_info *event)
{
struct ice_rq_event_info *task_ev;
struct ice_aq_task *task;
bool found = false;

spin_lock_bh(&pf->aq_wait_lock);
hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
  if (task->state != ICE_AQ_TASK_WAITING)
   continue;
  if (task->opcode != opcode)
   continue;

  task_ev = &task->event;
  memcpy(&task_ev->desc, &event->desc, sizeof(event->desc));
  task_ev->msg_len = event->msg_len;

  /* Only copy the data buffer if a destination was set */
  if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) {
   memcpy(task_ev->msg_buf, event->msg_buf,
          event->buf_len);
   task_ev->buf_len = event->buf_len;
  }

  task->state = ICE_AQ_TASK_COMPLETE;
  found = true;
}
spin_unlock_bh(&pf->aq_wait_lock);

if (found)
  wake_up(&pf->aq_wait_queue);
}

/**
* ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
* @pf: the PF private structure
*
* Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
* This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
*/
static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
{
struct ice_aq_task *task;

spin_lock_bh(&pf->aq_wait_lock);
hlist_for_each_entry(task, &pf->aq_wait_list, entry)
  task->state = ICE_AQ_TASK_CANCELED;
spin_unlock_bh(&pf->aq_wait_lock);

wake_up(&pf->aq_wait_queue);
}

#define ICE_MBX_OVERFLOW_WATERMARK 64

/**
* __ice_clean_ctrlq - helper function to clean controlq rings
* @pf: ptr to struct ice_pf
* @q_type: specific Control queue type
*/
static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
{
struct device *dev = ice_pf_to_dev(pf);
struct ice_rq_event_info event;
struct ice_hw *hw = &pf->hw;
struct ice_ctl_q_info *cq;
u16 pending, i = 0;
const char *qtype;
u32 oldval, val;

/* Do not clean control queue if/when PF reset fails */
if (test_bit(ICE_RESET_FAILED, pf->state))
  return 0;

switch (q_type) {
case ICE_CTL_Q_ADMIN:
  cq = &hw->adminq;
  qtype = "Admin";
  break;
case ICE_CTL_Q_SB:
  cq = &hw->sbq;
  qtype = "Sideband";
  break;
case ICE_CTL_Q_MAILBOX:
  cq = &hw->mailboxq;
  qtype = "Mailbox";
  /* we are going to try to detect a malicious VF, so set the
* state to begin detection
*/
  hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
  break;
default:
  dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
  return 0;
}

/* check for error indications - PF_xx_AxQLEN register layout for
* FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
*/
val = rd32(hw, cq->rq.len);
if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
     PF_FW_ARQLEN_ARQCRIT_M)) {
  oldval = val;
  if (val & PF_FW_ARQLEN_ARQVFE_M)
   dev_dbg(dev, "%s Receive Queue VF Error detected\n",
    qtype);
  if (val & PF_FW_ARQLEN_ARQOVFL_M) {
   dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
    qtype);
  }
  if (val & PF_FW_ARQLEN_ARQCRIT_M)
   dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
    qtype);
  val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
    PF_FW_ARQLEN_ARQCRIT_M);
  if (oldval != val)
   wr32(hw, cq->rq.len, val);
}

val = rd32(hw, cq->sq.len);
if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
     PF_FW_ATQLEN_ATQCRIT_M)) {
  oldval = val;
  if (val & PF_FW_ATQLEN_ATQVFE_M)
   dev_dbg(dev, "%s Send Queue VF Error detected\n",
    qtype);
  if (val & PF_FW_ATQLEN_ATQOVFL_M) {
   dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
    qtype);
  }
  if (val & PF_FW_ATQLEN_ATQCRIT_M)
   dev_dbg(dev, "%s Send Queue Critical Error detected\n",
    qtype);
  val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
    PF_FW_ATQLEN_ATQCRIT_M);
  if (oldval != val)
   wr32(hw, cq->sq.len, val);
}

event.buf_len = cq->rq_buf_size;
event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
if (!event.msg_buf)
  return 0;

do {
  struct ice_mbx_data data = {};
  u16 opcode;
  int ret;

  ret = ice_clean_rq_elem(hw, cq, &event, &pending);
  if (ret == -EALREADY)
   break;
  if (ret) {
   dev_err(dev, "%s Receive Queue event error %d\n", qtype,
    ret);
   break;
  }

  opcode = le16_to_cpu(event.desc.opcode);

  /* Notify any thread that might be waiting for this event */
  ice_aq_check_events(pf, opcode, &event);

  switch (opcode) {
  case ice_aqc_opc_get_link_status:
   if (ice_handle_link_event(pf, &event))
    dev_err(dev, "Could not handle link event\n");
   break;
  case ice_aqc_opc_event_lan_overflow:
   ice_vf_lan_overflow_event(pf, &event);
   break;
  case ice_mbx_opc_send_msg_to_pf:
   if (ice_is_feature_supported(pf, ICE_F_MBX_LIMIT)) {
    ice_vc_process_vf_msg(pf, &event, NULL);
    ice_mbx_vf_dec_trig_e830(hw, &event);
   } else {
    u16 val = hw->mailboxq.num_rq_entries;

    data.max_num_msgs_mbx = val;
    val = ICE_MBX_OVERFLOW_WATERMARK;
    data.async_watermark_val = val;
    data.num_msg_proc = i;
    data.num_pending_arq = pending;

    ice_vc_process_vf_msg(pf, &event, &data);
   }
   break;
  case ice_aqc_opc_fw_logs_event:
   ice_get_fwlog_data(pf, &event);
   break;
  case ice_aqc_opc_lldp_set_mib_change:
   ice_dcb_process_lldp_set_mib_change(pf, &event);
   break;
  case ice_aqc_opc_get_health_status:
   ice_process_health_status_event(pf, &event);
   break;
  default:
   dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
    qtype, opcode);
   break;
  }
} while (pending && (i++ < ICE_DFLT_IRQ_WORK));

kfree(event.msg_buf);

return pending && (i == ICE_DFLT_IRQ_WORK);
}

/**
* ice_ctrlq_pending - check if there is a difference between ntc and ntu
* @hw: pointer to hardware info
* @cq: control queue information
*
* returns true if there are pending messages in a queue, false if there aren't
*/
static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
{
u16 ntu;

ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
return cq->rq.next_to_clean != ntu;
}

/**
* ice_clean_adminq_subtask - clean the AdminQ rings
* @pf: board private structure
*/
static void ice_clean_adminq_subtask(struct ice_pf *pf)
{
struct ice_hw *hw = &pf->hw;

if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
  return;

if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
  return;

clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);

/* There might be a situation where new messages arrive to a control
* queue between processing the last message and clearing the
* EVENT_PENDING bit. So before exiting, check queue head again (using
* ice_ctrlq_pending) and process new messages if any.
*/
if (ice_ctrlq_pending(hw, &hw->adminq))
  __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);

ice_flush(hw);
}

/**
* ice_clean_mailboxq_subtask - clean the MailboxQ rings
* @pf: board private structure
*/
static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
{
struct ice_hw *hw = &pf->hw;

if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
  return;

if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
  return;

clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);

if (ice_ctrlq_pending(hw, &hw->mailboxq))
  __ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);

ice_flush(hw);
}

/**
* ice_clean_sbq_subtask - clean the Sideband Queue rings
* @pf: board private structure
*/
static void ice_clean_sbq_subtask(struct ice_pf *pf)
{
struct ice_hw *hw = &pf->hw;

/* if mac_type is not generic, sideband is not supported
* and there's nothing to do here
*/
if (!ice_is_generic_mac(hw)) {
  clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
  return;
}

if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state))
  return;

if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB))
  return;

clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);

if (ice_ctrlq_pending(hw, &hw->sbq))
  __ice_clean_ctrlq(pf, ICE_CTL_Q_SB);

ice_flush(hw);
}

/**
* ice_service_task_schedule - schedule the service task to wake up
* @pf: board private structure
*
* If not already scheduled, this puts the task into the work queue.
*/
void ice_service_task_schedule(struct ice_pf *pf)
{
if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
     !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) &&
     !test_bit(ICE_NEEDS_RESTART, pf->state))
  queue_work(ice_wq, &pf->serv_task);
}

/**
* ice_service_task_complete - finish up the service task
* @pf: board private structure
*/
static void ice_service_task_complete(struct ice_pf *pf)
{
WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));

/* force memory (pf->state) to sync before next service task */
smp_mb__before_atomic();
clear_bit(ICE_SERVICE_SCHED, pf->state);
}

/**
* ice_service_task_stop - stop service task and cancel works
* @pf: board private structure
*
* Return 0 if the ICE_SERVICE_DIS bit was not already set,
* 1 otherwise.
*/
static int ice_service_task_stop(struct ice_pf *pf)
{
int ret;

ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state);

if (pf->serv_tmr.function)
  timer_delete_sync(&pf->serv_tmr);
if (pf->serv_task.func)
  cancel_work_sync(&pf->serv_task);

clear_bit(ICE_SERVICE_SCHED, pf->state);
return ret;
}

/**
* ice_service_task_restart - restart service task and schedule works
* @pf: board private structure
*
* This function is needed for suspend and resume works (e.g WoL scenario)
*/
static void ice_service_task_restart(struct ice_pf *pf)
{
clear_bit(ICE_SERVICE_DIS, pf->state);
ice_service_task_schedule(pf);
}

/**
* ice_service_timer - timer callback to schedule service task
* @t: pointer to timer_list
*/
static void ice_service_timer(struct timer_list *t)
{
struct ice_pf *pf = timer_container_of(pf, t, serv_tmr);

mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
ice_service_task_schedule(pf);
}

/**
* ice_mdd_maybe_reset_vf - reset VF after MDD event
* @pf: pointer to the PF structure
* @vf: pointer to the VF structure
* @reset_vf_tx: whether Tx MDD has occurred
* @reset_vf_rx: whether Rx MDD has occurred
*
* Since the queue can get stuck on VF MDD events, the PF can be configured to
* automatically reset the VF by enabling the private ethtool flag
* mdd-auto-reset-vf.
*/
static void ice_mdd_maybe_reset_vf(struct ice_pf *pf, struct ice_vf *vf,
       bool reset_vf_tx, bool reset_vf_rx)
{
struct device *dev = ice_pf_to_dev(pf);

if (!test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags))
  return;

/* VF MDD event counters will be cleared by reset, so print the event
* prior to reset.
*/
if (reset_vf_tx)
  ice_print_vf_tx_mdd_event(vf);

if (reset_vf_rx)
  ice_print_vf_rx_mdd_event(vf);

dev_info(dev, "PF-to-VF reset on PF %d VF %d due to MDD event\n",
   pf->hw.pf_id, vf->vf_id);
ice_reset_vf(vf, ICE_VF_RESET_NOTIFY | ICE_VF_RESET_LOCK);
}

/**
* ice_handle_mdd_event - handle malicious driver detect event
* @pf: pointer to the PF structure
*
* Called from service task. OICR interrupt handler indicates MDD event.
* VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
* messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events
* disable the queue, the PF can be configured to reset the VF using ethtool
* private flag mdd-auto-reset-vf.
*/
static void ice_handle_mdd_event(struct ice_pf *pf)
{
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
struct ice_vf *vf;
unsigned int bkt;
u32 reg;

if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) {
  /* Since the VF MDD event logging is rate limited, check if
* there are pending MDD events.
*/
  ice_print_vfs_mdd_events(pf);
  return;
}

/* find what triggered an MDD event */
reg = rd32(hw, GL_MDET_TX_PQM);
if (reg & GL_MDET_TX_PQM_VALID_M) {
  u8 pf_num = FIELD_GET(GL_MDET_TX_PQM_PF_NUM_M, reg);
  u16 vf_num = FIELD_GET(GL_MDET_TX_PQM_VF_NUM_M, reg);
  u8 event = FIELD_GET(GL_MDET_TX_PQM_MAL_TYPE_M, reg);
  u16 queue = FIELD_GET(GL_MDET_TX_PQM_QNUM_M, reg);

  if (netif_msg_tx_err(pf))
   dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
     event, queue, pf_num, vf_num);
  ice_report_mdd_event(pf, ICE_MDD_SRC_TX_PQM, pf_num, vf_num,
         event, queue);
  wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
}

reg = rd32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw));
if (reg & GL_MDET_TX_TCLAN_VALID_M) {
  u8 pf_num = FIELD_GET(GL_MDET_TX_TCLAN_PF_NUM_M, reg);
  u16 vf_num = FIELD_GET(GL_MDET_TX_TCLAN_VF_NUM_M, reg);
  u8 event = FIELD_GET(GL_MDET_TX_TCLAN_MAL_TYPE_M, reg);
  u16 queue = FIELD_GET(GL_MDET_TX_TCLAN_QNUM_M, reg);

  if (netif_msg_tx_err(pf))
   dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
     event, queue, pf_num, vf_num);
  ice_report_mdd_event(pf, ICE_MDD_SRC_TX_TCLAN, pf_num, vf_num,
         event, queue);
  wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX);
}

reg = rd32(hw, GL_MDET_RX);
if (reg & GL_MDET_RX_VALID_M) {
  u8 pf_num = FIELD_GET(GL_MDET_RX_PF_NUM_M, reg);
  u16 vf_num = FIELD_GET(GL_MDET_RX_VF_NUM_M, reg);
  u8 event = FIELD_GET(GL_MDET_RX_MAL_TYPE_M, reg);
  u16 queue = FIELD_GET(GL_MDET_RX_QNUM_M, reg);

  if (netif_msg_rx_err(pf))
   dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
     event, queue, pf_num, vf_num);
  ice_report_mdd_event(pf, ICE_MDD_SRC_RX, pf_num, vf_num, event,
         queue);
  wr32(hw, GL_MDET_RX, 0xffffffff);
}

/* check to see if this PF caused an MDD event */
reg = rd32(hw, PF_MDET_TX_PQM);
if (reg & PF_MDET_TX_PQM_VALID_M) {
  wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
  if (netif_msg_tx_err(pf))
   dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
}

reg = rd32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw));
if (reg & PF_MDET_TX_TCLAN_VALID_M) {
  wr32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw), 0xffff);
  if (netif_msg_tx_err(pf))
   dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
}

reg = rd32(hw, PF_MDET_RX);
if (reg & PF_MDET_RX_VALID_M) {
  wr32(hw, PF_MDET_RX, 0xFFFF);
  if (netif_msg_rx_err(pf))
   dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
}

/* Check to see if one of the VFs caused an MDD event, and then
* increment counters and set print pending
*/
mutex_lock(&pf->vfs.table_lock);
ice_for_each_vf(pf, bkt, vf) {
  bool reset_vf_tx = false, reset_vf_rx = false;

  reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
  if (reg & VP_MDET_TX_PQM_VALID_M) {
   wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF);
   vf->mdd_tx_events.count++;
   set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
   if (netif_msg_tx_err(pf))
    dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
      vf->vf_id);

   reset_vf_tx = true;
  }

  reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id));
  if (reg & VP_MDET_TX_TCLAN_VALID_M) {
   wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), 0xFFFF);
   vf->mdd_tx_events.count++;
   set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
   if (netif_msg_tx_err(pf))
    dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
      vf->vf_id);

   reset_vf_tx = true;
  }

  reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id));
  if (reg & VP_MDET_TX_TDPU_VALID_M) {
   wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), 0xFFFF);
   vf->mdd_tx_events.count++;
   set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
   if (netif_msg_tx_err(pf))
    dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
      vf->vf_id);

   reset_vf_tx = true;
  }

  reg = rd32(hw, VP_MDET_RX(vf->vf_id));
  if (reg & VP_MDET_RX_VALID_M) {
   wr32(hw, VP_MDET_RX(vf->vf_id), 0xFFFF);
   vf->mdd_rx_events.count++;
   set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
   if (netif_msg_rx_err(pf))
    dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
      vf->vf_id);

   reset_vf_rx = true;
  }

  if (reset_vf_tx || reset_vf_rx)
   ice_mdd_maybe_reset_vf(pf, vf, reset_vf_tx,
            reset_vf_rx);
}
mutex_unlock(&pf->vfs.table_lock);

ice_print_vfs_mdd_events(pf);
}

/**
* ice_force_phys_link_state - Force the physical link state
* @vsi: VSI to force the physical link state to up/down
* @link_up: true/false indicates to set the physical link to up/down
*
* Force the physical link state by getting the current PHY capabilities from
* hardware and setting the PHY config based on the determined capabilities. If
* link changes a link event will be triggered because both the Enable Automatic
* Link Update and LESM Enable bits are set when setting the PHY capabilities.
*
* Returns 0 on success, negative on failure
*/
static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
{
struct ice_aqc_get_phy_caps_data *pcaps;
struct ice_aqc_set_phy_cfg_data *cfg;
struct ice_port_info *pi;
struct device *dev;
int retcode;

if (!vsi || !vsi->port_info || !vsi->back)
  return -EINVAL;
if (vsi->type != ICE_VSI_PF)
  return 0;

dev = ice_pf_to_dev(vsi->back);

pi = vsi->port_info;

pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
  return -ENOMEM;

retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
          NULL);
if (retcode) {
  dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
   vsi->vsi_num, retcode);
  retcode = -EIO;
  goto out;
}

/* No change in link */
if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
     link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
  goto out;

/* Use the current user PHY configuration. The current user PHY
* configuration is initialized during probe from PHY capabilities
* software mode, and updated on set PHY configuration.
*/
cfg = kmemdup(&pi->phy.curr_user_phy_cfg, sizeof(*cfg), GFP_KERNEL);
if (!cfg) {
  retcode = -ENOMEM;
  goto out;
}

cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
if (link_up)
  cfg->caps |= ICE_AQ_PHY_ENA_LINK;
else
  cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;

retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
if (retcode) {
  dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
   vsi->vsi_num, retcode);
  retcode = -EIO;
}

kfree(cfg);
out:
kfree(pcaps);
return retcode;
}

/**
* ice_init_nvm_phy_type - Initialize the NVM PHY type
* @pi: port info structure
*
* Initialize nvm_phy_type_[low|high] for link lenient mode support
*/
static int ice_init_nvm_phy_type(struct ice_port_info *pi)
{
struct ice_aqc_get_phy_caps_data *pcaps;
struct ice_pf *pf = pi->hw->back;
int err;

pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
  return -ENOMEM;

err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA,
      pcaps, NULL);

if (err) {
  dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
  goto out;
}

pf->nvm_phy_type_hi = pcaps->phy_type_high;
pf->nvm_phy_type_lo = pcaps->phy_type_low;

out:
kfree(pcaps);
return err;
}

/**
* ice_init_link_dflt_override - Initialize link default override
* @pi: port info structure
*
* Initialize link default override and PHY total port shutdown during probe
*/
static void ice_init_link_dflt_override(struct ice_port_info *pi)
{
struct ice_link_default_override_tlv *ldo;
struct ice_pf *pf = pi->hw->back;

ldo = &pf->link_dflt_override;
if (ice_get_link_default_override(ldo, pi))
  return;

if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
  return;

/* Enable Total Port Shutdown (override/replace link-down-on-close
* ethtool private flag) for ports with Port Disable bit set.
*/
set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
}

/**
* ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
* @pi: port info structure
*
* If default override is enabled, initialize the user PHY cfg speed and FEC
* settings using the default override mask from the NVM.
*
* The PHY should only be configured with the default override settings the
* first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
* is used to indicate that the user PHY cfg default override is initialized
* and the PHY has not been configured with the default override settings. The
* state is set here, and cleared in ice_configure_phy the first time the PHY is
* configured.
*
* This function should be called only if the FW doesn't support default
* configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
*/
static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
{
struct ice_link_default_override_tlv *ldo;
struct ice_aqc_set_phy_cfg_data *cfg;
struct ice_phy_info *phy = &pi->phy;
struct ice_pf *pf = pi->hw->back;

ldo = &pf->link_dflt_override;

/* If link default override is enabled, use to mask NVM PHY capabilities
* for speed and FEC default configuration.
*/
cfg = &phy->curr_user_phy_cfg;

if (ldo->phy_type_low || ldo->phy_type_high) {
  cfg->phy_type_low = pf->nvm_phy_type_lo &
        cpu_to_le64(ldo->phy_type_low);
  cfg->phy_type_high = pf->nvm_phy_type_hi &
         cpu_to_le64(ldo->phy_type_high);
}
cfg->link_fec_opt = ldo->fec_options;
phy->curr_user_fec_req = ICE_FEC_AUTO;

set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
}

/**
* ice_init_phy_user_cfg - Initialize the PHY user configuration
* @pi: port info structure
*
* Initialize the current user PHY configuration, speed, FEC, and FC requested
* mode to default. The PHY defaults are from get PHY capabilities topology
* with media so call when media is first available. An error is returned if
* called when media is not available. The PHY initialization completed state is
* set here.
*
* These configurations are used when setting PHY
* configuration. The user PHY configuration is updated on set PHY
* configuration. Returns 0 on success, negative on failure
*/
static int ice_init_phy_user_cfg(struct ice_port_info *pi)
{
struct ice_aqc_get_phy_caps_data *pcaps;
struct ice_phy_info *phy = &pi->phy;
struct ice_pf *pf = pi->hw->back;
int err;

if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
  return -EIO;

pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
  return -ENOMEM;

if (ice_fw_supports_report_dflt_cfg(pi->hw))
  err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
       pcaps, NULL);
else
  err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
       pcaps, NULL);
if (err) {
  dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
  goto err_out;
}

ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);

/* check if lenient mode is supported and enabled */
if (ice_fw_supports_link_override(pi->hw) &&
     !(pcaps->module_compliance_enforcement &
       ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
  set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);

  /* if the FW supports default PHY configuration mode, then the driver
* does not have to apply link override settings. If not,
* initialize user PHY configuration with link override values
*/
  if (!ice_fw_supports_report_dflt_cfg(pi->hw) &&
      (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
   ice_init_phy_cfg_dflt_override(pi);
   goto out;
  }
}

/* if link default override is not enabled, set user flow control and
* FEC settings based on what get_phy_caps returned
*/
phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
            pcaps->link_fec_options);
phy->curr_user_fc_req = ice_caps_to_fc_mode(pcaps->caps);

out:
phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
set_bit(ICE_PHY_INIT_COMPLETE, pf->state);
err_out:
kfree(pcaps);
return err;
}

/**
* ice_configure_phy - configure PHY
* @vsi: VSI of PHY
*
* Set the PHY configuration. If the current PHY configuration is the same as
* the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise
* configure the based get PHY capabilities for topology with media.
*/
static int ice_configure_phy(struct ice_vsi *vsi)
{
struct device *dev = ice_pf_to_dev(vsi->back);
struct ice_port_info *pi = vsi->port_info;
struct ice_aqc_get_phy_caps_data *pcaps;
struct ice_aqc_set_phy_cfg_data *cfg;
struct ice_phy_info *phy = &pi->phy;
struct ice_pf *pf = vsi->back;
int err;

/* Ensure we have media as we cannot configure a medialess port */
if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
  return -ENOMEDIUM;

ice_print_topo_conflict(vsi);

if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags) &&
     phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
  return -EPERM;

if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))
  return ice_force_phys_link_state(vsi, true);

pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
  return -ENOMEM;

/* Get current PHY config */
err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
      NULL);
if (err) {
  dev_err(dev, "Failed to get PHY configuration, VSI %d error %d\n",
   vsi->vsi_num, err);
  goto done;
}

/* If PHY enable link is configured and configuration has not changed,
* there's nothing to do
*/
if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
     ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg))
  goto done;

/* Use PHY topology as baseline for configuration */
memset(pcaps, 0, sizeof(*pcaps));
if (ice_fw_supports_report_dflt_cfg(pi->hw))
  err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
       pcaps, NULL);
else
  err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
       pcaps, NULL);
if (err) {
  dev_err(dev, "Failed to get PHY caps, VSI %d error %d\n",
   vsi->vsi_num, err);
  goto done;
}

cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
if (!cfg) {
  err = -ENOMEM;
  goto done;
}

ice_copy_phy_caps_to_cfg(pi, pcaps, cfg);

/* Speed - If default override pending, use curr_user_phy_cfg set in
* ice_init_phy_user_cfg_ldo.
*/
if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING,
          vsi->back->state)) {
  cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5

¤ Dauer der Verarbeitung: 0.10 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.