/* * Copyright (c) 2016, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
/* General-purpose spinlock protecting short-lived LAG state accesses.
 * Hold it only for brief periods and beware of lock dependencies:
 * preferably, no other locks should be acquired while it is held.
 */
static DEFINE_SPINLOCK(lag_lock);
/* Create a mapping between steering slots and active ports.
 * As we have ldev->buckets slots per port first assume the native
 * mapping should be used.
 * If there are ports that are disabled fill the relevant slots
 * with mapping that points to active ports.
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   struct mlx5_lag *ldev,
					   u8 buckets,
					   u8 *ports)
{
	int disabled[MLX5_MAX_PORTS] = {};
	int enabled[MLX5_MAX_PORTS] = {};
	int disabled_ports_num = 0;
	int enabled_ports_num = 0;
	int idx;
	u32 rand;
	int i;
	int j;

	/* Use native mapping by default where each port's buckets
	 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
	 */
	mlx5_ldev_for_each(i, 0, ldev) {
		for (j = 0; j < buckets; j++) {
			idx = i * buckets + j;
			ports[idx] = i + 1;
		}
	}

	/* Classify every port as enabled or disabled based on the link
	 * state reported by the bond tracker. This step was missing: both
	 * counters stayed zero, so the remap below never ran and would
	 * have divided by zero (rand % enabled_ports_num) if it had.
	 */
	mlx5_ldev_for_each(i, 0, ldev) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			enabled[enabled_ports_num++] = i;
		else
			disabled[disabled_ports_num++] = i;
	}

	/* If all ports are disabled/enabled keep native mapping */
	if (enabled_ports_num == ldev->ports ||
	    disabled_ports_num == ldev->ports)
		return;

	/* Go over the disabled ports and for each assign a random active port */
	for (i = 0; i < disabled_ports_num; i++) {
		for (j = 0; j < buckets; j++) {
			get_random_bytes(&rand, 4);
			ports[disabled[i] * buckets + j] =
				enabled[rand % enabled_ports_num] + 1;
		}
	}
}
staticbool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{ int i;
mlx5_ldev_for_each(i, 0, ldev) if (ldev->pf[i].has_drop) returntrue; returnfalse;
}
/* Remove the ingress drop rule from the uplink vport of every port that
 * has one installed and clear its has_drop flag.
 *
 * The previous body was corrupted: it fused fragments of the drop-rule
 * *setup* path and of the bond-change handler (referencing undeclared
 * locals such as num_disabled, disabled_ports, dev, err, tracker and
 * dev0, and reusing the loop index). Restored to the plain destroy loop.
 */
static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	mlx5_ldev_for_each(i, 0, ldev) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}
/* NOTE(review): the statements below look like the interior of
 * mlx5_create_lag() — the function header and the declarations of err,
 * dev0, shared_fdb and in[] are missing from this chunk, so the fragment
 * cannot stand alone. Kept byte-identical; confirm against the complete
 * file before editing.
 */
/* Create the LAG object in firmware; bail out on failure. */
err = mlx5_cmd_create_lag(dev0, ldev, mode, flags); if (err) {
mlx5_core_err(dev0, "Failed to create LAG (%d)\n",
err); return err;
}
/* Shared-FDB mode is best-effort here: a failure is only logged,
 * err is then re-checked below to roll the LAG back.
 */
if (shared_fdb) {
err = mlx5_lag_create_single_fdb(ldev); if (err)
mlx5_core_err(dev0, "Can't enable single FDB mode\n"); else
mlx5_core_info(dev0, "Operation mode is single FDB\n");
}
/* Roll back: destroy the LAG just created if single-FDB setup failed. */
if (err) {
MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
mlx5_core_err(dev0, "Failed to deactivate RoCE LAG; driver restart required\n");
}
/* NOTE(review): orphan statement — notifier-head initialization that
 * belongs to a function whose header is not visible in this chunk.
 */
BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
/* NOTE(review): the lines below appear to be the interior of
 * mlx5_activate_lag() (tx-affinity mapping + optional hash-based port
 * selection, then LAG creation and drop-rule setup) fused with the
 * interior of mlx5_deactivate_lag() (destroy + port-selection cleanup).
 * Locals (err, dev0, roce_lag, in[], tracker, mode, flags) are declared
 * elsewhere. Kept byte-identical.
 */
if (mode != MLX5_LAG_MODE_MPESW) {
mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map); if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
ldev->v2p_map); if (err) {
mlx5_core_err(dev0, "Failed to create LAG port selection(%d)\n",
err); return err;
}
}
}
/* On create failure, undo the hash-based port selection before returning. */
err = mlx5_create_lag(ldev, tracker, mode, flags); if (err) { if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
mlx5_lag_port_sel_destroy(ldev); if (roce_lag)
mlx5_core_err(dev0, "Failed to activate RoCE LAG\n"); else
mlx5_core_err(dev0, "Failed to activate VF LAG\n" "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); return err;
}
/* Active-backup (non-RoCE) bonds install drop rules on inactive ports. */
if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
!roce_lag)
mlx5_lag_drop_rule_setup(ldev, tracker);
/* NOTE(review): deactivation path starts here — likely a different
 * function in the original file.
 */
MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
err = mlx5_cmd_exec_in(dev0, destroy_lag, in); if (err) { if (roce_lag) {
mlx5_core_err(dev0, "Failed to deactivate RoCE LAG; driver restart required\n");
} else {
mlx5_core_err(dev0, "Failed to deactivate VF LAG; driver restart required\n" "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
} return err;
}
/* Tear down port selection and reset bucket count to the default. */
if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
mlx5_lag_port_sel_destroy(ldev);
ldev->buckets = 1;
} if (mlx5_lag_has_drop_rule(ldev))
mlx5_lag_drop_rule_cleanup(ldev);
/* Handle a bond-state change: decide whether to activate, modify or
 * disable the hardware LAG.
 *
 * NOTE(review): this body is corrupted. `err` is tested below without
 * ever being assigned — the tracker snapshot, the do_bond computation
 * and the mlx5_activate_lag() call that should precede the error check
 * are missing. "staticvoid" and "elseif" are fused tokens, and the brace
 * structure around the tx_type block does not balance with upstream.
 * Kept byte-identical pending comparison with the complete file.
 */
staticvoid mlx5_do_bond(struct mlx5_lag *ldev)
{ int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct lag_tracker tracker = { }; struct mlx5_core_dev *dev0; struct net_device *ndev; bool do_bond, roce_lag; int err; int i;
if (idx < 0) return;
dev0 = ldev->pf[idx].dev; if (!mlx5_lag_is_ready(ldev)) {
do_bond = false;
} else { /* VF LAG is in multipath mode, ignore bond change requests */ if (mlx5_lag_is_multipath(dev0)) return;
/* NOTE(review): `err` is read here but never assigned in the visible
 * code — the LAG activation call is missing. TODO: restore it.
 */
if (err) {
dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
mlx5_deactivate_lag(ldev);
mlx5_lag_add_devices(ldev);
mlx5_ldev_for_each(i, 0, ldev)
mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
mlx5_core_err(dev0, "Failed to enable lag\n"); return;
}
} if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
ndev = mlx5_lag_active_backup_get_netdev(dev0); /* Only sriov and roce lag should have tracker.tx_type set,
 * so no need to check the mode.
*/
blocking_notifier_call_chain(&dev0->priv.lag_nh,
MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
ndev);
dev_put(ndev);
}
} elseif (mlx5_lag_should_modify_lag(ldev, do_bond)) {
mlx5_modify_lag(ldev, &tracker);
} elseif (mlx5_lag_should_disable_lag(ldev, do_bond)) {
mlx5_disable_lag(ldev);
}
}
/* The last mdev to unregister will destroy the workqueue before removing the
 * devcom component, and as all the mdevs use the same devcom component we are
 * guaranteed that the devcom is valid while the calling work is running.
 */
struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
{
	struct mlx5_devcom_comp_dev *comp = NULL;
	int idx;

	mutex_lock(&ldev->lock);
	/* Any registered function's devcom component will do — they all
	 * share the same one.
	 */
	idx = mlx5_get_next_ldev_func(ldev, 0);
	if (idx < MLX5_MAX_PORTS)
		comp = ldev->pf[idx].dev->priv.hca_devcom_comp;
	mutex_unlock(&ldev->lock);

	return comp;
}
/* NOTE(review): headerless fragment — appears to be the interior of the
 * netdev CHANGEUPPER event handler (bond membership tracking). Locals
 * (lag_upper_info, upper, ndev_tmp, slave, bond_status, has_inactive,
 * num_slaves, idx, i, is_in_lag, mode_supported, tracker, info) are
 * declared outside this chunk. "elseif" near the end is a fused token.
 * Kept byte-identical.
 */
if (info->linking)
lag_upper_info = info->upper_info;
/* The event may still be of interest if the slave does not belong to * us, but is enslaved to a master which has one or more of our netdevs * as slaves (e.g., if a new slave is added to a master that bonds two * of our netdevs, we should unbond).
*/
rcu_read_lock();
/* Walk the bond's slaves and record which of our netdevs are members. */
for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
mlx5_ldev_for_each(i, 0, ldev) { if (ldev->pf[i].netdev == ndev_tmp) {
idx++; break;
}
} if (i < MLX5_MAX_PORTS) {
slave = bond_slave_get_rcu(ndev_tmp); if (slave)
has_inactive |= bond_is_slave_inactive(slave);
bond_status |= (1 << idx);
}
num_slaves++;
}
rcu_read_unlock();
/* None of this lagdev's netdevs are slaves of this master. */ if (!(bond_status & GENMASK(ldev->ports - 1, 0))) return 0;
if (lag_upper_info) {
tracker->tx_type = lag_upper_info->tx_type;
tracker->hash_type = lag_upper_info->hash_type;
}
tracker->has_inactive = has_inactive; /* Determine bonding status: * A device is considered bonded if both its physical ports are slaves * of the same lag master, and only them.
*/
is_in_lag = num_slaves == ldev->ports &&
bond_status == GENMASK(ldev->ports - 1, 0);
/* Lag mode must be activebackup or hash. */
mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
/* Report unsupported configurations back through netlink extack. */
if (!mlx5_lag_is_ready(ldev))
NL_SET_ERR_MSG_MOD(info->info.extack, "Can't activate LAG offload, PF is configured with more than 64 VFs"); elseif (!mode_supported)
NL_SET_ERR_MSG_MOD(info->info.extack, "Can't activate LAG offload, TX type isn't supported");
/* NOTE(review): headerless fragment — looks like the interior of the
 * netdev CHANGELOWERSTATE handler. idx, ldev, ndev, info and
 * lag_lower_info are declared outside this chunk. Kept byte-identical.
 */
idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); if (idx < 0) return 0;
/* This information is used to determine virtual to physical * port mapping.
*/
lag_lower_info = info->lower_state_info; if (!lag_lower_info) return 0;
/* NOTE(review): headerless fragment — the tail of the mdev-removal path.
 * It retries while a mode change is in progress, then detaches the mdev
 * from the lag device and drops the reference. Kept byte-identical.
 */
/* mdev is being removed, might as well remove debugfs * as early as possible.
*/
mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
/* Busy-wait (100ms steps) until no mode change is in flight. */
mutex_lock(&ldev->lock); if (ldev->mode_changes_in_progress) {
mutex_unlock(&ldev->lock);
msleep(100); goto recheck;
}
mlx5_ldev_remove_mdev(ldev, dev);
mutex_unlock(&ldev->lock);
mlx5_ldev_put(ldev);
}
/* Register an mdev with LAG handling, if the device supports it.
 *
 * NOTE(review): the body is corrupted — after the two guard checks it
 * jumps into a query_cong_statistics loop that belongs to a different
 * function (i, num_ports, mdev[], ret, out and the free_mdev label are
 * all undeclared here), and the function's closing brace is missing.
 * Kept byte-identical pending comparison with the complete file.
 */
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{ int err;
if (!mlx5_lag_is_supported(dev)) return;
if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp)) return;
/* NOTE(review): foreign fragment starts here. */
for (i = 0; i < num_ports; ++i) {
u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
MLX5_SET(query_cong_statistics_in, in, opcode,
MLX5_CMD_OP_QUERY_CONG_STATISTICS);
ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
out); if (ret) goto free_mdev;
/* NOTE(review): the trailing German website disclaimer below is scraping
 * residue and does not belong in this source file; preserved commented out
 * pending removal.
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch
 * Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 *
 * (Translation: "The information on this website was compiled carefully to
 * the best of our knowledge. However, neither completeness, correctness,
 * nor quality of the provided information is guaranteed. Note: the colored
 * syntax highlighting and the measurement are still experimental.")
 */