/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the Interfaces handler. * * Version: @(#)dev.h 1.0.10 08/12/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Corey Minyard <wf-rch!minyard@relay.EU.net> * Donald J. Becker, <becker@cesdis.gsfc.nasa.gov> * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Bjorn Ekwall. <bj0rn@blox.se> * Pekka Riikonen <priikone@poseidon.pspt.fi> * * Moved to /usr/include/linux for NET3
*/ #ifndef _LINUX_NETDEVICE_H #define _LINUX_NETDEVICE_H
/* * Transmit return codes: transmit return codes originate from three different * namespaces: * * - qdisc return codes * - driver transmit return codes * - errno values * * Drivers are allowed to return any one of those in their hard_start_xmit() * function. Real network devices commonly used with qdiscs should only return * the driver transmit return codes though - when qdiscs are used, the actual * transmission happens asynchronously, so the value is not propagated to * higher layers. Virtual network devices transmit synchronously; in this case * the driver transmit return codes are consumed by dev_queue_xmit(), and all * others are propagated to higher layers.
*/
/* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It * indicates that the device will soon be dropping packets, or already drops
* some packets of the same priority; prompting us to send less aggressively. */ #define net_xmit_eval(e) ((e) == NET_XMIT_CN ? 0 : (e)) #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0)
enum netdev_tx {
__NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */
NETDEV_TX_OK = 0x00, /* driver took care of packet */
NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/
}; typedefenum netdev_tx netdev_tx_t;
/* * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant; * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed.
*/ staticinlinebool dev_xmit_complete(int rc)
{ /* * Positive cases with an skb consumed by a driver: * - successful transmission (rc == NETDEV_TX_OK) * - error while transmitting (rc < 0) * - error while queueing to a different device (rc & NET_XMIT_MASK)
*/ if (likely(rc < NET_XMIT_MASK)) returntrue;
returnfalse;
}
/* * Compute the worst-case header length according to the protocols * used.
*/
/* per-cpu stats, allocated on demand. * Try to fit them in a single cache line, for dev_get_stats() sake.
*/ struct net_device_core_stats { unsignedlong rx_dropped; unsignedlong tx_dropped; unsignedlong rx_nohandler; unsignedlong rx_otherhost_dropped;
} __aligned(4 * sizeof(unsignedlong));
/* * Structure for NAPI scheduling similar to tasklet but with weighting
*/ struct napi_struct { /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means * whoever atomically sets that bit can add this napi_struct * to the per-CPU poll_list, and whoever clears that bit * can remove from the list right before clearing the bit.
*/ struct list_head poll_list;
unsignedlong state; int weight;
u32 defer_hard_irqs_count; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL /* CPU actively polling if netpoll is configured */ int poll_owner; #endif /* CPU on which NAPI has been scheduled for processing */ int list_owner; struct net_device *dev; struct sk_buff *skb; struct gro_node gro; struct hrtimer timer; /* all fields past this point are write-protected by netdev_lock */ struct task_struct *thread; unsignedlong gro_flush_timeout; unsignedlong irq_suspend_timeout;
u32 defer_hard_irqs; /* control-path-only fields follow */
u32 napi_id; struct list_head dev_list; struct hlist_node napi_hash_node; int irq; struct irq_affinity_notify notify; int napi_rmap_idx; int index; struct napi_config *config;
};
enum {
NAPI_STATE_SCHED, /* Poll is scheduled */
NAPI_STATE_MISSED, /* reschedule a napi */
NAPI_STATE_DISABLE, /* Disable pending */
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
NAPI_STATE_LISTED, /* NAPI added to system lists */
NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
NAPI_STATE_HAS_NOTIFIER, /* Napi has an IRQ notifier */
};
/* * enum rx_handler_result - Possible return values for rx_handlers. * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it * further. * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in * case skb->dev was changed by rx_handler. * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. * @RX_HANDLER_PASS: Do nothing, pass the skb as if no rx_handler was called. * * rx_handlers are functions called from inside __netif_receive_skb(), to do * special processing of the skb, prior to delivery to protocol handlers. * * Currently, a net_device can only have a single rx_handler registered. Trying * to register a second rx_handler will return -EBUSY. * * To register a rx_handler on a net_device, use netdev_rx_handler_register(). * To unregister a rx_handler on a net_device, use * netdev_rx_handler_unregister(). * * Upon return, rx_handler is expected to tell __netif_receive_skb() what to * do with the skb. * * If the rx_handler consumed the skb in some way, it should return * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for * the skb to be delivered in some other way. * * If the rx_handler changed skb->dev, to divert the skb to another * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the * new device will be called if it exists. * * If the rx_handler decides the skb should be ignored, it should return * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that * are registered on exact device (ptype->dev == skb->dev). * * If the rx_handler didn't change skb->dev, but wants the skb to be normally * delivered, it should return RX_HANDLER_PASS. * * A device without a registered rx_handler will behave as if rx_handler * returned RX_HANDLER_PASS.
*/
/** * napi_is_scheduled - test if NAPI is scheduled * @n: NAPI context * * This check is "best-effort". With no locking implemented, * a NAPI can be scheduled or terminate right after this check * and produce not precise results. * * NAPI_STATE_SCHED is an internal state, napi_is_scheduled * should not be used normally and napi_schedule should be * used instead. * * Use only if the driver really needs to check if a NAPI * is scheduled for example in the context of delayed timer * that can be skipped if a NAPI is already scheduled. * * Return: True if NAPI is scheduled, False otherwise.
*/ staticinlinebool napi_is_scheduled(struct napi_struct *n)
{ return test_bit(NAPI_STATE_SCHED, &n->state);
}
bool napi_schedule_prep(struct napi_struct *n);
/** * napi_schedule - schedule NAPI poll * @n: NAPI context * * Schedule NAPI poll routine to be called if it is not already * running. * Return: true if we schedule a NAPI or false if not. * Refer to napi_schedule_prep() for additional reason on why * a NAPI might not be scheduled.
*/ staticinlinebool napi_schedule(struct napi_struct *n)
{ if (napi_schedule_prep(n)) {
__napi_schedule(n); returntrue;
}
returnfalse;
}
/** * napi_schedule_irqoff - schedule NAPI poll * @n: NAPI context * * Variant of napi_schedule(), assuming hard irqs are masked.
*/ staticinlinevoid napi_schedule_irqoff(struct napi_struct *n)
{ if (napi_schedule_prep(n))
__napi_schedule_irqoff(n);
}
/** * napi_complete_done - NAPI processing complete * @n: NAPI context * @work_done: number of packets processed * * Mark NAPI processing as complete. Should only be called if poll budget * has not been completely consumed. * Prefer over napi_complete(). * Return: false if device should avoid rearming interrupts.
*/ bool napi_complete_done(struct napi_struct *n, int work_done);
/** * napi_synchronize - wait until NAPI is not running * @n: NAPI context * * Wait until NAPI is done being scheduled on this context. * Waits till any outstanding processing completes but * does not disable future activations.
*/ staticinlinevoid napi_synchronize(conststruct napi_struct *n)
{ if (IS_ENABLED(CONFIG_SMP)) while (test_bit(NAPI_STATE_SCHED, &n->state))
msleep(1); else
barrier();
}
/** * napi_if_scheduled_mark_missed - if napi is running, set the * NAPIF_STATE_MISSED * @n: NAPI context * * If napi is running, set the NAPIF_STATE_MISSED, and return true if * NAPI is scheduled.
**/ staticinlinebool napi_if_scheduled_mark_missed(struct napi_struct *n)
{ unsignedlong val, new;
val = READ_ONCE(n->state); do { if (val & NAPIF_STATE_DISABLE) returntrue;
if (!(val & NAPIF_STATE_SCHED)) returnfalse;
new = val | NAPIF_STATE_MISSED;
} while (!try_cmpxchg(&n->state, &val, new));
/* * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The * netif_tx_* functions below are used to manipulate this flag. The * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit * queue independently. The netif_xmit_*stopped functions below are called * to check if the queue has been stopped by the driver or stack (either * of the XOFF bits are set in the state). Drivers should not need to call * netif_xmit*stopped functions, they should only be using netif_tx_*.
*/
struct Qdisc __rcu *qdisc; struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; conststruct attribute_group **groups; #endif unsignedlong tx_maxrate; /* * Number of TX timeouts for this queue * (/sys/class/net/DEV/Q/trans_timeout)
*/
atomic_long_t trans_timeout;
/* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; #ifdef CONFIG_XDP_SOCKETS /* "ops protected", see comment about net_device::lock */ struct xsk_buff_pool *pool; #endif
/* * write-mostly part
*/ #ifdef CONFIG_BQL struct dql dql; #endif
spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; /* * Time (in jiffies) of last Tx
*/ unsignedlong trans_start;
unsignedlong state;
/* * slow- / control-path part
*/ /* NAPI instance for the queue * "ops protected", see comment about net_device::lock
*/ struct napi_struct *napi;
#ifdefined(CONFIG_XPS) && defined(CONFIG_NUMA) int numa_node; #endif
} ____cacheline_aligned_in_smp;
/* * sysctl_fb_tunnels_only_for_init_net == 0 : For all netns * == 1 : For initns only * == 2 : For none.
*/ staticinlinebool net_has_fallback_tunnels(conststruct net *net)
{ #if IS_ENABLED(CONFIG_SYSCTL) int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net);
/* XPS map type and offset of the xps map within net_device->xps_maps[]. */ enum xps_map_type {
XPS_CPUS = 0,
XPS_RXQS,
XPS_MAPS_MAX,
};
#ifdef CONFIG_XPS /* * This structure holds an XPS map which can be of variable length. The * map is an array of queues.
*/ struct xps_map { unsignedint len; unsignedint alloc_len; struct rcu_head rcu;
u16 queues[];
}; #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) #define XPS_MIN_MAP_ALLOC ((L1_CACHE_ALIGN(offsetof(struct xps_map, queues[1])) \
- sizeof(struct xps_map)) / sizeof(u16))
/* * This structure holds all XPS maps for device. Maps are indexed by CPU. * * We keep track of the number of cpus/rxqs used when the struct is allocated, * in nr_ids. This will help not accessing out-of-bound memory. * * We keep track of the number of traffic classes used when the struct is * allocated, in num_tc. This will be used to navigate the maps, to ensure we're * not crossing its upper bound, as the original dev->num_tc can be updated in * the meantime.
*/ struct xps_dev_maps { struct rcu_head rcu; unsignedint nr_ids;
s16 num_tc; struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */
};
#ifdefined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) /* * This structure is to hold information about the device * configured to run FCoE protocol stack.
*/ struct netdev_fcoe_hbainfo { char manufacturer[64]; char serial_number[64]; char hardware_version[64]; char driver_version[64]; char optionrom_version[64]; char firmware_version[64]; char model[256]; char model_description[256];
}; #endif
#define MAX_PHYS_ITEM_ID_LEN 32
/* This structure holds a unique identifier to identify some * physical item (port for example) used by a netdevice.
*/ struct netdev_phys_item_id { unsignedchar id[MAX_PHYS_ITEM_ID_LEN]; unsignedchar id_len;
};
/* These structures hold the attributes of bpf state that are being passed * to the netdevice through the bpf op.
*/ enum bpf_netdev_command { /* Set or clear a bpf program used in the earliest stages of packet * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee * is responsible for calling bpf_prog_put on any old progs that are * stored. In case of error, the callee need not release the new prog * reference, but on success it takes ownership and must bpf_prog_put * when it is no longer used.
*/
XDP_SETUP_PROG,
XDP_SETUP_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. */
BPF_OFFLOAD_MAP_ALLOC,
BPF_OFFLOAD_MAP_FREE,
XDP_SETUP_XSK_POOL,
};
/* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); * This function is called once when a network device is registered. * The network device can use this for any late stage initialization * or semantic validation. It can fail with an error code which will * be propagated back to register_netdev. * * void (*ndo_uninit)(struct net_device *dev); * This function is called when device is unregistered or when registration * fails. It is not called if init fails. * * int (*ndo_open)(struct net_device *dev); * This function is called when a network device transitions to the up * state. * * int (*ndo_stop)(struct net_device *dev); * This function is called when a network device transitions to the down * state. * * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, * struct net_device *dev); * Called when a packet needs to be transmitted. * Returns NETDEV_TX_OK. Can return NETDEV_TX_BUSY, but you should stop * the queue before that can happen; it's for obsolete devices and weird * corner cases, but the stack really does a non-trivial amount * of useless work if you return NETDEV_TX_BUSY. * Required; cannot be NULL. * * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, * struct net_device *dev * netdev_features_t features); * Called by core transmit path to determine if device is capable of * performing offload operations on a given packet. This is to give * the device an opportunity to implement any restrictions that cannot * be otherwise expressed by feature flags. The check is called with * the set of features that the stack has calculated and it returns * those the driver believes to be appropriate. * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * struct net_device *sb_dev); * Called to decide which queue to use when device supports multiple * transmit queues. * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make * changes to configuration when multicast or promiscuous is enabled. * * void (*ndo_set_rx_mode)(struct net_device *dev); * This function is called device changes address list filtering. * If driver handles unicast address filtering, it should set * IFF_UNICAST_FLT in its priv_flags. * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address * needs to be changed. If this interface is not defined, the * MAC address can not be changed. * * int (*ndo_validate_addr)(struct net_device *dev); * Test if Media Access Control address is valid for the device. * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); * Old-style ioctl entry point. This is used internally by the * ieee802154 subsystem but is no longer called by the device * ioctl handler. * * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); * Used by the bonding driver for its device specific ioctls: * SIOCBONDENSLAVE, SIOCBONDRELEASE, SIOCBONDSETHWADDR, SIOCBONDCHANGEACTIVE, * SIOCBONDSLAVEINFOQUERY, and SIOCBONDINFOQUERY * * * int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); * Called for ethernet specific ioctls: SIOCGMIIPHY, SIOCGMIIREG, * SIOCSMIIREG, SIOCSHWTSTAMP and SIOCGHWTSTAMP. * * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); * Used to set network devices bus interface parameters. This interface * is retained for legacy reasons; new devices should use the bus * interface (PCI) for low level management. * * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); * Called when a user wants to change the Maximum Transfer Unit * of a device. * * void (*ndo_tx_timeout)(struct net_device *dev, unsigned int txqueue); * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. * * void (*ndo_get_stats64)(struct net_device *dev, * struct rtnl_link_stats64 *storage); * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); * Called when a user wants to get the network device usage * statistics. Drivers must do one of the following: * 1. Define @ndo_get_stats64 to fill in a zero-initialised * rtnl_link_stats64 structure passed by the caller. * 2. Define @ndo_get_stats to update a net_device_stats structure * (which should normally be dev->stats) and return a pointer to * it. The structure may be changed asynchronously only if each * field is written atomically. * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id) * Return true if this device supports offload stats of this attr_id. * * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, * void *attr_data) * Get statistics for offload operations by attr_id. Write it into the * attr_data pointer. * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is registered. * * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is unregistered. * * void (*ndo_poll_controller)(struct net_device *dev); * * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, * u8 qos, __be16 proto); * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); * int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_config)(struct net_device *dev, * int vf, struct ifla_vf_info *ivf); * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); * int (*ndo_set_vf_port)(struct net_device *dev, int vf, * struct nlattr *port[]); * * Enable or disable the VF ability to query its RSS Redirection Table and * Hash Key. This is needed since on some devices VF share this information * with PF and querying it may introduce a theoretical security risk. * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, * void *type_data); * Called to setup any 'tc' scheduler, classifier or action on @dev. * This is always called from the stack with the rtnl lock held and netif * tx queues stopped. This allows the netdevice to perform queue * management safely. * * Fiber Channel over Ethernet (FCoE) offload functions. * int (*ndo_fcoe_enable)(struct net_device *dev); * Called when the FCoE protocol stack wants to start using LLD for FCoE * so the underlying device can perform whatever needed configuration or * initialization to support acceleration of FCoE traffic. * * int (*ndo_fcoe_disable)(struct net_device *dev); * Called when the FCoE protocol stack wants to stop using LLD for FCoE * so the underlying device can perform whatever needed clean-ups to * stop supporting acceleration of FCoE traffic. * * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, * struct scatterlist *sgl, unsigned int sgc); * Called when the FCoE Initiator wants to initialize an I/O that * is a possible candidate for Direct Data Placement (DDP). The LLD can * perform necessary setup and returns 1 to indicate the device is set up * successfully to perform DDP on this I/O, otherwise this returns 0. * * int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); * Called when the FCoE Initiator/Target is done with the DDPed I/O as * indicated by the FC exchange id 'xid', so the underlying device can * clean up and reuse resources for later DDP requests. * * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid, * struct scatterlist *sgl, unsigned int sgc); * Called when the FCoE Target wants to initialize an I/O that * is a possible candidate for Direct Data Placement (DDP). The LLD can * perform necessary setup and returns 1 to indicate the device is set up * successfully to perform DDP on this I/O, otherwise this returns 0. * * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, * struct netdev_fcoe_hbainfo *hbainfo); * Called when the FCoE Protocol stack wants information on the underlying * device. This information is utilized by the FCoE protocol stack to * register attributes with Fiber Channel management service as per the * FC-GS Fabric Device Management Information(FDMI) specification. * * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); * Called when the underlying device wants to override default World Wide * Name (WWN) generation mechanism in FCoE protocol stack to pass its own * World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE * protocol stack to use. * * RFS acceleration. * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, * u16 rxq_index, u32 flow_id); * Set hardware filter for RFS. rxq_index is the target queue index; * flow_id is a flow ID to be passed to rps_may_expire_flow() later. * Return the filter ID on success, or a negative error code. * * Slave management functions (for bridge, bonding, etc). * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to make another netdev an underling. * * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release previously enslaved netdev. * * struct net_device *(*ndo_get_xmit_slave)(struct net_device *dev, * struct sk_buff *skb, * bool all_slaves); * Get the xmit slave of master device. If all_slaves is true, function * assume all the slaves can transmit. * * Feature/offload setting functions. * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); * Adjusts the requested feature flags according to device-specific * constraints, and returns the resulting flags. Must not modify * the device state. * * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); * Called to update device configuration to new features. Passed * feature set might be less than what was returned by ndo_fix_features()). * Must return >0 or -errno if it changed dev->features itself. * * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid, u16 flags, * bool *notified, struct netlink_ext_ack *extack); * Adds an FDB entry to dev for addr. * Callee shall set *notified to true if it sent any appropriate * notification(s). Otherwise core will send a generic one. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid * bool *notified, struct netlink_ext_ack *extack); * Deletes the FDB entry from dev corresponding to addr. * Callee shall set *notified to true if it sent any appropriate * notification(s). Otherwise core will send a generic one. * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, * int *idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * * int (*ndo_mdb_add)(struct net_device *dev, struct nlattr *tb[], * u16 nlmsg_flags, struct netlink_ext_ack *extack); * Adds an MDB entry to dev. * int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], * struct netlink_ext_ack *extack); * Deletes the MDB entry from dev. * int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[], * struct netlink_ext_ack *extack); * Bulk deletes MDB entries from dev. * int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, * struct netlink_callback *cb); * Dumps MDB entries from dev. The first argument (marker) in the netlink * callback is used by core rtnetlink code. * * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags, struct netlink_ext_ack *extack) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, * struct net_device *dev, u32 filter_mask, * int nlflags) * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags); * * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); * Called to change device carrier. Soft-devices (like dummy, team, etc) * which do not represent real hardware may define this to allow their * userspace components to manage their virtual carrier state. Devices * that determine carrier state from physical hardware properties (eg * network cables) or protocol-dependent mechanisms (eg * USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function. * * int (*ndo_get_phys_port_id)(struct net_device *dev, * struct netdev_phys_item_id *ppid); * Called to get ID of physical port of this device. If driver does * not implement this, it is assumed that the hw is not able to have * multiple net devices on single physical port. * * int (*ndo_get_port_parent_id)(struct net_device *dev, * struct netdev_phys_item_id *ppid) * Called to get the parent ID of the physical port of this device. * * void* (*ndo_dfwd_add_station)(struct net_device *pdev, * struct net_device *dev) * Called by upper layer devices to accelerate switching or other * station functionality into hardware. 'pdev is the lowerdev * to use for the offload and 'dev' is the net device that will * back the offload. Returns a pointer to the private structure * the upper layer will maintain. * void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv) * Called by upper layer device to delete the station created * by 'ndo_dfwd_add_station'. 'pdev' is the net device backing * the station and priv is the structure returned by the add * operation. * int (*ndo_set_tx_maxrate)(struct net_device *dev, * int queue_index, u32 maxrate); * Called when a user wants to set a max-rate limitation of specific * TX queue. * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); * This function is used to get egress tunnel information for given skb. * This is useful for retrieving outer tunnel header parameters while * sampling packet. * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); * This function is used to specify the headroom that the skb must * consider when allocation skb during packet reception. Setting * appropriate rx headroom value allows avoiding skb head copy on * forward. Setting a negative value resets the rx headroom to the * default value. * int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); * This function is used to set or query state related to XDP on the * netdevice and manage BPF offload. See definition of * enum bpf_netdev_command for details. * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, * u32 flags); * This function is used to submit @n XDP packets for transmit on a * netdevice. Returns number of frames successfully transmitted, frames * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev, * struct xdp_buff *xdp); * Get the xmit slave of master device based on the xdp_buff. * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); * This function is used to wake up the softirq, ksoftirqd or kthread * responsible for sending and/or receiving packets on a specific * queue id bound to an AF_XDP socket. The flags field specifies if * only RX, only Tx, or both should be woken up using the flags * XDP_WAKEUP_RX and XDP_WAKEUP_TX. * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm_kern *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev); * If a device is paired with a peer device, return the peer instance. * The caller must be under RCU read context. * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); * Get the forwarding path to reach the real device from the HW destination address * ktime_t (*ndo_get_tstamp)(struct net_device *dev, * const struct skb_shared_hwtstamps *hwtstamps, * bool cycles); * Get hardware timestamp based on normal/adjustable time or free running * cycle counter. This function is required if physical clock supports a * free running cycle counter. * * int (*ndo_hwtstamp_get)(struct net_device *dev, * struct kernel_hwtstamp_config *kernel_config); * Get the currently configured hardware timestamping parameters for the * NIC device. * * int (*ndo_hwtstamp_set)(struct net_device *dev, * struct kernel_hwtstamp_config *kernel_config, * struct netlink_ext_ack *extack); * Change the hardware timestamping parameters for NIC device.
*/ struct net_device_ops { int (*ndo_init)(struct net_device *dev); void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev);
netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev);
netdev_features_t (*ndo_features_check)(struct sk_buff *skb, struct net_device *dev,
netdev_features_t features);
u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); void (*ndo_change_rx_flags)(struct net_device *dev, int flags); void (*ndo_set_rx_mode)(struct net_device *dev); int (*ndo_set_mac_address)(struct net_device *dev, void *addr); int (*ndo_validate_addr)(struct net_device *dev); int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); int (*ndo_siocwandev)(struct net_device *dev, struct if_settings *ifs); int (*ndo_siocdevprivate)(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); int (*ndo_neigh_setup)(struct net_device *dev, struct neigh_parms *); void (*ndo_tx_timeout) (struct net_device *dev, unsignedint txqueue);
int (*ndo_vlan_rx_add_vid)(struct net_device *dev,
__be16 proto, u16 vid); int (*ndo_vlan_rx_kill_vid)(struct net_device *dev,
__be16 proto, u16 vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); int (*ndo_netpoll_setup)(struct net_device *dev); void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, int queue, u16 vlan,
u8 qos, __be16 proto); int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting); int (*ndo_get_vf_config)(struct net_device *dev, int vf, struct ifla_vf_info *ivf); int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); int (*ndo_get_vf_stats)(struct net_device *dev, int vf, struct ifla_vf_stats
*vf_stats); int (*ndo_set_vf_port)(struct net_device *dev, int vf, struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); int (*ndo_get_vf_guid)(struct net_device *dev, int vf, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); int (*ndo_set_vf_guid)(struct net_device *dev, int vf, u64 guid, int guid_type); int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, void *type_data); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); int (*ndo_fcoe_ddp_setup)(struct net_device *dev,
u16 xid, struct scatterlist *sgl, unsignedint sgc); int (*ndo_fcoe_ddp_done)(struct net_device *dev,
u16 xid); int (*ndo_fcoe_ddp_target)(struct net_device *dev,
u16 xid, struct scatterlist *sgl, unsignedint sgc); int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, struct netdev_fcoe_hbainfo *hbainfo); #endif
#if IS_ENABLED(CONFIG_LIBFCOE) #define NETDEV_FCOE_WWNN 0 #define NETDEV_FCOE_WWPN 1 int (*ndo_fcoe_get_wwn)(struct net_device *dev,
u64 *wwn, int type); #endif
/** * enum netdev_priv_flags - &struct net_device priv_flags * * These are the &struct net_device, they are only set internally * by drivers and used in the kernel. These flags are invisible to * userspace; this means that the order of these flags can change * during any kernel release. * * You should add bitfield booleans after either net_device::priv_flags * (hotpath) or ::threaded (slowpath) instead of extending these flags. * * @IFF_802_1Q_VLAN: 802.1Q VLAN device * @IFF_EBRIDGE: Ethernet bridging device * @IFF_BONDING: bonding master or slave * @IFF_ISATAP: ISATAP interface (RFC4214) * @IFF_WAN_HDLC: WAN HDLC device * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to * release skb->dst * @IFF_DONT_BRIDGE: disallow bridging this ether dev * @IFF_DISABLE_NETPOLL: disable netpoll at run-time * @IFF_MACVLAN_PORT: device used as macvlan port * @IFF_BRIDGE_PORT: device used as bridge port * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit * @IFF_UNICAST_FLT: Supports unicast filtering * @IFF_TEAM_PORT: device used as team port * @IFF_SUPP_NOFCS: device supports sending custom FCS * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account * underlying stacked devices * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external * entity (i.e. the master device for bridged veth) * @IFF_MACSEC: device is a MACsec device * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device * @IFF_NO_ADDRCONF: prevent ipv6 addrconf * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0)
*/ enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
IFF_EBRIDGE = 1<<1,
IFF_BONDING = 1<<2,
IFF_ISATAP = 1<<3,
IFF_WAN_HDLC = 1<<4,
IFF_XMIT_DST_RELEASE = 1<<5,
IFF_DONT_BRIDGE = 1<<6,
IFF_DISABLE_NETPOLL = 1<<7,
IFF_MACVLAN_PORT = 1<<8,
IFF_BRIDGE_PORT = 1<<9,
IFF_OVS_DATAPATH = 1<<10,
IFF_TX_SKB_SHARING = 1<<11,
IFF_UNICAST_FLT = 1<<12,
IFF_TEAM_PORT = 1<<13,
IFF_SUPP_NOFCS = 1<<14,
IFF_LIVE_ADDR_CHANGE = 1<<15,
IFF_MACVLAN = 1<<16,
IFF_XMIT_DST_RELEASE_PERM = 1<<17,
IFF_L3MDEV_MASTER = 1<<18,
IFF_NO_QUEUE = 1<<19,
IFF_OPENVSWITCH = 1<<20,
IFF_L3MDEV_SLAVE = 1<<21,
IFF_TEAM = 1<<22,
IFF_RXFH_CONFIGURED = 1<<23,
IFF_PHONY_HEADROOM = 1<<24,
IFF_MACSEC = 1<<25,
IFF_NO_RX_HANDLER = 1<<26,
IFF_FAILOVER = 1<<27,
IFF_FAILOVER_SLAVE = 1<<28,
IFF_L3MDEV_RX_HANDLER = 1<<29,
IFF_NO_ADDRCONF = BIT_ULL(30),
IFF_TX_SKB_NO_LINEAR = BIT_ULL(31),
};
/* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type {
ML_PRIV_NONE,
ML_PRIV_CAN,
};
enum netdev_reg_state {
NETREG_UNINITIALIZED = 0,
NETREG_REGISTERED, /* completed register_netdevice */
NETREG_UNREGISTERING, /* called unregister_netdevice */
NETREG_UNREGISTERED, /* completed unregister todo */
NETREG_RELEASED, /* called free_netdev */
NETREG_DUMMY, /* dummy device for NAPI poll */
};
/** * struct net_device - The DEVICE structure. * * Actually, this whole structure is a big mistake. It mixes I/O * data with strictly "high-level" data, and it has to know about * almost every data structure used in the INET module. * * @priv_flags: flags invisible to userspace defined as bits, see * enum netdev_priv_flags for the definitions * @lltx: device supports lockless Tx. Deprecated for real HW * drivers. Mainly used by logical interfaces, such as * bonding and tunnels * @netmem_tx: device support netmem_tx. * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name * of the interface. * * @name_node: Name hashlist node * @ifalias: SNMP alias * @mem_end: Shared memory end * @mem_start: Shared memory start * @base_addr: Device I/O address * @irq: Device IRQ number * * @state: Generic network queuing layer state, see netdev_state_t * @dev_list: The global list of network devices * @napi_list: List entry used for polling NAPI devices * @unreg_list: List entry when we are unregistering the * device; see the function unregister_netdev * @close_list: List entry used when we are closing the device * @ptype_all: Device-specific packet handlers for all protocols * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding * @features: Currently active device features * @hw_features: User-changeable features * * @wanted_features: User-requested features * @vlan_features: Mask of features inheritable by VLAN devices * * @hw_enc_features: Mask of features inherited by encapsulating devices * This field indicates what encapsulation * offloads the hardware is capable of doing, * and drivers will need to set them appropriately. * * @mpls_features: Mask of features inheritable by MPLS * @gso_partial_features: value(s) from NETIF_F_GSO\* * * @ifindex: interface index * @group: The group the device belongs to * * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead * * @core_stats: core networking counters, * do not use this in drivers * @carrier_up_count: Number of times the carrier has been up * @carrier_down_count: Number of times the carrier has been down * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, * see <net/iw_handler.h> for details. * * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @xdp_metadata_ops: Includes pointers to XDP metadata callbacks. * @xsk_tx_metadata_ops: Includes pointers to AF_XDP TX metadata callbacks. * @ethtool_ops: Management operations * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour * discovery handling. Necessary for e.g. 6LoWPAN. * @xfrmdev_ops: Transformation offload operations * @tlsdev_ops: Transport Layer Security offload operations * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * * @flags: Interface flags (a la BSD) * @xdp_features: XDP capability supported by the device * @gflags: Global flags ( kept as legacy ) * @priv_len: Size of the ->priv flexible array * @priv: Flexible array containing private data * @operstate: RFC2863 operstate * @link_mode: Mapping policy to operstate * @if_port: Selectable AUI, TP, ... * @dma: DMA channel * @mtu: Interface MTU value * @min_mtu: Interface Minimum MTU value * @max_mtu: Interface Maximum MTU value * @type: Interface hardware type * @hard_header_len: Maximum hardware header length. * @min_header_len: Minimum hardware header length * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed * @needed_tailroom: Extra tailroom the hardware may need, but not in all * cases can this be guaranteed. Some cases also use * LL_MAX_HEADER instead to allocate the skb * * interface address info: * * @perm_addr: Permanent hw address * @addr_assign_type: Hw address assignment type * @addr_len: Hardware address length * @upper_level: Maximum depth level of upper devices. * @lower_level: Maximum depth level of lower devices. * @threaded: napi threaded state. * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one * @name_assign_type: network interface name assignment type * @uc_promisc: Counter that indicates promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that * does not implement ndo_set_rx_mode() * @uc: unicast mac addresses * @mc: multicast mac addresses * @dev_addrs: list of device hw addresses * @queues_kset: Group of all Kobjects in the Tx and RX queues * @promiscuity: Number of times the NIC is told to work in * promiscuous mode; if it becomes 0 the NIC will * exit promiscuous mode * @allmulti: Counter, enables or disables allmulticast mode * * @vlan_info: VLAN info * @dsa_ptr: dsa specific data * @tipc_ptr: TIPC specific data * @atalk_ptr: AppleTalk link * @ip_ptr: IPv4 specific data * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering * @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network * device struct * @mpls_ptr: mpls_dev struct pointer * @mctp_ptr: MCTP specific data * * @dev_addr: Hw address (before bcast, * because most packets are unicast) * * @_rx: Array of RX queues * @num_rx_queues: Number of RX queues * allocated at register_netdev() time * @real_num_rx_queues: Number of RX queues currently active in device * @xdp_prog: XDP sockets filter program pointer * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one * @tcx_ingress: BPF & clsact qdisc specific data for ingress processing * @ingress_queue: XXX: need comments on this one * @nf_hooks_ingress: netfilter hooks executed for ingress packets * @broadcast: hw bcast address * * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, * indexed by RX queue number. Assigned by driver. * This must only be set if the ndo_rx_flow_steer * operation is defined * @index_hlist: Device index hash chain * * @_tx: Array of TX queues * @num_tx_queues: Number of TX queues allocated at alloc_netdev_mq() time * @real_num_tx_queues: Number of TX queues currently active in device * @qdisc: Root qdisc from userspace point of view * @tx_queue_len: Max frames per queue allowed * @tx_global_lock: XXX: need comments on this one * @xdp_bulkq: XDP device bulk queue * @xps_maps: all CPUs/RXQs maps for XPS device * * @xps_maps: XXX: need comments on this one * @tcx_egress: BPF & clsact qdisc specific data for egress processing * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers * * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device * @dev_refcnt: Number of references to this device * @refcnt_tracker: Tracker directory for tracked references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one * * @reg_state: Register/unregister state machine * @dismantle: Device is going to be freed * @needs_free_netdev: Should unregister perform free_netdev? * @priv_destructor: Called from unregister * @npinfo: XXX: need comments on this one * @nd_net: Network namespace this network device is inside * protected by @lock * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type * * @pcpu_stat_type: Type of device statistics which the core should * allocate/free: none, lstats, tstats, dstats. none * means the driver is handling statistics allocation/ * freeing internally. * @lstats: Loopback statistics: packets, bytes * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes * * @garp_port: GARP * @mrp_port: MRP * * @dm_private: Drop monitor private * * @dev: Class/net/name entry * @sysfs_groups: Space for optional device, statistics and wireless * sysfs groups * * @sysfs_rx_queue_group: Space for optional per-rx queue attributes * @rtnl_link_ops: Rtnl_link_ops * @stat_ops: Optional ops for queue-aware statistics * @queue_mgmt_ops: Optional ops for queue management * * @gso_max_size: Maximum size of generic segmentation offload * @tso_max_size: Device (as in HW) limit on the max TSO request size * @gso_max_segs: Maximum number of segments that can be passed to the * NIC for GSO * @tso_max_segs: Device (as in HW) limit on the max TSO segment count * @gso_ipv4_max_size: Maximum size of generic segmentation offload, * for IPv4. * * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device * @tc_to_txq: XXX: need comments on this one * @prio_tc_map: XXX: need comments on this one * * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * * @priomap: XXX: need comments on this one * @link_topo: Physical link topology tracking attached PHYs * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. * * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the * switch port. * * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ * affinity. Set by netif_enable_irq_affinity(), then * the driver must create a persistent napi by * netif_napi_add_config() and finally bind the napi to * IRQ (via netif_napi_set_irq()). * * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap. * Set by calling netif_enable_cpu_rmap(). * * @see_all_hwtstamp_requests: device wants to see calls to * ndo_hwtstamp_set() for all timestamp requests * regardless of source, even if those aren't * HWTSTAMP_SOURCE_NETDEV * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN * @netns_immutable: interface can't change network namespaces * @fcoe_mtu: device supports maximum FCoE MTU, 2158 bytes * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. * * @macsec_ops: MACsec offloading ops * * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state * @ethtool: ethtool related state * @xdp_state: stores info on attached XDP BPF programs * * @nested_level: Used as a parameter of spin_lock_nested() of * dev->addr_list_lock. * @unlink_list: As netif_addr_lock() can be called recursively, * keep a list of interfaces to be deleted. * @gro_max_size: Maximum size of aggregated packet in generic * receive offload (GRO) * @gro_ipv4_max_size: Maximum size of aggregated packet in generic * receive offload (GRO), for IPv4. * @xdp_zc_max_segs: Maximum number of segments supported by AF_XDP * zero copy driver * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. * @watchdog_dev_tracker: refcount tracker used by watchdog. * @dev_registered_tracker: tracker for reference held while * registered * @offload_xstats_l3: L3 HW stats for this netdevice. * * @devlink_port: Pointer to related devlink port structure. * Assigned by a driver before netdev registration using * SET_NETDEV_DEVLINK_PORT macro. This pointer is static * during the time netdevice is registered. * * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, * where the clock is recovered. * * @max_pacing_offload_horizon: max EDT offload horizon in nsec. * @napi_config: An array of napi_config structures containing per-NAPI * settings. * @num_napi_configs: number of allocated NAPI config structs, * always >= max(num_rx_queues, num_tx_queues). * @gro_flush_timeout: timeout for GRO layer in NAPI * @napi_defer_hard_irqs: If not zero, provides a counter that would * allow to avoid NIC hard IRQ, on busy queues. * * @neighbours: List heads pointing to this device's neighbours' * dev_list, one per address-family. * @hwprov: Tracks which PTP performs hardware packet time stamping. * * FIXME: cleanup struct net_device such that network protocol info * moves out.
*/
struct net_device { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/net_device.rst. * Please update the document when adding new fields.
*/
/* TX read-mostly hotpath */
__cacheline_group_begin(net_device_read_tx);
struct_group(priv_flags_fast, unsignedlong priv_flags:32; unsignedlong lltx:1; unsignedlong netmem_tx:1;
); conststruct net_device_ops *netdev_ops; conststruct header_ops *header_ops; struct netdev_queue *_tx;
netdev_features_t gso_partial_features; unsignedint real_num_tx_queues; unsignedint gso_max_size; unsignedint gso_ipv4_max_size;
u16 gso_max_segs;
s16 num_tc; /* Note : dev->mtu is often read without holding a lock. * Writers usually hold RTNL. * It is recommended to use READ_ONCE() to annotate the reads, * and to use WRITE_ONCE() to annotate the writes.
*/ unsignedint mtu; unsignedshort needed_headroom; struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; #endif #ifdef CONFIG_NETFILTER_EGRESS struct nf_hook_entries __rcu *nf_hooks_egress; #endif #ifdef CONFIG_NET_XGRESS struct bpf_mprog_entry __rcu *tcx_egress; #endif
__cacheline_group_end(net_device_read_tx);
char name[IFNAMSIZ]; struct netdev_name_node *name_node; struct dev_ifalias __rcu *ifalias; /* * I/O specific fields * FIXME: Merge these and struct ifmap into one
*/ unsignedlong mem_end; unsignedlong mem_start; unsignedlong base_addr;
/* * Some hardware also needs these fields (state,dev_list, * napi_list,unreg_list,close_list) but they are not * part of the usual set specified in Space.c.
*/
/* * Cache lines mostly used on receive path (including eth_type_trans())
*/ /* Interface address info used in eth_type_trans() */ constunsignedchar *dev_addr;
unsignedint num_rx_queues; #define GRO_LEGACY_MAX_SIZE 65536u /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), * and shinfo->gso_segs is a 16bit field.
*/ #define GRO_MAX_SIZE (8 * 65535u) unsignedint xdp_zc_max_segs; struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS struct nf_hook_entries __rcu *nf_hooks_ingress; #endif
/* * Cache lines mostly used on transmit path
*/ unsignedint num_tx_queues; struct Qdisc __rcu *qdisc; unsignedint tx_queue_len;
spinlock_t tx_global_lock;
struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
#ifdef CONFIG_NET_SCHED
DECLARE_HASHTABLE (qdisc_hash, 4); #endif /* These may be needed for future network-power-down code. */ struct timer_list watchdog_timer; int watchdog_timeo;
/* priv_flags_slow, ungrouped to save space */ unsignedlong see_all_hwtstamp_requests:1; unsignedlong change_proto_down:1; unsignedlong netns_immutable:1; unsignedlong fcoe_mtu:1;
/** @cfg: net_device queue-related configuration */ struct netdev_config *cfg; /** * @cfg_pending: same as @cfg but when device is being actively * reconfigured includes any changes to the configuration * requested by the user, but which may or may not be rejected.
*/ struct netdev_config *cfg_pending; struct ethtool_netdev_state *ethtool;
/* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE];
/** * @up: copy of @state's IFF_UP, but safe to read with just @lock. * May report false negatives while the device is being opened * or closed (@lock does not protect .ndo_open, or .ndo_close).
*/ bool up;
/** * @request_ops_lock: request the core to run all @netdev_ops and * @ethtool_ops under the @lock.
*/ bool request_ops_lock;
/** * @lock: netdev-scope lock, protects a small selection of fields. * Should always be taken using netdev_lock() / netdev_unlock() helpers. * Drivers are free to use it for other protection. * * For the drivers that implement shaper or queue API, the scope * of this lock is expanded to cover most ndo/queue/ethtool/sysfs * operations. Drivers may opt-in to this behavior by setting * @request_ops_lock. * * @lock protection mixes with rtnl_lock in multiple ways, fields are * either: * * - simply protected by the instance @lock; * * - double protected - writers hold both locks, readers hold either; * * - ops protected - protected by the lock held around the NDOs * and other callbacks, that is the instance lock on devices for * which netdev_need_ops_lock() returns true, otherwise by rtnl_lock; * * - double ops protected - always protected by rtnl_lock but for * devices for which netdev_need_ops_lock() returns true - also * the instance lock. * * Simply protects: * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, * @net_shaper_hierarchy, @reg_state, @threaded * * Double protects: * @up, @moving_ns, @nd_net, @xdp_features * * Double ops protects: * @real_num_rx_queues, @real_num_tx_queues * * Also protects some fields in: * struct napi_struct, struct netdev_queue, struct netdev_rx_queue * * Ordering: take after rtnl_lock.
*/ struct mutex lock;
#if IS_ENABLED(CONFIG_NET_SHAPER) /** * @net_shaper_hierarchy: data tracking the current shaper status * see include/net/net_shapers.h
*/ struct net_shaper_hierarchy *net_shaper_hierarchy; #endif
/* * Driver should use this to assign devlink port instance to a netdevice * before it registers the netdevice. Therefore devlink_port is static * during the netdev lifetime after it is registered.
*/ #define SET_NETDEV_DEVLINK_PORT(dev, port) \
({ \
WARN_ON((dev)->reg_state != NETREG_UNINITIALIZED); \
((dev)->devlink_port = (port)); \
})
/* returns the headroom that the master device needs to take in account * when forwarding to this dev
*/ staticinlineunsigned netdev_get_fwd_headroom(struct net_device *dev)
{ return dev->priv_flags & IFF_PHONY_HEADROOM ? 0 : dev->needed_headroom;
}
staticinlinevoid netdev_set_rx_headroom(struct net_device *dev, int new_hr)
{ if (dev->netdev_ops->ndo_set_rx_headroom)
dev->netdev_ops->ndo_set_rx_headroom(dev, new_hr);
}
/* set the device rx headroom to the dev's default */ staticinlinevoid netdev_reset_rx_headroom(struct net_device *dev)
{
netdev_set_rx_headroom(dev, -1);
}
/** * netdev_priv - access network device private data * @dev: network device * * Get network device private data
*/ staticinlinevoid *netdev_priv(conststruct net_device *dev)
{ return (void *)dev->priv;
}
/* Set the sysfs physical device reference for the network logical device * if set prior to registration will cause a symlink during initialization.
*/ #define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev))
/* Set the sysfs device type for the network logical device to allow * fine-grained identification of different network device types. For * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc.
*/ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype))
/** * netif_napi_add() - initialize a NAPI context * @dev: network device * @napi: NAPI context * @poll: polling function * * netif_napi_add() must be used to initialize a NAPI context prior to calling * *any* of the other NAPI-related functions.
*/ staticinlinevoid
netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int))
{
netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
}
/** * netif_napi_add_config - initialize a NAPI context with persistent config * @dev: network device * @napi: NAPI context * @poll: polling function * @index: the NAPI index
*/ staticinlinevoid
netif_napi_add_config(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int index)
{
netdev_lock(dev);
netif_napi_add_config_locked(dev, napi, poll, index);
netdev_unlock(dev);
}
/** * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only * @dev: network device * @napi: NAPI context * @poll: polling function * * This variant of netif_napi_add() should be used from drivers using NAPI * to exclusively poll a TX queue. * This will avoid we add it into napi_hash[], thus polluting this hash table.
*/ staticinlinevoid netif_napi_add_tx(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int))
{
netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
}
/** * __netif_napi_del - remove a NAPI context * @napi: NAPI context * * Warning: caller must observe RCU grace period before freeing memory * containing @napi. Drivers might want to call this helper to combine * all the needed RCU grace periods into a single one.
*/ staticinlinevoid __netif_napi_del(struct napi_struct *napi)
{
netdev_lock(napi->dev);
__netif_napi_del_locked(napi);
netdev_unlock(napi->dev);
}
/** * netif_napi_del - remove a NAPI context * @napi: NAPI context * * netif_napi_del() removes a NAPI context from the network device NAPI list
*/ staticinlinevoid netif_napi_del(struct napi_struct *napi)
{
__netif_napi_del(napi);
synchronize_net();
}
int netif_enable_cpu_rmap(struct net_device *dev, unsignedint num_irqs); void netif_set_affinity_auto(struct net_device *dev);
/* netdevice notifier chain. Please remember to update netdev_cmd_to_name() * and the rtnetlink notification exclusion list in rtnetlink_event() when * adding new types.
*/ enum netdev_cmd {
NETDEV_UP = 1, /* For now you can't veto a device up/down */
NETDEV_DOWN,
NETDEV_REBOOT, /* Tell a protocol stack a network interface detected a hardware crash and restarted - we can use this eg to kick tcp sessions
once done */
NETDEV_CHANGE, /* Notify device state change */
NETDEV_REGISTER,
NETDEV_UNREGISTER,
NETDEV_CHANGEMTU, /* notify after mtu change happened */
NETDEV_CHANGEADDR, /* notify after the address change */
NETDEV_PRE_CHANGEADDR, /* notify before the address change */
NETDEV_GOING_DOWN,
NETDEV_CHANGENAME,
NETDEV_FEAT_CHANGE,
NETDEV_BONDING_FAILOVER,
NETDEV_PRE_UP,
NETDEV_PRE_TYPE_CHANGE,
NETDEV_POST_TYPE_CHANGE,
NETDEV_POST_INIT,
NETDEV_PRE_UNINIT,
NETDEV_RELEASE,
NETDEV_NOTIFY_PEERS,
NETDEV_JOIN,
NETDEV_CHANGEUPPER,
NETDEV_RESEND_IGMP,
NETDEV_PRECHANGEMTU, /* notify before mtu change happened */
NETDEV_CHANGEINFODATA,
NETDEV_BONDING_INFO,
NETDEV_PRECHANGEUPPER,
NETDEV_CHANGELOWERSTATE,
NETDEV_UDP_TUNNEL_PUSH_INFO,
NETDEV_UDP_TUNNEL_DROP_INFO,
NETDEV_CHANGE_TX_QUEUE_LEN,
NETDEV_CVLAN_FILTER_PUSH_INFO,
NETDEV_CVLAN_FILTER_DROP_INFO,
NETDEV_SVLAN_FILTER_PUSH_INFO,
NETDEV_SVLAN_FILTER_DROP_INFO,
NETDEV_OFFLOAD_XSTATS_ENABLE,
NETDEV_OFFLOAD_XSTATS_DISABLE,
NETDEV_OFFLOAD_XSTATS_REPORT_USED,
NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
NETDEV_XDP_FEAT_CHANGE,
}; constchar *netdev_cmd_to_name(enum netdev_cmd cmd);
int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); int unregister_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn);
struct netdev_notifier_info_ext { struct netdev_notifier_info info; /* must be first */ union {
u32 mtu;
} ext;
};
struct netdev_notifier_change_info { struct netdev_notifier_info info; /* must be first */ unsignedint flags_changed;
};
struct netdev_notifier_changeupper_info { struct netdev_notifier_info info; /* must be first */ struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ bool linking; /* is the notification for link or unlink */ void *upper_info; /* upper dev info */
};
struct netdev_notifier_changelowerstate_info { struct netdev_notifier_info info; /* must be first */ void *lower_state_info; /* is lower dev state */
};
struct netdev_notifier_pre_changeaddr_info { struct netdev_notifier_info info; /* must be first */ constunsignedchar *dev_addr;
};
if (!dev->header_ops || !dev->header_ops->parse_protocol) return 0; return dev->header_ops->parse_protocol(skb);
}
/* ll_header must have at least hard_header_len allocated */ staticinlinebool dev_validate_header(conststruct net_device *dev, char *ll_header, int len)
{ if (likely(len >= dev->hard_header_len)) returntrue; if (len < dev->min_header_len) returnfalse;
#ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit __rcu *flow_limit; #endif struct Qdisc *output_queue; struct Qdisc **output_queue_tailp; struct sk_buff *completion_queue; #ifdef CONFIG_XFRM_OFFLOAD struct sk_buff_head xfrm_backlog; #endif /* written and read only by owning cpu: */ struct netdev_xmit xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, * and only read by other cpus. Worth using a cache line.
*/ unsignedint input_queue_head ____cacheline_aligned_in_smp;
/* Elements below can be accessed between CPUs for RPS/RFS */
call_single_data_t csd ____cacheline_aligned_in_smp; struct softnet_data *rps_ipi_next; unsignedint cpu; unsignedint input_queue_tail; #endif struct sk_buff_head input_pkt_queue; struct napi_struct backlog;
atomic_t dropped ____cacheline_aligned_in_smp;
/* Another possibly contended cache line */
spinlock_t defer_lock ____cacheline_aligned_in_smp; int defer_count; int defer_ipi_scheduled; struct sk_buff *defer_list;
call_single_data_t defer_csd;
};
/** * netif_queue_stopped - test if transmit queue is flowblocked * @dev: network device * * Test if transmit queue on device is currently unable to send.
*/ staticinlinebool netif_queue_stopped(conststruct net_device *dev)
{ return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
}
/** * netdev_queue_set_dql_min_limit - set dql minimum limit * @dev_queue: pointer to transmit queue * @min_limit: dql minimum limit * * Forces xmit_more() to return true until the minimum threshold * defined by @min_limit is reached (or until the tx queue is * empty). Warning: to be use with care, misuse will impact the * latency.
*/ staticinlinevoid netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue, unsignedint min_limit)
{ #ifdef CONFIG_BQL
dev_queue->dql.min_limit = min_limit; #endif
}
/** * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their ndo_start_xmit(), * to give appropriate hint to the CPU.
*/ staticinlinevoid netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue)
{ #ifdef CONFIG_BQL
prefetchw(&dev_queue->dql.num_queued); #endif
}
/** * netdev_txq_bql_complete_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their TX completion path, * to give appropriate hint to the CPU.
*/ staticinlinevoid netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue)
{ #ifdef CONFIG_BQL
prefetchw(&dev_queue->dql.limit); #endif
}
/** * netdev_tx_sent_queue - report the number of bytes queued to a given tx queue * @dev_queue: network device queue * @bytes: number of bytes queued to the device queue * * Report the number of bytes queued for sending/completion to the network * device hardware queue. @bytes should be a good approximation and should * exactly match netdev_completed_queue() @bytes. * This is typically called once per packet, from ndo_start_xmit().
*/ staticinlinevoid netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsignedint bytes)
{ #ifdef CONFIG_BQL
dql_queued(&dev_queue->dql, bytes);
if (likely(dql_avail(&dev_queue->dql) >= 0)) return;
/* Paired with READ_ONCE() from dev_watchdog() */
WRITE_ONCE(dev_queue->trans_start, jiffies);
/* This barrier is paired with smp_mb() from dev_watchdog() */
smp_mb__before_atomic();
/* * The XOFF flag must be set before checking the dql_avail below, * because in netdev_tx_completed_queue we update the dql_completed * before checking the XOFF flag.
*/
smp_mb__after_atomic();
/* check again in case another CPU has just made room avail */ if (unlikely(dql_avail(&dev_queue->dql) >= 0))
clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); #endif
}
/* Variant of netdev_tx_sent_queue() for drivers that are aware * that they should not test BQL status themselves. * We do want to change __QUEUE_STATE_STACK_XOFF only for the last * skb of a batch. * Returns true if the doorbell must be used to kick the NIC.
*/ staticinlinebool __netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsignedint bytes, bool xmit_more)
{ if (xmit_more) { #ifdef CONFIG_BQL
dql_queued(&dev_queue->dql, bytes); #endif return netif_tx_queue_stopped(dev_queue);
}
netdev_tx_sent_queue(dev_queue, bytes); returntrue;
}
/** * netdev_sent_queue - report the number of bytes queued to hardware * @dev: network device * @bytes: number of bytes queued to the hardware device queue * * Report the number of bytes queued for sending/completion to the network * device hardware queue#0. @bytes should be a good approximation and should * exactly match netdev_completed_queue() @bytes. * This is typically called once per packet, from ndo_start_xmit().
*/ staticinlinevoid netdev_sent_queue(struct net_device *dev, unsignedint bytes)
{
netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes);
}
/** * netdev_tx_completed_queue - report number of packets/bytes at TX completion. * @dev_queue: network device queue * @pkts: number of packets (currently ignored) * @bytes: number of bytes dequeued from the device queue * * Must be called at most once per TX completion round (and not per * individual packet), so that BQL can adjust its limits appropriately.
*/ staticinlinevoid netdev_tx_completed_queue(struct netdev_queue *dev_queue, unsignedint pkts, unsignedint bytes)
{ #ifdef CONFIG_BQL if (unlikely(!bytes)) return;
dql_completed(&dev_queue->dql, bytes);
/* * Without the memory barrier there is a small possibility that * netdev_tx_sent_queue will miss the update and cause the queue to * be stopped forever
*/
smp_mb(); /* NOTE: netdev_txq_completed_mb() assumes this exists */
if (unlikely(dql_avail(&dev_queue->dql) < 0)) return;
if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state))
netif_schedule_queue(dev_queue); #endif
}
/** * netdev_completed_queue - report bytes and packets completed by device * @dev: network device * @pkts: actual number of packets sent over the medium * @bytes: actual number of bytes sent over the medium * * Report the number of bytes and packets transmitted by the network device * hardware queue over the physical medium, @bytes must exactly match the * @bytes amount passed to netdev_sent_queue()
*/ staticinlinevoid netdev_completed_queue(struct net_device *dev, unsignedint pkts, unsignedint bytes)
{
netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes);
}
/** * netdev_tx_reset_subqueue - reset the BQL stats and state of a netdev queue * @dev: network device * @qid: stack index of the queue to reset
*/ staticinlinevoid netdev_tx_reset_subqueue(conststruct net_device *dev,
u32 qid)
{
netdev_tx_reset_queue(netdev_get_tx_queue(dev, qid));
}
/** * netdev_reset_queue - reset the packets and bytes count of a network device * @dev_queue: network device * * Reset the bytes and packet count of a network device and clear the * software flow control OFF bit for this network device
*/ staticinlinevoid netdev_reset_queue(struct net_device *dev_queue)
{
netdev_tx_reset_subqueue(dev_queue, 0);
}
/** * netdev_cap_txqueue - check if selected tx queue exceeds device queues * @dev: network device * @queue_index: given tx queue index * * Returns 0 if given tx queue index >= number of device tx queues, * otherwise returns the originally passed tx queue index.
*/ staticinline u16 netdev_cap_txqueue(struct net_device *dev, u16 queue_index)
{ if (unlikely(queue_index >= dev->real_num_tx_queues)) {
net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
dev->name, queue_index,
dev->real_num_tx_queues); return 0;
}
return queue_index;
}
/** * netif_running - test if up * @dev: network device * * Test if the device has been brought up.
*/ staticinlinebool netif_running(conststruct net_device *dev)
{ return test_bit(__LINK_STATE_START, &dev->state);
}
/* * Routines to manage the subqueues on a device. We only need start, * stop, and a check if it's stopped. All other device management is * done at the overall netdevice level. * Also test the device if we're multiqueue.
*/
/** * netif_start_subqueue - allow sending packets on subqueue * @dev: network device * @queue_index: sub queue index * * Start individual transmit queue of a device with multiple transmit queues.
*/ staticinlinevoid netif_start_subqueue(struct net_device *dev, u16 queue_index)
{ struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
netif_tx_start_queue(txq);
}
/** * netif_stop_subqueue - stop sending packets on subqueue * @dev: network device * @queue_index: sub queue index * * Stop individual transmit queue of a device with multiple transmit queues.
*/ staticinlinevoid netif_stop_subqueue(struct net_device *dev, u16 queue_index)
{ struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
netif_tx_stop_queue(txq);
}
/** * __netif_subqueue_stopped - test status of subqueue * @dev: network device * @queue_index: sub queue index * * Check individual transmit queue of a device with multiple transmit queues.
*/ staticinlinebool __netif_subqueue_stopped(conststruct net_device *dev,
u16 queue_index)
{ struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
return netif_tx_queue_stopped(txq);
}
/** * netif_subqueue_stopped - test status of subqueue * @dev: network device * @skb: sub queue buffer pointer * * Check individual transmit queue of a device with multiple transmit queues.
*/ staticinlinebool netif_subqueue_stopped(conststruct net_device *dev, struct sk_buff *skb)
{ return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb));
}
/** * netif_wake_subqueue - allow sending packets on subqueue * @dev: network device * @queue_index: sub queue index * * Resume individual transmit queue of a device with multiple transmit queues.
*/ staticinlinevoid netif_wake_subqueue(struct net_device *dev, u16 queue_index)
{ struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
/** * netif_attr_test_mask - Test a CPU or Rx queue set in a mask * @j: CPU/Rx queue index * @mask: bitmask of all cpus/rx queues * @nr_bits: number of bits in the bitmask * * Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues.
*/ staticinlinebool netif_attr_test_mask(unsignedlong j, constunsignedlong *mask, unsignedint nr_bits)
{
cpu_max_bits_warn(j, nr_bits); return test_bit(j, mask);
}
/** * netif_attr_test_online - Test for online CPU/Rx queue * @j: CPU/Rx queue index * @online_mask: bitmask for CPUs/Rx queues that are online * @nr_bits: number of bits in the bitmask * * Returns: true if a CPU/Rx queue is online.
*/ staticinlinebool netif_attr_test_online(unsignedlong j, constunsignedlong *online_mask, unsignedint nr_bits)
{
cpu_max_bits_warn(j, nr_bits);
if (online_mask) return test_bit(j, online_mask);
return (j < nr_bits);
}
/** * netif_attrmask_next - get the next CPU/Rx queue in a cpu/Rx queues mask * @n: CPU/Rx queue index * @srcp: the cpumask/Rx queue mask pointer * @nr_bits: number of bits in the bitmask * * Returns: next (after n) CPU/Rx queue index in the mask; * >= nr_bits if no further CPUs/Rx queues set.
*/ staticinlineunsignedint netif_attrmask_next(int n, constunsignedlong *srcp, unsignedint nr_bits)
{ /* -1 is a legal arg here. */ if (n != -1)
cpu_max_bits_warn(n, nr_bits);
if (srcp) return find_next_bit(srcp, nr_bits, n + 1);
return n + 1;
}
/** * netif_attrmask_next_and - get the next CPU/Rx queue in \*src1p & \*src2p * @n: CPU/Rx queue index * @src1p: the first CPUs/Rx queues mask pointer * @src2p: the second CPUs/Rx queues mask pointer * @nr_bits: number of bits in the bitmask * * Returns: next (after n) CPU/Rx queue index set in both masks; * >= nr_bits if no further CPUs/Rx queues set in both.
*/ staticinlineint netif_attrmask_next_and(int n, constunsignedlong *src1p, constunsignedlong *src2p, unsignedint nr_bits)
{ /* -1 is a legal arg here. */ if (n != -1)
cpu_max_bits_warn(n, nr_bits);
if (src1p && src2p) return find_next_and_bit(src1p, src2p, nr_bits, n + 1); elseif (src1p) return find_next_bit(src1p, nr_bits, n + 1); elseif (src2p) return find_next_bit(src2p, nr_bits, n + 1);
/* * It is not allowed to call kfree_skb() or consume_skb() from hardware * interrupt context or with hardware interrupts being disabled. * (in_hardirq() || irqs_disabled()) * * We provide four helpers that can be used in following contexts : * * dev_kfree_skb_irq(skb) when caller drops a packet from irq context, * replacing kfree_skb(skb) * * dev_consume_skb_irq(skb) when caller consumes a packet from irq context. * Typically used in place of consume_skb(skb) in TX completion path * * dev_kfree_skb_any(skb) when caller doesn't know its current irq context, * replacing kfree_skb(skb) * * dev_consume_skb_any(skb) when caller doesn't know its current irq context, * and consumed a packet. Used in place of consume_skb(skb)
*/ staticinlinevoid dev_kfree_skb_irq(struct sk_buff *skb)
{
dev_kfree_skb_irq_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
}
len = dev->mtu + dev->hard_header_len + vlan_hdr_len; if (skb->len <= len) returntrue;
/* if TSO is enabled, we don't care about the length as the packet * could be forwarded without being segmented before
*/ if (skb_is_gso(skb)) returntrue;
/** * dev_hold - get reference to device * @dev: network device * * Hold reference to device to keep it from being freed. * Try using netdev_hold() instead.
*/ staticinlinevoid dev_hold(struct net_device *dev)
{
netdev_hold(dev, NULL, GFP_ATOMIC);
}
/** * dev_put - release reference to device * @dev: network device * * Release reference to device to allow it to be freed. * Try using netdev_put() instead.
*/ staticinlinevoid dev_put(struct net_device *dev)
{
netdev_put(dev, NULL);
}
DEFINE_FREE(dev_put, struct net_device *, if (_T) dev_put(_T))
if (ndev)
__netdev_tracker_alloc(ndev, tracker, gfp);
}
/* Carrier loss detection, dial on demand. The functions netif_carrier_on * and _off may be called from IRQ context, but it is caller * who is responsible for serialization of these calls. * * The name carrier is inappropriate, these functions should really be * called netif_lowerlayer_*() because they represent the state of any * kind of lower layer not just hardware media.
*/ void linkwatch_fire_event(struct net_device *dev);
/** * linkwatch_sync_dev - sync linkwatch for the given device * @dev: network device to sync linkwatch for * * Sync linkwatch for the given device, removing it from the * pending work list (if queued).
*/ void linkwatch_sync_dev(struct net_device *dev); void __linkwatch_sync_dev(struct net_device *dev);
/** * netif_carrier_ok - test if carrier present * @dev: network device * * Check if carrier is present on device
*/ staticinlinebool netif_carrier_ok(conststruct net_device *dev)
{ return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
}
/** * netif_dormant_on - mark device as dormant. * @dev: network device * * Mark device as dormant (as per RFC2863). * * The dormant state indicates that the relevant interface is not * actually in a condition to pass packets (i.e., it is not 'up') but is * in a "pending" state, waiting for some external event. For "on- * demand" interfaces, this new state identifies the situation where the * interface is waiting for events to place it in the up state.
*/ staticinlinevoid netif_dormant_on(struct net_device *dev)
{ if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state))
linkwatch_fire_event(dev);
}
/** * netif_dormant_off - set device as not dormant. * @dev: network device * * Device is not in dormant state.
*/ staticinlinevoid netif_dormant_off(struct net_device *dev)
{ if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state))
linkwatch_fire_event(dev);
}
/** * netif_dormant - test if device is dormant * @dev: network device * * Check if device is dormant.
*/ staticinlinebool netif_dormant(conststruct net_device *dev)
{ return test_bit(__LINK_STATE_DORMANT, &dev->state);
}
/** * netif_testing_on - mark device as under test. * @dev: network device * * Mark device as under test (as per RFC2863). * * The testing state indicates that some test(s) must be performed on * the interface. After completion, of the test, the interface state * will change to up, dormant, or down, as appropriate.
*/ staticinlinevoid netif_testing_on(struct net_device *dev)
{ if (!test_and_set_bit(__LINK_STATE_TESTING, &dev->state))
linkwatch_fire_event(dev);
}
/** * netif_testing_off - set device as not under test. * @dev: network device * * Device is not in testing state.
*/ staticinlinevoid netif_testing_off(struct net_device *dev)
{ if (test_and_clear_bit(__LINK_STATE_TESTING, &dev->state))
linkwatch_fire_event(dev);
}
/** * netif_testing - test if device is under test * @dev: network device * * Check if device is under test
*/ staticinlinebool netif_testing(conststruct net_device *dev)
{ return test_bit(__LINK_STATE_TESTING, &dev->state);
}
/** * netif_oper_up - test if device is operational * @dev: network device * * Check if carrier is operational
*/ staticinlinebool netif_oper_up(conststruct net_device *dev)
{ unsignedint operstate = READ_ONCE(dev->operstate);
/** * netif_device_present - is device available or removed * @dev: network device * * Check if device has not been removed from system.
*/ staticinlinebool netif_device_present(conststruct net_device *dev)
{ return test_bit(__LINK_STATE_PRESENT, &dev->state);
}
/* When you add a new bit above, update netif_msg_class_names array * in net/ethtool/common.c
*/
NETIF_MSG_CLASS_COUNT,
}; /* Both ethtool_ops interface and internal driver implementation use u32 */
static_assert(NETIF_MSG_CLASS_COUNT <= 32);
staticinlinevoid netif_tx_disable(struct net_device *dev)
{ unsignedint i; int cpu;
local_bh_disable();
cpu = smp_processor_id();
spin_lock(&dev->tx_global_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
/* * dev_addrs walker. Should be used only for read access. Call with * rcu_read_lock held.
*/ #define for_each_dev_addr(dev, ha) \
list_for_each_entry_rcu(ha, &dev->dev_addrs.list, list)
/* These functions live elsewhere (drivers/net/net_init.c, but related) */
int dev_addr_add(struct net_device *dev, constunsignedchar *addr, unsignedchar addr_type); int dev_addr_del(struct net_device *dev, constunsignedchar *addr, unsignedchar addr_type);
/* Functions used for unicast addresses handling */ int dev_uc_add(struct net_device *dev, constunsignedchar *addr); int dev_uc_add_excl(struct net_device *dev, constunsignedchar *addr); int dev_uc_del(struct net_device *dev, constunsignedchar *addr); int dev_uc_sync(struct net_device *to, struct net_device *from); int dev_uc_sync_multiple(struct net_device *to, struct net_device *from); void dev_uc_unsync(struct net_device *to, struct net_device *from); void dev_uc_flush(struct net_device *dev); void dev_uc_init(struct net_device *dev);
/** * __dev_uc_sync - Synchronize device's unicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed * * Add newly added addresses to the interface, and release * addresses that have been deleted.
*/ staticinlineint __dev_uc_sync(struct net_device *dev, int (*sync)(struct net_device *, constunsignedchar *), int (*unsync)(struct net_device *, constunsignedchar *))
{ return __hw_addr_sync_dev(&dev->uc, dev, sync, unsync);
}
/** * __dev_uc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_uc_sync().
*/ staticinlinevoid __dev_uc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, constunsignedchar *))
{
__hw_addr_unsync_dev(&dev->uc, dev, unsync);
}
/* Functions used for multicast addresses handling */ int dev_mc_add(struct net_device *dev, constunsignedchar *addr); int dev_mc_add_global(struct net_device *dev, constunsignedchar *addr); int dev_mc_add_excl(struct net_device *dev, constunsignedchar *addr); int dev_mc_del(struct net_device *dev, constunsignedchar *addr); int dev_mc_del_global(struct net_device *dev, constunsignedchar *addr); int dev_mc_sync(struct net_device *to, struct net_device *from); int dev_mc_sync_multiple(struct net_device *to, struct net_device *from); void dev_mc_unsync(struct net_device *to, struct net_device *from); void dev_mc_flush(struct net_device *dev); void dev_mc_init(struct net_device *dev);
/** * __dev_mc_sync - Synchronize device's multicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed * * Add newly added addresses to the interface, and release * addresses that have been deleted.
*/ staticinlineint __dev_mc_sync(struct net_device *dev, int (*sync)(struct net_device *, constunsignedchar *), int (*unsync)(struct net_device *, constunsignedchar *))
{ return __hw_addr_sync_dev(&dev->mc, dev, sync, unsync);
}
/** * __dev_mc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_mc_sync().
*/ staticinlinevoid __dev_mc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, constunsignedchar *))
{
__hw_addr_unsync_dev(&dev->mc, dev, unsync);
}
/* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); void netif_state_change(struct net_device *dev); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ void dev_load(struct net *net, constchar *name); struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, struct rtnl_link_stats64 *storage); void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, conststruct net_device_stats *netdev_stats); void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, conststruct pcpu_sw_netstats __percpu *netstats); void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
/* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len);
int skb_checksum_help(struct sk_buff *skb); int skb_crc32c_csum_help(struct sk_buff *skb); int skb_csum_hwoffload_help(struct sk_buff *skb, const netdev_features_t features);
/* Allow TSO being used on stacked device : * Performing the GSO segmentation before last device * is a performance improvement.
*/ staticinline netdev_features_t netdev_add_tso_features(netdev_features_t features,
netdev_features_t mask)
{ return netdev_increment_features(features, NETIF_F_ALL_TSO, mask);
}
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ staticinlinevoid netif_keep_dst(struct net_device *dev)
{
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
}
/* return true if dev can't cope with mtu frames that need vlan tag insertion */ staticinlinebool netif_reduces_vlan_mtu(struct net_device *dev)
{ /* TODO: reserve and use an additional IFF bit, if we get more users */ return netif_is_macsec(dev);
}
switch (reg_state) { case NETREG_UNINITIALIZED: return" (uninitialized)"; case NETREG_REGISTERED: return""; case NETREG_UNREGISTERING: return" (unregistering)"; case NETREG_UNREGISTERED: return" (unregistered)"; case NETREG_RELEASED: return" (released)"; case NETREG_DUMMY: return" (dummy)";
}
/* * netdev_WARN() acts like dev_printk(), but with the key difference * of using a WARN/WARN_ON to get the message out, including the * file/line information and a backtrace.
*/ #define netdev_WARN(dev, format, args...) \
WARN(1, "netdevice: %s%s: " format, netdev_name(dev), \
netdev_reg_state(dev), ##args)
#define netdev_WARN_ONCE(dev, format, args...) \
WARN_ONCE(1, "netdevice: %s%s: " format, netdev_name(dev), \
netdev_reg_state(dev), ##args)
/* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. * * Why 16. Because with 16 the only overlap we get on a hash of the * low nibble of the protocol value is RARP/SNAP/X.25. * * 0800 IP * 0001 802.3 * 0002 AX.25 * 0004 802.2 * 8035 RARP * 0005 SNAP * 0805 X.25 * 0806 ARP * 8137 IPX * 0009 Localtalk * 86DD IPv6
*/ #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.