for (i = 0; i < key_count; i++, key++) { /* User should make sure that every key target offset is within * boundaries of unsigned short.
*/
BUG_ON(key->offset > USHRT_MAX);
BUG_ON(dissector_uses_key(flow_dissector,
key->key_id));
/* Ensure that the dissector always includes control and basic key. * That way we are able to avoid handling lack of these in fast path.
*/
BUG_ON(!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_CONTROL));
BUG_ON(!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_BASIC));
}
EXPORT_SYMBOL(skb_flow_dissector_init);
#ifdef CONFIG_BPF_SYSCALL
/* Check whether a BPF flow-dissector program may be attached to @net.
 *
 * The root (init_net) program overrides any per-net-namespace one, so a
 * root attachment and a non-root attachment are mutually exclusive.
 *
 * Return: 0 if the attachment is allowed, -EEXIST on conflict.
 */
int flow_dissector_bpf_prog_attach_check(struct net *net,
					 struct bpf_prog *prog)
{
	enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;

	if (net == &init_net) {
		/* BPF flow dissector in the root namespace overrides
		 * any per-net-namespace one. When attaching to root,
		 * make sure we don't have any BPF program attached
		 * to the non-root namespaces.
		 */
		struct net *ns;

		for_each_net(ns) {
			if (ns == &init_net)
				continue;
			if (rcu_access_pointer(ns->bpf.run_array[type]))
				return -EEXIST;
		}
	} else {
		/* Make sure root flow dissector is not attached
		 * when attaching to the non-root namespace.
		 */
		if (rcu_access_pointer(init_net.bpf.run_array[type]))
			return -EEXIST;
	}

	return 0;
}
#endif /* CONFIG_BPF_SYSCALL */
/** * skb_flow_get_ports - extract the upper layer ports and return them * @skb: sk_buff to extract the ports from * @thoff: transport header offset * @ip_proto: protocol for which to get port offset * @data: raw buffer pointer to the packet, if NULL use skb->data * @hlen: packet header length, if @data is NULL use skb_headlen(skb) * * The function will try to retrieve the ports at offset thoff + poff where poff * is the protocol port offset returned from proto_ports_offset
*/
__be32 skb_flow_get_ports(conststruct sk_buff *skb, int thoff, u8 ip_proto, constvoid *data, int hlen)
{ int poff = proto_ports_offset(ip_proto);
if (!data) {
data = skb->data;
hlen = skb_headlen(skb);
}
staticbool icmp_has_id(u8 type)
{ switch (type) { case ICMP_ECHO: case ICMP_ECHOREPLY: case ICMP_TIMESTAMP: case ICMP_TIMESTAMPREPLY: case ICMPV6_ECHO_REQUEST: case ICMPV6_ECHO_REPLY: returntrue;
}
returnfalse;
}
/** * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields * @skb: sk_buff to extract from * @key_icmp: struct flow_dissector_key_icmp to fill * @data: raw buffer pointer to the packet * @thoff: offset to extract at * @hlen: packet header length
*/ void skb_flow_get_icmp_tci(conststruct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, constvoid *data, int thoff, int hlen)
{ struct icmphdr *ih, _ih;
ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih); if (!ih) return;
/* As we use 0 to signal that the Id field is not present, * avoid confusion with packets without such field
*/ if (icmp_has_id(ih->type))
key_icmp->id = ih->un.echo.id ? ntohs(ih->un.echo.id) : 1; else
key_icmp->id = 0;
}
EXPORT_SYMBOL(skb_flow_get_icmp_tci);
/* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet * using skb_flow_get_icmp_tci().
*/ staticvoid __skb_flow_dissect_icmp(conststruct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, constvoid *data, int thoff, int hlen)
{ struct flow_dissector_key_icmp *key_icmp;
if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP)) return;
/* A quick check to see if there might be something to do. */ if (!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_KEYID) &&
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_PORTS) &&
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_IP) &&
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_OPTS)) return;
info = skb_tunnel_info(skb); if (!info) return;
key = &info->key;
if (test_bit(IP_TUNNEL_CSUM_BIT, key->tun_flags))
ctrl_flags |= FLOW_DIS_F_TUNNEL_CSUM; if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
ctrl_flags |= FLOW_DIS_F_TUNNEL_DONT_FRAGMENT; if (test_bit(IP_TUNNEL_OAM_BIT, key->tun_flags))
ctrl_flags |= FLOW_DIS_F_TUNNEL_OAM; if (test_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_flags))
ctrl_flags |= FLOW_DIS_F_TUNNEL_CRIT_OPT;
switch (ip_tunnel_info_af(info)) { case AF_INET:
skb_flow_dissect_set_enc_control(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
ctrl_flags, flow_dissector,
target_container); if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_dissector_key_ipv4_addrs *ipv4;
/* Only look inside GRE without routing */ if (hdr->flags & GRE_ROUTING) return FLOW_DISSECT_RET_OUT_GOOD;
/* Only look inside GRE for version 0 and 1 */
gre_ver = ntohs(hdr->flags & GRE_VERSION); if (gre_ver > 1) return FLOW_DISSECT_RET_OUT_GOOD;
*p_proto = hdr->protocol; if (gre_ver) { /* Version1 must be PPTP, and check the flags */ if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) return FLOW_DISSECT_RET_OUT_GOOD;
}
if (hdr->flags & GRE_SEQ)
offset += sizeof_field(struct pptp_gre_header, seq);
if (gre_ver == 0) { if (*p_proto == htons(ETH_P_TEB)) { conststruct ethhdr *eth; struct ethhdr _eth;
eth = __skb_header_pointer(skb, *p_nhoff + offset, sizeof(_eth),
data, *p_hlen, &_eth); if (!eth) return FLOW_DISSECT_RET_OUT_BAD;
*p_proto = eth->h_proto;
offset += sizeof(*eth);
/* Cap headers that we access via pointers at the * end of the Ethernet header as our maximum alignment * at that point is only 2 bytes.
*/ if (NET_IP_ALIGN)
*p_hlen = *p_nhoff + offset;
}
} else { /* version 1, must be PPTP */
u8 _ppp_hdr[PPP_HDRLEN];
u8 *ppp_hdr;
if (hdr->flags & GRE_ACK)
offset += sizeof_field(struct pptp_gre_header, ack);
switch (PPP_PROTOCOL(ppp_hdr)) { case PPP_IP:
*p_proto = htons(ETH_P_IP); break; case PPP_IPV6:
*p_proto = htons(ETH_P_IPV6); break; default: /* Could probably catch some more like MPLS */ break;
}
/** * __skb_flow_dissect_batadv() - dissect batman-adv header * @skb: sk_buff to with the batman-adv header * @key_control: flow dissectors control key * @data: raw buffer pointer to the packet, if NULL use skb->data * @p_proto: pointer used to update the protocol to process next * @p_nhoff: pointer used to update inner network header offset * @hlen: packet header length * @flags: any combination of FLOW_DISSECTOR_F_* * * ETH_P_BATMAN packets are tried to be dissected. Only * &struct batadv_unicast packets are actually processed because they contain an * inner ethernet header and are usually followed by actual network header. This * allows the flow dissector to continue processing the packet. * * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found, * FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation, * otherwise FLOW_DISSECT_RET_OUT_BAD
*/ staticenum flow_dissect_ret
__skb_flow_dissect_batadv(conststruct sk_buff *skb, struct flow_dissector_key_control *key_control, constvoid *data, __be16 *p_proto, int *p_nhoff, int hlen, unsignedint flags)
{ struct { struct batadv_unicast_packet batadv_unicast; struct ethhdr eth;
} *hdr, _hdr;
/** * __skb_flow_dissect - extract the flow_keys struct and return it * @net: associated network namespace, derived from @skb if NULL * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified * @flow_dissector: list of keys to dissect * @target_container: target structure to put dissected values into * @data: raw buffer pointer to the packet, if NULL use skb->data * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) * @hlen: packet header length, if @data is NULL use skb_headlen(skb) * @flags: flags that control the dissection process, e.g. * FLOW_DISSECTOR_F_STOP_AT_ENCAP. * * The function will try to retrieve individual keys into target specified * by flow_dissector from either the skbuff or a raw buffer specified by the * rest parameters. * * Caller must take care of zeroing target container memory.
*/ bool __skb_flow_dissect(conststruct net *net, conststruct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, constvoid *data,
__be16 proto, int nhoff, int hlen, unsignedint flags)
{ struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; bool mpls_el = false; int mpls_lse = 0; int num_hdrs = 0;
u8 ip_proto = 0; bool ret;
if (!data) {
data = skb->data;
proto = skb_vlan_tag_present(skb) ?
skb->vlan_proto : skb->protocol;
nhoff = skb_network_offset(skb);
hlen = skb_headlen(skb); #if IS_ENABLED(CONFIG_NET_DSA) if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) &&
proto == htons(ETH_P_XDSA))) { struct metadata_dst *md_dst = skb_metadata_dst(skb); conststruct dsa_device_ops *ops; int offset = 0;
/* It is ensured by skb_flow_dissector_init() that control key will * be always present.
*/
key_control = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_CONTROL,
target_container);
/* It is ensured by skb_flow_dissector_init() that basic key will * be always present.
*/
key_basic = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
rcu_read_lock();
if (skb) { if (!net) { if (skb->dev)
net = dev_net_rcu(skb->dev); elseif (skb->sk)
net = sock_net(skb->sk);
}
}
DEBUG_NET_WARN_ON_ONCE(!net); if (net) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; struct bpf_prog_array *run_array;
run_array = rcu_dereference(init_net.bpf.run_array[type]); if (!run_array)
run_array = rcu_dereference(net->bpf.run_array[type]);
if (skb) {
ctx.skb = skb; /* we can't use 'proto' in the skb case * because it might be set to skb->vlan_proto * which has been pulled from the data
*/
n_proto = skb->protocol;
}
prog = READ_ONCE(run_array->items[0].prog);
result = bpf_flow_dissect(prog, &ctx, n_proto, nhoff,
hlen, flags); if (result != BPF_FLOW_DISSECTOR_CONTINUE) {
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
target_container);
rcu_read_unlock(); return result == BPF_OK;
}
}
}
if (!is_pppoe_ses_hdr_valid(&hdr->hdr)) {
fdret = FLOW_DISSECT_RET_OUT_BAD; break;
}
/* least significant bit of the most significant octet * indicates if protocol field was compressed
*/
ppp_proto = ntohs(hdr->proto); if (ppp_proto & 0x0100) {
ppp_proto = ppp_proto >> 8;
nhoff += PPPOE_SES_HLEN - 1;
} else {
nhoff += PPPOE_SES_HLEN;
}
/* Process result of proto processing */ switch (fdret) { case FLOW_DISSECT_RET_OUT_GOOD: goto out_good; case FLOW_DISSECT_RET_PROTO_AGAIN: if (skb_flow_dissect_allowed(&num_hdrs)) goto proto_again; goto out_good; case FLOW_DISSECT_RET_CONTINUE: case FLOW_DISSECT_RET_IPPROTO_AGAIN: break; case FLOW_DISSECT_RET_OUT_BAD: default: goto out_bad;
}
/* Process result of IP proto processing */ switch (fdret) { case FLOW_DISSECT_RET_PROTO_AGAIN: if (skb_flow_dissect_allowed(&num_hdrs)) goto proto_again; break; case FLOW_DISSECT_RET_IPPROTO_AGAIN: if (skb_flow_dissect_allowed(&num_hdrs)) goto ip_proto_again; break; case FLOW_DISSECT_RET_OUT_GOOD: case FLOW_DISSECT_RET_CONTINUE: break; case FLOW_DISSECT_RET_OUT_BAD: default: goto out_bad;
}
/* Sort the source and destination IP and the ports,
 * to have consistent hash within the two directions
 */
static inline void __flow_hash_consistentify(struct flow_keys *keys)
{
	int addr_diff, i;

	switch (keys->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		/* Canonical order: smaller address becomes the source. */
		if ((__force u32)keys->addrs.v4addrs.dst <
		    (__force u32)keys->addrs.v4addrs.src)
			swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);

		if ((__force u16)keys->ports.dst <
		    (__force u16)keys->ports.src) {
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		/* Compare the full 128-bit addresses lexicographically. */
		addr_diff = memcmp(&keys->addrs.v6addrs.dst,
				   &keys->addrs.v6addrs.src,
				   sizeof(keys->addrs.v6addrs.dst));
		if (addr_diff < 0) {
			for (i = 0; i < 4; i++)
				swap(keys->addrs.v6addrs.src.s6_addr32[i],
				     keys->addrs.v6addrs.dst.s6_addr32[i]);
		}
		if ((__force u16)keys->ports.dst <
		    (__force u16)keys->ports.src) {
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	}
}
/** * __skb_get_hash_net: calculate a flow hash * @net: associated network namespace, derived from @skb if NULL * @skb: sk_buff to calculate flow hash from * * This function calculates a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports.
*/ void __skb_get_hash_net(conststruct net *net, struct sk_buff *skb)
{ struct flow_keys keys;
u32 hash;
/* skip L4 headers for fragments after the first */ if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
!(keys->control.flags & FLOW_DIS_FIRST_FRAG)) return poff;
switch (keys->basic.ip_proto) { case IPPROTO_TCP: { /* access doff as u8 to avoid unaligned access */ const u8 *doff;
u8 _doff;
poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2); break;
} case IPPROTO_UDP: case IPPROTO_UDPLITE:
poff += sizeof(struct udphdr); break; /* For the rest, we do not really care about header * extensions at this point for now.
*/ case IPPROTO_ICMP:
poff += sizeof(struct icmphdr); break; case IPPROTO_ICMPV6:
poff += sizeof(struct icmp6hdr); break; case IPPROTO_IGMP:
poff += sizeof(struct igmphdr); break; case IPPROTO_DCCP:
poff += sizeof(struct dccp_hdr); break; case IPPROTO_SCTP:
poff += sizeof(struct sctphdr); break;
}
return poff;
}
/** * skb_get_poff - get the offset to the payload * @skb: sk_buff to get the payload offset from * * The function will get the offset to the payload as far as it could * be dissected. The main user is currently BPF, so that we can dynamically * truncate packets without needing to push actual payload to the user * space and can analyze headers only, instead.
*/
u32 skb_get_poff(conststruct sk_buff *skb)
{ struct flow_keys_basic keys;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.