// SPDX-License-Identifier: GPL-2.0-or-later /* * NET3: Implementation of the ICMP protocol layer. * * Alan Cox, <alan@lxorguk.ukuu.org.uk> * * Some of the function names and the icmp unreach table for this * module were derived from [icmp.c 1.0.11 06/02/93] by * Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting. * Other than that this module is a complete rewrite. * * Fixes: * Clemens Fruhwirth : introduce global icmp rate limiting * with icmp type masking ability instead * of broken per type icmp timeouts. * Mike Shaver : RFC1122 checks. * Alan Cox : Multicast ping reply as self. * Alan Cox : Fix atomicity lockup in ip_build_xmit * call. * Alan Cox : Added 216,128 byte paths to the MTU * code. * Martin Mares : RFC1812 checks. * Martin Mares : Can be configured to follow redirects * if acting as a router _without_ a * routing protocol (RFC 1812). * Martin Mares : Echo requests may be configured to * be ignored (RFC 1812). * Martin Mares : Limitation of ICMP error message * transmit rate (RFC 1812). * Martin Mares : TOS and Precedence set correctly * (RFC 1812). * Martin Mares : Now copying as much data from the * original packet as we can without * exceeding 576 bytes (RFC 1812). * Willy Konynenberg : Transparent proxying support. * Keith Owens : RFC1191 correction for 4.2BSD based * path MTU bug. * Thomas Quinot : ICMP Dest Unreach codes up to 15 are * valid (RFC 1812). * Andi Kleen : Check all packet lengths properly * and moved all kfree_skb() up to * icmp_rcv. * Andi Kleen : Move the rate limit bookkeeping * into the dest entry and use a token * bucket filter (thanks to ANK). Make * the rates sysctl configurable. * Yu Tianli : Fixed two ugly bugs in icmp_send * - IP option length was accounted wrongly * - ICMP header length was not accounted * at all. * Tristan Greaves : Added sysctl option to ignore bogus * broadcast responses from broken routers. * * To Fix: * * - Should use skb_pull() instead of all the manual checking. 
* This would also greatly simplify some upper layer error handlers. --AK
*/
/* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
/* Called with BH disabled */ staticinlinestruct sock *icmp_xmit_lock(struct net *net)
{ struct sock *sk;
sk = this_cpu_read(ipv4_icmp_sk);
if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet.
*/ return NULL;
}
sock_net_set(sk, net); return sk;
}
/** * icmp_global_allow - Are we allowed to send one more ICMP message ? * @net: network namespace * * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec. * Returns false if we reached the limit and can not send another packet. * Works in tandem with icmp_global_consume().
*/ bool icmp_global_allow(struct net *net)
{
u32 delta, now, oldstamp; int incr, new, old;
/* Note: many cpus could find this condition true. * Then later icmp_global_consume() could consume more credits, * this is an acceptable race.
*/ if (atomic_read(&net->ipv4.icmp_global_credit) > 0) returntrue;
now = jiffies;
oldstamp = READ_ONCE(net->ipv4.icmp_global_stamp);
delta = min_t(u32, now - oldstamp, HZ); if (delta < HZ / 50) returnfalse;
incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ; if (!incr) returnfalse;
if (cmpxchg(&net->ipv4.icmp_global_stamp, oldstamp, now) == oldstamp) {
old = atomic_read(&net->ipv4.icmp_global_credit); do { new = min(old + incr, READ_ONCE(net->ipv4.sysctl_icmp_msgs_burst));
} while (!atomic_try_cmpxchg(&net->ipv4.icmp_global_credit, &old, new));
} returntrue;
}
EXPORT_SYMBOL(icmp_global_allow);
/**
 * icmp_global_consume - Charge the global ICMP credit counter
 * @net: network namespace
 *
 * Draws a random value from get_random_u32_below(3) and, when it is
 * non-zero, subtracts it from net->ipv4.icmp_global_credit.
 */
void icmp_global_consume(struct net *net)
{
	int spent = get_random_u32_below(3);

	/* Nothing to charge for this call. */
	if (!spent)
		return;

	/* Note: this might make the credit counter negative. */
	atomic_sub(spent, &net->ipv4.icmp_global_credit);
}
EXPORT_SYMBOL(icmp_global_consume);
staticbool icmpv4_mask_allow(struct net *net, int type, int code)
{ if (type > NR_ICMP_TYPES) returntrue;
/* * Maintain the counters used in the SNMP statistics for outgoing ICMP
*/ void icmp_out_count(struct net *net, unsignedchar type)
{
ICMPMSGOUT_INC_STATS(net, type);
ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS);
}
/* * Checksum each fragment, and on the first include the headers and final * checksum.
*/ staticint icmp_glue_bits(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{ struct icmp_bxm *icmp_param = from;
__wsum csum;
csum = skb_copy_and_csum_bits(icmp_param->skb,
icmp_param->offset + offset,
to, len);
/* * The device used for looking up which routing table to use for sending an ICMP * error is preferably the source whenever it is set, which should ensure the * icmp error can be sent to the source host, else lookup using the routing * table of the destination device, else use the main routing table (index 0).
*/ staticstruct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
{ struct net_device *dev = skb->dev; conststruct dst_entry *dst;
relookup_failed: if (rt) return rt; return ERR_PTR(err);
}
/* * Send an ICMP message in response to a situation * * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. * MAY send more (we do). * MUST NOT change this header information. * MUST NOT reply to a multicast/broadcast IP address. * MUST NOT reply to a multicast/broadcast MAC address. * MUST reply to only the first fragment.
*/
if (rt->dst.dev)
net = dev_net_rcu(rt->dst.dev); elseif (skb_in->dev)
net = dev_net_rcu(skb_in->dev); else goto out;
/* * Find the original header. It is expected to be valid, of course. * Check this, icmp_send is called from the most obscure devices * sometimes.
*/
iph = ip_hdr(skb_in);
/* * No replies to physical multicast/broadcast
*/ if (skb_in->pkt_type != PACKET_HOST) goto out;
/* * Now check at the protocol level
*/ if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto out;
/* * Only reply to fragment 0. We byte re-order the constant * mask for efficiency.
*/ if (iph->frag_off & htons(IP_OFFSET)) goto out;
/* * If we send an ICMP error to an ICMP error a mess would result..
*/ if (icmp_pointers[type].error) { /* * We are an error, check if we are replying to an * ICMP error
*/ if (iph->protocol == IPPROTO_ICMP) {
u8 _inner_type, *itp;
/* * Assume any unknown ICMP type is an error. This * isn't specified by the RFC, but think about it..
*/ if (*itp > NR_ICMP_TYPES ||
icmp_pointers[*itp].error) goto out;
}
}
/* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless * incoming dev is loopback. If outgoing dev change to not be * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
*/ if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
!icmpv4_global_allow(net, type, code, &apply_ratelimit)) goto out_bh_enable;
sk = icmp_xmit_lock(net); if (!sk) goto out_bh_enable;
rcu_read_lock(); if (rt_is_input_route(rt) &&
READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr))
dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
/* RFC says return as much as we can without exceeding 576 bytes. */
room = dst_mtu(&rt->dst); if (room > 576)
room = 576;
room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
room -= sizeof(struct icmphdr); /* Guard against tiny mtu. We need to include at least one * IP network header for this message to make any sense.
*/ if (room <= (int)sizeof(struct iphdr)) goto ende;
/* if we don't have a source address at this point, fall back to the * dummy address instead of sending out a packet with a source address * of 0.0.0.0
*/ if (!fl4.saddr)
fl4.saddr = htonl(INADDR_DUMMY);
/* Checking the full IP header plus 8 bytes of protocol to * avoid additional coding at protocol handlers.
*/ if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
__ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS); return;
}
raw_icmp_error(skb, protocol, info);
ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, info);
}
switch (icmph->type) { case ICMP_DEST_UNREACH: switch (icmph->code & 15) { case ICMP_NET_UNREACH: case ICMP_HOST_UNREACH: case ICMP_PROT_UNREACH: case ICMP_PORT_UNREACH: break; case ICMP_FRAG_NEEDED: /* for documentation of the ip_no_pmtu_disc * values please see * Documentation/networking/ip-sysctl.rst
*/ switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) { default:
net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
&iph->daddr); break; case 2: goto out; case 3: if (!icmp_tag_validation(iph->protocol)) goto out;
fallthrough; case 0:
info = ntohs(icmph->un.frag.mtu);
} break; case ICMP_SR_FAILED:
net_dbg_ratelimited("%pI4: Source Route Failed\n",
&iph->daddr); break; default: break;
} if (icmph->code > NR_ICMP_UNREACH) goto out; break; case ICMP_PARAMETERPROB:
info = ntohl(icmph->un.gateway) >> 24; break; case ICMP_TIME_EXCEEDED:
__ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS); if (icmph->code == ICMP_EXC_FRAGTIME) goto out; break;
}
/* * Throw it at our lower layers * * RFC 1122: 3.2.2 MUST extract the protocol ID from the passed * header. * RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the * transport layer. * RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to * transport layer.
*/
/* * Check the other end isn't violating RFC 1122. Some routers send * bogus responses to broadcast frames. If you see this message * first check your netmask matches at both ends, if it does then * get the other vendor to fix their kit.
*/
if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) &&
inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) {
net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
&ip_hdr(skb)->saddr,
icmph->type, icmph->code,
&iph->daddr, skb->dev->name); goto out;
}
/* * Handle ICMP_ECHO ("ping") and ICMP_EXT_ECHO ("PROBE") requests. * * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo * requests. * RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be * included in the reply. * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring * echo requests, MUST have default=NOT. * RFC 8335: 8 MUST have a config option to enable/disable ICMP * Extended Echo Functionality, MUST be disabled by default * See also WRT handling of options once they are done and working.
*/
net = skb_dst_dev_net_rcu(skb); /* should there be an ICMP stat for ignored echos? */ if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return SKB_NOT_DROPPED_YET;
/* Helper for icmp_echo and icmpv6_echo_reply. * Searches for net_device that matches PROBE interface identifier * and builds PROBE reply message in icmphdr. * * Returns false if PROBE responses are disabled via sysctl
*/
if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) returnfalse;
/* We currently only support probing interfaces on the proxy node * Check to ensure L-bit is set
*/ if (!(ntohs(icmphdr->un.echo.sequence) & 1)) returnfalse; /* Clear status bits in reply message */
icmphdr->un.echo.sequence &= htons(0xFF00); if (icmphdr->type == ICMP_EXT_ECHO)
icmphdr->type = ICMP_EXT_ECHOREPLY; else
icmphdr->type = ICMPV6_EXT_ECHO_REPLY;
ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr); /* Size of iio is class_type dependent. * Only check header here and assign length based on ctype in the switch statement
*/
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio); if (!ext_hdr || !iio) goto send_mal_query; if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr) ||
ntohs(iio->extobj_hdr.length) > sizeof(_iio)) goto send_mal_query;
ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr);
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) + ident_len, &_iio); if (!iio) goto send_mal_query;
status = 0;
dev = NULL; switch (iio->extobj_hdr.class_type) { case ICMP_EXT_ECHO_CTYPE_NAME: if (ident_len >= IFNAMSIZ) goto send_mal_query;
memset(buff, 0, sizeof(buff));
memcpy(buff, &iio->ident.name, ident_len);
dev = dev_get_by_name(net, buff); break; case ICMP_EXT_ECHO_CTYPE_INDEX: if (ident_len != sizeof(iio->ident.ifindex)) goto send_mal_query;
dev = dev_get_by_index(net, ntohl(iio->ident.ifindex)); break; case ICMP_EXT_ECHO_CTYPE_ADDR: if (ident_len < sizeof(iio->ident.addr.ctype3_hdr) ||
ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
iio->ident.addr.ctype3_hdr.addrlen) goto send_mal_query; switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) { case ICMP_AFI_IP: if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in_addr)) goto send_mal_query;
dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr); break; #if IS_ENABLED(CONFIG_IPV6) case ICMP_AFI_IP6: if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr)) goto send_mal_query;
dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
dev_hold(dev); break; #endif default: goto send_mal_query;
} break; default: goto send_mal_query;
} if (!dev) {
icmphdr->code = ICMP_EXT_CODE_NO_IF; returntrue;
} /* Fill bits in reply message */ if (dev->flags & IFF_UP)
status |= ICMP_EXT_ECHOREPLY_ACTIVE;
in_dev = __in_dev_get_rcu(dev); if (in_dev && rcu_access_pointer(in_dev->ifa_list))
status |= ICMP_EXT_ECHOREPLY_IPV4;
in6_dev = __in6_dev_get(dev); if (in6_dev && !list_empty(&in6_dev->addr_list))
status |= ICMP_EXT_ECHOREPLY_IPV6;
/* * Handle ICMP Timestamp requests. * RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests. * SHOULD be in the kernel for minimum random latency. * MUST be accurate to a few minutes. * MUST be updated at least at 15Hz.
*/ staticenum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
{ struct icmp_bxm icmp_param; /* * Too short.
*/ if (skb->len < 4) goto out_err;
/* * Fill in the current time as ms since midnight UT:
*/
icmp_param.data.times[1] = inet_current_timestamp();
icmp_param.data.times[2] = icmp_param.data.times[1];
if (skb_checksum_simple_validate(skb)) goto csum_error;
if (!pskb_pull(skb, sizeof(*icmph))) goto error;
icmph = icmp_hdr(skb);
ICMPMSGIN_INC_STATS(net, icmph->type);
/* Check for ICMP Extended Echo (PROBE) messages */ if (icmph->type == ICMP_EXT_ECHO) { /* We can't use icmp_pointers[].handler() because it is an array of * size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42.
*/
reason = icmp_echo(skb); goto reason_check;
}
/* * Parse the ICMP message
*/
if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { /* * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be * silently ignored (we let user decide with a sysctl). * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently * discarded if to broadcast/multicast.
*/ if ((icmph->type == ICMP_ECHO ||
icmph->type == ICMP_TIMESTAMP) &&
READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) {
reason = SKB_DROP_REASON_INVALID_PROTO; goto error;
} if (icmph->type != ICMP_ECHO &&
icmph->type != ICMP_TIMESTAMP &&
icmph->type != ICMP_ADDRESS &&
icmph->type != ICMP_ADDRESSREPLY) {
reason = SKB_DROP_REASON_INVALID_PROTO; goto error;
}
}
exth = skb_header_pointer(skb, off, sizeof(_exth), &_exth); if (!exth) returnfalse; if (exth->version != 2) returntrue;
if (exth->checksum &&
csum_fold(skb_checksum(skb, off, skb->len - off, 0))) returnfalse;
off += sizeof(_exth); while (off < skb->len) {
objh = skb_header_pointer(skb, off, sizeof(_objh), &_objh); if (!objh) returnfalse;
olen = ntohs(objh->length); if (olen < sizeof(_objh)) returnfalse;
off += olen; if (off > skb->len) returnfalse;
}
returntrue;
}
void ip_icmp_error_rfc4884(conststruct sk_buff *skb, struct sock_ee_data_rfc4884 *out, int thlen, int off)
{ int hlen;
/* original datagram headers: end of icmph to payload (skb->data) */
hlen = -skb_transport_offset(skb) - thlen;
/* per rfc 4884: minimal datagram length of 128 bytes */ if (off < 128 || off < hlen) return;
/* kernel has stripped headers: return payload offset in bytes */
off -= hlen; if (off + sizeof(struct icmp_ext_hdr) > skb->len) return;
out->len = off;
if (!ip_icmp_error_rfc4884_validate(skb, off))
out->flags |= SO_EE_RFC4884_FLAG_INVALID;
}
EXPORT_SYMBOL_GPL(ip_icmp_error_rfc4884);
int icmp_err(struct sk_buff *skb, u32 info)
{ struct iphdr *iph = (struct iphdr *)skb->data; int offset = iph->ihl<<2; struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset); struct net *net = dev_net_rcu(skb->dev); int type = icmp_hdr(skb)->type; int code = icmp_hdr(skb)->code;
/* * Use ping_err to handle all icmp errors except those * triggered by ICMP_ECHOREPLY which sent from kernel.
*/ if (icmph->type != ICMP_ECHOREPLY) {
ping_err(skb, offset, info); return 0;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.