net: ip: avoid OOM kills with large UDP sends over loopback

Dave observed number of machines hitting OOM on the UDP send
path. The workload seems to be sending large UDP packets over
loopback. Since loopback has MTU of 64k kernel will try to
allocate an skb with up to 64k of head space. This has a good
chance of failing under memory pressure. What's worse if
the message length is <32k the allocation may trigger an
OOM killer.

This is entirely avoidable, we can use an skb with page frags.

af_unix solves a similar problem by limiting the head
length to SKB_MAX_ALLOC. This seems like a good and simple
approach. It means that UDP messages > 16kB will now
use fragments if underlying device supports SG, if extra
allocator pressure causes regressions in real workloads
we can switch to trying the large allocation first and
falling back.

v4: pre-calculate all the additions to alloclen so
    we can be sure it won't go over order-2

Reported-by: Dave Jones <dsj@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Jakub Kicinski 2021-06-23 14:44:38 -07:00 коммит произвёл David S. Miller
Родитель ae24bab257
Коммит 6d123b81ac
2 изменённых файлов: 37 добавлений и 31 удалений

Просмотреть файл

@ -1054,7 +1054,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
struct sk_buff *skb_prev;
alloc_new_skb:
@ -1074,17 +1074,8 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
else if (!paged)
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
alloclen += exthdrlen;
alloc_extra = hh_len + 15;
alloc_extra += exthdrlen;
/* The last fragment gets additional space at tail.
* Note, with MSG_MORE we overallocate on fragments,
@ -1092,17 +1083,30 @@ alloc_new_skb:
* the last.
*/
if (datalen == length + fraggap)
alloclen += rt->dst.trailer_len;
alloc_extra += rt->dst.trailer_len;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
else if (!paged &&
(fraglen + alloc_extra < SKB_MAX_ALLOC ||
!(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
alloclen += alloc_extra;
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len + 15,
skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
skb = alloc_skb(alloclen + hh_len + 15,
skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;

Просмотреть файл

@ -1555,7 +1555,7 @@ emsgsize:
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
@ -1582,17 +1582,28 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
alloc_extra = hh_len;
alloc_extra += dst_exthdrlen;
alloc_extra += rt->dst.trailer_len;
/* We just reserve space for fragment header.
* Note: this may be overallocation if the message
* (without MSG_MORE) fits into the MTU.
*/
alloc_extra += sizeof(struct frag_hdr);
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
else if (!paged)
else if (!paged &&
(fraglen + alloc_extra < SKB_MAX_ALLOC ||
!(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
alloclen += dst_exthdrlen;
alloclen += alloc_extra;
if (datalen != length + fraggap) {
/*
@ -1602,30 +1613,21 @@ alloc_new_skb:
datalen += rt->dst.trailer_len;
}
alloclen += rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
/*
* We just reserve space for fragment header.
* Note: this may be overallocation if the message
* (without MSG_MORE) fits into the MTU.
*/
alloclen += sizeof(struct frag_hdr);
copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy < 0) {
err = -EINVAL;
goto error;
}
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len,
skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
skb = alloc_skb(alloclen + hh_len,
skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;