As Ilya Lesokhin suggested, we can collapse two skbs at retransmit
time even if the skb at the right has fragments.

We simply have to use more generic skb_copy_bits() instead of
skb_copy_from_linear_data() in tcp_collapse_retrans()

Also need to guard this skb_copy_bits() in case there is nothing to
copy, otherwise skb_put() could panic if left skb has frags.

Tested:

Used following packetdrill test

// Establish a connection.
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

   +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 8>
   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.100 < . 1:1(0) ack 1 win 257
   +0 accept(3, ..., ...) = 4

   +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
   +0 write(4, ..., 200) = 200
   +0 > P. 1:201(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 201:401(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 401:601(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 601:801(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 801:1001(200) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1001:1101(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1101:1201(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1201:1301(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1301:1401(100) ack 1

+.100 < . 1:1(0) ack 1 win 257 <nop,nop,sack 1001:1401>
// Check that TCP collapse works :
   +0 > P. 1:1001(1000) ack 1

Reported-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2016-11-01 10:53:42 -07:00 коммит произвёл David S. Miller
Родитель b646cf299e
Коммит 2331ccc5b3
1 изменённых файлов: 4 добавлений и 6 удалений

Просмотреть файл

@ -2529,8 +2529,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
tcp_unlink_write_queue(next_skb, sk); tcp_unlink_write_queue(next_skb, sk);
skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), if (next_skb_size)
next_skb_size); skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
next_skb_size);
if (next_skb->ip_summed == CHECKSUM_PARTIAL) if (next_skb->ip_summed == CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL;
@ -2567,14 +2568,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
{ {
if (tcp_skb_pcount(skb) > 1) if (tcp_skb_pcount(skb) > 1)
return false; return false;
/* TODO: SACK collapsing could be used to remove this condition */
if (skb_shinfo(skb)->nr_frags != 0)
return false;
if (skb_cloned(skb)) if (skb_cloned(skb))
return false; return false;
if (skb == tcp_send_head(sk)) if (skb == tcp_send_head(sk))
return false; return false;
/* Some heurestics for collapsing over SACK'd could be invented */ /* Some heuristics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return false; return false;