Merge branch 'tcp-sack-compression-changes'

Eric Dumazet says:

====================
tcp: sack compression changes

This patch series refines SACK compression.

We had issues with missing SACK blocks when TCP option space was tight.

The series also uses hrtimer slack to reduce system overhead from timer interrupts.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 2020-04-30 13:24:01 -07:00
Parents: 3857c77624 a70437cc09
Commit: 1b2e788490
8 changed files: 68 additions, 15 deletions


@@ -651,6 +651,14 @@ tcp_comp_sack_delay_ns - LONG INTEGER
 	Default : 1,000,000 ns (1 ms)
 
+tcp_comp_sack_slack_ns - LONG INTEGER
+	This sysctl controls the slack used when arming the
+	timer used by SACK compression. This gives extra time
+	for small RTT flows, and reduces system overhead by allowing
+	opportunistic reduction of timer interrupts.
+
+	Default : 100,000 ns (100 us)
+
 tcp_comp_sack_nr - INTEGER
 	Max number of SACKs that can be compressed.
 	Using 0 disables SACK compression.
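
For context, the new knob is writable at run time like any other net.ipv4 sysctl. A minimal userspace sketch (illustrative, not part of the patch; assumes the standard net.ipv4.* to /proc/sys/net/ipv4/ path mapping and root privileges):

#include <stdio.h>

int main(void)
{
	/* Set the SACK compression timer slack to 50 us (50,000 ns). */
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_comp_sack_slack_ns", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "50000\n");
	fclose(f);
	return 0;
}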


@@ -268,6 +268,7 @@ struct tcp_sock {
 	} rack;
 	u16	advmss;		/* Advertised MSS */
 	u8	compressed_ack;
+	u8	dup_ack_counter;
 	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */
 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
 	u8	chrono_type:2,	/* current chronograph type */
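
The new u8 sits directly after the existing u8 compressed_ack, so it likely fills the alignment padding before the following u32 rather than growing struct tcp_sock. A standalone sketch of that packing effect (hypothetical struct names, not kernel code):

#include <stdio.h>

/* Hypothetical layouts mimicking the field order above:
 * u16 + u8 leaves a one-byte hole before a u32, and a second
 * u8 placed next to the first occupies that hole for free.
 */
struct before { unsigned short advmss; unsigned char a; unsigned int chrono; };
struct after  { unsigned short advmss; unsigned char a, b; unsigned int chrono; };

int main(void)
{
	/* Typically prints "8 8": sizeof() is unchanged. */
	printf("%zu %zu\n", sizeof(struct before), sizeof(struct after));
	return 0;
}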


@@ -173,6 +173,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_rmem[3];
 	int sysctl_tcp_comp_sack_nr;
 	unsigned long sysctl_tcp_comp_sack_delay_ns;
+	unsigned long sysctl_tcp_comp_sack_slack_ns;
 	struct inet_timewait_death_row tcp_death_row;
 	int sysctl_max_syn_backlog;
 	int sysctl_tcp_fastopen;


@@ -1329,6 +1329,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+	{
+		.procname	= "tcp_comp_sack_slack_ns",
+		.data		= &init_net.ipv4.sysctl_tcp_comp_sack_slack_ns,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
 	{
 		.procname	= "tcp_comp_sack_nr",
 		.data		= &init_net.ipv4.sysctl_tcp_comp_sack_nr,


@@ -4327,6 +4327,33 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 	}
 }
 
+static void tcp_sack_compress_send_ack(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!tp->compressed_ack)
+		return;
+
+	if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
+		__sock_put(sk);
+
+	/* Since we have to send one ack finally,
+	 * subtract one from tp->compressed_ack to keep
+	 * LINUX_MIB_TCPACKCOMPRESSED accurate.
+	 */
+	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+		      tp->compressed_ack - 1);
+
+	tp->compressed_ack = 0;
+	tcp_send_ack(sk);
+}
+
+/* Reasonable number of SACK blocks included in a TCP SACK option.
+ * The max is 4, but this becomes 3 if TCP timestamps are there.
+ * Given that SACK packets might be lost, be conservative and use 2.
+ */
+#define TCP_SACK_BLOCKS_EXPECTED 2
+
 static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
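
The arithmetic behind TCP_SACK_BLOCKS_EXPECTED, spelled out: TCP options get at most 40 bytes, the timestamp option consumes 12 of them (10 bytes plus two NOPs of padding), and a SACK option costs 2 header bytes plus 8 per block. A standalone sketch of that computation (illustrative constants, per RFC 793/2018/7323; not kernel code):

#include <stdio.h>

#define TCP_OPTION_SPACE 40	/* max TCP option bytes */
#define TS_OPTION_BYTES  12	/* 10-byte timestamp option + 2 NOPs */
#define SACK_HDR_BYTES    2	/* SACK option kind + length */
#define SACK_BLOCK_BYTES  8	/* two 32-bit sequence numbers per block */

int main(void)
{
	int without_ts = (TCP_OPTION_SPACE - SACK_HDR_BYTES) / SACK_BLOCK_BYTES;
	int with_ts = (TCP_OPTION_SPACE - TS_OPTION_BYTES - SACK_HDR_BYTES) /
		      SACK_BLOCK_BYTES;

	/* Prints "4 3": 4 blocks fit, only 3 with timestamps, hence
	 * the conservative expectation of 2.
	 */
	printf("%d %d\n", without_ts, with_ts);
	return 0;
}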
@@ -4339,6 +4366,8 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 	for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
 		if (tcp_sack_extend(sp, seq, end_seq)) {
+			if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
+				tcp_sack_compress_send_ack(sk);
+
 			/* Rotate this_sack to the first one. */
 			for (; this_sack > 0; this_sack--, sp--)
 				swap(*sp, *(sp - 1));
@@ -4348,6 +4377,9 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 		}
 	}
 
+	if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
+		tcp_sack_compress_send_ack(sk);
+
 	/* Could not find an adjacent existing SACK, build a new one,
 	 * put it at the front, and shift everyone else down. We
 	 * always know there is at least one SACK present already here.
@@ -4355,8 +4387,6 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 	 * If the sack array is full, forget about the last one.
 	 */
 	if (this_sack >= TCP_NUM_SACKS) {
-		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
-			tcp_send_ack(sk);
 		this_sack--;
 		tp->rx_opt.num_sacks--;
 		sp--;
@@ -5275,15 +5305,13 @@ send_now:
 	if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
 		tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
-		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
-			NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
-				      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
 		tp->compressed_ack = 0;
+		tp->dup_ack_counter = 0;
 	}
-	if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+	if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
+		tp->dup_ack_counter++;
 		goto send_now;
+	}
+	tp->compressed_ack++;
 	if (hrtimer_is_queued(&tp->compressed_ack_timer))
 		return;
@@ -5296,7 +5324,8 @@ send_now:
 	delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
 		      rtt * (NSEC_PER_USEC >> 3)/20);
 	sock_hold(sk);
-	hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
+	hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+			       sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
 			       HRTIMER_MODE_REL_PINNED_SOFT);
 }
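
To make the delay formula above concrete: rtt here is an RTT estimate in microseconds kept left-shifted by 3 (the kernel's srtt fixed-point format), so rtt * (NSEC_PER_USEC >> 3) / 20 reduces to RTT/20 in nanoseconds, i.e. 5% of the RTT, capped by tcp_comp_sack_delay_ns; the timer is then armed with the new slack, letting the kernel coalesce expirations. A standalone re-derivation (illustrative helper, not kernel code):

#include <stdio.h>

#define NSEC_PER_USEC	1000UL

/* Illustrative re-derivation of the compressed-ack timer delay.
 * rtt_us3 is the RTT estimate in microseconds shifted left by 3.
 */
static unsigned long comp_sack_delay(unsigned long rtt_us3,
				     unsigned long sysctl_delay_ns)
{
	/* rtt_us3 * 125 / 20 == rtt_us * 1000 / 20 == rtt_ns / 20,
	 * i.e. 5% of the RTT, capped by the sysctl.
	 */
	unsigned long delay = rtt_us3 * (NSEC_PER_USEC >> 3) / 20;

	return delay < sysctl_delay_ns ? delay : sysctl_delay_ns;
}

int main(void)
{
	/* A 2 ms RTT (2000 us << 3) yields 100,000 ns (100 us),
	 * well under the 1 ms default cap.
	 */
	printf("%lu ns\n", comp_sack_delay(2000UL << 3, 1000000UL));
	return 0;
}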


@@ -2780,6 +2780,7 @@ static int __net_init tcp_sk_init(struct net *net)
 		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
 	}
 	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
+	net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
 	net->ipv4.sysctl_tcp_comp_sack_nr = 44;
 	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
 	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);


@@ -184,10 +184,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
+	if (unlikely(tp->compressed_ack)) {
 		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
-			      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
-		tp->compressed_ack = TCP_FASTRETRANS_THRESH;
+			      tp->compressed_ack);
+		tp->compressed_ack = 0;
 		if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
 			__sock_put(sk);
 	}


@@ -753,8 +753,14 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk)) {
-		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+		if (tp->compressed_ack) {
+			/* Since we have to send one ack finally,
+			 * subtract one from tp->compressed_ack to keep
+			 * LINUX_MIB_TCPACKCOMPRESSED accurate.
+			 */
+			tp->compressed_ack--;
 			tcp_send_ack(sk);
+		}
 	} else {
 		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
 				      &sk->sk_tsq_flags))
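
Both tcp_sack_compress_send_ack() and the timer callback above preserve the same invariant: when N ACKs have been absorbed into the compressed state and one is finally emitted, LINUX_MIB_TCPACKCOMPRESSED should grow by N - 1, the number of ACKs actually saved. A toy model of that accounting (hypothetical names, not kernel code):

#include <stdio.h>

/* Toy model of the compressed-ACK accounting invariant: every
 * suppressed ACK bumps a counter, and when the pending ACK is
 * finally sent, the MIB is credited with counter - 1, since one
 * ACK does go on the wire.
 */
static unsigned int compressed_ack, mib_ack_compressed;

static void absorb_ack(void)
{
	compressed_ack++;		/* ACK suppressed, timer armed */
}

static void flush_ack(void)
{
	if (!compressed_ack)
		return;
	mib_ack_compressed += compressed_ack - 1;
	compressed_ack = 0;		/* one real ACK is sent here */
}

int main(void)
{
	for (int i = 0; i < 5; i++)
		absorb_ack();
	flush_ack();
	printf("saved %u ACKs\n", mib_ack_compressed);	/* prints 4 */
	return 0;
}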