tcp: switch pacing timer to softirq based hrtimer
Linux 4.16 added support for softirq-based hrtimers. TCP can switch its pacing hrtimer to this variant, since this avoids going through a tasklet and some atomic operations.

The pacing timer logic now looks like the other (jiffies-based) TCP timers.

v2: use hrtimer_try_to_cancel() in tcp_clear_xmit_timers() to correctly release the reference on the socket if needed.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Родитель
4cbd7a7d3c
Коммит
73a6bab5aa
|
@ -557,7 +557,9 @@ void tcp_fin(struct sock *sk);
|
|||
void tcp_init_xmit_timers(struct sock *);
|
||||
static inline void tcp_clear_xmit_timers(struct sock *sk)
|
||||
{
|
||||
hrtimer_cancel(&tcp_sk(sk)->pacing_timer);
|
||||
if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1)
|
||||
sock_put(sk);
|
||||
|
||||
inet_csk_clear_xmit_timers(sk);
|
||||
}
|
||||
|
||||
|
|
|
@ -772,7 +772,7 @@ struct tsq_tasklet {
|
|||
};
|
||||
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
|
||||
|
||||
static void tcp_tsq_handler(struct sock *sk)
|
||||
static void tcp_tsq_write(struct sock *sk)
|
||||
{
|
||||
if ((1 << sk->sk_state) &
|
||||
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
|
||||
|
@ -789,6 +789,16 @@ static void tcp_tsq_handler(struct sock *sk)
|
|||
0, GFP_ATOMIC);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * tcp_tsq_handler - run tcp_tsq_write() now, or mark it as deferred.
 * @sk: socket to process
 *
 * Under bh_lock_sock(): if the socket is not owned by a user context,
 * tcp_tsq_write() is called directly. Otherwise TCP_TSQ_DEFERRED is set in
 * sk->sk_tsq_flags, and a socket reference is taken with sock_hold() only
 * when this call is the one that actually set the bit (it was clear before).
 *
 * NOTE(review): the deferred work and the matching reference drop appear to
 * be done in tcp_release_cb() (tcp_tsq_write() followed by __sock_put()) --
 * confirm against the full file.
 */
static void tcp_tsq_handler(struct sock *sk)
|
||||
{
|
||||
bh_lock_sock(sk);
|
||||
if (!sock_owned_by_user(sk))
|
||||
tcp_tsq_write(sk);
|
||||
else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
|
||||
sock_hold(sk);
|
||||
bh_unlock_sock(sk);
|
||||
}
|
||||
/*
|
||||
* One tasklet per cpu tries to send more skbs.
|
||||
* We run in tasklet context but need to disable irqs when
|
||||
|
@ -816,16 +826,7 @@ static void tcp_tasklet_func(unsigned long data)
|
|||
smp_mb__before_atomic();
|
||||
clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
|
||||
|
||||
if (!sk->sk_lock.owned &&
|
||||
test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
|
||||
bh_lock_sock(sk);
|
||||
if (!sock_owned_by_user(sk)) {
|
||||
clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
|
||||
tcp_tsq_handler(sk);
|
||||
}
|
||||
bh_unlock_sock(sk);
|
||||
}
|
||||
|
||||
sk_free(sk);
|
||||
}
|
||||
}
|
||||
|
@ -853,9 +854,10 @@ void tcp_release_cb(struct sock *sk)
|
|||
nflags = flags & ~TCP_DEFERRED_ALL;
|
||||
} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
|
||||
|
||||
if (flags & TCPF_TSQ_DEFERRED)
|
||||
tcp_tsq_handler(sk);
|
||||
|
||||
if (flags & TCPF_TSQ_DEFERRED) {
|
||||
tcp_tsq_write(sk);
|
||||
__sock_put(sk);
|
||||
}
|
||||
/* Here begins the tricky part :
|
||||
* We are called from release_sock() with :
|
||||
* 1) BH disabled
|
||||
|
@ -929,7 +931,7 @@ void tcp_wfree(struct sk_buff *skb)
|
|||
if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
|
||||
goto out;
|
||||
|
||||
nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
|
||||
nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
|
||||
nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
|
||||
if (nval != oval)
|
||||
continue;
|
||||
|
@ -948,37 +950,17 @@ out:
|
|||
sk_free(sk);
|
||||
}
|
||||
|
||||
/* Note: Called under hard irq.
|
||||
* We can not call TCP stack right away.
|
||||
/* Note: Called under soft irq.
|
||||
* We can call TCP stack right away, unless socket is owned by user.
|
||||
*/
|
||||
enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
|
||||
{
|
||||
struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
|
||||
struct sock *sk = (struct sock *)tp;
|
||||
unsigned long nval, oval;
|
||||
|
||||
for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
|
||||
struct tsq_tasklet *tsq;
|
||||
bool empty;
|
||||
tcp_tsq_handler(sk);
|
||||
sock_put(sk);
|
||||
|
||||
if (oval & TSQF_QUEUED)
|
||||
break;
|
||||
|
||||
nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
|
||||
nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
|
||||
if (nval != oval)
|
||||
continue;
|
||||
|
||||
if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
|
||||
break;
|
||||
/* queue this socket to tasklet queue */
|
||||
tsq = this_cpu_ptr(&tsq_tasklet);
|
||||
empty = list_empty(&tsq->head);
|
||||
list_add(&tp->tsq_node, &tsq->head);
|
||||
if (empty)
|
||||
tasklet_schedule(&tsq->tasklet);
|
||||
break;
|
||||
}
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
|
@ -1011,7 +993,8 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
|
|||
do_div(len_ns, rate);
|
||||
hrtimer_start(&tcp_sk(sk)->pacing_timer,
|
||||
ktime_add_ns(ktime_get(), len_ns),
|
||||
HRTIMER_MODE_ABS_PINNED);
|
||||
HRTIMER_MODE_ABS_PINNED_SOFT);
|
||||
sock_hold(sk);
|
||||
}
|
||||
|
||||
static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
|
||||
|
@ -1078,7 +1061,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
|
|||
|
||||
/* if no packet is in qdisc/device queue, then allow XPS to select
|
||||
* another queue. We can be called from tcp_tsq_handler()
|
||||
* which holds one reference to sk_wmem_alloc.
|
||||
* which holds one reference to sk.
|
||||
*
|
||||
* TODO: Ideally, in-flight pure ACK packets should not matter here.
|
||||
* One way to get this would be to set skb->truesize = 2 on them.
|
||||
|
@ -2185,7 +2168,7 @@ static int tcp_mtu_probe(struct sock *sk)
|
|||
static bool tcp_pacing_check(const struct sock *sk)
|
||||
{
|
||||
return tcp_needs_internal_pacing(sk) &&
|
||||
hrtimer_active(&tcp_sk(sk)->pacing_timer);
|
||||
hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
|
||||
}
|
||||
|
||||
/* TCP Small Queues :
|
||||
|
@ -2365,8 +2348,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
|||
skb, limit, mss_now, gfp)))
|
||||
break;
|
||||
|
||||
if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
|
||||
clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
|
||||
if (tcp_small_queue_check(sk, skb, 0))
|
||||
break;
|
||||
|
||||
|
|
|
@ -713,6 +713,6 @@ void tcp_init_xmit_timers(struct sock *sk)
|
|||
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
|
||||
&tcp_keepalive_timer);
|
||||
hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
|
||||
HRTIMER_MODE_ABS_PINNED);
|
||||
HRTIMER_MODE_ABS_PINNED_SOFT);
|
||||
tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче