tcp: add tcp_sock_set_nodelay
Add a helper to directly set the TCP_NODELAY sockopt from kernel space without going through a fake uaccess. Clean up the callers to avoid pointless wrappers now that this is a simple function call. Signed-off-by: Christoph Hellwig <hch@lst.de> Acked-by: Sagi Grimberg <sagi@grimberg.me> Acked-by: Jason Gunthorpe <jgg@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Родитель
db10538a4b
Коммит
12abc5ee78
|
@ -1570,13 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled);
|
||||||
extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
|
extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
|
||||||
extern int drbd_connected(struct drbd_peer_device *);
|
extern int drbd_connected(struct drbd_peer_device *);
|
||||||
|
|
||||||
static inline void drbd_tcp_nodelay(struct socket *sock)
|
|
||||||
{
|
|
||||||
int val = 1;
|
|
||||||
(void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
|
|
||||||
(char*)&val, sizeof(val));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void drbd_tcp_quickack(struct socket *sock)
|
static inline void drbd_tcp_quickack(struct socket *sock)
|
||||||
{
|
{
|
||||||
int val = 2;
|
int val = 2;
|
||||||
|
|
|
@ -660,7 +660,7 @@ static int __send_command(struct drbd_connection *connection, int vnr,
|
||||||
/* DRBD protocol "pings" are latency critical.
|
/* DRBD protocol "pings" are latency critical.
|
||||||
* This is supposed to trigger tcp_push_pending_frames() */
|
* This is supposed to trigger tcp_push_pending_frames() */
|
||||||
if (!err && (cmd == P_PING || cmd == P_PING_ACK))
|
if (!err && (cmd == P_PING || cmd == P_PING_ACK))
|
||||||
drbd_tcp_nodelay(sock->socket);
|
tcp_sock_set_nodelay(sock->socket->sk);
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1051,8 +1051,8 @@ randomize:
|
||||||
|
|
||||||
/* we don't want delays.
|
/* we don't want delays.
|
||||||
* we use TCP_CORK where appropriate, though */
|
* we use TCP_CORK where appropriate, though */
|
||||||
drbd_tcp_nodelay(sock.socket);
|
tcp_sock_set_nodelay(sock.socket->sk);
|
||||||
drbd_tcp_nodelay(msock.socket);
|
tcp_sock_set_nodelay(msock.socket->sk);
|
||||||
|
|
||||||
connection->data.socket = sock.socket;
|
connection->data.socket = sock.socket;
|
||||||
connection->meta.socket = msock.socket;
|
connection->meta.socket = msock.socket;
|
||||||
|
|
|
@ -947,16 +947,8 @@ static void siw_accept_newconn(struct siw_cep *cep)
|
||||||
siw_cep_get(new_cep);
|
siw_cep_get(new_cep);
|
||||||
new_s->sk->sk_user_data = new_cep;
|
new_s->sk->sk_user_data = new_cep;
|
||||||
|
|
||||||
if (siw_tcp_nagle == false) {
|
if (siw_tcp_nagle == false)
|
||||||
int val = 1;
|
tcp_sock_set_nodelay(new_s->sk);
|
||||||
|
|
||||||
rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY,
|
|
||||||
(char *)&val, sizeof(val));
|
|
||||||
if (rv) {
|
|
||||||
siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv);
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ;
|
new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ;
|
||||||
|
|
||||||
rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT);
|
rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT);
|
||||||
|
@ -1386,16 +1378,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
|
||||||
siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
|
siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
if (siw_tcp_nagle == false) {
|
if (siw_tcp_nagle == false)
|
||||||
int val = 1;
|
tcp_sock_set_nodelay(s->sk);
|
||||||
|
|
||||||
rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val,
|
|
||||||
sizeof(val));
|
|
||||||
if (rv) {
|
|
||||||
siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv);
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cep = siw_cep_alloc(sdev);
|
cep = siw_cep_alloc(sdev);
|
||||||
if (!cep) {
|
if (!cep) {
|
||||||
rv = -ENOMEM;
|
rv = -ENOMEM;
|
||||||
|
|
|
@ -1346,14 +1346,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set TCP no delay */
|
/* Set TCP no delay */
|
||||||
opt = 1;
|
tcp_sock_set_nodelay(queue->sock->sk);
|
||||||
ret = kernel_setsockopt(queue->sock, IPPROTO_TCP,
|
|
||||||
TCP_NODELAY, (char *)&opt, sizeof(opt));
|
|
||||||
if (ret) {
|
|
||||||
dev_err(nctrl->device,
|
|
||||||
"failed to set TCP_NODELAY sock opt %d\n", ret);
|
|
||||||
goto err_sock;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Cleanup whatever is sitting in the TCP transmit queue on socket
|
* Cleanup whatever is sitting in the TCP transmit queue on socket
|
||||||
|
|
|
@ -1580,7 +1580,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
|
||||||
{
|
{
|
||||||
struct nvmet_tcp_port *port;
|
struct nvmet_tcp_port *port;
|
||||||
__kernel_sa_family_t af;
|
__kernel_sa_family_t af;
|
||||||
int opt, ret;
|
int ret;
|
||||||
|
|
||||||
port = kzalloc(sizeof(*port), GFP_KERNEL);
|
port = kzalloc(sizeof(*port), GFP_KERNEL);
|
||||||
if (!port)
|
if (!port)
|
||||||
|
@ -1625,15 +1625,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
|
||||||
port->data_ready = port->sock->sk->sk_data_ready;
|
port->data_ready = port->sock->sk->sk_data_ready;
|
||||||
port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
|
port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
|
||||||
sock_set_reuseaddr(port->sock->sk);
|
sock_set_reuseaddr(port->sock->sk);
|
||||||
|
tcp_sock_set_nodelay(port->sock->sk);
|
||||||
opt = 1;
|
|
||||||
ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
|
|
||||||
TCP_NODELAY, (char *)&opt, sizeof(opt));
|
|
||||||
if (ret) {
|
|
||||||
pr_err("failed to set TCP_NODELAY sock opt %d\n", ret);
|
|
||||||
goto err_sock;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (so_priority > 0)
|
if (so_priority > 0)
|
||||||
sock_set_priority(port->sock->sk, so_priority);
|
sock_set_priority(port->sock->sk, so_priority);
|
||||||
|
|
||||||
|
|
|
@ -897,20 +897,11 @@ int iscsit_setup_np(
|
||||||
/*
|
/*
|
||||||
* Set SO_REUSEADDR, and disable Nagel Algorithm with TCP_NODELAY.
|
* Set SO_REUSEADDR, and disable Nagel Algorithm with TCP_NODELAY.
|
||||||
*/
|
*/
|
||||||
/* FIXME: Someone please explain why this is endian-safe */
|
if (np->np_network_transport == ISCSI_TCP)
|
||||||
opt = 1;
|
tcp_sock_set_nodelay(sock->sk);
|
||||||
if (np->np_network_transport == ISCSI_TCP) {
|
|
||||||
ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
|
|
||||||
(char *)&opt, sizeof(opt));
|
|
||||||
if (ret < 0) {
|
|
||||||
pr_err("kernel_setsockopt() for TCP_NODELAY"
|
|
||||||
" failed: %d\n", ret);
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sock_set_reuseaddr(sock->sk);
|
sock_set_reuseaddr(sock->sk);
|
||||||
|
|
||||||
|
opt = 1;
|
||||||
ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND,
|
ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND,
|
||||||
(char *)&opt, sizeof(opt));
|
(char *)&opt, sizeof(opt));
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
|
|
|
@ -3929,14 +3929,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
|
||||||
socket->sk->sk_rcvbuf = 140 * 1024;
|
socket->sk->sk_rcvbuf = 140 * 1024;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (server->tcp_nodelay) {
|
if (server->tcp_nodelay)
|
||||||
int val = 1;
|
tcp_sock_set_nodelay(socket->sk);
|
||||||
rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
|
|
||||||
(char *)&val, sizeof(val));
|
|
||||||
if (rc)
|
|
||||||
cifs_dbg(FYI, "set TCP_NODELAY socket option error %d\n",
|
|
||||||
rc);
|
|
||||||
}
|
|
||||||
|
|
||||||
cifs_dbg(FYI, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx\n",
|
cifs_dbg(FYI, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx\n",
|
||||||
socket->sk->sk_sndbuf,
|
socket->sk->sk_sndbuf,
|
||||||
|
|
|
@ -1011,7 +1011,6 @@ static void tcp_connect_to_sock(struct connection *con)
|
||||||
struct sockaddr_storage saddr, src_addr;
|
struct sockaddr_storage saddr, src_addr;
|
||||||
int addr_len;
|
int addr_len;
|
||||||
struct socket *sock = NULL;
|
struct socket *sock = NULL;
|
||||||
int one = 1;
|
|
||||||
int result;
|
int result;
|
||||||
|
|
||||||
if (con->nodeid == 0) {
|
if (con->nodeid == 0) {
|
||||||
|
@ -1060,8 +1059,7 @@ static void tcp_connect_to_sock(struct connection *con)
|
||||||
log_print("connecting to %d", con->nodeid);
|
log_print("connecting to %d", con->nodeid);
|
||||||
|
|
||||||
/* Turn off Nagle's algorithm */
|
/* Turn off Nagle's algorithm */
|
||||||
kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
|
tcp_sock_set_nodelay(sock->sk);
|
||||||
sizeof(one));
|
|
||||||
|
|
||||||
result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
|
result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
|
||||||
O_NONBLOCK);
|
O_NONBLOCK);
|
||||||
|
@ -1103,7 +1101,6 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
|
||||||
{
|
{
|
||||||
struct socket *sock = NULL;
|
struct socket *sock = NULL;
|
||||||
int result = 0;
|
int result = 0;
|
||||||
int one = 1;
|
|
||||||
int addr_len;
|
int addr_len;
|
||||||
|
|
||||||
if (dlm_local_addr[0]->ss_family == AF_INET)
|
if (dlm_local_addr[0]->ss_family == AF_INET)
|
||||||
|
@ -1120,8 +1117,7 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Turn off Nagle's algorithm */
|
/* Turn off Nagle's algorithm */
|
||||||
kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
|
tcp_sock_set_nodelay(sock->sk);
|
||||||
sizeof(one));
|
|
||||||
|
|
||||||
sock_set_reuseaddr(sock->sk);
|
sock_set_reuseaddr(sock->sk);
|
||||||
|
|
||||||
|
|
|
@ -1441,14 +1441,6 @@ static void o2net_rx_until_empty(struct work_struct *work)
|
||||||
sc_put(sc);
|
sc_put(sc);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int o2net_set_nodelay(struct socket *sock)
|
|
||||||
{
|
|
||||||
int val = 1;
|
|
||||||
|
|
||||||
return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
|
|
||||||
(void *)&val, sizeof(val));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int o2net_set_usertimeout(struct socket *sock)
|
static int o2net_set_usertimeout(struct socket *sock)
|
||||||
{
|
{
|
||||||
int user_timeout = O2NET_TCP_USER_TIMEOUT;
|
int user_timeout = O2NET_TCP_USER_TIMEOUT;
|
||||||
|
@ -1636,11 +1628,7 @@ static void o2net_start_connect(struct work_struct *work)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = o2net_set_nodelay(sc->sc_sock);
|
tcp_sock_set_nodelay(sc->sc_sock->sk);
|
||||||
if (ret) {
|
|
||||||
mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = o2net_set_usertimeout(sock);
|
ret = o2net_set_usertimeout(sock);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
@ -1832,11 +1820,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
|
||||||
*more = 1;
|
*more = 1;
|
||||||
new_sock->sk->sk_allocation = GFP_ATOMIC;
|
new_sock->sk->sk_allocation = GFP_ATOMIC;
|
||||||
|
|
||||||
ret = o2net_set_nodelay(new_sock);
|
tcp_sock_set_nodelay(new_sock->sk);
|
||||||
if (ret) {
|
|
||||||
mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = o2net_set_usertimeout(new_sock);
|
ret = o2net_set_usertimeout(new_sock);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
|
|
@ -498,5 +498,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
|
||||||
int shiftlen);
|
int shiftlen);
|
||||||
|
|
||||||
void tcp_sock_set_cork(struct sock *sk, bool on);
|
void tcp_sock_set_cork(struct sock *sk, bool on);
|
||||||
|
void tcp_sock_set_nodelay(struct sock *sk);
|
||||||
|
|
||||||
#endif /* _LINUX_TCP_H */
|
#endif /* _LINUX_TCP_H */
|
||||||
|
|
|
@ -490,15 +490,8 @@ static int ceph_tcp_connect(struct ceph_connection *con)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
|
if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY))
|
||||||
int optval = 1;
|
tcp_sock_set_nodelay(sock->sk);
|
||||||
|
|
||||||
ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
|
|
||||||
(char *)&optval, sizeof(optval));
|
|
||||||
if (ret)
|
|
||||||
pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d",
|
|
||||||
ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
con->sock = sock;
|
con->sock = sock;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -2832,6 +2832,30 @@ void tcp_sock_set_cork(struct sock *sk, bool on)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(tcp_sock_set_cork);
|
EXPORT_SYMBOL(tcp_sock_set_cork);
|
||||||
|
|
||||||
|
/* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is
|
||||||
|
* remembered, but it is not activated until cork is cleared.
|
||||||
|
*
|
||||||
|
* However, when TCP_NODELAY is set we make an explicit push, which overrides
|
||||||
|
* even TCP_CORK for currently queued segments.
|
||||||
|
*/
|
||||||
|
static void __tcp_sock_set_nodelay(struct sock *sk, bool on)
|
||||||
|
{
|
||||||
|
if (on) {
|
||||||
|
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
|
||||||
|
tcp_push_pending_frames(sk);
|
||||||
|
} else {
|
||||||
|
tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void tcp_sock_set_nodelay(struct sock *sk)
|
||||||
|
{
|
||||||
|
lock_sock(sk);
|
||||||
|
__tcp_sock_set_nodelay(sk, true);
|
||||||
|
release_sock(sk);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(tcp_sock_set_nodelay);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Socket option code for TCP.
|
* Socket option code for TCP.
|
||||||
*/
|
*/
|
||||||
|
@ -2929,20 +2953,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TCP_NODELAY:
|
case TCP_NODELAY:
|
||||||
if (val) {
|
__tcp_sock_set_nodelay(sk, val);
|
||||||
/* TCP_NODELAY is weaker than TCP_CORK, so that
|
|
||||||
* this option on corked socket is remembered, but
|
|
||||||
* it is not activated until cork is cleared.
|
|
||||||
*
|
|
||||||
* However, when TCP_NODELAY is set we make
|
|
||||||
* an explicit push, which overrides even TCP_CORK
|
|
||||||
* for currently queued segments.
|
|
||||||
*/
|
|
||||||
tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
|
|
||||||
tcp_push_pending_frames(sk);
|
|
||||||
} else {
|
|
||||||
tp->nonagle &= ~TCP_NAGLE_OFF;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TCP_THIN_LINEAR_TIMEOUTS:
|
case TCP_THIN_LINEAR_TIMEOUTS:
|
||||||
|
|
|
@ -89,15 +89,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = {
|
||||||
{ }
|
{ }
|
||||||
};
|
};
|
||||||
|
|
||||||
/* doing it this way avoids calling tcp_sk() */
|
|
||||||
void rds_tcp_nonagle(struct socket *sock)
|
|
||||||
{
|
|
||||||
int val = 1;
|
|
||||||
|
|
||||||
kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val,
|
|
||||||
sizeof(val));
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
|
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
|
||||||
{
|
{
|
||||||
/* seq# of the last byte of data in tcp send buffer */
|
/* seq# of the last byte of data in tcp send buffer */
|
||||||
|
@ -502,7 +493,7 @@ void rds_tcp_tune(struct socket *sock)
|
||||||
struct net *net = sock_net(sk);
|
struct net *net = sock_net(sk);
|
||||||
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
|
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
|
||||||
|
|
||||||
rds_tcp_nonagle(sock);
|
tcp_sock_set_nodelay(sock->sk);
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
if (rtn->sndbuf_size > 0) {
|
if (rtn->sndbuf_size > 0) {
|
||||||
sk->sk_sndbuf = rtn->sndbuf_size;
|
sk->sk_sndbuf = rtn->sndbuf_size;
|
||||||
|
|
|
@ -50,7 +50,6 @@ struct rds_tcp_statistics {
|
||||||
|
|
||||||
/* tcp.c */
|
/* tcp.c */
|
||||||
void rds_tcp_tune(struct socket *sock);
|
void rds_tcp_tune(struct socket *sock);
|
||||||
void rds_tcp_nonagle(struct socket *sock);
|
|
||||||
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
|
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
|
||||||
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
|
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
|
||||||
void rds_tcp_restore_callbacks(struct socket *sock,
|
void rds_tcp_restore_callbacks(struct socket *sock,
|
||||||
|
|
|
@ -288,7 +288,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
|
||||||
}
|
}
|
||||||
|
|
||||||
sock->sk->sk_reuse = SK_CAN_REUSE;
|
sock->sk->sk_reuse = SK_CAN_REUSE;
|
||||||
rds_tcp_nonagle(sock);
|
tcp_sock_set_nodelay(sock->sk);
|
||||||
|
|
||||||
write_lock_bh(&sock->sk->sk_callback_lock);
|
write_lock_bh(&sock->sk->sk_callback_lock);
|
||||||
sock->sk->sk_user_data = sock->sk->sk_data_ready;
|
sock->sk->sk_user_data = sock->sk->sk_data_ready;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче