2019-05-27 09:55:01 +03:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
 *	common UDP/RAW code
 *	Linux INET implementation
 *
 * Authors:
 *	Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
 */
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/in.h>
|
2005-08-16 09:18:02 +04:00
|
|
|
#include <net/ip.h>
|
2005-04-17 02:20:36 +04:00
|
|
|
#include <net/sock.h>
|
|
|
|
#include <net/route.h>
|
2005-08-10 07:08:28 +04:00
|
|
|
#include <net/tcp_states.h>
|
2019-09-13 04:16:39 +03:00
|
|
|
#include <net/sock_reuseport.h>
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2015-07-14 09:10:22 +03:00
|
|
|
int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
|
|
|
struct inet_sock *inet = inet_sk(sk);
|
|
|
|
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
|
2011-05-07 09:27:25 +04:00
|
|
|
struct flowi4 *fl4;
|
2005-04-17 02:20:36 +04:00
|
|
|
struct rtable *rt;
|
2006-09-27 08:27:15 +04:00
|
|
|
__be32 saddr;
|
2005-04-17 02:20:36 +04:00
|
|
|
int oif;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
|
2007-02-09 17:24:47 +03:00
|
|
|
if (addr_len < sizeof(*usin))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (usin->sin_family != AF_INET)
|
|
|
|
return -EAFNOSUPPORT;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
sk_dst_reset(sk);
|
|
|
|
|
|
|
|
oif = sk->sk_bound_dev_if;
|
2009-10-15 10:30:45 +04:00
|
|
|
saddr = inet->inet_saddr;
|
2007-12-17 00:45:43 +03:00
|
|
|
if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
|
2018-10-01 11:40:23 +03:00
|
|
|
if (!oif || netif_index_is_l3_master(sock_net(sk), oif))
|
2005-04-17 02:20:36 +04:00
|
|
|
oif = inet->mc_index;
|
|
|
|
if (!saddr)
|
|
|
|
saddr = inet->mc_addr;
|
net-next: Fix IP_UNICAST_IF option behavior for connected sockets
The IP_UNICAST_IF socket option is used to set the outgoing interface
for outbound packets.
The IP_UNICAST_IF socket option was added as it was needed by the
Wine project, since no other existing option (SO_BINDTODEVICE socket
option, IP_PKTINFO socket option or the bind function) provided the
needed characteristics needed by the IP_UNICAST_IF socket option. [1]
The IP_UNICAST_IF socket option works well for unconnected sockets,
that is, the interface specified by the IP_UNICAST_IF socket option
is taken into consideration in the route lookup process when a packet
is being sent. However, for connected sockets, the outbound interface
is chosen when connecting the socket, and in the route lookup process
which is done when a packet is being sent, the interface specified by
the IP_UNICAST_IF socket option is being ignored.
This inconsistent behavior was reported and discussed in an issue
opened on systemd's GitHub project [2]. Also, a bug report was
submitted in the kernel's bugzilla [3].
To understand the problem in more detail, we can look at what happens
for UDP packets over IPv4 (The same analysis was done separately in
the referenced systemd issue).
When a UDP packet is sent the udp_sendmsg function gets called and
the following happens:
1. The oif member of the struct ipcm_cookie ipc (which stores the
output interface of the packet) is initialized by the ipcm_init_sk
function to inet->sk.sk_bound_dev_if (the device set by the
SO_BINDTODEVICE socket option).
2. If the IP_PKTINFO socket option was set, the oif member gets
overridden by the call to the ip_cmsg_send function.
3. If no output interface was selected yet, the interface specified
by the IP_UNICAST_IF socket option is used.
4. If the socket is connected and no destination address is
specified in the send function, the struct ipcm_cookie ipc is not
taken into consideration and the cached route, that was calculated in
the connect function is being used.
Thus, for a connected socket, the IP_UNICAST_IF sockopt isn't taken
into consideration.
This patch corrects the behavior of the IP_UNICAST_IF socket option
for connect()ed sockets by taking into consideration the
IP_UNICAST_IF sockopt when connecting the socket.
In order to avoid reconnecting the socket, this option is still
ignored when applied on an already connected socket until connect()
is called again by the Richard Gobert.
Change the __ip4_datagram_connect function, which is called during
socket connection, to take into consideration the interface set by
the IP_UNICAST_IF socket option, in a similar way to what is done in
the udp_sendmsg function.
[1] https://lore.kernel.org/netdev/1328685717.4736.4.camel@edumazet-laptop/T/
[2] https://github.com/systemd/systemd/issues/11935#issuecomment-618691018
[3] https://bugzilla.kernel.org/show_bug.cgi?id=210255
Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20220829111554.GA1771@debian
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-08-29 14:18:51 +03:00
|
|
|
} else if (!oif) {
|
|
|
|
oif = inet->uc_index;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
2011-05-07 09:27:25 +04:00
|
|
|
fl4 = &inet->cork.fl.u.ip4;
|
2022-04-21 02:21:33 +03:00
|
|
|
rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif,
|
|
|
|
sk->sk_protocol, inet->inet_sport,
|
|
|
|
usin->sin_port, sk);
|
2011-03-03 01:31:35 +03:00
|
|
|
if (IS_ERR(rt)) {
|
|
|
|
err = PTR_ERR(rt);
|
2007-06-01 09:49:28 +04:00
|
|
|
if (err == -ENETUNREACH)
|
2013-11-15 01:37:54 +04:00
|
|
|
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
|
2011-05-07 09:27:25 +04:00
|
|
|
goto out;
|
2007-06-01 09:49:28 +04:00
|
|
|
}
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
|
|
|
|
ip_rt_put(rt);
|
2011-05-07 09:27:25 +04:00
|
|
|
err = -EACCES;
|
|
|
|
goto out;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
2009-10-15 10:30:45 +04:00
|
|
|
if (!inet->inet_saddr)
|
2011-05-07 09:27:25 +04:00
|
|
|
inet->inet_saddr = fl4->saddr; /* Update source address */
|
udp: add rehash on connect()
commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation)
added a secondary hash on UDP, hashed on (local addr, local port).
Problem is that following sequence :
fd = socket(...)
connect(fd, &remote, ...)
not only selects remote end point (address and port), but also sets
local address, while UDP stack stored in secondary hash table the socket
while its local address was INADDR_ANY (or ipv6 equivalent)
Sequence is :
- autobind() : choose a random local port, insert socket in hash tables
[while local address is INADDR_ANY]
- connect() : set remote address and port, change local address to IP
given by a route lookup.
When an incoming UDP frame comes, if more than 10 sockets are found in
primary hash table, we switch to secondary table, and fail to find
socket because its local address changed.
One solution to this problem is to rehash datagram socket if needed.
We add a new rehash(struct socket *) method in "struct proto", and
implement this method for UDP v4 & v6, using a common helper.
This rehashing only takes care of secondary hash table, since primary
hash (based on local port only) is not changed.
Reported-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-09-08 09:08:44 +04:00
|
|
|
if (!inet->inet_rcv_saddr) {
|
2011-05-07 09:27:25 +04:00
|
|
|
inet->inet_rcv_saddr = fl4->saddr;
|
udp: add rehash on connect()
commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation)
added a secondary hash on UDP, hashed on (local addr, local port).
Problem is that following sequence :
fd = socket(...)
connect(fd, &remote, ...)
not only selects remote end point (address and port), but also sets
local address, while UDP stack stored in secondary hash table the socket
while its local address was INADDR_ANY (or ipv6 equivalent)
Sequence is :
- autobind() : choose a random local port, insert socket in hash tables
[while local address is INADDR_ANY]
- connect() : set remote address and port, change local address to IP
given by a route lookup.
When an incoming UDP frame comes, if more than 10 sockets are found in
primary hash table, we switch to secondary table, and fail to find
socket because its local address changed.
One solution to this problem is to rehash datagram socket if needed.
We add a new rehash(struct socket *) method in "struct proto", and
implement this method for UDP v4 & v6, using a common helper.
This rehashing only takes care of secondary hash table, since primary
hash (based on local port only) is not changed.
Reported-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-09-08 09:08:44 +04:00
|
|
|
if (sk->sk_prot->rehash)
|
|
|
|
sk->sk_prot->rehash(sk);
|
|
|
|
}
|
2011-05-07 09:27:25 +04:00
|
|
|
inet->inet_daddr = fl4->daddr;
|
2009-10-15 10:30:45 +04:00
|
|
|
inet->inet_dport = usin->sin_port;
|
2019-09-13 04:16:39 +03:00
|
|
|
reuseport_has_conns(sk, true);
|
2005-04-17 02:20:36 +04:00
|
|
|
sk->sk_state = TCP_ESTABLISHED;
|
2015-07-29 02:02:05 +03:00
|
|
|
sk_set_txhash(sk);
|
2022-10-05 18:23:53 +03:00
|
|
|
inet->inet_id = get_random_u16();
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2010-06-11 10:31:35 +04:00
|
|
|
sk_dst_set(sk, &rt->dst);
|
2011-05-07 09:27:25 +04:00
|
|
|
err = 0;
|
|
|
|
out:
|
|
|
|
return err;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
2015-07-14 09:10:22 +03:00
|
|
|
EXPORT_SYMBOL(__ip4_datagram_connect);
|
|
|
|
|
|
|
|
/*
 * ip4_datagram_connect - connect an IPv4 datagram socket
 * @sk:       socket being connected
 * @uaddr:    destination address, passed through unchanged
 * @addr_len: length of @uaddr in bytes, passed through unchanged
 *
 * Takes the socket lock, delegates to __ip4_datagram_connect() and
 * releases the lock again.  Returns whatever __ip4_datagram_connect()
 * returned.
 */
int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	int res;

	lock_sock(sk);
	res = __ip4_datagram_connect(sk, uaddr, addr_len);
	release_sock(sk);
	return res;
}
EXPORT_SYMBOL(ip4_datagram_connect);
|
2013-01-21 06:00:03 +04:00
|
|
|
|
2014-06-10 17:43:01 +04:00
|
|
|
/* Because UDP xmit path can manipulate sk_dst_cache without holding
|
|
|
|
* socket lock, we need to use sk_dst_set() here,
|
|
|
|
* even if we own the socket lock.
|
|
|
|
*/
|
2013-01-21 06:00:03 +04:00
|
|
|
void ip4_datagram_release_cb(struct sock *sk)
|
|
|
|
{
|
|
|
|
const struct inet_sock *inet = inet_sk(sk);
|
|
|
|
const struct ip_options_rcu *inet_opt;
|
|
|
|
__be32 daddr = inet->inet_daddr;
|
2014-06-10 17:43:01 +04:00
|
|
|
struct dst_entry *dst;
|
2013-01-21 06:00:03 +04:00
|
|
|
struct flowi4 fl4;
|
|
|
|
struct rtable *rt;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
2014-06-10 17:43:01 +04:00
|
|
|
|
|
|
|
dst = __sk_dst_get(sk);
|
|
|
|
if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) {
|
|
|
|
rcu_read_unlock();
|
|
|
|
return;
|
|
|
|
}
|
2013-01-21 06:00:03 +04:00
|
|
|
inet_opt = rcu_dereference(inet->inet_opt);
|
|
|
|
if (inet_opt && inet_opt->opt.srr)
|
|
|
|
daddr = inet_opt->opt.faddr;
|
|
|
|
rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr,
|
|
|
|
inet->inet_saddr, inet->inet_dport,
|
|
|
|
inet->inet_sport, sk->sk_protocol,
|
|
|
|
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
|
2014-06-10 17:43:01 +04:00
|
|
|
|
|
|
|
dst = !IS_ERR(rt) ? &rt->dst : NULL;
|
|
|
|
sk_dst_set(sk, dst);
|
|
|
|
|
2013-01-21 06:00:03 +04:00
|
|
|
rcu_read_unlock();
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
|