From 4cdeeee9252af1ba50482f91d615f326365306bd Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 12 Dec 2018 13:15:33 -0800 Subject: [PATCH 1/5] net: udp: prefer listeners bound to an address A relatively common use case is to have several IPs configured on a host, and have different listeners for each of them. We would like to add a "catch all" listener on addr_any, to match incoming connections not served by any of the listeners bound to a specific address. However, port-only lookups can match addr_any sockets when sockets listening on specific addresses are present if so_reuseport flag is set. This patch eliminates lookups into port-only hashtable, as lookups by (addr,port) tuple are easily available. In addition, compute_score() is tweaked to _not_ match addr_any sockets to specific addresses, as hash collisions could result in the unwanted behavior described above. Tested: the patch compiles; full test in the last patch in this patchset. Existing reuseport_* selftests also pass. Suggested-by: Eric Dumazet Signed-off-by: Peter Oskolkov Signed-off-by: David S. Miller --- net/ipv4/udp.c | 76 +++++++++++++------------------------------------- 1 file changed, 19 insertions(+), 57 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index aff2a8e99e01..3fb0ed5e4789 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -380,15 +380,12 @@ static int compute_score(struct sock *sk, struct net *net, ipv6_only_sock(sk)) return -1; + if (sk->sk_rcv_saddr != daddr) + return -1; + score = (sk->sk_family == PF_INET) ? 
2 : 1; + inet = inet_sk(sk); - - if (inet->inet_rcv_saddr) { - if (inet->inet_rcv_saddr != daddr) - return -1; - score += 4; - } - if (inet->inet_daddr) { if (inet->inet_daddr != saddr) return -1; @@ -464,65 +461,30 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *udptable, struct sk_buff *skb) { - struct sock *sk, *result; + struct sock *result; unsigned short hnum = ntohs(dport); - unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); - struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; + unsigned int hash2, slot2; + struct udp_hslot *hslot2; bool exact_dif = udp_lib_exact_dif_match(net, skb); - int score, badness; - u32 hash = 0; - if (hslot->count > 10) { - hash2 = ipv4_portaddr_hash(net, daddr, hnum); + hash2 = ipv4_portaddr_hash(net, daddr, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + + result = udp4_lib_lookup2(net, saddr, sport, + daddr, hnum, dif, sdif, + exact_dif, hslot2, skb); + if (!result) { + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; - if (hslot->count < hslot2->count) - goto begin; result = udp4_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, sdif, + htonl(INADDR_ANY), hnum, dif, sdif, exact_dif, hslot2, skb); - if (!result) { - unsigned int old_slot2 = slot2; - hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); - slot2 = hash2 & udptable->mask; - /* avoid searching the same slot again. 
*/ - if (unlikely(slot2 == old_slot2)) - return result; - - hslot2 = &udptable->hash2[slot2]; - if (hslot->count < hslot2->count) - goto begin; - - result = udp4_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, sdif, - exact_dif, hslot2, skb); - } - if (unlikely(IS_ERR(result))) - return NULL; - return result; - } -begin: - result = NULL; - badness = 0; - sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, sdif, exact_dif); - if (score > badness) { - if (sk->sk_reuseport) { - hash = udp_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (unlikely(IS_ERR(result))) - return NULL; - if (result) - return result; - } - result = sk; - badness = score; - } } + if (unlikely(IS_ERR(result))) + return NULL; return result; } EXPORT_SYMBOL_GPL(__udp4_lib_lookup); From 23b0269e58aee1165133b9696e43992f969b5088 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 12 Dec 2018 13:15:34 -0800 Subject: [PATCH 2/5] net: udp6: prefer listeners bound to an address A relatively common use case is to have several IPs configured on a host, and have different listeners for each of them. We would like to add a "catch all" listener on addr_any, to match incoming connections not served by any of the listeners bound to a specific address. However, port-only lookups can match addr_any sockets when sockets listening on specific addresses are present if so_reuseport flag is set. This patch eliminates lookups into port-only hashtable, as lookups by (addr,port) tuple are easily available. In addition, compute_score() is tweaked to _not_ match addr_any sockets to specific addresses, as hash collisions could result in the unwanted behavior described above. Tested: the patch compiles; full test in the last patch in this patchset. Existing reuseport_* selftests also pass. Suggested-by: Eric Dumazet Signed-off-by: Peter Oskolkov Signed-off-by: David S. 
Miller --- net/ipv6/udp.c | 79 ++++++++++++++------------------------------------ 1 file changed, 21 insertions(+), 58 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 09cba4cfe31f..9cbf363172bd 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -125,6 +125,9 @@ static int compute_score(struct sock *sk, struct net *net, sk->sk_family != PF_INET6) return -1; + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + score = 0; inet = inet_sk(sk); @@ -134,12 +137,6 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; @@ -197,66 +194,32 @@ struct sock *__udp6_lib_lookup(struct net *net, int dif, int sdif, struct udp_table *udptable, struct sk_buff *skb) { - struct sock *sk, *result; unsigned short hnum = ntohs(dport); - unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); - struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; + unsigned int hash2, slot2; + struct udp_hslot *hslot2; + struct sock *result; bool exact_dif = udp6_lib_exact_dif_match(net, skb); - int score, badness; - u32 hash = 0; - if (hslot->count > 10) { - hash2 = ipv6_portaddr_hash(net, daddr, hnum); + hash2 = ipv6_portaddr_hash(net, daddr, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + + result = udp6_lib_lookup2(net, saddr, sport, + daddr, hnum, dif, sdif, exact_dif, + hslot2, skb); + if (!result) { + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; - if (hslot->count < hslot2->count) - goto begin; result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, sdif, exact_dif, - hslot2, skb); - if (!result) { - unsigned int old_slot2 = slot2; - hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); - slot2 
= hash2 & udptable->mask; - /* avoid searching the same slot again. */ - if (unlikely(slot2 == old_slot2)) - return result; - - hslot2 = &udptable->hash2[slot2]; - if (hslot->count < hslot2->count) - goto begin; - - result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, sdif, - exact_dif, hslot2, - skb); - } - if (unlikely(IS_ERR(result))) - return NULL; - return result; - } -begin: - result = NULL; - badness = -1; - sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, - sdif, exact_dif); - if (score > badness) { - if (sk->sk_reuseport) { - hash = udp6_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (unlikely(IS_ERR(result))) - return NULL; - if (result) - return result; - } - result = sk; - badness = score; - } + &in6addr_any, hnum, dif, sdif, + exact_dif, hslot2, + skb); } + if (unlikely(IS_ERR(result))) + return NULL; return result; } EXPORT_SYMBOL_GPL(__udp6_lib_lookup); From d9fbc7f6431fc0e5c0ddedf72206d7c5175c5c9a Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 12 Dec 2018 13:15:35 -0800 Subject: [PATCH 3/5] net: tcp: prefer listeners bound to an address A relatively common use case is to have several IPs configured on a host, and have different listeners for each of them. We would like to add a "catch all" listener on addr_any, to match incoming connections not served by any of the listeners bound to a specific address. However, port-only lookups can match addr_any sockets when sockets listening on specific addresses are present if so_reuseport flag is set. This patch eliminates lookups into port-only hashtable, as lookups by (addr,port) tuple are easily available. In addition, compute_score() is tweaked to _not_ match addr_any sockets to specific addresses, as hash collisions could result in the unwanted behavior described above. Tested: the patch compiles; full test in the last patch in this patchset. 
Existing reuseport_* selftests also pass. Suggested-by: Eric Dumazet Signed-off-by: Peter Oskolkov Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 60 +++++--------------------------------- 1 file changed, 8 insertions(+), 52 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 13890d5bfc34..cd03ab42705b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -234,24 +234,16 @@ static inline int compute_score(struct sock *sk, struct net *net, const int dif, const int sdif, bool exact_dif) { int score = -1; - struct inet_sock *inet = inet_sk(sk); - bool dev_match; - if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && + if (net_eq(sock_net(sk), net) && sk->sk_num == hnum && !ipv6_only_sock(sk)) { - __be32 rcv_saddr = inet->inet_rcv_saddr; - score = sk->sk_family == PF_INET ? 2 : 1; - if (rcv_saddr) { - if (rcv_saddr != daddr) - return -1; - score += 4; - } - dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, - dif, sdif); - if (!dev_match) + if (sk->sk_rcv_saddr != daddr) return -1; - score += 4; + if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) + return -1; + + score = sk->sk_family == PF_INET ? 2 : 1; if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; } @@ -307,26 +299,12 @@ struct sock *__inet_lookup_listener(struct net *net, const __be32 daddr, const unsigned short hnum, const int dif, const int sdif) { - unsigned int hash = inet_lhashfn(net, hnum); - struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - bool exact_dif = inet_exact_dif_match(net, skb); struct inet_listen_hashbucket *ilb2; - struct sock *sk, *result = NULL; - int score, hiscore = 0; + struct sock *result = NULL; unsigned int hash2; - u32 phash = 0; - - if (ilb->count <= 10 || !hashinfo->lhash2) - goto port_lookup; - - /* Too many sk in the ilb bucket (which is hashed by port alone). - * Try lhash2 (which is hashed by port and addr) instead. 
- */ hash2 = ipv4_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); - if (ilb2->count > ilb->count) - goto port_lookup; result = inet_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, daddr, hnum, @@ -335,34 +313,12 @@ struct sock *__inet_lookup_listener(struct net *net, goto done; /* Lookup lhash2 with INADDR_ANY */ - hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); - if (ilb2->count > ilb->count) - goto port_lookup; result = inet_lhash2_lookup(net, ilb2, skb, doff, - saddr, sport, daddr, hnum, + saddr, sport, htonl(INADDR_ANY), hnum, dif, sdif); - goto done; - -port_lookup: - sk_for_each_rcu(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, - dif, sdif, exact_dif); - if (score > hiscore) { - if (sk->sk_reuseport) { - phash = inet_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, phash, - skb, doff); - if (result) - goto done; - } - result = sk; - hiscore = score; - } - } done: if (unlikely(IS_ERR(result))) return NULL; From 0ee58dad5b065f5910c2c926d8c9f07cbe2db86c Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 12 Dec 2018 13:15:36 -0800 Subject: [PATCH 4/5] net: tcp6: prefer listeners bound to an address A relatively common use case is to have several IPs configured on a host, and have different listeners for each of them. We would like to add a "catch all" listener on addr_any, to match incoming connections not served by any of the listeners bound to a specific address. However, port-only lookups can match addr_any sockets when sockets listening on specific addresses are present if so_reuseport flag is set. This patch eliminates lookups into port-only hashtable, as lookups by (addr,port) tuple are easily available. In addition, compute_score() is tweaked to _not_ match addr_any sockets to specific addresses, as hash collisions could result in the unwanted behavior described above. 
Tested: the patch compiles; full test in the last patch in this patchset. Existing reuseport_* selftests also pass. Suggested-by: Eric Dumazet Signed-off-by: Peter Oskolkov Signed-off-by: David S. Miller --- net/ipv6/inet6_hashtables.c | 56 +++++-------------------------------- 1 file changed, 7 insertions(+), 49 deletions(-) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 5eeeba7181a1..f3515ebe9b3a 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -99,23 +99,16 @@ static inline int compute_score(struct sock *sk, struct net *net, const int dif, const int sdif, bool exact_dif) { int score = -1; - bool dev_match; if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + + if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) + return -1; score = 1; - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - score++; - } - dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, - dif, sdif); - if (!dev_match) - return -1; - score++; - if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; } @@ -164,26 +157,12 @@ struct sock *inet6_lookup_listener(struct net *net, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif) { - unsigned int hash = inet_lhashfn(net, hnum); - struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - bool exact_dif = inet6_exact_dif_match(net, skb); struct inet_listen_hashbucket *ilb2; - struct sock *sk, *result = NULL; - int score, hiscore = 0; + struct sock *result = NULL; unsigned int hash2; - u32 phash = 0; - - if (ilb->count <= 10 || !hashinfo->lhash2) - goto port_lookup; - - /* Too many sk in the ilb bucket (which is hashed by port alone). - * Try lhash2 (which is hashed by port and addr) instead. 
- */ hash2 = ipv6_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); - if (ilb2->count > ilb->count) - goto port_lookup; result = inet6_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, daddr, hnum, @@ -192,33 +171,12 @@ struct sock *inet6_lookup_listener(struct net *net, goto done; /* Lookup lhash2 with in6addr_any */ - hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); - if (ilb2->count > ilb->count) - goto port_lookup; result = inet6_lhash2_lookup(net, ilb2, skb, doff, - saddr, sport, daddr, hnum, + saddr, sport, &in6addr_any, hnum, dif, sdif); - goto done; - -port_lookup: - sk_for_each(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); - if (score > hiscore) { - if (sk->sk_reuseport) { - phash = inet6_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, phash, - skb, doff); - if (result) - goto done; - } - result = sk; - hiscore = score; - } - } done: if (unlikely(IS_ERR(result))) return NULL; From 6254e5c6a8d7c19e51e671e2648de2db06f8d504 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 12 Dec 2018 13:15:37 -0800 Subject: [PATCH 5/5] selftests: net: test that listening sockets match on address properly This patch adds a selftest that verifies that a socket listening on a specific address is chosen in preference over sockets that listen on any address. The test covers UDP/UDP6/TCP/TCP6. It is based on, and similar to, reuseport_dualstack.c selftest. Signed-off-by: Peter Oskolkov Signed-off-by: David S. 
Miller
---
 tools/testing/selftests/net/.gitignore        |   1 +
 tools/testing/selftests/net/Makefile          |   4 +-
 .../selftests/net/reuseport_addr_any.c        | 285 ++++++++++++++++++
 .../selftests/net/reuseport_addr_any.sh       |   4 +
 4 files changed, 292 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/net/reuseport_addr_any.c
 create mode 100755 tools/testing/selftests/net/reuseport_addr_any.sh

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 7f57b916e6b2..6f81130605d7 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -3,6 +3,7 @@ socket
 psock_fanout
 psock_snd
 psock_tpacket
+reuseport_addr_any
 reuseport_bpf
 reuseport_bpf_cpu
 reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index ee2e27b1cd0d..aeecc3ef53d0 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -7,10 +7,10 @@ CFLAGS += -I../../../../usr/include/
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
-TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh
+TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES = socket
-TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
new file mode 100644
index 000000000000..f5e01d989519
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Test that sockets listening on a specific address are preferred
+ * over sockets listening on addr_any.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const char *IP4_ADDR = "127.0.0.1";
+static const char *IP6_ADDR = "::1";
+static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
+
+static const int PORT = 8888;
+
+/* Create @count sockets of @family/@proto, set SO_REUSEPORT on each and
+ * bind them all to PORT on @addr_str (the family's wildcard address if
+ * @addr_str is NULL). SOCK_STREAM sockets are also put into listening
+ * state. The descriptors are stored in @rcv_fds; any failure exits.
+ */
+static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
+			 const char *addr_str)
+{
+	struct sockaddr_in addr4 = {0};
+	struct sockaddr_in6 addr6 = {0};
+	struct sockaddr *addr;
+	int opt, i, sz;
+
+	/* Clears the pointer itself, not a sockaddr; it is assigned below. */
+	memset(&addr, 0, sizeof(addr));
+
+	switch (family) {
+	case AF_INET:
+		addr4.sin_family = family;
+		if (!addr_str)
+			addr4.sin_addr.s_addr = htonl(INADDR_ANY);
+		else if (!inet_pton(family, addr_str, &addr4.sin_addr.s_addr))
+			error(1, errno, "inet_pton failed: %s", addr_str);
+		addr4.sin_port = htons(PORT);
+		sz = sizeof(addr4);
+		addr = (struct sockaddr *)&addr4;
+		break;
+	case AF_INET6:
+		addr6.sin6_family = AF_INET6;
+		if (!addr_str)
+			addr6.sin6_addr = in6addr_any;
+		else if (!inet_pton(family, addr_str, &addr6.sin6_addr))
+			error(1, errno, "inet_pton failed: %s", addr_str);
+		addr6.sin6_port = htons(PORT);
+		sz = sizeof(addr6);
+		addr = (struct sockaddr *)&addr6;
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+	}
+
+	for (i = 0; i < count; ++i) {
+		rcv_fds[i] = socket(family, proto, 0);
+		if (rcv_fds[i] < 0)
+			error(1, errno, "failed to create receive socket");
+
+		opt = 1;
+		if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+			       sizeof(opt)))
+			error(1, errno, "failed to set SO_REUSEPORT");
+
+		if (bind(rcv_fds[i], addr, sz))
+			error(1, errno, "failed to bind receive socket");
+
+		if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+			error(1, errno, "failed to listen on receive port");
+	}
+}
+
+/* Open a @family/@proto socket bound to the wildcard address, connect it
+ * to IP4_ADDR or IP6_ADDR (depending on @family) at PORT and send one
+ * byte. Returns the connected descriptor; the caller closes it.
+ */
+static int connect_and_send(int family, int proto)
+{
+	struct sockaddr_in saddr4 = {0};
+	struct sockaddr_in daddr4 = {0};
+	struct sockaddr_in6 saddr6 = {0};
+	struct sockaddr_in6 daddr6 = {0};
+	struct sockaddr *saddr, *daddr;
+	int fd, sz;
+
+	switch (family) {
+	case AF_INET:
+		saddr4.sin_family = AF_INET;
+		saddr4.sin_addr.s_addr = htonl(INADDR_ANY);
+		saddr4.sin_port = 0;
+
+		daddr4.sin_family = AF_INET;
+		if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr))
+			error(1, errno, "inet_pton failed: %s", IP4_ADDR);
+		daddr4.sin_port = htons(PORT);
+
+		sz = sizeof(saddr4);
+		saddr = (struct sockaddr *)&saddr4;
+		daddr = (struct sockaddr *)&daddr4;
+		break;
+	case AF_INET6:
+		saddr6.sin6_family = AF_INET6;
+		saddr6.sin6_addr = in6addr_any;
+
+		daddr6.sin6_family = AF_INET6;
+		if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr))
+			error(1, errno, "inet_pton failed: %s", IP6_ADDR);
+		daddr6.sin6_port = htons(PORT);
+
+		sz = sizeof(saddr6);
+		saddr = (struct sockaddr *)&saddr6;
+		daddr = (struct sockaddr *)&daddr6;
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+	}
+
+	fd = socket(family, proto, 0);
+	if (fd < 0)
+		error(1, errno, "failed to create send socket");
+
+	if (bind(fd, saddr, sz))
+		error(1, errno, "failed to bind send socket");
+
+	if (connect(fd, daddr, sz))
+		error(1, errno, "failed to connect send socket");
+
+	if (send(fd, "a", 1, 0) < 0)
+		error(1, errno, "failed to send message");
+
+	return fd;
+}
+
+/* Wait up to 3 ms for one of the sockets registered with @epfd to become
+ * readable, then consume the pending data (accepting the connection first
+ * for SOCK_STREAM). Returns the descriptor that epoll reported.
+ */
+static int receive_once(int epfd, int proto)
+{
+	struct epoll_event ev;
+	int i, fd;
+	char buf[8];
+
+	i = epoll_wait(epfd, &ev, 1, 3);
+	if (i < 0)
+		error(1, errno, "epoll_wait failed");
+	/* On timeout epoll_wait() returns 0 and leaves ev unset. */
+	if (i == 0)
+		error(1, 0, "epoll_wait timed out");
+
+	if (proto == SOCK_STREAM) {
+		fd = accept(ev.data.fd, NULL, NULL);
+		if (fd < 0)
+			error(1, errno, "failed to accept");
+		i = recv(fd, buf, sizeof(buf), 0);
+		close(fd);
+	} else {
+		i = recv(ev.data.fd, buf, sizeof(buf), 0);
+	}
+
+	if (i < 0)
+		error(1, errno, "failed to recv");
+
+	return ev.data.fd;
+}
+
+/* Register the @count sockets in @rcv_fds with a fresh epoll instance,
+ * send one message via connect_and_send() and exit with an error unless
+ * it is received on @fd.
+ */
+static void test(int *rcv_fds, int count, int family, int proto, int fd)
+{
+	struct epoll_event ev;
+	int epfd, i, send_fd, recv_fd;
+
+	epfd = epoll_create(1);
+	if (epfd < 0)
+		error(1, errno, "failed to create epoll");
+
+	ev.events = EPOLLIN;
+	for (i = 0; i < count; ++i) {
+		ev.data.fd = rcv_fds[i];
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+			error(1, errno, "failed to register sock epoll");
+	}
+
+	send_fd = connect_and_send(family, proto);
+
+	recv_fd = receive_once(epfd, proto);
+	if (recv_fd != fd)
+		error(1, 0, "received on an unexpected socket");
+
+	close(send_fd);
+	close(epfd);
+}
+
+int main(void)
+{
+	/* Below we test that a socket listening on a specific address
+	 * is always selected in preference over a socket listening
+	 * on addr_any. Bugs where this is not the case often result
+	 * in sockets created first or last to get picked. So below
+	 * we make sure that there are always addr_any sockets created
+	 * before and after a specific socket is created.
+	 */
+	int rcv_fds[10], i;
+
+	fprintf(stderr, "---- UDP IPv4 ----\n");
+	build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL);
+	build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 4, 1, IP4_ADDR);
+	build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL);
+	test(rcv_fds, 9, AF_INET, SOCK_DGRAM, rcv_fds[4]);
+	for (i = 0; i < 9; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "---- UDP IPv6 ----\n");
+	build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 4, 1, IP6_ADDR);
+	build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL);
+	test(rcv_fds, 9, AF_INET6, SOCK_DGRAM, rcv_fds[4]);
+	for (i = 0; i < 9; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "---- UDP IPv4 mapped to IPv6 ----\n");
+	build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 4, 1, IP4_MAPPED6);
+	build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL);
+	test(rcv_fds, 9, AF_INET, SOCK_DGRAM, rcv_fds[4]);
+	for (i = 0; i < 9; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "---- TCP IPv4 ----\n");
+	build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL);
+	build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 4, 1, IP4_ADDR);
+	build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL);
+	test(rcv_fds, 9, AF_INET, SOCK_STREAM, rcv_fds[4]);
+	for (i = 0; i < 9; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "---- TCP IPv6 ----\n");
+	build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 4, 1, IP6_ADDR);
+	build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL);
+	test(rcv_fds, 9, AF_INET6, SOCK_STREAM, rcv_fds[4]);
+	for (i = 0; i < 9; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "---- TCP IPv4 mapped to IPv6 ----\n");
+	build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 4, 1, IP4_MAPPED6);
+	build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL);
+	build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL);
+	test(rcv_fds, 9, AF_INET, SOCK_STREAM, rcv_fds[4]);
+	for (i = 0; i < 9; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "SUCCESS\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_addr_any.sh b/tools/testing/selftests/net/reuseport_addr_any.sh
new file mode 100755
index 000000000000..104592f62ad4
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_addr_any.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+./in_netns.sh ./reuseport_addr_any