From d2ba8ea54a4089959afdeecdd963e3c4ff391748 Mon Sep 17 00:00:00 2001 From: KJ Tsanaktsidis Date: Thu, 7 Dec 2023 19:55:15 +1100 Subject: [PATCH] Set AI_ADDRCONFIG when making getaddrinfo(3) calls for outgoing conns (#7295) When making an outgoing TCP or UDP connection, set AI_ADDRCONFIG in the hints we send to getaddrinfo(3) (if supported). This will prompt the resolver to _NOT_ issue A or AAAA queries if the system does not actually have an IPv4 or IPv6 address (respectively). This makes outgoing connections marginally more efficient on non-dual-stack systems, since we don't have to try connecting to an address which can't possibly work. More importantly, however, this works around a race condition present in some older versions of glibc on aarch64 where it could accidently send the two outgoing DNS queries with the same DNS txnid, and get confused when receiving the responses. This manifests as outgoing connections sometimes taking 5 seconds (the DNS timeout before retry) to be made. Fixes #19144 --- ext/socket/extconf.rb | 2 + ext/socket/ipsocket.c | 11 ++- ext/socket/udpsocket.c | 9 ++- test/socket/test_tcp.rb | 164 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 183 insertions(+), 3 deletions(-) diff --git a/ext/socket/extconf.rb b/ext/socket/extconf.rb index 1ca52da366..544bed5298 100644 --- a/ext/socket/extconf.rb +++ b/ext/socket/extconf.rb @@ -607,6 +607,8 @@ You can try --enable-wide-getaddrinfo. EOS end + have_const('AI_ADDRCONFIG', headers) + case with_config("lookup-order-hack", "UNSPEC") when "INET" $defs << "-DLOOKUP_ORDER_HACK_INET" diff --git a/ext/socket/ipsocket.c b/ext/socket/ipsocket.c index 0c13620258..0a693655b4 100644 --- a/ext/socket/ipsocket.c +++ b/ext/socket/ipsocket.c @@ -54,15 +54,22 @@ init_inetsock_internal(VALUE v) VALUE connect_timeout = arg->connect_timeout; struct timeval tv_storage; struct timeval *tv = NULL; + int remote_addrinfo_hints = 0; if (!NIL_P(connect_timeout)) { tv_storage = rb_time_interval(connect_timeout); tv = &tv_storage; } + if (type == INET_SERVER) { + remote_addrinfo_hints |= AI_PASSIVE; + } +#ifdef HAVE_CONST_AI_ADDRCONFIG + remote_addrinfo_hints |= AI_ADDRCONFIG; +#endif + arg->remote.res = rsock_addrinfo(arg->remote.host, arg->remote.serv, - family, SOCK_STREAM, - (type == INET_SERVER) ? AI_PASSIVE : 0); + family, SOCK_STREAM, remote_addrinfo_hints); /* diff --git a/ext/socket/udpsocket.c b/ext/socket/udpsocket.c index 5224e48a96..cf3eb6ab9a 100644 --- a/ext/socket/udpsocket.c +++ b/ext/socket/udpsocket.c @@ -88,9 +88,16 @@ udp_connect(VALUE sock, VALUE host, VALUE port) { struct udp_arg arg; VALUE ret; + int addrinfo_hints = 0; GetOpenFile(sock, arg.fptr); - arg.res = rsock_addrinfo(host, port, rsock_fd_family(arg.fptr->fd), SOCK_DGRAM, 0); + +#ifdef HAVE_CONST_AI_ADDRCONFIG + addrinfo_hints |= AI_ADDRCONFIG; +#endif + + arg.res = rsock_addrinfo(host, port, rsock_fd_family(arg.fptr->fd), SOCK_DGRAM, + addrinfo_hints); ret = rb_ensure(udp_connect_internal, (VALUE)&arg, rsock_freeaddrinfo, (VALUE)arg.res); if (!ret) rsock_sys_fail_host_port("connect(2)", host, port); diff --git a/test/socket/test_tcp.rb b/test/socket/test_tcp.rb index 7f9dc53cae..6857fd9c99 100644 --- a/test/socket/test_tcp.rb +++ b/test/socket/test_tcp.rb @@ -140,4 +140,168 @@ class TestSocket_TCPSocket < Test::Unit::TestCase server_threads.each(&:join) end end + + def test_ai_addrconfig + # This test verifies that we pass AI_ADDRCONFIG to the DNS resolver when making + # an outgoing connection. + # The verification of this is unfortunately incredibly convoluted. We perform the + # test by setting up a fake DNS server to receive queries. Then, we construct + # an environment which has only IPv4 addresses and uses that fake DNS server. We + # then attempt to make an outgoing TCP connection. Finally, we verify that we + # only received A and not AAAA queries on our fake resolver. + # This test can only possibly work on Linux, and only when run as root. If either + # of these conditions aren't met, the test will be skipped. + + # The construction of our IPv6-free environment must happen in a child process, + # which we can put in its own network & mount namespaces. + + omit "this test requires fork" unless Process.respond_to?(:fork) + + IO.popen("-") do |test_io| + if test_io.nil? + begin + # Child program + require 'fiddle' + require 'resolv' + require 'open3' + + libc = Fiddle.dlopen(nil) + begin + unshare = Fiddle::Function.new(libc['unshare'], [Fiddle::TYPE_INT], Fiddle::TYPE_INT) + rescue Fiddle::DLError + # Test can't run because we don't have unshare(2) in libc + # This will be the case on not-linux, and also on very old glibc versions (or + # possibly other libc's that don't expose this syscall wrapper) + $stdout.write(Marshal.dump({result: :skip, reason: "unshare(2) or mount(2) not in libc"})) + exit + end + + # Move our test process into a new network & mount namespace. + # This environment will be configured to be IPv6 free and point DNS resolution + # at a fake DNS server. + # (n.b. these flags are CLONE_NEWNS | CLONE_NEWNET) + ret = unshare.call(0x00020000 | 0x40000000) + errno = Fiddle.last_error + if ret == -1 && errno == Errno::EPERM::Errno + # Test can't run because we're not root. + $stdout.write(Marshal.dump({result: :skip, reason: "insufficient permissions to unshare namespaces"})) + exit + elsif ret == -1 && (errno == Errno::ENOSYS::Errno || errno == Errno::EINVAL::Errno) + # No unshare(2) in the kernel (or kernel too old to know about this namespace type) + $stdout.write(Marshal.dump({result: :skip, reason: "errno #{errno} calling unshare(2)"})) + exit + elsif ret == -1 + # Unexpected failure + raise "errno #{errno} calling unshare(2)" + end + + # Set up our fake DNS environment. Clean out /etc/hosts... + fake_hosts_file = Tempfile.new('ruby_test_hosts') + fake_hosts_file.write <<~HOSTS + 127.0.0.1 localhost + ::1 localhost + HOSTS + fake_hosts_file.flush + + # Have /etc/resolv.conf point to 127.0.0.1... + fake_resolv_conf = Tempfile.new('ruby_test_resolv') + fake_resolv_conf.write <<~RESOLV + nameserver 127.0.0.1 + RESOLV + fake_resolv_conf.flush + + # Also stub out /etc/nsswitch.conf; glibc can have other resolver modules + # (like systemd-resolved) configured in there other than just using dns, + # so rewrite it to remove any `hosts:` lines and add one which just uses + # dns. + real_nsswitch_conf = File.read('/etc/nsswitch.conf') rescue "" + fake_nsswitch_conf = Tempfile.new('ruby_test_nsswitch') + real_nsswitch_conf.lines.reject { _1 =~ /^\s*hosts:/ }.each do |ln| + fake_nsswitch_conf.puts ln + end + fake_nsswitch_conf.puts "hosts: files myhostname dns" + fake_nsswitch_conf.flush + + # This is needed to make sure our bind-mounds aren't visible outside this process. + system 'mount', '--make-rprivate', '/', exception: true + # Bind-mount the fake files over the top of the real files. + system 'mount', '--bind', '--make-private', fake_hosts_file.path, '/etc/hosts', exception: true + system 'mount', '--bind', '--make-private', fake_resolv_conf.path, '/etc/resolv.conf', exception: true + system 'mount', '--bind', '--make-private', fake_nsswitch_conf.path, '/etc/nsswitch.conf', exception: true + + # Create a dummy interface with only an IPv4 address + system 'ip', 'link', 'add', 'dummy0', 'type', 'dummy', exception: true + system 'ip', 'addr', 'add', '192.168.1.2/24', 'dev', 'dummy0', exception: true + system 'ip', 'link', 'set', 'dummy0', 'up', exception: true + system 'ip', 'link', 'set', 'lo', 'up', exception: true + + # Disable IPv6 on this interface (this is needed to disable the link-local + # IPv6 address) + File.open('/proc/sys/net/ipv6/conf/dummy0/disable_ipv6', 'w') do |f| + f.puts "1" + end + + # Create a fake DNS server which will receive the DNS queries triggered by TCPSocket.new + fake_dns_server_socket = UDPSocket.new + fake_dns_server_socket.bind('127.0.0.1', 53) + received_dns_queries = [] + fake_dns_server_thread = Thread.new do + Socket.udp_server_loop_on([fake_dns_server_socket]) do |msg, msg_src| + request = Resolv::DNS::Message.decode(msg) + received_dns_queries << request + response = request.dup.tap do |r| + r.qr = 0 + r.rcode = 3 # NXDOMAIN + end + msg_src.reply response.encode + end + end + + # Make a request which will hit our fake DNS swerver - this needs to be in _another_ + # process because glibc will cache resolver info across the fork otherwise. + load_path_args = $LOAD_PATH.flat_map { ['-I', _1] } + _, _, status = Open3.capture3('/proc/self/exe', *load_path_args, '-rsocket', '-e', <<~RUBY) + TCPSocket.open('www.example.com', 4444) + RUBY + + fake_dns_server_thread.kill + fake_dns_server_thread.join + + have_aaaa_qs = received_dns_queries.any? do |query| + query.question.any? do |question| + question[1] == Resolv::DNS::Resource::IN::AAAA + end + end + + have_a_q = received_dns_queries.any? do |query| + query.question.any? do |question| + question[0].to_s == "www.example.com" + end + end + + if have_aaaa_qs + $stdout.write(Marshal.dump({result: :fail, reason: "got AAAA queries, expected none"})) + elsif !have_a_q + $stdout.write(Marshal.dump({result: :fail, reason: "got no A query for example.com"})) + else + $stdout.write(Marshal.dump({result: :success})) + end + rescue => ex + $stdout.write(Marshal.dump({result: :fail, reason: ex.full_message})) + ensure + # Make sure the child process does not transfer control back into the test runner. + exit! + end + else + test_result = Marshal.load(test_io.read) + + case test_result[:result] + when :skip + omit test_result[:reason] + when :fail + fail test_result[:reason] + end + end + end + end end if defined?(TCPSocket)