c-ares: add ARES_OPT_ROTATE (round-robin selection of name servers)

New queries start at channel->last_server; when rotation is enabled that
index advances by one (modulo channel->nservers) for every ares_send() call.
next_server() now counts attempts in query->try, up to
channel->nservers * channel->tries, and walks the servers with modular
arithmetic instead of restarting at server 0.

Review notes for this revision of the patch:

- ARES_OPT_ROTATE is a pure flag: setting the bit in optmask enables
  rotation. The previous revision inserted an "int rotate" member between
  ndots and udp_port in the public struct ares_options, which moves udp_port
  and every later member for callers compiled against the older header, and
  it enabled rotation only when that member was also non-zero. That hunk is
  dropped and init_by_options() now stores 1.
- The "index" lines of ares/ares.h and ares/ares_init.c still carry the blob
  ids of the previous revision.
- NOTE(review): leading indentation and column alignment inside the hunks
  were reconstructed from the project's 2-space style; if a hunk does not
  match, apply with whitespace-insensitive matching (--ignore-whitespace).

diff --git a/ares/CHANGES b/ares/CHANGES
index dd9bac6c9..c590079b4 100644
--- a/ares/CHANGES
+++ b/ares/CHANGES
@@ -1,5 +1,15 @@
   Changelog for the c-ares project

+* Nov 1 2008 (Daniel Stenberg)
+- Carlo Contavalli added support for the glibc "rotate" option, as documented
+  in man resolv.conf:
+
+  causes round robin selection of nameservers from among those listed. This
+  has the effect of spreading the query load among all listed servers, rather
+  than having all clients try the first listed server first every time.
+
+  You can enable it with ARES_OPT_ROTATE
+
 * Oct 21 2008 (Yang Tse)
   Charles Hardin added handling of EINPROGRESS for UDP connects.

diff --git a/ares/ares.h b/ares/ares.h
index 3609f8676..1507f41e9 100644
--- a/ares/ares.h
+++ b/ares/ares.h
@@ -114,6 +114,7 @@ extern "C" {
 #define ARES_OPT_SOCK_SNDBUF    (1 << 11)
 #define ARES_OPT_SOCK_RCVBUF    (1 << 12)
 #define ARES_OPT_TIMEOUTMS      (1 << 13)
+#define ARES_OPT_ROTATE         (1 << 14)

 /* Nameinfo flag values */
 #define ARES_NI_NOFQDN                  (1 << 0)
diff --git a/ares/ares_init.c b/ares/ares_init.c
index 4d4ba1ea5..4a147fc42 100644
--- a/ares/ares_init.c
+++ b/ares/ares_init.c
@@ -144,6 +144,7 @@ int ares_init_options(ares_channel *channelptr, struct ares_options *options,
   channel->timeout = -1;
   channel->tries = -1;
   channel->ndots = -1;
+  channel->rotate = -1;
   channel->udp_port = -1;
   channel->tcp_port = -1;
   channel->socket_send_buffer_size = -1;
@@ -159,6 +160,7 @@ int ares_init_options(ares_channel *channelptr, struct ares_options *options,
   channel->sock_state_cb = NULL;
   channel->sock_state_cb_data = NULL;

+  channel->last_server = 0;
   channel->last_timeout_processed = (time_t)now.tv_sec;

   /* Initialize our lists of queries */
@@ -352,6 +354,8 @@ static int init_by_options(ares_channel channel,
     channel->tries = options->tries;
   if ((optmask & ARES_OPT_NDOTS) && channel->ndots == -1)
     channel->ndots = options->ndots;
+  if ((optmask & ARES_OPT_ROTATE) && channel->rotate == -1)
+    channel->rotate = 1;
   if ((optmask & ARES_OPT_UDP_PORT) && channel->udp_port == -1)
     channel->udp_port = options->udp_port;
   if ((optmask & ARES_OPT_TCP_PORT) && channel->tcp_port == -1)
@@ -932,6 +936,8 @@ static int init_by_defaults(ares_channel channel)
     channel->tries = DEFAULT_TRIES;
   if (channel->ndots == -1)
     channel->ndots = 1;
+  if (channel->rotate == -1)
+    channel->rotate = 0;
   if (channel->udp_port == -1)
     channel->udp_port = htons(NAMESERVER_PORT);
   if (channel->tcp_port == -1)
@@ -1302,6 +1308,9 @@ static int set_options(ares_channel channel, const char *str)
       val = try_option(p, q, "retry:");
       if (val && channel->tries == -1)
         channel->tries = atoi(val);
+      val = try_option(p, q, "rotate");
+      if (val && channel->rotate == -1)
+        channel->rotate = 1;
       p = q;
       while (ISSPACE(*p))
         p++;
@@ -1374,7 +1383,7 @@ static char *try_config(char *s, const char *opt)
 static const char *try_option(const char *p, const char *q, const char *opt)
 {
   size_t len = strlen(opt);
-  return ((size_t)(q - p) > len && !strncmp(p, opt, len)) ? &p[len] : NULL;
+  return ((size_t)(q - p) >= len && !strncmp(p, opt, len)) ? &p[len] : NULL;
 }

 #ifndef WIN32
diff --git a/ares/ares_private.h b/ares/ares_private.h
index 0b4edf94e..976fa9f10 100644
--- a/ares/ares_private.h
+++ b/ares/ares_private.h
@@ -195,8 +195,8 @@ struct query {
   void *arg;

   /* Query status */
-  int try;
-  int server;
+  int try; /* Number of times we tried this query already. */
+  int server; /* Server this query has last been sent to. */
   struct query_server_info *server_info;   /* per-server state */
   int using_tcp;
   int error_status;
@@ -242,6 +242,7 @@ struct ares_channeldata {
   int timeout; /* in milliseconds */
   int tries;
   int ndots;
+  int rotate; /* if true, each new query starts at the next server in turn */
   int udp_port;
   int tcp_port;
   int socket_send_buffer_size;
@@ -268,6 +269,9 @@ struct ares_channeldata {
      just to draw the line somewhere. */
   time_t last_timeout_processed;

+  /* Last server we sent a query to. */
+  int last_server;
+
   /* Circular, doubly-linked list of queries, bucketed various ways.... */
   /* All active queries in a single list: */
   struct list_node all_queries;
diff --git a/ares/ares_process.c b/ares/ares_process.c
index 675af489d..b7f375e19 100644
--- a/ares/ares_process.c
+++ b/ares/ares_process.c
@@ -670,30 +670,33 @@ static void skip_server(ares_channel channel, struct query *query,
 static void next_server(ares_channel channel, struct query *query,
                         struct timeval *now)
 {
-  /* Advance to the next server or try. */
-  query->server++;
-  for (; query->try < channel->tries; query->try++)
+  /* We need to try each server channel->tries times. We have channel->nservers
+   * servers to try. In total, we need to do channel->nservers * channel->tries
+   * attempts. Use query->try to remember how many times we already attempted
+   * this query. Use modular arithmetic to find the next server to try. */
+  while (++(query->try) < (channel->nservers * channel->tries))
     {
-      for (; query->server < channel->nservers; query->server++)
+      struct server_state *server;
+
+      /* Move on to the next server. */
+      query->server = (query->server + 1) % channel->nservers;
+      server = &channel->servers[query->server];
+
+      /* We don't want to use this server if (1) we decided this
+       * connection is broken, and thus about to be closed, (2)
+       * we've decided to skip this server because of earlier
+       * errors we encountered, or (3) we already sent this query
+       * over this exact connection.
+       */
+      if (!server->is_broken &&
+          !query->server_info[query->server].skip_server &&
+          !(query->using_tcp &&
+            (query->server_info[query->server].tcp_connection_generation ==
+             server->tcp_connection_generation)))
         {
-          struct server_state *server = &channel->servers[query->server];
-          /* We don't want to use this server if (1) we decided this
-           * connection is broken, and thus about to be closed, (2)
-           * we've decided to skip this server because of earlier
-           * errors we encountered, or (3) we already sent this query
-           * over this exact connection.
-           */
-          if (!server->is_broken &&
-              !query->server_info[query->server].skip_server &&
-              !(query->using_tcp &&
-                (query->server_info[query->server].tcp_connection_generation ==
-                 server->tcp_connection_generation)))
-            {
-              ares__send_query(channel, query, now);
-              return;
-            }
+          ares__send_query(channel, query, now);
+          return;
         }
-      query->server = 0;

       /* You might think that with TCP we only need one try. However,
        * even when using TCP, servers can time-out our connection just
@@ -702,6 +705,8 @@ static void next_server(ares_channel channel, struct query *query,
        * tickle a bug that drops our request.
        */
     }
+
+  /* If we are here, all attempts to perform query failed. */
   end_query(channel, query, query->error_status, NULL, 0);
 }

@@ -775,8 +780,7 @@ void ares__send_query(ares_channel channel, struct query *query,
     }
     query->timeout = *now;
     ares__timeadd(&query->timeout,
-                  (query->try == 0) ? channel->timeout
-                  : channel->timeout << query->try / channel->nservers);
+                  channel->timeout << (query->try / channel->nservers));
     /* Keep track of queries bucketed by timeout, so we can process
      * timeout events quickly.
      */
diff --git a/ares/ares_send.c b/ares/ares_send.c
index a5811d36d..be5478df2 100644
--- a/ares/ares_send.c
+++ b/ares/ares_send.c
@@ -95,7 +95,13 @@ void ares_send(ares_channel channel, const unsigned char *qbuf, int qlen,

   /* Initialize query status. */
   query->try = 0;
-  query->server = 0;
+
+  /* Choose the server to send the query to. If rotation is enabled, keep track
+   * of the next server we want to use. */
+  query->server = channel->last_server;
+  if (channel->rotate == 1)
+    channel->last_server = (channel->last_server + 1) % channel->nservers;
+
   for (i = 0; i < channel->nservers; i++)
     {
       query->server_info[i].skip_server = 0;