/*
 * ntp_intres.c - Implements a generic blocking worker child or thread,
 *		  initially to provide a nonblocking solution for DNS
 *		  name to address lookups available with getaddrinfo().
 *
 * This is a new implementation as of 2009 sharing the filename and
 * very little else with the prior implementation, which used a
 * temporary file to receive a single set of requests from the parent,
 * and an NTP mode 7 authenticated request to push back responses.
 *
 * A primary goal in rewriting this code was the need to support the
 * pool configuration directive's requirement to retrieve multiple
 * addresses resolving a single name, which has previously been
 * satisfied with blocking resolver calls from the ntpd mainline code.
 *
 * A secondary goal is to provide a generic mechanism for other
 * blocking operations to be delegated to a worker using a common
 * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
 * and work_thread.c implement the generic mechanism.  This file
 * implements the two current consumers, getaddrinfo_sometime() and the
 * presently unused getnameinfo_sometime().
 *
 * Both routines deliver results to a callback and manage memory
 * allocation, meaning there is no freeaddrinfo_sometime().
 *
 * The initial implementation for Unix uses a pair of unidirectional
 * pipes, one each for requests and responses, connecting the forked
 * blocking child worker with the ntpd mainline.  The threaded code
 * uses arrays of pointers to queue requests and responses.
 *
 * The parent drives the process, including scheduling sleeps between
 * retries.
 *
 * Memory is managed differently for a child process, which mallocs
 * request buffers to read from the pipe into, whereas the threaded
 * code mallocs a copy of the request to hand off to the worker via
 * the queueing array.  The resulting request buffer is free()d by
 * platform-independent code.  A wrinkle is the request needs to be
 * available to the requestor during response processing.
 *
 * Response memory allocation is also platform-dependent.  With a
 * separate process and pipes, the response is free()d after being
 * written to the pipe.  With threads, the same memory is handed
 * over and the requestor frees it after processing is completed.
 *
 * The code should be generalized to support threads on Unix using
 * much of the same code used for Windows initially.
 *
 */
#include <config.h>
51

52
#include "ntp_workimpl.h"
53

54
#ifdef USE_WORKER
55

56 57 58 59
#include <stdio.h>
#include <ctype.h>
#include <signal.h>

60
/**/
61
#include <sys/types.h>
62 63 64 65
#include <netinet/in.h>
#include <arpa/inet.h>
/**/
#ifdef HAVE_SYS_PARAM_H
66
# include <sys/param.h>
67 68
#endif

69 70 71 72
#if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
# ifdef HAVE_ARPA_NAMESER_H
#  include <arpa/nameser.h> /* DNS HEADER struct */
# endif
73
# include <netdb.h>
74
# include <resolv.h>
75 76
#endif

77 78
#include "ntp.h"
#include "ntp_debug.h"
79
#include "ntp_malloc.h"
80
#include "ntp_syslog.h"
81 82
#include "ntp_unixtime.h"
#include "ntp_intres.h"
83
#include "intreswork.h"
84 85 86


/*
 * Following are implementations of getaddrinfo_sometime() and
 * getnameinfo_sometime().  Each is implemented in three routines:
 *
 * getaddrinfo_sometime()		getnameinfo_sometime()
 * blocking_getaddrinfo()		blocking_getnameinfo()
 * getaddrinfo_sometime_complete()	getnameinfo_sometime_complete()
 *
 * The first runs in the parent and marshalls (or serializes) request
 * parameters into a request blob which is processed in the child by
 * the second routine, blocking_*(), which serializes the results into
 * a response blob unpacked by the third routine, *_complete(), which
 * calls the callback routine provided with the request and frees
 * _request_ memory allocated by the first routine.  Response memory
 * is managed by the code which calls the *_complete routines.
 */

103 104
/* === typedefs === */
typedef struct blocking_gai_req_tag {	/* marshalled args */
105
	size_t			octets;
106
	u_int			dns_idx;
107 108 109 110 111 112 113 114 115 116 117 118 119
	time_t			scheduled;
	time_t			earliest;
	struct addrinfo		hints;
	int			retry;
	gai_sometime_callback	callback;
	void *			context;
	size_t			nodesize;
	size_t			servsize;
} blocking_gai_req;

/*
 * blocking_gai_resp - header of the serialized getaddrinfo() result
 * produced by blocking_getaddrinfo().
 */
typedef struct blocking_gai_resp_tag {
	size_t			octets;		/* this header plus payload */
	int			retcode;	/* getaddrinfo() return value */
	int			retry;		/* copy of the request's retry */
	int			gai_errno; /* for EAI_SYSTEM case */
	int			ai_count;	/* addrinfo entries that follow */
	/*
	 * Followed by ai_count struct addrinfo and then ai_count
	 * sockaddr_u and finally the canonical name strings.
	 */
} blocking_gai_resp;

typedef struct blocking_gni_req_tag {
	size_t			octets;
131
	u_int			dns_idx;
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
	time_t			scheduled;
	time_t			earliest;
	int			retry;
	size_t			hostoctets;
	size_t			servoctets;
	int			flags;
	gni_sometime_callback	callback;
	void *			context;
	sockaddr_u		socku;
} blocking_gni_req;

/*
 * blocking_gni_resp - header of the serialized getnameinfo() result
 * produced by blocking_getnameinfo().
 */
typedef struct blocking_gni_resp_tag {
	size_t			octets;		/* this header plus payload */
	int			retcode;	/* getnameinfo() return value */
	int			gni_errno; /* for EAI_SYSTEM case */
	int			retry;		/* copy of the request's retry */
	size_t			hostoctets;	/* 0 when no host string follows */
	size_t			servoctets;	/* 0 when no service string follows */
	/*
	 * Followed by hostoctets bytes of null-terminated host,
	 * then servoctets bytes of null-terminated service.
	 */
} blocking_gni_resp;

156 157 158 159 160 161 162 163 164 165
/* per-DNS-worker state in parent */
typedef struct dnschild_ctx_tag {
	/* NOTE(review): not read or written by the code visible in this file */
	u_int	index;
	/*
	 * Earliest time the next request handed to this worker may run.
	 * New requests take max(now, next_dns_timeslot) as their
	 * "earliest" time; retries push it out, and a success after
	 * retrying resets it to the current time.
	 */
	time_t	next_dns_timeslot;
} dnschild_ctx;

/* per-DNS-worker state in worker */
typedef struct dnsworker_ctx_tag {
	blocking_child *	c;	/* child this context belongs to */
	/*
	 * Requests whose "scheduled" time is earlier than this skip
	 * their scheduled sleep; see scheduled_sleep().
	 */
	time_t			ignore_scheduled_before;
#ifdef HAVE_RES_INIT
	time_t	next_res_init;		/* next time res_init() is due */
#endif
} dnsworker_ctx;


172 173 174 175 176
/* === variables === */
dnschild_ctx **		dnschild_contexts;		/* parent */
u_int			dnschild_contexts_alloc;
dnsworker_ctx **	dnsworker_contexts;		/* child */
u_int			dnsworker_contexts_alloc;
177

178 179 180
#ifdef HAVE_RES_INIT
static	time_t		next_res_init;
#endif
181

182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200

/* === forward declarations === */
static	u_int		reserve_dnschild_ctx(void);
static	u_int		get_dnschild_ctx(void);
static	void		alloc_dnsworker_context(u_int);
/* static	void		free_dnsworker_context(u_int); */
static	dnsworker_ctx *	get_worker_context(blocking_child *, u_int);
static	void		scheduled_sleep(time_t, time_t,
					dnsworker_ctx *);
static	void		manage_dns_retry_interval(time_t *, time_t *,
						  int *,
						  time_t *);
static	int		should_retry_dns(int, int);
#ifdef HAVE_RES_INIT
static	void		reload_resolv_conf(dnsworker_ctx *);
#else
/* without res_init() there is nothing to reload; just consume the arg */
# define		reload_resolv_conf(wc)		\
	do {						\
		(void)(wc);				\
	} while (false)
#endif
static	void		getaddrinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
static	void		getnameinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);


/* === functions === */
212
/*
213 214
 * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
 *			  invokes provided callback completion function.
215
 */
216 217 218 219 220
int
getaddrinfo_sometime(
	const char *		node,
	const char *		service,
	const struct addrinfo *	hints,
221
	int			retry,
222 223 224 225 226
	gai_sometime_callback	callback,
	void *			context
	)
{
	blocking_gai_req *	gai_req;
227 228
	u_int			idx;
	dnschild_ctx *		child_ctx;
229 230 231 232 233 234 235 236 237 238 239 240
	size_t			req_size;
	size_t			nodesize;
	size_t			servsize;
	time_t			now;
	
	NTP_REQUIRE(NULL != node);
	if (NULL != hints) {
		NTP_REQUIRE(0 == hints->ai_addrlen);
		NTP_REQUIRE(NULL == hints->ai_addr);
		NTP_REQUIRE(NULL == hints->ai_canonname);
		NTP_REQUIRE(NULL == hints->ai_next);
	}
241

242 243 244
	idx = get_dnschild_ctx();
	child_ctx = dnschild_contexts[idx];

245 246 247 248
	nodesize = strlen(node) + 1;
	servsize = strlen(service) + 1;
	req_size = sizeof(*gai_req) + nodesize + servsize;

249
	gai_req = emalloc_zero(req_size);
250 251

	gai_req->octets = req_size;
252
	gai_req->dns_idx = idx;
253 254
	now = time(NULL);
	gai_req->scheduled = now;
255 256
	gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
	child_ctx->next_dns_timeslot = gai_req->earliest;
257
	if (hints != NULL)
258
		gai_req->hints = *hints;
259
	gai_req->retry = retry;
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
	gai_req->callback = callback;
	gai_req->context = context;
	gai_req->nodesize = nodesize;
	gai_req->servsize = servsize;

	memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
	memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
	       servsize);

	if (queue_blocking_request(
		BLOCKING_GETADDRINFO,
		gai_req,
		req_size, 
		&getaddrinfo_sometime_complete, 
		gai_req)) {

		msyslog(LOG_ERR, "unable to queue getaddrinfo request");
		errno = EFAULT;
		return -1;
	}
280

281 282
	return 0;
}
283

284 285
int
blocking_getaddrinfo(
286
	blocking_child *	c,
287 288
	blocking_pipe_header *	req
	)
289
{
290
	blocking_gai_req *	gai_req;
291
	dnsworker_ctx *		worker_ctx;
292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
	blocking_pipe_header *	resp;
	blocking_gai_resp *	gai_resp;
	char *			node;
	char *			service;
	struct addrinfo *	ai_res;
	struct addrinfo *	ai;
	struct addrinfo *	serialized_ai;
	size_t			canons_octets;
	size_t			this_octets;
	size_t			resp_octets;
	char *			cp;
	time_t			time_now;

	gai_req = (void *)((char *)req + sizeof(*req));
	node = (char *)gai_req + sizeof(*gai_req);
	service = node + gai_req->nodesize;

309 310 311 312
	worker_ctx = get_worker_context(c, gai_req->dns_idx);
	scheduled_sleep(gai_req->scheduled, gai_req->earliest,
			worker_ctx);
	reload_resolv_conf(worker_ctx);
313

314
	/*
315 316 317
	 * Take a shot at the final size, better to overestimate
	 * at first and then realloc to a smaller size.
	 */
318

319 320 321 322 323
	resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
		      16 * (sizeof(struct addrinfo) +
			    sizeof(sockaddr_u)) +
		      256;
	resp = emalloc_zero(resp_octets);
324
	gai_resp = (void *)(resp + 1);
325

326 327 328
	TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n", 
		  node, service, gai_req->hints.ai_family,
		  gai_req->hints.ai_flags));
329 330 331 332
#ifdef DEBUG
	if (debug >= 2)
		fflush(stdout);
#endif	
333
	ai_res = NULL;
334 335 336
	gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
					&ai_res);
	gai_resp->retry = gai_req->retry;
337
#ifdef EAI_SYSTEM
338 339 340 341 342 343 344
	if (EAI_SYSTEM == gai_resp->retcode) {
	    if (EAGAIN == errno) {
		msyslog(LOG_ERR, "EAI_SYSTEM/EAGAIN from getaddrinfo, probably out of (locked) memory");
		exit(1);
	    }
	    gai_resp->gai_errno = errno;
	}
345
#endif
346
	canons_octets = 0;
347

348
	if (0 == gai_resp->retcode) {
349 350 351 352 353 354 355 356 357 358 359 360 361 362
		ai = ai_res;
		while (NULL != ai) {
			gai_resp->ai_count++;
			if (ai->ai_canonname)
				canons_octets += strlen(ai->ai_canonname) + 1;
			ai = ai->ai_next;
		}
		/*
		 * If this query succeeded only after retrying, DNS may have
		 * just become responsive.  Ignore previously-scheduled
		 * retry sleeps once for each pending request, similar to
		 * the way scheduled_sleep() does when its worker_sleep()
		 * is interrupted.
		 */
363
		if (gai_resp->retry > INITIAL_DNS_RETRY) {
364
			time_now = time(NULL);
365
			worker_ctx->ignore_scheduled_before = time_now;
366 367
			TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
				  humantime(time_now)));
368 369 370 371
		}
	}

	/*
372 373 374
	 * Our response consists of a header, followed by ai_count 
	 * addrinfo structs followed by ai_count sockaddr_storage 
	 * structs followed by the canonical names.
375
	 */
376 377 378 379 380
	gai_resp->octets = sizeof(*gai_resp)
			    + gai_resp->ai_count
				* (sizeof(gai_req->hints)
				   + sizeof(sockaddr_u))
			    + canons_octets;
381

382
	resp_octets = sizeof(*resp) + gai_resp->octets;
383
	resp = erealloc(resp, resp_octets);
384
	gai_resp = (void *)(resp + 1);
385

386 387 388 389
	/* cp serves as our current pointer while serializing */
	cp = (void *)(gai_resp + 1);
	canons_octets = 0;

390
	if (0 == gai_resp->retcode) {
391 392 393 394 395 396 397 398 399 400 401 402 403
		ai = ai_res;
		while (NULL != ai) {
			memcpy(cp, ai, sizeof(*ai));
			serialized_ai = (void *)cp;
			cp += sizeof(*ai);

			/* transform ai_canonname into offset */
			if (NULL != serialized_ai->ai_canonname) {
				serialized_ai->ai_canonname = (char *)canons_octets;
				canons_octets += strlen(ai->ai_canonname) + 1;
			}
			
			/* leave fixup of ai_addr pointer for receiver */
404

405 406
			ai = ai->ai_next;
		}
407

408 409 410 411 412
		ai = ai_res;
		while (NULL != ai) {
			NTP_INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
			memcpy(cp, ai->ai_addr, ai->ai_addrlen);
			cp += sizeof(sockaddr_u);
413

414
			ai = ai->ai_next;
415 416
		}

417 418 419 420 421 422
		ai = ai_res;
		while (NULL != ai) {
			if (NULL != ai->ai_canonname) {
				this_octets = strlen(ai->ai_canonname) + 1;
				memcpy(cp, ai->ai_canonname, this_octets);
				cp += this_octets;
423
			}
424

425 426
			ai = ai->ai_next;
		}
427
		freeaddrinfo(ai_res);
428 429
	}

430 431 432
	/*
	 * make sure our walk and earlier calc match
	 */
433
	DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
434

435 436
	if (queue_blocking_response(c, resp, resp_octets, req)) {
		msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
437 438
		return -1;
	}
439 440 441

	return 0;
}
442 443 444


static void
445 446 447 448 449 450
getaddrinfo_sometime_complete(
	blocking_work_req	rtype,
	void *			context,
	size_t			respsize,
	void *			resp
	)
451
{
452 453 454 455 456 457 458 459 460 461 462 463 464 465
	blocking_gai_req *	gai_req;
	blocking_gai_resp *	gai_resp;
	dnschild_ctx *		child_ctx;
	struct addrinfo *	ai;
	struct addrinfo *	next_ai;
	sockaddr_u *		psau;
	char *			node;
	char *			service;
	char *			canon_start;
	time_t			time_now;
	int			again;
	int			af;
	const char *		fam_spec;
	int			i;
466 467 468 469

	gai_req = context;
	gai_resp = resp;

470 471
	DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
	DEBUG_REQUIRE(respsize == gai_resp->octets);
472 473 474 475

	node = (char *)gai_req + sizeof(*gai_req);
	service = node + gai_req->nodesize;

476 477 478 479 480 481 482 483 484 485
	child_ctx = dnschild_contexts[gai_req->dns_idx];

	if (0 == gai_resp->retcode) {
		/*
		 * If this query succeeded only after retrying, DNS may have
		 * just become responsive.
		 */
		if (gai_resp->retry > INITIAL_DNS_RETRY) {
			time_now = time(NULL);
			child_ctx->next_dns_timeslot = time_now;
486 487
			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
				  gai_req->dns_idx, humantime(time_now)));
488 489 490 491
		}
	} else {
		again = should_retry_dns(gai_resp->retcode,
					 gai_resp->gai_errno);
492 493 494
		/*
		 * exponential backoff of DNS retries to 64s
		 */
495
		if (gai_req->retry > 0 && again) {
496 497 498 499 500 501 502 503 504 505
			/* log the first retry only */
			if (INITIAL_DNS_RETRY == gai_req->retry)
				NLOG(NLOG_SYSINFO) {
					af = gai_req->hints.ai_family;
					fam_spec = (AF_INET6 == af)
						       ? " (AAAA)"
						       : (AF_INET == af)
							     ? " (A)"
							     : "";
#ifdef EAI_SYSTEM
506 507
					if (EAI_SYSTEM == gai_resp->retcode) {
						errno = gai_resp->gai_errno;
508
						msyslog(LOG_INFO,
509
							"retrying DNS %s%s: EAI_SYSTEM %d: %m",
510 511
							node, fam_spec,
							gai_resp->gai_errno);
512
					} else
513 514 515 516 517 518 519 520
#endif
						msyslog(LOG_INFO,
							"retrying DNS %s%s: %s (%d)",
							node, fam_spec,
							gai_strerror(gai_resp->retcode),
							gai_resp->retcode);
				}
			manage_dns_retry_interval(&gai_req->scheduled,
521 522
			    &gai_req->earliest, &gai_req->retry,
			    &child_ctx->next_dns_timeslot);
523 524 525 526 527 528 529 530
			if (!queue_blocking_request(
					BLOCKING_GETADDRINFO,
					gai_req,
					gai_req->octets,
					&getaddrinfo_sometime_complete,
					gai_req))
				return;
			else
531 532 533
				msyslog(LOG_ERR,
					"unable to retry hostname %s",
					node);
534 535
		}
	}
536 537

	/*
538
	 * fixup pointers in returned addrinfo array
539
	 */
540 541 542 543 544
	ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
	next_ai = NULL;
	for (i = gai_resp->ai_count - 1; i >= 0; i--) {
		ai[i].ai_next = next_ai;
		next_ai = &ai[i];
545 546
	}

547 548
	psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
	canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
549

550 551 552 553 554 555 556
	for (i = 0; i < gai_resp->ai_count; i++) {
		if (NULL != ai[i].ai_addr)
			ai[i].ai_addr = &psau->sa;
		psau++;
		if (NULL != ai[i].ai_canonname)
			ai[i].ai_canonname += (size_t)canon_start;
	}
557

558
	NTP_ENSURE((char *)psau == canon_start);
559

560 561 562 563 564 565
	if (!gai_resp->ai_count)
		ai = NULL;
	
	(*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
			     gai_req->context, node, service, 
			     &gai_req->hints, ai);
566

567
	free(gai_req);
568
	/* gai_resp is part of block freed by process_blocking_resp() */
569 570 571
}


572 573 574 575 576 577 578 579
int
getnameinfo_sometime(
	sockaddr_u *		psau,
	size_t			hostoctets,
	size_t			servoctets,
	int			flags,
	gni_sometime_callback	callback,
	void *			context
580 581
	)
{
582
	blocking_gni_req *	gni_req;
583 584
	u_int			idx;
	dnschild_ctx *		child_ctx;
585 586 587 588 589
	time_t			time_now;
	
	NTP_REQUIRE(hostoctets);
	NTP_REQUIRE(hostoctets + servoctets < 1024);

590 591 592
	idx = get_dnschild_ctx();
	child_ctx = dnschild_contexts[idx];

593
	gni_req = emalloc_zero(sizeof(*gni_req));
594 595

	gni_req->octets = sizeof(*gni_req);
596
	gni_req->dns_idx = idx;
597 598
	time_now = time(NULL);
	gni_req->scheduled = time_now;
599 600
	gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
	child_ctx->next_dns_timeslot = gni_req->earliest;
601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618
	memcpy(&gni_req->socku, psau, SOCKLEN(psau));
	gni_req->hostoctets = hostoctets;
	gni_req->servoctets = servoctets;
	gni_req->flags = flags;
	gni_req->retry = INITIAL_DNS_RETRY;
	gni_req->callback = callback;
	gni_req->context = context;

	if (queue_blocking_request(
		BLOCKING_GETNAMEINFO,
		gni_req,
		sizeof(*gni_req), 
		&getnameinfo_sometime_complete, 
		gni_req)) {

		msyslog(LOG_ERR, "unable to queue getnameinfo request");
		errno = EFAULT;
		return -1;
619
	}
620

621
	return 0;
622 623 624
}


625 626
int
blocking_getnameinfo(
627
	blocking_child *	c,
628 629
	blocking_pipe_header *	req
	)
630
{
631
	blocking_gni_req *	gni_req;
632
	dnsworker_ctx *		worker_ctx;
633 634 635 636 637 638 639 640
	blocking_pipe_header *	resp;
	blocking_gni_resp *	gni_resp;
	size_t			octets;
	size_t			resp_octets;
	char *			service;
	char *			cp;
	int			rc;
	time_t			time_now;
641
	char			host[1024];
642

643
	gni_req = (void *)((char *)req + sizeof(*req));
Jean-Francois Boudreault's avatar
Jean-Francois Boudreault committed
644

645
	octets = gni_req->hostoctets + gni_req->servoctets;
646 647

	/*
648 649 650
	 * Some alloca() implementations are fragile regarding
	 * large allocations.  We only need room for the host
	 * and service names.
651
	 */
652
	NTP_REQUIRE(octets < sizeof(host));
653
	service = host + gni_req->hostoctets;
Jean-Francois Boudreault's avatar
Jean-Francois Boudreault committed
654

655 656 657 658
	worker_ctx = get_worker_context(c, gni_req->dns_idx);
	scheduled_sleep(gni_req->scheduled, gni_req->earliest,
			worker_ctx);
	reload_resolv_conf(worker_ctx);
659

660
	/*
661 662
	 * Take a shot at the final size, better to overestimate
	 * then realloc to a smaller size.
663
	 */
664

665
	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
666
	resp = emalloc_zero(resp_octets);
667
	gni_resp = (void *)((char *)resp + sizeof(*resp));
668

669 670 671
	TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
		  stoa(&gni_req->socku), gni_req->flags,
		  (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
672
	
673 674 675 676 677 678 679
	gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
					SOCKLEN(&gni_req->socku),
					host,
					gni_req->hostoctets,
					service,
					gni_req->servoctets,
					gni_req->flags);
680
	gni_resp->retry = gni_req->retry;
681
#ifdef EAI_SYSTEM
682
	if (EAI_SYSTEM == gni_resp->retcode)
683 684
		gni_resp->gni_errno = errno;
#endif
685

686
	if (0 != gni_resp->retcode) {
687 688
		gni_resp->hostoctets = 0;
		gni_resp->servoctets = 0;
Dave Hart's avatar
Dave Hart committed
689
	} else {
690 691 692 693 694 695 696 697 698 699 700
		gni_resp->hostoctets = strlen(host) + 1;
		gni_resp->servoctets = strlen(service) + 1;
		/*
		 * If this query succeeded only after retrying, DNS may have
		 * just become responsive.  Ignore previously-scheduled
		 * retry sleeps once for each pending request, similar to
		 * the way scheduled_sleep() does when its worker_sleep()
		 * is interrupted.
		 */
		if (gni_req->retry > INITIAL_DNS_RETRY) {
			time_now = time(NULL);
701
			worker_ctx->ignore_scheduled_before = time_now;
702
			TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
703
				humantime(time_now)));
Dave Hart's avatar
Dave Hart committed
704 705
		}
	}
706
	octets = gni_resp->hostoctets + gni_resp->servoctets;
707
	/*
708 709
	 * Our response consists of a header, followed by the host and
	 * service strings, each null-terminated.
710
	 */
711
	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
712

713 714
	resp = erealloc(resp, resp_octets);
	gni_resp = (void *)(resp + 1);
715

716
	gni_resp->octets = sizeof(*gni_resp) + octets;
717

718 719
	/* cp serves as our current pointer while serializing */
	cp = (void *)(gni_resp + 1);
720

721
	if (0 == gni_resp->retcode) {
722 723 724 725 726
		memcpy(cp, host, gni_resp->hostoctets);
		cp += gni_resp->hostoctets;
		memcpy(cp, service, gni_resp->servoctets);
		cp += gni_resp->servoctets;
	}
727

728 729
	NTP_INSIST((size_t)(cp - (char *)resp) == resp_octets);
	NTP_INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
730

731
	rc = queue_blocking_response(c, resp, resp_octets, req);
732 733 734 735
	if (rc)
		msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
	return rc;
}
736 737


738 739 740 741 742 743 744 745 746 747
static void
getnameinfo_sometime_complete(
	blocking_work_req	rtype,
	void *			context,
	size_t			respsize,
	void *			resp
	)
{
	blocking_gni_req *	gni_req;
	blocking_gni_resp *	gni_resp;
748
	dnschild_ctx *		child_ctx;
749 750
	char *			host;
	char *			service;
751
	time_t			time_now;
752
	int			again;
753

754 755
	gni_req = context;
	gni_resp = resp;
756

757 758
	DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
	DEBUG_REQUIRE(respsize == gni_resp->octets);
759

760 761 762 763 764 765 766 767 768 769
	child_ctx = dnschild_contexts[gni_req->dns_idx];

	if (0 == gni_resp->retcode) {
		/*
		 * If this query succeeded only after retrying, DNS may have
		 * just become responsive.
		 */
		if (gni_resp->retry > INITIAL_DNS_RETRY) {
			time_now = time(NULL);
			child_ctx->next_dns_timeslot = time_now;
770 771
			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
				  gni_req->dns_idx, humantime(time_now)));
772 773
		}
	} else {
774 775 776 777
		again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
		/*
		 * exponential backoff of DNS retries to 64s
		 */
778
		if (gni_req->retry > 0)
779
			manage_dns_retry_interval(&gni_req->scheduled,
780 781
			    &gni_req->earliest, &gni_req->retry,
			    &child_ctx->next_dns_timeslot);
782

783
		if (gni_req->retry > 0 && again) {
784 785 786 787 788 789 790 791 792
			if (!queue_blocking_request(
				BLOCKING_GETNAMEINFO,
				gni_req,
				gni_req->octets, 
				&getnameinfo_sometime_complete, 
				gni_req))
				return;

			msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
793 794 795
		}
	}

796 797 798 799 800 801 802 803 804
	if (!gni_resp->hostoctets) {
		host = NULL;
		service = NULL;
	} else {
		host = (char *)gni_resp + sizeof(*gni_resp);
		service = (gni_resp->servoctets) 
			      ? host + gni_resp->hostoctets
			      : NULL;
	}
805

806 807 808
	(*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
			     &gni_req->socku, gni_req->flags, host,
			     service, gni_req->context);
809

810
	free(gni_req);
811
	/* gni_resp is part of block freed by process_blocking_resp() */
812
}
813 814


#ifdef HAVE_RES_INIT
/*
 * reload_resolv_conf - call res_init() at most once per minute so
 * resolver configuration changes are picked up by the worker.
 */
static void
reload_resolv_conf(
	dnsworker_ctx *	worker_ctx
	)
{
	time_t	time_now;

	/*
	 * This is ad-hoc.  Reload /etc/resolv.conf once per minute
	 * to pick up on changes from the DHCP client.  [Bug 1226]
	 * When using threads for the workers, this needs to happen
	 * only once per minute process-wide.
	 */
	time_now = time(NULL);
# ifdef USE_WORK_THREAD
	/* threads share one deadline through the file-scope copy */
	worker_ctx->next_res_init = next_res_init;
# endif
	if (worker_ctx->next_res_init <= time_now) {
		/* a zero deadline means first use: only arm the timer */
		if (worker_ctx->next_res_init != 0)
			res_init();
		worker_ctx->next_res_init = time_now + 60;
# ifdef USE_WORK_THREAD
		next_res_init = worker_ctx->next_res_init;
# endif
	}
}
#endif	/* HAVE_RES_INIT */


static u_int
reserve_dnschild_ctx(void)
{
	const size_t	ps = sizeof(dnschild_contexts[0]);
	const size_t	cs = sizeof(*dnschild_contexts[0]);
	u_int		c;
	u_int		new_alloc;
	size_t		octets;
	size_t		new_octets;

	c = 0;
856
	while (true) {
857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932
		for ( ; c < dnschild_contexts_alloc; c++) {
			if (NULL == dnschild_contexts[c]) {
				dnschild_contexts[c] = emalloc_zero(cs);

				return c;
			}
		}
		new_alloc = dnschild_contexts_alloc + 20;
		new_octets = new_alloc * ps;
		octets = dnschild_contexts_alloc * ps;
		dnschild_contexts = erealloc_zero(dnschild_contexts,
						  new_octets, octets);
		dnschild_contexts_alloc = new_alloc;
	}
}


static u_int
get_dnschild_ctx(void)
{
	static u_int	shared_ctx = UINT_MAX;

	if (worker_per_query)
		return reserve_dnschild_ctx();

	if (UINT_MAX == shared_ctx)
		shared_ctx = reserve_dnschild_ctx();

	return shared_ctx;
}


/*
 * alloc_dnsworker_context - install a zeroed dnsworker_ctx in
 * dnsworker_contexts[idx].  The slot must currently be empty.
 */
static void
alloc_dnsworker_context(
	u_int idx
	)
{
	dnsworker_ctx *	fresh;

	REQUIRE(NULL == dnsworker_contexts[idx]);
	fresh = emalloc_zero(sizeof(*fresh));
	dnsworker_contexts[idx] = fresh;
}


/*
 * get_worker_context - return the worker-side context for slot idx,
 * growing dnsworker_contexts[] and allocating the context on demand.
 * The context is cleared and bound to child c before it is returned.
 */
static dnsworker_ctx *
get_worker_context(
	blocking_child *	c,
	u_int			idx
	)
{
	const size_t	slot_sz = sizeof(dnsworker_contexts[0]);
	dnsworker_ctx *	ctx;
	u_int		grown;

	if (idx >= dnsworker_contexts_alloc) {
		/*
		 * Grow to the multiple of 4 strictly greater than
		 * idx + 1 (one past an exact multiple gains 4 slots).
		 */
		grown = ((1 + idx) + 4) & ~(4 - 1);
		dnsworker_contexts = erealloc_zero(dnsworker_contexts,
						   grown * slot_sz,
						   dnsworker_contexts_alloc * slot_sz);
		dnsworker_contexts_alloc = grown;
	}

	if (NULL == dnsworker_contexts[idx])
		alloc_dnsworker_context(idx);
	ctx = dnsworker_contexts[idx];
	/*
	 * NOTE(review): clearing on every call also resets
	 * ignore_scheduled_before (and next_res_init) that earlier
	 * requests stored here - confirm this is intended.
	 */
	ZERO(*ctx);
	ctx->c = c;

	return ctx;
}


933
static void
934
scheduled_sleep(
935 936 937
	time_t		scheduled,
	time_t		earliest,
	dnsworker_ctx *	worker_ctx
938 939
	)
{
940
	time_t now;
941

942
	if (scheduled < worker_ctx->ignore_scheduled_before) {
943 944 945
		TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
			  humantime(earliest), humantime(scheduled),
			  humantime(worker_ctx->ignore_scheduled_before)));
946 947
		return;
	}
948

949 950 951
	now = time(NULL);

	if (now < earliest) {
952 953 954
		TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
			  humantime(earliest), humantime(scheduled),
			  humantime(worker_ctx->ignore_scheduled_before)));
955
		if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
956 957
			/* our sleep was interrupted */
			now = time(NULL);
958
			worker_ctx->ignore_scheduled_before = now;
959
#ifdef HAVE_RES_INIT
960 961
			worker_ctx->next_res_init = now + 60;
			next_res_init = worker_ctx->next_res_init;
962 963
			res_init();
#endif
964 965
			TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
				  humantime(worker_ctx->ignore_scheduled_before)));
966 967 968 969 970 971
		}
	}
}


/*
 * manage_dns_retry_interval is a helper used by
 * getaddrinfo_sometime_complete and getnameinfo_sometime_complete
 * to calculate the new retry interval and schedule the next query.
 *
 * pscheduled		out: set to the current time.
 * pwhen		out: time the retry may run, the later of
 *			now + *pretry and *pnext_timeslot.
 * pretry		in/out: retry interval in seconds, doubled on
 *			each call and capped at 64.
 * pnext_timeslot	in/out: advanced to the retry time.
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot
	)
{
	time_t	now;
	time_t	when;
	int	retry;

	now = time(NULL);
	retry = *pretry;

	/* the later of "one interval from now" and the reserved slot */
	when = now + retry;
	if (when < *pnext_timeslot)
		when = *pnext_timeslot;
	*pnext_timeslot = when;

	/* double the interval, capped at 64s, without overflowing */
	retry = (retry >= 32) ? 64 : retry << 1;

	*pscheduled = now;
	*pwhen = when;
	*pretry = retry;
}

/*
 * should_retry_dns is a helper used by getaddrinfo_sometime_complete
 * and getnameinfo_sometime_complete which implements ntpd's DNS retry
 * policy.
 *
 * rescode	resolver return code (an EAI_* value).
 * res_errno	errno saved by the worker for the EAI_SYSTEM case.
 *
 * Returns nonzero if the lookup should be retried, zero if not.
 */
static int
should_retry_dns(
	int	rescode,
	int	res_errno
	)
{
	/* sticky across calls: set once EAI_AGAIN has been seen */
	static int	eai_again_seen;
	int		again;
#if defined (EAI_SYSTEM) && defined(DEBUG)
	char		msg[256];
#endif

	/*
	 * If the resolver failed, see if the failure is
	 * temporary.  If so, report that a retry is warranted.
	 */
	again = 0;

	switch (rescode) {

	case EAI_FAIL:
		again = 1;
		break;

	case EAI_AGAIN:
		again = 1;
		eai_again_seen = 1;		/* [Bug 1178] */
		break;

	case EAI_NONAME:
#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
	case EAI_NODATA:
#endif
		/* retried only while EAI_AGAIN has never been seen */
		again = !eai_again_seen;	/* [Bug 1178] */
		break;

#ifdef EAI_SYSTEM
	case EAI_SYSTEM:
		/* 
		 * EAI_SYSTEM means the real error is in errno.  We should be more
		 * discriminating about which errno values require retrying, but
		 * this matches existing behavior.
		 */
		again = 1;
# ifdef DEBUG
		errno_to_str(res_errno, msg, sizeof(msg));
		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
			  res_errno, msg));
# endif
		break;
#endif

	default:
		/* any other result code is not retried */
		break;
	}

	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
		  gai_strerror(rescode), rescode, again ? "" : "not "));

	return again;
}

#else	/* !USE_WORKER follows */
/* keeps this translation unit non-empty when the worker is disabled */
int ntp_intres_nonempty_compilation_unit;
#endif