/* builtin/pack-objects.c */
1
#include "builtin.h"
2
#include "cache.h"
3
#include "repository.h"
4
#include "config.h"
5
#include "attr.h"
6
#include "object.h"
7 8 9 10
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
11
#include "delta.h"
12
#include "pack.h"
13
#include "pack-revindex.h"
14
#include "csum-file.h"
15
#include "tree-walk.h"
16 17 18
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
19 20
#include "list-objects-filter.h"
#include "list-objects-filter-options.h"
21
#include "pack-objects.h"
22
#include "progress.h"
23
#include "refs.h"
24
#include "streaming.h"
25
#include "thread-utils.h"
26
#include "pack-bitmap.h"
27 28
#include "reachable.h"
#include "sha1-array.h"
29
#include "argv-array.h"
30
#include "list.h"
31
#include "packfile.h"
32
#include "object-store.h"
33
#include "dir.h"
34

35
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
36 37
#define SIZE(obj) oe_size(&to_pack, obj)
#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
38
#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
39 40 41 42
#define DELTA(obj) oe_delta(&to_pack, obj)
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
43
#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
44 45
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
46

47
static const char *pack_usage[] = {
48 49
	N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
	N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
50 51
	NULL
};
52

53
/*
54 55 56
 * Objects we are going to pack are collected in the `to_pack` structure.
 * It contains an array (dynamically expanded) of the object data, and a map
 * that can resolve SHA1s to their position in the array.
57
 */
58 59
static struct packing_data to_pack;

60
static struct pack_idx_entry **written_list;
61
static uint32_t nr_result, nr_written, nr_seen;
62

63
static int non_empty;
64
static int reuse_delta = 1, reuse_object = 1;
65
static int keep_unreachable, unpack_unreachable, include_tag;
66
static timestamp_t unpack_unreachable_expiration;
67
static int pack_loose_unreachable;
68
static int local;
69
static int have_non_local_packs;
70
static int incremental;
71 72
static int ignore_packed_keep_on_disk;
static int ignore_packed_keep_in_core;
73
static int allow_ofs_delta;
74
static struct pack_idx_option pack_idx_opts;
75
static const char *base_name;
76
static int progress = 1;
77
static int window = 10;
78
static unsigned long pack_size_limit;
79
static int depth = 50;
80
static int delta_search_threads;
81
static int pack_to_stdout;
82
static int num_preferred_base;
83
static struct progress *progress_state;
84

85 86 87 88
static struct packed_git *reuse_packfile;
static uint32_t reuse_packfile_objects;
static off_t reuse_packfile_offset;

89 90
static int use_bitmap_index_default = 1;
static int use_bitmap_index = -1;
91
static int write_bitmap_index;
92
static uint16_t write_bitmap_options;
93

94 95
static int exclude_promisor_objects;

96
static unsigned long delta_cache_size = 0;
97
static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
98
static unsigned long cache_max_small_delta_size = 1000;
99

100 101
static unsigned long window_memory_limit = 0;

102 103 104
static struct list_objects_filter_options filter_options;

enum missing_action {
105 106 107
	MA_ERROR = 0,      /* fail if any missing objects are encountered */
	MA_ALLOW_ANY,      /* silently allow ALL missing objects */
	MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
108 109 110 111
};
static enum missing_action arg_missing_action;
static show_object_fn fn_show_object;

112 113 114
/*
 * stats
 */
115 116
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;
117

118 119 120 121 122 123 124 125 126 127 128
/*
 * Indexed commits
 */
static struct commit **indexed_commits;
static unsigned int indexed_commits_nr;
static unsigned int indexed_commits_alloc;

/*
 * Remember a commit for later bitmap generation.  The backing array is
 * grown geometrically (with a +32 seed so the very first growth is not
 * tiny) whenever it is full.
 */
static void index_commit_for_bitmap(struct commit *commit)
{
	if (indexed_commits_nr >= indexed_commits_alloc) {
		indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
		REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
	}

	indexed_commits[indexed_commits_nr++] = commit;
}
134

135
/*
 * Recompute the delta for `entry` against its recorded delta base
 * (DELTA(entry)).  Both objects are read back from the object store and
 * diffed; the returned buffer is owned by the caller, who must free it.
 * Dies if either object cannot be read, or if the recomputed delta does
 * not have the size recorded earlier for this entry (which would
 * indicate the objects changed under us).
 */
static void *get_delta(struct object_entry *entry)
{
	unsigned long size, base_size, delta_size;
	void *buf, *base_buf, *delta_buf;
	enum object_type type;

	buf = read_object_file(&entry->idx.oid, &type, &size);
	if (!buf)
		die("unable to read %s", oid_to_hex(&entry->idx.oid));
	base_buf = read_object_file(&DELTA(entry)->idx.oid, &type,
				    &base_size);
	if (!base_buf)
		die("unable to read %s",
		    oid_to_hex(&DELTA(entry)->idx.oid));
	delta_buf = diff_delta(base_buf, base_size,
			       buf, size, &delta_size, 0);
	/* a size mismatch against the cached DELTA_SIZE means corruption */
	if (!delta_buf || delta_size != DELTA_SIZE(entry))
		die("delta size changed");
	free(buf);
	free(base_buf);
	return delta_buf;
}

158 159
/*
 * Deflate the `size` bytes at *pptr in one shot.  On return, *pptr
 * points at a freshly allocated buffer holding the compressed data and
 * the original input buffer has been freed.  Returns the compressed
 * length.
 */
static unsigned long do_compress(void **pptr, unsigned long size)
{
	git_zstream stream;
	void *in, *out;
	unsigned long maxsize;

	git_deflate_init(&stream, pack_compression_level);
	/* worst-case output size for this input, so one output buffer suffices */
	maxsize = git_deflate_bound(&stream, size);

	in = *pptr;
	out = xmalloc(maxsize);
	*pptr = out;

	stream.next_in = in;
	stream.avail_in = size;
	stream.next_out = out;
	stream.avail_out = maxsize;
	while (git_deflate(&stream, Z_FINISH) == Z_OK)
		; /* nothing */
	git_deflate_end(&stream);

	free(in);
	return stream.total_out;
}

183
/*
 * Stream-compress a large blob from `st` directly into the pack file
 * `f`, without ever holding the whole object in memory.  `oid` is used
 * only for error reporting.  Returns the number of compressed bytes
 * written.  Dies on read or deflate errors.
 */
static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
					   const struct object_id *oid)
{
	git_zstream stream;
	unsigned char ibuf[1024 * 16];
	unsigned char obuf[1024 * 16];
	unsigned long olen = 0;

	git_deflate_init(&stream, pack_compression_level);

	for (;;) {
		ssize_t readlen;
		int zret = Z_OK;
		readlen = read_istream(st, ibuf, sizeof(ibuf));
		if (readlen == -1)
			die(_("unable to read %s"), oid_to_hex(oid));

		stream.next_in = ibuf;
		stream.avail_in = readlen;
		/*
		 * Drain the deflater; on EOF (readlen == 0) keep calling
		 * with Z_FINISH until the stream is flushed out.
		 */
		while ((stream.avail_in || readlen == 0) &&
		       (zret == Z_OK || zret == Z_BUF_ERROR)) {
			stream.next_out = obuf;
			stream.avail_out = sizeof(obuf);
			zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
			hashwrite(f, obuf, stream.next_out - obuf);
			olen += stream.next_out - obuf;
		}
		if (stream.avail_in)
			die(_("deflate error (%d)"), zret);
		if (readlen == 0) {
			if (zret != Z_STREAM_END)
				die(_("deflate error (%d)"), zret);
			break;
		}
	}
	git_deflate_end(&stream);
	return olen;
}

222 223 224 225
/*
 * we are going to reuse the existing object data as is.  make
 * sure it is not corrupt.
 */
static int check_pack_inflate(struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len,
		unsigned long expect)
{
	git_zstream stream;
	unsigned char fakebuf[4096], *in;
	int st;

	memset(&stream, 0, sizeof(stream));
	git_inflate_init(&stream);
	do {
		/* walk the pack windows; output is discarded, only counted */
		in = use_pack(p, w_curs, offset, &stream.avail_in);
		stream.next_in = in;
		stream.next_out = fakebuf;
		stream.avail_out = sizeof(fakebuf);
		st = git_inflate(&stream, Z_FINISH);
		offset += stream.next_in - in;
	} while (st == Z_OK || st == Z_BUF_ERROR);
	git_inflate_end(&stream);
	/* valid iff the stream ended cleanly with the expected in/out sizes */
	return (st == Z_STREAM_END &&
		stream.total_out == expect &&
		stream.total_in == len) ? 0 : -1;
}

252
/*
 * Copy `len` raw bytes starting at `offset` in pack `p` into the
 * output file `f`, going through the pack window machinery so we never
 * map more than one window at a time.
 */
static void copy_pack_data(struct hashfile *f,
		struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len)
{
	while (len > 0) {
		unsigned long chunk;
		unsigned char *data = use_pack(p, w_curs, offset, &chunk);

		if (chunk > len)
			chunk = (unsigned long)len;
		hashwrite(f, data, chunk);
		offset += chunk;
		len -= chunk;
	}
}

271
/* Return 0 if we will bust the pack-size limit */
static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
					   unsigned long limit, int usable_delta)
{
	unsigned long size, datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	enum object_type type;
	void *buf;
	struct git_istream *st = NULL;
	const unsigned hashsz = the_hash_algo->rawsz;

	if (!usable_delta) {
		/* big blobs are streamed rather than loaded in full */
		if (oe_type(entry) == OBJ_BLOB &&
		    oe_size_greater_than(&to_pack, entry, big_file_threshold) &&
		    (st = open_istream(&entry->idx.oid, &type, &size, NULL)) != NULL)
			buf = NULL;
		else {
			buf = read_object_file(&entry->idx.oid, &type, &size);
			if (!buf)
				die(_("unable to read %s"),
				    oid_to_hex(&entry->idx.oid));
		}
		/*
		 * make sure no cached delta data remains from a
		 * previous attempt before a pack split occurred.
		 */
		FREE_AND_NULL(entry->delta_data);
		entry->z_delta_size = 0;
	} else if (entry->delta_data) {
		/* a delta was cached during the compression phase; take it over */
		size = DELTA_SIZE(entry);
		buf = entry->delta_data;
		entry->delta_data = NULL;
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	} else {
		/* no cached delta; recompute it from the base */
		buf = get_delta(entry);
		size = DELTA_SIZE(entry);
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	}

	if (st)	/* large blob case, just assume we don't compress well */
		datalen = size;
	else if (entry->z_delta_size)
		datalen = entry->z_delta_size;
	else
		datalen = do_compress(&buf, size);

	/*
	 * The object header is a byte of 'type' followed by zero or
	 * more bytes of length.
	 */
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, size);

	if (type == OBJ_OFS_DELTA) {
		/*
		 * Deltas with relative base contain an additional
		 * encoding of the relative offset for the delta
		 * base from this object's position in the pack.
		 */
		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		/* trailing hashsz accounts for the pack checksum footer */
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
	} else if (type == OBJ_REF_DELTA) {
		/*
		 * Deltas with a base reference contain
		 * additional bytes for the base object ID.
		 */
		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
		hdrlen += hashsz;
	} else {
		if (limit && hdrlen + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
	}
	if (st) {
		datalen = write_large_blob_data(st, f, &entry->idx.oid);
		close_istream(st);
	} else {
		hashwrite(f, buf, datalen);
		free(buf);
	}

	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
				unsigned long limit, int usable_delta)
{
	struct packed_git *p = IN_PACK(entry);
	struct pack_window *w_curs = NULL;
	struct revindex_entry *revidx;
	off_t offset;
	enum object_type type = oe_type(entry);
	off_t datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	const unsigned hashsz = the_hash_algo->rawsz;
	unsigned long entry_size = SIZE(entry);

	if (DELTA(entry))
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, entry_size);

	/* the revindex gives us the end of this entry via the next one */
	offset = entry->in_pack_offset;
	revidx = find_pack_revindex(p, offset);
	datalen = revidx[1].offset - offset;
	if (!pack_to_stdout && p->index_version > 1 &&
	    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
		error("bad packed object CRC for %s",
		      oid_to_hex(&entry->idx.oid));
		unuse_pack(&w_curs);
		/* fall back to rewriting the object from scratch */
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	/* skip past the in-pack header; we re-emit our own header below */
	offset += entry->in_pack_header_size;
	datalen -= entry->in_pack_header_size;

	/* v1 indexes have no CRCs, so verify by inflating the data */
	if (!pack_to_stdout && p->index_version == 1 &&
	    check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
		error("corrupt packed object for %s",
		      oid_to_hex(&entry->idx.oid));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	if (type == OBJ_OFS_DELTA) {
		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
		reused_delta++;
	} else if (type == OBJ_REF_DELTA) {
		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
		hdrlen += hashsz;
		reused_delta++;
	} else {
		if (limit && hdrlen + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
	}
	copy_pack_data(f, p, &w_curs, offset, datalen);
	unuse_pack(&w_curs);
	reused++;
	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_object(struct hashfile *f,
			  struct object_entry *entry,
			  off_t write_offset)
{
	unsigned long limit;
	off_t len;
	int usable_delta, to_reuse;

	if (!pack_to_stdout)
		crc32_begin(f);

	/* apply size limit if limited packsize and not first object */
	if (!pack_size_limit || !nr_written)
		limit = 0;
	else if (pack_size_limit <= write_offset)
		/*
		 * the earlier object did not fit the limit; avoid
		 * mistaking this with unlimited (i.e. limit = 0).
		 */
		limit = 1;
	else
		limit = pack_size_limit - write_offset;

	/* decide whether the delta form of this object can be written */
	if (!DELTA(entry))
		usable_delta = 0;	/* no delta */
	else if (!pack_size_limit)
	       usable_delta = 1;	/* unlimited packfile */
	else if (DELTA(entry)->idx.offset == (off_t)-1)
		usable_delta = 0;	/* base was written to another pack */
	else if (DELTA(entry)->idx.offset)
		usable_delta = 1;	/* base already exists in this pack */
	else
		usable_delta = 0;	/* base could end up in another pack */

	/* decide whether existing pack data can be copied verbatim */
	if (!reuse_object)
		to_reuse = 0;	/* explicit */
	else if (!IN_PACK(entry))
		to_reuse = 0;	/* can't reuse what we don't have */
	else if (oe_type(entry) == OBJ_REF_DELTA ||
		 oe_type(entry) == OBJ_OFS_DELTA)
				/* check_object() decided it for us ... */
		to_reuse = usable_delta;
				/* ... but pack split may override that */
	else if (oe_type(entry) != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (DELTA(entry))
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

	if (!to_reuse)
		len = write_no_reuse_object(f, entry, limit, usable_delta);
	else
		len = write_reuse_object(f, entry, limit, usable_delta);
	if (!len)
		return 0;

	if (usable_delta)
		written_delta++;
	written++;
	if (!pack_to_stdout)
		entry->idx.crc32 = crc32_end(f);
	return len;
}

530 531 532 533 534 535 536
/*
 * Result of write_one(); callers mostly care about WRITE_ONE_BREAK,
 * which signals that writing the entry would bust the pack-size limit.
 */
enum write_one_status {
	WRITE_ONE_SKIP = -1, /* already written */
	WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
	WRITE_ONE_WRITTEN = 1, /* normal */
	WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};

537
/*
 * Write one entry (recursing to write its delta base first, if any) at
 * *offset in `f`, and advance *offset past it.  See write_one_status
 * for the possible outcomes.
 */
static enum write_one_status write_one(struct hashfile *f,
				       struct object_entry *e,
				       off_t *offset)
{
	off_t size;
	int recursing;

	/*
	 * we set offset to 1 (which is an impossible value) to mark
	 * the fact that this object is involved in "write its base
	 * first before writing a deltified object" recursion.
	 */
	recursing = (e->idx.offset == 1);
	if (recursing) {
		warning("recursive delta detected for object %s",
			oid_to_hex(&e->idx.oid));
		return WRITE_ONE_RECURSIVE;
	} else if (e->idx.offset || e->preferred_base) {
		/* offset is non zero if object is written already. */
		return WRITE_ONE_SKIP;
	}

	/* if we are deltified, write out base object first. */
	if (DELTA(e)) {
		e->idx.offset = 1; /* now recurse */
		switch (write_one(f, DELTA(e), offset)) {
		case WRITE_ONE_RECURSIVE:
			/* we cannot depend on this one */
			SET_DELTA(e, NULL);
			break;
		default:
			break;
		case WRITE_ONE_BREAK:
			/* restore the marker so a later pack can retry */
			e->idx.offset = recursing;
			return WRITE_ONE_BREAK;
		}
	}

	e->idx.offset = *offset;
	size = write_object(f, e, *offset);
	if (!size) {
		e->idx.offset = recursing;
		return WRITE_ONE_BREAK;
	}
	written_list[nr_written++] = &e->idx;

	/* make sure off_t is sufficiently large not to wrap */
	if (signed_add_overflows(*offset, size))
		die("pack too large for current definition of off_t");
	*offset += size;
	return WRITE_ONE_WRITTEN;
}

590
/*
 * for_each_tag_ref() callback: mark the packlist entry a tag points at
 * (and, if the ref peels, the peeled object too) so that
 * compute_write_order() can place tagged tips early in the pack.
 */
static int mark_tagged(const char *path, const struct object_id *oid, int flag,
		       void *cb_data)
{
	struct object_id peeled;
	struct object_entry *entry = packlist_find(&to_pack, oid->hash, NULL);

	if (entry)
		entry->tagged = 1;
	if (!peel_ref(path, &peeled)) {
		entry = packlist_find(&to_pack, peeled.hash, NULL);
		if (entry)
			entry->tagged = 1;
	}
	return 0;
}

606
static inline void add_to_write_order(struct object_entry **wo,
607
			       unsigned int *endp,
608 609 610 611 612 613 614 615 616
			       struct object_entry *e)
{
	if (e->filled)
		return;
	wo[(*endp)++] = e;
	e->filled = 1;
}

/*
 * Emit `e` and its entire delta-descendant subtree into the write
 * order, without recursion.  The traversal walks down delta_child
 * links and across delta_sibling links; `add_to_order` is false while
 * we are climbing back up, so already-emitted levels are not re-added.
 */
static void add_descendants_to_write_order(struct object_entry **wo,
					   unsigned int *endp,
					   struct object_entry *e)
{
	int add_to_order = 1;
	while (e) {
		if (add_to_order) {
			struct object_entry *s;
			/* add this node... */
			add_to_write_order(wo, endp, e);
			/* all its siblings... */
			for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
				add_to_write_order(wo, endp, s);
			}
		}
		/* drop down a level to add left subtree nodes if possible */
		if (DELTA_CHILD(e)) {
			add_to_order = 1;
			e = DELTA_CHILD(e);
		} else {
			add_to_order = 0;
			/* our sibling might have some children, it is next */
			if (DELTA_SIBLING(e)) {
				e = DELTA_SIBLING(e);
				continue;
			}
			/* go back to our parent node */
			e = DELTA(e);
			while (e && !DELTA_SIBLING(e)) {
				/* we're on the right side of a subtree, keep
				 * going up until we can go right again */
				e = DELTA(e);
			}
			if (!e) {
				/* done- we hit our original root node */
				return;
			}
			/* pass it off to sibling at this level */
			e = DELTA_SIBLING(e);
		}
	}
}

/*
 * Place the whole delta family of `e` in the write order: climb the
 * delta chain to its undeltified root, then emit the root together
 * with all of its descendants.
 */
static void add_family_to_write_order(struct object_entry **wo,
				      unsigned int *endp,
				      struct object_entry *e)
{
	struct object_entry *root = e;

	while (DELTA(root))
		root = DELTA(root);
	add_descendants_to_write_order(wo, endp, root);
}

/*
 * Compute the order in which objects are written to the pack: recency
 * order up to the first tagged tip, then tagged tips, remaining commits
 * and tags, trees, and finally whole delta families of whatever is
 * left.  Returns a newly allocated array of to_pack.nr_objects entries
 * (caller frees).  Dies if the ordering does not cover every object.
 */
static struct object_entry **compute_write_order(void)
{
	unsigned int i, wo_end, last_untagged;

	struct object_entry **wo;
	struct object_entry *objects = to_pack.objects;

	/* reset per-run ordering state on every entry */
	for (i = 0; i < to_pack.nr_objects; i++) {
		objects[i].tagged = 0;
		objects[i].filled = 0;
		SET_DELTA_CHILD(&objects[i], NULL);
		SET_DELTA_SIBLING(&objects[i], NULL);
	}

	/*
	 * Fully connect delta_child/delta_sibling network.
	 * Make sure delta_sibling is sorted in the original
	 * recency order.
	 */
	for (i = to_pack.nr_objects; i > 0;) {
		struct object_entry *e = &objects[--i];
		if (!DELTA(e))
			continue;
		/* Mark me as the first child */
		e->delta_sibling_idx = DELTA(e)->delta_child_idx;
		SET_DELTA_CHILD(DELTA(e), e);
	}

	/*
	 * Mark objects that are at the tip of tags.
	 */
	for_each_tag_ref(mark_tagged, NULL);

	/*
	 * Give the objects in the original recency order until
	 * we see a tagged tip.
	 */
	ALLOC_ARRAY(wo, to_pack.nr_objects);
	for (i = wo_end = 0; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			break;
		add_to_write_order(wo, &wo_end, &objects[i]);
	}
	last_untagged = i;

	/*
	 * Then fill all the tagged tips.
	 */
	for (; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			add_to_write_order(wo, &wo_end, &objects[i]);
	}

	/*
	 * And then all remaining commits and tags.
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (oe_type(&objects[i]) != OBJ_COMMIT &&
		    oe_type(&objects[i]) != OBJ_TAG)
			continue;
		add_to_write_order(wo, &wo_end, &objects[i]);
	}

	/*
	 * And then all the trees.
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (oe_type(&objects[i]) != OBJ_TREE)
			continue;
		add_to_write_order(wo, &wo_end, &objects[i]);
	}

	/*
	 * Finally all the rest in really tight order
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (!objects[i].filled)
			add_family_to_write_order(wo, &wo_end, &objects[i]);
	}

	if (wo_end != to_pack.nr_objects)
		die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects);

	return wo;
}

756
/*
 * Copy the reusable body of `reuse_packfile` (everything between its
 * header and reuse_packfile_offset) verbatim into `f`, updating the
 * progress meter along the way.  Returns the number of bytes copied.
 */
static off_t write_reused_pack(struct hashfile *f)
{
	unsigned char buffer[8192];
	off_t to_write, total;
	int fd;

	if (!is_pack_valid(reuse_packfile))
		die("packfile is invalid: %s", reuse_packfile->pack_name);

	fd = git_open(reuse_packfile->pack_name);
	if (fd < 0)
		die_errno("unable to open packfile for reuse: %s",
			  reuse_packfile->pack_name);

	/* our own header was already written; skip the source pack's */
	if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
		die_errno("unable to seek in reused packfile");

	/* negative offset means "everything up to the trailing checksum" */
	if (reuse_packfile_offset < 0)
		reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz;

	total = to_write = reuse_packfile_offset - sizeof(struct pack_header);

	while (to_write) {
		int read_pack = xread(fd, buffer, sizeof(buffer));

		if (read_pack <= 0)
			die_errno("unable to read from reused packfile");

		if (read_pack > to_write)
			read_pack = to_write;

		hashwrite(f, buffer, read_pack);
		to_write -= read_pack;

		/*
		 * We don't know the actual number of objects written,
		 * only how many bytes written, how many bytes total, and
		 * how many objects total. So we can fake it by pretending all
		 * objects we are writing are the same size. This gives us a
		 * smooth progress meter, and at the end it matches the true
		 * answer.
		 */
		written = reuse_packfile_objects *
				(((double)(total - to_write)) / total);
		display_progress(progress_state, written);
	}

	close(fd);
	written = reuse_packfile_objects;
	display_progress(progress_state, written);
	return reuse_packfile_offset - sizeof(struct pack_header);
}

809 810 811 812
static const char no_split_warning[] = N_(
"disabling bitmap writing, packs are split due to pack.packSizeLimit"
);

813
/*
 * Write all objects in to_pack, splitting across multiple packfiles if
 * pack_size_limit forces it (the do/while loops once per resulting
 * pack).  Output goes to stdout or to a tmp packfile that is finalized
 * with an index (and optionally a bitmap).  Dies if the total number of
 * objects written disagrees with nr_result.
 */
static void write_pack_file(void)
{
	uint32_t i = 0, j;
	struct hashfile *f;
	off_t offset;
	uint32_t nr_remaining = nr_result;
	time_t last_mtime = 0;
	struct object_entry **write_order;

	if (progress > pack_to_stdout)
		progress_state = start_progress(_("Writing objects"), nr_result);
	ALLOC_ARRAY(written_list, to_pack.nr_objects);
	write_order = compute_write_order();

	do {
		struct object_id oid;
		char *pack_tmp_name = NULL;

		if (pack_to_stdout)
			f = hashfd_throughput(1, "<stdout>", progress_state);
		else
			f = create_tmp_packfile(&pack_tmp_name);

		offset = write_pack_header(f, nr_remaining);

		/* verbatim pack reuse only happens in --stdout mode */
		if (reuse_packfile) {
			off_t packfile_size;
			assert(pack_to_stdout);

			packfile_size = write_reused_pack(f);
			offset += packfile_size;
		}

		/* `i` persists across iterations, resuming where we split */
		nr_written = 0;
		for (; i < to_pack.nr_objects; i++) {
			struct object_entry *e = write_order[i];
			if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
				break;
			display_progress(progress_state, written);
		}

		/*
		 * Did we write the wrong # entries in the header?
		 * If so, rewrite it like in fast-import
		 */
		if (pack_to_stdout) {
			finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
		} else if (nr_written == nr_remaining) {
			finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
		} else {
			int fd = finalize_hashfile(f, oid.hash, 0);
			fixup_pack_header_footer(fd, oid.hash, pack_tmp_name,
						 nr_written, oid.hash, offset);
			close(fd);
			/* a split pack cannot carry a closed bitmap */
			if (write_bitmap_index) {
				warning(_(no_split_warning));
				write_bitmap_index = 0;
			}
		}

		if (!pack_to_stdout) {
			struct stat st;
			struct strbuf tmpname = STRBUF_INIT;

			/*
			 * Packs are runtime accessed in their mtime
			 * order since newer packs are more likely to contain
			 * younger objects.  So if we are creating multiple
			 * packs then we should modify the mtime of later ones
			 * to preserve this property.
			 */
			if (stat(pack_tmp_name, &st) < 0) {
				warning_errno("failed to stat %s", pack_tmp_name);
			} else if (!last_mtime) {
				last_mtime = st.st_mtime;
			} else {
				struct utimbuf utb;
				utb.actime = st.st_atime;
				utb.modtime = --last_mtime;
				if (utime(pack_tmp_name, &utb) < 0)
					warning_errno("failed utime() on %s", pack_tmp_name);
			}

			strbuf_addf(&tmpname, "%s-", base_name);

			if (write_bitmap_index) {
				bitmap_writer_set_checksum(oid.hash);
				bitmap_writer_build_type_index(
					&to_pack, written_list, nr_written);
			}

			finish_tmp_packfile(&tmpname, pack_tmp_name,
					    written_list, nr_written,
					    &pack_idx_opts, oid.hash);

			if (write_bitmap_index) {
				strbuf_addf(&tmpname, "%s.bitmap", oid_to_hex(&oid));

				stop_progress(&progress_state);

				bitmap_writer_show_progress(progress);
				bitmap_writer_reuse_bitmaps(&to_pack);
				bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
				bitmap_writer_build(&to_pack);
				bitmap_writer_finish(written_list, nr_written,
						     tmpname.buf, write_bitmap_options);
				write_bitmap_index = 0;
			}

			strbuf_release(&tmpname);
			free(pack_tmp_name);
			puts(oid_to_hex(&oid));
		}

		/* mark written objects as written to previous pack */
		for (j = 0; j < nr_written; j++) {
			written_list[j]->offset = (off_t)-1;
		}
		nr_remaining -= nr_written;
	} while (nr_remaining && i < to_pack.nr_objects);

	free(written_list);
	free(write_order);
	stop_progress(&progress_state);
	if (written != nr_result)
		die("wrote %"PRIu32" objects while expecting %"PRIu32,
			written, nr_result);
}

942 943
static int no_try_delta(const char *path)
{
944
	static struct attr_check *check;
945

946 947 948
	if (!check)
		check = attr_check_initl("delta", NULL);
	if (git_check_attr(path, check))
949
		return 0;
950
	if (ATTR_FALSE(check->items[0].value))
951 952 953 954
		return 1;
	return 0;
}

955 956 957 958 959 960 961 962 963 964
/*
 * When adding an object, check whether we have already added it
 * to our packing list. If so, we can skip. However, if we are
 * being asked to exclude it, but the previous mention was to include
 * it, make sure to adjust its flags and tweak our numbers accordingly.
 *
 * As an optimization, we pass out the index position where we would have
 * found the item, since that saves us from having to look it up again a
 * few lines later when we want to add the new entry.
 */
965
static int have_duplicate_entry(const struct object_id *oid,
				int exclude,
				uint32_t *index_pos)
{
	struct object_entry *entry;

	entry = packlist_find(&to_pack, oid->hash, index_pos);
	if (!entry)
		return 0;

	if (exclude) {
		/* demote a previously-included entry to a preferred base */
		if (!entry->preferred_base)
			nr_result--;
		entry->preferred_base = 1;
	}

	return 1;
}

984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004
/*
 * Decide whether an object found in pack `p` should go into our pack:
 * 1 = yes, 0 = no, -1 = cannot tell from this pack alone (caller must
 * keep checking other packs).
 */
static int want_found_object(int exclude, struct packed_git *p)
{
	if (exclude)
		return 1;
	if (incremental)
		return 0;

	/*
	 * When asked to do --local (do not include an object that appears in a
	 * pack we borrow from elsewhere) or --honor-pack-keep (do not include
	 * an object that appears in a pack marked with .keep), finding a pack
	 * that matches the criteria is sufficient for us to decide to omit it.
	 * However, even if this pack does not satisfy the criteria, we need to
	 * make sure no copy of this object appears in _any_ pack that makes us
	 * to omit the object, so we need to check all the packs.
	 *
	 * We can however first check whether these options can possibly matter;
	 * if they do not matter we know we want the object in generated pack.
	 * Otherwise, we signal "-1" at the end to tell the caller that we do
	 * not know either way, and it needs to check more packs.
	 */
	if (!ignore_packed_keep_on_disk &&
	    !ignore_packed_keep_in_core &&
	    (!local || !have_non_local_packs))
		return 1;

	if (local && !p->pack_local)
		return 0;
	if (p->pack_local &&
	    ((ignore_packed_keep_on_disk && p->pack_keep) ||
	     (ignore_packed_keep_in_core && p->pack_keep_in_core)))
		return 0;

	/* we don't know yet; keep looking for more packs */
	return -1;
}

1021 1022 1023 1024
/*
 * Check whether we want the object in the pack (e.g., we do not want
 * objects found in non-local stores if the "--local" option was used).
 *
 * If the caller already knows an existing pack it wants to take the object
 * from, that is passed in *found_pack and *found_offset; otherwise this
 * function finds if there is any pack that has the object and returns the pack
 * and its offset in these variables.
 */
static int want_object_in_pack(const struct object_id *oid,
			       int exclude,
			       struct packed_git **found_pack,
			       off_t *found_offset)
{
	int want;
	struct list_head *pos;

	if (!exclude && local && has_loose_object_nonlocal(oid))
		return 0;

	/*
	 * If we already know the pack object lives in, start checks from that
	 * pack - in the usual case when neither --local was given nor .keep files
	 * are present we will determine the answer right now.
	 */
	if (*found_pack) {
		want = want_found_object(exclude, *found_pack);
		if (want != -1)
			return want;
	}
	list_for_each(pos, get_packed_git_mru(the_repository)) {
		struct packed_git *p = list_entry(pos, struct packed_git, mru);
		off_t offset;

		if (p == *found_pack)
			offset = *found_offset;
		else
			offset = find_pack_entry_one(oid->hash, p);

		if (offset) {
			if (!*found_pack) {
				if (!is_pack_valid(p))
					continue;
				*found_offset = offset;
				*found_pack = p;
			}
			want = want_found_object(exclude, p);
			/* keep frequently-hit packs near the front of the MRU list */
			if (!exclude && want > 0)
				list_move(&p->mru,
					  get_packed_git_mru(the_repository));
			if (want != -1)
				return want;
		}
	}

	return 1;
}

1079
/*
 * Allocate a new packlist entry for `oid` and fill in what we already
 * know: its type, name hash, whether it is only a preferred delta base
 * (exclude), and - if it was found in an existing pack - which pack and
 * offset.  Non-excluded entries count towards nr_result.
 */
static void create_object_entry(const struct object_id *oid,
				enum object_type type,
				uint32_t hash,
				int exclude,
				int no_try_delta,
				uint32_t index_pos,
				struct packed_git *found_pack,
				off_t found_offset)
{
	struct object_entry *entry;

	entry = packlist_alloc(&to_pack, oid->hash, index_pos);
	entry->hash = hash;
	oe_set_type(entry, type);
	if (exclude)
		entry->preferred_base = 1;
	else
		nr_result++;
	if (found_pack) {
		oe_set_in_pack(&to_pack, entry, found_pack);
		entry->in_pack_offset = found_offset;
	}

	entry->no_try_delta = no_try_delta;
}
1104

1105 1106 1107 1108
static const char no_closure_warning[] = N_(
"disabling bitmap writing, as some objects are not being packed"
);

1109
/*
 * Add one object to the packing list, unless it is a duplicate or is
 * rejected by the --local/.keep/incremental filters.  Returns 1 if an
 * entry was created, 0 otherwise.  Rejecting an object also disables
 * bitmap writing, since the pack then lacks closure.
 */
static int add_object_entry(const struct object_id *oid, enum object_type type,
			    const char *name, int exclude)
{
	struct packed_git *found_pack = NULL;
	off_t found_offset = 0;
	uint32_t index_pos;

	display_progress(progress_state, ++nr_seen);

	if (have_duplicate_entry(oid, exclude, &index_pos))
		return 0;

	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
		/* The pack is missing an object, so it will not have closure */
		if (write_bitmap_index) {
			warning(_(no_closure_warning));
			write_bitmap_index = 0;
		}
		return 0;
	}

	create_object_entry(oid, type, pack_name_hash(name),
			    exclude, name && no_try_delta(name),
			    index_pos, found_pack, found_offset);
	return 1;
}

1136
static int add_object_entry_from_bitmap(const struct object_id *oid,
1137 1138 1139 1140