read-cache.c 29.3 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5
/*
 * GIT - The information manager from hell
 *
 * Copyright (C) Linus Torvalds, 2005
 */
6
#define NO_THE_INDEX_COMPATIBILITY_MACROS
7
#include "cache.h"
8
#include "cache-tree.h"
9
#include "refs.h"
10
#include "dir.h"
11 12 13 14 15 16 17 18 19 20 21 22

/* Index extensions.
 *
 * The first letter should be 'A'..'Z' for extensions that are not
 * necessary for a correct operation (i.e. optimization data).
 * When new extensions are added that _need_ to be understood in
 * order to correctly interpret the index file, pick a character that
 * is outside the range, to cause the reader to abort.
 */

/*
 * Pack the four bytes of an extension signature into one 32-bit tag,
 * first byte most significant.  Every byte goes through unsigned char
 * before it is widened, so a signature byte >= 0x80 can neither
 * sign-extend into the higher bits nor left-shift a negative value,
 * and the argument is parenthesized so any pointer expression works.
 */
#define CACHE_EXT(s) ( ((unsigned int)(unsigned char)(s)[0] << 24) | \
		       ((unsigned int)(unsigned char)(s)[1] << 16) | \
		       ((unsigned int)(unsigned char)(s)[2] << 8) | \
		       ((unsigned int)(unsigned char)(s)[3]) )
#define CACHE_EXT_TREE 0x54524545	/* "TREE" */
23

24
/* The default index instance; refresh_cache_entry() in this file operates on it. */
struct index_state the_index;
25

26 27 28 29 30 31 32 33 34
/*
 * This only updates the "non-critical" parts of the directory
 * cache, ie the parts that aren't tracked by GIT, and only used
 * to validate the cache.
 */
void fill_stat_cache_info(struct cache_entry *ce, struct stat *st)
{
	ce->ce_ctime.sec = htonl(st->st_ctime);
	ce->ce_mtime.sec = htonl(st->st_mtime);
35
#ifdef USE_NSEC
36 37 38 39 40 41 42 43
	ce->ce_ctime.nsec = htonl(st->st_ctim.tv_nsec);
	ce->ce_mtime.nsec = htonl(st->st_mtim.tv_nsec);
#endif
	ce->ce_dev = htonl(st->st_dev);
	ce->ce_ino = htonl(st->st_ino);
	ce->ce_uid = htonl(st->st_uid);
	ce->ce_gid = htonl(st->st_gid);
	ce->ce_size = htonl(st->st_size);
Junio C Hamano's avatar
Junio C Hamano committed
44 45 46

	if (assume_unchanged)
		ce->ce_flags |= htons(CE_VALID);
47 48
}

Junio C Hamano's avatar
Junio C Hamano committed
49 50 51 52 53 54 55
static int ce_compare_data(struct cache_entry *ce, struct stat *st)
{
	int match = -1;
	int fd = open(ce->name, O_RDONLY);

	if (fd >= 0) {
		unsigned char sha1[20];
56
		if (!index_fd(sha1, fd, st, 0, OBJ_BLOB, ce->name))
57
			match = hashcmp(sha1, ce->sha1);
58
		/* index_fd() closed the file descriptor already */
Junio C Hamano's avatar
Junio C Hamano committed
59 60 61 62
	}
	return match;
}

63
static int ce_compare_link(struct cache_entry *ce, size_t expected_size)
Junio C Hamano's avatar
Junio C Hamano committed
64 65 66 67 68
{
	int match = -1;
	char *target;
	void *buffer;
	unsigned long size;
69
	enum object_type type;
Junio C Hamano's avatar
Junio C Hamano committed
70 71 72 73 74 75 76 77
	int len;

	target = xmalloc(expected_size);
	len = readlink(ce->name, target, expected_size);
	if (len != expected_size) {
		free(target);
		return -1;
	}
78
	buffer = read_sha1_file(ce->sha1, &type, &size);
Junio C Hamano's avatar
Junio C Hamano committed
79 80 81 82 83 84 85 86 87 88 89
	if (!buffer) {
		free(target);
		return -1;
	}
	if (size == expected_size)
		match = memcmp(buffer, target, size);
	free(buffer);
	free(target);
	return match;
}

90 91 92 93 94 95
static int ce_compare_gitlink(struct cache_entry *ce)
{
	unsigned char sha1[20];

	/*
	 * We don't actually require that the .git directory
Martin Waitz's avatar
Martin Waitz committed
96
	 * under GITLINK directory be a valid git directory. It
97 98 99 100 101 102 103 104 105 106
	 * might even be missing (in case nobody populated that
	 * sub-project).
	 *
	 * If so, we consider it always to match.
	 */
	if (resolve_gitlink_ref(ce->name, "HEAD", sha1) < 0)
		return 0;
	return hashcmp(sha1, ce->sha1);
}

Junio C Hamano's avatar
Junio C Hamano committed
107 108 109 110 111 112 113 114
static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st)
{
	switch (st->st_mode & S_IFMT) {
	case S_IFREG:
		if (ce_compare_data(ce, st))
			return DATA_CHANGED;
		break;
	case S_IFLNK:
115
		if (ce_compare_link(ce, xsize_t(st->st_size)))
Junio C Hamano's avatar
Junio C Hamano committed
116 117
			return DATA_CHANGED;
		break;
118
	case S_IFDIR:
Martin Waitz's avatar
Martin Waitz committed
119
		if (S_ISGITLINK(ntohl(ce->ce_mode)))
120
			return 0;
Junio C Hamano's avatar
Junio C Hamano committed
121 122 123 124 125 126
	default:
		return TYPE_CHANGED;
	}
	return 0;
}

Junio C Hamano's avatar
Junio C Hamano committed
127
static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
128 129 130
{
	unsigned int changed = 0;

131 132 133
	switch (ntohl(ce->ce_mode) & S_IFMT) {
	case S_IFREG:
		changed |= !S_ISREG(st->st_mode) ? TYPE_CHANGED : 0;
Junio C Hamano's avatar
Junio C Hamano committed
134 135 136 137 138
		/* We consider only the owner x bit to be relevant for
		 * "mode changes"
		 */
		if (trust_executable_bit &&
		    (0100 & (ntohl(ce->ce_mode) ^ st->st_mode)))
139
			changed |= MODE_CHANGED;
140 141
		break;
	case S_IFLNK:
142 143 144
		if (!S_ISLNK(st->st_mode) &&
		    (has_symlinks || !S_ISREG(st->st_mode)))
			changed |= TYPE_CHANGED;
145
		break;
Martin Waitz's avatar
Martin Waitz committed
146
	case S_IFGITLINK:
147 148 149 150
		if (!S_ISDIR(st->st_mode))
			changed |= TYPE_CHANGED;
		else if (ce_compare_gitlink(ce))
			changed |= DATA_CHANGED;
151
		return changed;
152 153
	case 0: /* Special case: unmerged file in index */
		return MODE_CHANGED | DATA_CHANGED | TYPE_CHANGED;
154 155 156
	default:
		die("internal error: ce_mode is %o", ntohl(ce->ce_mode));
	}
157
	if (ce->ce_mtime.sec != htonl(st->st_mtime))
158
		changed |= MTIME_CHANGED;
159 160 161
	if (ce->ce_ctime.sec != htonl(st->st_ctime))
		changed |= CTIME_CHANGED;

162
#ifdef USE_NSEC
163 164 165 166 167
	/*
	 * nsec seems unreliable - not all filesystems support it, so
	 * as long as it is in the inode cache you get right nsec
	 * but after it gets flushed, you get zero nsec.
	 */
168
	if (ce->ce_mtime.nsec != htonl(st->st_mtim.tv_nsec))
169
		changed |= MTIME_CHANGED;
170
	if (ce->ce_ctime.nsec != htonl(st->st_ctim.tv_nsec))
171
		changed |= CTIME_CHANGED;
Junio C Hamano's avatar
Junio C Hamano committed
172
#endif
173 174 175

	if (ce->ce_uid != htonl(st->st_uid) ||
	    ce->ce_gid != htonl(st->st_gid))
176
		changed |= OWNER_CHANGED;
177
	if (ce->ce_ino != htonl(st->st_ino))
178
		changed |= INODE_CHANGED;
179 180 181 182 183 184 185 186 187 188 189

#ifdef USE_STDEV
	/*
	 * st_dev breaks on network filesystems where different
	 * clients will have different views of what "device"
	 * the filesystem is on
	 */
	if (ce->ce_dev != htonl(st->st_dev))
		changed |= INODE_CHANGED;
#endif

190
	if (ce->ce_size != htonl(st->st_size))
191
		changed |= DATA_CHANGED;
192

Junio C Hamano's avatar
Junio C Hamano committed
193 194 195
	return changed;
}

196 197
int ie_match_stat(struct index_state *istate,
		  struct cache_entry *ce, struct stat *st, int options)
Junio C Hamano's avatar
Junio C Hamano committed
198
{
Junio C Hamano's avatar
Junio C Hamano committed
199
	unsigned int changed;
200 201
	int ignore_valid = options & 01;
	int assume_racy_is_modified = options & 02;
Junio C Hamano's avatar
Junio C Hamano committed
202 203 204 205 206 207 208 209 210

	/*
	 * If it's marked as always valid in the index, it's
	 * valid whatever the checked-out copy says.
	 */
	if (!ignore_valid && (ce->ce_flags & htons(CE_VALID)))
		return 0;

	changed = ce_match_stat_basic(ce, st);
Junio C Hamano's avatar
Junio C Hamano committed
211

Junio C Hamano's avatar
Junio C Hamano committed
212 213 214 215 216 217 218 219 220 221 222 223 224 225
	/*
	 * Within 1 second of this sequence:
	 * 	echo xyzzy >file && git-update-index --add file
	 * running this command:
	 * 	echo frotz >file
	 * would give a falsely clean cache entry.  The mtime and
	 * length match the cache, and other stat fields do not change.
	 *
	 * We could detect this at update-index time (the cache entry
	 * being registered/updated records the same time as "now")
	 * and delay the return from git-update-index, but that would
	 * effectively mean we can make at most one commit per second,
	 * which is not acceptable.  Instead, we check cache entries
	 * whose mtime are the same as the index file timestamp more
Junio C Hamano's avatar
Junio C Hamano committed
226
	 * carefully than others.
Junio C Hamano's avatar
Junio C Hamano committed
227 228
	 */
	if (!changed &&
229 230
	    istate->timestamp &&
	    istate->timestamp <= ntohl(ce->ce_mtime.sec)) {
231 232 233 234 235
		if (assume_racy_is_modified)
			changed |= DATA_CHANGED;
		else
			changed |= ce_modified_check_fs(ce, st);
	}
236

Junio C Hamano's avatar
Junio C Hamano committed
237
	return changed;
238 239
}

240 241
int ie_modified(struct index_state *istate,
		struct cache_entry *ce, struct stat *st, int really)
242
{
Junio C Hamano's avatar
Junio C Hamano committed
243
	int changed, changed_fs;
244
	changed = ie_match_stat(istate, ce, st, really);
245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
	if (!changed)
		return 0;
	/*
	 * If the mode or type has changed, there's no point in trying
	 * to refresh the entry - it's not going to match
	 */
	if (changed & (MODE_CHANGED | TYPE_CHANGED))
		return changed;

	/* Immediately after read-tree or update-index --cacheinfo,
	 * the length field is zero.  For other cases the ce_size
	 * should match the SHA1 recorded in the index entry.
	 */
	if ((changed & DATA_CHANGED) && ce->ce_size != htonl(0))
		return changed;

Junio C Hamano's avatar
Junio C Hamano committed
261 262 263
	changed_fs = ce_modified_check_fs(ce, st);
	if (changed_fs)
		return changed | changed_fs;
264 265 266
	return 0;
}

267 268 269 270 271 272 273 274 275 276 277 278
/*
 * Order two names of len1 and len2 bytes.  When one name is a prefix
 * of the other, the byte following the common part decides; a name
 * that ends there and whose mode is a directory is compared as if it
 * were followed by '/'.
 *
 * Returns a negative value, 0 or a positive value.  Both names must be
 * readable at index min(len1, len2).
 */
int base_name_compare(const char *name1, int len1, int mode1,
		      const char *name2, int len2, int mode2)
{
	int common = (len2 <= len1) ? len2 : len1;
	int diff = memcmp(name1, name2, common);
	unsigned char next1, next2;

	if (diff)
		return diff;

	next1 = name1[common];
	next2 = name2[common];
	if (S_ISDIR(mode1) && !next1)
		next1 = '/';
	if (S_ISDIR(mode2) && !next2)
		next2 = '/';

	if (next1 == next2)
		return 0;
	return (next1 < next2) ? -1 : 1;
}

286
int cache_name_compare(const char *name1, int flags1, const char *name2, int flags2)
287
{
288 289
	int len1 = flags1 & CE_NAMEMASK;
	int len2 = flags2 & CE_NAMEMASK;
290 291 292 293 294 295 296 297 298 299
	int len = len1 < len2 ? len1 : len2;
	int cmp;

	cmp = memcmp(name1, name2, len);
	if (cmp)
		return cmp;
	if (len1 < len2)
		return -1;
	if (len1 > len2)
		return 1;
Junio C Hamano's avatar
Junio C Hamano committed
300

301 302 303
	/* Compare stages  */
	flags1 &= CE_STAGEMASK;
	flags2 &= CE_STAGEMASK;
Junio C Hamano's avatar
Junio C Hamano committed
304

305 306 307 308
	if (flags1 < flags2)
		return -1;
	if (flags1 > flags2)
		return 1;
309 310 311
	return 0;
}

312
int index_name_pos(struct index_state *istate, const char *name, int namelen)
313 314 315 316
{
	int first, last;

	first = 0;
317
	last = istate->cache_nr;
318 319
	while (last > first) {
		int next = (last + first) >> 1;
320
		struct cache_entry *ce = istate->cache[next];
321
		int cmp = cache_name_compare(name, namelen, ce->name, ntohs(ce->ce_flags));
322
		if (!cmp)
323
			return next;
324 325 326 327 328 329
		if (cmp < 0) {
			last = next;
			continue;
		}
		first = next+1;
	}
330
	return -first-1;
331 332
}

333
/* Remove entry, return true if there are more entries to go.. */
334
int remove_index_entry_at(struct index_state *istate, int pos)
335
{
336 337 338
	istate->cache_changed = 1;
	istate->cache_nr--;
	if (pos >= istate->cache_nr)
339
		return 0;
340 341 342
	memmove(istate->cache + pos,
		istate->cache + pos + 1,
		(istate->cache_nr - pos) * sizeof(struct cache_entry *));
343 344 345
	return 1;
}

346
int remove_file_from_index(struct index_state *istate, const char *path)
347
{
348
	int pos = index_name_pos(istate, path, strlen(path));
349 350
	if (pos < 0)
		pos = -pos-1;
351 352
	while (pos < istate->cache_nr && !strcmp(istate->cache[pos]->name, path))
		remove_index_entry_at(istate, pos);
353 354 355
	return 0;
}

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
/* Returns 0 when the name of ce is exactly the namelen bytes at path, 1 otherwise. */
static int compare_name(struct cache_entry *ce, const char *path, int namelen)
{
	if (ce_namelen(ce) != namelen)
		return 1;
	return memcmp(path, ce->name, namelen) != 0;
}

/*
 * Like index_name_pos(), but when no exact match exists also accept an
 * entry for the same path at the insertion point.
 *
 * Returns the position found, or -1 when path is not in the index.
 */
static int index_name_pos_also_unmerged(struct index_state *istate,
	const char *path, int namelen)
{
	struct cache_entry *ce;
	int pos = index_name_pos(istate, path, namelen);

	if (pos >= 0)
		return pos;

	/* maybe unmerged? */
	pos = -1 - pos;
	if (pos >= istate->cache_nr)
		return -1;
	ce = istate->cache[pos];
	if (compare_name(ce, path, namelen))
		return -1;

	/* order of preference: stage 2, 1, 3 */
	if (ce_stage(ce) != 1 || pos + 1 >= istate->cache_nr)
		return pos;
	ce = istate->cache[pos + 1];
	if (ce_stage(ce) == 2 && !compare_name(ce, path, namelen))
		return pos + 1;
	return pos;
}

384
int add_file_to_index(struct index_state *istate, const char *path, int verbose)
Johannes Schindelin's avatar
Johannes Schindelin committed
385
{
386
	int size, namelen, pos;
Johannes Schindelin's avatar
Johannes Schindelin committed
387 388 389 390 391 392
	struct stat st;
	struct cache_entry *ce;

	if (lstat(path, &st))
		die("%s: unable to stat (%s)", path, strerror(errno));

393 394
	if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode) && !S_ISDIR(st.st_mode))
		die("%s: can only add regular files, symbolic links or git-directories", path);
Johannes Schindelin's avatar
Johannes Schindelin committed
395 396

	namelen = strlen(path);
397 398 399 400
	if (S_ISDIR(st.st_mode)) {
		while (namelen && path[namelen-1] == '/')
			namelen--;
	}
Johannes Schindelin's avatar
Johannes Schindelin committed
401 402 403 404 405 406
	size = cache_entry_size(namelen);
	ce = xcalloc(1, size);
	memcpy(ce->name, path, namelen);
	ce->ce_flags = htons(namelen);
	fill_stat_cache_info(ce, &st);

407
	if (trust_executable_bit && has_symlinks)
408 409
		ce->ce_mode = create_ce_mode(st.st_mode);
	else {
410 411
		/* If there is an existing entry, pick the mode bits and type
		 * from it, otherwise assume unexecutable regular file.
Johannes Schindelin's avatar
Johannes Schindelin committed
412
		 */
413
		struct cache_entry *ent;
414
		int pos = index_name_pos_also_unmerged(istate, path, namelen);
415

416
		ent = (0 <= pos) ? istate->cache[pos] : NULL;
417
		ce->ce_mode = ce_mode_from_stat(ent, st.st_mode);
Johannes Schindelin's avatar
Johannes Schindelin committed
418 419
	}

420 421 422 423 424 425 426 427 428
	pos = index_name_pos(istate, ce->name, namelen);
	if (0 <= pos &&
	    !ce_stage(istate->cache[pos]) &&
	    !ie_modified(istate, istate->cache[pos], &st, 1)) {
		/* Nothing changed, really */
		free(ce);
		return 0;
	}

Johannes Schindelin's avatar
Johannes Schindelin committed
429 430
	if (index_path(ce->sha1, path, &st, 1))
		die("unable to index file %s", path);
431
	if (add_index_entry(istate, ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE))
Johannes Schindelin's avatar
Johannes Schindelin committed
432 433 434
		die("unable to add %s to index",path);
	if (verbose)
		printf("add '%s'\n", path);
435
	cache_tree_invalidate_path(istate->cache_tree, path);
Johannes Schindelin's avatar
Johannes Schindelin committed
436 437 438
	return 0;
}

439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463
/*
 * Allocate a cache entry for path with the given mode, object name
 * and stage.  When refresh is non-zero the entry is passed through
 * refresh_cache_entry() before it is returned.
 *
 * Returns the entry, or NULL when path fails verify_path() or the
 * refresh fails.
 */
struct cache_entry *make_cache_entry(unsigned int mode,
		const unsigned char *sha1, const char *path, int stage,
		int refresh)
{
	int size, len;
	struct cache_entry *ce, *refreshed;

	if (!verify_path(path))
		return NULL;

	len = strlen(path);
	size = cache_entry_size(len);
	ce = xcalloc(1, size);

	hashcpy(ce->sha1, sha1);
	memcpy(ce->name, path, len);
	ce->ce_flags = create_ce_flags(len, stage);
	ce->ce_mode = create_ce_mode(mode);

	if (!refresh)
		return ce;

	/*
	 * The refresh hands back ce itself, a freshly allocated copy,
	 * or NULL.  In the latter two cases nobody else references the
	 * entry allocated above, so release it instead of leaking it.
	 */
	refreshed = refresh_cache_entry(ce, 0);
	if (refreshed != ce)
		free(ce);
	return refreshed;
}

464
int ce_same_name(struct cache_entry *a, struct cache_entry *b)
465 466 467 468 469
{
	int len = ce_namelen(a);
	return ce_namelen(b) == len && !memcmp(a->name, b->name, len);
}

470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489
/*
 * Returns 1 when the name of ce is matched by one of the entries in
 * the NULL-terminated pathspec array, or when pathspec is NULL;
 * returns 0 otherwise.
 *
 * A spec matches when it is empty, or is a leading part of the name
 * that either ends in '/' or is followed in the name by '/' or the
 * end of the name.
 */
int ce_path_match(const struct cache_entry *ce, const char **pathspec)
{
	const char *name;
	int len;

	if (!pathspec)
		return 1;

	name = ce->name;
	len = ce_namelen(ce);
	for (; *pathspec; pathspec++) {
		const char *match = *pathspec;
		int matchlen = strlen(match);

		if (matchlen > len || memcmp(name, match, matchlen))
			continue;
		if (!matchlen ||
		    name[matchlen - 1] == '/' ||
		    name[matchlen] == '/' || name[matchlen] == '\0')
			return 1;
	}
	return 0;
}

496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
/*
 * We fundamentally don't like some paths: we don't want
 * dot or dot-dot anywhere, and for obvious reasons don't
 * want to recurse into ".git" either.
 *
 * Also, we don't want double slashes or slashes at the
 * end that can make pathnames ambiguous.
 */
/*
 * rest points just past the leading '.' of a path component.
 * Returns 0 when the component is ".", ".." or ".git" (each followed
 * by NUL or a slash), 1 for any other dot-file.
 */
static int verify_dotfile(const char *rest)
{
	char first = rest[0];

	/* "." is not allowed */
	if (first == '\0' || first == '/')
		return 0;

	/* ".." is not allowed */
	if (first == '.')
		return !(rest[1] == '\0' || rest[1] == '/');

	/* ".git" followed by NUL or slash is bad */
	if (first == 'g' && rest[1] == 'i' && rest[2] == 't')
		return !(rest[3] == '\0' || rest[3] == '/');

	return 1;
}

/*
 * Returns 1 when path is acceptable, 0 when it is empty, has an empty
 * component (leading, doubled or trailing slash) or has a component
 * rejected by verify_dotfile().
 */
int verify_path(const char *path)
{
	/* Each iteration validates one component starting at path. */
	for (;;) {
		char c = *path++;

		if (c == '/' || c == '\0')
			return 0;
		if (c == '.' && !verify_dotfile(path))
			return 0;

		/* Skip over the remainder of this component */
		do {
			c = *path++;
		} while (c && c != '/');

		if (!c)
			return 1;
	}
}

560 561 562
/*
 * Do we have another file that has the beginning components being a
 * proper superset of the name we're trying to add?
563
 */
564 565
static int has_file_name(struct index_state *istate,
			 const struct cache_entry *ce, int pos, int ok_to_replace)
566
{
567 568
	int retval = 0;
	int len = ce_namelen(ce);
569
	int stage = ce_stage(ce);
570
	const char *name = ce->name;
571

572 573
	while (pos < istate->cache_nr) {
		struct cache_entry *p = istate->cache[pos++];
574

575
		if (len >= ce_namelen(p))
576
			break;
577 578
		if (memcmp(name, p->name, len))
			break;
579 580
		if (ce_stage(p) != stage)
			continue;
581 582
		if (p->name[len] != '/')
			continue;
583 584
		if (!ce_stage(p) && !p->ce_mode)
			continue;
585 586 587
		retval = -1;
		if (!ok_to_replace)
			break;
588
		remove_index_entry_at(istate, --pos);
589
	}
590 591
	return retval;
}
592

593 594 595 596
/*
 * Do we have another file with a pathname that is a proper
 * subset of the name we're trying to add?
 */
597 598
static int has_dir_name(struct index_state *istate,
			const struct cache_entry *ce, int pos, int ok_to_replace)
599 600
{
	int retval = 0;
601
	int stage = ce_stage(ce);
602 603
	const char *name = ce->name;
	const char *slash = name + ce_namelen(ce);
604

605 606
	for (;;) {
		int len;
607

608 609 610 611 612 613 614
		for (;;) {
			if (*--slash == '/')
				break;
			if (slash <= ce->name)
				return retval;
		}
		len = slash - name;
615

616
		pos = index_name_pos(istate, name, ntohs(create_ce_flags(len, stage)));
617
		if (pos >= 0) {
618 619 620 621 622 623 624 625
			/*
			 * Found one, but not so fast.  This could
			 * be a marker that says "I was here, but
			 * I am being removed".  Such an entry is
			 * not a part of the resulting tree, and
			 * it is Ok to have a directory at the same
			 * path.
			 */
626
			if (stage || istate->cache[pos]->ce_mode) {
627 628 629
				retval = -1;
				if (!ok_to_replace)
					break;
630
				remove_index_entry_at(istate, pos);
631 632
				continue;
			}
633
		}
634 635
		else
			pos = -pos-1;
636 637 638 639

		/*
		 * Trivial optimization: if we find an entry that
		 * already matches the sub-directory, then we know
640
		 * we're ok, and we can exit.
641
		 */
642 643
		while (pos < istate->cache_nr) {
			struct cache_entry *p = istate->cache[pos];
644 645 646 647
			if ((ce_namelen(p) <= len) ||
			    (p->name[len] != '/') ||
			    memcmp(p->name, name, len))
				break; /* not our subdirectory */
648
			if (ce_stage(p) == stage && (stage || p->ce_mode))
649 650 651 652 653 654 655
				/* p is at the same stage as our entry, and
				 * is a subdirectory of what we are looking
				 * at, so we cannot have conflicts at our
				 * level or anything shorter.
				 */
				return retval;
			pos++;
656
		}
657
	}
658 659 660 661 662 663 664
	return retval;
}

/* We may be in a situation where we already have path/file and path
 * is being added, or we already have path and path/file is being
 * added.  Either one would result in a nonsense tree that has path
 * twice when git-write-tree tries to write it out.  Prevent it.
Junio C Hamano's avatar
Junio C Hamano committed
665
 *
666 667 668 669
 * If ok-to-replace is specified, we remove the conflicting entries
 * from the cache so the caller should recompute the insert position.
 * When this happens, we return non-zero.
 */
670 671 672
static int check_file_directory_conflict(struct index_state *istate,
					 const struct cache_entry *ce,
					 int pos, int ok_to_replace)
673
{
674 675 676 677 678 679 680 681
	int retval;

	/*
	 * When ce is an "I am going away" entry, we allow it to be added
	 */
	if (!ce_stage(ce) && !ce->ce_mode)
		return 0;

682 683 684
	/*
	 * We check if the path is a sub-path of a subsequent pathname
	 * first, since removing those will not change the position
685
	 * in the array.
686
	 */
687
	retval = has_file_name(istate, ce, pos, ok_to_replace);
688

689 690 691 692
	/*
	 * Then check if the path might have a clashing sub-directory
	 * before it.
	 */
693
	return retval + has_dir_name(istate, ce, pos, ok_to_replace);
694 695
}

696
static int add_index_entry_with_check(struct index_state *istate, struct cache_entry *ce, int option)
697 698
{
	int pos;
699 700
	int ok_to_add = option & ADD_CACHE_OK_TO_ADD;
	int ok_to_replace = option & ADD_CACHE_OK_TO_REPLACE;
701
	int skip_df_check = option & ADD_CACHE_SKIP_DFCHECK;
Junio C Hamano's avatar
Junio C Hamano committed
702

703
	pos = index_name_pos(istate, ce->name, ntohs(ce->ce_flags));
704

Junio C Hamano's avatar
Junio C Hamano committed
705
	/* existing match? Just replace it. */
706
	if (pos >= 0) {
707 708
		istate->cache_changed = 1;
		istate->cache[pos] = ce;
709 710
		return 0;
	}
711
	pos = -pos-1;
712

713 714 715 716
	/*
	 * Inserting a merged entry ("stage 0") into the index
	 * will always replace all non-merged entries..
	 */
717 718
	if (pos < istate->cache_nr && ce_stage(ce) == 0) {
		while (ce_same_name(istate->cache[pos], ce)) {
719
			ok_to_add = 1;
720
			if (!remove_index_entry_at(istate, pos))
721 722 723 724
				break;
		}
	}

725 726
	if (!ok_to_add)
		return -1;
727 728
	if (!verify_path(ce->name))
		return -1;
729

Junio C Hamano's avatar
Junio C Hamano committed
730
	if (!skip_df_check &&
731
	    check_file_directory_conflict(istate, ce, pos, ok_to_replace)) {
732
		if (!ok_to_replace)
733 734 735
			return error("'%s' appears as both a file and as a directory",
				     ce->name);
		pos = index_name_pos(istate, ce->name, ntohs(ce->ce_flags));
736 737
		pos = -pos-1;
	}
738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753
	return pos + 1;
}

/*
 * Insert ce into the index.  With ADD_CACHE_JUST_APPEND in option the
 * entry goes to the end without any checks; otherwise its position is
 * decided by add_index_entry_with_check().
 *
 * Returns 0 on success and a negative value when the entry was
 * refused.
 */
int add_index_entry(struct index_state *istate, struct cache_entry *ce, int option)
{
	int pos;

	if (!(option & ADD_CACHE_JUST_APPEND)) {
		int ret = add_index_entry_with_check(istate, ce, option);

		/* 0: replaced in place, negative: refused */
		if (ret <= 0)
			return ret;
		pos = ret - 1;
	} else {
		pos = istate->cache_nr;
	}

	/* Make sure the array is big enough .. */
	if (istate->cache_nr == istate->cache_alloc) {
		istate->cache_alloc = alloc_nr(istate->cache_alloc);
		istate->cache = xrealloc(istate->cache,
					istate->cache_alloc * sizeof(struct cache_entry *));
	}

	/* Add it in, shifting the entries from pos onwards up by one */
	istate->cache_nr++;
	if (istate->cache_nr > pos + 1) {
		struct cache_entry **slot = istate->cache + pos;

		memmove(slot + 1, slot,
			(istate->cache_nr - pos - 1) * sizeof(ce));
	}
	istate->cache[pos] = ce;
	istate->cache_changed = 1;
	return 0;
}

773 774 775 776 777 778 779 780 781 782 783
/*
 * "refresh" does not calculate a new sha1 file or bring the
 * cache up-to-date for mode/content changes. But what it
 * _does_ do is to "re-match" the stat information of a file
 * with the cache, so that you can refresh the cache for a
 * file that hasn't been changed but where the stat entry is
 * out of date.
 *
 * For example, you'd want to do this after doing a "git-read-tree",
 * to link up the stat cache details with the proper files.
 */
784 785
static struct cache_entry *refresh_cache_ent(struct index_state *istate,
					     struct cache_entry *ce, int really, int *err)
786 787 788 789 790
{
	struct stat st;
	struct cache_entry *updated;
	int changed, size;

791
	if (lstat(ce->name, &st) < 0) {
792 793
		if (err)
			*err = errno;
794 795
		return NULL;
	}
796

797
	changed = ie_match_stat(istate, ce, &st, really);
798 799 800 801 802
	if (!changed) {
		if (really && assume_unchanged &&
		    !(ce->ce_flags & htons(CE_VALID)))
			; /* mark this one VALID again */
		else
803
			return ce;
804 805
	}

806
	if (ie_modified(istate, ce, &st, really)) {
807 808
		if (err)
			*err = EINVAL;
809 810
		return NULL;
	}
811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828

	size = ce_size(ce);
	updated = xmalloc(size);
	memcpy(updated, ce, size);
	fill_stat_cache_info(updated, &st);

	/* In this case, if really is not set, we should leave
	 * CE_VALID bit alone.  Otherwise, paths marked with
	 * --no-assume-unchanged (i.e. things to be edited) will
	 * reacquire CE_VALID bit automatically, which is not
	 * really what we want.
	 */
	if (!really && assume_unchanged && !(ce->ce_flags & htons(CE_VALID)))
		updated->ce_flags &= ~htons(CE_VALID);

	return updated;
}

829
int refresh_index(struct index_state *istate, unsigned int flags, const char **pathspec, char *seen)
830 831 832 833 834 835 836 837
{
	int i;
	int has_errors = 0;
	int really = (flags & REFRESH_REALLY) != 0;
	int allow_unmerged = (flags & REFRESH_UNMERGED) != 0;
	int quiet = (flags & REFRESH_QUIET) != 0;
	int not_new = (flags & REFRESH_IGNORE_MISSING) != 0;

838
	for (i = 0; i < istate->cache_nr; i++) {
839
		struct cache_entry *ce, *new;
840 841
		int cache_errno = 0;

842
		ce = istate->cache[i];
843
		if (ce_stage(ce)) {
844 845
			while ((i < istate->cache_nr) &&
			       ! strcmp(istate->cache[i]->name, ce->name))
846 847 848 849 850 851 852 853 854
				i++;
			i--;
			if (allow_unmerged)
				continue;
			printf("%s: needs merge\n", ce->name);
			has_errors = 1;
			continue;
		}

855 856 857
		if (pathspec && !match_pathspec(pathspec, ce->name, strlen(ce->name), 0, seen))
			continue;

858
		new = refresh_cache_ent(istate, ce, really, &cache_errno);
859
		if (new == ce)
860
			continue;
861 862
		if (!new) {
			if (not_new && cache_errno == ENOENT)
863
				continue;
864
			if (really && cache_errno == EINVAL) {
865 866 867 868
				/* If we are doing --really-refresh that
				 * means the index is not valid anymore.
				 */
				ce->ce_flags &= ~htons(CE_VALID);
869
				istate->cache_changed = 1;
870 871 872 873 874 875 876
			}
			if (quiet)
				continue;
			printf("%s: needs update\n", ce->name);
			has_errors = 1;
			continue;
		}
877 878
		istate->cache_changed = 1;
		/* You can NOT just free istate->cache[i] here, since it
879 880
		 * might not be necessarily malloc()ed but can also come
		 * from mmap(). */
881
		istate->cache[i] = new;
882 883 884 885
	}
	return has_errors;
}

886 887
struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really)
{
888
	return refresh_cache_ent(&the_index, ce, really, NULL);
889 890
}

891
static int verify_hdr(struct cache_header *hdr, unsigned long size)
892 893
{
	SHA_CTX c;
894
	unsigned char sha1[20];
895

896
	if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
897
		return error("bad signature");
898 899
	if (hdr->hdr_version != htonl(2))
		return error("bad index version");
900
	SHA1_Init(&c);
901
	SHA1_Update(&c, hdr, size - 20);
902
	SHA1_Final(sha1, &c);
903
	if (hashcmp(sha1, (unsigned char *)hdr + size - 20))
904
		return error("bad index file sha1 signature");
905 906 907
	return 0;
}

908 909
static int read_index_extension(struct index_state *istate,
				const char *ext, void *data, unsigned long sz)
910 911 912
{
	switch (CACHE_EXT(ext)) {
	case CACHE_EXT_TREE:
913
		istate->cache_tree = cache_tree_read(data, sz);
914 915 916 917 918 919 920 921 922 923 924
		break;
	default:
		if (*ext < 'A' || 'Z' < *ext)
			return error("index uses %.4s extension, which we do not understand",
				     ext);
		fprintf(stderr, "ignoring %.4s extension\n", ext);
		break;
	}
	return 0;
}

925
/* Read the file named by get_index_file() into istate. */
int read_index(struct index_state *istate)
{
	const char *index_file = get_index_file();

	return read_index_from(istate, index_file);
}

/* remember to discard_cache() before reading a different cache! */
931
int read_index_from(struct index_state *istate, const char *path)
932 933 934
{
	int fd, i;
	struct stat st;
935
	unsigned long offset;
936 937 938
	struct cache_header *hdr;

	errno = EBUSY;
939 940
	if (istate->mmap)
		return istate->cache_nr;
941

942
	errno = ENOENT;
943
	istate->timestamp = 0;
944
	fd = open(path, O_RDONLY);
945 946 947 948 949
	if (fd < 0) {
		if (errno == ENOENT)
			return 0;
		die("index file open failed (%s)", strerror(errno));
	}
950

951
	if (fstat(fd, &st))
952
		die("cannot stat the open index (%s)", strerror(errno));
953 954 955 956 957 958 959

	errno = EINVAL;
	istate->mmap_size = xsize_t(st.st_size);
	if (istate->mmap_size < sizeof(struct cache_header) + 20)
		die("index file smaller than expected");

	istate->mmap = xmmap(NULL, istate->mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
960 961
	close(fd);

962 963
	hdr = istate->mmap;
	if (verify_hdr(hdr, istate->mmap_size) < 0)
964 965
		goto unmap;

966 967 968
	istate->cache_nr = ntohl(hdr->hdr_entries);
	istate->cache_alloc = alloc_nr(istate->cache_nr);
	istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
969 970

	offset = sizeof(*hdr);
971 972 973 974
	for (i = 0; i < istate->cache_nr; i++) {
		struct cache_entry *ce;

		ce = (struct cache_entry *)((char *)(istate->mmap) + offset);
975
		offset = offset + ce_size(ce);
976
		istate->cache[i] = ce;
977
	}
978 979
	istate->timestamp = st.st_mtime;
	while (offset <= istate->mmap_size - 20 - 8) {
980 981 982 983 984 985 986
		/* After an array of active_nr index entries,
		 * there can be arbitrary number of extended
		 * sections, each of which is prefixed with
		 * extension name (4-byte) and section length
		 * in 4-byte network byte order.
		 */
		unsigned long extsize;
987
		memcpy(&extsize, (char *)(istate->mmap) + offset + 4, 4);
988
		extsize = ntohl(extsize);
989 990 991
		if (read_index_extension(istate,
					 ((const char *) (istate->mmap)) + offset,
					 (char *) (istate->mmap) + offset + 8,
992
					 extsize) < 0)
993 994 995 996
			goto unmap;
		offset += 8;
		offset += extsize;
	}
997
	return istate->cache_nr;
998 999

unmap:
1000
	munmap(istate->mmap, istate->mmap_size);
1001
	errno = EINVAL;
1002
	die("index file corrupt");
1003 1004
}

1005
int discard_index(struct index_state *istate)
1006 1007 1008
{
	int ret;

1009 1010 1011 1012 1013
	istate->cache_nr = 0;
	istate->cache_changed = 0;
	istate->timestamp = 0;
	cache_tree_free(&(istate->cache_tree));
	if (istate->mmap == NULL)
1014
		return 0;
1015 1016 1017
	ret = munmap(istate->mmap, istate->mmap_size);
	istate->mmap = NULL;
	istate->mmap_size = 0;
1018 1019 1020 1021 1022

	/* no need to throw away allocated active_cache */
	return ret;
}

1023
/* Staging buffer shared by ce_write(), ce_write_flush() and ce_flush(). */
#define WRITE_BUFFER_SIZE 8192
static unsigned char write_buffer[WRITE_BUFFER_SIZE];
/* Number of bytes currently held in write_buffer. */
static unsigned long write_buffer_len;

1027 1028 1029 1030 1031
/*
 * Hash and write out whatever is pending in write_buffer.
 *
 * The pending bytes are first fed to "context" and then written to
 * "fd".  On success the buffer is marked empty and 0 is returned.  If
 * write_in_full() does not report exactly the requested byte count,
 * -1 is returned and write_buffer_len is left as it was.
 */
static int ce_write_flush(SHA_CTX *context, int fd)
{
	unsigned int buffered = write_buffer_len;
	if (buffered) {
		SHA1_Update(context, write_buffer, buffered);
		if (write_in_full(fd, write_buffer, buffered) != buffered)
			return -1;
		write_buffer_len = 0;
	}
	return 0;
}

1039
/*
 * Append "len" bytes starting at "data" to the staging buffer.
 *
 * The input is copied in pieces no larger than the free space left in
 * write_buffer; every time the buffer becomes completely full it is
 * pushed out with ce_write_flush().  A partially filled buffer is left
 * pending for a later ce_write() or ce_flush().
 *
 * Returns 0 on success, -1 as soon as a flush fails.
 */
static int ce_write(SHA_CTX *context, int fd, void *data, unsigned int len)
{
	while (len) {
		unsigned int buffered = write_buffer_len;
		unsigned int partial = WRITE_BUFFER_SIZE - buffered;
		if (partial > len)
			partial = len;
		memcpy(write_buffer + buffered, data, partial);
		buffered += partial;
		if (buffered == WRITE_BUFFER_SIZE) {
			/* ce_write_flush() reads the length from the global */
			write_buffer_len = buffered;
			if (ce_write_flush(context, fd))
				return -1;
			buffered = 0;
		}
		write_buffer_len = buffered;
		len -= partial;
		data = (char *) data + partial;
	}
	return 0;
}

1061
/*
 * Emit the 8-byte header that precedes an index extension: the 4-byte
 * extension signature "ext" followed by the 4-byte payload size "sz",
 * both converted with htonl() before being passed to ce_write().
 *
 * Returns 0 on success, -1 if either ce_write() call fails.
 */
static int write_index_ext_header(SHA_CTX *context, int fd,
				  unsigned int ext, unsigned int sz)
{
	ext = htonl(ext);
	sz = htonl(sz);
	return ((ce_write(context, fd, &ext, 4) < 0) ||
		(ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0;
}

/*
 * Finish the output stream: hash the bytes still pending in
 * write_buffer, then write them to "fd" followed by the 20-byte result
 * of SHA1_Final().
 *
 * Returns 0 on success, -1 if write_in_full() does not report exactly
 * the requested byte count.
 */
static int ce_flush(SHA_CTX *context, int fd)
{
	unsigned int left = write_buffer_len;

	if (left) {
		write_buffer_len = 0;
		SHA1_Update(context, write_buffer, left);
	}

	/* Flush first if not enough space for SHA1 signature */
	if (left + 20 > WRITE_BUFFER_SIZE) {
		if (write_in_full(fd, write_buffer, left) != left)
			return -1;
		left = 0;
	}

	/* Append the SHA1 signature at the end */
	SHA1_Final(write_buffer + left, context);
	left += 20;
	return (write_in_full(fd, write_buffer, left) != left) ? -1 : 0;
}

Junio C Hamano's avatar
Junio C Hamano committed
1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106
static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
{
	/*
	 * The only thing we care about in this function is to smudge the
	 * falsely clean entry due to touch-update-touch race, so we leave
	 * everything else as it is.  write_index() calls us for entries
	 * whose ce_mtime is not older than the index timestamp.
	 */
	struct stat st;

	/* Cannot stat the path: leave the entry alone. */
	if (lstat(ce->name, &st) < 0)
		return;
	/*
	 * A non-zero result here presumably means the cached stat data
	 * already differs from the filesystem, so the entry is not
	 * falsely clean and there is nothing to smudge.
	 */
	if (ce_match_stat_basic(ce, &st))
		return;
	if (ce_modified_check_fs(ce, &st)) {
		/* This is "racily clean"; smudge it.  Note that this
		 * is tricky code.  At first glance, it may appear
		 * that it can break with this sequence:
		 *
		 * $ echo xyzzy >frotz
		 * $ git-update-index --add frotz
		 * $ : >frotz
		 * $ sleep 3
		 * $ echo filfre >nitfol
		 * $ git-update-index --add nitfol
		 *
		 * but it does not.  When the second update-index runs,
		 * it notices that the entry "frotz" has the same timestamp
		 * as index, and if we were to smudge it by resetting its
		 * size to zero here, then the object name recorded
		 * in index is the 6-byte file but the cached stat information
		 * becomes zero --- which would then match what we would
		 * obtain from the filesystem next time we stat("frotz").
		 *
		 * However, the second update-index, before calling
		 * this function, notices that the cached size is 6
		 * bytes and what is on the filesystem is an empty
		 * file, and never calls us, so the cached size information
		 * for "frotz" stays 6 which does not match the filesystem.
		 */
		ce->ce_size = htonl(0);
	}
}

1136
int write_index(struct index_state *istate, int newfd)
1137 1138 1139
{
	SHA_CTX c;
	struct cache_header hdr;
1140
	int i, removed;
1141 1142
	struct cache_entry **cache = istate->cache;
	int entries = istate->cache_nr;
1143 1144 1145 1146

	for (i = removed = 0; i < entries; i++)
		if (!cache[i]->ce_mode)
			removed++;
1147

1148
	hdr.hdr_signature = htonl(CACHE_SIGNATURE);
1149
	hdr.hdr_version = htonl(2);
1150
	hdr.hdr_entries = htonl(entries - removed);
1151 1152

	SHA1_Init(&c);
1153
	if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
1154 1155 1156 1157
		return -1;

	for (i = 0; i < entries; i++) {
		struct cache_entry *ce = cache[i];
1158 1159
		if (!ce->ce_mode)
			continue;
1160 1161
		if (istate->timestamp &&
		    istate->timestamp <= ntohl(ce->ce_mtime.sec))
Junio C Hamano's avatar
Junio C Hamano committed
1162
			ce_smudge_racily_clean_entry(ce);
1163
		if (ce_write(&c, newfd, ce, ce_size(ce)) < 0)
1164 1165
			return -1;
	}
1166

1167
	/* Write extension data here */
1168
	if (istate->cache_tree) {
1169
		unsigned long sz;
1170
		void *data = cache_tree_write(istate->cache_tree, &sz);
1171 1172 1173
		if (data &&
		    !write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sz) &&
		    !ce_write(&c, newfd, data, sz))
1174
			free(data);
1175 1176 1177 1178 1179 1180
		else {
			free(data);
			return -1;
		}
	}
	return ce_flush(&c, newfd);
1181
}