read-cache.c 36.6 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5
/*
 * GIT - The information manager from hell
 *
 * Copyright (C) Linus Torvalds, 2005
 */
6
#define NO_THE_INDEX_COMPATIBILITY_MACROS
7
#include "cache.h"
8
#include "cache-tree.h"
9
#include "refs.h"
10
#include "dir.h"
11 12 13 14 15 16 17 18 19 20 21 22

/* Index extensions.
 *
 * The first letter should be 'A'..'Z' for extensions that are not
 * necessary for a correct operation (i.e. optimization data).
 * When new extensions are added that _needs_ to be understood in
 * order to correctly interpret the index file, pick character that
 * is outside the range, to cause the reader to abort.
 */

/*
 * Pack the four bytes of an extension signature into one 32-bit value,
 * first byte in the most significant position.  The argument is
 * parenthesized so that expressions such as "p + 1" work, and every
 * byte is widened through unsigned char so that bytes >= 0x80 are
 * neither sign-extended nor shifted into the sign bit.
 */
#define CACHE_EXT(s) ( ((unsigned int)(unsigned char)(s)[0] << 24) | \
		       ((unsigned int)(unsigned char)(s)[1] << 16) | \
		       ((unsigned int)(unsigned char)(s)[2] << 8) | \
		       ((unsigned int)(unsigned char)(s)[3]) )
#define CACHE_EXT_TREE 0x54524545	/* "TREE" */
23

24
struct index_state the_index;
25

Junio C Hamano's avatar
Junio C Hamano committed
26 27 28
/*
 * Put ce into slot nr of the index array, then hand it to
 * add_name_hash().
 */
static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
{
	struct cache_entry **slot = istate->cache + nr;

	*slot = ce;
	add_name_hash(istate, ce);
}

32 33 34 35
/*
 * Swap the entry in slot nr for ce: the previous occupant is passed to
 * remove_name_hash(), the new one is installed with set_index_entry(),
 * and the index is flagged as changed.
 */
static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
{
	remove_name_hash(istate->cache[nr]);
	set_index_entry(istate, nr, ce);
	istate->cache_changed = 1;
}

41 42 43 44 45 46 47
/*
 * This only updates the "non-critical" parts of the directory
 * cache, ie the parts that aren't tracked by GIT, and only used
 * to validate the cache.
 */
void fill_stat_cache_info(struct cache_entry *ce, struct stat *st)
{
48 49 50 51 52 53 54
	ce->ce_ctime = st->st_ctime;
	ce->ce_mtime = st->st_mtime;
	ce->ce_dev = st->st_dev;
	ce->ce_ino = st->st_ino;
	ce->ce_uid = st->st_uid;
	ce->ce_gid = st->st_gid;
	ce->ce_size = st->st_size;
Junio C Hamano's avatar
Junio C Hamano committed
55 56

	if (assume_unchanged)
57
		ce->ce_flags |= CE_VALID;
58 59 60

	if (S_ISREG(st->st_mode))
		ce_mark_uptodate(ce);
61 62
}

Junio C Hamano's avatar
Junio C Hamano committed
63 64 65 66 67 68 69
static int ce_compare_data(struct cache_entry *ce, struct stat *st)
{
	int match = -1;
	int fd = open(ce->name, O_RDONLY);

	if (fd >= 0) {
		unsigned char sha1[20];
70
		if (!index_fd(sha1, fd, st, 0, OBJ_BLOB, ce->name))
71
			match = hashcmp(sha1, ce->sha1);
72
		/* index_fd() closed the file descriptor already */
Junio C Hamano's avatar
Junio C Hamano committed
73 74 75 76
	}
	return match;
}

77
static int ce_compare_link(struct cache_entry *ce, size_t expected_size)
Junio C Hamano's avatar
Junio C Hamano committed
78 79 80 81 82
{
	int match = -1;
	char *target;
	void *buffer;
	unsigned long size;
83
	enum object_type type;
Junio C Hamano's avatar
Junio C Hamano committed
84 85 86 87 88 89 90 91
	int len;

	target = xmalloc(expected_size);
	len = readlink(ce->name, target, expected_size);
	if (len != expected_size) {
		free(target);
		return -1;
	}
92
	buffer = read_sha1_file(ce->sha1, &type, &size);
Junio C Hamano's avatar
Junio C Hamano committed
93 94 95 96 97 98 99 100 101 102 103
	if (!buffer) {
		free(target);
		return -1;
	}
	if (size == expected_size)
		match = memcmp(buffer, target, size);
	free(buffer);
	free(target);
	return match;
}

104 105 106 107 108 109
static int ce_compare_gitlink(struct cache_entry *ce)
{
	unsigned char sha1[20];

	/*
	 * We don't actually require that the .git directory
Martin Waitz's avatar
Martin Waitz committed
110
	 * under GITLINK directory be a valid git directory. It
111 112 113 114 115 116 117 118 119 120
	 * might even be missing (in case nobody populated that
	 * sub-project).
	 *
	 * If so, we consider it always to match.
	 */
	if (resolve_gitlink_ref(ce->name, "HEAD", sha1) < 0)
		return 0;
	return hashcmp(sha1, ce->sha1);
}

Junio C Hamano's avatar
Junio C Hamano committed
121 122 123 124 125 126 127 128
static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st)
{
	switch (st->st_mode & S_IFMT) {
	case S_IFREG:
		if (ce_compare_data(ce, st))
			return DATA_CHANGED;
		break;
	case S_IFLNK:
129
		if (ce_compare_link(ce, xsize_t(st->st_size)))
Junio C Hamano's avatar
Junio C Hamano committed
130 131
			return DATA_CHANGED;
		break;
132
	case S_IFDIR:
133
		if (S_ISGITLINK(ce->ce_mode))
134
			return 0;
Junio C Hamano's avatar
Junio C Hamano committed
135 136 137 138 139 140
	default:
		return TYPE_CHANGED;
	}
	return 0;
}

141 142 143 144 145 146 147 148 149 150
/*
 * Return 1 when sha1 equals the hard-coded empty-blob object name
 * below, 0 otherwise.
 */
static int is_empty_blob_sha1(const unsigned char *sha1)
{
	static const unsigned char empty_blob_sha1[20] = {
		0xe6, 0x9d, 0xe2, 0x9b, 0xb2,
		0xd1, 0xd6, 0x43, 0x4b, 0x8b,
		0x29, 0xae, 0x77, 0x5a, 0xd8,
		0xc2, 0xe4, 0x8c, 0x53, 0x91
	};

	return hashcmp(sha1, empty_blob_sha1) == 0;
}

Junio C Hamano's avatar
Junio C Hamano committed
151
static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
152 153 154
{
	unsigned int changed = 0;

155 156 157 158
	if (ce->ce_flags & CE_REMOVE)
		return MODE_CHANGED | DATA_CHANGED | TYPE_CHANGED;

	switch (ce->ce_mode & S_IFMT) {
159 160
	case S_IFREG:
		changed |= !S_ISREG(st->st_mode) ? TYPE_CHANGED : 0;
Junio C Hamano's avatar
Junio C Hamano committed
161 162 163 164
		/* We consider only the owner x bit to be relevant for
		 * "mode changes"
		 */
		if (trust_executable_bit &&
165
		    (0100 & (ce->ce_mode ^ st->st_mode)))
166
			changed |= MODE_CHANGED;
167 168
		break;
	case S_IFLNK:
169 170 171
		if (!S_ISLNK(st->st_mode) &&
		    (has_symlinks || !S_ISREG(st->st_mode)))
			changed |= TYPE_CHANGED;
172
		break;
Martin Waitz's avatar
Martin Waitz committed
173
	case S_IFGITLINK:
174 175 176 177
		if (!S_ISDIR(st->st_mode))
			changed |= TYPE_CHANGED;
		else if (ce_compare_gitlink(ce))
			changed |= DATA_CHANGED;
178
		return changed;
179
	default:
180
		die("internal error: ce_mode is %o", ce->ce_mode);
181
	}
182
	if (ce->ce_mtime != (unsigned int) st->st_mtime)
183
		changed |= MTIME_CHANGED;
184
	if (ce->ce_ctime != (unsigned int) st->st_ctime)
185
		changed |= CTIME_CHANGED;
186

187 188
	if (ce->ce_uid != (unsigned int) st->st_uid ||
	    ce->ce_gid != (unsigned int) st->st_gid)
189
		changed |= OWNER_CHANGED;
190
	if (ce->ce_ino != (unsigned int) st->st_ino)
191
		changed |= INODE_CHANGED;
192 193 194 195 196 197 198

#ifdef USE_STDEV
	/*
	 * st_dev breaks on network filesystems where different
	 * clients will have different views of what "device"
	 * the filesystem is on
	 */
199
	if (ce->ce_dev != (unsigned int) st->st_dev)
200 201 202
		changed |= INODE_CHANGED;
#endif

203
	if (ce->ce_size != (unsigned int) st->st_size)
204
		changed |= DATA_CHANGED;
205

206 207 208 209 210 211
	/* Racily smudged entry? */
	if (!ce->ce_size) {
		if (!is_empty_blob_sha1(ce->sha1))
			changed |= DATA_CHANGED;
	}

Junio C Hamano's avatar
Junio C Hamano committed
212 213 214
	return changed;
}

215
static int is_racy_timestamp(const struct index_state *istate, struct cache_entry *ce)
216
{
217 218
	return (!S_ISGITLINK(ce->ce_mode) &&
		istate->timestamp &&
219 220 221
		((unsigned int)istate->timestamp) <= ce->ce_mtime);
}

222
int ie_match_stat(const struct index_state *istate,
223 224
		  struct cache_entry *ce, struct stat *st,
		  unsigned int options)
Junio C Hamano's avatar
Junio C Hamano committed
225
{
Junio C Hamano's avatar
Junio C Hamano committed
226
	unsigned int changed;
227 228
	int ignore_valid = options & CE_MATCH_IGNORE_VALID;
	int assume_racy_is_modified = options & CE_MATCH_RACY_IS_DIRTY;
Junio C Hamano's avatar
Junio C Hamano committed
229 230 231 232 233

	/*
	 * If it's marked as always valid in the index, it's
	 * valid whatever the checked-out copy says.
	 */
234
	if (!ignore_valid && (ce->ce_flags & CE_VALID))
Junio C Hamano's avatar
Junio C Hamano committed
235 236 237
		return 0;

	changed = ce_match_stat_basic(ce, st);
Junio C Hamano's avatar
Junio C Hamano committed
238

Junio C Hamano's avatar
Junio C Hamano committed
239 240 241 242 243 244 245 246 247 248 249 250 251 252
	/*
	 * Within 1 second of this sequence:
	 * 	echo xyzzy >file && git-update-index --add file
	 * running this command:
	 * 	echo frotz >file
	 * would give a falsely clean cache entry.  The mtime and
	 * length match the cache, and other stat fields do not change.
	 *
	 * We could detect this at update-index time (the cache entry
	 * being registered/updated records the same time as "now")
	 * and delay the return from git-update-index, but that would
	 * effectively mean we can make at most one commit per second,
	 * which is not acceptable.  Instead, we check cache entries
	 * whose mtime are the same as the index file timestamp more
Junio C Hamano's avatar
Junio C Hamano committed
253
	 * carefully than others.
Junio C Hamano's avatar
Junio C Hamano committed
254
	 */
255
	if (!changed && is_racy_timestamp(istate, ce)) {
256 257 258 259 260
		if (assume_racy_is_modified)
			changed |= DATA_CHANGED;
		else
			changed |= ce_modified_check_fs(ce, st);
	}
261

Junio C Hamano's avatar
Junio C Hamano committed
262
	return changed;
263 264
}

265
int ie_modified(const struct index_state *istate,
266
		struct cache_entry *ce, struct stat *st, unsigned int options)
267
{
Junio C Hamano's avatar
Junio C Hamano committed
268
	int changed, changed_fs;
269 270

	changed = ie_match_stat(istate, ce, st, options);
271 272 273 274 275 276 277 278 279 280 281 282 283
	if (!changed)
		return 0;
	/*
	 * If the mode or type has changed, there's no point in trying
	 * to refresh the entry - it's not going to match
	 */
	if (changed & (MODE_CHANGED | TYPE_CHANGED))
		return changed;

	/* Immediately after read-tree or update-index --cacheinfo,
	 * the length field is zero.  For other cases the ce_size
	 * should match the SHA1 recorded in the index entry.
	 */
284
	if ((changed & DATA_CHANGED) && ce->ce_size != 0)
285 286
		return changed;

Junio C Hamano's avatar
Junio C Hamano committed
287 288 289
	changed_fs = ce_modified_check_fs(ce, st);
	if (changed_fs)
		return changed | changed_fs;
290 291 292
	return 0;
}

293 294 295 296 297 298 299 300 301 302 303 304
/*
 * Order two path components.  The common prefix is compared bytewise;
 * on a tie the byte following the prefix decides, where a directory
 * (per S_ISDIR on its mode) whose name ends there is treated as if it
 * were followed by '/'.  Returns a negative, zero or positive value.
 */
int base_name_compare(const char *name1, int len1, int mode1,
		      const char *name2, int len2, int mode2)
{
	int common = (len2 <= len1) ? len2 : len1;
	int diff = memcmp(name1, name2, common);
	unsigned char end1, end2;

	if (diff)
		return diff;

	end1 = name1[common];
	end2 = name2[common];
	if (end1 == '\0' && S_ISDIR(mode1))
		end1 = '/';
	if (end2 == '\0' && S_ISDIR(mode2))
		end2 = '/';

	if (end1 < end2)
		return -1;
	return end1 > end2;
}

312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
/*
 * df_name_compare() is identical to base_name_compare(), except it
 * compares conflicting directory/file entries as equal. Note that
 * while a directory name compares as equal to a regular file, they
 * then individually compare _differently_ to a filename that has
 * a dot after the basename (because '\0' < '.' < '/').
 *
 * This is used by routines that want to traverse the git namespace
 * but then handle conflicting entries together when possible.
 */
int df_name_compare(const char *name1, int len1, int mode1,
		    const char *name2, int len2, int mode2)
{
	int common = (len2 <= len1) ? len2 : len1;
	int diff = memcmp(name1, name2, common);
	unsigned char end1, end2;

	if (diff)
		return diff;

	/* Same length, same name: a directory and a file compare equal */
	if (len1 == len2)
		return 0;

	end1 = name1[common];
	end2 = name2[common];
	if (end1 == '\0' && S_ISDIR(mode1))
		end1 = '/';
	if (end2 == '\0' && S_ISDIR(mode2))
		end2 = '/';

	/* "name" against "name/..." is also a directory/file conflict */
	if ((end1 == '/' && end2 == '\0') || (end2 == '/' && end1 == '\0'))
		return 0;

	return end1 - end2;
}

347
int cache_name_compare(const char *name1, int flags1, const char *name2, int flags2)
348
{
349 350
	int len1 = flags1 & CE_NAMEMASK;
	int len2 = flags2 & CE_NAMEMASK;
351 352 353 354 355 356 357 358 359 360
	int len = len1 < len2 ? len1 : len2;
	int cmp;

	cmp = memcmp(name1, name2, len);
	if (cmp)
		return cmp;
	if (len1 < len2)
		return -1;
	if (len1 > len2)
		return 1;
Junio C Hamano's avatar
Junio C Hamano committed
361

362 363 364
	/* Compare stages  */
	flags1 &= CE_STAGEMASK;
	flags2 &= CE_STAGEMASK;
Junio C Hamano's avatar
Junio C Hamano committed
365

366 367 368 369
	if (flags1 < flags2)
		return -1;
	if (flags1 > flags2)
		return 1;
370 371 372
	return 0;
}

373
int index_name_pos(const struct index_state *istate, const char *name, int namelen)
374 375 376 377
{
	int first, last;

	first = 0;
378
	last = istate->cache_nr;
379 380
	while (last > first) {
		int next = (last + first) >> 1;
381
		struct cache_entry *ce = istate->cache[next];
382
		int cmp = cache_name_compare(name, namelen, ce->name, ce->ce_flags);
383
		if (!cmp)
384
			return next;
385 386 387 388 389 390
		if (cmp < 0) {
			last = next;
			continue;
		}
		first = next+1;
	}
391
	return -first-1;
392 393
}

394
/* Remove entry, return true if there are more entries to go.. */
395
int remove_index_entry_at(struct index_state *istate, int pos)
396
{
397 398
	struct cache_entry *ce = istate->cache[pos];

399
	remove_name_hash(ce);
400 401 402
	istate->cache_changed = 1;
	istate->cache_nr--;
	if (pos >= istate->cache_nr)
403
		return 0;
404 405 406
	memmove(istate->cache + pos,
		istate->cache + pos + 1,
		(istate->cache_nr - pos) * sizeof(struct cache_entry *));
407 408 409
	return 1;
}

410
int remove_file_from_index(struct index_state *istate, const char *path)
411
{
412
	int pos = index_name_pos(istate, path, strlen(path));
413 414
	if (pos < 0)
		pos = -pos-1;
Junio C Hamano's avatar
Junio C Hamano committed
415
	cache_tree_invalidate_path(istate->cache_tree, path);
416 417
	while (pos < istate->cache_nr && !strcmp(istate->cache[pos]->name, path))
		remove_index_entry_at(istate, pos);
418 419 420
	return 0;
}

421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448
/* Return 0 when ce is named exactly path (namelen bytes), non-zero otherwise. */
static int compare_name(struct cache_entry *ce, const char *path, int namelen)
{
	if (namelen != ce_namelen(ce))
		return 1;
	return memcmp(path, ce->name, namelen) != 0;
}

/*
 * Find the position of path in the index, also accepting an entry that
 * index_name_pos() did not report as an exact match but that carries
 * the same name at the insertion point.  Returns the position, or -1
 * when no entry carries that name.
 */
static int index_name_pos_also_unmerged(struct index_state *istate,
	const char *path, int namelen)
{
	struct cache_entry *ce;
	int pos = index_name_pos(istate, path, namelen);

	if (pos >= 0)
		return pos;

	/* maybe unmerged? */
	pos = -1 - pos;
	if (pos >= istate->cache_nr)
		return -1;
	ce = istate->cache[pos];
	if (compare_name(ce, path, namelen))
		return -1;

	/* order of preference: stage 2, 1, 3 */
	if (ce_stage(ce) != 1)
		return pos;
	if (!(pos + 1 < istate->cache_nr))
		return pos;
	ce = istate->cache[pos + 1];
	if (ce_stage(ce) == 2 && !compare_name(ce, path, namelen))
		pos++;
	return pos;
}

449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
/* Return non-zero when ce and alias do not have byte-identical names. */
static int different_name(struct cache_entry *ce, struct cache_entry *alias)
{
	int len = ce_namelen(ce);

	if (ce_namelen(alias) != len)
		return 1;
	return memcmp(ce->name, alias->name, len) != 0;
}

/*
 * If we add a filename that aliases in the cache, we will use the
 * name that we already have - but we don't want to update the same
 * alias twice, because that implies that there were actually two
 * different files with aliasing names!
 *
 * So we use the CE_ADDED flag to verify that the alias was an old
 * one before we accept its name for the new entry.
 *
 * ce is freed; the returned entry is a new allocation carrying the
 * alias's name and the contents copied from ce by copy_cache_entry().
 */
static struct cache_entry *create_alias_ce(struct cache_entry *ce, struct cache_entry *alias)
{
	struct cache_entry *renamed;
	int alias_len;

	if (alias->ce_flags & CE_ADDED)
		die("Will not add file alias '%s' ('%s' already exists in index)", ce->name, alias->name);

	/* Ok, create the new entry using the name of the existing alias */
	alias_len = ce_namelen(alias);
	renamed = xcalloc(1, cache_entry_size(alias_len));
	memcpy(renamed->name, alias->name, alias_len);
	copy_cache_entry(renamed, ce);
	free(ce);
	return renamed;
}

481
int add_to_index(struct index_state *istate, const char *path, struct stat *st, int flags)
Johannes Schindelin's avatar
Johannes Schindelin committed
482
{
483
	int size, namelen, was_same;
484
	mode_t st_mode = st->st_mode;
485
	struct cache_entry *ce, *alias;
486
	unsigned ce_option = CE_MATCH_IGNORE_VALID|CE_MATCH_RACY_IS_DIRTY;
487 488
	int verbose = flags & (ADD_CACHE_VERBOSE | ADD_CACHE_PRETEND);
	int pretend = flags & ADD_CACHE_PRETEND;
Johannes Schindelin's avatar
Johannes Schindelin committed
489

490
	if (!S_ISREG(st_mode) && !S_ISLNK(st_mode) && !S_ISDIR(st_mode))
491
		return error("%s: can only add regular files, symbolic links or git-directories", path);
Johannes Schindelin's avatar
Johannes Schindelin committed
492 493

	namelen = strlen(path);
494
	if (S_ISDIR(st_mode)) {
495 496 497
		while (namelen && path[namelen-1] == '/')
			namelen--;
	}
Johannes Schindelin's avatar
Johannes Schindelin committed
498 499 500
	size = cache_entry_size(namelen);
	ce = xcalloc(1, size);
	memcpy(ce->name, path, namelen);
501
	ce->ce_flags = namelen;
502
	fill_stat_cache_info(ce, st);
Johannes Schindelin's avatar
Johannes Schindelin committed
503

504
	if (trust_executable_bit && has_symlinks)
505
		ce->ce_mode = create_ce_mode(st_mode);
506
	else {
507 508
		/* If there is an existing entry, pick the mode bits and type
		 * from it, otherwise assume unexecutable regular file.
Johannes Schindelin's avatar
Johannes Schindelin committed
509
		 */
510
		struct cache_entry *ent;
511
		int pos = index_name_pos_also_unmerged(istate, path, namelen);
512

513
		ent = (0 <= pos) ? istate->cache[pos] : NULL;
514
		ce->ce_mode = ce_mode_from_stat(ent, st_mode);
Johannes Schindelin's avatar
Johannes Schindelin committed
515 516
	}

517
	alias = index_name_exists(istate, ce->name, ce_namelen(ce), ignore_case);
518
	if (alias && !ce_stage(alias) && !ie_match_stat(istate, alias, st, ce_option)) {
519 520
		/* Nothing changed, really */
		free(ce);
521
		ce_mark_uptodate(alias);
522
		alias->ce_flags |= CE_ADDED;
523 524
		return 0;
	}
525
	if (index_path(ce->sha1, path, st, 1))
526
		return error("unable to index file %s", path);
527 528 529
	if (ignore_case && alias && different_name(ce, alias))
		ce = create_alias_ce(ce, alias);
	ce->ce_flags |= CE_ADDED;
530 531 532 533 534 535 536 537 538 539

	/* It was suspected to be recily clean, but it turns out to be Ok */
	was_same = (alias &&
		    !ce_stage(alias) &&
		    !hashcmp(alias->sha1, ce->sha1) &&
		    ce->ce_mode == alias->ce_mode);

	if (pretend)
		;
	else if (add_index_entry(istate, ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE))
540
		return error("unable to add %s to index",path);
541
	if (verbose && !was_same)
Johannes Schindelin's avatar
Johannes Schindelin committed
542 543 544 545
		printf("add '%s'\n", path);
	return 0;
}

546
/*
 * lstat() path and add it to the index with add_to_index().  Dies when
 * the path cannot be stat'ed; otherwise returns what add_to_index()
 * returns.
 */
int add_file_to_index(struct index_state *istate, const char *path, int flags)
{
	struct stat st;

	if (lstat(path, &st) != 0)
		die("%s: unable to stat (%s)", path, strerror(errno));

	return add_to_index(istate, path, &st, flags);
}

554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578
/*
 * Allocate a new cache entry for path with the given mode, object name
 * and stage.  Returns NULL when verify_path() rejects the path.  With
 * refresh set, the entry is passed through refresh_cache_entry() and
 * that function's result is returned instead.
 */
struct cache_entry *make_cache_entry(unsigned int mode,
		const unsigned char *sha1, const char *path, int stage,
		int refresh)
{
	struct cache_entry *ce;
	int len;

	if (!verify_path(path))
		return NULL;

	len = strlen(path);
	ce = xcalloc(1, cache_entry_size(len));

	ce->ce_mode = create_ce_mode(mode);
	ce->ce_flags = create_ce_flags(len, stage);
	hashcpy(ce->sha1, sha1);
	memcpy(ce->name, path, len);

	return refresh ? refresh_cache_entry(ce, 0) : ce;
}

579
int ce_same_name(struct cache_entry *a, struct cache_entry *b)
580 581 582 583 584
{
	int len = ce_namelen(a);
	return ce_namelen(b) == len && !memcmp(a->name, b->name, len);
}

585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604
/*
 * Return 1 when the entry's name is matched by one of the strings in
 * the NULL-terminated pathspec array, 0 otherwise.  A NULL pathspec
 * matches everything.  A string matches when it is a prefix of the
 * name that is empty, ends in '/', or is followed in the name by '/'
 * or the end of the name.
 */
int ce_path_match(const struct cache_entry *ce, const char **pathspec)
{
	const char *name, *match;
	int len;

	if (!pathspec)
		return 1;

	name = ce->name;
	len = ce_namelen(ce);
	for (; (match = *pathspec) != NULL; pathspec++) {
		int matchlen = strlen(match);

		if (matchlen > len || memcmp(name, match, matchlen))
			continue;
		if (!matchlen ||
		    name[matchlen-1] == '/' ||
		    name[matchlen] == '/' ||
		    !name[matchlen])
			return 1;
	}
	return 0;
}

611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674
/*
 * We fundamentally don't like some paths: we don't want
 * dot or dot-dot anywhere, and for obvious reasons don't
 * want to recurse into ".git" either.
 *
 * Also, we don't want double slashes or slashes at the
 * end that can make pathnames ambiguous.
 */
/*
 * Check a path component whose first character was '.'; rest points
 * just past that dot.  Returns 0 for ".", ".." and ".git" (each
 * followed by NUL or '/'), and 1 for any other dotfile name.
 */
static int verify_dotfile(const char *rest)
{
	/* "." is not allowed */
	if (rest[0] == '\0' || rest[0] == '/')
		return 0;

	if (rest[0] == 'g') {
		/*
		 * ".git" followed by NUL or slash is bad. Step forward
		 * so that the end-of-component test below, shared with
		 * the ".." case, looks at the byte after the 't'.
		 */
		if (rest[1] != 'i' || rest[2] != 't')
			return 1;
		rest += 2;
	} else if (rest[0] != '.') {
		return 1;
	}

	return !(rest[1] == '\0' || rest[1] == '/');
}

/*
 * Return 1 when path is acceptable, 0 otherwise.  A path is rejected
 * when any component is empty (empty path, leading, trailing or
 * doubled slash) or is one of the names verify_dotfile() rejects.
 */
int verify_path(const char *path)
{
	for (;;) {
		/* first character of the next component */
		char c = *path++;

		if (c == '\0' || c == '/')
			return 0;
		if (c == '.' && !verify_dotfile(path))
			return 0;

		/* skip the remainder of this component */
		do {
			c = *path++;
			if (!c)
				return 1;
		} while (c != '/');
	}
}

675 676 677
/*
 * Do we have another file that has the beginning components being a
 * proper superset of the name we're trying to add?
678
 */
679 680
static int has_file_name(struct index_state *istate,
			 const struct cache_entry *ce, int pos, int ok_to_replace)
681
{
682 683
	int retval = 0;
	int len = ce_namelen(ce);
684
	int stage = ce_stage(ce);
685
	const char *name = ce->name;
686

687 688
	while (pos < istate->cache_nr) {
		struct cache_entry *p = istate->cache[pos++];
689

690
		if (len >= ce_namelen(p))
691
			break;
692 693
		if (memcmp(name, p->name, len))
			break;
694 695
		if (ce_stage(p) != stage)
			continue;
696 697
		if (p->name[len] != '/')
			continue;
698
		if (p->ce_flags & CE_REMOVE)
699
			continue;
700 701 702
		retval = -1;
		if (!ok_to_replace)
			break;
703
		remove_index_entry_at(istate, --pos);
704
	}
705 706
	return retval;
}
707

708 709 710 711
/*
 * Do we have another file with a pathname that is a proper
 * subset of the name we're trying to add?
 */
712 713
static int has_dir_name(struct index_state *istate,
			const struct cache_entry *ce, int pos, int ok_to_replace)
714 715
{
	int retval = 0;
716
	int stage = ce_stage(ce);
717 718
	const char *name = ce->name;
	const char *slash = name + ce_namelen(ce);
719

720 721
	for (;;) {
		int len;
722

723 724 725 726 727 728 729
		for (;;) {
			if (*--slash == '/')
				break;
			if (slash <= ce->name)
				return retval;
		}
		len = slash - name;
730

731
		pos = index_name_pos(istate, name, create_ce_flags(len, stage));
732
		if (pos >= 0) {
733 734 735 736 737 738 739 740
			/*
			 * Found one, but not so fast.  This could
			 * be a marker that says "I was here, but
			 * I am being removed".  Such an entry is
			 * not a part of the resulting tree, and
			 * it is Ok to have a directory at the same
			 * path.
			 */
741
			if (!(istate->cache[pos]->ce_flags & CE_REMOVE)) {
742 743 744
				retval = -1;
				if (!ok_to_replace)
					break;
745
				remove_index_entry_at(istate, pos);
746 747
				continue;
			}
748
		}
749 750
		else
			pos = -pos-1;
751 752 753 754

		/*
		 * Trivial optimization: if we find an entry that
		 * already matches the sub-directory, then we know
755
		 * we're ok, and we can exit.
756
		 */
757 758
		while (pos < istate->cache_nr) {
			struct cache_entry *p = istate->cache[pos];
759 760 761 762
			if ((ce_namelen(p) <= len) ||
			    (p->name[len] != '/') ||
			    memcmp(p->name, name, len))
				break; /* not our subdirectory */
763 764 765
			if (ce_stage(p) == stage && !(p->ce_flags & CE_REMOVE))
				/*
				 * p is at the same stage as our entry, and
766 767 768 769 770 771
				 * is a subdirectory of what we are looking
				 * at, so we cannot have conflicts at our
				 * level or anything shorter.
				 */
				return retval;
			pos++;
772
		}
773
	}
774 775 776 777 778 779 780
	return retval;
}

/* We may be in a situation where we already have path/file and path
 * is being added, or we already have path and path/file is being
 * added.  Either one would result in a nonsense tree that has path
 * twice when git-write-tree tries to write it out.  Prevent it.
Junio C Hamano's avatar
Junio C Hamano committed
781
 *
782 783 784 785
 * If ok-to-replace is specified, we remove the conflicting entries
 * from the cache so the caller should recompute the insert position.
 * When this happens, we return non-zero.
 */
786 787 788
static int check_file_directory_conflict(struct index_state *istate,
					 const struct cache_entry *ce,
					 int pos, int ok_to_replace)
789
{
790 791 792 793 794
	int retval;

	/*
	 * When ce is an "I am going away" entry, we allow it to be added
	 */
795
	if (ce->ce_flags & CE_REMOVE)
796 797
		return 0;

798 799 800
	/*
	 * We check if the path is a sub-path of a subsequent pathname
	 * first, since removing those will not change the position
801
	 * in the array.
802
	 */
803
	retval = has_file_name(istate, ce, pos, ok_to_replace);
804

805 806 807 808
	/*
	 * Then check if the path might have a clashing sub-directory
	 * before it.
	 */
809
	return retval + has_dir_name(istate, ce, pos, ok_to_replace);
810 811
}

812
static int add_index_entry_with_check(struct index_state *istate, struct cache_entry *ce, int option)
813 814
{
	int pos;
815 816
	int ok_to_add = option & ADD_CACHE_OK_TO_ADD;
	int ok_to_replace = option & ADD_CACHE_OK_TO_REPLACE;
817
	int skip_df_check = option & ADD_CACHE_SKIP_DFCHECK;
Junio C Hamano's avatar
Junio C Hamano committed
818

Junio C Hamano's avatar
Junio C Hamano committed
819
	cache_tree_invalidate_path(istate->cache_tree, ce->name);
820
	pos = index_name_pos(istate, ce->name, ce->ce_flags);
821

Junio C Hamano's avatar
Junio C Hamano committed
822
	/* existing match? Just replace it. */
823
	if (pos >= 0) {
824
		replace_index_entry(istate, pos, ce);
825 826
		return 0;
	}
827
	pos = -pos-1;
828

829 830 831 832
	/*
	 * Inserting a merged entry ("stage 0") into the index
	 * will always replace all non-merged entries..
	 */
833 834
	if (pos < istate->cache_nr && ce_stage(ce) == 0) {
		while (ce_same_name(istate->cache[pos], ce)) {
835
			ok_to_add = 1;
836
			if (!remove_index_entry_at(istate, pos))
837 838 839 840
				break;
		}
	}

841 842
	if (!ok_to_add)
		return -1;
843 844
	if (!verify_path(ce->name))
		return -1;
845

Junio C Hamano's avatar
Junio C Hamano committed
846
	if (!skip_df_check &&
847
	    check_file_directory_conflict(istate, ce, pos, ok_to_replace)) {
848
		if (!ok_to_replace)
849 850
			return error("'%s' appears as both a file and as a directory",
				     ce->name);
851
		pos = index_name_pos(istate, ce->name, ce->ce_flags);
852 853
		pos = -pos-1;
	}
854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869
	return pos + 1;
}

/*
 * Insert ce into the index.  With ADD_CACHE_JUST_APPEND the entry goes
 * to the end without any checks; otherwise the position comes from
 * add_index_entry_with_check(), whose zero or negative result is
 * returned unchanged.  Returns 0 once the entry has been stored.
 */
int add_index_entry(struct index_state *istate, struct cache_entry *ce, int option)
{
	int pos = istate->cache_nr;

	if (!(option & ADD_CACHE_JUST_APPEND)) {
		int ret = add_index_entry_with_check(istate, ce, option);

		if (ret <= 0)
			return ret;
		pos = ret - 1;
	}

	/* Make sure the array is big enough .. */
	if (istate->cache_nr == istate->cache_alloc) {
		istate->cache_alloc = alloc_nr(istate->cache_alloc);
		istate->cache = xrealloc(istate->cache,
					istate->cache_alloc * sizeof(struct cache_entry *));
	}

	/* Add it in.. */
	istate->cache_nr++;
	if (istate->cache_nr > pos + 1) {
		/* open a gap at pos by shifting the tail up one slot */
		memmove(istate->cache + pos + 1,
			istate->cache + pos,
			(istate->cache_nr - pos - 1) * sizeof(ce));
	}
	set_index_entry(istate, pos, ce);
	istate->cache_changed = 1;
	return 0;
}

889 890 891 892 893 894 895 896 897 898 899
/*
 * "refresh" does not calculate a new sha1 file or bring the
 * cache up-to-date for mode/content changes. But what it
 * _does_ do is to "re-match" the stat information of a file
 * with the cache, so that you can refresh the cache for a
 * file that hasn't been changed but where the stat entry is
 * out of date.
 *
 * For example, you'd want to do this after doing a "git-read-tree",
 * to link up the stat cache details with the proper files.
 */
900
static struct cache_entry *refresh_cache_ent(struct index_state *istate,
901 902
					     struct cache_entry *ce,
					     unsigned int options, int *err)
903 904 905 906
{
	struct stat st;
	struct cache_entry *updated;
	int changed, size;
907
	int ignore_valid = options & CE_MATCH_IGNORE_VALID;
908

909 910 911
	if (ce_uptodate(ce))
		return ce;

912 913 914 915 916 917 918 919 920
	/*
	 * CE_VALID means the user promised us that the change to
	 * the work tree does not matter and told us not to worry.
	 */
	if (!ignore_valid && (ce->ce_flags & CE_VALID)) {
		ce_mark_uptodate(ce);
		return ce;
	}

921
	if (lstat(ce->name, &st) < 0) {
922 923
		if (err)
			*err = errno;
924 925
		return NULL;
	}
926

927
	changed = ie_match_stat(istate, ce, &st, options);
928
	if (!changed) {
929 930 931 932 933 934 935 936
		/*
		 * The path is unchanged.  If we were told to ignore
		 * valid bit, then we did the actual stat check and
		 * found that the entry is unmodified.  If the entry
		 * is not marked VALID, this is the place to mark it
		 * valid again, under "assume unchanged" mode.
		 */
		if (ignore_valid && assume_unchanged &&
937
		    !(ce->ce_flags & CE_VALID))
938
			; /* mark this one VALID again */
939 940 941 942 943 944 945
		else {
			/*
			 * We do not mark the index itself "modified"
			 * because CE_UPTODATE flag is in-core only;
			 * we are not going to write this change out.
			 */
			ce_mark_uptodate(ce);
946
			return ce;
947
		}
948 949
	}

950
	if (ie_modified(istate, ce, &st, options)) {
951 952
		if (err)
			*err = EINVAL;
953 954
		return NULL;
	}
955 956 957 958 959

	size = ce_size(ce);
	updated = xmalloc(size);
	memcpy(updated, ce, size);
	fill_stat_cache_info(updated, &st);
960 961 962 963 964
	/*
	 * If ignore_valid is not set, we should leave CE_VALID bit
	 * alone.  Otherwise, paths marked with --no-assume-unchanged
	 * (i.e. things to be edited) will reacquire CE_VALID bit
	 * automatically, which is not really what we want.
965
	 */
966
	if (!ignore_valid && assume_unchanged &&
967 968
	    !(ce->ce_flags & CE_VALID))
		updated->ce_flags &= ~CE_VALID;
969 970 971 972

	return updated;
}

973
int refresh_index(struct index_state *istate, unsigned int flags, const char **pathspec, char *seen)
974 975 976 977 978 979 980
{
	int i;
	int has_errors = 0;
	int really = (flags & REFRESH_REALLY) != 0;
	int allow_unmerged = (flags & REFRESH_UNMERGED) != 0;
	int quiet = (flags & REFRESH_QUIET) != 0;
	int not_new = (flags & REFRESH_IGNORE_MISSING) != 0;
981
	int ignore_submodules = (flags & REFRESH_IGNORE_SUBMODULES) != 0;
982
	unsigned int options = really ? CE_MATCH_IGNORE_VALID : 0;
983

984
	for (i = 0; i < istate->cache_nr; i++) {
985
		struct cache_entry *ce, *new;
986 987
		int cache_errno = 0;

988
		ce = istate->cache[i];
989 990 991
		if (ignore_submodules && S_ISGITLINK(ce->ce_mode))
			continue;

992
		if (ce_stage(ce)) {
993 994
			while ((i < istate->cache_nr) &&
			       ! strcmp(istate->cache[i]->name, ce->name))
995 996 997 998 999 1000 1001 1002 1003
				i++;
			i--;
			if (allow_unmerged)
				continue;
			printf("%s: needs merge\n", ce->name);
			has_errors = 1;
			continue;
		}

1004 1005 1006
		if (pathspec && !match_pathspec(pathspec, ce->name, strlen(ce->name), 0, seen))
			continue;

1007
		new = refresh_cache_ent(istate, ce, options, &cache_errno);
1008
		if (new == ce)
1009
			continue;
1010 1011
		if (!new) {
			if (not_new && cache_errno == ENOENT)
1012
				continue;
1013
			if (really && cache_errno == EINVAL) {
1014 1015 1016
				/* If we are doing --really-refresh that
				 * means the index is not valid anymore.
				 */
1017
				ce->ce_flags &= ~CE_VALID;
1018
				istate->cache_changed = 1;
1019 1020 1021 1022 1023 1024 1025
			}
			if (quiet)
				continue;
			printf("%s: needs update\n", ce->name);
			has_errors = 1;
			continue;
		}
1026 1027

		replace_index_entry(istate, i, new);
1028 1029 1030 1031
	}
	return has_errors;
}

1032 1033
struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really)
{
1034
	return refresh_cache_ent(&the_index, ce, really, NULL);
1035 1036
}

1037
static int verify_hdr(struct cache_header *hdr, unsigned long size)
1038 1039
{
	SHA_CTX c;
1040
	unsigned char sha1[20];
1041

1042
	if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
1043
		return error("bad signature");
1044 1045
	if (hdr->hdr_version != htonl(2))
		return error("bad index version");
1046
	SHA1_Init(&c);
1047
	SHA1_Update(&c, hdr, size - 20);
1048
	SHA1_Final(sha1, &c);
1049
	if (hashcmp(sha1, (unsigned char *)hdr + size - 20))
1050
		return error("bad index file sha1 signature");
1051 1052 1053
	return 0;
}

1054 1055
static int read_index_extension(struct index_state *istate,
				const char *ext, void *data, unsigned long sz)
1056 1057 1058
{
	switch (CACHE_EXT(ext)) {
	case CACHE_EXT_TREE:
1059
		istate->cache_tree = cache_tree_read(data, sz);
1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070
		break;
	default:
		if (*ext < 'A' || 'Z' < *ext)
			return error("index uses %.4s extension, which we do not understand",
				     ext);
		fprintf(stderr, "ignoring %.4s extension\n", ext);
		break;
	}
	return 0;
}

1071
/*
 * Read the index into `istate` from the path reported by
 * get_index_file().  Returns whatever read_index_from() returns.
 */
int read_index(struct index_state *istate)
{
	const char *index_path = get_index_file();

	return read_index_from(istate, index_path);
}

1076 1077
/*
 * Populate the in-memory entry `ce` from the on-disk entry `ondisk`.
 *
 * The 32-bit fields are converted with ntohl() and the 16-bit flags with
 * ntohs(); the object name and the NUL-terminated path are copied as-is.
 * The caller must have reserved enough room behind `ce` for the path.
 */
static void convert_from_disk(struct ondisk_cache_entry *ondisk, struct cache_entry *ce)
{
	size_t name_len;

	hashcpy(ce->sha1, ondisk->sha1);

	/* On-disk flags are just 16 bits */
	ce->ce_flags = ntohs(ondisk->flags);

	ce->ce_size  = ntohl(ondisk->size);
	ce->ce_gid   = ntohl(ondisk->gid);
	ce->ce_uid   = ntohl(ondisk->uid);
	ce->ce_mode  = ntohl(ondisk->mode);
	ce->ce_ino   = ntohl(ondisk->ino);
	ce->ce_dev   = ntohl(ondisk->dev);
	ce->ce_mtime = ntohl(ondisk->mtime.sec);
	ce->ce_ctime = ntohl(ondisk->ctime.sec);

	/*
	 * The low bits of the flags carry the name length.  When that field
	 * is saturated (equal to CE_NAMEMASK) the length is measured with
	 * strlen() instead -- presumably because it did not fit.
	 */
	name_len = ce->ce_flags & CE_NAMEMASK;
	if (name_len == CE_NAMEMASK)
		name_len = strlen(ondisk->name);

	/*
	 * NEEDSWORK: If the original index is crafted, this copy could
	 * go unchecked.
	 */
	memcpy(ce->name, ondisk->name, name_len + 1);
}

1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
/*
 * Estimate how many bytes are needed to hold `entries` in-memory cache
 * entries converted from an on-disk index of `ondisk_size` bytes.
 *
 * Each entry is assumed to differ from its on-disk form by the size
 * difference of the two structs plus one pointer's worth of alignment
 * slack.
 */
static inline size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
{
	long per_entry;

	per_entry = sizeof(struct cache_entry) - sizeof(struct ondisk_cache_entry);

	/*
	 * Alignment can cause differences. This should be "alignof", but
	 * since that's a gcc'ism, just use the size of a pointer.
	 */
	per_entry += sizeof(void *);

	/*
	 * Multiply in size_t rather than in long: where long is narrower
	 * than size_t the product would otherwise be truncated to the width
	 * of long before being added to ondisk_size.  Modular size_t
	 * arithmetic still yields the right sum if per_entry is negative.
	 */
	return ondisk_size + (size_t)entries * per_entry;
}

1116
/* remember to discard_cache() before reading a different cache! */
1117
int read_index_from(struct index_state *istate, const char *path)
1118 1119 1120
{
	int fd, i;
	struct stat st;
1121
	unsigned long src_offset, dst_offset;
1122
	struct cache_header *hdr;
1123 1124
	void *mmap;
	size_t mmap_size;
1125 1126

	errno = EBUSY;
1127
	if (istate->alloc)
1128
		return istate->cache_nr;
1129

1130
	errno = ENOENT;
1131
	istate->timestamp = 0;
1132
	fd = open(path, O_RDONLY);
1133 1134 1135 1136 1137
	if (fd < 0) {
		if (errno == ENOENT)
			return 0;
		die("index file open failed (%s)", strerror(errno));
	}
1138

1139
	if (fstat(fd, &st))
1140
		die("cannot stat the open index (%s)", strerror(errno));
1141 1142

	errno = EINVAL;
1143 1144
	mmap_size = xsize_t(st.st_size);
	if (mmap_size < sizeof(struct cache_header) + 20)
1145 1146
		die("index file smaller than expected");

1147
	mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1148
	close(fd);
1149 1150
	if (mmap == MAP_FAILED)
		die("unable to map index file");
1151

1152 1153
	hdr = mmap;
	if (verify_hdr(hdr, mmap_size) < 0)
1154 1155
		goto unmap;

1156 1157 1158
	istate->cache_nr = ntohl(hdr->hdr_entries);
	istate->cache_alloc = alloc_nr(istate->cache_nr);
	istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
1159

1160 1161 1162 1163 1164 1165
	/*
	 * The disk format is actually larger than the in-memory format,
	 * due to space for nsec etc, so even though the in-memory one
	 * has room for a few  more flags, we can allocate using the same
	 * index size
	 */
1166
	istate->alloc = xmalloc(estimate_cache_size(mmap_size, istate->cache_nr));
1167 1168 1169

	src_offset = sizeof(*hdr);
	dst_offset = 0;
1170
	for (i = 0; i < istate->cache_nr; i++) {
1171
		struct ondisk_cache_entry *disk_ce;
1172 1173
		struct cache_entry *ce;

1174 1175 1176
		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
		ce = (struct cache_entry *)((char *)istate->alloc + dst_offset);
		convert_from_disk(disk_ce