split-index.c 13.6 KB
Newer Older
1 2
#include "cache.h"
#include "split-index.h"
3
#include "ewah/ewok.h"
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18

struct split_index *init_split_index(struct index_state *istate)
{
	if (!istate->split_index) {
		istate->split_index = xcalloc(1, sizeof(*istate->split_index));
		istate->split_index->refcount = 1;
	}
	return istate->split_index;
}

int read_link_extension(struct index_state *istate,
			 const void *data_, unsigned long sz)
{
	const unsigned char *data = data_;
	struct split_index *si;
19 20
	int ret;

21
	if (sz < the_hash_algo->rawsz)
22 23
		return error("corrupt link extension (too short)");
	si = init_split_index(istate);
24 25 26
	hashcpy(si->base_oid.hash, data);
	data += the_hash_algo->rawsz;
	sz -= the_hash_algo->rawsz;
27 28 29 30 31 32 33 34 35 36 37 38 39
	if (!sz)
		return 0;
	si->delete_bitmap = ewah_new();
	ret = ewah_read_mmap(si->delete_bitmap, data, sz);
	if (ret < 0)
		return error("corrupt delete bitmap in link extension");
	data += ret;
	sz -= ret;
	si->replace_bitmap = ewah_new();
	ret = ewah_read_mmap(si->replace_bitmap, data, sz);
	if (ret < 0)
		return error("corrupt replace bitmap in link extension");
	if (ret != sz)
40 41 42 43 44 45 46 47
		return error("garbage at the end of link extension");
	return 0;
}

int write_link_extension(struct strbuf *sb,
			 struct index_state *istate)
{
	struct split_index *si = istate->split_index;
48
	strbuf_add(sb, si->base_oid.hash, the_hash_algo->rawsz);
49 50
	if (!si->delete_bitmap && !si->replace_bitmap)
		return 0;
51 52
	ewah_serialize_strbuf(si->delete_bitmap, sb);
	ewah_serialize_strbuf(si->replace_bitmap, sb);
53 54 55 56 57 58 59 60 61 62
	return 0;
}

static void mark_base_index_entries(struct index_state *base)
{
	int i;
	/*
	 * To keep track of the shared entries between
	 * istate->base->cache[] and istate->cache[], base entry
	 * position is stored in each base entry. All positions start
63
	 * from 1 instead of 0, which is reserved to say "this is a new
64 65 66 67 68 69
	 * entry".
	 */
	for (i = 0; i < base->cache_nr; i++)
		base->cache[i]->index = i + 1;
}

70 71 72 73 74 75
void move_cache_to_base_index(struct index_state *istate)
{
	struct split_index *si = istate->split_index;
	int i;

	/*
76 77
	 * If there was a previous base index, then transfer ownership of allocated
	 * entries to the parent index.
78
	 */
79 80 81 82 83 84 85 86 87
	if (si->base &&
		si->base->ce_mem_pool) {

		if (!istate->ce_mem_pool)
			mem_pool_init(&istate->ce_mem_pool, 0);

		mem_pool_combine(istate->ce_mem_pool, istate->split_index->base->ce_mem_pool);
	}

88 89 90 91 92 93
	si->base = xcalloc(1, sizeof(*si->base));
	si->base->version = istate->version;
	/* zero timestamp disables racy test in ce_write_index() */
	si->base->timestamp = istate->timestamp;
	ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
	si->base->cache_nr = istate->cache_nr;
94 95 96 97 98 99 100

	/*
	 * The mem_pool needs to move with the allocated entries.
	 */
	si->base->ce_mem_pool = istate->ce_mem_pool;
	istate->ce_mem_pool = NULL;

René Scharfe's avatar
René Scharfe committed
101
	COPY_ARRAY(si->base->cache, istate->cache, istate->cache_nr);
102 103 104 105 106
	mark_base_index_entries(si->base);
	for (i = 0; i < si->base->cache_nr; i++)
		si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
}

107 108 109 110 111 112 113
static void mark_entry_for_delete(size_t pos, void *data)
{
	struct index_state *istate = data;
	if (pos >= istate->cache_nr)
		die("position for delete %d exceeds base index size %d",
		    (int)pos, istate->cache_nr);
	istate->cache[pos]->ce_flags |= CE_REMOVE;
114
	istate->split_index->nr_deletions++;
115 116 117 118 119 120 121
}

static void replace_entry(size_t pos, void *data)
{
	struct index_state *istate = data;
	struct split_index *si = istate->split_index;
	struct cache_entry *dst, *src;
122

123 124 125 126 127 128 129 130 131 132 133
	if (pos >= istate->cache_nr)
		die("position for replacement %d exceeds base index size %d",
		    (int)pos, istate->cache_nr);
	if (si->nr_replacements >= si->saved_cache_nr)
		die("too many replacements (%d vs %d)",
		    si->nr_replacements, si->saved_cache_nr);
	dst = istate->cache[pos];
	if (dst->ce_flags & CE_REMOVE)
		die("entry %d is marked as both replaced and deleted",
		    (int)pos);
	src = si->saved_cache[si->nr_replacements];
134 135 136
	if (ce_namelen(src))
		die("corrupt link extension, entry %d should have "
		    "zero length name", (int)pos);
137 138
	src->index = pos + 1;
	src->ce_flags |= CE_UPDATE_IN_BASE;
139 140
	src->ce_namelen = dst->ce_namelen;
	copy_cache_entry(dst, src);
141
	discard_cache_entry(src);
142 143 144
	si->nr_replacements++;
}

145 146 147
void merge_base_index(struct index_state *istate)
{
	struct split_index *si = istate->split_index;
148
	unsigned int i;
149 150

	mark_base_index_entries(si->base);
151 152 153 154 155 156

	si->saved_cache	    = istate->cache;
	si->saved_cache_nr  = istate->cache_nr;
	istate->cache_nr    = si->base->cache_nr;
	istate->cache	    = NULL;
	istate->cache_alloc = 0;
157
	ALLOC_GROW(istate->cache, istate->cache_nr, istate->cache_alloc);
René Scharfe's avatar
René Scharfe committed
158
	COPY_ARRAY(istate->cache, si->base->cache, istate->cache_nr);
159 160 161 162 163 164

	si->nr_deletions = 0;
	si->nr_replacements = 0;
	ewah_each_bit(si->replace_bitmap, replace_entry, istate);
	ewah_each_bit(si->delete_bitmap, mark_entry_for_delete, istate);
	if (si->nr_deletions)
165
		remove_marked_cache_entries(istate, 0);
166 167

	for (i = si->nr_replacements; i < si->saved_cache_nr; i++) {
168 169 170
		if (!ce_namelen(si->saved_cache[i]))
			die("corrupt link extension, entry %d should "
			    "have non-zero length name", i);
171 172
		add_index_entry(istate, si->saved_cache[i],
				ADD_CACHE_OK_TO_ADD |
173
				ADD_CACHE_KEEP_CACHE_TREE |
174 175 176 177 178 179 180 181 182 183 184
				/*
				 * we may have to replay what
				 * merge-recursive.c:update_stages()
				 * does, which has this flag on
				 */
				ADD_CACHE_SKIP_DFCHECK);
		si->saved_cache[i] = NULL;
	}

	ewah_free(si->delete_bitmap);
	ewah_free(si->replace_bitmap);
185
	FREE_AND_NULL(si->saved_cache);
186 187 188
	si->delete_bitmap  = NULL;
	si->replace_bitmap = NULL;
	si->saved_cache_nr = 0;
189 190
}

191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
/*
 * Compare most of the fields in two cache entries, i.e. all except the
 * hashmap_entry and the name.
 */
static int compare_ce_content(struct cache_entry *a, struct cache_entry *b)
{
	const unsigned int ondisk_flags = CE_STAGEMASK | CE_VALID |
					  CE_EXTENDED_FLAGS;
	unsigned int ce_flags = a->ce_flags;
	unsigned int base_flags = b->ce_flags;
	int ret;

	/* only on-disk flags matter */
	a->ce_flags &= ondisk_flags;
	b->ce_flags &= ondisk_flags;
	ret = memcmp(&a->ce_stat_data, &b->ce_stat_data,
		     offsetof(struct cache_entry, name) -
		     offsetof(struct cache_entry, ce_stat_data));
	a->ce_flags = ce_flags;
	b->ce_flags = base_flags;

	return ret;
}

215 216 217
void prepare_to_write_split_index(struct index_state *istate)
{
	struct split_index *si = init_split_index(istate);
218 219 220 221 222 223 224 225 226 227
	struct cache_entry **entries = NULL, *ce;
	int i, nr_entries = 0, nr_alloc = 0;

	si->delete_bitmap = ewah_new();
	si->replace_bitmap = ewah_new();

	if (si->base) {
		/* Go through istate->cache[] and mark CE_MATCHED to
		 * entry with positive index. We'll go through
		 * base->cache[] later to delete all entries in base
228
		 * that are not marked with either CE_MATCHED or
229 230 231 232 233 234
		 * CE_UPDATE_IN_BASE. If istate->cache[i] is a
		 * duplicate, deduplicate it.
		 */
		for (i = 0; i < istate->cache_nr; i++) {
			struct cache_entry *base;
			ce = istate->cache[i];
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
			if (!ce->index) {
				/*
				 * During simple update index operations this
				 * is a cache entry that is not present in
				 * the shared index.  It will be added to the
				 * split index.
				 *
				 * However, it might also represent a file
				 * that already has a cache entry in the
				 * shared index, but a new index has just
				 * been constructed by unpack_trees(), and
				 * this entry now refers to different content
				 * than what was recorded in the original
				 * index, e.g. during 'read-tree -m HEAD^' or
				 * 'checkout HEAD^'.  In this case the
				 * original entry in the shared index will be
				 * marked as deleted, and this entry will be
				 * added to the split index.
				 */
254
				continue;
255
			}
256
			if (ce->index > si->base->cache_nr) {
257 258
				BUG("ce refers to a shared ce at %d, which is beyond the shared index size %d",
				    ce->index, si->base->cache_nr);
259 260 261
			}
			ce->ce_flags |= CE_MATCHED; /* or "shared" */
			base = si->base->cache[ce->index - 1];
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
			if (ce == base) {
				/* The entry is present in the shared index. */
				if (ce->ce_flags & CE_UPDATE_IN_BASE) {
					/*
					 * Already marked for inclusion in
					 * the split index, either because
					 * the corresponding file was
					 * modified and the cached stat data
					 * was refreshed, or because there
					 * is already a replacement entry in
					 * the split index.
					 * Nothing more to do here.
					 */
				} else if (!ce_uptodate(ce) &&
					   is_racy_timestamp(istate, ce)) {
					/*
					 * A racily clean cache entry stored
					 * only in the shared index: it must
					 * be added to the split index, so
					 * the subsequent do_write_index()
					 * can smudge its stat data.
					 */
					ce->ce_flags |= CE_UPDATE_IN_BASE;
				} else {
					/*
					 * The entry is only present in the
					 * shared index and it was not
					 * refreshed.
					 * Just leave it there.
					 */
				}
293
				continue;
294
			}
295 296 297 298 299
			if (ce->ce_namelen != base->ce_namelen ||
			    strcmp(ce->name, base->name)) {
				ce->index = 0;
				continue;
			}
300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
			/*
			 * This is the copy of a cache entry that is present
			 * in the shared index, created by unpack_trees()
			 * while it constructed a new index.
			 */
			if (ce->ce_flags & CE_UPDATE_IN_BASE) {
				/*
				 * Already marked for inclusion in the split
				 * index, either because the corresponding
				 * file was modified and the cached stat data
				 * was refreshed, or because the original
				 * entry already had a replacement entry in
				 * the split index.
				 * Nothing to do.
				 */
315 316 317 318 319 320 321 322 323
			} else if (!ce_uptodate(ce) &&
				   is_racy_timestamp(istate, ce)) {
				/*
				 * A copy of a racily clean cache entry from
				 * the shared index.  It must be added to
				 * the split index, so the subsequent
				 * do_write_index() can smudge its stat data.
				 */
				ce->ce_flags |= CE_UPDATE_IN_BASE;
324 325 326 327 328 329 330 331 332 333 334 335 336
			} else {
				/*
				 * Thoroughly compare the cached data to see
				 * whether it should be marked for inclusion
				 * in the split index.
				 *
				 * This comparison might be unnecessary, as
				 * code paths modifying the cached data do
				 * set CE_UPDATE_IN_BASE as well.
				 */
				if (compare_ce_content(ce, base))
					ce->ce_flags |= CE_UPDATE_IN_BASE;
			}
337
			discard_cache_entry(base);
338 339 340 341 342 343 344 345 346
			si->base->cache[ce->index - 1] = ce;
		}
		for (i = 0; i < si->base->cache_nr; i++) {
			ce = si->base->cache[i];
			if ((ce->ce_flags & CE_REMOVE) ||
			    !(ce->ce_flags & CE_MATCHED))
				ewah_set(si->delete_bitmap, i);
			else if (ce->ce_flags & CE_UPDATE_IN_BASE) {
				ewah_set(si->replace_bitmap, i);
347
				ce->ce_flags |= CE_STRIP_NAME;
348 349 350
				ALLOC_GROW(entries, nr_entries+1, nr_alloc);
				entries[nr_entries++] = ce;
			}
351 352
			if (is_null_oid(&ce->oid))
				istate->drop_cache_tree = 1;
353 354 355 356 357 358
		}
	}

	for (i = 0; i < istate->cache_nr; i++) {
		ce = istate->cache[i];
		if ((!si->base || !ce->index) && !(ce->ce_flags & CE_REMOVE)) {
359
			assert(!(ce->ce_flags & CE_STRIP_NAME));
360 361 362 363 364 365 366 367 368 369 370
			ALLOC_GROW(entries, nr_entries+1, nr_alloc);
			entries[nr_entries++] = ce;
		}
		ce->ce_flags &= ~CE_MATCHED;
	}

	/*
	 * take cache[] out temporarily, put entries[] in its place
	 * for writing
	 */
	si->saved_cache = istate->cache;
371
	si->saved_cache_nr = istate->cache_nr;
372 373
	istate->cache = entries;
	istate->cache_nr = nr_entries;
374 375 376 377 378
}

void finish_writing_split_index(struct index_state *istate)
{
	struct split_index *si = init_split_index(istate);
379 380 381 382 383 384 385

	ewah_free(si->delete_bitmap);
	ewah_free(si->replace_bitmap);
	si->delete_bitmap = NULL;
	si->replace_bitmap = NULL;
	free(istate->cache);
	istate->cache = si->saved_cache;
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403
	istate->cache_nr = si->saved_cache_nr;
}

void discard_split_index(struct index_state *istate)
{
	struct split_index *si = istate->split_index;
	if (!si)
		return;
	istate->split_index = NULL;
	si->refcount--;
	if (si->refcount)
		return;
	if (si->base) {
		discard_index(si->base);
		free(si->base);
	}
	free(si);
}
404 405 406 407 408 409 410 411 412 413

void save_or_free_index_entry(struct index_state *istate, struct cache_entry *ce)
{
	if (ce->index &&
	    istate->split_index &&
	    istate->split_index->base &&
	    ce->index <= istate->split_index->base->cache_nr &&
	    ce == istate->split_index->base->cache[ce->index - 1])
		ce->ce_flags |= CE_REMOVE;
	else
414
		discard_cache_entry(ce);
415
}
416 417

void replace_index_entry_in_base(struct index_state *istate,
418 419
				 struct cache_entry *old_entry,
				 struct cache_entry *new_entry)
420
{
421
	if (old_entry->index &&
422 423
	    istate->split_index &&
	    istate->split_index->base &&
424 425 426
	    old_entry->index <= istate->split_index->base->cache_nr) {
		new_entry->index = old_entry->index;
		if (old_entry != istate->split_index->base->cache[new_entry->index - 1])
427
			discard_cache_entry(istate->split_index->base->cache[new_entry->index - 1]);
428
		istate->split_index->base->cache[new_entry->index - 1] = new_entry;
429 430
	}
}
431 432 433 434 435 436 437 438 439 440 441

void add_split_index(struct index_state *istate)
{
	if (!istate->split_index) {
		init_split_index(istate);
		istate->cache_changed |= SPLIT_INDEX_ORDERED;
	}
}

void remove_split_index(struct index_state *istate)
{
442
	if (istate->split_index) {
443 444 445 446 447 448 449 450 451 452
		if (istate->split_index->base) {
			/*
			 * When removing the split index, we need to move
			 * ownership of the mem_pool associated with the
			 * base index to the main index. There may be cache entries
			 * allocated from the base's memory pool that are shared with
			 * the_index.cache[].
			 */
			mem_pool_combine(istate->ce_mem_pool,
					 istate->split_index->base->ce_mem_pool);
453

454 455 456 457 458 459 460
			/*
			 * The split index no longer owns the mem_pool backing
			 * its cache array. As we are discarding this index,
			 * mark the index as having no cache entries, so it
			 * will not attempt to clean up the cache entries or
			 * validate them.
			 */
461
			istate->split_index->base->cache_nr = 0;
462
		}
463 464 465 466 467 468 469 470

		/*
		 * We can discard the split index because its
		 * memory pool has been incorporated into the
		 * memory pool associated with the the_index.
		 */
		discard_split_index(istate);

471 472
		istate->cache_changed |= SOMETHING_CHANGED;
	}
473
}