#ifndef PACK_OBJECTS_H
#define PACK_OBJECTS_H

#include "object-store.h"
#include "thread-utils.h"
#include "pack.h"

struct repository;

/* 256 MiB */
#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)

/*
 * Bit-field widths used by struct object_entry. Changing any of them
 * changes the size and layout of that struct.
 */
#define OE_DFS_STATE_BITS	2	/* width of object_entry.dfs_state */
#define OE_DEPTH_BITS		12	/* width of object_entry.depth */
#define OE_IN_PACK_BITS		10	/* width of object_entry.in_pack_idx */
#define OE_Z_DELTA_BITS		20	/* width of object_entry.z_delta_size */
/*
 * Note that oe_set_size() becomes expensive when the given size is
 * above this limit. Don't lower it too much.
 */
#define OE_SIZE_BITS		31	/* width of object_entry.size_ */
#define OE_DELTA_SIZE_BITS	23	/* width of object_entry.delta_size_ */

/*
 * Depth-first-search state markers used while analyzing delta cycles.
 *
 * Depth counts delta links down to the base: if A is a delta against B,
 * then A has a depth of 1 and B has a depth of 0.
 *
 * The values are stored in the OE_DFS_STATE_BITS-wide dfs_state
 * bit-field of struct object_entry.
 */
enum dfs_state {
	DFS_NONE = 0,
	DFS_ACTIVE = 1,
	DFS_DONE = 2,
	DFS_NUM_STATES	/* number of states above; keep last */
};

36
/*
37 38 39 40
 * The size of struct nearly determines pack-objects's memory
 * consumption. This struct is packed tight for that reason. When you
 * add or reorder something in this struct, think a bit about this.
 *
41 42 43 44 45 46 47 48 49 50
 * basic object info
 * -----------------
 * idx.oid is filled up before delta searching starts. idx.crc32 is
 * only valid after the object is written out and will be used for
 * generating the index. idx.offset will be both gradually set and
 * used in writing phase (base objects get offset first, then deltas
 * refer to them)
 *
 * "size" is the uncompressed object size. Compressed size of the raw
 * data for an object in a pack is not stored anywhere but is computed
51 52 53
 * and made available when reverse .idx is made. Note that when a
 * delta is reused, "size" is the uncompressed _delta_ size, not the
 * canonical one after the delta has been applied.
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
 *
 * "hash" contains a path name hash which is used for sorting the
 * delta list and also during delta searching. Once prepare_pack()
 * returns it's no longer needed.
 *
 * source pack info
 * ----------------
 * The (in_pack, in_pack_offset) tuple contains the location of the
 * object in the source pack. in_pack_header_size allows quickly
 * skipping the header and going straight to the zlib stream.
 *
 * "type" and "in_pack_type" both describe object type. in_pack_type
 * may contain a delta type, while type is always the canonical type.
 *
 * deltas
 * ------
 * Delta links (delta, delta_child and delta_sibling) are created to
 * reflect that delta graph from the source pack then updated or added
 * during delta searching phase when we find better deltas.
 *
 * delta_child and delta_sibling are last needed in
 * compute_write_order(). "delta" and "delta_size" must remain valid
 * at object writing phase in case the delta is not cached.
 *
 * If a delta is cached in memory and is compressed, delta_data points
 * to the data and z_delta_size contains the compressed size. If it's
 * uncompressed [1], z_delta_size must be zero. delta_size is always
 * the uncompressed size and must be valid even if the delta is not
 * cached.
 *
 * [1] during try_delta phase we don't bother with compressing because
 * the delta could be quickly replaced with a better one.
 */
87 88 89
struct object_entry {
	struct pack_idx_entry idx;
	void *delta_data;	/* cached delta (uncompressed) */
90
	off_t in_pack_offset;
91
	uint32_t hash;			/* name hint hash */
92 93
	unsigned size_:OE_SIZE_BITS;
	unsigned size_valid:1;
94 95 96 97 98
	uint32_t delta_idx;	/* delta base object */
	uint32_t delta_child_idx; /* deltified objects who bases me */
	uint32_t delta_sibling_idx; /* other deltified objects who
				     * uses the same base as me
				     */
99 100
	unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */
	unsigned delta_size_valid:1;
101
	unsigned char in_pack_header_size;
102
	unsigned in_pack_idx:OE_IN_PACK_BITS;	/* already in pack */
103
	unsigned z_delta_size:OE_Z_DELTA_BITS;
104 105
	unsigned type_valid:1;
	unsigned no_try_delta:1;
106
	unsigned type_:TYPE_BITS;
107
	unsigned in_pack_type:TYPE_BITS; /* could be delta */
Jeff King's avatar
Jeff King committed
108

109 110 111 112 113 114 115
	unsigned preferred_base:1; /*
				    * we do not pack this, but is available
				    * to be used as the base object to delta
				    * objects against.
				    */
	unsigned tagged:1; /* near the very tip of refs */
	unsigned filled:1; /* assigned write-order */
116
	unsigned dfs_state:OE_DFS_STATE_BITS;
117
	unsigned depth:OE_DEPTH_BITS;
118
	unsigned ext_base:1; /* delta_idx points outside packlist */
119 120

	/*
121
	 * pahole results on 64-bit linux (gcc and clang)
122
	 *
123
	 *   size: 80, bit_padding: 9 bits
124 125 126
	 *
	 * and on 32-bit (gcc)
	 *
127
	 *   size: 76, bit_padding: 9 bits
128
	 */
129 130 131
};

/*
 * State shared by the pack-objects machinery: the list of objects to
 * pack plus several side arrays. The per-object side arrays
 * (in_pack_pos, delta_size, in_pack, tree_depth, layer) are indexed by
 * an entry's position in "objects", i.e. by (e - objects).
 */
struct packing_data {
	struct repository *repo;
	struct object_entry *objects;
	uint32_t nr_objects, nr_alloc;

	/* NOTE(review): presumably the lookup table behind packlist_find() -- confirm */
	int32_t *index;
	uint32_t index_size;

	unsigned int *in_pack_pos;
	/*
	 * Delta sizes that are not stored in object_entry.delta_size_;
	 * allocated on first use by oe_set_delta_size().
	 */
	unsigned long *delta_size;

	/*
	 * Only one of these can be non-NULL and they have different
	 * sizes. If in_pack_by_idx is allocated, oe_in_pack() returns
	 * the pack of an object using the in_pack_idx field. If not,
	 * the in_pack[] array is indexed the same way as in_pack_pos[].
	 */
	struct packed_git **in_pack_by_idx;
	struct packed_git **in_pack;

	/*
	 * During packing with multiple threads, protect the in-core
	 * object database from concurrent accesses.
	 */
	pthread_mutex_t odb_lock;

	/*
	 * This list contains entries for bases which we know the other side
	 * has (e.g., via reachability bitmaps), but which aren't in our
	 * "objects" list.
	 */
	struct object_entry *ext_bases;
	uint32_t nr_ext, alloc_ext;

	/*
	 * Sizes strictly below these limits are stored directly in
	 * object_entry.size_ / object_entry.delta_size_.
	 */
	uintmax_t oe_size_limit;
	uintmax_t oe_delta_size_limit;

	/* delta islands */
	unsigned int *tree_depth;
	unsigned char *layer;
};

/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably initializes *pdata for use with repository
 * "r" -- confirm against the definition.
 */
void prepare_packing_data(struct repository *r, struct packing_data *pdata);

175
/* Protect access to object database */
176 177
static inline void packing_data_lock(struct packing_data *pdata)
{
178
	pthread_mutex_lock(&pdata->odb_lock);
179 180 181
}
static inline void packing_data_unlock(struct packing_data *pdata)
{
182
	pthread_mutex_unlock(&pdata->odb_lock);
183 184
}

/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably appends a new entry for the object named
 * by "sha1" to pdata->objects and records it at slot "index_pos" --
 * confirm against the definition.
 */
struct object_entry *packlist_alloc(struct packing_data *pdata,
				    const unsigned char *sha1,
				    uint32_t index_pos);

/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably returns the entry for the object named by
 * "sha1" (or NULL when absent) and reports a slot through *index_pos
 * -- confirm against the definition.
 */
struct object_entry *packlist_find(struct packing_data *pdata,
				   const unsigned char *sha1,
				   uint32_t *index_pos);

/*
 * Compute the "name hint" hash of a path name.
 *
 * Returns 0 for a NULL (or empty / all-whitespace) name. Otherwise
 * every non-whitespace byte is folded in from left to right, so the
 * trailing bytes end up in the most significant bits.
 */
static inline uint32_t pack_name_hash(const char *name)
{
	uint32_t c, hash = 0;

	if (!name)
		return 0;

	/*
	 * This effectively just creates a sortable number from the
	 * last sixteen non-whitespace characters. Last characters
	 * count "most", so things that end in ".c" sort together.
	 *
	 * Read each byte as unsigned char: a plain (possibly signed)
	 * char with the high bit set would otherwise be widened to a
	 * value that is out of domain for isspace(). The hash itself
	 * is unaffected, since only the low 8 bits survive "c << 24".
	 */
	while ((c = (unsigned char)*name++) != 0) {
		if (isspace(c))
			continue;
		hash = (hash >> 2) + (c << 24);
	}
	return hash;
}

213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
static inline enum object_type oe_type(const struct object_entry *e)
{
	return e->type_valid ? e->type_ : OBJ_BAD;
}

/*
 * Record "type" in the entry.
 *
 * Types at or above OBJ_ANY are rejected with BUG(). The entry is
 * flagged valid only when type >= OBJ_NONE, so oe_type() reports
 * OBJ_BAD for anything below that.
 */
static inline void oe_set_type(struct object_entry *e,
			       enum object_type type)
{
	if (type >= OBJ_ANY)
		BUG("OBJ_ANY cannot be set in pack-objects code");

	e->type_ = (unsigned)type;
	e->type_valid = !(type < OBJ_NONE);
}

228 229 230 231 232 233 234 235 236 237 238 239 240
static inline unsigned int oe_in_pack_pos(const struct packing_data *pack,
					  const struct object_entry *e)
{
	return pack->in_pack_pos[e - pack->objects];
}

/*
 * Store "pos" in the in_pack_pos[] slot that belongs to entry "e".
 * "e" must be an element of pack->objects.
 */
static inline void oe_set_in_pack_pos(const struct packing_data *pack,
				      const struct object_entry *e,
				      unsigned int pos)
{
	unsigned int *slot = pack->in_pack_pos + (e - pack->objects);

	*slot = pos;
}

241 242 243 244 245 246 247 248 249
static inline struct packed_git *oe_in_pack(const struct packing_data *pack,
					    const struct object_entry *e)
{
	if (pack->in_pack_by_idx)
		return pack->in_pack_by_idx[e->in_pack_idx];
	else
		return pack->in_pack[e - pack->objects];
}

/*
 * Declared here, defined elsewhere; called by oe_set_in_pack() when it
 * is handed a pack whose "index" field is zero.
 */
void oe_map_new_pack(struct packing_data *pack);

252 253 254 255 256
static inline void oe_set_in_pack(struct packing_data *pack,
				  struct object_entry *e,
				  struct packed_git *p)
{
	if (!p->index)
257
		oe_map_new_pack(pack);
258 259 260 261 262 263
	if (pack->in_pack_by_idx)
		e->in_pack_idx = p->index;
	else
		pack->in_pack[e - pack->objects] = p;
}

264 265 266 267
static inline struct object_entry *oe_delta(
		const struct packing_data *pack,
		const struct object_entry *e)
{
268 269 270 271 272
	if (!e->delta_idx)
		return NULL;
	if (e->ext_base)
		return &pack->ext_bases[e->delta_idx - 1];
	else
273 274 275 276 277 278 279 280 281 282 283 284 285
		return &pack->objects[e->delta_idx - 1];
}

/*
 * Make "delta" (an element of pack->objects, or NULL for "none") the
 * delta base of "e". The link is stored as a 1-based index, with 0
 * standing for NULL.
 *
 * NOTE(review): the entry's ext_base flag is left untouched here --
 * confirm no caller relies on this after oe_set_delta_ext().
 */
static inline void oe_set_delta(struct packing_data *pack,
				struct object_entry *e,
				struct object_entry *delta)
{
	e->delta_idx = delta ? (delta - pack->objects) + 1 : 0;
}

/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably makes the object named by "sha1" an
 * external base of "e" (an entry in pack->ext_bases, with the
 * ext_base flag set) -- confirm against the definition.
 */
void oe_set_delta_ext(struct packing_data *pack,
		      struct object_entry *e,
		      const unsigned char *sha1);

290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
static inline struct object_entry *oe_delta_child(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	if (e->delta_child_idx)
		return &pack->objects[e->delta_child_idx - 1];
	return NULL;
}

/*
 * Set the delta-child link of "e" to "delta" (an element of
 * pack->objects, or NULL for "none"). Stored as a 1-based index,
 * with 0 standing for NULL.
 */
static inline void oe_set_delta_child(struct packing_data *pack,
				      struct object_entry *e,
				      struct object_entry *delta)
{
	e->delta_child_idx = delta ? (delta - pack->objects) + 1 : 0;
}

/*
 * Return the entry linked through e->delta_sibling_idx (a 1-based
 * index into pack->objects), or NULL when that index is 0.
 */
static inline struct object_entry *oe_delta_sibling(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	uint32_t idx = e->delta_sibling_idx;

	return idx ? &pack->objects[idx - 1] : NULL;
}

/*
 * Set the delta-sibling link of "e" to "delta" (an element of
 * pack->objects, or NULL for "none"). Stored as a 1-based index,
 * with 0 standing for NULL.
 */
static inline void oe_set_delta_sibling(struct packing_data *pack,
					struct object_entry *e,
					struct object_entry *delta)
{
	e->delta_sibling_idx = delta ? (delta - pack->objects) + 1 : 0;
}

/*
 * Declared here, defined elsewhere. Used by the oe_size*() helpers as
 * the fallback whenever an entry's size_valid flag is not set.
 */
unsigned long oe_get_size_slow(struct packing_data *pack,
			       const struct object_entry *e);
/*
 * Return the size of "e": the inline size_ field when size_valid is
 * set, otherwise whatever oe_get_size_slow() reports.
 */
static inline unsigned long oe_size(struct packing_data *pack,
				    const struct object_entry *e)
{
	if (!e->size_valid)
		return oe_get_size_slow(pack, e);

	return e->size_;
}

/*
 * Return non-zero if the size of "lhs" is strictly less than "rhs",
 * calling oe_get_size_slow() only when the answer cannot be decided
 * from the inline fields and pack->oe_size_limit.
 */
static inline int oe_size_less_than(struct packing_data *pack,
				    const struct object_entry *lhs,
				    unsigned long rhs)
{
	if (!lhs->size_valid) {
		/*
		 * oe_set_size() leaves size_valid clear only for sizes
		 * >= oe_size_limit, so: rhs < 2^x <= lhs.
		 */
		if (rhs < pack->oe_size_limit)
			return 0;
		return oe_get_size_slow(pack, lhs) < rhs;
	}

	return lhs->size_ < rhs;
}

/*
 * Return non-zero if the size of "lhs" is strictly greater than "rhs",
 * calling oe_get_size_slow() only when the answer cannot be decided
 * from the inline fields and pack->oe_size_limit.
 */
static inline int oe_size_greater_than(struct packing_data *pack,
				       const struct object_entry *lhs,
				       unsigned long rhs)
{
	if (!lhs->size_valid) {
		/*
		 * oe_set_size() leaves size_valid clear only for sizes
		 * >= oe_size_limit, so: rhs < 2^x <= lhs.
		 */
		if (rhs < pack->oe_size_limit)
			return 1;
		return oe_get_size_slow(pack, lhs) > rhs;
	}

	return lhs->size_ > rhs;
}

/*
 * Record "size" as the size of "e".
 *
 * Sizes below pack->oe_size_limit are stored inline and flagged valid.
 * Larger sizes are not stored: size_valid is cleared and "size" is
 * cross-checked against oe_get_size_slow(), with BUG() on mismatch.
 */
static inline void oe_set_size(struct packing_data *pack,
			       struct object_entry *e,
			       unsigned long size)
{
	if (size >= pack->oe_size_limit) {
		e->size_valid = 0;
		if (oe_get_size_slow(pack, e) != size)
			BUG("'size' is supposed to be the object size!");
		return;
	}

	e->size_ = size;
	e->size_valid = 1;
}

375 376 377 378 379
static inline unsigned long oe_delta_size(struct packing_data *pack,
					  const struct object_entry *e)
{
	if (e->delta_size_valid)
		return e->delta_size_;
380 381

	/*
382
	 * pack->delta_size[] can't be NULL because oe_set_delta_size()
383 384 385 386 387 388 389
	 * must have been called when a new delta is saved with
	 * oe_set_delta().
	 * If oe_delta() returns NULL (i.e. default state, which means
	 * delta_size_valid is also false), then the caller must never
	 * call oe_delta_size().
	 */
	return pack->delta_size[e - pack->objects];
390 391 392 393 394 395
}

static inline void oe_set_delta_size(struct packing_data *pack,
				     struct object_entry *e,
				     unsigned long size)
{
396 397 398 399 400 401 402 403 404 405 406 407
	if (size < pack->oe_delta_size_limit) {
		e->delta_size_ = size;
		e->delta_size_valid = 1;
	} else {
		packing_data_lock(pack);
		if (!pack->delta_size)
			ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
		packing_data_unlock(pack);

		pack->delta_size[e - pack->objects] = size;
		e->delta_size_valid = 0;
	}
408 409
}

410 411 412 413 414 415 416 417 418 419 420 421 422
static inline unsigned int oe_tree_depth(struct packing_data *pack,
					 struct object_entry *e)
{
	if (!pack->tree_depth)
		return 0;
	return pack->tree_depth[e - pack->objects];
}

/*
 * Record "tree_depth" for "e", allocating the pack->tree_depth[]
 * array (pack->nr_alloc entries) on first use.
 */
static inline void oe_set_tree_depth(struct packing_data *pack,
				     struct object_entry *e,
				     unsigned int tree_depth)
{
	if (!pack->tree_depth)
		CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc);
	pack->tree_depth[e - pack->objects] = tree_depth;
}

427 428 429 430 431 432 433 434 435 436 437 438 439
static inline unsigned char oe_layer(struct packing_data *pack,
				     struct object_entry *e)
{
	if (!pack->layer)
		return 0;
	return pack->layer[e - pack->objects];
}

/*
 * Record "layer" for "e", allocating the pack->layer[] array
 * (pack->nr_alloc entries) on first use.
 */
static inline void oe_set_layer(struct packing_data *pack,
				struct object_entry *e,
				unsigned char layer)
{
	if (!pack->layer)
		CALLOC_ARRAY(pack->layer, pack->nr_alloc);
	pack->layer[e - pack->objects] = layer;
}

#endif