Commit 43fa44fa authored by Duy Nguyen's avatar Duy Nguyen Committed by Junio C Hamano

pack-objects: move in_pack out of struct object_entry

Instead of using 8 bytes (on 64 bit arch) to store a pointer to a
pack. Use an index instead since the number of packs should be
relatively small.

This limits the number of packs we can handle to 1k. Since we can't be
sure people can never run into the situation where they have more than
1k pack files. Provide a fall back route for it.

If we find out they have too many packs, the new in_pack_by_idx[]
array (which has at most 1k elements) will not be used. Instead we
allocate in_pack[] array that holds nr_objects elements. This is
similar to how the optional in_pack_pos field is handled.

The new simple test is just to make sure the too-many-packs code path
is at least executed. The true test is running

    make test GIT_TEST_FULL_IN_PACK_ARRAY=1

to take advantage of other special case tests.
Signed-off-by: Duy Nguyen's avatarNguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: default avatarJunio C Hamano <gitster@pobox.com>
parent 06af3bba
...@@ -31,6 +31,8 @@ ...@@ -31,6 +31,8 @@
#include "packfile.h" #include "packfile.h"
#include "object-store.h" #include "object-store.h"
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
static const char *pack_usage[] = { static const char *pack_usage[] = {
N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"), N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"), N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
...@@ -367,7 +369,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent ...@@ -367,7 +369,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry, static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
unsigned long limit, int usable_delta) unsigned long limit, int usable_delta)
{ {
struct packed_git *p = entry->in_pack; struct packed_git *p = IN_PACK(entry);
struct pack_window *w_curs = NULL; struct pack_window *w_curs = NULL;
struct revindex_entry *revidx; struct revindex_entry *revidx;
off_t offset; off_t offset;
...@@ -478,7 +480,7 @@ static off_t write_object(struct hashfile *f, ...@@ -478,7 +480,7 @@ static off_t write_object(struct hashfile *f,
if (!reuse_object) if (!reuse_object)
to_reuse = 0; /* explicit */ to_reuse = 0; /* explicit */
else if (!entry->in_pack) else if (!IN_PACK(entry))
to_reuse = 0; /* can't reuse what we don't have */ to_reuse = 0; /* can't reuse what we don't have */
else if (oe_type(entry) == OBJ_REF_DELTA || else if (oe_type(entry) == OBJ_REF_DELTA ||
oe_type(entry) == OBJ_OFS_DELTA) oe_type(entry) == OBJ_OFS_DELTA)
...@@ -1074,7 +1076,7 @@ static void create_object_entry(const struct object_id *oid, ...@@ -1074,7 +1076,7 @@ static void create_object_entry(const struct object_id *oid,
else else
nr_result++; nr_result++;
if (found_pack) { if (found_pack) {
entry->in_pack = found_pack; oe_set_in_pack(&to_pack, entry, found_pack);
entry->in_pack_offset = found_offset; entry->in_pack_offset = found_offset;
} }
...@@ -1399,8 +1401,8 @@ static void cleanup_preferred_base(void) ...@@ -1399,8 +1401,8 @@ static void cleanup_preferred_base(void)
static void check_object(struct object_entry *entry) static void check_object(struct object_entry *entry)
{ {
if (entry->in_pack) { if (IN_PACK(entry)) {
struct packed_git *p = entry->in_pack; struct packed_git *p = IN_PACK(entry);
struct pack_window *w_curs = NULL; struct pack_window *w_curs = NULL;
const unsigned char *base_ref = NULL; const unsigned char *base_ref = NULL;
struct object_entry *base_entry; struct object_entry *base_entry;
...@@ -1535,14 +1537,16 @@ static int pack_offset_sort(const void *_a, const void *_b) ...@@ -1535,14 +1537,16 @@ static int pack_offset_sort(const void *_a, const void *_b)
{ {
const struct object_entry *a = *(struct object_entry **)_a; const struct object_entry *a = *(struct object_entry **)_a;
const struct object_entry *b = *(struct object_entry **)_b; const struct object_entry *b = *(struct object_entry **)_b;
const struct packed_git *a_in_pack = IN_PACK(a);
const struct packed_git *b_in_pack = IN_PACK(b);
/* avoid filesystem trashing with loose objects */ /* avoid filesystem trashing with loose objects */
if (!a->in_pack && !b->in_pack) if (!a_in_pack && !b_in_pack)
return oidcmp(&a->idx.oid, &b->idx.oid); return oidcmp(&a->idx.oid, &b->idx.oid);
if (a->in_pack < b->in_pack) if (a_in_pack < b_in_pack)
return -1; return -1;
if (a->in_pack > b->in_pack) if (a_in_pack > b_in_pack)
return 1; return 1;
return a->in_pack_offset < b->in_pack_offset ? -1 : return a->in_pack_offset < b->in_pack_offset ? -1 :
(a->in_pack_offset > b->in_pack_offset); (a->in_pack_offset > b->in_pack_offset);
...@@ -1578,7 +1582,7 @@ static void drop_reused_delta(struct object_entry *entry) ...@@ -1578,7 +1582,7 @@ static void drop_reused_delta(struct object_entry *entry)
oi.sizep = &entry->size; oi.sizep = &entry->size;
oi.typep = &type; oi.typep = &type;
if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) { if (packed_object_info(IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
/* /*
* We failed to get the info from this pack for some reason; * We failed to get the info from this pack for some reason;
* fall back to sha1_object_info, which may find another copy. * fall back to sha1_object_info, which may find another copy.
...@@ -1848,8 +1852,8 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, ...@@ -1848,8 +1852,8 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
* it, we will still save the transfer cost, as we already know * it, we will still save the transfer cost, as we already know
* the other side has it and we won't send src_entry at all. * the other side has it and we won't send src_entry at all.
*/ */
if (reuse_delta && trg_entry->in_pack && if (reuse_delta && IN_PACK(trg_entry) &&
trg_entry->in_pack == src_entry->in_pack && IN_PACK(trg_entry) == IN_PACK(src_entry) &&
!src_entry->preferred_base && !src_entry->preferred_base &&
trg_entry->in_pack_type != OBJ_REF_DELTA && trg_entry->in_pack_type != OBJ_REF_DELTA &&
trg_entry->in_pack_type != OBJ_OFS_DELTA) trg_entry->in_pack_type != OBJ_OFS_DELTA)
...@@ -3192,6 +3196,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) ...@@ -3192,6 +3196,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
} }
} }
prepare_packing_data(&to_pack);
if (progress) if (progress)
progress_state = start_progress(_("Counting objects"), 0); progress_state = start_progress(_("Counting objects"), 0);
if (!use_internal_rev_list) if (!use_internal_rev_list)
......
...@@ -69,6 +69,7 @@ struct packed_git { ...@@ -69,6 +69,7 @@ struct packed_git {
int index_version; int index_version;
time_t mtime; time_t mtime;
int pack_fd; int pack_fd;
int index; /* for builtin/pack-objects.c */
unsigned pack_local:1, unsigned pack_local:1,
pack_keep:1, pack_keep:1,
freshened:1, freshened:1,
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
#include "object.h" #include "object.h"
#include "pack.h" #include "pack.h"
#include "pack-objects.h" #include "pack-objects.h"
#include "packfile.h"
#include "config.h"
static uint32_t locate_object_entry_hash(struct packing_data *pdata, static uint32_t locate_object_entry_hash(struct packing_data *pdata,
const unsigned char *sha1, const unsigned char *sha1,
...@@ -86,6 +88,63 @@ struct object_entry *packlist_find(struct packing_data *pdata, ...@@ -86,6 +88,63 @@ struct object_entry *packlist_find(struct packing_data *pdata,
return &pdata->objects[pdata->index[i] - 1]; return &pdata->objects[pdata->index[i] - 1];
} }
static void prepare_in_pack_by_idx(struct packing_data *pdata)
{
struct packed_git **mapping, *p;
int cnt = 0, nr = 1U << OE_IN_PACK_BITS;
ALLOC_ARRAY(mapping, nr);
/*
* oe_in_pack() on an all-zero'd object_entry
* (i.e. in_pack_idx also zero) should return NULL.
*/
mapping[cnt++] = NULL;
for (p = get_packed_git(the_repository); p; p = p->next, cnt++) {
if (cnt == nr) {
free(mapping);
return;
}
p->index = cnt;
mapping[cnt] = p;
}
pdata->in_pack_by_idx = mapping;
}
/*
* A new pack appears after prepare_in_pack_by_idx() has been
* run. This is likely a race.
*
* We could map this new pack to in_pack_by_idx[] array, but then we
* have to deal with full array anyway. And since it's hard to test
* this fall back code, just stay simple and fall back to using
* in_pack[] array.
*/
void oe_map_new_pack(struct packing_data *pack,
struct packed_git *p)
{
uint32_t i;
REALLOC_ARRAY(pack->in_pack, pack->nr_alloc);
for (i = 0; i < pack->nr_objects; i++)
pack->in_pack[i] = oe_in_pack(pack, pack->objects + i);
FREE_AND_NULL(pack->in_pack_by_idx);
}
/* assume pdata is already zero'd by caller */
void prepare_packing_data(struct packing_data *pdata)
{
if (git_env_bool("GIT_TEST_FULL_IN_PACK_ARRAY", 0)) {
/*
* do not initialize in_pack_by_idx[] to force the
* slow path in oe_in_pack()
*/
} else {
prepare_in_pack_by_idx(pdata);
}
}
struct object_entry *packlist_alloc(struct packing_data *pdata, struct object_entry *packlist_alloc(struct packing_data *pdata,
const unsigned char *sha1, const unsigned char *sha1,
uint32_t index_pos) uint32_t index_pos)
...@@ -95,6 +154,9 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, ...@@ -95,6 +154,9 @@ struct object_entry *packlist_alloc(struct packing_data *pdata,
if (pdata->nr_objects >= pdata->nr_alloc) { if (pdata->nr_objects >= pdata->nr_alloc) {
pdata->nr_alloc = (pdata->nr_alloc + 1024) * 3 / 2; pdata->nr_alloc = (pdata->nr_alloc + 1024) * 3 / 2;
REALLOC_ARRAY(pdata->objects, pdata->nr_alloc); REALLOC_ARRAY(pdata->objects, pdata->nr_alloc);
if (!pdata->in_pack_by_idx)
REALLOC_ARRAY(pdata->in_pack, pdata->nr_alloc);
} }
new_entry = pdata->objects + pdata->nr_objects++; new_entry = pdata->objects + pdata->nr_objects++;
...@@ -107,5 +169,8 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, ...@@ -107,5 +169,8 @@ struct object_entry *packlist_alloc(struct packing_data *pdata,
else else
pdata->index[index_pos] = pdata->nr_objects; pdata->index[index_pos] = pdata->nr_objects;
if (pdata->in_pack)
pdata->in_pack[pdata->nr_objects - 1] = NULL;
return new_entry; return new_entry;
} }
#ifndef PACK_OBJECTS_H #ifndef PACK_OBJECTS_H
#define PACK_OBJECTS_H #define PACK_OBJECTS_H
#include "object-store.h"
#define OE_DFS_STATE_BITS 2 #define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12 #define OE_DEPTH_BITS 12
#define OE_IN_PACK_BITS 10
/* /*
* State flags for depth-first search used for analyzing delta cycles. * State flags for depth-first search used for analyzing delta cycles.
...@@ -65,7 +68,7 @@ enum dfs_state { ...@@ -65,7 +68,7 @@ enum dfs_state {
struct object_entry { struct object_entry {
struct pack_idx_entry idx; struct pack_idx_entry idx;
unsigned long size; /* uncompressed size */ unsigned long size; /* uncompressed size */
struct packed_git *in_pack; /* already in pack */ unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */
off_t in_pack_offset; off_t in_pack_offset;
struct object_entry *delta; /* delta base object */ struct object_entry *delta; /* delta base object */
struct object_entry *delta_child; /* deltified objects who bases me */ struct object_entry *delta_child; /* deltified objects who bases me */
...@@ -100,8 +103,18 @@ struct packing_data { ...@@ -100,8 +103,18 @@ struct packing_data {
uint32_t index_size; uint32_t index_size;
unsigned int *in_pack_pos; unsigned int *in_pack_pos;
/*
* Only one of these can be non-NULL and they have different
* sizes. if in_pack_by_idx is allocated, oe_in_pack() returns
* the pack of an object using in_pack_idx field. If not,
* in_pack[] array is used the same way as in_pack_pos[]
*/
struct packed_git **in_pack_by_idx;
struct packed_git **in_pack;
}; };
void prepare_packing_data(struct packing_data *pdata);
struct object_entry *packlist_alloc(struct packing_data *pdata, struct object_entry *packlist_alloc(struct packing_data *pdata,
const unsigned char *sha1, const unsigned char *sha1,
uint32_t index_pos); uint32_t index_pos);
...@@ -158,4 +171,27 @@ static inline void oe_set_in_pack_pos(const struct packing_data *pack, ...@@ -158,4 +171,27 @@ static inline void oe_set_in_pack_pos(const struct packing_data *pack,
pack->in_pack_pos[e - pack->objects] = pos; pack->in_pack_pos[e - pack->objects] = pos;
} }
static inline struct packed_git *oe_in_pack(const struct packing_data *pack,
const struct object_entry *e)
{
if (pack->in_pack_by_idx)
return pack->in_pack_by_idx[e->in_pack_idx];
else
return pack->in_pack[e - pack->objects];
}
void oe_map_new_pack(struct packing_data *pack,
struct packed_git *p);
static inline void oe_set_in_pack(struct packing_data *pack,
struct object_entry *e,
struct packed_git *p)
{
if (!p->index)
oe_map_new_pack(pack, p);
if (pack->in_pack_by_idx)
e->in_pack_idx = p->index;
else
pack->in_pack[e - pack->objects] = p;
}
#endif #endif
...@@ -304,6 +304,11 @@ environment set. ...@@ -304,6 +304,11 @@ environment set.
GIT_TEST_SPLIT_INDEX=<boolean> forces split-index mode on the whole GIT_TEST_SPLIT_INDEX=<boolean> forces split-index mode on the whole
test suite. Accept any boolean values that are accepted by git-config. test suite. Accept any boolean values that are accepted by git-config.
GIT_TEST_FULL_IN_PACK_ARRAY=<boolean> exercises the uncommon
pack-objects code path where there are more than 1024 packs even if
the actual number of packs in repository is below this limit. Accept
any boolean values that are accepted by git-config.
Naming Tests Naming Tests
------------ ------------
......
...@@ -457,6 +457,11 @@ test_expect_success !PTHREADS,C_LOCALE_OUTPUT 'pack-objects --threads=N or pack. ...@@ -457,6 +457,11 @@ test_expect_success !PTHREADS,C_LOCALE_OUTPUT 'pack-objects --threads=N or pack.
grep -F "no threads support, ignoring pack.threads" err grep -F "no threads support, ignoring pack.threads" err
' '
test_expect_success 'pack-objects in too-many-packs mode' '
GIT_TEST_FULL_IN_PACK_ARRAY=1 git repack -ad &&
git fsck
'
# #
# WARNING! # WARNING!
# #
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment