Commit 3b41fb0c authored by René Scharfe's avatar René Scharfe Committed by Junio C Hamano

fsck: use oidset instead of oid_array for skipList

Change the implementation of the skipList feature to use oidset
instead of oid_array to store SHA-1s for later lookup.

This list is parsed once on startup by fsck, fetch-pack or
receive-pack depending on the *.skipList config in use. I.e. only once
per invocation, but note that for "clone --recurse-submodules" each
submodule will re-parse the list, in addition to the main project, and
it will be re-parsed when checking .gitmodules blobs, see
fb162877 ("fsck: check skiplist for object in fsck_blob()",
2018-06-27).

Memory usage is a bit higher, but we don't need to keep track of the
sort order anymore. Embed the oidset into struct fsck_options to make
its ownership clear (no hidden sharing) and avoid unnecessary pointer
indirection.

The cumulative impact on performance of this & the preceding change,
using the test setup described in the previous commit:

    Test                                             HEAD~2            HEAD~                   HEAD
    ----------------------------------------------------------------------------------------------------------------
    1450.3: fsck with 0 skipped bad commits          7.70(7.31+0.38)   7.72(7.33+0.38) +0.3%   7.70(7.30+0.40) +0.0%
    1450.5: fsck with 1 skipped bad commits          7.84(7.47+0.37)   7.69(7.32+0.36) -1.9%   7.71(7.29+0.41) -1.7%
    1450.7: fsck with 10 skipped bad commits         7.81(7.40+0.40)   7.94(7.57+0.36) +1.7%   7.92(7.55+0.37) +1.4%
    1450.9: fsck with 100 skipped bad commits        7.81(7.42+0.38)   7.95(7.53+0.41) +1.8%   7.83(7.42+0.41) +0.3%
    1450.11: fsck with 1000 skipped bad commits      7.99(7.62+0.36)   7.90(7.50+0.40) -1.1%   7.86(7.49+0.37) -1.6%
    1450.13: fsck with 10000 skipped bad commits     7.98(7.57+0.40)   7.94(7.53+0.40) -0.5%   7.90(7.45+0.44) -1.0%
    1450.15: fsck with 100000 skipped bad commits    7.97(7.57+0.39)   8.03(7.67+0.36) +0.8%   7.84(7.43+0.41) -1.6%
    1450.17: fsck with 1000000 skipped bad commits   7.72(7.22+0.50)   7.28(7.07+0.20) -5.7%   7.13(6.87+0.25) -7.6%
Helped-by: Ævar Arnfjörð Bjarmason's avatarÆvar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: default avatarRene Scharfe <l.s.r@web.de>
Signed-off-by: Ævar Arnfjörð Bjarmason's avatarÆvar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: default avatarJunio C Hamano <gitster@pobox.com>
parent fb895207
......@@ -1731,11 +1731,12 @@ all three of them they must all set to the same values.
+
Older versions of Git (before 2.20) documented that the object names
list should be sorted. This was never a requirement, the object names
can appear in any order, but when reading the list we track whether
the list is sorted for the purposes of an internal binary search
implementation, which can save itself some work with an already sorted
list. Unless you have a humongous list there's no reason to go out of
your way to pre-sort the list.
could appear in any order, but when reading the list we tracked whether
the list was sorted for the purposes of an internal binary search
implementation, which could save itself some work with an already sorted
list. Unless you had a humongous list there was no reason to go out of
your way to pre-sort the list. After Git version 2.20 a hash implementation
is used instead, so there's now no reason to pre-sort the list.
gc.aggressiveDepth::
The depth parameter used in the delta compression
......
......@@ -10,7 +10,6 @@
#include "fsck.h"
#include "refs.h"
#include "utf8.h"
#include "sha1-array.h"
#include "decorate.h"
#include "oidset.h"
#include "packfile.h"
......@@ -182,19 +181,10 @@ static int fsck_msg_type(enum fsck_msg_id msg_id,
static void init_skiplist(struct fsck_options *options, const char *path)
{
static struct oid_array skiplist = OID_ARRAY_INIT;
int sorted;
FILE *fp;
struct strbuf sb = STRBUF_INIT;
struct object_id oid;
if (options->skiplist)
sorted = options->skiplist->sorted;
else {
sorted = 1;
options->skiplist = &skiplist;
}
fp = fopen(path, "r");
if (!fp)
die("Could not open skip list: %s", path);
......@@ -202,19 +192,12 @@ static void init_skiplist(struct fsck_options *options, const char *path)
const char *p;
if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
die("Invalid SHA-1: %s", sb.buf);
oid_array_append(&skiplist, &oid);
if (sorted && skiplist.nr > 1 &&
oidcmp(&skiplist.oid[skiplist.nr - 2],
&oid) > 0)
sorted = 0;
oidset_insert(&options->skiplist, &oid);
}
if (ferror(fp))
die_errno("Could not read '%s'", path);
fclose(fp);
strbuf_release(&sb);
if (sorted)
skiplist.sorted = 1;
}
static int parse_msg_type(const char *str)
......@@ -319,9 +302,7 @@ static void append_msg_id(struct strbuf *sb, const char *msg_id)
static int object_on_skiplist(struct fsck_options *opts, struct object *obj)
{
if (opts && opts->skiplist && obj)
return oid_array_lookup(opts->skiplist, &obj->oid) >= 0;
return 0;
return opts && obj && oidset_contains(&opts->skiplist, &obj->oid);
}
__attribute__((format (printf, 4, 5)))
......
#ifndef GIT_FSCK_H
#define GIT_FSCK_H
#include "oidset.h"
#define FSCK_ERROR 1
#define FSCK_WARN 2
#define FSCK_IGNORE 3
......@@ -35,12 +37,12 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
int *msg_type;
struct oid_array *skiplist;
struct oidset skiplist;
struct decoration *object_names;
};
#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL }
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL }
#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT }
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL, OIDSET_INIT }
/* descend in all linked child objects
* the return value is:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment