Commit 12da1d1f authored by Thomas Rast's avatar Thomas Rast Committed by Junio C Hamano

Implement line-history search (git log -L)

This is a rewrite of much of Bo's work, mainly in an effort to split
it into smaller, easier to understand routines.

The algorithm is built around the struct range_set, which encodes a
series of line ranges as intervals [a,b).  This is used in two
contexts:

* A set of lines we are tracking (which will change as we dig through
  history).
* To encode diffs, as pairs of ranges.

The main routine is range_set_map_across_diff().  It processes the
diff between a commit C and some parent P.  It determines which diff
hunks are relevant to the ranges tracked in C, and computes the new
ranges for P.

The algorithm is then simply to process history in topological order
from newest to oldest, computing ranges and (partial) diffs.  At
branch points, we need to merge the ranges we are watching.  We will
find that many commits do not affect the chosen ranges, and mark them
TREESAME (in addition to those already filtered by pathspec limiting).
Another pass of history simplification then gets rid of such commits.

This is wired as an extra filtering pass in the log machinery.  This
currently only reduces code duplication, but should allow for other
simplifications and options to be used.

Finally, we hook a diff printer into the output chain.  Ideally we
would wire directly into the diff logic, to optionally use features
like word diff.  However, that will require some major reworking of
the diff chain, so we completely replace the output with our own diff
for now.

As this was a GSoC project, and has quite some history by now, many
people have helped.  In no particular order, thanks go to

  Jakub Narebski <jnareb@gmail.com>
  Jens Lehmann <Jens.Lehmann@web.de>
  Jonathan Nieder <jrnieder@gmail.com>
  Junio C Hamano <gitster@pobox.com>
  Ramsay Jones <ramsay@ramsay1.demon.co.uk>
  Will Palmer <wmpalmer@gmail.com>

Apologies to everyone I forgot.
Signed-off-by: default avatarBo Yang <struggleyb.nku@gmail.com>
Signed-off-by: default avatarThomas Rast <trast@student.ethz.ch>
Signed-off-by: default avatarJunio C Hamano <gitster@pobox.com>
parent c7edcae0
......@@ -69,6 +69,23 @@ produced by --stat etc.
Note that only message is considered, if also a diff is shown
its size is not included.
-L <start>,<end>:<file>::
Trace the evolution of the line range given by "<start>,<end>"
within the <file>. You may not give any pathspec limiters.
This is currently limited to a walk starting from a single
revision, i.e., you may only give zero or one positive
revision arguments.
<start> and <end> can take one of these forms:
include::line-range-format.txt[]
You can specify this option more than once.
--full-line-diff::
Always print the interesting range even if the current commit
does not change any line of the range.
[\--] <path>...::
Show only commits that are enough to explain how the files
that match the specified paths came to be. See "History
......@@ -138,6 +155,11 @@ Examples
This makes sense only when following a strict policy of merging all
topic branches when staying on a single integration branch.
git log -L '/int main/',/^}/:main.c::
Shows how the function `main()` in the file 'main.c' evolved
over time.
`git log -3`::
Limits the number of commits to show to 3.
......
......@@ -683,6 +683,7 @@ LIB_H += help.h
LIB_H += http.h
LIB_H += kwset.h
LIB_H += levenshtein.h
LIB_H += line-log.h
LIB_H += line-range.h
LIB_H += list-objects.h
LIB_H += ll-merge.h
......@@ -799,6 +800,7 @@ LIB_OBJS += hex.o
LIB_OBJS += ident.o
LIB_OBJS += kwset.o
LIB_OBJS += levenshtein.o
LIB_OBJS += line-log.o
LIB_OBJS += line-range.o
LIB_OBJS += list-objects.o
LIB_OBJS += ll-merge.o
......
......@@ -19,6 +19,7 @@
#include "remote.h"
#include "string-list.h"
#include "parse-options.h"
#include "line-log.h"
#include "branch.h"
#include "streaming.h"
#include "version.h"
......@@ -41,6 +42,12 @@ static const char * const builtin_log_usage[] = {
NULL
};
struct line_opt_callback_data {
struct rev_info *rev;
const char *prefix;
struct string_list args;
};
static int parse_decoration_style(const char *var, const char *value)
{
switch (git_config_maybe_bool(var, value)) {
......@@ -75,6 +82,19 @@ static int decorate_callback(const struct option *opt, const char *arg, int unse
return 0;
}
static int log_line_range_callback(const struct option *option, const char *arg, int unset)
{
struct line_opt_callback_data *data = option->value;
if (!arg)
return -1;
data->rev->line_level_traverse = 1;
string_list_append(&data->args, arg);
return 0;
}
static void cmd_log_init_defaults(struct rev_info *rev)
{
if (fmt_pretty)
......@@ -97,6 +117,7 @@ static void cmd_log_init_finish(int argc, const char **argv, const char *prefix,
{
struct userformat_want w;
int quiet = 0, source = 0, mailmap = 0;
static struct line_opt_callback_data line_cb = {NULL, NULL, STRING_LIST_INIT_DUP};
const struct option builtin_log_options[] = {
OPT_BOOLEAN(0, "quiet", &quiet, N_("suppress diff output")),
......@@ -104,9 +125,15 @@ static void cmd_log_init_finish(int argc, const char **argv, const char *prefix,
OPT_BOOLEAN(0, "use-mailmap", &mailmap, N_("Use mail map file")),
{ OPTION_CALLBACK, 0, "decorate", NULL, NULL, N_("decorate options"),
PARSE_OPT_OPTARG, decorate_callback},
OPT_CALLBACK('L', NULL, &line_cb, "n,m:file",
"Process line range n,m in file, counting from 1",
log_line_range_callback),
OPT_END()
};
line_cb.rev = rev;
line_cb.prefix = prefix;
mailmap = use_mailmap_config;
argc = parse_options(argc, argv, prefix,
builtin_log_options, builtin_log_usage,
......@@ -160,6 +187,10 @@ static void cmd_log_init_finish(int argc, const char **argv, const char *prefix,
rev->show_decorations = 1;
load_ref_decorations(decoration_style);
}
if (rev->line_level_traverse)
line_log_init(rev, line_cb.prefix, &line_cb.args);
setup_pager();
}
......
This diff is collapsed.
#ifndef LINE_LOG_H
#define LINE_LOG_H
#include "diffcore.h"
struct rev_info;
struct commit;
/* A range [start,end]. Lines are numbered starting at 0, and the
* ranges include start but exclude end. */
struct range {
long start, end;
};
/* A set of ranges. The ranges must always be disjoint and sorted. */
struct range_set {
int alloc, nr;
struct range *ranges;
};
/* A diff, encoded as the set of pre- and post-image ranges where the
* files differ. A pair of ranges corresponds to a hunk. */
struct diff_ranges {
struct range_set parent;
struct range_set target;
};
/* Linked list of interesting files and their associated ranges. The
* list must be kept sorted by spec->path */
struct line_log_data {
struct line_log_data *next;
struct diff_filespec *spec;
char status;
struct range_set ranges;
int arg_alloc, arg_nr;
const char **args;
struct diff_filepair *pair;
struct diff_ranges diff;
};
extern void line_log_data_init(struct line_log_data *r);
extern void line_log_init(struct rev_info *rev, const char *prefix, struct string_list *args);
extern int line_log_filter(struct rev_info *rev);
extern int line_log_print(struct rev_info *rev, struct commit *commit);
#endif /* LINE_LOG_H */
......@@ -21,6 +21,8 @@ static const char *parse_loc(const char *spec, nth_line_fn_t nth_line,
if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
num = strtol(spec + 1, &term, 10);
if (term != spec + 1) {
if (!ret)
return term;
if (spec[0] == '-')
num = 0 - num;
if (0 < num)
......@@ -35,7 +37,8 @@ static const char *parse_loc(const char *spec, nth_line_fn_t nth_line,
}
num = strtol(spec, &term, 10);
if (term != spec) {
*ret = num;
if (ret)
*ret = num;
return term;
}
if (spec[0] != '/')
......@@ -49,6 +52,10 @@ static const char *parse_loc(const char *spec, nth_line_fn_t nth_line,
if (*term != '/')
return spec;
/* in the scan-only case we are not interested in the regex */
if (!ret)
return term+1;
/* try [spec+1 .. term-1] as regexp */
*term = 0;
begin--; /* input is in human terms */
......@@ -90,3 +97,13 @@ int parse_range_arg(const char *arg, nth_line_fn_t nth_line_cb,
return 0;
}
const char *skip_range_arg(const char *arg)
{
arg = parse_loc(arg, NULL, NULL, 0, -1, NULL);
if (*arg == ',')
arg = parse_loc(arg+1, NULL, NULL, 0, 0, NULL);
return arg;
}
......@@ -21,4 +21,15 @@ extern int parse_range_arg(const char *arg,
void *cb_data, long lines,
long *begin, long *end);
/*
* Scan past a range argument that could be parsed by
* 'parse_range_arg', to help the caller determine the start of the
* filename in '-L n,m:file' syntax.
*
* Returns a pointer to the first character after the 'n,m' part, or
* NULL in case the argument is obviously malformed.
*/
extern const char *skip_range_arg(const char *arg);
#endif /* LINE_RANGE_H */
......@@ -9,6 +9,7 @@
#include "string-list.h"
#include "color.h"
#include "gpg-interface.h"
#include "line-log.h"
struct decoration name_decoration = { "object names" };
......@@ -856,6 +857,9 @@ int log_tree_commit(struct rev_info *opt, struct commit *commit)
log.parent = NULL;
opt->loginfo = &log;
if (opt->line_level_traverse)
return line_log_print(opt, commit);
shown = log_tree_diff(opt, commit, &log);
if (!shown && opt->loginfo && opt->always_show_header) {
log.parent = NULL;
......
......@@ -13,6 +13,7 @@
#include "decorate.h"
#include "log-tree.h"
#include "string-list.h"
#include "line-log.h"
#include "mailmap.h"
volatile show_early_output_fn_t show_early_output;
......@@ -1896,6 +1897,12 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
if (revs->combine_merges)
revs->ignore_merges = 0;
revs->diffopt.abbrev = revs->abbrev;
if (revs->line_level_traverse) {
revs->limited = 1;
revs->topo_order = 1;
}
diff_setup_done(&revs->diffopt);
grep_commit_pattern_type(GREP_PATTERN_TYPE_UNSPECIFIED,
......@@ -2166,6 +2173,8 @@ int prepare_revision_walk(struct rev_info *revs)
return -1;
if (revs->topo_order)
sort_in_topological_order(&revs->commits, revs->lifo);
if (revs->line_level_traverse)
line_log_filter(revs);
if (revs->simplify_merges)
simplify_merges(revs);
if (revs->children.name)
......
......@@ -96,7 +96,8 @@ struct rev_info {
cherry_mark:1,
bisect:1,
ancestry_path:1,
first_parent_only:1;
first_parent_only:1,
line_level_traverse:1;
/* Diff flags */
unsigned int diff:1,
......@@ -175,6 +176,9 @@ struct rev_info {
int count_left;
int count_right;
int count_same;
/* line level range that we are chasing */
struct decoration line_log_data;
};
#define REV_TREE_SAME 0
......
#!/bin/sh
test_description='Tests log -L performance'
. ./perf-lib.sh
test_perf_default_repo
# Pick a file to log pseudo-randomly. The sort key is the blob hash,
# so it is stable.
test_expect_success 'select a file' '
git ls-tree HEAD | grep ^100644 |
sort -k 3 | head -1 | cut -f 2 >filelist
'
file=$(cat filelist)
export file
test_perf 'git rev-list --topo-order (baseline)' '
git rev-list --topo-order HEAD >/dev/null
'
test_perf 'git log --follow (baseline for -M)' '
git log --oneline --follow -- "$file" >/dev/null
'
test_perf 'git log -L' '
git log -L 1:"$file" >/dev/null
'
test_perf 'git log -M -L' '
git log -M -L 1:"$file" >/dev/null
'
test_done
#!/bin/sh
test_description='test log -L'
. ./test-lib.sh
test_expect_success 'setup (import history)' '
git fast-import < "$TEST_DIRECTORY"/t4211/history.export &&
git reset --hard
'
canned_test () {
test_expect_success "$1" "
git log $1 >actual &&
test_cmp \"\$TEST_DIRECTORY\"/t4211/expect.$2 actual
"
}
test_bad_opts () {
test_expect_success "invalid args: $1" "
test_must_fail git log $1 2>errors &&
grep '$2' errors
"
}
canned_test "-L 4,12:a.c simple" simple-f
canned_test "-L 4,+9:a.c simple" simple-f
canned_test "-L '/long f/,/^}/:a.c' simple" simple-f
canned_test "-L '/main/,/^}/:a.c' simple" simple-main
canned_test "-L 1,+4:a.c simple" beginning-of-file
canned_test "-L 20:a.c simple" end-of-file
canned_test "-L '/long f/',/^}/:a.c -L /main/,/^}/:a.c simple" two-ranges
canned_test "-L 24,+1:a.c simple" vanishes-early
canned_test "-L '/long f/,/^}/:b.c' move-support" move-support-f
test_bad_opts "-L" "switch.*requires a value"
test_bad_opts "-L b.c" "argument.*not of the form"
test_bad_opts "-L 1:" "argument.*not of the form"
test_bad_opts "-L 1:nonexistent" "There is no path"
test_bad_opts "-L 1:simple" "There is no path"
test_bad_opts "-L '/foo:b.c'" "argument.*not of the form"
test_bad_opts "-L 1000:b.c" "has only.*lines"
test_bad_opts "-L 1,1000:b.c" "has only.*lines"
test_done
commit 4a23ae5c98d59a58c6da036156959f2dc9f472ad
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:47:40 2013 +0100
change at very beginning
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -1,3 +1,4 @@
+#include <unistd.h>
#include <stdio.h>
long f(long x)
commit a6eb82647d5d67f893da442f8f9375fd89a3b1e2
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:45:16 2013 +0100
touch both functions
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -1,3 +1,3 @@
#include <stdio.h>
-int f(int x)
+long f(long x)
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +1,3 @@
+#include <stdio.h>
+
+int f(int x)
commit 4659538844daa2849b1a9e7d6fadb96fcd26fc83
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:43 2013 +0100
change back to complete line
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -20,3 +20,5 @@
printf("%ld\n", f(15));
return 0;
-}
\ No newline at end of file
+}
+
+/* incomplete lines are bad! */
commit 100b61a6f2f720f812620a9d10afb3a960ccb73c
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:10 2013 +0100
change to an incomplete line at end
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -20,3 +20,3 @@
printf("%ld\n", f(15));
return 0;
-}
+}
\ No newline at end of file
commit a6eb82647d5d67f893da442f8f9375fd89a3b1e2
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:45:16 2013 +0100
touch both functions
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -19,3 +19,3 @@
- printf("%d\n", f(15));
+ printf("%ld\n", f(15));
return 0;
}
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +18,3 @@
+ printf("%d\n", f(15));
+ return 0;
+}
commit 6ce3c4ff690136099bb17e1a8766b75764726ea7
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:49:50 2013 +0100
another simple change
diff --git a/b.c b/b.c
--- a/b.c
+++ b/b.c
@@ -4,9 +4,9 @@
long f(long x)
{
int s = 0;
while (x) {
- x >>= 1;
+ x /= 2;
s++;
}
return s;
}
commit e6da343666244ea9e67cbe3f3bd26da860f9fe0e
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:49:28 2013 +0100
move file
diff --git a/b.c b/b.c
--- /dev/null
+++ b/b.c
@@ -0,0 +4,9 @@
+long f(long x)
+{
+ int s = 0;
+ while (x) {
+ x >>= 1;
+ s++;
+ }
+ return s;
+}
commit a6eb82647d5d67f893da442f8f9375fd89a3b1e2
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:45:16 2013 +0100
touch both functions
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -3,9 +3,9 @@
-int f(int x)
+long f(long x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
return s;
}
commit f04fb20f2c77850996cba739709acc6faecc58f7
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:55 2013 +0100
change f()
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -3,8 +3,9 @@
int f(int x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
+ return s;
}
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +3,8 @@
+int f(int x)
+{
+ int s = 0;
+ while (x) {
+ x >>= 1;
+ s++;
+ }
+}
commit 4659538844daa2849b1a9e7d6fadb96fcd26fc83
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:43 2013 +0100
change back to complete line
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -18,5 +18,5 @@
int main ()
{
printf("%ld\n", f(15));
return 0;
-}
\ No newline at end of file
+}
commit 100b61a6f2f720f812620a9d10afb3a960ccb73c
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:10 2013 +0100
change to an incomplete line at end
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -18,5 +18,5 @@
int main ()
{
printf("%ld\n", f(15));
return 0;
-}
+}
\ No newline at end of file
commit a6eb82647d5d67f893da442f8f9375fd89a3b1e2
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:45:16 2013 +0100
touch both functions
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -17,5 +17,5 @@
int main ()
{
- printf("%d\n", f(15));
+ printf("%ld\n", f(15));
return 0;
}
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +16,5 @@
+int main ()
+{
+ printf("%d\n", f(15));
+ return 0;
+}
commit 4659538844daa2849b1a9e7d6fadb96fcd26fc83
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:43 2013 +0100
change back to complete line
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -18,5 +18,5 @@
int main ()
{
printf("%ld\n", f(15));
return 0;
-}
\ No newline at end of file
+}
commit 100b61a6f2f720f812620a9d10afb3a960ccb73c
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:10 2013 +0100
change to an incomplete line at end
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -18,5 +18,5 @@
int main ()
{
printf("%ld\n", f(15));
return 0;
-}
+}
\ No newline at end of file
commit a6eb82647d5d67f893da442f8f9375fd89a3b1e2
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:45:16 2013 +0100
touch both functions
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -3,9 +3,9 @@
-int f(int x)
+long f(long x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
return s;
}
@@ -17,5 +17,5 @@
int main ()
{
- printf("%d\n", f(15));
+ printf("%ld\n", f(15));
return 0;
}
commit f04fb20f2c77850996cba739709acc6faecc58f7
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:55 2013 +0100
change f()
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -3,8 +3,9 @@
int f(int x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
+ return s;
}
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +3,8 @@
+int f(int x)
+{
+ int s = 0;
+ while (x) {
+ x >>= 1;
+ s++;