range-diff.c 12.4 KB
Newer Older
1 2 3 4 5 6 7 8
#include "cache.h"
#include "range-diff.h"
#include "string-list.h"
#include "run-command.h"
#include "argv-array.h"
#include "hashmap.h"
#include "xdiff-interface.h"
#include "linear-assignment.h"
9
#include "diffcore.h"
10 11
#include "commit.h"
#include "pretty.h"
12
#include "userdiff.h"
13 14 15 16 17 18

struct patch_util {
	/* For the search for an exact match */
	struct hashmap_entry e;
	const char *diff, *patch;

19
	int i, shown;
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
	int diffsize;
	size_t diff_offset;
	/* the index of the matching item in the other branch, or -1 */
	int matching;
	struct object_id oid;
};

/*
 * Reads the patches into a string list, with the `util` field being populated
 * as struct object_id (will need to be free()d).
 */
static int read_patches(const char *range, struct string_list *list)
{
	struct child_process cp = CHILD_PROCESS_INIT;
	FILE *in;
	struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT;
	struct patch_util *util = NULL;
	int in_header = 1;

	argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges",
			"--reverse", "--date-order", "--decorate=no",
41 42 43 44 45 46 47 48
			/*
			 * Choose indicators that are not used anywhere
			 * else in diffs, but still look reasonable
			 * (e.g. will not be confusing when debugging)
			 */
			"--output-indicator-new=>",
			"--output-indicator-old=<",
			"--output-indicator-context=#",
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
			"--no-abbrev-commit", range,
			NULL);
	cp.out = -1;
	cp.no_stdin = 1;
	cp.git_cmd = 1;

	if (start_command(&cp))
		return error_errno(_("could not start `log`"));
	in = fdopen(cp.out, "r");
	if (!in) {
		error_errno(_("could not read `log` output"));
		finish_command(&cp);
		return -1;
	}

	while (strbuf_getline(&line, in) != EOF) {
		const char *p;

		if (skip_prefix(line.buf, "commit ", &p)) {
			if (util) {
				string_list_append(list, buf.buf)->util = util;
				strbuf_reset(&buf);
			}
			util = xcalloc(sizeof(*util), 1);
			if (get_oid(p, &util->oid)) {
				error(_("could not parse commit '%s'"), p);
				free(util);
				string_list_clear(list, 1);
				strbuf_release(&buf);
				strbuf_release(&line);
				fclose(in);
				finish_command(&cp);
				return -1;
			}
			util->matching = -1;
			in_header = 1;
			continue;
		}

		if (starts_with(line.buf, "diff --git")) {
			in_header = 0;
			strbuf_addch(&buf, '\n');
			if (!util->diff_offset)
				util->diff_offset = buf.len;
93
			strbuf_addch(&buf, ' ');
94 95 96 97 98 99
			strbuf_addbuf(&buf, &line);
		} else if (in_header) {
			if (starts_with(line.buf, "Author: ")) {
				strbuf_addbuf(&buf, &line);
				strbuf_addstr(&buf, "\n\n");
			} else if (starts_with(line.buf, "    ")) {
100
				strbuf_rtrim(&line);
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
				strbuf_addbuf(&buf, &line);
				strbuf_addch(&buf, '\n');
			}
			continue;
		} else if (starts_with(line.buf, "@@ "))
			strbuf_addstr(&buf, "@@");
		else if (!line.buf[0] || starts_with(line.buf, "index "))
			/*
			 * A completely blank (not ' \n', which is context)
			 * line is not valid in a diff.  We skip it
			 * silently, because this neatly handles the blank
			 * separator line between commits in git-log
			 * output.
			 *
			 * We also want to ignore the diff's `index` lines
			 * because they contain exact blob hashes in which
			 * we are not interested.
			 */
			continue;
120 121 122 123 124 125 126 127 128 129
		else if (line.buf[0] == '>') {
			strbuf_addch(&buf, '+');
			strbuf_add(&buf, line.buf + 1, line.len - 1);
		} else if (line.buf[0] == '<') {
			strbuf_addch(&buf, '-');
			strbuf_add(&buf, line.buf + 1, line.len - 1);
		} else if (line.buf[0] == '#') {
			strbuf_addch(&buf, ' ');
			strbuf_add(&buf, line.buf + 1, line.len - 1);
		} else {
130
			strbuf_addch(&buf, ' ');
131
			strbuf_addbuf(&buf, &line);
132
		}
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199

		strbuf_addch(&buf, '\n');
		util->diffsize++;
	}
	fclose(in);
	strbuf_release(&line);

	if (util)
		string_list_append(list, buf.buf)->util = util;
	strbuf_release(&buf);

	if (finish_command(&cp))
		return -1;

	return 0;
}

static int patch_util_cmp(const void *dummy, const struct patch_util *a,
		     const struct patch_util *b, const char *keydata)
{
	return strcmp(a->diff, keydata ? keydata : b->diff);
}

static void find_exact_matches(struct string_list *a, struct string_list *b)
{
	struct hashmap map;
	int i;

	hashmap_init(&map, (hashmap_cmp_fn)patch_util_cmp, NULL, 0);

	/* First, add the patches of a to a hash map */
	for (i = 0; i < a->nr; i++) {
		struct patch_util *util = a->items[i].util;

		util->i = i;
		util->patch = a->items[i].string;
		util->diff = util->patch + util->diff_offset;
		hashmap_entry_init(util, strhash(util->diff));
		hashmap_add(&map, util);
	}

	/* Now try to find exact matches in b */
	for (i = 0; i < b->nr; i++) {
		struct patch_util *util = b->items[i].util, *other;

		util->i = i;
		util->patch = b->items[i].string;
		util->diff = util->patch + util->diff_offset;
		hashmap_entry_init(util, strhash(util->diff));
		other = hashmap_remove(&map, util, NULL);
		if (other) {
			if (other->matching >= 0)
				BUG("already assigned!");

			other->matching = i;
			util->matching = other->i;
		}
	}

	hashmap_free(&map, 0);
}

static void diffsize_consume(void *data, char *line, unsigned long len)
{
	(*(int *)data)++;
}

200 201 202 203 204 205
static void diffsize_hunk(void *data, long ob, long on, long nb, long nn,
			  const char *funcline, long funclen)
{
	diffsize_consume(data, NULL, 0);
}

206 207 208 209 210 211 212 213 214 215 216 217 218
static int diffsize(const char *a, const char *b)
{
	xpparam_t pp = { 0 };
	xdemitconf_t cfg = { 0 };
	mmfile_t mf1, mf2;
	int count = 0;

	mf1.ptr = (char *)a;
	mf1.size = strlen(a);
	mf2.ptr = (char *)b;
	mf2.size = strlen(b);

	cfg.ctxlen = 3;
219 220 221
	if (!xdi_diff_outf(&mf1, &mf2,
			   diffsize_hunk, diffsize_consume, &count,
			   &pp, &cfg))
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
		return count;

	error(_("failed to generate diff"));
	return COST_MAX;
}

static void get_correspondences(struct string_list *a, struct string_list *b,
				int creation_factor)
{
	int n = a->nr + b->nr;
	int *cost, c, *a2b, *b2a;
	int i, j;

	ALLOC_ARRAY(cost, st_mult(n, n));
	ALLOC_ARRAY(a2b, n);
	ALLOC_ARRAY(b2a, n);

	for (i = 0; i < a->nr; i++) {
		struct patch_util *a_util = a->items[i].util;

		for (j = 0; j < b->nr; j++) {
			struct patch_util *b_util = b->items[j].util;

			if (a_util->matching == j)
				c = 0;
			else if (a_util->matching < 0 && b_util->matching < 0)
				c = diffsize(a_util->diff, b_util->diff);
			else
				c = COST_MAX;
			cost[i + n * j] = c;
		}

		c = a_util->matching < 0 ?
			a_util->diffsize * creation_factor / 100 : COST_MAX;
		for (j = b->nr; j < n; j++)
			cost[i + n * j] = c;
	}

	for (j = 0; j < b->nr; j++) {
		struct patch_util *util = b->items[j].util;

		c = util->matching < 0 ?
			util->diffsize * creation_factor / 100 : COST_MAX;
		for (i = a->nr; i < n; i++)
			cost[i + n * j] = c;
	}

	for (i = a->nr; i < n; i++)
		for (j = b->nr; j < n; j++)
			cost[i + n * j] = 0;

	compute_assignment(n, n, cost, a2b, b2a);

	for (i = 0; i < a->nr; i++)
		if (a2b[i] >= 0 && a2b[i] < b->nr) {
			struct patch_util *a_util = a->items[i].util;
			struct patch_util *b_util = b->items[a2b[i]].util;

			a_util->matching = a2b[i];
			b_util->matching = i;
		}

	free(cost);
	free(a2b);
	free(b2a);
}

289
static void output_pair_header(struct diff_options *diffopt,
290
			       int patch_no_width,
291
			       struct strbuf *buf,
292 293 294
			       struct strbuf *dashes,
			       struct patch_util *a_util,
			       struct patch_util *b_util)
295
{
296 297
	struct object_id *oid = a_util ? &a_util->oid : &b_util->oid;
	struct commit *commit;
298 299 300 301 302 303
	char status;
	const char *color_reset = diff_get_color_opt(diffopt, DIFF_RESET);
	const char *color_old = diff_get_color_opt(diffopt, DIFF_FILE_OLD);
	const char *color_new = diff_get_color_opt(diffopt, DIFF_FILE_NEW);
	const char *color_commit = diff_get_color_opt(diffopt, DIFF_COMMIT);
	const char *color;
304 305 306 307 308 309

	if (!dashes->len)
		strbuf_addchars(dashes, '-',
				strlen(find_unique_abbrev(oid,
							  DEFAULT_ABBREV)));

310 311 312 313 314 315 316 317 318 319 320 321 322 323
	if (!b_util) {
		color = color_old;
		status = '<';
	} else if (!a_util) {
		color = color_new;
		status = '>';
	} else if (strcmp(a_util->patch, b_util->patch)) {
		color = color_commit;
		status = '!';
	} else {
		color = color_commit;
		status = '=';
	}

324
	strbuf_reset(buf);
325
	strbuf_addstr(buf, status == '!' ? color_old : color);
326
	if (!a_util)
327
		strbuf_addf(buf, "%*s:  %s ", patch_no_width, "-", dashes->buf);
328
	else
329
		strbuf_addf(buf, "%*d:  %s ", patch_no_width, a_util->i + 1,
330 331
			    find_unique_abbrev(&a_util->oid, DEFAULT_ABBREV));

332 333 334 335 336
	if (status == '!')
		strbuf_addf(buf, "%s%s", color_reset, color);
	strbuf_addch(buf, status);
	if (status == '!')
		strbuf_addf(buf, "%s%s", color_reset, color_new);
337 338

	if (!b_util)
339
		strbuf_addf(buf, " %*s:  %s", patch_no_width, "-", dashes->buf);
340
	else
341
		strbuf_addf(buf, " %*d:  %s", patch_no_width, b_util->i + 1,
342 343 344 345
			    find_unique_abbrev(&b_util->oid, DEFAULT_ABBREV));

	commit = lookup_commit_reference(the_repository, oid);
	if (commit) {
346 347 348
		if (status == '!')
			strbuf_addf(buf, "%s%s", color_reset, color);

349 350 351
		strbuf_addch(buf, ' ');
		pp_commit_easy(CMIT_FMT_ONELINE, commit, buf);
	}
352
	strbuf_addf(buf, "%s\n", color_reset);
353

354
	fwrite(buf->buf, buf->len, 1, diffopt->file);
355 356
}

357 358 359 360
static struct userdiff_driver no_func_name = {
	.funcname = { "$^", 0 }
};

361 362 363 364
static struct diff_filespec *get_filespec(const char *name, const char *p)
{
	struct diff_filespec *spec = alloc_filespec(name);

365
	fill_filespec(spec, &null_oid, 0, 0100644);
366 367 368 369
	spec->data = (char *)p;
	spec->size = strlen(p);
	spec->should_munmap = 0;
	spec->is_stdin = 1;
370
	spec->driver = &no_func_name;
371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386

	return spec;
}

static void patch_diff(const char *a, const char *b,
			      struct diff_options *diffopt)
{
	diff_queue(&diff_queued_diff,
		   get_filespec("a", a), get_filespec("b", b));

	diffcore_std(diffopt);
	diff_flush(diffopt);
}

static void output(struct string_list *a, struct string_list *b,
		   struct diff_options *diffopt)
387
{
388
	struct strbuf buf = STRBUF_INIT, dashes = STRBUF_INIT;
389
	int patch_no_width = decimal_width(1 + (a->nr > b->nr ? a->nr : b->nr));
390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410
	int i = 0, j = 0;

	/*
	 * We assume the user is really more interested in the second argument
	 * ("newer" version). To that end, we print the output in the order of
	 * the RHS (the `b` parameter). To put the LHS (the `a` parameter)
	 * commits that are no longer in the RHS into a good place, we place
	 * them once we have shown all of their predecessors in the LHS.
	 */

	while (i < a->nr || j < b->nr) {
		struct patch_util *a_util, *b_util;
		a_util = i < a->nr ? a->items[i].util : NULL;
		b_util = j < b->nr ? b->items[j].util : NULL;

		/* Skip all the already-shown commits from the LHS. */
		while (i < a->nr && a_util->shown)
			a_util = ++i < a->nr ? a->items[i].util : NULL;

		/* Show unmatched LHS commit whose predecessors were shown. */
		if (i < a->nr && a_util->matching < 0) {
411
			output_pair_header(diffopt, patch_no_width,
412
					   &buf, &dashes, a_util, NULL);
413 414 415
			i++;
			continue;
		}
416

417 418
		/* Show unmatched RHS commits. */
		while (j < b->nr && b_util->matching < 0) {
419
			output_pair_header(diffopt, patch_no_width,
420
					   &buf, &dashes, NULL, b_util);
421
			b_util = ++j < b->nr ? b->items[j].util : NULL;
422 423
		}

424 425 426
		/* Show matching LHS/RHS pair. */
		if (j < b->nr) {
			a_util = a->items[b_util->matching].util;
427
			output_pair_header(diffopt, patch_no_width,
428
					   &buf, &dashes, a_util, b_util);
429 430 431
			if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT))
				patch_diff(a->items[b_util->matching].string,
					   b->items[j].string, diffopt);
432 433 434
			a_util->shown = 1;
			j++;
		}
435
	}
436 437
	strbuf_release(&buf);
	strbuf_release(&dashes);
438 439
}

440 441 442 443 444
static struct strbuf *output_prefix_cb(struct diff_options *opt, void *data)
{
	return data;
}

445
int show_range_diff(const char *range1, const char *range2,
446 447
		    int creation_factor, int dual_color,
		    struct diff_options *diffopt)
448 449 450 451 452 453 454 455 456 457 458 459
{
	int res = 0;

	struct string_list branch1 = STRING_LIST_INIT_DUP;
	struct string_list branch2 = STRING_LIST_INIT_DUP;

	if (read_patches(range1, &branch1))
		res = error(_("could not parse log for '%s'"), range1);
	if (!res && read_patches(range2, &branch2))
		res = error(_("could not parse log for '%s'"), range2);

	if (!res) {
460 461 462
		struct diff_options opts;
		struct strbuf indent = STRBUF_INIT;

463 464 465 466 467
		if (diffopt)
			memcpy(&opts, diffopt, sizeof(opts));
		else
			diff_setup(&opts);

468 469
		if (!opts.output_format)
			opts.output_format = DIFF_FORMAT_PATCH;
470 471 472 473 474 475 476
		opts.flags.suppress_diff_headers = 1;
		opts.flags.dual_color_diffed_diffs = dual_color;
		opts.output_prefix = output_prefix_cb;
		strbuf_addstr(&indent, "    ");
		opts.output_prefix_data = &indent;
		diff_setup_done(&opts);

477 478
		find_exact_matches(&branch1, &branch2);
		get_correspondences(&branch1, &branch2, creation_factor);
479 480 481
		output(&branch1, &branch2, &opts);

		strbuf_release(&indent);
482 483 484 485 486 487 488
	}

	string_list_clear(&branch1, 1);
	string_list_clear(&branch2, 1);

	return res;
}