diffcore-pickaxe.c 6.58 KB
Newer Older
1 2
/*
 * Copyright (C) 2005 Junio C Hamano
 * Copyright (C) 2010 Google Inc.
 */
#include "cache.h"
#include "diff.h"
#include "diffcore.h"
8
#include "xdiff-interface.h"
Fredrik K's avatar
Fredrik K committed
9
#include "kwset.h"
10 11
#include "commit.h"
#include "quote.h"
12

13 14 15 16
/*
 * Signature of the per-filepair matchers that pickaxe_match() calls
 * through its "fn" argument (diff_grep() and has_changes() in this
 * file).  "one" and "two" are the two sides of the pair; pickaxe_match()
 * passes NULL for a side that is not a valid file.  "regexp" and "kws"
 * are the compiled pattern and the keyword set prepared by
 * diffcore_pickaxe(); either may be NULL.  A non-zero return means the
 * pair matched.
 */
typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
			  struct diff_options *o,
			  regex_t *regexp, kwset_t kws);

17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
/* State handed from diff_grep() to its per-line callback diffgrep_consume(). */
struct diffgrep_cb {
	regex_t *regexp;	/* pattern tried against each added/removed line */
	int hit;		/* becomes non-zero once a line has matched */
};

/*
 * Per-line callback used by diff_grep().  "priv" is a struct diffgrep_cb;
 * "line" is one line of diff output of "len" bytes whose first byte is
 * the '+', '-' or context marker.  Records in the callback data whether
 * an added or removed line matches the pattern.
 */
static void diffgrep_consume(void *priv, char *line, unsigned long len)
{
	struct diffgrep_cb *cb = priv;
	regmatch_t match;
	const char marker = line[0];

	/* Only additions and removals are of interest. */
	if (!(marker == '+' || marker == '-'))
		return;

	/*
	 * NEEDSWORK: we should have a way to terminate the
	 * caller early.
	 */
	if (cb->hit)
		return;

	/* Match against the line contents, skipping the leading marker. */
	cb->hit = (regexec_buf(cb->regexp, line + 1, len - 1, 1,
			       &match, 0) == 0);
}

39 40
/*
 * pickaxe_fn used for -G: report whether "regexp" matches text that
 * differs between "one" and "two".
 *
 * When one side is NULL the whole of the other side is searched.
 * Otherwise a textual diff is produced and only its added and removed
 * lines are searched.  "o" and "kws" are part of the pickaxe_fn
 * signature but are not used here.
 *
 * Returns non-zero on a match, 0 otherwise (including when the diff
 * machinery reports an error).
 */
static int diff_grep(mmfile_t *one, mmfile_t *two,
		     struct diff_options *o,
		     regex_t *regexp, kwset_t kws)
{
	regmatch_t regmatch;
	struct diffgrep_cb ecbdata;
	xpparam_t xpp;
	xdemitconf_t xecfg;

	if (!one)
		return !regexec_buf(regexp, two->ptr, two->size,
				    1, &regmatch, 0);
	if (!two)
		return !regexec_buf(regexp, one->ptr, one->size,
				    1, &regmatch, 0);

	/*
	 * We have both sides; need to run textual diff and see if
	 * the pattern appears on added/deleted lines.
	 */
	memset(&xpp, 0, sizeof(xpp));
	memset(&xecfg, 0, sizeof(xecfg));
	ecbdata.regexp = regexp;
	ecbdata.hit = 0;

	/*
	 * diffgrep_consume() ignores every line that does not start with
	 * '+' or '-', so requesting context lines would only produce
	 * output that is thrown away.  Leave ctxlen and interhunkctxlen
	 * at the zero that memset() gave them.
	 */
	if (xdi_diff_outf(one, two, discard_hunk_line, diffgrep_consume,
			  &ecbdata, &xpp, &xecfg))
		return 0;
	return ecbdata.hit;
}

71
/*
 * Count the non-overlapping occurrences of the needle in "mf".
 *
 * When "regexp" is non-NULL it is used for matching; otherwise the
 * keyword set "kws" is used for an exact string search.  The buffer is
 * delimited by mf->size, not by a NUL terminator, so NUL bytes in the
 * content do not end the scan in either mode.
 */
static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
{
	unsigned int cnt = 0;
	unsigned long sz = mf->size;
	const char *data = mf->ptr;

	if (regexp) {
		regmatch_t regmatch;
		int flags = 0;

		while (sz &&
		       !regexec_buf(regexp, data, sz, 1, &regmatch, flags)) {
			/* later searches do not start at the beginning of the buffer */
			flags |= REG_NOTBOL;
			data += regmatch.rm_eo;
			sz -= regmatch.rm_eo;
			/*
			 * An empty match does not consume anything; step
			 * over one byte so that the loop makes progress.
			 */
			if (sz && regmatch.rm_so == regmatch.rm_eo) {
				data++;
				sz--;
			}
			cnt++;
		}

	} else { /* Classic exact string match */
		while (sz) {
			struct kwsmatch kwsm;
			size_t offset = kwsexec(kws, data, sz, &kwsm);

			if (offset == (size_t)-1)
				break;
			/* resume right after the end of this occurrence */
			sz -= offset + kwsm.size[0];
			data += offset + kwsm.size[0];
			cnt++;
		}
	}
	return cnt;
}

111 112
/*
 * pickaxe_fn that compares how many times the needle occurs on each
 * side.  A NULL side counts as zero occurrences.  Returns non-zero
 * when the two counts differ.
 */
static int has_changes(mmfile_t *one, mmfile_t *two,
		       struct diff_options *o,
		       regex_t *regexp, kwset_t kws)
{
	unsigned int before = 0;
	unsigned int after = 0;

	if (one)
		before = contains(one, regexp, kws);
	if (two)
		after = contains(two, regexp, kws);

	return before != after;
}

/*
 * Decide whether filepair "p" is selected by the pickaxe options in "o".
 *
 * With o->objfind set, the pair matches when the object ID of either
 * valid side is in that set.  Otherwise both sides are loaded (through
 * their textconv filter when textconv is allowed) and handed to "fn"
 * together with "regexp" and "kws".
 *
 * Returns non-zero when the pair matches.
 */
static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
			 regex_t *regexp, kwset_t kws, pickaxe_fn fn)
{
	struct userdiff_driver *conv_one = NULL;
	struct userdiff_driver *conv_two = NULL;
	mmfile_t pre, post;
	mmfile_t *side_one, *side_two;
	int matched;

	/* ignore unmerged */
	if (!(DIFF_FILE_VALID(p->one) || DIFF_FILE_VALID(p->two)))
		return 0;

	if (o->objfind) {
		if (DIFF_FILE_VALID(p->one) &&
		    oidset_contains(o->objfind, &p->one->oid))
			return 1;
		if (DIFF_FILE_VALID(p->two) &&
		    oidset_contains(o->objfind, &p->two->oid))
			return 1;
		return 0;
	}

	/* An empty needle never matches anything. */
	if (!o->pickaxe[0])
		return 0;

	if (o->flags.allow_textconv) {
		conv_one = get_textconv(o->repo, p->one);
		conv_two = get_textconv(o->repo, p->two);
	}

	/*
	 * An unmodified pair has the same count on both sides, so the
	 * blobs need not be loaded at all -- unless the two sides go
	 * through different textconv filters (e.g. an exact rename whose
	 * sides have different textconv attributes), in which case the
	 * converted contents may differ.
	 */
	if (conv_one == conv_two && diff_unmodified_pair(p))
		return 0;

	/* -G skips binary content unless it is converted or forced to text. */
	if ((o->pickaxe_opts & DIFF_PICKAXE_KIND_G) && !o->flags.text) {
		if (!conv_one && diff_filespec_is_binary(o->repo, p->one))
			return 0;
		if (!conv_two && diff_filespec_is_binary(o->repo, p->two))
			return 0;
	}

	pre.size = fill_textconv(o->repo, conv_one, p->one, &pre.ptr);
	post.size = fill_textconv(o->repo, conv_two, p->two, &post.ptr);

	side_one = DIFF_FILE_VALID(p->one) ? &pre : NULL;
	side_two = DIFF_FILE_VALID(p->two) ? &post : NULL;
	matched = fn(side_one, side_two, o, regexp, kws);

	/* Only the buffers obtained through a textconv driver are freed here. */
	if (conv_one)
		free(pre.ptr);
	if (conv_two)
		free(post.ptr);
	diff_free_filespec_data(p->one);
	diff_free_filespec_data(p->two);

	return matched;
}

180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
/*
 * Filter the queue "q" in place according to the pickaxe options.
 *
 * With DIFF_PICKAXE_ALL the queue is left untouched as soon as any pair
 * matches, and emptied when none does.  Without it, only the matching
 * pairs are kept and the others are freed.
 */
static void pickaxe(struct diff_queue_struct *q, struct diff_options *o,
		    regex_t *regexp, kwset_t kws, pickaxe_fn fn)
{
	struct diff_queue_struct kept;
	int idx;

	DIFF_QUEUE_CLEAR(&kept);

	if (!(o->pickaxe_opts & DIFF_PICKAXE_ALL)) {
		/* Keep only the filepairs that have the needle. */
		for (idx = 0; idx < q->nr; idx++) {
			struct diff_filepair *pair = q->queue[idx];

			if (!pickaxe_match(pair, o, regexp, kws, fn)) {
				diff_free_filepair(pair);
				continue;
			}
			diff_q(&kept, pair);
		}
	} else {
		/* Show the whole changeset if the needle exists anywhere. */
		for (idx = 0; idx < q->nr; idx++) {
			if (pickaxe_match(q->queue[idx], o, regexp, kws, fn))
				return; /* do not munge the queue */
		}

		/*
		 * Nothing matched: release every entry; the still-empty
		 * "kept" queue replaces the original one below.
		 */
		for (idx = 0; idx < q->nr; idx++)
			diff_free_filepair(q->queue[idx]);
	}

	free(q->queue);
	*q = kept;
}

218 219 220 221 222 223 224 225 226 227 228
/*
 * Compile "needle" into "regex" with the given "cflags".  On failure,
 * call die() with the message obtained from regerror().
 */
static void regcomp_or_die(regex_t *regex, const char *needle, int cflags)
{
	char errbuf[1024];
	int err = regcomp(regex, needle, cflags);

	if (!err)
		return;

	regerror(err, regex, errbuf, sizeof(errbuf));
	die("invalid regex: %s", errbuf);
}

229
void diffcore_pickaxe(struct diff_options *o)
230
{
231 232
	const char *needle = o->pickaxe;
	int opts = o->pickaxe_opts;
233
	regex_t regex, *regexp = NULL;
Fredrik K's avatar
Fredrik K committed
234
	kwset_t kws = NULL;
235

236
	if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
237
		int cflags = REG_EXTENDED | REG_NEWLINE;
238
		if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE)
239
			cflags |= REG_ICASE;
240
		regcomp_or_die(&regex, needle, cflags);
241
		regexp = &regex;
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
	} else if (opts & DIFF_PICKAXE_KIND_S) {
		if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE &&
		    has_non_ascii(needle)) {
			struct strbuf sb = STRBUF_INIT;
			int cflags = REG_NEWLINE | REG_ICASE;

			basic_regex_quote_buf(&sb, needle);
			regcomp_or_die(&regex, sb.buf, cflags);
			strbuf_release(&sb);
			regexp = &regex;
		} else {
			kws = kwsalloc(o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE
				       ? tolower_trans_tbl : NULL);
			kwsincr(kws, needle, strlen(needle));
			kwsprep(kws);
		}
258 259
	}

260 261
	pickaxe(&diff_queued_diff, o, regexp, kws,
		(opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes);
262

263 264
	if (regexp)
		regfree(regexp);
265
	if (kws)
Fredrik K's avatar
Fredrik K committed
266
		kwsfree(kws);
267 268
	return;
}