wildmatch.c 7.81 KB
Newer Older
Duy Nguyen's avatar
Duy Nguyen committed
1 2 3 4 5 6 7 8 9 10 11
/*
**  Do shell-style pattern matching for ?, \, [], and * characters.
**  It is 8bit clean.
**
**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
**  Rich $alz is now <rsalz@bbn.com>.
**
**  Modified by Wayne Davison to special-case '/' matching, to make '**'
**  work differently than '*', and to fix the character-class code.
*/

Duy Nguyen's avatar
Duy Nguyen committed
12 13 14 15
#include "cache.h"
#include "wildmatch.h"

typedef unsigned char uchar;
Duy Nguyen's avatar
Duy Nguyen committed
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53

/* What character marks an inverted character class? */
#define NEGATE_CLASS	'!'
#define NEGATE_CLASS2	'^'

#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
				    && *(class) == *(litmatch) \
				    && strncmp((char*)class, litmatch, len) == 0)

#if defined STDC_HEADERS || !defined isascii
# define ISASCII(c) 1
#else
# define ISASCII(c) isascii(c)
#endif

#ifdef isblank
# define ISBLANK(c) (ISASCII(c) && isblank(c))
#else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif

#ifdef isgraph
# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
#else
# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
#endif

#define ISPRINT(c) (ISASCII(c) && isprint(c))
#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
#define ISALNUM(c) (ISASCII(c) && isalnum(c))
#define ISALPHA(c) (ISASCII(c) && isalpha(c))
#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
#define ISLOWER(c) (ISASCII(c) && islower(c))
#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
#define ISSPACE(c) (ISASCII(c) && isspace(c))
#define ISUPPER(c) (ISASCII(c) && isupper(c))
#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))

54
/* Match pattern "p" against "text" */
55
static int dowild(const uchar *p, const uchar *text, unsigned int flags)
Duy Nguyen's avatar
Duy Nguyen committed
56
{
57
	uchar p_ch;
58
	const uchar *pattern = p;
Duy Nguyen's avatar
Duy Nguyen committed
59

60
	for ( ; (p_ch = *p) != '\0'; text++, p++) {
61
		int matched, match_slash, negated;
62 63
		uchar t_ch, prev_ch;
		if ((t_ch = *text) == '\0' && p_ch != '*')
64
			return WM_ABORT_ALL;
65
		if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
66
			t_ch = tolower(t_ch);
67
		if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
68
			p_ch = tolower(p_ch);
69 70 71 72
		switch (p_ch) {
		case '\\':
			/* Literal match with following character.  Note that the test
			 * in "default" handles the p[1] == '\0' failure case. */
Duy Nguyen's avatar
Duy Nguyen committed
73
			p_ch = *++p;
74 75 76
			/* FALLTHROUGH */
		default:
			if (t_ch != p_ch)
77
				return WM_NOMATCH;
78 79 80
			continue;
		case '?':
			/* Match anything but '/'. */
81
			if ((flags & WM_PATHNAME) && t_ch == '/')
82
				return WM_NOMATCH;
Duy Nguyen's avatar
Duy Nguyen committed
83
			continue;
84 85
		case '*':
			if (*++p == '*') {
86
				const uchar *prev_p = p - 2;
87
				while (*++p == '*') {}
88 89 90 91
				if (!(flags & WM_PATHNAME))
					/* without WM_PATHNAME, '*' == '**' */
					match_slash = 1;
				else if ((prev_p < pattern || *prev_p == '/') &&
92 93
				    (*p == '\0' || *p == '/' ||
				     (p[0] == '\\' && p[1] == '/'))) {
94 95 96 97 98 99 100 101 102 103
					/*
					 * Assuming we already match 'foo/' and are at
					 * <star star slash>, just assume it matches
					 * nothing and go ahead match the rest of the
					 * pattern with the remaining string. This
					 * helps make foo/<*><*>/bar (<> because
					 * otherwise it breaks C comment syntax) match
					 * both foo/bar and foo/a/bar.
					 */
					if (p[0] == '/' &&
104
					    dowild(p + 1, text, flags) == WM_MATCH)
105 106
						return WM_MATCH;
					match_slash = 1;
107 108
				} else /* WM_PATHNAME is set */
					match_slash = 0;
109
			} else
110 111
				/* without WM_PATHNAME, '*' == '**' */
				match_slash = flags & WM_PATHNAME ? 0 : 1;
112 113 114
			if (*p == '\0') {
				/* Trailing "**" matches everything.  Trailing "*" matches
				 * only if there are no more slash characters. */
115
				if (!match_slash) {
116
					if (strchr((char*)text, '/') != NULL)
117
						return WM_NOMATCH;
118
				}
119
				return WM_MATCH;
120 121 122 123 124 125 126 127 128 129 130 131
			} else if (!match_slash && *p == '/') {
				/*
				 * _one_ asterisk followed by a slash
				 * with WM_PATHNAME matches the next
				 * directory
				 */
				const char *slash = strchr((char*)text, '/');
				if (!slash)
					return WM_NOMATCH;
				text = (const uchar*)slash;
				/* the slash is consumed by the top-level for loop */
				break;
132 133 134 135
			}
			while (1) {
				if (t_ch == '\0')
					break;
136 137 138
				/*
				 * Try to advance faster when an asterisk is
				 * followed by a literal. We know in this case
139
				 * that the string before the literal
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
				 * must belong to "*".
				 * If match_slash is false, do not look past
				 * the first slash as it cannot belong to '*'.
				 */
				if (!is_glob_special(*p)) {
					p_ch = *p;
					if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
						p_ch = tolower(p_ch);
					while ((t_ch = *text) != '\0' &&
					       (match_slash || t_ch != '/')) {
						if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
							t_ch = tolower(t_ch);
						if (t_ch == p_ch)
							break;
						text++;
					}
					if (t_ch != p_ch)
						return WM_NOMATCH;
				}
159
				if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
160
					if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
161
						return matched;
162
				} else if (!match_slash && t_ch == '/')
163
					return WM_ABORT_TO_STARSTAR;
164 165
				t_ch = *++text;
			}
166
			return WM_ABORT_ALL;
167 168 169 170 171 172
		case '[':
			p_ch = *++p;
#ifdef NEGATE_CLASS2
			if (p_ch == NEGATE_CLASS2)
				p_ch = NEGATE_CLASS;
#endif
173 174
			/* Assign literal 1/0 because of "matched" comparison. */
			negated = p_ch == NEGATE_CLASS ? 1 : 0;
175
			if (negated) {
176 177 178 179
				/* Inverted character class. */
				p_ch = *++p;
			}
			prev_ch = 0;
180
			matched = 0;
181 182
			do {
				if (!p_ch)
183
					return WM_ABORT_ALL;
184 185 186
				if (p_ch == '\\') {
					p_ch = *++p;
					if (!p_ch)
187
						return WM_ABORT_ALL;
188
					if (t_ch == p_ch)
189
						matched = 1;
190 191 192 193 194
				} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
					p_ch = *++p;
					if (p_ch == '\\') {
						p_ch = *++p;
						if (!p_ch)
195
							return WM_ABORT_ALL;
196 197
					}
					if (t_ch <= p_ch && t_ch >= prev_ch)
198
						matched = 1;
199 200 201 202 203
					else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
						uchar t_ch_upper = toupper(t_ch);
						if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
							matched = 1;
					}
204 205 206 207 208 209
					p_ch = 0; /* This makes "prev_ch" get set to 0. */
				} else if (p_ch == '[' && p[1] == ':') {
					const uchar *s;
					int i;
					for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
					if (!p_ch)
210
						return WM_ABORT_ALL;
211 212 213 214 215 216
					i = p - s - 1;
					if (i < 0 || p[-1] != ':') {
						/* Didn't find ":]", so treat like a normal set. */
						p = s - 2;
						p_ch = '[';
						if (t_ch == p_ch)
217
							matched = 1;
218 219 220 221
						continue;
					}
					if (CC_EQ(s,i, "alnum")) {
						if (ISALNUM(t_ch))
222
							matched = 1;
223 224
					} else if (CC_EQ(s,i, "alpha")) {
						if (ISALPHA(t_ch))
225
							matched = 1;
226 227
					} else if (CC_EQ(s,i, "blank")) {
						if (ISBLANK(t_ch))
228
							matched = 1;
229 230
					} else if (CC_EQ(s,i, "cntrl")) {
						if (ISCNTRL(t_ch))
231
							matched = 1;
232 233
					} else if (CC_EQ(s,i, "digit")) {
						if (ISDIGIT(t_ch))
234
							matched = 1;
235 236
					} else if (CC_EQ(s,i, "graph")) {
						if (ISGRAPH(t_ch))
237
							matched = 1;
238 239
					} else if (CC_EQ(s,i, "lower")) {
						if (ISLOWER(t_ch))
240
							matched = 1;
241 242
					} else if (CC_EQ(s,i, "print")) {
						if (ISPRINT(t_ch))
243
							matched = 1;
244 245
					} else if (CC_EQ(s,i, "punct")) {
						if (ISPUNCT(t_ch))
246
							matched = 1;
247 248
					} else if (CC_EQ(s,i, "space")) {
						if (ISSPACE(t_ch))
249
							matched = 1;
250 251
					} else if (CC_EQ(s,i, "upper")) {
						if (ISUPPER(t_ch))
252
							matched = 1;
253 254
						else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
							matched = 1;
255 256
					} else if (CC_EQ(s,i, "xdigit")) {
						if (ISXDIGIT(t_ch))
257
							matched = 1;
258
					} else /* malformed [:class:] string */
259
						return WM_ABORT_ALL;
260 261
					p_ch = 0; /* This makes "prev_ch" get set to 0. */
				} else if (t_ch == p_ch)
262
					matched = 1;
263
			} while (prev_ch = p_ch, (p_ch = *++p) != ']');
264 265
			if (matched == negated ||
			    ((flags & WM_PATHNAME) && t_ch == '/'))
266
				return WM_NOMATCH;
267 268
			continue;
		}
Duy Nguyen's avatar
Duy Nguyen committed
269 270
	}

271
	return *text ? WM_NOMATCH : WM_MATCH;
Duy Nguyen's avatar
Duy Nguyen committed
272 273 274
}

/* Match the "pattern" against the "text" string. */
275
int wildmatch(const char *pattern, const char *text, unsigned int flags)
Duy Nguyen's avatar
Duy Nguyen committed
276
{
277
	return dowild((const uchar*)pattern, (const uchar*)text, flags);
Duy Nguyen's avatar
Duy Nguyen committed
278
}