View Javadoc
1   /*
2    * Copyright (C) 2008, Florian Koeberle <florianskarten@web.de>
3    * Copyright (C) 2008, Florian Köberle <florianskarten@web.de>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.fnmatch;
46  
47  import java.util.ArrayList;
48  import java.util.Collections;
49  import java.util.List;
50  import java.util.ListIterator;
51  import java.util.regex.Matcher;
52  import java.util.regex.Pattern;
53  
54  import org.eclipse.jgit.errors.InvalidPatternException;
55  import org.eclipse.jgit.errors.NoClosingBracketException;
56  
57  /**
58   * This class can be used to match filenames against fnmatch like patterns. It
59   * is not thread save.
60   * <p>
61   * Supported are the wildcard characters * and ? and groups with:
62   * <ul>
63   * <li>characters e.g. [abc]</li>
64   * <li>ranges e.g. [a-z]</li>
65   * <li>the following character classes
66   * <ul>
67   * <li>[:alnum:]</li>
68   * <li>[:alpha:]</li>
69   * <li>[:blank:]</li>
70   * <li>[:cntrl:]</li>
71   * <li>[:digit:]</li>
72   * <li>[:graph:]</li>
73   * <li>[:lower:]</li>
74   * <li>[:print:]</li>
75   * <li>[:punct:]</li>
76   * <li>[:space:]</li>
77   * <li>[:upper:]</li>
78   * <li>[:word:]</li>
79   * <li>[:xdigit:]</li>
80   * </ul>
81   * e. g. [[:xdigit:]]</li>
82   * </ul>
83   * Any character can be escaped by prepending it with a \
84   */
85  public class FileNameMatcher {
86  	static final List<Head> EMPTY_HEAD_LIST = Collections.emptyList();
87  
88  	private static final Pattern characterClassStartPattern = Pattern
89  			.compile("\\[[.:=]"); //$NON-NLS-1$
90  
91  	private List<Head> headsStartValue;
92  
93  	private List<Head> heads;
94  
95  	/**
96  	 * {{@link #extendStringToMatchByOneCharacter(char)} needs a list for the
97  	 * new heads, allocating a new array would be bad for the performance, as
98  	 * the method gets called very often.
99  	 *
100 	 */
101 	private List<Head> listForLocalUseage;
102 
103 	/**
104 	 *
105 	 * @param headsStartValue
106 	 *            must be a list which will never be modified.
107 	 */
108 	private FileNameMatcher(final List<Head> headsStartValue) {
109 		this(headsStartValue, headsStartValue);
110 	}
111 
112 	/**
113 	 *
114 	 * @param headsStartValue
115 	 *            must be a list which will never be modified.
116 	 * @param heads
117 	 *            a list which will be cloned and then used as current head
118 	 *            list.
119 	 */
120 	private FileNameMatcher(final List<Head> headsStartValue,
121 			final List<Head> heads) {
122 		this.headsStartValue = headsStartValue;
123 		this.heads = new ArrayList<>(heads.size());
124 		this.heads.addAll(heads);
125 		this.listForLocalUseage = new ArrayList<>(heads.size());
126 	}
127 
128 	/**
129 	 * @param patternString
130 	 *            must contain a pattern which fnmatch would accept.
131 	 * @param invalidWildgetCharacter
132 	 *            if this parameter isn't null then this character will not
133 	 *            match at wildcards(* and ? are wildcards).
134 	 * @throws InvalidPatternException
135 	 *             if the patternString contains a invalid fnmatch pattern.
136 	 */
137 	public FileNameMatcher(final String patternString,
138 			final Character invalidWildgetCharacter)
139 			throws InvalidPatternException {
140 		this(createHeadsStartValues(patternString, invalidWildgetCharacter));
141 	}
142 
143 	/**
144 	 * A Copy Constructor which creates a new {@link FileNameMatcher} with the
145 	 * same state and reset point like <code>other</code>.
146 	 *
147 	 * @param other
148 	 *            another {@link FileNameMatcher} instance.
149 	 */
150 	public FileNameMatcher(FileNameMatcher other) {
151 		this(other.headsStartValue, other.heads);
152 	}
153 
154 	private static List<Head> createHeadsStartValues(
155 			final String patternString, final Character invalidWildgetCharacter)
156 			throws InvalidPatternException {
157 
158 		final List<AbstractHead> allHeads = parseHeads(patternString,
159 				invalidWildgetCharacter);
160 
161 		List<Head> nextHeadsSuggestion = new ArrayList<>(2);
162 		nextHeadsSuggestion.add(LastHead.INSTANCE);
163 		for (int i = allHeads.size() - 1; i >= 0; i--) {
164 			final AbstractHead head = allHeads.get(i);
165 
166 			// explanation:
167 			// a and * of the pattern "a*b"
168 			// need *b as newHeads
169 			// that's why * extends the list for it self and it's left neighbor.
170 			if (head.isStar()) {
171 				nextHeadsSuggestion.add(head);
172 				head.setNewHeads(nextHeadsSuggestion);
173 			} else {
174 				head.setNewHeads(nextHeadsSuggestion);
175 				nextHeadsSuggestion = new ArrayList<>(2);
176 				nextHeadsSuggestion.add(head);
177 			}
178 		}
179 		return nextHeadsSuggestion;
180 	}
181 
182 	private static int findGroupEnd(final int indexOfStartBracket,
183 			final String pattern) throws InvalidPatternException {
184 		int firstValidCharClassIndex = indexOfStartBracket + 1;
185 		int firstValidEndBracketIndex = indexOfStartBracket + 2;
186 
187 		if (indexOfStartBracket + 1 >= pattern.length())
188 			throw new NoClosingBracketException(indexOfStartBracket, "[", "]", //$NON-NLS-1$ //$NON-NLS-2$
189 					pattern);
190 
191 		if (pattern.charAt(firstValidCharClassIndex) == '!') {
192 			firstValidCharClassIndex++;
193 			firstValidEndBracketIndex++;
194 		}
195 
196 		final Matcher charClassStartMatcher = characterClassStartPattern
197 				.matcher(pattern);
198 
199 		int groupEnd = -1;
200 		while (groupEnd == -1) {
201 
202 			final int possibleGroupEnd = indexOfUnescaped(pattern, ']',
203 					firstValidEndBracketIndex);
204 			if (possibleGroupEnd == -1)
205 				throw new NoClosingBracketException(indexOfStartBracket, "[", //$NON-NLS-1$
206 						"]", pattern); //$NON-NLS-1$
207 
208 			final boolean foundCharClass = charClassStartMatcher
209 					.find(firstValidCharClassIndex);
210 
211 			if (foundCharClass
212 					&& charClassStartMatcher.start() < possibleGroupEnd) {
213 
214 				final String classStart = charClassStartMatcher.group(0);
215 				final String classEnd = classStart.charAt(1) + "]"; //$NON-NLS-1$
216 
217 				final int classStartIndex = charClassStartMatcher.start();
218 				final int classEndIndex = pattern.indexOf(classEnd,
219 						classStartIndex + 2);
220 
221 				if (classEndIndex == -1)
222 					throw new NoClosingBracketException(classStartIndex,
223 							classStart, classEnd, pattern);
224 
225 				firstValidCharClassIndex = classEndIndex + 2;
226 				firstValidEndBracketIndex = firstValidCharClassIndex;
227 			} else {
228 				groupEnd = possibleGroupEnd;
229 			}
230 		}
231 		return groupEnd;
232 	}
233 
234 	private static List<AbstractHead> parseHeads(final String pattern,
235 			final Character invalidWildgetCharacter)
236 			throws InvalidPatternException {
237 
238 		int currentIndex = 0;
239 		List<AbstractHead> heads = new ArrayList<>();
240 		while (currentIndex < pattern.length()) {
241 			final int groupStart = indexOfUnescaped(pattern, '[', currentIndex);
242 			if (groupStart == -1) {
243 				final String patternPart = pattern.substring(currentIndex);
244 				heads.addAll(createSimpleHeads(patternPart,
245 						invalidWildgetCharacter));
246 				currentIndex = pattern.length();
247 			} else {
248 				final String patternPart = pattern.substring(currentIndex,
249 						groupStart);
250 				heads.addAll(createSimpleHeads(patternPart,
251 						invalidWildgetCharacter));
252 
253 				final int groupEnd = findGroupEnd(groupStart, pattern);
254 				final String groupPart = pattern.substring(groupStart + 1,
255 						groupEnd);
256 				heads.add(new GroupHead(groupPart, pattern));
257 				currentIndex = groupEnd + 1;
258 			}
259 		}
260 		return heads;
261 	}
262 
263 	private static List<AbstractHead> createSimpleHeads(
264 			final String patternPart, final Character invalidWildgetCharacter) {
265 		final List<AbstractHead> heads = new ArrayList<>(
266 				patternPart.length());
267 
268 		boolean escaped = false;
269 		for (int i = 0; i < patternPart.length(); i++) {
270 			final char c = patternPart.charAt(i);
271 			if (escaped) {
272 				final CharacterHead head = new CharacterHead(c);
273 				heads.add(head);
274 				escaped = false;
275 			} else {
276 				switch (c) {
277 				case '*': {
278 					final AbstractHead head = createWildCardHead(
279 							invalidWildgetCharacter, true);
280 					heads.add(head);
281 					break;
282 				}
283 				case '?': {
284 					final AbstractHead head = createWildCardHead(
285 							invalidWildgetCharacter, false);
286 					heads.add(head);
287 					break;
288 				}
289 				case '\\':
290 					escaped = true;
291 					break;
292 				default:
293 					final CharacterHead head = new CharacterHead(c);
294 					heads.add(head);
295 				}
296 			}
297 		}
298 		return heads;
299 	}
300 
301 	private static AbstractHead createWildCardHead(
302 			final Character invalidWildgetCharacter, final boolean star) {
303 		if (invalidWildgetCharacter != null)
304 			return new RestrictedWildCardHead(invalidWildgetCharacter
305 					.charValue(), star);
306 		else
307 			return new WildCardHead(star);
308 	}
309 
310 	/**
311 	 * @param c new character to append
312 	 * @return true to continue, false if the matcher can stop appending
313 	 */
314 	private boolean extendStringToMatchByOneCharacter(final char c) {
315 		final List<Head> newHeads = listForLocalUseage;
316 		newHeads.clear();
317 		List<Head> lastAddedHeads = null;
318 		for (int i = 0; i < heads.size(); i++) {
319 			final Head head = heads.get(i);
320 			final List<Head> headsToAdd = head.getNextHeads(c);
321 			// Why the next performance optimization isn't wrong:
322 			// Some times two heads return the very same list.
323 			// We save future effort if we don't add these heads again.
324 			// This is the case with the heads "a" and "*" of "a*b" which
325 			// both can return the list ["*","b"]
326 			if (headsToAdd != lastAddedHeads) {
327 				if (!headsToAdd.isEmpty())
328 					newHeads.addAll(headsToAdd);
329 				lastAddedHeads = headsToAdd;
330 			}
331 		}
332 		listForLocalUseage = heads;
333 		heads = newHeads;
334 		return !newHeads.isEmpty();
335 	}
336 
337 	private static int indexOfUnescaped(final String searchString,
338 			final char ch, final int fromIndex) {
339 		for (int i = fromIndex; i < searchString.length(); i++) {
340 			char current = searchString.charAt(i);
341 			if (current == ch)
342 				return i;
343 			if (current == '\\')
344 				i++; // Skip the next char as it is escaped }
345 		}
346 		return -1;
347 	}
348 
349 	/**
350 	 *
351 	 * @param stringToMatch
352 	 *            extends the string which is matched against the patterns of
353 	 *            this class.
354 	 */
355 	public void append(final String stringToMatch) {
356 		for (int i = 0; i < stringToMatch.length(); i++) {
357 			final char c = stringToMatch.charAt(i);
358 			if (!extendStringToMatchByOneCharacter(c))
359 				break;
360 		}
361 	}
362 
363 	/**
364 	 * Resets this matcher to it's state right after construction.
365 	 */
366 	public void reset() {
367 		heads.clear();
368 		heads.addAll(headsStartValue);
369 	}
370 
371 	/**
372 	 *
373 	 * @return a {@link FileNameMatcher} instance which uses the same pattern
374 	 *         like this matcher, but has the current state of this matcher as
375 	 *         reset and start point.
376 	 */
377 	public FileNameMatcher createMatcherForSuffix() {
378 		final List<Head> copyOfHeads = new ArrayList<>(heads.size());
379 		copyOfHeads.addAll(heads);
380 		return new FileNameMatcher(copyOfHeads);
381 	}
382 
383 	/**
384 	 *
385 	 * @return true, if the string currently being matched does match.
386 	 */
387 	public boolean isMatch() {
388 		if (heads.isEmpty())
389 			return false;
390 
391 		final ListIterator<Head> headIterator = heads
392 				.listIterator(heads.size());
393 		while (headIterator.hasPrevious()) {
394 			final Head head = headIterator.previous();
395 			if (head == LastHead.INSTANCE) {
396 				return true;
397 			}
398 		}
399 		return false;
400 	}
401 
402 	/**
403 	 *
404 	 * @return false, if the string being matched will not match when the string
405 	 *         gets extended.
406 	 */
407 	public boolean canAppendMatch() {
408 		for (int i = 0; i < heads.size(); i++) {
409 			if (heads.get(i) != LastHead.INSTANCE) {
410 				return true;
411 			}
412 		}
413 		return false;
414 	}
415 }