View Javadoc
1   /*
2    * Copyright (C) 2008, Florian Koeberle <florianskarten@web.de>
3    * Copyright (C) 2008, Florian Köberle <florianskarten@web.de>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.fnmatch;
46  
47  import java.util.ArrayList;
48  import java.util.Collections;
49  import java.util.List;
50  import java.util.ListIterator;
51  import java.util.regex.Matcher;
52  import java.util.regex.Pattern;
53  
54  import org.eclipse.jgit.errors.InvalidPatternException;
55  import org.eclipse.jgit.errors.NoClosingBracketException;
56  
57  /**
58   * This class can be used to match filenames against fnmatch like patterns. It
59   * is not thread save.
60   * <p>
61   * Supported are the wildcard characters * and ? and groups with:
62   * <ul>
63   * <li>characters e.g. [abc]</li>
64   * <li>ranges e.g. [a-z]</li>
65   * <li>the following character classes
66   * <ul>
67   * <li>[:alnum:]</li>
68   * <li>[:alpha:]</li>
69   * <li>[:blank:]</li>
70   * <li>[:cntrl:]</li>
71   * <li>[:digit:]</li>
72   * <li>[:graph:]</li>
73   * <li>[:lower:]</li>
74   * <li>[:print:]</li>
75   * <li>[:punct:]</li>
76   * <li>[:space:]</li>
77   * <li>[:upper:]</li>
78   * <li>[:word:]</li>
79   * <li>[:xdigit:]</li>
80   * </ul>
81   * e. g. [[:xdigit:]]</li>
82   * </ul>
83   * Any character can be escaped by prepending it with a \
84   */
85  public class FileNameMatcher {
86  	static final List<Head> EMPTY_HEAD_LIST = Collections.emptyList();
87  
88  	private static final Pattern characterClassStartPattern = Pattern
89  			.compile("\\[[.:=]"); //$NON-NLS-1$
90  
91  	private List<Head> headsStartValue;
92  
93  	private List<Head> heads;
94  
95  	/**
96  	 * {{@link #extendStringToMatchByOneCharacter(char)} needs a list for the
97  	 * new heads, allocating a new array would be bad for the performance, as
98  	 * the method gets called very often.
99  	 *
100 	 */
101 	private List<Head> listForLocalUseage;
102 
103 	/**
104 	 *
105 	 * @param headsStartValue
106 	 *            must be a list which will never be modified.
107 	 */
108 	private FileNameMatcher(final List<Head> headsStartValue) {
109 		this(headsStartValue, headsStartValue);
110 	}
111 
112 	/**
113 	 *
114 	 * @param headsStartValue
115 	 *            must be a list which will never be modified.
116 	 * @param heads
117 	 *            a list which will be cloned and then used as current head
118 	 *            list.
119 	 */
120 	private FileNameMatcher(final List<Head> headsStartValue,
121 			final List<Head> heads) {
122 		this.headsStartValue = headsStartValue;
123 		this.heads = new ArrayList<>(heads.size());
124 		this.heads.addAll(heads);
125 		this.listForLocalUseage = new ArrayList<>(heads.size());
126 	}
127 
128 	/**
129 	 * Constructor for FileNameMatcher
130 	 *
131 	 * @param patternString
132 	 *            must contain a pattern which fnmatch would accept.
133 	 * @param invalidWildgetCharacter
134 	 *            if this parameter isn't null then this character will not
135 	 *            match at wildcards(* and ? are wildcards).
136 	 * @throws org.eclipse.jgit.errors.InvalidPatternException
137 	 *             if the patternString contains a invalid fnmatch pattern.
138 	 */
139 	public FileNameMatcher(final String patternString,
140 			final Character invalidWildgetCharacter)
141 			throws InvalidPatternException {
142 		this(createHeadsStartValues(patternString, invalidWildgetCharacter));
143 	}
144 
145 	/**
146 	 * A Copy Constructor which creates a new
147 	 * {@link org.eclipse.jgit.fnmatch.FileNameMatcher} with the same state and
148 	 * reset point like <code>other</code>.
149 	 *
150 	 * @param other
151 	 *            another {@link org.eclipse.jgit.fnmatch.FileNameMatcher}
152 	 *            instance.
153 	 */
154 	public FileNameMatcher(FileNameMatcher other) {
155 		this(other.headsStartValue, other.heads);
156 	}
157 
158 	private static List<Head> createHeadsStartValues(
159 			final String patternString, final Character invalidWildgetCharacter)
160 			throws InvalidPatternException {
161 
162 		final List<AbstractHead> allHeads = parseHeads(patternString,
163 				invalidWildgetCharacter);
164 
165 		List<Head> nextHeadsSuggestion = new ArrayList<>(2);
166 		nextHeadsSuggestion.add(LastHead.INSTANCE);
167 		for (int i = allHeads.size() - 1; i >= 0; i--) {
168 			final AbstractHead head = allHeads.get(i);
169 
170 			// explanation:
171 			// a and * of the pattern "a*b"
172 			// need *b as newHeads
173 			// that's why * extends the list for it self and it's left neighbor.
174 			if (head.isStar()) {
175 				nextHeadsSuggestion.add(head);
176 				head.setNewHeads(nextHeadsSuggestion);
177 			} else {
178 				head.setNewHeads(nextHeadsSuggestion);
179 				nextHeadsSuggestion = new ArrayList<>(2);
180 				nextHeadsSuggestion.add(head);
181 			}
182 		}
183 		return nextHeadsSuggestion;
184 	}
185 
186 	private static int findGroupEnd(final int indexOfStartBracket,
187 			final String pattern) throws InvalidPatternException {
188 		int firstValidCharClassIndex = indexOfStartBracket + 1;
189 		int firstValidEndBracketIndex = indexOfStartBracket + 2;
190 
191 		if (indexOfStartBracket + 1 >= pattern.length())
192 			throw new NoClosingBracketException(indexOfStartBracket, "[", "]", //$NON-NLS-1$ //$NON-NLS-2$
193 					pattern);
194 
195 		if (pattern.charAt(firstValidCharClassIndex) == '!') {
196 			firstValidCharClassIndex++;
197 			firstValidEndBracketIndex++;
198 		}
199 
200 		final Matcher charClassStartMatcher = characterClassStartPattern
201 				.matcher(pattern);
202 
203 		int groupEnd = -1;
204 		while (groupEnd == -1) {
205 
206 			final int possibleGroupEnd = indexOfUnescaped(pattern, ']',
207 					firstValidEndBracketIndex);
208 			if (possibleGroupEnd == -1)
209 				throw new NoClosingBracketException(indexOfStartBracket, "[", //$NON-NLS-1$
210 						"]", pattern); //$NON-NLS-1$
211 
212 			final boolean foundCharClass = charClassStartMatcher
213 					.find(firstValidCharClassIndex);
214 
215 			if (foundCharClass
216 					&& charClassStartMatcher.start() < possibleGroupEnd) {
217 
218 				final String classStart = charClassStartMatcher.group(0);
219 				final String classEnd = classStart.charAt(1) + "]"; //$NON-NLS-1$
220 
221 				final int classStartIndex = charClassStartMatcher.start();
222 				final int classEndIndex = pattern.indexOf(classEnd,
223 						classStartIndex + 2);
224 
225 				if (classEndIndex == -1)
226 					throw new NoClosingBracketException(classStartIndex,
227 							classStart, classEnd, pattern);
228 
229 				firstValidCharClassIndex = classEndIndex + 2;
230 				firstValidEndBracketIndex = firstValidCharClassIndex;
231 			} else {
232 				groupEnd = possibleGroupEnd;
233 			}
234 		}
235 		return groupEnd;
236 	}
237 
238 	private static List<AbstractHead> parseHeads(final String pattern,
239 			final Character invalidWildgetCharacter)
240 			throws InvalidPatternException {
241 
242 		int currentIndex = 0;
243 		List<AbstractHead> heads = new ArrayList<>();
244 		while (currentIndex < pattern.length()) {
245 			final int groupStart = indexOfUnescaped(pattern, '[', currentIndex);
246 			if (groupStart == -1) {
247 				final String patternPart = pattern.substring(currentIndex);
248 				heads.addAll(createSimpleHeads(patternPart,
249 						invalidWildgetCharacter));
250 				currentIndex = pattern.length();
251 			} else {
252 				final String patternPart = pattern.substring(currentIndex,
253 						groupStart);
254 				heads.addAll(createSimpleHeads(patternPart,
255 						invalidWildgetCharacter));
256 
257 				final int groupEnd = findGroupEnd(groupStart, pattern);
258 				final String groupPart = pattern.substring(groupStart + 1,
259 						groupEnd);
260 				heads.add(new GroupHead(groupPart, pattern));
261 				currentIndex = groupEnd + 1;
262 			}
263 		}
264 		return heads;
265 	}
266 
267 	private static List<AbstractHead> createSimpleHeads(
268 			final String patternPart, final Character invalidWildgetCharacter) {
269 		final List<AbstractHead> heads = new ArrayList<>(
270 				patternPart.length());
271 
272 		boolean escaped = false;
273 		for (int i = 0; i < patternPart.length(); i++) {
274 			final char c = patternPart.charAt(i);
275 			if (escaped) {
276 				final CharacterHead head = new CharacterHead(c);
277 				heads.add(head);
278 				escaped = false;
279 			} else {
280 				switch (c) {
281 				case '*': {
282 					final AbstractHead head = createWildCardHead(
283 							invalidWildgetCharacter, true);
284 					heads.add(head);
285 					break;
286 				}
287 				case '?': {
288 					final AbstractHead head = createWildCardHead(
289 							invalidWildgetCharacter, false);
290 					heads.add(head);
291 					break;
292 				}
293 				case '\\':
294 					escaped = true;
295 					break;
296 				default:
297 					final CharacterHead head = new CharacterHead(c);
298 					heads.add(head);
299 				}
300 			}
301 		}
302 		return heads;
303 	}
304 
305 	private static AbstractHead createWildCardHead(
306 			final Character invalidWildgetCharacter, final boolean star) {
307 		if (invalidWildgetCharacter != null)
308 			return new RestrictedWildCardHead(invalidWildgetCharacter
309 					.charValue(), star);
310 		else
311 			return new WildCardHead(star);
312 	}
313 
314 	/**
315 	 * @param c new character to append
316 	 * @return true to continue, false if the matcher can stop appending
317 	 */
318 	private boolean extendStringToMatchByOneCharacter(final char c) {
319 		final List<Head> newHeads = listForLocalUseage;
320 		newHeads.clear();
321 		List<Head> lastAddedHeads = null;
322 		for (int i = 0; i < heads.size(); i++) {
323 			final Head head = heads.get(i);
324 			final List<Head> headsToAdd = head.getNextHeads(c);
325 			// Why the next performance optimization isn't wrong:
326 			// Some times two heads return the very same list.
327 			// We save future effort if we don't add these heads again.
328 			// This is the case with the heads "a" and "*" of "a*b" which
329 			// both can return the list ["*","b"]
330 			if (headsToAdd != lastAddedHeads) {
331 				if (!headsToAdd.isEmpty())
332 					newHeads.addAll(headsToAdd);
333 				lastAddedHeads = headsToAdd;
334 			}
335 		}
336 		listForLocalUseage = heads;
337 		heads = newHeads;
338 		return !newHeads.isEmpty();
339 	}
340 
341 	private static int indexOfUnescaped(final String searchString,
342 			final char ch, final int fromIndex) {
343 		for (int i = fromIndex; i < searchString.length(); i++) {
344 			char current = searchString.charAt(i);
345 			if (current == ch)
346 				return i;
347 			if (current == '\\')
348 				i++; // Skip the next char as it is escaped }
349 		}
350 		return -1;
351 	}
352 
353 	/**
354 	 * Append to the string which is matched against the patterns of this class
355 	 *
356 	 * @param stringToMatch
357 	 *            extends the string which is matched against the patterns of
358 	 *            this class.
359 	 */
360 	public void append(final String stringToMatch) {
361 		for (int i = 0; i < stringToMatch.length(); i++) {
362 			final char c = stringToMatch.charAt(i);
363 			if (!extendStringToMatchByOneCharacter(c))
364 				break;
365 		}
366 	}
367 
368 	/**
369 	 * Resets this matcher to it's state right after construction.
370 	 */
371 	public void reset() {
372 		heads.clear();
373 		heads.addAll(headsStartValue);
374 	}
375 
376 	/**
377 	 * Create a {@link org.eclipse.jgit.fnmatch.FileNameMatcher} instance which
378 	 * uses the same pattern like this matcher, but has the current state of
379 	 * this matcher as reset and start point
380 	 *
381 	 * @return a {@link org.eclipse.jgit.fnmatch.FileNameMatcher} instance which
382 	 *         uses the same pattern like this matcher, but has the current
383 	 *         state of this matcher as reset and start point.
384 	 */
385 	public FileNameMatcher createMatcherForSuffix() {
386 		final List<Head> copyOfHeads = new ArrayList<>(heads.size());
387 		copyOfHeads.addAll(heads);
388 		return new FileNameMatcher(copyOfHeads);
389 	}
390 
391 	/**
392 	 * Whether the matcher matches
393 	 *
394 	 * @return whether the matcher matches
395 	 */
396 	public boolean isMatch() {
397 		if (heads.isEmpty())
398 			return false;
399 
400 		final ListIterator<Head> headIterator = heads
401 				.listIterator(heads.size());
402 		while (headIterator.hasPrevious()) {
403 			final Head head = headIterator.previous();
404 			if (head == LastHead.INSTANCE) {
405 				return true;
406 			}
407 		}
408 		return false;
409 	}
410 
411 	/**
412 	 * Whether a match can be appended
413 	 *
414 	 * @return a boolean.
415 	 */
416 	public boolean canAppendMatch() {
417 		for (int i = 0; i < heads.size(); i++) {
418 			if (heads.get(i) != LastHead.INSTANCE) {
419 				return true;
420 			}
421 		}
422 		return false;
423 	}
424 }