FileNameMatcher.java
/*
* Copyright (C) 2008, Florian Koeberle <florianskarten@web.de>
* Copyright (C) 2008, Florian Köberle <florianskarten@web.de>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.fnmatch;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.ListIterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.eclipse.jgit.errors.InvalidPatternException;
import org.eclipse.jgit.errors.NoClosingBracketException;
/**
* This class can be used to match filenames against fnmatch like patterns. It
* is not thread save.
* <p>
* Supported are the wildcard characters * and ? and groups with:
* <ul>
* <li>characters e.g. [abc]</li>
* <li>ranges e.g. [a-z]</li>
* <li>the following character classes
* <ul>
* <li>[:alnum:]</li>
* <li>[:alpha:]</li>
* <li>[:blank:]</li>
* <li>[:cntrl:]</li>
* <li>[:digit:]</li>
* <li>[:graph:]</li>
* <li>[:lower:]</li>
* <li>[:print:]</li>
* <li>[:punct:]</li>
* <li>[:space:]</li>
* <li>[:upper:]</li>
* <li>[:word:]</li>
* <li>[:xdigit:]</li>
* </ul>
* e. g. [[:xdigit:]]</li>
* </ul>
* Any character can be escaped by prepending it with a \
*/
public class FileNameMatcher {
static final List<Head> EMPTY_HEAD_LIST = Collections.emptyList();
private static final Pattern characterClassStartPattern = Pattern
.compile("\\[[.:=]"); //$NON-NLS-1$
private List<Head> headsStartValue;
private List<Head> heads;
/**
* {{@link #extendStringToMatchByOneCharacter(char)} needs a list for the
* new heads, allocating a new array would be bad for the performance, as
* the method gets called very often.
*
*/
private List<Head> listForLocalUseage;
/**
*
* @param headsStartValue
* must be a list which will never be modified.
*/
private FileNameMatcher(List<Head> headsStartValue) {
this(headsStartValue, headsStartValue);
}
/**
*
* @param headsStartValue
* must be a list which will never be modified.
* @param heads
* a list which will be cloned and then used as current head
* list.
*/
private FileNameMatcher(final List<Head> headsStartValue,
final List<Head> heads) {
this.headsStartValue = headsStartValue;
this.heads = new ArrayList<>(heads.size());
this.heads.addAll(heads);
this.listForLocalUseage = new ArrayList<>(heads.size());
}
/**
* Constructor for FileNameMatcher
*
* @param patternString
* must contain a pattern which fnmatch would accept.
* @param invalidWildgetCharacter
* if this parameter isn't null then this character will not
* match at wildcards(* and ? are wildcards).
* @throws org.eclipse.jgit.errors.InvalidPatternException
* if the patternString contains a invalid fnmatch pattern.
*/
public FileNameMatcher(final String patternString,
final Character invalidWildgetCharacter)
throws InvalidPatternException {
this(createHeadsStartValues(patternString, invalidWildgetCharacter));
}
/**
* A Copy Constructor which creates a new
* {@link org.eclipse.jgit.fnmatch.FileNameMatcher} with the same state and
* reset point like <code>other</code>.
*
* @param other
* another {@link org.eclipse.jgit.fnmatch.FileNameMatcher}
* instance.
*/
public FileNameMatcher(FileNameMatcher other) {
this(other.headsStartValue, other.heads);
}
private static List<Head> createHeadsStartValues(
final String patternString, final Character invalidWildgetCharacter)
throws InvalidPatternException {
final List<AbstractHead> allHeads = parseHeads(patternString,
invalidWildgetCharacter);
List<Head> nextHeadsSuggestion = new ArrayList<>(2);
nextHeadsSuggestion.add(LastHead.INSTANCE);
for (int i = allHeads.size() - 1; i >= 0; i--) {
final AbstractHead head = allHeads.get(i);
// explanation:
// a and * of the pattern "a*b"
// need *b as newHeads
// that's why * extends the list for it self and it's left neighbor.
if (head.isStar()) {
nextHeadsSuggestion.add(head);
head.setNewHeads(nextHeadsSuggestion);
} else {
head.setNewHeads(nextHeadsSuggestion);
nextHeadsSuggestion = new ArrayList<>(2);
nextHeadsSuggestion.add(head);
}
}
return nextHeadsSuggestion;
}
private static int findGroupEnd(final int indexOfStartBracket,
final String pattern) throws InvalidPatternException {
int firstValidCharClassIndex = indexOfStartBracket + 1;
int firstValidEndBracketIndex = indexOfStartBracket + 2;
if (indexOfStartBracket + 1 >= pattern.length())
throw new NoClosingBracketException(indexOfStartBracket, "[", "]", //$NON-NLS-1$ //$NON-NLS-2$
pattern);
if (pattern.charAt(firstValidCharClassIndex) == '!') {
firstValidCharClassIndex++;
firstValidEndBracketIndex++;
}
final Matcher charClassStartMatcher = characterClassStartPattern
.matcher(pattern);
int groupEnd = -1;
while (groupEnd == -1) {
final int possibleGroupEnd = indexOfUnescaped(pattern, ']',
firstValidEndBracketIndex);
if (possibleGroupEnd == -1)
throw new NoClosingBracketException(indexOfStartBracket, "[", //$NON-NLS-1$
"]", pattern); //$NON-NLS-1$
final boolean foundCharClass = charClassStartMatcher
.find(firstValidCharClassIndex);
if (foundCharClass
&& charClassStartMatcher.start() < possibleGroupEnd) {
final String classStart = charClassStartMatcher.group(0);
final String classEnd = classStart.charAt(1) + "]"; //$NON-NLS-1$
final int classStartIndex = charClassStartMatcher.start();
final int classEndIndex = pattern.indexOf(classEnd,
classStartIndex + 2);
if (classEndIndex == -1)
throw new NoClosingBracketException(classStartIndex,
classStart, classEnd, pattern);
firstValidCharClassIndex = classEndIndex + 2;
firstValidEndBracketIndex = firstValidCharClassIndex;
} else {
groupEnd = possibleGroupEnd;
}
}
return groupEnd;
}
private static List<AbstractHead> parseHeads(final String pattern,
final Character invalidWildgetCharacter)
throws InvalidPatternException {
int currentIndex = 0;
List<AbstractHead> heads = new ArrayList<>();
while (currentIndex < pattern.length()) {
final int groupStart = indexOfUnescaped(pattern, '[', currentIndex);
if (groupStart == -1) {
final String patternPart = pattern.substring(currentIndex);
heads.addAll(createSimpleHeads(patternPart,
invalidWildgetCharacter));
currentIndex = pattern.length();
} else {
final String patternPart = pattern.substring(currentIndex,
groupStart);
heads.addAll(createSimpleHeads(patternPart,
invalidWildgetCharacter));
final int groupEnd = findGroupEnd(groupStart, pattern);
final String groupPart = pattern.substring(groupStart + 1,
groupEnd);
heads.add(new GroupHead(groupPart, pattern));
currentIndex = groupEnd + 1;
}
}
return heads;
}
private static List<AbstractHead> createSimpleHeads(
final String patternPart, final Character invalidWildgetCharacter) {
final List<AbstractHead> heads = new ArrayList<>(
patternPart.length());
boolean escaped = false;
for (int i = 0; i < patternPart.length(); i++) {
final char c = patternPart.charAt(i);
if (escaped) {
final CharacterHead head = new CharacterHead(c);
heads.add(head);
escaped = false;
} else {
switch (c) {
case '*': {
final AbstractHead head = createWildCardHead(
invalidWildgetCharacter, true);
heads.add(head);
break;
}
case '?': {
final AbstractHead head = createWildCardHead(
invalidWildgetCharacter, false);
heads.add(head);
break;
}
case '\\':
escaped = true;
break;
default:
final CharacterHead head = new CharacterHead(c);
heads.add(head);
}
}
}
return heads;
}
private static AbstractHead createWildCardHead(
final Character invalidWildgetCharacter, final boolean star) {
if (invalidWildgetCharacter != null)
return new RestrictedWildCardHead(invalidWildgetCharacter
.charValue(), star);
else
return new WildCardHead(star);
}
/**
* @param c new character to append
* @return true to continue, false if the matcher can stop appending
*/
private boolean extendStringToMatchByOneCharacter(char c) {
final List<Head> newHeads = listForLocalUseage;
newHeads.clear();
List<Head> lastAddedHeads = null;
for (int i = 0; i < heads.size(); i++) {
final Head head = heads.get(i);
final List<Head> headsToAdd = head.getNextHeads(c);
// Why the next performance optimization isn't wrong:
// Some times two heads return the very same list.
// We save future effort if we don't add these heads again.
// This is the case with the heads "a" and "*" of "a*b" which
// both can return the list ["*","b"]
if (headsToAdd != lastAddedHeads) {
if (!headsToAdd.isEmpty())
newHeads.addAll(headsToAdd);
lastAddedHeads = headsToAdd;
}
}
listForLocalUseage = heads;
heads = newHeads;
return !newHeads.isEmpty();
}
private static int indexOfUnescaped(final String searchString,
final char ch, final int fromIndex) {
for (int i = fromIndex; i < searchString.length(); i++) {
char current = searchString.charAt(i);
if (current == ch)
return i;
if (current == '\\')
i++; // Skip the next char as it is escaped }
}
return -1;
}
/**
* Append to the string which is matched against the patterns of this class
*
* @param stringToMatch
* extends the string which is matched against the patterns of
* this class.
*/
public void append(String stringToMatch) {
for (int i = 0; i < stringToMatch.length(); i++) {
final char c = stringToMatch.charAt(i);
if (!extendStringToMatchByOneCharacter(c))
break;
}
}
/**
* Resets this matcher to it's state right after construction.
*/
public void reset() {
heads.clear();
heads.addAll(headsStartValue);
}
/**
* Create a {@link org.eclipse.jgit.fnmatch.FileNameMatcher} instance which
* uses the same pattern like this matcher, but has the current state of
* this matcher as reset and start point
*
* @return a {@link org.eclipse.jgit.fnmatch.FileNameMatcher} instance which
* uses the same pattern like this matcher, but has the current
* state of this matcher as reset and start point.
*/
public FileNameMatcher createMatcherForSuffix() {
final List<Head> copyOfHeads = new ArrayList<>(heads.size());
copyOfHeads.addAll(heads);
return new FileNameMatcher(copyOfHeads);
}
/**
* Whether the matcher matches
*
* @return whether the matcher matches
*/
public boolean isMatch() {
if (heads.isEmpty())
return false;
final ListIterator<Head> headIterator = heads
.listIterator(heads.size());
while (headIterator.hasPrevious()) {
final Head head = headIterator.previous();
if (head == LastHead.INSTANCE) {
return true;
}
}
return false;
}
/**
* Whether a match can be appended
*
* @return a boolean.
*/
public boolean canAppendMatch() {
for (int i = 0; i < heads.size(); i++) {
if (heads.get(i) != LastHead.INSTANCE) {
return true;
}
}
return false;
}
}