View Javadoc
1   /*
2    * Copyright (C) 2014, Andrey Loskutov <loskutov@gmx.de>
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  package org.eclipse.jgit.ignore.internal;
44  
45  import static java.lang.Character.isLetter;
46  
47  import java.util.ArrayList;
48  import java.util.Arrays;
49  import java.util.List;
50  import java.util.regex.Pattern;
51  
52  import org.eclipse.jgit.errors.InvalidPatternException;
53  import org.eclipse.jgit.ignore.FastIgnoreRule;
54  
55  /**
56   * Various {@link String} related utility methods, written mostly to avoid
57   * generation of new String objects (e.g. via splitting Strings etc).
58   *
59   * @since 3.6
60   */
61  public class Strings {
62  
63  	static char getPathSeparator(Character pathSeparator) {
64  		return pathSeparator == null ? FastIgnoreRule.PATH_SEPARATOR
65  				: pathSeparator.charValue();
66  	}
67  
68  	/**
69  	 * @param pattern
70  	 *            non null
71  	 * @param c
72  	 *            character to remove
73  	 * @return new string with all trailing characters removed
74  	 */
75  	public static String stripTrailing(String pattern, char c) {
76  		while (pattern.length() > 0
77  				&& pattern.charAt(pattern.length() - 1) == c)
78  			pattern = pattern.substring(0, pattern.length() - 1);
79  		return pattern;
80  	}
81  
82  	static int count(String s, char c, boolean ignoreFirstLast) {
83  		int start = 0;
84  		int count = 0;
85  		while (true) {
86  			start = s.indexOf(c, start);
87  			if (start == -1)
88  				break;
89  			if (!ignoreFirstLast || (start != 0 && start != s.length()))
90  				count++;
91  			start++;
92  		}
93  		return count;
94  	}
95  
96  	/**
97  	 * Splits given string to substrings by given separator
98  	 *
99  	 * @param pattern
100 	 *            non null
101 	 * @param slash
102 	 *            separator char
103 	 * @return list of substrings
104 	 */
105 	public static List<String> split(String pattern, char slash) {
106 		int count = count(pattern, slash, true);
107 		if (count < 1)
108 			throw new IllegalStateException(
109 					"Pattern must have at least two segments: " + pattern); //$NON-NLS-1$
110 		List<String> segments = new ArrayList<String>(count);
111 		int right = 0;
112 		while (true) {
113 			int left = right;
114 			right = pattern.indexOf(slash, right);
115 			if (right == -1) {
116 				if (left < pattern.length())
117 					segments.add(pattern.substring(left));
118 				break;
119 			}
120 			if (right - left > 0)
121 				if (left == 1)
122 					// leading slash should remain by the first pattern
123 					segments.add(pattern.substring(left - 1, right));
124 				else if (right == pattern.length() - 1)
125 					// trailing slash should remain too
126 					segments.add(pattern.substring(left, right + 1));
127 				else
128 					segments.add(pattern.substring(left, right));
129 			right++;
130 		}
131 		return segments;
132 	}
133 
134 	static boolean isWildCard(String pattern) {
135 		return pattern.indexOf('*') != -1 || isComplexWildcard(pattern);
136 	}
137 
138 	private static boolean isComplexWildcard(String pattern) {
139 		int idx1 = pattern.indexOf('[');
140 		if (idx1 != -1) {
141 			int idx2 = pattern.indexOf(']');
142 			if (idx2 > idx1)
143 				return true;
144 		}
145 		// required to match escaped backslashes '\\\\'
146 		if (pattern.indexOf('?') != -1 || pattern.indexOf('\\') != -1)
147 			return true;
148 		return false;
149 	}
150 
151 	static PatternState checkWildCards(String pattern) {
152 		if (isComplexWildcard(pattern))
153 			return PatternState.COMPLEX;
154 		int startIdx = pattern.indexOf('*');
155 		if (startIdx < 0)
156 			return PatternState.NONE;
157 
158 		if (startIdx == pattern.length() - 1)
159 			return PatternState.TRAILING_ASTERISK_ONLY;
160 		if (pattern.lastIndexOf('*') == 0)
161 			return PatternState.LEADING_ASTERISK_ONLY;
162 
163 		return PatternState.COMPLEX;
164 	}
165 
166 	static enum PatternState {
167 		LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE
168 	}
169 
170 	final static List<String> POSIX_CHAR_CLASSES = Arrays.asList(
171 			"alnum", "alpha", "blank", "cntrl", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
172 			// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
173 			"digit", "graph", "lower", "print", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
174 			// [:digit:] [:graph:] [:lower:] [:print:]
175 			"punct", "space", "upper", "xdigit", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
176 			// [:punct:] [:space:] [:upper:] [:xdigit:]
177 			"word" //$NON-NLS-1$
178 	// [:word:] XXX I don't see it in
179 	// http://man7.org/linux/man-pages/man7/glob.7.html
180 	// but this was in org.eclipse.jgit.fnmatch.GroupHead.java ???
181 			);
182 
183 	private static final String DL = "\\p{javaDigit}\\p{javaLetter}"; //$NON-NLS-1$
184 
185 	final static List<String> JAVA_CHAR_CLASSES = Arrays
186 			.asList("\\p{Alnum}", "\\p{javaLetter}", "\\p{Blank}", "\\p{Cntrl}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
187 					// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
188 					"\\p{javaDigit}", "[\\p{Graph}" + DL + "]", "\\p{Ll}", "[\\p{Print}" + DL + "]", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$
189 					// [:digit:] [:graph:] [:lower:] [:print:]
190 					"\\p{Punct}", "\\p{Space}", "\\p{Lu}", "\\p{XDigit}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
191 					// [:punct:] [:space:] [:upper:] [:xdigit:]
192 					"[" + DL + "_]" //$NON-NLS-1$ //$NON-NLS-2$
193 							// [:word:]
194 			);
195 
196 	// Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are
197 	// not supported by CLI git (at least not by 1.9.1)
198 	final static Pattern UNSUPPORTED = Pattern
199 			.compile("\\[\\[[.=]\\w+[.=]\\]\\]"); //$NON-NLS-1$
200 
201 	/**
202 	 * Conversion from glob to Java regex following two sources: <li>
203 	 * http://man7.org/linux/man-pages/man7/glob.7.html <li>
204 	 * org.eclipse.jgit.fnmatch.FileNameMatcher.java Seems that there are
205 	 * various ways to define what "glob" can be.
206 	 *
207 	 * @param pattern
208 	 *            non null pattern
209 	 *
210 	 * @return Java regex pattern corresponding to given glob pattern
211 	 * @throws InvalidPatternException
212 	 */
213 	static Pattern convertGlob(String pattern) throws InvalidPatternException {
214 		if (UNSUPPORTED.matcher(pattern).find())
215 			throw new InvalidPatternException(
216 					"Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are not supported", //$NON-NLS-1$
217 					pattern);
218 
219 		StringBuilder sb = new StringBuilder(pattern.length());
220 
221 		int in_brackets = 0;
222 		boolean seenEscape = false;
223 		boolean ignoreLastBracket = false;
224 		boolean in_char_class = false;
225 		// 6 is the length of the longest posix char class "xdigit"
226 		char[] charClass = new char[6];
227 
228 		for (int i = 0; i < pattern.length(); i++) {
229 			char c = pattern.charAt(i);
230 			switch (c) {
231 
232 			case '*':
233 				if (seenEscape || in_brackets > 0)
234 					sb.append(c);
235 				else
236 					sb.append('.').append(c);
237 				break;
238 
239 			case '.':
240 				if (seenEscape)
241 					sb.append(c);
242 				else
243 					sb.append('\\').append('.');
244 				break;
245 
246 			case '?':
247 				if (seenEscape || in_brackets > 0)
248 					sb.append(c);
249 				else
250 					sb.append('.');
251 				break;
252 
253 			case ':':
254 				if (in_brackets > 0)
255 					if (lookBehind(sb) == '['
256 							&& isLetter(lookAhead(pattern, i)))
257 						in_char_class = true;
258 				sb.append(':');
259 				break;
260 
261 			case '-':
262 				if (in_brackets > 0) {
263 					if (lookAhead(pattern, i) == ']')
264 						sb.append('\\').append(c);
265 					else
266 						sb.append(c);
267 				} else
268 					sb.append('-');
269 				break;
270 
271 			case '\\':
272 				if (in_brackets > 0) {
273 					char lookAhead = lookAhead(pattern, i);
274 					if (lookAhead == ']' || lookAhead == '[')
275 						ignoreLastBracket = true;
276 				}
277 				sb.append(c);
278 				break;
279 
280 			case '[':
281 				if (in_brackets > 0) {
282 					sb.append('\\').append('[');
283 					ignoreLastBracket = true;
284 				} else {
285 					if (!seenEscape) {
286 						in_brackets++;
287 						ignoreLastBracket = false;
288 					}
289 					sb.append('[');
290 				}
291 				break;
292 
293 			case ']':
294 				if (seenEscape) {
295 					sb.append(']');
296 					ignoreLastBracket = true;
297 					break;
298 				}
299 				if (in_brackets <= 0) {
300 					sb.append('\\').append(']');
301 					ignoreLastBracket = true;
302 					break;
303 				}
304 				char lookBehind = lookBehind(sb);
305 				if ((lookBehind == '[' && !ignoreLastBracket)
306 						|| lookBehind == '^') {
307 					sb.append('\\');
308 					sb.append(']');
309 					ignoreLastBracket = true;
310 				} else {
311 					ignoreLastBracket = false;
312 					if (!in_char_class) {
313 						in_brackets--;
314 						sb.append(']');
315 					} else {
316 						in_char_class = false;
317 						String charCl = checkPosixCharClass(charClass);
318 						// delete last \[:: chars and set the pattern
319 						if (charCl != null) {
320 							sb.setLength(sb.length() - 4);
321 							sb.append(charCl);
322 						}
323 						reset(charClass);
324 					}
325 				}
326 				break;
327 
328 			case '!':
329 				if (in_brackets > 0) {
330 					if (lookBehind(sb) == '[')
331 						sb.append('^');
332 					else
333 						sb.append(c);
334 				} else
335 					sb.append(c);
336 				break;
337 
338 			default:
339 				if (in_char_class)
340 					setNext(charClass, c);
341 				else
342 					sb.append(c);
343 				break;
344 			} // end switch
345 
346 			seenEscape = c == '\\';
347 
348 		} // end for
349 
350 		if (in_brackets > 0)
351 			throw new InvalidPatternException("Not closed bracket?", pattern); //$NON-NLS-1$
352 		return Pattern.compile(sb.toString());
353 	}
354 
355 	/**
356 	 * @param buffer
357 	 * @return zero of the buffer is empty, otherwise the last character from
358 	 *         buffer
359 	 */
360 	private static char lookBehind(StringBuilder buffer) {
361 		return buffer.length() > 0 ? buffer.charAt(buffer.length() - 1) : 0;
362 	}
363 
364 	/**
365 	 * @param pattern
366 	 * @param i
367 	 *            current pointer in the pattern
368 	 * @return zero of the index is out of range, otherwise the next character
369 	 *         from given position
370 	 */
371 	private static char lookAhead(String pattern, int i) {
372 		int idx = i + 1;
373 		return idx >= pattern.length() ? 0 : pattern.charAt(idx);
374 	}
375 
376 	private static void setNext(char[] buffer, char c) {
377 		for (int i = 0; i < buffer.length; i++)
378 			if (buffer[i] == 0) {
379 				buffer[i] = c;
380 				break;
381 			}
382 	}
383 
384 	private static void reset(char[] buffer) {
385 		for (int i = 0; i < buffer.length; i++)
386 			buffer[i] = 0;
387 	}
388 
389 	private static String checkPosixCharClass(char[] buffer) {
390 		for (int i = 0; i < POSIX_CHAR_CLASSES.size(); i++) {
391 			String clazz = POSIX_CHAR_CLASSES.get(i);
392 			boolean match = true;
393 			for (int j = 0; j < clazz.length(); j++)
394 				if (buffer[j] != clazz.charAt(j)) {
395 					match = false;
396 					break;
397 				}
398 			if (match)
399 				return JAVA_CHAR_CLASSES.get(i);
400 		}
401 		return null;
402 	}
403 
404 }