View Javadoc
1   /*
2    * Copyright (C) 2014, Andrey Loskutov <loskutov@gmx.de>
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  package org.eclipse.jgit.ignore.internal;
44  
45  import static java.lang.Character.isLetter;
46  
47  import java.text.MessageFormat;
48  import java.util.ArrayList;
49  import java.util.Arrays;
50  import java.util.List;
51  import java.util.regex.Pattern;
52  import java.util.regex.PatternSyntaxException;
53  
54  import org.eclipse.jgit.errors.InvalidPatternException;
55  import org.eclipse.jgit.ignore.FastIgnoreRule;
56  import org.eclipse.jgit.internal.JGitText;
57  
58  /**
59   * Various {@link String} related utility methods, written mostly to avoid
60   * generation of new String objects (e.g. via splitting Strings etc).
61   *
62   * @since 3.6
63   */
64  public class Strings {
65  
66  	static char getPathSeparator(Character pathSeparator) {
67  		return pathSeparator == null ? FastIgnoreRule.PATH_SEPARATOR
68  				: pathSeparator.charValue();
69  	}
70  
71  	/**
72  	 * @param pattern
73  	 *            non null
74  	 * @param c
75  	 *            character to remove
76  	 * @return new string with all trailing characters removed
77  	 */
78  	public static String stripTrailing(String pattern, char c) {
79  		for (int i = pattern.length() - 1; i >= 0; i--) {
80  			char charAt = pattern.charAt(i);
81  			if (charAt != c) {
82  				if (i == pattern.length() - 1) {
83  					return pattern;
84  				}
85  				return pattern.substring(0, i + 1);
86  			}
87  		}
88  		return ""; //$NON-NLS-1$
89  	}
90  
91  	/**
92  	 * @param pattern
93  	 *            non null
94  	 * @return new string with all trailing whitespace removed
95  	 */
96  	public static String stripTrailingWhitespace(String pattern) {
97  		for (int i = pattern.length() - 1; i >= 0; i--) {
98  			char charAt = pattern.charAt(i);
99  			if (!Character.isWhitespace(charAt)) {
100 				if (i == pattern.length() - 1) {
101 					return pattern;
102 				}
103 				return pattern.substring(0, i + 1);
104 			}
105 		}
106 		return ""; //$NON-NLS-1$
107 	}
108 
109 	/**
110 	 * @param pattern
111 	 *            non null
112 	 * @return true if the last character, which is not whitespace, is a path
113 	 *         separator
114 	 */
115 	public static boolean isDirectoryPattern(String pattern) {
116 		for (int i = pattern.length() - 1; i >= 0; i--) {
117 			char charAt = pattern.charAt(i);
118 			if (!Character.isWhitespace(charAt)) {
119 				return charAt == FastIgnoreRule.PATH_SEPARATOR;
120 			}
121 		}
122 		return false;
123 	}
124 
125 	static int count(String s, char c, boolean ignoreFirstLast) {
126 		int start = 0;
127 		int count = 0;
128 		while (true) {
129 			start = s.indexOf(c, start);
130 			if (start == -1)
131 				break;
132 			if (!ignoreFirstLast || (start != 0 && start != s.length()))
133 				count++;
134 			start++;
135 		}
136 		return count;
137 	}
138 
139 	/**
140 	 * Splits given string to substrings by given separator
141 	 *
142 	 * @param pattern
143 	 *            non null
144 	 * @param slash
145 	 *            separator char
146 	 * @return list of substrings
147 	 */
148 	public static List<String> split(String pattern, char slash) {
149 		int count = count(pattern, slash, true);
150 		if (count < 1)
151 			throw new IllegalStateException(
152 					"Pattern must have at least two segments: " + pattern); //$NON-NLS-1$
153 		List<String> segments = new ArrayList<String>(count);
154 		int right = 0;
155 		while (true) {
156 			int left = right;
157 			right = pattern.indexOf(slash, right);
158 			if (right == -1) {
159 				if (left < pattern.length())
160 					segments.add(pattern.substring(left));
161 				break;
162 			}
163 			if (right - left > 0)
164 				if (left == 1)
165 					// leading slash should remain by the first pattern
166 					segments.add(pattern.substring(left - 1, right));
167 				else if (right == pattern.length() - 1)
168 					// trailing slash should remain too
169 					segments.add(pattern.substring(left, right + 1));
170 				else
171 					segments.add(pattern.substring(left, right));
172 			right++;
173 		}
174 		return segments;
175 	}
176 
177 	static boolean isWildCard(String pattern) {
178 		return pattern.indexOf('*') != -1 || isComplexWildcard(pattern);
179 	}
180 
181 	private static boolean isComplexWildcard(String pattern) {
182 		int idx1 = pattern.indexOf('[');
183 		if (idx1 != -1) {
184 			return true;
185 		}
186 		if (pattern.indexOf('?') != -1) {
187 			return true;
188 		} else {
189 			// check if the backslash escapes one of the glob special characters
190 			// if not, backslash is not part of a regex and treated literally
191 			int backSlash = pattern.indexOf('\\');
192 			if (backSlash >= 0) {
193 				int nextIdx = backSlash + 1;
194 				if (pattern.length() == nextIdx) {
195 					return false;
196 				}
197 				char nextChar = pattern.charAt(nextIdx);
198 				if (escapedByBackslash(nextChar)) {
199 					return true;
200 				} else {
201 					return false;
202 				}
203 			}
204 		}
205 		return false;
206 	}
207 
208 	private static boolean escapedByBackslash(char nextChar) {
209 		return nextChar == '?' || nextChar == '*' || nextChar == '[';
210 	}
211 
212 	static PatternState checkWildCards(String pattern) {
213 		if (isComplexWildcard(pattern))
214 			return PatternState.COMPLEX;
215 		int startIdx = pattern.indexOf('*');
216 		if (startIdx < 0)
217 			return PatternState.NONE;
218 
219 		if (startIdx == pattern.length() - 1)
220 			return PatternState.TRAILING_ASTERISK_ONLY;
221 		if (pattern.lastIndexOf('*') == 0)
222 			return PatternState.LEADING_ASTERISK_ONLY;
223 
224 		return PatternState.COMPLEX;
225 	}
226 
227 	static enum PatternState {
228 		LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE
229 	}
230 
231 	final static List<String> POSIX_CHAR_CLASSES = Arrays.asList(
232 			"alnum", "alpha", "blank", "cntrl", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
233 			// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
234 			"digit", "graph", "lower", "print", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
235 			// [:digit:] [:graph:] [:lower:] [:print:]
236 			"punct", "space", "upper", "xdigit", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
237 			// [:punct:] [:space:] [:upper:] [:xdigit:]
238 			"word" //$NON-NLS-1$
239 	// [:word:] XXX I don't see it in
240 	// http://man7.org/linux/man-pages/man7/glob.7.html
241 	// but this was in org.eclipse.jgit.fnmatch.GroupHead.java ???
242 			);
243 
244 	private static final String DL = "\\p{javaDigit}\\p{javaLetter}"; //$NON-NLS-1$
245 
246 	final static List<String> JAVA_CHAR_CLASSES = Arrays
247 			.asList("\\p{Alnum}", "\\p{javaLetter}", "\\p{Blank}", "\\p{Cntrl}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
248 					// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
249 					"\\p{javaDigit}", "[\\p{Graph}" + DL + "]", "\\p{Ll}", "[\\p{Print}" + DL + "]", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$
250 					// [:digit:] [:graph:] [:lower:] [:print:]
251 					"\\p{Punct}", "\\p{Space}", "\\p{Lu}", "\\p{XDigit}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
252 					// [:punct:] [:space:] [:upper:] [:xdigit:]
253 					"[" + DL + "_]" //$NON-NLS-1$ //$NON-NLS-2$
254 							// [:word:]
255 			);
256 
257 	// Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are
258 	// not supported by CLI git (at least not by 1.9.1)
259 	final static Pattern UNSUPPORTED = Pattern
260 			.compile("\\[\\[[.=]\\w+[.=]\\]\\]"); //$NON-NLS-1$
261 
262 	/**
263 	 * Conversion from glob to Java regex following two sources: <li>
264 	 * http://man7.org/linux/man-pages/man7/glob.7.html <li>
265 	 * org.eclipse.jgit.fnmatch.FileNameMatcher.java Seems that there are
266 	 * various ways to define what "glob" can be.
267 	 *
268 	 * @param pattern
269 	 *            non null pattern
270 	 *
271 	 * @return Java regex pattern corresponding to given glob pattern
272 	 * @throws InvalidPatternException
273 	 */
274 	static Pattern convertGlob(String pattern) throws InvalidPatternException {
275 		if (UNSUPPORTED.matcher(pattern).find())
276 			throw new InvalidPatternException(
277 					"Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are not supported", //$NON-NLS-1$
278 					pattern);
279 
280 		StringBuilder sb = new StringBuilder(pattern.length());
281 
282 		int in_brackets = 0;
283 		boolean seenEscape = false;
284 		boolean ignoreLastBracket = false;
285 		boolean in_char_class = false;
286 		// 6 is the length of the longest posix char class "xdigit"
287 		char[] charClass = new char[6];
288 
289 		for (int i = 0; i < pattern.length(); i++) {
290 			final char c = pattern.charAt(i);
291 			switch (c) {
292 
293 			case '*':
294 				if (seenEscape || in_brackets > 0)
295 					sb.append(c);
296 				else
297 					sb.append('.').append(c);
298 				break;
299 
300 			case '(': // fall-through
301 			case ')': // fall-through
302 			case '{': // fall-through
303 			case '}': // fall-through
304 			case '+': // fall-through
305 			case '$': // fall-through
306 			case '^': // fall-through
307 			case '|':
308 				if (seenEscape || in_brackets > 0)
309 					sb.append(c);
310 				else
311 					sb.append('\\').append(c);
312 				break;
313 
314 			case '.':
315 				if (seenEscape)
316 					sb.append(c);
317 				else
318 					sb.append('\\').append('.');
319 				break;
320 
321 			case '?':
322 				if (seenEscape || in_brackets > 0)
323 					sb.append(c);
324 				else
325 					sb.append('.');
326 				break;
327 
328 			case ':':
329 				if (in_brackets > 0)
330 					if (lookBehind(sb) == '['
331 							&& isLetter(lookAhead(pattern, i)))
332 						in_char_class = true;
333 				sb.append(':');
334 				break;
335 
336 			case '-':
337 				if (in_brackets > 0) {
338 					if (lookAhead(pattern, i) == ']')
339 						sb.append('\\').append(c);
340 					else
341 						sb.append(c);
342 				} else
343 					sb.append('-');
344 				break;
345 
346 			case '\\':
347 				if (in_brackets > 0) {
348 					char lookAhead = lookAhead(pattern, i);
349 					if (lookAhead == ']' || lookAhead == '[')
350 						ignoreLastBracket = true;
351 				} else {
352 					//
353 					char lookAhead = lookAhead(pattern, i);
354 					if (lookAhead != '\\' && lookAhead != '['
355 							&& lookAhead != '?' && lookAhead != '*'
356 							&& lookAhead != ' ' && lookBehind(sb) != '\\') {
357 						break;
358 					}
359 				}
360 				sb.append(c);
361 				break;
362 
363 			case '[':
364 				if (in_brackets > 0) {
365 					sb.append('\\').append('[');
366 					ignoreLastBracket = true;
367 				} else {
368 					if (!seenEscape) {
369 						in_brackets++;
370 						ignoreLastBracket = false;
371 					}
372 					sb.append('[');
373 				}
374 				break;
375 
376 			case ']':
377 				if (seenEscape) {
378 					sb.append(']');
379 					ignoreLastBracket = true;
380 					break;
381 				}
382 				if (in_brackets <= 0) {
383 					sb.append('\\').append(']');
384 					ignoreLastBracket = true;
385 					break;
386 				}
387 				char lookBehind = lookBehind(sb);
388 				if ((lookBehind == '[' && !ignoreLastBracket)
389 						|| lookBehind == '^') {
390 					sb.append('\\');
391 					sb.append(']');
392 					ignoreLastBracket = true;
393 				} else {
394 					ignoreLastBracket = false;
395 					if (!in_char_class) {
396 						in_brackets--;
397 						sb.append(']');
398 					} else {
399 						in_char_class = false;
400 						String charCl = checkPosixCharClass(charClass);
401 						// delete last \[:: chars and set the pattern
402 						if (charCl != null) {
403 							sb.setLength(sb.length() - 4);
404 							sb.append(charCl);
405 						}
406 						reset(charClass);
407 					}
408 				}
409 				break;
410 
411 			case '!':
412 				if (in_brackets > 0) {
413 					if (lookBehind(sb) == '[')
414 						sb.append('^');
415 					else
416 						sb.append(c);
417 				} else
418 					sb.append(c);
419 				break;
420 
421 			default:
422 				if (in_char_class)
423 					setNext(charClass, c);
424 				else
425 					sb.append(c);
426 				break;
427 			} // end switch
428 
429 			seenEscape = c == '\\';
430 
431 		} // end for
432 
433 		if (in_brackets > 0)
434 			throw new InvalidPatternException("Not closed bracket?", pattern); //$NON-NLS-1$
435 		try {
436 			return Pattern.compile(sb.toString());
437 		} catch (PatternSyntaxException e) {
438 			InvalidPatternException patternException = new InvalidPatternException(
439 					MessageFormat.format(JGitText.get().invalidIgnoreRule,
440 							pattern),
441 					pattern);
442 			patternException.initCause(e);
443 			throw patternException;
444 		}
445 	}
446 
447 	/**
448 	 * @param buffer
449 	 * @return zero of the buffer is empty, otherwise the last character from
450 	 *         buffer
451 	 */
452 	private static char lookBehind(StringBuilder buffer) {
453 		return buffer.length() > 0 ? buffer.charAt(buffer.length() - 1) : 0;
454 	}
455 
456 	/**
457 	 * @param pattern
458 	 * @param i
459 	 *            current pointer in the pattern
460 	 * @return zero of the index is out of range, otherwise the next character
461 	 *         from given position
462 	 */
463 	private static char lookAhead(String pattern, int i) {
464 		int idx = i + 1;
465 		return idx >= pattern.length() ? 0 : pattern.charAt(idx);
466 	}
467 
468 	private static void setNext(char[] buffer, char c) {
469 		for (int i = 0; i < buffer.length; i++)
470 			if (buffer[i] == 0) {
471 				buffer[i] = c;
472 				break;
473 			}
474 	}
475 
476 	private static void reset(char[] buffer) {
477 		for (int i = 0; i < buffer.length; i++)
478 			buffer[i] = 0;
479 	}
480 
481 	private static String checkPosixCharClass(char[] buffer) {
482 		for (int i = 0; i < POSIX_CHAR_CLASSES.size(); i++) {
483 			String clazz = POSIX_CHAR_CLASSES.get(i);
484 			boolean match = true;
485 			for (int j = 0; j < clazz.length(); j++)
486 				if (buffer[j] != clazz.charAt(j)) {
487 					match = false;
488 					break;
489 				}
490 			if (match)
491 				return JAVA_CHAR_CLASSES.get(i);
492 		}
493 		return null;
494 	}
495 
496 	static String deleteBackslash(String s) {
497 		if (s.indexOf('\\') < 0) {
498 			return s;
499 		}
500 		StringBuilder sb = new StringBuilder(s.length());
501 		for (int i = 0; i < s.length(); i++) {
502 			char ch = s.charAt(i);
503 			if (ch == '\\') {
504 				if (i + 1 == s.length()) {
505 					continue;
506 				}
507 				char next = s.charAt(i + 1);
508 				if (next == '\\') {
509 					sb.append(ch);
510 					i++;
511 					continue;
512 				}
513 				if (!escapedByBackslash(next)) {
514 					continue;
515 				}
516 			}
517 			sb.append(ch);
518 		}
519 		return sb.toString();
520 	}
521 
522 }