View Javadoc
1   /*
2    * Copyright (C) 2014, 2017 Andrey Loskutov <loskutov@gmx.de>
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  package org.eclipse.jgit.ignore.internal;
44  
45  import static java.lang.Character.isLetter;
46  
47  import java.text.MessageFormat;
48  import java.util.ArrayList;
49  import java.util.Arrays;
50  import java.util.List;
51  import java.util.regex.Pattern;
52  import java.util.regex.PatternSyntaxException;
53  
54  import org.eclipse.jgit.errors.InvalidPatternException;
55  import org.eclipse.jgit.ignore.FastIgnoreRule;
56  import org.eclipse.jgit.internal.JGitText;
57  
58  /**
59   * Various {@link java.lang.String} related utility methods, written mostly to
60   * avoid generation of new String objects (e.g. via splitting Strings etc).
61   */
62  public class Strings {
63  
64  	static char getPathSeparator(Character pathSeparator) {
65  		return pathSeparator == null ? FastIgnoreRule.PATH_SEPARATOR
66  				: pathSeparator.charValue();
67  	}
68  
69  	/**
70  	 * Strip trailing characters
71  	 *
72  	 * @param pattern
73  	 *            non null
74  	 * @param c
75  	 *            character to remove
76  	 * @return new string with all trailing characters removed
77  	 */
78  	public static String stripTrailing(String pattern, char c) {
79  		for (int i = pattern.length() - 1; i >= 0; i--) {
80  			char charAt = pattern.charAt(i);
81  			if (charAt != c) {
82  				if (i == pattern.length() - 1) {
83  					return pattern;
84  				}
85  				return pattern.substring(0, i + 1);
86  			}
87  		}
88  		return ""; //$NON-NLS-1$
89  	}
90  
91  	/**
92  	 * Strip trailing whitespace characters
93  	 *
94  	 * @param pattern
95  	 *            non null
96  	 * @return new string with all trailing whitespace removed
97  	 */
98  	public static String stripTrailingWhitespace(String pattern) {
99  		for (int i = pattern.length() - 1; i >= 0; i--) {
100 			char charAt = pattern.charAt(i);
101 			if (!Character.isWhitespace(charAt)) {
102 				if (i == pattern.length() - 1) {
103 					return pattern;
104 				}
105 				return pattern.substring(0, i + 1);
106 			}
107 		}
108 		return ""; //$NON-NLS-1$
109 	}
110 
111 	/**
112 	 * Check if pattern is a directory pattern ending with a path separator
113 	 *
114 	 * @param pattern
115 	 *            non null
116 	 * @return {@code true} if the last character, which is not whitespace, is a
117 	 *         path separator
118 	 */
119 	public static boolean isDirectoryPattern(String pattern) {
120 		for (int i = pattern.length() - 1; i >= 0; i--) {
121 			char charAt = pattern.charAt(i);
122 			if (!Character.isWhitespace(charAt)) {
123 				return charAt == FastIgnoreRule.PATH_SEPARATOR;
124 			}
125 		}
126 		return false;
127 	}
128 
129 	static int count(String s, char c, boolean ignoreFirstLast) {
130 		int start = 0;
131 		int count = 0;
132 		int length = s.length();
133 		while (start < length) {
134 			start = s.indexOf(c, start);
135 			if (start == -1) {
136 				break;
137 			}
138 			if (!ignoreFirstLast || (start != 0 && start != length - 1)) {
139 				count++;
140 			}
141 			start++;
142 		}
143 		return count;
144 	}
145 
146 	/**
147 	 * Splits given string to substrings by given separator
148 	 *
149 	 * @param pattern
150 	 *            non null
151 	 * @param slash
152 	 *            separator char
153 	 * @return list of substrings
154 	 */
155 	public static List<String> split(String pattern, char slash) {
156 		int count = count(pattern, slash, true);
157 		if (count < 1)
158 			throw new IllegalStateException(
159 					"Pattern must have at least two segments: " + pattern); //$NON-NLS-1$
160 		List<String> segments = new ArrayList<>(count);
161 		int right = 0;
162 		while (true) {
163 			int left = right;
164 			right = pattern.indexOf(slash, right);
165 			if (right == -1) {
166 				if (left < pattern.length())
167 					segments.add(pattern.substring(left));
168 				break;
169 			}
170 			if (right - left > 0)
171 				if (left == 1)
172 					// leading slash should remain by the first pattern
173 					segments.add(pattern.substring(left - 1, right));
174 				else if (right == pattern.length() - 1)
175 					// trailing slash should remain too
176 					segments.add(pattern.substring(left, right + 1));
177 				else
178 					segments.add(pattern.substring(left, right));
179 			right++;
180 		}
181 		return segments;
182 	}
183 
184 	static boolean isWildCard(String pattern) {
185 		return pattern.indexOf('*') != -1 || isComplexWildcard(pattern);
186 	}
187 
188 	private static boolean isComplexWildcard(String pattern) {
189 		int idx1 = pattern.indexOf('[');
190 		if (idx1 != -1) {
191 			return true;
192 		}
193 		if (pattern.indexOf('?') != -1) {
194 			return true;
195 		} else {
196 			// check if the backslash escapes one of the glob special characters
197 			// if not, backslash is not part of a regex and treated literally
198 			int backSlash = pattern.indexOf('\\');
199 			if (backSlash >= 0) {
200 				int nextIdx = backSlash + 1;
201 				if (pattern.length() == nextIdx) {
202 					return false;
203 				}
204 				char nextChar = pattern.charAt(nextIdx);
205 				if (escapedByBackslash(nextChar)) {
206 					return true;
207 				} else {
208 					return false;
209 				}
210 			}
211 		}
212 		return false;
213 	}
214 
215 	private static boolean escapedByBackslash(char nextChar) {
216 		return nextChar == '?' || nextChar == '*' || nextChar == '[';
217 	}
218 
219 	static PatternState checkWildCards(String pattern) {
220 		if (isComplexWildcard(pattern))
221 			return PatternState.COMPLEX;
222 		int startIdx = pattern.indexOf('*');
223 		if (startIdx < 0)
224 			return PatternState.NONE;
225 
226 		if (startIdx == pattern.length() - 1)
227 			return PatternState.TRAILING_ASTERISK_ONLY;
228 		if (pattern.lastIndexOf('*') == 0)
229 			return PatternState.LEADING_ASTERISK_ONLY;
230 
231 		return PatternState.COMPLEX;
232 	}
233 
234 	static enum PatternState {
235 		LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE
236 	}
237 
238 	final static List<String> POSIX_CHAR_CLASSES = Arrays.asList(
239 			"alnum", "alpha", "blank", "cntrl", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
240 			// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
241 			"digit", "graph", "lower", "print", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
242 			// [:digit:] [:graph:] [:lower:] [:print:]
243 			"punct", "space", "upper", "xdigit", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
244 			// [:punct:] [:space:] [:upper:] [:xdigit:]
245 			"word" //$NON-NLS-1$
246 	// [:word:] XXX I don't see it in
247 	// http://man7.org/linux/man-pages/man7/glob.7.html
248 	// but this was in org.eclipse.jgit.fnmatch.GroupHead.java ???
249 			);
250 
251 	private static final String DL = "\\p{javaDigit}\\p{javaLetter}"; //$NON-NLS-1$
252 
253 	final static List<String> JAVA_CHAR_CLASSES = Arrays
254 			.asList("\\p{Alnum}", "\\p{javaLetter}", "\\p{Blank}", "\\p{Cntrl}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
255 					// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
256 					"\\p{javaDigit}", "[\\p{Graph}" + DL + "]", "\\p{Ll}", "[\\p{Print}" + DL + "]", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$
257 					// [:digit:] [:graph:] [:lower:] [:print:]
258 					"\\p{Punct}", "\\p{Space}", "\\p{Lu}", "\\p{XDigit}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
259 					// [:punct:] [:space:] [:upper:] [:xdigit:]
260 					"[" + DL + "_]" //$NON-NLS-1$ //$NON-NLS-2$
261 							// [:word:]
262 			);
263 
264 	// Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are
265 	// not supported by CLI git (at least not by 1.9.1)
266 	final static Pattern UNSUPPORTED = Pattern
267 			.compile("\\[\\[[.=]\\w+[.=]\\]\\]"); //$NON-NLS-1$
268 
269 	/**
270 	 * Conversion from glob to Java regex following two sources: <li>
271 	 * http://man7.org/linux/man-pages/man7/glob.7.html <li>
272 	 * org.eclipse.jgit.fnmatch.FileNameMatcher.java Seems that there are
273 	 * various ways to define what "glob" can be.
274 	 *
275 	 * @param pattern
276 	 *            non null pattern
277 	 *
278 	 * @return Java regex pattern corresponding to given glob pattern
279 	 * @throws InvalidPatternException
280 	 */
281 	static Pattern convertGlob(String pattern) throws InvalidPatternException {
282 		if (UNSUPPORTED.matcher(pattern).find())
283 			throw new InvalidPatternException(
284 					"Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are not supported", //$NON-NLS-1$
285 					pattern);
286 
287 		StringBuilder sb = new StringBuilder(pattern.length());
288 
289 		int in_brackets = 0;
290 		boolean seenEscape = false;
291 		boolean ignoreLastBracket = false;
292 		boolean in_char_class = false;
293 		// 6 is the length of the longest posix char class "xdigit"
294 		char[] charClass = new char[6];
295 
296 		for (int i = 0; i < pattern.length(); i++) {
297 			final char c = pattern.charAt(i);
298 			switch (c) {
299 
300 			case '*':
301 				if (seenEscape || in_brackets > 0)
302 					sb.append(c);
303 				else
304 					sb.append('.').append(c);
305 				break;
306 
307 			case '(': // fall-through
308 			case ')': // fall-through
309 			case '{': // fall-through
310 			case '}': // fall-through
311 			case '+': // fall-through
312 			case '$': // fall-through
313 			case '^': // fall-through
314 			case '|':
315 				if (seenEscape || in_brackets > 0)
316 					sb.append(c);
317 				else
318 					sb.append('\\').append(c);
319 				break;
320 
321 			case '.':
322 				if (seenEscape)
323 					sb.append(c);
324 				else
325 					sb.append('\\').append('.');
326 				break;
327 
328 			case '?':
329 				if (seenEscape || in_brackets > 0)
330 					sb.append(c);
331 				else
332 					sb.append('.');
333 				break;
334 
335 			case ':':
336 				if (in_brackets > 0)
337 					if (lookBehind(sb) == '['
338 							&& isLetter(lookAhead(pattern, i)))
339 						in_char_class = true;
340 				sb.append(':');
341 				break;
342 
343 			case '-':
344 				if (in_brackets > 0) {
345 					if (lookAhead(pattern, i) == ']')
346 						sb.append('\\').append(c);
347 					else
348 						sb.append(c);
349 				} else
350 					sb.append('-');
351 				break;
352 
353 			case '\\':
354 				if (in_brackets > 0) {
355 					char lookAhead = lookAhead(pattern, i);
356 					if (lookAhead == ']' || lookAhead == '[')
357 						ignoreLastBracket = true;
358 				} else {
359 					//
360 					char lookAhead = lookAhead(pattern, i);
361 					if (lookAhead != '\\' && lookAhead != '['
362 							&& lookAhead != '?' && lookAhead != '*'
363 							&& lookAhead != ' ' && lookBehind(sb) != '\\') {
364 						break;
365 					}
366 				}
367 				sb.append(c);
368 				break;
369 
370 			case '[':
371 				if (in_brackets > 0) {
372 					if (!seenEscape) {
373 						sb.append('\\');
374 					}
375 					sb.append('[');
376 					ignoreLastBracket = true;
377 				} else {
378 					if (!seenEscape) {
379 						in_brackets++;
380 						ignoreLastBracket = false;
381 					}
382 					sb.append('[');
383 				}
384 				break;
385 
386 			case ']':
387 				if (seenEscape) {
388 					sb.append(']');
389 					ignoreLastBracket = true;
390 					break;
391 				}
392 				if (in_brackets <= 0) {
393 					sb.append('\\').append(']');
394 					ignoreLastBracket = true;
395 					break;
396 				}
397 				char lookBehind = lookBehind(sb);
398 				if ((lookBehind == '[' && !ignoreLastBracket)
399 						|| lookBehind == '^') {
400 					sb.append('\\');
401 					sb.append(']');
402 					ignoreLastBracket = true;
403 				} else {
404 					ignoreLastBracket = false;
405 					if (!in_char_class) {
406 						in_brackets--;
407 						sb.append(']');
408 					} else {
409 						in_char_class = false;
410 						String charCl = checkPosixCharClass(charClass);
411 						// delete last \[:: chars and set the pattern
412 						if (charCl != null) {
413 							sb.setLength(sb.length() - 4);
414 							sb.append(charCl);
415 						}
416 						reset(charClass);
417 					}
418 				}
419 				break;
420 
421 			case '!':
422 				if (in_brackets > 0) {
423 					if (lookBehind(sb) == '[')
424 						sb.append('^');
425 					else
426 						sb.append(c);
427 				} else
428 					sb.append(c);
429 				break;
430 
431 			default:
432 				if (in_char_class)
433 					setNext(charClass, c);
434 				else
435 					sb.append(c);
436 				break;
437 			} // end switch
438 
439 			seenEscape = c == '\\';
440 
441 		} // end for
442 
443 		if (in_brackets > 0)
444 			throw new InvalidPatternException("Not closed bracket?", pattern); //$NON-NLS-1$
445 		try {
446 			return Pattern.compile(sb.toString(), Pattern.DOTALL);
447 		} catch (PatternSyntaxException e) {
448 			throw new InvalidPatternException(
449 					MessageFormat.format(JGitText.get().invalidIgnoreRule,
450 							pattern),
451 					pattern, e);
452 		}
453 	}
454 
455 	/**
456 	 * @param buffer
457 	 * @return zero of the buffer is empty, otherwise the last character from
458 	 *         buffer
459 	 */
460 	private static char lookBehind(StringBuilder buffer) {
461 		return buffer.length() > 0 ? buffer.charAt(buffer.length() - 1) : 0;
462 	}
463 
464 	/**
465 	 * @param pattern
466 	 * @param i
467 	 *            current pointer in the pattern
468 	 * @return zero of the index is out of range, otherwise the next character
469 	 *         from given position
470 	 */
471 	private static char lookAhead(String pattern, int i) {
472 		int idx = i + 1;
473 		return idx >= pattern.length() ? 0 : pattern.charAt(idx);
474 	}
475 
476 	private static void setNext(char[] buffer, char c) {
477 		for (int i = 0; i < buffer.length; i++)
478 			if (buffer[i] == 0) {
479 				buffer[i] = c;
480 				break;
481 			}
482 	}
483 
484 	private static void reset(char[] buffer) {
485 		for (int i = 0; i < buffer.length; i++)
486 			buffer[i] = 0;
487 	}
488 
489 	private static String checkPosixCharClass(char[] buffer) {
490 		for (int i = 0; i < POSIX_CHAR_CLASSES.size(); i++) {
491 			String clazz = POSIX_CHAR_CLASSES.get(i);
492 			boolean match = true;
493 			for (int j = 0; j < clazz.length(); j++)
494 				if (buffer[j] != clazz.charAt(j)) {
495 					match = false;
496 					break;
497 				}
498 			if (match)
499 				return JAVA_CHAR_CLASSES.get(i);
500 		}
501 		return null;
502 	}
503 
504 	static String deleteBackslash(String s) {
505 		if (s.indexOf('\\') < 0) {
506 			return s;
507 		}
508 		StringBuilder sb = new StringBuilder(s.length());
509 		for (int i = 0; i < s.length(); i++) {
510 			char ch = s.charAt(i);
511 			if (ch == '\\') {
512 				if (i + 1 == s.length()) {
513 					continue;
514 				}
515 				char next = s.charAt(i + 1);
516 				if (next == '\\') {
517 					sb.append(ch);
518 					i++;
519 					continue;
520 				}
521 				if (!escapedByBackslash(next)) {
522 					continue;
523 				}
524 			}
525 			sb.append(ch);
526 		}
527 		return sb.toString();
528 	}
529 
530 }