View Javadoc
1   /*
2    * Copyright (C) 2014, Andrey Loskutov <loskutov@gmx.de>
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  package org.eclipse.jgit.ignore.internal;
44  
45  import static java.lang.Character.isLetter;
46  
47  import java.text.MessageFormat;
48  import java.util.ArrayList;
49  import java.util.Arrays;
50  import java.util.List;
51  import java.util.regex.Pattern;
52  import java.util.regex.PatternSyntaxException;
53  
54  import org.eclipse.jgit.errors.InvalidPatternException;
55  import org.eclipse.jgit.ignore.FastIgnoreRule;
56  import org.eclipse.jgit.internal.JGitText;
57  
58  /**
59   * Various {@link String} related utility methods, written mostly to avoid
60   * generation of new String objects (e.g. via splitting Strings etc).
61   */
62  public class Strings {
63  
64  	static char getPathSeparator(Character pathSeparator) {
65  		return pathSeparator == null ? FastIgnoreRule.PATH_SEPARATOR
66  				: pathSeparator.charValue();
67  	}
68  
69  	/**
70  	 * @param pattern
71  	 *            non null
72  	 * @param c
73  	 *            character to remove
74  	 * @return new string with all trailing characters removed
75  	 */
76  	public static String stripTrailing(String pattern, char c) {
77  		for (int i = pattern.length() - 1; i >= 0; i--) {
78  			char charAt = pattern.charAt(i);
79  			if (charAt != c) {
80  				if (i == pattern.length() - 1) {
81  					return pattern;
82  				}
83  				return pattern.substring(0, i + 1);
84  			}
85  		}
86  		return ""; //$NON-NLS-1$
87  	}
88  
89  	/**
90  	 * @param pattern
91  	 *            non null
92  	 * @return new string with all trailing whitespace removed
93  	 */
94  	public static String stripTrailingWhitespace(String pattern) {
95  		for (int i = pattern.length() - 1; i >= 0; i--) {
96  			char charAt = pattern.charAt(i);
97  			if (!Character.isWhitespace(charAt)) {
98  				if (i == pattern.length() - 1) {
99  					return pattern;
100 				}
101 				return pattern.substring(0, i + 1);
102 			}
103 		}
104 		return ""; //$NON-NLS-1$
105 	}
106 
107 	/**
108 	 * @param pattern
109 	 *            non null
110 	 * @return true if the last character, which is not whitespace, is a path
111 	 *         separator
112 	 */
113 	public static boolean isDirectoryPattern(String pattern) {
114 		for (int i = pattern.length() - 1; i >= 0; i--) {
115 			char charAt = pattern.charAt(i);
116 			if (!Character.isWhitespace(charAt)) {
117 				return charAt == FastIgnoreRule.PATH_SEPARATOR;
118 			}
119 		}
120 		return false;
121 	}
122 
123 	static int count(String s, char c, boolean ignoreFirstLast) {
124 		int start = 0;
125 		int count = 0;
126 		while (true) {
127 			start = s.indexOf(c, start);
128 			if (start == -1)
129 				break;
130 			if (!ignoreFirstLast || (start != 0 && start != s.length()))
131 				count++;
132 			start++;
133 		}
134 		return count;
135 	}
136 
137 	/**
138 	 * Splits given string to substrings by given separator
139 	 *
140 	 * @param pattern
141 	 *            non null
142 	 * @param slash
143 	 *            separator char
144 	 * @return list of substrings
145 	 */
146 	public static List<String> split(String pattern, char slash) {
147 		int count = count(pattern, slash, true);
148 		if (count < 1)
149 			throw new IllegalStateException(
150 					"Pattern must have at least two segments: " + pattern); //$NON-NLS-1$
151 		List<String> segments = new ArrayList<>(count);
152 		int right = 0;
153 		while (true) {
154 			int left = right;
155 			right = pattern.indexOf(slash, right);
156 			if (right == -1) {
157 				if (left < pattern.length())
158 					segments.add(pattern.substring(left));
159 				break;
160 			}
161 			if (right - left > 0)
162 				if (left == 1)
163 					// leading slash should remain by the first pattern
164 					segments.add(pattern.substring(left - 1, right));
165 				else if (right == pattern.length() - 1)
166 					// trailing slash should remain too
167 					segments.add(pattern.substring(left, right + 1));
168 				else
169 					segments.add(pattern.substring(left, right));
170 			right++;
171 		}
172 		return segments;
173 	}
174 
175 	static boolean isWildCard(String pattern) {
176 		return pattern.indexOf('*') != -1 || isComplexWildcard(pattern);
177 	}
178 
179 	private static boolean isComplexWildcard(String pattern) {
180 		int idx1 = pattern.indexOf('[');
181 		if (idx1 != -1) {
182 			return true;
183 		}
184 		if (pattern.indexOf('?') != -1) {
185 			return true;
186 		} else {
187 			// check if the backslash escapes one of the glob special characters
188 			// if not, backslash is not part of a regex and treated literally
189 			int backSlash = pattern.indexOf('\\');
190 			if (backSlash >= 0) {
191 				int nextIdx = backSlash + 1;
192 				if (pattern.length() == nextIdx) {
193 					return false;
194 				}
195 				char nextChar = pattern.charAt(nextIdx);
196 				if (escapedByBackslash(nextChar)) {
197 					return true;
198 				} else {
199 					return false;
200 				}
201 			}
202 		}
203 		return false;
204 	}
205 
206 	private static boolean escapedByBackslash(char nextChar) {
207 		return nextChar == '?' || nextChar == '*' || nextChar == '[';
208 	}
209 
210 	static PatternState checkWildCards(String pattern) {
211 		if (isComplexWildcard(pattern))
212 			return PatternState.COMPLEX;
213 		int startIdx = pattern.indexOf('*');
214 		if (startIdx < 0)
215 			return PatternState.NONE;
216 
217 		if (startIdx == pattern.length() - 1)
218 			return PatternState.TRAILING_ASTERISK_ONLY;
219 		if (pattern.lastIndexOf('*') == 0)
220 			return PatternState.LEADING_ASTERISK_ONLY;
221 
222 		return PatternState.COMPLEX;
223 	}
224 
225 	static enum PatternState {
226 		LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE
227 	}
228 
229 	final static List<String> POSIX_CHAR_CLASSES = Arrays.asList(
230 			"alnum", "alpha", "blank", "cntrl", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
231 			// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
232 			"digit", "graph", "lower", "print", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
233 			// [:digit:] [:graph:] [:lower:] [:print:]
234 			"punct", "space", "upper", "xdigit", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
235 			// [:punct:] [:space:] [:upper:] [:xdigit:]
236 			"word" //$NON-NLS-1$
237 	// [:word:] XXX I don't see it in
238 	// http://man7.org/linux/man-pages/man7/glob.7.html
239 	// but this was in org.eclipse.jgit.fnmatch.GroupHead.java ???
240 			);
241 
242 	private static final String DL = "\\p{javaDigit}\\p{javaLetter}"; //$NON-NLS-1$
243 
244 	final static List<String> JAVA_CHAR_CLASSES = Arrays
245 			.asList("\\p{Alnum}", "\\p{javaLetter}", "\\p{Blank}", "\\p{Cntrl}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
246 					// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
247 					"\\p{javaDigit}", "[\\p{Graph}" + DL + "]", "\\p{Ll}", "[\\p{Print}" + DL + "]", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$
248 					// [:digit:] [:graph:] [:lower:] [:print:]
249 					"\\p{Punct}", "\\p{Space}", "\\p{Lu}", "\\p{XDigit}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
250 					// [:punct:] [:space:] [:upper:] [:xdigit:]
251 					"[" + DL + "_]" //$NON-NLS-1$ //$NON-NLS-2$
252 							// [:word:]
253 			);
254 
255 	// Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are
256 	// not supported by CLI git (at least not by 1.9.1)
257 	final static Pattern UNSUPPORTED = Pattern
258 			.compile("\\[\\[[.=]\\w+[.=]\\]\\]"); //$NON-NLS-1$
259 
	/**
	 * Conversion from glob to Java regex. The conversion follows two sources,
	 * since there seem to be various ways to define what "glob" can be:
	 * <ul>
	 * <li>http://man7.org/linux/man-pages/man7/glob.7.html</li>
	 * <li>org.eclipse.jgit.fnmatch.FileNameMatcher.java</li>
	 * </ul>
	 *
	 * @param pattern
	 *            non null pattern
	 *
	 * @return Java regex pattern corresponding to given glob pattern
	 * @throws InvalidPatternException
	 *             if the pattern contains a collating symbol or an
	 *             equivalence class expression, if a bracket expression is
	 *             still open at the end of the pattern, or if the generated
	 *             regular expression cannot be compiled
	 */
	static Pattern convertGlob(String pattern) throws InvalidPatternException {
		if (UNSUPPORTED.matcher(pattern).find())
			throw new InvalidPatternException(
					"Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are not supported", //$NON-NLS-1$
					pattern);

		// receives the generated regular expression
		StringBuilder sb = new StringBuilder(pattern.length());

		// greater than zero while inside of a bracket expression "[...]"
		int in_brackets = 0;
		// true if the previous pattern character was a backslash
		boolean seenEscape = false;
		// set when a bracket was handled as a literal character; a ']' which
		// directly follows an emitted '[' is escaped unless this flag is set
		boolean ignoreLastBracket = false;
		// true while the name of a posix char class like [:alpha:] is read
		boolean in_char_class = false;
		// 6 is the length of the longest posix char class "xdigit"
		char[] charClass = new char[6];

		for (int i = 0; i < pattern.length(); i++) {
			final char c = pattern.charAt(i);
			switch (c) {

			case '*':
				// literal if escaped or inside brackets, otherwise ".*"
				if (seenEscape || in_brackets > 0)
					sb.append(c);
				else
					sb.append('.').append(c);
				break;

			// regex meta characters which have no special meaning in glob:
			// escape them unless already escaped or inside brackets
			case '(': // fall-through
			case ')': // fall-through
			case '{': // fall-through
			case '}': // fall-through
			case '+': // fall-through
			case '$': // fall-through
			case '^': // fall-through
			case '|':
				if (seenEscape || in_brackets > 0)
					sb.append(c);
				else
					sb.append('\\').append(c);
				break;

			case '.':
				// emitted as "\." unless the previous character was a backslash
				if (seenEscape)
					sb.append(c);
				else
					sb.append('\\').append('.');
				break;

			case '?':
				// literal if escaped or inside brackets, otherwise "."
				if (seenEscape || in_brackets > 0)
					sb.append(c);
				else
					sb.append('.');
				break;

			case ':':
				// inside brackets, ':' right after an emitted '[' and followed
				// by a letter starts the name of a posix char class
				if (in_brackets > 0)
					if (lookBehind(sb) == '['
							&& isLetter(lookAhead(pattern, i)))
						in_char_class = true;
				sb.append(':');
				break;

			case '-':
				if (in_brackets > 0) {
					// '-' directly before ']' is escaped, otherwise kept as is
					if (lookAhead(pattern, i) == ']')
						sb.append('\\').append(c);
					else
						sb.append(c);
				} else
					sb.append('-');
				break;

			case '\\':
				if (in_brackets > 0) {
					// backslash before a bracket inside of brackets: remember
					// that this bracket is a literal one
					char lookAhead = lookAhead(pattern, i);
					if (lookAhead == ']' || lookAhead == '[')
						ignoreLastBracket = true;
				} else {
					// outside of brackets the backslash is dropped, unless it
					// precedes one of \ [ ? * or space, or the last emitted
					// character is a backslash
					char lookAhead = lookAhead(pattern, i);
					if (lookAhead != '\\' && lookAhead != '['
							&& lookAhead != '?' && lookAhead != '*'
							&& lookAhead != ' ' && lookBehind(sb) != '\\') {
						break;
					}
				}
				sb.append(c);
				break;

			case '[':
				if (in_brackets > 0) {
					// nested '[' inside of brackets is emitted escaped
					sb.append('\\').append('[');
					ignoreLastBracket = true;
				} else {
					// only a '[' not preceded by a backslash opens a bracket
					// expression
					if (!seenEscape) {
						in_brackets++;
						ignoreLastBracket = false;
					}
					sb.append('[');
				}
				break;

			case ']':
				if (seenEscape) {
					// preceded by a backslash: emitted as is
					sb.append(']');
					ignoreLastBracket = true;
					break;
				}
				if (in_brackets <= 0) {
					// no open bracket expression: literal ']'
					sb.append('\\').append(']');
					ignoreLastBracket = true;
					break;
				}
				char lookBehind = lookBehind(sb);
				if ((lookBehind == '[' && !ignoreLastBracket)
						|| lookBehind == '^') {
					// ']' directly after the emitted '[' or '^' is a literal
					sb.append('\\');
					sb.append(']');
					ignoreLastBracket = true;
				} else {
					ignoreLastBracket = false;
					if (!in_char_class) {
						// closes the bracket expression
						in_brackets--;
						sb.append(']');
					} else {
						// closes the posix char class name collected so far
						in_char_class = false;
						String charCl = checkPosixCharClass(charClass);
						// delete last \[:: chars and set the pattern
						if (charCl != null) {
							sb.setLength(sb.length() - 4);
							sb.append(charCl);
						}
						reset(charClass);
					}
				}
				break;

			case '!':
				if (in_brackets > 0) {
					// '!' directly after the emitted '[' is written as '^'
					if (lookBehind(sb) == '[')
						sb.append('^');
					else
						sb.append(c);
				} else
					sb.append(c);
				break;

			default:
				// while a posix char class name is read, characters are
				// collected in charClass instead of being emitted
				if (in_char_class)
					setNext(charClass, c);
				else
					sb.append(c);
				break;
			} // end switch

			seenEscape = c == '\\';

		} // end for

		if (in_brackets > 0)
			throw new InvalidPatternException("Not closed bracket?", pattern); //$NON-NLS-1$
		try {
			return Pattern.compile(sb.toString());
		} catch (PatternSyntaxException e) {
			// report the regex syntax error as invalid pattern, keeping the
			// original exception as cause
			InvalidPatternException patternException = new InvalidPatternException(
					MessageFormat.format(JGitText.get().invalidIgnoreRule,
							pattern),
					pattern);
			patternException.initCause(e);
			throw patternException;
		}
	}
444 
445 	/**
446 	 * @param buffer
447 	 * @return zero of the buffer is empty, otherwise the last character from
448 	 *         buffer
449 	 */
450 	private static char lookBehind(StringBuilder buffer) {
451 		return buffer.length() > 0 ? buffer.charAt(buffer.length() - 1) : 0;
452 	}
453 
454 	/**
455 	 * @param pattern
456 	 * @param i
457 	 *            current pointer in the pattern
458 	 * @return zero of the index is out of range, otherwise the next character
459 	 *         from given position
460 	 */
461 	private static char lookAhead(String pattern, int i) {
462 		int idx = i + 1;
463 		return idx >= pattern.length() ? 0 : pattern.charAt(idx);
464 	}
465 
466 	private static void setNext(char[] buffer, char c) {
467 		for (int i = 0; i < buffer.length; i++)
468 			if (buffer[i] == 0) {
469 				buffer[i] = c;
470 				break;
471 			}
472 	}
473 
474 	private static void reset(char[] buffer) {
475 		for (int i = 0; i < buffer.length; i++)
476 			buffer[i] = 0;
477 	}
478 
479 	private static String checkPosixCharClass(char[] buffer) {
480 		for (int i = 0; i < POSIX_CHAR_CLASSES.size(); i++) {
481 			String clazz = POSIX_CHAR_CLASSES.get(i);
482 			boolean match = true;
483 			for (int j = 0; j < clazz.length(); j++)
484 				if (buffer[j] != clazz.charAt(j)) {
485 					match = false;
486 					break;
487 				}
488 			if (match)
489 				return JAVA_CHAR_CLASSES.get(i);
490 		}
491 		return null;
492 	}
493 
494 	static String deleteBackslash(String s) {
495 		if (s.indexOf('\\') < 0) {
496 			return s;
497 		}
498 		StringBuilder sb = new StringBuilder(s.length());
499 		for (int i = 0; i < s.length(); i++) {
500 			char ch = s.charAt(i);
501 			if (ch == '\\') {
502 				if (i + 1 == s.length()) {
503 					continue;
504 				}
505 				char next = s.charAt(i + 1);
506 				if (next == '\\') {
507 					sb.append(ch);
508 					i++;
509 					continue;
510 				}
511 				if (!escapedByBackslash(next)) {
512 					continue;
513 				}
514 			}
515 			sb.append(ch);
516 		}
517 		return sb.toString();
518 	}
519 
520 }