View Javadoc
1   /*
2    * Copyright (C) 2014, 2017 Andrey Loskutov <loskutov@gmx.de> and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  package org.eclipse.jgit.ignore.internal;
11  
12  import static java.lang.Character.isLetter;
13  
14  import java.text.MessageFormat;
15  import java.util.ArrayList;
16  import java.util.Arrays;
17  import java.util.List;
18  import java.util.regex.Pattern;
19  import java.util.regex.PatternSyntaxException;
20  
21  import org.eclipse.jgit.errors.InvalidPatternException;
22  import org.eclipse.jgit.ignore.FastIgnoreRule;
23  import org.eclipse.jgit.internal.JGitText;
24  
25  /**
26   * Various {@link java.lang.String} related utility methods, written mostly to
27   * avoid generation of new String objects (e.g. via splitting Strings etc).
28   */
29  public class Strings {
30  
31  	static char getPathSeparator(Character pathSeparator) {
32  		return pathSeparator == null ? FastIgnoreRule.PATH_SEPARATOR
33  				: pathSeparator.charValue();
34  	}
35  
36  	/**
37  	 * Strip trailing characters
38  	 *
39  	 * @param pattern
40  	 *            non null
41  	 * @param c
42  	 *            character to remove
43  	 * @return new string with all trailing characters removed
44  	 */
45  	public static String stripTrailing(String pattern, char c) {
46  		for (int i = pattern.length() - 1; i >= 0; i--) {
47  			char charAt = pattern.charAt(i);
48  			if (charAt != c) {
49  				if (i == pattern.length() - 1) {
50  					return pattern;
51  				}
52  				return pattern.substring(0, i + 1);
53  			}
54  		}
55  		return ""; //$NON-NLS-1$
56  	}
57  
58  	/**
59  	 * Strip trailing whitespace characters
60  	 *
61  	 * @param pattern
62  	 *            non null
63  	 * @return new string with all trailing whitespace removed
64  	 */
65  	public static String stripTrailingWhitespace(String pattern) {
66  		for (int i = pattern.length() - 1; i >= 0; i--) {
67  			char charAt = pattern.charAt(i);
68  			if (!Character.isWhitespace(charAt)) {
69  				if (i == pattern.length() - 1) {
70  					return pattern;
71  				}
72  				return pattern.substring(0, i + 1);
73  			}
74  		}
75  		return ""; //$NON-NLS-1$
76  	}
77  
78  	/**
79  	 * Check if pattern is a directory pattern ending with a path separator
80  	 *
81  	 * @param pattern
82  	 *            non null
83  	 * @return {@code true} if the last character, which is not whitespace, is a
84  	 *         path separator
85  	 */
86  	public static boolean isDirectoryPattern(String pattern) {
87  		for (int i = pattern.length() - 1; i >= 0; i--) {
88  			char charAt = pattern.charAt(i);
89  			if (!Character.isWhitespace(charAt)) {
90  				return charAt == FastIgnoreRule.PATH_SEPARATOR;
91  			}
92  		}
93  		return false;
94  	}
95  
96  	static int count(String s, char c, boolean ignoreFirstLast) {
97  		int start = 0;
98  		int count = 0;
99  		int length = s.length();
100 		while (start < length) {
101 			start = s.indexOf(c, start);
102 			if (start == -1) {
103 				break;
104 			}
105 			if (!ignoreFirstLast || (start != 0 && start != length - 1)) {
106 				count++;
107 			}
108 			start++;
109 		}
110 		return count;
111 	}
112 
113 	/**
114 	 * Splits given string to substrings by given separator
115 	 *
116 	 * @param pattern
117 	 *            non null
118 	 * @param slash
119 	 *            separator char
120 	 * @return list of substrings
121 	 */
122 	public static List<String> split(String pattern, char slash) {
123 		int count = count(pattern, slash, true);
124 		if (count < 1)
125 			throw new IllegalStateException(
126 					"Pattern must have at least two segments: " + pattern); //$NON-NLS-1$
127 		List<String> segments = new ArrayList<>(count);
128 		int right = 0;
129 		while (true) {
130 			int left = right;
131 			right = pattern.indexOf(slash, right);
132 			if (right == -1) {
133 				if (left < pattern.length())
134 					segments.add(pattern.substring(left));
135 				break;
136 			}
137 			if (right - left > 0)
138 				if (left == 1)
139 					// leading slash should remain by the first pattern
140 					segments.add(pattern.substring(left - 1, right));
141 				else if (right == pattern.length() - 1)
142 					// trailing slash should remain too
143 					segments.add(pattern.substring(left, right + 1));
144 				else
145 					segments.add(pattern.substring(left, right));
146 			right++;
147 		}
148 		return segments;
149 	}
150 
151 	static boolean isWildCard(String pattern) {
152 		return pattern.indexOf('*') != -1 || isComplexWildcard(pattern);
153 	}
154 
155 	private static boolean isComplexWildcard(String pattern) {
156 		int idx1 = pattern.indexOf('[');
157 		if (idx1 != -1) {
158 			return true;
159 		}
160 		if (pattern.indexOf('?') != -1) {
161 			return true;
162 		}
163 		// check if the backslash escapes one of the glob special characters
164 		// if not, backslash is not part of a regex and treated literally
165 		int backSlash = pattern.indexOf('\\');
166 		if (backSlash >= 0) {
167 			int nextIdx = backSlash + 1;
168 			if (pattern.length() == nextIdx) {
169 				return false;
170 			}
171 			char nextChar = pattern.charAt(nextIdx);
172 			if (escapedByBackslash(nextChar)) {
173 				return true;
174 			}
175 			return false;
176 		}
177 		return false;
178 	}
179 
180 	private static boolean escapedByBackslash(char nextChar) {
181 		return nextChar == '?' || nextChar == '*' || nextChar == '[';
182 	}
183 
184 	static PatternState checkWildCards(String pattern) {
185 		if (isComplexWildcard(pattern))
186 			return PatternState.COMPLEX;
187 		int startIdx = pattern.indexOf('*');
188 		if (startIdx < 0)
189 			return PatternState.NONE;
190 
191 		if (startIdx == pattern.length() - 1)
192 			return PatternState.TRAILING_ASTERISK_ONLY;
193 		if (pattern.lastIndexOf('*') == 0)
194 			return PatternState.LEADING_ASTERISK_ONLY;
195 
196 		return PatternState.COMPLEX;
197 	}
198 
199 	enum PatternState {
200 		LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE
201 	}
202 
203 	static final List<String> POSIX_CHAR_CLASSES = Arrays.asList(
204 			"alnum", "alpha", "blank", "cntrl", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
205 			// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
206 			"digit", "graph", "lower", "print", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
207 			// [:digit:] [:graph:] [:lower:] [:print:]
208 			"punct", "space", "upper", "xdigit", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
209 			// [:punct:] [:space:] [:upper:] [:xdigit:]
210 			"word" //$NON-NLS-1$
211 	// [:word:] XXX I don't see it in
212 	// http://man7.org/linux/man-pages/man7/glob.7.html
213 	// but this was in org.eclipse.jgit.fnmatch.GroupHead.java ???
214 			);
215 
216 	private static final String DL = "\\p{javaDigit}\\p{javaLetter}"; //$NON-NLS-1$
217 
218 	static final List<String> JAVA_CHAR_CLASSES = Arrays
219 			.asList("\\p{Alnum}", "\\p{javaLetter}", "\\p{Blank}", "\\p{Cntrl}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
220 					// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
221 					"\\p{javaDigit}", "[\\p{Graph}" + DL + "]", "\\p{Ll}", "[\\p{Print}" + DL + "]", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$
222 					// [:digit:] [:graph:] [:lower:] [:print:]
223 					"\\p{Punct}", "\\p{Space}", "\\p{Lu}", "\\p{XDigit}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
224 					// [:punct:] [:space:] [:upper:] [:xdigit:]
225 					"[" + DL + "_]" //$NON-NLS-1$ //$NON-NLS-2$
226 							// [:word:]
227 			);
228 
229 	// Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are
230 	// not supported by CLI git (at least not by 1.9.1)
231 	static final Pattern UNSUPPORTED = Pattern
232 			.compile("\\[\\[[.=]\\w+[.=]\\]\\]"); //$NON-NLS-1$
233 
234 	/**
235 	 * Conversion from glob to Java regex following two sources: <li>
236 	 * http://man7.org/linux/man-pages/man7/glob.7.html <li>
237 	 * org.eclipse.jgit.fnmatch.FileNameMatcher.java Seems that there are
238 	 * various ways to define what "glob" can be.
239 	 *
240 	 * @param pattern
241 	 *            non null pattern
242 	 *
243 	 * @return Java regex pattern corresponding to given glob pattern
244 	 * @throws InvalidPatternException
245 	 */
246 	static Pattern convertGlob(String pattern) throws InvalidPatternException {
247 		if (UNSUPPORTED.matcher(pattern).find())
248 			throw new InvalidPatternException(
249 					"Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are not supported", //$NON-NLS-1$
250 					pattern);
251 
252 		StringBuilder sb = new StringBuilder(pattern.length());
253 
254 		int in_brackets = 0;
255 		boolean seenEscape = false;
256 		boolean ignoreLastBracket = false;
257 		boolean in_char_class = false;
258 		// 6 is the length of the longest posix char class "xdigit"
259 		char[] charClass = new char[6];
260 
261 		for (int i = 0; i < pattern.length(); i++) {
262 			final char c = pattern.charAt(i);
263 			switch (c) {
264 
265 			case '*':
266 				if (seenEscape || in_brackets > 0)
267 					sb.append(c);
268 				else
269 					sb.append('.').append(c);
270 				break;
271 
272 			case '(': // fall-through
273 			case ')': // fall-through
274 			case '{': // fall-through
275 			case '}': // fall-through
276 			case '+': // fall-through
277 			case '$': // fall-through
278 			case '^': // fall-through
279 			case '|':
280 				if (seenEscape || in_brackets > 0)
281 					sb.append(c);
282 				else
283 					sb.append('\\').append(c);
284 				break;
285 
286 			case '.':
287 				if (seenEscape)
288 					sb.append(c);
289 				else
290 					sb.append('\\').append('.');
291 				break;
292 
293 			case '?':
294 				if (seenEscape || in_brackets > 0)
295 					sb.append(c);
296 				else
297 					sb.append('.');
298 				break;
299 
300 			case ':':
301 				if (in_brackets > 0)
302 					if (lookBehind(sb) == '['
303 							&& isLetter(lookAhead(pattern, i)))
304 						in_char_class = true;
305 				sb.append(':');
306 				break;
307 
308 			case '-':
309 				if (in_brackets > 0) {
310 					if (lookAhead(pattern, i) == ']')
311 						sb.append('\\').append(c);
312 					else
313 						sb.append(c);
314 				} else
315 					sb.append('-');
316 				break;
317 
318 			case '\\':
319 				if (in_brackets > 0) {
320 					char lookAhead = lookAhead(pattern, i);
321 					if (lookAhead == ']' || lookAhead == '[')
322 						ignoreLastBracket = true;
323 				} else {
324 					//
325 					char lookAhead = lookAhead(pattern, i);
326 					if (lookAhead != '\\' && lookAhead != '['
327 							&& lookAhead != '?' && lookAhead != '*'
328 							&& lookAhead != ' ' && lookBehind(sb) != '\\') {
329 						break;
330 					}
331 				}
332 				sb.append(c);
333 				break;
334 
335 			case '[':
336 				if (in_brackets > 0) {
337 					if (!seenEscape) {
338 						sb.append('\\');
339 					}
340 					sb.append('[');
341 					ignoreLastBracket = true;
342 				} else {
343 					if (!seenEscape) {
344 						in_brackets++;
345 						ignoreLastBracket = false;
346 					}
347 					sb.append('[');
348 				}
349 				break;
350 
351 			case ']':
352 				if (seenEscape) {
353 					sb.append(']');
354 					ignoreLastBracket = true;
355 					break;
356 				}
357 				if (in_brackets <= 0) {
358 					sb.append('\\').append(']');
359 					ignoreLastBracket = true;
360 					break;
361 				}
362 				char lookBehind = lookBehind(sb);
363 				if ((lookBehind == '[' && !ignoreLastBracket)
364 						|| lookBehind == '^') {
365 					sb.append('\\');
366 					sb.append(']');
367 					ignoreLastBracket = true;
368 				} else {
369 					ignoreLastBracket = false;
370 					if (!in_char_class) {
371 						in_brackets--;
372 						sb.append(']');
373 					} else {
374 						in_char_class = false;
375 						String charCl = checkPosixCharClass(charClass);
376 						// delete last \[:: chars and set the pattern
377 						if (charCl != null) {
378 							sb.setLength(sb.length() - 4);
379 							sb.append(charCl);
380 						}
381 						reset(charClass);
382 					}
383 				}
384 				break;
385 
386 			case '!':
387 				if (in_brackets > 0) {
388 					if (lookBehind(sb) == '[')
389 						sb.append('^');
390 					else
391 						sb.append(c);
392 				} else
393 					sb.append(c);
394 				break;
395 
396 			default:
397 				if (in_char_class)
398 					setNext(charClass, c);
399 				else
400 					sb.append(c);
401 				break;
402 			} // end switch
403 
404 			seenEscape = c == '\\';
405 
406 		} // end for
407 
408 		if (in_brackets > 0)
409 			throw new InvalidPatternException("Not closed bracket?", pattern); //$NON-NLS-1$
410 		try {
411 			return Pattern.compile(sb.toString(), Pattern.DOTALL);
412 		} catch (PatternSyntaxException e) {
413 			throw new InvalidPatternException(
414 					MessageFormat.format(JGitText.get().invalidIgnoreRule,
415 							pattern),
416 					pattern, e);
417 		}
418 	}
419 
420 	/**
421 	 * @param buffer
422 	 * @return zero of the buffer is empty, otherwise the last character from
423 	 *         buffer
424 	 */
425 	private static char lookBehind(StringBuilder buffer) {
426 		return buffer.length() > 0 ? buffer.charAt(buffer.length() - 1) : 0;
427 	}
428 
429 	/**
430 	 * @param pattern
431 	 * @param i
432 	 *            current pointer in the pattern
433 	 * @return zero of the index is out of range, otherwise the next character
434 	 *         from given position
435 	 */
436 	private static char lookAhead(String pattern, int i) {
437 		int idx = i + 1;
438 		return idx >= pattern.length() ? 0 : pattern.charAt(idx);
439 	}
440 
441 	private static void setNext(char[] buffer, char c) {
442 		for (int i = 0; i < buffer.length; i++)
443 			if (buffer[i] == 0) {
444 				buffer[i] = c;
445 				break;
446 			}
447 	}
448 
449 	private static void reset(char[] buffer) {
450 		for (int i = 0; i < buffer.length; i++)
451 			buffer[i] = 0;
452 	}
453 
454 	private static String checkPosixCharClass(char[] buffer) {
455 		for (int i = 0; i < POSIX_CHAR_CLASSES.size(); i++) {
456 			String clazz = POSIX_CHAR_CLASSES.get(i);
457 			boolean match = true;
458 			for (int j = 0; j < clazz.length(); j++)
459 				if (buffer[j] != clazz.charAt(j)) {
460 					match = false;
461 					break;
462 				}
463 			if (match)
464 				return JAVA_CHAR_CLASSES.get(i);
465 		}
466 		return null;
467 	}
468 
469 	static String deleteBackslash(String s) {
470 		if (s.indexOf('\\') < 0) {
471 			return s;
472 		}
473 		StringBuilder sb = new StringBuilder(s.length());
474 		for (int i = 0; i < s.length(); i++) {
475 			char ch = s.charAt(i);
476 			if (ch == '\\') {
477 				if (i + 1 == s.length()) {
478 					continue;
479 				}
480 				char next = s.charAt(i + 1);
481 				if (next == '\\') {
482 					sb.append(ch);
483 					i++;
484 					continue;
485 				}
486 				if (!escapedByBackslash(next)) {
487 					continue;
488 				}
489 			}
490 			sb.append(ch);
491 		}
492 		return sb.toString();
493 	}
494 
495 }