View Javadoc
1   /*
2    * Copyright (C) 2009, Google Inc.
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
4    *
5    * This program and the accompanying materials are made available under the
6    * terms of the Eclipse Distribution License v. 1.0 which is available at
7    * https://www.eclipse.org/org/documents/edl-v10.php.
8    *
9    * SPDX-License-Identifier: BSD-3-Clause
10   */
11  
12  package org.eclipse.jgit.revwalk.filter;
13  
14  import java.io.IOException;
15  import java.util.regex.Matcher;
16  import java.util.regex.Pattern;
17  
18  import org.eclipse.jgit.errors.IncorrectObjectTypeException;
19  import org.eclipse.jgit.errors.MissingObjectException;
20  import org.eclipse.jgit.internal.JGitText;
21  import org.eclipse.jgit.lib.Constants;
22  import org.eclipse.jgit.revwalk.RevCommit;
23  import org.eclipse.jgit.revwalk.RevWalk;
24  
25  /**
26   * Abstract filter that searches text using extended regular expressions.
27   */
28  public abstract class PatternMatchRevFilter extends RevFilter {
29  	/**
30  	 * Encode a string pattern for faster matching on byte arrays.
31  	 * <p>
32  	 * Force the characters to our funny UTF-8 only convention that we use on
33  	 * raw buffers. This avoids needing to perform character set decodes on the
34  	 * individual commit buffers.
35  	 *
36  	 * @param patternText
37  	 *            original pattern string supplied by the user or the
38  	 *            application.
39  	 * @return same pattern, but re-encoded to match our funny raw UTF-8
40  	 *         character sequence {@link org.eclipse.jgit.util.RawCharSequence}.
41  	 */
42  	protected static final String forceToRaw(String patternText) {
43  		final byte[] b = Constants.encode(patternText);
44  		final StringBuilder needle = new StringBuilder(b.length);
45  		for (byte element : b)
46  			needle.append((char) (element & 0xff));
47  		return needle.toString();
48  	}
49  
50  	private final String patternText;
51  
52  	private final Matcher compiledPattern;
53  
54  	/**
55  	 * Construct a new pattern matching filter.
56  	 *
57  	 * @param pattern
58  	 *            text of the pattern. Callers may want to surround their
59  	 *            pattern with ".*" on either end to allow matching in the
60  	 *            middle of the string.
61  	 * @param innerString
62  	 *            should .* be wrapped around the pattern of ^ and $ are
63  	 *            missing? Most users will want this set.
64  	 * @param rawEncoding
65  	 *            should {@link #forceToRaw(String)} be applied to the pattern
66  	 *            before compiling it?
67  	 * @param flags
68  	 *            flags from {@link java.util.regex.Pattern} to control how
69  	 *            matching performs.
70  	 */
71  	protected PatternMatchRevFilter(String pattern, final boolean innerString,
72  			final boolean rawEncoding, final int flags) {
73  		if (pattern.length() == 0)
74  			throw new IllegalArgumentException(JGitText.get().cannotMatchOnEmptyString);
75  		patternText = pattern;
76  
77  		if (innerString) {
78  			if (!pattern.startsWith("^") && !pattern.startsWith(".*")) //$NON-NLS-1$ //$NON-NLS-2$
79  				pattern = ".*" + pattern; //$NON-NLS-1$
80  			if (!pattern.endsWith("$") && !pattern.endsWith(".*")) //$NON-NLS-1$ //$NON-NLS-2$
81  				pattern = pattern + ".*"; //$NON-NLS-1$
82  		}
83  		final String p = rawEncoding ? forceToRaw(pattern) : pattern;
84  		compiledPattern = Pattern.compile(p, flags).matcher(""); //$NON-NLS-1$
85  	}
86  
87  	/**
88  	 * Get the pattern this filter uses.
89  	 *
90  	 * @return the pattern this filter is applying to candidate strings.
91  	 */
92  	public String pattern() {
93  		return patternText;
94  	}
95  
96  	/** {@inheritDoc} */
97  	@Override
98  	public boolean include(RevWalk walker, RevCommit cmit)
99  			throws MissingObjectException, IncorrectObjectTypeException,
100 			IOException {
101 		return compiledPattern.reset(text(cmit)).matches();
102 	}
103 
104 	/** {@inheritDoc} */
105 	@Override
106 	public boolean requiresCommitBody() {
107 		return true;
108 	}
109 
110 	/**
111 	 * Obtain the raw text to match against.
112 	 *
113 	 * @param cmit
114 	 *            current commit being evaluated.
115 	 * @return sequence for the commit's content that we need to match on.
116 	 */
117 	protected abstract CharSequence text(RevCommit cmit);
118 
119 	/** {@inheritDoc} */
120 	@SuppressWarnings("nls")
121 	@Override
122 	public String toString() {
123 		return super.toString() + "(\"" + patternText + "\")";
124 	}
125 }