1 /*
2 * Copyright (C) 2009, Google Inc.
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4 * and other copyright owners as documented in the project's IP log.
5 *
6 * This program and the accompanying materials are made available
7 * under the terms of the Eclipse Distribution License v1.0 which
8 * accompanies this distribution, is reproduced below, and is
9 * available at http://www.eclipse.org/org/documents/edl-v10.php
10 *
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials provided
23 * with the distribution.
24 *
25 * - Neither the name of the Eclipse Foundation, Inc. nor the
26 * names of its contributors may be used to endorse or promote
27 * products derived from this software without specific prior
28 * written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 package org.eclipse.jgit.revwalk.filter;
46
47 import java.io.IOException;
48 import java.util.regex.Matcher;
49 import java.util.regex.Pattern;
50
51 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
52 import org.eclipse.jgit.errors.MissingObjectException;
53 import org.eclipse.jgit.internal.JGitText;
54 import org.eclipse.jgit.lib.Constants;
55 import org.eclipse.jgit.revwalk.RevCommit;
56 import org.eclipse.jgit.revwalk.RevWalk;
57
58 /**
59 * Abstract filter that searches text using extended regular expressions.
60 */
61 public abstract class PatternMatchRevFilter extends RevFilter {
62 /**
63 * Encode a string pattern for faster matching on byte arrays.
64 * <p>
65 * Force the characters to our funny UTF-8 only convention that we use on
66 * raw buffers. This avoids needing to perform character set decodes on the
67 * individual commit buffers.
68 *
69 * @param patternText
70 * original pattern string supplied by the user or the
71 * application.
72 * @return same pattern, but re-encoded to match our funny raw UTF-8
73 * character sequence {@link org.eclipse.jgit.util.RawCharSequence}.
74 */
75 protected static final String forceToRaw(String patternText) {
76 final byte[] b = Constants.encode(patternText);
77 final StringBuilder needle = new StringBuilder(b.length);
78 for (int i = 0; i < b.length; i++)
79 needle.append((char) (b[i] & 0xff));
80 return needle.toString();
81 }
82
83 private final String patternText;
84
85 private final Matcher compiledPattern;
86
87 /**
88 * Construct a new pattern matching filter.
89 *
90 * @param pattern
91 * text of the pattern. Callers may want to surround their
92 * pattern with ".*" on either end to allow matching in the
93 * middle of the string.
94 * @param innerString
95 * should .* be wrapped around the pattern of ^ and $ are
96 * missing? Most users will want this set.
97 * @param rawEncoding
98 * should {@link #forceToRaw(String)} be applied to the pattern
99 * before compiling it?
100 * @param flags
101 * flags from {@link java.util.regex.Pattern} to control how
102 * matching performs.
103 */
104 protected PatternMatchRevFilter(String pattern, final boolean innerString,
105 final boolean rawEncoding, final int flags) {
106 if (pattern.length() == 0)
107 throw new IllegalArgumentException(JGitText.get().cannotMatchOnEmptyString);
108 patternText = pattern;
109
110 if (innerString) {
111 if (!pattern.startsWith("^") && !pattern.startsWith(".*")) //$NON-NLS-1$ //$NON-NLS-2$
112 pattern = ".*" + pattern; //$NON-NLS-1$
113 if (!pattern.endsWith("$") && !pattern.endsWith(".*")) //$NON-NLS-1$ //$NON-NLS-2$
114 pattern = pattern + ".*"; //$NON-NLS-1$
115 }
116 final String p = rawEncoding ? forceToRaw(pattern) : pattern;
117 compiledPattern = Pattern.compile(p, flags).matcher(""); //$NON-NLS-1$
118 }
119
120 /**
121 * Get the pattern this filter uses.
122 *
123 * @return the pattern this filter is applying to candidate strings.
124 */
125 public String pattern() {
126 return patternText;
127 }
128
129 /** {@inheritDoc} */
130 @Override
131 public boolean include(RevWalk walker, RevCommit cmit)
132 throws MissingObjectException, IncorrectObjectTypeException,
133 IOException {
134 return compiledPattern.reset(text(cmit)).matches();
135 }
136
137 /** {@inheritDoc} */
138 @Override
139 public boolean requiresCommitBody() {
140 return true;
141 }
142
143 /**
144 * Obtain the raw text to match against.
145 *
146 * @param cmit
147 * current commit being evaluated.
148 * @return sequence for the commit's content that we need to match on.
149 */
150 protected abstract CharSequence text(RevCommit cmit);
151
152 /** {@inheritDoc} */
153 @SuppressWarnings("nls")
154 @Override
155 public String toString() {
156 return super.toString() + "(\"" + patternText + "\")";
157 }
158 }