1 /*
2 * Copyright (C) 2009, Google Inc.
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
4 *
5 * This program and the accompanying materials are made available under the
6 * terms of the Eclipse Distribution License v. 1.0 which is available at
7 * https://www.eclipse.org/org/documents/edl-v10.php.
8 *
9 * SPDX-License-Identifier: BSD-3-Clause
10 */
11
12 package org.eclipse.jgit.revwalk.filter;
13
14 import java.io.IOException;
15 import java.util.regex.Matcher;
16 import java.util.regex.Pattern;
17
18 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
19 import org.eclipse.jgit.errors.MissingObjectException;
20 import org.eclipse.jgit.internal.JGitText;
21 import org.eclipse.jgit.lib.Constants;
22 import org.eclipse.jgit.revwalk.RevCommit;
23 import org.eclipse.jgit.revwalk.RevWalk;
24
25 /**
26 * Abstract filter that searches text using extended regular expressions.
27 */
28 public abstract class PatternMatchRevFilter extends RevFilter {
29 /**
30 * Encode a string pattern for faster matching on byte arrays.
31 * <p>
32 * Force the characters to our funny UTF-8 only convention that we use on
33 * raw buffers. This avoids needing to perform character set decodes on the
34 * individual commit buffers.
35 *
36 * @param patternText
37 * original pattern string supplied by the user or the
38 * application.
39 * @return same pattern, but re-encoded to match our funny raw UTF-8
40 * character sequence {@link org.eclipse.jgit.util.RawCharSequence}.
41 */
42 protected static final String forceToRaw(String patternText) {
43 final byte[] b = Constants.encode(patternText);
44 final StringBuilder needle = new StringBuilder(b.length);
45 for (byte element : b)
46 needle.append((char) (element & 0xff));
47 return needle.toString();
48 }
49
50 private final String patternText;
51
52 private final Matcher compiledPattern;
53
54 /**
55 * Construct a new pattern matching filter.
56 *
57 * @param pattern
58 * text of the pattern. Callers may want to surround their
59 * pattern with ".*" on either end to allow matching in the
60 * middle of the string.
61 * @param innerString
62 * should .* be wrapped around the pattern of ^ and $ are
63 * missing? Most users will want this set.
64 * @param rawEncoding
65 * should {@link #forceToRaw(String)} be applied to the pattern
66 * before compiling it?
67 * @param flags
68 * flags from {@link java.util.regex.Pattern} to control how
69 * matching performs.
70 */
71 protected PatternMatchRevFilter(String pattern, final boolean innerString,
72 final boolean rawEncoding, final int flags) {
73 if (pattern.length() == 0)
74 throw new IllegalArgumentException(JGitText.get().cannotMatchOnEmptyString);
75 patternText = pattern;
76
77 if (innerString) {
78 if (!pattern.startsWith("^") && !pattern.startsWith(".*")) //$NON-NLS-1$ //$NON-NLS-2$
79 pattern = ".*" + pattern; //$NON-NLS-1$
80 if (!pattern.endsWith("$") && !pattern.endsWith(".*")) //$NON-NLS-1$ //$NON-NLS-2$
81 pattern = pattern + ".*"; //$NON-NLS-1$
82 }
83 final String p = rawEncoding ? forceToRaw(pattern) : pattern;
84 compiledPattern = Pattern.compile(p, flags).matcher(""); //$NON-NLS-1$
85 }
86
87 /**
88 * Get the pattern this filter uses.
89 *
90 * @return the pattern this filter is applying to candidate strings.
91 */
92 public String pattern() {
93 return patternText;
94 }
95
96 /** {@inheritDoc} */
97 @Override
98 public boolean include(RevWalk walker, RevCommit cmit)
99 throws MissingObjectException, IncorrectObjectTypeException,
100 IOException {
101 return compiledPattern.reset(text(cmit)).matches();
102 }
103
104 /** {@inheritDoc} */
105 @Override
106 public boolean requiresCommitBody() {
107 return true;
108 }
109
110 /**
111 * Obtain the raw text to match against.
112 *
113 * @param cmit
114 * current commit being evaluated.
115 * @return sequence for the commit's content that we need to match on.
116 */
117 protected abstract CharSequence text(RevCommit cmit);
118
119 /** {@inheritDoc} */
120 @SuppressWarnings("nls")
121 @Override
122 public String toString() {
123 return super.toString() + "(\"" + patternText + "\")";
124 }
125 }