/*
 * Copyright (C) 2009, Google Inc.
 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.revwalk.filter;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;

/**
 * Base class for commit filters that test a piece of commit text against a
 * {@link java.util.regex.Pattern} regular expression.
 * <p>
 * Subclasses choose which text is examined by implementing
 * {@link #text(RevCommit)}. A commit is included only when the whole of that
 * text matches the compiled pattern.
 * <p>
 * Each instance holds a single {@link java.util.regex.Matcher} that is reset
 * and reused on every {@link #include(RevWalk, RevCommit)} call. Matchers are
 * not safe for concurrent use, so an instance should not be shared between
 * threads that may evaluate commits at the same time.
 */
public abstract class PatternMatchRevFilter extends RevFilter {
    /**
     * Re-encode a pattern so it can be matched directly against raw byte
     * buffers.
     * <p>
     * The pattern is encoded to bytes with {@link Constants#encode(String)} and
     * every resulting byte is then widened to one character in the range
     * 0x00-0xff. This is the "funny" raw UTF-8 convention used on raw buffers,
     * and it avoids having to character-decode each commit buffer before
     * matching.
     *
     * @param patternText
     *            original pattern string supplied by the user or the
     *            application.
     * @return same pattern, but re-encoded to match our funny raw UTF-8
     *         character sequence {@link org.eclipse.jgit.util.RawCharSequence}.
     */
    protected static final String forceToRaw(String patternText) {
        final byte[] encoded = Constants.encode(patternText);
        final char[] raw = new char[encoded.length];
        for (int idx = 0; idx < raw.length; idx++) {
            // Mask off sign extension so each byte maps to 0x00-0xff.
            raw[idx] = (char) (encoded[idx] & 0xff);
        }
        return new String(raw);
    }

    /** Pattern exactly as passed to the constructor, before any rewriting. */
    private final String patternText;

    /** Matcher for the compiled pattern; reset with new input on each use. */
    private final Matcher compiledPattern;

    /**
     * Construct a new pattern matching filter.
     *
     * @param pattern
     *            text of the pattern. Callers may want to surround their
     *            pattern with ".*" on either end to allow matching in the
     *            middle of the string.
     * @param innerString
     *            should ".*" be added around the pattern if ^ or $ is
     *            missing? Most users will want this set. A leading ".*" is
     *            added unless the pattern already starts with "^" or ".*"; a
     *            trailing ".*" is added unless it already ends with "$" or
     *            ".*". The addition is plain string concatenation and the
     *            pattern is not grouped first, so for a pattern containing a
     *            top-level alternation (for example {@code a|b}) the added
     *            ".*" only attaches to the first and last alternatives.
     * @param rawEncoding
     *            should {@link #forceToRaw(String)} be applied to the pattern
     *            before compiling it?
     * @param flags
     *            flags from {@link java.util.regex.Pattern} to control how
     *            matching performs.
     * @throws IllegalArgumentException
     *             if {@code pattern} is the empty string.
     */
    protected PatternMatchRevFilter(String pattern, final boolean innerString,
            final boolean rawEncoding, final int flags) {
        if (pattern.isEmpty()) {
            throw new IllegalArgumentException(
                    JGitText.get().cannotMatchOnEmptyString);
        }
        // pattern() and toString() report what the caller gave us, not the
        // rewritten expression built below.
        patternText = pattern;

        String regex = pattern;
        if (innerString) {
            final boolean anchoredAtStart = regex.startsWith("^") || regex.startsWith(".*"); //$NON-NLS-1$ //$NON-NLS-2$
            if (!anchoredAtStart) {
                regex = ".*" + regex; //$NON-NLS-1$
            }
            final boolean anchoredAtEnd = regex.endsWith("$") || regex.endsWith(".*"); //$NON-NLS-1$ //$NON-NLS-2$
            if (!anchoredAtEnd) {
                regex = regex + ".*"; //$NON-NLS-1$
            }
        }

        final String expression;
        if (rawEncoding) {
            expression = forceToRaw(regex);
        } else {
            expression = regex;
        }
        final Pattern compiled = Pattern.compile(expression, flags);
        // Bind to an empty input for now; include() supplies the real text.
        compiledPattern = compiled.matcher(""); //$NON-NLS-1$
    }

    /**
     * Get the pattern this filter uses.
     *
     * @return the pattern text as originally passed to the constructor,
     *         without any ".*" wrapping or raw re-encoding applied.
     */
    public String pattern() {
        return patternText;
    }

    /** {@inheritDoc} */
    @Override
    public boolean include(RevWalk walker, RevCommit cmit)
            throws MissingObjectException, IncorrectObjectTypeException,
            IOException {
        final CharSequence candidate = text(cmit);
        // Reuse the one matcher instead of allocating a new one per commit.
        compiledPattern.reset(candidate);
        return compiledPattern.matches();
    }

    /** {@inheritDoc} */
    @Override
    public boolean requiresCommitBody() {
        return true;
    }

    /**
     * Obtain the raw text to match against.
     *
     * @param cmit
     *            current commit being evaluated.
     * @return sequence for the commit's content that we need to match on.
     */
    protected abstract CharSequence text(RevCommit cmit);

    /** {@inheritDoc} */
    @SuppressWarnings("nls")
    @Override
    public String toString() {
        final StringBuilder description = new StringBuilder();
        description.append(super.toString());
        description.append("(\"").append(patternText).append("\")");
        return description.toString();
    }
}