View Javadoc
1   /*
2    * Copyright (C) 2009, Google Inc.
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.util;
46  
47  import org.eclipse.jgit.internal.JGitText;
48  import org.eclipse.jgit.lib.Constants;
49  
50  /**
51   * Searches text using only substring search.
52   * <p>
53   * Instances are thread-safe. Multiple concurrent threads may perform matches on
54   * different character sequences at the same time.
55   */
56  public class RawSubStringPattern {
57  	private final String needleString;
58  
59  	private final byte[] needle;
60  
61  	/**
62  	 * Construct a new substring pattern.
63  	 *
64  	 * @param patternText
65  	 *            text to locate. This should be a literal string, as no
66  	 *            meta-characters are supported by this implementation. The
67  	 *            string may not be the empty string.
68  	 */
69  	public RawSubStringPattern(final String patternText) {
70  		if (patternText.length() == 0)
71  			throw new IllegalArgumentException(JGitText.get().cannotMatchOnEmptyString);
72  		needleString = patternText;
73  
74  		final byte[] b = Constants.encode(patternText);
75  		needle = new byte[b.length];
76  		for (int i = 0; i < b.length; i++)
77  			needle[i] = lc(b[i]);
78  	}
79  
80  	/**
81  	 * Match a character sequence against this pattern.
82  	 *
83  	 * @param rcs
84  	 *            the sequence to match. Must not be null but the length of the
85  	 *            sequence is permitted to be 0.
86  	 * @return offset within <code>rcs</code> of the first occurrence of this
87  	 *         pattern; -1 if this pattern does not appear at any position of
88  	 *         <code>rcs</code>.
89  	 */
90  	public int match(final RawCharSequence rcs) {
91  		final int needleLen = needle.length;
92  		final byte first = needle[0];
93  
94  		final byte[] text = rcs.buffer;
95  		int matchPos = rcs.startPtr;
96  		final int maxPos = rcs.endPtr - needleLen;
97  
98  		OUTER: for (; matchPos <= maxPos; matchPos++) {
99  			if (neq(first, text[matchPos])) {
100 				while (++matchPos <= maxPos && neq(first, text[matchPos])) {
101 					/* skip */
102 				}
103 				if (matchPos > maxPos)
104 					return -1;
105 			}
106 
107 			int si = matchPos + 1;
108 			for (int j = 1; j < needleLen; j++, si++) {
109 				if (neq(needle[j], text[si]))
110 					continue OUTER;
111 			}
112 			return matchPos;
113 		}
114 		return -1;
115 	}
116 
117 	private static final boolean neq(final byte a, final byte b) {
118 		return a != b && a != lc(b);
119 	}
120 
121 	private static final byte lc(final byte q) {
122 		return (byte) StringUtils.toLowerCase((char) (q & 0xff));
123 	}
124 
125 	/**
126 	 * Get the literal pattern string this instance searches for.
127 	 *
128 	 * @return the pattern string given to our constructor.
129 	 */
130 	public String pattern() {
131 		return needleString;
132 	}
133 
134 	/** {@inheritDoc} */
135 	@Override
136 	public String toString() {
137 		return pattern();
138 	}
139 }