View Javadoc
1   /*
2    * Copyright (C) 2009-2010, Google Inc.
3    * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.diff;
46  
47  import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
48  import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace;
49  import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;
50  
51  import org.eclipse.jgit.util.IntList;
52  
53  /**
54   * Equivalence function for {@link org.eclipse.jgit.diff.RawText}.
55   */
56  public abstract class RawTextComparator extends SequenceComparator<RawText> {
57  	/** No special treatment. */
58  	public static final RawTextComparator DEFAULT = new RawTextComparator() {
59  		@Override
60  		public boolean equals(RawText a, int ai, RawText b, int bi) {
61  			ai++;
62  			bi++;
63  
64  			int as = a.lines.get(ai);
65  			int bs = b.lines.get(bi);
66  			final int ae = a.lines.get(ai + 1);
67  			final int be = b.lines.get(bi + 1);
68  
69  			if (ae - as != be - bs)
70  				return false;
71  
72  			while (as < ae) {
73  				if (a.content[as++] != b.content[bs++])
74  					return false;
75  			}
76  			return true;
77  		}
78  
79  		@Override
80  		protected int hashRegion(byte[] raw, int ptr, int end) {
81  			int hash = 5381;
82  			for (; ptr < end; ptr++)
83  				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
84  			return hash;
85  		}
86  	};
87  
88  	/** Ignores all whitespace. */
89  	public static final RawTextComparator WS_IGNORE_ALL = new RawTextComparator() {
90  		@Override
91  		public boolean equals(RawText a, int ai, RawText b, int bi) {
92  			ai++;
93  			bi++;
94  
95  			int as = a.lines.get(ai);
96  			int bs = b.lines.get(bi);
97  			int ae = a.lines.get(ai + 1);
98  			int be = b.lines.get(bi + 1);
99  
100 			ae = trimTrailingWhitespace(a.content, as, ae);
101 			be = trimTrailingWhitespace(b.content, bs, be);
102 
103 			while (as < ae && bs < be) {
104 				byte ac = a.content[as];
105 				byte bc = b.content[bs];
106 
107 				while (as < ae - 1 && isWhitespace(ac)) {
108 					as++;
109 					ac = a.content[as];
110 				}
111 
112 				while (bs < be - 1 && isWhitespace(bc)) {
113 					bs++;
114 					bc = b.content[bs];
115 				}
116 
117 				if (ac != bc)
118 					return false;
119 
120 				as++;
121 				bs++;
122 			}
123 
124 			return as == ae && bs == be;
125 		}
126 
127 		@Override
128 		protected int hashRegion(byte[] raw, int ptr, int end) {
129 			int hash = 5381;
130 			for (; ptr < end; ptr++) {
131 				byte c = raw[ptr];
132 				if (!isWhitespace(c))
133 					hash = ((hash << 5) + hash) + (c & 0xff);
134 			}
135 			return hash;
136 		}
137 	};
138 
139 	/**
140 	 * Ignore leading whitespace.
141 	 **/
142 	public static final RawTextComparator WS_IGNORE_LEADING = new RawTextComparator() {
143 		@Override
144 		public boolean equals(RawText a, int ai, RawText b, int bi) {
145 			ai++;
146 			bi++;
147 
148 			int as = a.lines.get(ai);
149 			int bs = b.lines.get(bi);
150 			int ae = a.lines.get(ai + 1);
151 			int be = b.lines.get(bi + 1);
152 
153 			as = trimLeadingWhitespace(a.content, as, ae);
154 			bs = trimLeadingWhitespace(b.content, bs, be);
155 
156 			if (ae - as != be - bs)
157 				return false;
158 
159 			while (as < ae) {
160 				if (a.content[as++] != b.content[bs++])
161 					return false;
162 			}
163 			return true;
164 		}
165 
166 		@Override
167 		protected int hashRegion(byte[] raw, int ptr, int end) {
168 			int hash = 5381;
169 			ptr = trimLeadingWhitespace(raw, ptr, end);
170 			for (; ptr < end; ptr++)
171 				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
172 			return hash;
173 		}
174 	};
175 
176 	/** Ignores trailing whitespace. */
177 	public static final RawTextComparator WS_IGNORE_TRAILING = new RawTextComparator() {
178 		@Override
179 		public boolean equals(RawText a, int ai, RawText b, int bi) {
180 			ai++;
181 			bi++;
182 
183 			int as = a.lines.get(ai);
184 			int bs = b.lines.get(bi);
185 			int ae = a.lines.get(ai + 1);
186 			int be = b.lines.get(bi + 1);
187 
188 			ae = trimTrailingWhitespace(a.content, as, ae);
189 			be = trimTrailingWhitespace(b.content, bs, be);
190 
191 			if (ae - as != be - bs)
192 				return false;
193 
194 			while (as < ae) {
195 				if (a.content[as++] != b.content[bs++])
196 					return false;
197 			}
198 			return true;
199 		}
200 
201 		@Override
202 		protected int hashRegion(byte[] raw, int ptr, int end) {
203 			int hash = 5381;
204 			end = trimTrailingWhitespace(raw, ptr, end);
205 			for (; ptr < end; ptr++)
206 				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
207 			return hash;
208 		}
209 	};
210 
211 	/** Ignores whitespace occurring between non-whitespace characters. */
212 	public static final RawTextComparator WS_IGNORE_CHANGE = new RawTextComparator() {
213 		@Override
214 		public boolean equals(RawText a, int ai, RawText b, int bi) {
215 			ai++;
216 			bi++;
217 
218 			int as = a.lines.get(ai);
219 			int bs = b.lines.get(bi);
220 			int ae = a.lines.get(ai + 1);
221 			int be = b.lines.get(bi + 1);
222 
223 			ae = trimTrailingWhitespace(a.content, as, ae);
224 			be = trimTrailingWhitespace(b.content, bs, be);
225 
226 			while (as < ae && bs < be) {
227 				byte ac = a.content[as];
228 				byte bc = b.content[bs];
229 
230 				if (ac != bc)
231 					return false;
232 
233 				if (isWhitespace(ac))
234 					as = trimLeadingWhitespace(a.content, as, ae);
235 				else
236 					as++;
237 
238 				if (isWhitespace(bc))
239 					bs = trimLeadingWhitespace(b.content, bs, be);
240 				else
241 					bs++;
242 			}
243 			return as == ae && bs == be;
244 		}
245 
246 		@Override
247 		protected int hashRegion(byte[] raw, int ptr, int end) {
248 			int hash = 5381;
249 			end = trimTrailingWhitespace(raw, ptr, end);
250 			while (ptr < end) {
251 				byte c = raw[ptr];
252 				hash = ((hash << 5) + hash) + (c & 0xff);
253 				if (isWhitespace(c))
254 					ptr = trimLeadingWhitespace(raw, ptr, end);
255 				else
256 					ptr++;
257 			}
258 			return hash;
259 		}
260 	};
261 
262 	@Override
263 	public int hash(RawText seq, int lno) {
264 		final int begin = seq.lines.get(lno + 1);
265 		final int end = seq.lines.get(lno + 2);
266 		return hashRegion(seq.content, begin, end);
267 	}
268 
269 	/** {@inheritDoc} */
270 	@Override
271 	public Edit reduceCommonStartEnd(RawText a, RawText b, Edit e) {
272 		// This is a faster exact match based form that tries to improve
273 		// performance for the common case of the header and trailer of
274 		// a text file not changing at all. After this fast path we use
275 		// the slower path based on the super class' using equals() to
276 		// allow for whitespace ignore modes to still work.
277 
278 		if (e.beginA == e.endA || e.beginB == e.endB)
279 			return e;
280 
281 		byte[] aRaw = a.content;
282 		byte[] bRaw = b.content;
283 
284 		int aPtr = a.lines.get(e.beginA + 1);
285 		int bPtr = a.lines.get(e.beginB + 1);
286 
287 		int aEnd = a.lines.get(e.endA + 1);
288 		int bEnd = b.lines.get(e.endB + 1);
289 
290 		// This can never happen, but the JIT doesn't know that. If we
291 		// define this assertion before the tight while loops below it
292 		// should be able to skip the array bound checks on access.
293 		//
294 		if (aPtr < 0 || bPtr < 0 || aEnd > aRaw.length || bEnd > bRaw.length)
295 			throw new ArrayIndexOutOfBoundsException();
296 
297 		while (aPtr < aEnd && bPtr < bEnd && aRaw[aPtr] == bRaw[bPtr]) {
298 			aPtr++;
299 			bPtr++;
300 		}
301 
302 		while (aPtr < aEnd && bPtr < bEnd && aRaw[aEnd - 1] == bRaw[bEnd - 1]) {
303 			aEnd--;
304 			bEnd--;
305 		}
306 
307 		e.beginA = findForwardLine(a.lines, e.beginA, aPtr);
308 		e.beginB = findForwardLine(b.lines, e.beginB, bPtr);
309 
310 		e.endA = findReverseLine(a.lines, e.endA, aEnd);
311 
312 		final boolean partialA = aEnd < a.lines.get(e.endA + 1);
313 		if (partialA)
314 			bEnd += a.lines.get(e.endA + 1) - aEnd;
315 
316 		e.endB = findReverseLine(b.lines, e.endB, bEnd);
317 
318 		if (!partialA && bEnd < b.lines.get(e.endB + 1))
319 			e.endA++;
320 
321 		return super.reduceCommonStartEnd(a, b, e);
322 	}
323 
324 	private static int findForwardLine(IntList lines, int idx, int ptr) {
325 		final int end = lines.size() - 2;
326 		while (idx < end && lines.get(idx + 2) < ptr)
327 			idx++;
328 		return idx;
329 	}
330 
331 	private static int findReverseLine(IntList lines, int idx, int ptr) {
332 		while (0 < idx && ptr <= lines.get(idx))
333 			idx--;
334 		return idx;
335 	}
336 
337 	/**
338 	 * Compute a hash code for a region.
339 	 *
340 	 * @param raw
341 	 *            the raw file content.
342 	 * @param ptr
343 	 *            first byte of the region to hash.
344 	 * @param end
345 	 *            1 past the last byte of the region.
346 	 * @return hash code for the region <code>[ptr, end)</code> of raw.
347 	 */
348 	protected abstract int hashRegion(byte[] raw, int ptr, int end);
349 }