View Javadoc
1   /*
2    * Copyright (C) 2009-2010, Google Inc.
3    * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.diff;
46  
47  import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
48  import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace;
49  import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;
50  
51  import org.eclipse.jgit.util.IntList;
52  
53  /** Equivalence function for {@link RawText}. */
54  public abstract class RawTextComparator extends SequenceComparator<RawText> {
55  	/** No special treatment. */
56  	public static final RawTextComparator DEFAULT = new RawTextComparator() {
57  		@Override
58  		public boolean equals(RawText a, int ai, RawText b, int bi) {
59  			ai++;
60  			bi++;
61  
62  			int as = a.lines.get(ai);
63  			int bs = b.lines.get(bi);
64  			final int ae = a.lines.get(ai + 1);
65  			final int be = b.lines.get(bi + 1);
66  
67  			if (ae - as != be - bs)
68  				return false;
69  
70  			while (as < ae) {
71  				if (a.content[as++] != b.content[bs++])
72  					return false;
73  			}
74  			return true;
75  		}
76  
77  		@Override
78  		protected int hashRegion(final byte[] raw, int ptr, final int end) {
79  			int hash = 5381;
80  			for (; ptr < end; ptr++)
81  				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
82  			return hash;
83  		}
84  	};
85  
86  	/** Ignores all whitespace. */
87  	public static final RawTextComparator WS_IGNORE_ALL = new RawTextComparator() {
88  		@Override
89  		public boolean equals(RawText a, int ai, RawText b, int bi) {
90  			ai++;
91  			bi++;
92  
93  			int as = a.lines.get(ai);
94  			int bs = b.lines.get(bi);
95  			int ae = a.lines.get(ai + 1);
96  			int be = b.lines.get(bi + 1);
97  
98  			ae = trimTrailingWhitespace(a.content, as, ae);
99  			be = trimTrailingWhitespace(b.content, bs, be);
100 
101 			while (as < ae && bs < be) {
102 				byte ac = a.content[as];
103 				byte bc = b.content[bs];
104 
105 				while (as < ae - 1 && isWhitespace(ac)) {
106 					as++;
107 					ac = a.content[as];
108 				}
109 
110 				while (bs < be - 1 && isWhitespace(bc)) {
111 					bs++;
112 					bc = b.content[bs];
113 				}
114 
115 				if (ac != bc)
116 					return false;
117 
118 				as++;
119 				bs++;
120 			}
121 
122 			return as == ae && bs == be;
123 		}
124 
125 		@Override
126 		protected int hashRegion(byte[] raw, int ptr, int end) {
127 			int hash = 5381;
128 			for (; ptr < end; ptr++) {
129 				byte c = raw[ptr];
130 				if (!isWhitespace(c))
131 					hash = ((hash << 5) + hash) + (c & 0xff);
132 			}
133 			return hash;
134 		}
135 	};
136 
137 	/** Ignores leading whitespace. */
138 	public static final RawTextComparator WS_IGNORE_LEADING = new RawTextComparator() {
139 		@Override
140 		public boolean equals(RawText a, int ai, RawText b, int bi) {
141 			ai++;
142 			bi++;
143 
144 			int as = a.lines.get(ai);
145 			int bs = b.lines.get(bi);
146 			int ae = a.lines.get(ai + 1);
147 			int be = b.lines.get(bi + 1);
148 
149 			as = trimLeadingWhitespace(a.content, as, ae);
150 			bs = trimLeadingWhitespace(b.content, bs, be);
151 
152 			if (ae - as != be - bs)
153 				return false;
154 
155 			while (as < ae) {
156 				if (a.content[as++] != b.content[bs++])
157 					return false;
158 			}
159 			return true;
160 		}
161 
162 		@Override
163 		protected int hashRegion(final byte[] raw, int ptr, int end) {
164 			int hash = 5381;
165 			ptr = trimLeadingWhitespace(raw, ptr, end);
166 			for (; ptr < end; ptr++)
167 				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
168 			return hash;
169 		}
170 	};
171 
172 	/** Ignores trailing whitespace. */
173 	public static final RawTextComparator WS_IGNORE_TRAILING = new RawTextComparator() {
174 		@Override
175 		public boolean equals(RawText a, int ai, RawText b, int bi) {
176 			ai++;
177 			bi++;
178 
179 			int as = a.lines.get(ai);
180 			int bs = b.lines.get(bi);
181 			int ae = a.lines.get(ai + 1);
182 			int be = b.lines.get(bi + 1);
183 
184 			ae = trimTrailingWhitespace(a.content, as, ae);
185 			be = trimTrailingWhitespace(b.content, bs, be);
186 
187 			if (ae - as != be - bs)
188 				return false;
189 
190 			while (as < ae) {
191 				if (a.content[as++] != b.content[bs++])
192 					return false;
193 			}
194 			return true;
195 		}
196 
197 		@Override
198 		protected int hashRegion(final byte[] raw, int ptr, int end) {
199 			int hash = 5381;
200 			end = trimTrailingWhitespace(raw, ptr, end);
201 			for (; ptr < end; ptr++)
202 				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
203 			return hash;
204 		}
205 	};
206 
207 	/** Ignores whitespace occurring between non-whitespace characters. */
208 	public static final RawTextComparator WS_IGNORE_CHANGE = new RawTextComparator() {
209 		@Override
210 		public boolean equals(RawText a, int ai, RawText b, int bi) {
211 			ai++;
212 			bi++;
213 
214 			int as = a.lines.get(ai);
215 			int bs = b.lines.get(bi);
216 			int ae = a.lines.get(ai + 1);
217 			int be = b.lines.get(bi + 1);
218 
219 			ae = trimTrailingWhitespace(a.content, as, ae);
220 			be = trimTrailingWhitespace(b.content, bs, be);
221 
222 			while (as < ae && bs < be) {
223 				byte ac = a.content[as];
224 				byte bc = b.content[bs];
225 
226 				if (ac != bc)
227 					return false;
228 
229 				if (isWhitespace(ac))
230 					as = trimLeadingWhitespace(a.content, as, ae);
231 				else
232 					as++;
233 
234 				if (isWhitespace(bc))
235 					bs = trimLeadingWhitespace(b.content, bs, be);
236 				else
237 					bs++;
238 			}
239 			return as == ae && bs == be;
240 		}
241 
242 		@Override
243 		protected int hashRegion(final byte[] raw, int ptr, int end) {
244 			int hash = 5381;
245 			end = trimTrailingWhitespace(raw, ptr, end);
246 			while (ptr < end) {
247 				byte c = raw[ptr];
248 				hash = ((hash << 5) + hash) + (c & 0xff);
249 				if (isWhitespace(c))
250 					ptr = trimLeadingWhitespace(raw, ptr, end);
251 				else
252 					ptr++;
253 			}
254 			return hash;
255 		}
256 	};
257 
258 	@Override
259 	public int hash(RawText seq, int lno) {
260 		final int begin = seq.lines.get(lno + 1);
261 		final int end = seq.lines.get(lno + 2);
262 		return hashRegion(seq.content, begin, end);
263 	}
264 
265 	@Override
266 	public Edit reduceCommonStartEnd(RawText a, RawText b, Edit e) {
267 		// This is a faster exact match based form that tries to improve
268 		// performance for the common case of the header and trailer of
269 		// a text file not changing at all. After this fast path we use
270 		// the slower path based on the super class' using equals() to
271 		// allow for whitespace ignore modes to still work.
272 
273 		if (e.beginA == e.endA || e.beginB == e.endB)
274 			return e;
275 
276 		byte[] aRaw = a.content;
277 		byte[] bRaw = b.content;
278 
279 		int aPtr = a.lines.get(e.beginA + 1);
280 		int bPtr = a.lines.get(e.beginB + 1);
281 
282 		int aEnd = a.lines.get(e.endA + 1);
283 		int bEnd = b.lines.get(e.endB + 1);
284 
285 		// This can never happen, but the JIT doesn't know that. If we
286 		// define this assertion before the tight while loops below it
287 		// should be able to skip the array bound checks on access.
288 		//
289 		if (aPtr < 0 || bPtr < 0 || aEnd > aRaw.length || bEnd > bRaw.length)
290 			throw new ArrayIndexOutOfBoundsException();
291 
292 		while (aPtr < aEnd && bPtr < bEnd && aRaw[aPtr] == bRaw[bPtr]) {
293 			aPtr++;
294 			bPtr++;
295 		}
296 
297 		while (aPtr < aEnd && bPtr < bEnd && aRaw[aEnd - 1] == bRaw[bEnd - 1]) {
298 			aEnd--;
299 			bEnd--;
300 		}
301 
302 		e.beginA = findForwardLine(a.lines, e.beginA, aPtr);
303 		e.beginB = findForwardLine(b.lines, e.beginB, bPtr);
304 
305 		e.endA = findReverseLine(a.lines, e.endA, aEnd);
306 
307 		final boolean partialA = aEnd < a.lines.get(e.endA + 1);
308 		if (partialA)
309 			bEnd += a.lines.get(e.endA + 1) - aEnd;
310 
311 		e.endB = findReverseLine(b.lines, e.endB, bEnd);
312 
313 		if (!partialA && bEnd < b.lines.get(e.endB + 1))
314 			e.endA++;
315 
316 		return super.reduceCommonStartEnd(a, b, e);
317 	}
318 
319 	private static int findForwardLine(IntList lines, int idx, int ptr) {
320 		final int end = lines.size() - 2;
321 		while (idx < end && lines.get(idx + 2) < ptr)
322 			idx++;
323 		return idx;
324 	}
325 
326 	private static int findReverseLine(IntList lines, int idx, int ptr) {
327 		while (0 < idx && ptr <= lines.get(idx))
328 			idx--;
329 		return idx;
330 	}
331 
332 	/**
333 	 * Compute a hash code for a region.
334 	 *
335 	 * @param raw
336 	 *            the raw file content.
337 	 * @param ptr
338 	 *            first byte of the region to hash.
339 	 * @param end
340 	 *            1 past the last byte of the region.
341 	 * @return hash code for the region <code>[ptr, end)</code> of raw.
342 	 */
343 	protected abstract int hashRegion(byte[] raw, int ptr, int end);
344 }