View Javadoc
1   /*
2    * Copyright (C) 2009-2010, Google Inc.
3    * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de> and others
4    *
5    * This program and the accompanying materials are made available under the
6    * terms of the Eclipse Distribution License v. 1.0 which is available at
7    * https://www.eclipse.org/org/documents/edl-v10.php.
8    *
9    * SPDX-License-Identifier: BSD-3-Clause
10   */
11  
12  package org.eclipse.jgit.diff;
13  
14  import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
15  import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace;
16  import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;
17  
18  import org.eclipse.jgit.util.IntList;
19  
20  /**
21   * Equivalence function for {@link org.eclipse.jgit.diff.RawText}.
22   */
23  public abstract class RawTextComparator extends SequenceComparator<RawText> {
24  	/** No special treatment. */
25  	public static final RawTextComparator DEFAULT = new RawTextComparator() {
26  		@Override
27  		public boolean equals(RawText a, int ai, RawText b, int bi) {
28  			ai++;
29  			bi++;
30  
31  			int as = a.lines.get(ai);
32  			int bs = b.lines.get(bi);
33  			final int ae = a.lines.get(ai + 1);
34  			final int be = b.lines.get(bi + 1);
35  
36  			if (ae - as != be - bs)
37  				return false;
38  
39  			while (as < ae) {
40  				if (a.content[as++] != b.content[bs++])
41  					return false;
42  			}
43  			return true;
44  		}
45  
46  		@Override
47  		protected int hashRegion(byte[] raw, int ptr, int end) {
48  			int hash = 5381;
49  			for (; ptr < end; ptr++)
50  				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
51  			return hash;
52  		}
53  	};
54  
55  	/** Ignores all whitespace. */
56  	public static final RawTextComparator WS_IGNORE_ALL = new RawTextComparator() {
57  		@Override
58  		public boolean equals(RawText a, int ai, RawText b, int bi) {
59  			ai++;
60  			bi++;
61  
62  			int as = a.lines.get(ai);
63  			int bs = b.lines.get(bi);
64  			int ae = a.lines.get(ai + 1);
65  			int be = b.lines.get(bi + 1);
66  
67  			ae = trimTrailingWhitespace(a.content, as, ae);
68  			be = trimTrailingWhitespace(b.content, bs, be);
69  
70  			while (as < ae && bs < be) {
71  				byte ac = a.content[as];
72  				byte bc = b.content[bs];
73  
74  				while (as < ae - 1 && isWhitespace(ac)) {
75  					as++;
76  					ac = a.content[as];
77  				}
78  
79  				while (bs < be - 1 && isWhitespace(bc)) {
80  					bs++;
81  					bc = b.content[bs];
82  				}
83  
84  				if (ac != bc)
85  					return false;
86  
87  				as++;
88  				bs++;
89  			}
90  
91  			return as == ae && bs == be;
92  		}
93  
94  		@Override
95  		protected int hashRegion(byte[] raw, int ptr, int end) {
96  			int hash = 5381;
97  			for (; ptr < end; ptr++) {
98  				byte c = raw[ptr];
99  				if (!isWhitespace(c))
100 					hash = ((hash << 5) + hash) + (c & 0xff);
101 			}
102 			return hash;
103 		}
104 	};
105 
106 	/**
107 	 * Ignore leading whitespace.
108 	 **/
109 	public static final RawTextComparator WS_IGNORE_LEADING = new RawTextComparator() {
110 		@Override
111 		public boolean equals(RawText a, int ai, RawText b, int bi) {
112 			ai++;
113 			bi++;
114 
115 			int as = a.lines.get(ai);
116 			int bs = b.lines.get(bi);
117 			int ae = a.lines.get(ai + 1);
118 			int be = b.lines.get(bi + 1);
119 
120 			as = trimLeadingWhitespace(a.content, as, ae);
121 			bs = trimLeadingWhitespace(b.content, bs, be);
122 
123 			if (ae - as != be - bs)
124 				return false;
125 
126 			while (as < ae) {
127 				if (a.content[as++] != b.content[bs++])
128 					return false;
129 			}
130 			return true;
131 		}
132 
133 		@Override
134 		protected int hashRegion(byte[] raw, int ptr, int end) {
135 			int hash = 5381;
136 			ptr = trimLeadingWhitespace(raw, ptr, end);
137 			for (; ptr < end; ptr++)
138 				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
139 			return hash;
140 		}
141 	};
142 
143 	/** Ignores trailing whitespace. */
144 	public static final RawTextComparator WS_IGNORE_TRAILING = new RawTextComparator() {
145 		@Override
146 		public boolean equals(RawText a, int ai, RawText b, int bi) {
147 			ai++;
148 			bi++;
149 
150 			int as = a.lines.get(ai);
151 			int bs = b.lines.get(bi);
152 			int ae = a.lines.get(ai + 1);
153 			int be = b.lines.get(bi + 1);
154 
155 			ae = trimTrailingWhitespace(a.content, as, ae);
156 			be = trimTrailingWhitespace(b.content, bs, be);
157 
158 			if (ae - as != be - bs)
159 				return false;
160 
161 			while (as < ae) {
162 				if (a.content[as++] != b.content[bs++])
163 					return false;
164 			}
165 			return true;
166 		}
167 
168 		@Override
169 		protected int hashRegion(byte[] raw, int ptr, int end) {
170 			int hash = 5381;
171 			end = trimTrailingWhitespace(raw, ptr, end);
172 			for (; ptr < end; ptr++)
173 				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
174 			return hash;
175 		}
176 	};
177 
178 	/** Ignores whitespace occurring between non-whitespace characters. */
179 	public static final RawTextComparator WS_IGNORE_CHANGE = new RawTextComparator() {
180 		@Override
181 		public boolean equals(RawText a, int ai, RawText b, int bi) {
182 			ai++;
183 			bi++;
184 
185 			int as = a.lines.get(ai);
186 			int bs = b.lines.get(bi);
187 			int ae = a.lines.get(ai + 1);
188 			int be = b.lines.get(bi + 1);
189 
190 			ae = trimTrailingWhitespace(a.content, as, ae);
191 			be = trimTrailingWhitespace(b.content, bs, be);
192 
193 			while (as < ae && bs < be) {
194 				byte ac = a.content[as++];
195 				byte bc = b.content[bs++];
196 
197 				if (isWhitespace(ac) && isWhitespace(bc)) {
198 					as = trimLeadingWhitespace(a.content, as, ae);
199 					bs = trimLeadingWhitespace(b.content, bs, be);
200 				} else if (ac != bc) {
201 					return false;
202 				}
203 			}
204 			return as == ae && bs == be;
205 		}
206 
207 		@Override
208 		protected int hashRegion(byte[] raw, int ptr, int end) {
209 			int hash = 5381;
210 			end = trimTrailingWhitespace(raw, ptr, end);
211 			while (ptr < end) {
212 				byte c = raw[ptr++];
213 				if (isWhitespace(c)) {
214 					ptr = trimLeadingWhitespace(raw, ptr, end);
215 					c = ' ';
216 				}
217 				hash = ((hash << 5) + hash) + (c & 0xff);
218 			}
219 			return hash;
220 		}
221 	};
222 
223 	@Override
224 	public int hash(RawText seq, int lno) {
225 		final int begin = seq.lines.get(lno + 1);
226 		final int end = seq.lines.get(lno + 2);
227 		return hashRegion(seq.content, begin, end);
228 	}
229 
230 	/** {@inheritDoc} */
231 	@Override
232 	public Edit reduceCommonStartEnd(RawText a, RawText b, Edit e) {
233 		// This is a faster exact match based form that tries to improve
234 		// performance for the common case of the header and trailer of
235 		// a text file not changing at all. After this fast path we use
236 		// the slower path based on the super class' using equals() to
237 		// allow for whitespace ignore modes to still work.
238 
239 		if (e.beginA == e.endA || e.beginB == e.endB)
240 			return e;
241 
242 		byte[] aRaw = a.content;
243 		byte[] bRaw = b.content;
244 
245 		int aPtr = a.lines.get(e.beginA + 1);
246 		int bPtr = a.lines.get(e.beginB + 1);
247 
248 		int aEnd = a.lines.get(e.endA + 1);
249 		int bEnd = b.lines.get(e.endB + 1);
250 
251 		// This can never happen, but the JIT doesn't know that. If we
252 		// define this assertion before the tight while loops below it
253 		// should be able to skip the array bound checks on access.
254 		//
255 		if (aPtr < 0 || bPtr < 0 || aEnd > aRaw.length || bEnd > bRaw.length)
256 			throw new ArrayIndexOutOfBoundsException();
257 
258 		while (aPtr < aEnd && bPtr < bEnd && aRaw[aPtr] == bRaw[bPtr]) {
259 			aPtr++;
260 			bPtr++;
261 		}
262 
263 		while (aPtr < aEnd && bPtr < bEnd && aRaw[aEnd - 1] == bRaw[bEnd - 1]) {
264 			aEnd--;
265 			bEnd--;
266 		}
267 
268 		e.beginA = findForwardLine(a.lines, e.beginA, aPtr);
269 		e.beginB = findForwardLine(b.lines, e.beginB, bPtr);
270 
271 		e.endA = findReverseLine(a.lines, e.endA, aEnd);
272 
273 		final boolean partialA = aEnd < a.lines.get(e.endA + 1);
274 		if (partialA)
275 			bEnd += a.lines.get(e.endA + 1) - aEnd;
276 
277 		e.endB = findReverseLine(b.lines, e.endB, bEnd);
278 
279 		if (!partialA && bEnd < b.lines.get(e.endB + 1))
280 			e.endA++;
281 
282 		return super.reduceCommonStartEnd(a, b, e);
283 	}
284 
285 	private static int findForwardLine(IntList lines, int idx, int ptr) {
286 		final int end = lines.size() - 2;
287 		while (idx < end && lines.get(idx + 2) < ptr)
288 			idx++;
289 		return idx;
290 	}
291 
292 	private static int findReverseLine(IntList lines, int idx, int ptr) {
293 		while (0 < idx && ptr <= lines.get(idx))
294 			idx--;
295 		return idx;
296 	}
297 
298 	/**
299 	 * Compute a hash code for a region.
300 	 *
301 	 * @param raw
302 	 *            the raw file content.
303 	 * @param ptr
304 	 *            first byte of the region to hash.
305 	 * @param end
306 	 *            1 past the last byte of the region.
307 	 * @return hash code for the region <code>[ptr, end)</code> of raw.
308 	 */
309 	protected abstract int hashRegion(byte[] raw, int ptr, int end);
310 }