1 /*
2 * Copyright (C) 2010, Google Inc.
3 * and other copyright owners as documented in the project's IP log.
4 *
5 * This program and the accompanying materials are made available
6 * under the terms of the Eclipse Distribution License v1.0 which
7 * accompanies this distribution, is reproduced below, and is
8 * available at http://www.eclipse.org/org/documents/edl-v10.php
9 *
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * - Neither the name of the Eclipse Foundation, Inc. nor the
25 * names of its contributors may be used to endorse or promote
26 * products derived from this software without specific prior
27 * written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 package org.eclipse.jgit.diff;
45
46 /**
47 * Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an
48 * {@link org.eclipse.jgit.diff.EditList} of changes.
49 * <p>
50 * An algorithm's {@code diff} method must be callable from concurrent threads
51 * without data collisions. This permits some algorithms to use a singleton
52 * pattern, with concurrent invocations using the same singleton. Other
53 * algorithms may support parameterization, in which case the caller can create
54 * a unique instance per thread.
55 */
56 public abstract class DiffAlgorithm {
57 /**
58 * Supported diff algorithm
59 */
60 public enum SupportedAlgorithm {
61 /**
62 * Myers diff algorithm
63 */
64 MYERS,
65
66 /**
67 * Histogram diff algorithm
68 */
69 HISTOGRAM
70 }
71
72 /**
73 * Get diff algorithm
74 *
75 * @param alg
76 * the diff algorithm for which an implementation should be
77 * returned
78 * @return an implementation of the specified diff algorithm
79 */
80 public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) {
81 switch (alg) {
82 case MYERS:
83 return MyersDiff.INSTANCE;
84 case HISTOGRAM:
85 return new HistogramDiff();
86 default:
87 throw new IllegalArgumentException();
88 }
89 }
90
91 /**
92 * Compare two sequences and identify a list of edits between them.
93 *
94 * @param cmp
95 * the comparator supplying the element equivalence function.
96 * @param a
97 * the first (also known as old or pre-image) sequence. Edits
98 * returned by this algorithm will reference indexes using the
99 * 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
100 * {@link org.eclipse.jgit.diff.Edit#getEndA()}.
101 * @param b
102 * the second (also known as new or post-image) sequence. Edits
103 * returned by this algorithm will reference indexes using the
104 * 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
105 * {@link org.eclipse.jgit.diff.Edit#getEndB()}.
106 * @return a modifiable edit list comparing the two sequences. If empty, the
107 * sequences are identical according to {@code cmp}'s rules. The
108 * result list is never null.
109 */
110 public <S extends Sequence> EditList diff(
111 SequenceComparator<? super S> cmp, S a, S b) {
112 Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
113
114 switch (region.getType()) {
115 case INSERT:
116 case DELETE:
117 return EditList.singleton(region);
118
119 case REPLACE: {
120 if (region.getLengthA() == 1 && region.getLengthB() == 1)
121 return EditList.singleton(region);
122
123 SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp);
124 Subsequence<S> as = Subsequence.a(a, region);
125 Subsequence<S> bs = Subsequence.b(b, region);
126 EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
127 return normalize(cmp, e, a, b);
128 }
129
130 case EMPTY:
131 return new EditList(0);
132
133 default:
134 throw new IllegalStateException();
135 }
136 }
137
138 private static <S extends Sequence> Edit coverEdit(S a, S b) {
139 return new Edit(0, a.size(), 0, b.size());
140 }
141
142 /**
143 * Reorganize an {@link EditList} for better diff consistency.
144 * <p>
145 * {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or
146 * {@link Edit.Type#DELETE} edits that can be "shifted". For
147 * example, the deleted section
148 * <pre>
149 * -a
150 * -b
151 * -c
152 * a
153 * b
154 * c
155 * </pre>
156 * can be shifted down by 1, 2 or 3 locations.
157 * <p>
158 * To avoid later merge issues, we shift such edits to a
159 * consistent location. {@code normalize} uses a simple strategy of
160 * shifting such edits to their latest possible location.
161 * <p>
162 * This strategy may not always produce an aesthetically pleasing
163 * diff. For instance, it works well with
164 * <pre>
165 * function1 {
166 * ...
167 * }
168 *
169 * +function2 {
170 * + ...
171 * +}
172 * +
173 * function3 {
174 * ...
175 * }
176 * </pre>
177 * but less so for
178 * <pre>
179 * #
180 * # comment1
181 * #
182 * function1() {
183 * }
184 *
185 * #
186 * +# comment3
187 * +#
188 * +function3() {
189 * +}
190 * +
191 * +#
192 * # comment2
193 * #
194 * function2() {
195 * }
196 * </pre>
197 * <a href="https://github.com/mhagger/diff-slider-tools">More
198 * sophisticated strategies</a> are possible, say by calculating a
199 * suitable "aesthetic cost" for each possible position and using
200 * the lowest cost, but {@code normalize} just shifts edits
201 * to the end as much as possible.
202 *
203 * @param <S>
204 * type of sequence being compared.
205 * @param cmp
206 * the comparator supplying the element equivalence function.
207 * @param e
208 * a modifiable edit list comparing the provided sequences.
209 * @param a
210 * the first (also known as old or pre-image) sequence.
211 * @param b
212 * the second (also known as new or post-image) sequence.
213 * @return a modifiable edit list with edit regions shifted to their
214 * latest possible location. The result list is never null.
215 * @since 4.7
216 */
217 private static <S extends Sequence> EditList normalize(
218 SequenceComparator<? super S> cmp, EditList e, S a, S b) {
219 Edit prev = null;
220 for (int i = e.size() - 1; i >= 0; i--) {
221 Edit cur = e.get(i);
222 Edit.Type curType = cur.getType();
223
224 int maxA = (prev == null) ? a.size() : prev.beginA;
225 int maxB = (prev == null) ? b.size() : prev.beginB;
226
227 if (curType == Edit.Type.INSERT) {
228 while (cur.endA < maxA && cur.endB < maxB
229 && cmp.equals(b, cur.beginB, b, cur.endB)) {
230 cur.shift(1);
231 }
232 } else if (curType == Edit.Type.DELETE) {
233 while (cur.endA < maxA && cur.endB < maxB
234 && cmp.equals(a, cur.beginA, a, cur.endA)) {
235 cur.shift(1);
236 }
237 }
238 prev = cur;
239 }
240 return e;
241 }
242
243 /**
244 * Compare two sequences and identify a list of edits between them.
245 *
246 * This method should be invoked only after the two sequences have been
247 * proven to have no common starting or ending elements. The expected
248 * elimination of common starting and ending elements is automatically
249 * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
250 * method, which invokes this method using
251 * {@link org.eclipse.jgit.diff.Subsequence}s.
252 *
253 * @param cmp
254 * the comparator supplying the element equivalence function.
255 * @param a
256 * the first (also known as old or pre-image) sequence. Edits
257 * returned by this algorithm will reference indexes using the
258 * 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
259 * {@link org.eclipse.jgit.diff.Edit#getEndA()}.
260 * @param b
261 * the second (also known as new or post-image) sequence. Edits
262 * returned by this algorithm will reference indexes using the
263 * 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
264 * {@link org.eclipse.jgit.diff.Edit#getEndB()}.
265 * @return a modifiable edit list comparing the two sequences.
266 */
267 public abstract <S extends Sequence> EditList diffNonCommon(
268 SequenceComparator<? super S> cmp, S a, S b);
269 }