1 /*
2 * Copyright (C) 2010, Google Inc. and others
3 *
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Distribution License v. 1.0 which is available at
6 * https://www.eclipse.org/org/documents/edl-v10.php.
7 *
8 * SPDX-License-Identifier: BSD-3-Clause
9 */
10
11 package org.eclipse.jgit.diff;
12
13 /**
14 * Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an
15 * {@link org.eclipse.jgit.diff.EditList} of changes.
16 * <p>
17 * An algorithm's {@code diff} method must be callable from concurrent threads
18 * without data collisions. This permits some algorithms to use a singleton
19 * pattern, with concurrent invocations using the same singleton. Other
20 * algorithms may support parameterization, in which case the caller can create
21 * a unique instance per thread.
22 */
23 public abstract class DiffAlgorithm {
24 /**
25 * Supported diff algorithm
26 */
27 public enum SupportedAlgorithm {
28 /**
29 * Myers diff algorithm
30 */
31 MYERS,
32
33 /**
34 * Histogram diff algorithm
35 */
36 HISTOGRAM
37 }
38
39 /**
40 * Get diff algorithm
41 *
42 * @param alg
43 * the diff algorithm for which an implementation should be
44 * returned
45 * @return an implementation of the specified diff algorithm
46 */
47 public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) {
48 switch (alg) {
49 case MYERS:
50 return MyersDiff.INSTANCE;
51 case HISTOGRAM:
52 return new HistogramDiff();
53 default:
54 throw new IllegalArgumentException();
55 }
56 }
57
58 /**
59 * Compare two sequences and identify a list of edits between them.
60 *
61 * @param cmp
62 * the comparator supplying the element equivalence function.
63 * @param a
64 * the first (also known as old or pre-image) sequence. Edits
65 * returned by this algorithm will reference indexes using the
66 * 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
67 * {@link org.eclipse.jgit.diff.Edit#getEndA()}.
68 * @param b
69 * the second (also known as new or post-image) sequence. Edits
70 * returned by this algorithm will reference indexes using the
71 * 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
72 * {@link org.eclipse.jgit.diff.Edit#getEndB()}.
73 * @return a modifiable edit list comparing the two sequences. If empty, the
74 * sequences are identical according to {@code cmp}'s rules. The
75 * result list is never null.
76 */
77 public <S extends Sequence> EditList diff(
78 SequenceComparator<? super S> cmp, S a, S b) {
79 Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
80
81 switch (region.getType()) {
82 case INSERT:
83 case DELETE:
84 return EditList.singleton(region);
85
86 case REPLACE: {
87 if (region.getLengthA() == 1 && region.getLengthB() == 1)
88 return EditList.singleton(region);
89
90 SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp);
91 Subsequence<S> as = Subsequence.a(a, region);
92 Subsequence<S> bs = Subsequence.b(b, region);
93 EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
94 return normalize(cmp, e, a, b);
95 }
96
97 case EMPTY:
98 return new EditList(0);
99
100 default:
101 throw new IllegalStateException();
102 }
103 }
104
105 private static <S extends Sequence> Edit coverEdit(S a, S b) {
106 return new Edit(0, a.size(), 0, b.size());
107 }
108
109 /**
110 * Reorganize an {@link EditList} for better diff consistency.
111 * <p>
112 * {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or
113 * {@link Edit.Type#DELETE} edits that can be "shifted". For
114 * example, the deleted section
115 * <pre>
116 * -a
117 * -b
118 * -c
119 * a
120 * b
121 * c
122 * </pre>
123 * can be shifted down by 1, 2 or 3 locations.
124 * <p>
125 * To avoid later merge issues, we shift such edits to a
126 * consistent location. {@code normalize} uses a simple strategy of
127 * shifting such edits to their latest possible location.
128 * <p>
129 * This strategy may not always produce an aesthetically pleasing
130 * diff. For instance, it works well with
131 * <pre>
132 * function1 {
133 * ...
134 * }
135 *
136 * +function2 {
137 * + ...
138 * +}
139 * +
140 * function3 {
141 * ...
142 * }
143 * </pre>
144 * but less so for
145 * <pre>
146 * #
147 * # comment1
148 * #
149 * function1() {
150 * }
151 *
152 * #
153 * +# comment3
154 * +#
155 * +function3() {
156 * +}
157 * +
158 * +#
159 * # comment2
160 * #
161 * function2() {
162 * }
163 * </pre>
164 * <a href="https://github.com/mhagger/diff-slider-tools">More
165 * sophisticated strategies</a> are possible, say by calculating a
166 * suitable "aesthetic cost" for each possible position and using
167 * the lowest cost, but {@code normalize} just shifts edits
168 * to the end as much as possible.
169 *
170 * @param <S>
171 * type of sequence being compared.
172 * @param cmp
173 * the comparator supplying the element equivalence function.
174 * @param e
175 * a modifiable edit list comparing the provided sequences.
176 * @param a
177 * the first (also known as old or pre-image) sequence.
178 * @param b
179 * the second (also known as new or post-image) sequence.
180 * @return a modifiable edit list with edit regions shifted to their
181 * latest possible location. The result list is never null.
182 * @since 4.7
183 */
184 private static <S extends Sequence> EditList normalize(
185 SequenceComparator<? super S> cmp, EditList e, S a, S b) {
186 Edit prev = null;
187 for (int i = e.size() - 1; i >= 0; i--) {
188 Edit cur = e.get(i);
189 Edit.Type curType = cur.getType();
190
191 int maxA = (prev == null) ? a.size() : prev.beginA;
192 int maxB = (prev == null) ? b.size() : prev.beginB;
193
194 if (curType == Edit.Type.INSERT) {
195 while (cur.endA < maxA && cur.endB < maxB
196 && cmp.equals(b, cur.beginB, b, cur.endB)) {
197 cur.shift(1);
198 }
199 } else if (curType == Edit.Type.DELETE) {
200 while (cur.endA < maxA && cur.endB < maxB
201 && cmp.equals(a, cur.beginA, a, cur.endA)) {
202 cur.shift(1);
203 }
204 }
205 prev = cur;
206 }
207 return e;
208 }
209
210 /**
211 * Compare two sequences and identify a list of edits between them.
212 *
213 * This method should be invoked only after the two sequences have been
214 * proven to have no common starting or ending elements. The expected
215 * elimination of common starting and ending elements is automatically
216 * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
217 * method, which invokes this method using
218 * {@link org.eclipse.jgit.diff.Subsequence}s.
219 *
220 * @param cmp
221 * the comparator supplying the element equivalence function.
222 * @param a
223 * the first (also known as old or pre-image) sequence. Edits
224 * returned by this algorithm will reference indexes using the
225 * 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
226 * {@link org.eclipse.jgit.diff.Edit#getEndA()}.
227 * @param b
228 * the second (also known as new or post-image) sequence. Edits
229 * returned by this algorithm will reference indexes using the
230 * 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
231 * {@link org.eclipse.jgit.diff.Edit#getEndB()}.
232 * @return a modifiable edit list comparing the two sequences.
233 */
234 public abstract <S extends Sequence> EditList diffNonCommon(
235 SequenceComparator<? super S> cmp, S a, S b);
236 }