View Javadoc
1   /*
2    * Copyright (C) 2010, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.diff;
45  
46  /**
47   * Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an
48   * {@link org.eclipse.jgit.diff.EditList} of changes.
49   * <p>
50   * An algorithm's {@code diff} method must be callable from concurrent threads
51   * without data collisions. This permits some algorithms to use a singleton
52   * pattern, with concurrent invocations using the same singleton. Other
53   * algorithms may support parameterization, in which case the caller can create
54   * a unique instance per thread.
55   */
56  public abstract class DiffAlgorithm {
57  	/**
58  	 * Supported diff algorithm
59  	 */
60  	public enum SupportedAlgorithm {
61  		/**
62  		 * Myers diff algorithm
63  		 */
64  		MYERS,
65  
66  		/**
67  		 * Histogram diff algorithm
68  		 */
69  		HISTOGRAM
70  	}
71  
72  	/**
73  	 * Get diff algorithm
74  	 *
75  	 * @param alg
76  	 *            the diff algorithm for which an implementation should be
77  	 *            returned
78  	 * @return an implementation of the specified diff algorithm
79  	 */
80  	public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) {
81  		switch (alg) {
82  		case MYERS:
83  			return MyersDiff.INSTANCE;
84  		case HISTOGRAM:
85  			return new HistogramDiff();
86  		default:
87  			throw new IllegalArgumentException();
88  		}
89  	}
90  
91  	/**
92  	 * Compare two sequences and identify a list of edits between them.
93  	 *
94  	 * @param cmp
95  	 *            the comparator supplying the element equivalence function.
96  	 * @param a
97  	 *            the first (also known as old or pre-image) sequence. Edits
98  	 *            returned by this algorithm will reference indexes using the
99  	 *            'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
100 	 *            {@link org.eclipse.jgit.diff.Edit#getEndA()}.
101 	 * @param b
102 	 *            the second (also known as new or post-image) sequence. Edits
103 	 *            returned by this algorithm will reference indexes using the
104 	 *            'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
105 	 *            {@link org.eclipse.jgit.diff.Edit#getEndB()}.
106 	 * @return a modifiable edit list comparing the two sequences. If empty, the
107 	 *         sequences are identical according to {@code cmp}'s rules. The
108 	 *         result list is never null.
109 	 */
110 	public <S extends Sequence> EditList diff(
111 			SequenceComparator<? super S> cmp, S a, S b) {
112 		Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
113 
114 		switch (region.getType()) {
115 		case INSERT:
116 		case DELETE:
117 			return EditList.singleton(region);
118 
119 		case REPLACE: {
120 			if (region.getLengthA() == 1 && region.getLengthB() == 1)
121 				return EditList.singleton(region);
122 
123 			SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp);
124 			Subsequence<S> as = Subsequence.a(a, region);
125 			Subsequence<S> bs = Subsequence.b(b, region);
126 			EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
127 			return normalize(cmp, e, a, b);
128 		}
129 
130 		case EMPTY:
131 			return new EditList(0);
132 
133 		default:
134 			throw new IllegalStateException();
135 		}
136 	}
137 
138 	private static <S extends Sequence> Edit coverEdit(S a, S b) {
139 		return new Edit(0, a.size(), 0, b.size());
140 	}
141 
142 	/**
143 	 * Reorganize an {@link EditList} for better diff consistency.
144 	 * <p>
145 	 * {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or
146 	 * {@link Edit.Type#DELETE} edits that can be "shifted". For
147 	 * example, the deleted section
148 	 * <pre>
149 	 * -a
150 	 * -b
151 	 * -c
152 	 *  a
153 	 *  b
154 	 *  c
155 	 * </pre>
156 	 * can be shifted down by 1, 2 or 3 locations.
157 	 * <p>
158 	 * To avoid later merge issues, we shift such edits to a
159 	 * consistent location. {@code normalize} uses a simple strategy of
160 	 * shifting such edits to their latest possible location.
161 	 * <p>
162 	 * This strategy may not always produce an aesthetically pleasing
163 	 * diff. For instance, it works well with
164 	 * <pre>
165 	 *  function1 {
166 	 *   ...
167 	 *  }
168 	 *
169 	 * +function2 {
170 	 * + ...
171 	 * +}
172 	 * +
173 	 * function3 {
174 	 * ...
175 	 * }
176 	 * </pre>
177 	 * but less so for
178 	 * <pre>
179 	 *  #
180 	 *  # comment1
181 	 *  #
182 	 *  function1() {
183 	 *  }
184 	 *
185 	 *  #
186 	 * +# comment3
187 	 * +#
188 	 * +function3() {
189 	 * +}
190 	 * +
191 	 * +#
192 	 *  # comment2
193 	 *  #
194 	 *  function2() {
195 	 *  }
196 	 * </pre>
197 	 * <a href="https://github.com/mhagger/diff-slider-tools">More
198 	 * sophisticated strategies</a> are possible, say by calculating a
199 	 * suitable "aesthetic cost" for each possible position and using
200 	 * the lowest cost, but {@code normalize} just shifts edits
201 	 * to the end as much as possible.
202 	 *
203 	 * @param <S>
204 	 *            type of sequence being compared.
205 	 * @param cmp
206 	 *            the comparator supplying the element equivalence function.
207 	 * @param e
208 	 *            a modifiable edit list comparing the provided sequences.
209 	 * @param a
210 	 *            the first (also known as old or pre-image) sequence.
211 	 * @param b
212 	 *            the second (also known as new or post-image) sequence.
213 	 * @return a modifiable edit list with edit regions shifted to their
214 	 *         latest possible location. The result list is never null.
215 	 * @since 4.7
216 	 */
217 	private static <S extends Sequence> EditList normalize(
218 		SequenceComparator<? super S> cmp, EditList e, S a, S b) {
219 		Edit prev = null;
220 		for (int i = e.size() - 1; i >= 0; i--) {
221 			Edit cur = e.get(i);
222 			Edit.Type curType = cur.getType();
223 
224 			int maxA = (prev == null) ? a.size() : prev.beginA;
225 			int maxB = (prev == null) ? b.size() : prev.beginB;
226 
227 			if (curType == Edit.Type.INSERT) {
228 				while (cur.endA < maxA && cur.endB < maxB
229 					&& cmp.equals(b, cur.beginB, b, cur.endB)) {
230 					cur.shift(1);
231 				}
232 			} else if (curType == Edit.Type.DELETE) {
233 				while (cur.endA < maxA && cur.endB < maxB
234 					&& cmp.equals(a, cur.beginA, a, cur.endA)) {
235 					cur.shift(1);
236 				}
237 			}
238 			prev = cur;
239 		}
240 		return e;
241 	}
242 
243 	/**
244 	 * Compare two sequences and identify a list of edits between them.
245 	 *
246 	 * This method should be invoked only after the two sequences have been
247 	 * proven to have no common starting or ending elements. The expected
248 	 * elimination of common starting and ending elements is automatically
249 	 * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
250 	 * method, which invokes this method using
251 	 * {@link org.eclipse.jgit.diff.Subsequence}s.
252 	 *
253 	 * @param cmp
254 	 *            the comparator supplying the element equivalence function.
255 	 * @param a
256 	 *            the first (also known as old or pre-image) sequence. Edits
257 	 *            returned by this algorithm will reference indexes using the
258 	 *            'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
259 	 *            {@link org.eclipse.jgit.diff.Edit#getEndA()}.
260 	 * @param b
261 	 *            the second (also known as new or post-image) sequence. Edits
262 	 *            returned by this algorithm will reference indexes using the
263 	 *            'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
264 	 *            {@link org.eclipse.jgit.diff.Edit#getEndB()}.
265 	 * @return a modifiable edit list comparing the two sequences.
266 	 */
267 	public abstract <S extends Sequence> EditList diffNonCommon(
268 			SequenceComparator<? super S> cmp, S a, S b);
269 }