1 /* 2 * Copyright (C) 2010, Google Inc. 3 * and other copyright owners as documented in the project's IP log. 4 * 5 * This program and the accompanying materials are made available 6 * under the terms of the Eclipse Distribution License v1.0 which 7 * accompanies this distribution, is reproduced below, and is 8 * available at http://www.eclipse.org/org/documents/edl-v10.php 9 * 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * - Neither the name of the Eclipse Foundation, Inc. nor the 25 * names of its contributors may be used to endorse or promote 26 * products derived from this software without specific prior 27 * written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 */ 43 44 package org.eclipse.jgit.diff; 45 46 /** 47 * Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an 48 * {@link org.eclipse.jgit.diff.EditList} of changes. 49 * <p> 50 * An algorithm's {@code diff} method must be callable from concurrent threads 51 * without data collisions. This permits some algorithms to use a singleton 52 * pattern, with concurrent invocations using the same singleton. Other 53 * algorithms may support parameterization, in which case the caller can create 54 * a unique instance per thread. 55 */ 56 public abstract class DiffAlgorithm { 57 /** 58 * Supported diff algorithm 59 */ 60 public enum SupportedAlgorithm { 61 /** 62 * Myers diff algorithm 63 */ 64 MYERS, 65 66 /** 67 * Histogram diff algorithm 68 */ 69 HISTOGRAM 70 } 71 72 /** 73 * Get diff algorithm 74 * 75 * @param alg 76 * the diff algorithm for which an implementation should be 77 * returned 78 * @return an implementation of the specified diff algorithm 79 */ 80 public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) { 81 switch (alg) { 82 case MYERS: 83 return MyersDiff.INSTANCE; 84 case HISTOGRAM: 85 return new HistogramDiff(); 86 default: 87 throw new IllegalArgumentException(); 88 } 89 } 90 91 /** 92 * Compare two sequences and identify a list of edits between them. 93 * 94 * @param cmp 95 * the comparator supplying the element equivalence function. 96 * @param a 97 * the first (also known as old or pre-image) sequence. Edits 98 * returned by this algorithm will reference indexes using the 99 * 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()}, 100 * {@link org.eclipse.jgit.diff.Edit#getEndA()}. 101 * @param b 102 * the second (also known as new or post-image) sequence. Edits 103 * returned by this algorithm will reference indexes using the 104 * 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()}, 105 * {@link org.eclipse.jgit.diff.Edit#getEndB()}. 106 * @return a modifiable edit list comparing the two sequences. If empty, the 107 * sequences are identical according to {@code cmp}'s rules. The 108 * result list is never null. 109 */ 110 public <S extends Sequence> EditList diff( 111 SequenceComparator<? super S> cmp, S a, S b) { 112 Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b)); 113 114 switch (region.getType()) { 115 case INSERT: 116 case DELETE: 117 return EditList.singleton(region); 118 119 case REPLACE: { 120 if (region.getLengthA() == 1 && region.getLengthB() == 1) 121 return EditList.singleton(region); 122 123 SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp); 124 Subsequence<S> as = Subsequence.a(a, region); 125 Subsequence<S> bs = Subsequence.b(b, region); 126 EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs); 127 return normalize(cmp, e, a, b); 128 } 129 130 case EMPTY: 131 return new EditList(0); 132 133 default: 134 throw new IllegalStateException(); 135 } 136 } 137 138 private static <S extends Sequence> Edit coverEdit(S a, S b) { 139 return new Edit(0, a.size(), 0, b.size()); 140 } 141 142 /** 143 * Reorganize an {@link EditList} for better diff consistency. 144 * <p> 145 * {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or 146 * {@link Edit.Type#DELETE} edits that can be "shifted". For 147 * example, the deleted section 148 * <pre> 149 * -a 150 * -b 151 * -c 152 * a 153 * b 154 * c 155 * </pre> 156 * can be shifted down by 1, 2 or 3 locations. 157 * <p> 158 * To avoid later merge issues, we shift such edits to a 159 * consistent location. {@code normalize} uses a simple strategy of 160 * shifting such edits to their latest possible location. 161 * <p> 162 * This strategy may not always produce an aesthetically pleasing 163 * diff. For instance, it works well with 164 * <pre> 165 * function1 { 166 * ... 167 * } 168 * 169 * +function2 { 170 * + ... 171 * +} 172 * + 173 * function3 { 174 * ... 175 * } 176 * </pre> 177 * but less so for 178 * <pre> 179 * # 180 * # comment1 181 * # 182 * function1() { 183 * } 184 * 185 * # 186 * +# comment3 187 * +# 188 * +function3() { 189 * +} 190 * + 191 * +# 192 * # comment2 193 * # 194 * function2() { 195 * } 196 * </pre> 197 * <a href="https://github.com/mhagger/diff-slider-tools">More 198 * sophisticated strategies</a> are possible, say by calculating a 199 * suitable "aesthetic cost" for each possible position and using 200 * the lowest cost, but {@code normalize} just shifts edits 201 * to the end as much as possible. 202 * 203 * @param <S> 204 * type of sequence being compared. 205 * @param cmp 206 * the comparator supplying the element equivalence function. 207 * @param e 208 * a modifiable edit list comparing the provided sequences. 209 * @param a 210 * the first (also known as old or pre-image) sequence. 211 * @param b 212 * the second (also known as new or post-image) sequence. 213 * @return a modifiable edit list with edit regions shifted to their 214 * latest possible location. The result list is never null. 215 * @since 4.7 216 */ 217 private static <S extends Sequence> EditList normalize( 218 SequenceComparator<? super S> cmp, EditList e, S a, S b) { 219 Edit prev = null; 220 for (int i = e.size() - 1; i >= 0; i--) { 221 Edit cur = e.get(i); 222 Edit.Type curType = cur.getType(); 223 224 int maxA = (prev == null) ? a.size() : prev.beginA; 225 int maxB = (prev == null) ? b.size() : prev.beginB; 226 227 if (curType == Edit.Type.INSERT) { 228 while (cur.endA < maxA && cur.endB < maxB 229 && cmp.equals(b, cur.beginB, b, cur.endB)) { 230 cur.shift(1); 231 } 232 } else if (curType == Edit.Type.DELETE) { 233 while (cur.endA < maxA && cur.endB < maxB 234 && cmp.equals(a, cur.beginA, a, cur.endA)) { 235 cur.shift(1); 236 } 237 } 238 prev = cur; 239 } 240 return e; 241 } 242 243 /** 244 * Compare two sequences and identify a list of edits between them. 245 * 246 * This method should be invoked only after the two sequences have been 247 * proven to have no common starting or ending elements. The expected 248 * elimination of common starting and ending elements is automatically 249 * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)} 250 * method, which invokes this method using 251 * {@link org.eclipse.jgit.diff.Subsequence}s. 252 * 253 * @param cmp 254 * the comparator supplying the element equivalence function. 255 * @param a 256 * the first (also known as old or pre-image) sequence. Edits 257 * returned by this algorithm will reference indexes using the 258 * 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()}, 259 * {@link org.eclipse.jgit.diff.Edit#getEndA()}. 260 * @param b 261 * the second (also known as new or post-image) sequence. Edits 262 * returned by this algorithm will reference indexes using the 263 * 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()}, 264 * {@link org.eclipse.jgit.diff.Edit#getEndB()}. 265 * @return a modifiable edit list comparing the two sequences. 266 */ 267 public abstract <S extends Sequence> EditList diffNonCommon( 268 SequenceComparator<? super S> cmp, S a, S b); 269 }