1 /* 2 * Copyright (C) 2010, Google Inc. and others 3 * 4 * This program and the accompanying materials are made available under the 5 * terms of the Eclipse Distribution License v. 1.0 which is available at 6 * https://www.eclipse.org/org/documents/edl-v10.php. 7 * 8 * SPDX-License-Identifier: BSD-3-Clause 9 */ 10 11 package org.eclipse.jgit.diff; 12 13 /** 14 * Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an 15 * {@link org.eclipse.jgit.diff.EditList} of changes. 16 * <p> 17 * An algorithm's {@code diff} method must be callable from concurrent threads 18 * without data collisions. This permits some algorithms to use a singleton 19 * pattern, with concurrent invocations using the same singleton. Other 20 * algorithms may support parameterization, in which case the caller can create 21 * a unique instance per thread. 22 */ 23 public abstract class DiffAlgorithm { 24 /** 25 * Supported diff algorithm 26 */ 27 public enum SupportedAlgorithm { 28 /** 29 * Myers diff algorithm 30 */ 31 MYERS, 32 33 /** 34 * Histogram diff algorithm 35 */ 36 HISTOGRAM 37 } 38 39 /** 40 * Get diff algorithm 41 * 42 * @param alg 43 * the diff algorithm for which an implementation should be 44 * returned 45 * @return an implementation of the specified diff algorithm 46 */ 47 public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) { 48 switch (alg) { 49 case MYERS: 50 return MyersDiff.INSTANCE; 51 case HISTOGRAM: 52 return new HistogramDiff(); 53 default: 54 throw new IllegalArgumentException(); 55 } 56 } 57 58 /** 59 * Compare two sequences and identify a list of edits between them. 60 * 61 * @param cmp 62 * the comparator supplying the element equivalence function. 63 * @param a 64 * the first (also known as old or pre-image) sequence. Edits 65 * returned by this algorithm will reference indexes using the 66 * 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()}, 67 * {@link org.eclipse.jgit.diff.Edit#getEndA()}. 68 * @param b 69 * the second (also known as new or post-image) sequence. Edits 70 * returned by this algorithm will reference indexes using the 71 * 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()}, 72 * {@link org.eclipse.jgit.diff.Edit#getEndB()}. 73 * @return a modifiable edit list comparing the two sequences. If empty, the 74 * sequences are identical according to {@code cmp}'s rules. The 75 * result list is never null. 76 */ 77 public <S extends Sequence> EditList diff( 78 SequenceComparator<? super S> cmp, S a, S b) { 79 Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b)); 80 81 switch (region.getType()) { 82 case INSERT: 83 case DELETE: 84 return EditList.singleton(region); 85 86 case REPLACE: { 87 if (region.getLengthA() == 1 && region.getLengthB() == 1) 88 return EditList.singleton(region); 89 90 SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp); 91 Subsequence<S> as = Subsequence.a(a, region); 92 Subsequence<S> bs = Subsequence.b(b, region); 93 EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs); 94 return normalize(cmp, e, a, b); 95 } 96 97 case EMPTY: 98 return new EditList(0); 99 100 default: 101 throw new IllegalStateException(); 102 } 103 } 104 105 private static <S extends Sequence> Edit coverEdit(S a, S b) { 106 return new Edit(0, a.size(), 0, b.size()); 107 } 108 109 /** 110 * Reorganize an {@link EditList} for better diff consistency. 111 * <p> 112 * {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or 113 * {@link Edit.Type#DELETE} edits that can be "shifted". For 114 * example, the deleted section 115 * <pre> 116 * -a 117 * -b 118 * -c 119 * a 120 * b 121 * c 122 * </pre> 123 * can be shifted down by 1, 2 or 3 locations. 124 * <p> 125 * To avoid later merge issues, we shift such edits to a 126 * consistent location. {@code normalize} uses a simple strategy of 127 * shifting such edits to their latest possible location. 128 * <p> 129 * This strategy may not always produce an aesthetically pleasing 130 * diff. For instance, it works well with 131 * <pre> 132 * function1 { 133 * ... 134 * } 135 * 136 * +function2 { 137 * + ... 138 * +} 139 * + 140 * function3 { 141 * ... 142 * } 143 * </pre> 144 * but less so for 145 * <pre> 146 * # 147 * # comment1 148 * # 149 * function1() { 150 * } 151 * 152 * # 153 * +# comment3 154 * +# 155 * +function3() { 156 * +} 157 * + 158 * +# 159 * # comment2 160 * # 161 * function2() { 162 * } 163 * </pre> 164 * <a href="https://github.com/mhagger/diff-slider-tools">More 165 * sophisticated strategies</a> are possible, say by calculating a 166 * suitable "aesthetic cost" for each possible position and using 167 * the lowest cost, but {@code normalize} just shifts edits 168 * to the end as much as possible. 169 * 170 * @param <S> 171 * type of sequence being compared. 172 * @param cmp 173 * the comparator supplying the element equivalence function. 174 * @param e 175 * a modifiable edit list comparing the provided sequences. 176 * @param a 177 * the first (also known as old or pre-image) sequence. 178 * @param b 179 * the second (also known as new or post-image) sequence. 180 * @return a modifiable edit list with edit regions shifted to their 181 * latest possible location. The result list is never null. 182 * @since 4.7 183 */ 184 private static <S extends Sequence> EditList normalize( 185 SequenceComparator<? super S> cmp, EditList e, S a, S b) { 186 Edit prev = null; 187 for (int i = e.size() - 1; i >= 0; i--) { 188 Edit cur = e.get(i); 189 Edit.Type curType = cur.getType(); 190 191 int maxA = (prev == null) ? a.size() : prev.beginA; 192 int maxB = (prev == null) ? b.size() : prev.beginB; 193 194 if (curType == Edit.Type.INSERT) { 195 while (cur.endA < maxA && cur.endB < maxB 196 && cmp.equals(b, cur.beginB, b, cur.endB)) { 197 cur.shift(1); 198 } 199 } else if (curType == Edit.Type.DELETE) { 200 while (cur.endA < maxA && cur.endB < maxB 201 && cmp.equals(a, cur.beginA, a, cur.endA)) { 202 cur.shift(1); 203 } 204 } 205 prev = cur; 206 } 207 return e; 208 } 209 210 /** 211 * Compare two sequences and identify a list of edits between them. 212 * 213 * This method should be invoked only after the two sequences have been 214 * proven to have no common starting or ending elements. The expected 215 * elimination of common starting and ending elements is automatically 216 * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)} 217 * method, which invokes this method using 218 * {@link org.eclipse.jgit.diff.Subsequence}s. 219 * 220 * @param cmp 221 * the comparator supplying the element equivalence function. 222 * @param a 223 * the first (also known as old or pre-image) sequence. Edits 224 * returned by this algorithm will reference indexes using the 225 * 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()}, 226 * {@link org.eclipse.jgit.diff.Edit#getEndA()}. 227 * @param b 228 * the second (also known as new or post-image) sequence. Edits 229 * returned by this algorithm will reference indexes using the 230 * 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()}, 231 * {@link org.eclipse.jgit.diff.Edit#getEndB()}. 232 * @return a modifiable edit list comparing the two sequences. 233 */ 234 public abstract <S extends Sequence> EditList diffNonCommon( 235 SequenceComparator<? super S> cmp, S a, S b); 236 }