View Javadoc
1   /*
2    * Copyright (C) 2010, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.diff;
45  
46  /**
47   * Compares two {@link Sequence}s to create an {@link EditList} of changes.
48   * <p>
49   * An algorithm's {@code diff} method must be callable from concurrent threads
50   * without data collisions. This permits some algorithms to use a singleton
51   * pattern, with concurrent invocations using the same singleton. Other
52   * algorithms may support parameterization, in which case the caller can create
53   * a unique instance per thread.
54   */
55  public abstract class DiffAlgorithm {
56  	/**
57  	 * Supported diff algorithm
58  	 */
59  	public enum SupportedAlgorithm {
60  		/**
61  		 * Myers diff algorithm
62  		 */
63  		MYERS,
64  
65  		/**
66  		 * Histogram diff algorithm
67  		 */
68  		HISTOGRAM
69  	}
70  
71  	/**
72  	 * @param alg
73  	 *            the diff algorithm for which an implementation should be
74  	 *            returned
75  	 * @return an implementation of the specified diff algorithm
76  	 */
77  	public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) {
78  		switch (alg) {
79  		case MYERS:
80  			return MyersDiff.INSTANCE;
81  		case HISTOGRAM:
82  			return new HistogramDiff();
83  		default:
84  			throw new IllegalArgumentException();
85  		}
86  	}
87  
88  	/**
89  	 * Compare two sequences and identify a list of edits between them.
90  	 *
91  	 * @param <S>
92  	 *            type of sequence being compared.
93  	 * @param cmp
94  	 *            the comparator supplying the element equivalence function.
95  	 * @param a
96  	 *            the first (also known as old or pre-image) sequence. Edits
97  	 *            returned by this algorithm will reference indexes using the
98  	 *            'A' side: {@link Edit#getBeginA()}, {@link Edit#getEndA()}.
99  	 * @param b
100 	 *            the second (also known as new or post-image) sequence. Edits
101 	 *            returned by this algorithm will reference indexes using the
102 	 *            'B' side: {@link Edit#getBeginB()}, {@link Edit#getEndB()}.
103 	 * @return a modifiable edit list comparing the two sequences. If empty, the
104 	 *         sequences are identical according to {@code cmp}'s rules. The
105 	 *         result list is never null.
106 	 */
107 	public <S extends Sequence> EditList diff(
108 			SequenceComparator<? super S> cmp, S a, S b) {
109 		Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
110 
111 		switch (region.getType()) {
112 		case INSERT:
113 		case DELETE:
114 			return EditList.singleton(region);
115 
116 		case REPLACE: {
117 			if (region.getLengthA() == 1 && region.getLengthB() == 1)
118 				return EditList.singleton(region);
119 
120 			SubsequenceComparator<S> cs = new SubsequenceComparator<S>(cmp);
121 			Subsequence<S> as = Subsequence.a(a, region);
122 			Subsequence<S> bs = Subsequence.b(b, region);
123 			EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
124 
125 			// The last insertion may need to be shifted later if it
126 			// inserts elements that were previously reduced out as
127 			// common at the end.
128 			//
129 			Edit last = e.get(e.size() - 1);
130 			if (last.getType() == Edit.Type.INSERT) {
131 				while (last.endB < b.size()
132 						&& cmp.equals(b, last.beginB, b, last.endB)) {
133 					last.beginA++;
134 					last.endA++;
135 					last.beginB++;
136 					last.endB++;
137 				}
138 			}
139 
140 			return e;
141 		}
142 
143 		case EMPTY:
144 			return new EditList(0);
145 
146 		default:
147 			throw new IllegalStateException();
148 		}
149 	}
150 
151 	private static <S extends Sequence> Edit coverEdit(S a, S b) {
152 		return new Edit(0, a.size(), 0, b.size());
153 	}
154 
155 	/**
156 	 * Compare two sequences and identify a list of edits between them.
157 	 *
158 	 * This method should be invoked only after the two sequences have been
159 	 * proven to have no common starting or ending elements. The expected
160 	 * elimination of common starting and ending elements is automatically
161 	 * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
162 	 * method, which invokes this method using {@link Subsequence}s.
163 	 *
164 	 * @param <S>
165 	 *            type of sequence being compared.
166 	 * @param cmp
167 	 *            the comparator supplying the element equivalence function.
168 	 * @param a
169 	 *            the first (also known as old or pre-image) sequence. Edits
170 	 *            returned by this algorithm will reference indexes using the
171 	 *            'A' side: {@link Edit#getBeginA()}, {@link Edit#getEndA()}.
172 	 * @param b
173 	 *            the second (also known as new or post-image) sequence. Edits
174 	 *            returned by this algorithm will reference indexes using the
175 	 *            'B' side: {@link Edit#getBeginB()}, {@link Edit#getEndB()}.
176 	 * @return a modifiable edit list comparing the two sequences.
177 	 */
178 	public abstract <S extends Sequence> EditList diffNonCommon(
179 			SequenceComparator<? super S> cmp, S a, S b);
180 }