View Javadoc
1   /*
2    * Copyright (C) 2009, Google Inc.
3    * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.diff;
46  
47  import static org.eclipse.jgit.diff.DiffEntry.ChangeType.ADD;
48  import static org.eclipse.jgit.diff.DiffEntry.ChangeType.COPY;
49  import static org.eclipse.jgit.diff.DiffEntry.ChangeType.DELETE;
50  import static org.eclipse.jgit.diff.DiffEntry.ChangeType.MODIFY;
51  import static org.eclipse.jgit.diff.DiffEntry.ChangeType.RENAME;
52  import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
53  import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
54  import static org.eclipse.jgit.lib.Constants.encode;
55  import static org.eclipse.jgit.lib.Constants.encodeASCII;
56  import static org.eclipse.jgit.lib.FileMode.GITLINK;
57  
58  import java.io.ByteArrayOutputStream;
59  import java.io.IOException;
60  import java.io.OutputStream;
61  import java.util.Collection;
62  import java.util.Collections;
63  import java.util.List;
64  
65  import org.eclipse.jgit.diff.DiffAlgorithm.SupportedAlgorithm;
66  import org.eclipse.jgit.diff.DiffEntry.ChangeType;
67  import org.eclipse.jgit.dircache.DirCacheIterator;
68  import org.eclipse.jgit.errors.AmbiguousObjectException;
69  import org.eclipse.jgit.errors.BinaryBlobException;
70  import org.eclipse.jgit.errors.CorruptObjectException;
71  import org.eclipse.jgit.errors.IncorrectObjectTypeException;
72  import org.eclipse.jgit.errors.MissingObjectException;
73  import org.eclipse.jgit.internal.JGitText;
74  import org.eclipse.jgit.lib.AbbreviatedObjectId;
75  import org.eclipse.jgit.lib.AnyObjectId;
76  import org.eclipse.jgit.lib.Config;
77  import org.eclipse.jgit.lib.ConfigConstants;
78  import org.eclipse.jgit.lib.Constants;
79  import org.eclipse.jgit.lib.FileMode;
80  import org.eclipse.jgit.lib.ObjectId;
81  import org.eclipse.jgit.lib.ObjectLoader;
82  import org.eclipse.jgit.lib.ObjectReader;
83  import org.eclipse.jgit.lib.ProgressMonitor;
84  import org.eclipse.jgit.lib.Repository;
85  import org.eclipse.jgit.patch.FileHeader;
86  import org.eclipse.jgit.patch.FileHeader.PatchType;
87  import org.eclipse.jgit.revwalk.FollowFilter;
88  import org.eclipse.jgit.revwalk.RevTree;
89  import org.eclipse.jgit.revwalk.RevWalk;
90  import org.eclipse.jgit.storage.pack.PackConfig;
91  import org.eclipse.jgit.treewalk.AbstractTreeIterator;
92  import org.eclipse.jgit.treewalk.CanonicalTreeParser;
93  import org.eclipse.jgit.treewalk.EmptyTreeIterator;
94  import org.eclipse.jgit.treewalk.TreeWalk;
95  import org.eclipse.jgit.treewalk.WorkingTreeIterator;
96  import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
97  import org.eclipse.jgit.treewalk.filter.IndexDiffFilter;
98  import org.eclipse.jgit.treewalk.filter.NotIgnoredFilter;
99  import org.eclipse.jgit.treewalk.filter.PathFilter;
100 import org.eclipse.jgit.treewalk.filter.TreeFilter;
101 import org.eclipse.jgit.util.LfsFactory;
102 import org.eclipse.jgit.util.QuotedString;
103 
104 /**
105  * Format a Git style patch script.
106  */
107 public class DiffFormatter implements AutoCloseable {
108 	private static final int DEFAULT_BINARY_FILE_THRESHOLD = PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
109 
110 	private static final byte[] noNewLine = encodeASCII("\\ No newline at end of file\n"); //$NON-NLS-1$
111 
112 	/** Magic return content indicating it is empty or no content present. */
113 	private static final byte[] EMPTY = new byte[] {};
114 
115 	private final OutputStream out;
116 
117 	private ObjectReader reader;
118 
119 	private boolean closeReader;
120 
121 	private DiffConfig diffCfg;
122 
123 	private int context = 3;
124 
125 	private int abbreviationLength = 7;
126 
127 	private DiffAlgorithm diffAlgorithm;
128 
129 	private RawTextComparator comparator = RawTextComparator.DEFAULT;
130 
131 	private int binaryFileThreshold = DEFAULT_BINARY_FILE_THRESHOLD;
132 
133 	private String oldPrefix = "a/"; //$NON-NLS-1$
134 
135 	private String newPrefix = "b/"; //$NON-NLS-1$
136 
137 	private TreeFilter pathFilter = TreeFilter.ALL;
138 
139 	private RenameDetector renameDetector;
140 
141 	private ProgressMonitor progressMonitor;
142 
143 	private ContentSource.Pair source;
144 
145 	private Repository repository;
146 
147 	/**
148 	 * Create a new formatter with a default level of context.
149 	 *
150 	 * @param out
151 	 *            the stream the formatter will write line data to. This stream
152 	 *            should have buffering arranged by the caller, as many small
153 	 *            writes are performed to it.
154 	 */
155 	public DiffFormatter(OutputStream out) {
156 		this.out = out;
157 	}
158 
159 	/**
160 	 * Get output stream
161 	 *
162 	 * @return the stream we are outputting data to
163 	 */
164 	protected OutputStream getOutputStream() {
165 		return out;
166 	}
167 
168 	/**
169 	 * Set the repository the formatter can load object contents from.
170 	 *
171 	 * Once a repository has been set, the formatter must be released to ensure
172 	 * the internal ObjectReader is able to release its resources.
173 	 *
174 	 * @param repository
175 	 *            source repository holding referenced objects.
176 	 */
177 	public void setRepository(Repository repository) {
178 		this.repository = repository;
179 		setReader(repository.newObjectReader(), repository.getConfig(), true);
180 	}
181 
182 	/**
183 	 * Set the repository the formatter can load object contents from.
184 	 *
185 	 * @param reader
186 	 *            source reader holding referenced objects. Caller is responsible
187 	 *            for closing the reader.
188 	 * @param cfg
189 	 *            config specifying diff algorithm and rename detection options.
190 	 * @since 4.5
191 	 */
192 	public void setReader(ObjectReader reader, Config cfg) {
193 		setReader(reader, cfg, false);
194 	}
195 
196 	private void setReader(ObjectReader reader, Config cfg, boolean closeReader) {
197 		close();
198 		this.closeReader = closeReader;
199 		this.reader = reader;
200 		this.diffCfg = cfg.get(DiffConfig.KEY);
201 
202 		ContentSource cs = ContentSource.create(reader);
203 		source = new ContentSource.Pair(cs, cs);
204 
205 		if (diffCfg.isNoPrefix()) {
206 			setOldPrefix(""); //$NON-NLS-1$
207 			setNewPrefix(""); //$NON-NLS-1$
208 		}
209 		setDetectRenames(diffCfg.isRenameDetectionEnabled());
210 
211 		diffAlgorithm = DiffAlgorithm.getAlgorithm(cfg.getEnum(
212 				ConfigConstants.CONFIG_DIFF_SECTION, null,
213 				ConfigConstants.CONFIG_KEY_ALGORITHM,
214 				SupportedAlgorithm.HISTOGRAM));
215 	}
216 
217 	/**
218 	 * Change the number of lines of context to display.
219 	 *
220 	 * @param lineCount
221 	 *            number of lines of context to see before the first
222 	 *            modification and after the last modification within a hunk of
223 	 *            the modified file.
224 	 */
225 	public void setContext(int lineCount) {
226 		if (lineCount < 0)
227 			throw new IllegalArgumentException(
228 					JGitText.get().contextMustBeNonNegative);
229 		context = lineCount;
230 	}
231 
232 	/**
233 	 * Change the number of digits to show in an ObjectId.
234 	 *
235 	 * @param count
236 	 *            number of digits to show in an ObjectId.
237 	 */
238 	public void setAbbreviationLength(int count) {
239 		if (count < 0)
240 			throw new IllegalArgumentException(
241 					JGitText.get().abbreviationLengthMustBeNonNegative);
242 		abbreviationLength = count;
243 	}
244 
245 	/**
246 	 * Set the algorithm that constructs difference output.
247 	 *
248 	 * @param alg
249 	 *            the algorithm to produce text file differences.
250 	 * @see HistogramDiff
251 	 */
252 	public void setDiffAlgorithm(DiffAlgorithm alg) {
253 		diffAlgorithm = alg;
254 	}
255 
256 	/**
257 	 * Set the line equivalence function for text file differences.
258 	 *
259 	 * @param cmp
260 	 *            The equivalence function used to determine if two lines of
261 	 *            text are identical. The function can be changed to ignore
262 	 *            various types of whitespace.
263 	 * @see RawTextComparator#DEFAULT
264 	 * @see RawTextComparator#WS_IGNORE_ALL
265 	 * @see RawTextComparator#WS_IGNORE_CHANGE
266 	 * @see RawTextComparator#WS_IGNORE_LEADING
267 	 * @see RawTextComparator#WS_IGNORE_TRAILING
268 	 */
269 	public void setDiffComparator(RawTextComparator cmp) {
270 		comparator = cmp;
271 	}
272 
273 	/**
274 	 * Set maximum file size for text files.
275 	 *
276 	 * Files larger than this size will be treated as though they are binary and
277 	 * not text. Default is {@value #DEFAULT_BINARY_FILE_THRESHOLD} .
278 	 *
279 	 * @param threshold
280 	 *            the limit, in bytes. Files larger than this size will be
281 	 *            assumed to be binary, even if they aren't.
282 	 */
283 	public void setBinaryFileThreshold(int threshold) {
284 		this.binaryFileThreshold = threshold;
285 	}
286 
287 	/**
288 	 * Set the prefix applied in front of old file paths.
289 	 *
290 	 * @param prefix
291 	 *            the prefix in front of old paths. Typically this is the
292 	 *            standard string {@code "a/"}, but may be any prefix desired by
293 	 *            the caller. Must not be null. Use the empty string to have no
294 	 *            prefix at all.
295 	 */
296 	public void setOldPrefix(String prefix) {
297 		oldPrefix = prefix;
298 	}
299 
300 	/**
301 	 * Get the prefix applied in front of old file paths.
302 	 *
303 	 * @return the prefix
304 	 * @since 2.0
305 	 */
306 	public String getOldPrefix() {
307 		return this.oldPrefix;
308 	}
309 
310 	/**
311 	 * Set the prefix applied in front of new file paths.
312 	 *
313 	 * @param prefix
314 	 *            the prefix in front of new paths. Typically this is the
315 	 *            standard string {@code "b/"}, but may be any prefix desired by
316 	 *            the caller. Must not be null. Use the empty string to have no
317 	 *            prefix at all.
318 	 */
319 	public void setNewPrefix(String prefix) {
320 		newPrefix = prefix;
321 	}
322 
323 	/**
324 	 * Get the prefix applied in front of new file paths.
325 	 *
326 	 * @return the prefix
327 	 * @since 2.0
328 	 */
329 	public String getNewPrefix() {
330 		return this.newPrefix;
331 	}
332 
333 	/**
334 	 * Get if rename detection is enabled
335 	 *
336 	 * @return true if rename detection is enabled
337 	 */
338 	public boolean isDetectRenames() {
339 		return renameDetector != null;
340 	}
341 
342 	/**
343 	 * Enable or disable rename detection.
344 	 *
345 	 * Before enabling rename detection the repository must be set with
346 	 * {@link #setRepository(Repository)}. Once enabled the detector can be
347 	 * configured away from its defaults by obtaining the instance directly from
348 	 * {@link #getRenameDetector()} and invoking configuration.
349 	 *
350 	 * @param on
351 	 *            if rename detection should be enabled.
352 	 */
353 	public void setDetectRenames(boolean on) {
354 		if (on && renameDetector == null) {
355 			assertHaveReader();
356 			renameDetector = new RenameDetector(reader, diffCfg);
357 		} else if (!on)
358 			renameDetector = null;
359 	}
360 
361 	/**
362 	 * Get rename detector
363 	 *
364 	 * @return the rename detector if rename detection is enabled
365 	 */
366 	public RenameDetector getRenameDetector() {
367 		return renameDetector;
368 	}
369 
370 	/**
371 	 * Set the progress monitor for long running rename detection.
372 	 *
373 	 * @param pm
374 	 *            progress monitor to receive rename detection status through.
375 	 */
376 	public void setProgressMonitor(ProgressMonitor pm) {
377 		progressMonitor = pm;
378 	}
379 
380 	/**
381 	 * Set the filter to produce only specific paths.
382 	 *
383 	 * If the filter is an instance of
384 	 * {@link org.eclipse.jgit.revwalk.FollowFilter}, the filter path will be
385 	 * updated during successive scan or format invocations. The updated path
386 	 * can be obtained from {@link #getPathFilter()}.
387 	 *
388 	 * @param filter
389 	 *            the tree filter to apply.
390 	 */
391 	public void setPathFilter(TreeFilter filter) {
392 		pathFilter = filter != null ? filter : TreeFilter.ALL;
393 	}
394 
395 	/**
396 	 * Get path filter
397 	 *
398 	 * @return the current path filter
399 	 */
400 	public TreeFilter getPathFilter() {
401 		return pathFilter;
402 	}
403 
404 	/**
405 	 * Flush the underlying output stream of this formatter.
406 	 *
407 	 * @throws java.io.IOException
408 	 *             the stream's own flush method threw an exception.
409 	 */
410 	public void flush() throws IOException {
411 		out.flush();
412 	}
413 
414 	/**
415 	 * {@inheritDoc}
416 	 * <p>
417 	 * Release the internal ObjectReader state.
418 	 *
419 	 * @since 4.0
420 	 */
421 	@Override
422 	public void close() {
423 		if (reader != null && closeReader) {
424 			reader.close();
425 		}
426 	}
427 
428 	/**
429 	 * Determine the differences between two trees.
430 	 *
431 	 * No output is created, instead only the file paths that are different are
432 	 * returned. Callers may choose to format these paths themselves, or convert
433 	 * them into {@link org.eclipse.jgit.patch.FileHeader} instances with a
434 	 * complete edit list by calling {@link #toFileHeader(DiffEntry)}.
435 	 * <p>
436 	 * Either side may be null to indicate that the tree has beed added or
437 	 * removed. The diff will be computed against nothing.
438 	 *
439 	 * @param a
440 	 *            the old (or previous) side or null
441 	 * @param b
442 	 *            the new (or updated) side or null
443 	 * @return the paths that are different.
444 	 * @throws java.io.IOException
445 	 *             trees cannot be read or file contents cannot be read.
446 	 */
447 	public List<DiffEntry> scan(AnyObjectId a, AnyObjectId b)
448 			throws IOException {
449 		assertHaveReader();
450 
451 		try (RevWalk rw = new RevWalk(reader)) {
452 			RevTree aTree = a != null ? rw.parseTree(a) : null;
453 			RevTree bTree = b != null ? rw.parseTree(b) : null;
454 			return scan(aTree, bTree);
455 		}
456 	}
457 
458 	/**
459 	 * Determine the differences between two trees.
460 	 *
461 	 * No output is created, instead only the file paths that are different are
462 	 * returned. Callers may choose to format these paths themselves, or convert
463 	 * them into {@link org.eclipse.jgit.patch.FileHeader} instances with a
464 	 * complete edit list by calling {@link #toFileHeader(DiffEntry)}.
465 	 * <p>
466 	 * Either side may be null to indicate that the tree has beed added or
467 	 * removed. The diff will be computed against nothing.
468 	 *
469 	 * @param a
470 	 *            the old (or previous) side or null
471 	 * @param b
472 	 *            the new (or updated) side or null
473 	 * @return the paths that are different.
474 	 * @throws java.io.IOException
475 	 *             trees cannot be read or file contents cannot be read.
476 	 */
477 	public List<DiffEntry> scan(RevTree a, RevTree b) throws IOException {
478 		assertHaveReader();
479 
480 		AbstractTreeIterator aIterator = makeIteratorFromTreeOrNull(a);
481 		AbstractTreeIterator bIterator = makeIteratorFromTreeOrNull(b);
482 		return scan(aIterator, bIterator);
483 	}
484 
485 	private AbstractTreeIterator makeIteratorFromTreeOrNull(RevTree tree)
486 			throws IncorrectObjectTypeException, IOException {
487 		if (tree != null) {
488 			CanonicalTreeParser parser = new CanonicalTreeParser();
489 			parser.reset(reader, tree);
490 			return parser;
491 		} else
492 			return new EmptyTreeIterator();
493 	}
494 
495 	/**
496 	 * Determine the differences between two trees.
497 	 *
498 	 * No output is created, instead only the file paths that are different are
499 	 * returned. Callers may choose to format these paths themselves, or convert
500 	 * them into {@link org.eclipse.jgit.patch.FileHeader} instances with a
501 	 * complete edit list by calling {@link #toFileHeader(DiffEntry)}.
502 	 *
503 	 * @param a
504 	 *            the old (or previous) side.
505 	 * @param b
506 	 *            the new (or updated) side.
507 	 * @return the paths that are different.
508 	 * @throws java.io.IOException
509 	 *             trees cannot be read or file contents cannot be read.
510 	 */
511 	public List<DiffEntry> scan(AbstractTreeIterator a, AbstractTreeIterator b)
512 			throws IOException {
513 		assertHaveReader();
514 
515 		TreeWalk walk = new TreeWalk(reader);
516 		walk.addTree(a);
517 		walk.addTree(b);
518 		walk.setRecursive(true);
519 
520 		TreeFilter filter = getDiffTreeFilterFor(a, b);
521 		if (pathFilter instanceof FollowFilter) {
522 			walk.setFilter(AndTreeFilter.create(
523 					PathFilter.create(((FollowFilter) pathFilter).getPath()),
524 					filter));
525 		} else {
526 			walk.setFilter(AndTreeFilter.create(pathFilter, filter));
527 		}
528 
529 		source = new ContentSource.Pair(source(a), source(b));
530 
531 		List<DiffEntry> files = DiffEntry.scan(walk);
532 		if (pathFilter instanceof FollowFilter && isAdd(files)) {
533 			// The file we are following was added here, find where it
534 			// came from so we can properly show the rename or copy,
535 			// then continue digging backwards.
536 			//
537 			a.reset();
538 			b.reset();
539 			walk.reset();
540 			walk.addTree(a);
541 			walk.addTree(b);
542 			walk.setFilter(filter);
543 
544 			if (renameDetector == null)
545 				setDetectRenames(true);
546 			files = updateFollowFilter(detectRenames(DiffEntry.scan(walk)));
547 
548 		} else if (renameDetector != null)
549 			files = detectRenames(files);
550 
551 		return files;
552 	}
553 
554 	private static TreeFilter getDiffTreeFilterFor(AbstractTreeIterator a,
555 			AbstractTreeIterator b) {
556 		if (a instanceof DirCacheIterator && b instanceof WorkingTreeIterator)
557 			return new IndexDiffFilter(0, 1);
558 
559 		if (a instanceof WorkingTreeIterator && b instanceof DirCacheIterator)
560 			return new IndexDiffFilter(1, 0);
561 
562 		TreeFilter filter = TreeFilter.ANY_DIFF;
563 		if (a instanceof WorkingTreeIterator)
564 			filter = AndTreeFilter.create(new NotIgnoredFilter(0), filter);
565 		if (b instanceof WorkingTreeIterator)
566 			filter = AndTreeFilter.create(new NotIgnoredFilter(1), filter);
567 		return filter;
568 	}
569 
570 	private ContentSource source(AbstractTreeIterator iterator) {
571 		if (iterator instanceof WorkingTreeIterator)
572 			return ContentSource.create((WorkingTreeIterator) iterator);
573 		return ContentSource.create(reader);
574 	}
575 
576 	private List<DiffEntry> detectRenames(List<DiffEntry> files)
577 			throws IOException {
578 		renameDetector.reset();
579 		renameDetector.addAll(files);
580 		return renameDetector.compute(reader, progressMonitor);
581 	}
582 
583 	private boolean isAdd(List<DiffEntry> files) {
584 		String oldPath = ((FollowFilter) pathFilter).getPath();
585 		for (DiffEntry ent : files) {
586 			if (ent.getChangeType() == ADD && ent.getNewPath().equals(oldPath))
587 				return true;
588 		}
589 		return false;
590 	}
591 
592 	private List<DiffEntry> updateFollowFilter(List<DiffEntry> files) {
593 		String oldPath = ((FollowFilter) pathFilter).getPath();
594 		for (DiffEntry ent : files) {
595 			if (isRename(ent) && ent.getNewPath().equals(oldPath)) {
596 				pathFilter = FollowFilter.create(ent.getOldPath(), diffCfg);
597 				return Collections.singletonList(ent);
598 			}
599 		}
600 		return Collections.emptyList();
601 	}
602 
603 	private static boolean isRename(DiffEntry ent) {
604 		return ent.getChangeType() == RENAME || ent.getChangeType() == COPY;
605 	}
606 
607 	/**
608 	 * Format the differences between two trees.
609 	 *
610 	 * The patch is expressed as instructions to modify {@code a} to make it
611 	 * {@code b}.
612 	 * <p>
613 	 * Either side may be null to indicate that the tree has beed added or
614 	 * removed. The diff will be computed against nothing.
615 	 *
616 	 * @param a
617 	 *            the old (or previous) side or null
618 	 * @param b
619 	 *            the new (or updated) side or null
620 	 * @throws java.io.IOException
621 	 *             trees cannot be read, file contents cannot be read, or the
622 	 *             patch cannot be output.
623 	 */
624 	public void format(AnyObjectId a, AnyObjectId b) throws IOException {
625 		format(scan(a, b));
626 	}
627 
628 	/**
629 	 * Format the differences between two trees.
630 	 *
631 	 * The patch is expressed as instructions to modify {@code a} to make it
632 	 * {@code b}.
633 	 *
634 	 * <p>
635 	 * Either side may be null to indicate that the tree has beed added or
636 	 * removed. The diff will be computed against nothing.
637 	 *
638 	 * @param a
639 	 *            the old (or previous) side or null
640 	 * @param b
641 	 *            the new (or updated) side or null
642 	 * @throws java.io.IOException
643 	 *             trees cannot be read, file contents cannot be read, or the
644 	 *             patch cannot be output.
645 	 */
646 	public void format(RevTree a, RevTree b) throws IOException {
647 		format(scan(a, b));
648 	}
649 
650 	/**
651 	 * Format the differences between two trees.
652 	 *
653 	 * The patch is expressed as instructions to modify {@code a} to make it
654 	 * {@code b}.
655 	 * <p>
656 	 * Either side may be null to indicate that the tree has beed added or
657 	 * removed. The diff will be computed against nothing.
658 	 *
659 	 * @param a
660 	 *            the old (or previous) side or null
661 	 * @param b
662 	 *            the new (or updated) side or null
663 	 * @throws java.io.IOException
664 	 *             trees cannot be read, file contents cannot be read, or the
665 	 *             patch cannot be output.
666 	 */
667 	public void format(AbstractTreeIterator a, AbstractTreeIterator b)
668 			throws IOException {
669 		format(scan(a, b));
670 	}
671 
672 	/**
673 	 * Format a patch script from a list of difference entries. Requires
674 	 * {@link #scan(AbstractTreeIterator, AbstractTreeIterator)} to have been
675 	 * called first.
676 	 *
677 	 * @param entries
678 	 *            entries describing the affected files.
679 	 * @throws java.io.IOException
680 	 *             a file's content cannot be read, or the output stream cannot
681 	 *             be written to.
682 	 */
683 	public void format(List<? extends DiffEntry> entries) throws IOException {
684 		for (DiffEntry ent : entries)
685 			format(ent);
686 	}
687 
688 	/**
689 	 * Format a patch script for one file entry.
690 	 *
691 	 * @param ent
692 	 *            the entry to be formatted.
693 	 * @throws java.io.IOException
694 	 *             a file's content cannot be read, or the output stream cannot
695 	 *             be written to.
696 	 */
697 	public void format(DiffEntry ent) throws IOException {
698 		FormatResult res = createFormatResult(ent);
699 		format(res.header, res.a, res.b);
700 	}
701 
702 	private static byte[] writeGitLinkText(AbbreviatedObjectId id) {
703 		if (ObjectId.zeroId().equals(id.toObjectId())) {
704 			return EMPTY;
705 		}
706 		return encodeASCII("Subproject commit " + id.name() //$NON-NLS-1$
707 				+ "\n"); //$NON-NLS-1$
708 	}
709 
710 	private String format(AbbreviatedObjectId id) {
711 		if (id.isComplete() && reader != null) {
712 			try {
713 				id = reader.abbreviate(id.toObjectId(), abbreviationLength);
714 			} catch (IOException cannotAbbreviate) {
715 				// Ignore this. We'll report the full identity.
716 			}
717 		}
718 		return id.name();
719 	}
720 
721 	private static String quotePath(String name) {
722 		return QuotedString.GIT_PATH.quote(name);
723 	}
724 
725 	/**
726 	 * Format a patch script, reusing a previously parsed FileHeader.
727 	 * <p>
728 	 * This formatter is primarily useful for editing an existing patch script
729 	 * to increase or reduce the number of lines of context within the script.
730 	 * All header lines are reused as-is from the supplied FileHeader.
731 	 *
732 	 * @param head
733 	 *            existing file header containing the header lines to copy.
734 	 * @param a
735 	 *            text source for the pre-image version of the content. This
736 	 *            must match the content of
737 	 *            {@link org.eclipse.jgit.patch.FileHeader#getOldId()}.
738 	 * @param b
739 	 *            text source for the post-image version of the content. This
740 	 *            must match the content of
741 	 *            {@link org.eclipse.jgit.patch.FileHeader#getNewId()}.
742 	 * @throws java.io.IOException
743 	 *             writing to the supplied stream failed.
744 	 */
745 	public void format(FileHeader head, RawText a, RawText b)
746 			throws IOException {
747 		// Reuse the existing FileHeader as-is by blindly copying its
748 		// header lines, but avoiding its hunks. Instead we recreate
749 		// the hunks from the text instances we have been supplied.
750 		//
751 		final int start = head.getStartOffset();
752 		int end = head.getEndOffset();
753 		if (!head.getHunks().isEmpty())
754 			end = head.getHunks().get(0).getStartOffset();
755 		out.write(head.getBuffer(), start, end - start);
756 		if (head.getPatchType() == PatchType.UNIFIED)
757 			format(head.toEditList(), a, b);
758 	}
759 
760 	/**
761 	 * Formats a list of edits in unified diff format
762 	 *
763 	 * @param edits
764 	 *            some differences which have been calculated between A and B
765 	 * @param a
766 	 *            the text A which was compared
767 	 * @param b
768 	 *            the text B which was compared
769 	 * @throws java.io.IOException
770 	 */
771 	public void format(EditList edits, RawText a, RawText b)
772 			throws IOException {
773 		for (int curIdx = 0; curIdx < edits.size();) {
774 			Edit curEdit = edits.get(curIdx);
775 			final int endIdx = findCombinedEnd(edits, curIdx);
776 			final Edit endEdit = edits.get(endIdx);
777 
778 			int aCur = (int) Math.max(0, (long) curEdit.getBeginA() - context);
779 			int bCur = (int) Math.max(0, (long) curEdit.getBeginB() - context);
780 			final int aEnd = (int) Math.min(a.size(), (long) endEdit.getEndA() + context);
781 			final int bEnd = (int) Math.min(b.size(), (long) endEdit.getEndB() + context);
782 
783 			writeHunkHeader(aCur, aEnd, bCur, bEnd);
784 
785 			while (aCur < aEnd || bCur < bEnd) {
786 				if (aCur < curEdit.getBeginA() || endIdx + 1 < curIdx) {
787 					writeContextLine(a, aCur);
788 					if (isEndOfLineMissing(a, aCur))
789 						out.write(noNewLine);
790 					aCur++;
791 					bCur++;
792 				} else if (aCur < curEdit.getEndA()) {
793 					writeRemovedLine(a, aCur);
794 					if (isEndOfLineMissing(a, aCur))
795 						out.write(noNewLine);
796 					aCur++;
797 				} else if (bCur < curEdit.getEndB()) {
798 					writeAddedLine(b, bCur);
799 					if (isEndOfLineMissing(b, bCur))
800 						out.write(noNewLine);
801 					bCur++;
802 				}
803 
804 				if (end(curEdit, aCur, bCur) && ++curIdx < edits.size())
805 					curEdit = edits.get(curIdx);
806 			}
807 		}
808 	}
809 
810 	/**
811 	 * Output a line of context (unmodified line).
812 	 *
813 	 * @param text
814 	 *            RawText for accessing raw data
815 	 * @param line
816 	 *            the line number within text
817 	 * @throws java.io.IOException
818 	 */
819 	protected void writeContextLine(RawText text, int line)
820 			throws IOException {
821 		writeLine(' ', text, line);
822 	}
823 
824 	private static boolean isEndOfLineMissing(RawText text, int line) {
825 		return line + 1 == text.size() && text.isMissingNewlineAtEnd();
826 	}
827 
828 	/**
829 	 * Output an added line.
830 	 *
831 	 * @param text
832 	 *            RawText for accessing raw data
833 	 * @param line
834 	 *            the line number within text
835 	 * @throws java.io.IOException
836 	 */
837 	protected void writeAddedLine(RawText text, int line)
838 			throws IOException {
839 		writeLine('+', text, line);
840 	}
841 
842 	/**
843 	 * Output a removed line
844 	 *
845 	 * @param text
846 	 *            RawText for accessing raw data
847 	 * @param line
848 	 *            the line number within text
849 	 * @throws java.io.IOException
850 	 */
851 	protected void writeRemovedLine(RawText text, int line)
852 			throws IOException {
853 		writeLine('-', text, line);
854 	}
855 
856 	/**
857 	 * Output a hunk header
858 	 *
859 	 * @param aStartLine
860 	 *            within first source
861 	 * @param aEndLine
862 	 *            within first source
863 	 * @param bStartLine
864 	 *            within second source
865 	 * @param bEndLine
866 	 *            within second source
867 	 * @throws java.io.IOException
868 	 */
869 	protected void writeHunkHeader(int aStartLine, int aEndLine,
870 			int bStartLine, int bEndLine) throws IOException {
871 		out.write('@');
872 		out.write('@');
873 		writeRange('-', aStartLine + 1, aEndLine - aStartLine);
874 		writeRange('+', bStartLine + 1, bEndLine - bStartLine);
875 		out.write(' ');
876 		out.write('@');
877 		out.write('@');
878 		out.write('\n');
879 	}
880 
881 	private void writeRange(char prefix, int begin, int cnt)
882 			throws IOException {
883 		out.write(' ');
884 		out.write(prefix);
885 		switch (cnt) {
886 		case 0:
887 			// If the range is empty, its beginning number must be the
888 			// line just before the range, or 0 if the range is at the
889 			// start of the file stream. Here, begin is always 1 based,
890 			// so an empty file would produce "0,0".
891 			//
892 			out.write(encodeASCII(begin - 1));
893 			out.write(',');
894 			out.write('0');
895 			break;
896 
897 		case 1:
898 			// If the range is exactly one line, produce only the number.
899 			//
900 			out.write(encodeASCII(begin));
901 			break;
902 
903 		default:
904 			out.write(encodeASCII(begin));
905 			out.write(',');
906 			out.write(encodeASCII(cnt));
907 			break;
908 		}
909 	}
910 
911 	/**
912 	 * Write a standard patch script line.
913 	 *
914 	 * @param prefix
915 	 *            prefix before the line, typically '-', '+', ' '.
916 	 * @param text
917 	 *            the text object to obtain the line from.
918 	 * @param cur
919 	 *            line number to output.
920 	 * @throws java.io.IOException
921 	 *             the stream threw an exception while writing to it.
922 	 */
923 	protected void writeLine(final char prefix, final RawText text,
924 			final int cur) throws IOException {
925 		out.write(prefix);
926 		text.writeLine(out, cur);
927 		out.write('\n');
928 	}
929 
930 	/**
931 	 * Creates a {@link org.eclipse.jgit.patch.FileHeader} representing the
932 	 * given {@link org.eclipse.jgit.diff.DiffEntry}
933 	 * <p>
934 	 * This method does not use the OutputStream associated with this
935 	 * DiffFormatter instance. It is therefore safe to instantiate this
936 	 * DiffFormatter instance with a
937 	 * {@link org.eclipse.jgit.util.io.DisabledOutputStream} if this method is
938 	 * the only one that will be used.
939 	 *
940 	 * @param ent
941 	 *            the DiffEntry to create the FileHeader for
942 	 * @return a FileHeader representing the DiffEntry. The FileHeader's buffer
943 	 *         will contain only the header of the diff output. It will also
944 	 *         contain one {@link org.eclipse.jgit.patch.HunkHeader}.
945 	 * @throws java.io.IOException
946 	 *             the stream threw an exception while writing to it, or one of
947 	 *             the blobs referenced by the DiffEntry could not be read.
948 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
949 	 *             one of the blobs referenced by the DiffEntry is corrupt.
950 	 * @throws org.eclipse.jgit.errors.MissingObjectException
951 	 *             one of the blobs referenced by the DiffEntry is missing.
952 	 */
953 	public FileHeader toFileHeader(DiffEntry ent) throws IOException,
954 			CorruptObjectException, MissingObjectException {
955 		return createFormatResult(ent).header;
956 	}
957 
958 	private static class FormatResult {
959 		FileHeader header;
960 
961 		RawText a;
962 
963 		RawText b;
964 	}
965 
966 	private FormatResult createFormatResult(DiffEntry ent) throws IOException,
967 			CorruptObjectException, MissingObjectException {
968 		final FormatResult res = new FormatResult();
969 		ByteArrayOutputStream buf = new ByteArrayOutputStream();
970 		final EditList editList;
971 		final FileHeader.PatchType type;
972 
973 		formatHeader(buf, ent);
974 
975 		if (ent.getOldId() == null || ent.getNewId() == null) {
976 			// Content not changed (e.g. only mode, pure rename)
977 			editList = new EditList();
978 			type = PatchType.UNIFIED;
979 			res.header = new FileHeader(buf.toByteArray(), editList, type);
980 			return res;
981 		}
982 
983 		assertHaveReader();
984 
985 		RawText aRaw = null;
986 		RawText bRaw = null;
987 		if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
988 			aRaw = new RawText(writeGitLinkText(ent.getOldId()));
989 			bRaw = new RawText(writeGitLinkText(ent.getNewId()));
990 		} else {
991 			try {
992 				aRaw = open(OLD, ent);
993 				bRaw = open(NEW, ent);
994 			} catch (BinaryBlobException e) {
995 				// Do nothing; we check for null below.
996 				formatOldNewPaths(buf, ent);
997 				buf.write(encodeASCII("Binary files differ\n")); //$NON-NLS-1$
998 				editList = new EditList();
999 				type = PatchType.BINARY;
1000 				res.header = new FileHeader(buf.toByteArray(), editList, type);
1001 				return res;
1002 			}
1003 		}
1004 
1005 		res.a = aRaw;
1006 		res.b = bRaw;
1007 		editList = diff(res.a, res.b);
1008 		type = PatchType.UNIFIED;
1009 
1010 		switch (ent.getChangeType()) {
1011 			case RENAME:
1012 			case COPY:
1013 				if (!editList.isEmpty())
1014 					formatOldNewPaths(buf, ent);
1015 				break;
1016 
1017 			default:
1018 				formatOldNewPaths(buf, ent);
1019 				break;
1020 		}
1021 
1022 
1023 		res.header = new FileHeader(buf.toByteArray(), editList, type);
1024 		return res;
1025 	}
1026 
1027 	private EditList diff(RawText a, RawText b) {
1028 		return diffAlgorithm.diff(comparator, a, b);
1029 	}
1030 
1031 	private void assertHaveReader() {
1032 		if (reader == null) {
1033 			throw new IllegalStateException(JGitText.get().readerIsRequired);
1034 		}
1035 	}
1036 
1037 	private RawText open(DiffEntry.Side side, DiffEntry entry)
1038 			throws IOException, BinaryBlobException {
1039 		if (entry.getMode(side) == FileMode.MISSING)
1040 			return RawText.EMPTY_TEXT;
1041 
1042 		if (entry.getMode(side).getObjectType() != Constants.OBJ_BLOB)
1043 			return RawText.EMPTY_TEXT;
1044 
1045 		AbbreviatedObjectId id = entry.getId(side);
1046 		if (!id.isComplete()) {
1047 			Collection<ObjectId> ids = reader.resolve(id);
1048 			if (ids.size() == 1) {
1049 				id = AbbreviatedObjectId.fromObjectId(ids.iterator().next());
1050 				switch (side) {
1051 				case OLD:
1052 					entry.oldId = id;
1053 					break;
1054 				case NEW:
1055 					entry.newId = id;
1056 					break;
1057 				}
1058 			} else if (ids.size() == 0)
1059 				throw new MissingObjectException(id, Constants.OBJ_BLOB);
1060 			else
1061 				throw new AmbiguousObjectException(id, ids);
1062 		}
1063 
1064 		ObjectLoader ldr = LfsFactory.getInstance().applySmudgeFilter(repository,
1065 				source.open(side, entry), entry.getDiffAttribute());
1066 		return RawText.load(ldr, binaryFileThreshold);
1067 	}
1068 
1069 	/**
1070 	 * Output the first header line
1071 	 *
1072 	 * @param o
1073 	 *            The stream the formatter will write the first header line to
1074 	 * @param type
1075 	 *            The {@link org.eclipse.jgit.diff.DiffEntry.ChangeType}
1076 	 * @param oldPath
1077 	 *            old path to the file
1078 	 * @param newPath
1079 	 *            new path to the file
1080 	 * @throws java.io.IOException
1081 	 *             the stream threw an exception while writing to it.
1082 	 */
1083 	protected void formatGitDiffFirstHeaderLine(ByteArrayOutputStream o,
1084 			final ChangeType type, final String oldPath, final String newPath)
1085 			throws IOException {
1086 		o.write(encodeASCII("diff --git ")); //$NON-NLS-1$
1087 		o.write(encode(quotePath(oldPrefix + (type == ADD ? newPath : oldPath))));
1088 		o.write(' ');
1089 		o.write(encode(quotePath(newPrefix
1090 				+ (type == DELETE ? oldPath : newPath))));
1091 		o.write('\n');
1092 	}
1093 
1094 	private void formatHeader(ByteArrayOutputStream o, DiffEntry ent)
1095 			throws IOException {
1096 		final ChangeType type = ent.getChangeType();
1097 		final String oldp = ent.getOldPath();
1098 		final String newp = ent.getNewPath();
1099 		final FileMode oldMode = ent.getOldMode();
1100 		final FileMode newMode = ent.getNewMode();
1101 
1102 		formatGitDiffFirstHeaderLine(o, type, oldp, newp);
1103 
1104 		if ((type == MODIFY || type == COPY || type == RENAME)
1105 				&& !oldMode.equals(newMode)) {
1106 			o.write(encodeASCII("old mode ")); //$NON-NLS-1$
1107 			oldMode.copyTo(o);
1108 			o.write('\n');
1109 
1110 			o.write(encodeASCII("new mode ")); //$NON-NLS-1$
1111 			newMode.copyTo(o);
1112 			o.write('\n');
1113 		}
1114 
1115 		switch (type) {
1116 		case ADD:
1117 			o.write(encodeASCII("new file mode ")); //$NON-NLS-1$
1118 			newMode.copyTo(o);
1119 			o.write('\n');
1120 			break;
1121 
1122 		case DELETE:
1123 			o.write(encodeASCII("deleted file mode ")); //$NON-NLS-1$
1124 			oldMode.copyTo(o);
1125 			o.write('\n');
1126 			break;
1127 
1128 		case RENAME:
1129 			o.write(encodeASCII("similarity index " + ent.getScore() + "%")); //$NON-NLS-1$ //$NON-NLS-2$
1130 			o.write('\n');
1131 
1132 			o.write(encode("rename from " + quotePath(oldp))); //$NON-NLS-1$
1133 			o.write('\n');
1134 
1135 			o.write(encode("rename to " + quotePath(newp))); //$NON-NLS-1$
1136 			o.write('\n');
1137 			break;
1138 
1139 		case COPY:
1140 			o.write(encodeASCII("similarity index " + ent.getScore() + "%")); //$NON-NLS-1$ //$NON-NLS-2$
1141 			o.write('\n');
1142 
1143 			o.write(encode("copy from " + quotePath(oldp))); //$NON-NLS-1$
1144 			o.write('\n');
1145 
1146 			o.write(encode("copy to " + quotePath(newp))); //$NON-NLS-1$
1147 			o.write('\n');
1148 			break;
1149 
1150 		case MODIFY:
1151 			if (0 < ent.getScore()) {
1152 				o.write(encodeASCII("dissimilarity index " //$NON-NLS-1$
1153 						+ (100 - ent.getScore()) + "%")); //$NON-NLS-1$
1154 				o.write('\n');
1155 			}
1156 			break;
1157 		}
1158 
1159 		if (ent.getOldId() != null && !ent.getOldId().equals(ent.getNewId())) {
1160 			formatIndexLine(o, ent);
1161 		}
1162 	}
1163 
1164 	/**
1165 	 * Format index line
1166 	 *
1167 	 * @param o
1168 	 *            the stream the formatter will write line data to
1169 	 * @param ent
1170 	 *            the DiffEntry to create the FileHeader for
1171 	 * @throws java.io.IOException
1172 	 *             writing to the supplied stream failed.
1173 	 */
1174 	protected void formatIndexLine(OutputStream o, DiffEntry ent)
1175 			throws IOException {
1176 		o.write(encodeASCII("index " // //$NON-NLS-1$
1177 				+ format(ent.getOldId()) //
1178 				+ ".." // //$NON-NLS-1$
1179 				+ format(ent.getNewId())));
1180 		if (ent.getOldMode().equals(ent.getNewMode())) {
1181 			o.write(' ');
1182 			ent.getNewMode().copyTo(o);
1183 		}
1184 		o.write('\n');
1185 	}
1186 
1187 	private void formatOldNewPaths(ByteArrayOutputStream o, DiffEntry ent)
1188 			throws IOException {
1189 		if (ent.oldId.equals(ent.newId))
1190 			return;
1191 
1192 		final String oldp;
1193 		final String newp;
1194 
1195 		switch (ent.getChangeType()) {
1196 		case ADD:
1197 			oldp = DiffEntry.DEV_NULL;
1198 			newp = quotePath(newPrefix + ent.getNewPath());
1199 			break;
1200 
1201 		case DELETE:
1202 			oldp = quotePath(oldPrefix + ent.getOldPath());
1203 			newp = DiffEntry.DEV_NULL;
1204 			break;
1205 
1206 		default:
1207 			oldp = quotePath(oldPrefix + ent.getOldPath());
1208 			newp = quotePath(newPrefix + ent.getNewPath());
1209 			break;
1210 		}
1211 
1212 		o.write(encode("--- " + oldp + "\n")); //$NON-NLS-1$ //$NON-NLS-2$
1213 		o.write(encode("+++ " + newp + "\n")); //$NON-NLS-1$ //$NON-NLS-2$
1214 	}
1215 
1216 	private int findCombinedEnd(List<Edit> edits, int i) {
1217 		int end = i + 1;
1218 		while (end < edits.size()
1219 				&& (combineA(edits, end) || combineB(edits, end)))
1220 			end++;
1221 		return end - 1;
1222 	}
1223 
1224 	private boolean combineA(List<Edit> e, int i) {
1225 		return e.get(i).getBeginA() - e.get(i - 1).getEndA() <= 2 * context;
1226 	}
1227 
1228 	private boolean combineB(List<Edit> e, int i) {
1229 		return e.get(i).getBeginB() - e.get(i - 1).getEndB() <= 2 * context;
1230 	}
1231 
1232 	private static boolean end(Edit edit, int a, int b) {
1233 		return edit.getEndA() <= a && edit.getEndB() <= b;
1234 	}
1235 }