DiffAlgorithm.java

  1. /*
  2.  * Copyright (C) 2010, Google Inc. and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */

  10. package org.eclipse.jgit.diff;

  11. /**
  12.  * Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an
  13.  * {@link org.eclipse.jgit.diff.EditList} of changes.
  14.  * <p>
  15.  * An algorithm's {@code diff} method must be callable from concurrent threads
  16.  * without data collisions. This permits some algorithms to use a singleton
  17.  * pattern, with concurrent invocations using the same singleton. Other
  18.  * algorithms may support parameterization, in which case the caller can create
  19.  * a unique instance per thread.
  20.  */
  21. public abstract class DiffAlgorithm {
  22.     /**
  23.      * Supported diff algorithm
  24.      */
  25.     public enum SupportedAlgorithm {
  26.         /**
  27.          * Myers diff algorithm
  28.          */
  29.         MYERS,

  30.         /**
  31.          * Histogram diff algorithm
  32.          */
  33.         HISTOGRAM
  34.     }

  35.     /**
  36.      * Get diff algorithm
  37.      *
  38.      * @param alg
  39.      *            the diff algorithm for which an implementation should be
  40.      *            returned
  41.      * @return an implementation of the specified diff algorithm
  42.      */
  43.     public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) {
  44.         switch (alg) {
  45.         case MYERS:
  46.             return MyersDiff.INSTANCE;
  47.         case HISTOGRAM:
  48.             return new HistogramDiff();
  49.         default:
  50.             throw new IllegalArgumentException();
  51.         }
  52.     }

  53.     /**
  54.      * Compare two sequences and identify a list of edits between them.
  55.      *
  56.      * @param cmp
  57.      *            the comparator supplying the element equivalence function.
  58.      * @param a
  59.      *            the first (also known as old or pre-image) sequence. Edits
  60.      *            returned by this algorithm will reference indexes using the
  61.      *            'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
  62.      *            {@link org.eclipse.jgit.diff.Edit#getEndA()}.
  63.      * @param b
  64.      *            the second (also known as new or post-image) sequence. Edits
  65.      *            returned by this algorithm will reference indexes using the
  66.      *            'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
  67.      *            {@link org.eclipse.jgit.diff.Edit#getEndB()}.
  68.      * @return a modifiable edit list comparing the two sequences. If empty, the
  69.      *         sequences are identical according to {@code cmp}'s rules. The
  70.      *         result list is never null.
  71.      */
  72.     public <S extends Sequence> EditList diff(
  73.             SequenceComparator<? super S> cmp, S a, S b) {
  74.         Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));

  75.         switch (region.getType()) {
  76.         case INSERT:
  77.         case DELETE:
  78.             return EditList.singleton(region);

  79.         case REPLACE: {
  80.             if (region.getLengthA() == 1 && region.getLengthB() == 1)
  81.                 return EditList.singleton(region);

  82.             SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp);
  83.             Subsequence<S> as = Subsequence.a(a, region);
  84.             Subsequence<S> bs = Subsequence.b(b, region);
  85.             EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
  86.             return normalize(cmp, e, a, b);
  87.         }

  88.         case EMPTY:
  89.             return new EditList(0);

  90.         default:
  91.             throw new IllegalStateException();
  92.         }
  93.     }

  94.     private static <S extends Sequence> Edit coverEdit(S a, S b) {
  95.         return new Edit(0, a.size(), 0, b.size());
  96.     }

  97.     /**
  98.      * Reorganize an {@link EditList} for better diff consistency.
  99.      * <p>
  100.      * {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or
  101.      * {@link Edit.Type#DELETE} edits that can be "shifted". For
  102.      * example, the deleted section
  103.      * <pre>
  104.      * -a
  105.      * -b
  106.      * -c
  107.      *  a
  108.      *  b
  109.      *  c
  110.      * </pre>
  111.      * can be shifted down by 1, 2 or 3 locations.
  112.      * <p>
  113.      * To avoid later merge issues, we shift such edits to a
  114.      * consistent location. {@code normalize} uses a simple strategy of
  115.      * shifting such edits to their latest possible location.
  116.      * <p>
  117.      * This strategy may not always produce an aesthetically pleasing
  118.      * diff. For instance, it works well with
  119.      * <pre>
  120.      *  function1 {
  121.      *   ...
  122.      *  }
  123.      *
  124.      * +function2 {
  125.      * + ...
  126.      * +}
  127.      * +
  128.      * function3 {
  129.      * ...
  130.      * }
  131.      * </pre>
  132.      * but less so for
  133.      * <pre>
  134.      *  #
  135.      *  # comment1
  136.      *  #
  137.      *  function1() {
  138.      *  }
  139.      *
  140.      *  #
  141.      * +# comment3
  142.      * +#
  143.      * +function3() {
  144.      * +}
  145.      * +
  146.      * +#
  147.      *  # comment2
  148.      *  #
  149.      *  function2() {
  150.      *  }
  151.      * </pre>
  152.      * <a href="https://github.com/mhagger/diff-slider-tools">More
  153.      * sophisticated strategies</a> are possible, say by calculating a
  154.      * suitable "aesthetic cost" for each possible position and using
  155.      * the lowest cost, but {@code normalize} just shifts edits
  156.      * to the end as much as possible.
  157.      *
  158.      * @param <S>
  159.      *            type of sequence being compared.
  160.      * @param cmp
  161.      *            the comparator supplying the element equivalence function.
  162.      * @param e
  163.      *            a modifiable edit list comparing the provided sequences.
  164.      * @param a
  165.      *            the first (also known as old or pre-image) sequence.
  166.      * @param b
  167.      *            the second (also known as new or post-image) sequence.
  168.      * @return a modifiable edit list with edit regions shifted to their
  169.      *         latest possible location. The result list is never null.
  170.      * @since 4.7
  171.      */
  172.     private static <S extends Sequence> EditList normalize(
  173.         SequenceComparator<? super S> cmp, EditList e, S a, S b) {
  174.         Edit prev = null;
  175.         for (int i = e.size() - 1; i >= 0; i--) {
  176.             Edit cur = e.get(i);
  177.             Edit.Type curType = cur.getType();

  178.             int maxA = (prev == null) ? a.size() : prev.beginA;
  179.             int maxB = (prev == null) ? b.size() : prev.beginB;

  180.             if (curType == Edit.Type.INSERT) {
  181.                 while (cur.endA < maxA && cur.endB < maxB
  182.                     && cmp.equals(b, cur.beginB, b, cur.endB)) {
  183.                     cur.shift(1);
  184.                 }
  185.             } else if (curType == Edit.Type.DELETE) {
  186.                 while (cur.endA < maxA && cur.endB < maxB
  187.                     && cmp.equals(a, cur.beginA, a, cur.endA)) {
  188.                     cur.shift(1);
  189.                 }
  190.             }
  191.             prev = cur;
  192.         }
  193.         return e;
  194.     }

  195.     /**
  196.      * Compare two sequences and identify a list of edits between them.
  197.      *
  198.      * This method should be invoked only after the two sequences have been
  199.      * proven to have no common starting or ending elements. The expected
  200.      * elimination of common starting and ending elements is automatically
  201.      * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
  202.      * method, which invokes this method using
  203.      * {@link org.eclipse.jgit.diff.Subsequence}s.
  204.      *
  205.      * @param cmp
  206.      *            the comparator supplying the element equivalence function.
  207.      * @param a
  208.      *            the first (also known as old or pre-image) sequence. Edits
  209.      *            returned by this algorithm will reference indexes using the
  210.      *            'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
  211.      *            {@link org.eclipse.jgit.diff.Edit#getEndA()}.
  212.      * @param b
  213.      *            the second (also known as new or post-image) sequence. Edits
  214.      *            returned by this algorithm will reference indexes using the
  215.      *            'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
  216.      *            {@link org.eclipse.jgit.diff.Edit#getEndB()}.
  217.      * @return a modifiable edit list comparing the two sequences.
  218.      */
  219.     public abstract <S extends Sequence> EditList diffNonCommon(
  220.             SequenceComparator<? super S> cmp, S a, S b);
  221. }