NameConflictTreeWalk.java

  1. /*
  2.  * Copyright (C) 2008, Google Inc. and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */

  10. package org.eclipse.jgit.treewalk;

  11. import java.io.IOException;

  12. import org.eclipse.jgit.annotations.Nullable;
  13. import org.eclipse.jgit.errors.CorruptObjectException;
  14. import org.eclipse.jgit.lib.FileMode;
  15. import org.eclipse.jgit.lib.ObjectReader;
  16. import org.eclipse.jgit.lib.Repository;

  17. /**
  18.  * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
  19.  * <p>
  20.  * Due to the way a Git tree is organized the standard
  21.  * {@link org.eclipse.jgit.treewalk.TreeWalk} won't easily find a D/F conflict
  22.  * when merging two or more trees together. In the standard TreeWalk the file
  23.  * will be returned first, and then much later the directory will be returned.
  24.  * This makes it impossible for the application to efficiently detect and handle
  25.  * the conflict.
  26.  * <p>
  27.  * Using this walk implementation causes the directory to report earlier than
  28.  * usual, at the same time as the non-directory entry. This permits the
  29.  * application to handle the D/F conflict in a single step. The directory is
  30.  * returned only once, so it does not get returned later in the iteration.
  31.  * <p>
  32.  * When a D/F conflict is detected
  33.  * {@link org.eclipse.jgit.treewalk.TreeWalk#isSubtree()} will return true and
  34.  * {@link org.eclipse.jgit.treewalk.TreeWalk#enterSubtree()} will recurse into
  35.  * the subtree, no matter which iterator originally supplied the subtree.
  36.  * <p>
  37.  * Because conflicted directories report early, using this walk implementation
  38.  * to populate a {@link org.eclipse.jgit.dircache.DirCacheBuilder} may cause the
  39.  * automatic resorting to run and fix the entry ordering.
  40.  * <p>
  41.  * This walk implementation requires more CPU to implement a look-ahead and a
  42.  * look-behind to merge a D/F pair together, or to skip a previously reported
  43.  * directory. In typical Git repositories the look-ahead cost is 0 and the
  44.  * look-behind doesn't trigger, as users tend not to create trees which contain
  45.  * both "foo" as a directory and "foo.c" as a file.
  46.  * <p>
  47.  * In the worst-case however several thousand look-ahead steps per walk step may
  48.  * be necessary, making the overhead quite significant. Since this worst-case
  49.  * should never happen this walk implementation has made the time/space tradeoff
  50.  * in favor of more-time/less-space, as that better suits the typical case.
  51.  */
  52. public class NameConflictTreeWalk extends TreeWalk {
  53.     private static final int TREE_MODE = FileMode.TREE.getBits();

  54.     private boolean fastMinHasMatch;

  55.     private AbstractTreeIterator dfConflict;

  56.     /**
  57.      * Create a new tree walker for a given repository.
  58.      *
  59.      * @param repo
  60.      *            the repository the walker will obtain data from.
  61.      */
  62.     public NameConflictTreeWalk(Repository repo) {
  63.         super(repo);
  64.     }

  65.     /**
  66.      * Create a new tree walker for a given repository.
  67.      *
  68.      * @param repo
  69.      *            the repository the walker will obtain data from.
  70.      * @param or
  71.      *            the reader the walker will obtain tree data from.
  72.      * @since 4.3
  73.      */
  74.     public NameConflictTreeWalk(@Nullable Repository repo, ObjectReader or) {
  75.         super(repo, or);
  76.     }

  77.     /**
  78.      * Create a new tree walker for a given repository.
  79.      *
  80.      * @param or
  81.      *            the reader the walker will obtain tree data from.
  82.      */
  83.     public NameConflictTreeWalk(ObjectReader or) {
  84.         super(or);
  85.     }

  86.     @Override
  87.     AbstractTreeIterator min() throws CorruptObjectException {
  88.         for (;;) {
  89.             final AbstractTreeIterator minRef = fastMin();
  90.             if (fastMinHasMatch)
  91.                 return minRef;

  92.             if (isTree(minRef)) {
  93.                 if (skipEntry(minRef)) {
  94.                     for (AbstractTreeIterator t : trees) {
  95.                         if (t.matches == minRef) {
  96.                             t.next(1);
  97.                             t.matches = null;
  98.                         }
  99.                     }
  100.                     continue;
  101.                 }
  102.                 return minRef;
  103.             }

  104.             return combineDF(minRef);
  105.         }
  106.     }

  107.     private AbstractTreeIterator fastMin() {
  108.         fastMinHasMatch = true;

  109.         int i = 0;
  110.         AbstractTreeIterator minRef = trees[i];
  111.         while (minRef.eof() && ++i < trees.length)
  112.             minRef = trees[i];
  113.         if (minRef.eof())
  114.             return minRef;

  115.         boolean hasConflict = false;
  116.         minRef.matches = minRef;
  117.         while (++i < trees.length) {
  118.             final AbstractTreeIterator t = trees[i];
  119.             if (t.eof())
  120.                 continue;

  121.             final int cmp = t.pathCompare(minRef);
  122.             if (cmp < 0) {
  123.                 if (fastMinHasMatch && isTree(minRef) && !isTree(t)
  124.                         && nameEqual(minRef, t)) {
  125.                     // We used to be at a tree, but now we are at a file
  126.                     // with the same name. Allow the file to match the
  127.                     // tree anyway.
  128.                     //
  129.                     t.matches = minRef;
  130.                     hasConflict = true;
  131.                 } else {
  132.                     fastMinHasMatch = false;
  133.                     t.matches = t;
  134.                     minRef = t;
  135.                 }
  136.             } else if (cmp == 0) {
  137.                 // Exact name/mode match is best.
  138.                 //
  139.                 t.matches = minRef;
  140.             } else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
  141.                     && !isGitlink(minRef) && nameEqual(t, minRef)) {
  142.                 // The minimum is a file (non-tree) but the next entry
  143.                 // of this iterator is a tree whose name matches our file.
  144.                 // This is a classic D/F conflict and commonly occurs like
  145.                 // this, with no gaps in between the file and directory.
  146.                 //
  147.                 // Use the tree as the minimum instead (see combineDF).
  148.                 //

  149.                 for (int k = 0; k < i; k++) {
  150.                     final AbstractTreeIterator p = trees[k];
  151.                     if (p.matches == minRef)
  152.                         p.matches = t;
  153.                 }
  154.                 t.matches = t;
  155.                 minRef = t;
  156.                 hasConflict = true;
  157.             } else
  158.                 fastMinHasMatch = false;
  159.         }

  160.         if (hasConflict && fastMinHasMatch && dfConflict == null)
  161.             dfConflict = minRef;
  162.         return minRef;
  163.     }

  164.     private static boolean nameEqual(final AbstractTreeIterator a,
  165.             final AbstractTreeIterator b) {
  166.         return a.pathCompare(b, TREE_MODE) == 0;
  167.     }

  168.     private boolean isGitlink(AbstractTreeIterator p) {
  169.         return FileMode.GITLINK.equals(p.mode);
  170.     }

  171.     private static boolean isTree(AbstractTreeIterator p) {
  172.         return FileMode.TREE.equals(p.mode);
  173.     }

  174.     private boolean skipEntry(AbstractTreeIterator minRef)
  175.             throws CorruptObjectException {
  176.         // A tree D/F may have been handled earlier. We need to
  177.         // not report this path if it has already been reported.
  178.         //
  179.         for (AbstractTreeIterator t : trees) {
  180.             if (t.matches == minRef || t.first())
  181.                 continue;

  182.             int stepsBack = 0;
  183.             for (;;) {
  184.                 stepsBack++;
  185.                 t.back(1);

  186.                 final int cmp = t.pathCompare(minRef, 0);
  187.                 if (cmp == 0) {
  188.                     // We have already seen this "$path" before. Skip it.
  189.                     //
  190.                     t.next(stepsBack);
  191.                     return true;
  192.                 } else if (cmp < 0 || t.first()) {
  193.                     // We cannot find "$path" in t; it will never appear.
  194.                     //
  195.                     t.next(stepsBack);
  196.                     break;
  197.                 }
  198.             }
  199.         }

  200.         // We have never seen the current path before.
  201.         //
  202.         return false;
  203.     }

  204.     private AbstractTreeIterator combineDF(AbstractTreeIterator minRef)
  205.             throws CorruptObjectException {
  206.         // Look for a possible D/F conflict forward in the tree(s)
  207.         // as there may be a "$path/" which matches "$path". Make
  208.         // such entries match this entry.
  209.         //
  210.         AbstractTreeIterator treeMatch = null;
  211.         for (AbstractTreeIterator t : trees) {
  212.             if (t.matches == minRef || t.eof())
  213.                 continue;

  214.             for (;;) {
  215.                 final int cmp = t.pathCompare(minRef, TREE_MODE);
  216.                 if (cmp < 0) {
  217.                     // The "$path/" may still appear later.
  218.                     //
  219.                     t.matchShift++;
  220.                     t.next(1);
  221.                     if (t.eof()) {
  222.                         t.back(t.matchShift);
  223.                         t.matchShift = 0;
  224.                         break;
  225.                     }
  226.                 } else if (cmp == 0) {
  227.                     // We have a conflict match here.
  228.                     //
  229.                     t.matches = minRef;
  230.                     treeMatch = t;
  231.                     break;
  232.                 } else {
  233.                     // A conflict match is not possible.
  234.                     //
  235.                     if (t.matchShift != 0) {
  236.                         t.back(t.matchShift);
  237.                         t.matchShift = 0;
  238.                     }
  239.                     break;
  240.                 }
  241.             }
  242.         }

  243.         if (treeMatch != null) {
  244.             // If we do have a conflict use one of the directory
  245.             // matching iterators instead of the file iterator.
  246.             // This way isSubtree is true and isRecursive works.
  247.             //
  248.             for (AbstractTreeIterator t : trees)
  249.                 if (t.matches == minRef)
  250.                     t.matches = treeMatch;

  251.             if (dfConflict == null && !isGitlink(minRef)) {
  252.                 dfConflict = treeMatch;
  253.             }

  254.             return treeMatch;
  255.         }

  256.         return minRef;
  257.     }

  258.     @Override
  259.     void popEntriesEqual() throws CorruptObjectException {
  260.         final AbstractTreeIterator ch = currentHead;
  261.         for (AbstractTreeIterator t : trees) {
  262.             if (t.matches == ch) {
  263.                 if (t.matchShift == 0)
  264.                     t.next(1);
  265.                 else {
  266.                     t.back(t.matchShift);
  267.                     t.matchShift = 0;
  268.                 }
  269.                 t.matches = null;
  270.             }
  271.         }

  272.         if (ch == dfConflict)
  273.             dfConflict = null;
  274.     }

  275.     @Override
  276.     void skipEntriesEqual() throws CorruptObjectException {
  277.         final AbstractTreeIterator ch = currentHead;
  278.         for (AbstractTreeIterator t : trees) {
  279.             if (t.matches == ch) {
  280.                 if (t.matchShift == 0)
  281.                     t.skip();
  282.                 else {
  283.                     t.back(t.matchShift);
  284.                     t.matchShift = 0;
  285.                 }
  286.                 t.matches = null;
  287.             }
  288.         }

  289.         if (ch == dfConflict)
  290.             dfConflict = null;
  291.     }

  292.     @Override
  293.     void stopWalk() throws IOException {
  294.         if (!needsStopWalk()) {
  295.             return;
  296.         }

  297.         // Name conflicts make aborting early difficult. Multiple paths may
  298.         // exist between the file and directory versions of a name. To ensure
  299.         // the directory version is skipped over (as it was previously visited
  300.         // during the file version step) requires popping up the stack and
  301.         // finishing out each subtree that the walker dove into. Siblings in
  302.         // parents do not need to be recursed into, bounding the cost.
  303.         for (;;) {
  304.             AbstractTreeIterator t = min();
  305.             if (t.eof()) {
  306.                 if (depth > 0) {
  307.                     exitSubtree();
  308.                     popEntriesEqual();
  309.                     continue;
  310.                 }
  311.                 return;
  312.             }
  313.             currentHead = t;
  314.             skipEntriesEqual();
  315.         }
  316.     }

  317.     private boolean needsStopWalk() {
  318.         for (AbstractTreeIterator t : trees) {
  319.             if (t.needsStopWalk()) {
  320.                 return true;
  321.             }
  322.         }
  323.         return false;
  324.     }

  325.     /**
  326.      * True if the current entry is covered by a directory/file conflict.
  327.      *
  328.      * This means that for some prefix of the current entry's path, this walk
  329.      * has detected a directory/file conflict. Also true if the current entry
  330.      * itself is a directory/file conflict.
  331.      *
  332.      * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
  333.      * true then you know that either for path foo or for path foo/bar files and
  334.      * folders were detected.
  335.      *
  336.      * @return <code>true</code> if the current entry is covered by a
  337.      *         directory/file conflict, <code>false</code> otherwise
  338.      */
  339.     public boolean isDirectoryFileConflict() {
  340.         return dfConflict != null;
  341.     }
  342. }