DirCacheTree.java

  1. /*
  2.  * Copyright (C) 2008-2009, Google Inc.
  3.  * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
  4.  *
  5.  * This program and the accompanying materials are made available under the
  6.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  7.  * https://www.eclipse.org/org/documents/edl-v10.php.
  8.  *
  9.  * SPDX-License-Identifier: BSD-3-Clause
  10.  */

  11. package org.eclipse.jgit.dircache;

  12. import static java.nio.charset.StandardCharsets.UTF_8;
  13. import static org.eclipse.jgit.lib.FileMode.TREE;
  14. import static org.eclipse.jgit.lib.TreeFormatter.entrySize;

  15. import java.io.IOException;
  16. import java.io.OutputStream;
  17. import java.nio.ByteBuffer;
  18. import java.util.Arrays;
  19. import java.util.Comparator;

  20. import org.eclipse.jgit.errors.UnmergedPathException;
  21. import org.eclipse.jgit.lib.Constants;
  22. import org.eclipse.jgit.lib.ObjectId;
  23. import org.eclipse.jgit.lib.ObjectInserter;
  24. import org.eclipse.jgit.lib.TreeFormatter;
  25. import org.eclipse.jgit.util.MutableInteger;
  26. import org.eclipse.jgit.util.RawParseUtils;

  27. /**
  28.  * Single tree record from the 'TREE' {@link org.eclipse.jgit.dircache.DirCache}
  29.  * extension.
  30.  * <p>
  31.  * A valid cache tree record contains the object id of a tree object and the
  32.  * total number of {@link org.eclipse.jgit.dircache.DirCacheEntry} instances
  33.  * (counted recursively) from the DirCache contained within the tree. This
  34.  * information facilitates faster traversal of the index and quicker generation
  35.  * of tree objects prior to creating a new commit.
  36.  * <p>
  37.  * An invalid cache tree record indicates a known subtree whose file entries
  38.  * have changed in ways that cause the tree to no longer have a known object id.
  39.  * Invalid cache tree records must be revalidated prior to use.
  40.  */
  41. public class DirCacheTree {
  42.     private static final byte[] NO_NAME = {};

  43.     private static final DirCacheTree[] NO_CHILDREN = {};

  44.     private static final Comparator<DirCacheTree> TREE_CMP = (DirCacheTree o1,
  45.             DirCacheTree o2) -> {
  46.         final byte[] a = o1.encodedName;
  47.         final byte[] b = o2.encodedName;
  48.         final int aLen = a.length;
  49.         final int bLen = b.length;
  50.         int cPos;
  51.         for (cPos = 0; cPos < aLen && cPos < bLen; cPos++) {
  52.             final int cmp = (a[cPos] & 0xff) - (b[cPos] & 0xff);
  53.             if (cmp != 0) {
  54.                 return cmp;
  55.             }
  56.         }
  57.         if (aLen == bLen) {
  58.             return 0;
  59.         }
  60.         if (aLen < bLen) {
  61.             return '/' - (b[cPos] & 0xff);
  62.         }
  63.         return (a[cPos] & 0xff) - '/';
  64.     };

  65.     /** Tree this tree resides in; null if we are the root. */
  66.     private DirCacheTree parent;

  67.     /** Name of this tree within its parent. */
  68.     byte[] encodedName;

  69.     /** Number of {@link DirCacheEntry} records that belong to this tree. */
  70.     private int entrySpan;

  71.     /** Unique SHA-1 of this tree; null if invalid. */
  72.     private ObjectId id;

  73.     /** Child trees, if any, sorted by {@link #encodedName}. */
  74.     private DirCacheTree[] children;

  75.     /** Number of valid children in {@link #children}. */
  76.     private int childCnt;

  77.     DirCacheTree() {
  78.         encodedName = NO_NAME;
  79.         children = NO_CHILDREN;
  80.         childCnt = 0;
  81.         entrySpan = -1;
  82.     }

  83.     private DirCacheTree(final DirCacheTree myParent, final byte[] path,
  84.             final int pathOff, final int pathLen) {
  85.         parent = myParent;
  86.         encodedName = new byte[pathLen];
  87.         System.arraycopy(path, pathOff, encodedName, 0, pathLen);
  88.         children = NO_CHILDREN;
  89.         childCnt = 0;
  90.         entrySpan = -1;
  91.     }

  92.     DirCacheTree(final byte[] in, final MutableInteger off,
  93.             final DirCacheTree myParent) {
  94.         parent = myParent;

  95.         int ptr = RawParseUtils.next(in, off.value, '\0');
  96.         final int nameLen = ptr - off.value - 1;
  97.         if (nameLen > 0) {
  98.             encodedName = new byte[nameLen];
  99.             System.arraycopy(in, off.value, encodedName, 0, nameLen);
  100.         } else
  101.             encodedName = NO_NAME;

  102.         entrySpan = RawParseUtils.parseBase10(in, ptr, off);
  103.         final int subcnt = RawParseUtils.parseBase10(in, off.value, off);
  104.         off.value = RawParseUtils.next(in, off.value, '\n');

  105.         if (entrySpan >= 0) {
  106.             // Valid trees have a positive entry count and an id of a
  107.             // tree object that should exist in the object database.
  108.             //
  109.             id = ObjectId.fromRaw(in, off.value);
  110.             off.value += Constants.OBJECT_ID_LENGTH;
  111.         }

  112.         if (subcnt > 0) {
  113.             boolean alreadySorted = true;
  114.             children = new DirCacheTree[subcnt];
  115.             for (int i = 0; i < subcnt; i++) {
  116.                 children[i] = new DirCacheTree(in, off, this);

  117.                 // C Git's ordering differs from our own; it prefers to
  118.                 // sort by length first. This sometimes produces a sort
  119.                 // we do not desire. On the other hand it may have been
  120.                 // created by us, and be sorted the way we want.
  121.                 //
  122.                 if (alreadySorted && i > 0
  123.                         && TREE_CMP.compare(children[i - 1], children[i]) > 0)
  124.                     alreadySorted = false;
  125.             }
  126.             if (!alreadySorted)
  127.                 Arrays.sort(children, 0, subcnt, TREE_CMP);
  128.         } else {
  129.             // Leaf level trees have no children, only (file) entries.
  130.             //
  131.             children = NO_CHILDREN;
  132.         }
  133.         childCnt = subcnt;
  134.     }

  135.     void write(byte[] tmp, OutputStream os) throws IOException {
  136.         int ptr = tmp.length;
  137.         tmp[--ptr] = '\n';
  138.         ptr = RawParseUtils.formatBase10(tmp, ptr, childCnt);
  139.         tmp[--ptr] = ' ';
  140.         ptr = RawParseUtils.formatBase10(tmp, ptr, isValid() ? entrySpan : -1);
  141.         tmp[--ptr] = 0;

  142.         os.write(encodedName);
  143.         os.write(tmp, ptr, tmp.length - ptr);
  144.         if (isValid()) {
  145.             id.copyRawTo(tmp, 0);
  146.             os.write(tmp, 0, Constants.OBJECT_ID_LENGTH);
  147.         }
  148.         for (int i = 0; i < childCnt; i++)
  149.             children[i].write(tmp, os);
  150.     }

  151.     /**
  152.      * Determine if this cache is currently valid.
  153.      * <p>
  154.      * A valid cache tree knows how many
  155.      * {@link org.eclipse.jgit.dircache.DirCacheEntry} instances from the parent
  156.      * {@link org.eclipse.jgit.dircache.DirCache} reside within this tree
  157.      * (recursively enumerated). It also knows the object id of the tree, as the
  158.      * tree should be readily available from the repository's object database.
  159.      *
  160.      * @return true if this tree is knows key details about itself; false if the
  161.      *         tree needs to be regenerated.
  162.      */
  163.     public boolean isValid() {
  164.         return id != null;
  165.     }

  166.     /**
  167.      * Get the number of entries this tree spans within the DirCache.
  168.      * <p>
  169.      * If this tree is not valid (see {@link #isValid()}) this method's return
  170.      * value is always strictly negative (less than 0) but is otherwise an
  171.      * undefined result.
  172.      *
  173.      * @return total number of entries (recursively) contained within this tree.
  174.      */
  175.     public int getEntrySpan() {
  176.         return entrySpan;
  177.     }

  178.     /**
  179.      * Get the number of cached subtrees contained within this tree.
  180.      *
  181.      * @return number of child trees available through this tree.
  182.      */
  183.     public int getChildCount() {
  184.         return childCnt;
  185.     }

  186.     /**
  187.      * Get the i-th child cache tree.
  188.      *
  189.      * @param i
  190.      *            index of the child to obtain.
  191.      * @return the child tree.
  192.      */
  193.     public DirCacheTree getChild(int i) {
  194.         return children[i];
  195.     }

  196.     /**
  197.      * Get the tree's ObjectId.
  198.      * <p>
  199.      * If {@link #isValid()} returns false this method will return null.
  200.      *
  201.      * @return ObjectId of this tree or null.
  202.      * @since 4.3
  203.      */
  204.     public ObjectId getObjectId() {
  205.         return id;
  206.     }

  207.     /**
  208.      * Get the tree's name within its parent.
  209.      * <p>
  210.      * This method is not very efficient and is primarily meant for debugging
  211.      * and final output generation. Applications should try to avoid calling it,
  212.      * and if invoked do so only once per interesting entry, where the name is
  213.      * absolutely required for correct function.
  214.      *
  215.      * @return name of the tree. This does not contain any '/' characters.
  216.      */
  217.     public String getNameString() {
  218.         final ByteBuffer bb = ByteBuffer.wrap(encodedName);
  219.         return UTF_8.decode(bb).toString();
  220.     }

  221.     /**
  222.      * Get the tree's path within the repository.
  223.      * <p>
  224.      * This method is not very efficient and is primarily meant for debugging
  225.      * and final output generation. Applications should try to avoid calling it,
  226.      * and if invoked do so only once per interesting entry, where the name is
  227.      * absolutely required for correct function.
  228.      *
  229.      * @return path of the tree, relative to the repository root. If this is not
  230.      *         the root tree the path ends with '/'. The root tree's path string
  231.      *         is the empty string ("").
  232.      */
  233.     public String getPathString() {
  234.         final StringBuilder r = new StringBuilder();
  235.         appendName(r);
  236.         return r.toString();
  237.     }

  238.     /**
  239.      * Write (if necessary) this tree to the object store.
  240.      *
  241.      * @param cache
  242.      *            the complete cache from DirCache.
  243.      * @param cIdx
  244.      *            first position of <code>cache</code> that is a member of this
  245.      *            tree. The path of <code>cache[cacheIdx].path</code> for the
  246.      *            range <code>[0,pathOff-1)</code> matches the complete path of
  247.      *            this tree, from the root of the repository.
  248.      * @param pathOffset
  249.      *            number of bytes of <code>cache[cacheIdx].path</code> that
  250.      *            matches this tree's path. The value at array position
  251.      *            <code>cache[cacheIdx].path[pathOff-1]</code> is always '/' if
  252.      *            <code>pathOff</code> is > 0.
  253.      * @param ow
  254.      *            the writer to use when serializing to the store.
  255.      * @return identity of this tree.
  256.      * @throws UnmergedPathException
  257.      *             one or more paths contain higher-order stages (stage > 0),
  258.      *             which cannot be stored in a tree object.
  259.      * @throws IOException
  260.      *             an unexpected error occurred writing to the object store.
  261.      */
  262.     ObjectId writeTree(final DirCacheEntry[] cache, int cIdx,
  263.             final int pathOffset, final ObjectInserter ow)
  264.             throws UnmergedPathException, IOException {
  265.         if (id == null) {
  266.             final int endIdx = cIdx + entrySpan;
  267.             final TreeFormatter fmt = new TreeFormatter(computeSize(cache,
  268.                     cIdx, pathOffset, ow));
  269.             int childIdx = 0;
  270.             int entryIdx = cIdx;

  271.             while (entryIdx < endIdx) {
  272.                 final DirCacheEntry e = cache[entryIdx];
  273.                 final byte[] ep = e.path;
  274.                 if (childIdx < childCnt) {
  275.                     final DirCacheTree st = children[childIdx];
  276.                     if (st.contains(ep, pathOffset, ep.length)) {
  277.                         fmt.append(st.encodedName, TREE, st.id);
  278.                         entryIdx += st.entrySpan;
  279.                         childIdx++;
  280.                         continue;
  281.                     }
  282.                 }

  283.                 fmt.append(ep, pathOffset, ep.length - pathOffset, e
  284.                         .getFileMode(), e.idBuffer(), e.idOffset());
  285.                 entryIdx++;
  286.             }

  287.             id = ow.insert(fmt);
  288.         }
  289.         return id;
  290.     }

  291.     private int computeSize(final DirCacheEntry[] cache, int cIdx,
  292.             final int pathOffset, final ObjectInserter ow)
  293.             throws UnmergedPathException, IOException {
  294.         final int endIdx = cIdx + entrySpan;
  295.         int childIdx = 0;
  296.         int entryIdx = cIdx;
  297.         int size = 0;

  298.         while (entryIdx < endIdx) {
  299.             final DirCacheEntry e = cache[entryIdx];
  300.             if (e.getStage() != 0)
  301.                 throw new UnmergedPathException(e);

  302.             final byte[] ep = e.path;
  303.             if (childIdx < childCnt) {
  304.                 final DirCacheTree st = children[childIdx];
  305.                 if (st.contains(ep, pathOffset, ep.length)) {
  306.                     final int stOffset = pathOffset + st.nameLength() + 1;
  307.                     st.writeTree(cache, entryIdx, stOffset, ow);

  308.                     size += entrySize(TREE, st.nameLength());

  309.                     entryIdx += st.entrySpan;
  310.                     childIdx++;
  311.                     continue;
  312.                 }
  313.             }

  314.             size += entrySize(e.getFileMode(), ep.length - pathOffset);
  315.             entryIdx++;
  316.         }

  317.         return size;
  318.     }

  319.     private void appendName(StringBuilder r) {
  320.         if (parent != null) {
  321.             parent.appendName(r);
  322.             r.append(getNameString());
  323.             r.append('/');
  324.         } else if (nameLength() > 0) {
  325.             r.append(getNameString());
  326.             r.append('/');
  327.         }
  328.     }

  329.     final int nameLength() {
  330.         return encodedName.length;
  331.     }

  332.     final boolean contains(byte[] a, int aOff, int aLen) {
  333.         final byte[] e = encodedName;
  334.         final int eLen = e.length;
  335.         for (int eOff = 0; eOff < eLen && aOff < aLen; eOff++, aOff++)
  336.             if (e[eOff] != a[aOff])
  337.                 return false;
  338.         if (aOff >= aLen)
  339.             return false;
  340.         return a[aOff] == '/';
  341.     }

  342.     /**
  343.      * Update (if necessary) this tree's entrySpan.
  344.      *
  345.      * @param cache
  346.      *            the complete cache from DirCache.
  347.      * @param cCnt
  348.      *            number of entries in <code>cache</code> that are valid for
  349.      *            iteration.
  350.      * @param cIdx
  351.      *            first position of <code>cache</code> that is a member of this
  352.      *            tree. The path of <code>cache[cacheIdx].path</code> for the
  353.      *            range <code>[0,pathOff-1)</code> matches the complete path of
  354.      *            this tree, from the root of the repository.
  355.      * @param pathOff
  356.      *            number of bytes of <code>cache[cacheIdx].path</code> that
  357.      *            matches this tree's path. The value at array position
  358.      *            <code>cache[cacheIdx].path[pathOff-1]</code> is always '/' if
  359.      *            <code>pathOff</code> is > 0.
  360.      */
  361.     void validate(final DirCacheEntry[] cache, final int cCnt, int cIdx,
  362.             final int pathOff) {
  363.         if (entrySpan >= 0 && cIdx + entrySpan <= cCnt) {
  364.             // If we are valid, our children are also valid.
  365.             // We have no need to validate them.
  366.             //
  367.             return;
  368.         }

  369.         entrySpan = 0;
  370.         if (cCnt == 0) {
  371.             // Special case of an empty index, and we are the root tree.
  372.             //
  373.             return;
  374.         }

  375.         final byte[] firstPath = cache[cIdx].path;
  376.         int stIdx = 0;
  377.         while (cIdx < cCnt) {
  378.             final byte[] currPath = cache[cIdx].path;
  379.             if (pathOff > 0 && !peq(firstPath, currPath, pathOff)) {
  380.                 // The current entry is no longer in this tree. Our
  381.                 // span is updated and the remainder goes elsewhere.
  382.                 //
  383.                 break;
  384.             }

  385.             DirCacheTree st = stIdx < childCnt ? children[stIdx] : null;
  386.             final int cc = namecmp(currPath, pathOff, st);
  387.             if (cc > 0) {
  388.                 // This subtree is now empty.
  389.                 //
  390.                 removeChild(stIdx);
  391.                 continue;
  392.             }

  393.             if (cc < 0) {
  394.                 final int p = slash(currPath, pathOff);
  395.                 if (p < 0) {
  396.                     // The entry has no '/' and thus is directly in this
  397.                     // tree. Count it as one of our own.
  398.                     //
  399.                     cIdx++;
  400.                     entrySpan++;
  401.                     continue;
  402.                 }

  403.                 // Build a new subtree for this entry.
  404.                 //
  405.                 st = new DirCacheTree(this, currPath, pathOff, p - pathOff);
  406.                 insertChild(stIdx, st);
  407.             }

  408.             // The entry is contained in this subtree.
  409.             //
  410.             assert(st != null);
  411.             st.validate(cache, cCnt, cIdx, pathOff + st.nameLength() + 1);
  412.             cIdx += st.entrySpan;
  413.             entrySpan += st.entrySpan;
  414.             stIdx++;
  415.         }

  416.         // None of our remaining children can be in this tree
  417.         // as the current cache entry is after our own name.
  418.         //
  419.         while (stIdx < childCnt)
  420.             removeChild(childCnt - 1);
  421.     }

  422.     private void insertChild(int stIdx, DirCacheTree st) {
  423.         final DirCacheTree[] c = children;
  424.         if (childCnt + 1 <= c.length) {
  425.             if (stIdx < childCnt)
  426.                 System.arraycopy(c, stIdx, c, stIdx + 1, childCnt - stIdx);
  427.             c[stIdx] = st;
  428.             childCnt++;
  429.             return;
  430.         }

  431.         final int n = c.length;
  432.         final DirCacheTree[] a = new DirCacheTree[n + 1];
  433.         if (stIdx > 0)
  434.             System.arraycopy(c, 0, a, 0, stIdx);
  435.         a[stIdx] = st;
  436.         if (stIdx < n)
  437.             System.arraycopy(c, stIdx, a, stIdx + 1, n - stIdx);
  438.         children = a;
  439.         childCnt++;
  440.     }

  441.     private void removeChild(int stIdx) {
  442.         final int n = --childCnt;
  443.         if (stIdx < n)
  444.             System.arraycopy(children, stIdx + 1, children, stIdx, n - stIdx);
  445.         children[n] = null;
  446.     }

  447.     static boolean peq(byte[] a, byte[] b, int aLen) {
  448.         if (b.length < aLen)
  449.             return false;
  450.         for (aLen--; aLen >= 0; aLen--)
  451.             if (a[aLen] != b[aLen])
  452.                 return false;
  453.         return true;
  454.     }

  455.     private static int namecmp(byte[] a, int aPos, DirCacheTree ct) {
  456.         if (ct == null)
  457.             return -1;
  458.         final byte[] b = ct.encodedName;
  459.         final int aLen = a.length;
  460.         final int bLen = b.length;
  461.         int bPos = 0;
  462.         for (; aPos < aLen && bPos < bLen; aPos++, bPos++) {
  463.             final int cmp = (a[aPos] & 0xff) - (b[bPos] & 0xff);
  464.             if (cmp != 0)
  465.                 return cmp;
  466.         }
  467.         if (bPos == bLen)
  468.             return a[aPos] == '/' ? 0 : -1;
  469.         return aLen - bLen;
  470.     }

  471.     private static int slash(byte[] a, int aPos) {
  472.         final int aLen = a.length;
  473.         for (; aPos < aLen; aPos++)
  474.             if (a[aPos] == '/')
  475.                 return aPos;
  476.         return -1;
  477.     }

  478.     /** {@inheritDoc} */
  479.     @Override
  480.     public String toString() {
  481.         return getNameString();
  482.     }
  483. }