PackIndex.java

  1. /*
  2.  * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
  3.  * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
  4.  *
  5.  * This program and the accompanying materials are made available under the
  6.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  7.  * https://www.eclipse.org/org/documents/edl-v10.php.
  8.  *
  9.  * SPDX-License-Identifier: BSD-3-Clause
  10.  */

  11. package org.eclipse.jgit.internal.storage.file;

  12. import java.io.File;
  13. import java.io.FileNotFoundException;
  14. import java.io.IOException;
  15. import java.io.InputStream;
  16. import java.text.MessageFormat;
  17. import java.util.Iterator;
  18. import java.util.Set;

  19. import org.eclipse.jgit.errors.CorruptObjectException;
  20. import org.eclipse.jgit.errors.MissingObjectException;
  21. import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException;
  22. import org.eclipse.jgit.internal.JGitText;
  23. import org.eclipse.jgit.lib.AbbreviatedObjectId;
  24. import org.eclipse.jgit.lib.AnyObjectId;
  25. import org.eclipse.jgit.lib.MutableObjectId;
  26. import org.eclipse.jgit.lib.ObjectId;
  27. import org.eclipse.jgit.lib.ObjectIdSet;
  28. import org.eclipse.jgit.util.IO;
  29. import org.eclipse.jgit.util.NB;
  30. import org.eclipse.jgit.util.io.SilentFileInputStream;

  31. /**
  32.  * Access path to locate objects by {@link org.eclipse.jgit.lib.ObjectId} in a
  33.  * {@link org.eclipse.jgit.internal.storage.file.Pack}.
  34.  * <p>
  35.  * Indexes are strictly redundant information in that we can rebuild all of the
  36.  * data held in the index file from the on disk representation of the pack file
  37.  * itself, but it is faster to access for random requests because data is stored
  38.  * by ObjectId.
  39.  * </p>
  40.  */
  41. public abstract class PackIndex
  42.         implements Iterable<PackIndex.MutableEntry>, ObjectIdSet {
  43.     /**
  44.      * Open an existing pack <code>.idx</code> file for reading.
  45.      * <p>
  46.      * The format of the file will be automatically detected and a proper access
  47.      * implementation for that format will be constructed and returned to the
  48.      * caller. The file may or may not be held open by the returned instance.
  49.      * </p>
  50.      *
  51.      * @param idxFile
  52.      *            existing pack .idx to read.
  53.      * @return access implementation for the requested file.
  54.      * @throws FileNotFoundException
  55.      *             the file does not exist.
  56.      * @throws java.io.IOException
  57.      *             the file exists but could not be read due to security errors,
  58.      *             unrecognized data version, or unexpected data corruption.
  59.      */
  60.     public static PackIndex open(File idxFile) throws IOException {
  61.         try (SilentFileInputStream fd = new SilentFileInputStream(
  62.                 idxFile)) {
  63.                 return read(fd);
  64.         } catch (IOException ioe) {
  65.             throw new IOException(
  66.                     MessageFormat.format(JGitText.get().unreadablePackIndex,
  67.                             idxFile.getAbsolutePath()),
  68.                     ioe);
  69.         }
  70.     }

  71.     /**
  72.      * Read an existing pack index file from a buffered stream.
  73.      * <p>
  74.      * The format of the file will be automatically detected and a proper access
  75.      * implementation for that format will be constructed and returned to the
  76.      * caller. The file may or may not be held open by the returned instance.
  77.      *
  78.      * @param fd
  79.      *            stream to read the index file from. The stream must be
  80.      *            buffered as some small IOs are performed against the stream.
  81.      *            The caller is responsible for closing the stream.
  82.      * @return a copy of the index in-memory.
  83.      * @throws java.io.IOException
  84.      *             the stream cannot be read.
  85.      * @throws org.eclipse.jgit.errors.CorruptObjectException
  86.      *             the stream does not contain a valid pack index.
  87.      */
  88.     public static PackIndex read(InputStream fd) throws IOException,
  89.             CorruptObjectException {
  90.         final byte[] hdr = new byte[8];
  91.         IO.readFully(fd, hdr, 0, hdr.length);
  92.         if (isTOC(hdr)) {
  93.             final int v = NB.decodeInt32(hdr, 4);
  94.             switch (v) {
  95.             case 2:
  96.                 return new PackIndexV2(fd);
  97.             default:
  98.                 throw new UnsupportedPackIndexVersionException(v);
  99.             }
  100.         }
  101.         return new PackIndexV1(fd, hdr);
  102.     }

  103.     private static boolean isTOC(byte[] h) {
  104.         final byte[] toc = PackIndexWriter.TOC;
  105.         for (int i = 0; i < toc.length; i++)
  106.             if (h[i] != toc[i])
  107.                 return false;
  108.         return true;
  109.     }

    /**
     * Footer checksum applied on the bottom of the pack file. This class never
     * assigns the field; it is only handed out, without copying, by
     * {@link #getChecksum()}.
     */
    protected byte[] packChecksum;

  112.     /**
  113.      * Determine if an object is contained within the pack file.
  114.      *
  115.      * @param id
  116.      *            the object to look for. Must not be null.
  117.      * @return true if the object is listed in this index; false otherwise.
  118.      */
  119.     public boolean hasObject(AnyObjectId id) {
  120.         return findOffset(id) != -1;
  121.     }

  122.     /** {@inheritDoc} */
  123.     @Override
  124.     public boolean contains(AnyObjectId id) {
  125.         return findOffset(id) != -1;
  126.     }

    /**
     * {@inheritDoc}
     * <p>
     * Provide an iterator that gives access to the index entries. Note that the
     * iterator returns a reference to a mutable object, the same reference on
     * each call, for performance reasons. A client that needs immutable
     * objects must copy the returned object on its own, for example with
     * {@link MutableEntry#cloneEntry()}.
     * <p>
     * The iterator returns objects in SHA-1 lexicographical order.
     * </p>
     */
    @Override
    public abstract Iterator<MutableEntry> iterator();

    /**
     * Obtain the total number of objects described by this index.
     *
     * @return number of objects in this index, and likewise in the associated
     *         pack that this index was generated from.
     */
    public abstract long getObjectCount();

    /**
     * Obtain the total number of objects needing 64 bit offsets.
     *
     * @return number of objects in this index using a 64 bit offset; that is,
     *         objects positioned after the 2 GB position within the file.
     */
    public abstract long getOffset64Count();

    /**
     * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
     * <p>
     * This method is a constant-time replacement for the following loop:
     *
     * <pre>
     * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
     * int curPosition = 0;
     * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
     *  eItr.next();
     * ObjectId result = eItr.next().toObjectId();
     * </pre>
     *
     * @param nthPosition
     *            position within the traversal of {@link #iterator()} that the
     *            caller needs the object for. The first returned
     *            {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
     *            is 0, the second is 1, etc.
     * @return the ObjectId for the corresponding entry.
     */
    public abstract ObjectId getObjectId(long nthPosition);

  175.     /**
  176.      * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
  177.      * <p>
  178.      * This method is a constant-time replacement for the following loop:
  179.      *
  180.      * <pre>
  181.      * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
  182.      * int curPosition = 0;
  183.      * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
  184.      *  eItr.next();
  185.      * ObjectId result = eItr.next().toObjectId();
  186.      * </pre>
  187.      *
  188.      * @param nthPosition
  189.      *            unsigned 32 bit position within the traversal of
  190.      *            {@link #iterator()} that the caller needs the object for. The
  191.      *            first returned
  192.      *            {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
  193.      *            is 0, the second is 1, etc. Positions past 2**31-1 are
  194.      *            negative, but still valid.
  195.      * @return the ObjectId for the corresponding entry.
  196.      */
  197.     public final ObjectId getObjectId(int nthPosition) {
  198.         if (nthPosition >= 0)
  199.             return getObjectId((long) nthPosition);
  200.         final int u31 = nthPosition >>> 1;
  201.         final int one = nthPosition & 1;
  202.         return getObjectId(((long) u31) << 1 | one);
  203.     }

    /**
     * Get offset in a pack for the n-th object entry returned by
     * {@link #iterator()}.
     *
     * @param nthPosition
     *            position within the traversal of {@link #iterator()} for
     *            which the caller needs the offset. The first returned
     *            {@link MutableEntry} is 0, the second is 1, etc.
     *            NOTE(review): this text previously described an "unsigned 32
     *            bit position" whose high values "are negative", wording that
     *            matches the {@code int} overload of {@code getObjectId}; this
     *            parameter is a {@code long}, so confirm the accepted range
     *            against the implementations.
     * @return the offset in a pack for the corresponding entry.
     */
    abstract long getOffset(long nthPosition);

    /**
     * Locate the file offset position for the requested object.
     * <p>
     * {@link #hasObject(AnyObjectId)} and {@link #contains(AnyObjectId)} both
     * rely on the -1 result to detect a missing object.
     * </p>
     *
     * @param objId
     *            name of the object to locate within the pack.
     * @return offset of the object's header and compressed content; -1 if the
     *         object does not exist in this index and is thus not stored in the
     *         associated pack.
     */
    public abstract long findOffset(AnyObjectId objId);

    /**
     * Retrieve the stored CRC32 checksum of the requested object's raw data
     * (including header).
     * <p>
     * Use {@link #hasCRC32Support()} to find out whether this index stores
     * checksums at all.
     * </p>
     *
     * @param objId
     *            id of the object to look for.
     * @return CRC32 checksum of the specified object (in the 32 less
     *         significant bits).
     * @throws org.eclipse.jgit.errors.MissingObjectException
     *             when the requested ObjectId was not found in this index.
     * @throws java.lang.UnsupportedOperationException
     *             when this index doesn't support CRC32 checksums.
     */
    public abstract long findCRC32(AnyObjectId objId)
            throws MissingObjectException, UnsupportedOperationException;

    /**
     * Check whether this index supports (has) CRC32 checksums for objects.
     *
     * @return true if CRC32 is stored, false otherwise.
     */
    public abstract boolean hasCRC32Support();

    /**
     * Find objects matching the prefix abbreviation.
     *
     * @param matches
     *            set to add any located ObjectIds to. This is an output
     *            parameter.
     * @param id
     *            prefix to search for.
     * @param matchLimit
     *            maximum number of results to return. At most this many
     *            ObjectIds should be added to matches before returning.
     * @throws java.io.IOException
     *             the index cannot be read.
     */
    public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id,
            int matchLimit) throws IOException;

    /**
     * Get the checksum of the pack.
     * <p>
     * The internal array is returned directly, without a defensive copy.
     * </p>
     *
     * @return the checksum of the pack; caller must not modify it
     * @since 5.5
     */
    public byte[] getChecksum() {
        return packChecksum;
    }

  269.     /**
  270.      * Represent mutable entry of pack index consisting of object id and offset
  271.      * in pack (both mutable).
  272.      *
  273.      */
  274.     public static class MutableEntry {
  275.         final MutableObjectId idBuffer = new MutableObjectId();

  276.         long offset;

  277.         /**
  278.          * Returns offset for this index object entry
  279.          *
  280.          * @return offset of this object in a pack file
  281.          */
  282.         public long getOffset() {
  283.             return offset;
  284.         }

  285.         /** @return hex string describing the object id of this entry. */
  286.         public String name() {
  287.             ensureId();
  288.             return idBuffer.name();
  289.         }

  290.         /** @return a copy of the object id. */
  291.         public ObjectId toObjectId() {
  292.             ensureId();
  293.             return idBuffer.toObjectId();
  294.         }

  295.         /** @return a complete copy of this entry, that won't modify */
  296.         public MutableEntry cloneEntry() {
  297.             final MutableEntry r = new MutableEntry();
  298.             ensureId();
  299.             r.idBuffer.fromObjectId(idBuffer);
  300.             r.offset = offset;
  301.             return r;
  302.         }

  303.         void ensureId() {
  304.             // Override in implementations.
  305.         }
  306.     }

  307.     abstract class EntriesIterator implements Iterator<MutableEntry> {
  308.         protected final MutableEntry entry = initEntry();

  309.         protected long returnedNumber = 0;

  310.         protected abstract MutableEntry initEntry();

  311.         @Override
  312.         public boolean hasNext() {
  313.             return returnedNumber < getObjectCount();
  314.         }

  315.         /**
  316.          * Implementation must update {@link #returnedNumber} before returning
  317.          * element.
  318.          */
  319.         @Override
  320.         public abstract MutableEntry next();

  321.         @Override
  322.         public void remove() {
  323.             throw new UnsupportedOperationException();
  324.         }
  325.     }
  326. }