PackIndexWriter.java

  1. /*
  2.  * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com>
  3.  * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
  4.  *
  5.  * This program and the accompanying materials are made available under the
  6.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  7.  * https://www.eclipse.org/org/documents/edl-v10.php.
  8.  *
  9.  * SPDX-License-Identifier: BSD-3-Clause
  10.  */

  11. package org.eclipse.jgit.internal.storage.file;

  12. import java.io.BufferedOutputStream;
  13. import java.io.IOException;
  14. import java.io.OutputStream;
  15. import java.security.DigestOutputStream;
  16. import java.text.MessageFormat;
  17. import java.util.List;

  18. import org.eclipse.jgit.internal.JGitText;
  19. import org.eclipse.jgit.lib.Constants;
  20. import org.eclipse.jgit.transport.PackedObjectInfo;
  21. import org.eclipse.jgit.util.NB;

  22. /**
  23.  * Creates a table of contents to support random access by
  24.  * {@link org.eclipse.jgit.internal.storage.file.Pack}.
  25.  * <p>
  26.  * Pack index files (the <code>.idx</code> suffix in a pack file pair) provides
  27.  * random access to any object in the pack by associating an ObjectId to the
  28.  * byte offset within the pack where the object's data can be read.
  29.  */
  30. public abstract class PackIndexWriter {
  31.     /** Magic constant indicating post-version 1 format. */
  32.     protected static final byte[] TOC = { -1, 't', 'O', 'c' };

  33.     /**
  34.      * Create a new writer for the oldest (most widely understood) format.
  35.      * <p>
  36.      * This method selects an index format that can accurate describe the
  37.      * supplied objects and that will be the most compatible format with older
  38.      * Git implementations.
  39.      * <p>
  40.      * Index version 1 is widely recognized by all Git implementations, but
  41.      * index version 2 (and later) is not as well recognized as it was
  42.      * introduced more than a year later. Index version 1 can only be used if
  43.      * the resulting pack file is under 4 gigabytes in size; packs larger than
  44.      * that limit must use index version 2.
  45.      *
  46.      * @param dst
  47.      *            the stream the index data will be written to. If not already
  48.      *            buffered it will be automatically wrapped in a buffered
  49.      *            stream. Callers are always responsible for closing the stream.
  50.      * @param objs
  51.      *            the objects the caller needs to store in the index. Entries
  52.      *            will be examined until a format can be conclusively selected.
  53.      * @return a new writer to output an index file of the requested format to
  54.      *         the supplied stream.
  55.      * @throws java.lang.IllegalArgumentException
  56.      *             no recognized pack index version can support the supplied
  57.      *             objects. This is likely a bug in the implementation.
  58.      * @see #oldestPossibleFormat(List)
  59.      */
  60.     public static PackIndexWriter createOldestPossible(final OutputStream dst,
  61.             final List<? extends PackedObjectInfo> objs) {
  62.         return createVersion(dst, oldestPossibleFormat(objs));
  63.     }

  64.     /**
  65.      * Return the oldest (most widely understood) index format.
  66.      * <p>
  67.      * This method selects an index format that can accurate describe the
  68.      * supplied objects and that will be the most compatible format with older
  69.      * Git implementations.
  70.      * <p>
  71.      * Index version 1 is widely recognized by all Git implementations, but
  72.      * index version 2 (and later) is not as well recognized as it was
  73.      * introduced more than a year later. Index version 1 can only be used if
  74.      * the resulting pack file is under 4 gigabytes in size; packs larger than
  75.      * that limit must use index version 2.
  76.      *
  77.      * @param objs
  78.      *            the objects the caller needs to store in the index. Entries
  79.      *            will be examined until a format can be conclusively selected.
  80.      * @return the index format.
  81.      * @throws java.lang.IllegalArgumentException
  82.      *             no recognized pack index version can support the supplied
  83.      *             objects. This is likely a bug in the implementation.
  84.      */
  85.     public static int oldestPossibleFormat(
  86.             final List<? extends PackedObjectInfo> objs) {
  87.         for (PackedObjectInfo oe : objs) {
  88.             if (!PackIndexWriterV1.canStore(oe))
  89.                 return 2;
  90.         }
  91.         return 1;
  92.     }


  93.     /**
  94.      * Create a new writer instance for a specific index format version.
  95.      *
  96.      * @param dst
  97.      *            the stream the index data will be written to. If not already
  98.      *            buffered it will be automatically wrapped in a buffered
  99.      *            stream. Callers are always responsible for closing the stream.
  100.      * @param version
  101.      *            index format version number required by the caller. Exactly
  102.      *            this formatted version will be written.
  103.      * @return a new writer to output an index file of the requested format to
  104.      *         the supplied stream.
  105.      * @throws java.lang.IllegalArgumentException
  106.      *             the version requested is not supported by this
  107.      *             implementation.
  108.      */
  109.     public static PackIndexWriter createVersion(final OutputStream dst,
  110.             final int version) {
  111.         switch (version) {
  112.         case 1:
  113.             return new PackIndexWriterV1(dst);
  114.         case 2:
  115.             return new PackIndexWriterV2(dst);
  116.         default:
  117.             throw new IllegalArgumentException(MessageFormat.format(
  118.                     JGitText.get().unsupportedPackIndexVersion,
  119.                     Integer.valueOf(version)));
  120.         }
  121.     }

  122.     /** The index data stream we are responsible for creating. */
  123.     protected final DigestOutputStream out;

  124.     /** A temporary buffer for use during IO to {link #out}. */
  125.     protected final byte[] tmp;

  126.     /** The entries this writer must pack. */
  127.     protected List<? extends PackedObjectInfo> entries;

  128.     /** SHA-1 checksum for the entire pack data. */
  129.     protected byte[] packChecksum;

  130.     /**
  131.      * Create a new writer instance.
  132.      *
  133.      * @param dst
  134.      *            the stream this instance outputs to. If not already buffered
  135.      *            it will be automatically wrapped in a buffered stream.
  136.      */
  137.     protected PackIndexWriter(OutputStream dst) {
  138.         out = new DigestOutputStream(dst instanceof BufferedOutputStream ? dst
  139.                 : new BufferedOutputStream(dst),
  140.                 Constants.newMessageDigest());
  141.         tmp = new byte[4 + Constants.OBJECT_ID_LENGTH];
  142.     }

  143.     /**
  144.      * Write all object entries to the index stream.
  145.      * <p>
  146.      * After writing the stream passed to the factory is flushed but remains
  147.      * open. Callers are always responsible for closing the output stream.
  148.      *
  149.      * @param toStore
  150.      *            sorted list of objects to store in the index. The caller must
  151.      *            have previously sorted the list using
  152.      *            {@link org.eclipse.jgit.transport.PackedObjectInfo}'s native
  153.      *            {@link java.lang.Comparable} implementation.
  154.      * @param packDataChecksum
  155.      *            checksum signature of the entire pack data content. This is
  156.      *            traditionally the last 20 bytes of the pack file's own stream.
  157.      * @throws java.io.IOException
  158.      *             an error occurred while writing to the output stream, or this
  159.      *             index format cannot store the object data supplied.
  160.      */
  161.     public void write(final List<? extends PackedObjectInfo> toStore,
  162.             final byte[] packDataChecksum) throws IOException {
  163.         entries = toStore;
  164.         packChecksum = packDataChecksum;
  165.         writeImpl();
  166.         out.flush();
  167.     }

  168.     /**
  169.      * Writes the index file to {@link #out}.
  170.      * <p>
  171.      * Implementations should go something like:
  172.      *
  173.      * <pre>
  174.      * writeFanOutTable();
  175.      * for (final PackedObjectInfo po : entries)
  176.      *  writeOneEntry(po);
  177.      * writeChecksumFooter();
  178.      * </pre>
  179.      *
  180.      * <p>
  181.      * Where the logic for <code>writeOneEntry</code> is specific to the index
  182.      * format in use. Additional headers/footers may be used if necessary and
  183.      * the {@link #entries} collection may be iterated over more than once if
  184.      * necessary. Implementors therefore have complete control over the data.
  185.      *
  186.      * @throws java.io.IOException
  187.      *             an error occurred while writing to the output stream, or this
  188.      *             index format cannot store the object data supplied.
  189.      */
  190.     protected abstract void writeImpl() throws IOException;

  191.     /**
  192.      * Output the version 2 (and later) TOC header, with version number.
  193.      * <p>
  194.      * Post version 1 all index files start with a TOC header that makes the
  195.      * file an invalid version 1 file, and then includes the version number.
  196.      * This header is necessary to recognize a version 1 from a version 2
  197.      * formatted index.
  198.      *
  199.      * @param version
  200.      *            version number of this index format being written.
  201.      * @throws java.io.IOException
  202.      *             an error occurred while writing to the output stream.
  203.      */
  204.     protected void writeTOC(int version) throws IOException {
  205.         out.write(TOC);
  206.         NB.encodeInt32(tmp, 0, version);
  207.         out.write(tmp, 0, 4);
  208.     }

  209.     /**
  210.      * Output the standard 256 entry first-level fan-out table.
  211.      * <p>
  212.      * The fan-out table is 4 KB in size, holding 256 32-bit unsigned integer
  213.      * counts. Each count represents the number of objects within this index
  214.      * whose {@link org.eclipse.jgit.lib.ObjectId#getFirstByte()} matches the
  215.      * count's position in the fan-out table.
  216.      *
  217.      * @throws java.io.IOException
  218.      *             an error occurred while writing to the output stream.
  219.      */
  220.     protected void writeFanOutTable() throws IOException {
  221.         final int[] fanout = new int[256];
  222.         for (PackedObjectInfo po : entries)
  223.             fanout[po.getFirstByte() & 0xff]++;
  224.         for (int i = 1; i < 256; i++)
  225.             fanout[i] += fanout[i - 1];
  226.         for (int n : fanout) {
  227.             NB.encodeInt32(tmp, 0, n);
  228.             out.write(tmp, 0, 4);
  229.         }
  230.     }

  231.     /**
  232.      * Output the standard two-checksum index footer.
  233.      * <p>
  234.      * The standard footer contains two checksums (20 byte SHA-1 values):
  235.      * <ol>
  236.      * <li>Pack data checksum - taken from the last 20 bytes of the pack file.</li>
  237.      * <li>Index data checksum - checksum of all index bytes written, including
  238.      * the pack data checksum above.</li>
  239.      * </ol>
  240.      *
  241.      * @throws java.io.IOException
  242.      *             an error occurred while writing to the output stream.
  243.      */
  244.     protected void writeChecksumFooter() throws IOException {
  245.         out.write(packChecksum);
  246.         out.on(false);
  247.         out.write(out.getMessageDigest().digest());
  248.     }
  249. }