PackIndexWriter.java
- /*
- * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com>
- * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Distribution License v. 1.0 which is available at
- * https://www.eclipse.org/org/documents/edl-v10.php.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
- package org.eclipse.jgit.internal.storage.file;
- import java.io.BufferedOutputStream;
- import java.io.IOException;
- import java.io.OutputStream;
- import java.security.DigestOutputStream;
- import java.text.MessageFormat;
- import java.util.List;
- import org.eclipse.jgit.internal.JGitText;
- import org.eclipse.jgit.lib.Constants;
- import org.eclipse.jgit.transport.PackedObjectInfo;
- import org.eclipse.jgit.util.NB;
- /**
- * Creates a table of contents to support random access by
- * {@link org.eclipse.jgit.internal.storage.file.Pack}.
- * <p>
- * Pack index files (the <code>.idx</code> suffix in a pack file pair) provides
- * random access to any object in the pack by associating an ObjectId to the
- * byte offset within the pack where the object's data can be read.
- */
- public abstract class PackIndexWriter {
- /** Magic constant indicating post-version 1 format. */
- protected static final byte[] TOC = { -1, 't', 'O', 'c' };
- /**
- * Create a new writer for the oldest (most widely understood) format.
- * <p>
- * This method selects an index format that can accurate describe the
- * supplied objects and that will be the most compatible format with older
- * Git implementations.
- * <p>
- * Index version 1 is widely recognized by all Git implementations, but
- * index version 2 (and later) is not as well recognized as it was
- * introduced more than a year later. Index version 1 can only be used if
- * the resulting pack file is under 4 gigabytes in size; packs larger than
- * that limit must use index version 2.
- *
- * @param dst
- * the stream the index data will be written to. If not already
- * buffered it will be automatically wrapped in a buffered
- * stream. Callers are always responsible for closing the stream.
- * @param objs
- * the objects the caller needs to store in the index. Entries
- * will be examined until a format can be conclusively selected.
- * @return a new writer to output an index file of the requested format to
- * the supplied stream.
- * @throws java.lang.IllegalArgumentException
- * no recognized pack index version can support the supplied
- * objects. This is likely a bug in the implementation.
- * @see #oldestPossibleFormat(List)
- */
- public static PackIndexWriter createOldestPossible(final OutputStream dst,
- final List<? extends PackedObjectInfo> objs) {
- return createVersion(dst, oldestPossibleFormat(objs));
- }
- /**
- * Return the oldest (most widely understood) index format.
- * <p>
- * This method selects an index format that can accurate describe the
- * supplied objects and that will be the most compatible format with older
- * Git implementations.
- * <p>
- * Index version 1 is widely recognized by all Git implementations, but
- * index version 2 (and later) is not as well recognized as it was
- * introduced more than a year later. Index version 1 can only be used if
- * the resulting pack file is under 4 gigabytes in size; packs larger than
- * that limit must use index version 2.
- *
- * @param objs
- * the objects the caller needs to store in the index. Entries
- * will be examined until a format can be conclusively selected.
- * @return the index format.
- * @throws java.lang.IllegalArgumentException
- * no recognized pack index version can support the supplied
- * objects. This is likely a bug in the implementation.
- */
- public static int oldestPossibleFormat(
- final List<? extends PackedObjectInfo> objs) {
- for (PackedObjectInfo oe : objs) {
- if (!PackIndexWriterV1.canStore(oe))
- return 2;
- }
- return 1;
- }
- /**
- * Create a new writer instance for a specific index format version.
- *
- * @param dst
- * the stream the index data will be written to. If not already
- * buffered it will be automatically wrapped in a buffered
- * stream. Callers are always responsible for closing the stream.
- * @param version
- * index format version number required by the caller. Exactly
- * this formatted version will be written.
- * @return a new writer to output an index file of the requested format to
- * the supplied stream.
- * @throws java.lang.IllegalArgumentException
- * the version requested is not supported by this
- * implementation.
- */
- public static PackIndexWriter createVersion(final OutputStream dst,
- final int version) {
- switch (version) {
- case 1:
- return new PackIndexWriterV1(dst);
- case 2:
- return new PackIndexWriterV2(dst);
- default:
- throw new IllegalArgumentException(MessageFormat.format(
- JGitText.get().unsupportedPackIndexVersion,
- Integer.valueOf(version)));
- }
- }
- /** The index data stream we are responsible for creating. */
- protected final DigestOutputStream out;
- /** A temporary buffer for use during IO to {link #out}. */
- protected final byte[] tmp;
- /** The entries this writer must pack. */
- protected List<? extends PackedObjectInfo> entries;
- /** SHA-1 checksum for the entire pack data. */
- protected byte[] packChecksum;
- /**
- * Create a new writer instance.
- *
- * @param dst
- * the stream this instance outputs to. If not already buffered
- * it will be automatically wrapped in a buffered stream.
- */
- protected PackIndexWriter(OutputStream dst) {
- out = new DigestOutputStream(dst instanceof BufferedOutputStream ? dst
- : new BufferedOutputStream(dst),
- Constants.newMessageDigest());
- tmp = new byte[4 + Constants.OBJECT_ID_LENGTH];
- }
- /**
- * Write all object entries to the index stream.
- * <p>
- * After writing the stream passed to the factory is flushed but remains
- * open. Callers are always responsible for closing the output stream.
- *
- * @param toStore
- * sorted list of objects to store in the index. The caller must
- * have previously sorted the list using
- * {@link org.eclipse.jgit.transport.PackedObjectInfo}'s native
- * {@link java.lang.Comparable} implementation.
- * @param packDataChecksum
- * checksum signature of the entire pack data content. This is
- * traditionally the last 20 bytes of the pack file's own stream.
- * @throws java.io.IOException
- * an error occurred while writing to the output stream, or this
- * index format cannot store the object data supplied.
- */
- public void write(final List<? extends PackedObjectInfo> toStore,
- final byte[] packDataChecksum) throws IOException {
- entries = toStore;
- packChecksum = packDataChecksum;
- writeImpl();
- out.flush();
- }
- /**
- * Writes the index file to {@link #out}.
- * <p>
- * Implementations should go something like:
- *
- * <pre>
- * writeFanOutTable();
- * for (final PackedObjectInfo po : entries)
- * writeOneEntry(po);
- * writeChecksumFooter();
- * </pre>
- *
- * <p>
- * Where the logic for <code>writeOneEntry</code> is specific to the index
- * format in use. Additional headers/footers may be used if necessary and
- * the {@link #entries} collection may be iterated over more than once if
- * necessary. Implementors therefore have complete control over the data.
- *
- * @throws java.io.IOException
- * an error occurred while writing to the output stream, or this
- * index format cannot store the object data supplied.
- */
- protected abstract void writeImpl() throws IOException;
- /**
- * Output the version 2 (and later) TOC header, with version number.
- * <p>
- * Post version 1 all index files start with a TOC header that makes the
- * file an invalid version 1 file, and then includes the version number.
- * This header is necessary to recognize a version 1 from a version 2
- * formatted index.
- *
- * @param version
- * version number of this index format being written.
- * @throws java.io.IOException
- * an error occurred while writing to the output stream.
- */
- protected void writeTOC(int version) throws IOException {
- out.write(TOC);
- NB.encodeInt32(tmp, 0, version);
- out.write(tmp, 0, 4);
- }
- /**
- * Output the standard 256 entry first-level fan-out table.
- * <p>
- * The fan-out table is 4 KB in size, holding 256 32-bit unsigned integer
- * counts. Each count represents the number of objects within this index
- * whose {@link org.eclipse.jgit.lib.ObjectId#getFirstByte()} matches the
- * count's position in the fan-out table.
- *
- * @throws java.io.IOException
- * an error occurred while writing to the output stream.
- */
- protected void writeFanOutTable() throws IOException {
- final int[] fanout = new int[256];
- for (PackedObjectInfo po : entries)
- fanout[po.getFirstByte() & 0xff]++;
- for (int i = 1; i < 256; i++)
- fanout[i] += fanout[i - 1];
- for (int n : fanout) {
- NB.encodeInt32(tmp, 0, n);
- out.write(tmp, 0, 4);
- }
- }
- /**
- * Output the standard two-checksum index footer.
- * <p>
- * The standard footer contains two checksums (20 byte SHA-1 values):
- * <ol>
- * <li>Pack data checksum - taken from the last 20 bytes of the pack file.</li>
- * <li>Index data checksum - checksum of all index bytes written, including
- * the pack data checksum above.</li>
- * </ol>
- *
- * @throws java.io.IOException
- * an error occurred while writing to the output stream.
- */
- protected void writeChecksumFooter() throws IOException {
- out.write(packChecksum);
- out.on(false);
- out.write(out.getMessageDigest().digest());
- }
- }