ObjectLoader.java

  1. /*
  2.  * Copyright (C) 2008-2009, Google Inc.
  3.  * Copyright (C) 2008, Jonas Fonseca <fonseca@diku.dk>
  4.  * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
  5.  * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com>
  6.  * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org> and others
  7.  *
  8.  * This program and the accompanying materials are made available under the
  9.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  10.  * https://www.eclipse.org/org/documents/edl-v10.php.
  11.  *
  12.  * SPDX-License-Identifier: BSD-3-Clause
  13.  */

  14. package org.eclipse.jgit.lib;

  15. import java.io.EOFException;
  16. import java.io.IOException;
  17. import java.io.OutputStream;

  18. import org.eclipse.jgit.errors.LargeObjectException;
  19. import org.eclipse.jgit.errors.MissingObjectException;
  20. import org.eclipse.jgit.util.IO;

  21. /**
  22.  * Base class for a set of loaders for different representations of Git objects.
  23.  * New loaders are constructed for every object.
  24.  */
  25. public abstract class ObjectLoader {
  26.     /**
  27.      * Get Git in pack object type
  28.      *
  29.      * @return Git in pack object type, see
  30.      *         {@link org.eclipse.jgit.lib.Constants}.
  31.      */
  32.     public abstract int getType();

  33.     /**
  34.      * Get size of object in bytes
  35.      *
  36.      * @return size of object in bytes
  37.      */
  38.     public abstract long getSize();

  39.     /**
  40.      * Whether this object is too large to obtain as a byte array.
  41.      *
  42.      * @return true if this object is too large to obtain as a byte array.
  43.      *         Objects over a certain threshold should be accessed only by their
  44.      *         {@link #openStream()} to prevent overflowing the JVM heap.
  45.      */
  46.     public boolean isLarge() {
  47.         try {
  48.             getCachedBytes();
  49.             return false;
  50.         } catch (LargeObjectException tooBig) {
  51.             return true;
  52.         }
  53.     }

  54.     /**
  55.      * Obtain a copy of the bytes of this object.
  56.      * <p>
  57.      * Unlike {@link #getCachedBytes()} this method returns an array that might
  58.      * be modified by the caller.
  59.      *
  60.      * @return the bytes of this object.
  61.      * @throws org.eclipse.jgit.errors.LargeObjectException
  62.      *             if the object won't fit into a byte array, because
  63.      *             {@link #isLarge()} returns true. Callers should use
  64.      *             {@link #openStream()} instead to access the contents.
  65.      */
  66.     public final byte[] getBytes() throws LargeObjectException {
  67.         return cloneArray(getCachedBytes());
  68.     }

  69.     /**
  70.      * Obtain a copy of the bytes of this object.
  71.      *
  72.      * If the object size is less than or equal to {@code sizeLimit} this method
  73.      * will provide it as a byte array, even if {@link #isLarge()} is true. This
  74.      * utility is useful for application code that absolutely must have the
  75.      * object as a single contiguous byte array in memory.
  76.      *
  77.      * Unlike {@link #getCachedBytes(int)} this method returns an array that
  78.      * might be modified by the caller.
  79.      *
  80.      * @param sizeLimit
  81.      *            maximum number of bytes to return. If the object is larger
  82.      *            than this limit,
  83.      *            {@link org.eclipse.jgit.errors.LargeObjectException} will be
  84.      *            thrown.
  85.      * @return the bytes of this object.
  86.      * @throws org.eclipse.jgit.errors.LargeObjectException
  87.      *             if the object is bigger than {@code sizeLimit}, or if
  88.      *             {@link java.lang.OutOfMemoryError} occurs during allocation
  89.      *             of the result array. Callers should use {@link #openStream()}
  90.      *             instead to access the contents.
  91.      * @throws org.eclipse.jgit.errors.MissingObjectException
  92.      *             the object is large, and it no longer exists.
  93.      * @throws java.io.IOException
  94.      *             the object store cannot be accessed.
  95.      */
  96.     public final byte[] getBytes(int sizeLimit) throws LargeObjectException,
  97.             MissingObjectException, IOException {
  98.         byte[] cached = getCachedBytes(sizeLimit);
  99.         try {
  100.             return cloneArray(cached);
  101.         } catch (OutOfMemoryError tooBig) {
  102.             throw new LargeObjectException.OutOfMemory(tooBig);
  103.         }
  104.     }

  105.     /**
  106.      * Obtain a reference to the (possibly cached) bytes of this object.
  107.      * <p>
  108.      * This method offers direct access to the internal caches, potentially
  109.      * saving on data copies between the internal cache and higher level code.
  110.      * Callers who receive this reference <b>must not</b> modify its contents.
  111.      * Changes (if made) will affect the cache but not the repository itself.
  112.      *
  113.      * @return the cached bytes of this object. Do not modify it.
  114.      * @throws org.eclipse.jgit.errors.LargeObjectException
  115.      *             if the object won't fit into a byte array, because
  116.      *             {@link #isLarge()} returns true. Callers should use
  117.      *             {@link #openStream()} instead to access the contents.
  118.      */
  119.     public abstract byte[] getCachedBytes() throws LargeObjectException;

  120.     /**
  121.      * Obtain a reference to the (possibly cached) bytes of this object.
  122.      *
  123.      * If the object size is less than or equal to {@code sizeLimit} this method
  124.      * will provide it as a byte array, even if {@link #isLarge()} is true. This
  125.      * utility is useful for application code that absolutely must have the
  126.      * object as a single contiguous byte array in memory.
  127.      *
  128.      * This method offers direct access to the internal caches, potentially
  129.      * saving on data copies between the internal cache and higher level code.
  130.      * Callers who receive this reference <b>must not</b> modify its contents.
  131.      * Changes (if made) will affect the cache but not the repository itself.
  132.      *
  133.      * @param sizeLimit
  134.      *            maximum number of bytes to return. If the object size is
  135.      *            larger than this limit and {@link #isLarge()} is true,
  136.      *            {@link org.eclipse.jgit.errors.LargeObjectException} will be
  137.      *            thrown.
  138.      * @return the cached bytes of this object. Do not modify it.
  139.      * @throws org.eclipse.jgit.errors.LargeObjectException
  140.      *             if the object is bigger than {@code sizeLimit}, or if
  141.      *             {@link java.lang.OutOfMemoryError} occurs during allocation
  142.      *             of the result array. Callers should use {@link #openStream()}
  143.      *             instead to access the contents.
  144.      * @throws org.eclipse.jgit.errors.MissingObjectException
  145.      *             the object is large, and it no longer exists.
  146.      * @throws java.io.IOException
  147.      *             the object store cannot be accessed.
  148.      */
  149.     public byte[] getCachedBytes(int sizeLimit) throws LargeObjectException,
  150.             MissingObjectException, IOException {
  151.         if (!isLarge())
  152.             return getCachedBytes();

  153.         try (ObjectStream in = openStream()) {
  154.             long sz = in.getSize();
  155.             if (sizeLimit < sz)
  156.                 throw new LargeObjectException.ExceedsLimit(sizeLimit, sz);

  157.             if (Integer.MAX_VALUE < sz)
  158.                 throw new LargeObjectException.ExceedsByteArrayLimit();

  159.             byte[] buf;
  160.             try {
  161.                 buf = new byte[(int) sz];
  162.             } catch (OutOfMemoryError notEnoughHeap) {
  163.                 throw new LargeObjectException.OutOfMemory(notEnoughHeap);
  164.             }

  165.             IO.readFully(in, buf, 0, buf.length);
  166.             return buf;
  167.         }
  168.     }

  169.     /**
  170.      * Obtain an input stream to read this object's data.
  171.      *
  172.      * @return a stream of this object's data. Caller must close the stream when
  173.      *         through with it. The returned stream is buffered with a
  174.      *         reasonable buffer size.
  175.      * @throws org.eclipse.jgit.errors.MissingObjectException
  176.      *             the object no longer exists.
  177.      * @throws java.io.IOException
  178.      *             the object store cannot be accessed.
  179.      */
  180.     public abstract ObjectStream openStream() throws MissingObjectException,
  181.             IOException;

  182.     /**
  183.      * Copy this object to the output stream.
  184.      * <p>
  185.      * For some object store implementations, this method may be more efficient
  186.      * than reading from {@link #openStream()} into a temporary byte array, then
  187.      * writing to the destination stream.
  188.      * <p>
  189.      * The default implementation of this method is to copy with a temporary
  190.      * byte array for large objects, or to pass through the cached byte array
  191.      * for small objects.
  192.      *
  193.      * @param out
  194.      *            stream to receive the complete copy of this object's data.
  195.      *            Caller is responsible for flushing or closing this stream
  196.      *            after this method returns.
  197.      * @throws org.eclipse.jgit.errors.MissingObjectException
  198.      *             the object no longer exists.
  199.      * @throws java.io.IOException
  200.      *             the object store cannot be accessed, or the stream cannot be
  201.      *             written to.
  202.      */
  203.     public void copyTo(OutputStream out) throws MissingObjectException,
  204.             IOException {
  205.         if (isLarge()) {
  206.             try (ObjectStream in = openStream()) {
  207.                 final long sz = in.getSize();
  208.                 byte[] tmp = new byte[8192];
  209.                 long copied = 0;
  210.                 while (copied < sz) {
  211.                     int n = in.read(tmp);
  212.                     if (n < 0)
  213.                         throw new EOFException();
  214.                     out.write(tmp, 0, n);
  215.                     copied += n;
  216.                 }
  217.                 if (0 <= in.read())
  218.                     throw new EOFException();
  219.             }
  220.         } else {
  221.             out.write(getCachedBytes());
  222.         }
  223.     }

  224.     private static byte[] cloneArray(byte[] data) {
  225.         final byte[] copy = new byte[data.length];
  226.         System.arraycopy(data, 0, copy, 0, data.length);
  227.         return copy;
  228.     }

  229.     /**
  230.      * Simple loader around the cached byte array.
  231.      * <p>
  232.      * ObjectReader implementations can use this stream type when the object's
  233.      * content is small enough to be accessed as a single byte array.
  234.      */
  235.     public static class SmallObject extends ObjectLoader {
  236.         private final int type;

  237.         private final byte[] data;

  238.         /**
  239.          * Construct a small object loader.
  240.          *
  241.          * @param type
  242.          *            type of the object.
  243.          * @param data
  244.          *            the object's data array. This array will be returned as-is
  245.          *            for the {@link #getCachedBytes()} method.
  246.          */
  247.         public SmallObject(int type, byte[] data) {
  248.             this.type = type;
  249.             this.data = data;
  250.         }

  251.         @Override
  252.         public int getType() {
  253.             return type;
  254.         }

  255.         @Override
  256.         public long getSize() {
  257.             return getCachedBytes().length;
  258.         }

  259.         @Override
  260.         public boolean isLarge() {
  261.             return false;
  262.         }

  263.         @Override
  264.         public byte[] getCachedBytes() {
  265.             return data;
  266.         }

  267.         @Override
  268.         public ObjectStream openStream() {
  269.             return new ObjectStream.SmallStream(this);
  270.         }
  271.     }

  272.     /**
  273.      * Wraps a delegate ObjectLoader.
  274.      *
  275.      * @since 4.10
  276.      */
  277.     public abstract static class Filter extends ObjectLoader {
  278.         /**
  279.          * @return delegate ObjectLoader to handle all processing.
  280.          * @since 4.10
  281.          */
  282.         protected abstract ObjectLoader delegate();

  283.         @Override
  284.         public int getType() {
  285.             return delegate().getType();
  286.         }

  287.         @Override
  288.         public long getSize() {
  289.             return delegate().getSize();
  290.         }

  291.         @Override
  292.         public boolean isLarge() {
  293.             return delegate().isLarge();
  294.         }

  295.         @Override
  296.         public byte[] getCachedBytes() {
  297.             return delegate().getCachedBytes();
  298.         }

  299.         @Override
  300.         public ObjectStream openStream() throws IOException {
  301.             return delegate().openStream();
  302.         }
  303.     }
  304. }