ObjectReader.java

  1. /*
  2.  * Copyright (C) 2010, Google Inc. and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */

  10. package org.eclipse.jgit.lib;

  11. import static org.eclipse.jgit.lib.Constants.OBJECT_ID_ABBREV_STRING_LENGTH;

  12. import java.io.IOException;
  13. import java.util.ArrayList;
  14. import java.util.Collection;
  15. import java.util.Iterator;
  16. import java.util.List;
  17. import java.util.Set;

  18. import org.eclipse.jgit.annotations.NonNull;
  19. import org.eclipse.jgit.annotations.Nullable;
  20. import org.eclipse.jgit.errors.IncorrectObjectTypeException;
  21. import org.eclipse.jgit.errors.MissingObjectException;
  22. import org.eclipse.jgit.internal.revwalk.BitmappedObjectReachabilityChecker;
  23. import org.eclipse.jgit.internal.revwalk.BitmappedReachabilityChecker;
  24. import org.eclipse.jgit.internal.revwalk.PedestrianObjectReachabilityChecker;
  25. import org.eclipse.jgit.internal.revwalk.PedestrianReachabilityChecker;
  26. import org.eclipse.jgit.revwalk.ObjectReachabilityChecker;
  27. import org.eclipse.jgit.revwalk.ObjectWalk;
  28. import org.eclipse.jgit.revwalk.ReachabilityChecker;
  29. import org.eclipse.jgit.revwalk.RevWalk;

  30. /**
  31.  * Reads an {@link org.eclipse.jgit.lib.ObjectDatabase} for a single thread.
  32.  * <p>
  33.  * Readers that can support efficient reuse of pack encoded objects should also
  34.  * implement the companion interface
  35.  * {@link org.eclipse.jgit.internal.storage.pack.ObjectReuseAsIs}.
  36.  */
  37. public abstract class ObjectReader implements AutoCloseable {
  38.     /** Type hint indicating the caller doesn't know the type. */
  39.     public static final int OBJ_ANY = -1;

  40.     /**
  41.      * The threshold at which a file will be streamed rather than loaded
  42.      * entirely into memory.
  43.      * @since 4.6
  44.      */
  45.     protected int streamFileThreshold;

  46.     /**
  47.      * Construct a new reader from the same data.
  48.      * <p>
  49.      * Applications can use this method to build a new reader from the same data
  50.      * source, but for an different thread.
  51.      *
  52.      * @return a brand new reader, using the same data source.
  53.      */
  54.     public abstract ObjectReader newReader();

  55.     /**
  56.      * Obtain a unique abbreviation (prefix) of an object SHA-1.
  57.      *
  58.      * This method uses a reasonable default for the minimum length. Callers who
  59.      * don't care about the minimum length should prefer this method.
  60.      *
  61.      * The returned abbreviation would expand back to the argument ObjectId when
  62.      * passed to {@link #resolve(AbbreviatedObjectId)}, assuming no new objects
  63.      * are added to this repository between calls.
  64.      *
  65.      * @param objectId
  66.      *            object identity that needs to be abbreviated.
  67.      * @return SHA-1 abbreviation.
  68.      * @throws java.io.IOException
  69.      *             the object store cannot be read.
  70.      */
  71.     public AbbreviatedObjectId abbreviate(AnyObjectId objectId)
  72.             throws IOException {
  73.         return abbreviate(objectId, OBJECT_ID_ABBREV_STRING_LENGTH);
  74.     }

  75.     /**
  76.      * Obtain a unique abbreviation (prefix) of an object SHA-1.
  77.      *
  78.      * The returned abbreviation would expand back to the argument ObjectId when
  79.      * passed to {@link #resolve(AbbreviatedObjectId)}, assuming no new objects
  80.      * are added to this repository between calls.
  81.      *
  82.      * The default implementation of this method abbreviates the id to the
  83.      * minimum length, then resolves it to see if there are multiple results.
  84.      * When multiple results are found, the length is extended by 1 and resolve
  85.      * is tried again.
  86.      *
  87.      * @param objectId
  88.      *            object identity that needs to be abbreviated.
  89.      * @param len
  90.      *            minimum length of the abbreviated string. Must be in the range
  91.      *            [2, {@value Constants#OBJECT_ID_STRING_LENGTH}].
  92.      * @return SHA-1 abbreviation. If no matching objects exist in the
  93.      *         repository, the abbreviation will match the minimum length.
  94.      * @throws java.io.IOException
  95.      *             the object store cannot be read.
  96.      */
  97.     public AbbreviatedObjectId abbreviate(AnyObjectId objectId, int len)
  98.             throws IOException {
  99.         if (len == Constants.OBJECT_ID_STRING_LENGTH)
  100.             return AbbreviatedObjectId.fromObjectId(objectId);

  101.         AbbreviatedObjectId abbrev = objectId.abbreviate(len);
  102.         Collection<ObjectId> matches = resolve(abbrev);
  103.         while (1 < matches.size() && len < Constants.OBJECT_ID_STRING_LENGTH) {
  104.             abbrev = objectId.abbreviate(++len);
  105.             List<ObjectId> n = new ArrayList<>(8);
  106.             for (ObjectId candidate : matches) {
  107.                 if (abbrev.prefixCompare(candidate) == 0)
  108.                     n.add(candidate);
  109.             }
  110.             if (1 < n.size())
  111.                 matches = n;
  112.             else
  113.                 matches = resolve(abbrev);
  114.         }
  115.         return abbrev;
  116.     }

  117.     /**
  118.      * Resolve an abbreviated ObjectId to its full form.
  119.      *
  120.      * This method searches for an ObjectId that begins with the abbreviation,
  121.      * and returns at least some matching candidates.
  122.      *
  123.      * If the returned collection is empty, no objects start with this
  124.      * abbreviation. The abbreviation doesn't belong to this repository, or the
  125.      * repository lacks the necessary objects to complete it.
  126.      *
  127.      * If the collection contains exactly one member, the abbreviation is
  128.      * (currently) unique within this database. There is a reasonably high
  129.      * probability that the returned id is what was previously abbreviated.
  130.      *
  131.      * If the collection contains 2 or more members, the abbreviation is not
  132.      * unique. In this case the implementation is only required to return at
  133.      * least 2 candidates to signal the abbreviation has conflicts. User
  134.      * friendly implementations should return as many candidates as reasonably
  135.      * possible, as the caller may be able to disambiguate further based on
  136.      * context. However since databases can be very large (e.g. 10 million
  137.      * objects) returning 625,000 candidates for the abbreviation "0" is simply
  138.      * unreasonable, so implementors should draw the line at around 256 matches.
  139.      *
  140.      * @param id
  141.      *            abbreviated id to resolve to a complete identity. The
  142.      *            abbreviation must have a length of at least 2.
  143.      * @return candidates that begin with the abbreviated identity.
  144.      * @throws java.io.IOException
  145.      *             the object store cannot be read.
  146.      */
  147.     public abstract Collection<ObjectId> resolve(AbbreviatedObjectId id)
  148.             throws IOException;

  149.     /**
  150.      * Does the requested object exist in this database?
  151.      *
  152.      * @param objectId
  153.      *            identity of the object to test for existence of.
  154.      * @return true if the specified object is stored in this database.
  155.      * @throws java.io.IOException
  156.      *             the object store cannot be accessed.
  157.      */
  158.     public boolean has(AnyObjectId objectId) throws IOException {
  159.         return has(objectId, OBJ_ANY);
  160.     }

  161.     /**
  162.      * Does the requested object exist in this database?
  163.      *
  164.      * @param objectId
  165.      *            identity of the object to test for existence of.
  166.      * @param typeHint
  167.      *            hint about the type of object being requested, e.g.
  168.      *            {@link org.eclipse.jgit.lib.Constants#OBJ_BLOB};
  169.      *            {@link #OBJ_ANY} if the object type is not known, or does not
  170.      *            matter to the caller.
  171.      * @return true if the specified object is stored in this database.
  172.      * @throws IncorrectObjectTypeException
  173.      *             typeHint was not OBJ_ANY, and the object's actual type does
  174.      *             not match typeHint.
  175.      * @throws java.io.IOException
  176.      *             the object store cannot be accessed.
  177.      */
  178.     public boolean has(AnyObjectId objectId, int typeHint) throws IOException {
  179.         try {
  180.             open(objectId, typeHint);
  181.             return true;
  182.         } catch (MissingObjectException notFound) {
  183.             return false;
  184.         }
  185.     }

  186.     /**
  187.      * Open an object from this database.
  188.      *
  189.      * @param objectId
  190.      *            identity of the object to open.
  191.      * @return a {@link org.eclipse.jgit.lib.ObjectLoader} for accessing the
  192.      *         object.
  193.      * @throws org.eclipse.jgit.errors.MissingObjectException
  194.      *             the object does not exist.
  195.      * @throws java.io.IOException
  196.      *             the object store cannot be accessed.
  197.      */
  198.     public ObjectLoader open(AnyObjectId objectId)
  199.             throws MissingObjectException, IOException {
  200.         return open(objectId, OBJ_ANY);
  201.     }

  202.     /**
  203.      * Open an object from this database.
  204.      *
  205.      * @param objectId
  206.      *            identity of the object to open.
  207.      * @param typeHint
  208.      *            hint about the type of object being requested, e.g.
  209.      *            {@link org.eclipse.jgit.lib.Constants#OBJ_BLOB};
  210.      *            {@link #OBJ_ANY} if the object type is not known, or does not
  211.      *            matter to the caller.
  212.      * @return a {@link org.eclipse.jgit.lib.ObjectLoader} for accessing the
  213.      *         object.
  214.      * @throws org.eclipse.jgit.errors.MissingObjectException
  215.      *             the object does not exist.
  216.      * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException
  217.      *             typeHint was not OBJ_ANY, and the object's actual type does
  218.      *             not match typeHint.
  219.      * @throws java.io.IOException
  220.      *             the object store cannot be accessed.
  221.      */
  222.     public abstract ObjectLoader open(AnyObjectId objectId, int typeHint)
  223.             throws MissingObjectException, IncorrectObjectTypeException,
  224.             IOException;

  225.     /**
  226.      * Returns IDs for those commits which should be considered as shallow.
  227.      *
  228.      * @return IDs of shallow commits
  229.      * @throws java.io.IOException
  230.      */
  231.     public abstract Set<ObjectId> getShallowCommits() throws IOException;

  232.     /**
  233.      * Asynchronous object opening.
  234.      *
  235.      * @param objectIds
  236.      *            objects to open from the object store. The supplied collection
  237.      *            must not be modified until the queue has finished.
  238.      * @param reportMissing
  239.      *            if true missing objects are reported by calling failure with a
  240.      *            MissingObjectException. This may be more expensive for the
  241.      *            implementation to guarantee. If false the implementation may
  242.      *            choose to report MissingObjectException, or silently skip over
  243.      *            the object with no warning.
  244.      * @return queue to read the objects from.
  245.      */
  246.     public <T extends ObjectId> AsyncObjectLoaderQueue<T> open(
  247.             Iterable<T> objectIds, final boolean reportMissing) {
  248.         final Iterator<T> idItr = objectIds.iterator();
  249.         return new AsyncObjectLoaderQueue<>() {
  250.             private T cur;

  251.             @Override
  252.             public boolean next() throws MissingObjectException, IOException {
  253.                 if (idItr.hasNext()) {
  254.                     cur = idItr.next();
  255.                     return true;
  256.                 }
  257.                 return false;
  258.             }

  259.             @Override
  260.             public T getCurrent() {
  261.                 return cur;
  262.             }

  263.             @Override
  264.             public ObjectId getObjectId() {
  265.                 return cur;
  266.             }

  267.             @Override
  268.             public ObjectLoader open() throws IOException {
  269.                 return ObjectReader.this.open(cur, OBJ_ANY);
  270.             }

  271.             @Override
  272.             public boolean cancel(boolean mayInterruptIfRunning) {
  273.                 return true;
  274.             }

  275.             @Override
  276.             public void release() {
  277.                 // Since we are sequential by default, we don't
  278.                 // have any state to clean up if we terminate early.
  279.             }
  280.         };
  281.     }

  282.     /**
  283.      * Get only the size of an object.
  284.      * <p>
  285.      * The default implementation of this method opens an ObjectLoader.
  286.      * Databases are encouraged to override this if a faster access method is
  287.      * available to them.
  288.      *
  289.      * @param objectId
  290.      *            identity of the object to open.
  291.      * @param typeHint
  292.      *            hint about the type of object being requested, e.g.
  293.      *            {@link org.eclipse.jgit.lib.Constants#OBJ_BLOB};
  294.      *            {@link #OBJ_ANY} if the object type is not known, or does not
  295.      *            matter to the caller.
  296.      * @return size of object in bytes.
  297.      * @throws org.eclipse.jgit.errors.MissingObjectException
  298.      *             the object does not exist.
  299.      * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException
  300.      *             typeHint was not OBJ_ANY, and the object's actual type does
  301.      *             not match typeHint.
  302.      * @throws java.io.IOException
  303.      *             the object store cannot be accessed.
  304.      */
  305.     public long getObjectSize(AnyObjectId objectId, int typeHint)
  306.             throws MissingObjectException, IncorrectObjectTypeException,
  307.             IOException {
  308.         return open(objectId, typeHint).getSize();
  309.     }

  310.     /**
  311.      * Asynchronous object size lookup.
  312.      *
  313.      * @param objectIds
  314.      *            objects to get the size of from the object store. The supplied
  315.      *            collection must not be modified until the queue has finished.
  316.      * @param reportMissing
  317.      *            if true missing objects are reported by calling failure with a
  318.      *            MissingObjectException. This may be more expensive for the
  319.      *            implementation to guarantee. If false the implementation may
  320.      *            choose to report MissingObjectException, or silently skip over
  321.      *            the object with no warning.
  322.      * @return queue to read object sizes from.
  323.      */
  324.     public <T extends ObjectId> AsyncObjectSizeQueue<T> getObjectSize(
  325.             Iterable<T> objectIds, final boolean reportMissing) {
  326.         final Iterator<T> idItr = objectIds.iterator();
  327.         return new AsyncObjectSizeQueue<>() {
  328.             private T cur;

  329.             private long sz;

  330.             @Override
  331.             public boolean next() throws MissingObjectException, IOException {
  332.                 if (idItr.hasNext()) {
  333.                     cur = idItr.next();
  334.                     sz = getObjectSize(cur, OBJ_ANY);
  335.                     return true;
  336.                 }
  337.                 return false;
  338.             }

  339.             @Override
  340.             public T getCurrent() {
  341.                 return cur;
  342.             }

  343.             @Override
  344.             public ObjectId getObjectId() {
  345.                 return cur;
  346.             }

  347.             @Override
  348.             public long getSize() {
  349.                 return sz;
  350.             }

  351.             @Override
  352.             public boolean cancel(boolean mayInterruptIfRunning) {
  353.                 return true;
  354.             }

  355.             @Override
  356.             public void release() {
  357.                 // Since we are sequential by default, we don't
  358.                 // have any state to clean up if we terminate early.
  359.             }
  360.         };
  361.     }

  362.     /**
  363.      * Advise the reader to avoid unreachable objects.
  364.      * <p>
  365.      * While enabled the reader will skip over anything previously proven to be
  366.      * unreachable. This may be dangerous in the face of concurrent writes.
  367.      *
  368.      * @param avoid
  369.      *            true to avoid unreachable objects.
  370.      * @since 3.0
  371.      */
  372.     public void setAvoidUnreachableObjects(boolean avoid) {
  373.         // Do nothing by default.
  374.     }

  375.     /**
  376.      * An index that can be used to speed up ObjectWalks.
  377.      *
  378.      * @return the index or null if one does not exist.
  379.      * @throws java.io.IOException
  380.      *             when the index fails to load
  381.      * @since 3.0
  382.      */
  383.     public BitmapIndex getBitmapIndex() throws IOException {
  384.         return null;
  385.     }

  386.     /**
  387.      * Create a reachability checker that will use bitmaps if possible.
  388.      *
  389.      * @param rw
  390.      *            revwalk for use by the reachability checker
  391.      * @return the most efficient reachability checker for this repository.
  392.      * @throws IOException
  393.      *             if it cannot open any of the underlying indices.
  394.      *
  395.      * @since 5.11
  396.      */
  397.     @NonNull
  398.     public ReachabilityChecker createReachabilityChecker(RevWalk rw)
  399.             throws IOException {
  400.         if (getBitmapIndex() != null) {
  401.             return new BitmappedReachabilityChecker(rw);
  402.         }

  403.         return new PedestrianReachabilityChecker(true, rw);
  404.     }

  405.     /**
  406.      * Create an object reachability checker that will use bitmaps if possible.
  407.      *
  408.      * This reachability checker accepts any object as target. For checks
  409.      * exclusively between commits, use
  410.      * {@link #createReachabilityChecker(RevWalk)}.
  411.      *
  412.      * @param ow
  413.      *            objectwalk for use by the reachability checker
  414.      * @return the most efficient object reachability checker for this
  415.      *         repository.
  416.      *
  417.      * @throws IOException
  418.      *             if it cannot open any of the underlying indices.
  419.      *
  420.      * @since 5.11
  421.      */
  422.     @NonNull
  423.     public ObjectReachabilityChecker createObjectReachabilityChecker(
  424.             ObjectWalk ow) throws IOException {
  425.         if (getBitmapIndex() != null) {
  426.             return new BitmappedObjectReachabilityChecker(ow);
  427.         }

  428.         return new PedestrianObjectReachabilityChecker(ow);
  429.     }

  430.     /**
  431.      * Get the {@link org.eclipse.jgit.lib.ObjectInserter} from which this
  432.      * reader was created using {@code inserter.newReader()}
  433.      *
  434.      * @return the {@link org.eclipse.jgit.lib.ObjectInserter} from which this
  435.      *         reader was created using {@code inserter.newReader()}, or null if
  436.      *         this reader was not created from an inserter.
  437.      * @since 4.4
  438.      */
  439.     @Nullable
  440.     public ObjectInserter getCreatedFromInserter() {
  441.         return null;
  442.     }

  443.     /**
  444.      * {@inheritDoc}
  445.      * <p>
  446.      * Release any resources used by this reader.
  447.      * <p>
  448.      * A reader that has been released can be used again, but may need to be
  449.      * released after the subsequent usage.
  450.      *
  451.      * @since 4.0
  452.      */
  453.     @Override
  454.     public abstract void close();

  455.     /**
  456.      * Sets the threshold at which a file will be streamed rather than loaded
  457.      * entirely into memory
  458.      *
  459.      * @param threshold
  460.      *            the new threshold
  461.      * @since 4.6
  462.      */
  463.     public void setStreamFileThreshold(int threshold) {
  464.         streamFileThreshold = threshold;
  465.     }

  466.     /**
  467.      * Returns the threshold at which a file will be streamed rather than loaded
  468.      * entirely into memory
  469.      *
  470.      * @return the threshold in bytes
  471.      * @since 4.6
  472.      */
  473.     public int getStreamFileThreshold() {
  474.         return streamFileThreshold;
  475.     }

  476.     /**
  477.      * Wraps a delegate ObjectReader.
  478.      *
  479.      * @since 4.4
  480.      */
  481.     public abstract static class Filter extends ObjectReader {
  482.         /**
  483.          * @return delegate ObjectReader to handle all processing.
  484.          * @since 4.4
  485.          */
  486.         protected abstract ObjectReader delegate();

  487.         @Override
  488.         public ObjectReader newReader() {
  489.             return delegate().newReader();
  490.         }

  491.         @Override
  492.         public AbbreviatedObjectId abbreviate(AnyObjectId objectId)
  493.                 throws IOException {
  494.             return delegate().abbreviate(objectId);
  495.         }

  496.         @Override
  497.         public AbbreviatedObjectId abbreviate(AnyObjectId objectId, int len)
  498.                 throws IOException {
  499.             return delegate().abbreviate(objectId, len);
  500.         }

  501.         @Override
  502.         public Collection<ObjectId> resolve(AbbreviatedObjectId id)
  503.                 throws IOException {
  504.             return delegate().resolve(id);
  505.         }

  506.         @Override
  507.         public boolean has(AnyObjectId objectId) throws IOException {
  508.             return delegate().has(objectId);
  509.         }

  510.         @Override
  511.         public boolean has(AnyObjectId objectId, int typeHint) throws IOException {
  512.             return delegate().has(objectId, typeHint);
  513.         }

  514.         @Override
  515.         public ObjectLoader open(AnyObjectId objectId)
  516.                 throws MissingObjectException, IOException {
  517.             return delegate().open(objectId);
  518.         }

  519.         @Override
  520.         public ObjectLoader open(AnyObjectId objectId, int typeHint)
  521.                 throws MissingObjectException, IncorrectObjectTypeException,
  522.                 IOException {
  523.             return delegate().open(objectId, typeHint);
  524.         }

  525.         @Override
  526.         public Set<ObjectId> getShallowCommits() throws IOException {
  527.             return delegate().getShallowCommits();
  528.         }

  529.         @Override
  530.         public <T extends ObjectId> AsyncObjectLoaderQueue<T> open(
  531.                 Iterable<T> objectIds, boolean reportMissing) {
  532.             return delegate().open(objectIds, reportMissing);
  533.         }

  534.         @Override
  535.         public long getObjectSize(AnyObjectId objectId, int typeHint)
  536.                 throws MissingObjectException, IncorrectObjectTypeException,
  537.                 IOException {
  538.             return delegate().getObjectSize(objectId, typeHint);
  539.         }

  540.         @Override
  541.         public <T extends ObjectId> AsyncObjectSizeQueue<T> getObjectSize(
  542.                 Iterable<T> objectIds, boolean reportMissing) {
  543.             return delegate().getObjectSize(objectIds, reportMissing);
  544.         }

  545.         @Override
  546.         public void setAvoidUnreachableObjects(boolean avoid) {
  547.             delegate().setAvoidUnreachableObjects(avoid);
  548.         }

  549.         @Override
  550.         public BitmapIndex getBitmapIndex() throws IOException {
  551.             return delegate().getBitmapIndex();
  552.         }

  553.         @Override
  554.         @Nullable
  555.         public ObjectInserter getCreatedFromInserter() {
  556.             return delegate().getCreatedFromInserter();
  557.         }

  558.         @Override
  559.         public void close() {
  560.             delegate().close();
  561.         }
  562.     }
  563. }