PackReverseIndex.java

  1. /*
  2.  * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com> and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */

  10. package org.eclipse.jgit.internal.storage.file;

  11. import java.text.MessageFormat;

  12. import org.eclipse.jgit.errors.CorruptObjectException;
  13. import org.eclipse.jgit.internal.JGitText;
  14. import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry;
  15. import org.eclipse.jgit.lib.ObjectId;

  16. /**
  17.  * <p>
  18.  * Reverse index for forward pack index. Provides operations based on offset
  19.  * instead of object id. Such offset-based reverse lookups are performed in
  20.  * O(log n) time.
  21.  * </p>
  22.  *
  23.  * @see PackIndex
  24.  * @see Pack
  25.  */
  26. public class PackReverseIndex {
  27.     /** Index we were created from, and that has our ObjectId data. */
  28.     private final PackIndex index;

  29.     /** The number of bytes per entry in the offsetIndex. */
  30.     private final long bucketSize;

  31.     /**
  32.      * An index into the nth mapping, where the value is the position after the
  33.      * the last index that contains the values of the bucket. For example given
  34.      * offset o (and bucket = o / bucketSize), the offset will be contained in
  35.      * the range nth[offsetIndex[bucket - 1]] inclusive to
  36.      * nth[offsetIndex[bucket]] exclusive.
  37.      *
  38.      * See {@link #binarySearch}
  39.      */
  40.     private final int[] offsetIndex;

  41.     /** Mapping from indices in offset order to indices in SHA-1 order. */
  42.     private final int[] nth;

  43.     /**
  44.      * Create reverse index from straight/forward pack index, by indexing all
  45.      * its entries.
  46.      *
  47.      * @param packIndex
  48.      *            forward index - entries to (reverse) index.
  49.      */
  50.     public PackReverseIndex(PackIndex packIndex) {
  51.         index = packIndex;

  52.         final long cnt = index.getObjectCount();
  53.         if (cnt + 1 > Integer.MAX_VALUE)
  54.             throw new IllegalArgumentException(
  55.                     JGitText.get().hugeIndexesAreNotSupportedByJgitYet);

  56.         if (cnt == 0) {
  57.             bucketSize = Long.MAX_VALUE;
  58.             offsetIndex = new int[1];
  59.             nth = new int[0];
  60.             return;
  61.         }

  62.         final long[] offsetsBySha1 = new long[(int) cnt];

  63.         long maxOffset = 0;
  64.         int ith = 0;
  65.         for (MutableEntry me : index) {
  66.             final long o = me.getOffset();
  67.             offsetsBySha1[ith++] = o;
  68.             if (o > maxOffset)
  69.                 maxOffset = o;
  70.         }

  71.         bucketSize = maxOffset / cnt + 1;
  72.         int[] bucketIndex = new int[(int) cnt];
  73.         int[] bucketValues = new int[(int) cnt + 1];
  74.         for (int oi = 0; oi < offsetsBySha1.length; oi++) {
  75.             final long o = offsetsBySha1[oi];
  76.             final int bucket = (int) (o / bucketSize);
  77.             final int bucketValuesPos = oi + 1;
  78.             final int current = bucketIndex[bucket];
  79.             bucketIndex[bucket] = bucketValuesPos;
  80.             bucketValues[bucketValuesPos] = current;
  81.         }

  82.         int nthByOffset = 0;
  83.         nth = new int[offsetsBySha1.length];
  84.         offsetIndex = bucketIndex; // Reuse the allocation
  85.         for (int bi = 0; bi < bucketIndex.length; bi++) {
  86.             final int start = nthByOffset;
  87.             // Insertion sort of the values in the bucket.
  88.             for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) {
  89.                 final int nthBySha1 = vi - 1;
  90.                 final long o = offsetsBySha1[nthBySha1];
  91.                 int insertion = nthByOffset++;
  92.                 for (; start < insertion; insertion--) {
  93.                     if (o > offsetsBySha1[nth[insertion - 1]])
  94.                         break;
  95.                     nth[insertion] = nth[insertion - 1];
  96.                 }
  97.                 nth[insertion] = nthBySha1;
  98.             }
  99.             offsetIndex[bi] = nthByOffset;
  100.         }
  101.     }

  102.     /**
  103.      * Search for object id with the specified start offset in this pack
  104.      * (reverse) index.
  105.      *
  106.      * @param offset
  107.      *            start offset of object to find.
  108.      * @return object id for this offset, or null if no object was found.
  109.      */
  110.     public ObjectId findObject(long offset) {
  111.         final int ith = binarySearch(offset);
  112.         if (ith < 0)
  113.             return null;
  114.         return index.getObjectId(nth[ith]);
  115.     }

  116.     /**
  117.      * Search for the next offset to the specified offset in this pack (reverse)
  118.      * index.
  119.      *
  120.      * @param offset
  121.      *            start offset of previous object (must be valid-existing
  122.      *            offset).
  123.      * @param maxOffset
  124.      *            maximum offset in a pack (returned when there is no next
  125.      *            offset).
  126.      * @return offset of the next object in a pack or maxOffset if provided
  127.      *         offset was the last one.
  128.      * @throws org.eclipse.jgit.errors.CorruptObjectException
  129.      *             when there is no object with the provided offset.
  130.      */
  131.     public long findNextOffset(long offset, long maxOffset)
  132.             throws CorruptObjectException {
  133.         final int ith = binarySearch(offset);
  134.         if (ith < 0)
  135.             throw new CorruptObjectException(
  136.                     MessageFormat.format(
  137.                             JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
  138.                             Long.valueOf(offset)));

  139.         if (ith + 1 == nth.length)
  140.             return maxOffset;
  141.         return index.getOffset(nth[ith + 1]);
  142.     }

  143.     int findPostion(long offset) {
  144.         return binarySearch(offset);
  145.     }

  146.     private int binarySearch(long offset) {
  147.         int bucket = (int) (offset / bucketSize);
  148.         int low = bucket == 0 ? 0 : offsetIndex[bucket - 1];
  149.         int high = offsetIndex[bucket];
  150.         while (low < high) {
  151.             final int mid = (low + high) >>> 1;
  152.             final long o = index.getOffset(nth[mid]);
  153.             if (offset < o)
  154.                 high = mid;
  155.             else if (offset == o)
  156.                 return mid;
  157.             else
  158.                 low = mid + 1;
  159.         }
  160.         return -1;
  161.     }

  162.     ObjectId findObjectByPosition(int nthPosition) {
  163.         return index.getObjectId(nth[nthPosition]);
  164.     }
  165. }