1 /*
2 * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
3 * and other copyright owners as documented in the project's IP log.
4 *
5 * This program and the accompanying materials are made available
6 * under the terms of the Eclipse Distribution License v1.0 which
7 * accompanies this distribution, is reproduced below, and is
8 * available at http://www.eclipse.org/org/documents/edl-v10.php
9 *
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * - Neither the name of the Eclipse Foundation, Inc. nor the
25 * names of its contributors may be used to endorse or promote
26 * products derived from this software without specific prior
27 * written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 package org.eclipse.jgit.internal.storage.file;
45
46 import java.text.MessageFormat;
47
48 import org.eclipse.jgit.errors.CorruptObjectException;
49 import org.eclipse.jgit.internal.JGitText;
50 import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry;
51 import org.eclipse.jgit.lib.ObjectId;
52
53 /**
54 * <p>
55 * Reverse index for forward pack index. Provides operations based on offset
56 * instead of object id. Such offset-based reverse lookups are performed in
57 * O(log n) time.
58 * </p>
59 *
60 * @see PackIndex
61 * @see PackFile
62 */
63 public class PackReverseIndex {
64 /** Index we were created from, and that has our ObjectId data. */
65 private final PackIndex index;
66
67 /** The number of bytes per entry in the offsetIndex. */
68 private final long bucketSize;
69
70 /**
71 * An index into the nth mapping, where the value is the position after the
72 * the last index that contains the values of the bucket. For example given
73 * offset o (and bucket = o / bucketSize), the offset will be contained in
74 * the range nth[offsetIndex[bucket - 1]] inclusive to
75 * nth[offsetIndex[bucket]] exclusive.
76 *
77 * See {@link #binarySearch}
78 */
79 private final int[] offsetIndex;
80
81 /** Mapping from indices in offset order to indices in SHA-1 order. */
82 private final int[] nth;
83
84 /**
85 * Create reverse index from straight/forward pack index, by indexing all
86 * its entries.
87 *
88 * @param packIndex
89 * forward index - entries to (reverse) index.
90 */
91 public PackReverseIndex(PackIndex packIndex) {
92 index = packIndex;
93
94 final long cnt = index.getObjectCount();
95 if (cnt + 1 > Integer.MAX_VALUE)
96 throw new IllegalArgumentException(
97 JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
98
99 if (cnt == 0) {
100 bucketSize = Long.MAX_VALUE;
101 offsetIndex = new int[1];
102 nth = new int[0];
103 return;
104 }
105
106 final long[] offsetsBySha1 = new long[(int) cnt];
107
108 long maxOffset = 0;
109 int ith = 0;
110 for (MutableEntry me : index) {
111 final long o = me.getOffset();
112 offsetsBySha1[ith++] = o;
113 if (o > maxOffset)
114 maxOffset = o;
115 }
116
117 bucketSize = maxOffset / cnt + 1;
118 int[] bucketIndex = new int[(int) cnt];
119 int[] bucketValues = new int[(int) cnt + 1];
120 for (int oi = 0; oi < offsetsBySha1.length; oi++) {
121 final long o = offsetsBySha1[oi];
122 final int bucket = (int) (o / bucketSize);
123 final int bucketValuesPos = oi + 1;
124 final int current = bucketIndex[bucket];
125 bucketIndex[bucket] = bucketValuesPos;
126 bucketValues[bucketValuesPos] = current;
127 }
128
129 int nthByOffset = 0;
130 nth = new int[offsetsBySha1.length];
131 offsetIndex = bucketIndex; // Reuse the allocation
132 for (int bi = 0; bi < bucketIndex.length; bi++) {
133 final int start = nthByOffset;
134 // Insertion sort of the values in the bucket.
135 for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) {
136 final int nthBySha1 = vi - 1;
137 final long o = offsetsBySha1[nthBySha1];
138 int insertion = nthByOffset++;
139 for (; start < insertion; insertion--) {
140 if (o > offsetsBySha1[nth[insertion - 1]])
141 break;
142 nth[insertion] = nth[insertion - 1];
143 }
144 nth[insertion] = nthBySha1;
145 }
146 offsetIndex[bi] = nthByOffset;
147 }
148 }
149
150 /**
151 * Search for object id with the specified start offset in this pack
152 * (reverse) index.
153 *
154 * @param offset
155 * start offset of object to find.
156 * @return object id for this offset, or null if no object was found.
157 */
158 public ObjectId findObject(long offset) {
159 final int ith = binarySearch(offset);
160 if (ith < 0)
161 return null;
162 return index.getObjectId(nth[ith]);
163 }
164
165 /**
166 * Search for the next offset to the specified offset in this pack (reverse)
167 * index.
168 *
169 * @param offset
170 * start offset of previous object (must be valid-existing
171 * offset).
172 * @param maxOffset
173 * maximum offset in a pack (returned when there is no next
174 * offset).
175 * @return offset of the next object in a pack or maxOffset if provided
176 * offset was the last one.
177 * @throws org.eclipse.jgit.errors.CorruptObjectException
178 * when there is no object with the provided offset.
179 */
180 public long findNextOffset(long offset, long maxOffset)
181 throws CorruptObjectException {
182 final int ith = binarySearch(offset);
183 if (ith < 0)
184 throw new CorruptObjectException(
185 MessageFormat.format(
186 JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
187 Long.valueOf(offset)));
188
189 if (ith + 1 == nth.length)
190 return maxOffset;
191 return index.getOffset(nth[ith + 1]);
192 }
193
194 int findPostion(long offset) {
195 return binarySearch(offset);
196 }
197
198 private int binarySearch(long offset) {
199 int bucket = (int) (offset / bucketSize);
200 int low = bucket == 0 ? 0 : offsetIndex[bucket - 1];
201 int high = offsetIndex[bucket];
202 while (low < high) {
203 final int mid = (low + high) >>> 1;
204 final long o = index.getOffset(nth[mid]);
205 if (offset < o)
206 high = mid;
207 else if (offset == o)
208 return mid;
209 else
210 low = mid + 1;
211 }
212 return -1;
213 }
214
215 ObjectId findObjectByPosition(int nthPosition) {
216 return index.getObjectId(nth[nthPosition]);
217 }
218 }