1 /*
2 * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4 * and other copyright owners as documented in the project's IP log.
5 *
6 * This program and the accompanying materials are made available
7 * under the terms of the Eclipse Distribution License v1.0 which
8 * accompanies this distribution, is reproduced below, and is
9 * available at http://www.eclipse.org/org/documents/edl-v10.php
10 *
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials provided
23 * with the distribution.
24 *
25 * - Neither the name of the Eclipse Foundation, Inc. nor the
26 * names of its contributors may be used to endorse or promote
27 * products derived from this software without specific prior
28 * written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 package org.eclipse.jgit.internal.storage.file;
46
47 import java.io.File;
48 import java.io.FileNotFoundException;
49 import java.io.IOException;
50 import java.io.InputStream;
51 import java.text.MessageFormat;
52 import java.util.Iterator;
53 import java.util.Set;
54
55 import org.eclipse.jgit.errors.CorruptObjectException;
56 import org.eclipse.jgit.errors.MissingObjectException;
57 import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException;
58 import org.eclipse.jgit.internal.JGitText;
59 import org.eclipse.jgit.lib.AbbreviatedObjectId;
60 import org.eclipse.jgit.lib.AnyObjectId;
61 import org.eclipse.jgit.lib.MutableObjectId;
62 import org.eclipse.jgit.lib.ObjectId;
63 import org.eclipse.jgit.lib.ObjectIdSet;
64 import org.eclipse.jgit.util.IO;
65 import org.eclipse.jgit.util.NB;
66 import org.eclipse.jgit.util.io.SilentFileInputStream;
67
68 /**
69 * Access path to locate objects by {@link org.eclipse.jgit.lib.ObjectId} in a
70 * {@link org.eclipse.jgit.internal.storage.file.PackFile}.
71 * <p>
72 * Indexes are strictly redundant information in that we can rebuild all of the
73 * data held in the index file from the on disk representation of the pack file
74 * itself, but it is faster to access for random requests because data is stored
75 * by ObjectId.
76 * </p>
77 */
78 public abstract class PackIndex
79 implements Iterable<PackIndex.MutableEntry>, ObjectIdSet {
80 /**
81 * Open an existing pack <code>.idx</code> file for reading.
82 * <p>
83 * The format of the file will be automatically detected and a proper access
84 * implementation for that format will be constructed and returned to the
85 * caller. The file may or may not be held open by the returned instance.
86 * </p>
87 *
88 * @param idxFile
89 * existing pack .idx to read.
90 * @return access implementation for the requested file.
91 * @throws FileNotFoundException
92 * the file does not exist.
93 * @throws java.io.IOException
94 * the file exists but could not be read due to security errors,
95 * unrecognized data version, or unexpected data corruption.
96 */
97 public static PackIndex open(File idxFile) throws IOException {
98 try (SilentFileInputStream fd = new SilentFileInputStream(
99 idxFile)) {
100 return read(fd);
101 } catch (IOException ioe) {
102 throw new IOException(
103 MessageFormat.format(JGitText.get().unreadablePackIndex,
104 idxFile.getAbsolutePath()),
105 ioe);
106 }
107 }
108
109 /**
110 * Read an existing pack index file from a buffered stream.
111 * <p>
112 * The format of the file will be automatically detected and a proper access
113 * implementation for that format will be constructed and returned to the
114 * caller. The file may or may not be held open by the returned instance.
115 *
116 * @param fd
117 * stream to read the index file from. The stream must be
118 * buffered as some small IOs are performed against the stream.
119 * The caller is responsible for closing the stream.
120 * @return a copy of the index in-memory.
121 * @throws java.io.IOException
122 * the stream cannot be read.
123 * @throws org.eclipse.jgit.errors.CorruptObjectException
124 * the stream does not contain a valid pack index.
125 */
126 public static PackIndex read(InputStream fd) throws IOException,
127 CorruptObjectException {
128 final byte[] hdr = new byte[8];
129 IO.readFully(fd, hdr, 0, hdr.length);
130 if (isTOC(hdr)) {
131 final int v = NB.decodeInt32(hdr, 4);
132 switch (v) {
133 case 2:
134 return new PackIndexV2(fd);
135 default:
136 throw new UnsupportedPackIndexVersionException(v);
137 }
138 }
139 return new PackIndexV1(fd, hdr);
140 }
141
142 private static boolean isTOC(byte[] h) {
143 final byte[] toc = PackIndexWriter.TOC;
144 for (int i = 0; i < toc.length; i++)
145 if (h[i] != toc[i])
146 return false;
147 return true;
148 }
149
150 /** Footer checksum applied on the bottom of the pack file. */
151 protected byte[] packChecksum;
152
153 /**
154 * Determine if an object is contained within the pack file.
155 *
156 * @param id
157 * the object to look for. Must not be null.
158 * @return true if the object is listed in this index; false otherwise.
159 */
160 public boolean hasObject(AnyObjectId id) {
161 return findOffset(id) != -1;
162 }
163
164 /** {@inheritDoc} */
165 @Override
166 public boolean contains(AnyObjectId id) {
167 return findOffset(id) != -1;
168 }
169
170 /**
171 * {@inheritDoc}
172 * <p>
173 * Provide iterator that gives access to index entries. Note, that iterator
174 * returns reference to mutable object, the same reference in each call -
175 * for performance reason. If client needs immutable objects, it must copy
176 * returned object on its own.
177 * <p>
178 * Iterator returns objects in SHA-1 lexicographical order.
179 * </p>
180 */
181 @Override
182 public abstract Iterator<MutableEntry> iterator();
183
184 /**
185 * Obtain the total number of objects described by this index.
186 *
187 * @return number of objects in this index, and likewise in the associated
188 * pack that this index was generated from.
189 */
190 public abstract long getObjectCount();
191
192 /**
193 * Obtain the total number of objects needing 64 bit offsets.
194 *
195 * @return number of objects in this index using a 64 bit offset; that is an
196 * object positioned after the 2 GB position within the file.
197 */
198 public abstract long getOffset64Count();
199
200 /**
201 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
202 * <p>
203 * This method is a constant-time replacement for the following loop:
204 *
205 * <pre>
206 * Iterator<MutableEntry> eItr = index.iterator();
207 * int curPosition = 0;
208 * while (eItr.hasNext() && curPosition++ < nthPosition)
209 * eItr.next();
210 * ObjectId result = eItr.next().toObjectId();
211 * </pre>
212 *
213 * @param nthPosition
214 * position within the traversal of {@link #iterator()} that the
215 * caller needs the object for. The first returned
216 * {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
217 * is 0, the second is 1, etc.
218 * @return the ObjectId for the corresponding entry.
219 */
220 public abstract ObjectId getObjectId(long nthPosition);
221
222 /**
223 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
224 * <p>
225 * This method is a constant-time replacement for the following loop:
226 *
227 * <pre>
228 * Iterator<MutableEntry> eItr = index.iterator();
229 * int curPosition = 0;
230 * while (eItr.hasNext() && curPosition++ < nthPosition)
231 * eItr.next();
232 * ObjectId result = eItr.next().toObjectId();
233 * </pre>
234 *
235 * @param nthPosition
236 * unsigned 32 bit position within the traversal of
237 * {@link #iterator()} that the caller needs the object for. The
238 * first returned
239 * {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
240 * is 0, the second is 1, etc. Positions past 2**31-1 are
241 * negative, but still valid.
242 * @return the ObjectId for the corresponding entry.
243 */
244 public final ObjectId getObjectId(int nthPosition) {
245 if (nthPosition >= 0)
246 return getObjectId((long) nthPosition);
247 final int u31 = nthPosition >>> 1;
248 final int one = nthPosition & 1;
249 return getObjectId(((long) u31) << 1 | one);
250 }
251
252 /**
253 * Get offset in a pack for the n-th object entry returned by
254 * {@link #iterator()}.
255 *
256 * @param nthPosition
257 * unsigned 32 bit position within the traversal of
258 * {@link #iterator()} for which the caller needs the offset. The
259 * first returned {@link MutableEntry} is 0, the second is 1,
260 * etc. Positions past 2**31-1 are negative, but still valid.
261 * @return the offset in a pack for the corresponding entry.
262 */
263 abstract long getOffset(long nthPosition);
264
265 /**
266 * Locate the file offset position for the requested object.
267 *
268 * @param objId
269 * name of the object to locate within the pack.
270 * @return offset of the object's header and compressed content; -1 if the
271 * object does not exist in this index and is thus not stored in the
272 * associated pack.
273 */
274 public abstract long findOffset(AnyObjectId objId);
275
276 /**
277 * Retrieve stored CRC32 checksum of the requested object raw-data
278 * (including header).
279 *
280 * @param objId
281 * id of object to look for
282 * @return CRC32 checksum of specified object (at 32 less significant bits)
283 * @throws org.eclipse.jgit.errors.MissingObjectException
284 * when requested ObjectId was not found in this index
285 * @throws java.lang.UnsupportedOperationException
286 * when this index doesn't support CRC32 checksum
287 */
288 public abstract long findCRC32(AnyObjectId objId)
289 throws MissingObjectException, UnsupportedOperationException;
290
291 /**
292 * Check whether this index supports (has) CRC32 checksums for objects.
293 *
294 * @return true if CRC32 is stored, false otherwise
295 */
296 public abstract boolean hasCRC32Support();
297
298 /**
299 * Find objects matching the prefix abbreviation.
300 *
301 * @param matches
302 * set to add any located ObjectIds to. This is an output
303 * parameter.
304 * @param id
305 * prefix to search for.
306 * @param matchLimit
307 * maximum number of results to return. At most this many
308 * ObjectIds should be added to matches before returning.
309 * @throws java.io.IOException
310 * the index cannot be read.
311 */
312 public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id,
313 int matchLimit) throws IOException;
314
315 /**
316 * Represent mutable entry of pack index consisting of object id and offset
317 * in pack (both mutable).
318 *
319 */
320 public static class MutableEntry {
321 final MutableObjectId idBuffer = new MutableObjectId();
322
323 long offset;
324
325 /**
326 * Returns offset for this index object entry
327 *
328 * @return offset of this object in a pack file
329 */
330 public long getOffset() {
331 return offset;
332 }
333
334 /** @return hex string describing the object id of this entry. */
335 public String name() {
336 ensureId();
337 return idBuffer.name();
338 }
339
340 /** @return a copy of the object id. */
341 public ObjectId toObjectId() {
342 ensureId();
343 return idBuffer.toObjectId();
344 }
345
346 /** @return a complete copy of this entry, that won't modify */
347 public MutableEntry cloneEntry() {
348 final MutableEntry r = new MutableEntry();
349 ensureId();
350 r.idBuffer.fromObjectId(idBuffer);
351 r.offset = offset;
352 return r;
353 }
354
355 void ensureId() {
356 // Override in implementations.
357 }
358 }
359
360 abstract class EntriesIterator implements Iterator<MutableEntry> {
361 protected final MutableEntry entry = initEntry();
362
363 protected long returnedNumber = 0;
364
365 protected abstract MutableEntry initEntry();
366
367 @Override
368 public boolean hasNext() {
369 return returnedNumber < getObjectCount();
370 }
371
372 /**
373 * Implementation must update {@link #returnedNumber} before returning
374 * element.
375 */
376 @Override
377 public abstract MutableEntry next();
378
379 @Override
380 public void remove() {
381 throw new UnsupportedOperationException();
382 }
383 }
384 }