1 /*
2 * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4 * and other copyright owners as documented in the project's IP log.
5 *
6 * This program and the accompanying materials are made available
7 * under the terms of the Eclipse Distribution License v1.0 which
8 * accompanies this distribution, is reproduced below, and is
9 * available at http://www.eclipse.org/org/documents/edl-v10.php
10 *
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials provided
23 * with the distribution.
24 *
25 * - Neither the name of the Eclipse Foundation, Inc. nor the
26 * names of its contributors may be used to endorse or promote
27 * products derived from this software without specific prior
28 * written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 package org.eclipse.jgit.internal.storage.file;
46
47 import java.io.File;
48 import java.io.FileInputStream;
49 import java.io.FileNotFoundException;
50 import java.io.IOException;
51 import java.io.InputStream;
52 import java.text.MessageFormat;
53 import java.util.Iterator;
54 import java.util.Set;
55
56 import org.eclipse.jgit.errors.CorruptObjectException;
57 import org.eclipse.jgit.errors.MissingObjectException;
58 import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException;
59 import org.eclipse.jgit.internal.JGitText;
60 import org.eclipse.jgit.lib.AbbreviatedObjectId;
61 import org.eclipse.jgit.lib.AnyObjectId;
62 import org.eclipse.jgit.lib.MutableObjectId;
63 import org.eclipse.jgit.lib.ObjectId;
64 import org.eclipse.jgit.lib.ObjectIdSet;
65 import org.eclipse.jgit.util.IO;
66 import org.eclipse.jgit.util.NB;
67
68 /**
69 * Access path to locate objects by {@link ObjectId} in a {@link PackFile}.
70 * <p>
71 * Indexes are strictly redundant information in that we can rebuild all of the
72 * data held in the index file from the on disk representation of the pack file
73 * itself, but it is faster to access for random requests because data is stored
74 * by ObjectId.
75 * </p>
76 */
77 public abstract class PackIndex
78 implements Iterable<PackIndex.MutableEntry>, ObjectIdSet {
79 /**
80 * Open an existing pack <code>.idx</code> file for reading.
81 * <p>
82 * The format of the file will be automatically detected and a proper access
83 * implementation for that format will be constructed and returned to the
84 * caller. The file may or may not be held open by the returned instance.
85 * </p>
86 *
87 * @param idxFile
88 * existing pack .idx to read.
89 * @return access implementation for the requested file.
90 * @throws FileNotFoundException
91 * the file does not exist.
92 * @throws IOException
93 * the file exists but could not be read due to security errors,
94 * unrecognized data version, or unexpected data corruption.
95 */
96 public static PackIndex open(final File idxFile) throws IOException {
97 final FileInputStream fd = new FileInputStream(idxFile);
98 try {
99 return read(fd);
100 } catch (IOException ioe) {
101 final String path = idxFile.getAbsolutePath();
102 final IOException err;
103 err = new IOException(MessageFormat.format(JGitText.get().unreadablePackIndex, path));
104 err.initCause(ioe);
105 throw err;
106 } finally {
107 try {
108 fd.close();
109 } catch (IOException err2) {
110 // ignore
111 }
112 }
113 }
114
115 /**
116 * Read an existing pack index file from a buffered stream.
117 * <p>
118 * The format of the file will be automatically detected and a proper access
119 * implementation for that format will be constructed and returned to the
120 * caller. The file may or may not be held open by the returned instance.
121 *
122 * @param fd
123 * stream to read the index file from. The stream must be
124 * buffered as some small IOs are performed against the stream.
125 * The caller is responsible for closing the stream.
126 * @return a copy of the index in-memory.
127 * @throws IOException
128 * the stream cannot be read.
129 * @throws CorruptObjectException
130 * the stream does not contain a valid pack index.
131 */
132 public static PackIndex read(InputStream fd) throws IOException,
133 CorruptObjectException {
134 final byte[] hdr = new byte[8];
135 IO.readFully(fd, hdr, 0, hdr.length);
136 if (isTOC(hdr)) {
137 final int v = NB.decodeInt32(hdr, 4);
138 switch (v) {
139 case 2:
140 return new PackIndexV2(fd);
141 default:
142 throw new UnsupportedPackIndexVersionException(v);
143 }
144 }
145 return new PackIndexV1(fd, hdr);
146 }
147
148 private static boolean isTOC(final byte[] h) {
149 final byte[] toc = PackIndexWriter.TOC;
150 for (int i = 0; i < toc.length; i++)
151 if (h[i] != toc[i])
152 return false;
153 return true;
154 }
155
156 /** Footer checksum applied on the bottom of the pack file. */
157 protected byte[] packChecksum;
158
159 /**
160 * Determine if an object is contained within the pack file.
161 *
162 * @param id
163 * the object to look for. Must not be null.
164 * @return true if the object is listed in this index; false otherwise.
165 */
166 public boolean hasObject(final AnyObjectId id) {
167 return findOffset(id) != -1;
168 }
169
170 @Override
171 public boolean contains(AnyObjectId id) {
172 return findOffset(id) != -1;
173 }
174
175 /**
176 * Provide iterator that gives access to index entries. Note, that iterator
177 * returns reference to mutable object, the same reference in each call -
178 * for performance reason. If client needs immutable objects, it must copy
179 * returned object on its own.
180 * <p>
181 * Iterator returns objects in SHA-1 lexicographical order.
182 * </p>
183 *
184 * @return iterator over pack index entries
185 */
186 @Override
187 public abstract Iterator<MutableEntry> iterator();
188
189 /**
190 * Obtain the total number of objects described by this index.
191 *
192 * @return number of objects in this index, and likewise in the associated
193 * pack that this index was generated from.
194 */
195 public abstract long getObjectCount();
196
197 /**
198 * Obtain the total number of objects needing 64 bit offsets.
199 *
200 * @return number of objects in this index using a 64 bit offset; that is an
201 * object positioned after the 2 GB position within the file.
202 */
203 public abstract long getOffset64Count();
204
205 /**
206 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
207 * <p>
208 * This method is a constant-time replacement for the following loop:
209 *
210 * <pre>
211 * Iterator<MutableEntry> eItr = index.iterator();
212 * int curPosition = 0;
213 * while (eItr.hasNext() && curPosition++ < nthPosition)
214 * eItr.next();
215 * ObjectId result = eItr.next().toObjectId();
216 * </pre>
217 *
218 * @param nthPosition
219 * position within the traversal of {@link #iterator()} that the
220 * caller needs the object for. The first returned
221 * {@link MutableEntry} is 0, the second is 1, etc.
222 * @return the ObjectId for the corresponding entry.
223 */
224 public abstract ObjectId getObjectId(long nthPosition);
225
226 /**
227 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
228 * <p>
229 * This method is a constant-time replacement for the following loop:
230 *
231 * <pre>
232 * Iterator<MutableEntry> eItr = index.iterator();
233 * int curPosition = 0;
234 * while (eItr.hasNext() && curPosition++ < nthPosition)
235 * eItr.next();
236 * ObjectId result = eItr.next().toObjectId();
237 * </pre>
238 *
239 * @param nthPosition
240 * unsigned 32 bit position within the traversal of
241 * {@link #iterator()} that the caller needs the object for. The
242 * first returned {@link MutableEntry} is 0, the second is 1,
243 * etc. Positions past 2**31-1 are negative, but still valid.
244 * @return the ObjectId for the corresponding entry.
245 */
246 public final ObjectId getObjectId(final int nthPosition) {
247 if (nthPosition >= 0)
248 return getObjectId((long) nthPosition);
249 final int u31 = nthPosition >>> 1;
250 final int one = nthPosition & 1;
251 return getObjectId(((long) u31) << 1 | one);
252 }
253
254 /**
255 * Get offset in a pack for the n-th object entry returned by
256 * {@link #iterator()}.
257 *
258 * @param nthPosition
259 * unsigned 32 bit position within the traversal of
260 * {@link #iterator()} for which the caller needs the offset. The
261 * first returned {@link MutableEntry} is 0, the second is 1,
262 * etc. Positions past 2**31-1 are negative, but still valid.
263 * @return the offset in a pack for the corresponding entry.
264 */
265 abstract long getOffset(long nthPosition);
266
267 /**
268 * Locate the file offset position for the requested object.
269 *
270 * @param objId
271 * name of the object to locate within the pack.
272 * @return offset of the object's header and compressed content; -1 if the
273 * object does not exist in this index and is thus not stored in the
274 * associated pack.
275 */
276 public abstract long findOffset(AnyObjectId objId);
277
278 /**
279 * Retrieve stored CRC32 checksum of the requested object raw-data
280 * (including header).
281 *
282 * @param objId
283 * id of object to look for
284 * @return CRC32 checksum of specified object (at 32 less significant bits)
285 * @throws MissingObjectException
286 * when requested ObjectId was not found in this index
287 * @throws UnsupportedOperationException
288 * when this index doesn't support CRC32 checksum
289 */
290 public abstract long findCRC32(AnyObjectId objId)
291 throws MissingObjectException, UnsupportedOperationException;
292
293 /**
294 * Check whether this index supports (has) CRC32 checksums for objects.
295 *
296 * @return true if CRC32 is stored, false otherwise
297 */
298 public abstract boolean hasCRC32Support();
299
300 /**
301 * Find objects matching the prefix abbreviation.
302 *
303 * @param matches
304 * set to add any located ObjectIds to. This is an output
305 * parameter.
306 * @param id
307 * prefix to search for.
308 * @param matchLimit
309 * maximum number of results to return. At most this many
310 * ObjectIds should be added to matches before returning.
311 * @throws IOException
312 * the index cannot be read.
313 */
314 public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id,
315 int matchLimit) throws IOException;
316
317 /**
318 * Represent mutable entry of pack index consisting of object id and offset
319 * in pack (both mutable).
320 *
321 */
322 public static class MutableEntry {
323 final MutableObjectId idBuffer = new MutableObjectId();
324
325 long offset;
326
327 /**
328 * Returns offset for this index object entry
329 *
330 * @return offset of this object in a pack file
331 */
332 public long getOffset() {
333 return offset;
334 }
335
336 /** @return hex string describing the object id of this entry. */
337 public String name() {
338 ensureId();
339 return idBuffer.name();
340 }
341
342 /** @return a copy of the object id. */
343 public ObjectId toObjectId() {
344 ensureId();
345 return idBuffer.toObjectId();
346 }
347
348 /** @return a complete copy of this entry, that won't modify */
349 public MutableEntry cloneEntry() {
350 final MutableEntry r = new MutableEntry();
351 ensureId();
352 r.idBuffer.fromObjectId(idBuffer);
353 r.offset = offset;
354 return r;
355 }
356
357 void ensureId() {
358 // Override in implementations.
359 }
360 }
361
362 abstract class EntriesIterator implements Iterator<MutableEntry> {
363 protected final MutableEntry entry = initEntry();
364
365 protected long returnedNumber = 0;
366
367 protected abstract MutableEntry initEntry();
368
369 @Override
370 public boolean hasNext() {
371 return returnedNumber < getObjectCount();
372 }
373
374 /**
375 * Implementation must update {@link #returnedNumber} before returning
376 * element.
377 */
378 @Override
379 public abstract MutableEntry next();
380
381 @Override
382 public void remove() {
383 throw new UnsupportedOperationException();
384 }
385 }
386 }