View Javadoc
1   /*
2    * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.internal.storage.file;
46  
47  import java.io.File;
48  import java.io.FileInputStream;
49  import java.io.FileNotFoundException;
50  import java.io.IOException;
51  import java.io.InputStream;
52  import java.text.MessageFormat;
53  import java.util.Iterator;
54  import java.util.Set;
55  
56  import org.eclipse.jgit.errors.CorruptObjectException;
57  import org.eclipse.jgit.errors.MissingObjectException;
58  import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException;
59  import org.eclipse.jgit.internal.JGitText;
60  import org.eclipse.jgit.lib.AbbreviatedObjectId;
61  import org.eclipse.jgit.lib.AnyObjectId;
62  import org.eclipse.jgit.lib.MutableObjectId;
63  import org.eclipse.jgit.lib.ObjectId;
64  import org.eclipse.jgit.lib.ObjectIdSet;
65  import org.eclipse.jgit.util.IO;
66  import org.eclipse.jgit.util.NB;
67  
68  /**
69   * Access path to locate objects by {@link ObjectId} in a {@link PackFile}.
70   * <p>
71   * Indexes are strictly redundant information in that we can rebuild all of the
72   * data held in the index file from the on disk representation of the pack file
73   * itself, but it is faster to access for random requests because data is stored
74   * by ObjectId.
75   * </p>
76   */
77  public abstract class PackIndex
78  		implements Iterable<PackIndex.MutableEntry>, ObjectIdSet {
79  	/**
80  	 * Open an existing pack <code>.idx</code> file for reading.
81  	 * <p>
82  	 * The format of the file will be automatically detected and a proper access
83  	 * implementation for that format will be constructed and returned to the
84  	 * caller. The file may or may not be held open by the returned instance.
85  	 * </p>
86  	 *
87  	 * @param idxFile
88  	 *            existing pack .idx to read.
89  	 * @return access implementation for the requested file.
90  	 * @throws FileNotFoundException
91  	 *             the file does not exist.
92  	 * @throws IOException
93  	 *             the file exists but could not be read due to security errors,
94  	 *             unrecognized data version, or unexpected data corruption.
95  	 */
96  	public static PackIndex open(final File idxFile) throws IOException {
97  		final FileInputStream fd = new FileInputStream(idxFile);
98  		try {
99  			return read(fd);
100 		} catch (IOException ioe) {
101 			final String path = idxFile.getAbsolutePath();
102 			final IOException err;
103 			err = new IOException(MessageFormat.format(JGitText.get().unreadablePackIndex, path));
104 			err.initCause(ioe);
105 			throw err;
106 		} finally {
107 			try {
108 				fd.close();
109 			} catch (IOException err2) {
110 				// ignore
111 			}
112 		}
113 	}
114 
115 	/**
116 	 * Read an existing pack index file from a buffered stream.
117 	 * <p>
118 	 * The format of the file will be automatically detected and a proper access
119 	 * implementation for that format will be constructed and returned to the
120 	 * caller. The file may or may not be held open by the returned instance.
121 	 *
122 	 * @param fd
123 	 *            stream to read the index file from. The stream must be
124 	 *            buffered as some small IOs are performed against the stream.
125 	 *            The caller is responsible for closing the stream.
126 	 * @return a copy of the index in-memory.
127 	 * @throws IOException
128 	 *             the stream cannot be read.
129 	 * @throws CorruptObjectException
130 	 *             the stream does not contain a valid pack index.
131 	 */
132 	public static PackIndex read(InputStream fd) throws IOException,
133 			CorruptObjectException {
134 		final byte[] hdr = new byte[8];
135 		IO.readFully(fd, hdr, 0, hdr.length);
136 		if (isTOC(hdr)) {
137 			final int v = NB.decodeInt32(hdr, 4);
138 			switch (v) {
139 			case 2:
140 				return new PackIndexV2(fd);
141 			default:
142 				throw new UnsupportedPackIndexVersionException(v);
143 			}
144 		}
145 		return new PackIndexV1(fd, hdr);
146 	}
147 
148 	private static boolean isTOC(final byte[] h) {
149 		final byte[] toc = PackIndexWriter.TOC;
150 		for (int i = 0; i < toc.length; i++)
151 			if (h[i] != toc[i])
152 				return false;
153 		return true;
154 	}
155 
156 	/** Footer checksum applied on the bottom of the pack file. */
157 	protected byte[] packChecksum;
158 
159 	/**
160 	 * Determine if an object is contained within the pack file.
161 	 *
162 	 * @param id
163 	 *            the object to look for. Must not be null.
164 	 * @return true if the object is listed in this index; false otherwise.
165 	 */
166 	public boolean hasObject(final AnyObjectId id) {
167 		return findOffset(id) != -1;
168 	}
169 
170 	@Override
171 	public boolean contains(AnyObjectId id) {
172 		return findOffset(id) != -1;
173 	}
174 
175 	/**
176 	 * Provide iterator that gives access to index entries. Note, that iterator
177 	 * returns reference to mutable object, the same reference in each call -
178 	 * for performance reason. If client needs immutable objects, it must copy
179 	 * returned object on its own.
180 	 * <p>
181 	 * Iterator returns objects in SHA-1 lexicographical order.
182 	 * </p>
183 	 *
184 	 * @return iterator over pack index entries
185 	 */
186 	@Override
187 	public abstract Iterator<MutableEntry> iterator();
188 
189 	/**
190 	 * Obtain the total number of objects described by this index.
191 	 *
192 	 * @return number of objects in this index, and likewise in the associated
193 	 *         pack that this index was generated from.
194 	 */
195 	public abstract long getObjectCount();
196 
197 	/**
198 	 * Obtain the total number of objects needing 64 bit offsets.
199 	 *
200 	 * @return number of objects in this index using a 64 bit offset; that is an
201 	 *         object positioned after the 2 GB position within the file.
202 	 */
203 	public abstract long getOffset64Count();
204 
205 	/**
206 	 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
207 	 * <p>
208 	 * This method is a constant-time replacement for the following loop:
209 	 *
210 	 * <pre>
211 	 * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
212 	 * int curPosition = 0;
213 	 * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
214 	 * 	eItr.next();
215 	 * ObjectId result = eItr.next().toObjectId();
216 	 * </pre>
217 	 *
218 	 * @param nthPosition
219 	 *            position within the traversal of {@link #iterator()} that the
220 	 *            caller needs the object for. The first returned
221 	 *            {@link MutableEntry} is 0, the second is 1, etc.
222 	 * @return the ObjectId for the corresponding entry.
223 	 */
224 	public abstract ObjectId getObjectId(long nthPosition);
225 
226 	/**
227 	 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
228 	 * <p>
229 	 * This method is a constant-time replacement for the following loop:
230 	 *
231 	 * <pre>
232 	 * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
233 	 * int curPosition = 0;
234 	 * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
235 	 * 	eItr.next();
236 	 * ObjectId result = eItr.next().toObjectId();
237 	 * </pre>
238 	 *
239 	 * @param nthPosition
240 	 *            unsigned 32 bit position within the traversal of
241 	 *            {@link #iterator()} that the caller needs the object for. The
242 	 *            first returned {@link MutableEntry} is 0, the second is 1,
243 	 *            etc. Positions past 2**31-1 are negative, but still valid.
244 	 * @return the ObjectId for the corresponding entry.
245 	 */
246 	public final ObjectId getObjectId(final int nthPosition) {
247 		if (nthPosition >= 0)
248 			return getObjectId((long) nthPosition);
249 		final int u31 = nthPosition >>> 1;
250 		final int one = nthPosition & 1;
251 		return getObjectId(((long) u31) << 1 | one);
252 	}
253 
254 	/**
255 	 * Get offset in a pack for the n-th object entry returned by
256 	 * {@link #iterator()}.
257 	 *
258 	 * @param nthPosition
259 	 *            unsigned 32 bit position within the traversal of
260 	 *            {@link #iterator()} for which the caller needs the offset. The
261 	 *            first returned {@link MutableEntry} is 0, the second is 1,
262 	 *            etc. Positions past 2**31-1 are negative, but still valid.
263 	 * @return the offset in a pack for the corresponding entry.
264 	 */
265 	abstract long getOffset(long nthPosition);
266 
267 	/**
268 	 * Locate the file offset position for the requested object.
269 	 *
270 	 * @param objId
271 	 *            name of the object to locate within the pack.
272 	 * @return offset of the object's header and compressed content; -1 if the
273 	 *         object does not exist in this index and is thus not stored in the
274 	 *         associated pack.
275 	 */
276 	public abstract long findOffset(AnyObjectId objId);
277 
278 	/**
279 	 * Retrieve stored CRC32 checksum of the requested object raw-data
280 	 * (including header).
281 	 *
282 	 * @param objId
283 	 *            id of object to look for
284 	 * @return CRC32 checksum of specified object (at 32 less significant bits)
285 	 * @throws MissingObjectException
286 	 *             when requested ObjectId was not found in this index
287 	 * @throws UnsupportedOperationException
288 	 *             when this index doesn't support CRC32 checksum
289 	 */
290 	public abstract long findCRC32(AnyObjectId objId)
291 			throws MissingObjectException, UnsupportedOperationException;
292 
293 	/**
294 	 * Check whether this index supports (has) CRC32 checksums for objects.
295 	 *
296 	 * @return true if CRC32 is stored, false otherwise
297 	 */
298 	public abstract boolean hasCRC32Support();
299 
300 	/**
301 	 * Find objects matching the prefix abbreviation.
302 	 *
303 	 * @param matches
304 	 *            set to add any located ObjectIds to. This is an output
305 	 *            parameter.
306 	 * @param id
307 	 *            prefix to search for.
308 	 * @param matchLimit
309 	 *            maximum number of results to return. At most this many
310 	 *            ObjectIds should be added to matches before returning.
311 	 * @throws IOException
312 	 *             the index cannot be read.
313 	 */
314 	public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id,
315 			int matchLimit) throws IOException;
316 
317 	/**
318 	 * Represent mutable entry of pack index consisting of object id and offset
319 	 * in pack (both mutable).
320 	 *
321 	 */
322 	public static class MutableEntry {
323 		final MutableObjectId idBuffer = new MutableObjectId();
324 
325 		long offset;
326 
327 		/**
328 		 * Returns offset for this index object entry
329 		 *
330 		 * @return offset of this object in a pack file
331 		 */
332 		public long getOffset() {
333 			return offset;
334 		}
335 
336 		/** @return hex string describing the object id of this entry. */
337 		public String name() {
338 			ensureId();
339 			return idBuffer.name();
340 		}
341 
342 		/** @return a copy of the object id. */
343 		public ObjectId toObjectId() {
344 			ensureId();
345 			return idBuffer.toObjectId();
346 		}
347 
348 		/** @return a complete copy of this entry, that won't modify */
349 		public MutableEntry cloneEntry() {
350 			final MutableEntry r = new MutableEntry();
351 			ensureId();
352 			r.idBuffer.fromObjectId(idBuffer);
353 			r.offset = offset;
354 			return r;
355 		}
356 
357 		void ensureId() {
358 			// Override in implementations.
359 		}
360 	}
361 
362 	abstract class EntriesIterator implements Iterator<MutableEntry> {
363 		protected final MutableEntry entry = initEntry();
364 
365 		protected long returnedNumber = 0;
366 
367 		protected abstract MutableEntry initEntry();
368 
369 		@Override
370 		public boolean hasNext() {
371 			return returnedNumber < getObjectCount();
372 		}
373 
374 		/**
375 		 * Implementation must update {@link #returnedNumber} before returning
376 		 * element.
377 		 */
378 		@Override
379 		public abstract MutableEntry next();
380 
381 		@Override
382 		public void remove() {
383 			throw new UnsupportedOperationException();
384 		}
385 	}
386 }