View Javadoc
1   /*
2    * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
4    *
5    * This program and the accompanying materials are made available under the
6    * terms of the Eclipse Distribution License v. 1.0 which is available at
7    * https://www.eclipse.org/org/documents/edl-v10.php.
8    *
9    * SPDX-License-Identifier: BSD-3-Clause
10   */
11  
12  package org.eclipse.jgit.internal.storage.file;
13  
14  import java.io.File;
15  import java.io.FileNotFoundException;
16  import java.io.IOException;
17  import java.io.InputStream;
18  import java.text.MessageFormat;
19  import java.util.Iterator;
20  import java.util.Set;
21  
22  import org.eclipse.jgit.errors.CorruptObjectException;
23  import org.eclipse.jgit.errors.MissingObjectException;
24  import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException;
25  import org.eclipse.jgit.internal.JGitText;
26  import org.eclipse.jgit.lib.AbbreviatedObjectId;
27  import org.eclipse.jgit.lib.AnyObjectId;
28  import org.eclipse.jgit.lib.MutableObjectId;
29  import org.eclipse.jgit.lib.ObjectId;
30  import org.eclipse.jgit.lib.ObjectIdSet;
31  import org.eclipse.jgit.util.IO;
32  import org.eclipse.jgit.util.NB;
33  import org.eclipse.jgit.util.io.SilentFileInputStream;
34  
35  /**
36   * Access path to locate objects by {@link org.eclipse.jgit.lib.ObjectId} in a
37   * {@link org.eclipse.jgit.internal.storage.file.Pack}.
38   * <p>
39   * Indexes are strictly redundant information in that we can rebuild all of the
40   * data held in the index file from the on disk representation of the pack file
41   * itself, but it is faster to access for random requests because data is stored
42   * by ObjectId.
43   * </p>
44   */
45  public abstract class PackIndex
46  		implements Iterable<PackIndex.MutableEntry>, ObjectIdSet {
47  	/**
48  	 * Open an existing pack <code>.idx</code> file for reading.
49  	 * <p>
50  	 * The format of the file will be automatically detected and a proper access
51  	 * implementation for that format will be constructed and returned to the
52  	 * caller. The file may or may not be held open by the returned instance.
53  	 * </p>
54  	 *
55  	 * @param idxFile
56  	 *            existing pack .idx to read.
57  	 * @return access implementation for the requested file.
58  	 * @throws FileNotFoundException
59  	 *             the file does not exist.
60  	 * @throws java.io.IOException
61  	 *             the file exists but could not be read due to security errors,
62  	 *             unrecognized data version, or unexpected data corruption.
63  	 */
64  	public static PackIndex open(File idxFile) throws IOException {
65  		try (SilentFileInputStream fd = new SilentFileInputStream(
66  				idxFile)) {
67  				return read(fd);
68  		} catch (IOException ioe) {
69  			throw new IOException(
70  					MessageFormat.format(JGitText.get().unreadablePackIndex,
71  							idxFile.getAbsolutePath()),
72  					ioe);
73  		}
74  	}
75  
76  	/**
77  	 * Read an existing pack index file from a buffered stream.
78  	 * <p>
79  	 * The format of the file will be automatically detected and a proper access
80  	 * implementation for that format will be constructed and returned to the
81  	 * caller. The file may or may not be held open by the returned instance.
82  	 *
83  	 * @param fd
84  	 *            stream to read the index file from. The stream must be
85  	 *            buffered as some small IOs are performed against the stream.
86  	 *            The caller is responsible for closing the stream.
87  	 * @return a copy of the index in-memory.
88  	 * @throws java.io.IOException
89  	 *             the stream cannot be read.
90  	 * @throws org.eclipse.jgit.errors.CorruptObjectException
91  	 *             the stream does not contain a valid pack index.
92  	 */
93  	public static PackIndex read(InputStream fd) throws IOException,
94  			CorruptObjectException {
95  		final byte[] hdr = new byte[8];
96  		IO.readFully(fd, hdr, 0, hdr.length);
97  		if (isTOC(hdr)) {
98  			final int v = NB.decodeInt32(hdr, 4);
99  			switch (v) {
100 			case 2:
101 				return new PackIndexV2(fd);
102 			default:
103 				throw new UnsupportedPackIndexVersionException(v);
104 			}
105 		}
106 		return new PackIndexV1(fd, hdr);
107 	}
108 
109 	private static boolean isTOC(byte[] h) {
110 		final byte[] toc = PackIndexWriter.TOC;
111 		for (int i = 0; i < toc.length; i++)
112 			if (h[i] != toc[i])
113 				return false;
114 		return true;
115 	}
116 
117 	/** Footer checksum applied on the bottom of the pack file. */
118 	protected byte[] packChecksum;
119 
120 	/**
121 	 * Determine if an object is contained within the pack file.
122 	 *
123 	 * @param id
124 	 *            the object to look for. Must not be null.
125 	 * @return true if the object is listed in this index; false otherwise.
126 	 */
127 	public boolean hasObject(AnyObjectId id) {
128 		return findOffset(id) != -1;
129 	}
130 
131 	/** {@inheritDoc} */
132 	@Override
133 	public boolean contains(AnyObjectId id) {
134 		return findOffset(id) != -1;
135 	}
136 
137 	/**
138 	 * {@inheritDoc}
139 	 * <p>
140 	 * Provide iterator that gives access to index entries. Note, that iterator
141 	 * returns reference to mutable object, the same reference in each call -
142 	 * for performance reason. If client needs immutable objects, it must copy
143 	 * returned object on its own.
144 	 * <p>
145 	 * Iterator returns objects in SHA-1 lexicographical order.
146 	 * </p>
147 	 */
148 	@Override
149 	public abstract Iterator<MutableEntry> iterator();
150 
151 	/**
152 	 * Obtain the total number of objects described by this index.
153 	 *
154 	 * @return number of objects in this index, and likewise in the associated
155 	 *         pack that this index was generated from.
156 	 */
157 	public abstract long getObjectCount();
158 
159 	/**
160 	 * Obtain the total number of objects needing 64 bit offsets.
161 	 *
162 	 * @return number of objects in this index using a 64 bit offset; that is an
163 	 *         object positioned after the 2 GB position within the file.
164 	 */
165 	public abstract long getOffset64Count();
166 
167 	/**
168 	 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
169 	 * <p>
170 	 * This method is a constant-time replacement for the following loop:
171 	 *
172 	 * <pre>
173 	 * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
174 	 * int curPosition = 0;
175 	 * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
176 	 * 	eItr.next();
177 	 * ObjectId result = eItr.next().toObjectId();
178 	 * </pre>
179 	 *
180 	 * @param nthPosition
181 	 *            position within the traversal of {@link #iterator()} that the
182 	 *            caller needs the object for. The first returned
183 	 *            {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
184 	 *            is 0, the second is 1, etc.
185 	 * @return the ObjectId for the corresponding entry.
186 	 */
187 	public abstract ObjectId getObjectId(long nthPosition);
188 
189 	/**
190 	 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
191 	 * <p>
192 	 * This method is a constant-time replacement for the following loop:
193 	 *
194 	 * <pre>
195 	 * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
196 	 * int curPosition = 0;
197 	 * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
198 	 * 	eItr.next();
199 	 * ObjectId result = eItr.next().toObjectId();
200 	 * </pre>
201 	 *
202 	 * @param nthPosition
203 	 *            unsigned 32 bit position within the traversal of
204 	 *            {@link #iterator()} that the caller needs the object for. The
205 	 *            first returned
206 	 *            {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
207 	 *            is 0, the second is 1, etc. Positions past 2**31-1 are
208 	 *            negative, but still valid.
209 	 * @return the ObjectId for the corresponding entry.
210 	 */
211 	public final ObjectId getObjectId(int nthPosition) {
212 		if (nthPosition >= 0)
213 			return getObjectId((long) nthPosition);
214 		final int u31 = nthPosition >>> 1;
215 		final int one = nthPosition & 1;
216 		return getObjectId(((long) u31) << 1 | one);
217 	}
218 
219 	/**
220 	 * Get offset in a pack for the n-th object entry returned by
221 	 * {@link #iterator()}.
222 	 *
223 	 * @param nthPosition
224 	 *            unsigned 32 bit position within the traversal of
225 	 *            {@link #iterator()} for which the caller needs the offset. The
226 	 *            first returned {@link MutableEntry} is 0, the second is 1,
227 	 *            etc. Positions past 2**31-1 are negative, but still valid.
228 	 * @return the offset in a pack for the corresponding entry.
229 	 */
230 	abstract long getOffset(long nthPosition);
231 
232 	/**
233 	 * Locate the file offset position for the requested object.
234 	 *
235 	 * @param objId
236 	 *            name of the object to locate within the pack.
237 	 * @return offset of the object's header and compressed content; -1 if the
238 	 *         object does not exist in this index and is thus not stored in the
239 	 *         associated pack.
240 	 */
241 	public abstract long findOffset(AnyObjectId objId);
242 
243 	/**
244 	 * Retrieve stored CRC32 checksum of the requested object raw-data
245 	 * (including header).
246 	 *
247 	 * @param objId
248 	 *            id of object to look for
249 	 * @return CRC32 checksum of specified object (at 32 less significant bits)
250 	 * @throws org.eclipse.jgit.errors.MissingObjectException
251 	 *             when requested ObjectId was not found in this index
252 	 * @throws java.lang.UnsupportedOperationException
253 	 *             when this index doesn't support CRC32 checksum
254 	 */
255 	public abstract long findCRC32(AnyObjectId objId)
256 			throws MissingObjectException, UnsupportedOperationException;
257 
258 	/**
259 	 * Check whether this index supports (has) CRC32 checksums for objects.
260 	 *
261 	 * @return true if CRC32 is stored, false otherwise
262 	 */
263 	public abstract boolean hasCRC32Support();
264 
265 	/**
266 	 * Find objects matching the prefix abbreviation.
267 	 *
268 	 * @param matches
269 	 *            set to add any located ObjectIds to. This is an output
270 	 *            parameter.
271 	 * @param id
272 	 *            prefix to search for.
273 	 * @param matchLimit
274 	 *            maximum number of results to return. At most this many
275 	 *            ObjectIds should be added to matches before returning.
276 	 * @throws java.io.IOException
277 	 *             the index cannot be read.
278 	 */
279 	public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id,
280 			int matchLimit) throws IOException;
281 
282 	/**
283 	 * @return the checksum of the pack; caller must not modify it
284 	 * @since 5.5
285 	 */
286 	public byte[] getChecksum() {
287 		return packChecksum;
288 	}
289 
290 	/**
291 	 * Represent mutable entry of pack index consisting of object id and offset
292 	 * in pack (both mutable).
293 	 *
294 	 */
295 	public static class MutableEntry {
296 		final MutableObjectId idBuffer = new MutableObjectId();
297 
298 		long offset;
299 
300 		/**
301 		 * Returns offset for this index object entry
302 		 *
303 		 * @return offset of this object in a pack file
304 		 */
305 		public long getOffset() {
306 			return offset;
307 		}
308 
309 		/** @return hex string describing the object id of this entry. */
310 		public String name() {
311 			ensureId();
312 			return idBuffer.name();
313 		}
314 
315 		/** @return a copy of the object id. */
316 		public ObjectId toObjectId() {
317 			ensureId();
318 			return idBuffer.toObjectId();
319 		}
320 
321 		/** @return a complete copy of this entry, that won't modify */
322 		public MutableEntry cloneEntry() {
323 			final MutableEntry r = new MutableEntry();
324 			ensureId();
325 			r.idBuffer.fromObjectId(idBuffer);
326 			r.offset = offset;
327 			return r;
328 		}
329 
330 		void ensureId() {
331 			// Override in implementations.
332 		}
333 	}
334 
335 	abstract class EntriesIterator implements Iterator<MutableEntry> {
336 		protected final MutableEntry entry = initEntry();
337 
338 		protected long returnedNumber = 0;
339 
340 		protected abstract MutableEntry initEntry();
341 
342 		@Override
343 		public boolean hasNext() {
344 			return returnedNumber < getObjectCount();
345 		}
346 
347 		/**
348 		 * Implementation must update {@link #returnedNumber} before returning
349 		 * element.
350 		 */
351 		@Override
352 		public abstract MutableEntry next();
353 
354 		@Override
355 		public void remove() {
356 			throw new UnsupportedOperationException();
357 		}
358 	}
359 }