1 /*
2 * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
4 *
5 * This program and the accompanying materials are made available under the
6 * terms of the Eclipse Distribution License v. 1.0 which is available at
7 * https://www.eclipse.org/org/documents/edl-v10.php.
8 *
9 * SPDX-License-Identifier: BSD-3-Clause
10 */
11
12 package org.eclipse.jgit.internal.storage.file;
13
14 import java.io.File;
15 import java.io.FileNotFoundException;
16 import java.io.IOException;
17 import java.io.InputStream;
18 import java.text.MessageFormat;
19 import java.util.Iterator;
20 import java.util.Set;
21
22 import org.eclipse.jgit.errors.CorruptObjectException;
23 import org.eclipse.jgit.errors.MissingObjectException;
24 import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException;
25 import org.eclipse.jgit.internal.JGitText;
26 import org.eclipse.jgit.lib.AbbreviatedObjectId;
27 import org.eclipse.jgit.lib.AnyObjectId;
28 import org.eclipse.jgit.lib.MutableObjectId;
29 import org.eclipse.jgit.lib.ObjectId;
30 import org.eclipse.jgit.lib.ObjectIdSet;
31 import org.eclipse.jgit.util.IO;
32 import org.eclipse.jgit.util.NB;
33 import org.eclipse.jgit.util.io.SilentFileInputStream;
34
35 /**
36 * Access path to locate objects by {@link org.eclipse.jgit.lib.ObjectId} in a
37 * {@link org.eclipse.jgit.internal.storage.file.Pack}.
38 * <p>
39 * Indexes are strictly redundant information in that we can rebuild all of the
40 * data held in the index file from the on disk representation of the pack file
41 * itself, but it is faster to access for random requests because data is stored
42 * by ObjectId.
43 * </p>
44 */
45 public abstract class PackIndex
46 implements Iterable<PackIndex.MutableEntry>, ObjectIdSet {
47 /**
48 * Open an existing pack <code>.idx</code> file for reading.
49 * <p>
50 * The format of the file will be automatically detected and a proper access
51 * implementation for that format will be constructed and returned to the
52 * caller. The file may or may not be held open by the returned instance.
53 * </p>
54 *
55 * @param idxFile
56 * existing pack .idx to read.
57 * @return access implementation for the requested file.
58 * @throws FileNotFoundException
59 * the file does not exist.
60 * @throws java.io.IOException
61 * the file exists but could not be read due to security errors,
62 * unrecognized data version, or unexpected data corruption.
63 */
64 public static PackIndex open(File idxFile) throws IOException {
65 try (SilentFileInputStream fd = new SilentFileInputStream(
66 idxFile)) {
67 return read(fd);
68 } catch (IOException ioe) {
69 throw new IOException(
70 MessageFormat.format(JGitText.get().unreadablePackIndex,
71 idxFile.getAbsolutePath()),
72 ioe);
73 }
74 }
75
76 /**
77 * Read an existing pack index file from a buffered stream.
78 * <p>
79 * The format of the file will be automatically detected and a proper access
80 * implementation for that format will be constructed and returned to the
81 * caller. The file may or may not be held open by the returned instance.
82 *
83 * @param fd
84 * stream to read the index file from. The stream must be
85 * buffered as some small IOs are performed against the stream.
86 * The caller is responsible for closing the stream.
87 * @return a copy of the index in-memory.
88 * @throws java.io.IOException
89 * the stream cannot be read.
90 * @throws org.eclipse.jgit.errors.CorruptObjectException
91 * the stream does not contain a valid pack index.
92 */
93 public static PackIndex read(InputStream fd) throws IOException,
94 CorruptObjectException {
95 final byte[] hdr = new byte[8];
96 IO.readFully(fd, hdr, 0, hdr.length);
97 if (isTOC(hdr)) {
98 final int v = NB.decodeInt32(hdr, 4);
99 switch (v) {
100 case 2:
101 return new PackIndexV2(fd);
102 default:
103 throw new UnsupportedPackIndexVersionException(v);
104 }
105 }
106 return new PackIndexV1(fd, hdr);
107 }
108
109 private static boolean isTOC(byte[] h) {
110 final byte[] toc = PackIndexWriter.TOC;
111 for (int i = 0; i < toc.length; i++)
112 if (h[i] != toc[i])
113 return false;
114 return true;
115 }
116
117 /** Footer checksum applied on the bottom of the pack file. */
118 protected byte[] packChecksum;
119
120 /**
121 * Determine if an object is contained within the pack file.
122 *
123 * @param id
124 * the object to look for. Must not be null.
125 * @return true if the object is listed in this index; false otherwise.
126 */
127 public boolean hasObject(AnyObjectId id) {
128 return findOffset(id) != -1;
129 }
130
131 /** {@inheritDoc} */
132 @Override
133 public boolean contains(AnyObjectId id) {
134 return findOffset(id) != -1;
135 }
136
137 /**
138 * {@inheritDoc}
139 * <p>
140 * Provide iterator that gives access to index entries. Note, that iterator
141 * returns reference to mutable object, the same reference in each call -
142 * for performance reason. If client needs immutable objects, it must copy
143 * returned object on its own.
144 * <p>
145 * Iterator returns objects in SHA-1 lexicographical order.
146 * </p>
147 */
148 @Override
149 public abstract Iterator<MutableEntry> iterator();
150
151 /**
152 * Obtain the total number of objects described by this index.
153 *
154 * @return number of objects in this index, and likewise in the associated
155 * pack that this index was generated from.
156 */
157 public abstract long getObjectCount();
158
159 /**
160 * Obtain the total number of objects needing 64 bit offsets.
161 *
162 * @return number of objects in this index using a 64 bit offset; that is an
163 * object positioned after the 2 GB position within the file.
164 */
165 public abstract long getOffset64Count();
166
167 /**
168 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
169 * <p>
170 * This method is a constant-time replacement for the following loop:
171 *
172 * <pre>
173 * Iterator<MutableEntry> eItr = index.iterator();
174 * int curPosition = 0;
175 * while (eItr.hasNext() && curPosition++ < nthPosition)
176 * eItr.next();
177 * ObjectId result = eItr.next().toObjectId();
178 * </pre>
179 *
180 * @param nthPosition
181 * position within the traversal of {@link #iterator()} that the
182 * caller needs the object for. The first returned
183 * {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
184 * is 0, the second is 1, etc.
185 * @return the ObjectId for the corresponding entry.
186 */
187 public abstract ObjectId getObjectId(long nthPosition);
188
189 /**
190 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
191 * <p>
192 * This method is a constant-time replacement for the following loop:
193 *
194 * <pre>
195 * Iterator<MutableEntry> eItr = index.iterator();
196 * int curPosition = 0;
197 * while (eItr.hasNext() && curPosition++ < nthPosition)
198 * eItr.next();
199 * ObjectId result = eItr.next().toObjectId();
200 * </pre>
201 *
202 * @param nthPosition
203 * unsigned 32 bit position within the traversal of
204 * {@link #iterator()} that the caller needs the object for. The
205 * first returned
206 * {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
207 * is 0, the second is 1, etc. Positions past 2**31-1 are
208 * negative, but still valid.
209 * @return the ObjectId for the corresponding entry.
210 */
211 public final ObjectId getObjectId(int nthPosition) {
212 if (nthPosition >= 0)
213 return getObjectId((long) nthPosition);
214 final int u31 = nthPosition >>> 1;
215 final int one = nthPosition & 1;
216 return getObjectId(((long) u31) << 1 | one);
217 }
218
219 /**
220 * Get offset in a pack for the n-th object entry returned by
221 * {@link #iterator()}.
222 *
223 * @param nthPosition
224 * unsigned 32 bit position within the traversal of
225 * {@link #iterator()} for which the caller needs the offset. The
226 * first returned {@link MutableEntry} is 0, the second is 1,
227 * etc. Positions past 2**31-1 are negative, but still valid.
228 * @return the offset in a pack for the corresponding entry.
229 */
230 abstract long getOffset(long nthPosition);
231
232 /**
233 * Locate the file offset position for the requested object.
234 *
235 * @param objId
236 * name of the object to locate within the pack.
237 * @return offset of the object's header and compressed content; -1 if the
238 * object does not exist in this index and is thus not stored in the
239 * associated pack.
240 */
241 public abstract long findOffset(AnyObjectId objId);
242
243 /**
244 * Retrieve stored CRC32 checksum of the requested object raw-data
245 * (including header).
246 *
247 * @param objId
248 * id of object to look for
249 * @return CRC32 checksum of specified object (at 32 less significant bits)
250 * @throws org.eclipse.jgit.errors.MissingObjectException
251 * when requested ObjectId was not found in this index
252 * @throws java.lang.UnsupportedOperationException
253 * when this index doesn't support CRC32 checksum
254 */
255 public abstract long findCRC32(AnyObjectId objId)
256 throws MissingObjectException, UnsupportedOperationException;
257
258 /**
259 * Check whether this index supports (has) CRC32 checksums for objects.
260 *
261 * @return true if CRC32 is stored, false otherwise
262 */
263 public abstract boolean hasCRC32Support();
264
265 /**
266 * Find objects matching the prefix abbreviation.
267 *
268 * @param matches
269 * set to add any located ObjectIds to. This is an output
270 * parameter.
271 * @param id
272 * prefix to search for.
273 * @param matchLimit
274 * maximum number of results to return. At most this many
275 * ObjectIds should be added to matches before returning.
276 * @throws java.io.IOException
277 * the index cannot be read.
278 */
279 public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id,
280 int matchLimit) throws IOException;
281
282 /**
283 * @return the checksum of the pack; caller must not modify it
284 * @since 5.5
285 */
286 public byte[] getChecksum() {
287 return packChecksum;
288 }
289
290 /**
291 * Represent mutable entry of pack index consisting of object id and offset
292 * in pack (both mutable).
293 *
294 */
295 public static class MutableEntry {
296 final MutableObjectId idBuffer = new MutableObjectId();
297
298 long offset;
299
300 /**
301 * Returns offset for this index object entry
302 *
303 * @return offset of this object in a pack file
304 */
305 public long getOffset() {
306 return offset;
307 }
308
309 /** @return hex string describing the object id of this entry. */
310 public String name() {
311 ensureId();
312 return idBuffer.name();
313 }
314
315 /** @return a copy of the object id. */
316 public ObjectId toObjectId() {
317 ensureId();
318 return idBuffer.toObjectId();
319 }
320
321 /** @return a complete copy of this entry, that won't modify */
322 public MutableEntry cloneEntry() {
323 final MutableEntry r = new MutableEntry();
324 ensureId();
325 r.idBuffer.fromObjectId(idBuffer);
326 r.offset = offset;
327 return r;
328 }
329
330 void ensureId() {
331 // Override in implementations.
332 }
333 }
334
335 abstract class EntriesIterator implements Iterator<MutableEntry> {
336 protected final MutableEntry entry = initEntry();
337
338 protected long returnedNumber = 0;
339
340 protected abstract MutableEntry initEntry();
341
342 @Override
343 public boolean hasNext() {
344 return returnedNumber < getObjectCount();
345 }
346
347 /**
348 * Implementation must update {@link #returnedNumber} before returning
349 * element.
350 */
351 @Override
352 public abstract MutableEntry next();
353
354 @Override
355 public void remove() {
356 throw new UnsupportedOperationException();
357 }
358 }
359 }