1 /* 2 * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com> 3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others 4 * 5 * This program and the accompanying materials are made available under the 6 * terms of the Eclipse Distribution License v. 1.0 which is available at 7 * https://www.eclipse.org/org/documents/edl-v10.php. 8 * 9 * SPDX-License-Identifier: BSD-3-Clause 10 */ 11 12 package org.eclipse.jgit.internal.storage.file; 13 14 import java.io.File; 15 import java.io.FileNotFoundException; 16 import java.io.IOException; 17 import java.io.InputStream; 18 import java.text.MessageFormat; 19 import java.util.Iterator; 20 import java.util.Set; 21 22 import org.eclipse.jgit.errors.CorruptObjectException; 23 import org.eclipse.jgit.errors.MissingObjectException; 24 import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException; 25 import org.eclipse.jgit.internal.JGitText; 26 import org.eclipse.jgit.lib.AbbreviatedObjectId; 27 import org.eclipse.jgit.lib.AnyObjectId; 28 import org.eclipse.jgit.lib.MutableObjectId; 29 import org.eclipse.jgit.lib.ObjectId; 30 import org.eclipse.jgit.lib.ObjectIdSet; 31 import org.eclipse.jgit.util.IO; 32 import org.eclipse.jgit.util.NB; 33 import org.eclipse.jgit.util.io.SilentFileInputStream; 34 35 /** 36 * Access path to locate objects by {@link org.eclipse.jgit.lib.ObjectId} in a 37 * {@link org.eclipse.jgit.internal.storage.file.Pack}. 38 * <p> 39 * Indexes are strictly redundant information in that we can rebuild all of the 40 * data held in the index file from the on disk representation of the pack file 41 * itself, but it is faster to access for random requests because data is stored 42 * by ObjectId. 43 * </p> 44 */ 45 public abstract class PackIndex 46 implements Iterable<PackIndex.MutableEntry>, ObjectIdSet { 47 /** 48 * Open an existing pack <code>.idx</code> file for reading. 49 * <p> 50 * The format of the file will be automatically detected and a proper access 51 * implementation for that format will be constructed and returned to the 52 * caller. The file may or may not be held open by the returned instance. 53 * </p> 54 * 55 * @param idxFile 56 * existing pack .idx to read. 57 * @return access implementation for the requested file. 58 * @throws FileNotFoundException 59 * the file does not exist. 60 * @throws java.io.IOException 61 * the file exists but could not be read due to security errors, 62 * unrecognized data version, or unexpected data corruption. 63 */ 64 public static PackIndex open(File idxFile) throws IOException { 65 try (SilentFileInputStream fd = new SilentFileInputStream( 66 idxFile)) { 67 return read(fd); 68 } catch (IOException ioe) { 69 throw new IOException( 70 MessageFormat.format(JGitText.get().unreadablePackIndex, 71 idxFile.getAbsolutePath()), 72 ioe); 73 } 74 } 75 76 /** 77 * Read an existing pack index file from a buffered stream. 78 * <p> 79 * The format of the file will be automatically detected and a proper access 80 * implementation for that format will be constructed and returned to the 81 * caller. The file may or may not be held open by the returned instance. 82 * 83 * @param fd 84 * stream to read the index file from. The stream must be 85 * buffered as some small IOs are performed against the stream. 86 * The caller is responsible for closing the stream. 87 * @return a copy of the index in-memory. 88 * @throws java.io.IOException 89 * the stream cannot be read. 90 * @throws org.eclipse.jgit.errors.CorruptObjectException 91 * the stream does not contain a valid pack index. 92 */ 93 public static PackIndex read(InputStream fd) throws IOException, 94 CorruptObjectException { 95 final byte[] hdr = new byte[8]; 96 IO.readFully(fd, hdr, 0, hdr.length); 97 if (isTOC(hdr)) { 98 final int v = NB.decodeInt32(hdr, 4); 99 switch (v) { 100 case 2: 101 return new PackIndexV2(fd); 102 default: 103 throw new UnsupportedPackIndexVersionException(v); 104 } 105 } 106 return new PackIndexV1(fd, hdr); 107 } 108 109 private static boolean isTOC(byte[] h) { 110 final byte[] toc = PackIndexWriter.TOC; 111 for (int i = 0; i < toc.length; i++) 112 if (h[i] != toc[i]) 113 return false; 114 return true; 115 } 116 117 /** Footer checksum applied on the bottom of the pack file. */ 118 protected byte[] packChecksum; 119 120 /** 121 * Determine if an object is contained within the pack file. 122 * 123 * @param id 124 * the object to look for. Must not be null. 125 * @return true if the object is listed in this index; false otherwise. 126 */ 127 public boolean hasObject(AnyObjectId id) { 128 return findOffset(id) != -1; 129 } 130 131 /** {@inheritDoc} */ 132 @Override 133 public boolean contains(AnyObjectId id) { 134 return findOffset(id) != -1; 135 } 136 137 /** 138 * {@inheritDoc} 139 * <p> 140 * Provide iterator that gives access to index entries. Note, that iterator 141 * returns reference to mutable object, the same reference in each call - 142 * for performance reason. If client needs immutable objects, it must copy 143 * returned object on its own. 144 * <p> 145 * Iterator returns objects in SHA-1 lexicographical order. 146 * </p> 147 */ 148 @Override 149 public abstract Iterator<MutableEntry> iterator(); 150 151 /** 152 * Obtain the total number of objects described by this index. 153 * 154 * @return number of objects in this index, and likewise in the associated 155 * pack that this index was generated from. 156 */ 157 public abstract long getObjectCount(); 158 159 /** 160 * Obtain the total number of objects needing 64 bit offsets. 161 * 162 * @return number of objects in this index using a 64 bit offset; that is an 163 * object positioned after the 2 GB position within the file. 164 */ 165 public abstract long getOffset64Count(); 166 167 /** 168 * Get ObjectId for the n-th object entry returned by {@link #iterator()}. 169 * <p> 170 * This method is a constant-time replacement for the following loop: 171 * 172 * <pre> 173 * Iterator<MutableEntry> eItr = index.iterator(); 174 * int curPosition = 0; 175 * while (eItr.hasNext() && curPosition++ < nthPosition) 176 * eItr.next(); 177 * ObjectId result = eItr.next().toObjectId(); 178 * </pre> 179 * 180 * @param nthPosition 181 * position within the traversal of {@link #iterator()} that the 182 * caller needs the object for. The first returned 183 * {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry} 184 * is 0, the second is 1, etc. 185 * @return the ObjectId for the corresponding entry. 186 */ 187 public abstract ObjectId getObjectId(long nthPosition); 188 189 /** 190 * Get ObjectId for the n-th object entry returned by {@link #iterator()}. 191 * <p> 192 * This method is a constant-time replacement for the following loop: 193 * 194 * <pre> 195 * Iterator<MutableEntry> eItr = index.iterator(); 196 * int curPosition = 0; 197 * while (eItr.hasNext() && curPosition++ < nthPosition) 198 * eItr.next(); 199 * ObjectId result = eItr.next().toObjectId(); 200 * </pre> 201 * 202 * @param nthPosition 203 * unsigned 32 bit position within the traversal of 204 * {@link #iterator()} that the caller needs the object for. The 205 * first returned 206 * {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry} 207 * is 0, the second is 1, etc. Positions past 2**31-1 are 208 * negative, but still valid. 209 * @return the ObjectId for the corresponding entry. 210 */ 211 public final ObjectId getObjectId(int nthPosition) { 212 if (nthPosition >= 0) 213 return getObjectId((long) nthPosition); 214 final int u31 = nthPosition >>> 1; 215 final int one = nthPosition & 1; 216 return getObjectId(((long) u31) << 1 | one); 217 } 218 219 /** 220 * Get offset in a pack for the n-th object entry returned by 221 * {@link #iterator()}. 222 * 223 * @param nthPosition 224 * unsigned 32 bit position within the traversal of 225 * {@link #iterator()} for which the caller needs the offset. The 226 * first returned {@link MutableEntry} is 0, the second is 1, 227 * etc. Positions past 2**31-1 are negative, but still valid. 228 * @return the offset in a pack for the corresponding entry. 229 */ 230 abstract long getOffset(long nthPosition); 231 232 /** 233 * Locate the file offset position for the requested object. 234 * 235 * @param objId 236 * name of the object to locate within the pack. 237 * @return offset of the object's header and compressed content; -1 if the 238 * object does not exist in this index and is thus not stored in the 239 * associated pack. 240 */ 241 public abstract long findOffset(AnyObjectId objId); 242 243 /** 244 * Retrieve stored CRC32 checksum of the requested object raw-data 245 * (including header). 246 * 247 * @param objId 248 * id of object to look for 249 * @return CRC32 checksum of specified object (at 32 less significant bits) 250 * @throws org.eclipse.jgit.errors.MissingObjectException 251 * when requested ObjectId was not found in this index 252 * @throws java.lang.UnsupportedOperationException 253 * when this index doesn't support CRC32 checksum 254 */ 255 public abstract long findCRC32(AnyObjectId objId) 256 throws MissingObjectException, UnsupportedOperationException; 257 258 /** 259 * Check whether this index supports (has) CRC32 checksums for objects. 260 * 261 * @return true if CRC32 is stored, false otherwise 262 */ 263 public abstract boolean hasCRC32Support(); 264 265 /** 266 * Find objects matching the prefix abbreviation. 267 * 268 * @param matches 269 * set to add any located ObjectIds to. This is an output 270 * parameter. 271 * @param id 272 * prefix to search for. 273 * @param matchLimit 274 * maximum number of results to return. At most this many 275 * ObjectIds should be added to matches before returning. 276 * @throws java.io.IOException 277 * the index cannot be read. 278 */ 279 public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id, 280 int matchLimit) throws IOException; 281 282 /** 283 * @return the checksum of the pack; caller must not modify it 284 * @since 5.5 285 */ 286 public byte[] getChecksum() { 287 return packChecksum; 288 } 289 290 /** 291 * Represent mutable entry of pack index consisting of object id and offset 292 * in pack (both mutable). 293 * 294 */ 295 public static class MutableEntry { 296 final MutableObjectId idBuffer = new MutableObjectId(); 297 298 long offset; 299 300 /** 301 * Returns offset for this index object entry 302 * 303 * @return offset of this object in a pack file 304 */ 305 public long getOffset() { 306 return offset; 307 } 308 309 /** @return hex string describing the object id of this entry. */ 310 public String name() { 311 ensureId(); 312 return idBuffer.name(); 313 } 314 315 /** @return a copy of the object id. */ 316 public ObjectId toObjectId() { 317 ensureId(); 318 return idBuffer.toObjectId(); 319 } 320 321 /** @return a complete copy of this entry, that won't modify */ 322 public MutableEntry cloneEntry() { 323 final MutableEntry r = new MutableEntry(); 324 ensureId(); 325 r.idBuffer.fromObjectId(idBuffer); 326 r.offset = offset; 327 return r; 328 } 329 330 void ensureId() { 331 // Override in implementations. 332 } 333 } 334 335 abstract class EntriesIterator implements Iterator<MutableEntry> { 336 protected final MutableEntry entry = initEntry(); 337 338 protected long returnedNumber = 0; 339 340 protected abstract MutableEntry initEntry(); 341 342 @Override 343 public boolean hasNext() { 344 return returnedNumber < getObjectCount(); 345 } 346 347 /** 348 * Implementation must update {@link #returnedNumber} before returning 349 * element. 350 */ 351 @Override 352 public abstract MutableEntry next(); 353 354 @Override 355 public void remove() { 356 throw new UnsupportedOperationException(); 357 } 358 } 359 }