1 /* 2 * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com> 3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> 4 * and other copyright owners as documented in the project's IP log. 5 * 6 * This program and the accompanying materials are made available 7 * under the terms of the Eclipse Distribution License v1.0 which 8 * accompanies this distribution, is reproduced below, and is 9 * available at http://www.eclipse.org/org/documents/edl-v10.php 10 * 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials provided 23 * with the distribution. 24 * 25 * - Neither the name of the Eclipse Foundation, Inc. nor the 26 * names of its contributors may be used to endorse or promote 27 * products derived from this software without specific prior 28 * written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 */ 44 45 package org.eclipse.jgit.internal.storage.file; 46 47 import java.io.File; 48 import java.io.FileNotFoundException; 49 import java.io.IOException; 50 import java.io.InputStream; 51 import java.text.MessageFormat; 52 import java.util.Iterator; 53 import java.util.Set; 54 55 import org.eclipse.jgit.errors.CorruptObjectException; 56 import org.eclipse.jgit.errors.MissingObjectException; 57 import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException; 58 import org.eclipse.jgit.internal.JGitText; 59 import org.eclipse.jgit.lib.AbbreviatedObjectId; 60 import org.eclipse.jgit.lib.AnyObjectId; 61 import org.eclipse.jgit.lib.MutableObjectId; 62 import org.eclipse.jgit.lib.ObjectId; 63 import org.eclipse.jgit.lib.ObjectIdSet; 64 import org.eclipse.jgit.util.IO; 65 import org.eclipse.jgit.util.NB; 66 import org.eclipse.jgit.util.io.SilentFileInputStream; 67 68 /** 69 * Access path to locate objects by {@link org.eclipse.jgit.lib.ObjectId} in a 70 * {@link org.eclipse.jgit.internal.storage.file.PackFile}. 71 * <p> 72 * Indexes are strictly redundant information in that we can rebuild all of the 73 * data held in the index file from the on disk representation of the pack file 74 * itself, but it is faster to access for random requests because data is stored 75 * by ObjectId. 76 * </p> 77 */ 78 public abstract class PackIndex 79 implements Iterable<PackIndex.MutableEntry>, ObjectIdSet { 80 /** 81 * Open an existing pack <code>.idx</code> file for reading. 82 * <p> 83 * The format of the file will be automatically detected and a proper access 84 * implementation for that format will be constructed and returned to the 85 * caller. The file may or may not be held open by the returned instance. 86 * </p> 87 * 88 * @param idxFile 89 * existing pack .idx to read. 90 * @return access implementation for the requested file. 91 * @throws FileNotFoundException 92 * the file does not exist. 93 * @throws java.io.IOException 94 * the file exists but could not be read due to security errors, 95 * unrecognized data version, or unexpected data corruption. 96 */ 97 public static PackIndex open(File idxFile) throws IOException { 98 try (SilentFileInputStream fd = new SilentFileInputStream( 99 idxFile)) { 100 return read(fd); 101 } catch (IOException ioe) { 102 throw new IOException( 103 MessageFormat.format(JGitText.get().unreadablePackIndex, 104 idxFile.getAbsolutePath()), 105 ioe); 106 } 107 } 108 109 /** 110 * Read an existing pack index file from a buffered stream. 111 * <p> 112 * The format of the file will be automatically detected and a proper access 113 * implementation for that format will be constructed and returned to the 114 * caller. The file may or may not be held open by the returned instance. 115 * 116 * @param fd 117 * stream to read the index file from. The stream must be 118 * buffered as some small IOs are performed against the stream. 119 * The caller is responsible for closing the stream. 120 * @return a copy of the index in-memory. 121 * @throws java.io.IOException 122 * the stream cannot be read. 123 * @throws org.eclipse.jgit.errors.CorruptObjectException 124 * the stream does not contain a valid pack index. 125 */ 126 public static PackIndex read(InputStream fd) throws IOException, 127 CorruptObjectException { 128 final byte[] hdr = new byte[8]; 129 IO.readFully(fd, hdr, 0, hdr.length); 130 if (isTOC(hdr)) { 131 final int v = NB.decodeInt32(hdr, 4); 132 switch (v) { 133 case 2: 134 return new PackIndexV2(fd); 135 default: 136 throw new UnsupportedPackIndexVersionException(v); 137 } 138 } 139 return new PackIndexV1(fd, hdr); 140 } 141 142 private static boolean isTOC(byte[] h) { 143 final byte[] toc = PackIndexWriter.TOC; 144 for (int i = 0; i < toc.length; i++) 145 if (h[i] != toc[i]) 146 return false; 147 return true; 148 } 149 150 /** Footer checksum applied on the bottom of the pack file. */ 151 protected byte[] packChecksum; 152 153 /** 154 * Determine if an object is contained within the pack file. 155 * 156 * @param id 157 * the object to look for. Must not be null. 158 * @return true if the object is listed in this index; false otherwise. 159 */ 160 public boolean hasObject(AnyObjectId id) { 161 return findOffset(id) != -1; 162 } 163 164 /** {@inheritDoc} */ 165 @Override 166 public boolean contains(AnyObjectId id) { 167 return findOffset(id) != -1; 168 } 169 170 /** 171 * {@inheritDoc} 172 * <p> 173 * Provide iterator that gives access to index entries. Note, that iterator 174 * returns reference to mutable object, the same reference in each call - 175 * for performance reason. If client needs immutable objects, it must copy 176 * returned object on its own. 177 * <p> 178 * Iterator returns objects in SHA-1 lexicographical order. 179 * </p> 180 */ 181 @Override 182 public abstract Iterator<MutableEntry> iterator(); 183 184 /** 185 * Obtain the total number of objects described by this index. 186 * 187 * @return number of objects in this index, and likewise in the associated 188 * pack that this index was generated from. 189 */ 190 public abstract long getObjectCount(); 191 192 /** 193 * Obtain the total number of objects needing 64 bit offsets. 194 * 195 * @return number of objects in this index using a 64 bit offset; that is an 196 * object positioned after the 2 GB position within the file. 197 */ 198 public abstract long getOffset64Count(); 199 200 /** 201 * Get ObjectId for the n-th object entry returned by {@link #iterator()}. 202 * <p> 203 * This method is a constant-time replacement for the following loop: 204 * 205 * <pre> 206 * Iterator<MutableEntry> eItr = index.iterator(); 207 * int curPosition = 0; 208 * while (eItr.hasNext() && curPosition++ < nthPosition) 209 * eItr.next(); 210 * ObjectId result = eItr.next().toObjectId(); 211 * </pre> 212 * 213 * @param nthPosition 214 * position within the traversal of {@link #iterator()} that the 215 * caller needs the object for. The first returned 216 * {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry} 217 * is 0, the second is 1, etc. 218 * @return the ObjectId for the corresponding entry. 219 */ 220 public abstract ObjectId getObjectId(long nthPosition); 221 222 /** 223 * Get ObjectId for the n-th object entry returned by {@link #iterator()}. 224 * <p> 225 * This method is a constant-time replacement for the following loop: 226 * 227 * <pre> 228 * Iterator<MutableEntry> eItr = index.iterator(); 229 * int curPosition = 0; 230 * while (eItr.hasNext() && curPosition++ < nthPosition) 231 * eItr.next(); 232 * ObjectId result = eItr.next().toObjectId(); 233 * </pre> 234 * 235 * @param nthPosition 236 * unsigned 32 bit position within the traversal of 237 * {@link #iterator()} that the caller needs the object for. The 238 * first returned 239 * {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry} 240 * is 0, the second is 1, etc. Positions past 2**31-1 are 241 * negative, but still valid. 242 * @return the ObjectId for the corresponding entry. 243 */ 244 public final ObjectId getObjectId(int nthPosition) { 245 if (nthPosition >= 0) 246 return getObjectId((long) nthPosition); 247 final int u31 = nthPosition >>> 1; 248 final int one = nthPosition & 1; 249 return getObjectId(((long) u31) << 1 | one); 250 } 251 252 /** 253 * Get offset in a pack for the n-th object entry returned by 254 * {@link #iterator()}. 255 * 256 * @param nthPosition 257 * unsigned 32 bit position within the traversal of 258 * {@link #iterator()} for which the caller needs the offset. The 259 * first returned {@link MutableEntry} is 0, the second is 1, 260 * etc. Positions past 2**31-1 are negative, but still valid. 261 * @return the offset in a pack for the corresponding entry. 262 */ 263 abstract long getOffset(long nthPosition); 264 265 /** 266 * Locate the file offset position for the requested object. 267 * 268 * @param objId 269 * name of the object to locate within the pack. 270 * @return offset of the object's header and compressed content; -1 if the 271 * object does not exist in this index and is thus not stored in the 272 * associated pack. 273 */ 274 public abstract long findOffset(AnyObjectId objId); 275 276 /** 277 * Retrieve stored CRC32 checksum of the requested object raw-data 278 * (including header). 279 * 280 * @param objId 281 * id of object to look for 282 * @return CRC32 checksum of specified object (at 32 less significant bits) 283 * @throws org.eclipse.jgit.errors.MissingObjectException 284 * when requested ObjectId was not found in this index 285 * @throws java.lang.UnsupportedOperationException 286 * when this index doesn't support CRC32 checksum 287 */ 288 public abstract long findCRC32(AnyObjectId objId) 289 throws MissingObjectException, UnsupportedOperationException; 290 291 /** 292 * Check whether this index supports (has) CRC32 checksums for objects. 293 * 294 * @return true if CRC32 is stored, false otherwise 295 */ 296 public abstract boolean hasCRC32Support(); 297 298 /** 299 * Find objects matching the prefix abbreviation. 300 * 301 * @param matches 302 * set to add any located ObjectIds to. This is an output 303 * parameter. 304 * @param id 305 * prefix to search for. 306 * @param matchLimit 307 * maximum number of results to return. At most this many 308 * ObjectIds should be added to matches before returning. 309 * @throws java.io.IOException 310 * the index cannot be read. 311 */ 312 public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id, 313 int matchLimit) throws IOException; 314 315 /** 316 * Represent mutable entry of pack index consisting of object id and offset 317 * in pack (both mutable). 318 * 319 */ 320 public static class MutableEntry { 321 final MutableObjectId idBuffer = new MutableObjectId(); 322 323 long offset; 324 325 /** 326 * Returns offset for this index object entry 327 * 328 * @return offset of this object in a pack file 329 */ 330 public long getOffset() { 331 return offset; 332 } 333 334 /** @return hex string describing the object id of this entry. */ 335 public String name() { 336 ensureId(); 337 return idBuffer.name(); 338 } 339 340 /** @return a copy of the object id. */ 341 public ObjectId toObjectId() { 342 ensureId(); 343 return idBuffer.toObjectId(); 344 } 345 346 /** @return a complete copy of this entry, that won't modify */ 347 public MutableEntry cloneEntry() { 348 final MutableEntry r = new MutableEntry(); 349 ensureId(); 350 r.idBuffer.fromObjectId(idBuffer); 351 r.offset = offset; 352 return r; 353 } 354 355 void ensureId() { 356 // Override in implementations. 357 } 358 } 359 360 abstract class EntriesIterator implements Iterator<MutableEntry> { 361 protected final MutableEntry entry = initEntry(); 362 363 protected long returnedNumber = 0; 364 365 protected abstract MutableEntry initEntry(); 366 367 @Override 368 public boolean hasNext() { 369 return returnedNumber < getObjectCount(); 370 } 371 372 /** 373 * Implementation must update {@link #returnedNumber} before returning 374 * element. 375 */ 376 @Override 377 public abstract MutableEntry next(); 378 379 @Override 380 public void remove() { 381 throw new UnsupportedOperationException(); 382 } 383 } 384 }