1 /* 2 * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com> 3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> 4 * and other copyright owners as documented in the project's IP log. 5 * 6 * This program and the accompanying materials are made available 7 * under the terms of the Eclipse Distribution License v1.0 which 8 * accompanies this distribution, is reproduced below, and is 9 * available at http://www.eclipse.org/org/documents/edl-v10.php 10 * 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials provided 23 * with the distribution. 24 * 25 * - Neither the name of the Eclipse Foundation, Inc. nor the 26 * names of its contributors may be used to endorse or promote 27 * products derived from this software without specific prior 28 * written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 */ 44 45 package org.eclipse.jgit.internal.storage.file; 46 47 import java.io.File; 48 import java.io.FileInputStream; 49 import java.io.FileNotFoundException; 50 import java.io.IOException; 51 import java.io.InputStream; 52 import java.text.MessageFormat; 53 import java.util.Iterator; 54 import java.util.Set; 55 56 import org.eclipse.jgit.errors.CorruptObjectException; 57 import org.eclipse.jgit.errors.MissingObjectException; 58 import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException; 59 import org.eclipse.jgit.internal.JGitText; 60 import org.eclipse.jgit.lib.AbbreviatedObjectId; 61 import org.eclipse.jgit.lib.AnyObjectId; 62 import org.eclipse.jgit.lib.MutableObjectId; 63 import org.eclipse.jgit.lib.ObjectId; 64 import org.eclipse.jgit.lib.ObjectIdSet; 65 import org.eclipse.jgit.util.IO; 66 import org.eclipse.jgit.util.NB; 67 68 /** 69 * Access path to locate objects by {@link ObjectId} in a {@link PackFile}. 70 * <p> 71 * Indexes are strictly redundant information in that we can rebuild all of the 72 * data held in the index file from the on disk representation of the pack file 73 * itself, but it is faster to access for random requests because data is stored 74 * by ObjectId. 75 * </p> 76 */ 77 public abstract class PackIndex 78 implements Iterable<PackIndex.MutableEntry>, ObjectIdSet { 79 /** 80 * Open an existing pack <code>.idx</code> file for reading. 81 * <p> 82 * The format of the file will be automatically detected and a proper access 83 * implementation for that format will be constructed and returned to the 84 * caller. The file may or may not be held open by the returned instance. 85 * </p> 86 * 87 * @param idxFile 88 * existing pack .idx to read. 89 * @return access implementation for the requested file. 90 * @throws FileNotFoundException 91 * the file does not exist. 92 * @throws IOException 93 * the file exists but could not be read due to security errors, 94 * unrecognized data version, or unexpected data corruption. 95 */ 96 public static PackIndex open(final File idxFile) throws IOException { 97 final FileInputStream fd = new FileInputStream(idxFile); 98 try { 99 return read(fd); 100 } catch (IOException ioe) { 101 final String path = idxFile.getAbsolutePath(); 102 final IOException err; 103 err = new IOException(MessageFormat.format(JGitText.get().unreadablePackIndex, path)); 104 err.initCause(ioe); 105 throw err; 106 } finally { 107 try { 108 fd.close(); 109 } catch (IOException err2) { 110 // ignore 111 } 112 } 113 } 114 115 /** 116 * Read an existing pack index file from a buffered stream. 117 * <p> 118 * The format of the file will be automatically detected and a proper access 119 * implementation for that format will be constructed and returned to the 120 * caller. The file may or may not be held open by the returned instance. 121 * 122 * @param fd 123 * stream to read the index file from. The stream must be 124 * buffered as some small IOs are performed against the stream. 125 * The caller is responsible for closing the stream. 126 * @return a copy of the index in-memory. 127 * @throws IOException 128 * the stream cannot be read. 129 * @throws CorruptObjectException 130 * the stream does not contain a valid pack index. 131 */ 132 public static PackIndex read(InputStream fd) throws IOException, 133 CorruptObjectException { 134 final byte[] hdr = new byte[8]; 135 IO.readFully(fd, hdr, 0, hdr.length); 136 if (isTOC(hdr)) { 137 final int v = NB.decodeInt32(hdr, 4); 138 switch (v) { 139 case 2: 140 return new PackIndexV2(fd); 141 default: 142 throw new UnsupportedPackIndexVersionException(v); 143 } 144 } 145 return new PackIndexV1(fd, hdr); 146 } 147 148 private static boolean isTOC(final byte[] h) { 149 final byte[] toc = PackIndexWriter.TOC; 150 for (int i = 0; i < toc.length; i++) 151 if (h[i] != toc[i]) 152 return false; 153 return true; 154 } 155 156 /** Footer checksum applied on the bottom of the pack file. */ 157 protected byte[] packChecksum; 158 159 /** 160 * Determine if an object is contained within the pack file. 161 * 162 * @param id 163 * the object to look for. Must not be null. 164 * @return true if the object is listed in this index; false otherwise. 165 */ 166 public boolean hasObject(final AnyObjectId id) { 167 return findOffset(id) != -1; 168 } 169 170 @Override 171 public boolean contains(AnyObjectId id) { 172 return findOffset(id) != -1; 173 } 174 175 /** 176 * Provide iterator that gives access to index entries. Note, that iterator 177 * returns reference to mutable object, the same reference in each call - 178 * for performance reason. If client needs immutable objects, it must copy 179 * returned object on its own. 180 * <p> 181 * Iterator returns objects in SHA-1 lexicographical order. 182 * </p> 183 * 184 * @return iterator over pack index entries 185 */ 186 @Override 187 public abstract Iterator<MutableEntry> iterator(); 188 189 /** 190 * Obtain the total number of objects described by this index. 191 * 192 * @return number of objects in this index, and likewise in the associated 193 * pack that this index was generated from. 194 */ 195 public abstract long getObjectCount(); 196 197 /** 198 * Obtain the total number of objects needing 64 bit offsets. 199 * 200 * @return number of objects in this index using a 64 bit offset; that is an 201 * object positioned after the 2 GB position within the file. 202 */ 203 public abstract long getOffset64Count(); 204 205 /** 206 * Get ObjectId for the n-th object entry returned by {@link #iterator()}. 207 * <p> 208 * This method is a constant-time replacement for the following loop: 209 * 210 * <pre> 211 * Iterator<MutableEntry> eItr = index.iterator(); 212 * int curPosition = 0; 213 * while (eItr.hasNext() && curPosition++ < nthPosition) 214 * eItr.next(); 215 * ObjectId result = eItr.next().toObjectId(); 216 * </pre> 217 * 218 * @param nthPosition 219 * position within the traversal of {@link #iterator()} that the 220 * caller needs the object for. The first returned 221 * {@link MutableEntry} is 0, the second is 1, etc. 222 * @return the ObjectId for the corresponding entry. 223 */ 224 public abstract ObjectId getObjectId(long nthPosition); 225 226 /** 227 * Get ObjectId for the n-th object entry returned by {@link #iterator()}. 228 * <p> 229 * This method is a constant-time replacement for the following loop: 230 * 231 * <pre> 232 * Iterator<MutableEntry> eItr = index.iterator(); 233 * int curPosition = 0; 234 * while (eItr.hasNext() && curPosition++ < nthPosition) 235 * eItr.next(); 236 * ObjectId result = eItr.next().toObjectId(); 237 * </pre> 238 * 239 * @param nthPosition 240 * unsigned 32 bit position within the traversal of 241 * {@link #iterator()} that the caller needs the object for. The 242 * first returned {@link MutableEntry} is 0, the second is 1, 243 * etc. Positions past 2**31-1 are negative, but still valid. 244 * @return the ObjectId for the corresponding entry. 245 */ 246 public final ObjectId getObjectId(final int nthPosition) { 247 if (nthPosition >= 0) 248 return getObjectId((long) nthPosition); 249 final int u31 = nthPosition >>> 1; 250 final int one = nthPosition & 1; 251 return getObjectId(((long) u31) << 1 | one); 252 } 253 254 /** 255 * Get offset in a pack for the n-th object entry returned by 256 * {@link #iterator()}. 257 * 258 * @param nthPosition 259 * unsigned 32 bit position within the traversal of 260 * {@link #iterator()} for which the caller needs the offset. The 261 * first returned {@link MutableEntry} is 0, the second is 1, 262 * etc. Positions past 2**31-1 are negative, but still valid. 263 * @return the offset in a pack for the corresponding entry. 264 */ 265 abstract long getOffset(long nthPosition); 266 267 /** 268 * Locate the file offset position for the requested object. 269 * 270 * @param objId 271 * name of the object to locate within the pack. 272 * @return offset of the object's header and compressed content; -1 if the 273 * object does not exist in this index and is thus not stored in the 274 * associated pack. 275 */ 276 public abstract long findOffset(AnyObjectId objId); 277 278 /** 279 * Retrieve stored CRC32 checksum of the requested object raw-data 280 * (including header). 281 * 282 * @param objId 283 * id of object to look for 284 * @return CRC32 checksum of specified object (at 32 less significant bits) 285 * @throws MissingObjectException 286 * when requested ObjectId was not found in this index 287 * @throws UnsupportedOperationException 288 * when this index doesn't support CRC32 checksum 289 */ 290 public abstract long findCRC32(AnyObjectId objId) 291 throws MissingObjectException, UnsupportedOperationException; 292 293 /** 294 * Check whether this index supports (has) CRC32 checksums for objects. 295 * 296 * @return true if CRC32 is stored, false otherwise 297 */ 298 public abstract boolean hasCRC32Support(); 299 300 /** 301 * Find objects matching the prefix abbreviation. 302 * 303 * @param matches 304 * set to add any located ObjectIds to. This is an output 305 * parameter. 306 * @param id 307 * prefix to search for. 308 * @param matchLimit 309 * maximum number of results to return. At most this many 310 * ObjectIds should be added to matches before returning. 311 * @throws IOException 312 * the index cannot be read. 313 */ 314 public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id, 315 int matchLimit) throws IOException; 316 317 /** 318 * Represent mutable entry of pack index consisting of object id and offset 319 * in pack (both mutable). 320 * 321 */ 322 public static class MutableEntry { 323 final MutableObjectId idBuffer = new MutableObjectId(); 324 325 long offset; 326 327 /** 328 * Returns offset for this index object entry 329 * 330 * @return offset of this object in a pack file 331 */ 332 public long getOffset() { 333 return offset; 334 } 335 336 /** @return hex string describing the object id of this entry. */ 337 public String name() { 338 ensureId(); 339 return idBuffer.name(); 340 } 341 342 /** @return a copy of the object id. */ 343 public ObjectId toObjectId() { 344 ensureId(); 345 return idBuffer.toObjectId(); 346 } 347 348 /** @return a complete copy of this entry, that won't modify */ 349 public MutableEntry cloneEntry() { 350 final MutableEntry r = new MutableEntry(); 351 ensureId(); 352 r.idBuffer.fromObjectId(idBuffer); 353 r.offset = offset; 354 return r; 355 } 356 357 void ensureId() { 358 // Override in implementations. 359 } 360 } 361 362 abstract class EntriesIterator implements Iterator<MutableEntry> { 363 protected final MutableEntry entry = initEntry(); 364 365 protected long returnedNumber = 0; 366 367 protected abstract MutableEntry initEntry(); 368 369 @Override 370 public boolean hasNext() { 371 return returnedNumber < getObjectCount(); 372 } 373 374 /** 375 * Implementation must update {@link #returnedNumber} before returning 376 * element. 377 */ 378 @Override 379 public abstract MutableEntry next(); 380 381 @Override 382 public void remove() { 383 throw new UnsupportedOperationException(); 384 } 385 } 386 }