1 /* 2 * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com> 3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> 4 * and other copyright owners as documented in the project's IP log. 5 * 6 * This program and the accompanying materials are made available 7 * under the terms of the Eclipse Distribution License v1.0 which 8 * accompanies this distribution, is reproduced below, and is 9 * available at http://www.eclipse.org/org/documents/edl-v10.php 10 * 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials provided 23 * with the distribution. 24 * 25 * - Neither the name of the Eclipse Foundation, Inc. nor the 26 * names of its contributors may be used to endorse or promote 27 * products derived from this software without specific prior 28 * written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 */ 44 45 package org.eclipse.jgit.internal.storage.file; 46 47 import java.io.BufferedOutputStream; 48 import java.io.IOException; 49 import java.io.OutputStream; 50 import java.security.DigestOutputStream; 51 import java.text.MessageFormat; 52 import java.util.List; 53 54 import org.eclipse.jgit.internal.JGitText; 55 import org.eclipse.jgit.lib.Constants; 56 import org.eclipse.jgit.lib.ObjectId; 57 import org.eclipse.jgit.transport.PackedObjectInfo; 58 import org.eclipse.jgit.util.NB; 59 60 /** 61 * Creates a table of contents to support random access by {@link PackFile}. 62 * <p> 63 * Pack index files (the <code>.idx</code> suffix in a pack file pair) 64 * provides random access to any object in the pack by associating an ObjectId 65 * to the byte offset within the pack where the object's data can be read. 66 */ 67 public abstract class PackIndexWriter { 68 /** Magic constant indicating post-version 1 format. */ 69 protected static final byte[] TOC = { -1, 't', 'O', 'c' }; 70 71 /** 72 * Create a new writer for the oldest (most widely understood) format. 73 * <p> 74 * This method selects an index format that can accurate describe the 75 * supplied objects and that will be the most compatible format with older 76 * Git implementations. 77 * <p> 78 * Index version 1 is widely recognized by all Git implementations, but 79 * index version 2 (and later) is not as well recognized as it was 80 * introduced more than a year later. Index version 1 can only be used if 81 * the resulting pack file is under 4 gigabytes in size; packs larger than 82 * that limit must use index version 2. 83 * 84 * @param dst 85 * the stream the index data will be written to. If not already 86 * buffered it will be automatically wrapped in a buffered 87 * stream. Callers are always responsible for closing the stream. 88 * @param objs 89 * the objects the caller needs to store in the index. Entries 90 * will be examined until a format can be conclusively selected. 91 * @return a new writer to output an index file of the requested format to 92 * the supplied stream. 93 * @throws IllegalArgumentException 94 * no recognized pack index version can support the supplied 95 * objects. This is likely a bug in the implementation. 96 * @see #oldestPossibleFormat(List) 97 */ 98 public static PackIndexWriter createOldestPossible(final OutputStream dst, 99 final List<? extends PackedObjectInfo> objs) { 100 return createVersion(dst, oldestPossibleFormat(objs)); 101 } 102 103 /** 104 * Return the oldest (most widely understood) index format. 105 * <p> 106 * This method selects an index format that can accurate describe the 107 * supplied objects and that will be the most compatible format with older 108 * Git implementations. 109 * <p> 110 * Index version 1 is widely recognized by all Git implementations, but 111 * index version 2 (and later) is not as well recognized as it was 112 * introduced more than a year later. Index version 1 can only be used if 113 * the resulting pack file is under 4 gigabytes in size; packs larger than 114 * that limit must use index version 2. 115 * 116 * @param objs 117 * the objects the caller needs to store in the index. Entries 118 * will be examined until a format can be conclusively selected. 119 * @return the index format. 120 * @throws IllegalArgumentException 121 * no recognized pack index version can support the supplied 122 * objects. This is likely a bug in the implementation. 123 */ 124 public static int oldestPossibleFormat( 125 final List<? extends PackedObjectInfo> objs) { 126 for (final PackedObjectInfo oe : objs) { 127 if (!PackIndexWriterV1.canStore(oe)) 128 return 2; 129 } 130 return 1; 131 } 132 133 134 /** 135 * Create a new writer instance for a specific index format version. 136 * 137 * @param dst 138 * the stream the index data will be written to. If not already 139 * buffered it will be automatically wrapped in a buffered 140 * stream. Callers are always responsible for closing the stream. 141 * @param version 142 * index format version number required by the caller. Exactly 143 * this formatted version will be written. 144 * @return a new writer to output an index file of the requested format to 145 * the supplied stream. 146 * @throws IllegalArgumentException 147 * the version requested is not supported by this 148 * implementation. 149 */ 150 public static PackIndexWriter createVersion(final OutputStream dst, 151 final int version) { 152 switch (version) { 153 case 1: 154 return new PackIndexWriterV1(dst); 155 case 2: 156 return new PackIndexWriterV2(dst); 157 default: 158 throw new IllegalArgumentException(MessageFormat.format( 159 JGitText.get().unsupportedPackIndexVersion, 160 Integer.valueOf(version))); 161 } 162 } 163 164 /** The index data stream we are responsible for creating. */ 165 protected final DigestOutputStream out; 166 167 /** A temporary buffer for use during IO to {link #out}. */ 168 protected final byte[] tmp; 169 170 /** The entries this writer must pack. */ 171 protected List<? extends PackedObjectInfo> entries; 172 173 /** SHA-1 checksum for the entire pack data. */ 174 protected byte[] packChecksum; 175 176 /** 177 * Create a new writer instance. 178 * 179 * @param dst 180 * the stream this instance outputs to. If not already buffered 181 * it will be automatically wrapped in a buffered stream. 182 */ 183 protected PackIndexWriter(final OutputStream dst) { 184 out = new DigestOutputStream(dst instanceof BufferedOutputStream ? dst 185 : new BufferedOutputStream(dst), 186 Constants.newMessageDigest()); 187 tmp = new byte[4 + Constants.OBJECT_ID_LENGTH]; 188 } 189 190 /** 191 * Write all object entries to the index stream. 192 * <p> 193 * After writing the stream passed to the factory is flushed but remains 194 * open. Callers are always responsible for closing the output stream. 195 * 196 * @param toStore 197 * sorted list of objects to store in the index. The caller must 198 * have previously sorted the list using {@link PackedObjectInfo}'s 199 * native {@link Comparable} implementation. 200 * @param packDataChecksum 201 * checksum signature of the entire pack data content. This is 202 * traditionally the last 20 bytes of the pack file's own stream. 203 * @throws IOException 204 * an error occurred while writing to the output stream, or this 205 * index format cannot store the object data supplied. 206 */ 207 public void write(final List<? extends PackedObjectInfo> toStore, 208 final byte[] packDataChecksum) throws IOException { 209 entries = toStore; 210 packChecksum = packDataChecksum; 211 writeImpl(); 212 out.flush(); 213 } 214 215 /** 216 * Writes the index file to {@link #out}. 217 * <p> 218 * Implementations should go something like: 219 * 220 * <pre> 221 * writeFanOutTable(); 222 * for (final PackedObjectInfo po : entries) 223 * writeOneEntry(po); 224 * writeChecksumFooter(); 225 * </pre> 226 * 227 * <p> 228 * Where the logic for <code>writeOneEntry</code> is specific to the index 229 * format in use. Additional headers/footers may be used if necessary and 230 * the {@link #entries} collection may be iterated over more than once if 231 * necessary. Implementors therefore have complete control over the data. 232 * 233 * @throws IOException 234 * an error occurred while writing to the output stream, or this 235 * index format cannot store the object data supplied. 236 */ 237 protected abstract void writeImpl() throws IOException; 238 239 /** 240 * Output the version 2 (and later) TOC header, with version number. 241 * <p> 242 * Post version 1 all index files start with a TOC header that makes the 243 * file an invalid version 1 file, and then includes the version number. 244 * This header is necessary to recognize a version 1 from a version 2 245 * formatted index. 246 * 247 * @param version 248 * version number of this index format being written. 249 * @throws IOException 250 * an error occurred while writing to the output stream. 251 */ 252 protected void writeTOC(final int version) throws IOException { 253 out.write(TOC); 254 NB.encodeInt32(tmp, 0, version); 255 out.write(tmp, 0, 4); 256 } 257 258 /** 259 * Output the standard 256 entry first-level fan-out table. 260 * <p> 261 * The fan-out table is 4 KB in size, holding 256 32-bit unsigned integer 262 * counts. Each count represents the number of objects within this index 263 * whose {@link ObjectId#getFirstByte()} matches the count's position in the 264 * fan-out table. 265 * 266 * @throws IOException 267 * an error occurred while writing to the output stream. 268 */ 269 protected void writeFanOutTable() throws IOException { 270 final int[] fanout = new int[256]; 271 for (final PackedObjectInfo po : entries) 272 fanout[po.getFirstByte() & 0xff]++; 273 for (int i = 1; i < 256; i++) 274 fanout[i] += fanout[i - 1]; 275 for (final int n : fanout) { 276 NB.encodeInt32(tmp, 0, n); 277 out.write(tmp, 0, 4); 278 } 279 } 280 281 /** 282 * Output the standard two-checksum index footer. 283 * <p> 284 * The standard footer contains two checksums (20 byte SHA-1 values): 285 * <ol> 286 * <li>Pack data checksum - taken from the last 20 bytes of the pack file.</li> 287 * <li>Index data checksum - checksum of all index bytes written, including 288 * the pack data checksum above.</li> 289 * </ol> 290 * 291 * @throws IOException 292 * an error occurred while writing to the output stream. 293 */ 294 protected void writeChecksumFooter() throws IOException { 295 out.write(packChecksum); 296 out.on(false); 297 out.write(out.getMessageDigest().digest()); 298 } 299 }