/*
 * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com>
 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

package org.eclipse.jgit.internal.storage.file;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.security.DigestOutputStream;
import java.text.MessageFormat;
import java.util.List;

import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.transport.PackedObjectInfo;
import org.eclipse.jgit.util.NB;

/**
 * Writes the table of contents that gives
 * {@link org.eclipse.jgit.internal.storage.file.PackFile} random access to a
 * pack.
 * <p>
 * A pack index file (the <code>.idx</code> half of a pack file pair) maps each
 * ObjectId to the byte offset within the pack where that object's data can be
 * read, so any object in the pack can be located directly.
 */
public abstract class PackIndexWriter {
	/** Magic constant marking an index written in a post-version 1 format. */
	protected static final byte[] TOC = { -1, 't', 'O', 'c' };

	/**
	 * Create a new writer using the oldest (most widely understood) format.
	 * <p>
	 * The format chosen is the one that can accurately describe the supplied
	 * objects while remaining the most compatible with older Git
	 * implementations.
	 * <p>
	 * Index version 1 is widely recognized by all Git implementations, whereas
	 * index version 2 (and later) is less widely recognized because it was
	 * introduced more than a year later. Index version 1 is only usable when
	 * the resulting pack file is under 4 gigabytes in size; larger packs must
	 * use index version 2.
	 *
	 * @param dst
	 *            the stream the index data will be written to. If it is not
	 *            already buffered it is wrapped in a buffered stream
	 *            automatically. Closing the stream is always the caller's
	 *            responsibility.
	 * @param objs
	 *            the objects the caller needs to store in the index. Entries
	 *            are examined until a format can be conclusively selected.
	 * @return a new writer that outputs an index file of the selected format
	 *         to the supplied stream.
	 * @throws java.lang.IllegalArgumentException
	 *             no recognized pack index version can support the supplied
	 *             objects. This is likely a bug in the implementation.
	 * @see #oldestPossibleFormat(List)
	 */
	public static PackIndexWriter createOldestPossible(OutputStream dst,
			List<? extends PackedObjectInfo> objs) {
		int selectedVersion = oldestPossibleFormat(objs);
		return createVersion(dst, selectedVersion);
	}

	/**
	 * Determine the oldest (most widely understood) usable index format.
	 * <p>
	 * The format chosen is the one that can accurately describe the supplied
	 * objects while remaining the most compatible with older Git
	 * implementations.
	 * <p>
	 * Index version 1 is widely recognized by all Git implementations, whereas
	 * index version 2 (and later) is less widely recognized because it was
	 * introduced more than a year later. Index version 1 is only usable when
	 * the resulting pack file is under 4 gigabytes in size; larger packs must
	 * use index version 2.
	 *
	 * @param objs
	 *            the objects the caller needs to store in the index. Entries
	 *            are examined until a format can be conclusively selected.
	 * @return the index format: 1 when every entry can be stored by the
	 *         version 1 writer, otherwise 2.
	 * @throws java.lang.IllegalArgumentException
	 *             no recognized pack index version can support the supplied
	 *             objects. This is likely a bug in the implementation.
	 */
	public static int oldestPossibleFormat(
			List<? extends PackedObjectInfo> objs) {
		int format = 1;
		for (PackedObjectInfo candidate : objs) {
			if (!PackIndexWriterV1.canStore(candidate)) {
				// One entry version 1 cannot hold settles the answer.
				format = 2;
				break;
			}
		}
		return format;
	}

	/**
	 * Create a new writer instance for one specific index format version.
	 *
	 * @param dst
	 *            the stream the index data will be written to. If it is not
	 *            already buffered it is wrapped in a buffered stream
	 *            automatically. Closing the stream is always the caller's
	 *            responsibility.
	 * @param version
	 *            index format version number required by the caller. Exactly
	 *            this format version will be written.
	 * @return a new writer that outputs an index file of the requested format
	 *         to the supplied stream.
	 * @throws java.lang.IllegalArgumentException
	 *             the requested version is not supported by this
	 *             implementation.
	 */
	public static PackIndexWriter createVersion(OutputStream dst,
			int version) {
		if (version == 1) {
			return new PackIndexWriterV1(dst);
		}
		if (version == 2) {
			return new PackIndexWriterV2(dst);
		}
		String message = MessageFormat.format(
				JGitText.get().unsupportedPackIndexVersion,
				Integer.valueOf(version));
		throw new IllegalArgumentException(message);
	}

	/** The index data stream this writer is responsible for producing. */
	protected final DigestOutputStream out;

	/**
	 * Scratch buffer used while performing IO to {@link #out}; sized to hold
	 * 4 bytes plus one object id.
	 */
	protected final byte[] tmp;

	/** The entries this writer must store in the index. */
	protected List<? extends PackedObjectInfo> entries;

	/** SHA-1 checksum for the entire pack data. */
	protected byte[] packChecksum;

	/**
	 * Create a new writer instance.
	 *
	 * @param dst
	 *            the stream this instance outputs to. If it is not already
	 *            buffered it is wrapped in a buffered stream automatically.
	 */
	protected PackIndexWriter(OutputStream dst) {
		OutputStream buffered;
		if (dst instanceof BufferedOutputStream) {
			buffered = dst;
		} else {
			buffered = new BufferedOutputStream(dst);
		}
		out = new DigestOutputStream(buffered, Constants.newMessageDigest());
		tmp = new byte[4 + Constants.OBJECT_ID_LENGTH];
	}

	/**
	 * Write all object entries to the index stream.
	 * <p>
	 * Once writing completes the stream handed to the factory is flushed but
	 * left open. Closing the output stream is always the caller's
	 * responsibility.
	 *
	 * @param toStore
	 *            sorted list of objects to store in the index. The caller must
	 *            already have sorted the list using
	 *            {@link org.eclipse.jgit.transport.PackedObjectInfo}'s native
	 *            {@link java.lang.Comparable} implementation.
	 * @param packDataChecksum
	 *            checksum signature of the entire pack data content. This is
	 *            traditionally the last 20 bytes of the pack file's own
	 *            stream.
	 * @throws java.io.IOException
	 *             an error occurred while writing to the output stream, or
	 *             this index format cannot store the object data supplied.
	 */
	public void write(List<? extends PackedObjectInfo> toStore,
			byte[] packDataChecksum) throws IOException {
		// Publish the inputs through the fields the format-specific
		// implementation reads from.
		entries = toStore;
		packChecksum = packDataChecksum;

		writeImpl();
		out.flush();
	}

	/**
	 * Writes the index file to {@link #out}.
	 * <p>
	 * An implementation is expected to look roughly like:
	 *
	 * <pre>
	 * writeFanOutTable();
	 * for (final PackedObjectInfo po : entries)
	 * 	writeOneEntry(po);
	 * writeChecksumFooter();
	 * </pre>
	 *
	 * <p>
	 * Here the logic of <code>writeOneEntry</code> is specific to the index
	 * format in use. Additional headers/footers may be emitted if required and
	 * the {@link #entries} collection may be iterated more than once if
	 * necessary, so implementors keep complete control over the data.
	 *
	 * @throws java.io.IOException
	 *             an error occurred while writing to the output stream, or
	 *             this index format cannot store the object data supplied.
	 */
	protected abstract void writeImpl() throws IOException;

	/**
	 * Output the version 2 (and later) TOC header, with version number.
	 * <p>
	 * After version 1 every index file begins with a TOC header that makes the
	 * file an invalid version 1 file, followed by the version number. The
	 * header is what allows a version 1 index to be told apart from a version
	 * 2 formatted index.
	 *
	 * @param version
	 *            version number of the index format being written.
	 * @throws java.io.IOException
	 *             an error occurred while writing to the output stream.
	 */
	protected void writeTOC(int version) throws IOException {
		out.write(TOC);

		// The version follows the magic bytes as a 4 byte integer.
		NB.encodeInt32(tmp, 0, version);
		out.write(tmp, 0, 4);
	}

	/**
	 * Output the standard 256 entry first-level fan-out table.
	 * <p>
	 * The table occupies 1 KB: 256 integers of 4 bytes each. The entry at
	 * position <code>i</code> is a cumulative count, holding the number of
	 * objects within this index whose
	 * {@link org.eclipse.jgit.lib.ObjectId#getFirstByte()} is less than or
	 * equal to <code>i</code>.
	 *
	 * @throws java.io.IOException
	 *             an error occurred while writing to the output stream.
	 */
	protected void writeFanOutTable() throws IOException {
		// Tally how many objects start with each possible first byte.
		final int[] perByte = new int[256];
		for (PackedObjectInfo po : entries) {
			perByte[po.getFirstByte() & 0xff]++;
		}

		// Emit the running total so each slot covers all smaller bytes too.
		int cumulative = 0;
		for (int slot = 0; slot < perByte.length; slot++) {
			cumulative += perByte[slot];
			NB.encodeInt32(tmp, 0, cumulative);
			out.write(tmp, 0, 4);
		}
	}

	/**
	 * Output the standard two-checksum index footer.
	 * <p>
	 * The standard footer consists of two checksums (20 byte SHA-1 values):
	 * <ol>
	 * <li>Pack data checksum - taken from the last 20 bytes of the pack
	 * file.</li>
	 * <li>Index data checksum - checksum of all index bytes written, including
	 * the pack data checksum above.</li>
	 * </ol>
	 *
	 * @throws java.io.IOException
	 *             an error occurred while writing to the output stream.
	 */
	protected void writeChecksumFooter() throws IOException {
		out.write(packChecksum);

		// Stop digesting first: the index checksum must not cover itself.
		out.on(false);
		final byte[] indexChecksum = out.getMessageDigest().digest();
		out.write(indexChecksum);
	}
}