1 /*
2 * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com>
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4 * and other copyright owners as documented in the project's IP log.
5 *
6 * This program and the accompanying materials are made available
7 * under the terms of the Eclipse Distribution License v1.0 which
8 * accompanies this distribution, is reproduced below, and is
9 * available at http://www.eclipse.org/org/documents/edl-v10.php
10 *
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials provided
23 * with the distribution.
24 *
25 * - Neither the name of the Eclipse Foundation, Inc. nor the
26 * names of its contributors may be used to endorse or promote
27 * products derived from this software without specific prior
28 * written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 package org.eclipse.jgit.internal.storage.file;
46
47 import java.io.BufferedOutputStream;
48 import java.io.IOException;
49 import java.io.OutputStream;
50 import java.security.DigestOutputStream;
51 import java.text.MessageFormat;
52 import java.util.List;
53
54 import org.eclipse.jgit.internal.JGitText;
55 import org.eclipse.jgit.lib.Constants;
56 import org.eclipse.jgit.transport.PackedObjectInfo;
57 import org.eclipse.jgit.util.NB;
58
59 /**
60 * Creates a table of contents to support random access by
61 * {@link org.eclipse.jgit.internal.storage.file.PackFile}.
62 * <p>
63 * Pack index files (the <code>.idx</code> suffix in a pack file pair) provides
64 * random access to any object in the pack by associating an ObjectId to the
65 * byte offset within the pack where the object's data can be read.
66 */
67 public abstract class PackIndexWriter {
68 /** Magic constant indicating post-version 1 format. */
69 protected static final byte[] TOC = { -1, 't', 'O', 'c' };
70
71 /**
72 * Create a new writer for the oldest (most widely understood) format.
73 * <p>
74 * This method selects an index format that can accurate describe the
75 * supplied objects and that will be the most compatible format with older
76 * Git implementations.
77 * <p>
78 * Index version 1 is widely recognized by all Git implementations, but
79 * index version 2 (and later) is not as well recognized as it was
80 * introduced more than a year later. Index version 1 can only be used if
81 * the resulting pack file is under 4 gigabytes in size; packs larger than
82 * that limit must use index version 2.
83 *
84 * @param dst
85 * the stream the index data will be written to. If not already
86 * buffered it will be automatically wrapped in a buffered
87 * stream. Callers are always responsible for closing the stream.
88 * @param objs
89 * the objects the caller needs to store in the index. Entries
90 * will be examined until a format can be conclusively selected.
91 * @return a new writer to output an index file of the requested format to
92 * the supplied stream.
93 * @throws java.lang.IllegalArgumentException
94 * no recognized pack index version can support the supplied
95 * objects. This is likely a bug in the implementation.
96 * @see #oldestPossibleFormat(List)
97 */
98 public static PackIndexWriter createOldestPossible(final OutputStream dst,
99 final List<? extends PackedObjectInfo> objs) {
100 return createVersion(dst, oldestPossibleFormat(objs));
101 }
102
103 /**
104 * Return the oldest (most widely understood) index format.
105 * <p>
106 * This method selects an index format that can accurate describe the
107 * supplied objects and that will be the most compatible format with older
108 * Git implementations.
109 * <p>
110 * Index version 1 is widely recognized by all Git implementations, but
111 * index version 2 (and later) is not as well recognized as it was
112 * introduced more than a year later. Index version 1 can only be used if
113 * the resulting pack file is under 4 gigabytes in size; packs larger than
114 * that limit must use index version 2.
115 *
116 * @param objs
117 * the objects the caller needs to store in the index. Entries
118 * will be examined until a format can be conclusively selected.
119 * @return the index format.
120 * @throws java.lang.IllegalArgumentException
121 * no recognized pack index version can support the supplied
122 * objects. This is likely a bug in the implementation.
123 */
124 public static int oldestPossibleFormat(
125 final List<? extends PackedObjectInfo> objs) {
126 for (PackedObjectInfo oe : objs) {
127 if (!PackIndexWriterV1.canStore(oe))
128 return 2;
129 }
130 return 1;
131 }
132
133
134 /**
135 * Create a new writer instance for a specific index format version.
136 *
137 * @param dst
138 * the stream the index data will be written to. If not already
139 * buffered it will be automatically wrapped in a buffered
140 * stream. Callers are always responsible for closing the stream.
141 * @param version
142 * index format version number required by the caller. Exactly
143 * this formatted version will be written.
144 * @return a new writer to output an index file of the requested format to
145 * the supplied stream.
146 * @throws java.lang.IllegalArgumentException
147 * the version requested is not supported by this
148 * implementation.
149 */
150 public static PackIndexWriter createVersion(final OutputStream dst,
151 final int version) {
152 switch (version) {
153 case 1:
154 return new PackIndexWriterV1(dst);
155 case 2:
156 return new PackIndexWriterV2(dst);
157 default:
158 throw new IllegalArgumentException(MessageFormat.format(
159 JGitText.get().unsupportedPackIndexVersion,
160 Integer.valueOf(version)));
161 }
162 }
163
164 /** The index data stream we are responsible for creating. */
165 protected final DigestOutputStream out;
166
167 /** A temporary buffer for use during IO to {link #out}. */
168 protected final byte[] tmp;
169
170 /** The entries this writer must pack. */
171 protected List<? extends PackedObjectInfo> entries;
172
173 /** SHA-1 checksum for the entire pack data. */
174 protected byte[] packChecksum;
175
176 /**
177 * Create a new writer instance.
178 *
179 * @param dst
180 * the stream this instance outputs to. If not already buffered
181 * it will be automatically wrapped in a buffered stream.
182 */
183 protected PackIndexWriter(OutputStream dst) {
184 out = new DigestOutputStream(dst instanceof BufferedOutputStream ? dst
185 : new BufferedOutputStream(dst),
186 Constants.newMessageDigest());
187 tmp = new byte[4 + Constants.OBJECT_ID_LENGTH];
188 }
189
190 /**
191 * Write all object entries to the index stream.
192 * <p>
193 * After writing the stream passed to the factory is flushed but remains
194 * open. Callers are always responsible for closing the output stream.
195 *
196 * @param toStore
197 * sorted list of objects to store in the index. The caller must
198 * have previously sorted the list using
199 * {@link org.eclipse.jgit.transport.PackedObjectInfo}'s native
200 * {@link java.lang.Comparable} implementation.
201 * @param packDataChecksum
202 * checksum signature of the entire pack data content. This is
203 * traditionally the last 20 bytes of the pack file's own stream.
204 * @throws java.io.IOException
205 * an error occurred while writing to the output stream, or this
206 * index format cannot store the object data supplied.
207 */
208 public void write(final List<? extends PackedObjectInfo> toStore,
209 final byte[] packDataChecksum) throws IOException {
210 entries = toStore;
211 packChecksum = packDataChecksum;
212 writeImpl();
213 out.flush();
214 }
215
216 /**
217 * Writes the index file to {@link #out}.
218 * <p>
219 * Implementations should go something like:
220 *
221 * <pre>
222 * writeFanOutTable();
223 * for (final PackedObjectInfo po : entries)
224 * writeOneEntry(po);
225 * writeChecksumFooter();
226 * </pre>
227 *
228 * <p>
229 * Where the logic for <code>writeOneEntry</code> is specific to the index
230 * format in use. Additional headers/footers may be used if necessary and
231 * the {@link #entries} collection may be iterated over more than once if
232 * necessary. Implementors therefore have complete control over the data.
233 *
234 * @throws java.io.IOException
235 * an error occurred while writing to the output stream, or this
236 * index format cannot store the object data supplied.
237 */
238 protected abstract void writeImpl() throws IOException;
239
240 /**
241 * Output the version 2 (and later) TOC header, with version number.
242 * <p>
243 * Post version 1 all index files start with a TOC header that makes the
244 * file an invalid version 1 file, and then includes the version number.
245 * This header is necessary to recognize a version 1 from a version 2
246 * formatted index.
247 *
248 * @param version
249 * version number of this index format being written.
250 * @throws java.io.IOException
251 * an error occurred while writing to the output stream.
252 */
253 protected void writeTOC(int version) throws IOException {
254 out.write(TOC);
255 NB.encodeInt32(tmp, 0, version);
256 out.write(tmp, 0, 4);
257 }
258
259 /**
260 * Output the standard 256 entry first-level fan-out table.
261 * <p>
262 * The fan-out table is 4 KB in size, holding 256 32-bit unsigned integer
263 * counts. Each count represents the number of objects within this index
264 * whose {@link org.eclipse.jgit.lib.ObjectId#getFirstByte()} matches the
265 * count's position in the fan-out table.
266 *
267 * @throws java.io.IOException
268 * an error occurred while writing to the output stream.
269 */
270 protected void writeFanOutTable() throws IOException {
271 final int[] fanout = new int[256];
272 for (PackedObjectInfo po : entries)
273 fanout[po.getFirstByte() & 0xff]++;
274 for (int i = 1; i < 256; i++)
275 fanout[i] += fanout[i - 1];
276 for (int n : fanout) {
277 NB.encodeInt32(tmp, 0, n);
278 out.write(tmp, 0, 4);
279 }
280 }
281
282 /**
283 * Output the standard two-checksum index footer.
284 * <p>
285 * The standard footer contains two checksums (20 byte SHA-1 values):
286 * <ol>
287 * <li>Pack data checksum - taken from the last 20 bytes of the pack file.</li>
288 * <li>Index data checksum - checksum of all index bytes written, including
289 * the pack data checksum above.</li>
290 * </ol>
291 *
292 * @throws java.io.IOException
293 * an error occurred while writing to the output stream.
294 */
295 protected void writeChecksumFooter() throws IOException {
296 out.write(packChecksum);
297 out.on(false);
298 out.write(out.getMessageDigest().digest());
299 }
300 }