1 /*
2 * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com>
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
4 *
5 * This program and the accompanying materials are made available under the
6 * terms of the Eclipse Distribution License v. 1.0 which is available at
7 * https://www.eclipse.org/org/documents/edl-v10.php.
8 *
9 * SPDX-License-Identifier: BSD-3-Clause
10 */
11
12 package org.eclipse.jgit.internal.storage.file;
13
14 import java.io.BufferedOutputStream;
15 import java.io.IOException;
16 import java.io.OutputStream;
17 import java.security.DigestOutputStream;
18 import java.text.MessageFormat;
19 import java.util.List;
20
21 import org.eclipse.jgit.internal.JGitText;
22 import org.eclipse.jgit.lib.Constants;
23 import org.eclipse.jgit.transport.PackedObjectInfo;
24 import org.eclipse.jgit.util.NB;
25
26 /**
27 * Creates a table of contents to support random access by
28 * {@link org.eclipse.jgit.internal.storage.file.PackFile}.
29 * <p>
30 * Pack index files (the <code>.idx</code> suffix in a pack file pair) provides
31 * random access to any object in the pack by associating an ObjectId to the
32 * byte offset within the pack where the object's data can be read.
33 */
34 public abstract class PackIndexWriter {
35 /** Magic constant indicating post-version 1 format. */
36 protected static final byte[] TOC = { -1, 't', 'O', 'c' };
37
38 /**
39 * Create a new writer for the oldest (most widely understood) format.
40 * <p>
41 * This method selects an index format that can accurate describe the
42 * supplied objects and that will be the most compatible format with older
43 * Git implementations.
44 * <p>
45 * Index version 1 is widely recognized by all Git implementations, but
46 * index version 2 (and later) is not as well recognized as it was
47 * introduced more than a year later. Index version 1 can only be used if
48 * the resulting pack file is under 4 gigabytes in size; packs larger than
49 * that limit must use index version 2.
50 *
51 * @param dst
52 * the stream the index data will be written to. If not already
53 * buffered it will be automatically wrapped in a buffered
54 * stream. Callers are always responsible for closing the stream.
55 * @param objs
56 * the objects the caller needs to store in the index. Entries
57 * will be examined until a format can be conclusively selected.
58 * @return a new writer to output an index file of the requested format to
59 * the supplied stream.
60 * @throws java.lang.IllegalArgumentException
61 * no recognized pack index version can support the supplied
62 * objects. This is likely a bug in the implementation.
63 * @see #oldestPossibleFormat(List)
64 */
65 public static PackIndexWriter createOldestPossible(final OutputStream dst,
66 final List<? extends PackedObjectInfo> objs) {
67 return createVersion(dst, oldestPossibleFormat(objs));
68 }
69
70 /**
71 * Return the oldest (most widely understood) index format.
72 * <p>
73 * This method selects an index format that can accurate describe the
74 * supplied objects and that will be the most compatible format with older
75 * Git implementations.
76 * <p>
77 * Index version 1 is widely recognized by all Git implementations, but
78 * index version 2 (and later) is not as well recognized as it was
79 * introduced more than a year later. Index version 1 can only be used if
80 * the resulting pack file is under 4 gigabytes in size; packs larger than
81 * that limit must use index version 2.
82 *
83 * @param objs
84 * the objects the caller needs to store in the index. Entries
85 * will be examined until a format can be conclusively selected.
86 * @return the index format.
87 * @throws java.lang.IllegalArgumentException
88 * no recognized pack index version can support the supplied
89 * objects. This is likely a bug in the implementation.
90 */
91 public static int oldestPossibleFormat(
92 final List<? extends PackedObjectInfo> objs) {
93 for (PackedObjectInfo oe : objs) {
94 if (!PackIndexWriterV1.canStore(oe))
95 return 2;
96 }
97 return 1;
98 }
99
100
101 /**
102 * Create a new writer instance for a specific index format version.
103 *
104 * @param dst
105 * the stream the index data will be written to. If not already
106 * buffered it will be automatically wrapped in a buffered
107 * stream. Callers are always responsible for closing the stream.
108 * @param version
109 * index format version number required by the caller. Exactly
110 * this formatted version will be written.
111 * @return a new writer to output an index file of the requested format to
112 * the supplied stream.
113 * @throws java.lang.IllegalArgumentException
114 * the version requested is not supported by this
115 * implementation.
116 */
117 public static PackIndexWriter createVersion(final OutputStream dst,
118 final int version) {
119 switch (version) {
120 case 1:
121 return new PackIndexWriterV1(dst);
122 case 2:
123 return new PackIndexWriterV2(dst);
124 default:
125 throw new IllegalArgumentException(MessageFormat.format(
126 JGitText.get().unsupportedPackIndexVersion,
127 Integer.valueOf(version)));
128 }
129 }
130
131 /** The index data stream we are responsible for creating. */
132 protected final DigestOutputStream out;
133
134 /** A temporary buffer for use during IO to {link #out}. */
135 protected final byte[] tmp;
136
137 /** The entries this writer must pack. */
138 protected List<? extends PackedObjectInfo> entries;
139
140 /** SHA-1 checksum for the entire pack data. */
141 protected byte[] packChecksum;
142
143 /**
144 * Create a new writer instance.
145 *
146 * @param dst
147 * the stream this instance outputs to. If not already buffered
148 * it will be automatically wrapped in a buffered stream.
149 */
150 protected PackIndexWriter(OutputStream dst) {
151 out = new DigestOutputStream(dst instanceof BufferedOutputStream ? dst
152 : new BufferedOutputStream(dst),
153 Constants.newMessageDigest());
154 tmp = new byte[4 + Constants.OBJECT_ID_LENGTH];
155 }
156
157 /**
158 * Write all object entries to the index stream.
159 * <p>
160 * After writing the stream passed to the factory is flushed but remains
161 * open. Callers are always responsible for closing the output stream.
162 *
163 * @param toStore
164 * sorted list of objects to store in the index. The caller must
165 * have previously sorted the list using
166 * {@link org.eclipse.jgit.transport.PackedObjectInfo}'s native
167 * {@link java.lang.Comparable} implementation.
168 * @param packDataChecksum
169 * checksum signature of the entire pack data content. This is
170 * traditionally the last 20 bytes of the pack file's own stream.
171 * @throws java.io.IOException
172 * an error occurred while writing to the output stream, or this
173 * index format cannot store the object data supplied.
174 */
175 public void write(final List<? extends PackedObjectInfo> toStore,
176 final byte[] packDataChecksum) throws IOException {
177 entries = toStore;
178 packChecksum = packDataChecksum;
179 writeImpl();
180 out.flush();
181 }
182
183 /**
184 * Writes the index file to {@link #out}.
185 * <p>
186 * Implementations should go something like:
187 *
188 * <pre>
189 * writeFanOutTable();
190 * for (final PackedObjectInfo po : entries)
191 * writeOneEntry(po);
192 * writeChecksumFooter();
193 * </pre>
194 *
195 * <p>
196 * Where the logic for <code>writeOneEntry</code> is specific to the index
197 * format in use. Additional headers/footers may be used if necessary and
198 * the {@link #entries} collection may be iterated over more than once if
199 * necessary. Implementors therefore have complete control over the data.
200 *
201 * @throws java.io.IOException
202 * an error occurred while writing to the output stream, or this
203 * index format cannot store the object data supplied.
204 */
205 protected abstract void writeImpl() throws IOException;
206
207 /**
208 * Output the version 2 (and later) TOC header, with version number.
209 * <p>
210 * Post version 1 all index files start with a TOC header that makes the
211 * file an invalid version 1 file, and then includes the version number.
212 * This header is necessary to recognize a version 1 from a version 2
213 * formatted index.
214 *
215 * @param version
216 * version number of this index format being written.
217 * @throws java.io.IOException
218 * an error occurred while writing to the output stream.
219 */
220 protected void writeTOC(int version) throws IOException {
221 out.write(TOC);
222 NB.encodeInt32(tmp, 0, version);
223 out.write(tmp, 0, 4);
224 }
225
226 /**
227 * Output the standard 256 entry first-level fan-out table.
228 * <p>
229 * The fan-out table is 4 KB in size, holding 256 32-bit unsigned integer
230 * counts. Each count represents the number of objects within this index
231 * whose {@link org.eclipse.jgit.lib.ObjectId#getFirstByte()} matches the
232 * count's position in the fan-out table.
233 *
234 * @throws java.io.IOException
235 * an error occurred while writing to the output stream.
236 */
237 protected void writeFanOutTable() throws IOException {
238 final int[] fanout = new int[256];
239 for (PackedObjectInfo po : entries)
240 fanout[po.getFirstByte() & 0xff]++;
241 for (int i = 1; i < 256; i++)
242 fanout[i] += fanout[i - 1];
243 for (int n : fanout) {
244 NB.encodeInt32(tmp, 0, n);
245 out.write(tmp, 0, 4);
246 }
247 }
248
249 /**
250 * Output the standard two-checksum index footer.
251 * <p>
252 * The standard footer contains two checksums (20 byte SHA-1 values):
253 * <ol>
254 * <li>Pack data checksum - taken from the last 20 bytes of the pack file.</li>
255 * <li>Index data checksum - checksum of all index bytes written, including
256 * the pack data checksum above.</li>
257 * </ol>
258 *
259 * @throws java.io.IOException
260 * an error occurred while writing to the output stream.
261 */
262 protected void writeChecksumFooter() throws IOException {
263 out.write(packChecksum);
264 out.on(false);
265 out.write(out.getMessageDigest().digest());
266 }
267 }