1 /*
2 * Copyright (C) 2010, Google Inc.
3 * and other copyright owners as documented in the project's IP log.
4 *
5 * This program and the accompanying materials are made available
6 * under the terms of the Eclipse Distribution License v1.0 which
7 * accompanies this distribution, is reproduced below, and is
8 * available at http://www.eclipse.org/org/documents/edl-v10.php
9 *
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * - Neither the name of the Eclipse Foundation, Inc. nor the
25 * names of its contributors may be used to endorse or promote
26 * products derived from this software without specific prior
27 * written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 package org.eclipse.jgit.internal.storage.pack;
45
46 import java.io.IOException;
47 import java.util.Collection;
48 import java.util.List;
49
50 import org.eclipse.jgit.errors.MissingObjectException;
51 import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
52 import org.eclipse.jgit.lib.AnyObjectId;
53 import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;
54 import org.eclipse.jgit.lib.ObjectReader;
55 import org.eclipse.jgit.lib.ProgressMonitor;
56
57 /**
58 * Extension of {@link ObjectReader} that supports reusing objects in packs.
59 * <p>
60 * {@code ObjectReader} implementations may also optionally implement this
61 * interface to support {@link PackWriter} with a means of copying an object
62 * that is already in pack encoding format directly into the output stream,
63 * without incurring decompression and recompression overheads.
64 */
65 public interface ObjectReuseAsIs {
66 /**
67 * Allocate a new {@code PackWriter} state structure for an object.
68 * <p>
69 * {@link PackWriter} allocates these objects to keep track of the
70 * per-object state, and how to load the objects efficiently into the
71 * generated stream. Implementers may subclass this type with additional
72 * object state, such as to remember what file and offset contains the
73 * object's pack encoded data.
74 *
75 * @param objectId
76 * the id of the object that will be packed.
77 * @param type
78 * the Git type of the object that will be packed.
79 * @return a new instance for this object.
80 */
81 public ObjectToPack newObjectToPack(AnyObjectId objectId, int type);
82
83 /**
84 * Select the best object representation for a packer.
85 * <p>
86 * Implementations should iterate through all available representations of
87 * an object, and pass them in turn to the PackWriter though
88 * {@link PackWriter#select(ObjectToPack, StoredObjectRepresentation)} so
89 * the writer can select the most suitable representation to reuse into the
90 * output stream.
91 * <p>
92 * If the implementation returns CachedPack from {@link #getCachedPacksAndUpdate(BitmapBuilder)}
93 * it must consider the representation of any object that is stored in any
94 * of the offered CachedPacks. PackWriter relies on this behavior to prune
95 * duplicate objects out of the pack stream when it selects a CachedPack and
96 * the object was also reached through the thin-pack enumeration.
97 * <p>
98 * The implementation may choose to consider multiple objects at once on
99 * concurrent threads, but must evaluate all representations of an object
100 * within the same thread.
101 *
102 * @param packer
103 * the packer that will write the object in the near future.
104 * @param monitor
105 * progress monitor, implementation should update the monitor
106 * once for each item in the iteration when selection is done.
107 * @param objects
108 * the objects that are being packed.
109 * @throws MissingObjectException
110 * there is no representation available for the object, as it is
111 * no longer in the repository. Packing will abort.
112 * @throws IOException
113 * the repository cannot be accessed. Packing will abort.
114 */
115 public void selectObjectRepresentation(PackWriter packer,
116 ProgressMonitor monitor, Iterable<ObjectToPack> objects)
117 throws IOException, MissingObjectException;
118
119 /**
120 * Write objects to the pack stream in roughly the order given.
121 *
122 * {@code PackWriter} invokes this method to write out one or more objects,
123 * in approximately the order specified by the iteration over the list. A
124 * simple implementation of this method would just iterate the list and
125 * output each object:
126 *
127 * <pre>
128 * for (ObjectToPack obj : list)
129 * out.writeObject(obj)
130 * </pre>
131 *
132 * However more sophisticated implementors may try to perform some (small)
133 * reordering to access objects that are stored close to each other at
134 * roughly the same time. Implementations may choose to write objects out of
135 * order, but this may increase pack file size due to using a larger header
136 * format to reach a delta base that is later in the stream. It may also
137 * reduce data locality for the reader, slowing down data access.
138 *
139 * Invoking {@link PackOutputStream#writeObject(ObjectToPack)} will cause
140 * {@link #copyObjectAsIs(PackOutputStream, ObjectToPack, boolean)} to be
141 * invoked recursively on {@code this} if the current object is scheduled
142 * for reuse.
143 *
144 * @param out
145 * the stream to write each object to.
146 * @param list
147 * the list of objects to write. Objects should be written in
148 * approximately this order. Implementors may resort the list
149 * elements in-place during writing if desired.
150 * @throws IOException
151 * the stream cannot be written to, or one or more required
152 * objects cannot be accessed from the object database.
153 */
154 public void writeObjects(PackOutputStream out, List<ObjectToPack> list)
155 throws IOException;
156
157 /**
158 * Output a previously selected representation.
159 * <p>
160 * {@code PackWriter} invokes this method only if a representation
161 * previously given to it by {@code selectObjectRepresentation} was chosen
162 * for reuse into the output stream. The {@code otp} argument is an instance
163 * created by this reader's own {@code newObjectToPack}, and the
164 * representation data saved within it also originated from this reader.
165 * <p>
166 * Implementors must write the object header before copying the raw data to
167 * the output stream. The typical implementation is like:
168 *
169 * <pre>
170 * MyToPack mtp = (MyToPack) otp;
171 * byte[] raw;
172 * if (validate)
173 * raw = validate(mtp); // throw SORNAE here, if at all
174 * else
175 * raw = readFast(mtp);
176 * out.writeHeader(mtp, mtp.inflatedSize);
177 * out.write(raw);
178 * </pre>
179 *
180 * @param out
181 * stream the object should be written to.
182 * @param otp
183 * the object's saved representation information.
184 * @param validate
185 * if true the representation must be validated and not be
186 * corrupt before being reused. If false, validation may be
187 * skipped as it will be performed elsewhere in the processing
188 * pipeline.
189 * @throws StoredObjectRepresentationNotAvailableException
190 * the previously selected representation is no longer
191 * available. If thrown before {@code out.writeHeader} the pack
192 * writer will try to find another representation, and write
193 * that one instead. If throw after {@code out.writeHeader},
194 * packing will abort.
195 * @throws IOException
196 * the stream's write method threw an exception. Packing will
197 * abort.
198 */
199 public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
200 boolean validate) throws IOException,
201 StoredObjectRepresentationNotAvailableException;
202
203 /**
204 * Append an entire pack's contents onto the output stream.
205 * <p>
206 * The entire pack, excluding its header and trailing footer is sent.
207 *
208 * @param out
209 * stream to append the pack onto.
210 * @param pack
211 * the cached pack to send.
212 * @throws IOException
213 * the pack cannot be read, or stream did not accept a write.
214 */
215 public abstract void copyPackAsIs(PackOutputStream out, CachedPack pack)
216 throws IOException;
217
218 /**
219 * Obtain the available cached packs that match the bitmap and update
220 * the bitmap by removing the items that are in the CachedPack.
221 * <p>
222 * A cached pack has known starting points and may be sent entirely as-is,
223 * with almost no effort on the sender's part.
224 *
225 * @param needBitmap
226 * the bitmap that contains all of the objects the client wants.
227 * @return the available cached packs.
228 * @throws IOException
229 * the cached packs cannot be listed from the repository.
230 * Callers may choose to ignore this and continue as-if there
231 * were no cached packs.
232 */
233 public Collection<CachedPack> getCachedPacksAndUpdate(
234 BitmapBuilder needBitmap) throws IOException;
235 }