/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.io.CountingOutputStream;

/**
 * Combine several pack files into one pack.
 * <p>
 * The compactor combines several pack files together by including all objects
 * contained in each pack file into the same output pack. If an object appears
 * multiple times, it is only included once in the result. Because the new pack
 * is constructed by enumerating the indexes of the source packs, it is quicker
 * than doing a full repack of the repository; however, the result is not
 * nearly as space efficient, because new delta compression is disabled.
 * <p>
 * This strategy is suitable for quickly combining several packs together after
 * receiving a number of small fetch or push operations into a repository,
 * allowing the system to maintain reasonable read performance without
 * expending a lot of time repacking the entire repository.
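 * <p>
 * A minimal usage sketch (the {@code repository} and {@code monitor} here
 * are assumed to exist already):
 *
 * <pre>
 * DfsPackCompactor compactor = new DfsPackCompactor(repository);
 * compactor.autoAdd();
 * compactor.compact(monitor);
 * </pre>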
 */
public class DfsPackCompactor {
	private final DfsRepository repo;

	private final List<DfsPackFile> srcPacks;

	private final List<PackWriter.ObjectIdSet> exclude;

	private final List<DfsPackDescription> newPacks;

	private final List<PackWriter.Statistics> newStats;

	private int autoAddSize;

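	// Traversal state used while adding objects: "added" marks objects
	// already handed to the PackWriter; "isBase" marks delta bases queued
	// for inclusion in a second pass.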
	private RevWalk rw;
	private RevFlag added;
	private RevFlag isBase;

	/**
	 * Initialize a pack compactor.
	 *
	 * @param repository
	 *            repository from which objects to be packed will be read.
	 */
	public DfsPackCompactor(DfsRepository repository) {
		repo = repository;
		autoAddSize = 5 * 1024 * 1024; // 5 MiB
		srcPacks = new ArrayList<DfsPackFile>();
		exclude = new ArrayList<PackWriter.ObjectIdSet>(4);
		newPacks = new ArrayList<DfsPackDescription>(1);
		newStats = new ArrayList<PackWriter.Statistics>(1);
	}

	/**
	 * Add a pack to be compacted.
	 * <p>
	 * All of the objects in this pack will be copied into the resulting pack.
	 * The resulting pack orders objects according to the source pack's own
	 * description ordering (which is based on creation date), and then by the
	 * order in which the objects appear in the source pack.
	 *
	 * @param pack
	 *            a pack to combine into the resulting pack.
	 * @return {@code this}
	 */
	public DfsPackCompactor add(DfsPackFile pack) {
		srcPacks.add(pack);
		return this;
	}

	/**
	 * Automatically select packs to be included, and add them.
	 * <p>
	 * Packs are selected based on size: smaller packs are included, while
	 * larger ones are excluded, so their objects are not copied into the
	 * output.
	 *
	 * @return {@code this}
	 * @throws IOException
	 *             existing packs cannot be read.
	 */
	public DfsPackCompactor autoAdd() throws IOException {
		DfsObjDatabase objdb = repo.getObjectDatabase();
		for (DfsPackFile pack : objdb.getPacks()) {
			DfsPackDescription d = pack.getPackDescription();
			if (d.getFileSize(PACK) < autoAddSize)
				add(pack);
			else
				exclude(pack);
		}
		return this;
	}

	/**
	 * Exclude objects from the compacted pack.
	 *
	 * @param set
	 *            objects not to include.
	 * @return {@code this}.
	 */
	public DfsPackCompactor exclude(PackWriter.ObjectIdSet set) {
		exclude.add(set);
		return this;
	}

	/**
	 * Exclude objects from the compacted pack.
	 *
	 * @param pack
	 *            pack whose objects should not be included.
	 * @return {@code this}.
	 * @throws IOException
	 *             pack index cannot be loaded.
	 */
	public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
		final PackIndex idx;
		try (DfsReader ctx = (DfsReader) repo.newObjectReader()) {
			idx = pack.getPackIndex(ctx);
		}
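		// The pack's index serves as the membership test for exclusion;
		// the reader was only needed long enough to load it.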
		return exclude(new PackWriter.ObjectIdSet() {
			public boolean contains(AnyObjectId id) {
				return idx.hasObject(id);
			}
		});
	}

	/**
	 * Compact the pack files together.
	 *
	 * @param pm
	 *            progress monitor to receive updates on; packing may take a
	 *            while, depending on the size of the repository.
	 * @throws IOException
	 *             the packs cannot be compacted.
	 */
	public void compact(ProgressMonitor pm) throws IOException {
		if (pm == null)
			pm = NullProgressMonitor.INSTANCE;

		DfsObjDatabase objdb = repo.getObjectDatabase();
		try (DfsReader ctx = (DfsReader) objdb.newReader()) {
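			// Reuse stored deltas and object data verbatim; new delta
			// compression is disabled, trading output size for speed.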
			PackConfig pc = new PackConfig(repo);
			pc.setIndexVersion(2);
			pc.setDeltaCompress(false);
			pc.setReuseDeltas(true);
			pc.setReuseObjects(true);

			PackWriter pw = new PackWriter(pc, ctx);
			try {
				pw.setDeltaBaseAsOffset(true);
				pw.setReuseDeltaCommits(false);

				addObjectsToPack(pw, ctx, pm);
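				// If no objects were selected, only prune the
				// now-redundant source packs.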
				if (pw.getObjectCount() == 0) {
					List<DfsPackDescription> remove = toPrune();
					if (remove.size() > 0)
						objdb.commitPack(
								Collections.<DfsPackDescription>emptyList(),
								remove);
					return;
				}

				boolean rollback = true;
				DfsPackDescription pack = objdb.newPack(COMPACT);
				try {
					writePack(objdb, pack, pw, pm);
					writeIndex(objdb, pack, pw);

					PackWriter.Statistics stats = pw.getStatistics();
					pw.close();
					pw = null;

					pack.setPackStats(stats);
					objdb.commitPack(Collections.singletonList(pack), toPrune());
					newPacks.add(pack);
					newStats.add(stats);
					rollback = false;
				} finally {
					if (rollback)
						objdb.rollbackPack(Collections.singletonList(pack));
				}
			} finally {
				if (pw != null)
					pw.close();
			}
		} finally {
			rw = null;
		}
	}

	/** @return all of the source packs that fed into this compaction. */
	public List<DfsPackDescription> getSourcePacks() {
		return toPrune();
	}

	/** @return new packs created by this compaction. */
	public List<DfsPackDescription> getNewPacks() {
		return newPacks;
	}

	/** @return statistics for the packs returned by {@link #getNewPacks()}. */
	public List<PackWriter.Statistics> getNewPackStatistics() {
		return newStats;
	}

	private List<DfsPackDescription> toPrune() {
		int cnt = srcPacks.size();
		List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
		for (DfsPackFile pack : srcPacks)
			all.add(pack.getPackDescription());
		return all;
	}

	private void addObjectsToPack(PackWriter pw, DfsReader ctx,
			ProgressMonitor pm) throws IOException,
			IncorrectObjectTypeException {
		// Sort packs by description ordering; this places newer packs before
		// older packs, allowing the PackWriter to be handed newer objects
		// first and older objects last.
		Collections.sort(srcPacks, new Comparator<DfsPackFile>() {
			public int compare(DfsPackFile a, DfsPackFile b) {
				return a.getPackDescription().compareTo(b.getPackDescription());
			}
		});

		rw = new RevWalk(ctx);
		added = rw.newFlag("ADDED"); //$NON-NLS-1$
		isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
		List<RevObject> baseObjects = new BlockList<RevObject>();

		pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
		for (DfsPackFile src : srcPacks) {
			List<ObjectIdWithOffset> want = toInclude(src, ctx);
			if (want.isEmpty())
				continue;

			PackReverseIndex rev = src.getReverseIdx(ctx);
			DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
			for (ObjectIdWithOffset id : want) {
				int type = src.getObjectType(ctx, id.offset);
				RevObject obj = rw.lookupAny(id, type);
				if (obj.has(added))
					continue;

				pm.update(1);
				pw.addObject(obj);
				obj.add(added);

				src.representation(rep, id.offset, ctx, rev);
				if (rep.getFormat() != PACK_DELTA)
					continue;

				RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
				if (!base.has(added) && !base.has(isBase)) {
					baseObjects.add(base);
					base.add(isBase);
				}
			}
		}
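		// Second pass: include any delta bases discovered above that were
		// not already copied, so reused deltas remain resolvable.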
		for (RevObject obj : baseObjects) {
			if (!obj.has(added)) {
				pm.update(1);
				pw.addObject(obj);
				obj.add(added);
			}
		}
		pm.endTask();
	}

	private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
			throws IOException {
		PackIndex srcIdx = src.getPackIndex(ctx);
		List<ObjectIdWithOffset> want = new BlockList<ObjectIdWithOffset>(
				(int) srcIdx.getObjectCount());
		SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
			ObjectId id = ent.toObjectId();
			RevObject obj = rw.lookupOrNull(id);
			if (obj != null && (obj.has(added) || obj.has(isBase)))
				continue;
			for (PackWriter.ObjectIdSet e : exclude)
				if (e.contains(id))
					continue SCAN;
			want.add(new ObjectIdWithOffset(id, ent.getOffset()));
		}
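		// Copy objects in the order they appear in the source pack file,
		// preserving the locality of the original layout.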
		Collections.sort(want, new Comparator<ObjectIdWithOffset>() {
			public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) {
				return Long.signum(a.offset - b.offset);
			}
		});
		return want;
	}

	private static void writePack(DfsObjDatabase objdb,
			DfsPackDescription pack,
			PackWriter pw, ProgressMonitor pm) throws IOException {
		DfsOutputStream out = objdb.writeFile(pack, PACK);
		try {
			pw.writePack(pm, pm, out);
			pack.addFileExt(PACK);
		} finally {
			out.close();
		}
	}

	private static void writeIndex(DfsObjDatabase objdb,
			DfsPackDescription pack,
			PackWriter pw) throws IOException {
		DfsOutputStream out = objdb.writeFile(pack, INDEX);
		try {
			CountingOutputStream cnt = new CountingOutputStream(out);
			pw.writeIndex(cnt);
			pack.addFileExt(INDEX);
			pack.setFileSize(INDEX, cnt.getCount());
			pack.setIndexVersion(pw.getIndexVersion());
		} finally {
			out.close();
		}
	}

	private static class ObjectIdWithOffset extends ObjectId {
		final long offset;

		ObjectIdWithOffset(AnyObjectId id, long ofs) {
			super(id);
			offset = ofs;
		}
	}
}