1   /*
2    * Copyright (C) 2011, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.internal.storage.dfs;
45  
46  import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
47  import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
48  import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
49  import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
50  import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
51  import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;
52  
53  import java.io.IOException;
54  import java.util.ArrayList;
55  import java.util.Collection;
56  import java.util.Collections;
57  import java.util.Comparator;
58  import java.util.HashSet;
59  import java.util.Iterator;
60  import java.util.List;
61  import java.util.Set;
62  
63  import org.eclipse.jgit.errors.IncorrectObjectTypeException;
64  import org.eclipse.jgit.internal.JGitText;
65  import org.eclipse.jgit.internal.storage.file.PackIndex;
66  import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
67  import org.eclipse.jgit.internal.storage.pack.PackWriter;
68  import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
69  import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
70  import org.eclipse.jgit.lib.AnyObjectId;
71  import org.eclipse.jgit.lib.NullProgressMonitor;
72  import org.eclipse.jgit.lib.ObjectId;
73  import org.eclipse.jgit.lib.ObjectIdSet;
74  import org.eclipse.jgit.lib.ProgressMonitor;
75  import org.eclipse.jgit.revwalk.RevFlag;
76  import org.eclipse.jgit.revwalk.RevObject;
77  import org.eclipse.jgit.revwalk.RevWalk;
78  import org.eclipse.jgit.storage.pack.PackConfig;
79  import org.eclipse.jgit.storage.pack.PackStatistics;
80  import org.eclipse.jgit.util.BlockList;
81  import org.eclipse.jgit.util.io.CountingOutputStream;
82  
83  /**
84   * Combine several pack files into one pack.
85   * <p>
86   * The compactor combines several pack files together by including all objects
87   * contained in each pack file into the same output pack. If an object appears
88   * multiple times, it is only included once in the result. Because the new pack
89   * is constructed by enumerating the indexes of the source packs, it is quicker
90   * than doing a full repack of the repository, however the result is not nearly
91   * as space efficient as new delta compression is disabled.
92   * <p>
93   * This method is suitable for quickly combining several packs together after
94   * receiving a number of small fetch or push operations into a repository,
95   * allowing the system to maintain reasonable read performance without expending
96   * a lot of time repacking the entire repository.
97   */
98  public class DfsPackCompactor {
99  	private final DfsRepository repo;
100 	private final List<DfsPackFile> srcPacks;
101 	private final List<DfsReftable> srcReftables;
102 	private final List<ObjectIdSet> exclude;
103 
104 	private PackStatistics newStats;
105 	private DfsPackDescription outDesc;
106 
107 	private int autoAddSize;
108 	private ReftableConfig reftableConfig;
109 
110 	private RevWalk rw;
111 	private RevFlag added;
112 	private RevFlag isBase;
113 
114 	/**
115 	 * Initialize a pack compactor.
116 	 *
117 	 * @param repository
118 	 *            repository objects to be packed will be read from.
119 	 */
120 	public DfsPackCompactor(DfsRepository repository) {
121 		repo = repository;
122 		autoAddSize = 5 * 1024 * 1024; // 5 MiB
123 		srcPacks = new ArrayList<>();
124 		srcReftables = new ArrayList<>();
125 		exclude = new ArrayList<>(4);
126 	}
127 
128 	/**
129 	 * Set configuration to write a reftable.
130 	 *
131 	 * @param cfg
132 	 *            configuration to write a reftable. Reftable compacting is
133 	 *            disabled (default) when {@code cfg} is {@code null}.
134 	 * @return {@code this}
135 	 */
136 	public DfsPackCompactor setReftableConfig(ReftableConfig cfg) {
137 		reftableConfig = cfg;
138 		return this;
139 	}
140 
141 	/**
142 	 * Add a pack to be compacted.
143 	 * <p>
144 	 * All of the objects in this pack will be copied into the resulting pack.
145 	 * The resulting pack will order objects according to the source pack's own
146 	 * description ordering (which is based on creation date), and then by the
147 	 * order the objects appear in the source pack.
148 	 *
149 	 * @param pack
150 	 *            a pack to combine into the resulting pack.
151 	 * @return {@code this}
152 	 */
153 	public DfsPackCompactor add(DfsPackFile pack) {
154 		srcPacks.add(pack);
155 		return this;
156 	}
157 
158 	/**
159 	 * Add a reftable to be compacted.
160 	 *
161 	 * @param table
162 	 *            a reftable to combine.
163 	 * @return {@code this}
164 	 */
165 	public DfsPackCompactor add(DfsReftable table) {
166 		srcReftables.add(table);
167 		return this;
168 	}
169 
170 	/**
171 	 * Automatically select pack and reftables to be included, and add them.
172 	 * <p>
173 	 * Packs are selected based on size, smaller packs get included while bigger
174 	 * ones are omitted.
175 	 *
176 	 * @return {@code this}
177 	 * @throws java.io.IOException
178 	 *             existing packs cannot be read.
179 	 */
180 	public DfsPackCompactor autoAdd() throws IOException {
181 		DfsObjDatabase objdb = repo.getObjectDatabase();
182 		for (DfsPackFile pack : objdb.getPacks()) {
183 			DfsPackDescription d = pack.getPackDescription();
184 			if (d.getFileSize(PACK) < autoAddSize)
185 				add(pack);
186 			else
187 				exclude(pack);
188 		}
189 
190 		if (reftableConfig != null) {
191 			for (DfsReftable table : objdb.getReftables()) {
192 				DfsPackDescription d = table.getPackDescription();
193 				if (d.getPackSource() != GC
194 						&& d.getFileSize(REFTABLE) < autoAddSize) {
195 					add(table);
196 				}
197 			}
198 		}
199 		return this;
200 	}
201 
202 	/**
203 	 * Exclude objects from the compacted pack.
204 	 *
205 	 * @param set
206 	 *            objects to not include.
207 	 * @return {@code this}.
208 	 */
209 	public DfsPackCompactor exclude(ObjectIdSet set) {
210 		exclude.add(set);
211 		return this;
212 	}
213 
214 	/**
215 	 * Exclude objects from the compacted pack.
216 	 *
217 	 * @param pack
218 	 *            objects to not include.
219 	 * @return {@code this}.
220 	 * @throws java.io.IOException
221 	 *             pack index cannot be loaded.
222 	 */
223 	public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
224 		final PackIndex idx;
225 		try (DfsReader../../../../../../org/eclipse/jgit/internal/storage/dfs/DfsReader.html#DfsReader">DfsReader ctx = (DfsReader) repo.newObjectReader()) {
226 			idx = pack.getPackIndex(ctx);
227 		}
228 		return exclude(idx);
229 	}
230 
231 	/**
232 	 * Compact the pack files together.
233 	 *
234 	 * @param pm
235 	 *            progress monitor to receive updates on as packing may take a
236 	 *            while, depending on the size of the repository.
237 	 * @throws java.io.IOException
238 	 *             the packs cannot be compacted.
239 	 */
240 	public void compact(ProgressMonitor pm) throws IOException {
241 		if (pm == null) {
242 			pm = NullProgressMonitor.INSTANCE;
243 		}
244 
245 		DfsObjDatabase objdb = repo.getObjectDatabase();
246 		try (DfsReader ctx = objdb.newReader()) {
247 			if (reftableConfig != null && !srcReftables.isEmpty()) {
248 				compactReftables(ctx);
249 			}
250 			compactPacks(ctx, pm);
251 
252 			List<DfsPackDescription> commit = getNewPacks();
253 			Collection<DfsPackDescription> remove = toPrune();
254 			if (!commit.isEmpty() || !remove.isEmpty()) {
255 				objdb.commitPack(commit, remove);
256 			}
257 		} finally {
258 			rw = null;
259 		}
260 	}
261 
262 	private void compactPacks(DfsReader ctx, ProgressMonitor pm)
263 			throws IOException, IncorrectObjectTypeException {
264 		DfsObjDatabase objdb = repo.getObjectDatabase();
265 		PackConfig pc = new PackConfig(repo);
266 		pc.setIndexVersion(2);
267 		pc.setDeltaCompress(false);
268 		pc.setReuseDeltas(true);
269 		pc.setReuseObjects(true);
270 
271 		try (PackWriternal/storage/pack/PackWriter.html#PackWriter">PackWriter pw = new PackWriter(pc, ctx)) {
272 			pw.setDeltaBaseAsOffset(true);
273 			pw.setReuseDeltaCommits(false);
274 
275 			addObjectsToPack(pw, ctx, pm);
276 			if (pw.getObjectCount() == 0) {
277 				return;
278 			}
279 
280 			boolean rollback = true;
281 			initOutDesc(objdb);
282 			try {
283 				writePack(objdb, outDesc, pw, pm);
284 				writeIndex(objdb, outDesc, pw);
285 
286 				PackStatistics stats = pw.getStatistics();
287 
288 				outDesc.setPackStats(stats);
289 				newStats = stats;
290 				rollback = false;
291 			} finally {
292 				if (rollback) {
293 					objdb.rollbackPack(Collections.singletonList(outDesc));
294 				}
295 			}
296 		}
297 	}
298 
299 	private long estimatePackSize() {
300 		// Every pack file contains 12 bytes of header and 20 bytes of trailer.
301 		// Include the final pack file header and trailer size here and ignore
302 		// the same from individual pack files.
303 		long size = 32;
304 		for (DfsPackFile pack : srcPacks) {
305 			size += pack.getPackDescription().getFileSize(PACK) - 32;
306 		}
307 		return size;
308 	}
309 
310 	private void compactReftables(DfsReader ctx) throws IOException {
311 		DfsObjDatabase objdb = repo.getObjectDatabase();
312 		Collections.sort(srcReftables, objdb.reftableComparator());
313 
314 		try (ReftableStack stack = ReftableStack.open(ctx, srcReftables)) {
315 			initOutDesc(objdb);
316 			ReftableCompactor compact = new ReftableCompactor();
317 			compact.addAll(stack.readers());
318 			compact.setIncludeDeletes(true);
319 			writeReftable(objdb, outDesc, compact);
320 		}
321 	}
322 
323 	private void initOutDesc(DfsObjDatabase objdb) throws IOException {
324 		if (outDesc == null) {
325 			outDesc = objdb.newPack(COMPACT, estimatePackSize());
326 		}
327 	}
328 
329 	/**
330 	 * Get all of the source packs that fed into this compaction.
331 	 *
332 	 * @return all of the source packs that fed into this compaction.
333 	 */
334 	public Collection<DfsPackDescription> getSourcePacks() {
335 		Set<DfsPackDescription> src = new HashSet<>();
336 		for (DfsPackFile pack : srcPacks) {
337 			src.add(pack.getPackDescription());
338 		}
339 		for (DfsReftable table : srcReftables) {
340 			src.add(table.getPackDescription());
341 		}
342 		return src;
343 	}
344 
345 	/**
346 	 * Get new packs created by this compaction.
347 	 *
348 	 * @return new packs created by this compaction.
349 	 */
350 	public List<DfsPackDescription> getNewPacks() {
351 		return outDesc != null
352 				? Collections.singletonList(outDesc)
353 				: Collections.emptyList();
354 	}
355 
356 	/**
357 	 * Get statistics corresponding to the {@link #getNewPacks()}.
358 	 * May be null if statistics are not available.
359 	 *
360 	 * @return statistics corresponding to the {@link #getNewPacks()}.
361 	 *
362 	 */
363 	public List<PackStatistics> getNewPackStatistics() {
364 		return outDesc != null
365 				? Collections.singletonList(newStats)
366 				: Collections.emptyList();
367 	}
368 
369 	private Collection<DfsPackDescription> toPrune() {
370 		Set<DfsPackDescription> packs = new HashSet<>();
371 		for (DfsPackFile pack : srcPacks) {
372 			packs.add(pack.getPackDescription());
373 		}
374 
375 		Set<DfsPackDescription> reftables = new HashSet<>();
376 		for (DfsReftable table : srcReftables) {
377 			reftables.add(table.getPackDescription());
378 		}
379 
380 		for (Iterator<DfsPackDescription> i = packs.iterator(); i.hasNext();) {
381 			DfsPackDescription d = i.next();
382 			if (d.hasFileExt(REFTABLE) && !reftables.contains(d)) {
383 				i.remove();
384 			}
385 		}
386 
387 		for (Iterator<DfsPackDescription> i = reftables.iterator();
388 				i.hasNext();) {
389 			DfsPackDescription d = i.next();
390 			if (d.hasFileExt(PACK) && !packs.contains(d)) {
391 				i.remove();
392 			}
393 		}
394 
395 		Set<DfsPackDescription> toPrune = new HashSet<>();
396 		toPrune.addAll(packs);
397 		toPrune.addAll(reftables);
398 		return toPrune;
399 	}
400 
401 	private void addObjectsToPack(PackWriter pw, DfsReader ctx,
402 			ProgressMonitor pm) throws IOException,
403 			IncorrectObjectTypeException {
404 		// Sort packs by description ordering, this places newer packs before
405 		// older packs, allowing the PackWriter to be handed newer objects
406 		// first and older objects last.
407 		Collections.sort(
408 				srcPacks,
409 				Comparator.comparing(
410 						DfsPackFile::getPackDescription,
411 						DfsPackDescription.objectLookupComparator()));
412 
413 		rw = new RevWalk(ctx);
414 		added = rw.newFlag("ADDED"); //$NON-NLS-1$
415 		isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
416 		List<RevObject> baseObjects = new BlockList<>();
417 
418 		pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
419 		for (DfsPackFile src : srcPacks) {
420 			List<ObjectIdWithOffset> want = toInclude(src, ctx);
421 			if (want.isEmpty())
422 				continue;
423 
424 			PackReverseIndex rev = src.getReverseIdx(ctx);
425 			DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
426 			for (ObjectIdWithOffset id : want) {
427 				int type = src.getObjectType(ctx, id.offset);
428 				RevObject obj = rw.lookupAny(id, type);
429 				if (obj.has(added))
430 					continue;
431 
432 				pm.update(1);
433 				pw.addObject(obj);
434 				obj.add(added);
435 
436 				src.representation(rep, id.offset, ctx, rev);
437 				if (rep.getFormat() != PACK_DELTA)
438 					continue;
439 
440 				RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
441 				if (!base.has(added) && !base.has(isBase)) {
442 					baseObjects.add(base);
443 					base.add(isBase);
444 				}
445 			}
446 		}
447 		for (RevObject obj : baseObjects) {
448 			if (!obj.has(added)) {
449 				pm.update(1);
450 				pw.addObject(obj);
451 				obj.add(added);
452 			}
453 		}
454 		pm.endTask();
455 	}
456 
457 	private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
458 			throws IOException {
459 		PackIndex srcIdx = src.getPackIndex(ctx);
460 		List<ObjectIdWithOffset> want = new BlockList<>(
461 				(int) srcIdx.getObjectCount());
462 		SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
463 			ObjectId id = ent.toObjectId();
464 			RevObject obj = rw.lookupOrNull(id);
465 			if (obj != null && (obj.has(added) || obj.has(isBase)))
466 				continue;
467 			for (ObjectIdSet e : exclude)
468 				if (e.contains(id))
469 					continue SCAN;
470 			want.add(new ObjectIdWithOffset(id, ent.getOffset()));
471 		}
472 		Collections.sort(want, new Comparator<ObjectIdWithOffset>() {
473 			@Override
474 			public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) {
475 				return Long.signum(a.offset - b.offset);
476 			}
477 		});
478 		return want;
479 	}
480 
481 	private static void writePack(DfsObjDatabase objdb,
482 			DfsPackDescription pack,
483 			PackWriter pw, ProgressMonitor pm) throws IOException {
484 		try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {
485 			pw.writePack(pm, pm, out);
486 			pack.addFileExt(PACK);
487 			pack.setBlockSize(PACK, out.blockSize());
488 		}
489 	}
490 
491 	private static void writeIndex(DfsObjDatabase objdb,
492 			DfsPackDescription pack,
493 			PackWriter pw) throws IOException {
494 		try (DfsOutputStream out = objdb.writeFile(pack, INDEX)) {
495 			CountingOutputStream cnt = new CountingOutputStream(out);
496 			pw.writeIndex(cnt);
497 			pack.addFileExt(INDEX);
498 			pack.setFileSize(INDEX, cnt.getCount());
499 			pack.setBlockSize(INDEX, out.blockSize());
500 			pack.setIndexVersion(pw.getIndexVersion());
501 		}
502 	}
503 
504 	private void writeReftable(DfsObjDatabase objdb, DfsPackDescription pack,
505 			ReftableCompactor compact) throws IOException {
506 		try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) {
507 			compact.setConfig(configureReftable(reftableConfig, out));
508 			compact.compact(out);
509 			pack.addFileExt(REFTABLE);
510 			pack.setReftableStats(compact.getStats());
511 		}
512 	}
513 
514 	static ReftableConfig./org/eclipse/jgit/internal/storage/reftable/ReftableConfig.html#ReftableConfig">ReftableConfig configureReftable(ReftableConfig cfg,
515 			DfsOutputStream out) {
516 		int bs = out.blockSize();
517 		if (bs > 0) {
518 			cfg = new ReftableConfig(cfg);
519 			cfg.setRefBlockSize(bs);
520 			cfg.setAlignBlocks(true);
521 		}
522 		return cfg;
523 	}
524 
525 	private static class ObjectIdWithOffset extends ObjectId {
526 		final long offset;
527 
528 		ObjectIdWithOffset(AnyObjectId id, long ofs) {
529 			super(id);
530 			offset = ofs;
531 		}
532 	}
533 }