/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC_TXN;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftree.RefTreeNames;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.RefDatabase;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
import org.eclipse.jgit.util.io.CountingOutputStream;
/** Repack and garbage collect a repository. */
public class DfsGarbageCollector {
	private final DfsRepository repo;
	private final RefDatabase refdb;
	private final DfsObjDatabase objdb;

	private final List<DfsPackDescription> newPackDesc;

	private final List<PackStatistics> newPackStats;

	private final List<ObjectIdSet> newPackObj;

	private DfsReader ctx;

	private PackConfig packConfig;

	private long coalesceGarbageLimit = 50 << 20;

	private List<DfsPackFile> packsBefore;

	private Set<ObjectId> allHeads;
	private Set<ObjectId> nonHeads;
	private Set<ObjectId> txnHeads;
	private Set<ObjectId> tagTargets;

	/**
	 * Initialize a garbage collector.
	 *
	 * @param repository
	 *            repository objects to be packed will be read from.
	 */
	public DfsGarbageCollector(DfsRepository repository) {
		repo = repository;
		refdb = repo.getRefDatabase();
		objdb = repo.getObjectDatabase();
		newPackDesc = new ArrayList<>(4);
		newPackStats = new ArrayList<>(4);
		newPackObj = new ArrayList<>(4);

		packConfig = new PackConfig(repo);
		packConfig.setIndexVersion(2);
	}

	/** @return configuration used to generate the new pack file. */
	public PackConfig getPackConfig() {
		return packConfig;
	}

	/**
	 * @param newConfig
	 *            the new configuration to use when creating the pack file.
	 * @return {@code this}
	 */
	public DfsGarbageCollector setPackConfig(PackConfig newConfig) {
		packConfig = newConfig;
		return this;
	}

	/** @return size in bytes; garbage packs smaller than this will be repacked. */
	public long getCoalesceGarbageLimit() {
		return coalesceGarbageLimit;
	}

	/**
	 * Set the byte size limit for garbage packs to be repacked.
	 * <p>
	 * Any UNREACHABLE_GARBAGE pack smaller than this limit will be repacked at
	 * the end of the run. This allows the garbage collector to coalesce
	 * unreachable objects into a single file.
	 * <p>
	 * If an UNREACHABLE_GARBAGE pack is already larger than this limit it will
	 * be left alone by the garbage collector. This avoids unnecessary disk IO
	 * reading and copying the objects.
	 * <p>
	 * If the limit is set to 0, coalescing of UNREACHABLE_GARBAGE packs is
	 * disabled.<br>
	 * If the limit is set to {@link Long#MAX_VALUE}, everything is coalesced.
	 * <p>
	 * Keeping unreachable garbage prevents race conditions with repository
	 * changes that may suddenly need an object whose only copy was stored in
	 * the UNREACHABLE_GARBAGE pack.
	 *
	 * @param limit
	 *            size in bytes.
	 * @return {@code this}
	 */
	public DfsGarbageCollector setCoalesceGarbageLimit(long limit) {
		coalesceGarbageLimit = limit;
		return this;
	}
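
	// A minimal usage sketch (hypothetical caller code, not part of this
	// class); the limit is a plain byte count, so shifted literals read
	// naturally:
	//
	//   DfsGarbageCollector gc = new DfsGarbageCollector(repo);
	//   gc.setCoalesceGarbageLimit(0);              // never coalesce garbage
	//   gc.setCoalesceGarbageLimit(10 << 20);       // coalesce packs < 10 MiB
	//   gc.setCoalesceGarbageLimit(Long.MAX_VALUE); // always coalesce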

	/**
	 * Create a single new pack file containing all of the live objects.
	 * <p>
	 * This method safely decides which packs can be expired after the new pack
	 * is created by validating that the references have not been modified in
	 * an incompatible way.
	 *
	 * @param pm
	 *            progress monitor to receive updates as packing may take a
	 *            while, depending on the size of the repository.
	 * @return {@code true} if the repack was successful without race
	 *         conditions; {@code false} if a race condition was detected and
	 *         the repack should be run again later.
	 * @throws IOException
	 *             a new pack cannot be created.
	 */
	public boolean pack(ProgressMonitor pm) throws IOException {
		if (pm == null)
			pm = NullProgressMonitor.INSTANCE;
		if (packConfig.getIndexVersion() != 2)
			throw new IllegalStateException(
					JGitText.get().supportOnlyPackIndexVersion2);

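		// Snapshot the reference and pack state up front so the whole run
		// works against one consistent view of the repository.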
		ctx = (DfsReader) objdb.newReader();
		try {
			refdb.refresh();
			objdb.clearCache();

			Collection<Ref> refsBefore = getAllRefs();
			packsBefore = packsToRebuild();
			if (packsBefore.isEmpty())
				return true;

			allHeads = new HashSet<>();
			nonHeads = new HashSet<>();
			txnHeads = new HashSet<>();
			tagTargets = new HashSet<>();
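			// Partition the refs: branch heads feed the primary GC pack,
			// RefTree refs feed the GC_TXN pack, and all other refs feed
			// the pack written by packRest. Peeled tag targets (plus all
			// heads) are remembered for PackWriter.setTagTargets.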
			for (Ref ref : refsBefore) {
				if (ref.isSymbolic() || ref.getObjectId() == null)
					continue;
				if (isHead(ref))
					allHeads.add(ref.getObjectId());
				else if (RefTreeNames.isRefTree(refdb, ref.getName()))
					txnHeads.add(ref.getObjectId());
				else
					nonHeads.add(ref.getObjectId());
				if (ref.getPeeledObjectId() != null)
					tagTargets.add(ref.getPeeledObjectId());
			}
			tagTargets.addAll(allHeads);

			boolean rollback = true;
			try {
				packHeads(pm);
				packRest(pm);
				packRefTreeGraph(pm);
				packGarbage(pm);
				objdb.commitPack(newPackDesc, toPrune());
				rollback = false;
				return true;
			} finally {
				if (rollback)
					objdb.rollbackPack(newPackDesc);
			}
		} finally {
			ctx.close();
		}
	}
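
	// A minimal caller sketch (hypothetical code, not part of this class):
	// honor the documented contract and retry later if a race is reported.
	//
	//   DfsGarbageCollector gc = new DfsGarbageCollector(repo)
	//       .setCoalesceGarbageLimit(10 << 20);
	//   if (!gc.pack(NullProgressMonitor.INSTANCE)) {
	//       // a concurrent reference update raced the repack;
	//       // schedule another garbage collection run
	//   }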

	private Collection<Ref> getAllRefs() throws IOException {
		Collection<Ref> refs = refdb.getRefs(RefDatabase.ALL).values();
		List<Ref> addl = refdb.getAdditionalRefs();
		if (!addl.isEmpty()) {
			List<Ref> all = new ArrayList<>(refs.size() + addl.size());
			all.addAll(refs);
			// add additional refs which start with refs/
			for (Ref r : addl) {
				if (r.getName().startsWith(Constants.R_REFS)) {
					all.add(r);
				}
			}
			return all;
		}
		return refs;
	}

	private List<DfsPackFile> packsToRebuild() throws IOException {
		DfsPackFile[] packs = objdb.getPacks();
		List<DfsPackFile> out = new ArrayList<>(packs.length);
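		// Rebuild every pack that is not unreachable garbage; garbage
		// packs are only rebuilt while smaller than the coalesce limit.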
		for (DfsPackFile p : packs) {
			DfsPackDescription d = p.getPackDescription();
			if (d.getPackSource() != UNREACHABLE_GARBAGE)
				out.add(p);
			else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit)
				out.add(p);
		}
		return out;
	}

	/** @return all of the source packs that fed into this compaction. */
	public List<DfsPackDescription> getSourcePacks() {
		return toPrune();
	}

	/** @return new packs created by this compaction. */
	public List<DfsPackDescription> getNewPacks() {
		return newPackDesc;
	}

	/** @return statistics corresponding to {@link #getNewPacks()}. */
	public List<PackStatistics> getNewPackStatistics() {
		return newPackStats;
	}

	private List<DfsPackDescription> toPrune() {
		int cnt = packsBefore.size();
		List<DfsPackDescription> all = new ArrayList<>(cnt);
		for (DfsPackFile pack : packsBefore)
			all.add(pack.getPackDescription());
		return all;
	}

	private void packHeads(ProgressMonitor pm) throws IOException {
		if (allHeads.isEmpty())
			return;

		try (PackWriter pw = newPackWriter()) {
			pw.setTagTargets(tagTargets);
			pw.preparePack(pm, allHeads, PackWriter.NONE);
			if (0 < pw.getObjectCount())
				writePack(GC, pw, pm);
		}
	}

	private void packRest(ProgressMonitor pm) throws IOException {
		if (nonHeads.isEmpty())
			return;

		try (PackWriter pw = newPackWriter()) {
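			// Skip objects that were already written into the heads pack.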
			for (ObjectIdSet packedObjs : newPackObj)
				pw.excludeObjects(packedObjs);
			pw.preparePack(pm, nonHeads, allHeads);
			if (0 < pw.getObjectCount())
				writePack(GC, pw, pm);
		}
	}

	private void packRefTreeGraph(ProgressMonitor pm) throws IOException {
		if (txnHeads.isEmpty())
			return;

		try (PackWriter pw = newPackWriter()) {
			for (ObjectIdSet packedObjs : newPackObj)
				pw.excludeObjects(packedObjs);
			pw.preparePack(pm, txnHeads, PackWriter.NONE);
			if (0 < pw.getObjectCount())
				writePack(GC_TXN, pw, pm);
		}
	}

	private void packGarbage(ProgressMonitor pm) throws IOException {
		// TODO(sop) This is ugly. The garbage pack needs to be deleted.
		PackConfig cfg = new PackConfig(packConfig);
		cfg.setReuseDeltas(true);
		cfg.setReuseObjects(true);
		cfg.setDeltaCompress(false);
		cfg.setBuildBitmaps(false);

		try (PackWriter pw = new PackWriter(cfg, ctx);
				RevWalk pool = new RevWalk(ctx)) {
			pw.setDeltaBaseAsOffset(true);
			pw.setReuseDeltaCommits(true);
			pm.beginTask(JGitText.get().findingGarbage, objectsBefore());
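			// Scan every old pack index; any object that did not make it
			// into one of the new packs is unreachable garbage and gets
			// copied into the UNREACHABLE_GARBAGE pack.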
			for (DfsPackFile oldPack : packsBefore) {
				PackIndex oldIdx = oldPack.getPackIndex(ctx);
				for (PackIndex.MutableEntry ent : oldIdx) {
					pm.update(1);
					ObjectId id = ent.toObjectId();
					if (pool.lookupOrNull(id) != null || anyPackHas(id))
						continue;

					int type = oldPack.getObjectType(ctx, ent.getOffset());
					pw.addObject(pool.lookupAny(id, type));
				}
			}
			pm.endTask();
			if (0 < pw.getObjectCount())
				writePack(UNREACHABLE_GARBAGE, pw, pm);
		}
	}

	private boolean anyPackHas(AnyObjectId id) {
		for (ObjectIdSet packedObjs : newPackObj)
			if (packedObjs.contains(id))
				return true;
		return false;
	}

	private static boolean isHead(Ref ref) {
		return ref.getName().startsWith(Constants.R_HEADS);
	}

	private int objectsBefore() {
		int cnt = 0;
		for (DfsPackFile p : packsBefore)
			cnt += p.getPackDescription().getObjectCount();
		return cnt;
	}

	private PackWriter newPackWriter() {
		PackWriter pw = new PackWriter(packConfig, ctx);
		pw.setDeltaBaseAsOffset(true);
		pw.setReuseDeltaCommits(false);
		return pw;
	}

	private DfsPackDescription writePack(PackSource source, PackWriter pw,
			ProgressMonitor pm) throws IOException {
		DfsOutputStream out;
		DfsPackDescription pack = repo.getObjectDatabase().newPack(source);
		newPackDesc.add(pack);

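		// Write the pack stream first. The monitor is passed twice because
		// PackWriter reports compression and write progress separately.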
		out = objdb.writeFile(pack, PACK);
		try {
			pw.writePack(pm, pm, out);
			pack.addFileExt(PACK);
		} finally {
			out.close();
		}

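		// Then the pack index, counting bytes so the description can
		// record the index size without re-reading the file.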
		out = objdb.writeFile(pack, INDEX);
		try {
			CountingOutputStream cnt = new CountingOutputStream(out);
			pw.writeIndex(cnt);
			pack.addFileExt(INDEX);
			pack.setFileSize(INDEX, cnt.getCount());
			pack.setIndexVersion(pw.getIndexVersion());
		} finally {
			out.close();
		}

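		// A bitmap index is optional; PackWriter only prepares one when
		// the configuration enabled bitmaps and this pack qualifies.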
		if (pw.prepareBitmapIndex(pm)) {
			out = objdb.writeFile(pack, BITMAP_INDEX);
			try {
				CountingOutputStream cnt = new CountingOutputStream(out);
				pw.writeBitmapIndex(cnt);
				pack.addFileExt(BITMAP_INDEX);
				pack.setFileSize(BITMAP_INDEX, cnt.getCount());
			} finally {
				out.close();
			}
		}

		PackStatistics stats = pw.getStatistics();
		pack.setPackStats(stats);
		newPackStats.add(stats);
		newPackObj.add(pw.getObjectSet());

		DfsBlockCache.getInstance().getOrCreate(pack, null);
		return pack;
	}
}
429 }