DfsPackCompactor.java
/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.io.CountingOutputStream;
/**
 * Combine several pack files into one pack.
 * <p>
 * The compactor combines several pack files together by including all objects
 * contained in each pack file into the same output pack. If an object appears
 * multiple times, it is only included once in the result. Because the new pack
 * is constructed by enumerating the indexes of the source packs, it is quicker
 * than doing a full repack of the repository; however, the result is not
 * nearly as space efficient, as new delta compression is disabled.
 * <p>
 * This method is suitable for quickly combining several packs together after
 * receiving a number of small fetch or push operations into a repository,
 * allowing the system to maintain reasonable read performance without expending
 * a lot of time repacking the entire repository.
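 * <p>
 * A minimal usage sketch; {@code repo} and {@code pm} are assumed to be an
 * existing {@link DfsRepository} and {@link ProgressMonitor}:
 *
 * <pre>
 * DfsPackCompactor compactor = new DfsPackCompactor(repo);
 * compactor.autoAdd(); // select small packs (and reftables, if configured)
 * compactor.compact(pm); // write and commit the combined pack
 * </pre>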
 */
public class DfsPackCompactor {
    private final DfsRepository repo;
    private final List<DfsPackFile> srcPacks;
    private final List<DfsReftable> srcReftables;
    private final List<ObjectIdSet> exclude;

    private PackStatistics newStats;
    private DfsPackDescription outDesc;

    private int autoAddSize;
    private ReftableConfig reftableConfig;

    private RevWalk rw;
    private RevFlag added;
    private RevFlag isBase;
    /**
     * Initialize a pack compactor.
     *
     * @param repository
     *            repository objects to be packed will be read from.
     */
    public DfsPackCompactor(DfsRepository repository) {
        repo = repository;
        autoAddSize = 5 * 1024 * 1024; // 5 MiB
        srcPacks = new ArrayList<>();
        srcReftables = new ArrayList<>();
        exclude = new ArrayList<>(4);
    }
    /**
     * Set configuration to write a reftable.
     *
     * @param cfg
     *            configuration to write a reftable. Reftable compacting is
     *            disabled (default) when {@code cfg} is {@code null}.
     * @return {@code this}
     */
    public DfsPackCompactor setReftableConfig(ReftableConfig cfg) {
        reftableConfig = cfg;
        return this;
    }
    /**
     * Add a pack to be compacted.
     * <p>
     * All of the objects in this pack will be copied into the resulting pack.
     * The resulting pack will order objects according to the source pack's own
     * description ordering (which is based on creation date), and then by the
     * order the objects appear in the source pack.
     *
     * @param pack
     *            a pack to combine into the resulting pack.
     * @return {@code this}
     */
    public DfsPackCompactor add(DfsPackFile pack) {
        srcPacks.add(pack);
        return this;
    }
    /**
     * Add a reftable to be compacted.
     *
     * @param table
     *            a reftable to combine.
     * @return {@code this}
     */
    public DfsPackCompactor add(DfsReftable table) {
        srcReftables.add(table);
        return this;
    }
    /**
     * Automatically select packs and reftables to be included, and add them.
     * <p>
     * Packs are selected based on size: smaller packs are included while
     * bigger ones are omitted.
     *
     * @return {@code this}
     * @throws java.io.IOException
     *             existing packs cannot be read.
     */
    public DfsPackCompactor autoAdd() throws IOException {
        DfsObjDatabase objdb = repo.getObjectDatabase();
        for (DfsPackFile pack : objdb.getPacks()) {
            DfsPackDescription d = pack.getPackDescription();
            if (d.getFileSize(PACK) < autoAddSize)
                add(pack);
            else
                exclude(pack);
        }

        if (reftableConfig != null) {
            for (DfsReftable table : objdb.getReftables()) {
                DfsPackDescription d = table.getPackDescription();
                if (d.getPackSource() != GC
                        && d.getFileSize(REFTABLE) < autoAddSize) {
                    add(table);
                }
            }
        }
        return this;
    }
    /**
     * Exclude objects from the compacted pack.
     *
     * @param set
     *            objects to not include.
     * @return {@code this}.
     */
    public DfsPackCompactor exclude(ObjectIdSet set) {
        exclude.add(set);
        return this;
    }

    /**
     * Exclude objects from the compacted pack.
     *
     * @param pack
     *            objects to not include.
     * @return {@code this}.
     * @throws java.io.IOException
     *             pack index cannot be loaded.
     */
    public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
        final PackIndex idx;
        try (DfsReader ctx = (DfsReader) repo.newObjectReader()) {
            idx = pack.getPackIndex(ctx);
        }
        return exclude(idx);
    }
    /**
     * Compact the pack files together.
     *
     * @param pm
     *            progress monitor to receive updates on as packing may take a
     *            while, depending on the size of the repository.
     * @throws java.io.IOException
     *             the packs cannot be compacted.
     */
    public void compact(ProgressMonitor pm) throws IOException {
        if (pm == null) {
            pm = NullProgressMonitor.INSTANCE;
        }

        DfsObjDatabase objdb = repo.getObjectDatabase();
        try (DfsReader ctx = objdb.newReader()) {
            if (reftableConfig != null && !srcReftables.isEmpty()) {
                compactReftables(ctx);
            }
            compactPacks(ctx, pm);

            List<DfsPackDescription> commit = getNewPacks();
            Collection<DfsPackDescription> remove = toPrune();
            if (!commit.isEmpty() || !remove.isEmpty()) {
                objdb.commitPack(commit, remove);
            }
        } finally {
            rw = null;
        }
    }
    private void compactPacks(DfsReader ctx, ProgressMonitor pm)
            throws IOException, IncorrectObjectTypeException {
        DfsObjDatabase objdb = repo.getObjectDatabase();
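        // Favor speed over size: copy existing deltas and whole objects
        // verbatim from the source packs, and do not search for new deltas.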
        PackConfig pc = new PackConfig(repo);
        pc.setIndexVersion(2);
        pc.setDeltaCompress(false);
        pc.setReuseDeltas(true);
        pc.setReuseObjects(true);

        try (PackWriter pw = new PackWriter(pc, ctx)) {
            pw.setDeltaBaseAsOffset(true);
            pw.setReuseDeltaCommits(false);

            addObjectsToPack(pw, ctx, pm);
            if (pw.getObjectCount() == 0) {
                return;
            }

            boolean rollback = true;
            initOutDesc(objdb);
            try {
                writePack(objdb, outDesc, pw, pm);
                writeIndex(objdb, outDesc, pw);

                PackStatistics stats = pw.getStatistics();
                outDesc.setPackStats(stats);
                newStats = stats;

                rollback = false;
            } finally {
                if (rollback) {
                    objdb.rollbackPack(Collections.singletonList(outDesc));
                }
            }
        }
    }
    private long estimatePackSize() {
        // Every pack file contains 12 bytes of header and 20 bytes of trailer.
        // Include the final pack file header and trailer size here and ignore
        // the same from individual pack files.
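        // For example, two source packs of 1000 and 2000 bytes estimate to
        // (1000 - 32) + (2000 - 32) + 32 = 2968 bytes for the combined pack.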
        long size = 32;
        for (DfsPackFile pack : srcPacks) {
            size += pack.getPackDescription().getFileSize(PACK) - 32;
        }
        return size;
    }
    private void compactReftables(DfsReader ctx) throws IOException {
        DfsObjDatabase objdb = repo.getObjectDatabase();
        Collections.sort(srcReftables, objdb.reftableComparator());

        try (ReftableStack stack = ReftableStack.open(ctx, srcReftables)) {
            initOutDesc(objdb);
            ReftableCompactor compact = new ReftableCompactor();
            compact.addAll(stack.readers());
            compact.setIncludeDeletes(true);
            writeReftable(objdb, outDesc, compact);
        }
    }

    private void initOutDesc(DfsObjDatabase objdb) throws IOException {
        if (outDesc == null) {
            outDesc = objdb.newPack(COMPACT, estimatePackSize());
        }
    }
    /**
     * Get all of the source packs that fed into this compaction.
     *
     * @return all of the source packs that fed into this compaction.
     */
    public Collection<DfsPackDescription> getSourcePacks() {
        Set<DfsPackDescription> src = new HashSet<>();
        for (DfsPackFile pack : srcPacks) {
            src.add(pack.getPackDescription());
        }
        for (DfsReftable table : srcReftables) {
            src.add(table.getPackDescription());
        }
        return src;
    }

    /**
     * Get new packs created by this compaction.
     *
     * @return new packs created by this compaction.
     */
    public List<DfsPackDescription> getNewPacks() {
        return outDesc != null
                ? Collections.singletonList(outDesc)
                : Collections.emptyList();
    }
    /**
     * Get statistics corresponding to the {@link #getNewPacks()}.
     * <p>
     * The elements of the list can be null if the statistics are not
     * available, for example when only reftables were compacted.
     *
     * @return statistics corresponding to the {@link #getNewPacks()}.
     */
    public List<PackStatistics> getNewPackStatistics() {
        return outDesc != null
                ? Collections.singletonList(newStats)
                : Collections.emptyList();
    }
    private Collection<DfsPackDescription> toPrune() {
        Set<DfsPackDescription> packs = new HashSet<>();
        for (DfsPackFile pack : srcPacks) {
            packs.add(pack.getPackDescription());
        }

        Set<DfsPackDescription> reftables = new HashSet<>();
        for (DfsReftable table : srcReftables) {
            reftables.add(table.getPackDescription());
        }
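        // A description whose reftable is not part of this compaction must
        // survive; deleting it would also delete the still-live reftable.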
        for (Iterator<DfsPackDescription> i = packs.iterator(); i.hasNext();) {
            DfsPackDescription d = i.next();
            if (d.hasFileExt(REFTABLE) && !reftables.contains(d)) {
                i.remove();
            }
        }
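        // Symmetrically, keep a description whose pack file is not part of
        // this compaction, or its objects would be lost.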
        for (Iterator<DfsPackDescription> i = reftables.iterator();
                i.hasNext();) {
            DfsPackDescription d = i.next();
            if (d.hasFileExt(PACK) && !packs.contains(d)) {
                i.remove();
            }
        }

        Set<DfsPackDescription> toPrune = new HashSet<>();
        toPrune.addAll(packs);
        toPrune.addAll(reftables);
        return toPrune;
    }
    private void addObjectsToPack(PackWriter pw, DfsReader ctx,
            ProgressMonitor pm) throws IOException,
            IncorrectObjectTypeException {
        // Sort packs by description ordering; this places newer packs before
        // older packs, allowing the PackWriter to be handed newer objects
        // first and older objects last.
        Collections.sort(
                srcPacks,
                Comparator.comparing(
                        DfsPackFile::getPackDescription,
                        DfsPackDescription.objectLookupComparator()));

        rw = new RevWalk(ctx);
        added = rw.newFlag("ADDED"); //$NON-NLS-1$
        isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
        List<RevObject> baseObjects = new BlockList<>();

        pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
        for (DfsPackFile src : srcPacks) {
            List<ObjectIdWithOffset> want = toInclude(src, ctx);
            if (want.isEmpty())
                continue;

            PackReverseIndex rev = src.getReverseIdx(ctx);
            DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
            for (ObjectIdWithOffset id : want) {
                int type = src.getObjectType(ctx, id.offset);
                RevObject obj = rw.lookupAny(id, type);
                if (obj.has(added))
                    continue;

                pm.update(1);
                pw.addObject(obj);
                obj.add(added);

                src.representation(rep, id.offset, ctx, rev);
                if (rep.getFormat() != PACK_DELTA)
                    continue;
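                // The object is stored as a delta in its source pack; queue
                // the delta base so it is also copied into the output pack.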
                RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
                if (!base.has(added) && !base.has(isBase)) {
                    baseObjects.add(base);
                    base.add(isBase);
                }
            }
        }
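        // Append any delta bases that were not copied in the loops above.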
        for (RevObject obj : baseObjects) {
            if (!obj.has(added)) {
                pm.update(1);
                pw.addObject(obj);
                obj.add(added);
            }
        }
        pm.endTask();
    }
    private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
            throws IOException {
        PackIndex srcIdx = src.getPackIndex(ctx);
        List<ObjectIdWithOffset> want = new BlockList<>(
                (int) srcIdx.getObjectCount());
        SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
            ObjectId id = ent.toObjectId();
            RevObject obj = rw.lookupOrNull(id);
            if (obj != null && (obj.has(added) || obj.has(isBase)))
                continue;
            for (ObjectIdSet e : exclude)
                if (e.contains(id))
                    continue SCAN;
            want.add(new ObjectIdWithOffset(id, ent.getOffset()));
        }
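        // Sort by offset so objects are copied in the order they appear in
        // the source pack.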
        Collections.sort(want, (ObjectIdWithOffset a,
                ObjectIdWithOffset b) -> Long.signum(a.offset - b.offset));
        return want;
    }
    private static void writePack(DfsObjDatabase objdb,
            DfsPackDescription pack,
            PackWriter pw, ProgressMonitor pm) throws IOException {
        try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {
            pw.writePack(pm, pm, out);
            pack.addFileExt(PACK);
            pack.setBlockSize(PACK, out.blockSize());
        }
    }

    private static void writeIndex(DfsObjDatabase objdb,
            DfsPackDescription pack,
            PackWriter pw) throws IOException {
        try (DfsOutputStream out = objdb.writeFile(pack, INDEX)) {
            CountingOutputStream cnt = new CountingOutputStream(out);
            pw.writeIndex(cnt);
            pack.addFileExt(INDEX);
            pack.setFileSize(INDEX, cnt.getCount());
            pack.setBlockSize(INDEX, out.blockSize());
            pack.setIndexVersion(pw.getIndexVersion());
        }
    }
    private void writeReftable(DfsObjDatabase objdb, DfsPackDescription pack,
            ReftableCompactor compact) throws IOException {
        try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) {
            compact.setConfig(configureReftable(reftableConfig, out));
            compact.compact(out);
            pack.addFileExt(REFTABLE);
            pack.setReftableStats(compact.getStats());
        }
    }
    static ReftableConfig configureReftable(ReftableConfig cfg,
            DfsOutputStream out) {
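        // Match the reftable block size to the DFS output stream's block
        // size and align reftable blocks to it.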
        int bs = out.blockSize();
        if (bs > 0) {
            cfg = new ReftableConfig(cfg);
            cfg.setRefBlockSize(bs);
            cfg.setAlignBlocks(true);
        }
        return cfg;
    }
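    /** An ObjectId bundled with its offset within the source pack. */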
    private static class ObjectIdWithOffset extends ObjectId {
        final long offset;

        ObjectIdWithOffset(AnyObjectId id, long ofs) {
            super(id);
            offset = ofs;
        }
    }
}