DfsPackCompactor.java

/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.io.CountingOutputStream;

/**
 * Combine several pack files into one pack.
 * <p>
 * The compactor combines several pack files together by including all objects
 * contained in each pack file into the same output pack. If an object appears
 * multiple times, it is only included once in the result. Because the new pack
 * is constructed by enumerating the indexes of the source packs, it is quicker
 * than doing a full repack of the repository; however, the result is not
 * nearly as space efficient, since new delta compression is disabled.
 * <p>
 * This approach is suitable for quickly combining several packs together after
 * receiving a number of small fetch or push operations into a repository,
 * allowing the system to maintain reasonable read performance without
 * expending a lot of time repacking the entire repository.
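 * <p>
 * A minimal usage sketch (assuming {@code repo} is an existing
 * {@link DfsRepository}):
 *
 * <pre>
 * DfsPackCompactor compactor = new DfsPackCompactor(repo);
 * compactor.autoAdd(); // include small packs, exclude large ones
 * compactor.compact(NullProgressMonitor.INSTANCE);
 * </pre>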
 */
public class DfsPackCompactor {
    private final DfsRepository repo;
    private final List<DfsPackFile> srcPacks;
    private final List<DfsReftable> srcReftables;
    private final List<ObjectIdSet> exclude;

    private PackStatistics newStats;
    private DfsPackDescription outDesc;

    private int autoAddSize;
    private ReftableConfig reftableConfig;

    private RevWalk rw;
    private RevFlag added;
    private RevFlag isBase;

    /**
     * Initialize a pack compactor.
     *
     * @param repository
     *            the repository from which objects to be packed are read.
     */
    public DfsPackCompactor(DfsRepository repository) {
        repo = repository;
        autoAddSize = 5 * 1024 * 1024; // 5 MiB
        srcPacks = new ArrayList<>();
        srcReftables = new ArrayList<>();
        exclude = new ArrayList<>(4);
    }

    /**
     * Set configuration to write a reftable.
     *
     * @param cfg
     *            configuration to write a reftable. Reftable compacting is
     *            disabled (default) when {@code cfg} is {@code null}.
     * @return {@code this}
     */
    public DfsPackCompactor setReftableConfig(ReftableConfig cfg) {
        reftableConfig = cfg;
        return this;
    }

    /**
     * Add a pack to be compacted.
     * <p>
     * All of the objects in this pack will be copied into the resulting pack.
     * The resulting pack will order objects according to the source pack's own
     * description ordering (which is based on creation date), and then by the
     * order the objects appear in the source pack.
     *
     * @param pack
     *            a pack to combine into the resulting pack.
     * @return {@code this}
     */
    public DfsPackCompactor add(DfsPackFile pack) {
        srcPacks.add(pack);
        return this;
    }

    /**
     * Add a reftable to be compacted.
     *
     * @param table
     *            a reftable to combine.
     * @return {@code this}
     */
    public DfsPackCompactor add(DfsReftable table) {
        srcReftables.add(table);
        return this;
    }

    /**
     * Automatically select packs and reftables to be included, and add them.
     * <p>
     * Packs are selected by size: smaller packs are included, while larger
     * packs are excluded so that their objects are not copied into the output
     * pack.
     *
     * @return {@code this}
     * @throws java.io.IOException
     *             existing packs cannot be read.
     */
    public DfsPackCompactor autoAdd() throws IOException {
        DfsObjDatabase objdb = repo.getObjectDatabase();
        for (DfsPackFile pack : objdb.getPacks()) {
            DfsPackDescription d = pack.getPackDescription();
            if (d.getFileSize(PACK) < autoAddSize)
                add(pack);
            else
                exclude(pack);
        }

        if (reftableConfig != null) {
            for (DfsReftable table : objdb.getReftables()) {
                DfsPackDescription d = table.getPackDescription();
                if (d.getPackSource() != GC
                        && d.getFileSize(REFTABLE) < autoAddSize) {
                    add(table);
                }
            }
        }
        return this;
    }

    /**
     * Exclude objects from the compacted pack.
     *
     * @param set
     *            objects to not include.
     * @return {@code this}.
     */
    public DfsPackCompactor exclude(ObjectIdSet set) {
        exclude.add(set);
        return this;
    }

    /**
     * Exclude objects from the compacted pack.
     *
     * @param pack
     *            pack whose objects should not be included.
     * @return {@code this}.
     * @throws java.io.IOException
     *             pack index cannot be loaded.
     */
    public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
        final PackIndex idx;
        try (DfsReader ctx = (DfsReader) repo.newObjectReader()) {
            idx = pack.getPackIndex(ctx);
        }
        return exclude(idx);
    }

    /**
     * Compact the pack files together.
     *
     * @param pm
     *            progress monitor to receive updates; packing may take a
     *            while, depending on the size of the repository. May be
     *            {@code null} to disable progress reporting.
     * @throws java.io.IOException
     *             the packs cannot be compacted.
     */
    public void compact(ProgressMonitor pm) throws IOException {
        if (pm == null) {
            pm = NullProgressMonitor.INSTANCE;
        }

        DfsObjDatabase objdb = repo.getObjectDatabase();
        try (DfsReader ctx = objdb.newReader()) {
            if (reftableConfig != null && !srcReftables.isEmpty()) {
                compactReftables(ctx);
            }
            compactPacks(ctx, pm);

            List<DfsPackDescription> commit = getNewPacks();
            Collection<DfsPackDescription> remove = toPrune();
            if (!commit.isEmpty() || !remove.isEmpty()) {
                objdb.commitPack(commit, remove);
            }
        } finally {
            rw = null;
        }
    }

    private void compactPacks(DfsReader ctx, ProgressMonitor pm)
            throws IOException, IncorrectObjectTypeException {
        DfsObjDatabase objdb = repo.getObjectDatabase();
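        // Favor speed over compression: reuse deltas and whole objects that
        // already exist in the source packs, but do not spend time searching
        // for new deltas.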
        PackConfig pc = new PackConfig(repo);
        pc.setIndexVersion(2);
        pc.setDeltaCompress(false);
        pc.setReuseDeltas(true);
        pc.setReuseObjects(true);

        try (PackWriter pw = new PackWriter(pc, ctx)) {
            pw.setDeltaBaseAsOffset(true);
            pw.setReuseDeltaCommits(false);

            addObjectsToPack(pw, ctx, pm);
            if (pw.getObjectCount() == 0) {
                return;
            }

            boolean rollback = true;
            initOutDesc(objdb);
            try {
                writePack(objdb, outDesc, pw, pm);
                writeIndex(objdb, outDesc, pw);

                PackStatistics stats = pw.getStatistics();

                outDesc.setPackStats(stats);
                newStats = stats;
                rollback = false;
            } finally {
                if (rollback) {
                    objdb.rollbackPack(Collections.singletonList(outDesc));
                }
            }
        }
    }

    private long estimatePackSize() {
        // Every pack file contains 12 bytes of header and 20 bytes of trailer.
        // Include the final pack file header and trailer size here and ignore
        // the same from individual pack files.
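        // For example, compacting two 1 MiB packs yields an estimate of
        // 32 + (1 MiB - 32) + (1 MiB - 32) bytes, i.e. 2 MiB - 32.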
        long size = 32;
        for (DfsPackFile pack : srcPacks) {
            size += pack.getPackDescription().getFileSize(PACK) - 32;
        }
        return size;
    }

    private void compactReftables(DfsReader ctx) throws IOException {
        DfsObjDatabase objdb = repo.getObjectDatabase();
        Collections.sort(srcReftables, objdb.reftableComparator());

        try (ReftableStack stack = ReftableStack.open(ctx, srcReftables)) {
            initOutDesc(objdb);
            ReftableCompactor compact = new ReftableCompactor();
            compact.addAll(stack.readers());
            compact.setIncludeDeletes(true);
            writeReftable(objdb, outDesc, compact);
        }
    }

    private void initOutDesc(DfsObjDatabase objdb) throws IOException {
        if (outDesc == null) {
            outDesc = objdb.newPack(COMPACT, estimatePackSize());
        }
    }

    /**
     * Get all of the source packs that fed into this compaction.
     *
     * @return all of the source packs that fed into this compaction.
     */
    public Collection<DfsPackDescription> getSourcePacks() {
        Set<DfsPackDescription> src = new HashSet<>();
        for (DfsPackFile pack : srcPacks) {
            src.add(pack.getPackDescription());
        }
        for (DfsReftable table : srcReftables) {
            src.add(table.getPackDescription());
        }
        return src;
    }

    /**
     * Get new packs created by this compaction.
     *
     * @return new packs created by this compaction.
     */
    public List<DfsPackDescription> getNewPacks() {
        return outDesc != null
                ? Collections.singletonList(outDesc)
                : Collections.emptyList();
    }

    /**
     * Get statistics corresponding to the {@link #getNewPacks()}.
     * <p>
     * The list is empty if no new packs were created; an element may be
     * {@code null} if statistics are not available for that pack.
     *
     * @return statistics corresponding to the {@link #getNewPacks()}.
     */
    public List<PackStatistics> getNewPackStatistics() {
        return outDesc != null
                ? Collections.singletonList(newStats)
                : Collections.emptyList();
    }

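    /*
     * Determine which source descriptions are safe to delete. A description
     * that carries both a pack and a reftable is pruned only when both parts
     * were included in this compaction; otherwise deleting the file would
     * also delete the still-live part.
     */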
    private Collection<DfsPackDescription> toPrune() {
        Set<DfsPackDescription> packs = new HashSet<>();
        for (DfsPackFile pack : srcPacks) {
            packs.add(pack.getPackDescription());
        }

        Set<DfsPackDescription> reftables = new HashSet<>();
        for (DfsReftable table : srcReftables) {
            reftables.add(table.getPackDescription());
        }

        for (Iterator<DfsPackDescription> i = packs.iterator(); i.hasNext();) {
            DfsPackDescription d = i.next();
            if (d.hasFileExt(REFTABLE) && !reftables.contains(d)) {
                i.remove();
            }
        }

        for (Iterator<DfsPackDescription> i = reftables.iterator();
                i.hasNext();) {
            DfsPackDescription d = i.next();
            if (d.hasFileExt(PACK) && !packs.contains(d)) {
                i.remove();
            }
        }

        Set<DfsPackDescription> toPrune = new HashSet<>();
        toPrune.addAll(packs);
        toPrune.addAll(reftables);
        return toPrune;
    }

    private void addObjectsToPack(PackWriter pw, DfsReader ctx,
            ProgressMonitor pm) throws IOException,
            IncorrectObjectTypeException {
        // Sort packs by description ordering; this places newer packs before
        // older packs, allowing the PackWriter to be handed newer objects
        // first and older objects last.
        Collections.sort(
                srcPacks,
                Comparator.comparing(
                        DfsPackFile::getPackDescription,
                        DfsPackDescription.objectLookupComparator()));

        rw = new RevWalk(ctx);
        added = rw.newFlag("ADDED"); //$NON-NLS-1$
        isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
        List<RevObject> baseObjects = new BlockList<>();

        pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
        for (DfsPackFile src : srcPacks) {
            List<ObjectIdWithOffset> want = toInclude(src, ctx);
            if (want.isEmpty())
                continue;

            PackReverseIndex rev = src.getReverseIdx(ctx);
            DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
            for (ObjectIdWithOffset id : want) {
                int type = src.getObjectType(ctx, id.offset);
                RevObject obj = rw.lookupAny(id, type);
                if (obj.has(added))
                    continue;

                pm.update(1);
                pw.addObject(obj);
                obj.add(added);

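                // If this object is stored as a delta, remember its base so
                // it can also be included, allowing the delta to be reused.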
                src.representation(rep, id.offset, ctx, rev);
                if (rep.getFormat() != PACK_DELTA)
                    continue;

                RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
                if (!base.has(added) && !base.has(isBase)) {
                    baseObjects.add(base);
                    base.add(isBase);
                }
            }
        }
        for (RevObject obj : baseObjects) {
            if (!obj.has(added)) {
                pm.update(1);
                pw.addObject(obj);
                obj.add(added);
            }
        }
        pm.endTask();
    }

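    /*
     * List the objects in src that still need to be copied: skip anything
     * already added to the writer (or queued as a delta base), as well as
     * anything in the exclude sets. Sorting by offset lets the source pack
     * be read back sequentially.
     */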
    private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
            throws IOException {
        PackIndex srcIdx = src.getPackIndex(ctx);
        List<ObjectIdWithOffset> want = new BlockList<>(
                (int) srcIdx.getObjectCount());
        SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
            ObjectId id = ent.toObjectId();
            RevObject obj = rw.lookupOrNull(id);
            if (obj != null && (obj.has(added) || obj.has(isBase)))
                continue;
            for (ObjectIdSet e : exclude)
                if (e.contains(id))
                    continue SCAN;
            want.add(new ObjectIdWithOffset(id, ent.getOffset()));
        }
        Collections.sort(want, (ObjectIdWithOffset a,
                ObjectIdWithOffset b) -> Long.signum(a.offset - b.offset));
        return want;
    }

    private static void writePack(DfsObjDatabase objdb,
            DfsPackDescription pack,
            PackWriter pw, ProgressMonitor pm) throws IOException {
        try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {
            pw.writePack(pm, pm, out);
            pack.addFileExt(PACK);
            pack.setBlockSize(PACK, out.blockSize());
        }
    }

    private static void writeIndex(DfsObjDatabase objdb,
            DfsPackDescription pack,
            PackWriter pw) throws IOException {
        try (DfsOutputStream out = objdb.writeFile(pack, INDEX)) {
            CountingOutputStream cnt = new CountingOutputStream(out);
            pw.writeIndex(cnt);
            pack.addFileExt(INDEX);
            pack.setFileSize(INDEX, cnt.getCount());
            pack.setBlockSize(INDEX, out.blockSize());
            pack.setIndexVersion(pw.getIndexVersion());
        }
    }

    private void writeReftable(DfsObjDatabase objdb, DfsPackDescription pack,
            ReftableCompactor compact) throws IOException {
        try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) {
            compact.setConfig(configureReftable(reftableConfig, out));
            compact.compact(out);
            pack.addFileExt(REFTABLE);
            pack.setReftableStats(compact.getStats());
        }
    }

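    /*
     * Align the reftable's ref blocks to the block size reported by the
     * output stream, when one is available, so a reftable block does not
     * straddle two storage blocks.
     */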
    static ReftableConfig configureReftable(ReftableConfig cfg,
            DfsOutputStream out) {
        int bs = out.blockSize();
        if (bs > 0) {
            cfg = new ReftableConfig(cfg);
            cfg.setRefBlockSize(bs);
            cfg.setAlignBlocks(true);
        }
        return cfg;
    }

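    /** An ObjectId that also remembers its offset within the source pack. */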
    private static class ObjectIdWithOffset extends ObjectId {
        final long offset;

        ObjectIdWithOffset(AnyObjectId id, long ofs) {
            super(id);
            offset = ofs;
        }
    }
}