PackWriterBitmapPreparer.java

  1. /*
  2.  * Copyright (C) 2012, Google Inc. and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */

  10. package org.eclipse.jgit.internal.storage.pack;

  11. import static org.eclipse.jgit.internal.storage.file.PackBitmapIndex.FLAG_REUSE;
  12. import static org.eclipse.jgit.revwalk.RevFlag.SEEN;

  13. import java.io.IOException;
  14. import java.util.ArrayList;
  15. import java.util.Collection;
  16. import java.util.Collections;
  17. import java.util.Comparator;
  18. import java.util.HashSet;
  19. import java.util.Iterator;
  20. import java.util.List;
  21. import java.util.Set;

  22. import org.eclipse.jgit.errors.IncorrectObjectTypeException;
  23. import org.eclipse.jgit.errors.MissingObjectException;
  24. import org.eclipse.jgit.internal.JGitText;
  25. import org.eclipse.jgit.internal.revwalk.AddUnseenToBitmapFilter;
  26. import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl;
  27. import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl.CompressedBitmap;
  28. import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
  29. import org.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder;
  30. import org.eclipse.jgit.internal.storage.file.PackBitmapIndexRemapper;
  31. import org.eclipse.jgit.lib.AnyObjectId;
  32. import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;
  33. import org.eclipse.jgit.lib.Constants;
  34. import org.eclipse.jgit.lib.ObjectId;
  35. import org.eclipse.jgit.lib.ObjectReader;
  36. import org.eclipse.jgit.lib.ProgressMonitor;
  37. import org.eclipse.jgit.revwalk.BitmapWalker;
  38. import org.eclipse.jgit.revwalk.ObjectWalk;
  39. import org.eclipse.jgit.revwalk.RevCommit;
  40. import org.eclipse.jgit.revwalk.RevObject;
  41. import org.eclipse.jgit.revwalk.RevWalk;
  42. import org.eclipse.jgit.revwalk.filter.RevFilter;
  43. import org.eclipse.jgit.storage.pack.PackConfig;
  44. import org.eclipse.jgit.util.BlockList;
  45. import org.eclipse.jgit.util.SystemReader;

  46. import com.googlecode.javaewah.EWAHCompressedBitmap;

  47. /**
  48.  * Helper class for the {@link PackWriter} to select commits for which to build
  49.  * pack index bitmaps.
  50.  */
  51. class PackWriterBitmapPreparer {

  52.     private static final int DAY_IN_SECONDS = 24 * 60 * 60;

  53.     private static final int DISTANCE_THRESHOLD = 2000;

  54.     private static final Comparator<RevCommit> ORDER_BY_REVERSE_TIMESTAMP = (
  55.             RevCommit a, RevCommit b) -> Integer
  56.                     .signum(b.getCommitTime() - a.getCommitTime());

  57.     private final ObjectReader reader;
  58.     private final ProgressMonitor pm;
  59.     private final Set<? extends ObjectId> want;
  60.     private final PackBitmapIndexBuilder writeBitmaps;
  61.     private final BitmapIndexImpl commitBitmapIndex;
  62.     private final PackBitmapIndexRemapper bitmapRemapper;
  63.     private final BitmapIndexImpl bitmapIndex;

  64.     private final int contiguousCommitCount;
  65.     private final int recentCommitCount;
  66.     private final int recentCommitSpan;
  67.     private final int distantCommitSpan;
  68.     private final int excessiveBranchCount;
  69.     private final long inactiveBranchTimestamp;

  70.     PackWriterBitmapPreparer(ObjectReader reader,
  71.             PackBitmapIndexBuilder writeBitmaps, ProgressMonitor pm,
  72.             Set<? extends ObjectId> want, PackConfig config)
  73.                     throws IOException {
  74.         this.reader = reader;
  75.         this.writeBitmaps = writeBitmaps;
  76.         this.pm = pm;
  77.         this.want = want;
  78.         this.commitBitmapIndex = new BitmapIndexImpl(writeBitmaps);
  79.         this.bitmapRemapper = PackBitmapIndexRemapper.newPackBitmapIndex(
  80.                 reader.getBitmapIndex(), writeBitmaps);
  81.         this.bitmapIndex = new BitmapIndexImpl(bitmapRemapper);
  82.         this.contiguousCommitCount = config.getBitmapContiguousCommitCount();
  83.         this.recentCommitCount = config.getBitmapRecentCommitCount();
  84.         this.recentCommitSpan = config.getBitmapRecentCommitSpan();
  85.         this.distantCommitSpan = config.getBitmapDistantCommitSpan();
  86.         this.excessiveBranchCount = config.getBitmapExcessiveBranchCount();
  87.         long now = SystemReader.getInstance().getCurrentTime();
  88.         long ageInSeconds = config.getBitmapInactiveBranchAgeInDays()
  89.                 * DAY_IN_SECONDS;
  90.         this.inactiveBranchTimestamp = (now / 1000) - ageInSeconds;
  91.     }

  92.     /**
  93.      * Returns the commit objects for which bitmap indices should be built.
  94.      *
  95.      * @param expectedCommitCount
  96.      *            count of commits in the pack
  97.      * @param excludeFromBitmapSelection
  98.      *            commits that should be excluded from bitmap selection
  99.      * @return commit objects for which bitmap indices should be built
  100.      * @throws IncorrectObjectTypeException
  101.      *             if any of the processed objects is not a commit
  102.      * @throws IOException
  103.      *             on errors reading pack or index files
  104.      * @throws MissingObjectException
  105.      *             if an expected object is missing
  106.      */
  107.     Collection<BitmapCommit> selectCommits(int expectedCommitCount,
  108.             Set<? extends ObjectId> excludeFromBitmapSelection)
  109.             throws IncorrectObjectTypeException, IOException,
  110.             MissingObjectException {
  111.         /*
  112.          * Thinking of bitmap indices as a cache, if we find bitmaps at or at a
  113.          * close ancestor to 'old' and 'new' when calculating old..new, then all
  114.          * objects can be calculated with minimal graph walking. A distribution
  115.          * that favors creating bitmaps for the most recent commits maximizes
  116.          * the cache hits for clients that are close to HEAD, which is the
  117.          * majority of calculations performed.
  118.          */
  119.         try (RevWalk rw = new RevWalk(reader);
  120.                 RevWalk rw2 = new RevWalk(reader)) {
  121.             pm.beginTask(JGitText.get().selectingCommits,
  122.                     ProgressMonitor.UNKNOWN);
  123.             rw.setRetainBody(false);
  124.             CommitSelectionHelper selectionHelper = captureOldAndNewCommits(rw,
  125.                     expectedCommitCount, excludeFromBitmapSelection);
  126.             pm.endTask();

  127.             // Add reused bitmaps from the previous GC pack's bitmap indices.
  128.             // Currently they are always fully reused, even if their spans don't
  129.             // match this run's PackConfig values.
  130.             int newCommits = selectionHelper.getCommitCount();
  131.             BlockList<BitmapCommit> selections = new BlockList<>(
  132.                     selectionHelper.reusedCommits.size()
  133.                             + newCommits / recentCommitSpan + 1);
  134.             for (BitmapCommit reuse : selectionHelper.reusedCommits) {
  135.                 selections.add(reuse);
  136.             }

  137.             if (newCommits == 0) {
  138.                 for (AnyObjectId id : selectionHelper.newWants) {
  139.                     selections.add(new BitmapCommit(id, false, 0));
  140.                 }
  141.                 return selections;
  142.             }

  143.             pm.beginTask(JGitText.get().selectingCommits, newCommits);
  144.             int totalWants = want.size();
  145.             BitmapBuilder seen = commitBitmapIndex.newBitmapBuilder();
  146.             seen.or(selectionHelper.reusedCommitsBitmap);
  147.             rw2.setRetainBody(false);
  148.             rw2.setRevFilter(new NotInBitmapFilter(seen));

  149.             // For each branch, do a revwalk to enumerate its commits. Exclude
  150.             // both reused commits and any commits seen in a previous branch.
  151.             // Then iterate through all new commits from oldest to newest,
  152.             // selecting well-spaced commits in this branch.
  153.             for (RevCommit rc : selectionHelper.newWantsByNewest) {
  154.                 BitmapBuilder tipBitmap = commitBitmapIndex.newBitmapBuilder();
  155.                 rw2.markStart((RevCommit) rw2.peel(rw2.parseAny(rc)));
  156.                 RevCommit rc2;
  157.                 while ((rc2 = rw2.next()) != null) {
  158.                     tipBitmap.addObject(rc2, Constants.OBJ_COMMIT);
  159.                 }
  160.                 int cardinality = tipBitmap.cardinality();
  161.                 seen.or(tipBitmap);

  162.                 // Within this branch, keep ordered lists of commits
  163.                 // representing chains in its history, where each chain is a
  164.                 // "sub-branch". Ordering commits by these chains makes for
  165.                 // fewer differences between consecutive selected commits, which
  166.                 // in turn provides better compression/on the run-length
  167.                 // encoding of the XORs between them.
  168.                 List<List<BitmapCommit>> chains = new ArrayList<>();

  169.                 // Mark the current branch as inactive if its tip commit isn't
  170.                 // recent and there are an excessive number of branches, to
  171.                 // prevent memory bloat of computing too many bitmaps for stale
  172.                 // branches.
  173.                 boolean isActiveBranch = true;
  174.                 if (totalWants > excessiveBranchCount && !isRecentCommit(rc)) {
  175.                     isActiveBranch = false;
  176.                 }

  177.                 // Insert bitmaps at the offsets suggested by the
  178.                 // nextSelectionDistance() heuristic. Only reuse bitmaps created
  179.                 // for more distant commits.
  180.                 int index = -1;
  181.                 int nextIn = nextSpan(cardinality);
  182.                 int nextFlg = nextIn == distantCommitSpan
  183.                         ? PackBitmapIndex.FLAG_REUSE
  184.                         : 0;

  185.                 // For the current branch, iterate through all commits from
  186.                 // oldest to newest.
  187.                 for (RevCommit c : selectionHelper) {
  188.                     // Optimization: if we have found all the commits for this
  189.                     // branch, stop searching
  190.                     int distanceFromTip = cardinality - index - 1;
  191.                     if (distanceFromTip == 0) {
  192.                         break;
  193.                     }

  194.                     // Ignore commits that are not in this branch
  195.                     if (!tipBitmap.contains(c)) {
  196.                         continue;
  197.                     }

  198.                     index++;
  199.                     nextIn--;
  200.                     pm.update(1);

  201.                     // Always pick the items in wants, prefer merge commits.
  202.                     if (selectionHelper.newWants.remove(c)) {
  203.                         if (nextIn > 0) {
  204.                             nextFlg = 0;
  205.                         }
  206.                     } else {
  207.                         boolean stillInSpan = nextIn >= 0;
  208.                         boolean isMergeCommit = c.getParentCount() > 1;
  209.                         // Force selection if:
  210.                         // a) we have exhausted the window looking for merges
  211.                         // b) we are in the top commits of an active branch
  212.                         // c) we are at a branch tip
  213.                         boolean mustPick = (nextIn <= -recentCommitSpan)
  214.                                 || (isActiveBranch
  215.                                         && (distanceFromTip <= contiguousCommitCount))
  216.                                 || (distanceFromTip == 1); // most recent commit
  217.                         if (!mustPick && (stillInSpan || !isMergeCommit)) {
  218.                             continue;
  219.                         }
  220.                     }

  221.                     // This commit is selected.
  222.                     // Calculate where to look for the next one.
  223.                     int flags = nextFlg;
  224.                     int currDist = distanceFromTip;
  225.                     nextIn = nextSpan(distanceFromTip);
  226.                     nextFlg = nextIn == distantCommitSpan
  227.                             ? PackBitmapIndex.FLAG_REUSE
  228.                             : 0;

  229.                     // Create the commit bitmap for the current commit
  230.                     BitmapBuilder bitmap = commitBitmapIndex.newBitmapBuilder();
  231.                     rw.reset();
  232.                     rw.markStart(c);
  233.                     rw.setRevFilter(new AddUnseenToBitmapFilter(
  234.                             selectionHelper.reusedCommitsBitmap, bitmap));
  235.                     while (rw.next() != null) {
  236.                         // The filter adds the reachable commits to bitmap.
  237.                     }

  238.                     // Sort the commits by independent chains in this branch's
  239.                     // history, yielding better compression when building
  240.                     // bitmaps.
  241.                     List<BitmapCommit> longestAncestorChain = null;
  242.                     for (List<BitmapCommit> chain : chains) {
  243.                         BitmapCommit mostRecentCommit = chain
  244.                                 .get(chain.size() - 1);
  245.                         if (bitmap.contains(mostRecentCommit)) {
  246.                             if (longestAncestorChain == null
  247.                                     || longestAncestorChain.size() < chain
  248.                                             .size()) {
  249.                                 longestAncestorChain = chain;
  250.                             }
  251.                         }
  252.                     }

  253.                     if (longestAncestorChain == null) {
  254.                         longestAncestorChain = new ArrayList<>();
  255.                         chains.add(longestAncestorChain);
  256.                     }

  257.                     // The commit bc should reuse bitmap walker from its close
  258.                     // ancestor. And the bitmap of bc should only be added to
  259.                     // PackBitmapIndexBuilder when it's an old enough
  260.                     // commit,i.e. distance from tip should be greater than
  261.                     // DISTANCE_THRESHOLD, to save memory.
  262.                     BitmapCommit bc = BitmapCommit.newBuilder(c).setFlags(flags)
  263.                             .setAddToIndex(currDist >= DISTANCE_THRESHOLD)
  264.                             .setReuseWalker(!longestAncestorChain.isEmpty())
  265.                             .build();
  266.                     longestAncestorChain.add(bc);
  267.                     writeBitmaps.addBitmap(c, bitmap, 0);
  268.                 }

  269.                 for (List<BitmapCommit> chain : chains) {
  270.                     selections.addAll(chain);
  271.                 }
  272.             }

  273.             // Add the remaining peeledWant
  274.             for (AnyObjectId remainingWant : selectionHelper.newWants) {
  275.                 selections.add(new BitmapCommit(remainingWant, false, 0));
  276.             }
  277.             writeBitmaps.resetBitmaps(selections.size()); // Remove the temporary commit bitmaps.

  278.             pm.endTask();
  279.             return selections;
  280.         }
  281.     }

  282.     private boolean isRecentCommit(RevCommit revCommit) {
  283.         return revCommit.getCommitTime() > inactiveBranchTimestamp;
  284.     }

  285.     /**
  286.      * A RevFilter that excludes the commits named in a bitmap from the walk.
  287.      * <p>
  288.      * If a commit is in {@code bitmap} then that commit is not emitted by the
  289.      * walk and its parents are marked as SEEN so the walk can skip them.  The
  290.      * bitmaps passed in have the property that the parents of any commit in
  291.      * {@code bitmap} are also in {@code bitmap}, so marking the parents as
  292.      * SEEN speeds up the RevWalk by saving it from walking down blind alleys
  293.      * and does not change the commits emitted.
  294.      */
  295.     private static class NotInBitmapFilter extends RevFilter {
  296.         private final BitmapBuilder bitmap;

  297.         NotInBitmapFilter(BitmapBuilder bitmap) {
  298.             this.bitmap = bitmap;
  299.         }

  300.         @Override
  301.         public final boolean include(RevWalk rw, RevCommit c) {
  302.             if (!bitmap.contains(c)) {
  303.                 return true;
  304.             }
  305.             for (RevCommit p : c.getParents()) {
  306.                 p.add(SEEN);
  307.             }
  308.             return false;
  309.         }

  310.         @Override
  311.         public final NotInBitmapFilter clone() {
  312.             throw new UnsupportedOperationException();
  313.         }

  314.         @Override
  315.         public final boolean requiresCommitBody() {
  316.             return false;
  317.         }
  318.     }

  319.     /**
  320.      * Records which of the {@code wants} can be found in the previous GC pack's
  321.      * bitmap indices and which are new.
  322.      *
  323.      * @param rw
  324.      *            a {@link RevWalk} to find reachable objects in this repository
  325.      * @param expectedCommitCount
  326.      *            expected count of commits. The actual count may be less due to
  327.      *            unreachable garbage.
  328.      * @param excludeFromBitmapSelection
  329.      *            commits that should be excluded from bitmap selection
  330.      * @return a {@link CommitSelectionHelper} capturing which commits are
  331.      *         covered by a previous pack's bitmaps and which new commits need
  332.      *         bitmap coverage
  333.      * @throws IncorrectObjectTypeException
  334.      *             if any of the processed objects is not a commit
  335.      * @throws IOException
  336.      *             on errors reading pack or index files
  337.      * @throws MissingObjectException
  338.      *             if an expected object is missing
  339.      */
  340.     private CommitSelectionHelper captureOldAndNewCommits(RevWalk rw,
  341.             int expectedCommitCount,
  342.             Set<? extends ObjectId> excludeFromBitmapSelection)
  343.             throws IncorrectObjectTypeException, IOException,
  344.             MissingObjectException {
  345.         // Track bitmaps and commits from the previous GC pack bitmap indices.
  346.         BitmapBuilder reuse = commitBitmapIndex.newBitmapBuilder();
  347.         List<BitmapCommit> reuseCommits = new ArrayList<>();
  348.         for (PackBitmapIndexRemapper.Entry entry : bitmapRemapper) {
  349.             // More recent commits did not have the reuse flag set, so skip them
  350.             if ((entry.getFlags() & FLAG_REUSE) != FLAG_REUSE) {
  351.                 continue;
  352.             }
  353.             RevObject ro = rw.peel(rw.parseAny(entry));
  354.             if (!(ro instanceof RevCommit)) {
  355.                 continue;
  356.             }

  357.             RevCommit rc = (RevCommit) ro;
  358.             reuseCommits.add(new BitmapCommit(rc, false, entry.getFlags()));
  359.             if (!reuse.contains(rc)) {
  360.                 EWAHCompressedBitmap bitmap = bitmapRemapper.ofObjectType(
  361.                         bitmapRemapper.getBitmap(rc), Constants.OBJ_COMMIT);
  362.                 reuse.or(new CompressedBitmap(bitmap, commitBitmapIndex));
  363.             }
  364.         }

  365.         // Add branch tips that are not represented in a previous pack's bitmap
  366.         // indices. Set up a RevWalk to find new commits not in the old packs.
  367.         List<RevCommit> newWantsByNewest = new ArrayList<>(want.size());
  368.         Set<RevCommit> newWants = new HashSet<>(want.size());
  369.         for (AnyObjectId objectId : want) {
  370.             RevObject ro = rw.peel(rw.parseAny(objectId));
  371.             if (!(ro instanceof RevCommit) || reuse.contains(ro)
  372.                     || excludeFromBitmapSelection.contains(ro)) {
  373.                 continue;
  374.             }

  375.             RevCommit rc = (RevCommit) ro;
  376.             rw.markStart(rc);
  377.             newWants.add(rc);
  378.             newWantsByNewest.add(rc);
  379.         }

  380.         // Create a list of commits in reverse order (older to newer) that are
  381.         // not in the previous bitmap indices and are reachable.
  382.         rw.setRevFilter(new NotInBitmapFilter(reuse));
  383.         RevCommit[] commits = new RevCommit[expectedCommitCount];
  384.         int pos = commits.length;
  385.         RevCommit rc;
  386.         while ((rc = rw.next()) != null && pos > 0) {
  387.             commits[--pos] = rc;
  388.             pm.update(1);
  389.         }

  390.         // Sort the new wants by reverse commit time.
  391.         Collections.sort(newWantsByNewest, ORDER_BY_REVERSE_TIMESTAMP);
  392.         return new CommitSelectionHelper(newWants, commits, pos,
  393.                 newWantsByNewest, reuse, reuseCommits);
  394.     }

  395.     /*-
  396.      * Returns the desired distance to the next bitmap based on the distance
  397.      * from the tip commit. Only differentiates recent from distant spans,
  398.      * selectCommits() handles the contiguous commits at the tip for active
  399.      * or inactive branches.
  400.      *
  401.      * A graph of this function looks like this, where
  402.      * the X axis is the distance from the tip commit and the Y axis is the
  403.      * bitmap selection distance.
  404.      *
  405.      * 5000                ____...
  406.      *                    /
  407.      *                  /
  408.      *                /
  409.      *              /
  410.      *  100  _____/
  411.      *       0  20100  25000
  412.      *
  413.      * Linear scaling between 20100 and 25000 prevents spans >100 for distances
  414.      * <20000 (otherwise, a span of 5000 would be returned for a distance of
  415.      * 21000, and the range 16000-20000 would have no selections).
  416.      */
  417.     int nextSpan(int distanceFromTip) {
  418.         if (distanceFromTip < 0) {
  419.             throw new IllegalArgumentException();
  420.         }

  421.         // Commits more toward the start will have more bitmaps.
  422.         if (distanceFromTip <= recentCommitCount) {
  423.             return recentCommitSpan;
  424.         }

  425.         int next = Math.min(distanceFromTip - recentCommitCount,
  426.                 distantCommitSpan);
  427.         return Math.max(next, recentCommitSpan);
  428.     }

  429.     BitmapWalker newBitmapWalker() {
  430.         return new BitmapWalker(
  431.                 new ObjectWalk(reader), bitmapIndex, null);
  432.     }

  433.     /**
  434.      * Container for state used in the first phase of selecting commits, which
  435.      * walks all of the reachable commits via the branch tips that are not
  436.      * covered by a previous pack's bitmaps ({@code newWants}) and stores them
  437.      * in {@code newCommitsByOldest}. {@code newCommitsByOldest} is initialized
  438.      * with an expected size of all commits, but may be smaller if some commits
  439.      * are unreachable and/or some commits are covered by a previous pack's
  440.      * bitmaps. {@code commitStartPos} will contain a positive offset to either
  441.      * the root commit or the oldest commit not covered by previous bitmaps.
  442.      */
  443.     private static final class CommitSelectionHelper implements Iterable<RevCommit> {
  444.         final Set<? extends ObjectId> newWants;

  445.         final List<RevCommit> newWantsByNewest;
  446.         final BitmapBuilder reusedCommitsBitmap;

  447.         final List<BitmapCommit> reusedCommits;

  448.         final RevCommit[] newCommitsByOldest;

  449.         final int newCommitStartPos;

  450.         CommitSelectionHelper(Set<? extends ObjectId> newWants,
  451.                 RevCommit[] commitsByOldest, int commitStartPos,
  452.                 List<RevCommit> newWantsByNewest,
  453.                 BitmapBuilder reusedCommitsBitmap,
  454.                 List<BitmapCommit> reuse) {
  455.             this.newWants = newWants;
  456.             this.newCommitsByOldest = commitsByOldest;
  457.             this.newCommitStartPos = commitStartPos;
  458.             this.newWantsByNewest = newWantsByNewest;
  459.             this.reusedCommitsBitmap = reusedCommitsBitmap;
  460.             this.reusedCommits = reuse;
  461.         }

  462.         @Override
  463.         public Iterator<RevCommit> iterator() {
  464.             // Member variables referenced by this iterator will have synthetic
  465.             // accessors generated for them if they are made private.
  466.             return new Iterator<RevCommit>() {
  467.                 int pos = newCommitStartPos;

  468.                 @Override
  469.                 public boolean hasNext() {
  470.                     return pos < newCommitsByOldest.length;
  471.                 }

  472.                 @Override
  473.                 public RevCommit next() {
  474.                     return newCommitsByOldest[pos++];
  475.                 }

  476.                 @Override
  477.                 public void remove() {
  478.                     throw new UnsupportedOperationException();
  479.                 }
  480.             };
  481.         }

  482.         int getCommitCount() {
  483.             return newCommitsByOldest.length - newCommitStartPos;
  484.         }
  485.     }
  486. }