ReftableCompactor.java

  1. /*
  2.  * Copyright (C) 2017, Google Inc. and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */

  10. package org.eclipse.jgit.internal.storage.reftable;

  11. import java.io.IOException;
  12. import java.io.OutputStream;
  13. import java.util.ArrayDeque;
  14. import java.util.ArrayList;
  15. import java.util.List;

  16. import org.eclipse.jgit.internal.storage.reftable.ReftableWriter.Stats;
  17. import org.eclipse.jgit.lib.PersonIdent;
  18. import org.eclipse.jgit.lib.ReflogEntry;

  19. /**
  20.  * Merges reftables and compacts them into a single output.
  21.  * <p>
  22.  * For a partial compaction callers should {@link #setIncludeDeletes(boolean)}
  23.  * to {@code true} to ensure the new reftable continues to use a delete marker
  24.  * to shadow any lower reftable that may have the reference present.
  25.  * <p>
  26.  * By default all log entries within the range defined by
  27.  * {@link #setReflogExpireMinUpdateIndex(long)} and {@link #setReflogExpireMaxUpdateIndex(long)} are
  28.  * copied, even if no references in the output file match the log records.
  29.  * Callers may truncate the log to a more recent time horizon with
  30.  * {@link #setReflogExpireOldestReflogTimeMillis(long)}, or disable the log altogether with
  31.  * {@code setOldestReflogTimeMillis(Long.MAX_VALUE)}.
  32.  */
  33. public class ReftableCompactor {
  34.     private final ReftableWriter writer;
  35.     private final ArrayDeque<ReftableReader> tables = new ArrayDeque<>();

  36.     private boolean includeDeletes;
  37.     private long reflogExpireMinUpdateIndex = 0;
  38.     private long reflogExpireMaxUpdateIndex = Long.MAX_VALUE;
  39.     private long reflogExpireOldestReflogTimeMillis;
  40.     private Stats stats;

  41.     /**
  42.      * Creates a new compactor.
  43.      *
  44.      * @param out
  45.      *            stream to write the compacted tables to. Caller is responsible
  46.      *            for closing {@code out}.
  47.      */
  48.     public ReftableCompactor(OutputStream out) {
  49.         writer = new ReftableWriter(out);
  50.     }

  51.     /**
  52.      * Set configuration for the reftable.
  53.      *
  54.      * @param cfg
  55.      *            configuration for the reftable.
  56.      * @return {@code this}
  57.      */
  58.     public ReftableCompactor setConfig(ReftableConfig cfg) {
  59.         writer.setConfig(cfg);
  60.         return this;
  61.     }

  62.     /**
  63.      * Whether to include deletions in the output, which may be necessary for
  64.      * partial compaction.
  65.      *
  66.      * @param deletes
  67.      *            {@code true} to include deletions in the output, which may be
  68.      *            necessary for partial compaction.
  69.      * @return {@code this}
  70.      */
  71.     public ReftableCompactor setIncludeDeletes(boolean deletes) {
  72.         includeDeletes = deletes;
  73.         return this;
  74.     }

  75.     /**
  76.      * Set the minimum update index for log entries that appear in the compacted
  77.      * reftable.
  78.      *
  79.      * @param min
  80.      *            the minimum update index for log entries that appear in the
  81.      *            compacted reftable. This should be 1 higher than the prior
  82.      *            reftable's {@code maxUpdateIndex} if this table will be used
  83.      *            in a stack.
  84.      * @return {@code this}
  85.      */
  86.     public ReftableCompactor setReflogExpireMinUpdateIndex(long min) {
  87.         reflogExpireMinUpdateIndex = min;
  88.         return this;
  89.     }

  90.     /**
  91.      * Set the maximum update index for log entries that appear in the compacted
  92.      * reftable.
  93.      *
  94.      * @param max
  95.      *            the maximum update index for log entries that appear in the
  96.      *            compacted reftable. This should be at least 1 higher than the
  97.      *            prior reftable's {@code maxUpdateIndex} if this table will be
  98.      *            used in a stack.
  99.      * @return {@code this}
  100.      */
  101.     public ReftableCompactor setReflogExpireMaxUpdateIndex(long max) {
  102.         reflogExpireMaxUpdateIndex = max;
  103.         return this;
  104.     }

  105.     /**
  106.      * Set oldest reflog time to preserve.
  107.      *
  108.      * @param timeMillis
  109.      *            oldest log time to preserve. Entries whose timestamps are
  110.      *            {@code >= timeMillis} will be copied into the output file. Log
  111.      *            entries that predate {@code timeMillis} will be discarded.
  112.      *            Specified in Java standard milliseconds since the epoch.
  113.      * @return {@code this}
  114.      */
  115.     public ReftableCompactor setReflogExpireOldestReflogTimeMillis(long timeMillis) {
  116.         reflogExpireOldestReflogTimeMillis = timeMillis;
  117.         return this;
  118.     }

  119.     /**
  120.      * Add all of the tables, in the specified order.
  121.      *
  122.      * @param readers
  123.      *            tables to compact. Tables should be ordered oldest first/most
  124.      *            recent last so that the more recent tables can shadow the
  125.      *            older results. Caller is responsible for closing the readers.
  126.      * @throws java.io.IOException
  127.      *             update indexes of a reader cannot be accessed.
  128.      */
  129.     public void addAll(List<ReftableReader> readers) throws IOException {
  130.         for (ReftableReader r : readers) {
  131.             tables.add(r);
  132.         }
  133.     }

  134.     /**
  135.      * Write a compaction to {@code out}.
  136.      *
  137.      * @throws java.io.IOException
  138.      *             if tables cannot be read, or cannot be written.
  139.      */
  140.     public void compact() throws IOException {
  141.         MergedReftable mr = new MergedReftable(new ArrayList<>(tables));
  142.         mr.setIncludeDeletes(includeDeletes);

  143.         writer.setMaxUpdateIndex(mr.maxUpdateIndex());
  144.         writer.setMinUpdateIndex(mr.minUpdateIndex());

  145.         writer.begin();
  146.         mergeRefs(mr);
  147.         mergeLogs(mr);
  148.         writer.finish();
  149.         stats = writer.getStats();
  150.     }

  151.     /**
  152.      * Get statistics of the last written reftable.
  153.      *
  154.      * @return statistics of the last written reftable.
  155.      */
  156.     public Stats getStats() {
  157.         return stats;
  158.     }

  159.     private void mergeRefs(MergedReftable mr) throws IOException {
  160.         try (RefCursor rc = mr.allRefs()) {
  161.             while (rc.next()) {
  162.                 writer.writeRef(rc.getRef(), rc.getRef().getUpdateIndex());
  163.             }
  164.         }
  165.     }

  166.     private void mergeLogs(MergedReftable mr) throws IOException {
  167.         if (reflogExpireOldestReflogTimeMillis == Long.MAX_VALUE) {
  168.             return;
  169.         }

  170.         try (LogCursor lc = mr.allLogs()) {
  171.             while (lc.next()) {
  172.                 long updateIndex = lc.getUpdateIndex();
  173.                 if (updateIndex > reflogExpireMaxUpdateIndex || updateIndex < reflogExpireMinUpdateIndex) {
  174.                     continue;
  175.                 }

  176.                 String refName = lc.getRefName();
  177.                 ReflogEntry log = lc.getReflogEntry();
  178.                 if (log == null) {
  179.                     if (includeDeletes) {
  180.                         writer.deleteLog(refName, updateIndex);
  181.                     }
  182.                     continue;
  183.                 }

  184.                 PersonIdent who = log.getWho();
  185.                 if (who.getWhen().getTime() >= reflogExpireOldestReflogTimeMillis) {
  186.                     writer.writeLog(
  187.                             refName,
  188.                             updateIndex,
  189.                             who,
  190.                             log.getOldId(),
  191.                             log.getNewId(),
  192.                             log.getComment());
  193.                 }
  194.             }
  195.         }
  196.     }
  197. }