ReftableCompactor.java
/*
* Copyright (C) 2017, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.reftable;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import org.eclipse.jgit.internal.storage.reftable.ReftableWriter.Stats;
import org.eclipse.jgit.lib.PersonIdent;
import org.eclipse.jgit.lib.ReflogEntry;
/**
* Merges reftables and compacts them into a single output.
* <p>
* For a partial compaction callers should {@link #setIncludeDeletes(boolean)}
* to {@code true} to ensure the new reftable continues to use a delete marker
* to shadow any lower reftable that may have the reference present.
* <p>
* By default all log entries within the range defined by
* {@link #setReflogExpireMinUpdateIndex(long)} and {@link #setReflogExpireMaxUpdateIndex(long)} are
* copied, even if no references in the output file match the log records.
* Callers may truncate the log to a more recent time horizon with
* {@link #setReflogExpireOldestReflogTimeMillis(long)}, or disable the log altogether with
* {@code setOldestReflogTimeMillis(Long.MAX_VALUE)}.
*/
public class ReftableCompactor {
private final ReftableWriter writer;
private final ArrayDeque<ReftableReader> tables = new ArrayDeque<>();
private boolean includeDeletes;
private long reflogExpireMinUpdateIndex = 0;
private long reflogExpireMaxUpdateIndex = Long.MAX_VALUE;
private long reflogExpireOldestReflogTimeMillis;
private Stats stats;
/**
* Creates a new compactor.
*
* @param out
* stream to write the compacted tables to. Caller is responsible
* for closing {@code out}.
*/
public ReftableCompactor(OutputStream out) {
writer = new ReftableWriter(out);
}
/**
* Set configuration for the reftable.
*
* @param cfg
* configuration for the reftable.
* @return {@code this}
*/
public ReftableCompactor setConfig(ReftableConfig cfg) {
writer.setConfig(cfg);
return this;
}
/**
* Whether to include deletions in the output, which may be necessary for
* partial compaction.
*
* @param deletes
* {@code true} to include deletions in the output, which may be
* necessary for partial compaction.
* @return {@code this}
*/
public ReftableCompactor setIncludeDeletes(boolean deletes) {
includeDeletes = deletes;
return this;
}
/**
* Set the minimum update index for log entries that appear in the compacted
* reftable.
*
* @param min
* the minimum update index for log entries that appear in the
* compacted reftable. This should be 1 higher than the prior
* reftable's {@code maxUpdateIndex} if this table will be used
* in a stack.
* @return {@code this}
*/
public ReftableCompactor setReflogExpireMinUpdateIndex(long min) {
reflogExpireMinUpdateIndex = min;
return this;
}
/**
* Set the maximum update index for log entries that appear in the compacted
* reftable.
*
* @param max
* the maximum update index for log entries that appear in the
* compacted reftable. This should be at least 1 higher than the
* prior reftable's {@code maxUpdateIndex} if this table will be
* used in a stack.
* @return {@code this}
*/
public ReftableCompactor setReflogExpireMaxUpdateIndex(long max) {
reflogExpireMaxUpdateIndex = max;
return this;
}
/**
* Set oldest reflog time to preserve.
*
* @param timeMillis
* oldest log time to preserve. Entries whose timestamps are
* {@code >= timeMillis} will be copied into the output file. Log
* entries that predate {@code timeMillis} will be discarded.
* Specified in Java standard milliseconds since the epoch.
* @return {@code this}
*/
public ReftableCompactor setReflogExpireOldestReflogTimeMillis(long timeMillis) {
reflogExpireOldestReflogTimeMillis = timeMillis;
return this;
}
/**
* Add all of the tables, in the specified order.
*
* @param readers
* tables to compact. Tables should be ordered oldest first/most
* recent last so that the more recent tables can shadow the
* older results. Caller is responsible for closing the readers.
* @throws java.io.IOException
* update indexes of a reader cannot be accessed.
*/
public void addAll(List<ReftableReader> readers) throws IOException {
for (ReftableReader r : readers) {
tables.add(r);
}
}
/**
* Write a compaction to {@code out}.
*
* @throws java.io.IOException
* if tables cannot be read, or cannot be written.
*/
public void compact() throws IOException {
MergedReftable mr = new MergedReftable(new ArrayList<>(tables));
mr.setIncludeDeletes(includeDeletes);
writer.setMaxUpdateIndex(mr.maxUpdateIndex());
writer.setMinUpdateIndex(mr.minUpdateIndex());
writer.begin();
mergeRefs(mr);
mergeLogs(mr);
writer.finish();
stats = writer.getStats();
}
/**
* Get statistics of the last written reftable.
*
* @return statistics of the last written reftable.
*/
public Stats getStats() {
return stats;
}
private void mergeRefs(MergedReftable mr) throws IOException {
try (RefCursor rc = mr.allRefs()) {
while (rc.next()) {
writer.writeRef(rc.getRef(), rc.getRef().getUpdateIndex());
}
}
}
private void mergeLogs(MergedReftable mr) throws IOException {
if (reflogExpireOldestReflogTimeMillis == Long.MAX_VALUE) {
return;
}
try (LogCursor lc = mr.allLogs()) {
while (lc.next()) {
long updateIndex = lc.getUpdateIndex();
if (updateIndex > reflogExpireMaxUpdateIndex || updateIndex < reflogExpireMinUpdateIndex) {
continue;
}
String refName = lc.getRefName();
ReflogEntry log = lc.getReflogEntry();
if (log == null) {
if (includeDeletes) {
writer.deleteLog(refName, updateIndex);
}
continue;
}
PersonIdent who = log.getWho();
if (who.getWhen().getTime() >= reflogExpireOldestReflogTimeMillis) {
writer.writeLog(
refName,
updateIndex,
who,
log.getOldId(),
log.getNewId(),
log.getComment());
}
}
}
}
}