FileReftableStack.java
/*
* Copyright (C) 2019 Google LLC and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.file;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.LockFailedException;
import org.eclipse.jgit.internal.storage.io.BlockSource;
import org.eclipse.jgit.internal.storage.reftable.MergedReftable;
import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
import org.eclipse.jgit.internal.storage.reftable.ReftableReader;
import org.eclipse.jgit.internal.storage.reftable.ReftableWriter;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.util.FileUtils;
/**
* A mutable stack of reftables on local filesystem storage. Not thread-safe.
* This is an AutoCloseable because this object owns the file handles to the
* open reftables.
*/
public class FileReftableStack implements AutoCloseable {
private static class StackEntry {
String name;
ReftableReader reftableReader;
}
private MergedReftable mergedReftable;
private List<StackEntry> stack;
private long lastNextUpdateIndex;
private final File stackPath;
private final File reftableDir;
private final Runnable onChange;
private final SecureRandom random = new SecureRandom();
private final Supplier<Config> configSupplier;
// Used for stats & testing.
static class CompactionStats {
long tables;
long bytes;
int attempted;
int failed;
long refCount;
long logCount;
CompactionStats() {
tables = 0;
bytes = 0;
attempted = 0;
failed = 0;
logCount = 0;
refCount = 0;
}
}
private final CompactionStats stats;
/**
* Creates a stack corresponding to the list of reftables in the argument
*
* @param stackPath
* the filename for the stack.
* @param reftableDir
* the dir holding the tables.
* @param onChange
* hook to call if we notice a new write
* @param configSupplier
* Config supplier
* @throws IOException
* on I/O problems
*/
public FileReftableStack(File stackPath, File reftableDir,
@Nullable Runnable onChange, Supplier<Config> configSupplier)
throws IOException {
this.stackPath = stackPath;
this.reftableDir = reftableDir;
this.stack = new ArrayList<>();
this.configSupplier = configSupplier;
this.onChange = onChange;
// skip event notification
lastNextUpdateIndex = 0;
reload();
stats = new CompactionStats();
}
CompactionStats getStats() {
return stats;
}
/**
* Reloads the stack, potentially reusing opened reftableReaders.
*
* @param names
* holds the names of the tables to load.
* @throws FileNotFoundException
* load must be retried.
* @throws IOException
* on other IO errors.
*/
private void reloadOnce(List<String> names)
throws IOException, FileNotFoundException {
Map<String, ReftableReader> current = stack.stream()
.collect(Collectors.toMap(e -> e.name, e -> e.reftableReader));
List<ReftableReader> newTables = new ArrayList<>();
List<StackEntry> newStack = new ArrayList<>(stack.size() + 1);
try {
for (String name : names) {
StackEntry entry = new StackEntry();
entry.name = name;
ReftableReader t = null;
if (current.containsKey(name)) {
t = current.remove(name);
} else {
File subtable = new File(reftableDir, name);
FileInputStream is;
is = new FileInputStream(subtable);
t = new ReftableReader(BlockSource.from(is));
newTables.add(t);
}
entry.reftableReader = t;
newStack.add(entry);
}
// survived without exceptions: swap in new stack, and close
// dangling tables.
stack = newStack;
newTables.clear();
current.values().forEach(r -> {
try {
r.close();
} catch (IOException e) {
throw new AssertionError(e);
}
});
} finally {
newTables.forEach(t -> {
try {
t.close();
} catch (IOException ioe) {
// reader close should not generate errors.
throw new AssertionError(ioe);
}
});
}
}
void reload() throws IOException {
// Try for 2.5 seconds.
long deadline = System.currentTimeMillis() + 2500;
// A successful reftable transaction is 2 atomic file writes
// (open, write, close, rename), which a fast Linux system should be
// able to do in about ~200us. So 1 ms should be ample time.
long min = 1;
long max = 1000;
long delay = 0;
boolean success = false;
// Don't check deadline for the first 3 retries, so we can step with a
// debugger without worrying about deadlines.
int tries = 0;
while (tries < 3 || System.currentTimeMillis() < deadline) {
List<String> names = readTableNames();
tries++;
try {
reloadOnce(names);
success = true;
break;
} catch (FileNotFoundException e) {
List<String> changed = readTableNames();
if (changed.equals(names)) {
throw e;
}
}
delay = FileUtils.delay(delay, min, max);
try {
Thread.sleep(delay);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException(e);
}
}
if (!success) {
throw new LockFailedException(stackPath);
}
mergedReftable = new MergedReftable(stack.stream()
.map(x -> x.reftableReader).collect(Collectors.toList()));
long curr = nextUpdateIndex();
if (lastNextUpdateIndex > 0 && lastNextUpdateIndex != curr
&& onChange != null) {
onChange.run();
}
lastNextUpdateIndex = curr;
}
/**
* @return the merged reftable
*/
public MergedReftable getMergedReftable() {
return mergedReftable;
}
/**
* Writer is a callable that writes data to a reftable under construction.
* It should set the min/max update index, and then write refs and/or logs.
* It should not call finish() on the writer.
*/
public interface Writer {
/**
* Write data to reftable
*
* @param w
* writer to use
* @throws IOException
*/
void call(ReftableWriter w) throws IOException;
}
private List<String> readTableNames() throws IOException {
List<String> names = new ArrayList<>(stack.size() + 1);
try (BufferedReader br = new BufferedReader(
new InputStreamReader(new FileInputStream(stackPath), UTF_8))) {
String line;
while ((line = br.readLine()) != null) {
if (!line.isEmpty()) {
names.add(line);
}
}
} catch (FileNotFoundException e) {
// file isn't there: empty repository.
}
return names;
}
/**
* @return true if the on-disk file corresponds to the in-memory data.
* @throws IOException
* on IO problem
*/
boolean isUpToDate() throws IOException {
// We could use FileSnapshot to avoid reading the file, but the file is
// small so it's probably a minor optimization.
try {
List<String> names = readTableNames();
if (names.size() != stack.size()) {
return false;
}
for (int i = 0; i < names.size(); i++) {
if (!names.get(i).equals(stack.get(i).name)) {
return false;
}
}
} catch (FileNotFoundException e) {
return stack.isEmpty();
}
return true;
}
/**
* {@inheritDoc}
*/
@Override
public void close() {
for (StackEntry entry : stack) {
try {
entry.reftableReader.close();
} catch (Exception e) {
// we are reading; this should never fail.
throw new AssertionError(e);
}
}
}
private long nextUpdateIndex() throws IOException {
return stack.size() > 0
? stack.get(stack.size() - 1).reftableReader.maxUpdateIndex()
+ 1
: 1;
}
private String filename(long low, long high) {
return String.format("%012x-%012x-%08x", //$NON-NLS-1$
Long.valueOf(low), Long.valueOf(high),
Integer.valueOf(random.nextInt()));
}
/**
* Tries to add a new reftable to the stack. Returns true if it succeeded,
* or false if there was a lock failure, due to races with other processes.
* This is package private so FileReftableDatabase can call into here.
*
* @param w
* writer to write data to a reftable under construction
* @return true if the transaction was successful.
* @throws IOException
* on I/O problems
*/
@SuppressWarnings("nls")
public boolean addReftable(Writer w) throws IOException {
LockFile lock = new LockFile(stackPath);
try {
if (!lock.lockForAppend()) {
return false;
}
if (!isUpToDate()) {
return false;
}
String fn = filename(nextUpdateIndex(), nextUpdateIndex());
File tmpTable = File.createTempFile(fn + "_", ".ref",
stackPath.getParentFile());
ReftableWriter.Stats s;
try (FileOutputStream fos = new FileOutputStream(tmpTable)) {
ReftableWriter rw = new ReftableWriter(reftableConfig(), fos);
w.call(rw);
rw.finish();
s = rw.getStats();
}
if (s.minUpdateIndex() < nextUpdateIndex()) {
return false;
}
// The spec says to name log-only files with .log, which is somewhat
// pointless given compaction, but we do so anyway.
fn += s.refCount() > 0 ? ".ref" : ".log";
File dest = new File(reftableDir, fn);
FileUtils.rename(tmpTable, dest, StandardCopyOption.ATOMIC_MOVE);
lock.write((fn + "\n").getBytes(UTF_8));
if (!lock.commit()) {
FileUtils.delete(dest);
return false;
}
reload();
autoCompact();
} finally {
lock.unlock();
}
return true;
}
private ReftableConfig reftableConfig() {
return new ReftableConfig(configSupplier.get());
}
/**
* Write the reftable for the given range into a temp file.
*
* @param first
* index of first stack entry to be written
* @param last
* index of last stack entry to be written
* @return the file holding the replacement table.
* @throws IOException
* on I/O problem
*/
private File compactLocked(int first, int last) throws IOException {
String fn = filename(first, last);
File tmpTable = File.createTempFile(fn + "_", ".ref", //$NON-NLS-1$//$NON-NLS-2$
stackPath.getParentFile());
try (FileOutputStream fos = new FileOutputStream(tmpTable)) {
ReftableCompactor c = new ReftableCompactor(fos)
.setConfig(reftableConfig())
.setIncludeDeletes(first > 0);
List<ReftableReader> compactMe = new ArrayList<>();
long totalBytes = 0;
for (int i = first; i <= last; i++) {
compactMe.add(stack.get(i).reftableReader);
totalBytes += stack.get(i).reftableReader.size();
}
c.addAll(compactMe);
c.compact();
// Even though the compaction did not definitely succeed, we keep
// tally here as we've expended the effort.
stats.bytes += totalBytes;
stats.tables += first - last + 1;
stats.attempted++;
stats.refCount += c.getStats().refCount();
stats.logCount += c.getStats().logCount();
}
return tmpTable;
}
/**
* Compacts a range of the stack, following the file locking protocol
* documented in the spec.
*
* @param first
* index of first stack entry to be considered in compaction
* @param last
* index of last stack entry to be considered in compaction
* @return true if a compaction was successfully applied.
* @throws IOException
* on I/O problem
*/
boolean compactRange(int first, int last) throws IOException {
if (first >= last) {
return true;
}
LockFile lock = new LockFile(stackPath);
File tmpTable = null;
List<LockFile> subtableLocks = new ArrayList<>();
try {
if (!lock.lock()) {
return false;
}
if (!isUpToDate()) {
return false;
}
List<File> deleteOnSuccess = new ArrayList<>();
for (int i = first; i <= last; i++) {
File f = new File(reftableDir, stack.get(i).name);
LockFile lf = new LockFile(f);
if (!lf.lock()) {
return false;
}
subtableLocks.add(lf);
deleteOnSuccess.add(f);
}
lock.unlock();
lock = null;
tmpTable = compactLocked(first, last);
lock = new LockFile(stackPath);
if (!lock.lock()) {
return false;
}
if (!isUpToDate()) {
return false;
}
String fn = filename(
stack.get(first).reftableReader.minUpdateIndex(),
stack.get(last).reftableReader.maxUpdateIndex());
// The spec suggests to use .log for log-only tables, and collect
// all log entries in a single file at the bottom of the stack. That would
// require supporting overlapping ranges for the different tables. For the
// sake of simplicity, we simply ignore this and always produce a log +
// ref combined table.
fn += ".ref"; //$NON-NLS-1$
File dest = new File(reftableDir, fn);
FileUtils.rename(tmpTable, dest, StandardCopyOption.ATOMIC_MOVE);
tmpTable = null;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < first; i++) {
sb.append(stack.get(i).name + "\n"); //$NON-NLS-1$
}
sb.append(fn + "\n"); //$NON-NLS-1$
for (int i = last + 1; i < stack.size(); i++) {
sb.append(stack.get(i).name + "\n"); //$NON-NLS-1$
}
lock.write(sb.toString().getBytes(UTF_8));
if (!lock.commit()) {
dest.delete();
return false;
}
for (File f : deleteOnSuccess) {
Files.delete(f.toPath());
}
reload();
return true;
} finally {
if (tmpTable != null) {
tmpTable.delete();
}
for (LockFile lf : subtableLocks) {
lf.unlock();
}
if (lock != null) {
lock.unlock();
}
}
}
/**
* Calculate an approximate log2.
*
* @param sz
* @return log2
*/
static int log(long sz) {
long base = 2;
if (sz <= 0) {
throw new IllegalArgumentException("log2 negative"); //$NON-NLS-1$
}
int l = 0;
while (sz > 0) {
l++;
sz /= base;
}
return l - 1;
}
/**
* A segment is a consecutive list of reftables of the same approximate
* size.
*/
static class Segment {
// the approximate log_2 of the size.
int log;
// The total bytes in this segment
long bytes;
int start;
int end; // exclusive.
int size() {
return end - start;
}
Segment(int start, int end, int log, long bytes) {
this.log = log;
this.start = start;
this.end = end;
this.bytes = bytes;
}
Segment() {
this(0, 0, 0, 0);
}
@Override
public int hashCode() {
return 0; // appease error-prone
}
@Override
public boolean equals(Object other) {
if (other == null) {
return false;
}
Segment o = (Segment) other;
return o.bytes == bytes && o.log == log && o.start == start
&& o.end == end;
}
@SuppressWarnings("boxing")
@Override
public String toString() {
return String.format("{ [%d,%d) l=%d sz=%d }", start, end, log, //$NON-NLS-1$
bytes);
}
}
static List<Segment> segmentSizes(long[] sizes) {
List<Segment> segments = new ArrayList<>();
Segment cur = new Segment();
for (int i = 0; i < sizes.length; i++) {
int l = log(sizes[i]);
if (l != cur.log && cur.bytes > 0) {
segments.add(cur);
cur = new Segment();
cur.start = i;
cur.log = l;
}
cur.log = l;
cur.end = i + 1;
cur.bytes += sizes[i];
}
segments.add(cur);
return segments;
}
private static Optional<Segment> autoCompactCandidate(long[] sizes) {
if (sizes.length == 0) {
return Optional.empty();
}
// The cost of compaction is proportional to the size, and we want to
// avoid frequent large compactions. We do this by playing the game 2048
// here: first compact together the smallest tables if there are more
// than one. Then try to see if the result will be big enough to match
// up with next up.
List<Segment> segments = segmentSizes(sizes);
segments = segments.stream().filter(s -> s.size() > 1)
.collect(Collectors.toList());
if (segments.isEmpty()) {
return Optional.empty();
}
Optional<Segment> optMinSeg = segments.stream()
.min(Comparator.comparing(s -> Integer.valueOf(s.log)));
// Input is non-empty, so always present.
Segment smallCollected = optMinSeg.get();
while (smallCollected.start > 0) {
int prev = smallCollected.start - 1;
long prevSize = sizes[prev];
if (log(smallCollected.bytes) < log(prevSize)) {
break;
}
smallCollected.start = prev;
smallCollected.bytes += prevSize;
}
return Optional.of(smallCollected);
}
/**
* Heuristically tries to compact the stack if the stack has a suitable
* shape.
*
* @throws IOException
*/
private void autoCompact() throws IOException {
Optional<Segment> cand = autoCompactCandidate(tableSizes());
if (cand.isPresent()) {
if (!compactRange(cand.get().start, cand.get().end - 1)) {
stats.failed++;
}
}
}
// 68b footer, 24b header = 92.
private static long OVERHEAD = 91;
private long[] tableSizes() throws IOException {
long[] sizes = new long[stack.size()];
for (int i = 0; i < stack.size(); i++) {
// If we don't subtract the overhead, the file size isn't
// proportional to the number of entries. This will cause us to
// compact too often, which is expensive.
sizes[i] = stack.get(i).reftableReader.size() - OVERHEAD;
}
return sizes;
}
void compactFully() throws IOException {
if (!compactRange(0, stack.size() - 1)) {
stats.failed++;
}
}
}