ReftableReader.java
/*
* Copyright (C) 2017, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.reftable;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.eclipse.jgit.internal.storage.reftable.BlockReader.decodeBlockLen;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_BLOCK_TYPE;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_FOOTER_LEN;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.FILE_HEADER_LEN;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.INDEX_BLOCK_TYPE;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.LOG_BLOCK_TYPE;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.REF_BLOCK_TYPE;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.VERSION_1;
import static org.eclipse.jgit.internal.storage.reftable.ReftableConstants.isFileHeaderMagic;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.text.MessageFormat;
import java.util.Arrays;
import java.util.zip.CRC32;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.io.BlockSource;
import org.eclipse.jgit.internal.storage.reftable.BlockWriter.LogEntry;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.ReflogEntry;
import org.eclipse.jgit.util.LongList;
import org.eclipse.jgit.util.LongMap;
import org.eclipse.jgit.util.NB;
/**
* Reads a reftable formatted file.
* <p>
* {@code ReftableReader} is not thread-safe. Concurrent readers need their own
* instance to read from the same file.
*/
public class ReftableReader extends Reftable implements AutoCloseable {
private final BlockSource src;
private int blockSize = -1;
private long minUpdateIndex;
private long maxUpdateIndex;
private long refEnd;
private long objPosition;
private long objEnd;
private long logPosition;
private long logEnd;
private int objIdLen;
private long refIndexPosition = -1;
private long objIndexPosition = -1;
private long logIndexPosition = -1;
private BlockReader refIndex;
private BlockReader objIndex;
private BlockReader logIndex;
private LongMap<BlockReader> indexCache;
/**
* Initialize a new reftable reader.
*
* @param src
* the file content to read.
*/
public ReftableReader(BlockSource src) {
this.src = src;
}
/**
* Get the block size in bytes chosen for this file by the writer.
*
* @return the block size in bytes chosen for this file by the writer. Most
* reads from the
* {@link org.eclipse.jgit.internal.storage.io.BlockSource} will be
* aligned to the block size.
* @throws java.io.IOException
* file cannot be read.
*/
public int blockSize() throws IOException {
if (blockSize == -1) {
readFileHeader();
}
return blockSize;
}
@Override
public boolean hasObjectMap() throws IOException {
if (objIndexPosition == -1) {
readFileFooter();
}
// We have the map, we have no refs, or the table is small.
return (objPosition > 0 || refEnd == 24 || refIndexPosition == 0);
}
/**
* {@inheritDoc}
*/
@Override
public long minUpdateIndex() throws IOException {
if (blockSize == -1) {
readFileHeader();
}
return minUpdateIndex;
}
/**
* {@inheritDoc}
*/
@Override
public long maxUpdateIndex() throws IOException {
if (blockSize == -1) {
readFileHeader();
}
return maxUpdateIndex;
}
/** {@inheritDoc} */
@Override
public RefCursor allRefs() throws IOException {
if (blockSize == -1) {
readFileHeader();
}
if (refEnd == 0) {
readFileFooter();
}
src.adviseSequentialRead(0, refEnd);
RefCursorImpl i = new RefCursorImpl(refEnd, null, false);
i.block = readBlock(0, refEnd);
return i;
}
/** {@inheritDoc} */
@Override
public RefCursor seekRef(String refName) throws IOException {
initRefIndex();
byte[] key = refName.getBytes(UTF_8);
RefCursorImpl i = new RefCursorImpl(refEnd, key, false);
i.block = seek(REF_BLOCK_TYPE, key, refIndex, 0, refEnd);
return i;
}
/** {@inheritDoc} */
@Override
public RefCursor seekRefsWithPrefix(String prefix) throws IOException {
initRefIndex();
byte[] key = prefix.getBytes(UTF_8);
RefCursorImpl i = new RefCursorImpl(refEnd, key, true);
i.block = seek(REF_BLOCK_TYPE, key, refIndex, 0, refEnd);
return i;
}
/** {@inheritDoc} */
@Override
public RefCursor byObjectId(AnyObjectId id) throws IOException {
initObjIndex();
ObjCursorImpl i = new ObjCursorImpl(refEnd, id);
if (objIndex != null) {
i.initSeek();
} else {
i.initScan();
}
return i;
}
/** {@inheritDoc} */
@Override
public LogCursor allLogs() throws IOException {
initLogIndex();
if (logPosition > 0) {
src.adviseSequentialRead(logPosition, logEnd);
LogCursorImpl i = new LogCursorImpl(logEnd, null);
i.block = readBlock(logPosition, logEnd);
return i;
}
return new EmptyLogCursor();
}
/** {@inheritDoc} */
@Override
public LogCursor seekLog(String refName, long updateIndex)
throws IOException {
initLogIndex();
if (logPosition > 0) {
byte[] key = LogEntry.key(refName, updateIndex);
byte[] match = refName.getBytes(UTF_8);
LogCursorImpl i = new LogCursorImpl(logEnd, match);
i.block = seek(LOG_BLOCK_TYPE, key, logIndex, logPosition, logEnd);
return i;
}
return new EmptyLogCursor();
}
private BlockReader seek(byte blockType, byte[] key, BlockReader idx,
long startPos, long endPos) throws IOException {
if (idx != null) {
// Walk through a possibly multi-level index to a leaf block.
BlockReader block = idx;
do {
if (block.seekKey(key) > 0) {
return null;
}
long pos = block.readPositionFromIndex();
block = readBlock(pos, endPos);
} while (block.type() == INDEX_BLOCK_TYPE);
block.seekKey(key);
return block;
}
if (blockType == LOG_BLOCK_TYPE) {
// No index. Log blocks are irregularly sized, so we can't do binary
// search between blocks. Scan over blocks instead.
BlockReader block = readBlock(startPos, endPos);
for (;;) {
if (block == null || block.type() != LOG_BLOCK_TYPE) {
return null;
}
int result = block.seekKey(key);
if (result <= 0) {
// == 0 : we found the key.
// < 0 : the key is before this block. Either the ref name is there
// but only at a newer updateIndex, or it is absent. We leave it to
// logcursor to distinguish between both cases.
return block;
}
long pos = block.endPosition();
if (pos >= endPos) {
return null;
}
block = readBlock(pos, endPos);
}
}
return binarySearch(blockType, key, startPos, endPos);
}
private BlockReader binarySearch(byte blockType, byte[] key,
long startPos, long endPos) throws IOException {
if (blockSize == 0) {
BlockReader b = readBlock(startPos, endPos);
if (blockType != b.type()) {
return null;
}
b.seekKey(key);
return b;
}
int low = (int) (startPos / blockSize);
int end = blocksIn(startPos, endPos);
BlockReader block = null;
do {
int mid = (low + end) >>> 1;
block = readBlock(((long) mid) * blockSize, endPos);
if (blockType != block.type()) {
return null;
}
int cmp = block.seekKey(key);
if (cmp < 0) {
end = mid;
} else if (cmp == 0) {
break;
} else /* if (cmp > 0) */ {
low = mid + 1;
}
} while (low < end);
return block;
}
private void readFileHeader() throws IOException {
readHeaderOrFooter(0, FILE_HEADER_LEN);
}
private void readFileFooter() throws IOException {
int ftrLen = FILE_FOOTER_LEN;
byte[] ftr = readHeaderOrFooter(src.size() - ftrLen, ftrLen);
CRC32 crc = new CRC32();
crc.update(ftr, 0, ftrLen - 4);
if (crc.getValue() != NB.decodeUInt32(ftr, ftrLen - 4)) {
throw new IOException(JGitText.get().invalidReftableCRC);
}
refIndexPosition = NB.decodeInt64(ftr, 24);
long p = NB.decodeInt64(ftr, 32);
objPosition = p >>> 5;
objIdLen = (int) (p & 0x1f);
objIndexPosition = NB.decodeInt64(ftr, 40);
logPosition = NB.decodeInt64(ftr, 48);
logIndexPosition = NB.decodeInt64(ftr, 56);
if (refIndexPosition > 0) {
refEnd = refIndexPosition;
} else if (objPosition > 0) {
refEnd = objPosition;
} else if (logPosition > 0) {
refEnd = logPosition;
} else {
refEnd = src.size() - ftrLen;
}
if (objPosition > 0) {
if (objIndexPosition > 0) {
objEnd = objIndexPosition;
} else if (logPosition > 0) {
objEnd = logPosition;
} else {
objEnd = src.size() - ftrLen;
}
}
if (logPosition > 0) {
if (logIndexPosition > 0) {
logEnd = logIndexPosition;
} else {
logEnd = src.size() - ftrLen;
}
}
}
private byte[] readHeaderOrFooter(long pos, int len) throws IOException {
ByteBuffer buf = src.read(pos, len);
if (buf.position() != len) {
throw new IOException(JGitText.get().shortReadOfBlock);
}
byte[] tmp = new byte[len];
buf.flip();
buf.get(tmp);
if (!isFileHeaderMagic(tmp, 0, len)) {
throw new IOException(JGitText.get().invalidReftableFile);
}
int v = NB.decodeInt32(tmp, 4);
int version = v >>> 24;
if (VERSION_1 != version) {
throw new IOException(MessageFormat.format(
JGitText.get().unsupportedReftableVersion,
Integer.valueOf(version)));
}
if (blockSize == -1) {
blockSize = v & 0xffffff;
}
minUpdateIndex = NB.decodeInt64(tmp, 8);
maxUpdateIndex = NB.decodeInt64(tmp, 16);
return tmp;
}
private void initRefIndex() throws IOException {
if (refIndexPosition < 0) {
readFileFooter();
}
if (refIndex == null && refIndexPosition > 0) {
refIndex = readIndex(refIndexPosition);
}
}
private void initObjIndex() throws IOException {
if (objIndexPosition < 0) {
readFileFooter();
}
if (objIndex == null && objIndexPosition > 0) {
objIndex = readIndex(objIndexPosition);
}
}
private void initLogIndex() throws IOException {
if (logIndexPosition < 0) {
readFileFooter();
}
if (logIndex == null && logIndexPosition > 0) {
logIndex = readIndex(logIndexPosition);
}
}
private BlockReader readIndex(long pos) throws IOException {
int sz = readBlockLen(pos);
BlockReader i = new BlockReader();
i.readBlock(src, pos, sz);
i.verifyIndex();
return i;
}
private int readBlockLen(long pos) throws IOException {
int sz = pos == 0 ? FILE_HEADER_LEN + 4 : 4;
ByteBuffer tmp = src.read(pos, sz);
if (tmp.position() < sz) {
throw new IOException(JGitText.get().invalidReftableFile);
}
byte[] buf;
if (tmp.hasArray() && tmp.arrayOffset() == 0) {
buf = tmp.array();
} else {
buf = new byte[sz];
tmp.flip();
tmp.get(buf);
}
if (pos == 0 && buf[FILE_HEADER_LEN] == FILE_BLOCK_TYPE) {
return FILE_HEADER_LEN;
}
int p = pos == 0 ? FILE_HEADER_LEN : 0;
return decodeBlockLen(NB.decodeInt32(buf, p));
}
private BlockReader readBlock(long pos, long end) throws IOException {
if (indexCache != null) {
BlockReader b = indexCache.get(pos);
if (b != null) {
return b;
}
}
int sz = blockSize;
if (sz == 0) {
sz = readBlockLen(pos);
} else if (pos + sz > end) {
sz = (int) (end - pos); // last block may omit padding.
}
BlockReader b = new BlockReader();
b.readBlock(src, pos, sz);
if (b.type() == INDEX_BLOCK_TYPE && !b.truncated()) {
if (indexCache == null) {
indexCache = new LongMap<>();
}
indexCache.put(pos, b);
}
return b;
}
private int blocksIn(long pos, long end) {
int blocks = (int) ((end - pos) / blockSize);
return end % blockSize == 0 ? blocks : (blocks + 1);
}
/**
* Get size of the reftable, in bytes.
*
* @return size of the reftable, in bytes.
* @throws java.io.IOException
* size cannot be obtained.
*/
public long size() throws IOException {
return src.size();
}
/** {@inheritDoc} */
@Override
public void close() throws IOException {
src.close();
}
private class RefCursorImpl extends RefCursor {
private final long scanEnd;
private final byte[] match;
private final boolean prefix;
private Ref ref;
BlockReader block;
RefCursorImpl(long scanEnd, byte[] match, boolean prefix) {
this.scanEnd = scanEnd;
this.match = match;
this.prefix = prefix;
}
@Override
public boolean next() throws IOException {
for (;;) {
if (block == null || block.type() != REF_BLOCK_TYPE) {
return false;
} else if (!block.next()) {
long pos = block.endPosition();
if (pos >= scanEnd) {
return false;
}
block = readBlock(pos, scanEnd);
continue;
}
block.parseKey();
if (match != null && !block.match(match, prefix)) {
block.skipValue();
return false;
}
ref = block.readRef(minUpdateIndex);
if (!includeDeletes && wasDeleted()) {
continue;
}
return true;
}
}
@Override
public void seekPastPrefix(String prefixName) throws IOException {
initRefIndex();
byte[] key = prefixName.getBytes(UTF_8);
ByteBuffer byteBuffer = ByteBuffer.allocate(key.length + 1);
byteBuffer.put(key);
// Add the representation of the last byte lexicographically. Based on how UTF_8
// representation works, this byte will be bigger lexicographically than any
// UTF_8 character when translated into bytes, since 0xFF can never be a part of
// a UTF_8 string.
byteBuffer.put((byte) 0xFF);
block = seek(REF_BLOCK_TYPE, byteBuffer.array(), refIndex, 0, refEnd);
}
@Override
public Ref getRef() {
return ref;
}
@Override
public void close() {
// Do nothing.
}
}
private class LogCursorImpl extends LogCursor {
private final long scanEnd;
private final byte[] match;
private String refName;
private long updateIndex;
private ReflogEntry entry;
BlockReader block;
/**
* Scans logs from this table until scanEnd position.
*
* @param scanEnd
* end of the log data in the reftable.
* @param match
* if non-null, limits the scan to precisely that refname.
*/
LogCursorImpl(long scanEnd, byte[] match) {
this.scanEnd = scanEnd;
this.match = match;
}
@Override
public boolean next() throws IOException {
for (;;) {
if (block == null || block.type() != LOG_BLOCK_TYPE) {
return false;
} else if (!block.next()) {
long pos = block.endPosition();
if (pos >= scanEnd) {
return false;
}
block = readBlock(pos, scanEnd);
continue;
}
block.parseKey();
if (match != null && !block.match(match, false)) {
block.skipValue();
return false;
}
refName = block.name();
updateIndex = block.readLogUpdateIndex();
entry = block.readLogEntry();
if (entry == null && !includeDeletes) {
continue;
}
return true;
}
}
@Override
public String getRefName() {
return refName;
}
@Override
public long getUpdateIndex() {
return updateIndex;
}
@Override
public ReflogEntry getReflogEntry() {
return entry;
}
@Override
public void close() {
// Do nothing.
}
}
static final LongList EMPTY_LONG_LIST = new LongList(0);
private class ObjCursorImpl extends RefCursor {
private final long scanEnd;
private final ObjectId match;
private Ref ref;
private int listIdx;
private LongList blockPos;
private BlockReader block;
ObjCursorImpl(long scanEnd, AnyObjectId id) {
this.scanEnd = scanEnd;
this.match = id.copy();
}
void initSeek() throws IOException {
byte[] rawId = new byte[OBJECT_ID_LENGTH];
match.copyRawTo(rawId, 0);
byte[] key = Arrays.copyOf(rawId, objIdLen);
BlockReader b = objIndex;
do {
if (b.seekKey(key) > 0) {
blockPos = EMPTY_LONG_LIST;
return;
}
long pos = b.readPositionFromIndex();
b = readBlock(pos, objEnd);
} while (b.type() == INDEX_BLOCK_TYPE);
b.seekKey(key);
while (b.next()) {
b.parseKey();
if (b.match(key, false)) {
blockPos = b.readBlockPositionList();
if (blockPos == null) {
initScan();
return;
}
break;
}
b.skipValue();
}
if (blockPos == null) {
blockPos = EMPTY_LONG_LIST;
}
if (blockPos.size() > 0) {
long pos = blockPos.get(listIdx++);
block = readBlock(pos, scanEnd);
}
}
void initScan() throws IOException {
block = readBlock(0, scanEnd);
}
@Override
public boolean next() throws IOException {
for (;;) {
if (block == null || block.type() != REF_BLOCK_TYPE) {
return false;
} else if (!block.next()) {
long pos;
if (blockPos != null) {
if (listIdx >= blockPos.size()) {
return false;
}
pos = blockPos.get(listIdx++);
} else {
pos = block.endPosition();
}
if (pos >= scanEnd) {
return false;
}
block = readBlock(pos, scanEnd);
continue;
}
block.parseKey();
ref = block.readRef(minUpdateIndex);
ObjectId id = ref.getObjectId();
if (id != null && match.equals(id)
&& (includeDeletes || !wasDeleted())) {
return true;
}
}
}
@Override
/**
* The implementation here would not be efficient complexity-wise since it
* expected that there are a small number of refs that match the same object id.
* In such case it's better to not even use this method (as the caller might
* expect it to be efficient).
*/
public void seekPastPrefix(String prefixName) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Ref getRef() {
return ref;
}
@Override
public void close() {
// Do nothing.
}
}
}