PackInserter.java
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.internal.storage.file;
import static java.nio.file.StandardCopyOption.ATOMIC_MOVE;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import static org.eclipse.jgit.lib.Constants.OBJ_OFS_DELTA;
import static org.eclipse.jgit.lib.Constants.OBJ_REF_DELTA;
import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.channels.Channels;
import java.text.MessageFormat;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.InflaterCache;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdOwnerMap;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ObjectStream;
import org.eclipse.jgit.transport.PackParser;
import org.eclipse.jgit.transport.PackedObjectInfo;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.FileUtils;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.NB;
import org.eclipse.jgit.util.io.CountingOutputStream;
import org.eclipse.jgit.util.sha1.SHA1;
/**
* Object inserter that inserts one pack per call to {@link #flush()}, and never
* inserts loose objects.
*/
public class PackInserter extends ObjectInserter {
/** Always produce version 2 indexes, to get CRC data. */
private static final int INDEX_VERSION = 2;
private final ObjectDirectory db;
private List<PackedObjectInfo> objectList;
private ObjectIdOwnerMap<PackedObjectInfo> objectMap;
private boolean rollback;
private boolean checkExisting = true;
private int compression = Deflater.BEST_COMPRESSION;
private File tmpPack;
private PackStream packOut;
private Inflater cachedInflater;
PackInserter(ObjectDirectory db) {
this.db = db;
}
/**
* Whether to check if objects exist in the repo
*
* @param check
* if {@code false}, will write out possibly-duplicate objects
* without first checking whether they exist in the repo; default
* is true.
*/
public void checkExisting(boolean check) {
checkExisting = check;
}
/**
* Set compression level for zlib deflater.
*
* @param compression
* compression level for zlib deflater.
*/
public void setCompressionLevel(int compression) {
this.compression = compression;
}
int getBufferSize() {
return buffer().length;
}
/** {@inheritDoc} */
@Override
public ObjectId insert(int type, byte[] data, int off, int len)
throws IOException {
ObjectId id = idFor(type, data, off, len);
if (objectMap != null && objectMap.contains(id)) {
return id;
}
// Ignore loose objects, which are potentially unreachable.
if (checkExisting && db.hasPackedObject(id)) {
return id;
}
long offset = beginObject(type, len);
packOut.compress.write(data, off, len);
packOut.compress.finish();
return endObject(id, offset);
}
/** {@inheritDoc} */
@Override
public ObjectId insert(int type, long len, InputStream in)
throws IOException {
byte[] buf = buffer();
if (len <= buf.length) {
IO.readFully(in, buf, 0, (int) len);
return insert(type, buf, 0, (int) len);
}
long offset = beginObject(type, len);
SHA1 md = digest();
md.update(Constants.encodedTypeString(type));
md.update((byte) ' ');
md.update(Constants.encodeASCII(len));
md.update((byte) 0);
while (0 < len) {
int n = in.read(buf, 0, (int) Math.min(buf.length, len));
if (n <= 0) {
throw new EOFException();
}
md.update(buf, 0, n);
packOut.compress.write(buf, 0, n);
len -= n;
}
packOut.compress.finish();
return endObject(md.toObjectId(), offset);
}
private long beginObject(int type, long len) throws IOException {
if (packOut == null) {
beginPack();
}
long offset = packOut.getOffset();
packOut.beginObject(type, len);
return offset;
}
private ObjectId endObject(ObjectId id, long offset) {
PackedObjectInfo obj = new PackedObjectInfo(id);
obj.setOffset(offset);
obj.setCRC((int) packOut.crc32.getValue());
objectList.add(obj);
objectMap.addIfAbsent(obj);
return id;
}
private static File idxFor(File packFile) {
String p = packFile.getName();
return new File(
packFile.getParentFile(),
p.substring(0, p.lastIndexOf('.')) + ".idx"); //$NON-NLS-1$
}
private void beginPack() throws IOException {
objectList = new BlockList<>();
objectMap = new ObjectIdOwnerMap<>();
rollback = true;
tmpPack = File.createTempFile("insert_", ".pack", db.getDirectory()); //$NON-NLS-1$ //$NON-NLS-2$
packOut = new PackStream(tmpPack);
// Write the header as though it were a single object pack.
packOut.write(packOut.hdrBuf, 0, writePackHeader(packOut.hdrBuf, 1));
}
private static int writePackHeader(byte[] buf, int objectCount) {
System.arraycopy(Constants.PACK_SIGNATURE, 0, buf, 0, 4);
NB.encodeInt32(buf, 4, 2); // Always use pack version 2.
NB.encodeInt32(buf, 8, objectCount);
return 12;
}
/** {@inheritDoc} */
@Override
public PackParser newPackParser(InputStream in) {
throw new UnsupportedOperationException();
}
/** {@inheritDoc} */
@Override
public ObjectReader newReader() {
return new Reader();
}
/** {@inheritDoc} */
@Override
public void flush() throws IOException {
if (tmpPack == null) {
return;
}
if (packOut == null) {
throw new IOException();
}
byte[] packHash;
try {
packHash = packOut.finishPack();
} finally {
packOut = null;
}
Collections.sort(objectList);
File tmpIdx = idxFor(tmpPack);
writePackIndex(tmpIdx, packHash, objectList);
File realPack = new File(db.getPackDirectory(),
"pack-" + computeName(objectList).name() + ".pack"); //$NON-NLS-1$ //$NON-NLS-2$
db.closeAllPackHandles(realPack);
tmpPack.setReadOnly();
FileUtils.rename(tmpPack, realPack, ATOMIC_MOVE);
File realIdx = idxFor(realPack);
tmpIdx.setReadOnly();
try {
FileUtils.rename(tmpIdx, realIdx, ATOMIC_MOVE);
} catch (IOException e) {
File newIdx = new File(
realIdx.getParentFile(), realIdx.getName() + ".new"); //$NON-NLS-1$
try {
FileUtils.rename(tmpIdx, newIdx, ATOMIC_MOVE);
} catch (IOException e2) {
newIdx = tmpIdx;
e = e2;
}
throw new IOException(MessageFormat.format(
JGitText.get().panicCantRenameIndexFile, newIdx,
realIdx), e);
}
db.openPack(realPack);
rollback = false;
clear();
}
private static void writePackIndex(File idx, byte[] packHash,
List<PackedObjectInfo> list) throws IOException {
try (OutputStream os = new FileOutputStream(idx)) {
PackIndexWriter w = PackIndexWriter.createVersion(os, INDEX_VERSION);
w.write(list, packHash);
}
}
private ObjectId computeName(List<PackedObjectInfo> list) {
SHA1 md = digest().reset();
byte[] buf = buffer();
for (PackedObjectInfo otp : list) {
otp.copyRawTo(buf, 0);
md.update(buf, 0, OBJECT_ID_LENGTH);
}
return ObjectId.fromRaw(md.digest());
}
/** {@inheritDoc} */
@Override
public void close() {
try {
if (packOut != null) {
try {
packOut.close();
} catch (IOException err) {
// Ignore a close failure, the pack should be removed.
}
}
if (rollback && tmpPack != null) {
try {
FileUtils.delete(tmpPack);
} catch (IOException e) {
// Still delete idx.
}
try {
FileUtils.delete(idxFor(tmpPack));
} catch (IOException e) {
// Ignore error deleting temp idx.
}
rollback = false;
}
} finally {
clear();
try {
InflaterCache.release(cachedInflater);
} finally {
cachedInflater = null;
}
}
}
private void clear() {
objectList = null;
objectMap = null;
tmpPack = null;
packOut = null;
}
private Inflater inflater() {
if (cachedInflater == null) {
cachedInflater = InflaterCache.get();
} else {
cachedInflater.reset();
}
return cachedInflater;
}
/**
* Stream that writes to a pack file.
* <p>
* Backed by two views of the same open file descriptor: a random-access file,
* and an output stream. Seeking in the file causes subsequent writes to the
* output stream to occur wherever the file pointer is pointing, so we need to
* take care to always seek to the end of the file before writing a new
* object.
* <p>
* Callers should always use {@link #seek(long)} to seek, rather than reaching
* into the file member. As long as this contract is followed, calls to {@link
* #write(byte[], int, int)} are guaranteed to write at the end of the file,
* even if there have been intermediate seeks.
*/
private class PackStream extends OutputStream {
final byte[] hdrBuf;
final CRC32 crc32;
final DeflaterOutputStream compress;
private final RandomAccessFile file;
private final CountingOutputStream out;
private final Deflater deflater;
private boolean atEnd;
PackStream(File pack) throws IOException {
file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$
out = new CountingOutputStream(new FileOutputStream(file.getFD()));
deflater = new Deflater(compression);
compress = new DeflaterOutputStream(this, deflater, 8192);
hdrBuf = new byte[32];
crc32 = new CRC32();
atEnd = true;
}
long getOffset() {
// This value is accurate as long as we only ever write to the end of the
// file, and don't seek back to overwrite any previous segments. Although
// this is subtle, storing the stream counter this way is still preferable
// to returning file.length() here, as it avoids a syscall and possible
// IOException.
return out.getCount();
}
void seek(long offset) throws IOException {
file.seek(offset);
atEnd = false;
}
void beginObject(int objectType, long length) throws IOException {
crc32.reset();
deflater.reset();
write(hdrBuf, 0, encodeTypeSize(objectType, length));
}
private int encodeTypeSize(int type, long rawLength) {
long nextLength = rawLength >>> 4;
hdrBuf[0] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (type << 4) | (rawLength & 0x0F));
rawLength = nextLength;
int n = 1;
while (rawLength > 0) {
nextLength >>>= 7;
hdrBuf[n++] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (rawLength & 0x7F));
rawLength = nextLength;
}
return n;
}
@Override
public void write(int b) throws IOException {
hdrBuf[0] = (byte) b;
write(hdrBuf, 0, 1);
}
@Override
public void write(byte[] data, int off, int len) throws IOException {
crc32.update(data, off, len);
if (!atEnd) {
file.seek(file.length());
atEnd = true;
}
out.write(data, off, len);
}
byte[] finishPack() throws IOException {
// Overwrite placeholder header with actual object count, then hash. This
// method intentionally uses direct seek/write calls rather than the
// wrappers which keep track of atEnd. This leaves atEnd, the file
// pointer, and out's counter in an inconsistent state; that's ok, since
// this method closes the file anyway.
try {
file.seek(0);
out.write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size()));
byte[] buf = buffer();
SHA1 md = digest().reset();
file.seek(0);
while (true) {
int r = file.read(buf);
if (r < 0) {
break;
}
md.update(buf, 0, r);
}
byte[] packHash = md.digest();
out.write(packHash, 0, packHash.length);
return packHash;
} finally {
close();
}
}
@Override
public void close() throws IOException {
deflater.end();
try {
out.close();
} finally {
file.close();
}
}
byte[] inflate(long filePos, int len) throws IOException, DataFormatException {
byte[] dstbuf;
try {
dstbuf = new byte[len];
} catch (OutOfMemoryError noMemory) {
return null; // Caller will switch to large object streaming.
}
byte[] srcbuf = buffer();
Inflater inf = inflater();
filePos += setInput(filePos, inf, srcbuf);
for (int dstoff = 0;;) {
int n = inf.inflate(dstbuf, dstoff, dstbuf.length - dstoff);
dstoff += n;
if (inf.finished()) {
return dstbuf;
}
if (inf.needsInput()) {
filePos += setInput(filePos, inf, srcbuf);
} else if (n == 0) {
throw new DataFormatException();
}
}
}
private int setInput(long filePos, Inflater inf, byte[] buf)
throws IOException {
if (file.getFilePointer() != filePos) {
seek(filePos);
}
int n = file.read(buf);
if (n < 0) {
throw new EOFException(JGitText.get().unexpectedEofInPack);
}
inf.setInput(buf, 0, n);
return n;
}
}
private class Reader extends ObjectReader {
private final ObjectReader ctx;
private Reader() {
ctx = db.newReader();
setStreamFileThreshold(ctx.getStreamFileThreshold());
}
@Override
public ObjectReader newReader() {
return db.newReader();
}
@Override
public ObjectInserter getCreatedFromInserter() {
return PackInserter.this;
}
@Override
public Collection<ObjectId> resolve(AbbreviatedObjectId id)
throws IOException {
Collection<ObjectId> stored = ctx.resolve(id);
if (objectList == null) {
return stored;
}
Set<ObjectId> r = new HashSet<>(stored.size() + 2);
r.addAll(stored);
for (PackedObjectInfo obj : objectList) {
if (id.prefixCompare(obj) == 0) {
r.add(obj.copy());
}
}
return r;
}
@Override
public ObjectLoader open(AnyObjectId objectId, int typeHint)
throws MissingObjectException, IncorrectObjectTypeException,
IOException {
if (objectMap == null) {
return ctx.open(objectId, typeHint);
}
PackedObjectInfo obj = objectMap.get(objectId);
if (obj == null) {
return ctx.open(objectId, typeHint);
}
byte[] buf = buffer();
packOut.seek(obj.getOffset());
int cnt = packOut.file.read(buf, 0, 20);
if (cnt <= 0) {
throw new EOFException(JGitText.get().unexpectedEofInPack);
}
int c = buf[0] & 0xff;
int type = (c >> 4) & 7;
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
throw new IOException(MessageFormat.format(
JGitText.get().cannotReadBackDelta, Integer.toString(type)));
}
if (typeHint != OBJ_ANY && type != typeHint) {
throw new IncorrectObjectTypeException(objectId.copy(), typeHint);
}
long sz = c & 0x0f;
int ptr = 1;
int shift = 4;
while ((c & 0x80) != 0) {
if (ptr >= cnt) {
throw new EOFException(JGitText.get().unexpectedEofInPack);
}
c = buf[ptr++] & 0xff;
sz += ((long) (c & 0x7f)) << shift;
shift += 7;
}
long zpos = obj.getOffset() + ptr;
if (sz < getStreamFileThreshold()) {
byte[] data = inflate(obj, zpos, (int) sz);
if (data != null) {
return new ObjectLoader.SmallObject(type, data);
}
}
return new StreamLoader(type, sz, zpos);
}
private byte[] inflate(PackedObjectInfo obj, long zpos, int sz)
throws IOException, CorruptObjectException {
try {
return packOut.inflate(zpos, sz);
} catch (DataFormatException dfe) {
throw new CorruptObjectException(
MessageFormat.format(
JGitText.get().objectAtHasBadZlibStream,
Long.valueOf(obj.getOffset()),
tmpPack.getAbsolutePath()),
dfe);
}
}
@Override
public Set<ObjectId> getShallowCommits() throws IOException {
return ctx.getShallowCommits();
}
@Override
public void close() {
ctx.close();
}
private class StreamLoader extends ObjectLoader {
private final int type;
private final long size;
private final long pos;
StreamLoader(int type, long size, long pos) {
this.type = type;
this.size = size;
this.pos = pos;
}
@Override
public ObjectStream openStream()
throws MissingObjectException, IOException {
int bufsz = buffer().length;
packOut.seek(pos);
InputStream fileStream = new FilterInputStream(
Channels.newInputStream(packOut.file.getChannel())) {
// atEnd was already set to false by the previous seek, but it's
// technically possible for a caller to call insert on the
// inserter in the middle of reading from this stream. Behavior is
// undefined in this case, so it would arguably be ok to ignore,
// but it's not hard to at least make an attempt to not corrupt
// the data.
@Override
public int read() throws IOException {
packOut.atEnd = false;
return super.read();
}
@Override
public int read(byte[] b) throws IOException {
packOut.atEnd = false;
return super.read(b);
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
packOut.atEnd = false;
return super.read(b,off,len);
}
@Override
public void close() {
// Never close underlying RandomAccessFile, which lasts the
// lifetime of the enclosing PackStream.
}
};
return new ObjectStream.Filter(
type, size,
new BufferedInputStream(
new InflaterInputStream(fileStream, inflater(), bufsz), bufsz));
}
@Override
public int getType() {
return type;
}
@Override
public long getSize() {
return size;
}
@Override
public byte[] getCachedBytes() throws LargeObjectException {
throw new LargeObjectException.ExceedsLimit(
getStreamFileThreshold(), size);
}
}
}
}