PackInserter.java
/*
 * Copyright (C) 2017, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.eclipse.jgit.internal.storage.file;

import static java.nio.file.StandardCopyOption.ATOMIC_MOVE;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import static org.eclipse.jgit.lib.Constants.OBJ_OFS_DELTA;
import static org.eclipse.jgit.lib.Constants.OBJ_REF_DELTA;

import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.channels.Channels;
import java.text.MessageFormat;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;

import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.InflaterCache;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdOwnerMap;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ObjectStream;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.transport.PackParser;
import org.eclipse.jgit.transport.PackedObjectInfo;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.FileUtils;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.NB;
import org.eclipse.jgit.util.io.CountingOutputStream;
import org.eclipse.jgit.util.sha1.SHA1;
/**
 * Object inserter that inserts one pack per call to {@link #flush()}, and never
 * inserts loose objects.
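 * <p>
 * Minimal usage sketch. It assumes the enclosing {@link ObjectDirectory}
 * hands out instances via a factory such as {@code newPackInserter()}; the
 * factory name and the {@code odb} variable are illustrative here, not
 * guaranteed API:
 *
 * <pre>
 * try (PackInserter ins = odb.newPackInserter()) {
 *     ObjectId id = ins.insert(Constants.OBJ_BLOB, Constants.encode("hello"));
 *     ins.flush(); // everything inserted so far lands in one new pack
 * }
 * </pre>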
 */
public class PackInserter extends ObjectInserter {
    /** Always produce version 2 indexes, to get CRC data. */
    private static final int INDEX_VERSION = 2;

    private final ObjectDirectory db;

    private List<PackedObjectInfo> objectList;
    private ObjectIdOwnerMap<PackedObjectInfo> objectMap;

    private boolean rollback;
    private boolean checkExisting = true;
    private int compression = Deflater.BEST_COMPRESSION;

    private File tmpPack;
    private PackStream packOut;
    private Inflater cachedInflater;

    private PackConfig pconfig;

    PackInserter(ObjectDirectory db) {
        this.db = db;
        this.pconfig = new PackConfig(db.getConfig());
    }
    /**
     * Whether to check if objects already exist in the repository.
     *
     * @param check
     *            if {@code false}, write out possibly-duplicate objects
     *            without first checking whether they exist in the repo;
     *            default is {@code true}.
     */
    public void checkExisting(boolean check) {
        checkExisting = check;
    }
    /**
     * Set compression level for zlib deflater.
     *
     * @param compression
     *            compression level for zlib deflater.
     */
    public void setCompressionLevel(int compression) {
        this.compression = compression;
    }

    int getBufferSize() {
        return buffer().length;
    }

    /** {@inheritDoc} */
    @Override
    public ObjectId insert(int type, byte[] data, int off, int len)
            throws IOException {
        ObjectId id = idFor(type, data, off, len);
        if (objectMap != null && objectMap.contains(id)) {
            return id;
        }
        // Ignore loose objects, which are potentially unreachable.
        if (checkExisting && db.hasPackedObject(id)) {
            return id;
        }
        long offset = beginObject(type, len);
        packOut.compress.write(data, off, len);
        packOut.compress.finish();
        return endObject(id, offset);
    }

    /** {@inheritDoc} */
    @Override
    public ObjectId insert(int type, long len, InputStream in)
            throws IOException {
        byte[] buf = buffer();
        if (len <= buf.length) {
            IO.readFully(in, buf, 0, (int) len);
            return insert(type, buf, 0, (int) len);
        }
        long offset = beginObject(type, len);
        SHA1 md = digest();
        md.update(Constants.encodedTypeString(type));
        md.update((byte) ' ');
        md.update(Constants.encodeASCII(len));
        md.update((byte) 0);
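        // The id is the SHA-1 of the canonical object representation: the
        // header "<type> <length>\0" followed by the raw content. For
        // example, a 5-byte blob hashes "blob 5\0" plus its five bytes.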
        while (0 < len) {
            int n = in.read(buf, 0, (int) Math.min(buf.length, len));
            if (n <= 0) {
                throw new EOFException();
            }
            md.update(buf, 0, n);
            packOut.compress.write(buf, 0, n);
            len -= n;
        }
        packOut.compress.finish();
        return endObject(md.toObjectId(), offset);
    }

    private long beginObject(int type, long len) throws IOException {
        if (packOut == null) {
            beginPack();
        }
        long offset = packOut.getOffset();
        packOut.beginObject(type, len);
        return offset;
    }

    private ObjectId endObject(ObjectId id, long offset) {
        PackedObjectInfo obj = new PackedObjectInfo(id);
        obj.setOffset(offset);
        obj.setCRC((int) packOut.crc32.getValue());
        objectList.add(obj);
        objectMap.addIfAbsent(obj);
        return id;
    }

    private static File idxFor(File packFile) {
        String p = packFile.getName();
        return new File(
                packFile.getParentFile(),
                p.substring(0, p.lastIndexOf('.')) + ".idx"); //$NON-NLS-1$
    }

    private void beginPack() throws IOException {
        objectList = new BlockList<>();
        objectMap = new ObjectIdOwnerMap<>();
        rollback = true;
        tmpPack = File.createTempFile("insert_", ".pack", db.getDirectory()); //$NON-NLS-1$ //$NON-NLS-2$
        packOut = new PackStream(tmpPack);

        // Write the header as though it were a single object pack.
        packOut.write(packOut.hdrBuf, 0, writePackHeader(packOut.hdrBuf, 1));
    }
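    // Pack header layout (12 bytes): the 4-byte signature "PACK", a 4-byte
    // big-endian version (always 2 here), and a 4-byte big-endian object
    // count. For a one-object pack the bytes are:
    //   'P' 'A' 'C' 'K'  00 00 00 02  00 00 00 01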
    private static int writePackHeader(byte[] buf, int objectCount) {
        System.arraycopy(Constants.PACK_SIGNATURE, 0, buf, 0, 4);
        NB.encodeInt32(buf, 4, 2); // Always use pack version 2.
        NB.encodeInt32(buf, 8, objectCount);
        return 12;
    }
    /** {@inheritDoc} */
    @Override
    public PackParser newPackParser(InputStream in) {
        throw new UnsupportedOperationException();
    }

    /** {@inheritDoc} */
    @Override
    public ObjectReader newReader() {
        return new Reader();
    }

    /** {@inheritDoc} */
    @Override
    public void flush() throws IOException {
        if (tmpPack == null) {
            return;
        }

        if (packOut == null) {
            // Reaching here means a previous flush() failed after closing
            // the pack stream but before clearing state; refuse to retry.
            throw new IOException();
        }

        byte[] packHash;
        try {
            packHash = packOut.finishPack();
        } finally {
            packOut = null;
        }
        Collections.sort(objectList);
        File tmpIdx = idxFor(tmpPack); // TODO(nasserg) Use PackFile?
        writePackIndex(tmpIdx, packHash, objectList);

        PackFile realPack = new PackFile(db.getPackDirectory(),
                computeName(objectList), PackExt.PACK);
        db.closeAllPackHandles(realPack);
        tmpPack.setReadOnly();
        FileUtils.rename(tmpPack, realPack, ATOMIC_MOVE);

        PackFile realIdx = realPack.create(PackExt.INDEX);
        tmpIdx.setReadOnly();
        try {
            FileUtils.rename(tmpIdx, realIdx, ATOMIC_MOVE);
        } catch (IOException e) {
            File newIdx = new File(
                    realIdx.getParentFile(), realIdx.getName() + ".new"); //$NON-NLS-1$
            try {
                FileUtils.rename(tmpIdx, newIdx, ATOMIC_MOVE);
            } catch (IOException e2) {
                newIdx = tmpIdx;
                e = e2;
            }
            throw new IOException(MessageFormat.format(
                    JGitText.get().panicCantRenameIndexFile, newIdx,
                    realIdx), e);
        }

        boolean interrupted = false;
        try {
            FileSnapshot snapshot = FileSnapshot.save(realPack);
            if (pconfig.doWaitPreventRacyPack(snapshot.size())) {
                snapshot.waitUntilNotRacy();
            }
        } catch (InterruptedException e) {
            interrupted = true;
        }
        try {
            db.openPack(realPack);
            rollback = false;
        } finally {
            clear();
            if (interrupted) {
                // Re-set interrupted flag
                Thread.currentThread().interrupt();
            }
        }
    }

    private static void writePackIndex(File idx, byte[] packHash,
            List<PackedObjectInfo> list) throws IOException {
        try (OutputStream os = new FileOutputStream(idx)) {
            PackIndexWriter w = PackIndexWriter.createVersion(os, INDEX_VERSION);
            w.write(list, packHash);
        }
    }

    private ObjectId computeName(List<PackedObjectInfo> list) {
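        // The pack's name is the SHA-1 over all object ids in sorted order,
        // so the same set of objects always yields the same
        // "pack-<name>.pack" file name.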
        SHA1 md = digest().reset();
        byte[] buf = buffer();
        for (PackedObjectInfo otp : list) {
            otp.copyRawTo(buf, 0);
            md.update(buf, 0, OBJECT_ID_LENGTH);
        }
        return ObjectId.fromRaw(md.digest());
    }

    /** {@inheritDoc} */
    @Override
    public void close() {
        try {
            if (packOut != null) {
                try {
                    packOut.close();
                } catch (IOException err) {
                    // Ignore a close failure, the pack should be removed.
                }
            }
            if (rollback && tmpPack != null) {
                try {
                    FileUtils.delete(tmpPack);
                } catch (IOException e) {
                    // Still delete idx.
                }
                try {
                    FileUtils.delete(idxFor(tmpPack));
                } catch (IOException e) {
                    // Ignore error deleting temp idx.
                }
                rollback = false;
            }
        } finally {
            clear();
            try {
                InflaterCache.release(cachedInflater);
            } finally {
                cachedInflater = null;
            }
        }
    }

    private void clear() {
        objectList = null;
        objectMap = null;
        tmpPack = null;
        packOut = null;
    }

    private Inflater inflater() {
        if (cachedInflater == null) {
            cachedInflater = InflaterCache.get();
        } else {
            cachedInflater.reset();
        }
        return cachedInflater;
    }
    /**
     * Stream that writes to a pack file.
     * <p>
     * Backed by two views of the same open file descriptor: a random-access
     * file, and an output stream. Seeking in the file causes subsequent writes
     * to the output stream to occur wherever the file pointer is pointing, so
     * we need to take care to always seek to the end of the file before
     * writing a new object.
     * <p>
     * Callers should always use {@link #seek(long)} to seek, rather than
     * reaching into the file member. As long as this contract is followed,
     * calls to {@link #write(byte[], int, int)} are guaranteed to write at the
     * end of the file, even if there have been intermediate seeks.
     */
    private class PackStream extends OutputStream {
        final byte[] hdrBuf;
        final CRC32 crc32;
        final DeflaterOutputStream compress;

        private final RandomAccessFile file;
        private final CountingOutputStream out;
        private final Deflater deflater;

        private boolean atEnd;

        PackStream(File pack) throws IOException {
            file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$
            out = new CountingOutputStream(new FileOutputStream(file.getFD()));
            deflater = new Deflater(compression);
            compress = new DeflaterOutputStream(this, deflater, 8192);
            hdrBuf = new byte[32];
            crc32 = new CRC32();
            atEnd = true;
        }

        long getOffset() {
            // This value is accurate as long as we only ever write to the end
            // of the file, and don't seek back to overwrite any previous
            // segments. Although this is subtle, storing the stream counter
            // this way is still preferable to returning file.length() here,
            // as it avoids a syscall and possible IOException.
            return out.getCount();
        }

        void seek(long offset) throws IOException {
            file.seek(offset);
            atEnd = false;
        }

        void beginObject(int objectType, long length) throws IOException {
            crc32.reset();
            deflater.reset();
            write(hdrBuf, 0, encodeTypeSize(objectType, length));
        }
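        // Object header encoding: the first byte packs a continuation bit,
        // the 3-bit type, and the low 4 bits of the size; each further byte
        // carries 7 more size bits, low-order group first. For example, a
        // blob (type 3) of 100 bytes encodes as { 0xB4, 0x06 }.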
        private int encodeTypeSize(int type, long rawLength) {
            long nextLength = rawLength >>> 4;
            hdrBuf[0] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (type << 4)
                    | (rawLength & 0x0F));
            rawLength = nextLength;
            int n = 1;
            while (rawLength > 0) {
                nextLength >>>= 7;
                hdrBuf[n++] = (byte) ((nextLength > 0 ? 0x80 : 0x00)
                        | (rawLength & 0x7F));
                rawLength = nextLength;
            }
            return n;
        }
        @Override
        public void write(int b) throws IOException {
            hdrBuf[0] = (byte) b;
            write(hdrBuf, 0, 1);
        }

        @Override
        public void write(byte[] data, int off, int len) throws IOException {
            crc32.update(data, off, len);
            if (!atEnd) {
                file.seek(file.length());
                atEnd = true;
            }
            out.write(data, off, len);
        }

        byte[] finishPack() throws IOException {
            // Overwrite placeholder header with actual object count, then
            // hash. This method intentionally uses direct seek/write calls
            // rather than the wrappers which keep track of atEnd. This leaves
            // atEnd, the file pointer, and out's counter in an inconsistent
            // state; that's ok, since this method closes the file anyway.
            try {
                file.seek(0);
                out.write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size()));

                byte[] buf = buffer();
                SHA1 md = digest().reset();
                file.seek(0);
                while (true) {
                    int r = file.read(buf);
                    if (r < 0) {
                        break;
                    }
                    md.update(buf, 0, r);
                }
                byte[] packHash = md.digest();
                out.write(packHash, 0, packHash.length);
                return packHash;
            } finally {
                close();
            }
        }

        @Override
        public void close() throws IOException {
            deflater.end();
            try {
                out.close();
            } finally {
                file.close();
            }
        }

        byte[] inflate(long filePos, int len)
                throws IOException, DataFormatException {
            byte[] dstbuf;
            try {
                dstbuf = new byte[len];
            } catch (OutOfMemoryError noMemory) {
                return null; // Caller will switch to large object streaming.
            }

            byte[] srcbuf = buffer();
            Inflater inf = inflater();
            filePos += setInput(filePos, inf, srcbuf);
            for (int dstoff = 0;;) {
                int n = inf.inflate(dstbuf, dstoff, dstbuf.length - dstoff);
                dstoff += n;
                if (inf.finished()) {
                    return dstbuf;
                }
                if (inf.needsInput()) {
                    filePos += setInput(filePos, inf, srcbuf);
                } else if (n == 0) {
                    throw new DataFormatException();
                }
            }
        }

        private int setInput(long filePos, Inflater inf, byte[] buf)
                throws IOException {
            if (file.getFilePointer() != filePos) {
                seek(filePos);
            }
            int n = file.read(buf);
            if (n < 0) {
                throw new EOFException(JGitText.get().unexpectedEofInPack);
            }
            inf.setInput(buf, 0, n);
            return n;
        }
    }
    private class Reader extends ObjectReader {
        private final ObjectReader ctx;

        private Reader() {
            ctx = db.newReader();
            setStreamFileThreshold(ctx.getStreamFileThreshold());
        }

        @Override
        public ObjectReader newReader() {
            return db.newReader();
        }

        @Override
        public ObjectInserter getCreatedFromInserter() {
            return PackInserter.this;
        }

        @Override
        public Collection<ObjectId> resolve(AbbreviatedObjectId id)
                throws IOException {
            Collection<ObjectId> stored = ctx.resolve(id);
            if (objectList == null) {
                return stored;
            }
            Set<ObjectId> r = new HashSet<>(stored.size() + 2);
            r.addAll(stored);
            for (PackedObjectInfo obj : objectList) {
                if (id.prefixCompare(obj) == 0) {
                    r.add(obj.copy());
                }
            }
            return r;
        }

        @Override
        public ObjectLoader open(AnyObjectId objectId, int typeHint)
                throws MissingObjectException, IncorrectObjectTypeException,
                IOException {
            if (objectMap == null) {
                return ctx.open(objectId, typeHint);
            }

            PackedObjectInfo obj = objectMap.get(objectId);
            if (obj == null) {
                return ctx.open(objectId, typeHint);
            }

            byte[] buf = buffer();
            packOut.seek(obj.getOffset());
            int cnt = packOut.file.read(buf, 0, 20);
            if (cnt <= 0) {
                throw new EOFException(JGitText.get().unexpectedEofInPack);
            }

            int c = buf[0] & 0xff;
            int type = (c >> 4) & 7;
            if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
                throw new IOException(MessageFormat.format(
                        JGitText.get().cannotReadBackDelta,
                        Integer.toString(type)));
            }
            if (typeHint != OBJ_ANY && type != typeHint) {
                throw new IncorrectObjectTypeException(objectId.copy(), typeHint);
            }
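            // Decode the remaining size bits of the object header: the low 4
            // bits came from the first byte; each continuation byte (high bit
            // set) contributes 7 more. E.g. { 0xB4, 0x06 } is a blob of 100
            // bytes.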
            long sz = c & 0x0f;
            int ptr = 1;
            int shift = 4;
            while ((c & 0x80) != 0) {
                if (ptr >= cnt) {
                    throw new EOFException(JGitText.get().unexpectedEofInPack);
                }
                c = buf[ptr++] & 0xff;
                sz += ((long) (c & 0x7f)) << shift;
                shift += 7;
            }

            long zpos = obj.getOffset() + ptr;
            if (sz < getStreamFileThreshold()) {
                byte[] data = inflate(obj, zpos, (int) sz);
                if (data != null) {
                    return new ObjectLoader.SmallObject(type, data);
                }
            }
            return new StreamLoader(type, sz, zpos);
        }

        private byte[] inflate(PackedObjectInfo obj, long zpos, int sz)
                throws IOException, CorruptObjectException {
            try {
                return packOut.inflate(zpos, sz);
            } catch (DataFormatException dfe) {
                throw new CorruptObjectException(
                        MessageFormat.format(
                                JGitText.get().objectAtHasBadZlibStream,
                                Long.valueOf(obj.getOffset()),
                                tmpPack.getAbsolutePath()),
                        dfe);
            }
        }

        @Override
        public Set<ObjectId> getShallowCommits() throws IOException {
            return ctx.getShallowCommits();
        }

        @Override
        public void close() {
            ctx.close();
        }
        private class StreamLoader extends ObjectLoader {
            private final int type;
            private final long size;
            private final long pos;

            StreamLoader(int type, long size, long pos) {
                this.type = type;
                this.size = size;
                this.pos = pos;
            }

            @Override
            public ObjectStream openStream()
                    throws MissingObjectException, IOException {
                int bufsz = buffer().length;
                packOut.seek(pos);

                InputStream fileStream = new FilterInputStream(
                        Channels.newInputStream(packOut.file.getChannel())) {
                    // atEnd was already set to false by the previous seek, but
                    // it's technically possible for a caller to call insert on
                    // the inserter in the middle of reading from this stream.
                    // Behavior is undefined in this case, so it would arguably
                    // be ok to ignore, but it's not hard to at least make an
                    // attempt to not corrupt the data.
                    @Override
                    public int read() throws IOException {
                        packOut.atEnd = false;
                        return super.read();
                    }

                    @Override
                    public int read(byte[] b) throws IOException {
                        packOut.atEnd = false;
                        return super.read(b);
                    }

                    @Override
                    public int read(byte[] b, int off, int len)
                            throws IOException {
                        packOut.atEnd = false;
                        return super.read(b, off, len);
                    }

                    @Override
                    public void close() {
                        // Never close underlying RandomAccessFile, which lasts
                        // the lifetime of the enclosing PackStream.
                    }
                };
                return new ObjectStream.Filter(
                        type, size,
                        new BufferedInputStream(
                                new InflaterInputStream(fileStream, inflater(),
                                        bufsz),
                                bufsz));
            }

            @Override
            public int getType() {
                return type;
            }

            @Override
            public long getSize() {
                return size;
            }

            @Override
            public byte[] getCachedBytes() throws LargeObjectException {
                throw new LargeObjectException.ExceedsLimit(
                        getStreamFileThreshold(), size);
            }
        }
    }
}