PackInserter.java
/*
 * Copyright (C) 2017, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.eclipse.jgit.internal.storage.file;

import static java.nio.file.StandardCopyOption.ATOMIC_MOVE;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import static org.eclipse.jgit.lib.Constants.OBJ_OFS_DELTA;
import static org.eclipse.jgit.lib.Constants.OBJ_REF_DELTA;

import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.channels.Channels;
import java.text.MessageFormat;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;

import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.InflaterCache;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdOwnerMap;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ObjectStream;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.transport.PackParser;
import org.eclipse.jgit.transport.PackedObjectInfo;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.FileUtils;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.NB;
import org.eclipse.jgit.util.io.CountingOutputStream;
import org.eclipse.jgit.util.sha1.SHA1;
/**
 * Object inserter that inserts one pack per call to {@link #flush()}, and never
 * inserts loose objects.
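 * <p>
 * Minimal usage sketch. It assumes the enclosing {@link ObjectDirectory}
 * hands out instances via a factory such as {@code newPackInserter()}; the
 * factory name and the {@code odb} variable are illustrative here, not
 * guaranteed API:
 *
 * <pre>
 * try (PackInserter ins = odb.newPackInserter()) {
 *     ObjectId id = ins.insert(Constants.OBJ_BLOB, Constants.encode("hello"));
 *     ins.flush(); // everything inserted so far lands in one new pack
 * }
 * </pre>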
 */
public class PackInserter extends ObjectInserter {
    /** Always produce version 2 indexes, to get CRC data. */
    private static final int INDEX_VERSION = 2;

    private final ObjectDirectory db;

    private List<PackedObjectInfo> objectList;
    private ObjectIdOwnerMap<PackedObjectInfo> objectMap;

    private boolean rollback;
    private boolean checkExisting = true;
    private int compression = Deflater.BEST_COMPRESSION;

    private File tmpPack;
    private PackStream packOut;
    private Inflater cachedInflater;

    private PackConfig pconfig;

    PackInserter(ObjectDirectory db) {
        this.db = db;
        this.pconfig = new PackConfig(db.getConfig());
    }
    /**
     * Whether to check if objects already exist in the repository.
     *
     * @param check
     *            if {@code false}, write out possibly-duplicate objects
     *            without first checking whether they exist in the repo;
     *            default is {@code true}.
     */
    public void checkExisting(boolean check) {
        checkExisting = check;
    }
    /**
     * Set compression level for zlib deflater.
     *
     * @param compression
     *            compression level for zlib deflater.
     */
    public void setCompressionLevel(int compression) {
        this.compression = compression;
    }

    int getBufferSize() {
        return buffer().length;
    }

    /** {@inheritDoc} */
    @Override
    public ObjectId insert(int type, byte[] data, int off, int len)
            throws IOException {
        ObjectId id = idFor(type, data, off, len);
        if (objectMap != null && objectMap.contains(id)) {
            return id;
        }
        // Ignore loose objects, which are potentially unreachable.
        if (checkExisting && db.hasPackedObject(id)) {
            return id;
        }
        long offset = beginObject(type, len);
        packOut.compress.write(data, off, len);
        packOut.compress.finish();
        return endObject(id, offset);
    }

    /** {@inheritDoc} */
    @Override
    public ObjectId insert(int type, long len, InputStream in)
            throws IOException {
        byte[] buf = buffer();
        if (len <= buf.length) {
            IO.readFully(in, buf, 0, (int) len);
            return insert(type, buf, 0, (int) len);
        }
        long offset = beginObject(type, len);
        SHA1 md = digest();
        md.update(Constants.encodedTypeString(type));
        md.update((byte) ' ');
        md.update(Constants.encodeASCII(len));
        md.update((byte) 0);
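        // The id is the SHA-1 of the canonical object representation: the
        // header "<type> <length>\0" followed by the raw content. For
        // example, a 5-byte blob hashes "blob 5\0" plus its five bytes.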
        while (0 < len) {
            int n = in.read(buf, 0, (int) Math.min(buf.length, len));
            if (n <= 0) {
                throw new EOFException();
            }
            md.update(buf, 0, n);
            packOut.compress.write(buf, 0, n);
            len -= n;
        }
        packOut.compress.finish();
        return endObject(md.toObjectId(), offset);
    }

    private long beginObject(int type, long len) throws IOException {
        if (packOut == null) {
            beginPack();
        }
        long offset = packOut.getOffset();
        packOut.beginObject(type, len);
        return offset;
    }

    private ObjectId endObject(ObjectId id, long offset) {
        PackedObjectInfo obj = new PackedObjectInfo(id);
        obj.setOffset(offset);
        obj.setCRC((int) packOut.crc32.getValue());
        objectList.add(obj);
        objectMap.addIfAbsent(obj);
        return id;
    }

    private static File idxFor(File packFile) {
        String p = packFile.getName();
        return new File(
                packFile.getParentFile(),
                p.substring(0, p.lastIndexOf('.')) + ".idx"); //$NON-NLS-1$
    }

    private void beginPack() throws IOException {
        objectList = new BlockList<>();
        objectMap = new ObjectIdOwnerMap<>();
        rollback = true;
        tmpPack = File.createTempFile("insert_", ".pack", db.getDirectory()); //$NON-NLS-1$ //$NON-NLS-2$
        packOut = new PackStream(tmpPack);

        // Write the header as though it were a single object pack.
        packOut.write(packOut.hdrBuf, 0, writePackHeader(packOut.hdrBuf, 1));
    }
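    // Pack header layout (12 bytes): the 4-byte signature "PACK", a 4-byte
    // big-endian version (always 2 here), and a 4-byte big-endian object
    // count. For a one-object pack the bytes are:
    //   'P' 'A' 'C' 'K'  00 00 00 02  00 00 00 01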
    private static int writePackHeader(byte[] buf, int objectCount) {
        System.arraycopy(Constants.PACK_SIGNATURE, 0, buf, 0, 4);
        NB.encodeInt32(buf, 4, 2); // Always use pack version 2.
        NB.encodeInt32(buf, 8, objectCount);
        return 12;
    }
    /** {@inheritDoc} */
    @Override
    public PackParser newPackParser(InputStream in) {
        throw new UnsupportedOperationException();
    }

    /** {@inheritDoc} */
    @Override
    public ObjectReader newReader() {
        return new Reader();
    }

    /** {@inheritDoc} */
    @Override
    public void flush() throws IOException {
        if (tmpPack == null) {
            return;
        }

        if (packOut == null) {
            // Reaching here means a previous flush() failed after closing
            // the pack stream but before clearing state; refuse to retry.
            throw new IOException();
        }

        byte[] packHash;
        try {
            packHash = packOut.finishPack();
        } finally {
            packOut = null;
        }
        Collections.sort(objectList);
        File tmpIdx = idxFor(tmpPack); // TODO(nasserg) Use PackFile?
        writePackIndex(tmpIdx, packHash, objectList);

        PackFile realPack = new PackFile(db.getPackDirectory(),
                computeName(objectList), PackExt.PACK);
        db.closeAllPackHandles(realPack);
        tmpPack.setReadOnly();
        FileUtils.rename(tmpPack, realPack, ATOMIC_MOVE);

        PackFile realIdx = realPack.create(PackExt.INDEX);
        tmpIdx.setReadOnly();
        try {
            FileUtils.rename(tmpIdx, realIdx, ATOMIC_MOVE);
        } catch (IOException e) {
            File newIdx = new File(
                    realIdx.getParentFile(), realIdx.getName() + ".new"); //$NON-NLS-1$
            try {
                FileUtils.rename(tmpIdx, newIdx, ATOMIC_MOVE);
            } catch (IOException e2) {
                newIdx = tmpIdx;
                e = e2;
            }
            throw new IOException(MessageFormat.format(
                    JGitText.get().panicCantRenameIndexFile, newIdx,
                    realIdx), e);
        }

        boolean interrupted = false;
        try {
            FileSnapshot snapshot = FileSnapshot.save(realPack);
            if (pconfig.doWaitPreventRacyPack(snapshot.size())) {
                snapshot.waitUntilNotRacy();
            }
        } catch (InterruptedException e) {
            interrupted = true;
        }
        try {
            db.openPack(realPack);
            rollback = false;
        } finally {
            clear();
            if (interrupted) {
                // Re-set interrupted flag
                Thread.currentThread().interrupt();
            }
        }
    }

    private static void writePackIndex(File idx, byte[] packHash,
            List<PackedObjectInfo> list) throws IOException {
        try (OutputStream os = new FileOutputStream(idx)) {
            PackIndexWriter w = PackIndexWriter.createVersion(os, INDEX_VERSION);
            w.write(list, packHash);
        }
    }

    private ObjectId computeName(List<PackedObjectInfo> list) {
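        // The pack's name is the SHA-1 over all object ids in sorted order,
        // so the same set of objects always yields the same
        // "pack-<name>.pack" file name.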
        SHA1 md = digest().reset();
        byte[] buf = buffer();
        for (PackedObjectInfo otp : list) {
            otp.copyRawTo(buf, 0);
            md.update(buf, 0, OBJECT_ID_LENGTH);
        }
        return ObjectId.fromRaw(md.digest());
    }

    /** {@inheritDoc} */
    @Override
    public void close() {
        try {
            if (packOut != null) {
                try {
                    packOut.close();
                } catch (IOException err) {
                    // Ignore a close failure, the pack should be removed.
                }
            }
            if (rollback && tmpPack != null) {
                try {
                    FileUtils.delete(tmpPack);
                } catch (IOException e) {
                    // Still delete idx.
                }
                try {
                    FileUtils.delete(idxFor(tmpPack));
                } catch (IOException e) {
                    // Ignore error deleting temp idx.
                }
                rollback = false;
            }
        } finally {
            clear();
            try {
                InflaterCache.release(cachedInflater);
            } finally {
                cachedInflater = null;
            }
        }
    }

    private void clear() {
        objectList = null;
        objectMap = null;
        tmpPack = null;
        packOut = null;
    }

    private Inflater inflater() {
        if (cachedInflater == null) {
            cachedInflater = InflaterCache.get();
        } else {
            cachedInflater.reset();
        }
        return cachedInflater;
    }
    /**
     * Stream that writes to a pack file.
     * <p>
     * Backed by two views of the same open file descriptor: a random-access
     * file, and an output stream. Seeking in the file causes subsequent writes
     * to the output stream to occur wherever the file pointer is pointing, so
     * we need to take care to always seek to the end of the file before
     * writing a new object.
     * <p>
     * Callers should always use {@link #seek(long)} to seek, rather than
     * reaching into the file member. As long as this contract is followed,
     * calls to {@link #write(byte[], int, int)} are guaranteed to write at the
     * end of the file, even if there have been intermediate seeks.
     */
    private class PackStream extends OutputStream {
        final byte[] hdrBuf;
        final CRC32 crc32;
        final DeflaterOutputStream compress;

        private final RandomAccessFile file;
        private final CountingOutputStream out;
        private final Deflater deflater;

        private boolean atEnd;

        PackStream(File pack) throws IOException {
            file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$
            out = new CountingOutputStream(new FileOutputStream(file.getFD()));
            deflater = new Deflater(compression);
            compress = new DeflaterOutputStream(this, deflater, 8192);
            hdrBuf = new byte[32];
            crc32 = new CRC32();
            atEnd = true;
        }

        long getOffset() {
            // This value is accurate as long as we only ever write to the end
            // of the file, and don't seek back to overwrite any previous
            // segments. Although this is subtle, storing the stream counter
            // this way is still preferable to returning file.length() here,
            // as it avoids a syscall and possible IOException.
            return out.getCount();
        }

        void seek(long offset) throws IOException {
            file.seek(offset);
            atEnd = false;
        }

        void beginObject(int objectType, long length) throws IOException {
            crc32.reset();
            deflater.reset();
            write(hdrBuf, 0, encodeTypeSize(objectType, length));
        }
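        // Object header encoding: the first byte packs a continuation bit,
        // the 3-bit type, and the low 4 bits of the size; each further byte
        // carries 7 more size bits, low-order group first. For example, a
        // blob (type 3) of 100 bytes encodes as { 0xB4, 0x06 }.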
        private int encodeTypeSize(int type, long rawLength) {
            long nextLength = rawLength >>> 4;
            hdrBuf[0] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (type << 4)
                    | (rawLength & 0x0F));
            rawLength = nextLength;
            int n = 1;
            while (rawLength > 0) {
                nextLength >>>= 7;
                hdrBuf[n++] = (byte) ((nextLength > 0 ? 0x80 : 0x00)
                        | (rawLength & 0x7F));
                rawLength = nextLength;
            }
            return n;
        }
        @Override
        public void write(int b) throws IOException {
            hdrBuf[0] = (byte) b;
            write(hdrBuf, 0, 1);
        }

        @Override
        public void write(byte[] data, int off, int len) throws IOException {
            crc32.update(data, off, len);
            if (!atEnd) {
                file.seek(file.length());
                atEnd = true;
            }
            out.write(data, off, len);
        }

        byte[] finishPack() throws IOException {
            // Overwrite placeholder header with actual object count, then
            // hash. This method intentionally uses direct seek/write calls
            // rather than the wrappers which keep track of atEnd. This leaves
            // atEnd, the file pointer, and out's counter in an inconsistent
            // state; that's ok, since this method closes the file anyway.
            try {
                file.seek(0);
                out.write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size()));

                byte[] buf = buffer();
                SHA1 md = digest().reset();
                file.seek(0);
                while (true) {
                    int r = file.read(buf);
                    if (r < 0) {
                        break;
                    }
                    md.update(buf, 0, r);
                }
                byte[] packHash = md.digest();
                out.write(packHash, 0, packHash.length);
                return packHash;
            } finally {
                close();
            }
        }

        @Override
        public void close() throws IOException {
            deflater.end();
            try {
                out.close();
            } finally {
                file.close();
            }
        }

        byte[] inflate(long filePos, int len)
                throws IOException, DataFormatException {
            byte[] dstbuf;
            try {
                dstbuf = new byte[len];
            } catch (OutOfMemoryError noMemory) {
                return null; // Caller will switch to large object streaming.
            }

            byte[] srcbuf = buffer();
            Inflater inf = inflater();
            filePos += setInput(filePos, inf, srcbuf);
            for (int dstoff = 0;;) {
                int n = inf.inflate(dstbuf, dstoff, dstbuf.length - dstoff);
                dstoff += n;
                if (inf.finished()) {
                    return dstbuf;
                }
                if (inf.needsInput()) {
                    filePos += setInput(filePos, inf, srcbuf);
                } else if (n == 0) {
                    throw new DataFormatException();
                }
            }
        }

        private int setInput(long filePos, Inflater inf, byte[] buf)
                throws IOException {
            if (file.getFilePointer() != filePos) {
                seek(filePos);
            }
            int n = file.read(buf);
            if (n < 0) {
                throw new EOFException(JGitText.get().unexpectedEofInPack);
            }
            inf.setInput(buf, 0, n);
            return n;
        }
    }
    private class Reader extends ObjectReader {
        private final ObjectReader ctx;

        private Reader() {
            ctx = db.newReader();
            setStreamFileThreshold(ctx.getStreamFileThreshold());
        }

        @Override
        public ObjectReader newReader() {
            return db.newReader();
        }

        @Override
        public ObjectInserter getCreatedFromInserter() {
            return PackInserter.this;
        }

        @Override
        public Collection<ObjectId> resolve(AbbreviatedObjectId id)
                throws IOException {
            Collection<ObjectId> stored = ctx.resolve(id);
            if (objectList == null) {
                return stored;
            }
            Set<ObjectId> r = new HashSet<>(stored.size() + 2);
            r.addAll(stored);
            for (PackedObjectInfo obj : objectList) {
                if (id.prefixCompare(obj) == 0) {
                    r.add(obj.copy());
                }
            }
            return r;
        }

        @Override
        public ObjectLoader open(AnyObjectId objectId, int typeHint)
                throws MissingObjectException, IncorrectObjectTypeException,
                IOException {
            if (objectMap == null) {
                return ctx.open(objectId, typeHint);
            }

            PackedObjectInfo obj = objectMap.get(objectId);
            if (obj == null) {
                return ctx.open(objectId, typeHint);
            }

            byte[] buf = buffer();
            packOut.seek(obj.getOffset());
            int cnt = packOut.file.read(buf, 0, 20);
            if (cnt <= 0) {
                throw new EOFException(JGitText.get().unexpectedEofInPack);
            }

            int c = buf[0] & 0xff;
            int type = (c >> 4) & 7;
            if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
                throw new IOException(MessageFormat.format(
                        JGitText.get().cannotReadBackDelta,
                        Integer.toString(type)));
            }
            if (typeHint != OBJ_ANY && type != typeHint) {
                throw new IncorrectObjectTypeException(objectId.copy(), typeHint);
            }
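            // Decode the remaining size bits of the object header: the low 4
            // bits came from the first byte; each continuation byte (high bit
            // set) contributes 7 more. E.g. { 0xB4, 0x06 } is a blob of 100
            // bytes.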
            long sz = c & 0x0f;
            int ptr = 1;
            int shift = 4;
            while ((c & 0x80) != 0) {
                if (ptr >= cnt) {
                    throw new EOFException(JGitText.get().unexpectedEofInPack);
                }
                c = buf[ptr++] & 0xff;
                sz += ((long) (c & 0x7f)) << shift;
                shift += 7;
            }

            long zpos = obj.getOffset() + ptr;
            if (sz < getStreamFileThreshold()) {
                byte[] data = inflate(obj, zpos, (int) sz);
                if (data != null) {
                    return new ObjectLoader.SmallObject(type, data);
                }
            }
            return new StreamLoader(type, sz, zpos);
        }

        private byte[] inflate(PackedObjectInfo obj, long zpos, int sz)
                throws IOException, CorruptObjectException {
            try {
                return packOut.inflate(zpos, sz);
            } catch (DataFormatException dfe) {
                throw new CorruptObjectException(
                        MessageFormat.format(
                                JGitText.get().objectAtHasBadZlibStream,
                                Long.valueOf(obj.getOffset()),
                                tmpPack.getAbsolutePath()),
                        dfe);
            }
        }

        @Override
        public Set<ObjectId> getShallowCommits() throws IOException {
            return ctx.getShallowCommits();
        }

        @Override
        public void close() {
            ctx.close();
        }
        private class StreamLoader extends ObjectLoader {
            private final int type;
            private final long size;
            private final long pos;

            StreamLoader(int type, long size, long pos) {
                this.type = type;
                this.size = size;
                this.pos = pos;
            }

            @Override
            public ObjectStream openStream()
                    throws MissingObjectException, IOException {
                int bufsz = buffer().length;
                packOut.seek(pos);

                InputStream fileStream = new FilterInputStream(
                        Channels.newInputStream(packOut.file.getChannel())) {
                    // atEnd was already set to false by the previous seek, but
                    // it's technically possible for a caller to call insert on
                    // the inserter in the middle of reading from this stream.
                    // Behavior is undefined in this case, so it would arguably
                    // be ok to ignore, but it's not hard to at least make an
                    // attempt to not corrupt the data.
                    @Override
                    public int read() throws IOException {
                        packOut.atEnd = false;
                        return super.read();
                    }

                    @Override
                    public int read(byte[] b) throws IOException {
                        packOut.atEnd = false;
                        return super.read(b);
                    }

                    @Override
                    public int read(byte[] b, int off, int len)
                            throws IOException {
                        packOut.atEnd = false;
                        return super.read(b, off, len);
                    }

                    @Override
                    public void close() {
                        // Never close underlying RandomAccessFile, which lasts
                        // the lifetime of the enclosing PackStream.
                    }
                };
                return new ObjectStream.Filter(
                        type, size,
                        new BufferedInputStream(
                                new InflaterInputStream(fileStream, inflater(),
                                        bufsz),
                                bufsz));
            }

            @Override
            public int getType() {
                return type;
            }

            @Override
            public long getSize() {
                return size;
            }

            @Override
            public byte[] getCachedBytes() throws LargeObjectException {
                throw new LargeObjectException.ExceedsLimit(
                        getStreamFileThreshold(), size);
            }
        }
    }
}