DeltaEncoder.java

  1. /*
  2.  * Copyright (C) 2010, Google Inc. and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */

  10. package org.eclipse.jgit.internal.storage.pack;

  11. import java.io.IOException;
  12. import java.io.OutputStream;

  13. import org.eclipse.jgit.lib.Constants;

  14. /**
  15.  * Encodes an instruction stream for
  16.  * {@link org.eclipse.jgit.internal.storage.pack.BinaryDelta}.
  17.  */
  18. public class DeltaEncoder {
  19.     /**
  20.      * Maximum number of bytes to be copied in pack v2 format.
  21.      * <p>
  22.      * Historical limitations have this at 64k, even though current delta
  23.      * decoders recognize larger copy instructions.
  24.      */
  25.     private static final int MAX_V2_COPY = 0x10000;

  26.     /*
  27.      * Maximum number of bytes to be copied in pack v3 format.
  28.      *
  29.      * Current delta decoders can recognize a copy instruction with a count that
  30.      * is this large, but the historical limitation of {@link MAX_V2_COPY} is
  31.      * still used.
  32.      */
  33.     // private static final int MAX_V3_COPY = (0xff << 16) | (0xff << 8) | 0xff;

  34.     /** Maximum number of bytes used by a copy instruction. */
  35.     private static final int MAX_COPY_CMD_SIZE = 8;

  36.     /** Maximum length that an insert command can encode at once. */
  37.     private static final int MAX_INSERT_DATA_SIZE = 127;

  38.     private final OutputStream out;

  39.     private final byte[] buf = new byte[MAX_COPY_CMD_SIZE * 4];

  40.     private final int limit;

  41.     private int size;

  42.     /**
  43.      * Create an encoder with no upper bound on the instruction stream size.
  44.      *
  45.      * @param out
  46.      *            buffer to store the instructions written.
  47.      * @param baseSize
  48.      *            size of the base object, in bytes.
  49.      * @param resultSize
  50.      *            size of the resulting object, after applying this instruction
  51.      *            stream to the base object, in bytes.
  52.      * @throws java.io.IOException
  53.      *             the output buffer cannot store the instruction stream's
  54.      *             header with the size fields.
  55.      */
  56.     public DeltaEncoder(OutputStream out, long baseSize, long resultSize)
  57.             throws IOException {
  58.         this(out, baseSize, resultSize, 0);
  59.     }

  60.     /**
  61.      * Create an encoder with an upper limit on the instruction size.
  62.      *
  63.      * @param out
  64.      *            buffer to store the instructions written.
  65.      * @param baseSize
  66.      *            size of the base object, in bytes.
  67.      * @param resultSize
  68.      *            size of the resulting object, after applying this instruction
  69.      *            stream to the base object, in bytes.
  70.      * @param limit
  71.      *            maximum number of bytes to write to the out buffer declaring
  72.      *            the stream is over limit and should be discarded. May be 0 to
  73.      *            specify an infinite limit.
  74.      * @throws java.io.IOException
  75.      *             the output buffer cannot store the instruction stream's
  76.      *             header with the size fields.
  77.      */
  78.     public DeltaEncoder(OutputStream out, long baseSize, long resultSize,
  79.             int limit) throws IOException {
  80.         this.out = out;
  81.         this.limit = limit;
  82.         writeVarint(baseSize);
  83.         writeVarint(resultSize);
  84.     }

  85.     private void writeVarint(long sz) throws IOException {
  86.         int p = 0;
  87.         while (sz >= 0x80) {
  88.             buf[p++] = (byte) (0x80 | (((int) sz) & 0x7f));
  89.             sz >>>= 7;
  90.         }
  91.         buf[p++] = (byte) (((int) sz) & 0x7f);
  92.         size += p;
  93.         if (limit == 0 || size < limit)
  94.             out.write(buf, 0, p);
  95.     }

  96.     /**
  97.      * Get current size of the delta stream, in bytes.
  98.      *
  99.      * @return current size of the delta stream, in bytes.
  100.      */
  101.     public int getSize() {
  102.         return size;
  103.     }

  104.     /**
  105.      * Insert a literal string of text, in UTF-8 encoding.
  106.      *
  107.      * @param text
  108.      *            the string to insert.
  109.      * @return true if the insert fits within the limit; false if the insert
  110.      *         would cause the instruction stream to exceed the limit.
  111.      * @throws java.io.IOException
  112.      *             the instruction buffer can't store the instructions.
  113.      */
  114.     public boolean insert(String text) throws IOException {
  115.         return insert(Constants.encode(text));
  116.     }

  117.     /**
  118.      * Insert a literal binary sequence.
  119.      *
  120.      * @param text
  121.      *            the binary to insert.
  122.      * @return true if the insert fits within the limit; false if the insert
  123.      *         would cause the instruction stream to exceed the limit.
  124.      * @throws java.io.IOException
  125.      *             the instruction buffer can't store the instructions.
  126.      */
  127.     public boolean insert(byte[] text) throws IOException {
  128.         return insert(text, 0, text.length);
  129.     }

  130.     /**
  131.      * Insert a literal binary sequence.
  132.      *
  133.      * @param text
  134.      *            the binary to insert.
  135.      * @param off
  136.      *            offset within {@code text} to start copying from.
  137.      * @param cnt
  138.      *            number of bytes to insert.
  139.      * @return true if the insert fits within the limit; false if the insert
  140.      *         would cause the instruction stream to exceed the limit.
  141.      * @throws java.io.IOException
  142.      *             the instruction buffer can't store the instructions.
  143.      */
  144.     public boolean insert(byte[] text, int off, int cnt)
  145.             throws IOException {
  146.         if (cnt <= 0)
  147.             return true;
  148.         if (limit != 0) {
  149.             int hdrs = cnt / MAX_INSERT_DATA_SIZE;
  150.             if (cnt % MAX_INSERT_DATA_SIZE != 0)
  151.                 hdrs++;
  152.             if (limit < size + hdrs + cnt)
  153.                 return false;
  154.         }
  155.         do {
  156.             int n = Math.min(MAX_INSERT_DATA_SIZE, cnt);
  157.             out.write((byte) n);
  158.             out.write(text, off, n);
  159.             off += n;
  160.             cnt -= n;
  161.             size += 1 + n;
  162.         } while (0 < cnt);
  163.         return true;
  164.     }

  165.     /**
  166.      * Create a copy instruction to copy from the base object.
  167.      *
  168.      * @param offset
  169.      *            position in the base object to copy from. This is absolute,
  170.      *            from the beginning of the base.
  171.      * @param cnt
  172.      *            number of bytes to copy.
  173.      * @return true if the copy fits within the limit; false if the copy
  174.      *         would cause the instruction stream to exceed the limit.
  175.      * @throws java.io.IOException
  176.      *             the instruction buffer cannot store the instructions.
  177.      */
  178.     public boolean copy(long offset, int cnt) throws IOException {
  179.         if (cnt == 0)
  180.             return true;

  181.         int p = 0;

  182.         // We cannot encode more than MAX_V2_COPY bytes in a single
  183.         // command, so encode that much and start a new command.
  184.         // This limit is imposed by the pack file format rules.
  185.         //
  186.         while (MAX_V2_COPY < cnt) {
  187.             p = encodeCopy(p, offset, MAX_V2_COPY);
  188.             offset += MAX_V2_COPY;
  189.             cnt -= MAX_V2_COPY;

  190.             if (buf.length < p + MAX_COPY_CMD_SIZE) {
  191.                 if (limit != 0 && limit < size + p)
  192.                     return false;
  193.                 out.write(buf, 0, p);
  194.                 size += p;
  195.                 p = 0;
  196.             }
  197.         }

  198.         p = encodeCopy(p, offset, cnt);
  199.         if (limit != 0 && limit < size + p)
  200.             return false;
  201.         out.write(buf, 0, p);
  202.         size += p;
  203.         return true;
  204.     }

  205.     private int encodeCopy(int p, long offset, int cnt) {
  206.         int cmd = 0x80;
  207.         final int cmdPtr = p++; // save room for the command
  208.         byte b;

  209.         if ((b = (byte) (offset & 0xff)) != 0) {
  210.             cmd |= 0x01;
  211.             buf[p++] = b;
  212.         }
  213.         if ((b = (byte) ((offset >>> 8) & 0xff)) != 0) {
  214.             cmd |= 0x02;
  215.             buf[p++] = b;
  216.         }
  217.         if ((b = (byte) ((offset >>> 16) & 0xff)) != 0) {
  218.             cmd |= 0x04;
  219.             buf[p++] = b;
  220.         }
  221.         if ((b = (byte) ((offset >>> 24) & 0xff)) != 0) {
  222.             cmd |= 0x08;
  223.             buf[p++] = b;
  224.         }

  225.         if (cnt != MAX_V2_COPY) {
  226.             if ((b = (byte) (cnt & 0xff)) != 0) {
  227.                 cmd |= 0x10;
  228.                 buf[p++] = b;
  229.             }
  230.             if ((b = (byte) ((cnt >>> 8) & 0xff)) != 0) {
  231.                 cmd |= 0x20;
  232.                 buf[p++] = b;
  233.             }
  234.             if ((b = (byte) ((cnt >>> 16) & 0xff)) != 0) {
  235.                 cmd |= 0x40;
  236.                 buf[p++] = b;
  237.             }
  238.         }

  239.         buf[cmdPtr] = (byte) cmd;
  240.         return p;
  241.     }
  242. }