AutoLFInputStream.java

  1. /*
  2.  * Copyright (C) 2010, 2013 Marc Strapetz <marc.strapetz@syntevo.com>
  3.  * Copyright (C) 2015, 2020 Ivan Motsch <ivan.motsch@bsiag.com> and others
  4.  *
  5.  * This program and the accompanying materials are made available under the
  6.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  7.  * https://www.eclipse.org/org/documents/edl-v10.php.
  8.  *
  9.  * SPDX-License-Identifier: BSD-3-Clause
  10.  */

  11. package org.eclipse.jgit.util.io;

  12. import java.io.IOException;
  13. import java.io.InputStream;
  14. import java.util.Arrays;
  15. import java.util.EnumSet;
  16. import java.util.Set;

  17. import org.eclipse.jgit.diff.RawText;

  18. /**
  19.  * An InputStream that normalizes CRLF to LF.
  20.  * <p>
  21.  * Existing single CR are not changed to LF but are retained as is.
  22.  * </p>
  23.  * <p>
  24.  * Optionally, a binary check on the first 8kB is performed and in case of
  25.  * binary files, canonicalization is turned off (for the complete file). If
  26.  * binary checking determines that the input is CR/LF-delimited text and the
  27.  * stream has been created for checkout, canonicalization is also turned off.
  28.  * </p>
  29.  *
  30.  * @since 4.3
  31.  */
  32. public class AutoLFInputStream extends InputStream {

  33.     // This is the former EolCanonicalizingInputStream with a new name in order
  34.     // to have same naming for all LF / CRLF streams.

  35.     /**
  36.      * Flags for controlling auto-detection of binary vs. text content (for
  37.      * text=auto).
  38.      *
  39.      * @since 5.9
  40.      */
  41.     public enum StreamFlag {
  42.         /**
  43.          * Check the first 8kB for binary content and switch off
  44.          * canonicalization off for the whole file if so.
  45.          */
  46.         DETECT_BINARY,
  47.         /**
  48.          * If {@link #DETECT_BINARY} is set, throw an {@link IsBinaryException}
  49.          * if binary content is detected.
  50.          */
  51.         ABORT_IF_BINARY,
  52.         /**
  53.          * If {@link #DETECT_BINARY} is set and content is found to be CR-LF
  54.          * delimited text, switch off canonicalization.
  55.          */
  56.         FOR_CHECKOUT
  57.     }

  58.     private final byte[] single = new byte[1];

  59.     private final byte[] buf = new byte[8 * 1024];

  60.     private final InputStream in;

  61.     private int cnt;

  62.     private int ptr;

  63.     /**
  64.      * Set to {@code true} if no CR/LF processing is to be done: if the input is
  65.      * binary data, or CR/LF-delimited text and {@link StreamFlag#FOR_CHECKOUT}
  66.      * was given.
  67.      */
  68.     private boolean passAsIs;

  69.     /**
  70.      * Set to {@code true} if the input was detected to be binary data.
  71.      */
  72.     private boolean isBinary;

  73.     private boolean detectBinary;

  74.     private final boolean abortIfBinary;

  75.     private final boolean forCheckout;

  76.     /**
  77.      * A special exception thrown when {@link AutoLFInputStream} is told to
  78.      * throw an exception when attempting to read a binary file. The exception
  79.      * may be thrown at any stage during reading.
  80.      *
  81.      * @since 3.3
  82.      */
  83.     public static class IsBinaryException extends IOException {
  84.         private static final long serialVersionUID = 1L;

  85.         IsBinaryException() {
  86.             super();
  87.         }
  88.     }

  89.     /**
  90.      * Factory method for creating an {@link AutoLFInputStream} with the
  91.      * specified {@link StreamFlag flags}.
  92.      *
  93.      * @param in
  94.      *            raw input stream
  95.      * @param flags
  96.      *            {@link StreamFlag}s controlling the stream behavior
  97.      * @return a new {@link AutoLFInputStream}
  98.      * @since 5.9
  99.      */
  100.     public static AutoLFInputStream create(InputStream in,
  101.             StreamFlag... flags) {
  102.         if (flags == null) {
  103.             return new AutoLFInputStream(in, null);
  104.         }
  105.         EnumSet<StreamFlag> set = EnumSet.noneOf(StreamFlag.class);
  106.         set.addAll(Arrays.asList(flags));
  107.         return new AutoLFInputStream(in, set);
  108.     }

  109.     /**
  110.      * Creates a new InputStream, wrapping the specified stream.
  111.      *
  112.      * @param in
  113.      *            raw input stream
  114.      * @param flags
  115.      *            {@link StreamFlag}s controlling the stream behavior;
  116.      *            {@code null} is treated as an empty set
  117.      * @since 5.9
  118.      */
  119.     public AutoLFInputStream(InputStream in, Set<StreamFlag> flags) {
  120.         this.in = in;
  121.         this.detectBinary = flags != null
  122.                 && flags.contains(StreamFlag.DETECT_BINARY);
  123.         this.abortIfBinary = flags != null
  124.                 && flags.contains(StreamFlag.ABORT_IF_BINARY);
  125.         this.forCheckout = flags != null
  126.                 && flags.contains(StreamFlag.FOR_CHECKOUT);
  127.     }

  128.     /**
  129.      * Creates a new InputStream, wrapping the specified stream.
  130.      *
  131.      * @param in
  132.      *            raw input stream
  133.      * @param detectBinary
  134.      *            whether binaries should be detected
  135.      * @since 2.0
  136.      * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)}
  137.      *             instead
  138.      */
  139.     @Deprecated
  140.     public AutoLFInputStream(InputStream in, boolean detectBinary) {
  141.         this(in, detectBinary, false);
  142.     }

  143.     /**
  144.      * Creates a new InputStream, wrapping the specified stream.
  145.      *
  146.      * @param in
  147.      *            raw input stream
  148.      * @param detectBinary
  149.      *            whether binaries should be detected
  150.      * @param abortIfBinary
  151.      *            throw an IOException if the file is binary
  152.      * @since 3.3
  153.      * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)}
  154.      *             instead
  155.      */
  156.     @Deprecated
  157.     public AutoLFInputStream(InputStream in, boolean detectBinary,
  158.             boolean abortIfBinary) {
  159.         this.in = in;
  160.         this.detectBinary = detectBinary;
  161.         this.abortIfBinary = abortIfBinary;
  162.         this.forCheckout = false;
  163.     }

  164.     /** {@inheritDoc} */
  165.     @Override
  166.     public int read() throws IOException {
  167.         final int read = read(single, 0, 1);
  168.         return read == 1 ? single[0] & 0xff : -1;
  169.     }

  170.     /** {@inheritDoc} */
  171.     @Override
  172.     public int read(byte[] bs, int off, int len)
  173.             throws IOException {
  174.         if (len == 0)
  175.             return 0;

  176.         if (cnt == -1)
  177.             return -1;

  178.         int i = off;
  179.         final int end = off + len;

  180.         while (i < end) {
  181.             if (ptr == cnt && !fillBuffer()) {
  182.                 break;
  183.             }

  184.             byte b = buf[ptr++];
  185.             if (passAsIs || b != '\r') {
  186.                 // Logic for binary files ends here
  187.                 bs[i++] = b;
  188.                 continue;
  189.             }

  190.             if (ptr == cnt && !fillBuffer()) {
  191.                 bs[i++] = '\r';
  192.                 break;
  193.             }

  194.             if (buf[ptr] == '\n') {
  195.                 bs[i++] = '\n';
  196.                 ptr++;
  197.             } else
  198.                 bs[i++] = '\r';
  199.         }

  200.         return i == off ? -1 : i - off;
  201.     }

  202.     /**
  203.      * Whether the stream has detected as a binary so far.
  204.      *
  205.      * @return true if the stream has detected as a binary so far.
  206.      * @since 3.3
  207.      */
  208.     public boolean isBinary() {
  209.         return isBinary;
  210.     }

  211.     /** {@inheritDoc} */
  212.     @Override
  213.     public void close() throws IOException {
  214.         in.close();
  215.     }

  216.     private boolean fillBuffer() throws IOException {
  217.         cnt = 0;
  218.         while (cnt < buf.length) {
  219.             int n = in.read(buf, cnt, buf.length - cnt);
  220.             if (n < 0) {
  221.                 break;
  222.             }
  223.             cnt += n;
  224.         }
  225.         if (cnt < 1) {
  226.             cnt = -1;
  227.             return false;
  228.         }
  229.         if (detectBinary) {
  230.             isBinary = RawText.isBinary(buf, cnt);
  231.             passAsIs = isBinary;
  232.             detectBinary = false;
  233.             if (isBinary && abortIfBinary) {
  234.                 throw new IsBinaryException();
  235.             }
  236.             if (!passAsIs && forCheckout) {
  237.                 passAsIs = RawText.isCrLfText(buf, cnt);
  238.             }
  239.         }
  240.         ptr = 0;
  241.         return true;
  242.     }
  243. }