// AutoLFInputStream.java
/*
* Copyright (C) 2010, 2013 Marc Strapetz <marc.strapetz@syntevo.com>
* Copyright (C) 2015, 2020 Ivan Motsch <ivan.motsch@bsiag.com> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.util.io;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.Set;
import org.eclipse.jgit.diff.RawText;
/**
 * An {@link InputStream} wrapper that rewrites every CR-LF pair to a single
 * LF.
 * <p>
 * A CR that is not directly followed by an LF is passed through unchanged.
 * </p>
 * <p>
 * Optionally, the first 8kB of the input are checked for binary content; if
 * they are found to be binary, the whole stream is passed through unmodified.
 * If that check instead finds CR-LF delimited text and the stream was created
 * for checkout, the stream is passed through unmodified as well.
 * </p>
 *
 * @since 4.3
 */
public class AutoLFInputStream extends InputStream {
    // Formerly named EolCanonicalizingInputStream; renamed so that all
    // LF / CRLF streams follow the same naming scheme.

    /**
     * Options controlling the automatic detection of binary vs. text content
     * (for text=auto).
     *
     * @since 5.9
     */
    public enum StreamFlag {
        /**
         * Inspect the first 8kB for binary content and, if any is found,
         * switch canonicalization off for the whole file.
         */
        DETECT_BINARY,
        /**
         * Together with {@link #DETECT_BINARY}: throw an
         * {@link IsBinaryException} when binary content is detected.
         */
        ABORT_IF_BINARY,
        /**
         * Together with {@link #DETECT_BINARY}: switch canonicalization off
         * if the content is found to be CR-LF delimited text.
         */
        FOR_CHECKOUT
    }

    /** Scratch array backing the single-byte {@link #read()}. */
    private final byte[] oneByte = new byte[1];

    /** Chunk of raw input that is currently being handed out. */
    private final byte[] buffer = new byte[8 * 1024];

    /** The wrapped raw stream. */
    private final InputStream source;

    /**
     * Number of valid bytes in {@link #buffer}; {@code -1} once the wrapped
     * stream has been found to be exhausted.
     */
    private int filled;

    /** Index of the next unread byte in {@link #buffer}. */
    private int pos;

    /**
     * {@code true} if bytes are copied without any CR/LF processing: either
     * the input is binary data, or it is CR/LF-delimited text and
     * {@link StreamFlag#FOR_CHECKOUT} was given.
     */
    private boolean passAsIs;

    /** {@code true} if the input was detected to be binary data. */
    private boolean binary;

    /** {@code true} while the content inspection is still pending. */
    private boolean detectBinary;

    /** Whether detecting binary content raises {@link IsBinaryException}. */
    private final boolean abortIfBinary;

    /** Whether {@link StreamFlag#FOR_CHECKOUT} behavior is enabled. */
    private final boolean forCheckout;

    /**
     * Exception thrown by an {@link AutoLFInputStream} that was told to abort
     * on binary input, when a read call detects binary content.
     *
     * @since 3.3
     */
    public static class IsBinaryException extends IOException {
        private static final long serialVersionUID = 1L;

        IsBinaryException() {
            super();
        }
    }

    /**
     * Creates an {@link AutoLFInputStream} configured with the given
     * {@link StreamFlag flags}.
     *
     * @param in
     *            raw input stream
     * @param flags
     *            {@link StreamFlag}s controlling the stream behavior
     * @return a new {@link AutoLFInputStream}
     * @since 5.9
     */
    public static AutoLFInputStream create(InputStream in,
            StreamFlag... flags) {
        final Set<StreamFlag> selected;
        if (flags == null) {
            // A null array is handed on as "no flags".
            selected = null;
        } else {
            selected = EnumSet.noneOf(StreamFlag.class);
            selected.addAll(Arrays.asList(flags));
        }
        return new AutoLFInputStream(in, selected);
    }

    /**
     * Wraps the given stream.
     *
     * @param in
     *            raw input stream
     * @param flags
     *            {@link StreamFlag}s controlling the stream behavior;
     *            {@code null} is treated as an empty set
     * @since 5.9
     */
    public AutoLFInputStream(InputStream in, Set<StreamFlag> flags) {
        this.source = in;
        if (flags == null) {
            this.detectBinary = false;
            this.abortIfBinary = false;
            this.forCheckout = false;
        } else {
            this.detectBinary = flags.contains(StreamFlag.DETECT_BINARY);
            this.abortIfBinary = flags.contains(StreamFlag.ABORT_IF_BINARY);
            this.forCheckout = flags.contains(StreamFlag.FOR_CHECKOUT);
        }
    }

    /**
     * Wraps the given stream.
     *
     * @param in
     *            raw input stream
     * @param detectBinary
     *            whether binaries should be detected
     * @since 2.0
     * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)}
     *             instead
     */
    @Deprecated
    public AutoLFInputStream(InputStream in, boolean detectBinary) {
        this(in, detectBinary, false);
    }

    /**
     * Wraps the given stream.
     *
     * @param in
     *            raw input stream
     * @param detectBinary
     *            whether binaries should be detected
     * @param abortIfBinary
     *            throw an IOException if the file is binary
     * @since 3.3
     * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)}
     *             instead
     */
    @Deprecated
    public AutoLFInputStream(InputStream in, boolean detectBinary,
            boolean abortIfBinary) {
        this.source = in;
        this.detectBinary = detectBinary;
        this.abortIfBinary = abortIfBinary;
        this.forCheckout = false;
    }

    /** {@inheritDoc} */
    @Override
    public int read() throws IOException {
        if (read(oneByte, 0, 1) != 1) {
            return -1;
        }
        return oneByte[0] & 0xff;
    }

    /** {@inheritDoc} */
    @Override
    public int read(byte[] bs, int off, int len)
            throws IOException {
        if (len == 0) {
            return 0;
        }
        if (filled == -1) {
            // End of the wrapped stream was already seen.
            return -1;
        }

        final int limit = off + len;
        int out = off;
        while (out < limit) {
            if (pos == filled && !fillBuffer()) {
                break;
            }

            final byte current = buffer[pos++];
            if (!passAsIs && current == '\r') {
                // A CR only turns into LF if an LF follows it, so the next
                // byte is needed; it may live in the next chunk.
                if (pos == filled && !fillBuffer()) {
                    // CR was the very last byte of the input: keep it.
                    bs[out++] = '\r';
                    break;
                }
                if (buffer[pos] == '\n') {
                    // CR-LF pair: emit only the LF and consume it.
                    bs[out++] = '\n';
                    pos++;
                } else {
                    // Lone CR: keep it; the following byte is handled by
                    // the next loop iteration.
                    bs[out++] = '\r';
                }
            } else {
                // Pass-through mode (e.g. binary data) or a non-CR byte.
                bs[out++] = current;
            }
        }

        final int produced = out - off;
        return produced == 0 ? -1 : produced;
    }

    /**
     * Tells whether the input has been detected as binary so far.
     *
     * @return {@code true} if the input has been detected as binary so far
     * @since 3.3
     */
    public boolean isBinary() {
        return binary;
    }

    /** {@inheritDoc} */
    @Override
    public void close() throws IOException {
        source.close();
    }

    /**
     * Refills {@link #buffer} from the wrapped stream, reading until the
     * buffer is full or the wrapped stream reports its end, and runs the
     * pending content inspection (if any) on the data just read.
     *
     * @return {@code true} if at least one byte is available in the buffer,
     *         {@code false} if the wrapped stream delivered no more data
     * @throws IOException
     *             if reading the wrapped stream fails, or (as
     *             {@link IsBinaryException}) if binary content is detected
     *             and the stream was configured to abort on it
     */
    private boolean fillBuffer() throws IOException {
        int got;
        for (filled = 0; filled < buffer.length; filled += got) {
            got = source.read(buffer, filled, buffer.length - filled);
            if (got < 0) {
                break;
            }
        }
        if (filled <= 0) {
            // Nothing read at all: remember the end of the stream.
            filled = -1;
            return false;
        }
        if (detectBinary) {
            inspectContent();
        }
        pos = 0;
        return true;
    }

    /**
     * Inspects the bytes currently in {@link #buffer} and decides whether the
     * stream is passed through unmodified. Clears the pending-detection
     * flag, so the inspection happens only once per stream.
     *
     * @throws IsBinaryException
     *             if the content is binary and aborting on binary content
     *             was requested
     */
    private void inspectContent() throws IsBinaryException {
        final boolean looksBinary = RawText.isBinary(buffer, filled);
        binary = looksBinary;
        passAsIs = looksBinary;
        detectBinary = false;
        if (looksBinary) {
            if (abortIfBinary) {
                throw new IsBinaryException();
            }
        } else if (forCheckout) {
            passAsIs = RawText.isCrLfText(buffer, filled);
        }
    }
}