View Javadoc
1   /*
2    * Copyright (C) 2010, 2013 Marc Strapetz <marc.strapetz@syntevo.com>
3    * Copyright (C) 2015, 2020 Ivan Motsch <ivan.motsch@bsiag.com> and others
4    *
5    * This program and the accompanying materials are made available under the
6    * terms of the Eclipse Distribution License v. 1.0 which is available at
7    * https://www.eclipse.org/org/documents/edl-v10.php.
8    *
9    * SPDX-License-Identifier: BSD-3-Clause
10   */
11  
12  package org.eclipse.jgit.util.io;
13  
14  import java.io.IOException;
15  import java.io.InputStream;
16  import java.util.Arrays;
17  import java.util.EnumSet;
18  import java.util.Set;
19  
20  import org.eclipse.jgit.diff.RawText;
21  
22  /**
23   * An InputStream that normalizes CRLF to LF.
24   * <p>
25   * Existing single CR are not changed to LF but are retained as is.
26   * </p>
27   * <p>
28   * Optionally, a binary check on the first 8kB is performed and in case of
29   * binary files, canonicalization is turned off (for the complete file). If
30   * binary checking determines that the input is CR/LF-delimited text and the
31   * stream has been created for checkout, canonicalization is also turned off.
32   * </p>
33   *
34   * @since 4.3
35   */
36  public class AutoLFInputStream extends InputStream {
37  
38  	// This is the former EolCanonicalizingInputStream with a new name in order
39  	// to have same naming for all LF / CRLF streams.
40  
41  	/**
42  	 * Flags for controlling auto-detection of binary vs. text content (for
43  	 * text=auto).
44  	 *
45  	 * @since 5.9
46  	 */
47  	public enum StreamFlag {
48  		/**
49  		 * Check the first 8kB for binary content and switch off
50  		 * canonicalization off for the whole file if so.
51  		 */
52  		DETECT_BINARY,
53  		/**
54  		 * If {@link #DETECT_BINARY} is set, throw an {@link IsBinaryException}
55  		 * if binary content is detected.
56  		 */
57  		ABORT_IF_BINARY,
58  		/**
59  		 * If {@link #DETECT_BINARY} is set and content is found to be CR-LF
60  		 * delimited text, switch off canonicalization.
61  		 */
62  		FOR_CHECKOUT
63  	}
64  
65  	private final byte[] single = new byte[1];
66  
67  	private final byte[] buf = new byte[8 * 1024];
68  
69  	private final InputStream in;
70  
71  	private int cnt;
72  
73  	private int ptr;
74  
75  	/**
76  	 * Set to {@code true} if no CR/LF processing is to be done: if the input is
77  	 * binary data, or CR/LF-delimited text and {@link StreamFlag#FOR_CHECKOUT}
78  	 * was given.
79  	 */
80  	private boolean passAsIs;
81  
82  	/**
83  	 * Set to {@code true} if the input was detected to be binary data.
84  	 */
85  	private boolean isBinary;
86  
87  	private boolean detectBinary;
88  
89  	private final boolean abortIfBinary;
90  
91  	private final boolean forCheckout;
92  
93  	/**
94  	 * A special exception thrown when {@link AutoLFInputStream} is told to
95  	 * throw an exception when attempting to read a binary file. The exception
96  	 * may be thrown at any stage during reading.
97  	 *
98  	 * @since 3.3
99  	 */
100 	public static class IsBinaryException extends IOException {
101 		private static final long serialVersionUID = 1L;
102 
103 		IsBinaryException() {
104 			super();
105 		}
106 	}
107 
108 	/**
109 	 * Factory method for creating an {@link AutoLFInputStream} with the
110 	 * specified {@link StreamFlag flags}.
111 	 *
112 	 * @param in
113 	 *            raw input stream
114 	 * @param flags
115 	 *            {@link StreamFlag}s controlling the stream behavior
116 	 * @return a new {@link AutoLFInputStream}
117 	 * @since 5.9
118 	 */
119 	public static AutoLFInputStream create(InputStream in,
120 			StreamFlag... flags) {
121 		if (flags == null) {
122 			return new AutoLFInputStream(in, null);
123 		}
124 		EnumSet<StreamFlag> set = EnumSet.noneOf(StreamFlag.class);
125 		set.addAll(Arrays.asList(flags));
126 		return new AutoLFInputStream(in, set);
127 	}
128 
129 	/**
130 	 * Creates a new InputStream, wrapping the specified stream.
131 	 *
132 	 * @param in
133 	 *            raw input stream
134 	 * @param flags
135 	 *            {@link StreamFlag}s controlling the stream behavior;
136 	 *            {@code null} is treated as an empty set
137 	 * @since 5.9
138 	 */
139 	public AutoLFInputStream(InputStream in, Set<StreamFlag> flags) {
140 		this.in = in;
141 		this.detectBinary = flags != null
142 				&& flags.contains(StreamFlag.DETECT_BINARY);
143 		this.abortIfBinary = flags != null
144 				&& flags.contains(StreamFlag.ABORT_IF_BINARY);
145 		this.forCheckout = flags != null
146 				&& flags.contains(StreamFlag.FOR_CHECKOUT);
147 	}
148 
149 	/**
150 	 * Creates a new InputStream, wrapping the specified stream.
151 	 *
152 	 * @param in
153 	 *            raw input stream
154 	 * @param detectBinary
155 	 *            whether binaries should be detected
156 	 * @since 2.0
157 	 * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)}
158 	 *             instead
159 	 */
160 	@Deprecated
161 	public AutoLFInputStream(InputStream in, boolean detectBinary) {
162 		this(in, detectBinary, false);
163 	}
164 
165 	/**
166 	 * Creates a new InputStream, wrapping the specified stream.
167 	 *
168 	 * @param in
169 	 *            raw input stream
170 	 * @param detectBinary
171 	 *            whether binaries should be detected
172 	 * @param abortIfBinary
173 	 *            throw an IOException if the file is binary
174 	 * @since 3.3
175 	 * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)}
176 	 *             instead
177 	 */
178 	@Deprecated
179 	public AutoLFInputStream(InputStream in, boolean detectBinary,
180 			boolean abortIfBinary) {
181 		this.in = in;
182 		this.detectBinary = detectBinary;
183 		this.abortIfBinary = abortIfBinary;
184 		this.forCheckout = false;
185 	}
186 
187 	/** {@inheritDoc} */
188 	@Override
189 	public int read() throws IOException {
190 		final int read = read(single, 0, 1);
191 		return read == 1 ? single[0] & 0xff : -1;
192 	}
193 
194 	/** {@inheritDoc} */
195 	@Override
196 	public int read(byte[] bs, int off, int len)
197 			throws IOException {
198 		if (len == 0)
199 			return 0;
200 
201 		if (cnt == -1)
202 			return -1;
203 
204 		int i = off;
205 		final int end = off + len;
206 
207 		while (i < end) {
208 			if (ptr == cnt && !fillBuffer()) {
209 				break;
210 			}
211 
212 			byte b = buf[ptr++];
213 			if (passAsIs || b != '\r') {
214 				// Logic for binary files ends here
215 				bs[i++] = b;
216 				continue;
217 			}
218 
219 			if (ptr == cnt && !fillBuffer()) {
220 				bs[i++] = '\r';
221 				break;
222 			}
223 
224 			if (buf[ptr] == '\n') {
225 				bs[i++] = '\n';
226 				ptr++;
227 			} else
228 				bs[i++] = '\r';
229 		}
230 
231 		return i == off ? -1 : i - off;
232 	}
233 
234 	/**
235 	 * Whether the stream has detected as a binary so far.
236 	 *
237 	 * @return true if the stream has detected as a binary so far.
238 	 * @since 3.3
239 	 */
240 	public boolean isBinary() {
241 		return isBinary;
242 	}
243 
244 	/** {@inheritDoc} */
245 	@Override
246 	public void close() throws IOException {
247 		in.close();
248 	}
249 
250 	private boolean fillBuffer() throws IOException {
251 		cnt = 0;
252 		while (cnt < buf.length) {
253 			int n = in.read(buf, cnt, buf.length - cnt);
254 			if (n < 0) {
255 				break;
256 			}
257 			cnt += n;
258 		}
259 		if (cnt < 1) {
260 			cnt = -1;
261 			return false;
262 		}
263 		if (detectBinary) {
264 			isBinary = RawText.isBinary(buf, cnt);
265 			passAsIs = isBinary;
266 			detectBinary = false;
267 			if (isBinary && abortIfBinary) {
268 				throw new IsBinaryException();
269 			}
270 			if (!passAsIs && forCheckout) {
271 				passAsIs = RawText.isCrLfText(buf, cnt);
272 			}
273 		}
274 		ptr = 0;
275 		return true;
276 	}
277 }