View Javadoc
1   /*
2    * Copyright (C) 2010, 2013 Marc Strapetz <marc.strapetz@syntevo.com>
3    * Copyright (C) 2015, 2020 Ivan Motsch <ivan.motsch@bsiag.com> and others
4    *
5    * This program and the accompanying materials are made available under the
6    * terms of the Eclipse Distribution License v. 1.0 which is available at
7    * https://www.eclipse.org/org/documents/edl-v10.php.
8    *
9    * SPDX-License-Identifier: BSD-3-Clause
10   */
11  
12  package org.eclipse.jgit.util.io;
13  
14  import java.io.IOException;
15  import java.io.InputStream;
16  import java.util.Arrays;
17  import java.util.EnumSet;
18  import java.util.Set;
19  
20  import org.eclipse.jgit.diff.RawText;
21  
22  /**
23   * An InputStream that normalizes CRLF to LF.
24   * <p>
25   * Existing single CR are not changed to LF but are retained as is.
26   * </p>
27   * <p>
28   * Optionally, a binary check on the first {@link RawText#getBufferSize()} bytes
29   * is performed and in case of binary files, canonicalization is turned off (for
30   * the complete file). If binary checking determines that the input is
31   * CR/LF-delimited text and the stream has been created for checkout,
32   * canonicalization is also turned off.
33   * </p>
34   *
35   * @since 4.3
36   */
37  public class AutoLFInputStream extends InputStream {
38  
39  	// This is the former EolCanonicalizingInputStream with a new name in order
40  	// to have same naming for all LF / CRLF streams.
41  
42  	/**
43  	 * Flags for controlling auto-detection of binary vs. text content (for
44  	 * text=auto).
45  	 *
46  	 * @since 5.9
47  	 */
48  	public enum StreamFlag {
49  		/**
50  		 * Check the first 8kB for binary content and switch off
51  		 * canonicalization off for the whole file if so.
52  		 */
53  		DETECT_BINARY,
54  		/**
55  		 * If {@link #DETECT_BINARY} is set, throw an {@link IsBinaryException}
56  		 * if binary content is detected.
57  		 */
58  		ABORT_IF_BINARY,
59  		/**
60  		 * If {@link #DETECT_BINARY} is set and content is found to be CR-LF
61  		 * delimited text, switch off canonicalization.
62  		 */
63  		FOR_CHECKOUT
64  	}
65  
66  	private final byte[] single = new byte[1];
67  
68  	private final byte[] buf = new byte[RawText.getBufferSize()];
69  
70  	private final InputStream in;
71  
72  	private int cnt;
73  
74  	private int ptr;
75  
76  	/**
77  	 * Set to {@code true} if no CR/LF processing is to be done: if the input is
78  	 * binary data, or CR/LF-delimited text and {@link StreamFlag#FOR_CHECKOUT}
79  	 * was given.
80  	 */
81  	private boolean passAsIs;
82  
83  	/**
84  	 * Set to {@code true} if the input was detected to be binary data.
85  	 */
86  	private boolean isBinary;
87  
88  	private boolean detectBinary;
89  
90  	private final boolean abortIfBinary;
91  
92  	private final boolean forCheckout;
93  
94  	/**
95  	 * A special exception thrown when {@link AutoLFInputStream} is told to
96  	 * throw an exception when attempting to read a binary file. The exception
97  	 * may be thrown at any stage during reading.
98  	 *
99  	 * @since 3.3
100 	 */
101 	public static class IsBinaryException extends IOException {
102 		private static final long serialVersionUID = 1L;
103 
104 		IsBinaryException() {
105 			super();
106 		}
107 	}
108 
109 	/**
110 	 * Factory method for creating an {@link AutoLFInputStream} with the
111 	 * specified {@link StreamFlag flags}.
112 	 *
113 	 * @param in
114 	 *            raw input stream
115 	 * @param flags
116 	 *            {@link StreamFlag}s controlling the stream behavior
117 	 * @return a new {@link AutoLFInputStream}
118 	 * @since 5.9
119 	 */
120 	public static AutoLFInputStream create(InputStream in,
121 			StreamFlag... flags) {
122 		if (flags == null) {
123 			return new AutoLFInputStream(in, null);
124 		}
125 		EnumSet<StreamFlag> set = EnumSet.noneOf(StreamFlag.class);
126 		set.addAll(Arrays.asList(flags));
127 		return new AutoLFInputStream(in, set);
128 	}
129 
130 	/**
131 	 * Creates a new InputStream, wrapping the specified stream.
132 	 *
133 	 * @param in
134 	 *            raw input stream
135 	 * @param flags
136 	 *            {@link StreamFlag}s controlling the stream behavior;
137 	 *            {@code null} is treated as an empty set
138 	 * @since 5.9
139 	 */
140 	public AutoLFInputStream(InputStream in, Set<StreamFlag> flags) {
141 		this.in = in;
142 		this.detectBinary = flags != null
143 				&& flags.contains(StreamFlag.DETECT_BINARY);
144 		this.abortIfBinary = flags != null
145 				&& flags.contains(StreamFlag.ABORT_IF_BINARY);
146 		this.forCheckout = flags != null
147 				&& flags.contains(StreamFlag.FOR_CHECKOUT);
148 	}
149 
150 	/**
151 	 * Creates a new InputStream, wrapping the specified stream.
152 	 *
153 	 * @param in
154 	 *            raw input stream
155 	 * @param detectBinary
156 	 *            whether binaries should be detected
157 	 * @since 2.0
158 	 * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)}
159 	 *             instead
160 	 */
161 	@Deprecated
162 	public AutoLFInputStream(InputStream in, boolean detectBinary) {
163 		this(in, detectBinary, false);
164 	}
165 
166 	/**
167 	 * Creates a new InputStream, wrapping the specified stream.
168 	 *
169 	 * @param in
170 	 *            raw input stream
171 	 * @param detectBinary
172 	 *            whether binaries should be detected
173 	 * @param abortIfBinary
174 	 *            throw an IOException if the file is binary
175 	 * @since 3.3
176 	 * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)}
177 	 *             instead
178 	 */
179 	@Deprecated
180 	public AutoLFInputStream(InputStream in, boolean detectBinary,
181 			boolean abortIfBinary) {
182 		this.in = in;
183 		this.detectBinary = detectBinary;
184 		this.abortIfBinary = abortIfBinary;
185 		this.forCheckout = false;
186 	}
187 
188 	/** {@inheritDoc} */
189 	@Override
190 	public int read() throws IOException {
191 		final int read = read(single, 0, 1);
192 		return read == 1 ? single[0] & 0xff : -1;
193 	}
194 
195 	/** {@inheritDoc} */
196 	@Override
197 	public int read(byte[] bs, int off, int len)
198 			throws IOException {
199 		if (len == 0)
200 			return 0;
201 
202 		if (cnt == -1)
203 			return -1;
204 
205 		int i = off;
206 		final int end = off + len;
207 
208 		while (i < end) {
209 			if (ptr == cnt && !fillBuffer()) {
210 				break;
211 			}
212 
213 			byte b = buf[ptr++];
214 			if (passAsIs || b != '\r') {
215 				// Logic for binary files ends here
216 				bs[i++] = b;
217 				continue;
218 			}
219 
220 			if (ptr == cnt && !fillBuffer()) {
221 				bs[i++] = '\r';
222 				break;
223 			}
224 
225 			if (buf[ptr] == '\n') {
226 				bs[i++] = '\n';
227 				ptr++;
228 			} else
229 				bs[i++] = '\r';
230 		}
231 
232 		return i == off ? -1 : i - off;
233 	}
234 
235 	/**
236 	 * Whether the stream has detected as a binary so far.
237 	 *
238 	 * @return true if the stream has detected as a binary so far.
239 	 * @since 3.3
240 	 */
241 	public boolean isBinary() {
242 		return isBinary;
243 	}
244 
245 	/** {@inheritDoc} */
246 	@Override
247 	public void close() throws IOException {
248 		in.close();
249 	}
250 
251 	private boolean fillBuffer() throws IOException {
252 		cnt = 0;
253 		while (cnt < buf.length) {
254 			int n = in.read(buf, cnt, buf.length - cnt);
255 			if (n < 0) {
256 				break;
257 			}
258 			cnt += n;
259 		}
260 		if (cnt < 1) {
261 			cnt = -1;
262 			return false;
263 		}
264 		if (detectBinary) {
265 			isBinary = RawText.isBinary(buf, cnt, cnt < buf.length);
266 			passAsIs = isBinary;
267 			detectBinary = false;
268 			if (isBinary && abortIfBinary) {
269 				throw new IsBinaryException();
270 			}
271 			if (!passAsIs && forCheckout) {
272 				passAsIs = RawText.isCrLfText(buf, cnt, cnt < buf.length);
273 			}
274 		}
275 		ptr = 0;
276 		return true;
277 	}
278 }