View Javadoc
1   /*
2    * Copyright (C) 2016, 2021 Christian Halstrick <christian.halstrick@sap.com> and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  package org.eclipse.jgit.lfs;
11  
12  import static java.nio.charset.StandardCharsets.UTF_8;
13  
14  import java.io.BufferedInputStream;
15  import java.io.BufferedReader;
16  import java.io.ByteArrayInputStream;
17  import java.io.IOException;
18  import java.io.InputStream;
19  import java.io.InputStreamReader;
20  import java.io.OutputStream;
21  import java.io.PrintStream;
22  import java.io.UnsupportedEncodingException;
23  import java.util.Locale;
24  import java.util.Objects;
25  
26  import org.eclipse.jgit.annotations.Nullable;
27  import org.eclipse.jgit.lfs.lib.AnyLongObjectId;
28  import org.eclipse.jgit.lfs.lib.Constants;
29  import org.eclipse.jgit.lfs.lib.LongObjectId;
30  import org.eclipse.jgit.util.IO;
31  
32  /**
33   * Represents an LFS pointer file
34   *
35   * @since 4.6
36   */
37  public class LfsPointer implements Comparable<LfsPointer> {
38  	/**
39  	 * The version of the LfsPointer file format
40  	 */
41  	public static final String VERSION = "https://git-lfs.github.com/spec/v1"; //$NON-NLS-1$
42  
43  	/**
44  	 * The version of the LfsPointer file format using legacy URL
45  	 * @since 4.7
46  	 */
47  	public static final String VERSION_LEGACY = "https://hawser.github.com/spec/v1"; //$NON-NLS-1$
48  
49  	/**
50  	 * Don't inspect files that are larger than this threshold to avoid
51  	 * excessive reading. No pointer file should be larger than this.
52  	 * @since 4.11
53  	 */
54  	public static final int SIZE_THRESHOLD = 200;
55  
56  	/**
57  	 * The name of the hash function as used in the pointer files. This will
58  	 * evaluate to "sha256"
59  	 */
60  	public static final String HASH_FUNCTION_NAME = Constants.LONG_HASH_FUNCTION
61  			.toLowerCase(Locale.ROOT).replace("-", ""); //$NON-NLS-1$ //$NON-NLS-2$
62  
63  	/**
64  	 * {@link #SIZE_THRESHOLD} is too low; with lfs extensions a LFS pointer can
65  	 * be larger. But 8kB should be more than enough.
66  	 */
67  	static final int FULL_SIZE_THRESHOLD = 8 * 1024;
68  
69  	private final AnyLongObjectId oid;
70  
71  	private final long size;
72  
73  	/**
74  	 * <p>Constructor for LfsPointer.</p>
75  	 *
76  	 * @param oid
77  	 *            the id of the content
78  	 * @param size
79  	 *            the size of the content
80  	 */
81  	public LfsPointer(AnyLongObjectId oid, long size) {
82  		this.oid = oid;
83  		this.size = size;
84  	}
85  
86  	/**
87  	 * <p>Getter for the field <code>oid</code>.</p>
88  	 *
89  	 * @return the id of the content
90  	 */
91  	public AnyLongObjectId getOid() {
92  		return oid;
93  	}
94  
95  	/**
96  	 * <p>Getter for the field <code>size</code>.</p>
97  	 *
98  	 * @return the size of the content
99  	 */
100 	public long getSize() {
101 		return size;
102 	}
103 
104 	/**
105 	 * Encode this object into the LFS format defined by {@link #VERSION}
106 	 *
107 	 * @param out
108 	 *            the {@link java.io.OutputStream} into which the encoded data should be
109 	 *            written
110 	 */
111 	public void encode(OutputStream out) {
112 		try (PrintStream ps = new PrintStream(out, false,
113 				UTF_8.name())) {
114 			ps.print("version "); //$NON-NLS-1$
115 			ps.print(VERSION + "\n"); //$NON-NLS-1$
116 			ps.print("oid " + HASH_FUNCTION_NAME + ":"); //$NON-NLS-1$ //$NON-NLS-2$
117 			ps.print(oid.name() + "\n"); //$NON-NLS-1$
118 			ps.print("size "); //$NON-NLS-1$
119 			ps.print(size + "\n"); //$NON-NLS-1$
120 		} catch (UnsupportedEncodingException e) {
121 			// should not happen, we are using a standard charset
122 		}
123 	}
124 
125 	/**
126 	 * Try to parse the data provided by an InputStream to the format defined by
127 	 * {@link #VERSION}. If the given stream supports mark and reset as
128 	 * indicated by {@link InputStream#markSupported()}, its input position will
129 	 * be reset if the stream content is not actually a LFS pointer (i.e., when
130 	 * {@code null} is returned). If the stream content is an invalid LFS
131 	 * pointer or the given stream does not support mark/reset, the input
132 	 * position may not be reset.
133 	 *
134 	 * @param in
135 	 *            the {@link java.io.InputStream} from where to read the data
136 	 * @return an {@link org.eclipse.jgit.lfs.LfsPointer} or {@code null} if the
137 	 *         stream was not parseable as LfsPointer
138 	 * @throws java.io.IOException
139 	 */
140 	@Nullable
141 	public static LfsPointer parseLfsPointer(InputStream in)
142 			throws IOException {
143 		if (in.markSupported()) {
144 			return parse(in);
145 		}
146 		// Fallback; note that while parse() resets its input stream, that won't
147 		// reset "in".
148 		return parse(new BufferedInputStream(in));
149 	}
150 
151 	@Nullable
152 	private static LfsPointer parse(InputStream in)
153 			throws IOException {
154 		if (!in.markSupported()) {
155 			// No translation; internal error
156 			throw new IllegalArgumentException(
157 					"LFS pointer parsing needs InputStream.markSupported() == true"); //$NON-NLS-1$
158 		}
159 		// Try reading only a short block first.
160 		in.mark(SIZE_THRESHOLD);
161 		byte[] preamble = new byte[SIZE_THRESHOLD];
162 		int length = IO.readFully(in, preamble, 0);
163 		if (length < preamble.length || in.read() < 0) {
164 			// We have the whole file. Try to parse a pointer from it.
165 			try (BufferedReader r = new BufferedReader(new InputStreamReader(
166 					new ByteArrayInputStream(preamble, 0, length), UTF_8))) {
167 				LfsPointer ptr = parse(r);
168 				if (ptr == null) {
169 					in.reset();
170 				}
171 				return ptr;
172 			}
173 		}
174 		// Longer than SIZE_THRESHOLD: expect "version" to be the first line.
175 		boolean hasVersion = checkVersion(preamble);
176 		in.reset();
177 		if (!hasVersion) {
178 			return null;
179 		}
180 		in.mark(FULL_SIZE_THRESHOLD);
181 		byte[] fullPointer = new byte[FULL_SIZE_THRESHOLD];
182 		length = IO.readFully(in, fullPointer, 0);
183 		if (length == fullPointer.length && in.read() >= 0) {
184 			in.reset();
185 			return null; // Too long.
186 		}
187 		try (BufferedReader r = new BufferedReader(new InputStreamReader(
188 				new ByteArrayInputStream(fullPointer, 0, length), UTF_8))) {
189 			LfsPointer ptr = parse(r);
190 			if (ptr == null) {
191 				in.reset();
192 			}
193 			return ptr;
194 		}
195 	}
196 
197 	private static LfsPointer parse(BufferedReader r) throws IOException {
198 		boolean versionLine = false;
199 		LongObjectId id = null;
200 		long sz = -1;
201 		// This parsing is a bit too general if we go by the spec at
202 		// https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md
203 		// Comment lines are not mentioned in the spec, the "version" line
204 		// MUST be the first, and keys are ordered alphabetically.
205 		for (String s = r.readLine(); s != null; s = r.readLine()) {
206 			if (s.startsWith("#") || s.length() == 0) { //$NON-NLS-1$
207 				continue;
208 			} else if (s.startsWith("version")) { //$NON-NLS-1$
209 				if (versionLine || !checkVersionLine(s)) {
210 					return null; // Not a LFS pointer
211 				}
212 				versionLine = true;
213 			} else {
214 				try {
215 					if (s.startsWith("oid sha256:")) { //$NON-NLS-1$
216 						if (id != null) {
217 							return null; // Not a LFS pointer
218 						}
219 						id = LongObjectId.fromString(s.substring(11).trim());
220 					} else if (s.startsWith("size")) { //$NON-NLS-1$
221 						if (sz > 0 || s.length() < 5 || s.charAt(4) != ' ') {
222 							return null; // Not a LFS pointer
223 						}
224 						sz = Long.parseLong(s.substring(5).trim());
225 					}
226 				} catch (RuntimeException e) {
227 					// We could not parse the line. If we have a version
228 					// already, this is a corrupt LFS pointer. Otherwise it
229 					// is just not an LFS pointer.
230 					if (versionLine) {
231 						throw e;
232 					}
233 					return null;
234 				}
235 			}
236 			if (versionLine && id != null && sz > -1) {
237 				return new LfsPointer(id, sz);
238 			}
239 		}
240 		return null;
241 	}
242 
243 	private static boolean checkVersion(byte[] data) {
244 		// According to the spec at
245 		// https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md
246 		// it MUST always be the first line.
247 		try (BufferedReader r = new BufferedReader(
248 				new InputStreamReader(new ByteArrayInputStream(data), UTF_8))) {
249 			String s = r.readLine();
250 			if (s != null && s.startsWith("version")) { //$NON-NLS-1$
251 				return checkVersionLine(s);
252 			}
253 		} catch (IOException e) {
254 			// Doesn't occur, we're reading from a byte array!
255 		}
256 		return false;
257 	}
258 
259 	private static boolean checkVersionLine(String s) {
260 		if (s.length() < 8 || s.charAt(7) != ' ') {
261 			return false; // Not a valid LFS pointer version line
262 		}
263 		String rest = s.substring(8).trim();
264 		return VERSION.equals(rest) || VERSION_LEGACY.equals(rest);
265 	}
266 
267 	/** {@inheritDoc} */
268 	@Override
269 	public String toString() {
270 		return "LfsPointer: oid=" + oid.name() + ", size=" //$NON-NLS-1$ //$NON-NLS-2$
271 				+ size;
272 	}
273 
274 	/**
275 	 * @since 4.11
276 	 */
277 	@Override
278 	public int compareTo(LfsPointer o) {
279 		int x = getOid().compareTo(o.getOid());
280 		if (x != 0) {
281 			return x;
282 		}
283 
284 		return Long.compare(getSize(), o.getSize());
285 	}
286 
287 	@Override
288 	public int hashCode() {
289 		return Objects.hash(getOid()) * 31 + Long.hashCode(getSize());
290 	}
291 
292 	@Override
293 	public boolean equals(Object obj) {
294 		if (this == obj) {
295 			return true;
296 		}
297 		if (obj == null || getClass() != obj.getClass()) {
298 			return false;
299 		}
300 		LfsPointer other = (LfsPointer) obj;
301 		return Objects.equals(getOid(), other.getOid())
302 				&& getSize() == other.getSize();
303 	}
304 }