View Javadoc
1   /*
2    * Copyright (C) 2015, Matthias Sohn <matthias.sohn@sap.com>
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.lfs.lib;
45  
46  import java.io.Serializable;
47  import java.text.MessageFormat;
48  
49  import org.eclipse.jgit.lfs.errors.InvalidLongObjectIdException;
50  import org.eclipse.jgit.lfs.internal.LfsText;
51  import org.eclipse.jgit.lib.AbbreviatedObjectId;
52  import org.eclipse.jgit.lib.AnyObjectId;
53  import org.eclipse.jgit.lib.ObjectId;
54  import org.eclipse.jgit.util.NB;
55  import org.eclipse.jgit.util.RawParseUtils;
56  
57  /**
58   * A prefix abbreviation of an {@link LongObjectId}.
59   * <p>
60   * Enable abbreviating SHA-256 strings used by Git LFS, using sufficient leading
61   * digits from the LongObjectId name to still be unique within the repository
62   * the string was generated from. These ids are likely to be unique for a useful
63   * period of time, especially if they contain at least 6-10 hex digits.
64   * <p>
65   * This class converts the hex string into a binary form, to make it more
66   * efficient for matching against an object.
67   *
68   * Ported to SHA-256 from {@link AbbreviatedObjectId}
69   *
70   * @since 4.3
71   */
72  public final class AbbreviatedLongObjectId implements Serializable {
73  	private static final long serialVersionUID = 1L;
74  
75  	/**
76  	 * Test a string of characters to verify it is a hex format.
77  	 * <p>
78  	 * If true the string can be parsed with {@link #fromString(String)}.
79  	 *
80  	 * @param id
81  	 *            the string to test.
82  	 * @return true if the string can converted into an AbbreviatedObjectId.
83  	 */
84  	public static final boolean isId(final String id) {
85  		if (id.length() < 2
86  				|| Constants.LONG_OBJECT_ID_STRING_LENGTH < id.length())
87  			return false;
88  		try {
89  			for (int i = 0; i < id.length(); i++)
90  				RawParseUtils.parseHexInt4((byte) id.charAt(i));
91  			return true;
92  		} catch (ArrayIndexOutOfBoundsException e) {
93  			return false;
94  		}
95  	}
96  
97  	/**
98  	 * Convert an AbbreviatedObjectId from hex characters (US-ASCII).
99  	 *
100 	 * @param buf
101 	 *            the US-ASCII buffer to read from.
102 	 * @param offset
103 	 *            position to read the first character from.
104 	 * @param end
105 	 *            one past the last position to read (<code>end-offset</code> is
106 	 *            the length of the string).
107 	 * @return the converted object id.
108 	 */
109 	public static final AbbreviatedLongObjectId fromString(final byte[] buf,
110 			final int offset, final int end) {
111 		if (end - offset > Constants.LONG_OBJECT_ID_STRING_LENGTH)
112 			throw new IllegalArgumentException(MessageFormat.format(
113 							LfsText.get().invalidLongIdLength,
114 					Integer.valueOf(end - offset),
115 					Integer.valueOf(Constants.LONG_OBJECT_ID_STRING_LENGTH)));
116 		return fromHexString(buf, offset, end);
117 	}
118 
119 	/**
120 	 * Convert an AbbreviatedObjectId from an {@link AnyObjectId}.
121 	 * <p>
122 	 * This method copies over all bits of the Id, and is therefore complete
123 	 * (see {@link #isComplete()}).
124 	 *
125 	 * @param id
126 	 *            the {@link ObjectId} to convert from.
127 	 * @return the converted object id.
128 	 */
129 	public static final AbbreviatedLongObjectId fromLongObjectId(
130 			AnyLongObjectId id) {
131 		return new AbbreviatedLongObjectId(
132 				Constants.LONG_OBJECT_ID_STRING_LENGTH, id.w1, id.w2, id.w3,
133 				id.w4);
134 	}
135 
136 	/**
137 	 * Convert an AbbreviatedLongObjectId from hex characters.
138 	 *
139 	 * @param str
140 	 *            the string to read from. Must be &lt;= 64 characters.
141 	 * @return the converted object id.
142 	 */
143 	public static final AbbreviatedLongObjectId fromString(final String str) {
144 		if (str.length() > Constants.LONG_OBJECT_ID_STRING_LENGTH)
145 			throw new IllegalArgumentException(
146 					MessageFormat.format(LfsText.get().invalidLongId, str));
147 		final byte[] b = org.eclipse.jgit.lib.Constants.encodeASCII(str);
148 		return fromHexString(b, 0, b.length);
149 	}
150 
151 	private static final AbbreviatedLongObjectId fromHexString(final byte[] bs,
152 			int ptr, final int end) {
153 		try {
154 			final long a = hexUInt64(bs, ptr, end);
155 			final long b = hexUInt64(bs, ptr + 16, end);
156 			final long c = hexUInt64(bs, ptr + 32, end);
157 			final long d = hexUInt64(bs, ptr + 48, end);
158 			return new AbbreviatedLongObjectId(end - ptr, a, b, c, d);
159 		} catch (ArrayIndexOutOfBoundsException e1) {
160 			throw new InvalidLongObjectIdException(bs, ptr, end - ptr);
161 		}
162 	}
163 
164 	private static final long hexUInt64(final byte[] bs, int p, final int end) {
165 		if (16 <= end - p)
166 			return RawParseUtils.parseHexInt64(bs, p);
167 
168 		long r = 0;
169 		int n = 0;
170 		while (n < 16 && p < end) {
171 			r <<= 4;
172 			r |= RawParseUtils.parseHexInt4(bs[p++]);
173 			n++;
174 		}
175 		return r << (16 - n) * 4;
176 	}
177 
178 	static long mask(final int nibbles, final long word, final long v) {
179 		final long b = (word - 1) * 16;
180 		if (b + 16 <= nibbles) {
181 			// We have all of the bits required for this word.
182 			//
183 			return v;
184 		}
185 
186 		if (nibbles <= b) {
187 			// We have none of the bits required for this word.
188 			//
189 			return 0;
190 		}
191 
192 		final long s = 64 - (nibbles - b) * 4;
193 		return (v >>> s) << s;
194 	}
195 
196 	/** Number of half-bytes used by this id. */
197 	final int nibbles;
198 
199 	final long w1;
200 
201 	final long w2;
202 
203 	final long w3;
204 
205 	final long w4;
206 
207 	AbbreviatedLongObjectId(final int n, final long new_1, final long new_2,
208 			final long new_3, final long new_4) {
209 		nibbles = n;
210 		w1 = new_1;
211 		w2 = new_2;
212 		w3 = new_3;
213 		w4 = new_4;
214 	}
215 
216 	/** @return number of hex digits appearing in this id */
217 	public int length() {
218 		return nibbles;
219 	}
220 
221 	/** @return true if this ObjectId is actually a complete id. */
222 	public boolean isComplete() {
223 		return length() == Constants.LONG_OBJECT_ID_STRING_LENGTH;
224 	}
225 
226 	/** @return a complete ObjectId; null if {@link #isComplete()} is false */
227 	public LongObjectId toLongObjectId() {
228 		return isComplete() ? new LongObjectId(w1, w2, w3, w4) : null;
229 	}
230 
231 	/**
232 	 * Compares this abbreviation to a full object id.
233 	 *
234 	 * @param other
235 	 *            the other object id.
236 	 * @return &lt;0 if this abbreviation names an object that is less than
237 	 *         <code>other</code>; 0 if this abbreviation exactly matches the
238 	 *         first {@link #length()} digits of <code>other.name()</code>;
239 	 *         &gt;0 if this abbreviation names an object that is after
240 	 *         <code>other</code>.
241 	 */
242 	public final int prefixCompare(final AnyLongObjectId other) {
243 		int cmp;
244 
245 		cmp = NB.compareUInt64(w1, mask(1, other.w1));
246 		if (cmp != 0)
247 			return cmp;
248 
249 		cmp = NB.compareUInt64(w2, mask(2, other.w2));
250 		if (cmp != 0)
251 			return cmp;
252 
253 		cmp = NB.compareUInt64(w3, mask(3, other.w3));
254 		if (cmp != 0)
255 			return cmp;
256 
257 		return NB.compareUInt64(w4, mask(4, other.w4));
258 	}
259 
260 	/**
261 	 * Compare this abbreviation to a network-byte-order LongObjectId.
262 	 *
263 	 * @param bs
264 	 *            array containing the other LongObjectId in network byte order.
265 	 * @param p
266 	 *            position within {@code bs} to start the compare at. At least
267 	 *            32 bytes, starting at this position are required.
268 	 * @return &lt;0 if this abbreviation names an object that is less than
269 	 *         <code>other</code>; 0 if this abbreviation exactly matches the
270 	 *         first {@link #length()} digits of <code>other.name()</code>;
271 	 *         &gt;0 if this abbreviation names an object that is after
272 	 *         <code>other</code>.
273 	 */
274 	public final int prefixCompare(final byte[] bs, final int p) {
275 		int cmp;
276 
277 		cmp = NB.compareUInt64(w1, mask(1, NB.decodeInt64(bs, p)));
278 		if (cmp != 0)
279 			return cmp;
280 
281 		cmp = NB.compareUInt64(w2, mask(2, NB.decodeInt64(bs, p + 8)));
282 		if (cmp != 0)
283 			return cmp;
284 
285 		cmp = NB.compareUInt64(w3, mask(3, NB.decodeInt64(bs, p + 16)));
286 		if (cmp != 0)
287 			return cmp;
288 
289 		return NB.compareUInt64(w4, mask(4, NB.decodeInt64(bs, p + 24)));
290 	}
291 
292 	/**
293 	 * Compare this abbreviation to a network-byte-order LongObjectId.
294 	 *
295 	 * @param bs
296 	 *            array containing the other LongObjectId in network byte order.
297 	 * @param p
298 	 *            position within {@code bs} to start the compare at. At least 4
299 	 *            longs, starting at this position are required.
300 	 * @return &lt;0 if this abbreviation names an object that is less than
301 	 *         <code>other</code>; 0 if this abbreviation exactly matches the
302 	 *         first {@link #length()} digits of <code>other.name()</code>;
303 	 *         &gt;0 if this abbreviation names an object that is after
304 	 *         <code>other</code>.
305 	 */
306 	public final int prefixCompare(final long[] bs, final int p) {
307 		int cmp;
308 
309 		cmp = NB.compareUInt64(w1, mask(1, bs[p]));
310 		if (cmp != 0)
311 			return cmp;
312 
313 		cmp = NB.compareUInt64(w2, mask(2, bs[p + 1]));
314 		if (cmp != 0)
315 			return cmp;
316 
317 		cmp = NB.compareUInt64(w3, mask(3, bs[p + 2]));
318 		if (cmp != 0)
319 			return cmp;
320 
321 		return NB.compareUInt64(w4, mask(4, bs[p + 3]));
322 	}
323 
324 	/** @return value for a fan-out style map, only valid of length &gt;= 2. */
325 	public final int getFirstByte() {
326 		return (int) (w1 >>> 56);
327 	}
328 
329 	private long mask(final long word, final long v) {
330 		return mask(nibbles, word, v);
331 	}
332 
333 	@Override
334 	public int hashCode() {
335 		return (int) (w1 >> 32);
336 	}
337 
338 	@Override
339 	public boolean equals(final Object o) {
340 		if (o instanceof AbbreviatedLongObjectId) {
341 			final AbbreviatedLongObjectId b = (AbbreviatedLongObjectId) o;
342 			return nibbles == b.nibbles && w1 == b.w1 && w2 == b.w2
343 					&& w3 == b.w3 && w4 == b.w4;
344 		}
345 		return false;
346 	}
347 
348 	/**
349 	 * @return string form of the abbreviation, in lower case hexadecimal.
350 	 */
351 	public final String name() {
352 		final char[] b = new char[Constants.LONG_OBJECT_ID_STRING_LENGTH];
353 
354 		AnyLongObjectId.formatHexChar(b, 0, w1);
355 		if (nibbles <= 16)
356 			return new String(b, 0, nibbles);
357 
358 		AnyLongObjectId.formatHexChar(b, 16, w2);
359 		if (nibbles <= 32)
360 			return new String(b, 0, nibbles);
361 
362 		AnyLongObjectId.formatHexChar(b, 32, w3);
363 		if (nibbles <= 48)
364 			return new String(b, 0, nibbles);
365 
366 		AnyLongObjectId.formatHexChar(b, 48, w4);
367 		return new String(b, 0, nibbles);
368 	}
369 
370 	@SuppressWarnings("nls")
371 	@Override
372 	public String toString() {
373 		return "AbbreviatedLongObjectId[" + name() + "]"; //$NON-NLS-1$
374 	}
375 }