1 /*
2 * Copyright (C) 2008-2009, Google Inc.
3 * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com>
4 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
5 *
6 * This program and the accompanying materials are made available under the
7 * terms of the Eclipse Distribution License v. 1.0 which is available at
8 * https://www.eclipse.org/org/documents/edl-v10.php.
9 *
10 * SPDX-License-Identifier: BSD-3-Clause
11 */
12
13 package org.eclipse.jgit.treewalk;
14
15 import static java.nio.charset.StandardCharsets.UTF_8;
16
17 import java.io.IOException;
18 import java.nio.ByteBuffer;
19 import java.nio.CharBuffer;
20
21 import org.eclipse.jgit.attributes.AttributesHandler;
22 import org.eclipse.jgit.attributes.AttributesNode;
23 import org.eclipse.jgit.errors.CorruptObjectException;
24 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
25 import org.eclipse.jgit.lib.Constants;
26 import org.eclipse.jgit.lib.FileMode;
27 import org.eclipse.jgit.lib.MutableObjectId;
28 import org.eclipse.jgit.lib.ObjectId;
29 import org.eclipse.jgit.lib.ObjectReader;
30 import org.eclipse.jgit.util.Paths;
31
32 /**
33 * Walks a Git tree (directory) in Git sort order.
34 * <p>
35 * A new iterator instance should be positioned on the first entry, or at eof.
36 * Data for the first entry (if not at eof) should be available immediately.
37 * <p>
38 * Implementors must walk a tree in the Git sort order, which has the following
39 * odd sorting:
40 * <ol>
41 * <li>A.c</li>
42 * <li>A/c</li>
43 * <li>A0c</li>
44 * </ol>
45 * <p>
46 * In the second item, <code>A</code> is the name of a subtree and
47 * <code>c</code> is a file within that subtree. The other two items are files
48 * in the root level tree.
49 *
50 * @see CanonicalTreeParser
51 */
52 public abstract class AbstractTreeIterator {
53 /** Default size for the {@link #path} buffer. */
54 protected static final int DEFAULT_PATH_SIZE = 128;
55
56 /** A dummy object id buffer that matches the zero ObjectId. */
57 protected static final byte[] zeroid = new byte[Constants.OBJECT_ID_LENGTH];
58
59 /**
60 * Iterator for the parent tree; null if we are the root iterator.
61 * <p>
62 * Used by {@link TreeWalk} and {@link AttributesHandler}
63 *
64 * @since 4.3
65 */
66 public final AbstractTreeIterator parent;
67
68 /** The iterator this current entry is path equal to. */
69 AbstractTreeIterator matches;
70
71 /**
72 * Parsed rules of .gitattributes file if it exists.
73 *
74 * @since 4.2
75 */
76 protected AttributesNode attributesNode;
77
78 /**
79 * Number of entries we moved forward to force a D/F conflict match.
80 *
81 * @see NameConflictTreeWalk
82 */
83 int matchShift;
84
85 /**
86 * Mode bits for the current entry.
87 * <p>
88 * A numerical value from FileMode is usually faster for an iterator to
89 * obtain from its data source so this is the preferred representation.
90 *
91 * @see org.eclipse.jgit.lib.FileMode
92 */
93 protected int mode;
94
95 /**
96 * Path buffer for the current entry.
97 * <p>
98 * This buffer is pre-allocated at the start of walking and is shared from
99 * parent iterators down into their subtree iterators. The sharing allows
100 * the current entry to always be a full path from the root, while each
101 * subtree only needs to populate the part that is under their control.
102 */
103 protected byte[] path;
104
105 /**
106 * Position within {@link #path} this iterator starts writing at.
107 * <p>
108 * This is the first offset in {@link #path} that this iterator must
109 * populate during {@link #next}. At the root level (when {@link #parent}
110 * is null) this is 0. For a subtree iterator the index before this position
111 * should have the value '/'.
112 */
113 protected final int pathOffset;
114
115 /**
116 * Total length of the current entry's complete path from the root.
117 * <p>
118 * This is the number of bytes within {@link #path} that pertain to the
119 * current entry. Values at this index through the end of the array are
120 * garbage and may be randomly populated from prior entries.
121 */
122 protected int pathLen;
123
124 /**
125 * Create a new iterator with no parent.
126 */
127 protected AbstractTreeIterator() {
128 parent = null;
129 path = new byte[DEFAULT_PATH_SIZE];
130 pathOffset = 0;
131 }
132
133 /**
134 * Create a new iterator with no parent and a prefix.
135 * <p>
136 * The prefix path supplied is inserted in front of all paths generated by
137 * this iterator. It is intended to be used when an iterator is being
138 * created for a subsection of an overall repository and needs to be
139 * combined with other iterators that are created to run over the entire
140 * repository namespace.
141 *
142 * @param prefix
143 * position of this iterator in the repository tree. The value
144 * may be null or the empty string to indicate the prefix is the
145 * root of the repository. A trailing slash ('/') is
146 * automatically appended if the prefix does not end in '/'.
147 */
148 protected AbstractTreeIterator(String prefix) {
149 parent = null;
150
151 if (prefix != null && prefix.length() > 0) {
152 final ByteBuffer b;
153
154 b = UTF_8.encode(CharBuffer.wrap(prefix));
155 pathLen = b.limit();
156 path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)];
157 b.get(path, 0, pathLen);
158 if (path[pathLen - 1] != '/')
159 path[pathLen++] = '/';
160 pathOffset = pathLen;
161 } else {
162 path = new byte[DEFAULT_PATH_SIZE];
163 pathOffset = 0;
164 }
165 }
166
167 /**
168 * Create a new iterator with no parent and a prefix.
169 * <p>
170 * The prefix path supplied is inserted in front of all paths generated by
171 * this iterator. It is intended to be used when an iterator is being
172 * created for a subsection of an overall repository and needs to be
173 * combined with other iterators that are created to run over the entire
174 * repository namespace.
175 *
176 * @param prefix
177 * position of this iterator in the repository tree. The value
178 * may be null or the empty array to indicate the prefix is the
179 * root of the repository. A trailing slash ('/') is
180 * automatically appended if the prefix does not end in '/'.
181 */
182 protected AbstractTreeIterator(byte[] prefix) {
183 parent = null;
184
185 if (prefix != null && prefix.length > 0) {
186 pathLen = prefix.length;
187 path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)];
188 System.arraycopy(prefix, 0, path, 0, pathLen);
189 if (path[pathLen - 1] != '/')
190 path[pathLen++] = '/';
191 pathOffset = pathLen;
192 } else {
193 path = new byte[DEFAULT_PATH_SIZE];
194 pathOffset = 0;
195 }
196 }
197
198 /**
199 * Create an iterator for a subtree of an existing iterator.
200 *
201 * @param p
202 * parent tree iterator.
203 */
204 protected AbstractTreeIterator(AbstractTreeIterator p) {
205 parent = p;
206 path = p.path;
207 pathOffset = p.pathLen + 1;
208
209 if (pathOffset > path.length) {
210 growPath(p.pathLen);
211 }
212 path[pathOffset - 1] = '/';
213 }
214
215 /**
216 * Create an iterator for a subtree of an existing iterator.
217 * <p>
218 * The caller is responsible for setting up the path of the child iterator.
219 *
220 * @param p
221 * parent tree iterator.
222 * @param childPath
223 * path array to be used by the child iterator. This path must
224 * contain the path from the top of the walk to the first child
225 * and must end with a '/'.
226 * @param childPathOffset
227 * position within <code>childPath</code> where the child can
228 * insert its data. The value at
229 * <code>childPath[childPathOffset-1]</code> must be '/'.
230 */
231 protected AbstractTreeIterator(final AbstractTreeIterator p,
232 final byte[] childPath, final int childPathOffset) {
233 parent = p;
234 path = childPath;
235 pathOffset = childPathOffset;
236 }
237
238 /**
239 * Grow the path buffer larger.
240 *
241 * @param len
242 * number of live bytes in the path buffer. This many bytes will
243 * be moved into the larger buffer.
244 */
245 protected void growPath(int len) {
246 setPathCapacity(path.length << 1, len);
247 }
248
249 /**
250 * Ensure that path is capable to hold at least {@code capacity} bytes
251 *
252 * @param capacity
253 * the amount of bytes to hold
254 * @param len
255 * the amount of live bytes in path buffer
256 */
257 protected void ensurePathCapacity(int capacity, int len) {
258 if (path.length >= capacity)
259 return;
260 final byte[] o = path;
261 int current = o.length;
262 int newCapacity = current;
263 while (newCapacity < capacity && newCapacity > 0)
264 newCapacity <<= 1;
265 setPathCapacity(newCapacity, len);
266 }
267
268 /**
269 * Set path buffer capacity to the specified size
270 *
271 * @param capacity
272 * the new size
273 * @param len
274 * the amount of bytes to copy
275 */
276 private void setPathCapacity(int capacity, int len) {
277 final byte[] o = path;
278 final byte[] n = new byte[capacity];
279 System.arraycopy(o, 0, n, 0, len);
280 for (AbstractTreeIterator p = this; p != null && p.path == o; p = p.parent)
281 p.path = n;
282 }
283
284 /**
285 * Compare the path of this current entry to another iterator's entry.
286 *
287 * @param p
288 * the other iterator to compare the path against.
289 * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if
290 * p's entry sorts first.
291 */
292 public int pathCompare(AbstractTreeIterator p) {
293 return pathCompare(p, p.mode);
294 }
295
296 int pathCompare(AbstractTreeIterator p, int pMode) {
297 // Its common when we are a subtree for both parents to match;
298 // when this happens everything in path[0..cPos] is known to
299 // be equal and does not require evaluation again.
300 //
301 int cPos = alreadyMatch(this, p);
302 return pathCompare(p.path, cPos, p.pathLen, pMode, cPos);
303 }
304
305 /**
306 * Seek the iterator on a file, if present.
307 *
308 * @param name
309 * file name to find (will not find a directory).
310 * @return true if the file exists in this tree; false otherwise.
311 * @throws org.eclipse.jgit.errors.CorruptObjectException
312 * tree is invalid.
313 * @since 4.2
314 */
315 public boolean findFile(String name) throws CorruptObjectException {
316 return findFile(Constants.encode(name));
317 }
318
319 /**
320 * Seek the iterator on a file, if present.
321 *
322 * @param name
323 * file name to find (will not find a directory).
324 * @return true if the file exists in this tree; false otherwise.
325 * @throws org.eclipse.jgit.errors.CorruptObjectException
326 * tree is invalid.
327 * @since 4.2
328 */
329 public boolean findFile(byte[] name) throws CorruptObjectException {
330 for (; !eof(); next(1)) {
331 int cmp = pathCompare(name, 0, name.length, 0, pathOffset);
332 if (cmp == 0) {
333 return true;
334 } else if (cmp > 0) {
335 return false;
336 }
337 }
338 return false;
339 }
340
341 /**
342 * Compare the path of this current entry to a raw buffer.
343 *
344 * @param buf
345 * the raw path buffer.
346 * @param pos
347 * position to start reading the raw buffer.
348 * @param end
349 * one past the end of the raw buffer (length is end - pos).
350 * @param pathMode
351 * the mode of the path.
352 * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if
353 * p's entry sorts first.
354 */
355 public int pathCompare(byte[] buf, int pos, int end, int pathMode) {
356 return pathCompare(buf, pos, end, pathMode, 0);
357 }
358
359 private int pathCompare(byte[] b, int bPos, int bEnd, int bMode, int aPos) {
360 return Paths.compare(
361 path, aPos, pathLen, mode,
362 b, bPos, bEnd, bMode);
363 }
364
365 private static int alreadyMatch(AbstractTreeIterator a,
366 AbstractTreeIterator b) {
367 for (;;) {
368 final AbstractTreeIterator ap = a.parent;
369 final AbstractTreeIterator bp = b.parent;
370 if (ap == null || bp == null)
371 return 0;
372 if (ap.matches == bp.matches)
373 return a.pathOffset;
374 a = ap;
375 b = bp;
376 }
377 }
378
379 /**
380 * Check if the current entry of both iterators has the same id.
381 * <p>
382 * This method is faster than {@link #getEntryObjectId()} as it does not
383 * require copying the bytes out of the buffers. A direct {@link #idBuffer}
384 * compare operation is performed.
385 *
386 * @param otherIterator
387 * the other iterator to test against.
388 * @return true if both iterators have the same object id; false otherwise.
389 */
390 public boolean idEqual(AbstractTreeIterator otherIterator) {
391 return ObjectId.equals(idBuffer(), idOffset(),
392 otherIterator.idBuffer(), otherIterator.idOffset());
393 }
394
395 /**
396 * Whether the entry has a valid ObjectId.
397 *
398 * @return {@code true} if the entry has a valid ObjectId.
399 */
400 public abstract boolean hasId();
401
402 /**
403 * Get the object id of the current entry.
404 *
405 * @return an object id for the current entry.
406 */
407 public ObjectId getEntryObjectId() {
408 return ObjectId.fromRaw(idBuffer(), idOffset());
409 }
410
411 /**
412 * Obtain the ObjectId for the current entry.
413 *
414 * @param out
415 * buffer to copy the object id into.
416 */
417 public void getEntryObjectId(MutableObjectId out) {
418 out.fromRaw(idBuffer(), idOffset());
419 }
420
421 /**
422 * Get the file mode of the current entry.
423 *
424 * @return the file mode of the current entry.
425 */
426 public FileMode getEntryFileMode() {
427 return FileMode.fromBits(mode);
428 }
429
430 /**
431 * Get the file mode of the current entry as bits.
432 *
433 * @return the file mode of the current entry as bits.
434 */
435 public int getEntryRawMode() {
436 return mode;
437 }
438
439 /**
440 * Get path of the current entry, as a string.
441 *
442 * @return path of the current entry, as a string.
443 */
444 public String getEntryPathString() {
445 return TreeWalk.pathOf(this);
446 }
447
448 /**
449 * Get the current entry path buffer.
450 * <p>
451 * Note that the returned byte[] has to be used together with
452 * {@link #getEntryPathLength()} (only use bytes up to this length).
453 *
454 * @return the internal buffer holding the current path.
455 */
456 public byte[] getEntryPathBuffer() {
457 return path;
458 }
459
460 /**
461 * Get length of the path in {@link #getEntryPathBuffer()}.
462 *
463 * @return length of the path in {@link #getEntryPathBuffer()}.
464 */
465 public int getEntryPathLength() {
466 return pathLen;
467 }
468
469 /**
470 * Get the current entry's path hash code.
471 * <p>
472 * This method computes a hash code on the fly for this path, the hash is
473 * suitable to cluster objects that may have similar paths together.
474 *
475 * @return path hash code; any integer may be returned.
476 */
477 public int getEntryPathHashCode() {
478 int hash = 0;
479 for (int i = Math.max(0, pathLen - 16); i < pathLen; i++) {
480 byte c = path[i];
481 if (c != ' ')
482 hash = (hash >>> 2) + (c << 24);
483 }
484 return hash;
485 }
486
487 /**
488 * Get the byte array buffer object IDs must be copied out of.
489 * <p>
490 * The id buffer contains the bytes necessary to construct an ObjectId for
491 * the current entry of this iterator. The buffer can be the same buffer for
492 * all entries, or it can be a unique buffer per-entry. Implementations are
493 * encouraged to expose their private buffer whenever possible to reduce
494 * garbage generation and copying costs.
495 *
496 * @return byte array the implementation stores object IDs within.
497 * @see #getEntryObjectId()
498 */
499 public abstract byte[] idBuffer();
500
501 /**
502 * Get the position within {@link #idBuffer()} of this entry's ObjectId.
503 *
504 * @return offset into the array returned by {@link #idBuffer()} where the
505 * ObjectId must be copied out of.
506 */
507 public abstract int idOffset();
508
509 /**
510 * Create a new iterator for the current entry's subtree.
511 * <p>
512 * The parent reference of the iterator must be <code>this</code>,
513 * otherwise the caller would not be able to exit out of the subtree
514 * iterator correctly and return to continue walking <code>this</code>.
515 *
516 * @param reader
517 * reader to load the tree data from.
518 * @return a new parser that walks over the current subtree.
519 * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException
520 * the current entry is not actually a tree and cannot be parsed
521 * as though it were a tree.
522 * @throws java.io.IOException
523 * a loose object or pack file could not be read.
524 */
525 public abstract AbstractTreeIterator createSubtreeIterator(
526 ObjectReader reader) throws IncorrectObjectTypeException,
527 IOException;
528
529 /**
530 * Create a new iterator as though the current entry were a subtree.
531 *
532 * @return a new empty tree iterator.
533 */
534 public EmptyTreeIterator createEmptyTreeIterator() {
535 return new EmptyTreeIterator(this);
536 }
537
538 /**
539 * Create a new iterator for the current entry's subtree.
540 * <p>
541 * The parent reference of the iterator must be <code>this</code>, otherwise
542 * the caller would not be able to exit out of the subtree iterator
543 * correctly and return to continue walking <code>this</code>.
544 *
545 * @param reader
546 * reader to load the tree data from.
547 * @param idBuffer
548 * temporary ObjectId buffer for use by this method.
549 * @return a new parser that walks over the current subtree.
550 * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException
551 * the current entry is not actually a tree and cannot be parsed
552 * as though it were a tree.
553 * @throws java.io.IOException
554 * a loose object or pack file could not be read.
555 */
556 public AbstractTreeIterator createSubtreeIterator(
557 final ObjectReader reader, final MutableObjectId idBuffer)
558 throws IncorrectObjectTypeException, IOException {
559 return createSubtreeIterator(reader);
560 }
561
562 /**
563 * Position this iterator on the first entry.
564 *
565 * The default implementation of this method uses {@code back(1)} until
566 * {@code first()} is true. This is most likely not the most efficient
567 * method of repositioning the iterator to its first entry, so subclasses
568 * are strongly encouraged to override the method.
569 *
570 * @throws org.eclipse.jgit.errors.CorruptObjectException
571 * the tree is invalid.
572 */
573 public void reset() throws CorruptObjectException {
574 while (!first())
575 back(1);
576 }
577
578 /**
579 * Is this tree iterator positioned on its first entry?
580 * <p>
581 * An iterator is positioned on the first entry if <code>back(1)</code>
582 * would be an invalid request as there is no entry before the current one.
583 * <p>
584 * An empty iterator (one with no entries) will be
585 * <code>first() && eof()</code>.
586 *
587 * @return true if the iterator is positioned on the first entry.
588 */
589 public abstract boolean first();
590
591 /**
592 * Is this tree iterator at its EOF point (no more entries)?
593 * <p>
594 * An iterator is at EOF if there is no current entry.
595 *
596 * @return true if we have walked all entries and have none left.
597 */
598 public abstract boolean eof();
599
600 /**
601 * Move to next entry, populating this iterator with the entry data.
602 * <p>
603 * The delta indicates how many moves forward should occur. The most common
604 * delta is 1 to move to the next entry.
605 * <p>
606 * Implementations must populate the following members:
607 * <ul>
608 * <li>{@link #mode}</li>
609 * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li>
610 * <li>{@link #pathLen}</li>
611 * </ul>
612 * as well as any implementation dependent information necessary to
613 * accurately return data from {@link #idBuffer()} and {@link #idOffset()}
614 * when demanded.
615 *
616 * @param delta
617 * number of entries to move the iterator by. Must be a positive,
618 * non-zero integer.
619 * @throws org.eclipse.jgit.errors.CorruptObjectException
620 * the tree is invalid.
621 */
622 public abstract void next(int delta) throws CorruptObjectException;
623
624 /**
625 * Move to prior entry, populating this iterator with the entry data.
626 * <p>
627 * The delta indicates how many moves backward should occur.The most common
628 * delta is 1 to move to the prior entry.
629 * <p>
630 * Implementations must populate the following members:
631 * <ul>
632 * <li>{@link #mode}</li>
633 * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li>
634 * <li>{@link #pathLen}</li>
635 * </ul>
636 * as well as any implementation dependent information necessary to
637 * accurately return data from {@link #idBuffer()} and {@link #idOffset()}
638 * when demanded.
639 *
640 * @param delta
641 * number of entries to move the iterator by. Must be a positive,
642 * non-zero integer.
643 * @throws org.eclipse.jgit.errors.CorruptObjectException
644 * the tree is invalid.
645 */
646 public abstract void back(int delta) throws CorruptObjectException;
647
648 /**
649 * Advance to the next tree entry, populating this iterator with its data.
650 * <p>
651 * This method behaves like <code>seek(1)</code> but is called by
652 * {@link org.eclipse.jgit.treewalk.TreeWalk} only if a
653 * {@link org.eclipse.jgit.treewalk.filter.TreeFilter} was used and ruled
654 * out the current entry from the results. In such cases this tree iterator
655 * may perform special behavior.
656 *
657 * @throws org.eclipse.jgit.errors.CorruptObjectException
658 * the tree is invalid.
659 */
660 public void skip() throws CorruptObjectException {
661 next(1);
662 }
663
664 /**
665 * Indicates to the iterator that no more entries will be read.
666 * <p>
667 * This is only invoked by TreeWalk when the iteration is aborted early due
668 * to a {@link org.eclipse.jgit.errors.StopWalkException} being thrown from
669 * within a TreeFilter.
670 */
671 public void stopWalk() {
672 // Do nothing by default. Most iterators do not care.
673 }
674
675 /**
676 * Whether the iterator implements {@link #stopWalk()}.
677 *
678 * @return {@code true} if the iterator implements {@link #stopWalk()}.
679 * @since 4.2
680 */
681 protected boolean needsStopWalk() {
682 return false;
683 }
684
685 /**
686 * Get the length of the name component of the path for the current entry.
687 *
688 * @return the length of the name component of the path for the current
689 * entry.
690 */
691 public int getNameLength() {
692 return pathLen - pathOffset;
693 }
694
695 /**
696 * JGit internal API for use by
697 * {@link org.eclipse.jgit.dircache.DirCacheCheckout}
698 *
699 * @return start of name component part within {@link #getEntryPathBuffer()}
700 * @since 2.0
701 */
702 public int getNameOffset() {
703 return pathOffset;
704 }
705
706 /**
707 * Get the name component of the current entry path into the provided
708 * buffer.
709 *
710 * @param buffer
711 * the buffer to get the name into, it is assumed that buffer can
712 * hold the name
713 * @param offset
714 * the offset of the name in the buffer
715 * @see #getNameLength()
716 */
717 public void getName(byte[] buffer, int offset) {
718 System.arraycopy(path, pathOffset, buffer, offset, pathLen - pathOffset);
719 }
720
721 /** {@inheritDoc} */
722 @SuppressWarnings("nls")
723 @Override
724 public String toString() {
725 return getClass().getSimpleName() + "[" + getEntryPathString() + "]"; //$NON-NLS-1$
726 }
727
728 /**
729 * Whether or not this Iterator is iterating through the working tree.
730 *
731 * @return whether or not this Iterator is iterating through the working
732 * tree
733 * @since 4.3
734 */
735 public boolean isWorkTree() {
736 return false;
737 }
738 }