1 /* 2 * Copyright (C) 2008-2009, Google Inc. 3 * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com> 4 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others 5 * 6 * This program and the accompanying materials are made available under the 7 * terms of the Eclipse Distribution License v. 1.0 which is available at 8 * https://www.eclipse.org/org/documents/edl-v10.php. 9 * 10 * SPDX-License-Identifier: BSD-3-Clause 11 */ 12 13 package org.eclipse.jgit.treewalk; 14 15 import static java.nio.charset.StandardCharsets.UTF_8; 16 17 import java.io.IOException; 18 import java.nio.ByteBuffer; 19 import java.nio.CharBuffer; 20 21 import org.eclipse.jgit.attributes.AttributesHandler; 22 import org.eclipse.jgit.attributes.AttributesNode; 23 import org.eclipse.jgit.errors.CorruptObjectException; 24 import org.eclipse.jgit.errors.IncorrectObjectTypeException; 25 import org.eclipse.jgit.lib.Constants; 26 import org.eclipse.jgit.lib.FileMode; 27 import org.eclipse.jgit.lib.MutableObjectId; 28 import org.eclipse.jgit.lib.ObjectId; 29 import org.eclipse.jgit.lib.ObjectReader; 30 import org.eclipse.jgit.util.Paths; 31 32 /** 33 * Walks a Git tree (directory) in Git sort order. 34 * <p> 35 * A new iterator instance should be positioned on the first entry, or at eof. 36 * Data for the first entry (if not at eof) should be available immediately. 37 * <p> 38 * Implementors must walk a tree in the Git sort order, which has the following 39 * odd sorting: 40 * <ol> 41 * <li>A.c</li> 42 * <li>A/c</li> 43 * <li>A0c</li> 44 * </ol> 45 * <p> 46 * In the second item, <code>A</code> is the name of a subtree and 47 * <code>c</code> is a file within that subtree. The other two items are files 48 * in the root level tree. 49 * 50 * @see CanonicalTreeParser 51 */ 52 public abstract class AbstractTreeIterator { 53 /** Default size for the {@link #path} buffer. */ 54 protected static final int DEFAULT_PATH_SIZE = 128; 55 56 /** A dummy object id buffer that matches the zero ObjectId. */ 57 protected static final byte[] zeroid = new byte[Constants.OBJECT_ID_LENGTH]; 58 59 /** 60 * Iterator for the parent tree; null if we are the root iterator. 61 * <p> 62 * Used by {@link TreeWalk} and {@link AttributesHandler} 63 * 64 * @since 4.3 65 */ 66 public final AbstractTreeIterator parent; 67 68 /** The iterator this current entry is path equal to. */ 69 AbstractTreeIterator matches; 70 71 /** 72 * Parsed rules of .gitattributes file if it exists. 73 * 74 * @since 4.2 75 */ 76 protected AttributesNode attributesNode; 77 78 /** 79 * Number of entries we moved forward to force a D/F conflict match. 80 * 81 * @see NameConflictTreeWalk 82 */ 83 int matchShift; 84 85 /** 86 * Mode bits for the current entry. 87 * <p> 88 * A numerical value from FileMode is usually faster for an iterator to 89 * obtain from its data source so this is the preferred representation. 90 * 91 * @see org.eclipse.jgit.lib.FileMode 92 */ 93 protected int mode; 94 95 /** 96 * Path buffer for the current entry. 97 * <p> 98 * This buffer is pre-allocated at the start of walking and is shared from 99 * parent iterators down into their subtree iterators. The sharing allows 100 * the current entry to always be a full path from the root, while each 101 * subtree only needs to populate the part that is under their control. 102 */ 103 protected byte[] path; 104 105 /** 106 * Position within {@link #path} this iterator starts writing at. 107 * <p> 108 * This is the first offset in {@link #path} that this iterator must 109 * populate during {@link #next}. At the root level (when {@link #parent} 110 * is null) this is 0. For a subtree iterator the index before this position 111 * should have the value '/'. 112 */ 113 protected final int pathOffset; 114 115 /** 116 * Total length of the current entry's complete path from the root. 117 * <p> 118 * This is the number of bytes within {@link #path} that pertain to the 119 * current entry. Values at this index through the end of the array are 120 * garbage and may be randomly populated from prior entries. 121 */ 122 protected int pathLen; 123 124 /** 125 * Create a new iterator with no parent. 126 */ 127 protected AbstractTreeIterator() { 128 parent = null; 129 path = new byte[DEFAULT_PATH_SIZE]; 130 pathOffset = 0; 131 } 132 133 /** 134 * Create a new iterator with no parent and a prefix. 135 * <p> 136 * The prefix path supplied is inserted in front of all paths generated by 137 * this iterator. It is intended to be used when an iterator is being 138 * created for a subsection of an overall repository and needs to be 139 * combined with other iterators that are created to run over the entire 140 * repository namespace. 141 * 142 * @param prefix 143 * position of this iterator in the repository tree. The value 144 * may be null or the empty string to indicate the prefix is the 145 * root of the repository. A trailing slash ('/') is 146 * automatically appended if the prefix does not end in '/'. 147 */ 148 protected AbstractTreeIterator(String prefix) { 149 parent = null; 150 151 if (prefix != null && prefix.length() > 0) { 152 final ByteBuffer b; 153 154 b = UTF_8.encode(CharBuffer.wrap(prefix)); 155 pathLen = b.limit(); 156 path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)]; 157 b.get(path, 0, pathLen); 158 if (path[pathLen - 1] != '/') 159 path[pathLen++] = '/'; 160 pathOffset = pathLen; 161 } else { 162 path = new byte[DEFAULT_PATH_SIZE]; 163 pathOffset = 0; 164 } 165 } 166 167 /** 168 * Create a new iterator with no parent and a prefix. 169 * <p> 170 * The prefix path supplied is inserted in front of all paths generated by 171 * this iterator. It is intended to be used when an iterator is being 172 * created for a subsection of an overall repository and needs to be 173 * combined with other iterators that are created to run over the entire 174 * repository namespace. 175 * 176 * @param prefix 177 * position of this iterator in the repository tree. The value 178 * may be null or the empty array to indicate the prefix is the 179 * root of the repository. A trailing slash ('/') is 180 * automatically appended if the prefix does not end in '/'. 181 */ 182 protected AbstractTreeIterator(byte[] prefix) { 183 parent = null; 184 185 if (prefix != null && prefix.length > 0) { 186 pathLen = prefix.length; 187 path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)]; 188 System.arraycopy(prefix, 0, path, 0, pathLen); 189 if (path[pathLen - 1] != '/') 190 path[pathLen++] = '/'; 191 pathOffset = pathLen; 192 } else { 193 path = new byte[DEFAULT_PATH_SIZE]; 194 pathOffset = 0; 195 } 196 } 197 198 /** 199 * Create an iterator for a subtree of an existing iterator. 200 * 201 * @param p 202 * parent tree iterator. 203 */ 204 protected AbstractTreeIterator./../../org/eclipse/jgit/treewalk/AbstractTreeIterator.html#AbstractTreeIterator">AbstractTreeIterator(AbstractTreeIterator p) { 205 parent = p; 206 path = p.path; 207 pathOffset = p.pathLen + 1; 208 209 if (pathOffset > path.length) { 210 growPath(p.pathLen); 211 } 212 path[pathOffset - 1] = '/'; 213 } 214 215 /** 216 * Create an iterator for a subtree of an existing iterator. 217 * <p> 218 * The caller is responsible for setting up the path of the child iterator. 219 * 220 * @param p 221 * parent tree iterator. 222 * @param childPath 223 * path array to be used by the child iterator. This path must 224 * contain the path from the top of the walk to the first child 225 * and must end with a '/'. 226 * @param childPathOffset 227 * position within <code>childPath</code> where the child can 228 * insert its data. The value at 229 * <code>childPath[childPathOffset-1]</code> must be '/'. 230 */ 231 protected AbstractTreeIteratorreeIterator.html#AbstractTreeIterator">AbstractTreeIterator(final AbstractTreeIterator p, 232 final byte[] childPath, final int childPathOffset) { 233 parent = p; 234 path = childPath; 235 pathOffset = childPathOffset; 236 } 237 238 /** 239 * Grow the path buffer larger. 240 * 241 * @param len 242 * number of live bytes in the path buffer. This many bytes will 243 * be moved into the larger buffer. 244 */ 245 protected void growPath(int len) { 246 setPathCapacity(path.length << 1, len); 247 } 248 249 /** 250 * Ensure that path is capable to hold at least {@code capacity} bytes 251 * 252 * @param capacity 253 * the amount of bytes to hold 254 * @param len 255 * the amount of live bytes in path buffer 256 */ 257 protected void ensurePathCapacity(int capacity, int len) { 258 if (path.length >= capacity) 259 return; 260 final byte[] o = path; 261 int current = o.length; 262 int newCapacity = current; 263 while (newCapacity < capacity && newCapacity > 0) 264 newCapacity <<= 1; 265 setPathCapacity(newCapacity, len); 266 } 267 268 /** 269 * Set path buffer capacity to the specified size 270 * 271 * @param capacity 272 * the new size 273 * @param len 274 * the amount of bytes to copy 275 */ 276 private void setPathCapacity(int capacity, int len) { 277 final byte[] o = path; 278 final byte[] n = new byte[capacity]; 279 System.arraycopy(o, 0, n, 0, len); 280 for (AbstractTreeIterator p = this; p != null && p.path == o; p = p.parent) 281 p.path = n; 282 } 283 284 /** 285 * Compare the path of this current entry to another iterator's entry. 286 * 287 * @param p 288 * the other iterator to compare the path against. 289 * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if 290 * p's entry sorts first. 291 */ 292 public int pathCompare(AbstractTreeIterator p) { 293 return pathCompare(p, p.mode); 294 } 295 296 int pathCompare(AbstractTreeIterator p, int pMode) { 297 // Its common when we are a subtree for both parents to match; 298 // when this happens everything in path[0..cPos] is known to 299 // be equal and does not require evaluation again. 300 // 301 int cPos = alreadyMatch(this, p); 302 return pathCompare(p.path, cPos, p.pathLen, pMode, cPos); 303 } 304 305 /** 306 * Seek the iterator on a file, if present. 307 * 308 * @param name 309 * file name to find (will not find a directory). 310 * @return true if the file exists in this tree; false otherwise. 311 * @throws org.eclipse.jgit.errors.CorruptObjectException 312 * tree is invalid. 313 * @since 4.2 314 */ 315 public boolean findFile(String name) throws CorruptObjectException { 316 return findFile(Constants.encode(name)); 317 } 318 319 /** 320 * Seek the iterator on a file, if present. 321 * 322 * @param name 323 * file name to find (will not find a directory). 324 * @return true if the file exists in this tree; false otherwise. 325 * @throws org.eclipse.jgit.errors.CorruptObjectException 326 * tree is invalid. 327 * @since 4.2 328 */ 329 public boolean findFile(byte[] name) throws CorruptObjectException { 330 for (; !eof(); next(1)) { 331 int cmp = pathCompare(name, 0, name.length, 0, pathOffset); 332 if (cmp == 0) { 333 return true; 334 } else if (cmp > 0) { 335 return false; 336 } 337 } 338 return false; 339 } 340 341 /** 342 * Compare the path of this current entry to a raw buffer. 343 * 344 * @param buf 345 * the raw path buffer. 346 * @param pos 347 * position to start reading the raw buffer. 348 * @param end 349 * one past the end of the raw buffer (length is end - pos). 350 * @param pathMode 351 * the mode of the path. 352 * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if 353 * p's entry sorts first. 354 */ 355 public int pathCompare(byte[] buf, int pos, int end, int pathMode) { 356 return pathCompare(buf, pos, end, pathMode, 0); 357 } 358 359 private int pathCompare(byte[] b, int bPos, int bEnd, int bMode, int aPos) { 360 return Paths.compare( 361 path, aPos, pathLen, mode, 362 b, bPos, bEnd, bMode); 363 } 364 365 private static int alreadyMatch(AbstractTreeIterator a, 366 AbstractTreeIterator b) { 367 for (;;) { 368 final AbstractTreeIterator ap = a.parent; 369 final AbstractTreeIterator bp = b.parent; 370 if (ap == null || bp == null) 371 return 0; 372 if (ap.matches == bp.matches) 373 return a.pathOffset; 374 a = ap; 375 b = bp; 376 } 377 } 378 379 /** 380 * Check if the current entry of both iterators has the same id. 381 * <p> 382 * This method is faster than {@link #getEntryObjectId()} as it does not 383 * require copying the bytes out of the buffers. A direct {@link #idBuffer} 384 * compare operation is performed. 385 * 386 * @param otherIterator 387 * the other iterator to test against. 388 * @return true if both iterators have the same object id; false otherwise. 389 */ 390 public boolean idEqual(AbstractTreeIterator otherIterator) { 391 return ObjectId.equals(idBuffer(), idOffset(), 392 otherIterator.idBuffer(), otherIterator.idOffset()); 393 } 394 395 /** 396 * Whether the entry has a valid ObjectId. 397 * 398 * @return {@code true} if the entry has a valid ObjectId. 399 */ 400 public abstract boolean hasId(); 401 402 /** 403 * Get the object id of the current entry. 404 * 405 * @return an object id for the current entry. 406 */ 407 public ObjectId getEntryObjectId() { 408 return ObjectId.fromRaw(idBuffer(), idOffset()); 409 } 410 411 /** 412 * Obtain the ObjectId for the current entry. 413 * 414 * @param out 415 * buffer to copy the object id into. 416 */ 417 public void getEntryObjectId(MutableObjectId out) { 418 out.fromRaw(idBuffer(), idOffset()); 419 } 420 421 /** 422 * Get the file mode of the current entry. 423 * 424 * @return the file mode of the current entry. 425 */ 426 public FileMode getEntryFileMode() { 427 return FileMode.fromBits(mode); 428 } 429 430 /** 431 * Get the file mode of the current entry as bits. 432 * 433 * @return the file mode of the current entry as bits. 434 */ 435 public int getEntryRawMode() { 436 return mode; 437 } 438 439 /** 440 * Get path of the current entry, as a string. 441 * 442 * @return path of the current entry, as a string. 443 */ 444 public String getEntryPathString() { 445 return TreeWalk.pathOf(this); 446 } 447 448 /** 449 * Get the current entry path buffer. 450 * <p> 451 * Note that the returned byte[] has to be used together with 452 * {@link #getEntryPathLength()} (only use bytes up to this length). 453 * 454 * @return the internal buffer holding the current path. 455 */ 456 public byte[] getEntryPathBuffer() { 457 return path; 458 } 459 460 /** 461 * Get length of the path in {@link #getEntryPathBuffer()}. 462 * 463 * @return length of the path in {@link #getEntryPathBuffer()}. 464 */ 465 public int getEntryPathLength() { 466 return pathLen; 467 } 468 469 /** 470 * Get the current entry's path hash code. 471 * <p> 472 * This method computes a hash code on the fly for this path, the hash is 473 * suitable to cluster objects that may have similar paths together. 474 * 475 * @return path hash code; any integer may be returned. 476 */ 477 public int getEntryPathHashCode() { 478 int hash = 0; 479 for (int i = Math.max(0, pathLen - 16); i < pathLen; i++) { 480 byte c = path[i]; 481 if (c != ' ') 482 hash = (hash >>> 2) + (c << 24); 483 } 484 return hash; 485 } 486 487 /** 488 * Get the byte array buffer object IDs must be copied out of. 489 * <p> 490 * The id buffer contains the bytes necessary to construct an ObjectId for 491 * the current entry of this iterator. The buffer can be the same buffer for 492 * all entries, or it can be a unique buffer per-entry. Implementations are 493 * encouraged to expose their private buffer whenever possible to reduce 494 * garbage generation and copying costs. 495 * 496 * @return byte array the implementation stores object IDs within. 497 * @see #getEntryObjectId() 498 */ 499 public abstract byte[] idBuffer(); 500 501 /** 502 * Get the position within {@link #idBuffer()} of this entry's ObjectId. 503 * 504 * @return offset into the array returned by {@link #idBuffer()} where the 505 * ObjectId must be copied out of. 506 */ 507 public abstract int idOffset(); 508 509 /** 510 * Create a new iterator for the current entry's subtree. 511 * <p> 512 * The parent reference of the iterator must be <code>this</code>, 513 * otherwise the caller would not be able to exit out of the subtree 514 * iterator correctly and return to continue walking <code>this</code>. 515 * 516 * @param reader 517 * reader to load the tree data from. 518 * @return a new parser that walks over the current subtree. 519 * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException 520 * the current entry is not actually a tree and cannot be parsed 521 * as though it were a tree. 522 * @throws java.io.IOException 523 * a loose object or pack file could not be read. 524 */ 525 public abstract AbstractTreeIterator createSubtreeIterator( 526 ObjectReader reader) throws IncorrectObjectTypeException, 527 IOException; 528 529 /** 530 * Create a new iterator as though the current entry were a subtree. 531 * 532 * @return a new empty tree iterator. 533 */ 534 public EmptyTreeIterator createEmptyTreeIterator() { 535 return new EmptyTreeIterator(this); 536 } 537 538 /** 539 * Create a new iterator for the current entry's subtree. 540 * <p> 541 * The parent reference of the iterator must be <code>this</code>, otherwise 542 * the caller would not be able to exit out of the subtree iterator 543 * correctly and return to continue walking <code>this</code>. 544 * 545 * @param reader 546 * reader to load the tree data from. 547 * @param idBuffer 548 * temporary ObjectId buffer for use by this method. 549 * @return a new parser that walks over the current subtree. 550 * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException 551 * the current entry is not actually a tree and cannot be parsed 552 * as though it were a tree. 553 * @throws java.io.IOException 554 * a loose object or pack file could not be read. 555 */ 556 public AbstractTreeIterator createSubtreeIterator( 557 final ObjectReader reader, final MutableObjectId idBuffer) 558 throws IncorrectObjectTypeException, IOException { 559 return createSubtreeIterator(reader); 560 } 561 562 /** 563 * Position this iterator on the first entry. 564 * 565 * The default implementation of this method uses {@code back(1)} until 566 * {@code first()} is true. This is most likely not the most efficient 567 * method of repositioning the iterator to its first entry, so subclasses 568 * are strongly encouraged to override the method. 569 * 570 * @throws org.eclipse.jgit.errors.CorruptObjectException 571 * the tree is invalid. 572 */ 573 public void reset() throws CorruptObjectException { 574 while (!first()) 575 back(1); 576 } 577 578 /** 579 * Is this tree iterator positioned on its first entry? 580 * <p> 581 * An iterator is positioned on the first entry if <code>back(1)</code> 582 * would be an invalid request as there is no entry before the current one. 583 * <p> 584 * An empty iterator (one with no entries) will be 585 * <code>first() && eof()</code>. 586 * 587 * @return true if the iterator is positioned on the first entry. 588 */ 589 public abstract boolean first(); 590 591 /** 592 * Is this tree iterator at its EOF point (no more entries)? 593 * <p> 594 * An iterator is at EOF if there is no current entry. 595 * 596 * @return true if we have walked all entries and have none left. 597 */ 598 public abstract boolean eof(); 599 600 /** 601 * Move to next entry, populating this iterator with the entry data. 602 * <p> 603 * The delta indicates how many moves forward should occur. The most common 604 * delta is 1 to move to the next entry. 605 * <p> 606 * Implementations must populate the following members: 607 * <ul> 608 * <li>{@link #mode}</li> 609 * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li> 610 * <li>{@link #pathLen}</li> 611 * </ul> 612 * as well as any implementation dependent information necessary to 613 * accurately return data from {@link #idBuffer()} and {@link #idOffset()} 614 * when demanded. 615 * 616 * @param delta 617 * number of entries to move the iterator by. Must be a positive, 618 * non-zero integer. 619 * @throws org.eclipse.jgit.errors.CorruptObjectException 620 * the tree is invalid. 621 */ 622 public abstract void next(int delta) throws CorruptObjectException; 623 624 /** 625 * Move to prior entry, populating this iterator with the entry data. 626 * <p> 627 * The delta indicates how many moves backward should occur.The most common 628 * delta is 1 to move to the prior entry. 629 * <p> 630 * Implementations must populate the following members: 631 * <ul> 632 * <li>{@link #mode}</li> 633 * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li> 634 * <li>{@link #pathLen}</li> 635 * </ul> 636 * as well as any implementation dependent information necessary to 637 * accurately return data from {@link #idBuffer()} and {@link #idOffset()} 638 * when demanded. 639 * 640 * @param delta 641 * number of entries to move the iterator by. Must be a positive, 642 * non-zero integer. 643 * @throws org.eclipse.jgit.errors.CorruptObjectException 644 * the tree is invalid. 645 */ 646 public abstract void back(int delta) throws CorruptObjectException; 647 648 /** 649 * Advance to the next tree entry, populating this iterator with its data. 650 * <p> 651 * This method behaves like <code>seek(1)</code> but is called by 652 * {@link org.eclipse.jgit.treewalk.TreeWalk} only if a 653 * {@link org.eclipse.jgit.treewalk.filter.TreeFilter} was used and ruled 654 * out the current entry from the results. In such cases this tree iterator 655 * may perform special behavior. 656 * 657 * @throws org.eclipse.jgit.errors.CorruptObjectException 658 * the tree is invalid. 659 */ 660 public void skip() throws CorruptObjectException { 661 next(1); 662 } 663 664 /** 665 * Indicates to the iterator that no more entries will be read. 666 * <p> 667 * This is only invoked by TreeWalk when the iteration is aborted early due 668 * to a {@link org.eclipse.jgit.errors.StopWalkException} being thrown from 669 * within a TreeFilter. 670 */ 671 public void stopWalk() { 672 // Do nothing by default. Most iterators do not care. 673 } 674 675 /** 676 * Whether the iterator implements {@link #stopWalk()}. 677 * 678 * @return {@code true} if the iterator implements {@link #stopWalk()}. 679 * @since 4.2 680 */ 681 protected boolean needsStopWalk() { 682 return false; 683 } 684 685 /** 686 * Get the length of the name component of the path for the current entry. 687 * 688 * @return the length of the name component of the path for the current 689 * entry. 690 */ 691 public int getNameLength() { 692 return pathLen - pathOffset; 693 } 694 695 /** 696 * JGit internal API for use by 697 * {@link org.eclipse.jgit.dircache.DirCacheCheckout} 698 * 699 * @return start of name component part within {@link #getEntryPathBuffer()} 700 * @since 2.0 701 */ 702 public int getNameOffset() { 703 return pathOffset; 704 } 705 706 /** 707 * Get the name component of the current entry path into the provided 708 * buffer. 709 * 710 * @param buffer 711 * the buffer to get the name into, it is assumed that buffer can 712 * hold the name 713 * @param offset 714 * the offset of the name in the buffer 715 * @see #getNameLength() 716 */ 717 public void getName(byte[] buffer, int offset) { 718 System.arraycopy(path, pathOffset, buffer, offset, pathLen - pathOffset); 719 } 720 721 /** {@inheritDoc} */ 722 @SuppressWarnings("nls") 723 @Override 724 public String toString() { 725 return getClass().getSimpleName() + "[" + getEntryPathString() + "]"; //$NON-NLS-1$ 726 } 727 728 /** 729 * Whether or not this Iterator is iterating through the working tree. 730 * 731 * @return whether or not this Iterator is iterating through the working 732 * tree 733 * @since 4.3 734 */ 735 public boolean isWorkTree() { 736 return false; 737 } 738 }