View Javadoc
1   /*
2    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
3    * Copyright (C) 2010, Christian Halstrick <christian.halstrick@sap.com>
4    * Copyright (C) 2010, Matthias Sohn <matthias.sohn@sap.com>
5    * Copyright (C) 2012-2013, Robin Rosenberg
6    * and other copyright owners as documented in the project's IP log.
7    *
8    * This program and the accompanying materials are made available
9    * under the terms of the Eclipse Distribution License v1.0 which
10   * accompanies this distribution, is reproduced below, and is
11   * available at http://www.eclipse.org/org/documents/edl-v10.php
12   *
13   * All rights reserved.
14   *
15   * Redistribution and use in source and binary forms, with or
16   * without modification, are permitted provided that the following
17   * conditions are met:
18   *
19   * - Redistributions of source code must retain the above copyright
20   *   notice, this list of conditions and the following disclaimer.
21   *
22   * - Redistributions in binary form must reproduce the above
23   *   copyright notice, this list of conditions and the following
24   *   disclaimer in the documentation and/or other materials provided
25   *   with the distribution.
26   *
27   * - Neither the name of the Eclipse Foundation, Inc. nor the
28   *   names of its contributors may be used to endorse or promote
29   *   products derived from this software without specific prior
30   *   written permission.
31   *
32   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
33   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
35   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
37   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
38   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
39   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
40   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
41   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
42   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
43   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
44   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45   */
46  
47  package org.eclipse.jgit.treewalk;
48  
49  import java.io.ByteArrayInputStream;
50  import java.io.File;
51  import java.io.FileInputStream;
52  import java.io.FileNotFoundException;
53  import java.io.IOException;
54  import java.io.InputStream;
55  import java.nio.ByteBuffer;
56  import java.nio.CharBuffer;
57  import java.nio.charset.CharacterCodingException;
58  import java.nio.charset.CharsetEncoder;
59  import java.security.MessageDigest;
60  import java.text.MessageFormat;
61  import java.util.Arrays;
62  import java.util.Collections;
63  import java.util.Comparator;
64  
65  import org.eclipse.jgit.api.errors.FilterFailedException;
66  import org.eclipse.jgit.attributes.AttributesNode;
67  import org.eclipse.jgit.attributes.AttributesRule;
68  import org.eclipse.jgit.diff.RawText;
69  import org.eclipse.jgit.dircache.DirCache;
70  import org.eclipse.jgit.dircache.DirCacheEntry;
71  import org.eclipse.jgit.dircache.DirCacheIterator;
72  import org.eclipse.jgit.errors.CorruptObjectException;
73  import org.eclipse.jgit.errors.MissingObjectException;
74  import org.eclipse.jgit.errors.NoWorkTreeException;
75  import org.eclipse.jgit.ignore.FastIgnoreRule;
76  import org.eclipse.jgit.ignore.IgnoreNode;
77  import org.eclipse.jgit.internal.JGitText;
78  import org.eclipse.jgit.lib.Constants;
79  import org.eclipse.jgit.lib.CoreConfig;
80  import org.eclipse.jgit.lib.CoreConfig.AutoCRLF;
81  import org.eclipse.jgit.lib.CoreConfig.CheckStat;
82  import org.eclipse.jgit.lib.CoreConfig.SymLinks;
83  import org.eclipse.jgit.lib.FileMode;
84  import org.eclipse.jgit.lib.ObjectId;
85  import org.eclipse.jgit.lib.ObjectLoader;
86  import org.eclipse.jgit.lib.ObjectReader;
87  import org.eclipse.jgit.lib.Repository;
88  import org.eclipse.jgit.submodule.SubmoduleWalk;
89  import org.eclipse.jgit.util.FS;
90  import org.eclipse.jgit.util.FS.ExecutionResult;
91  import org.eclipse.jgit.util.IO;
92  import org.eclipse.jgit.util.Paths;
93  import org.eclipse.jgit.util.RawParseUtils;
94  import org.eclipse.jgit.util.io.EolCanonicalizingInputStream;
95  
96  /**
97   * Walks a working directory tree as part of a {@link TreeWalk}.
98   * <p>
99   * Most applications will want to use the standard implementation of this
100  * iterator, {@link FileTreeIterator}, as that does all IO through the standard
101  * <code>java.io</code> package. Plugins for a Java based IDE may however wish
102  * to create their own implementations of this class to allow traversal of the
103  * IDE's project space, as well as benefit from any caching the IDE may have.
104  *
105  * @see FileTreeIterator
106  */
107 public abstract class WorkingTreeIterator extends AbstractTreeIterator {
108 	private static final int MAX_EXCEPTION_TEXT_SIZE = 10 * 1024;
109 
110 	/** An empty entry array, suitable for {@link #init(Entry[])}. */
111 	protected static final Entry[] EOF = {};
112 
113 	/** Size we perform file IO in if we have to read and hash a file. */
114 	static final int BUFFER_SIZE = 2048;
115 
116 	/**
117 	 * Maximum size of files which may be read fully into memory for performance
118 	 * reasons.
119 	 */
120 	private static final long MAXIMUM_FILE_SIZE_TO_READ_FULLY = 65536;
121 
122 	/** Inherited state of this iterator, describing working tree, etc. */
123 	private final IteratorState state;
124 
125 	/** The {@link #idBuffer()} for the current entry. */
126 	private byte[] contentId;
127 
128 	/** Index within {@link #entries} that {@link #contentId} came from. */
129 	private int contentIdFromPtr;
130 
131 	/** List of entries obtained from the subclass. */
132 	private Entry[] entries;
133 
134 	/** Total number of entries in {@link #entries} that are valid. */
135 	private int entryCnt;
136 
137 	/** Current position within {@link #entries}. */
138 	private int ptr;
139 
140 	/** If there is a .gitignore file present, the parsed rules from it. */
141 	private IgnoreNode ignoreNode;
142 
	/**
	 * Cached clean filter command for the current entry; null when not yet
	 * looked up or none applies. Reset to null by {@code parseEntry()}.
	 */
	private String cleanFilterCommand;
144 
145 	/** Repository that is the root level being iterated over */
146 	protected Repository repository;
147 
148 	/** Cached canonical length, initialized from {@link #idBuffer()} */
149 	private long canonLen = -1;
150 
151 	/** The offset of the content id in {@link #idBuffer()} */
152 	private int contentIdOffset;
153 
154 	/**
155 	 * Create a new iterator with no parent.
156 	 *
157 	 * @param options
158 	 *            working tree options to be used
159 	 */
160 	protected WorkingTreeIterator(WorkingTreeOptions options) {
161 		super();
162 		state = new IteratorState(options);
163 	}
164 
165 	/**
166 	 * Create a new iterator with no parent and a prefix.
167 	 * <p>
168 	 * The prefix path supplied is inserted in front of all paths generated by
169 	 * this iterator. It is intended to be used when an iterator is being
170 	 * created for a subsection of an overall repository and needs to be
171 	 * combined with other iterators that are created to run over the entire
172 	 * repository namespace.
173 	 *
174 	 * @param prefix
175 	 *            position of this iterator in the repository tree. The value
176 	 *            may be null or the empty string to indicate the prefix is the
177 	 *            root of the repository. A trailing slash ('/') is
178 	 *            automatically appended if the prefix does not end in '/'.
179 	 * @param options
180 	 *            working tree options to be used
181 	 */
182 	protected WorkingTreeIterator(final String prefix,
183 			WorkingTreeOptions options) {
184 		super(prefix);
185 		state = new IteratorState(options);
186 	}
187 
188 	/**
189 	 * Create an iterator for a subtree of an existing iterator.
190 	 *
191 	 * @param p
192 	 *            parent tree iterator.
193 	 */
194 	protected WorkingTreeIterator(final WorkingTreeIterator p) {
195 		super(p);
196 		state = p.state;
197 		repository = p.repository;
198 	}
199 
200 	/**
201 	 * Initialize this iterator for the root level of a repository.
202 	 * <p>
203 	 * This method should only be invoked after calling {@link #init(Entry[])},
204 	 * and only for the root iterator.
205 	 *
206 	 * @param repo
207 	 *            the repository.
208 	 */
209 	protected void initRootIterator(Repository repo) {
210 		repository = repo;
211 		Entry entry;
212 		if (ignoreNode instanceof PerDirectoryIgnoreNode)
213 			entry = ((PerDirectoryIgnoreNode) ignoreNode).entry;
214 		else
215 			entry = null;
216 		ignoreNode = new RootIgnoreNode(entry, repo);
217 	}
218 
219 	/**
220 	 * Define the matching {@link DirCacheIterator}, to optimize ObjectIds.
221 	 *
222 	 * Once the DirCacheIterator has been set this iterator must only be
223 	 * advanced by the TreeWalk that is supplied, as it assumes that itself and
224 	 * the corresponding DirCacheIterator are positioned on the same file path
225 	 * whenever {@link #idBuffer()} is invoked.
226 	 *
227 	 * @param walk
228 	 *            the walk that will be advancing this iterator.
229 	 * @param treeId
230 	 *            index of the matching {@link DirCacheIterator}.
231 	 */
232 	public void setDirCacheIterator(TreeWalk walk, int treeId) {
233 		state.walk = walk;
234 		state.dirCacheTree = treeId;
235 	}
236 
237 	@Override
238 	public boolean hasId() {
239 		if (contentIdFromPtr == ptr)
240 			return true;
241 		return (mode & FileMode.TYPE_MASK) == FileMode.TYPE_FILE;
242 	}
243 
	@Override
	public byte[] idBuffer() {
		// Reuse the id already computed for the current entry, if any.
		if (contentIdFromPtr == ptr)
			return contentId;

		if (state.walk != null) {
			// If there is a matching DirCacheIterator, we can reuse
			// its idBuffer, but only if we appear to be clean against
			// the cached index information for the path.
			//
			DirCacheIterator i = state.walk.getTree(state.dirCacheTree,
					DirCacheIterator.class);
			if (i != null) {
				DirCacheEntry ent = i.getDirCacheEntry();
				if (ent != null && compareMetadata(ent) == MetadataDiff.EQUAL) {
					contentIdOffset = i.idOffset();
					contentIdFromPtr = ptr;
					return contentId = i.idBuffer();
				}
				contentIdOffset = 0;
			} else {
				contentIdOffset = 0;
			}
		}
		// No cached id usable: hash the working tree content ourselves.
		switch (mode & FileMode.TYPE_MASK) {
		case FileMode.TYPE_SYMLINK:
		case FileMode.TYPE_FILE:
			contentIdFromPtr = ptr;
			return contentId = idBufferBlob(entries[ptr]);
		case FileMode.TYPE_GITLINK:
			contentIdFromPtr = ptr;
			return contentId = idSubmodule(entries[ptr]);
		}
		// Other modes (e.g. trees) have no content id here.
		return zeroid;
	}
279 
280 	/**
281 	 * Get submodule id for given entry.
282 	 *
283 	 * @param e
284 	 * @return non-null submodule id
285 	 */
286 	protected byte[] idSubmodule(Entry e) {
287 		if (repository == null)
288 			return zeroid;
289 		File directory;
290 		try {
291 			directory = repository.getWorkTree();
292 		} catch (NoWorkTreeException nwte) {
293 			return zeroid;
294 		}
295 		return idSubmodule(directory, e);
296 	}
297 
298 	/**
299 	 * Get submodule id using the repository at the location of the entry
300 	 * relative to the directory.
301 	 *
302 	 * @param directory
303 	 * @param e
304 	 * @return non-null submodule id
305 	 */
306 	protected byte[] idSubmodule(File directory, Entry e) {
307 		final Repository submoduleRepo;
308 		try {
309 			submoduleRepo = SubmoduleWalk.getSubmoduleRepository(directory,
310 					e.getName());
311 		} catch (IOException exception) {
312 			return zeroid;
313 		}
314 		if (submoduleRepo == null)
315 			return zeroid;
316 
317 		final ObjectId head;
318 		try {
319 			head = submoduleRepo.resolve(Constants.HEAD);
320 		} catch (IOException exception) {
321 			return zeroid;
322 		} finally {
323 			submoduleRepo.close();
324 		}
325 		if (head == null)
326 			return zeroid;
327 		final byte[] id = new byte[Constants.OBJECT_ID_LENGTH];
328 		head.copyRawTo(id, 0);
329 		return id;
330 	}
331 
	/**
	 * ASCII decimal digit bytes — presumably used to format the blob length
	 * in the object header when hashing (see computeHash); TODO confirm.
	 */
	private static final byte[] digits = { '0', '1', '2', '3', '4', '5', '6',
			'7', '8', '9' };

	/** Encoded object type string for blobs ("blob"). */
	private static final byte[] hblob = Constants
			.encodedTypeString(Constants.OBJ_BLOB);
337 
	/**
	 * Hash the (possibly filtered) content of the given entry as a blob.
	 * Updates {@link #canonLen} as a side effect of the filtering step.
	 *
	 * @param e
	 *            the current working tree entry
	 * @return raw ObjectId bytes, or {@link #zeroid} if the content could
	 *         not be opened or read
	 */
	private byte[] idBufferBlob(final Entry e) {
		try {
			final InputStream is = e.openInputStream();
			if (is == null)
				return zeroid;
			try {
				state.initializeDigestAndReadBuffer();

				final long len = e.getLength();
				InputStream filteredIs = possiblyFilteredInputStream(e, is, len);
				return computeHash(filteredIs, canonLen);
			} finally {
				safeClose(is);
			}
		} catch (IOException err) {
			// Can't read the file? Don't report the failure either.
			return zeroid;
		}
	}
357 
358 	private InputStream possiblyFilteredInputStream(final Entry e,
359 			final InputStream is, final long len) throws IOException {
360 		boolean mightNeedCleaning = mightNeedCleaning();
361 		if (!mightNeedCleaning) {
362 			canonLen = len;
363 			return is;
364 		}
365 
366 		if (len <= MAXIMUM_FILE_SIZE_TO_READ_FULLY) {
367 			ByteBuffer rawbuf = IO.readWholeStream(is, (int) len);
368 			byte[] raw = rawbuf.array();
369 			int n = rawbuf.limit();
370 			if (!isBinary(raw, n)) {
371 				rawbuf = filterClean(raw, n);
372 				raw = rawbuf.array();
373 				n = rawbuf.limit();
374 			}
375 			canonLen = n;
376 			return new ByteArrayInputStream(raw, 0, n);
377 		}
378 
379 		// TODO: fix autocrlf causing mightneedcleaning
380 		if (!mightNeedCleaning && isBinary(e)) {
381 			canonLen = len;
382 			return is;
383 		}
384 
385 		final InputStream lenIs = filterClean(e.openInputStream());
386 		try {
387 			canonLen = computeLength(lenIs);
388 		} finally {
389 			safeClose(lenIs);
390 		}
391 		return filterClean(is);
392 	}
393 
394 	private static void safeClose(final InputStream in) {
395 		try {
396 			in.close();
397 		} catch (IOException err2) {
398 			// Suppress any error related to closing an input
399 			// stream. We don't care, we should not have any
400 			// outstanding data to flush or anything like that.
401 		}
402 	}
403 
404 	private boolean mightNeedCleaning() throws IOException {
405 		switch (getOptions().getAutoCRLF()) {
406 		case FALSE:
407 		default:
408 			if (getCleanFilterCommand() != null)
409 				return true;
410 			return false;
411 
412 		case TRUE:
413 		case INPUT:
414 			return true;
415 		}
416 	}
417 
	// Thin delegate: binary detection on an in-memory buffer.
	private static boolean isBinary(byte[] content, int sz) {
		return RawText.isBinary(content, sz);
	}
421 
422 	private static boolean isBinary(Entry entry) throws IOException {
423 		InputStream in = entry.openInputStream();
424 		try {
425 			return RawText.isBinary(in);
426 		} finally {
427 			safeClose(in);
428 		}
429 	}
430 
431 	private ByteBuffer filterClean(byte[] src, int n) throws IOException {
432 		InputStream in = new ByteArrayInputStream(src);
433 		try {
434 			return IO.readWholeStream(filterClean(in), n);
435 		} finally {
436 			safeClose(in);
437 		}
438 	}
439 
	/**
	 * Run the content through CRLF normalization and, when configured, the
	 * external clean filter for this entry.
	 *
	 * @param in
	 *            raw working tree content
	 * @return stream of the cleaned content
	 * @throws IOException
	 *             the filter process failed to start or exited non-zero;
	 *             the cause is a {@link FilterFailedException}
	 */
	private InputStream filterClean(InputStream in) throws IOException {
		in = handleAutoCRLF(in);
		String filterCommand = getCleanFilterCommand();
		if (filterCommand != null) {
			FS fs = repository.getFS();
			ProcessBuilder filterProcessBuilder = fs.runInShell(filterCommand,
					new String[0]);
			// Run in the work tree with GIT_DIR pointing at the metadata
			// directory, like native git does for filters.
			filterProcessBuilder.directory(repository.getWorkTree());
			filterProcessBuilder.environment().put(Constants.GIT_DIR_KEY,
					repository.getDirectory().getAbsolutePath());
			ExecutionResult result;
			try {
				result = fs.execute(filterProcessBuilder, in);
			} catch (IOException | InterruptedException e) {
				throw new IOException(new FilterFailedException(e,
						filterCommand, getEntryPathString()));
			}
			int rc = result.getRc();
			if (rc != 0) {
				// Non-zero exit: surface truncated stdout/stderr so the
				// caller can diagnose the failing filter.
				throw new IOException(new FilterFailedException(rc,
						filterCommand, getEntryPathString(),
						result.getStdout().toByteArray(MAX_EXCEPTION_TEXT_SIZE),
						RawParseUtils.decode(result.getStderr()
								.toByteArray(MAX_EXCEPTION_TEXT_SIZE))));
			}
			return result.getStdout().openInputStream();
		}
		return in;
	}
469 
470 	private InputStream handleAutoCRLF(InputStream in) {
471 		AutoCRLF autoCRLF = getOptions().getAutoCRLF();
472 		if (autoCRLF == AutoCRLF.TRUE || autoCRLF == AutoCRLF.INPUT) {
473 			in = new EolCanonicalizingInputStream(in, true);
474 		}
475 		return in;
476 	}
477 
478 	/**
479 	 * Returns the working tree options used by this iterator.
480 	 *
481 	 * @return working tree options
482 	 */
	public WorkingTreeOptions getOptions() {
		// Options live in the state shared by every iterator of this walk.
		return state.options;
	}
486 
	@Override
	public int idOffset() {
		// Non-zero only when idBuffer() borrowed the id from a matching
		// DirCacheIterator; otherwise the id starts at offset 0.
		return contentIdOffset;
	}
491 
492 	@Override
493 	public void reset() {
494 		if (!first()) {
495 			ptr = 0;
496 			if (!eof())
497 				parseEntry();
498 		}
499 	}
500 
	@Override
	public boolean first() {
		return ptr == 0; // positioned on the first entry of this directory
	}
505 
	@Override
	public boolean eof() {
		return ptr == entryCnt; // advanced past the last valid entry
	}
510 
511 	@Override
512 	public void next(final int delta) throws CorruptObjectException {
513 		ptr += delta;
514 		if (!eof()) {
515 			parseEntry();
516 		}
517 	}
518 
	@Override
	public void back(final int delta) throws CorruptObjectException {
		ptr -= delta;
		// NOTE(review): no bounds/eof check — presumably the TreeWalk
		// contract guarantees we never step back past the first entry.
		parseEntry();
	}
524 
	/**
	 * Load mode and encoded path of the entry at {@link #ptr} into this
	 * iterator's shared buffers, invalidating per-entry caches.
	 */
	private void parseEntry() {
		final Entry e = entries[ptr];
		mode = e.getMode().getBits();

		final int nameLen = e.encodedNameLen;
		ensurePathCapacity(pathOffset + nameLen, pathOffset);
		System.arraycopy(e.encodedName, 0, path, pathOffset, nameLen);
		pathLen = pathOffset + nameLen;
		// Invalidate values cached for the previous entry.
		canonLen = -1;
		cleanFilterCommand = null;
	}
536 
537 	/**
538 	 * Get the raw byte length of this entry.
539 	 *
540 	 * @return size of this file, in bytes.
541 	 */
	public long getEntryLength() {
		// Raw on-disk size, before any clean filtering.
		return current().getLength();
	}
545 
546 	/**
547 	 * Get the filtered input length of this entry
548 	 *
549 	 * @return size of the content, in bytes
550 	 * @throws IOException
551 	 */
552 	public long getEntryContentLength() throws IOException {
553 		if (canonLen == -1) {
554 			long rawLen = getEntryLength();
555 			if (rawLen == 0)
556 				canonLen = 0;
557 			InputStream is = current().openInputStream();
558 			try {
559 				// canonLen gets updated here
560 				possiblyFilteredInputStream(current(), is, current()
561 						.getLength());
562 			} finally {
563 				safeClose(is);
564 			}
565 		}
566 		return canonLen;
567 	}
568 
569 	/**
570 	 * Get the last modified time of this entry.
571 	 *
572 	 * @return last modified time of this file, in milliseconds since the epoch
573 	 *         (Jan 1, 1970 UTC).
574 	 */
	public long getEntryLastModified() {
		// Milliseconds since the epoch, as reported by the entry.
		return current().getLastModified();
	}
578 
579 	/**
580 	 * Obtain an input stream to read the file content.
581 	 * <p>
582 	 * Efficient implementations are not required. The caller will usually
583 	 * obtain the stream only once per entry, if at all.
584 	 * <p>
585 	 * The input stream should not use buffering if the implementation can avoid
586 	 * it. The caller will buffer as necessary to perform efficient block IO
587 	 * operations.
588 	 * <p>
589 	 * The caller will close the stream once complete.
590 	 *
591 	 * @return a stream to read from the file.
592 	 * @throws IOException
593 	 *             the file could not be opened for reading.
594 	 */
595 	public InputStream openEntryStream() throws IOException {
596 		InputStream rawis = current().openInputStream();
597 		if (mightNeedCleaning())
598 			return filterClean(rawis);
599 		else
600 			return rawis;
601 	}
602 
603 	/**
604 	 * Determine if the current entry path is ignored by an ignore rule.
605 	 *
606 	 * @return true if the entry was ignored by an ignore rule file.
607 	 * @throws IOException
608 	 *             a relevant ignore rule file exists but cannot be read.
609 	 */
	public boolean isEntryIgnored() throws IOException {
		// Check the full current path against the ignore rules.
		return isEntryIgnored(pathLen);
	}
613 
614 	/**
615 	 * Determine if the entry path is ignored by an ignore rule.
616 	 *
617 	 * @param pLen
618 	 *            the length of the path in the path buffer.
619 	 * @return true if the entry is ignored by an ignore rule.
620 	 * @throws IOException
621 	 *             a relevant ignore rule file exists but cannot be read.
622 	 */
	protected boolean isEntryIgnored(final int pLen) throws IOException {
		// Start the rule walk with this iterator's own mode, no negation.
		return isEntryIgnored(pLen, mode, false);
	}
626 
627 	/**
628 	 * Determine if the entry path is ignored by an ignore rule. Consider
629 	 * possible rule negation from child iterator.
630 	 *
631 	 * @param pLen
632 	 *            the length of the path in the path buffer.
633 	 * @param fileMode
634 	 *            the original iterator file mode
635 	 * @param negatePrevious
636 	 *            true if the previous matching iterator rule was negation
637 	 * @return true if the entry is ignored by an ignore rule.
638 	 * @throws IOException
639 	 *             a relevant ignore rule file exists but cannot be read.
640 	 */
	private boolean isEntryIgnored(final int pLen, int fileMode,
			boolean negatePrevious)
			throws IOException {
		IgnoreNode rules = getIgnoreNode();
		if (rules != null) {
			// The ignore code wants path to start with a '/' if possible.
			// If we have the '/' in our path buffer because we are inside
			// a subdirectory include it in the range we convert to string.
			//
			int pOff = pathOffset;
			if (0 < pOff)
				pOff--;
			String p = TreeWalk.pathOf(path, pOff, pLen);
			switch (rules.isIgnored(p, FileMode.TREE.equals(fileMode),
					negatePrevious)) {
			case IGNORED:
				return true;
			case NOT_IGNORED:
				return false;
			case CHECK_PARENT:
				// No rule here decided; defer to the parent directory.
				negatePrevious = false;
				break;
			case CHECK_PARENT_NEGATE_FIRST_MATCH:
				// Defer to the parent, inverting its first matching rule.
				negatePrevious = true;
				break;
			}
		}
		// Walk up the directory hierarchy until a rule decides; an entry
		// with no matching rule anywhere is not ignored.
		if (parent instanceof WorkingTreeIterator)
			return ((WorkingTreeIterator) parent).isEntryIgnored(pLen, fileMode,
					negatePrevious);
		return false;
	}
673 
674 	private IgnoreNode getIgnoreNode() throws IOException {
675 		if (ignoreNode instanceof PerDirectoryIgnoreNode)
676 			ignoreNode = ((PerDirectoryIgnoreNode) ignoreNode).load();
677 		return ignoreNode;
678 	}
679 
680 	/**
681 	 * Retrieves the {@link AttributesNode} for the current entry.
682 	 *
683 	 * @return {@link AttributesNode} for the current entry.
684 	 * @throws IOException
685 	 *             if an error is raised while parsing the .gitattributes file
686 	 * @since 3.7
687 	 */
688 	public AttributesNode getEntryAttributesNode() throws IOException {
689 		if (attributesNode instanceof PerDirectoryAttributesNode)
690 			attributesNode = ((PerDirectoryAttributesNode) attributesNode)
691 					.load();
692 		return attributesNode;
693 	}
694 
	/**
	 * Orders entries by encoded path name using git's mode-aware path
	 * ordering (delegated to {@link Paths#compare}).
	 */
	private static final Comparator<Entry> ENTRY_CMP = new Comparator<Entry>() {
		public int compare(Entry a, Entry b) {
			return Paths.compare(
					a.encodedName, 0, a.encodedNameLen, a.getMode().getBits(),
					b.encodedName, 0, b.encodedNameLen, b.getMode().getBits());
		}
	};
702 
703 	/**
704 	 * Constructor helper.
705 	 *
706 	 * @param list
707 	 *            files in the subtree of the work tree this iterator operates
708 	 *            on
709 	 */
	protected void init(final Entry[] list) {
		// Filter out nulls, . and .. as these are not valid tree entries,
		// also cache the encoded forms of the path names for efficient use
		// later on during sorting and iteration.
		//
		entries = list;
		int i, o;

		final CharsetEncoder nameEncoder = state.nameEncoder;
		// Compact the array in place: i scans, o writes kept entries.
		for (i = 0, o = 0; i < entries.length; i++) {
			final Entry e = entries[i];
			if (e == null)
				continue;
			final String name = e.getName();
			if (".".equals(name) || "..".equals(name)) //$NON-NLS-1$ //$NON-NLS-2$
				continue;
			// The .git directory itself is never reported as an entry.
			if (Constants.DOT_GIT.equals(name))
				continue;
			// Remember ignore/attribute files for lazy parsing later.
			if (Constants.DOT_GIT_IGNORE.equals(name))
				ignoreNode = new PerDirectoryIgnoreNode(e);
			if (Constants.DOT_GIT_ATTRIBUTES.equals(name))
				attributesNode = new PerDirectoryAttributesNode(e);
			if (i != o)
				entries[o] = e;
			e.encodeName(nameEncoder);
			o++;
		}
		entryCnt = o;
		// Sort in git path order so this lines up with other iterators.
		Arrays.sort(entries, 0, entryCnt, ENTRY_CMP);

		contentIdFromPtr = -1;
		ptr = 0;
		if (!eof())
			parseEntry();
		else if (pathLen == 0) // see bug 445363
			pathLen = pathOffset;
	}
747 
748 	/**
749 	 * Obtain the current entry from this iterator.
750 	 *
751 	 * @return the currently selected entry.
752 	 */
	protected Entry current() {
		// Entry this iterator is currently positioned on.
		return entries[ptr];
	}
756 
757 	/**
758 	 * The result of a metadata-comparison between the current entry and a
759 	 * {@link DirCacheEntry}
760 	 */
	public enum MetadataDiff {
		/**
		 * The entries are equal by metaData (mode, length,
		 * modification-timestamp) or the <code>assumeValid</code> attribute of
		 * the index entry is set
		 */
		EQUAL,

		/**
		 * The entries are not equal by metaData (mode, length) or the
		 * <code>isUpdateNeeded</code> attribute of the index entry is set
		 */
		DIFFER_BY_METADATA,

		/** index entry is smudged - can't use that entry for comparison */
		SMUDGED,

		/**
		 * The entries are equal by metaData (mode, length) but differ by
		 * modification-timestamp. A content check is needed to decide
		 * whether the file really changed.
		 */
		DIFFER_BY_TIMESTAMP
	}
784 
785 	/**
786 	 * Is the file mode of the current entry different than the given raw mode?
787 	 *
788 	 * @param rawMode
789 	 * @return true if different, false otherwise
790 	 */
	public boolean isModeDifferent(final int rawMode) {
		// Determine difference in mode-bits of file and index-entry. In the
		// bitwise presentation of modeDiff we'll have a '1' when the two modes
		// differ at this position.
		int modeDiff = getEntryRawMode() ^ rawMode;

		if (modeDiff == 0)
			return false;

		// Do not rely on filemode differences in case of symbolic links
		// when core.symlinks is disabled.
		if (getOptions().getSymLinks() == SymLinks.FALSE)
			if (FileMode.SYMLINK.equals(rawMode))
				return false;

		// Ignore the executable file bits if WorkingTreeOptions tell me to
		// do so. Ignoring is done by setting the bits representing a
		// EXECUTABLE_FILE to '0' in modeDiff
		if (!state.options.isFileMode())
			modeDiff &= ~FileMode.EXECUTABLE_FILE.getBits();
		return modeDiff != 0;
	}
812 
813 	/**
814 	 * Compare the metadata (mode, length, modification-timestamp) of the
815 	 * current entry and a {@link DirCacheEntry}
816 	 *
817 	 * @param entry
818 	 *            the {@link DirCacheEntry} to compare with
819 	 * @return a {@link MetadataDiff} which tells whether and how the entries
820 	 *         metadata differ
821 	 */
	public MetadataDiff compareMetadata(DirCacheEntry entry) {
		// assume-valid wins over every other check.
		if (entry.isAssumeValid())
			return MetadataDiff.EQUAL;

		if (entry.isUpdateNeeded())
			return MetadataDiff.DIFFER_BY_METADATA;

		// Length can only be trusted when the entry is not smudged.
		if (!entry.isSmudged() && entry.getLength() != (int) getEntryLength())
			return MetadataDiff.DIFFER_BY_METADATA;

		if (isModeDifferent(entry.getRawMode()))
			return MetadataDiff.DIFFER_BY_METADATA;

		// Git under windows only stores seconds so we round the timestamp
		// Java gives us if it looks like the timestamp in index is seconds
		// only. Otherwise we compare the timestamp at millisecond precision,
		// unless core.checkstat is set to "minimal", in which case we only
		// compare the whole second part.
		long cacheLastModified = entry.getLastModified();
		long fileLastModified = getEntryLastModified();
		long lastModifiedMillis = fileLastModified % 1000;
		long cacheMillis = cacheLastModified % 1000;
		if (getOptions().getCheckStat() == CheckStat.MINIMAL) {
			fileLastModified = fileLastModified - lastModifiedMillis;
			cacheLastModified = cacheLastModified - cacheMillis;
		} else if (cacheMillis == 0)
			fileLastModified = fileLastModified - lastModifiedMillis;
		// Some Java version on Linux return whole seconds only even when
		// the file systems supports more precision.
		else if (lastModifiedMillis == 0)
			cacheLastModified = cacheLastModified - cacheMillis;

		if (fileLastModified != cacheLastModified)
			return MetadataDiff.DIFFER_BY_TIMESTAMP;
		else if (!entry.isSmudged())
			// The file is clean when you look at timestamps.
			return MetadataDiff.EQUAL;
		else
			return MetadataDiff.SMUDGED;
	}
862 
863 	/**
864 	 * Checks whether this entry differs from a given entry from the
865 	 * {@link DirCache}.
866 	 *
867 	 * File status information is used and if status is same we consider the
868 	 * file identical to the state in the working directory. Native git uses
869 	 * more stat fields than we have accessible in Java.
870 	 *
871 	 * @param entry
872 	 *            the entry from the dircache we want to compare against
873 	 * @param forceContentCheck
874 	 *            True if the actual file content should be checked if
875 	 *            modification time differs.
876 	 * @param reader
877 	 *            access to repository objects if necessary. Should not be null.
878 	 * @return true if content is most likely different.
879 	 * @throws IOException
880 	 * @since 3.3
881 	 */
	public boolean isModified(DirCacheEntry entry, boolean forceContentCheck,
			ObjectReader reader) throws IOException {
		// No index entry: modified unless the working tree entry is also
		// missing.
		if (entry == null)
			return !FileMode.MISSING.equals(getEntryFileMode());
		MetadataDiff diff = compareMetadata(entry);
		switch (diff) {
		case DIFFER_BY_TIMESTAMP:
			if (forceContentCheck)
				// But we are told to look at content even though timestamps
				// tell us about modification
				return contentCheck(entry, reader);
			else
				// We are told to assume a modification if timestamps differs
				return true;
		case SMUDGED:
			// The file is clean by timestamps but the entry was smudged.
			// Lets do a content check
			return contentCheck(entry, reader);
		case EQUAL:
			return false;
		case DIFFER_BY_METADATA:
			// Symlink mode differences are not reliable; fall back to a
			// content comparison of the link target.
			if (mode == FileMode.SYMLINK.getBits())
				return contentCheck(entry, reader);
			return true;
		default:
			throw new IllegalStateException(MessageFormat.format(
					JGitText.get().unexpectedCompareResult, diff.name()));
		}
	}
911 
912 	/**
913 	 * Get the file mode to use for the current entry when it is to be updated
914 	 * in the index.
915 	 *
916 	 * @param indexIter
917 	 *            {@link DirCacheIterator} positioned at the same entry as this
918 	 *            iterator or null if no {@link DirCacheIterator} is available
919 	 *            at this iterator's current entry
920 	 * @return index file mode
921 	 */
922 	public FileMode getIndexFileMode(final DirCacheIterator indexIter) {
923 		final FileMode wtMode = getEntryFileMode();
924 		if (indexIter == null)
925 			return wtMode;
926 		if (getOptions().isFileMode())
927 			return wtMode;
928 		final FileMode iMode = indexIter.getEntryFileMode();
929 		if (FileMode.REGULAR_FILE == wtMode
930 				&& FileMode.EXECUTABLE_FILE == iMode)
931 			return iMode;
932 		if (FileMode.EXECUTABLE_FILE == wtMode
933 				&& FileMode.REGULAR_FILE == iMode)
934 			return iMode;
935 		return wtMode;
936 	}
937 
	/**
	 * Compares the entry's content with the content in the filesystem.
	 * Unsmudges the entry when it is detected that it is clean.
	 *
	 * @param entry
	 *            the index entry to be checked
	 * @param reader
	 *            access to repository data if necessary; may be null
	 *            (deprecated use), which skips the CRLF-aware comparison
	 * @return <code>true</code> if the content doesn't match,
	 *         <code>false</code> if it matches
	 * @throws IOException
	 */
	private boolean contentCheck(DirCacheEntry entry, ObjectReader reader)
			throws IOException {
		if (getEntryObjectId().equals(entry.getObjectId())) {
			// Content has not changed

			// We know the entry can't be racily clean because it's still clean.
			// Therefore we unsmudge the entry!
			// If by any chance we now unsmudge although we are still in the
			// same time-slot as the last modification to the index file the
			// next index write operation will smudge again.
			// Caution: we are unsmudging just by setting the length of the
			// in-memory entry object. It's the callers task to detect that we
			// have modified the entry and to persist the modified index.
			entry.setLength((int) getEntryLength());

			return false;
		} else {
			// Symlinks are compared by their normalized target strings.
			// NOTE(review): File.equals is used for this compare, which is
			// case-insensitive on some platforms — confirm that is intended.
			if (mode == FileMode.SYMLINK.getBits())
				return !new File(readContentAsNormalizedString(current()))
						.equals(new File((readContentAsNormalizedString(entry,
								reader))));
			// Content differs: that's a real change, perhaps
			if (reader == null) // deprecated use, do no further checks
				return true;
			switch (getOptions().getAutoCRLF()) {
			case INPUT:
			case TRUE:
				InputStream dcIn = null;
				try {
					ObjectLoader loader = reader.open(entry.getObjectId());
					if (loader == null)
						return true;

					// We need to compute the length, but only if it is not
					// a binary stream.
					dcIn = new EolCanonicalizingInputStream(
							loader.openStream(), true, true /* abort if binary */);
					long dcInLen;
					try {
						dcInLen = computeLength(dcIn);
					} catch (EolCanonicalizingInputStream.IsBinaryException e) {
						// Binary content: CRLF canonicalization does not
						// apply, so differing object ids mean real change.
						return true;
					} finally {
						dcIn.close();
					}

					// Second pass: hash the canonicalized index content and
					// compare against this entry's object id.
					dcIn = new EolCanonicalizingInputStream(
							loader.openStream(), true);
					byte[] autoCrLfHash = computeHash(dcIn, dcInLen);
					boolean changed = getEntryObjectId().compareTo(
							autoCrLfHash, 0) != 0;
					return changed;
				} catch (IOException e) {
					// Unreadable repository content: report as modified.
					return true;
				} finally {
					// dcIn may point at either stream; close whichever is
					// still open (the first was already closed above).
					if (dcIn != null)
						try {
							dcIn.close();
						} catch (IOException e) {
							// empty
						}
				}
			case FALSE:
				break;
			}
			return true;
		}
	}
1018 
1019 	private static String readContentAsNormalizedString(DirCacheEntry entry,
1020 			ObjectReader reader) throws MissingObjectException, IOException {
1021 		ObjectLoader open = reader.open(entry.getObjectId());
1022 		byte[] cachedBytes = open.getCachedBytes();
1023 		return FS.detect().normalize(RawParseUtils.decode(cachedBytes));
1024 	}
1025 
1026 	private static String readContentAsNormalizedString(Entry entry) throws IOException {
1027 		long length = entry.getLength();
1028 		byte[] content = new byte[(int) length];
1029 		InputStream is = entry.openInputStream();
1030 		IO.readFully(is, content, 0, (int) length);
1031 		return FS.detect().normalize(RawParseUtils.decode(content));
1032 	}
1033 
1034 	private static long computeLength(InputStream in) throws IOException {
1035 		// Since we only care about the length, use skip. The stream
1036 		// may be able to more efficiently wade through its data.
1037 		//
1038 		long length = 0;
1039 		for (;;) {
1040 			long n = in.skip(1 << 20);
1041 			if (n <= 0)
1042 				break;
1043 			length += n;
1044 		}
1045 		return length;
1046 	}
1047 
1048 	private byte[] computeHash(InputStream in, long length) throws IOException {
1049 		final MessageDigest contentDigest = state.contentDigest;
1050 		final byte[] contentReadBuffer = state.contentReadBuffer;
1051 
1052 		contentDigest.reset();
1053 		contentDigest.update(hblob);
1054 		contentDigest.update((byte) ' ');
1055 
1056 		long sz = length;
1057 		if (sz == 0) {
1058 			contentDigest.update((byte) '0');
1059 		} else {
1060 			final int bufn = contentReadBuffer.length;
1061 			int p = bufn;
1062 			do {
1063 				contentReadBuffer[--p] = digits[(int) (sz % 10)];
1064 				sz /= 10;
1065 			} while (sz > 0);
1066 			contentDigest.update(contentReadBuffer, p, bufn - p);
1067 		}
1068 		contentDigest.update((byte) 0);
1069 
1070 		for (;;) {
1071 			final int r = in.read(contentReadBuffer);
1072 			if (r <= 0)
1073 				break;
1074 			contentDigest.update(contentReadBuffer, 0, r);
1075 			sz += r;
1076 		}
1077 		if (sz != length)
1078 			return zeroid;
1079 		return contentDigest.digest();
1080 	}
1081 
1082 	/** A single entry within a working directory tree. */
1083 	protected static abstract class Entry {
1084 		byte[] encodedName;
1085 
1086 		int encodedNameLen;
1087 
1088 		void encodeName(final CharsetEncoder enc) {
1089 			final ByteBuffer b;
1090 			try {
1091 				b = enc.encode(CharBuffer.wrap(getName()));
1092 			} catch (CharacterCodingException e) {
1093 				// This should so never happen.
1094 				throw new RuntimeException(MessageFormat.format(
1095 						JGitText.get().unencodeableFile, getName()));
1096 			}
1097 
1098 			encodedNameLen = b.limit();
1099 			if (b.hasArray() && b.arrayOffset() == 0)
1100 				encodedName = b.array();
1101 			else
1102 				b.get(encodedName = new byte[encodedNameLen]);
1103 		}
1104 
1105 		public String toString() {
1106 			return getMode().toString() + " " + getName(); //$NON-NLS-1$
1107 		}
1108 
1109 		/**
1110 		 * Get the type of this entry.
1111 		 * <p>
1112 		 * <b>Note: Efficient implementation required.</b>
1113 		 * <p>
1114 		 * The implementation of this method must be efficient. If a subclass
1115 		 * needs to compute the value they should cache the reference within an
1116 		 * instance member instead.
1117 		 *
1118 		 * @return a file mode constant from {@link FileMode}.
1119 		 */
1120 		public abstract FileMode getMode();
1121 
1122 		/**
1123 		 * Get the byte length of this entry.
1124 		 * <p>
1125 		 * <b>Note: Efficient implementation required.</b>
1126 		 * <p>
1127 		 * The implementation of this method must be efficient. If a subclass
1128 		 * needs to compute the value they should cache the reference within an
1129 		 * instance member instead.
1130 		 *
1131 		 * @return size of this file, in bytes.
1132 		 */
1133 		public abstract long getLength();
1134 
1135 		/**
1136 		 * Get the last modified time of this entry.
1137 		 * <p>
1138 		 * <b>Note: Efficient implementation required.</b>
1139 		 * <p>
1140 		 * The implementation of this method must be efficient. If a subclass
1141 		 * needs to compute the value they should cache the reference within an
1142 		 * instance member instead.
1143 		 *
1144 		 * @return time since the epoch (in ms) of the last change.
1145 		 */
1146 		public abstract long getLastModified();
1147 
1148 		/**
1149 		 * Get the name of this entry within its directory.
1150 		 * <p>
1151 		 * Efficient implementations are not required. The caller will obtain
1152 		 * the name only once and cache it once obtained.
1153 		 *
1154 		 * @return name of the entry.
1155 		 */
1156 		public abstract String getName();
1157 
1158 		/**
1159 		 * Obtain an input stream to read the file content.
1160 		 * <p>
1161 		 * Efficient implementations are not required. The caller will usually
1162 		 * obtain the stream only once per entry, if at all.
1163 		 * <p>
1164 		 * The input stream should not use buffering if the implementation can
1165 		 * avoid it. The caller will buffer as necessary to perform efficient
1166 		 * block IO operations.
1167 		 * <p>
1168 		 * The caller will close the stream once complete.
1169 		 *
1170 		 * @return a stream to read from the file.
1171 		 * @throws IOException
1172 		 *             the file could not be opened for reading.
1173 		 */
1174 		public abstract InputStream openInputStream() throws IOException;
1175 	}
1176 
1177 	/** Magic type indicating we know rules exist, but they aren't loaded. */
1178 	private static class PerDirectoryIgnoreNode extends IgnoreNode {
1179 		final Entry entry;
1180 
1181 		PerDirectoryIgnoreNode(Entry entry) {
1182 			super(Collections.<FastIgnoreRule> emptyList());
1183 			this.entry = entry;
1184 		}
1185 
1186 		IgnoreNode load() throws IOException {
1187 			IgnoreNode r = new IgnoreNode();
1188 			InputStream in = entry.openInputStream();
1189 			try {
1190 				r.parse(in);
1191 			} finally {
1192 				in.close();
1193 			}
1194 			return r.getRules().isEmpty() ? null : r;
1195 		}
1196 	}
1197 
1198 	/** Magic type indicating there may be rules for the top level. */
1199 	private static class RootIgnoreNode extends PerDirectoryIgnoreNode {
1200 		final Repository repository;
1201 
1202 		RootIgnoreNode(Entry entry, Repository repository) {
1203 			super(entry);
1204 			this.repository = repository;
1205 		}
1206 
1207 		@Override
1208 		IgnoreNode load() throws IOException {
1209 			IgnoreNode r;
1210 			if (entry != null) {
1211 				r = super.load();
1212 				if (r == null)
1213 					r = new IgnoreNode();
1214 			} else {
1215 				r = new IgnoreNode();
1216 			}
1217 
1218 			FS fs = repository.getFS();
1219 			String path = repository.getConfig().get(CoreConfig.KEY)
1220 					.getExcludesFile();
1221 			if (path != null) {
1222 				File excludesfile;
1223 				if (path.startsWith("~/")) //$NON-NLS-1$
1224 					excludesfile = fs.resolve(fs.userHome(), path.substring(2));
1225 				else
1226 					excludesfile = fs.resolve(null, path);
1227 				loadRulesFromFile(r, excludesfile);
1228 			}
1229 
1230 			File exclude = fs.resolve(repository.getDirectory(),
1231 					Constants.INFO_EXCLUDE);
1232 			loadRulesFromFile(r, exclude);
1233 
1234 			return r.getRules().isEmpty() ? null : r;
1235 		}
1236 
1237 		private static void loadRulesFromFile(IgnoreNode r, File exclude)
1238 				throws FileNotFoundException, IOException {
1239 			if (FS.DETECTED.exists(exclude)) {
1240 				FileInputStream in = new FileInputStream(exclude);
1241 				try {
1242 					r.parse(in);
1243 				} finally {
1244 					in.close();
1245 				}
1246 			}
1247 		}
1248 	}
1249 
1250 	/** Magic type indicating we know rules exist, but they aren't loaded. */
1251 	private static class PerDirectoryAttributesNode extends AttributesNode {
1252 		final Entry entry;
1253 
1254 		PerDirectoryAttributesNode(Entry entry) {
1255 			super(Collections.<AttributesRule> emptyList());
1256 			this.entry = entry;
1257 		}
1258 
1259 		AttributesNode load() throws IOException {
1260 			AttributesNode r = new AttributesNode();
1261 			InputStream in = entry.openInputStream();
1262 			try {
1263 				r.parse(in);
1264 			} finally {
1265 				in.close();
1266 			}
1267 			return r.getRules().isEmpty() ? null : r;
1268 		}
1269 	}
1270 
1271 
	private static final class IteratorState {
		/** Options used to process the working tree. */
		final WorkingTreeOptions options;

		/** File name character encoder. */
		final CharsetEncoder nameEncoder;

		/**
		 * Digest computer for {@link #contentId} computations; created
		 * lazily by {@link #initializeDigestAndReadBuffer()} since not
		 * every walk needs content hashing.
		 */
		MessageDigest contentDigest;

		/** Buffer used to perform {@link #contentId} computations. */
		byte[] contentReadBuffer;

		/** TreeWalk with a (supposedly) matching DirCacheIterator. */
		TreeWalk walk;

		/** Position of the matching {@link DirCacheIterator}. */
		int dirCacheTree;

		IteratorState(WorkingTreeOptions options) {
			this.options = options;
			this.nameEncoder = Constants.CHARSET.newEncoder();
		}

		// Lazily allocate the digest and read buffer; both are reused for
		// every entry to avoid per-entry allocations.
		void initializeDigestAndReadBuffer() {
			if (contentDigest == null) {
				contentDigest = Constants.newMessageDigest();
				contentReadBuffer = new byte[BUFFER_SIZE];
			}
		}
	}
1303 
1304 	/**
1305 	 * @return the clean filter command for the current entry or
1306 	 *         <code>null</code> if no such command is defined
1307 	 * @throws IOException
1308 	 * @since 4.2
1309 	 */
1310 	public String getCleanFilterCommand() throws IOException {
1311 		if (cleanFilterCommand == null && state.walk != null) {
1312 			cleanFilterCommand = state.walk
1313 					.getFilterCommand(Constants.ATTR_FILTER_TYPE_CLEAN);
1314 		}
1315 		return cleanFilterCommand;
1316 	}
1317 }