1   /*
2    * Copyright (C) 2008, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.treewalk;
45  
46  import java.io.IOException;
47  
48  import org.eclipse.jgit.annotations.Nullable;
49  import org.eclipse.jgit.dircache.DirCacheBuilder;
50  import org.eclipse.jgit.errors.CorruptObjectException;
51  import org.eclipse.jgit.lib.FileMode;
52  import org.eclipse.jgit.lib.ObjectReader;
53  import org.eclipse.jgit.lib.Repository;
54  
55  /**
56   * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
57   * <p>
58   * Due to the way a Git tree is organized the standard {@link TreeWalk} won't
59   * easily find a D/F conflict when merging two or more trees together. In the
60   * standard TreeWalk the file will be returned first, and then much later the
61   * directory will be returned. This makes it impossible for the application to
62   * efficiently detect and handle the conflict.
63   * <p>
64   * Using this walk implementation causes the directory to report earlier than
65   * usual, at the same time as the non-directory entry. This permits the
66   * application to handle the D/F conflict in a single step. The directory is
67   * returned only once, so it does not get returned later in the iteration.
68   * <p>
69   * When a D/F conflict is detected {@link TreeWalk#isSubtree()} will return true
70   * and {@link TreeWalk#enterSubtree()} will recurse into the subtree, no matter
71   * which iterator originally supplied the subtree.
72   * <p>
73   * Because conflicted directories report early, using this walk implementation
74   * to populate a {@link DirCacheBuilder} may cause the automatic resorting to
75   * run and fix the entry ordering.
76   * <p>
77   * This walk implementation requires more CPU to implement a look-ahead and a
78   * look-behind to merge a D/F pair together, or to skip a previously reported
79   * directory. In typical Git repositories the look-ahead cost is 0 and the
80   * look-behind doesn't trigger, as users tend not to create trees which contain
81   * both "foo" as a directory and "foo.c" as a file.
82   * <p>
83   * In the worst-case however several thousand look-ahead steps per walk step may
84   * be necessary, making the overhead quite significant. Since this worst-case
85   * should never happen this walk implementation has made the time/space tradeoff
86   * in favor of more-time/less-space, as that better suits the typical case.
87   */
88  public class NameConflictTreeWalk extends TreeWalk {
89  	private static final int TREE_MODE = FileMode.TREE.getBits();
90  
91  	private boolean fastMinHasMatch;
92  
93  	private AbstractTreeIterator dfConflict;
94  
95  	/**
96  	 * Create a new tree walker for a given repository.
97  	 *
98  	 * @param repo
99  	 *            the repository the walker will obtain data from.
100 	 */
101 	public NameConflictTreeWalk(final Repository repo) {
102 		super(repo);
103 	}
104 
105 	/**
106 	 * Create a new tree walker for a given repository.
107 	 *
108 	 * @param repo
109 	 *            the repository the walker will obtain data from.
110 	 * @param or
111 	 *            the reader the walker will obtain tree data from.
112 	 * @since 4.3
113 	 */
114 	public NameConflictTreeWalk(@Nullable Repository repo, final ObjectReader or) {
115 		super(repo, or);
116 	}
117 
118 	/**
119 	 * Create a new tree walker for a given repository.
120 	 *
121 	 * @param or
122 	 *            the reader the walker will obtain tree data from.
123 	 */
124 	public NameConflictTreeWalk(final ObjectReader or) {
125 		super(or);
126 	}
127 
128 	@Override
129 	AbstractTreeIterator min() throws CorruptObjectException {
130 		for (;;) {
131 			final AbstractTreeIterator minRef = fastMin();
132 			if (fastMinHasMatch)
133 				return minRef;
134 
135 			if (isTree(minRef)) {
136 				if (skipEntry(minRef)) {
137 					for (final AbstractTreeIterator t : trees) {
138 						if (t.matches == minRef) {
139 							t.next(1);
140 							t.matches = null;
141 						}
142 					}
143 					continue;
144 				}
145 				return minRef;
146 			}
147 
148 			return combineDF(minRef);
149 		}
150 	}
151 
152 	private AbstractTreeIterator fastMin() {
153 		fastMinHasMatch = true;
154 
155 		int i = 0;
156 		AbstractTreeIterator minRef = trees[i];
157 		while (minRef.eof() && ++i < trees.length)
158 			minRef = trees[i];
159 		if (minRef.eof())
160 			return minRef;
161 
162 		boolean hasConflict = false;
163 		minRef.matches = minRef;
164 		while (++i < trees.length) {
165 			final AbstractTreeIterator t = trees[i];
166 			if (t.eof())
167 				continue;
168 
169 			final int cmp = t.pathCompare(minRef);
170 			if (cmp < 0) {
171 				if (fastMinHasMatch && isTree(minRef) && !isTree(t)
172 						&& nameEqual(minRef, t)) {
173 					// We used to be at a tree, but now we are at a file
174 					// with the same name. Allow the file to match the
175 					// tree anyway.
176 					//
177 					t.matches = minRef;
178 					hasConflict = true;
179 				} else {
180 					fastMinHasMatch = false;
181 					t.matches = t;
182 					minRef = t;
183 				}
184 			} else if (cmp == 0) {
185 				// Exact name/mode match is best.
186 				//
187 				t.matches = minRef;
188 			} else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
189 					&& nameEqual(t, minRef)) {
190 				// The minimum is a file (non-tree) but the next entry
191 				// of this iterator is a tree whose name matches our file.
192 				// This is a classic D/F conflict and commonly occurs like
193 				// this, with no gaps in between the file and directory.
194 				//
195 				// Use the tree as the minimum instead (see combineDF).
196 				//
197 
198 				for (int k = 0; k < i; k++) {
199 					final AbstractTreeIterator p = trees[k];
200 					if (p.matches == minRef)
201 						p.matches = t;
202 				}
203 				t.matches = t;
204 				minRef = t;
205 				hasConflict = true;
206 			} else
207 				fastMinHasMatch = false;
208 		}
209 
210 		if (hasConflict && fastMinHasMatch && dfConflict == null)
211 			dfConflict = minRef;
212 		return minRef;
213 	}
214 
215 	private static boolean nameEqual(final AbstractTreeIterator a,
216 			final AbstractTreeIterator b) {
217 		return a.pathCompare(b, TREE_MODE) == 0;
218 	}
219 
220 	private static boolean isTree(final AbstractTreeIterator p) {
221 		return FileMode.TREE.equals(p.mode);
222 	}
223 
224 	private boolean skipEntry(final AbstractTreeIterator minRef)
225 			throws CorruptObjectException {
226 		// A tree D/F may have been handled earlier. We need to
227 		// not report this path if it has already been reported.
228 		//
229 		for (final AbstractTreeIterator t : trees) {
230 			if (t.matches == minRef || t.first())
231 				continue;
232 
233 			int stepsBack = 0;
234 			for (;;) {
235 				stepsBack++;
236 				t.back(1);
237 
238 				final int cmp = t.pathCompare(minRef, 0);
239 				if (cmp == 0) {
240 					// We have already seen this "$path" before. Skip it.
241 					//
242 					t.next(stepsBack);
243 					return true;
244 				} else if (cmp < 0 || t.first()) {
245 					// We cannot find "$path" in t; it will never appear.
246 					//
247 					t.next(stepsBack);
248 					break;
249 				}
250 			}
251 		}
252 
253 		// We have never seen the current path before.
254 		//
255 		return false;
256 	}
257 
258 	private AbstractTreeIterator combineDF(final AbstractTreeIterator minRef)
259 			throws CorruptObjectException {
260 		// Look for a possible D/F conflict forward in the tree(s)
261 		// as there may be a "$path/" which matches "$path". Make
262 		// such entries match this entry.
263 		//
264 		AbstractTreeIterator treeMatch = null;
265 		for (final AbstractTreeIterator t : trees) {
266 			if (t.matches == minRef || t.eof())
267 				continue;
268 
269 			for (;;) {
270 				final int cmp = t.pathCompare(minRef, TREE_MODE);
271 				if (cmp < 0) {
272 					// The "$path/" may still appear later.
273 					//
274 					t.matchShift++;
275 					t.next(1);
276 					if (t.eof()) {
277 						t.back(t.matchShift);
278 						t.matchShift = 0;
279 						break;
280 					}
281 				} else if (cmp == 0) {
282 					// We have a conflict match here.
283 					//
284 					t.matches = minRef;
285 					treeMatch = t;
286 					break;
287 				} else {
288 					// A conflict match is not possible.
289 					//
290 					if (t.matchShift != 0) {
291 						t.back(t.matchShift);
292 						t.matchShift = 0;
293 					}
294 					break;
295 				}
296 			}
297 		}
298 
299 		if (treeMatch != null) {
300 			// If we do have a conflict use one of the directory
301 			// matching iterators instead of the file iterator.
302 			// This way isSubtree is true and isRecursive works.
303 			//
304 			for (final AbstractTreeIterator t : trees)
305 				if (t.matches == minRef)
306 					t.matches = treeMatch;
307 
308 			if (dfConflict == null)
309 				dfConflict = treeMatch;
310 
311 			return treeMatch;
312 		}
313 
314 		return minRef;
315 	}
316 
317 	@Override
318 	void popEntriesEqual() throws CorruptObjectException {
319 		final AbstractTreeIterator ch = currentHead;
320 		for (int i = 0; i < trees.length; i++) {
321 			final AbstractTreeIterator t = trees[i];
322 			if (t.matches == ch) {
323 				if (t.matchShift == 0)
324 					t.next(1);
325 				else {
326 					t.back(t.matchShift);
327 					t.matchShift = 0;
328 				}
329 				t.matches = null;
330 			}
331 		}
332 
333 		if (ch == dfConflict)
334 			dfConflict = null;
335 	}
336 
337 	@Override
338 	void skipEntriesEqual() throws CorruptObjectException {
339 		final AbstractTreeIterator ch = currentHead;
340 		for (int i = 0; i < trees.length; i++) {
341 			final AbstractTreeIterator t = trees[i];
342 			if (t.matches == ch) {
343 				if (t.matchShift == 0)
344 					t.skip();
345 				else {
346 					t.back(t.matchShift);
347 					t.matchShift = 0;
348 				}
349 				t.matches = null;
350 			}
351 		}
352 
353 		if (ch == dfConflict)
354 			dfConflict = null;
355 	}
356 
357 	@Override
358 	void stopWalk() throws IOException {
359 		if (!needsStopWalk()) {
360 			return;
361 		}
362 
363 		// Name conflicts make aborting early difficult. Multiple paths may
364 		// exist between the file and directory versions of a name. To ensure
365 		// the directory version is skipped over (as it was previously visited
366 		// during the file version step) requires popping up the stack and
367 		// finishing out each subtree that the walker dove into. Siblings in
368 		// parents do not need to be recursed into, bounding the cost.
369 		for (;;) {
370 			AbstractTreeIterator t = min();
371 			if (t.eof()) {
372 				if (depth > 0) {
373 					exitSubtree();
374 					popEntriesEqual();
375 					continue;
376 				}
377 				return;
378 			}
379 			currentHead = t;
380 			skipEntriesEqual();
381 		}
382 	}
383 
384 	private boolean needsStopWalk() {
385 		for (AbstractTreeIterator t : trees) {
386 			if (t.needsStopWalk()) {
387 				return true;
388 			}
389 		}
390 		return false;
391 	}
392 
393 	/**
394 	 * True if the current entry is covered by a directory/file conflict.
395 	 *
396 	 * This means that for some prefix of the current entry's path, this walk
397 	 * has detected a directory/file conflict. Also true if the current entry
398 	 * itself is a directory/file conflict.
399 	 *
400 	 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
401 	 * true then you know that either for path foo or for path foo/bar files and
402 	 * folders were detected.
403 	 *
404 	 * @return <code>true</code> if the current entry is covered by a
405 	 *         directory/file conflict, <code>false</code> otherwise
406 	 */
407 	public boolean isDirectoryFileConflict() {
408 		return dfConflict != null;
409 	}
410 }