View Javadoc
1   /*
2    * Copyright (C) 2008, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.treewalk;
45  
46  import java.io.IOException;
47  
48  import org.eclipse.jgit.dircache.DirCacheBuilder;
49  import org.eclipse.jgit.errors.CorruptObjectException;
50  import org.eclipse.jgit.lib.FileMode;
51  import org.eclipse.jgit.lib.ObjectReader;
52  import org.eclipse.jgit.lib.Repository;
53  
54  /**
55   * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
56   * <p>
57   * Due to the way a Git tree is organized the standard {@link TreeWalk} won't
58   * easily find a D/F conflict when merging two or more trees together. In the
59   * standard TreeWalk the file will be returned first, and then much later the
60   * directory will be returned. This makes it impossible for the application to
61   * efficiently detect and handle the conflict.
62   * <p>
63   * Using this walk implementation causes the directory to report earlier than
64   * usual, at the same time as the non-directory entry. This permits the
65   * application to handle the D/F conflict in a single step. The directory is
66   * returned only once, so it does not get returned later in the iteration.
67   * <p>
68   * When a D/F conflict is detected {@link TreeWalk#isSubtree()} will return true
69   * and {@link TreeWalk#enterSubtree()} will recurse into the subtree, no matter
70   * which iterator originally supplied the subtree.
71   * <p>
72   * Because conflicted directories report early, using this walk implementation
73   * to populate a {@link DirCacheBuilder} may cause the automatic resorting to
74   * run and fix the entry ordering.
75   * <p>
76   * This walk implementation requires more CPU to implement a look-ahead and a
77   * look-behind to merge a D/F pair together, or to skip a previously reported
78   * directory. In typical Git repositories the look-ahead cost is 0 and the
79   * look-behind doesn't trigger, as users tend not to create trees which contain
80   * both "foo" as a directory and "foo.c" as a file.
81   * <p>
82   * In the worst-case however several thousand look-ahead steps per walk step may
83   * be necessary, making the overhead quite significant. Since this worst-case
84   * should never happen this walk implementation has made the time/space tradeoff
85   * in favor of more-time/less-space, as that better suits the typical case.
86   */
87  public class NameConflictTreeWalk extends TreeWalk {
88  	private static final int TREE_MODE = FileMode.TREE.getBits();
89  
90  	private boolean fastMinHasMatch;
91  
92  	private AbstractTreeIterator dfConflict;
93  
94  	/**
95  	 * Create a new tree walker for a given repository.
96  	 *
97  	 * @param repo
98  	 *            the repository the walker will obtain data from.
99  	 */
100 	public NameConflictTreeWalk(final Repository repo) {
101 		super(repo);
102 	}
103 
104 	/**
105 	 * Create a new tree walker for a given repository.
106 	 *
107 	 * @param repo
108 	 *            the repository the walker will obtain data from.
109 	 * @param or
110 	 *            the reader the walker will obtain tree data from.
111 	 * @since 4.3
112 	 */
113 	public NameConflictTreeWalk(Repository repo, final ObjectReader or) {
114 		super(repo, or);
115 	}
116 
117 	/**
118 	 * Create a new tree walker for a given repository.
119 	 *
120 	 * @param or
121 	 *            the reader the walker will obtain tree data from.
122 	 */
123 	public NameConflictTreeWalk(final ObjectReader or) {
124 		super(or);
125 	}
126 
127 	@Override
128 	AbstractTreeIterator min() throws CorruptObjectException {
129 		for (;;) {
130 			final AbstractTreeIterator minRef = fastMin();
131 			if (fastMinHasMatch)
132 				return minRef;
133 
134 			if (isTree(minRef)) {
135 				if (skipEntry(minRef)) {
136 					for (final AbstractTreeIterator t : trees) {
137 						if (t.matches == minRef) {
138 							t.next(1);
139 							t.matches = null;
140 						}
141 					}
142 					continue;
143 				}
144 				return minRef;
145 			}
146 
147 			return combineDF(minRef);
148 		}
149 	}
150 
151 	private AbstractTreeIterator fastMin() {
152 		fastMinHasMatch = true;
153 
154 		int i = 0;
155 		AbstractTreeIterator minRef = trees[i];
156 		while (minRef.eof() && ++i < trees.length)
157 			minRef = trees[i];
158 		if (minRef.eof())
159 			return minRef;
160 
161 		boolean hasConflict = false;
162 		minRef.matches = minRef;
163 		while (++i < trees.length) {
164 			final AbstractTreeIterator t = trees[i];
165 			if (t.eof())
166 				continue;
167 
168 			final int cmp = t.pathCompare(minRef);
169 			if (cmp < 0) {
170 				if (fastMinHasMatch && isTree(minRef) && !isTree(t)
171 						&& nameEqual(minRef, t)) {
172 					// We used to be at a tree, but now we are at a file
173 					// with the same name. Allow the file to match the
174 					// tree anyway.
175 					//
176 					t.matches = minRef;
177 					hasConflict = true;
178 				} else {
179 					fastMinHasMatch = false;
180 					t.matches = t;
181 					minRef = t;
182 				}
183 			} else if (cmp == 0) {
184 				// Exact name/mode match is best.
185 				//
186 				t.matches = minRef;
187 			} else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
188 					&& nameEqual(t, minRef)) {
189 				// The minimum is a file (non-tree) but the next entry
190 				// of this iterator is a tree whose name matches our file.
191 				// This is a classic D/F conflict and commonly occurs like
192 				// this, with no gaps in between the file and directory.
193 				//
194 				// Use the tree as the minimum instead (see combineDF).
195 				//
196 
197 				for (int k = 0; k < i; k++) {
198 					final AbstractTreeIterator p = trees[k];
199 					if (p.matches == minRef)
200 						p.matches = t;
201 				}
202 				t.matches = t;
203 				minRef = t;
204 				hasConflict = true;
205 			} else
206 				fastMinHasMatch = false;
207 		}
208 
209 		if (hasConflict && fastMinHasMatch && dfConflict == null)
210 			dfConflict = minRef;
211 		return minRef;
212 	}
213 
214 	private static boolean nameEqual(final AbstractTreeIterator a,
215 			final AbstractTreeIterator b) {
216 		return a.pathCompare(b, TREE_MODE) == 0;
217 	}
218 
219 	private static boolean isTree(final AbstractTreeIterator p) {
220 		return FileMode.TREE.equals(p.mode);
221 	}
222 
223 	private boolean skipEntry(final AbstractTreeIterator minRef)
224 			throws CorruptObjectException {
225 		// A tree D/F may have been handled earlier. We need to
226 		// not report this path if it has already been reported.
227 		//
228 		for (final AbstractTreeIterator t : trees) {
229 			if (t.matches == minRef || t.first())
230 				continue;
231 
232 			int stepsBack = 0;
233 			for (;;) {
234 				stepsBack++;
235 				t.back(1);
236 
237 				final int cmp = t.pathCompare(minRef, 0);
238 				if (cmp == 0) {
239 					// We have already seen this "$path" before. Skip it.
240 					//
241 					t.next(stepsBack);
242 					return true;
243 				} else if (cmp < 0 || t.first()) {
244 					// We cannot find "$path" in t; it will never appear.
245 					//
246 					t.next(stepsBack);
247 					break;
248 				}
249 			}
250 		}
251 
252 		// We have never seen the current path before.
253 		//
254 		return false;
255 	}
256 
257 	private AbstractTreeIterator combineDF(final AbstractTreeIterator minRef)
258 			throws CorruptObjectException {
259 		// Look for a possible D/F conflict forward in the tree(s)
260 		// as there may be a "$path/" which matches "$path". Make
261 		// such entries match this entry.
262 		//
263 		AbstractTreeIterator treeMatch = null;
264 		for (final AbstractTreeIterator t : trees) {
265 			if (t.matches == minRef || t.eof())
266 				continue;
267 
268 			for (;;) {
269 				final int cmp = t.pathCompare(minRef, TREE_MODE);
270 				if (cmp < 0) {
271 					// The "$path/" may still appear later.
272 					//
273 					t.matchShift++;
274 					t.next(1);
275 					if (t.eof()) {
276 						t.back(t.matchShift);
277 						t.matchShift = 0;
278 						break;
279 					}
280 				} else if (cmp == 0) {
281 					// We have a conflict match here.
282 					//
283 					t.matches = minRef;
284 					treeMatch = t;
285 					break;
286 				} else {
287 					// A conflict match is not possible.
288 					//
289 					if (t.matchShift != 0) {
290 						t.back(t.matchShift);
291 						t.matchShift = 0;
292 					}
293 					break;
294 				}
295 			}
296 		}
297 
298 		if (treeMatch != null) {
299 			// If we do have a conflict use one of the directory
300 			// matching iterators instead of the file iterator.
301 			// This way isSubtree is true and isRecursive works.
302 			//
303 			for (final AbstractTreeIterator t : trees)
304 				if (t.matches == minRef)
305 					t.matches = treeMatch;
306 
307 			if (dfConflict == null)
308 				dfConflict = treeMatch;
309 
310 			return treeMatch;
311 		}
312 
313 		return minRef;
314 	}
315 
316 	@Override
317 	void popEntriesEqual() throws CorruptObjectException {
318 		final AbstractTreeIterator ch = currentHead;
319 		for (int i = 0; i < trees.length; i++) {
320 			final AbstractTreeIterator t = trees[i];
321 			if (t.matches == ch) {
322 				if (t.matchShift == 0)
323 					t.next(1);
324 				else {
325 					t.back(t.matchShift);
326 					t.matchShift = 0;
327 				}
328 				t.matches = null;
329 			}
330 		}
331 
332 		if (ch == dfConflict)
333 			dfConflict = null;
334 	}
335 
336 	@Override
337 	void skipEntriesEqual() throws CorruptObjectException {
338 		final AbstractTreeIterator ch = currentHead;
339 		for (int i = 0; i < trees.length; i++) {
340 			final AbstractTreeIterator t = trees[i];
341 			if (t.matches == ch) {
342 				if (t.matchShift == 0)
343 					t.skip();
344 				else {
345 					t.back(t.matchShift);
346 					t.matchShift = 0;
347 				}
348 				t.matches = null;
349 			}
350 		}
351 
352 		if (ch == dfConflict)
353 			dfConflict = null;
354 	}
355 
356 	@Override
357 	void stopWalk() throws IOException {
358 		if (!needsStopWalk()) {
359 			return;
360 		}
361 
362 		// Name conflicts make aborting early difficult. Multiple paths may
363 		// exist between the file and directory versions of a name. To ensure
364 		// the directory version is skipped over (as it was previously visited
365 		// during the file version step) requires popping up the stack and
366 		// finishing out each subtree that the walker dove into. Siblings in
367 		// parents do not need to be recursed into, bounding the cost.
368 		for (;;) {
369 			AbstractTreeIterator t = min();
370 			if (t.eof()) {
371 				if (depth > 0) {
372 					exitSubtree();
373 					popEntriesEqual();
374 					continue;
375 				}
376 				return;
377 			}
378 			currentHead = t;
379 			skipEntriesEqual();
380 		}
381 	}
382 
383 	private boolean needsStopWalk() {
384 		for (AbstractTreeIterator t : trees) {
385 			if (t.needsStopWalk()) {
386 				return true;
387 			}
388 		}
389 		return false;
390 	}
391 
392 	/**
393 	 * True if the current entry is covered by a directory/file conflict.
394 	 *
395 	 * This means that for some prefix of the current entry's path, this walk
396 	 * has detected a directory/file conflict. Also true if the current entry
397 	 * itself is a directory/file conflict.
398 	 *
399 	 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
400 	 * true then you know that either for path foo or for path foo/bar files and
401 	 * folders were detected.
402 	 *
403 	 * @return <code>true</code> if the current entry is covered by a
404 	 *         directory/file conflict, <code>false</code> otherwise
405 	 */
406 	public boolean isDirectoryFileConflict() {
407 		return dfConflict != null;
408 	}
409 }