1 /*
2 * Copyright (C) 2008, Google Inc.
3 * and other copyright owners as documented in the project's IP log.
4 *
5 * This program and the accompanying materials are made available
6 * under the terms of the Eclipse Distribution License v1.0 which
7 * accompanies this distribution, is reproduced below, and is
8 * available at http://www.eclipse.org/org/documents/edl-v10.php
9 *
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * - Neither the name of the Eclipse Foundation, Inc. nor the
25 * names of its contributors may be used to endorse or promote
26 * products derived from this software without specific prior
27 * written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 package org.eclipse.jgit.treewalk;
45
46 import java.io.IOException;
47
48 import org.eclipse.jgit.dircache.DirCacheBuilder;
49 import org.eclipse.jgit.errors.CorruptObjectException;
50 import org.eclipse.jgit.lib.FileMode;
51 import org.eclipse.jgit.lib.ObjectReader;
52 import org.eclipse.jgit.lib.Repository;
53
54 /**
55 * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
56 * <p>
57 * Due to the way a Git tree is organized the standard {@link TreeWalk} won't
58 * easily find a D/F conflict when merging two or more trees together. In the
59 * standard TreeWalk the file will be returned first, and then much later the
60 * directory will be returned. This makes it impossible for the application to
61 * efficiently detect and handle the conflict.
62 * <p>
63 * Using this walk implementation causes the directory to report earlier than
64 * usual, at the same time as the non-directory entry. This permits the
65 * application to handle the D/F conflict in a single step. The directory is
66 * returned only once, so it does not get returned later in the iteration.
67 * <p>
68 * When a D/F conflict is detected {@link TreeWalk#isSubtree()} will return true
69 * and {@link TreeWalk#enterSubtree()} will recurse into the subtree, no matter
70 * which iterator originally supplied the subtree.
71 * <p>
72 * Because conflicted directories report early, using this walk implementation
73 * to populate a {@link DirCacheBuilder} may cause the automatic resorting to
74 * run and fix the entry ordering.
75 * <p>
76 * This walk implementation requires more CPU to implement a look-ahead and a
77 * look-behind to merge a D/F pair together, or to skip a previously reported
78 * directory. In typical Git repositories the look-ahead cost is 0 and the
79 * look-behind doesn't trigger, as users tend not to create trees which contain
80 * both "foo" as a directory and "foo.c" as a file.
81 * <p>
82 * In the worst-case however several thousand look-ahead steps per walk step may
83 * be necessary, making the overhead quite significant. Since this worst-case
84 * should never happen this walk implementation has made the time/space tradeoff
85 * in favor of more-time/less-space, as that better suits the typical case.
86 */
87 public class NameConflictTreeWalk extends TreeWalk {
88 private static final int TREE_MODE = FileMode.TREE.getBits();
89
90 private boolean fastMinHasMatch;
91
92 private AbstractTreeIterator dfConflict;
93
94 /**
95 * Create a new tree walker for a given repository.
96 *
97 * @param repo
98 * the repository the walker will obtain data from.
99 */
100 public NameConflictTreeWalk(final Repository repo) {
101 super(repo);
102 }
103
104 /**
105 * Create a new tree walker for a given repository.
106 *
107 * @param repo
108 * the repository the walker will obtain data from.
109 * @param or
110 * the reader the walker will obtain tree data from.
111 * @since 4.3
112 */
113 public NameConflictTreeWalk(Repository repo, final ObjectReader or) {
114 super(repo, or);
115 }
116
117 /**
118 * Create a new tree walker for a given repository.
119 *
120 * @param or
121 * the reader the walker will obtain tree data from.
122 */
123 public NameConflictTreeWalk(final ObjectReader or) {
124 super(or);
125 }
126
127 @Override
128 AbstractTreeIterator min() throws CorruptObjectException {
129 for (;;) {
130 final AbstractTreeIterator minRef = fastMin();
131 if (fastMinHasMatch)
132 return minRef;
133
134 if (isTree(minRef)) {
135 if (skipEntry(minRef)) {
136 for (final AbstractTreeIterator t : trees) {
137 if (t.matches == minRef) {
138 t.next(1);
139 t.matches = null;
140 }
141 }
142 continue;
143 }
144 return minRef;
145 }
146
147 return combineDF(minRef);
148 }
149 }
150
151 private AbstractTreeIterator fastMin() {
152 fastMinHasMatch = true;
153
154 int i = 0;
155 AbstractTreeIterator minRef = trees[i];
156 while (minRef.eof() && ++i < trees.length)
157 minRef = trees[i];
158 if (minRef.eof())
159 return minRef;
160
161 boolean hasConflict = false;
162 minRef.matches = minRef;
163 while (++i < trees.length) {
164 final AbstractTreeIterator t = trees[i];
165 if (t.eof())
166 continue;
167
168 final int cmp = t.pathCompare(minRef);
169 if (cmp < 0) {
170 if (fastMinHasMatch && isTree(minRef) && !isTree(t)
171 && nameEqual(minRef, t)) {
172 // We used to be at a tree, but now we are at a file
173 // with the same name. Allow the file to match the
174 // tree anyway.
175 //
176 t.matches = minRef;
177 hasConflict = true;
178 } else {
179 fastMinHasMatch = false;
180 t.matches = t;
181 minRef = t;
182 }
183 } else if (cmp == 0) {
184 // Exact name/mode match is best.
185 //
186 t.matches = minRef;
187 } else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
188 && nameEqual(t, minRef)) {
189 // The minimum is a file (non-tree) but the next entry
190 // of this iterator is a tree whose name matches our file.
191 // This is a classic D/F conflict and commonly occurs like
192 // this, with no gaps in between the file and directory.
193 //
194 // Use the tree as the minimum instead (see combineDF).
195 //
196
197 for (int k = 0; k < i; k++) {
198 final AbstractTreeIterator p = trees[k];
199 if (p.matches == minRef)
200 p.matches = t;
201 }
202 t.matches = t;
203 minRef = t;
204 hasConflict = true;
205 } else
206 fastMinHasMatch = false;
207 }
208
209 if (hasConflict && fastMinHasMatch && dfConflict == null)
210 dfConflict = minRef;
211 return minRef;
212 }
213
214 private static boolean nameEqual(final AbstractTreeIterator a,
215 final AbstractTreeIterator b) {
216 return a.pathCompare(b, TREE_MODE) == 0;
217 }
218
219 private static boolean isTree(final AbstractTreeIterator p) {
220 return FileMode.TREE.equals(p.mode);
221 }
222
223 private boolean skipEntry(final AbstractTreeIterator minRef)
224 throws CorruptObjectException {
225 // A tree D/F may have been handled earlier. We need to
226 // not report this path if it has already been reported.
227 //
228 for (final AbstractTreeIterator t : trees) {
229 if (t.matches == minRef || t.first())
230 continue;
231
232 int stepsBack = 0;
233 for (;;) {
234 stepsBack++;
235 t.back(1);
236
237 final int cmp = t.pathCompare(minRef, 0);
238 if (cmp == 0) {
239 // We have already seen this "$path" before. Skip it.
240 //
241 t.next(stepsBack);
242 return true;
243 } else if (cmp < 0 || t.first()) {
244 // We cannot find "$path" in t; it will never appear.
245 //
246 t.next(stepsBack);
247 break;
248 }
249 }
250 }
251
252 // We have never seen the current path before.
253 //
254 return false;
255 }
256
257 private AbstractTreeIterator combineDF(final AbstractTreeIterator minRef)
258 throws CorruptObjectException {
259 // Look for a possible D/F conflict forward in the tree(s)
260 // as there may be a "$path/" which matches "$path". Make
261 // such entries match this entry.
262 //
263 AbstractTreeIterator treeMatch = null;
264 for (final AbstractTreeIterator t : trees) {
265 if (t.matches == minRef || t.eof())
266 continue;
267
268 for (;;) {
269 final int cmp = t.pathCompare(minRef, TREE_MODE);
270 if (cmp < 0) {
271 // The "$path/" may still appear later.
272 //
273 t.matchShift++;
274 t.next(1);
275 if (t.eof()) {
276 t.back(t.matchShift);
277 t.matchShift = 0;
278 break;
279 }
280 } else if (cmp == 0) {
281 // We have a conflict match here.
282 //
283 t.matches = minRef;
284 treeMatch = t;
285 break;
286 } else {
287 // A conflict match is not possible.
288 //
289 if (t.matchShift != 0) {
290 t.back(t.matchShift);
291 t.matchShift = 0;
292 }
293 break;
294 }
295 }
296 }
297
298 if (treeMatch != null) {
299 // If we do have a conflict use one of the directory
300 // matching iterators instead of the file iterator.
301 // This way isSubtree is true and isRecursive works.
302 //
303 for (final AbstractTreeIterator t : trees)
304 if (t.matches == minRef)
305 t.matches = treeMatch;
306
307 if (dfConflict == null)
308 dfConflict = treeMatch;
309
310 return treeMatch;
311 }
312
313 return minRef;
314 }
315
316 @Override
317 void popEntriesEqual() throws CorruptObjectException {
318 final AbstractTreeIterator ch = currentHead;
319 for (int i = 0; i < trees.length; i++) {
320 final AbstractTreeIterator t = trees[i];
321 if (t.matches == ch) {
322 if (t.matchShift == 0)
323 t.next(1);
324 else {
325 t.back(t.matchShift);
326 t.matchShift = 0;
327 }
328 t.matches = null;
329 }
330 }
331
332 if (ch == dfConflict)
333 dfConflict = null;
334 }
335
336 @Override
337 void skipEntriesEqual() throws CorruptObjectException {
338 final AbstractTreeIterator ch = currentHead;
339 for (int i = 0; i < trees.length; i++) {
340 final AbstractTreeIterator t = trees[i];
341 if (t.matches == ch) {
342 if (t.matchShift == 0)
343 t.skip();
344 else {
345 t.back(t.matchShift);
346 t.matchShift = 0;
347 }
348 t.matches = null;
349 }
350 }
351
352 if (ch == dfConflict)
353 dfConflict = null;
354 }
355
356 @Override
357 void stopWalk() throws IOException {
358 if (!needsStopWalk()) {
359 return;
360 }
361
362 // Name conflicts make aborting early difficult. Multiple paths may
363 // exist between the file and directory versions of a name. To ensure
364 // the directory version is skipped over (as it was previously visited
365 // during the file version step) requires popping up the stack and
366 // finishing out each subtree that the walker dove into. Siblings in
367 // parents do not need to be recursed into, bounding the cost.
368 for (;;) {
369 AbstractTreeIterator t = min();
370 if (t.eof()) {
371 if (depth > 0) {
372 exitSubtree();
373 popEntriesEqual();
374 continue;
375 }
376 return;
377 }
378 currentHead = t;
379 skipEntriesEqual();
380 }
381 }
382
383 private boolean needsStopWalk() {
384 for (AbstractTreeIterator t : trees) {
385 if (t.needsStopWalk()) {
386 return true;
387 }
388 }
389 return false;
390 }
391
392 /**
393 * True if the current entry is covered by a directory/file conflict.
394 *
395 * This means that for some prefix of the current entry's path, this walk
396 * has detected a directory/file conflict. Also true if the current entry
397 * itself is a directory/file conflict.
398 *
399 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
400 * true then you know that either for path foo or for path foo/bar files and
401 * folders were detected.
402 *
403 * @return <code>true</code> if the current entry is covered by a
404 * directory/file conflict, <code>false</code> otherwise
405 */
406 public boolean isDirectoryFileConflict() {
407 return dfConflict != null;
408 }
409 }