1 /*
2 * Copyright (C) 2008, Google Inc.
3 * and other copyright owners as documented in the project's IP log.
4 *
5 * This program and the accompanying materials are made available
6 * under the terms of the Eclipse Distribution License v1.0 which
7 * accompanies this distribution, is reproduced below, and is
8 * available at http://www.eclipse.org/org/documents/edl-v10.php
9 *
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * - Neither the name of the Eclipse Foundation, Inc. nor the
25 * names of its contributors may be used to endorse or promote
26 * products derived from this software without specific prior
27 * written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 package org.eclipse.jgit.treewalk;
45
46 import java.io.IOException;
47
48 import org.eclipse.jgit.annotations.Nullable;
49 import org.eclipse.jgit.dircache.DirCacheBuilder;
50 import org.eclipse.jgit.errors.CorruptObjectException;
51 import org.eclipse.jgit.lib.FileMode;
52 import org.eclipse.jgit.lib.ObjectReader;
53 import org.eclipse.jgit.lib.Repository;
54
55 /**
56 * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
57 * <p>
58 * Due to the way a Git tree is organized the standard {@link TreeWalk} won't
59 * easily find a D/F conflict when merging two or more trees together. In the
60 * standard TreeWalk the file will be returned first, and then much later the
61 * directory will be returned. This makes it impossible for the application to
62 * efficiently detect and handle the conflict.
63 * <p>
64 * Using this walk implementation causes the directory to report earlier than
65 * usual, at the same time as the non-directory entry. This permits the
66 * application to handle the D/F conflict in a single step. The directory is
67 * returned only once, so it does not get returned later in the iteration.
68 * <p>
69 * When a D/F conflict is detected {@link TreeWalk#isSubtree()} will return true
70 * and {@link TreeWalk#enterSubtree()} will recurse into the subtree, no matter
71 * which iterator originally supplied the subtree.
72 * <p>
73 * Because conflicted directories report early, using this walk implementation
74 * to populate a {@link DirCacheBuilder} may cause the automatic resorting to
75 * run and fix the entry ordering.
76 * <p>
77 * This walk implementation requires more CPU to implement a look-ahead and a
78 * look-behind to merge a D/F pair together, or to skip a previously reported
79 * directory. In typical Git repositories the look-ahead cost is 0 and the
80 * look-behind doesn't trigger, as users tend not to create trees which contain
81 * both "foo" as a directory and "foo.c" as a file.
82 * <p>
83 * In the worst-case however several thousand look-ahead steps per walk step may
84 * be necessary, making the overhead quite significant. Since this worst-case
85 * should never happen this walk implementation has made the time/space tradeoff
86 * in favor of more-time/less-space, as that better suits the typical case.
87 */
88 public class NameConflictTreeWalk extends TreeWalk {
89 private static final int TREE_MODE = FileMode.TREE.getBits();
90
91 private boolean fastMinHasMatch;
92
93 private AbstractTreeIterator dfConflict;
94
95 /**
96 * Create a new tree walker for a given repository.
97 *
98 * @param repo
99 * the repository the walker will obtain data from.
100 */
101 public NameConflictTreeWalk(final Repository repo) {
102 super(repo);
103 }
104
105 /**
106 * Create a new tree walker for a given repository.
107 *
108 * @param repo
109 * the repository the walker will obtain data from.
110 * @param or
111 * the reader the walker will obtain tree data from.
112 * @since 4.3
113 */
114 public NameConflictTreeWalk(@Nullable Repository repo, final ObjectReader or) {
115 super(repo, or);
116 }
117
118 /**
119 * Create a new tree walker for a given repository.
120 *
121 * @param or
122 * the reader the walker will obtain tree data from.
123 */
124 public NameConflictTreeWalk(final ObjectReader or) {
125 super(or);
126 }
127
128 @Override
129 AbstractTreeIterator min() throws CorruptObjectException {
130 for (;;) {
131 final AbstractTreeIterator minRef = fastMin();
132 if (fastMinHasMatch)
133 return minRef;
134
135 if (isTree(minRef)) {
136 if (skipEntry(minRef)) {
137 for (final AbstractTreeIterator t : trees) {
138 if (t.matches == minRef) {
139 t.next(1);
140 t.matches = null;
141 }
142 }
143 continue;
144 }
145 return minRef;
146 }
147
148 return combineDF(minRef);
149 }
150 }
151
152 private AbstractTreeIterator fastMin() {
153 fastMinHasMatch = true;
154
155 int i = 0;
156 AbstractTreeIterator minRef = trees[i];
157 while (minRef.eof() && ++i < trees.length)
158 minRef = trees[i];
159 if (minRef.eof())
160 return minRef;
161
162 boolean hasConflict = false;
163 minRef.matches = minRef;
164 while (++i < trees.length) {
165 final AbstractTreeIterator t = trees[i];
166 if (t.eof())
167 continue;
168
169 final int cmp = t.pathCompare(minRef);
170 if (cmp < 0) {
171 if (fastMinHasMatch && isTree(minRef) && !isTree(t)
172 && nameEqual(minRef, t)) {
173 // We used to be at a tree, but now we are at a file
174 // with the same name. Allow the file to match the
175 // tree anyway.
176 //
177 t.matches = minRef;
178 hasConflict = true;
179 } else {
180 fastMinHasMatch = false;
181 t.matches = t;
182 minRef = t;
183 }
184 } else if (cmp == 0) {
185 // Exact name/mode match is best.
186 //
187 t.matches = minRef;
188 } else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
189 && nameEqual(t, minRef)) {
190 // The minimum is a file (non-tree) but the next entry
191 // of this iterator is a tree whose name matches our file.
192 // This is a classic D/F conflict and commonly occurs like
193 // this, with no gaps in between the file and directory.
194 //
195 // Use the tree as the minimum instead (see combineDF).
196 //
197
198 for (int k = 0; k < i; k++) {
199 final AbstractTreeIterator p = trees[k];
200 if (p.matches == minRef)
201 p.matches = t;
202 }
203 t.matches = t;
204 minRef = t;
205 hasConflict = true;
206 } else
207 fastMinHasMatch = false;
208 }
209
210 if (hasConflict && fastMinHasMatch && dfConflict == null)
211 dfConflict = minRef;
212 return minRef;
213 }
214
215 private static boolean nameEqual(final AbstractTreeIterator a,
216 final AbstractTreeIterator b) {
217 return a.pathCompare(b, TREE_MODE) == 0;
218 }
219
220 private static boolean isTree(final AbstractTreeIterator p) {
221 return FileMode.TREE.equals(p.mode);
222 }
223
224 private boolean skipEntry(final AbstractTreeIterator minRef)
225 throws CorruptObjectException {
226 // A tree D/F may have been handled earlier. We need to
227 // not report this path if it has already been reported.
228 //
229 for (final AbstractTreeIterator t : trees) {
230 if (t.matches == minRef || t.first())
231 continue;
232
233 int stepsBack = 0;
234 for (;;) {
235 stepsBack++;
236 t.back(1);
237
238 final int cmp = t.pathCompare(minRef, 0);
239 if (cmp == 0) {
240 // We have already seen this "$path" before. Skip it.
241 //
242 t.next(stepsBack);
243 return true;
244 } else if (cmp < 0 || t.first()) {
245 // We cannot find "$path" in t; it will never appear.
246 //
247 t.next(stepsBack);
248 break;
249 }
250 }
251 }
252
253 // We have never seen the current path before.
254 //
255 return false;
256 }
257
258 private AbstractTreeIterator combineDF(final AbstractTreeIterator minRef)
259 throws CorruptObjectException {
260 // Look for a possible D/F conflict forward in the tree(s)
261 // as there may be a "$path/" which matches "$path". Make
262 // such entries match this entry.
263 //
264 AbstractTreeIterator treeMatch = null;
265 for (final AbstractTreeIterator t : trees) {
266 if (t.matches == minRef || t.eof())
267 continue;
268
269 for (;;) {
270 final int cmp = t.pathCompare(minRef, TREE_MODE);
271 if (cmp < 0) {
272 // The "$path/" may still appear later.
273 //
274 t.matchShift++;
275 t.next(1);
276 if (t.eof()) {
277 t.back(t.matchShift);
278 t.matchShift = 0;
279 break;
280 }
281 } else if (cmp == 0) {
282 // We have a conflict match here.
283 //
284 t.matches = minRef;
285 treeMatch = t;
286 break;
287 } else {
288 // A conflict match is not possible.
289 //
290 if (t.matchShift != 0) {
291 t.back(t.matchShift);
292 t.matchShift = 0;
293 }
294 break;
295 }
296 }
297 }
298
299 if (treeMatch != null) {
300 // If we do have a conflict use one of the directory
301 // matching iterators instead of the file iterator.
302 // This way isSubtree is true and isRecursive works.
303 //
304 for (final AbstractTreeIterator t : trees)
305 if (t.matches == minRef)
306 t.matches = treeMatch;
307
308 if (dfConflict == null)
309 dfConflict = treeMatch;
310
311 return treeMatch;
312 }
313
314 return minRef;
315 }
316
317 @Override
318 void popEntriesEqual() throws CorruptObjectException {
319 final AbstractTreeIterator ch = currentHead;
320 for (int i = 0; i < trees.length; i++) {
321 final AbstractTreeIterator t = trees[i];
322 if (t.matches == ch) {
323 if (t.matchShift == 0)
324 t.next(1);
325 else {
326 t.back(t.matchShift);
327 t.matchShift = 0;
328 }
329 t.matches = null;
330 }
331 }
332
333 if (ch == dfConflict)
334 dfConflict = null;
335 }
336
337 @Override
338 void skipEntriesEqual() throws CorruptObjectException {
339 final AbstractTreeIterator ch = currentHead;
340 for (int i = 0; i < trees.length; i++) {
341 final AbstractTreeIterator t = trees[i];
342 if (t.matches == ch) {
343 if (t.matchShift == 0)
344 t.skip();
345 else {
346 t.back(t.matchShift);
347 t.matchShift = 0;
348 }
349 t.matches = null;
350 }
351 }
352
353 if (ch == dfConflict)
354 dfConflict = null;
355 }
356
357 @Override
358 void stopWalk() throws IOException {
359 if (!needsStopWalk()) {
360 return;
361 }
362
363 // Name conflicts make aborting early difficult. Multiple paths may
364 // exist between the file and directory versions of a name. To ensure
365 // the directory version is skipped over (as it was previously visited
366 // during the file version step) requires popping up the stack and
367 // finishing out each subtree that the walker dove into. Siblings in
368 // parents do not need to be recursed into, bounding the cost.
369 for (;;) {
370 AbstractTreeIterator t = min();
371 if (t.eof()) {
372 if (depth > 0) {
373 exitSubtree();
374 popEntriesEqual();
375 continue;
376 }
377 return;
378 }
379 currentHead = t;
380 skipEntriesEqual();
381 }
382 }
383
384 private boolean needsStopWalk() {
385 for (AbstractTreeIterator t : trees) {
386 if (t.needsStopWalk()) {
387 return true;
388 }
389 }
390 return false;
391 }
392
393 /**
394 * True if the current entry is covered by a directory/file conflict.
395 *
396 * This means that for some prefix of the current entry's path, this walk
397 * has detected a directory/file conflict. Also true if the current entry
398 * itself is a directory/file conflict.
399 *
400 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
401 * true then you know that either for path foo or for path foo/bar files and
402 * folders were detected.
403 *
404 * @return <code>true</code> if the current entry is covered by a
405 * directory/file conflict, <code>false</code> otherwise
406 */
407 public boolean isDirectoryFileConflict() {
408 return dfConflict != null;
409 }
410 }