1 /*
2 * Copyright (C) 2008, Google Inc.
3 * and other copyright owners as documented in the project's IP log.
4 *
5 * This program and the accompanying materials are made available
6 * under the terms of the Eclipse Distribution License v1.0 which
7 * accompanies this distribution, is reproduced below, and is
8 * available at http://www.eclipse.org/org/documents/edl-v10.php
9 *
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * - Neither the name of the Eclipse Foundation, Inc. nor the
25 * names of its contributors may be used to endorse or promote
26 * products derived from this software without specific prior
27 * written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 package org.eclipse.jgit.treewalk;
45
46 import java.io.IOException;
47
48 import org.eclipse.jgit.annotations.Nullable;
49 import org.eclipse.jgit.errors.CorruptObjectException;
50 import org.eclipse.jgit.lib.FileMode;
51 import org.eclipse.jgit.lib.ObjectReader;
52 import org.eclipse.jgit.lib.Repository;
53
54 /**
55 * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
56 * <p>
57 * Due to the way a Git tree is organized the standard
58 * {@link org.eclipse.jgit.treewalk.TreeWalk} won't easily find a D/F conflict
59 * when merging two or more trees together. In the standard TreeWalk the file
60 * will be returned first, and then much later the directory will be returned.
61 * This makes it impossible for the application to efficiently detect and handle
62 * the conflict.
63 * <p>
64 * Using this walk implementation causes the directory to report earlier than
65 * usual, at the same time as the non-directory entry. This permits the
66 * application to handle the D/F conflict in a single step. The directory is
67 * returned only once, so it does not get returned later in the iteration.
68 * <p>
69 * When a D/F conflict is detected
70 * {@link org.eclipse.jgit.treewalk.TreeWalk#isSubtree()} will return true and
71 * {@link org.eclipse.jgit.treewalk.TreeWalk#enterSubtree()} will recurse into
72 * the subtree, no matter which iterator originally supplied the subtree.
73 * <p>
74 * Because conflicted directories report early, using this walk implementation
75 * to populate a {@link org.eclipse.jgit.dircache.DirCacheBuilder} may cause the
76 * automatic resorting to run and fix the entry ordering.
77 * <p>
78 * This walk implementation requires more CPU to implement a look-ahead and a
79 * look-behind to merge a D/F pair together, or to skip a previously reported
80 * directory. In typical Git repositories the look-ahead cost is 0 and the
81 * look-behind doesn't trigger, as users tend not to create trees which contain
82 * both "foo" as a directory and "foo.c" as a file.
83 * <p>
84 * In the worst-case however several thousand look-ahead steps per walk step may
85 * be necessary, making the overhead quite significant. Since this worst-case
86 * should never happen this walk implementation has made the time/space tradeoff
87 * in favor of more-time/less-space, as that better suits the typical case.
88 */
89 public class NameConflictTreeWalk extends TreeWalk {
90 private static final int TREE_MODE = FileMode.TREE.getBits();
91
92 private boolean fastMinHasMatch;
93
94 private AbstractTreeIterator dfConflict;
95
96 /**
97 * Create a new tree walker for a given repository.
98 *
99 * @param repo
100 * the repository the walker will obtain data from.
101 */
102 public NameConflictTreeWalk(Repository repo) {
103 super(repo);
104 }
105
106 /**
107 * Create a new tree walker for a given repository.
108 *
109 * @param repo
110 * the repository the walker will obtain data from.
111 * @param or
112 * the reader the walker will obtain tree data from.
113 * @since 4.3
114 */
115 public NameConflictTreeWalk(@Nullable Repository repo, ObjectReader or) {
116 super(repo, or);
117 }
118
119 /**
120 * Create a new tree walker for a given repository.
121 *
122 * @param or
123 * the reader the walker will obtain tree data from.
124 */
125 public NameConflictTreeWalk(ObjectReader or) {
126 super(or);
127 }
128
129 @Override
130 AbstractTreeIterator min() throws CorruptObjectException {
131 for (;;) {
132 final AbstractTreeIterator minRef = fastMin();
133 if (fastMinHasMatch)
134 return minRef;
135
136 if (isTree(minRef)) {
137 if (skipEntry(minRef)) {
138 for (AbstractTreeIterator t : trees) {
139 if (t.matches == minRef) {
140 t.next(1);
141 t.matches = null;
142 }
143 }
144 continue;
145 }
146 return minRef;
147 }
148
149 return combineDF(minRef);
150 }
151 }
152
153 private AbstractTreeIterator fastMin() {
154 fastMinHasMatch = true;
155
156 int i = 0;
157 AbstractTreeIterator minRef = trees[i];
158 while (minRef.eof() && ++i < trees.length)
159 minRef = trees[i];
160 if (minRef.eof())
161 return minRef;
162
163 boolean hasConflict = false;
164 minRef.matches = minRef;
165 while (++i < trees.length) {
166 final AbstractTreeIterator t = trees[i];
167 if (t.eof())
168 continue;
169
170 final int cmp = t.pathCompare(minRef);
171 if (cmp < 0) {
172 if (fastMinHasMatch && isTree(minRef) && !isTree(t)
173 && nameEqual(minRef, t)) {
174 // We used to be at a tree, but now we are at a file
175 // with the same name. Allow the file to match the
176 // tree anyway.
177 //
178 t.matches = minRef;
179 hasConflict = true;
180 } else {
181 fastMinHasMatch = false;
182 t.matches = t;
183 minRef = t;
184 }
185 } else if (cmp == 0) {
186 // Exact name/mode match is best.
187 //
188 t.matches = minRef;
189 } else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
190 && !isGitlink(minRef) && nameEqual(t, minRef)) {
191 // The minimum is a file (non-tree) but the next entry
192 // of this iterator is a tree whose name matches our file.
193 // This is a classic D/F conflict and commonly occurs like
194 // this, with no gaps in between the file and directory.
195 //
196 // Use the tree as the minimum instead (see combineDF).
197 //
198
199 for (int k = 0; k < i; k++) {
200 final AbstractTreeIterator p = trees[k];
201 if (p.matches == minRef)
202 p.matches = t;
203 }
204 t.matches = t;
205 minRef = t;
206 hasConflict = true;
207 } else
208 fastMinHasMatch = false;
209 }
210
211 if (hasConflict && fastMinHasMatch && dfConflict == null)
212 dfConflict = minRef;
213 return minRef;
214 }
215
216 private static boolean nameEqual(final AbstractTreeIterator a,
217 final AbstractTreeIterator b) {
218 return a.pathCompare(b, TREE_MODE) == 0;
219 }
220
221 private boolean isGitlink(AbstractTreeIterator p) {
222 return FileMode.GITLINK.equals(p.mode);
223 }
224
225 private static boolean isTree(AbstractTreeIterator p) {
226 return FileMode.TREE.equals(p.mode);
227 }
228
229 private boolean skipEntry(AbstractTreeIterator minRef)
230 throws CorruptObjectException {
231 // A tree D/F may have been handled earlier. We need to
232 // not report this path if it has already been reported.
233 //
234 for (AbstractTreeIterator t : trees) {
235 if (t.matches == minRef || t.first())
236 continue;
237
238 int stepsBack = 0;
239 for (;;) {
240 stepsBack++;
241 t.back(1);
242
243 final int cmp = t.pathCompare(minRef, 0);
244 if (cmp == 0) {
245 // We have already seen this "$path" before. Skip it.
246 //
247 t.next(stepsBack);
248 return true;
249 } else if (cmp < 0 || t.first()) {
250 // We cannot find "$path" in t; it will never appear.
251 //
252 t.next(stepsBack);
253 break;
254 }
255 }
256 }
257
258 // We have never seen the current path before.
259 //
260 return false;
261 }
262
263 private AbstractTreeIteratorg/eclipse/jgit/treewalk/AbstractTreeIterator.html#AbstractTreeIterator">AbstractTreeIterator combineDF(AbstractTreeIterator minRef)
264 throws CorruptObjectException {
265 // Look for a possible D/F conflict forward in the tree(s)
266 // as there may be a "$path/" which matches "$path". Make
267 // such entries match this entry.
268 //
269 AbstractTreeIterator treeMatch = null;
270 for (AbstractTreeIterator t : trees) {
271 if (t.matches == minRef || t.eof())
272 continue;
273
274 for (;;) {
275 final int cmp = t.pathCompare(minRef, TREE_MODE);
276 if (cmp < 0) {
277 // The "$path/" may still appear later.
278 //
279 t.matchShift++;
280 t.next(1);
281 if (t.eof()) {
282 t.back(t.matchShift);
283 t.matchShift = 0;
284 break;
285 }
286 } else if (cmp == 0) {
287 // We have a conflict match here.
288 //
289 t.matches = minRef;
290 treeMatch = t;
291 break;
292 } else {
293 // A conflict match is not possible.
294 //
295 if (t.matchShift != 0) {
296 t.back(t.matchShift);
297 t.matchShift = 0;
298 }
299 break;
300 }
301 }
302 }
303
304 if (treeMatch != null) {
305 // If we do have a conflict use one of the directory
306 // matching iterators instead of the file iterator.
307 // This way isSubtree is true and isRecursive works.
308 //
309 for (AbstractTreeIterator t : trees)
310 if (t.matches == minRef)
311 t.matches = treeMatch;
312
313 if (dfConflict == null && !isGitlink(minRef)) {
314 dfConflict = treeMatch;
315 }
316
317 return treeMatch;
318 }
319
320 return minRef;
321 }
322
323 @Override
324 void popEntriesEqual() throws CorruptObjectException {
325 final AbstractTreeIterator ch = currentHead;
326 for (int i = 0; i < trees.length; i++) {
327 final AbstractTreeIterator t = trees[i];
328 if (t.matches == ch) {
329 if (t.matchShift == 0)
330 t.next(1);
331 else {
332 t.back(t.matchShift);
333 t.matchShift = 0;
334 }
335 t.matches = null;
336 }
337 }
338
339 if (ch == dfConflict)
340 dfConflict = null;
341 }
342
343 @Override
344 void skipEntriesEqual() throws CorruptObjectException {
345 final AbstractTreeIterator ch = currentHead;
346 for (int i = 0; i < trees.length; i++) {
347 final AbstractTreeIterator t = trees[i];
348 if (t.matches == ch) {
349 if (t.matchShift == 0)
350 t.skip();
351 else {
352 t.back(t.matchShift);
353 t.matchShift = 0;
354 }
355 t.matches = null;
356 }
357 }
358
359 if (ch == dfConflict)
360 dfConflict = null;
361 }
362
363 @Override
364 void stopWalk() throws IOException {
365 if (!needsStopWalk()) {
366 return;
367 }
368
369 // Name conflicts make aborting early difficult. Multiple paths may
370 // exist between the file and directory versions of a name. To ensure
371 // the directory version is skipped over (as it was previously visited
372 // during the file version step) requires popping up the stack and
373 // finishing out each subtree that the walker dove into. Siblings in
374 // parents do not need to be recursed into, bounding the cost.
375 for (;;) {
376 AbstractTreeIterator t = min();
377 if (t.eof()) {
378 if (depth > 0) {
379 exitSubtree();
380 popEntriesEqual();
381 continue;
382 }
383 return;
384 }
385 currentHead = t;
386 skipEntriesEqual();
387 }
388 }
389
390 private boolean needsStopWalk() {
391 for (AbstractTreeIterator t : trees) {
392 if (t.needsStopWalk()) {
393 return true;
394 }
395 }
396 return false;
397 }
398
399 /**
400 * True if the current entry is covered by a directory/file conflict.
401 *
402 * This means that for some prefix of the current entry's path, this walk
403 * has detected a directory/file conflict. Also true if the current entry
404 * itself is a directory/file conflict.
405 *
406 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
407 * true then you know that either for path foo or for path foo/bar files and
408 * folders were detected.
409 *
410 * @return <code>true</code> if the current entry is covered by a
411 * directory/file conflict, <code>false</code> otherwise
412 */
413 public boolean isDirectoryFileConflict() {
414 return dfConflict != null;
415 }
416 }