1 /*
2 * Copyright (C) 2008, Google Inc.
3 * and other copyright owners as documented in the project's IP log.
4 *
5 * This program and the accompanying materials are made available
6 * under the terms of the Eclipse Distribution License v1.0 which
7 * accompanies this distribution, is reproduced below, and is
8 * available at http://www.eclipse.org/org/documents/edl-v10.php
9 *
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * - Neither the name of the Eclipse Foundation, Inc. nor the
25 * names of its contributors may be used to endorse or promote
26 * products derived from this software without specific prior
27 * written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 package org.eclipse.jgit.treewalk;
45
46 import org.eclipse.jgit.dircache.DirCacheBuilder;
47 import org.eclipse.jgit.errors.CorruptObjectException;
48 import org.eclipse.jgit.lib.FileMode;
49 import org.eclipse.jgit.lib.ObjectReader;
50 import org.eclipse.jgit.lib.Repository;
51
52 /**
53 * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
54 * <p>
55 * Due to the way a Git tree is organized the standard {@link TreeWalk} won't
56 * easily find a D/F conflict when merging two or more trees together. In the
57 * standard TreeWalk the file will be returned first, and then much later the
58 * directory will be returned. This makes it impossible for the application to
59 * efficiently detect and handle the conflict.
60 * <p>
61 * Using this walk implementation causes the directory to report earlier than
62 * usual, at the same time as the non-directory entry. This permits the
63 * application to handle the D/F conflict in a single step. The directory is
64 * returned only once, so it does not get returned later in the iteration.
65 * <p>
66 * When a D/F conflict is detected {@link TreeWalk#isSubtree()} will return true
67 * and {@link TreeWalk#enterSubtree()} will recurse into the subtree, no matter
68 * which iterator originally supplied the subtree.
69 * <p>
70 * Because conflicted directories report early, using this walk implementation
71 * to populate a {@link DirCacheBuilder} may cause the automatic resorting to
72 * run and fix the entry ordering.
73 * <p>
74 * This walk implementation requires more CPU to implement a look-ahead and a
75 * look-behind to merge a D/F pair together, or to skip a previously reported
76 * directory. In typical Git repositories the look-ahead cost is 0 and the
77 * look-behind doesn't trigger, as users tend not to create trees which contain
78 * both "foo" as a directory and "foo.c" as a file.
79 * <p>
80 * In the worst-case however several thousand look-ahead steps per walk step may
81 * be necessary, making the overhead quite significant. Since this worst-case
82 * should never happen this walk implementation has made the time/space tradeoff
83 * in favor of more-time/less-space, as that better suits the typical case.
84 */
85 public class NameConflictTreeWalk extends TreeWalk {
86 private static final int TREE_MODE = FileMode.TREE.getBits();
87
88 private boolean fastMinHasMatch;
89
90 private AbstractTreeIterator dfConflict;
91
92 /**
93 * Create a new tree walker for a given repository.
94 *
95 * @param repo
96 * the repository the walker will obtain data from.
97 */
98 public NameConflictTreeWalk(final Repository repo) {
99 this(repo.newObjectReader());
100 }
101
102 /**
103 * Create a new tree walker for a given repository.
104 *
105 * @param or
106 * the reader the walker will obtain tree data from.
107 */
108 public NameConflictTreeWalk(final ObjectReader or) {
109 super(or);
110 }
111
112 @Override
113 AbstractTreeIterator min() throws CorruptObjectException {
114 for (;;) {
115 final AbstractTreeIterator minRef = fastMin();
116 if (fastMinHasMatch)
117 return minRef;
118
119 if (isTree(minRef)) {
120 if (skipEntry(minRef)) {
121 for (final AbstractTreeIterator t : trees) {
122 if (t.matches == minRef) {
123 t.next(1);
124 t.matches = null;
125 }
126 }
127 continue;
128 }
129 return minRef;
130 }
131
132 return combineDF(minRef);
133 }
134 }
135
136 private AbstractTreeIterator fastMin() {
137 fastMinHasMatch = true;
138
139 int i = 0;
140 AbstractTreeIterator minRef = trees[i];
141 while (minRef.eof() && ++i < trees.length)
142 minRef = trees[i];
143 if (minRef.eof())
144 return minRef;
145
146 boolean hasConflict = false;
147 minRef.matches = minRef;
148 while (++i < trees.length) {
149 final AbstractTreeIterator t = trees[i];
150 if (t.eof())
151 continue;
152
153 final int cmp = t.pathCompare(minRef);
154 if (cmp < 0) {
155 if (fastMinHasMatch && isTree(minRef) && !isTree(t)
156 && nameEqual(minRef, t)) {
157 // We used to be at a tree, but now we are at a file
158 // with the same name. Allow the file to match the
159 // tree anyway.
160 //
161 t.matches = minRef;
162 hasConflict = true;
163 } else {
164 fastMinHasMatch = false;
165 t.matches = t;
166 minRef = t;
167 }
168 } else if (cmp == 0) {
169 // Exact name/mode match is best.
170 //
171 t.matches = minRef;
172 } else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
173 && nameEqual(t, minRef)) {
174 // The minimum is a file (non-tree) but the next entry
175 // of this iterator is a tree whose name matches our file.
176 // This is a classic D/F conflict and commonly occurs like
177 // this, with no gaps in between the file and directory.
178 //
179 // Use the tree as the minimum instead (see combineDF).
180 //
181
182 for (int k = 0; k < i; k++) {
183 final AbstractTreeIterator p = trees[k];
184 if (p.matches == minRef)
185 p.matches = t;
186 }
187 t.matches = t;
188 minRef = t;
189 hasConflict = true;
190 } else
191 fastMinHasMatch = false;
192 }
193
194 if (hasConflict && fastMinHasMatch && dfConflict == null)
195 dfConflict = minRef;
196 return minRef;
197 }
198
199 private static boolean nameEqual(final AbstractTreeIterator a,
200 final AbstractTreeIterator b) {
201 return a.pathCompare(b, TREE_MODE) == 0;
202 }
203
204 private static boolean isTree(final AbstractTreeIterator p) {
205 return FileMode.TREE.equals(p.mode);
206 }
207
208 private boolean skipEntry(final AbstractTreeIterator minRef)
209 throws CorruptObjectException {
210 // A tree D/F may have been handled earlier. We need to
211 // not report this path if it has already been reported.
212 //
213 for (final AbstractTreeIterator t : trees) {
214 if (t.matches == minRef || t.first())
215 continue;
216
217 int stepsBack = 0;
218 for (;;) {
219 stepsBack++;
220 t.back(1);
221
222 final int cmp = t.pathCompare(minRef, 0);
223 if (cmp == 0) {
224 // We have already seen this "$path" before. Skip it.
225 //
226 t.next(stepsBack);
227 return true;
228 } else if (cmp < 0 || t.first()) {
229 // We cannot find "$path" in t; it will never appear.
230 //
231 t.next(stepsBack);
232 break;
233 }
234 }
235 }
236
237 // We have never seen the current path before.
238 //
239 return false;
240 }
241
242 private AbstractTreeIterator combineDF(final AbstractTreeIterator minRef)
243 throws CorruptObjectException {
244 // Look for a possible D/F conflict forward in the tree(s)
245 // as there may be a "$path/" which matches "$path". Make
246 // such entries match this entry.
247 //
248 AbstractTreeIterator treeMatch = null;
249 for (final AbstractTreeIterator t : trees) {
250 if (t.matches == minRef || t.eof())
251 continue;
252
253 for (;;) {
254 final int cmp = t.pathCompare(minRef, TREE_MODE);
255 if (cmp < 0) {
256 // The "$path/" may still appear later.
257 //
258 t.matchShift++;
259 t.next(1);
260 if (t.eof()) {
261 t.back(t.matchShift);
262 t.matchShift = 0;
263 break;
264 }
265 } else if (cmp == 0) {
266 // We have a conflict match here.
267 //
268 t.matches = minRef;
269 treeMatch = t;
270 break;
271 } else {
272 // A conflict match is not possible.
273 //
274 if (t.matchShift != 0) {
275 t.back(t.matchShift);
276 t.matchShift = 0;
277 }
278 break;
279 }
280 }
281 }
282
283 if (treeMatch != null) {
284 // If we do have a conflict use one of the directory
285 // matching iterators instead of the file iterator.
286 // This way isSubtree is true and isRecursive works.
287 //
288 for (final AbstractTreeIterator t : trees)
289 if (t.matches == minRef)
290 t.matches = treeMatch;
291
292 if (dfConflict == null)
293 dfConflict = treeMatch;
294
295 return treeMatch;
296 }
297
298 return minRef;
299 }
300
301 @Override
302 void popEntriesEqual() throws CorruptObjectException {
303 final AbstractTreeIterator ch = currentHead;
304 for (int i = 0; i < trees.length; i++) {
305 final AbstractTreeIterator t = trees[i];
306 if (t.matches == ch) {
307 if (t.matchShift == 0)
308 t.next(1);
309 else {
310 t.back(t.matchShift);
311 t.matchShift = 0;
312 }
313 t.matches = null;
314 }
315 }
316
317 if (ch == dfConflict)
318 dfConflict = null;
319 }
320
321 @Override
322 void skipEntriesEqual() throws CorruptObjectException {
323 final AbstractTreeIterator ch = currentHead;
324 for (int i = 0; i < trees.length; i++) {
325 final AbstractTreeIterator t = trees[i];
326 if (t.matches == ch) {
327 if (t.matchShift == 0)
328 t.skip();
329 else {
330 t.back(t.matchShift);
331 t.matchShift = 0;
332 }
333 t.matches = null;
334 }
335 }
336
337 if (ch == dfConflict)
338 dfConflict = null;
339 }
340
341 /**
342 * True if the current entry is covered by a directory/file conflict.
343 *
344 * This means that for some prefix of the current entry's path, this walk
345 * has detected a directory/file conflict. Also true if the current entry
346 * itself is a directory/file conflict.
347 *
348 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
349 * true then you know that either for path foo or for path foo/bar files and
350 * folders were detected.
351 *
352 * @return <code>true</code> if the current entry is covered by a
353 * directory/file conflict, <code>false</code> otherwise
354 */
355 public boolean isDirectoryFileConflict() {
356 return dfConflict != null;
357 }
358 }