1 /* 2 * Copyright (C) 2008, Google Inc. 3 * and other copyright owners as documented in the project's IP log. 4 * 5 * This program and the accompanying materials are made available 6 * under the terms of the Eclipse Distribution License v1.0 which 7 * accompanies this distribution, is reproduced below, and is 8 * available at http://www.eclipse.org/org/documents/edl-v10.php 9 * 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * - Neither the name of the Eclipse Foundation, Inc. nor the 25 * names of its contributors may be used to endorse or promote 26 * products derived from this software without specific prior 27 * written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 */ 43 44 package org.eclipse.jgit.treewalk; 45 46 import java.io.IOException; 47 48 import org.eclipse.jgit.annotations.Nullable; 49 import org.eclipse.jgit.errors.CorruptObjectException; 50 import org.eclipse.jgit.lib.FileMode; 51 import org.eclipse.jgit.lib.ObjectReader; 52 import org.eclipse.jgit.lib.Repository; 53 54 /** 55 * Specialized TreeWalk to detect directory-file (D/F) name conflicts. 56 * <p> 57 * Due to the way a Git tree is organized the standard 58 * {@link org.eclipse.jgit.treewalk.TreeWalk} won't easily find a D/F conflict 59 * when merging two or more trees together. In the standard TreeWalk the file 60 * will be returned first, and then much later the directory will be returned. 61 * This makes it impossible for the application to efficiently detect and handle 62 * the conflict. 63 * <p> 64 * Using this walk implementation causes the directory to report earlier than 65 * usual, at the same time as the non-directory entry. This permits the 66 * application to handle the D/F conflict in a single step. The directory is 67 * returned only once, so it does not get returned later in the iteration. 68 * <p> 69 * When a D/F conflict is detected 70 * {@link org.eclipse.jgit.treewalk.TreeWalk#isSubtree()} will return true and 71 * {@link org.eclipse.jgit.treewalk.TreeWalk#enterSubtree()} will recurse into 72 * the subtree, no matter which iterator originally supplied the subtree. 73 * <p> 74 * Because conflicted directories report early, using this walk implementation 75 * to populate a {@link org.eclipse.jgit.dircache.DirCacheBuilder} may cause the 76 * automatic resorting to run and fix the entry ordering. 77 * <p> 78 * This walk implementation requires more CPU to implement a look-ahead and a 79 * look-behind to merge a D/F pair together, or to skip a previously reported 80 * directory. In typical Git repositories the look-ahead cost is 0 and the 81 * look-behind doesn't trigger, as users tend not to create trees which contain 82 * both "foo" as a directory and "foo.c" as a file. 83 * <p> 84 * In the worst-case however several thousand look-ahead steps per walk step may 85 * be necessary, making the overhead quite significant. Since this worst-case 86 * should never happen this walk implementation has made the time/space tradeoff 87 * in favor of more-time/less-space, as that better suits the typical case. 88 */ 89 public class NameConflictTreeWalk extends TreeWalk { 90 private static final int TREE_MODE = FileMode.TREE.getBits(); 91 92 private boolean fastMinHasMatch; 93 94 private AbstractTreeIterator dfConflict; 95 96 /** 97 * Create a new tree walker for a given repository. 98 * 99 * @param repo 100 * the repository the walker will obtain data from. 101 */ 102 public NameConflictTreeWalk(Repository repo) { 103 super(repo); 104 } 105 106 /** 107 * Create a new tree walker for a given repository. 108 * 109 * @param repo 110 * the repository the walker will obtain data from. 111 * @param or 112 * the reader the walker will obtain tree data from. 113 * @since 4.3 114 */ 115 public NameConflictTreeWalk(@Nullable Repository repo, ObjectReader or) { 116 super(repo, or); 117 } 118 119 /** 120 * Create a new tree walker for a given repository. 121 * 122 * @param or 123 * the reader the walker will obtain tree data from. 124 */ 125 public NameConflictTreeWalk(ObjectReader or) { 126 super(or); 127 } 128 129 @Override 130 AbstractTreeIterator min() throws CorruptObjectException { 131 for (;;) { 132 final AbstractTreeIterator minRef = fastMin(); 133 if (fastMinHasMatch) 134 return minRef; 135 136 if (isTree(minRef)) { 137 if (skipEntry(minRef)) { 138 for (AbstractTreeIterator t : trees) { 139 if (t.matches == minRef) { 140 t.next(1); 141 t.matches = null; 142 } 143 } 144 continue; 145 } 146 return minRef; 147 } 148 149 return combineDF(minRef); 150 } 151 } 152 153 private AbstractTreeIterator fastMin() { 154 fastMinHasMatch = true; 155 156 int i = 0; 157 AbstractTreeIterator minRef = trees[i]; 158 while (minRef.eof() && ++i < trees.length) 159 minRef = trees[i]; 160 if (minRef.eof()) 161 return minRef; 162 163 boolean hasConflict = false; 164 minRef.matches = minRef; 165 while (++i < trees.length) { 166 final AbstractTreeIterator t = trees[i]; 167 if (t.eof()) 168 continue; 169 170 final int cmp = t.pathCompare(minRef); 171 if (cmp < 0) { 172 if (fastMinHasMatch && isTree(minRef) && !isTree(t) 173 && nameEqual(minRef, t)) { 174 // We used to be at a tree, but now we are at a file 175 // with the same name. Allow the file to match the 176 // tree anyway. 177 // 178 t.matches = minRef; 179 hasConflict = true; 180 } else { 181 fastMinHasMatch = false; 182 t.matches = t; 183 minRef = t; 184 } 185 } else if (cmp == 0) { 186 // Exact name/mode match is best. 187 // 188 t.matches = minRef; 189 } else if (fastMinHasMatch && isTree(t) && !isTree(minRef) 190 && !isGitlink(minRef) && nameEqual(t, minRef)) { 191 // The minimum is a file (non-tree) but the next entry 192 // of this iterator is a tree whose name matches our file. 193 // This is a classic D/F conflict and commonly occurs like 194 // this, with no gaps in between the file and directory. 195 // 196 // Use the tree as the minimum instead (see combineDF). 197 // 198 199 for (int k = 0; k < i; k++) { 200 final AbstractTreeIterator p = trees[k]; 201 if (p.matches == minRef) 202 p.matches = t; 203 } 204 t.matches = t; 205 minRef = t; 206 hasConflict = true; 207 } else 208 fastMinHasMatch = false; 209 } 210 211 if (hasConflict && fastMinHasMatch && dfConflict == null) 212 dfConflict = minRef; 213 return minRef; 214 } 215 216 private static boolean nameEqual(final AbstractTreeIterator a, 217 final AbstractTreeIterator b) { 218 return a.pathCompare(b, TREE_MODE) == 0; 219 } 220 221 private boolean isGitlink(AbstractTreeIterator p) { 222 return FileMode.GITLINK.equals(p.mode); 223 } 224 225 private static boolean isTree(AbstractTreeIterator p) { 226 return FileMode.TREE.equals(p.mode); 227 } 228 229 private boolean skipEntry(AbstractTreeIterator minRef) 230 throws CorruptObjectException { 231 // A tree D/F may have been handled earlier. We need to 232 // not report this path if it has already been reported. 233 // 234 for (AbstractTreeIterator t : trees) { 235 if (t.matches == minRef || t.first()) 236 continue; 237 238 int stepsBack = 0; 239 for (;;) { 240 stepsBack++; 241 t.back(1); 242 243 final int cmp = t.pathCompare(minRef, 0); 244 if (cmp == 0) { 245 // We have already seen this "$path" before. Skip it. 246 // 247 t.next(stepsBack); 248 return true; 249 } else if (cmp < 0 || t.first()) { 250 // We cannot find "$path" in t; it will never appear. 251 // 252 t.next(stepsBack); 253 break; 254 } 255 } 256 } 257 258 // We have never seen the current path before. 259 // 260 return false; 261 } 262 263 private AbstractTreeIterator combineDF(AbstractTreeIterator minRef) 264 throws CorruptObjectException { 265 // Look for a possible D/F conflict forward in the tree(s) 266 // as there may be a "$path/" which matches "$path". Make 267 // such entries match this entry. 268 // 269 AbstractTreeIterator treeMatch = null; 270 for (AbstractTreeIterator t : trees) { 271 if (t.matches == minRef || t.eof()) 272 continue; 273 274 for (;;) { 275 final int cmp = t.pathCompare(minRef, TREE_MODE); 276 if (cmp < 0) { 277 // The "$path/" may still appear later. 278 // 279 t.matchShift++; 280 t.next(1); 281 if (t.eof()) { 282 t.back(t.matchShift); 283 t.matchShift = 0; 284 break; 285 } 286 } else if (cmp == 0) { 287 // We have a conflict match here. 288 // 289 t.matches = minRef; 290 treeMatch = t; 291 break; 292 } else { 293 // A conflict match is not possible. 294 // 295 if (t.matchShift != 0) { 296 t.back(t.matchShift); 297 t.matchShift = 0; 298 } 299 break; 300 } 301 } 302 } 303 304 if (treeMatch != null) { 305 // If we do have a conflict use one of the directory 306 // matching iterators instead of the file iterator. 307 // This way isSubtree is true and isRecursive works. 308 // 309 for (AbstractTreeIterator t : trees) 310 if (t.matches == minRef) 311 t.matches = treeMatch; 312 313 if (dfConflict == null && !isGitlink(minRef)) { 314 dfConflict = treeMatch; 315 } 316 317 return treeMatch; 318 } 319 320 return minRef; 321 } 322 323 @Override 324 void popEntriesEqual() throws CorruptObjectException { 325 final AbstractTreeIterator ch = currentHead; 326 for (int i = 0; i < trees.length; i++) { 327 final AbstractTreeIterator t = trees[i]; 328 if (t.matches == ch) { 329 if (t.matchShift == 0) 330 t.next(1); 331 else { 332 t.back(t.matchShift); 333 t.matchShift = 0; 334 } 335 t.matches = null; 336 } 337 } 338 339 if (ch == dfConflict) 340 dfConflict = null; 341 } 342 343 @Override 344 void skipEntriesEqual() throws CorruptObjectException { 345 final AbstractTreeIterator ch = currentHead; 346 for (int i = 0; i < trees.length; i++) { 347 final AbstractTreeIterator t = trees[i]; 348 if (t.matches == ch) { 349 if (t.matchShift == 0) 350 t.skip(); 351 else { 352 t.back(t.matchShift); 353 t.matchShift = 0; 354 } 355 t.matches = null; 356 } 357 } 358 359 if (ch == dfConflict) 360 dfConflict = null; 361 } 362 363 @Override 364 void stopWalk() throws IOException { 365 if (!needsStopWalk()) { 366 return; 367 } 368 369 // Name conflicts make aborting early difficult. Multiple paths may 370 // exist between the file and directory versions of a name. To ensure 371 // the directory version is skipped over (as it was previously visited 372 // during the file version step) requires popping up the stack and 373 // finishing out each subtree that the walker dove into. Siblings in 374 // parents do not need to be recursed into, bounding the cost. 375 for (;;) { 376 AbstractTreeIterator t = min(); 377 if (t.eof()) { 378 if (depth > 0) { 379 exitSubtree(); 380 popEntriesEqual(); 381 continue; 382 } 383 return; 384 } 385 currentHead = t; 386 skipEntriesEqual(); 387 } 388 } 389 390 private boolean needsStopWalk() { 391 for (AbstractTreeIterator t : trees) { 392 if (t.needsStopWalk()) { 393 return true; 394 } 395 } 396 return false; 397 } 398 399 /** 400 * True if the current entry is covered by a directory/file conflict. 401 * 402 * This means that for some prefix of the current entry's path, this walk 403 * has detected a directory/file conflict. Also true if the current entry 404 * itself is a directory/file conflict. 405 * 406 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns 407 * true then you know that either for path foo or for path foo/bar files and 408 * folders were detected. 409 * 410 * @return <code>true</code> if the current entry is covered by a 411 * directory/file conflict, <code>false</code> otherwise 412 */ 413 public boolean isDirectoryFileConflict() { 414 return dfConflict != null; 415 } 416 }