1 /* 2 * Copyright (C) 2008, Google Inc. 3 * and other copyright owners as documented in the project's IP log. 4 * 5 * This program and the accompanying materials are made available 6 * under the terms of the Eclipse Distribution License v1.0 which 7 * accompanies this distribution, is reproduced below, and is 8 * available at http://www.eclipse.org/org/documents/edl-v10.php 9 * 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * - Neither the name of the Eclipse Foundation, Inc. nor the 25 * names of its contributors may be used to endorse or promote 26 * products derived from this software without specific prior 27 * written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 */ 43 44 package org.eclipse.jgit.treewalk; 45 46 import java.io.IOException; 47 48 import org.eclipse.jgit.annotations.Nullable; 49 import org.eclipse.jgit.dircache.DirCacheBuilder; 50 import org.eclipse.jgit.errors.CorruptObjectException; 51 import org.eclipse.jgit.lib.FileMode; 52 import org.eclipse.jgit.lib.ObjectReader; 53 import org.eclipse.jgit.lib.Repository; 54 55 /** 56 * Specialized TreeWalk to detect directory-file (D/F) name conflicts. 57 * <p> 58 * Due to the way a Git tree is organized the standard {@link TreeWalk} won't 59 * easily find a D/F conflict when merging two or more trees together. In the 60 * standard TreeWalk the file will be returned first, and then much later the 61 * directory will be returned. This makes it impossible for the application to 62 * efficiently detect and handle the conflict. 63 * <p> 64 * Using this walk implementation causes the directory to report earlier than 65 * usual, at the same time as the non-directory entry. This permits the 66 * application to handle the D/F conflict in a single step. The directory is 67 * returned only once, so it does not get returned later in the iteration. 68 * <p> 69 * When a D/F conflict is detected {@link TreeWalk#isSubtree()} will return true 70 * and {@link TreeWalk#enterSubtree()} will recurse into the subtree, no matter 71 * which iterator originally supplied the subtree. 72 * <p> 73 * Because conflicted directories report early, using this walk implementation 74 * to populate a {@link DirCacheBuilder} may cause the automatic resorting to 75 * run and fix the entry ordering. 76 * <p> 77 * This walk implementation requires more CPU to implement a look-ahead and a 78 * look-behind to merge a D/F pair together, or to skip a previously reported 79 * directory. In typical Git repositories the look-ahead cost is 0 and the 80 * look-behind doesn't trigger, as users tend not to create trees which contain 81 * both "foo" as a directory and "foo.c" as a file. 82 * <p> 83 * In the worst-case however several thousand look-ahead steps per walk step may 84 * be necessary, making the overhead quite significant. Since this worst-case 85 * should never happen this walk implementation has made the time/space tradeoff 86 * in favor of more-time/less-space, as that better suits the typical case. 87 */ 88 public class NameConflictTreeWalk extends TreeWalk { 89 private static final int TREE_MODE = FileMode.TREE.getBits(); 90 91 private boolean fastMinHasMatch; 92 93 private AbstractTreeIterator dfConflict; 94 95 /** 96 * Create a new tree walker for a given repository. 97 * 98 * @param repo 99 * the repository the walker will obtain data from. 100 */ 101 public NameConflictTreeWalk(final Repository repo) { 102 super(repo); 103 } 104 105 /** 106 * Create a new tree walker for a given repository. 107 * 108 * @param repo 109 * the repository the walker will obtain data from. 110 * @param or 111 * the reader the walker will obtain tree data from. 112 * @since 4.3 113 */ 114 public NameConflictTreeWalk(@Nullable Repository repo, final ObjectReader or) { 115 super(repo, or); 116 } 117 118 /** 119 * Create a new tree walker for a given repository. 120 * 121 * @param or 122 * the reader the walker will obtain tree data from. 123 */ 124 public NameConflictTreeWalk(final ObjectReader or) { 125 super(or); 126 } 127 128 @Override 129 AbstractTreeIterator min() throws CorruptObjectException { 130 for (;;) { 131 final AbstractTreeIterator minRef = fastMin(); 132 if (fastMinHasMatch) 133 return minRef; 134 135 if (isTree(minRef)) { 136 if (skipEntry(minRef)) { 137 for (final AbstractTreeIterator t : trees) { 138 if (t.matches == minRef) { 139 t.next(1); 140 t.matches = null; 141 } 142 } 143 continue; 144 } 145 return minRef; 146 } 147 148 return combineDF(minRef); 149 } 150 } 151 152 private AbstractTreeIterator fastMin() { 153 fastMinHasMatch = true; 154 155 int i = 0; 156 AbstractTreeIterator minRef = trees[i]; 157 while (minRef.eof() && ++i < trees.length) 158 minRef = trees[i]; 159 if (minRef.eof()) 160 return minRef; 161 162 boolean hasConflict = false; 163 minRef.matches = minRef; 164 while (++i < trees.length) { 165 final AbstractTreeIterator t = trees[i]; 166 if (t.eof()) 167 continue; 168 169 final int cmp = t.pathCompare(minRef); 170 if (cmp < 0) { 171 if (fastMinHasMatch && isTree(minRef) && !isTree(t) 172 && nameEqual(minRef, t)) { 173 // We used to be at a tree, but now we are at a file 174 // with the same name. Allow the file to match the 175 // tree anyway. 176 // 177 t.matches = minRef; 178 hasConflict = true; 179 } else { 180 fastMinHasMatch = false; 181 t.matches = t; 182 minRef = t; 183 } 184 } else if (cmp == 0) { 185 // Exact name/mode match is best. 186 // 187 t.matches = minRef; 188 } else if (fastMinHasMatch && isTree(t) && !isTree(minRef) 189 && nameEqual(t, minRef)) { 190 // The minimum is a file (non-tree) but the next entry 191 // of this iterator is a tree whose name matches our file. 192 // This is a classic D/F conflict and commonly occurs like 193 // this, with no gaps in between the file and directory. 194 // 195 // Use the tree as the minimum instead (see combineDF). 196 // 197 198 for (int k = 0; k < i; k++) { 199 final AbstractTreeIterator p = trees[k]; 200 if (p.matches == minRef) 201 p.matches = t; 202 } 203 t.matches = t; 204 minRef = t; 205 hasConflict = true; 206 } else 207 fastMinHasMatch = false; 208 } 209 210 if (hasConflict && fastMinHasMatch && dfConflict == null) 211 dfConflict = minRef; 212 return minRef; 213 } 214 215 private static boolean nameEqual(final AbstractTreeIterator a, 216 final AbstractTreeIterator b) { 217 return a.pathCompare(b, TREE_MODE) == 0; 218 } 219 220 private static boolean isTree(final AbstractTreeIterator p) { 221 return FileMode.TREE.equals(p.mode); 222 } 223 224 private boolean skipEntry(final AbstractTreeIterator minRef) 225 throws CorruptObjectException { 226 // A tree D/F may have been handled earlier. We need to 227 // not report this path if it has already been reported. 228 // 229 for (final AbstractTreeIterator t : trees) { 230 if (t.matches == minRef || t.first()) 231 continue; 232 233 int stepsBack = 0; 234 for (;;) { 235 stepsBack++; 236 t.back(1); 237 238 final int cmp = t.pathCompare(minRef, 0); 239 if (cmp == 0) { 240 // We have already seen this "$path" before. Skip it. 241 // 242 t.next(stepsBack); 243 return true; 244 } else if (cmp < 0 || t.first()) { 245 // We cannot find "$path" in t; it will never appear. 246 // 247 t.next(stepsBack); 248 break; 249 } 250 } 251 } 252 253 // We have never seen the current path before. 254 // 255 return false; 256 } 257 258 private AbstractTreeIterator combineDF(final AbstractTreeIterator minRef) 259 throws CorruptObjectException { 260 // Look for a possible D/F conflict forward in the tree(s) 261 // as there may be a "$path/" which matches "$path". Make 262 // such entries match this entry. 263 // 264 AbstractTreeIterator treeMatch = null; 265 for (final AbstractTreeIterator t : trees) { 266 if (t.matches == minRef || t.eof()) 267 continue; 268 269 for (;;) { 270 final int cmp = t.pathCompare(minRef, TREE_MODE); 271 if (cmp < 0) { 272 // The "$path/" may still appear later. 273 // 274 t.matchShift++; 275 t.next(1); 276 if (t.eof()) { 277 t.back(t.matchShift); 278 t.matchShift = 0; 279 break; 280 } 281 } else if (cmp == 0) { 282 // We have a conflict match here. 283 // 284 t.matches = minRef; 285 treeMatch = t; 286 break; 287 } else { 288 // A conflict match is not possible. 289 // 290 if (t.matchShift != 0) { 291 t.back(t.matchShift); 292 t.matchShift = 0; 293 } 294 break; 295 } 296 } 297 } 298 299 if (treeMatch != null) { 300 // If we do have a conflict use one of the directory 301 // matching iterators instead of the file iterator. 302 // This way isSubtree is true and isRecursive works. 303 // 304 for (final AbstractTreeIterator t : trees) 305 if (t.matches == minRef) 306 t.matches = treeMatch; 307 308 if (dfConflict == null) 309 dfConflict = treeMatch; 310 311 return treeMatch; 312 } 313 314 return minRef; 315 } 316 317 @Override 318 void popEntriesEqual() throws CorruptObjectException { 319 final AbstractTreeIterator ch = currentHead; 320 for (int i = 0; i < trees.length; i++) { 321 final AbstractTreeIterator t = trees[i]; 322 if (t.matches == ch) { 323 if (t.matchShift == 0) 324 t.next(1); 325 else { 326 t.back(t.matchShift); 327 t.matchShift = 0; 328 } 329 t.matches = null; 330 } 331 } 332 333 if (ch == dfConflict) 334 dfConflict = null; 335 } 336 337 @Override 338 void skipEntriesEqual() throws CorruptObjectException { 339 final AbstractTreeIterator ch = currentHead; 340 for (int i = 0; i < trees.length; i++) { 341 final AbstractTreeIterator t = trees[i]; 342 if (t.matches == ch) { 343 if (t.matchShift == 0) 344 t.skip(); 345 else { 346 t.back(t.matchShift); 347 t.matchShift = 0; 348 } 349 t.matches = null; 350 } 351 } 352 353 if (ch == dfConflict) 354 dfConflict = null; 355 } 356 357 @Override 358 void stopWalk() throws IOException { 359 if (!needsStopWalk()) { 360 return; 361 } 362 363 // Name conflicts make aborting early difficult. Multiple paths may 364 // exist between the file and directory versions of a name. To ensure 365 // the directory version is skipped over (as it was previously visited 366 // during the file version step) requires popping up the stack and 367 // finishing out each subtree that the walker dove into. Siblings in 368 // parents do not need to be recursed into, bounding the cost. 369 for (;;) { 370 AbstractTreeIterator t = min(); 371 if (t.eof()) { 372 if (depth > 0) { 373 exitSubtree(); 374 popEntriesEqual(); 375 continue; 376 } 377 return; 378 } 379 currentHead = t; 380 skipEntriesEqual(); 381 } 382 } 383 384 private boolean needsStopWalk() { 385 for (AbstractTreeIterator t : trees) { 386 if (t.needsStopWalk()) { 387 return true; 388 } 389 } 390 return false; 391 } 392 393 /** 394 * True if the current entry is covered by a directory/file conflict. 395 * 396 * This means that for some prefix of the current entry's path, this walk 397 * has detected a directory/file conflict. Also true if the current entry 398 * itself is a directory/file conflict. 399 * 400 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns 401 * true then you know that either for path foo or for path foo/bar files and 402 * folders were detected. 403 * 404 * @return <code>true</code> if the current entry is covered by a 405 * directory/file conflict, <code>false</code> otherwise 406 */ 407 public boolean isDirectoryFileConflict() { 408 return dfConflict != null; 409 } 410 }