1 /* 2 * Copyright (C) 2008, Google Inc. 3 * and other copyright owners as documented in the project's IP log. 4 * 5 * This program and the accompanying materials are made available 6 * under the terms of the Eclipse Distribution License v1.0 which 7 * accompanies this distribution, is reproduced below, and is 8 * available at http://www.eclipse.org/org/documents/edl-v10.php 9 * 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * - Neither the name of the Eclipse Foundation, Inc. nor the 25 * names of its contributors may be used to endorse or promote 26 * products derived from this software without specific prior 27 * written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 */ 43 44 package org.eclipse.jgit.treewalk; 45 46 import java.io.IOException; 47 48 import org.eclipse.jgit.dircache.DirCacheBuilder; 49 import org.eclipse.jgit.errors.CorruptObjectException; 50 import org.eclipse.jgit.lib.FileMode; 51 import org.eclipse.jgit.lib.ObjectReader; 52 import org.eclipse.jgit.lib.Repository; 53 54 /** 55 * Specialized TreeWalk to detect directory-file (D/F) name conflicts. 56 * <p> 57 * Due to the way a Git tree is organized the standard {@link TreeWalk} won't 58 * easily find a D/F conflict when merging two or more trees together. In the 59 * standard TreeWalk the file will be returned first, and then much later the 60 * directory will be returned. This makes it impossible for the application to 61 * efficiently detect and handle the conflict. 62 * <p> 63 * Using this walk implementation causes the directory to report earlier than 64 * usual, at the same time as the non-directory entry. This permits the 65 * application to handle the D/F conflict in a single step. The directory is 66 * returned only once, so it does not get returned later in the iteration. 67 * <p> 68 * When a D/F conflict is detected {@link TreeWalk#isSubtree()} will return true 69 * and {@link TreeWalk#enterSubtree()} will recurse into the subtree, no matter 70 * which iterator originally supplied the subtree. 71 * <p> 72 * Because conflicted directories report early, using this walk implementation 73 * to populate a {@link DirCacheBuilder} may cause the automatic resorting to 74 * run and fix the entry ordering. 75 * <p> 76 * This walk implementation requires more CPU to implement a look-ahead and a 77 * look-behind to merge a D/F pair together, or to skip a previously reported 78 * directory. In typical Git repositories the look-ahead cost is 0 and the 79 * look-behind doesn't trigger, as users tend not to create trees which contain 80 * both "foo" as a directory and "foo.c" as a file. 81 * <p> 82 * In the worst-case however several thousand look-ahead steps per walk step may 83 * be necessary, making the overhead quite significant. Since this worst-case 84 * should never happen this walk implementation has made the time/space tradeoff 85 * in favor of more-time/less-space, as that better suits the typical case. 86 */ 87 public class NameConflictTreeWalk extends TreeWalk { 88 private static final int TREE_MODE = FileMode.TREE.getBits(); 89 90 private boolean fastMinHasMatch; 91 92 private AbstractTreeIterator dfConflict; 93 94 /** 95 * Create a new tree walker for a given repository. 96 * 97 * @param repo 98 * the repository the walker will obtain data from. 99 */ 100 public NameConflictTreeWalk(final Repository repo) { 101 super(repo); 102 } 103 104 /** 105 * Create a new tree walker for a given repository. 106 * 107 * @param repo 108 * the repository the walker will obtain data from. 109 * @param or 110 * the reader the walker will obtain tree data from. 111 * @since 4.3 112 */ 113 public NameConflictTreeWalk(Repository repo, final ObjectReader or) { 114 super(repo, or); 115 } 116 117 /** 118 * Create a new tree walker for a given repository. 119 * 120 * @param or 121 * the reader the walker will obtain tree data from. 122 */ 123 public NameConflictTreeWalk(final ObjectReader or) { 124 super(or); 125 } 126 127 @Override 128 AbstractTreeIterator min() throws CorruptObjectException { 129 for (;;) { 130 final AbstractTreeIterator minRef = fastMin(); 131 if (fastMinHasMatch) 132 return minRef; 133 134 if (isTree(minRef)) { 135 if (skipEntry(minRef)) { 136 for (final AbstractTreeIterator t : trees) { 137 if (t.matches == minRef) { 138 t.next(1); 139 t.matches = null; 140 } 141 } 142 continue; 143 } 144 return minRef; 145 } 146 147 return combineDF(minRef); 148 } 149 } 150 151 private AbstractTreeIterator fastMin() { 152 fastMinHasMatch = true; 153 154 int i = 0; 155 AbstractTreeIterator minRef = trees[i]; 156 while (minRef.eof() && ++i < trees.length) 157 minRef = trees[i]; 158 if (minRef.eof()) 159 return minRef; 160 161 boolean hasConflict = false; 162 minRef.matches = minRef; 163 while (++i < trees.length) { 164 final AbstractTreeIterator t = trees[i]; 165 if (t.eof()) 166 continue; 167 168 final int cmp = t.pathCompare(minRef); 169 if (cmp < 0) { 170 if (fastMinHasMatch && isTree(minRef) && !isTree(t) 171 && nameEqual(minRef, t)) { 172 // We used to be at a tree, but now we are at a file 173 // with the same name. Allow the file to match the 174 // tree anyway. 175 // 176 t.matches = minRef; 177 hasConflict = true; 178 } else { 179 fastMinHasMatch = false; 180 t.matches = t; 181 minRef = t; 182 } 183 } else if (cmp == 0) { 184 // Exact name/mode match is best. 185 // 186 t.matches = minRef; 187 } else if (fastMinHasMatch && isTree(t) && !isTree(minRef) 188 && nameEqual(t, minRef)) { 189 // The minimum is a file (non-tree) but the next entry 190 // of this iterator is a tree whose name matches our file. 191 // This is a classic D/F conflict and commonly occurs like 192 // this, with no gaps in between the file and directory. 193 // 194 // Use the tree as the minimum instead (see combineDF). 195 // 196 197 for (int k = 0; k < i; k++) { 198 final AbstractTreeIterator p = trees[k]; 199 if (p.matches == minRef) 200 p.matches = t; 201 } 202 t.matches = t; 203 minRef = t; 204 hasConflict = true; 205 } else 206 fastMinHasMatch = false; 207 } 208 209 if (hasConflict && fastMinHasMatch && dfConflict == null) 210 dfConflict = minRef; 211 return minRef; 212 } 213 214 private static boolean nameEqual(final AbstractTreeIterator a, 215 final AbstractTreeIterator b) { 216 return a.pathCompare(b, TREE_MODE) == 0; 217 } 218 219 private static boolean isTree(final AbstractTreeIterator p) { 220 return FileMode.TREE.equals(p.mode); 221 } 222 223 private boolean skipEntry(final AbstractTreeIterator minRef) 224 throws CorruptObjectException { 225 // A tree D/F may have been handled earlier. We need to 226 // not report this path if it has already been reported. 227 // 228 for (final AbstractTreeIterator t : trees) { 229 if (t.matches == minRef || t.first()) 230 continue; 231 232 int stepsBack = 0; 233 for (;;) { 234 stepsBack++; 235 t.back(1); 236 237 final int cmp = t.pathCompare(minRef, 0); 238 if (cmp == 0) { 239 // We have already seen this "$path" before. Skip it. 240 // 241 t.next(stepsBack); 242 return true; 243 } else if (cmp < 0 || t.first()) { 244 // We cannot find "$path" in t; it will never appear. 245 // 246 t.next(stepsBack); 247 break; 248 } 249 } 250 } 251 252 // We have never seen the current path before. 253 // 254 return false; 255 } 256 257 private AbstractTreeIterator combineDF(final AbstractTreeIterator minRef) 258 throws CorruptObjectException { 259 // Look for a possible D/F conflict forward in the tree(s) 260 // as there may be a "$path/" which matches "$path". Make 261 // such entries match this entry. 262 // 263 AbstractTreeIterator treeMatch = null; 264 for (final AbstractTreeIterator t : trees) { 265 if (t.matches == minRef || t.eof()) 266 continue; 267 268 for (;;) { 269 final int cmp = t.pathCompare(minRef, TREE_MODE); 270 if (cmp < 0) { 271 // The "$path/" may still appear later. 272 // 273 t.matchShift++; 274 t.next(1); 275 if (t.eof()) { 276 t.back(t.matchShift); 277 t.matchShift = 0; 278 break; 279 } 280 } else if (cmp == 0) { 281 // We have a conflict match here. 282 // 283 t.matches = minRef; 284 treeMatch = t; 285 break; 286 } else { 287 // A conflict match is not possible. 288 // 289 if (t.matchShift != 0) { 290 t.back(t.matchShift); 291 t.matchShift = 0; 292 } 293 break; 294 } 295 } 296 } 297 298 if (treeMatch != null) { 299 // If we do have a conflict use one of the directory 300 // matching iterators instead of the file iterator. 301 // This way isSubtree is true and isRecursive works. 302 // 303 for (final AbstractTreeIterator t : trees) 304 if (t.matches == minRef) 305 t.matches = treeMatch; 306 307 if (dfConflict == null) 308 dfConflict = treeMatch; 309 310 return treeMatch; 311 } 312 313 return minRef; 314 } 315 316 @Override 317 void popEntriesEqual() throws CorruptObjectException { 318 final AbstractTreeIterator ch = currentHead; 319 for (int i = 0; i < trees.length; i++) { 320 final AbstractTreeIterator t = trees[i]; 321 if (t.matches == ch) { 322 if (t.matchShift == 0) 323 t.next(1); 324 else { 325 t.back(t.matchShift); 326 t.matchShift = 0; 327 } 328 t.matches = null; 329 } 330 } 331 332 if (ch == dfConflict) 333 dfConflict = null; 334 } 335 336 @Override 337 void skipEntriesEqual() throws CorruptObjectException { 338 final AbstractTreeIterator ch = currentHead; 339 for (int i = 0; i < trees.length; i++) { 340 final AbstractTreeIterator t = trees[i]; 341 if (t.matches == ch) { 342 if (t.matchShift == 0) 343 t.skip(); 344 else { 345 t.back(t.matchShift); 346 t.matchShift = 0; 347 } 348 t.matches = null; 349 } 350 } 351 352 if (ch == dfConflict) 353 dfConflict = null; 354 } 355 356 void stopWalk() throws IOException { 357 if (!needsStopWalk()) { 358 return; 359 } 360 361 // Name conflicts make aborting early difficult. Multiple paths may 362 // exist between the file and directory versions of a name. To ensure 363 // the directory version is skipped over (as it was previously visited 364 // during the file version step) requires popping up the stack and 365 // finishing out each subtree that the walker dove into. Siblings in 366 // parents do not need to be recursed into, bounding the cost. 367 for (;;) { 368 AbstractTreeIterator t = min(); 369 if (t.eof()) { 370 if (depth > 0) { 371 exitSubtree(); 372 popEntriesEqual(); 373 continue; 374 } 375 return; 376 } 377 currentHead = t; 378 skipEntriesEqual(); 379 } 380 } 381 382 private boolean needsStopWalk() { 383 for (AbstractTreeIterator t : trees) { 384 if (t.needsStopWalk()) { 385 return true; 386 } 387 } 388 return false; 389 } 390 391 /** 392 * True if the current entry is covered by a directory/file conflict. 393 * 394 * This means that for some prefix of the current entry's path, this walk 395 * has detected a directory/file conflict. Also true if the current entry 396 * itself is a directory/file conflict. 397 * 398 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns 399 * true then you know that either for path foo or for path foo/bar files and 400 * folders were detected. 401 * 402 * @return <code>true</code> if the current entry is covered by a 403 * directory/file conflict, <code>false</code> otherwise 404 */ 405 public boolean isDirectoryFileConflict() { 406 return dfConflict != null; 407 } 408 }