1 /* 2 * Copyright (C) 2008, Google Inc. 3 * and other copyright owners as documented in the project's IP log. 4 * 5 * This program and the accompanying materials are made available 6 * under the terms of the Eclipse Distribution License v1.0 which 7 * accompanies this distribution, is reproduced below, and is 8 * available at http://www.eclipse.org/org/documents/edl-v10.php 9 * 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * - Neither the name of the Eclipse Foundation, Inc. nor the 25 * names of its contributors may be used to endorse or promote 26 * products derived from this software without specific prior 27 * written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 */ 43 44 package org.eclipse.jgit.treewalk; 45 46 import org.eclipse.jgit.dircache.DirCacheBuilder; 47 import org.eclipse.jgit.errors.CorruptObjectException; 48 import org.eclipse.jgit.lib.FileMode; 49 import org.eclipse.jgit.lib.ObjectReader; 50 import org.eclipse.jgit.lib.Repository; 51 52 /** 53 * Specialized TreeWalk to detect directory-file (D/F) name conflicts. 54 * <p> 55 * Due to the way a Git tree is organized the standard {@link TreeWalk} won't 56 * easily find a D/F conflict when merging two or more trees together. In the 57 * standard TreeWalk the file will be returned first, and then much later the 58 * directory will be returned. This makes it impossible for the application to 59 * efficiently detect and handle the conflict. 60 * <p> 61 * Using this walk implementation causes the directory to report earlier than 62 * usual, at the same time as the non-directory entry. This permits the 63 * application to handle the D/F conflict in a single step. The directory is 64 * returned only once, so it does not get returned later in the iteration. 65 * <p> 66 * When a D/F conflict is detected {@link TreeWalk#isSubtree()} will return true 67 * and {@link TreeWalk#enterSubtree()} will recurse into the subtree, no matter 68 * which iterator originally supplied the subtree. 69 * <p> 70 * Because conflicted directories report early, using this walk implementation 71 * to populate a {@link DirCacheBuilder} may cause the automatic resorting to 72 * run and fix the entry ordering. 73 * <p> 74 * This walk implementation requires more CPU to implement a look-ahead and a 75 * look-behind to merge a D/F pair together, or to skip a previously reported 76 * directory. In typical Git repositories the look-ahead cost is 0 and the 77 * look-behind doesn't trigger, as users tend not to create trees which contain 78 * both "foo" as a directory and "foo.c" as a file. 79 * <p> 80 * In the worst-case however several thousand look-ahead steps per walk step may 81 * be necessary, making the overhead quite significant. Since this worst-case 82 * should never happen this walk implementation has made the time/space tradeoff 83 * in favor of more-time/less-space, as that better suits the typical case. 84 */ 85 public class NameConflictTreeWalk extends TreeWalk { 86 private static final int TREE_MODE = FileMode.TREE.getBits(); 87 88 private boolean fastMinHasMatch; 89 90 private AbstractTreeIterator dfConflict; 91 92 /** 93 * Create a new tree walker for a given repository. 94 * 95 * @param repo 96 * the repository the walker will obtain data from. 97 */ 98 public NameConflictTreeWalk(final Repository repo) { 99 this(repo.newObjectReader()); 100 } 101 102 /** 103 * Create a new tree walker for a given repository. 104 * 105 * @param or 106 * the reader the walker will obtain tree data from. 107 */ 108 public NameConflictTreeWalk(final ObjectReader or) { 109 super(or); 110 } 111 112 @Override 113 AbstractTreeIterator min() throws CorruptObjectException { 114 for (;;) { 115 final AbstractTreeIterator minRef = fastMin(); 116 if (fastMinHasMatch) 117 return minRef; 118 119 if (isTree(minRef)) { 120 if (skipEntry(minRef)) { 121 for (final AbstractTreeIterator t : trees) { 122 if (t.matches == minRef) { 123 t.next(1); 124 t.matches = null; 125 } 126 } 127 continue; 128 } 129 return minRef; 130 } 131 132 return combineDF(minRef); 133 } 134 } 135 136 private AbstractTreeIterator fastMin() { 137 fastMinHasMatch = true; 138 139 int i = 0; 140 AbstractTreeIterator minRef = trees[i]; 141 while (minRef.eof() && ++i < trees.length) 142 minRef = trees[i]; 143 if (minRef.eof()) 144 return minRef; 145 146 boolean hasConflict = false; 147 minRef.matches = minRef; 148 while (++i < trees.length) { 149 final AbstractTreeIterator t = trees[i]; 150 if (t.eof()) 151 continue; 152 153 final int cmp = t.pathCompare(minRef); 154 if (cmp < 0) { 155 if (fastMinHasMatch && isTree(minRef) && !isTree(t) 156 && nameEqual(minRef, t)) { 157 // We used to be at a tree, but now we are at a file 158 // with the same name. Allow the file to match the 159 // tree anyway. 160 // 161 t.matches = minRef; 162 hasConflict = true; 163 } else { 164 fastMinHasMatch = false; 165 t.matches = t; 166 minRef = t; 167 } 168 } else if (cmp == 0) { 169 // Exact name/mode match is best. 170 // 171 t.matches = minRef; 172 } else if (fastMinHasMatch && isTree(t) && !isTree(minRef) 173 && nameEqual(t, minRef)) { 174 // The minimum is a file (non-tree) but the next entry 175 // of this iterator is a tree whose name matches our file. 176 // This is a classic D/F conflict and commonly occurs like 177 // this, with no gaps in between the file and directory. 178 // 179 // Use the tree as the minimum instead (see combineDF). 180 // 181 182 for (int k = 0; k < i; k++) { 183 final AbstractTreeIterator p = trees[k]; 184 if (p.matches == minRef) 185 p.matches = t; 186 } 187 t.matches = t; 188 minRef = t; 189 hasConflict = true; 190 } else 191 fastMinHasMatch = false; 192 } 193 194 if (hasConflict && fastMinHasMatch && dfConflict == null) 195 dfConflict = minRef; 196 return minRef; 197 } 198 199 private static boolean nameEqual(final AbstractTreeIterator a, 200 final AbstractTreeIterator b) { 201 return a.pathCompare(b, TREE_MODE) == 0; 202 } 203 204 private static boolean isTree(final AbstractTreeIterator p) { 205 return FileMode.TREE.equals(p.mode); 206 } 207 208 private boolean skipEntry(final AbstractTreeIterator minRef) 209 throws CorruptObjectException { 210 // A tree D/F may have been handled earlier. We need to 211 // not report this path if it has already been reported. 212 // 213 for (final AbstractTreeIterator t : trees) { 214 if (t.matches == minRef || t.first()) 215 continue; 216 217 int stepsBack = 0; 218 for (;;) { 219 stepsBack++; 220 t.back(1); 221 222 final int cmp = t.pathCompare(minRef, 0); 223 if (cmp == 0) { 224 // We have already seen this "$path" before. Skip it. 225 // 226 t.next(stepsBack); 227 return true; 228 } else if (cmp < 0 || t.first()) { 229 // We cannot find "$path" in t; it will never appear. 230 // 231 t.next(stepsBack); 232 break; 233 } 234 } 235 } 236 237 // We have never seen the current path before. 238 // 239 return false; 240 } 241 242 private AbstractTreeIterator combineDF(final AbstractTreeIterator minRef) 243 throws CorruptObjectException { 244 // Look for a possible D/F conflict forward in the tree(s) 245 // as there may be a "$path/" which matches "$path". Make 246 // such entries match this entry. 247 // 248 AbstractTreeIterator treeMatch = null; 249 for (final AbstractTreeIterator t : trees) { 250 if (t.matches == minRef || t.eof()) 251 continue; 252 253 for (;;) { 254 final int cmp = t.pathCompare(minRef, TREE_MODE); 255 if (cmp < 0) { 256 // The "$path/" may still appear later. 257 // 258 t.matchShift++; 259 t.next(1); 260 if (t.eof()) { 261 t.back(t.matchShift); 262 t.matchShift = 0; 263 break; 264 } 265 } else if (cmp == 0) { 266 // We have a conflict match here. 267 // 268 t.matches = minRef; 269 treeMatch = t; 270 break; 271 } else { 272 // A conflict match is not possible. 273 // 274 if (t.matchShift != 0) { 275 t.back(t.matchShift); 276 t.matchShift = 0; 277 } 278 break; 279 } 280 } 281 } 282 283 if (treeMatch != null) { 284 // If we do have a conflict use one of the directory 285 // matching iterators instead of the file iterator. 286 // This way isSubtree is true and isRecursive works. 287 // 288 for (final AbstractTreeIterator t : trees) 289 if (t.matches == minRef) 290 t.matches = treeMatch; 291 292 if (dfConflict == null) 293 dfConflict = treeMatch; 294 295 return treeMatch; 296 } 297 298 return minRef; 299 } 300 301 @Override 302 void popEntriesEqual() throws CorruptObjectException { 303 final AbstractTreeIterator ch = currentHead; 304 for (int i = 0; i < trees.length; i++) { 305 final AbstractTreeIterator t = trees[i]; 306 if (t.matches == ch) { 307 if (t.matchShift == 0) 308 t.next(1); 309 else { 310 t.back(t.matchShift); 311 t.matchShift = 0; 312 } 313 t.matches = null; 314 } 315 } 316 317 if (ch == dfConflict) 318 dfConflict = null; 319 } 320 321 @Override 322 void skipEntriesEqual() throws CorruptObjectException { 323 final AbstractTreeIterator ch = currentHead; 324 for (int i = 0; i < trees.length; i++) { 325 final AbstractTreeIterator t = trees[i]; 326 if (t.matches == ch) { 327 if (t.matchShift == 0) 328 t.skip(); 329 else { 330 t.back(t.matchShift); 331 t.matchShift = 0; 332 } 333 t.matches = null; 334 } 335 } 336 337 if (ch == dfConflict) 338 dfConflict = null; 339 } 340 341 /** 342 * True if the current entry is covered by a directory/file conflict. 343 * 344 * This means that for some prefix of the current entry's path, this walk 345 * has detected a directory/file conflict. Also true if the current entry 346 * itself is a directory/file conflict. 347 * 348 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns 349 * true then you know that either for path foo or for path foo/bar files and 350 * folders were detected. 351 * 352 * @return <code>true</code> if the current entry is covered by a 353 * directory/file conflict, <code>false</code> otherwise 354 */ 355 public boolean isDirectoryFileConflict() { 356 return dfConflict != null; 357 } 358 }