1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44 package org.eclipse.jgit.diff;
45
46 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
47 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
48
49 import java.io.IOException;
50 import java.util.ArrayList;
51 import java.util.Arrays;
52 import java.util.Collection;
53 import java.util.Collections;
54 import java.util.Comparator;
55 import java.util.HashMap;
56 import java.util.List;
57
58 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
59 import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
60 import org.eclipse.jgit.internal.JGitText;
61 import org.eclipse.jgit.lib.AbbreviatedObjectId;
62 import org.eclipse.jgit.lib.FileMode;
63 import org.eclipse.jgit.lib.NullProgressMonitor;
64 import org.eclipse.jgit.lib.ObjectReader;
65 import org.eclipse.jgit.lib.ProgressMonitor;
66 import org.eclipse.jgit.lib.Repository;
67
68
69 public class RenameDetector {
/**
 * Score recorded on the pairs built by exactRename and exactCopy, i.e. pairs
 * that were matched by object id rather than by content similarity.
 */
private static final int EXACT_RENAME_SCORE = 100;
71
72 private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() {
73 public int compare(DiffEntry a, DiffEntry b) {
74 int cmp = nameOf(a).compareTo(nameOf(b));
75 if (cmp == 0)
76 cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType());
77 return cmp;
78 }
79
80 private String nameOf(DiffEntry ent) {
81
82
83
84
85 if (ent.changeType == ChangeType.DELETE)
86 return ent.oldPath;
87 return ent.newPath;
88 }
89
90 private int sortOf(ChangeType changeType) {
91
92
93
94
95 switch (changeType) {
96 case DELETE:
97 return 1;
98 case ADD:
99 return 2;
100 default:
101 return 10;
102 }
103 }
104 };
105
/** Entries that are not pending rename candidates; this list is what compute() returns. */
private List<DiffEntry> entries;

/** DELETE entries still available as rename sources; set to null once compute() has run. */
private List<DiffEntry> deleted;

/** ADD entries still available as rename destinations; set to null once compute() has run. */
private List<DiffEntry> added;

/** True once compute() has started; cleared again by reset(). */
private boolean done;

/** Reader obtained in the constructor; used and then closed by compute(ProgressMonitor). */
private final ObjectReader objectReader;

/** Similarity score handed to the content rename detector; setRenameScore keeps it within 0..100. */
private int renameScore = 60;

/**
 * Score below which a MODIFY is split into a DELETE/ADD pair before rename
 * detection. Breaking only happens when this value is greater than zero, so
 * the default of -1 disables it.
 */
private int breakScore = -1;

/** Largest add/delete count for which content renames are attempted; 0 means no limit. */
private int renameLimit;

/** Set during compute() when the rename limit was exceeded or a similarity table overflowed. */
private boolean overRenameLimit;
131
132
133
134
135
136
137
/**
 * Create a new rename detector for the given repository.
 *
 * @param repo
 *            the repository supplying the object reader and the diff
 *            configuration (looked up with DiffConfig.KEY).
 */
public RenameDetector(Repository repo) {
	// NOTE(review): the reader created here is only used as a factory by the
	// delegated constructor (which calls newReader() on it) and is never
	// closed by this class -- confirm whether that leaks a reader.
	this(repo.newObjectReader(), repo.getConfig().get(DiffConfig.KEY));
}
141
142
143
144
145
146
147
148
149
150
/**
 * Create a new rename detector with a specified reader and diff config.
 *
 * @param reader
 *            reader from which this detector obtains its own reader via
 *            newReader().
 * @param cfg
 *            diff config supplying the initial rename limit.
 */
public RenameDetector(ObjectReader reader, DiffConfig cfg) {
	this.objectReader = reader.newReader();
	this.renameLimit = cfg.getRenameLimit();
	// Start with empty entry lists and done == false.
	reset();
}
156
157
158
159
160
161 public int getRenameScore() {
162 return renameScore;
163 }
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178 public void setRenameScore(int score) {
179 if (score < 0 || score > 100)
180 throw new IllegalArgumentException(
181 JGitText.get().similarityScoreMustBeWithinBounds);
182 renameScore = score;
183 }
184
185
186
187
188
189
190
191
192 public int getBreakScore() {
193 return breakScore;
194 }
195
196
197
198
199
200
201
202
203
/**
 * Set the score below which a MODIFY is broken into a DELETE/ADD pair.
 *
 * @param breakScore
 *            the new threshold. compute() only breaks modifies when this is
 *            greater than zero. No range check is applied here.
 */
public void setBreakScore(int breakScore) {
	this.breakScore = breakScore;
}
207
208
209 public int getRenameLimit() {
210 return renameLimit;
211 }
212
213
214
215
216
217
218
219
220
221
222
223
224 public void setRenameLimit(int limit) {
225 renameLimit = limit;
226 }
227
228
229
230
231
232
233
234
235
236
237
238 public boolean isOverRenameLimit() {
239 if (done)
240 return overRenameLimit;
241 int cnt = Math.max(added.size(), deleted.size());
242 return getRenameLimit() != 0 && getRenameLimit() < cnt;
243 }
244
245
246
247
248
249
250
251
252
253 public void addAll(Collection<DiffEntry> entriesToAdd) {
254 if (done)
255 throw new IllegalStateException(JGitText.get().renamesAlreadyFound);
256
257 for (DiffEntry entry : entriesToAdd) {
258 switch (entry.getChangeType()) {
259 case ADD:
260 added.add(entry);
261 break;
262
263 case DELETE:
264 deleted.add(entry);
265 break;
266
267 case MODIFY:
268 if (sameType(entry.getOldMode(), entry.getNewMode())) {
269 entries.add(entry);
270 } else {
271 List<DiffEntry> tmp = DiffEntry.breakModify(entry);
272 deleted.add(tmp.get(0));
273 added.add(tmp.get(1));
274 }
275 break;
276
277 case COPY:
278 case RENAME:
279 default:
280 entries.add(entry);
281 }
282 }
283 }
284
285
286
287
288
289
290
291
292
293 public void add(DiffEntry entry) {
294 addAll(Collections.singletonList(entry));
295 }
296
297
298
299
300
301
302
303
304
305
306
307 public List<DiffEntry> compute() throws IOException {
308 return compute(NullProgressMonitor.INSTANCE);
309 }
310
311
312
313
314
315
316
317
318
319
320
321 public List<DiffEntry> compute(ProgressMonitor pm) throws IOException {
322 if (!done) {
323 try {
324 return compute(objectReader, pm);
325 } finally {
326 objectReader.close();
327 }
328 }
329 return Collections.unmodifiableList(entries);
330 }
331
332
333
334
335
336
337
338
339
340
341
342
343
344 public List<DiffEntry> compute(ObjectReader reader, ProgressMonitor pm)
345 throws IOException {
346 final ContentSource cs = ContentSource.create(reader);
347 return compute(new ContentSource.Pair(cs, cs), pm);
348 }
349
350
351
352
353
354
355
356
357
358
359
360
361
362 public List<DiffEntry> compute(ContentSource.Pair reader, ProgressMonitor pm)
363 throws IOException {
364 if (!done) {
365 done = true;
366
367 if (pm == null)
368 pm = NullProgressMonitor.INSTANCE;
369
370 if (0 < breakScore)
371 breakModifies(reader, pm);
372
373 if (!added.isEmpty() && !deleted.isEmpty())
374 findExactRenames(pm);
375
376 if (!added.isEmpty() && !deleted.isEmpty())
377 findContentRenames(reader, pm);
378
379 if (0 < breakScore && !added.isEmpty() && !deleted.isEmpty())
380 rejoinModifies(pm);
381
382 entries.addAll(added);
383 added = null;
384
385 entries.addAll(deleted);
386 deleted = null;
387
388 Collections.sort(entries, DIFF_COMPARATOR);
389 }
390 return Collections.unmodifiableList(entries);
391 }
392
393
394 public void reset() {
395 entries = new ArrayList<DiffEntry>();
396 deleted = new ArrayList<DiffEntry>();
397 added = new ArrayList<DiffEntry>();
398 done = false;
399 }
400
401 private void breakModifies(ContentSource.Pair reader, ProgressMonitor pm)
402 throws IOException {
403 ArrayList<DiffEntry> newEntries = new ArrayList<DiffEntry>(entries.size());
404
405 pm.beginTask(JGitText.get().renamesBreakingModifies, entries.size());
406
407 for (int i = 0; i < entries.size(); i++) {
408 DiffEntry e = entries.get(i);
409 if (e.getChangeType() == ChangeType.MODIFY) {
410 int score = calculateModifyScore(reader, e);
411 if (score < breakScore) {
412 List<DiffEntry> tmp = DiffEntry.breakModify(e);
413 DiffEntry del = tmp.get(0);
414 del.score = score;
415 deleted.add(del);
416 added.add(tmp.get(1));
417 } else {
418 newEntries.add(e);
419 }
420 } else {
421 newEntries.add(e);
422 }
423 pm.update(1);
424 }
425
426 entries = newEntries;
427 }
428
429 private void rejoinModifies(ProgressMonitor pm) {
430 HashMap<String, DiffEntry> nameMap = new HashMap<String, DiffEntry>();
431 ArrayList<DiffEntry> newAdded = new ArrayList<DiffEntry>(added.size());
432
433 pm.beginTask(JGitText.get().renamesRejoiningModifies, added.size()
434 + deleted.size());
435
436 for (DiffEntry src : deleted) {
437 nameMap.put(src.oldPath, src);
438 pm.update(1);
439 }
440
441 for (DiffEntry dst : added) {
442 DiffEntry src = nameMap.remove(dst.newPath);
443 if (src != null) {
444 if (sameType(src.oldMode, dst.newMode)) {
445 entries.add(DiffEntry.pair(ChangeType.MODIFY, src, dst,
446 src.score));
447 } else {
448 nameMap.put(src.oldPath, src);
449 newAdded.add(dst);
450 }
451 } else {
452 newAdded.add(dst);
453 }
454 pm.update(1);
455 }
456
457 added = newAdded;
458 deleted = new ArrayList<DiffEntry>(nameMap.values());
459 }
460
461 private int calculateModifyScore(ContentSource.Pair reader, DiffEntry d)
462 throws IOException {
463 try {
464 SimilarityIndex src = new SimilarityIndex();
465 src.hash(reader.open(OLD, d));
466 src.sort();
467
468 SimilarityIndex dst = new SimilarityIndex();
469 dst.hash(reader.open(NEW, d));
470 dst.sort();
471 return src.score(dst, 100);
472 } catch (TableFullException tableFull) {
473
474
475
476
477 overRenameLimit = true;
478 return breakScore + 1;
479 }
480 }
481
482 private void findContentRenames(ContentSource.Pair reader,
483 ProgressMonitor pm)
484 throws IOException {
485 int cnt = Math.max(added.size(), deleted.size());
486 if (getRenameLimit() == 0 || cnt <= getRenameLimit()) {
487 SimilarityRenameDetector d;
488
489 d = new SimilarityRenameDetector(reader, deleted, added);
490 d.setRenameScore(getRenameScore());
491 d.compute(pm);
492 overRenameLimit |= d.isTableOverflow();
493 deleted = d.getLeftOverSources();
494 added = d.getLeftOverDestinations();
495 entries.addAll(d.getMatches());
496 } else {
497 overRenameLimit = true;
498 }
499 }
500
501 @SuppressWarnings("unchecked")
502 private void findExactRenames(ProgressMonitor pm) {
503 pm.beginTask(JGitText.get().renamesFindingExact,
504 added.size() + added.size() + deleted.size()
505 + added.size() * deleted.size());
506
507 HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm);
508 HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm);
509
510 ArrayList<DiffEntry> uniqueAdds = new ArrayList<DiffEntry>(added.size());
511 ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<List<DiffEntry>>();
512
513 for (Object o : addedMap.values()) {
514 if (o instanceof DiffEntry)
515 uniqueAdds.add((DiffEntry) o);
516 else
517 nonUniqueAdds.add((List<DiffEntry>) o);
518 }
519
520 ArrayList<DiffEntry> left = new ArrayList<DiffEntry>(added.size());
521
522 for (DiffEntry a : uniqueAdds) {
523 Object del = deletedMap.get(a.newId);
524 if (del instanceof DiffEntry) {
525
526
527 DiffEntry e = (DiffEntry) del;
528 if (sameType(e.oldMode, a.newMode)) {
529 e.changeType = ChangeType.RENAME;
530 entries.add(exactRename(e, a));
531 } else {
532 left.add(a);
533 }
534 } else if (del != null) {
535
536
537 List<DiffEntry> list = (List<DiffEntry>) del;
538 DiffEntry best = bestPathMatch(a, list);
539 if (best != null) {
540 best.changeType = ChangeType.RENAME;
541 entries.add(exactRename(best, a));
542 } else {
543 left.add(a);
544 }
545 } else {
546 left.add(a);
547 }
548 pm.update(1);
549 }
550
551 for (List<DiffEntry> adds : nonUniqueAdds) {
552 Object o = deletedMap.get(adds.get(0).newId);
553 if (o instanceof DiffEntry) {
554
555
556
557 DiffEntry d = (DiffEntry) o;
558 DiffEntry best = bestPathMatch(d, adds);
559 if (best != null) {
560 d.changeType = ChangeType.RENAME;
561 entries.add(exactRename(d, best));
562 for (DiffEntry a : adds) {
563 if (a != best) {
564 if (sameType(d.oldMode, a.newMode)) {
565 entries.add(exactCopy(d, a));
566 } else {
567 left.add(a);
568 }
569 }
570 }
571 } else {
572 left.addAll(adds);
573 }
574 } else if (o != null) {
575
576
577
578 List<DiffEntry> dels = (List<DiffEntry>) o;
579 long[] matrix = new long[dels.size() * adds.size()];
580 int mNext = 0;
581 for (int delIdx = 0; delIdx < dels.size(); delIdx++) {
582 String deletedName = dels.get(delIdx).oldPath;
583
584 for (int addIdx = 0; addIdx < adds.size(); addIdx++) {
585 String addedName = adds.get(addIdx).newPath;
586
587 int score = SimilarityRenameDetector.nameScore(addedName, deletedName);
588 matrix[mNext] = SimilarityRenameDetector.encode(score, delIdx, addIdx);
589 mNext++;
590 }
591 }
592
593 Arrays.sort(matrix);
594
595 for (--mNext; mNext >= 0; mNext--) {
596 long ent = matrix[mNext];
597 int delIdx = SimilarityRenameDetector.srcFile(ent);
598 int addIdx = SimilarityRenameDetector.dstFile(ent);
599 DiffEntry d = dels.get(delIdx);
600 DiffEntry a = adds.get(addIdx);
601
602 if (a == null) {
603 pm.update(1);
604 continue;
605 }
606
607 ChangeType type;
608 if (d.changeType == ChangeType.DELETE) {
609
610
611
612
613 d.changeType = ChangeType.RENAME;
614 type = ChangeType.RENAME;
615 } else {
616 type = ChangeType.COPY;
617 }
618
619 entries.add(DiffEntry.pair(type, d, a, 100));
620 adds.set(addIdx, null);
621 pm.update(1);
622 }
623 } else {
624 left.addAll(adds);
625 }
626 }
627 added = left;
628
629 deleted = new ArrayList<DiffEntry>(deletedMap.size());
630 for (Object o : deletedMap.values()) {
631 if (o instanceof DiffEntry) {
632 DiffEntry e = (DiffEntry) o;
633 if (e.changeType == ChangeType.DELETE)
634 deleted.add(e);
635 } else {
636 List<DiffEntry> list = (List<DiffEntry>) o;
637 for (DiffEntry e : list) {
638 if (e.changeType == ChangeType.DELETE)
639 deleted.add(e);
640 }
641 }
642 }
643 pm.endTask();
644 }
645
646
647
648
649
650
651
652
653
654
655
656
657
658 private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) {
659 DiffEntry best = null;
660 int score = -1;
661
662 for (DiffEntry d : list) {
663 if (sameType(mode(d), mode(src))) {
664 int tmp = SimilarityRenameDetector
665 .nameScore(path(d), path(src));
666 if (tmp > score) {
667 best = d;
668 score = tmp;
669 }
670 }
671 }
672
673 return best;
674 }
675
676 @SuppressWarnings("unchecked")
677 private HashMap<AbbreviatedObjectId, Object> populateMap(
678 List<DiffEntry> diffEntries, ProgressMonitor pm) {
679 HashMap<AbbreviatedObjectId, Object> map = new HashMap<AbbreviatedObjectId, Object>();
680 for (DiffEntry de : diffEntries) {
681 Object old = map.put(id(de), de);
682 if (old instanceof DiffEntry) {
683 ArrayList<DiffEntry> list = new ArrayList<DiffEntry>(2);
684 list.add((DiffEntry) old);
685 list.add(de);
686 map.put(id(de), list);
687 } else if (old != null) {
688
689 ((List<DiffEntry>) old).add(de);
690 map.put(id(de), old);
691 }
692 pm.update(1);
693 }
694 return map;
695 }
696
697 private static String path(DiffEntry de) {
698 return de.changeType == ChangeType.DELETE ? de.oldPath : de.newPath;
699 }
700
701 private static FileMode mode(DiffEntry de) {
702 return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode;
703 }
704
705 private static AbbreviatedObjectId id(DiffEntry de) {
706 return de.changeType == ChangeType.DELETE ? de.oldId : de.newId;
707 }
708
709 static boolean sameType(FileMode a, FileMode b) {
710
711
712
713
714 int aType = a.getBits() & FileMode.TYPE_MASK;
715 int bType = b.getBits() & FileMode.TYPE_MASK;
716 return aType == bType;
717 }
718
719 private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) {
720 return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE);
721 }
722
723 private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) {
724 return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE);
725 }
726 }