1
2
3
4
5
6
7
8
9
10
11 package org.eclipse.jgit.diff;
12
13 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
14 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
15 import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
16
17 import java.io.IOException;
18 import java.util.ArrayList;
19 import java.util.Arrays;
20 import java.util.Collection;
21 import java.util.Collections;
22 import java.util.Comparator;
23 import java.util.HashMap;
24 import java.util.List;
25
26 import org.eclipse.jgit.api.errors.CanceledException;
27 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
28 import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
29 import org.eclipse.jgit.internal.JGitText;
30 import org.eclipse.jgit.lib.AbbreviatedObjectId;
31 import org.eclipse.jgit.lib.FileMode;
32 import org.eclipse.jgit.lib.NullProgressMonitor;
33 import org.eclipse.jgit.lib.ObjectReader;
34 import org.eclipse.jgit.lib.ProgressMonitor;
35 import org.eclipse.jgit.lib.Repository;
36
37
38
39
40 public class RenameDetector {
41 private static final int EXACT_RENAME_SCORE = 100;
42
43 private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<>() {
44
45 @Override
46 public int compare(DiffEntry a, DiffEntry b) {
47 int cmp = nameOf(a).compareTo(nameOf(b));
48 if (cmp == 0)
49 cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType());
50 return cmp;
51 }
52
53 private String nameOf(DiffEntry ent) {
54
55
56
57
58 if (ent.changeType == ChangeType.DELETE)
59 return ent.oldPath;
60 return ent.newPath;
61 }
62
63 private int sortOf(ChangeType changeType) {
64
65
66
67
68 switch (changeType) {
69 case DELETE:
70 return 1;
71 case ADD:
72 return 2;
73 default:
74 return 10;
75 }
76 }
77 };
78
79 private List<DiffEntry> entries;
80
81 private List<DiffEntry> deleted;
82
83 private List<DiffEntry> added;
84
85 private boolean done;
86
87 private final ObjectReader objectReader;
88
89
90 private int renameScore = 60;
91
92
93
94
95
96
97 private int breakScore = -1;
98
99
100 private int renameLimit;
101
102
103
104
105
106 private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
107
108
109
110
111
112
113 private boolean skipContentRenamesForBinaryFiles = false;
114
115
116 private boolean overRenameLimit;
117
118
119
120
121
122
123
124 public RenameDetector(Repository repo) {
125 this(repo.newObjectReader(), repo.getConfig().get(DiffConfig.KEY));
126 }
127
128
129
130
131
132
133
134
135
136
137 public RenameDetector(ObjectReader reader, DiffConfig cfg) {
138 objectReader = reader.newReader();
139 renameLimit = cfg.getRenameLimit();
140 reset();
141 }
142
143
144
145
146
147
148
149 public int getRenameScore() {
150 return renameScore;
151 }
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166 public void setRenameScore(int score) {
167 if (score < 0 || score > 100)
168 throw new IllegalArgumentException(
169 JGitText.get().similarityScoreMustBeWithinBounds);
170 renameScore = score;
171 }
172
173
174
175
176
177
178
179
180
181
182 public int getBreakScore() {
183 return breakScore;
184 }
185
186
187
188
189
190
191
192
193
194
195
196 public void setBreakScore(int breakScore) {
197 this.breakScore = breakScore;
198 }
199
200
201
202
203
204
205 public int getRenameLimit() {
206 return renameLimit;
207 }
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222 public void setRenameLimit(int limit) {
223 renameLimit = limit;
224 }
225
226
227
228
229
230
231
232
233 public int getBigFileThreshold() { return bigFileThreshold; }
234
235
236
237
238
239
240
241
242 public void setBigFileThreshold(int threshold) {
243 this.bigFileThreshold = threshold;
244 }
245
246
247
248
249
250
251
252 public boolean getSkipContentRenamesForBinaryFiles() {
253 return skipContentRenamesForBinaryFiles;
254 }
255
256
257
258
259
260
261
262 public void setSkipContentRenamesForBinaryFiles(boolean value) {
263 this.skipContentRenamesForBinaryFiles = value;
264 }
265
266
267
268
269
270
271
272
273
274
275
276 public boolean isOverRenameLimit() {
277 if (done)
278 return overRenameLimit;
279 int cnt = Math.max(added.size(), deleted.size());
280 return getRenameLimit() != 0 && getRenameLimit() < cnt;
281 }
282
283
284
285
286
287
288
289
290
291 public void addAll(Collection<DiffEntry> entriesToAdd) {
292 if (done)
293 throw new IllegalStateException(JGitText.get().renamesAlreadyFound);
294
295 for (DiffEntry entry : entriesToAdd) {
296 switch (entry.getChangeType()) {
297 case ADD:
298 added.add(entry);
299 break;
300
301 case DELETE:
302 deleted.add(entry);
303 break;
304
305 case MODIFY:
306 if (sameType(entry.getOldMode(), entry.getNewMode())) {
307 entries.add(entry);
308 } else {
309 List<DiffEntry> tmp = DiffEntry.breakModify(entry);
310 deleted.add(tmp.get(0));
311 added.add(tmp.get(1));
312 }
313 break;
314
315 case COPY:
316 case RENAME:
317 default:
318 entries.add(entry);
319 }
320 }
321 }
322
323
324
325
326
327
328
329
330
331 public void add(DiffEntry entry) {
332 addAll(Collections.singletonList(entry));
333 }
334
335
336
337
338
339
340
341
342
343
344
345
346 public List<DiffEntry> compute() throws IOException {
347 try {
348 return compute(NullProgressMonitor.INSTANCE);
349 } catch (CanceledException e) {
350
351 return Collections.emptyList();
352 }
353 }
354
355
356
357
358
359
360
361
362
363
364
365
366
367 public List<DiffEntry> compute(ProgressMonitor pm)
368 throws IOException, CanceledException {
369 if (!done) {
370 try {
371 return compute(objectReader, pm);
372 } finally {
373 objectReader.close();
374 }
375 }
376 return Collections.unmodifiableList(entries);
377 }
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393 public List<DiffEntry> compute(ObjectReader reader, ProgressMonitor pm)
394 throws IOException, CanceledException {
395 final ContentSource cs = ContentSource.create(reader);
396 return compute(new ContentSource.Pair(cs, cs), pm);
397 }
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413 public List<DiffEntry> compute(ContentSource.Pair reader, ProgressMonitor pm)
414 throws IOException, CanceledException {
415 if (!done) {
416 done = true;
417
418 if (pm == null)
419 pm = NullProgressMonitor.INSTANCE;
420
421 if (0 < breakScore)
422 breakModifies(reader, pm);
423
424 if (!added.isEmpty() && !deleted.isEmpty())
425 findExactRenames(pm);
426
427 if (!added.isEmpty() && !deleted.isEmpty())
428 findContentRenames(reader, pm);
429
430 if (0 < breakScore && !added.isEmpty() && !deleted.isEmpty())
431 rejoinModifies(pm);
432
433 entries.addAll(added);
434 added = null;
435
436 entries.addAll(deleted);
437 deleted = null;
438
439 Collections.sort(entries, DIFF_COMPARATOR);
440 }
441 return Collections.unmodifiableList(entries);
442 }
443
444
445
446
447 public void reset() {
448 entries = new ArrayList<>();
449 deleted = new ArrayList<>();
450 added = new ArrayList<>();
451 done = false;
452 }
453
454 private void advanceOrCancel(ProgressMonitor pm) throws CanceledException {
455 if (pm.isCancelled()) {
456 throw new CanceledException(JGitText.get().renameCancelled);
457 }
458 pm.update(1);
459 }
460
461 private void breakModifies(ContentSource.Pair reader, ProgressMonitor pm)
462 throws IOException, CanceledException {
463 ArrayList<DiffEntry> newEntries = new ArrayList<>(entries.size());
464
465 pm.beginTask(JGitText.get().renamesBreakingModifies, entries.size());
466
467 for (int i = 0; i < entries.size(); i++) {
468 DiffEntry e = entries.get(i);
469 if (e.getChangeType() == ChangeType.MODIFY) {
470 int score = calculateModifyScore(reader, e);
471 if (score < breakScore) {
472 List<DiffEntry> tmp = DiffEntry.breakModify(e);
473 DiffEntry del = tmp.get(0);
474 del.score = score;
475 deleted.add(del);
476 added.add(tmp.get(1));
477 } else {
478 newEntries.add(e);
479 }
480 } else {
481 newEntries.add(e);
482 }
483 advanceOrCancel(pm);
484 }
485
486 entries = newEntries;
487 }
488
489 private void rejoinModifies(ProgressMonitor pm) throws CanceledException {
490 HashMap<String, DiffEntry> nameMap = new HashMap<>();
491 ArrayList<DiffEntry> newAdded = new ArrayList<>(added.size());
492
493 pm.beginTask(JGitText.get().renamesRejoiningModifies, added.size()
494 + deleted.size());
495
496 for (DiffEntry src : deleted) {
497 nameMap.put(src.oldPath, src);
498 advanceOrCancel(pm);
499 }
500
501 for (DiffEntry dst : added) {
502 DiffEntry src = nameMap.remove(dst.newPath);
503 if (src != null) {
504 if (sameType(src.oldMode, dst.newMode)) {
505 entries.add(DiffEntry.pair(ChangeType.MODIFY, src, dst,
506 src.score));
507 } else {
508 nameMap.put(src.oldPath, src);
509 newAdded.add(dst);
510 }
511 } else {
512 newAdded.add(dst);
513 }
514 advanceOrCancel(pm);
515 }
516
517 added = newAdded;
518 deleted = new ArrayList<>(nameMap.values());
519 }
520
521 private int calculateModifyScore(ContentSource.Pair reader, DiffEntry d)
522 throws IOException {
523 try {
524 SimilarityIndex src = new SimilarityIndex();
525 src.hash(reader.open(OLD, d));
526 src.sort();
527
528 SimilarityIndex dst = new SimilarityIndex();
529 dst.hash(reader.open(NEW, d));
530 dst.sort();
531 return src.score(dst, 100);
532 } catch (TableFullException tableFull) {
533
534
535
536
537 overRenameLimit = true;
538 return breakScore + 1;
539 }
540 }
541
542 private void findContentRenames(ContentSource.Pair reader,
543 ProgressMonitor pm)
544 throws IOException, CanceledException {
545 int cnt = Math.max(added.size(), deleted.size());
546 if (getRenameLimit() == 0 || cnt <= getRenameLimit()) {
547 SimilarityRenameDetector d;
548
549 d = new SimilarityRenameDetector(reader, deleted, added);
550 d.setRenameScore(getRenameScore());
551 d.setBigFileThreshold(getBigFileThreshold());
552 d.setSkipBinaryFiles(getSkipContentRenamesForBinaryFiles());
553 d.compute(pm);
554 overRenameLimit |= d.isTableOverflow();
555 deleted = d.getLeftOverSources();
556 added = d.getLeftOverDestinations();
557 entries.addAll(d.getMatches());
558 } else {
559 overRenameLimit = true;
560 }
561 }
562
563 @SuppressWarnings("unchecked")
564 private void findExactRenames(ProgressMonitor pm)
565 throws CanceledException {
566 pm.beginTask(JGitText.get().renamesFindingExact,
567 added.size() + added.size() + deleted.size()
568 + added.size() * deleted.size());
569
570 HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm);
571 HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm);
572
573 ArrayList<DiffEntry> uniqueAdds = new ArrayList<>(added.size());
574 ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<>();
575
576 for (Object o : addedMap.values()) {
577 if (o instanceof DiffEntry)
578 uniqueAdds.add((DiffEntry) o);
579 else
580 nonUniqueAdds.add((List<DiffEntry>) o);
581 }
582
583 ArrayList<DiffEntry> left = new ArrayList<>(added.size());
584
585 for (DiffEntry a : uniqueAdds) {
586 Object del = deletedMap.get(a.newId);
587 if (del instanceof DiffEntry) {
588
589
590 DiffEntry e = (DiffEntry) del;
591 if (sameType(e.oldMode, a.newMode)) {
592 e.changeType = ChangeType.RENAME;
593 entries.add(exactRename(e, a));
594 } else {
595 left.add(a);
596 }
597 } else if (del != null) {
598
599
600 List<DiffEntry> list = (List<DiffEntry>) del;
601 DiffEntry best = bestPathMatch(a, list);
602 if (best != null) {
603 best.changeType = ChangeType.RENAME;
604 entries.add(exactRename(best, a));
605 } else {
606 left.add(a);
607 }
608 } else {
609 left.add(a);
610 }
611 advanceOrCancel(pm);
612 }
613
614 for (List<DiffEntry> adds : nonUniqueAdds) {
615 Object o = deletedMap.get(adds.get(0).newId);
616 if (o instanceof DiffEntry) {
617
618
619
620 DiffEntry d = (DiffEntry) o;
621 DiffEntry best = bestPathMatch(d, adds);
622 if (best != null) {
623 d.changeType = ChangeType.RENAME;
624 entries.add(exactRename(d, best));
625 for (DiffEntry a : adds) {
626 if (a != best) {
627 if (sameType(d.oldMode, a.newMode)) {
628 entries.add(exactCopy(d, a));
629 } else {
630 left.add(a);
631 }
632 }
633 }
634 } else {
635 left.addAll(adds);
636 }
637 } else if (o != null) {
638
639
640
641 List<DiffEntry> dels = (List<DiffEntry>) o;
642 long[] matrix = new long[dels.size() * adds.size()];
643 int mNext = 0;
644 for (int delIdx = 0; delIdx < dels.size(); delIdx++) {
645 String deletedName = dels.get(delIdx).oldPath;
646
647 for (int addIdx = 0; addIdx < adds.size(); addIdx++) {
648 String addedName = adds.get(addIdx).newPath;
649
650 int score = SimilarityRenameDetector.nameScore(addedName, deletedName);
651 matrix[mNext] = SimilarityRenameDetector.encode(score, delIdx, addIdx);
652 mNext++;
653 if (pm.isCancelled()) {
654 throw new CanceledException(
655 JGitText.get().renameCancelled);
656 }
657 }
658 }
659
660 Arrays.sort(matrix);
661
662 for (--mNext; mNext >= 0; mNext--) {
663 long ent = matrix[mNext];
664 int delIdx = SimilarityRenameDetector.srcFile(ent);
665 int addIdx = SimilarityRenameDetector.dstFile(ent);
666 DiffEntry d = dels.get(delIdx);
667 DiffEntry a = adds.get(addIdx);
668
669 if (a == null) {
670 advanceOrCancel(pm);
671 continue;
672 }
673
674 ChangeType type;
675 if (d.changeType == ChangeType.DELETE) {
676
677
678
679
680 d.changeType = ChangeType.RENAME;
681 type = ChangeType.RENAME;
682 } else {
683 type = ChangeType.COPY;
684 }
685
686 entries.add(DiffEntry.pair(type, d, a, 100));
687 adds.set(addIdx, null);
688 advanceOrCancel(pm);
689 }
690 } else {
691 left.addAll(adds);
692 }
693 advanceOrCancel(pm);
694 }
695 added = left;
696
697 deleted = new ArrayList<>(deletedMap.size());
698 for (Object o : deletedMap.values()) {
699 if (o instanceof DiffEntry) {
700 DiffEntry e = (DiffEntry) o;
701 if (e.changeType == ChangeType.DELETE)
702 deleted.add(e);
703 } else {
704 List<DiffEntry> list = (List<DiffEntry>) o;
705 for (DiffEntry e : list) {
706 if (e.changeType == ChangeType.DELETE)
707 deleted.add(e);
708 }
709 }
710 }
711 pm.endTask();
712 }
713
714
715
716
717
718
719
720
721
722
723
724
725
726 private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) {
727 DiffEntry best = null;
728 int score = -1;
729
730 for (DiffEntry d : list) {
731 if (sameType(mode(d), mode(src))) {
732 int tmp = SimilarityRenameDetector
733 .nameScore(path(d), path(src));
734 if (tmp > score) {
735 best = d;
736 score = tmp;
737 }
738 }
739 }
740
741 return best;
742 }
743
744 @SuppressWarnings("unchecked")
745 private HashMap<AbbreviatedObjectId, Object> populateMap(
746 List<DiffEntry> diffEntries, ProgressMonitor pm)
747 throws CanceledException {
748 HashMap<AbbreviatedObjectId, Object> map = new HashMap<>();
749 for (DiffEntry de : diffEntries) {
750 Object old = map.put(id(de), de);
751 if (old instanceof DiffEntry) {
752 ArrayList<DiffEntry> list = new ArrayList<>(2);
753 list.add((DiffEntry) old);
754 list.add(de);
755 map.put(id(de), list);
756 } else if (old != null) {
757
758 ((List<DiffEntry>) old).add(de);
759 map.put(id(de), old);
760 }
761 advanceOrCancel(pm);
762 }
763 return map;
764 }
765
766 private static String path(DiffEntry de) {
767 return de.changeType == ChangeType.DELETE ? de.oldPath : de.newPath;
768 }
769
770 private static FileMode mode(DiffEntry de) {
771 return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode;
772 }
773
774 private static AbbreviatedObjectId id(DiffEntry de) {
775 return de.changeType == ChangeType.DELETE ? de.oldId : de.newId;
776 }
777
778 static boolean sameType(FileMode a, FileMode b) {
779
780
781
782
783 int aType = a.getBits() & FileMode.TYPE_MASK;
784 int bType = b.getBits() & FileMode.TYPE_MASK;
785 return aType == bType;
786 }
787
788 private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) {
789 return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE);
790 }
791
792 private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) {
793 return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE);
794 }
795 }