1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44 package org.eclipse.jgit.diff;
45
46 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
47 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
48
49 import java.io.IOException;
50 import java.util.ArrayList;
51 import java.util.Arrays;
52 import java.util.Collection;
53 import java.util.Collections;
54 import java.util.Comparator;
55 import java.util.HashMap;
56 import java.util.List;
57
58 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
59 import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
60 import org.eclipse.jgit.errors.CancelledException;
61 import org.eclipse.jgit.internal.JGitText;
62 import org.eclipse.jgit.lib.AbbreviatedObjectId;
63 import org.eclipse.jgit.lib.FileMode;
64 import org.eclipse.jgit.lib.NullProgressMonitor;
65 import org.eclipse.jgit.lib.ObjectReader;
66 import org.eclipse.jgit.lib.ProgressMonitor;
67 import org.eclipse.jgit.lib.Repository;
68
69
70
71
72 public class RenameDetector {
73 private static final int EXACT_RENAME_SCORE = 100;
74
75 private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() {
76 @Override
77 public int compare(DiffEntryef="../../../../org/eclipse/jgit/diff/DiffEntry.html#DiffEntry">DiffEntry a, DiffEntry b) {
78 int cmp = nameOf(a).compareTo(nameOf(b));
79 if (cmp == 0)
80 cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType());
81 return cmp;
82 }
83
84 private String nameOf(DiffEntry ent) {
85
86
87
88
89 if (ent.changeType == ChangeType.DELETE)
90 return ent.oldPath;
91 return ent.newPath;
92 }
93
94 private int sortOf(ChangeType changeType) {
95
96
97
98
99 switch (changeType) {
100 case DELETE:
101 return 1;
102 case ADD:
103 return 2;
104 default:
105 return 10;
106 }
107 }
108 };
109
110 private List<DiffEntry> entries;
111
112 private List<DiffEntry> deleted;
113
114 private List<DiffEntry> added;
115
116 private boolean done;
117
118 private final ObjectReader objectReader;
119
120
121 private int renameScore = 60;
122
123
124
125
126
127
128 private int breakScore = -1;
129
130
131 private int renameLimit;
132
133
134 private boolean overRenameLimit;
135
136
137
138
139
140
141
142 public RenameDetector(Repository repo) {
143 this(repo.newObjectReader(), repo.getConfig().get(DiffConfig.KEY));
144 }
145
146
147
148
149
150
151
152
153
154
155 public RenameDetector(ObjectReader reader, DiffConfig cfg) {
156 objectReader = reader.newReader();
157 renameLimit = cfg.getRenameLimit();
158 reset();
159 }
160
161
162
163
164
165
166
167 public int getRenameScore() {
168 return renameScore;
169 }
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184 public void setRenameScore(int score) {
185 if (score < 0 || score > 100)
186 throw new IllegalArgumentException(
187 JGitText.get().similarityScoreMustBeWithinBounds);
188 renameScore = score;
189 }
190
191
192
193
194
195
196
197
198
199
200 public int getBreakScore() {
201 return breakScore;
202 }
203
204
205
206
207
208
209
210
211
212
213
214 public void setBreakScore(int breakScore) {
215 this.breakScore = breakScore;
216 }
217
218
219
220
221
222
223 public int getRenameLimit() {
224 return renameLimit;
225 }
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240 public void setRenameLimit(int limit) {
241 renameLimit = limit;
242 }
243
244
245
246
247
248
249
250
251
252
253
254 public boolean isOverRenameLimit() {
255 if (done)
256 return overRenameLimit;
257 int cnt = Math.max(added.size(), deleted.size());
258 return getRenameLimit() != 0 && getRenameLimit() < cnt;
259 }
260
261
262
263
264
265
266
267
268
269 public void addAll(Collection<DiffEntry> entriesToAdd) {
270 if (done)
271 throw new IllegalStateException(JGitText.get().renamesAlreadyFound);
272
273 for (DiffEntry entry : entriesToAdd) {
274 switch (entry.getChangeType()) {
275 case ADD:
276 added.add(entry);
277 break;
278
279 case DELETE:
280 deleted.add(entry);
281 break;
282
283 case MODIFY:
284 if (sameType(entry.getOldMode(), entry.getNewMode())) {
285 entries.add(entry);
286 } else {
287 List<DiffEntry> tmp = DiffEntry.breakModify(entry);
288 deleted.add(tmp.get(0));
289 added.add(tmp.get(1));
290 }
291 break;
292
293 case COPY:
294 case RENAME:
295 default:
296 entries.add(entry);
297 }
298 }
299 }
300
301
302
303
304
305
306
307
308
309 public void add(DiffEntry entry) {
310 addAll(Collections.singletonList(entry));
311 }
312
313
314
315
316
317
318
319
320
321
322
323 public List<DiffEntry> compute() throws IOException {
324 return compute(NullProgressMonitor.INSTANCE);
325 }
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341 public List<DiffEntry> compute(ProgressMonitor pm)
342 throws IOException, CancelledException {
343 if (!done) {
344 try {
345 return compute(objectReader, pm);
346 } finally {
347 objectReader.close();
348 }
349 }
350 return Collections.unmodifiableList(entries);
351 }
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369 public List<DiffEntry> compute(ObjectReader reader, ProgressMonitor pm)
370 throws IOException, CancelledException {
371 final ContentSource cs = ContentSource.create(reader);
372 return compute(new ContentSource.Pair(cs, cs), pm);
373 }
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391 public List<DiffEntry> compute(ContentSource.Pair reader, ProgressMonitor pm)
392 throws IOException, CancelledException {
393 if (!done) {
394 done = true;
395
396 if (pm == null)
397 pm = NullProgressMonitor.INSTANCE;
398
399 if (0 < breakScore)
400 breakModifies(reader, pm);
401
402 if (!added.isEmpty() && !deleted.isEmpty())
403 findExactRenames(pm);
404
405 if (!added.isEmpty() && !deleted.isEmpty())
406 findContentRenames(reader, pm);
407
408 if (0 < breakScore && !added.isEmpty() && !deleted.isEmpty())
409 rejoinModifies(pm);
410
411 entries.addAll(added);
412 added = null;
413
414 entries.addAll(deleted);
415 deleted = null;
416
417 Collections.sort(entries, DIFF_COMPARATOR);
418 }
419 return Collections.unmodifiableList(entries);
420 }
421
422
423
424
425 public void reset() {
426 entries = new ArrayList<>();
427 deleted = new ArrayList<>();
428 added = new ArrayList<>();
429 done = false;
430 }
431
432 private void advanceOrCancel(ProgressMonitor pm) throws CancelledException {
433 if (pm.isCancelled()) {
434 throw new CancelledException(JGitText.get().renameCancelled);
435 }
436 pm.update(1);
437 }
438
439 private void breakModifies(ContentSource.Pair reader, ProgressMonitor pm)
440 throws IOException, CancelledException {
441 ArrayList<DiffEntry> newEntries = new ArrayList<>(entries.size());
442
443 pm.beginTask(JGitText.get().renamesBreakingModifies, entries.size());
444
445 for (int i = 0; i < entries.size(); i++) {
446 DiffEntry e = entries.get(i);
447 if (e.getChangeType() == ChangeType.MODIFY) {
448 int score = calculateModifyScore(reader, e);
449 if (score < breakScore) {
450 List<DiffEntry> tmp = DiffEntry.breakModify(e);
451 DiffEntry del = tmp.get(0);
452 del.score = score;
453 deleted.add(del);
454 added.add(tmp.get(1));
455 } else {
456 newEntries.add(e);
457 }
458 } else {
459 newEntries.add(e);
460 }
461 advanceOrCancel(pm);
462 }
463
464 entries = newEntries;
465 }
466
467 private void rejoinModifies(ProgressMonitor pm) throws CancelledException {
468 HashMap<String, DiffEntry> nameMap = new HashMap<>();
469 ArrayList<DiffEntry> newAdded = new ArrayList<>(added.size());
470
471 pm.beginTask(JGitText.get().renamesRejoiningModifies, added.size()
472 + deleted.size());
473
474 for (DiffEntry src : deleted) {
475 nameMap.put(src.oldPath, src);
476 advanceOrCancel(pm);
477 }
478
479 for (DiffEntry dst : added) {
480 DiffEntry src = nameMap.remove(dst.newPath);
481 if (src != null) {
482 if (sameType(src.oldMode, dst.newMode)) {
483 entries.add(DiffEntry.pair(ChangeType.MODIFY, src, dst,
484 src.score));
485 } else {
486 nameMap.put(src.oldPath, src);
487 newAdded.add(dst);
488 }
489 } else {
490 newAdded.add(dst);
491 }
492 advanceOrCancel(pm);
493 }
494
495 added = newAdded;
496 deleted = new ArrayList<>(nameMap.values());
497 }
498
499 private int calculateModifyScore(ContentSource.Pair reader, DiffEntry d)
500 throws IOException {
501 try {
502 SimilarityIndex src = new SimilarityIndex();
503 src.hash(reader.open(OLD, d));
504 src.sort();
505
506 SimilarityIndex dst = new SimilarityIndex();
507 dst.hash(reader.open(NEW, d));
508 dst.sort();
509 return src.score(dst, 100);
510 } catch (TableFullException tableFull) {
511
512
513
514
515 overRenameLimit = true;
516 return breakScore + 1;
517 }
518 }
519
520 private void findContentRenames(ContentSource.Pair reader,
521 ProgressMonitor pm)
522 throws IOException, CancelledException {
523 int cnt = Math.max(added.size(), deleted.size());
524 if (getRenameLimit() == 0 || cnt <= getRenameLimit()) {
525 SimilarityRenameDetector d;
526
527 d = new SimilarityRenameDetector(reader, deleted, added);
528 d.setRenameScore(getRenameScore());
529 d.compute(pm);
530 overRenameLimit |= d.isTableOverflow();
531 deleted = d.getLeftOverSources();
532 added = d.getLeftOverDestinations();
533 entries.addAll(d.getMatches());
534 } else {
535 overRenameLimit = true;
536 }
537 }
538
539 @SuppressWarnings("unchecked")
540 private void findExactRenames(ProgressMonitor pm)
541 throws CancelledException {
542 pm.beginTask(JGitText.get().renamesFindingExact,
543 added.size() + added.size() + deleted.size()
544 + added.size() * deleted.size());
545
546 HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm);
547 HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm);
548
549 ArrayList<DiffEntry> uniqueAdds = new ArrayList<>(added.size());
550 ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<>();
551
552 for (Object o : addedMap.values()) {
553 if (o instanceof DiffEntry)
554 uniqueAdds.add((DiffEntry) o);
555 else
556 nonUniqueAdds.add((List<DiffEntry>) o);
557 }
558
559 ArrayList<DiffEntry> left = new ArrayList<>(added.size());
560
561 for (DiffEntry a : uniqueAdds) {
562 Object del = deletedMap.get(a.newId);
563 if (del instanceof DiffEntry) {
564
565
566 DiffEntry e = (DiffEntry) del;
567 if (sameType(e.oldMode, a.newMode)) {
568 e.changeType = ChangeType.RENAME;
569 entries.add(exactRename(e, a));
570 } else {
571 left.add(a);
572 }
573 } else if (del != null) {
574
575
576 List<DiffEntry> list = (List<DiffEntry>) del;
577 DiffEntry best = bestPathMatch(a, list);
578 if (best != null) {
579 best.changeType = ChangeType.RENAME;
580 entries.add(exactRename(best, a));
581 } else {
582 left.add(a);
583 }
584 } else {
585 left.add(a);
586 }
587 advanceOrCancel(pm);
588 }
589
590 for (List<DiffEntry> adds : nonUniqueAdds) {
591 Object o = deletedMap.get(adds.get(0).newId);
592 if (o instanceof DiffEntry) {
593
594
595
596 DiffEntry d = (DiffEntry) o;
597 DiffEntry best = bestPathMatch(d, adds);
598 if (best != null) {
599 d.changeType = ChangeType.RENAME;
600 entries.add(exactRename(d, best));
601 for (DiffEntry a : adds) {
602 if (a != best) {
603 if (sameType(d.oldMode, a.newMode)) {
604 entries.add(exactCopy(d, a));
605 } else {
606 left.add(a);
607 }
608 }
609 }
610 } else {
611 left.addAll(adds);
612 }
613 } else if (o != null) {
614
615
616
617 List<DiffEntry> dels = (List<DiffEntry>) o;
618 long[] matrix = new long[dels.size() * adds.size()];
619 int mNext = 0;
620 for (int delIdx = 0; delIdx < dels.size(); delIdx++) {
621 String deletedName = dels.get(delIdx).oldPath;
622
623 for (int addIdx = 0; addIdx < adds.size(); addIdx++) {
624 String addedName = adds.get(addIdx).newPath;
625
626 int score = SimilarityRenameDetector.nameScore(addedName, deletedName);
627 matrix[mNext] = SimilarityRenameDetector.encode(score, delIdx, addIdx);
628 mNext++;
629 if (pm.isCancelled()) {
630 throw new CancelledException(
631 JGitText.get().renameCancelled);
632 }
633 }
634 }
635
636 Arrays.sort(matrix);
637
638 for (--mNext; mNext >= 0; mNext--) {
639 long ent = matrix[mNext];
640 int delIdx = SimilarityRenameDetector.srcFile(ent);
641 int addIdx = SimilarityRenameDetector.dstFile(ent);
642 DiffEntry d = dels.get(delIdx);
643 DiffEntry a = adds.get(addIdx);
644
645 if (a == null) {
646 advanceOrCancel(pm);
647 continue;
648 }
649
650 ChangeType type;
651 if (d.changeType == ChangeType.DELETE) {
652
653
654
655
656 d.changeType = ChangeType.RENAME;
657 type = ChangeType.RENAME;
658 } else {
659 type = ChangeType.COPY;
660 }
661
662 entries.add(DiffEntry.pair(type, d, a, 100));
663 adds.set(addIdx, null);
664 advanceOrCancel(pm);
665 }
666 } else {
667 left.addAll(adds);
668 }
669 advanceOrCancel(pm);
670 }
671 added = left;
672
673 deleted = new ArrayList<>(deletedMap.size());
674 for (Object o : deletedMap.values()) {
675 if (o instanceof DiffEntry) {
676 DiffEntry e = (DiffEntry) o;
677 if (e.changeType == ChangeType.DELETE)
678 deleted.add(e);
679 } else {
680 List<DiffEntry> list = (List<DiffEntry>) o;
681 for (DiffEntry e : list) {
682 if (e.changeType == ChangeType.DELETE)
683 deleted.add(e);
684 }
685 }
686 }
687 pm.endTask();
688 }
689
690
691
692
693
694
695
696
697
698
699
700
701
702 private static DiffEntry./../org/eclipse/jgit/diff/DiffEntry.html#DiffEntry">DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) {
703 DiffEntry best = null;
704 int score = -1;
705
706 for (DiffEntry d : list) {
707 if (sameType(mode(d), mode(src))) {
708 int tmp = SimilarityRenameDetector
709 .nameScore(path(d), path(src));
710 if (tmp > score) {
711 best = d;
712 score = tmp;
713 }
714 }
715 }
716
717 return best;
718 }
719
720 @SuppressWarnings("unchecked")
721 private HashMap<AbbreviatedObjectId, Object> populateMap(
722 List<DiffEntry> diffEntries, ProgressMonitor pm)
723 throws CancelledException {
724 HashMap<AbbreviatedObjectId, Object> map = new HashMap<>();
725 for (DiffEntry de : diffEntries) {
726 Object old = map.put(id(de), de);
727 if (old instanceof DiffEntry) {
728 ArrayList<DiffEntry> list = new ArrayList<>(2);
729 list.add((DiffEntry) old);
730 list.add(de);
731 map.put(id(de), list);
732 } else if (old != null) {
733
734 ((List<DiffEntry>) old).add(de);
735 map.put(id(de), old);
736 }
737 advanceOrCancel(pm);
738 }
739 return map;
740 }
741
742 private static String path(DiffEntry de) {
743 return de.changeType == ChangeType.DELETE ? de.oldPath : de.newPath;
744 }
745
746 private static FileMode mode(DiffEntry de) {
747 return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode;
748 }
749
750 private static AbbreviatedObjectId id(DiffEntry de) {
751 return de.changeType == ChangeType.DELETE ? de.oldId : de.newId;
752 }
753
754 static boolean sameType(FileModeref="../../../../org/eclipse/jgit/lib/FileMode.html#FileMode">FileMode a, FileMode b) {
755
756
757
758
759 int aType = a.getBits() & FileMode.TYPE_MASK;
760 int bType = b.getBits() & FileMode.TYPE_MASK;
761 return aType == bType;
762 }
763
764 private static DiffEntry="../../../../org/eclipse/jgit/diff/DiffEntry.html#DiffEntry">DiffEntry/../../org/eclipse/jgit/diff/DiffEntry.html#DiffEntry">DiffEntry exactRename(DiffEntry="../../../../org/eclipse/jgit/diff/DiffEntry.html#DiffEntry">DiffEntry src, DiffEntry dst) {
765 return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE);
766 }
767
768 private static DiffEntry="../../../../org/eclipse/jgit/diff/DiffEntry.html#DiffEntry">DiffEntry../../../org/eclipse/jgit/diff/DiffEntry.html#DiffEntry">DiffEntry exactCopy(DiffEntry="../../../../org/eclipse/jgit/diff/DiffEntry.html#DiffEntry">DiffEntry src, DiffEntry dst) {
769 return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE);
770 }
771 }