1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44 package org.eclipse.jgit.diff;
45
46 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
47 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
48
49 import java.io.IOException;
50 import java.util.ArrayList;
51 import java.util.Arrays;
52 import java.util.Collection;
53 import java.util.Collections;
54 import java.util.Comparator;
55 import java.util.HashMap;
56 import java.util.List;
57
58 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
59 import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
60 import org.eclipse.jgit.internal.JGitText;
61 import org.eclipse.jgit.lib.AbbreviatedObjectId;
62 import org.eclipse.jgit.lib.FileMode;
63 import org.eclipse.jgit.lib.NullProgressMonitor;
64 import org.eclipse.jgit.lib.ObjectReader;
65 import org.eclipse.jgit.lib.ProgressMonitor;
66 import org.eclipse.jgit.lib.Repository;
67
68
69
70
71 public class RenameDetector {
72 private static final int EXACT_RENAME_SCORE = 100;
73
74 private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() {
75 @Override
76 public int compare(DiffEntry a, DiffEntry b) {
77 int cmp = nameOf(a).compareTo(nameOf(b));
78 if (cmp == 0)
79 cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType());
80 return cmp;
81 }
82
83 private String nameOf(DiffEntry ent) {
84
85
86
87
88 if (ent.changeType == ChangeType.DELETE)
89 return ent.oldPath;
90 return ent.newPath;
91 }
92
93 private int sortOf(ChangeType changeType) {
94
95
96
97
98 switch (changeType) {
99 case DELETE:
100 return 1;
101 case ADD:
102 return 2;
103 default:
104 return 10;
105 }
106 }
107 };
108
109 private List<DiffEntry> entries;
110
111 private List<DiffEntry> deleted;
112
113 private List<DiffEntry> added;
114
115 private boolean done;
116
117 private final ObjectReader objectReader;
118
119
120 private int renameScore = 60;
121
122
123
124
125
126
127 private int breakScore = -1;
128
129
130 private int renameLimit;
131
132
133 private boolean overRenameLimit;
134
135
136
137
138
139
140
/**
 * Create a new rename detector for the given repository.
 * <p>
 * The reader opened from the repository is owned by this detector and is
 * used directly. Previously it was only used to derive a second reader and
 * was itself never closed.
 *
 * @param repo
 *            the repository to use for rename detection
 */
public RenameDetector(Repository repo) {
    this(repo.newObjectReader(), repo.getConfig().get(DiffConfig.KEY), true);
}

/**
 * Create a new rename detector with a specified reader and diff config.
 * <p>
 * The supplied reader stays owned by the caller; the detector works on a
 * reader obtained from {@code reader.newReader()}.
 *
 * @param reader
 *            reader to derive the detector's own reader from
 * @param cfg
 *            diff config supplying the initial rename limit
 */
public RenameDetector(ObjectReader reader, DiffConfig cfg) {
    this(reader, cfg, false);
}

/**
 * Shared initialization for the public constructors.
 *
 * @param reader
 *            reader to use, or to derive a private reader from
 * @param cfg
 *            diff config supplying the initial rename limit
 * @param ownsReader
 *            {@code true} when {@code reader} was opened solely for this
 *            detector and may be used (and later closed) directly
 */
private RenameDetector(ObjectReader reader, DiffConfig cfg,
        boolean ownsReader) {
    objectReader = ownsReader ? reader : reader.newReader();
    renameLimit = cfg.getRenameLimit();
    reset();
}
159
160
161
162
163
164
165
166 public int getRenameScore() {
167 return renameScore;
168 }
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183 public void setRenameScore(int score) {
184 if (score < 0 || score > 100)
185 throw new IllegalArgumentException(
186 JGitText.get().similarityScoreMustBeWithinBounds);
187 renameScore = score;
188 }
189
190
191
192
193
194
195
196
197
198
199 public int getBreakScore() {
200 return breakScore;
201 }
202
203
204
205
206
207
208
209
210
211
212
213 public void setBreakScore(int breakScore) {
214 this.breakScore = breakScore;
215 }
216
217
218
219
220
221
222 public int getRenameLimit() {
223 return renameLimit;
224 }
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239 public void setRenameLimit(int limit) {
240 renameLimit = limit;
241 }
242
243
244
245
246
247
248
249
250
251
252
253 public boolean isOverRenameLimit() {
254 if (done)
255 return overRenameLimit;
256 int cnt = Math.max(added.size(), deleted.size());
257 return getRenameLimit() != 0 && getRenameLimit() < cnt;
258 }
259
260
261
262
263
264
265
266
267
268 public void addAll(Collection<DiffEntry> entriesToAdd) {
269 if (done)
270 throw new IllegalStateException(JGitText.get().renamesAlreadyFound);
271
272 for (DiffEntry entry : entriesToAdd) {
273 switch (entry.getChangeType()) {
274 case ADD:
275 added.add(entry);
276 break;
277
278 case DELETE:
279 deleted.add(entry);
280 break;
281
282 case MODIFY:
283 if (sameType(entry.getOldMode(), entry.getNewMode())) {
284 entries.add(entry);
285 } else {
286 List<DiffEntry> tmp = DiffEntry.breakModify(entry);
287 deleted.add(tmp.get(0));
288 added.add(tmp.get(1));
289 }
290 break;
291
292 case COPY:
293 case RENAME:
294 default:
295 entries.add(entry);
296 }
297 }
298 }
299
300
301
302
303
304
305
306
307
308 public void add(DiffEntry entry) {
309 addAll(Collections.singletonList(entry));
310 }
311
312
313
314
315
316
317
318
319
320
321
322 public List<DiffEntry> compute() throws IOException {
323 return compute(NullProgressMonitor.INSTANCE);
324 }
325
326
327
328
329
330
331
332
333
334
335
336 public List<DiffEntry> compute(ProgressMonitor pm) throws IOException {
337 if (!done) {
338 try {
339 return compute(objectReader, pm);
340 } finally {
341 objectReader.close();
342 }
343 }
344 return Collections.unmodifiableList(entries);
345 }
346
347
348
349
350
351
352
353
354
355
356
357
358
359 public List<DiffEntry> compute(ObjectReader reader, ProgressMonitor pm)
360 throws IOException {
361 final ContentSource cs = ContentSource.create(reader);
362 return compute(new ContentSource.Pair(cs, cs), pm);
363 }
364
365
366
367
368
369
370
371
372
373
374
375
376
377 public List<DiffEntry> compute(ContentSource.Pair reader, ProgressMonitor pm)
378 throws IOException {
379 if (!done) {
380 done = true;
381
382 if (pm == null)
383 pm = NullProgressMonitor.INSTANCE;
384
385 if (0 < breakScore)
386 breakModifies(reader, pm);
387
388 if (!added.isEmpty() && !deleted.isEmpty())
389 findExactRenames(pm);
390
391 if (!added.isEmpty() && !deleted.isEmpty())
392 findContentRenames(reader, pm);
393
394 if (0 < breakScore && !added.isEmpty() && !deleted.isEmpty())
395 rejoinModifies(pm);
396
397 entries.addAll(added);
398 added = null;
399
400 entries.addAll(deleted);
401 deleted = null;
402
403 Collections.sort(entries, DIFF_COMPARATOR);
404 }
405 return Collections.unmodifiableList(entries);
406 }
407
408
409
410
411 public void reset() {
412 entries = new ArrayList<>();
413 deleted = new ArrayList<>();
414 added = new ArrayList<>();
415 done = false;
416 }
417
418 private void breakModifies(ContentSource.Pair reader, ProgressMonitor pm)
419 throws IOException {
420 ArrayList<DiffEntry> newEntries = new ArrayList<>(entries.size());
421
422 pm.beginTask(JGitText.get().renamesBreakingModifies, entries.size());
423
424 for (int i = 0; i < entries.size(); i++) {
425 DiffEntry e = entries.get(i);
426 if (e.getChangeType() == ChangeType.MODIFY) {
427 int score = calculateModifyScore(reader, e);
428 if (score < breakScore) {
429 List<DiffEntry> tmp = DiffEntry.breakModify(e);
430 DiffEntry del = tmp.get(0);
431 del.score = score;
432 deleted.add(del);
433 added.add(tmp.get(1));
434 } else {
435 newEntries.add(e);
436 }
437 } else {
438 newEntries.add(e);
439 }
440 pm.update(1);
441 }
442
443 entries = newEntries;
444 }
445
446 private void rejoinModifies(ProgressMonitor pm) {
447 HashMap<String, DiffEntry> nameMap = new HashMap<>();
448 ArrayList<DiffEntry> newAdded = new ArrayList<>(added.size());
449
450 pm.beginTask(JGitText.get().renamesRejoiningModifies, added.size()
451 + deleted.size());
452
453 for (DiffEntry src : deleted) {
454 nameMap.put(src.oldPath, src);
455 pm.update(1);
456 }
457
458 for (DiffEntry dst : added) {
459 DiffEntry src = nameMap.remove(dst.newPath);
460 if (src != null) {
461 if (sameType(src.oldMode, dst.newMode)) {
462 entries.add(DiffEntry.pair(ChangeType.MODIFY, src, dst,
463 src.score));
464 } else {
465 nameMap.put(src.oldPath, src);
466 newAdded.add(dst);
467 }
468 } else {
469 newAdded.add(dst);
470 }
471 pm.update(1);
472 }
473
474 added = newAdded;
475 deleted = new ArrayList<>(nameMap.values());
476 }
477
478 private int calculateModifyScore(ContentSource.Pair reader, DiffEntry d)
479 throws IOException {
480 try {
481 SimilarityIndex src = new SimilarityIndex();
482 src.hash(reader.open(OLD, d));
483 src.sort();
484
485 SimilarityIndex dst = new SimilarityIndex();
486 dst.hash(reader.open(NEW, d));
487 dst.sort();
488 return src.score(dst, 100);
489 } catch (TableFullException tableFull) {
490
491
492
493
494 overRenameLimit = true;
495 return breakScore + 1;
496 }
497 }
498
499 private void findContentRenames(ContentSource.Pair reader,
500 ProgressMonitor pm)
501 throws IOException {
502 int cnt = Math.max(added.size(), deleted.size());
503 if (getRenameLimit() == 0 || cnt <= getRenameLimit()) {
504 SimilarityRenameDetector d;
505
506 d = new SimilarityRenameDetector(reader, deleted, added);
507 d.setRenameScore(getRenameScore());
508 d.compute(pm);
509 overRenameLimit |= d.isTableOverflow();
510 deleted = d.getLeftOverSources();
511 added = d.getLeftOverDestinations();
512 entries.addAll(d.getMatches());
513 } else {
514 overRenameLimit = true;
515 }
516 }
517
518 @SuppressWarnings("unchecked")
519 private void findExactRenames(ProgressMonitor pm) {
520 pm.beginTask(JGitText.get().renamesFindingExact,
521 added.size() + added.size() + deleted.size()
522 + added.size() * deleted.size());
523
524 HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm);
525 HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm);
526
527 ArrayList<DiffEntry> uniqueAdds = new ArrayList<>(added.size());
528 ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<>();
529
530 for (Object o : addedMap.values()) {
531 if (o instanceof DiffEntry)
532 uniqueAdds.add((DiffEntry) o);
533 else
534 nonUniqueAdds.add((List<DiffEntry>) o);
535 }
536
537 ArrayList<DiffEntry> left = new ArrayList<>(added.size());
538
539 for (DiffEntry a : uniqueAdds) {
540 Object del = deletedMap.get(a.newId);
541 if (del instanceof DiffEntry) {
542
543
544 DiffEntry e = (DiffEntry) del;
545 if (sameType(e.oldMode, a.newMode)) {
546 e.changeType = ChangeType.RENAME;
547 entries.add(exactRename(e, a));
548 } else {
549 left.add(a);
550 }
551 } else if (del != null) {
552
553
554 List<DiffEntry> list = (List<DiffEntry>) del;
555 DiffEntry best = bestPathMatch(a, list);
556 if (best != null) {
557 best.changeType = ChangeType.RENAME;
558 entries.add(exactRename(best, a));
559 } else {
560 left.add(a);
561 }
562 } else {
563 left.add(a);
564 }
565 pm.update(1);
566 }
567
568 for (List<DiffEntry> adds : nonUniqueAdds) {
569 Object o = deletedMap.get(adds.get(0).newId);
570 if (o instanceof DiffEntry) {
571
572
573
574 DiffEntry d = (DiffEntry) o;
575 DiffEntry best = bestPathMatch(d, adds);
576 if (best != null) {
577 d.changeType = ChangeType.RENAME;
578 entries.add(exactRename(d, best));
579 for (DiffEntry a : adds) {
580 if (a != best) {
581 if (sameType(d.oldMode, a.newMode)) {
582 entries.add(exactCopy(d, a));
583 } else {
584 left.add(a);
585 }
586 }
587 }
588 } else {
589 left.addAll(adds);
590 }
591 } else if (o != null) {
592
593
594
595 List<DiffEntry> dels = (List<DiffEntry>) o;
596 long[] matrix = new long[dels.size() * adds.size()];
597 int mNext = 0;
598 for (int delIdx = 0; delIdx < dels.size(); delIdx++) {
599 String deletedName = dels.get(delIdx).oldPath;
600
601 for (int addIdx = 0; addIdx < adds.size(); addIdx++) {
602 String addedName = adds.get(addIdx).newPath;
603
604 int score = SimilarityRenameDetector.nameScore(addedName, deletedName);
605 matrix[mNext] = SimilarityRenameDetector.encode(score, delIdx, addIdx);
606 mNext++;
607 }
608 }
609
610 Arrays.sort(matrix);
611
612 for (--mNext; mNext >= 0; mNext--) {
613 long ent = matrix[mNext];
614 int delIdx = SimilarityRenameDetector.srcFile(ent);
615 int addIdx = SimilarityRenameDetector.dstFile(ent);
616 DiffEntry d = dels.get(delIdx);
617 DiffEntry a = adds.get(addIdx);
618
619 if (a == null) {
620 pm.update(1);
621 continue;
622 }
623
624 ChangeType type;
625 if (d.changeType == ChangeType.DELETE) {
626
627
628
629
630 d.changeType = ChangeType.RENAME;
631 type = ChangeType.RENAME;
632 } else {
633 type = ChangeType.COPY;
634 }
635
636 entries.add(DiffEntry.pair(type, d, a, 100));
637 adds.set(addIdx, null);
638 pm.update(1);
639 }
640 } else {
641 left.addAll(adds);
642 }
643 }
644 added = left;
645
646 deleted = new ArrayList<>(deletedMap.size());
647 for (Object o : deletedMap.values()) {
648 if (o instanceof DiffEntry) {
649 DiffEntry e = (DiffEntry) o;
650 if (e.changeType == ChangeType.DELETE)
651 deleted.add(e);
652 } else {
653 List<DiffEntry> list = (List<DiffEntry>) o;
654 for (DiffEntry e : list) {
655 if (e.changeType == ChangeType.DELETE)
656 deleted.add(e);
657 }
658 }
659 }
660 pm.endTask();
661 }
662
663
664
665
666
667
668
669
670
671
672
673
674
675 private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) {
676 DiffEntry best = null;
677 int score = -1;
678
679 for (DiffEntry d : list) {
680 if (sameType(mode(d), mode(src))) {
681 int tmp = SimilarityRenameDetector
682 .nameScore(path(d), path(src));
683 if (tmp > score) {
684 best = d;
685 score = tmp;
686 }
687 }
688 }
689
690 return best;
691 }
692
693 @SuppressWarnings("unchecked")
694 private HashMap<AbbreviatedObjectId, Object> populateMap(
695 List<DiffEntry> diffEntries, ProgressMonitor pm) {
696 HashMap<AbbreviatedObjectId, Object> map = new HashMap<>();
697 for (DiffEntry de : diffEntries) {
698 Object old = map.put(id(de), de);
699 if (old instanceof DiffEntry) {
700 ArrayList<DiffEntry> list = new ArrayList<>(2);
701 list.add((DiffEntry) old);
702 list.add(de);
703 map.put(id(de), list);
704 } else if (old != null) {
705
706 ((List<DiffEntry>) old).add(de);
707 map.put(id(de), old);
708 }
709 pm.update(1);
710 }
711 return map;
712 }
713
714 private static String path(DiffEntry de) {
715 return de.changeType == ChangeType.DELETE ? de.oldPath : de.newPath;
716 }
717
718 private static FileMode mode(DiffEntry de) {
719 return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode;
720 }
721
722 private static AbbreviatedObjectId id(DiffEntry de) {
723 return de.changeType == ChangeType.DELETE ? de.oldId : de.newId;
724 }
725
726 static boolean sameType(FileMode a, FileMode b) {
727
728
729
730
731 int aType = a.getBits() & FileMode.TYPE_MASK;
732 int bType = b.getBits() & FileMode.TYPE_MASK;
733 return aType == bType;
734 }
735
736 private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) {
737 return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE);
738 }
739
740 private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) {
741 return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE);
742 }
743 }