1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44 package org.eclipse.jgit.diff;
45
46 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
47 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
48
49 import java.io.IOException;
50 import java.util.ArrayList;
51 import java.util.Arrays;
52 import java.util.Collection;
53 import java.util.Collections;
54 import java.util.Comparator;
55 import java.util.HashMap;
56 import java.util.List;
57
58 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
59 import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
60 import org.eclipse.jgit.internal.JGitText;
61 import org.eclipse.jgit.lib.AbbreviatedObjectId;
62 import org.eclipse.jgit.lib.FileMode;
63 import org.eclipse.jgit.lib.NullProgressMonitor;
64 import org.eclipse.jgit.lib.ObjectReader;
65 import org.eclipse.jgit.lib.ProgressMonitor;
66 import org.eclipse.jgit.lib.Repository;
67
68
69 public class RenameDetector {
70 private static final int EXACT_RENAME_SCORE = 100;
71
72 private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() {
73 @Override
74 public int compare(DiffEntry a, DiffEntry b) {
75 int cmp = nameOf(a).compareTo(nameOf(b));
76 if (cmp == 0)
77 cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType());
78 return cmp;
79 }
80
81 private String nameOf(DiffEntry ent) {
82
83
84
85
86 if (ent.changeType == ChangeType.DELETE)
87 return ent.oldPath;
88 return ent.newPath;
89 }
90
91 private int sortOf(ChangeType changeType) {
92
93
94
95
96 switch (changeType) {
97 case DELETE:
98 return 1;
99 case ADD:
100 return 2;
101 default:
102 return 10;
103 }
104 }
105 };
106
107 private List<DiffEntry> entries;
108
109 private List<DiffEntry> deleted;
110
111 private List<DiffEntry> added;
112
113 private boolean done;
114
115 private final ObjectReader objectReader;
116
117
118 private int renameScore = 60;
119
120
121
122
123
124
125 private int breakScore = -1;
126
127
128 private int renameLimit;
129
130
131 private boolean overRenameLimit;
132
133
134
135
136
137
138
139 public RenameDetector(Repository repo) {
140 this(repo.newObjectReader(), repo.getConfig().get(DiffConfig.KEY));
141 }
142
143
144
145
146
147
148
149
150
151
152 public RenameDetector(ObjectReader reader, DiffConfig cfg) {
153 objectReader = reader.newReader();
154 renameLimit = cfg.getRenameLimit();
155 reset();
156 }
157
158
159
160
161
162 public int getRenameScore() {
163 return renameScore;
164 }
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179 public void setRenameScore(int score) {
180 if (score < 0 || score > 100)
181 throw new IllegalArgumentException(
182 JGitText.get().similarityScoreMustBeWithinBounds);
183 renameScore = score;
184 }
185
186
187
188
189
190
191
192
193 public int getBreakScore() {
194 return breakScore;
195 }
196
197
198
199
200
201
202
203
204
205 public void setBreakScore(int breakScore) {
206 this.breakScore = breakScore;
207 }
208
209
210 public int getRenameLimit() {
211 return renameLimit;
212 }
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227 public void setRenameLimit(int limit) {
228 renameLimit = limit;
229 }
230
231
232
233
234
235
236
237
238
239
240
241 public boolean isOverRenameLimit() {
242 if (done)
243 return overRenameLimit;
244 int cnt = Math.max(added.size(), deleted.size());
245 return getRenameLimit() != 0 && getRenameLimit() < cnt;
246 }
247
248
249
250
251
252
253
254
255
256 public void addAll(Collection<DiffEntry> entriesToAdd) {
257 if (done)
258 throw new IllegalStateException(JGitText.get().renamesAlreadyFound);
259
260 for (DiffEntry entry : entriesToAdd) {
261 switch (entry.getChangeType()) {
262 case ADD:
263 added.add(entry);
264 break;
265
266 case DELETE:
267 deleted.add(entry);
268 break;
269
270 case MODIFY:
271 if (sameType(entry.getOldMode(), entry.getNewMode())) {
272 entries.add(entry);
273 } else {
274 List<DiffEntry> tmp = DiffEntry.breakModify(entry);
275 deleted.add(tmp.get(0));
276 added.add(tmp.get(1));
277 }
278 break;
279
280 case COPY:
281 case RENAME:
282 default:
283 entries.add(entry);
284 }
285 }
286 }
287
288
289
290
291
292
293
294
295
296 public void add(DiffEntry entry) {
297 addAll(Collections.singletonList(entry));
298 }
299
300
301
302
303
304
305
306
307
308
309
310 public List<DiffEntry> compute() throws IOException {
311 return compute(NullProgressMonitor.INSTANCE);
312 }
313
314
315
316
317
318
319
320
321
322
323
324 public List<DiffEntry> compute(ProgressMonitor pm) throws IOException {
325 if (!done) {
326 try {
327 return compute(objectReader, pm);
328 } finally {
329 objectReader.close();
330 }
331 }
332 return Collections.unmodifiableList(entries);
333 }
334
335
336
337
338
339
340
341
342
343
344
345
346
347 public List<DiffEntry> compute(ObjectReader reader, ProgressMonitor pm)
348 throws IOException {
349 final ContentSource cs = ContentSource.create(reader);
350 return compute(new ContentSource.Pair(cs, cs), pm);
351 }
352
353
354
355
356
357
358
359
360
361
362
363
364
365 public List<DiffEntry> compute(ContentSource.Pair reader, ProgressMonitor pm)
366 throws IOException {
367 if (!done) {
368 done = true;
369
370 if (pm == null)
371 pm = NullProgressMonitor.INSTANCE;
372
373 if (0 < breakScore)
374 breakModifies(reader, pm);
375
376 if (!added.isEmpty() && !deleted.isEmpty())
377 findExactRenames(pm);
378
379 if (!added.isEmpty() && !deleted.isEmpty())
380 findContentRenames(reader, pm);
381
382 if (0 < breakScore && !added.isEmpty() && !deleted.isEmpty())
383 rejoinModifies(pm);
384
385 entries.addAll(added);
386 added = null;
387
388 entries.addAll(deleted);
389 deleted = null;
390
391 Collections.sort(entries, DIFF_COMPARATOR);
392 }
393 return Collections.unmodifiableList(entries);
394 }
395
396
397 public void reset() {
398 entries = new ArrayList<>();
399 deleted = new ArrayList<>();
400 added = new ArrayList<>();
401 done = false;
402 }
403
404 private void breakModifies(ContentSource.Pair reader, ProgressMonitor pm)
405 throws IOException {
406 ArrayList<DiffEntry> newEntries = new ArrayList<>(entries.size());
407
408 pm.beginTask(JGitText.get().renamesBreakingModifies, entries.size());
409
410 for (int i = 0; i < entries.size(); i++) {
411 DiffEntry e = entries.get(i);
412 if (e.getChangeType() == ChangeType.MODIFY) {
413 int score = calculateModifyScore(reader, e);
414 if (score < breakScore) {
415 List<DiffEntry> tmp = DiffEntry.breakModify(e);
416 DiffEntry del = tmp.get(0);
417 del.score = score;
418 deleted.add(del);
419 added.add(tmp.get(1));
420 } else {
421 newEntries.add(e);
422 }
423 } else {
424 newEntries.add(e);
425 }
426 pm.update(1);
427 }
428
429 entries = newEntries;
430 }
431
432 private void rejoinModifies(ProgressMonitor pm) {
433 HashMap<String, DiffEntry> nameMap = new HashMap<>();
434 ArrayList<DiffEntry> newAdded = new ArrayList<>(added.size());
435
436 pm.beginTask(JGitText.get().renamesRejoiningModifies, added.size()
437 + deleted.size());
438
439 for (DiffEntry src : deleted) {
440 nameMap.put(src.oldPath, src);
441 pm.update(1);
442 }
443
444 for (DiffEntry dst : added) {
445 DiffEntry src = nameMap.remove(dst.newPath);
446 if (src != null) {
447 if (sameType(src.oldMode, dst.newMode)) {
448 entries.add(DiffEntry.pair(ChangeType.MODIFY, src, dst,
449 src.score));
450 } else {
451 nameMap.put(src.oldPath, src);
452 newAdded.add(dst);
453 }
454 } else {
455 newAdded.add(dst);
456 }
457 pm.update(1);
458 }
459
460 added = newAdded;
461 deleted = new ArrayList<>(nameMap.values());
462 }
463
464 private int calculateModifyScore(ContentSource.Pair reader, DiffEntry d)
465 throws IOException {
466 try {
467 SimilarityIndex src = new SimilarityIndex();
468 src.hash(reader.open(OLD, d));
469 src.sort();
470
471 SimilarityIndex dst = new SimilarityIndex();
472 dst.hash(reader.open(NEW, d));
473 dst.sort();
474 return src.score(dst, 100);
475 } catch (TableFullException tableFull) {
476
477
478
479
480 overRenameLimit = true;
481 return breakScore + 1;
482 }
483 }
484
485 private void findContentRenames(ContentSource.Pair reader,
486 ProgressMonitor pm)
487 throws IOException {
488 int cnt = Math.max(added.size(), deleted.size());
489 if (getRenameLimit() == 0 || cnt <= getRenameLimit()) {
490 SimilarityRenameDetector d;
491
492 d = new SimilarityRenameDetector(reader, deleted, added);
493 d.setRenameScore(getRenameScore());
494 d.compute(pm);
495 overRenameLimit |= d.isTableOverflow();
496 deleted = d.getLeftOverSources();
497 added = d.getLeftOverDestinations();
498 entries.addAll(d.getMatches());
499 } else {
500 overRenameLimit = true;
501 }
502 }
503
504 @SuppressWarnings("unchecked")
505 private void findExactRenames(ProgressMonitor pm) {
506 pm.beginTask(JGitText.get().renamesFindingExact,
507 added.size() + added.size() + deleted.size()
508 + added.size() * deleted.size());
509
510 HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm);
511 HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm);
512
513 ArrayList<DiffEntry> uniqueAdds = new ArrayList<>(added.size());
514 ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<>();
515
516 for (Object o : addedMap.values()) {
517 if (o instanceof DiffEntry)
518 uniqueAdds.add((DiffEntry) o);
519 else
520 nonUniqueAdds.add((List<DiffEntry>) o);
521 }
522
523 ArrayList<DiffEntry> left = new ArrayList<>(added.size());
524
525 for (DiffEntry a : uniqueAdds) {
526 Object del = deletedMap.get(a.newId);
527 if (del instanceof DiffEntry) {
528
529
530 DiffEntry e = (DiffEntry) del;
531 if (sameType(e.oldMode, a.newMode)) {
532 e.changeType = ChangeType.RENAME;
533 entries.add(exactRename(e, a));
534 } else {
535 left.add(a);
536 }
537 } else if (del != null) {
538
539
540 List<DiffEntry> list = (List<DiffEntry>) del;
541 DiffEntry best = bestPathMatch(a, list);
542 if (best != null) {
543 best.changeType = ChangeType.RENAME;
544 entries.add(exactRename(best, a));
545 } else {
546 left.add(a);
547 }
548 } else {
549 left.add(a);
550 }
551 pm.update(1);
552 }
553
554 for (List<DiffEntry> adds : nonUniqueAdds) {
555 Object o = deletedMap.get(adds.get(0).newId);
556 if (o instanceof DiffEntry) {
557
558
559
560 DiffEntry d = (DiffEntry) o;
561 DiffEntry best = bestPathMatch(d, adds);
562 if (best != null) {
563 d.changeType = ChangeType.RENAME;
564 entries.add(exactRename(d, best));
565 for (DiffEntry a : adds) {
566 if (a != best) {
567 if (sameType(d.oldMode, a.newMode)) {
568 entries.add(exactCopy(d, a));
569 } else {
570 left.add(a);
571 }
572 }
573 }
574 } else {
575 left.addAll(adds);
576 }
577 } else if (o != null) {
578
579
580
581 List<DiffEntry> dels = (List<DiffEntry>) o;
582 long[] matrix = new long[dels.size() * adds.size()];
583 int mNext = 0;
584 for (int delIdx = 0; delIdx < dels.size(); delIdx++) {
585 String deletedName = dels.get(delIdx).oldPath;
586
587 for (int addIdx = 0; addIdx < adds.size(); addIdx++) {
588 String addedName = adds.get(addIdx).newPath;
589
590 int score = SimilarityRenameDetector.nameScore(addedName, deletedName);
591 matrix[mNext] = SimilarityRenameDetector.encode(score, delIdx, addIdx);
592 mNext++;
593 }
594 }
595
596 Arrays.sort(matrix);
597
598 for (--mNext; mNext >= 0; mNext--) {
599 long ent = matrix[mNext];
600 int delIdx = SimilarityRenameDetector.srcFile(ent);
601 int addIdx = SimilarityRenameDetector.dstFile(ent);
602 DiffEntry d = dels.get(delIdx);
603 DiffEntry a = adds.get(addIdx);
604
605 if (a == null) {
606 pm.update(1);
607 continue;
608 }
609
610 ChangeType type;
611 if (d.changeType == ChangeType.DELETE) {
612
613
614
615
616 d.changeType = ChangeType.RENAME;
617 type = ChangeType.RENAME;
618 } else {
619 type = ChangeType.COPY;
620 }
621
622 entries.add(DiffEntry.pair(type, d, a, 100));
623 adds.set(addIdx, null);
624 pm.update(1);
625 }
626 } else {
627 left.addAll(adds);
628 }
629 }
630 added = left;
631
632 deleted = new ArrayList<>(deletedMap.size());
633 for (Object o : deletedMap.values()) {
634 if (o instanceof DiffEntry) {
635 DiffEntry e = (DiffEntry) o;
636 if (e.changeType == ChangeType.DELETE)
637 deleted.add(e);
638 } else {
639 List<DiffEntry> list = (List<DiffEntry>) o;
640 for (DiffEntry e : list) {
641 if (e.changeType == ChangeType.DELETE)
642 deleted.add(e);
643 }
644 }
645 }
646 pm.endTask();
647 }
648
649
650
651
652
653
654
655
656
657
658
659
660
661 private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) {
662 DiffEntry best = null;
663 int score = -1;
664
665 for (DiffEntry d : list) {
666 if (sameType(mode(d), mode(src))) {
667 int tmp = SimilarityRenameDetector
668 .nameScore(path(d), path(src));
669 if (tmp > score) {
670 best = d;
671 score = tmp;
672 }
673 }
674 }
675
676 return best;
677 }
678
679 @SuppressWarnings("unchecked")
680 private HashMap<AbbreviatedObjectId, Object> populateMap(
681 List<DiffEntry> diffEntries, ProgressMonitor pm) {
682 HashMap<AbbreviatedObjectId, Object> map = new HashMap<>();
683 for (DiffEntry de : diffEntries) {
684 Object old = map.put(id(de), de);
685 if (old instanceof DiffEntry) {
686 ArrayList<DiffEntry> list = new ArrayList<>(2);
687 list.add((DiffEntry) old);
688 list.add(de);
689 map.put(id(de), list);
690 } else if (old != null) {
691
692 ((List<DiffEntry>) old).add(de);
693 map.put(id(de), old);
694 }
695 pm.update(1);
696 }
697 return map;
698 }
699
700 private static String path(DiffEntry de) {
701 return de.changeType == ChangeType.DELETE ? de.oldPath : de.newPath;
702 }
703
704 private static FileMode mode(DiffEntry de) {
705 return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode;
706 }
707
708 private static AbbreviatedObjectId id(DiffEntry de) {
709 return de.changeType == ChangeType.DELETE ? de.oldId : de.newId;
710 }
711
712 static boolean sameType(FileMode a, FileMode b) {
713
714
715
716
717 int aType = a.getBits() & FileMode.TYPE_MASK;
718 int bType = b.getBits() & FileMode.TYPE_MASK;
719 return aType == bType;
720 }
721
722 private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) {
723 return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE);
724 }
725
726 private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) {
727 return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE);
728 }
729 }