1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44 package org.eclipse.jgit.diff;
45
46 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
47 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
48
49 import java.io.IOException;
50 import java.util.ArrayList;
51 import java.util.Arrays;
52 import java.util.BitSet;
53 import java.util.List;
54
55 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
56 import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
57 import org.eclipse.jgit.internal.JGitText;
58 import org.eclipse.jgit.lib.FileMode;
59 import org.eclipse.jgit.lib.NullProgressMonitor;
60 import org.eclipse.jgit.lib.ProgressMonitor;
61
62 class SimilarityRenameDetector {
63
64
65
66
67
68
69
70
71 private static final int BITS_PER_INDEX = 28;
72
73 private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1;
74
75 private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX;
76
77 private ContentSource.Pair reader;
78
79
80
81
82
83
84
85
86 private List<DiffEntry> srcs;
87
88
89
90
91
92
93
94
95 private List<DiffEntry> dsts;
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110 private long[] matrix;
111
112
113 private int renameScore = 60;
114
115
116 private boolean tableOverflow;
117
118 private List<DiffEntry> out;
119
120 SimilarityRenameDetector(ContentSource.Pair reader, List<DiffEntry> srcs,
121 List<DiffEntry> dsts) {
122 this.reader = reader;
123 this.srcs = srcs;
124 this.dsts = dsts;
125 }
126
127 void setRenameScore(int score) {
128 renameScore = score;
129 }
130
131 void compute(ProgressMonitor pm) throws IOException {
132 if (pm == null)
133 pm = NullProgressMonitor.INSTANCE;
134
135 pm.beginTask(JGitText.get().renamesFindingByContent,
136 2 * srcs.size() * dsts.size());
137
138 int mNext = buildMatrix(pm);
139 out = new ArrayList<>(Math.min(mNext, dsts.size()));
140
141
142
143
144 for (--mNext; mNext >= 0; mNext--) {
145 long ent = matrix[mNext];
146 int sIdx = srcFile(ent);
147 int dIdx = dstFile(ent);
148 DiffEntry s = srcs.get(sIdx);
149 DiffEntry d = dsts.get(dIdx);
150
151 if (d == null) {
152 pm.update(1);
153 continue;
154 }
155
156 ChangeType type;
157 if (s.changeType == ChangeType.DELETE) {
158
159
160
161
162 s.changeType = ChangeType.RENAME;
163 type = ChangeType.RENAME;
164 } else {
165 type = ChangeType.COPY;
166 }
167
168 out.add(DiffEntry.pair(type, s, d, score(ent)));
169 dsts.set(dIdx, null);
170 pm.update(1);
171 }
172
173 srcs = compactSrcList(srcs);
174 dsts = compactDstList(dsts);
175 pm.endTask();
176 }
177
178 List<DiffEntry> getMatches() {
179 return out;
180 }
181
182 List<DiffEntry> getLeftOverSources() {
183 return srcs;
184 }
185
186 List<DiffEntry> getLeftOverDestinations() {
187 return dsts;
188 }
189
190 boolean isTableOverflow() {
191 return tableOverflow;
192 }
193
194 private static List<DiffEntry> compactSrcList(List<DiffEntry> in) {
195 ArrayList<DiffEntry> r = new ArrayList<>(in.size());
196 for (DiffEntry e : in) {
197 if (e.changeType == ChangeType.DELETE)
198 r.add(e);
199 }
200 return r;
201 }
202
203 private static List<DiffEntry> compactDstList(List<DiffEntry> in) {
204 ArrayList<DiffEntry> r = new ArrayList<>(in.size());
205 for (DiffEntry e : in) {
206 if (e != null)
207 r.add(e);
208 }
209 return r;
210 }
211
212 private int buildMatrix(ProgressMonitor pm) throws IOException {
213
214
215
216 matrix = new long[srcs.size() * dsts.size()];
217
218 long[] srcSizes = new long[srcs.size()];
219 long[] dstSizes = new long[dsts.size()];
220 BitSet dstTooLarge = null;
221
222
223
224
225
226 int mNext = 0;
227 SRC: for (int srcIdx = 0; srcIdx < srcs.size(); srcIdx++) {
228 DiffEntry srcEnt = srcs.get(srcIdx);
229 if (!isFile(srcEnt.oldMode)) {
230 pm.update(dsts.size());
231 continue;
232 }
233
234 SimilarityIndex s = null;
235
236 for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) {
237 DiffEntry dstEnt = dsts.get(dstIdx);
238
239 if (!isFile(dstEnt.newMode)) {
240 pm.update(1);
241 continue;
242 }
243
244 if (!RenameDetector.sameType(srcEnt.oldMode, dstEnt.newMode)) {
245 pm.update(1);
246 continue;
247 }
248
249 if (dstTooLarge != null && dstTooLarge.get(dstIdx)) {
250 pm.update(1);
251 continue;
252 }
253
254 long srcSize = srcSizes[srcIdx];
255 if (srcSize == 0) {
256 srcSize = size(OLD, srcEnt) + 1;
257 srcSizes[srcIdx] = srcSize;
258 }
259
260 long dstSize = dstSizes[dstIdx];
261 if (dstSize == 0) {
262 dstSize = size(NEW, dstEnt) + 1;
263 dstSizes[dstIdx] = dstSize;
264 }
265
266 long max = Math.max(srcSize, dstSize);
267 long min = Math.min(srcSize, dstSize);
268 if (min * 100 / max < renameScore) {
269
270 pm.update(1);
271 continue;
272 }
273
274 if (s == null) {
275 try {
276 s = hash(OLD, srcEnt);
277 } catch (TableFullException tableFull) {
278 tableOverflow = true;
279 continue SRC;
280 }
281 }
282
283 SimilarityIndex d;
284 try {
285 d = hash(NEW, dstEnt);
286 } catch (TableFullException tableFull) {
287 if (dstTooLarge == null)
288 dstTooLarge = new BitSet(dsts.size());
289 dstTooLarge.set(dstIdx);
290 tableOverflow = true;
291 pm.update(1);
292 continue;
293 }
294
295 int contentScore = s.score(d, 10000);
296
297
298
299
300 int nameScore = nameScore(srcEnt.oldPath, dstEnt.newPath) * 100;
301
302 int score = (contentScore * 99 + nameScore * 1) / 10000;
303
304 if (score < renameScore) {
305 pm.update(1);
306 continue;
307 }
308
309 matrix[mNext++] = encode(score, srcIdx, dstIdx);
310 pm.update(1);
311 }
312 }
313
314
315
316
317
318 Arrays.sort(matrix, 0, mNext);
319 return mNext;
320 }
321
322 static int nameScore(String a, String b) {
323 int aDirLen = a.lastIndexOf("/") + 1;
324 int bDirLen = b.lastIndexOf("/") + 1;
325
326 int dirMin = Math.min(aDirLen, bDirLen);
327 int dirMax = Math.max(aDirLen, bDirLen);
328
329 final int dirScoreLtr;
330 final int dirScoreRtl;
331
332 if (dirMax == 0) {
333 dirScoreLtr = 100;
334 dirScoreRtl = 100;
335 } else {
336 int dirSim = 0;
337 for (; dirSim < dirMin; dirSim++) {
338 if (a.charAt(dirSim) != b.charAt(dirSim))
339 break;
340 }
341 dirScoreLtr = (dirSim * 100) / dirMax;
342
343 if (dirScoreLtr == 100) {
344 dirScoreRtl = 100;
345 } else {
346 for (dirSim = 0; dirSim < dirMin; dirSim++) {
347 if (a.charAt(aDirLen - 1 - dirSim) != b.charAt(bDirLen - 1
348 - dirSim))
349 break;
350 }
351 dirScoreRtl = (dirSim * 100) / dirMax;
352 }
353 }
354
355 int fileMin = Math.min(a.length() - aDirLen, b.length() - bDirLen);
356 int fileMax = Math.max(a.length() - aDirLen, b.length() - bDirLen);
357
358 int fileSim = 0;
359 for (; fileSim < fileMin; fileSim++) {
360 if (a.charAt(a.length() - 1 - fileSim) != b.charAt(b.length() - 1
361 - fileSim))
362 break;
363 }
364 int fileScore = (fileSim * 100) / fileMax;
365
366 return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
367 }
368
369 private SimilarityIndex hash(DiffEntry.Side side, DiffEntry ent)
370 throws IOException, TableFullException {
371 SimilarityIndex r = new SimilarityIndex();
372 r.hash(reader.open(side, ent));
373 r.sort();
374 return r;
375 }
376
377 private long size(DiffEntry.Side side, DiffEntry ent) throws IOException {
378 return reader.size(side, ent);
379 }
380
381 private static int score(long value) {
382 return (int) (value >>> SCORE_SHIFT);
383 }
384
385 static int srcFile(long value) {
386 return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK);
387 }
388
389 static int dstFile(long value) {
390 return decodeFile(((int) value) & INDEX_MASK);
391 }
392
393 static long encode(int score, int srcIdx, int dstIdx) {
394 return (((long) score) << SCORE_SHIFT)
395 | (encodeFile(srcIdx) << BITS_PER_INDEX)
396 | encodeFile(dstIdx);
397 }
398
399 private static long encodeFile(int idx) {
400
401
402
403
404 return INDEX_MASK - idx;
405 }
406
407 private static int decodeFile(int v) {
408 return INDEX_MASK - v;
409 }
410
411 private static boolean isFile(FileMode mode) {
412 return (mode.getBits() & FileMode.TYPE_MASK) == FileMode.TYPE_FILE;
413 }
414 }