1
2
3
4
5
6
7
8
9
10
11 package org.eclipse.jgit.diff;
12
13 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
14 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
15 import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
16
17 import java.io.IOException;
18 import java.util.ArrayList;
19 import java.util.Arrays;
20 import java.util.BitSet;
21 import java.util.List;
22
23 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
24 import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
25 import org.eclipse.jgit.errors.CancelledException;
26 import org.eclipse.jgit.internal.JGitText;
27 import org.eclipse.jgit.lib.FileMode;
28 import org.eclipse.jgit.lib.NullProgressMonitor;
29 import org.eclipse.jgit.lib.ObjectLoader;
30 import org.eclipse.jgit.lib.ProgressMonitor;
31
32 class SimilarityRenameDetector {
33
34
35
36
37
38
39
40
41 private static final int BITS_PER_INDEX = 28;
42
43 private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1;
44
45 private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX;
46
47 private ContentSource.Pair reader;
48
49
50
51
52
53
54
55
56 private List<DiffEntry> srcs;
57
58
59
60
61
62
63
64
65 private List<DiffEntry> dsts;
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80 private long[] matrix;
81
82
83 private int renameScore = 60;
84
85
86
87
88
89 private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
90
91
92 private boolean skipBinaryFiles = false;
93
94
95 private boolean tableOverflow;
96
97 private List<DiffEntry> out;
98
99 SimilarityRenameDetector(ContentSource.Pair reader, List<DiffEntry> srcs,
100 List<DiffEntry> dsts) {
101 this.reader = reader;
102 this.srcs = srcs;
103 this.dsts = dsts;
104 }
105
106 void setRenameScore(int score) {
107 renameScore = score;
108 }
109
110 void setBigFileThreshold(int threshold) {
111 bigFileThreshold = threshold;
112 }
113
114 void setSkipBinaryFiles(boolean value) {
115 skipBinaryFiles = value;
116 }
117
118 void compute(ProgressMonitor pm) throws IOException, CancelledException {
119 if (pm == null)
120 pm = NullProgressMonitor.INSTANCE;
121
122 pm.beginTask(JGitText.get().renamesFindingByContent,
123 2 * srcs.size() * dsts.size());
124
125 int mNext = buildMatrix(pm);
126 out = new ArrayList<>(Math.min(mNext, dsts.size()));
127
128
129
130
131 for (--mNext; mNext >= 0; mNext--) {
132 if (pm.isCancelled()) {
133
134
135 throw new CancelledException(JGitText.get().renameCancelled);
136 }
137 long ent = matrix[mNext];
138 int sIdx = srcFile(ent);
139 int dIdx = dstFile(ent);
140 DiffEntry s = srcs.get(sIdx);
141 DiffEntry d = dsts.get(dIdx);
142
143 if (d == null) {
144 pm.update(1);
145 continue;
146 }
147
148 ChangeType type;
149 if (s.changeType == ChangeType.DELETE) {
150
151
152
153
154 s.changeType = ChangeType.RENAME;
155 type = ChangeType.RENAME;
156 } else {
157 type = ChangeType.COPY;
158 }
159
160 out.add(DiffEntry.pair(type, s, d, score(ent)));
161 dsts.set(dIdx, null);
162 pm.update(1);
163 }
164
165 srcs = compactSrcList(srcs);
166 dsts = compactDstList(dsts);
167 pm.endTask();
168 }
169
170 List<DiffEntry> getMatches() {
171 return out;
172 }
173
174 List<DiffEntry> getLeftOverSources() {
175 return srcs;
176 }
177
178 List<DiffEntry> getLeftOverDestinations() {
179 return dsts;
180 }
181
182 boolean isTableOverflow() {
183 return tableOverflow;
184 }
185
186 private static List<DiffEntry> compactSrcList(List<DiffEntry> in) {
187 ArrayList<DiffEntry> r = new ArrayList<>(in.size());
188 for (DiffEntry e : in) {
189 if (e.changeType == ChangeType.DELETE)
190 r.add(e);
191 }
192 return r;
193 }
194
195 private static List<DiffEntry> compactDstList(List<DiffEntry> in) {
196 ArrayList<DiffEntry> r = new ArrayList<>(in.size());
197 for (DiffEntry e : in) {
198 if (e != null)
199 r.add(e);
200 }
201 return r;
202 }
203
204 private int buildMatrix(ProgressMonitor pm)
205 throws IOException, CancelledException {
206
207
208
209 matrix = new long[srcs.size() * dsts.size()];
210
211 long[] srcSizes = new long[srcs.size()];
212 long[] dstSizes = new long[dsts.size()];
213 BitSet dstTooLarge = null;
214
215
216
217
218
219 int mNext = 0;
220 SRC: for (int srcIdx = 0; srcIdx < srcs.size(); srcIdx++) {
221 DiffEntry srcEnt = srcs.get(srcIdx);
222 if (!isFile(srcEnt.oldMode)) {
223 pm.update(dsts.size());
224 continue;
225 }
226
227 SimilarityIndex s = null;
228
229 for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) {
230 if (pm.isCancelled()) {
231
232
233
234 throw new CancelledException(
235 JGitText.get().renameCancelled);
236 }
237
238 DiffEntry dstEnt = dsts.get(dstIdx);
239
240 if (!isFile(dstEnt.newMode)) {
241 pm.update(1);
242 continue;
243 }
244
245 if (!RenameDetector.sameType(srcEnt.oldMode, dstEnt.newMode)) {
246 pm.update(1);
247 continue;
248 }
249
250 if (dstTooLarge != null && dstTooLarge.get(dstIdx)) {
251 pm.update(1);
252 continue;
253 }
254
255 long srcSize = srcSizes[srcIdx];
256 if (srcSize == 0) {
257 srcSize = size(OLD, srcEnt) + 1;
258 srcSizes[srcIdx] = srcSize;
259 }
260
261 long dstSize = dstSizes[dstIdx];
262 if (dstSize == 0) {
263 dstSize = size(NEW, dstEnt) + 1;
264 dstSizes[dstIdx] = dstSize;
265 }
266
267 long max = Math.max(srcSize, dstSize);
268 long min = Math.min(srcSize, dstSize);
269 if (min * 100 / max < renameScore) {
270
271 pm.update(1);
272 continue;
273 }
274
275 if (max > bigFileThreshold) {
276 pm.update(1);
277 continue;
278 }
279
280 if (s == null) {
281 try {
282 ObjectLoader loader = reader.open(OLD, srcEnt);
283 if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
284 pm.update(1);
285 continue SRC;
286 }
287 s = hash(loader);
288 } catch (TableFullException tableFull) {
289 tableOverflow = true;
290 continue SRC;
291 }
292 }
293
294 SimilarityIndex d;
295 try {
296 ObjectLoader loader = reader.open(NEW, dstEnt);
297 if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
298 pm.update(1);
299 continue;
300 }
301 d = hash(loader);
302 } catch (TableFullException tableFull) {
303 if (dstTooLarge == null)
304 dstTooLarge = new BitSet(dsts.size());
305 dstTooLarge.set(dstIdx);
306 tableOverflow = true;
307 pm.update(1);
308 continue;
309 }
310
311 int contentScore = s.score(d, 10000);
312
313
314
315
316 int nameScore = nameScore(srcEnt.oldPath, dstEnt.newPath) * 100;
317
318 int score = (contentScore * 99 + nameScore * 1) / 10000;
319
320 if (score < renameScore) {
321 pm.update(1);
322 continue;
323 }
324
325 matrix[mNext++] = encode(score, srcIdx, dstIdx);
326 pm.update(1);
327 }
328 }
329
330
331
332
333
334 Arrays.sort(matrix, 0, mNext);
335 return mNext;
336 }
337
338 static int nameScore(String a, String b) {
339 int aDirLen = a.lastIndexOf('/') + 1;
340 int bDirLen = b.lastIndexOf('/') + 1;
341
342 int dirMin = Math.min(aDirLen, bDirLen);
343 int dirMax = Math.max(aDirLen, bDirLen);
344
345 final int dirScoreLtr;
346 final int dirScoreRtl;
347
348 if (dirMax == 0) {
349 dirScoreLtr = 100;
350 dirScoreRtl = 100;
351 } else {
352 int dirSim = 0;
353 for (; dirSim < dirMin; dirSim++) {
354 if (a.charAt(dirSim) != b.charAt(dirSim))
355 break;
356 }
357 dirScoreLtr = (dirSim * 100) / dirMax;
358
359 if (dirScoreLtr == 100) {
360 dirScoreRtl = 100;
361 } else {
362 for (dirSim = 0; dirSim < dirMin; dirSim++) {
363 if (a.charAt(aDirLen - 1 - dirSim) != b.charAt(bDirLen - 1
364 - dirSim))
365 break;
366 }
367 dirScoreRtl = (dirSim * 100) / dirMax;
368 }
369 }
370
371 int fileMin = Math.min(a.length() - aDirLen, b.length() - bDirLen);
372 int fileMax = Math.max(a.length() - aDirLen, b.length() - bDirLen);
373
374 int fileSim = 0;
375 for (; fileSim < fileMin; fileSim++) {
376 if (a.charAt(a.length() - 1 - fileSim) != b.charAt(b.length() - 1
377 - fileSim))
378 break;
379 }
380 int fileScore = (fileSim * 100) / fileMax;
381
382 return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
383 }
384
385 private SimilarityIndex hash(ObjectLoader objectLoader)
386 throws IOException, TableFullException {
387 SimilarityIndex r = new SimilarityIndex();
388 r.hash(objectLoader);
389 r.sort();
390 return r;
391 }
392
393 private long size(DiffEntry.Side side, DiffEntry ent) throws IOException {
394 return reader.size(side, ent);
395 }
396
397 private static int score(long value) {
398 return (int) (value >>> SCORE_SHIFT);
399 }
400
401 static int srcFile(long value) {
402 return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK);
403 }
404
405 static int dstFile(long value) {
406 return decodeFile(((int) value) & INDEX_MASK);
407 }
408
409 static long encode(int score, int srcIdx, int dstIdx) {
410 return (((long) score) << SCORE_SHIFT)
411 | (encodeFile(srcIdx) << BITS_PER_INDEX)
412 | encodeFile(dstIdx);
413 }
414
415 private static long encodeFile(int idx) {
416
417
418
419
420 return INDEX_MASK - idx;
421 }
422
423 private static int decodeFile(int v) {
424 return INDEX_MASK - v;
425 }
426
427 private static boolean isFile(FileMode mode) {
428 return (mode.getBits() & FileMode.TYPE_MASK) == FileMode.TYPE_FILE;
429 }
430 }