1
2
3
4
5
6
7
8
9
10
11 package org.eclipse.jgit.diff;
12
13 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
14 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
15 import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
16
17 import java.io.IOException;
18 import java.util.ArrayList;
19 import java.util.Arrays;
20 import java.util.BitSet;
21 import java.util.List;
22
23 import org.eclipse.jgit.api.errors.CanceledException;
24 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
25 import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
26 import org.eclipse.jgit.internal.JGitText;
27 import org.eclipse.jgit.lib.FileMode;
28 import org.eclipse.jgit.lib.NullProgressMonitor;
29 import org.eclipse.jgit.lib.ObjectLoader;
30 import org.eclipse.jgit.lib.ProgressMonitor;
31
32 class SimilarityRenameDetector {
33
34
35
36
37
38
39
40
41 private static final int BITS_PER_INDEX = 28;
42
43 private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1;
44
45 private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX;
46
47 private ContentSource.Pair reader;
48
49
50
51
52
53
54
55
56 private List<DiffEntry> srcs;
57
58
59
60
61
62
63
64
65 private List<DiffEntry> dsts;
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80 private long[] matrix;
81
82
83 private int renameScore = 60;
84
85
86
87
88
89 private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
90
91
92 private boolean skipBinaryFiles = false;
93
94
95 private boolean tableOverflow;
96
97 private List<DiffEntry> out;
98
99 SimilarityRenameDetector(ContentSource.Pair reader, List<DiffEntry> srcs,
100 List<DiffEntry> dsts) {
101 this.reader = reader;
102 this.srcs = srcs;
103 this.dsts = dsts;
104 }
105
106 void setRenameScore(int score) {
107 renameScore = score;
108 }
109
110 void setBigFileThreshold(int threshold) {
111 bigFileThreshold = threshold;
112 }
113
114 void setSkipBinaryFiles(boolean value) {
115 skipBinaryFiles = value;
116 }
117
118 void compute(ProgressMonitor pm) throws IOException, CanceledException {
119 if (pm == null)
120 pm = NullProgressMonitor.INSTANCE;
121
122 pm.beginTask(JGitText.get().renamesFindingByContent,
123 2 * srcs.size() * dsts.size());
124
125 int mNext = buildMatrix(pm);
126 out = new ArrayList<>(Math.min(mNext, dsts.size()));
127
128
129
130
131 for (--mNext; mNext >= 0; mNext--) {
132 if (pm.isCancelled()) {
133 throw new CanceledException(JGitText.get().renameCancelled);
134 }
135 long ent = matrix[mNext];
136 int sIdx = srcFile(ent);
137 int dIdx = dstFile(ent);
138 DiffEntry s = srcs.get(sIdx);
139 DiffEntry d = dsts.get(dIdx);
140
141 if (d == null) {
142 pm.update(1);
143 continue;
144 }
145
146 ChangeType type;
147 if (s.changeType == ChangeType.DELETE) {
148
149
150
151
152 s.changeType = ChangeType.RENAME;
153 type = ChangeType.RENAME;
154 } else {
155 type = ChangeType.COPY;
156 }
157
158 out.add(DiffEntry.pair(type, s, d, score(ent)));
159 dsts.set(dIdx, null);
160 pm.update(1);
161 }
162
163 srcs = compactSrcList(srcs);
164 dsts = compactDstList(dsts);
165 pm.endTask();
166 }
167
168 List<DiffEntry> getMatches() {
169 return out;
170 }
171
172 List<DiffEntry> getLeftOverSources() {
173 return srcs;
174 }
175
176 List<DiffEntry> getLeftOverDestinations() {
177 return dsts;
178 }
179
180 boolean isTableOverflow() {
181 return tableOverflow;
182 }
183
184 private static List<DiffEntry> compactSrcList(List<DiffEntry> in) {
185 ArrayList<DiffEntry> r = new ArrayList<>(in.size());
186 for (DiffEntry e : in) {
187 if (e.changeType == ChangeType.DELETE)
188 r.add(e);
189 }
190 return r;
191 }
192
193 private static List<DiffEntry> compactDstList(List<DiffEntry> in) {
194 ArrayList<DiffEntry> r = new ArrayList<>(in.size());
195 for (DiffEntry e : in) {
196 if (e != null)
197 r.add(e);
198 }
199 return r;
200 }
201
202 private int buildMatrix(ProgressMonitor pm)
203 throws IOException, CanceledException {
204
205
206
207 matrix = new long[srcs.size() * dsts.size()];
208
209 long[] srcSizes = new long[srcs.size()];
210 long[] dstSizes = new long[dsts.size()];
211 BitSet dstTooLarge = null;
212
213
214
215
216
217 int mNext = 0;
218 SRC: for (int srcIdx = 0; srcIdx < srcs.size(); srcIdx++) {
219 DiffEntry srcEnt = srcs.get(srcIdx);
220 if (!isFile(srcEnt.oldMode)) {
221 pm.update(dsts.size());
222 continue;
223 }
224
225 SimilarityIndex s = null;
226
227 for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) {
228 if (pm.isCancelled()) {
229 throw new CanceledException(
230 JGitText.get().renameCancelled);
231 }
232
233 DiffEntry dstEnt = dsts.get(dstIdx);
234
235 if (!isFile(dstEnt.newMode)) {
236 pm.update(1);
237 continue;
238 }
239
240 if (!RenameDetector.sameType(srcEnt.oldMode, dstEnt.newMode)) {
241 pm.update(1);
242 continue;
243 }
244
245 if (dstTooLarge != null && dstTooLarge.get(dstIdx)) {
246 pm.update(1);
247 continue;
248 }
249
250 long srcSize = srcSizes[srcIdx];
251 if (srcSize == 0) {
252 srcSize = size(OLD, srcEnt) + 1;
253 srcSizes[srcIdx] = srcSize;
254 }
255
256 long dstSize = dstSizes[dstIdx];
257 if (dstSize == 0) {
258 dstSize = size(NEW, dstEnt) + 1;
259 dstSizes[dstIdx] = dstSize;
260 }
261
262 long max = Math.max(srcSize, dstSize);
263 long min = Math.min(srcSize, dstSize);
264 if (min * 100 / max < renameScore) {
265
266 pm.update(1);
267 continue;
268 }
269
270 if (max > bigFileThreshold) {
271 pm.update(1);
272 continue;
273 }
274
275 if (s == null) {
276 try {
277 ObjectLoader loader = reader.open(OLD, srcEnt);
278 if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
279 pm.update(1);
280 continue SRC;
281 }
282 s = hash(loader);
283 } catch (TableFullException tableFull) {
284 tableOverflow = true;
285 continue SRC;
286 }
287 }
288
289 SimilarityIndex d;
290 try {
291 ObjectLoader loader = reader.open(NEW, dstEnt);
292 if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
293 pm.update(1);
294 continue;
295 }
296 d = hash(loader);
297 } catch (TableFullException tableFull) {
298 if (dstTooLarge == null)
299 dstTooLarge = new BitSet(dsts.size());
300 dstTooLarge.set(dstIdx);
301 tableOverflow = true;
302 pm.update(1);
303 continue;
304 }
305
306 int contentScore = s.score(d, 10000);
307
308
309
310
311 int nameScore = nameScore(srcEnt.oldPath, dstEnt.newPath) * 100;
312
313 int score = (contentScore * 99 + nameScore * 1) / 10000;
314
315 if (score < renameScore) {
316 pm.update(1);
317 continue;
318 }
319
320 matrix[mNext++] = encode(score, srcIdx, dstIdx);
321 pm.update(1);
322 }
323 }
324
325
326
327
328
329 Arrays.sort(matrix, 0, mNext);
330 return mNext;
331 }
332
333 static int nameScore(String a, String b) {
334 int aDirLen = a.lastIndexOf('/') + 1;
335 int bDirLen = b.lastIndexOf('/') + 1;
336
337 int dirMin = Math.min(aDirLen, bDirLen);
338 int dirMax = Math.max(aDirLen, bDirLen);
339
340 final int dirScoreLtr;
341 final int dirScoreRtl;
342
343 if (dirMax == 0) {
344 dirScoreLtr = 100;
345 dirScoreRtl = 100;
346 } else {
347 int dirSim = 0;
348 for (; dirSim < dirMin; dirSim++) {
349 if (a.charAt(dirSim) != b.charAt(dirSim))
350 break;
351 }
352 dirScoreLtr = (dirSim * 100) / dirMax;
353
354 if (dirScoreLtr == 100) {
355 dirScoreRtl = 100;
356 } else {
357 for (dirSim = 0; dirSim < dirMin; dirSim++) {
358 if (a.charAt(aDirLen - 1 - dirSim) != b.charAt(bDirLen - 1
359 - dirSim))
360 break;
361 }
362 dirScoreRtl = (dirSim * 100) / dirMax;
363 }
364 }
365
366 int fileMin = Math.min(a.length() - aDirLen, b.length() - bDirLen);
367 int fileMax = Math.max(a.length() - aDirLen, b.length() - bDirLen);
368
369 int fileSim = 0;
370 for (; fileSim < fileMin; fileSim++) {
371 if (a.charAt(a.length() - 1 - fileSim) != b.charAt(b.length() - 1
372 - fileSim))
373 break;
374 }
375 int fileScore = (fileSim * 100) / fileMax;
376
377 return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
378 }
379
380 private SimilarityIndex hash(ObjectLoader objectLoader)
381 throws IOException, TableFullException {
382 SimilarityIndex r = new SimilarityIndex();
383 r.hash(objectLoader);
384 r.sort();
385 return r;
386 }
387
388 private long size(DiffEntry.Side side, DiffEntry ent) throws IOException {
389 return reader.size(side, ent);
390 }
391
392 private static int score(long value) {
393 return (int) (value >>> SCORE_SHIFT);
394 }
395
396 static int srcFile(long value) {
397 return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK);
398 }
399
400 static int dstFile(long value) {
401 return decodeFile(((int) value) & INDEX_MASK);
402 }
403
404 static long encode(int score, int srcIdx, int dstIdx) {
405 return (((long) score) << SCORE_SHIFT)
406 | (encodeFile(srcIdx) << BITS_PER_INDEX)
407 | encodeFile(dstIdx);
408 }
409
410 private static long encodeFile(int idx) {
411
412
413
414
415 return INDEX_MASK - idx;
416 }
417
418 private static int decodeFile(int v) {
419 return INDEX_MASK - v;
420 }
421
422 private static boolean isFile(FileMode mode) {
423 return (mode.getBits() & FileMode.TYPE_MASK) == FileMode.TYPE_FILE;
424 }
425 }