1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44 package org.eclipse.jgit.diff;
45
46 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
47 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
48
49 import java.io.IOException;
50 import java.util.ArrayList;
51 import java.util.Arrays;
52 import java.util.BitSet;
53 import java.util.List;
54
55 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
56 import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
57 import org.eclipse.jgit.errors.CancelledException;
58 import org.eclipse.jgit.internal.JGitText;
59 import org.eclipse.jgit.lib.FileMode;
60 import org.eclipse.jgit.lib.NullProgressMonitor;
61 import org.eclipse.jgit.lib.ProgressMonitor;
62
63 class SimilarityRenameDetector {
64
65
66
67
68
69
70
71
72 private static final int BITS_PER_INDEX = 28;
73
74 private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1;
75
76 private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX;
77
78 private ContentSource.Pair reader;
79
80
81
82
83
84
85
86
87 private List<DiffEntry> srcs;
88
89
90
91
92
93
94
95
96 private List<DiffEntry> dsts;
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111 private long[] matrix;
112
113
114 private int renameScore = 60;
115
116
117 private boolean tableOverflow;
118
119 private List<DiffEntry> out;
120
121 SimilarityRenameDetector(ContentSource.Pair reader, List<DiffEntry> srcs,
122 List<DiffEntry> dsts) {
123 this.reader = reader;
124 this.srcs = srcs;
125 this.dsts = dsts;
126 }
127
128 void setRenameScore(int score) {
129 renameScore = score;
130 }
131
132 void compute(ProgressMonitor pm) throws IOException, CancelledException {
133 if (pm == null)
134 pm = NullProgressMonitor.INSTANCE;
135
136 pm.beginTask(JGitText.get().renamesFindingByContent,
137 2 * srcs.size() * dsts.size());
138
139 int mNext = buildMatrix(pm);
140 out = new ArrayList<>(Math.min(mNext, dsts.size()));
141
142
143
144
145 for (--mNext; mNext >= 0; mNext--) {
146 if (pm.isCancelled()) {
147
148
149 throw new CancelledException(JGitText.get().renameCancelled);
150 }
151 long ent = matrix[mNext];
152 int sIdx = srcFile(ent);
153 int dIdx = dstFile(ent);
154 DiffEntry s = srcs.get(sIdx);
155 DiffEntry d = dsts.get(dIdx);
156
157 if (d == null) {
158 pm.update(1);
159 continue;
160 }
161
162 ChangeType type;
163 if (s.changeType == ChangeType.DELETE) {
164
165
166
167
168 s.changeType = ChangeType.RENAME;
169 type = ChangeType.RENAME;
170 } else {
171 type = ChangeType.COPY;
172 }
173
174 out.add(DiffEntry.pair(type, s, d, score(ent)));
175 dsts.set(dIdx, null);
176 pm.update(1);
177 }
178
179 srcs = compactSrcList(srcs);
180 dsts = compactDstList(dsts);
181 pm.endTask();
182 }
183
184 List<DiffEntry> getMatches() {
185 return out;
186 }
187
188 List<DiffEntry> getLeftOverSources() {
189 return srcs;
190 }
191
192 List<DiffEntry> getLeftOverDestinations() {
193 return dsts;
194 }
195
196 boolean isTableOverflow() {
197 return tableOverflow;
198 }
199
200 private static List<DiffEntry> compactSrcList(List<DiffEntry> in) {
201 ArrayList<DiffEntry> r = new ArrayList<>(in.size());
202 for (DiffEntry e : in) {
203 if (e.changeType == ChangeType.DELETE)
204 r.add(e);
205 }
206 return r;
207 }
208
209 private static List<DiffEntry> compactDstList(List<DiffEntry> in) {
210 ArrayList<DiffEntry> r = new ArrayList<>(in.size());
211 for (DiffEntry e : in) {
212 if (e != null)
213 r.add(e);
214 }
215 return r;
216 }
217
218 private int buildMatrix(ProgressMonitor pm)
219 throws IOException, CancelledException {
220
221
222
223 matrix = new long[srcs.size() * dsts.size()];
224
225 long[] srcSizes = new long[srcs.size()];
226 long[] dstSizes = new long[dsts.size()];
227 BitSet dstTooLarge = null;
228
229
230
231
232
233 int mNext = 0;
234 SRC: for (int srcIdx = 0; srcIdx < srcs.size(); srcIdx++) {
235 DiffEntry srcEnt = srcs.get(srcIdx);
236 if (!isFile(srcEnt.oldMode)) {
237 pm.update(dsts.size());
238 continue;
239 }
240
241 SimilarityIndex s = null;
242
243 for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) {
244 if (pm.isCancelled()) {
245
246
247
248 throw new CancelledException(
249 JGitText.get().renameCancelled);
250 }
251
252 DiffEntry dstEnt = dsts.get(dstIdx);
253
254 if (!isFile(dstEnt.newMode)) {
255 pm.update(1);
256 continue;
257 }
258
259 if (!RenameDetector.sameType(srcEnt.oldMode, dstEnt.newMode)) {
260 pm.update(1);
261 continue;
262 }
263
264 if (dstTooLarge != null && dstTooLarge.get(dstIdx)) {
265 pm.update(1);
266 continue;
267 }
268
269 long srcSize = srcSizes[srcIdx];
270 if (srcSize == 0) {
271 srcSize = size(OLD, srcEnt) + 1;
272 srcSizes[srcIdx] = srcSize;
273 }
274
275 long dstSize = dstSizes[dstIdx];
276 if (dstSize == 0) {
277 dstSize = size(NEW, dstEnt) + 1;
278 dstSizes[dstIdx] = dstSize;
279 }
280
281 long max = Math.max(srcSize, dstSize);
282 long min = Math.min(srcSize, dstSize);
283 if (min * 100 / max < renameScore) {
284
285 pm.update(1);
286 continue;
287 }
288
289 if (s == null) {
290 try {
291 s = hash(OLD, srcEnt);
292 } catch (TableFullException tableFull) {
293 tableOverflow = true;
294 continue SRC;
295 }
296 }
297
298 SimilarityIndex d;
299 try {
300 d = hash(NEW, dstEnt);
301 } catch (TableFullException tableFull) {
302 if (dstTooLarge == null)
303 dstTooLarge = new BitSet(dsts.size());
304 dstTooLarge.set(dstIdx);
305 tableOverflow = true;
306 pm.update(1);
307 continue;
308 }
309
310 int contentScore = s.score(d, 10000);
311
312
313
314
315 int nameScore = nameScore(srcEnt.oldPath, dstEnt.newPath) * 100;
316
317 int score = (contentScore * 99 + nameScore * 1) / 10000;
318
319 if (score < renameScore) {
320 pm.update(1);
321 continue;
322 }
323
324 matrix[mNext++] = encode(score, srcIdx, dstIdx);
325 pm.update(1);
326 }
327 }
328
329
330
331
332
333 Arrays.sort(matrix, 0, mNext);
334 return mNext;
335 }
336
337 static int nameScore(String a, String b) {
338 int aDirLen = a.lastIndexOf("/") + 1;
339 int bDirLen = b.lastIndexOf("/") + 1;
340
341 int dirMin = Math.min(aDirLen, bDirLen);
342 int dirMax = Math.max(aDirLen, bDirLen);
343
344 final int dirScoreLtr;
345 final int dirScoreRtl;
346
347 if (dirMax == 0) {
348 dirScoreLtr = 100;
349 dirScoreRtl = 100;
350 } else {
351 int dirSim = 0;
352 for (; dirSim < dirMin; dirSim++) {
353 if (a.charAt(dirSim) != b.charAt(dirSim))
354 break;
355 }
356 dirScoreLtr = (dirSim * 100) / dirMax;
357
358 if (dirScoreLtr == 100) {
359 dirScoreRtl = 100;
360 } else {
361 for (dirSim = 0; dirSim < dirMin; dirSim++) {
362 if (a.charAt(aDirLen - 1 - dirSim) != b.charAt(bDirLen - 1
363 - dirSim))
364 break;
365 }
366 dirScoreRtl = (dirSim * 100) / dirMax;
367 }
368 }
369
370 int fileMin = Math.min(a.length() - aDirLen, b.length() - bDirLen);
371 int fileMax = Math.max(a.length() - aDirLen, b.length() - bDirLen);
372
373 int fileSim = 0;
374 for (; fileSim < fileMin; fileSim++) {
375 if (a.charAt(a.length() - 1 - fileSim) != b.charAt(b.length() - 1
376 - fileSim))
377 break;
378 }
379 int fileScore = (fileSim * 100) / fileMax;
380
381 return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
382 }
383
384 private SimilarityIndex hash(DiffEntry.Side side, DiffEntry ent)
385 throws IOException, TableFullException {
386 SimilarityIndex r = new SimilarityIndex();
387 r.hash(reader.open(side, ent));
388 r.sort();
389 return r;
390 }
391
392 private long size(DiffEntry.Side side, DiffEntry ent) throws IOException {
393 return reader.size(side, ent);
394 }
395
396 private static int score(long value) {
397 return (int) (value >>> SCORE_SHIFT);
398 }
399
400 static int srcFile(long value) {
401 return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK);
402 }
403
404 static int dstFile(long value) {
405 return decodeFile(((int) value) & INDEX_MASK);
406 }
407
408 static long encode(int score, int srcIdx, int dstIdx) {
409 return (((long) score) << SCORE_SHIFT)
410 | (encodeFile(srcIdx) << BITS_PER_INDEX)
411 | encodeFile(dstIdx);
412 }
413
414 private static long encodeFile(int idx) {
415
416
417
418
419 return INDEX_MASK - idx;
420 }
421
422 private static int decodeFile(int v) {
423 return INDEX_MASK - v;
424 }
425
426 private static boolean isFile(FileMode mode) {
427 return (mode.getBits() & FileMode.TYPE_MASK) == FileMode.TYPE_FILE;
428 }
429 }