1
2
3
4
5
6
7
8
9
10
11 package org.eclipse.jgit.diff;
12
13 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
14 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
15
16 import java.io.IOException;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.BitSet;
20 import java.util.List;
21
22 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
23 import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
24 import org.eclipse.jgit.errors.CancelledException;
25 import org.eclipse.jgit.internal.JGitText;
26 import org.eclipse.jgit.lib.FileMode;
27 import org.eclipse.jgit.lib.NullProgressMonitor;
28 import org.eclipse.jgit.lib.ProgressMonitor;
29
30 class SimilarityRenameDetector {
31
32
33
34
35
36
37
38
39 private static final int BITS_PER_INDEX = 28;
40
41 private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1;
42
43 private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX;
44
45 private ContentSource.Pair reader;
46
47
48
49
50
51
52
53
54 private List<DiffEntry> srcs;
55
56
57
58
59
60
61
62
63 private List<DiffEntry> dsts;
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78 private long[] matrix;
79
80
81 private int renameScore = 60;
82
83
84 private boolean tableOverflow;
85
86 private List<DiffEntry> out;
87
88 SimilarityRenameDetector(ContentSource.Pair reader, List<DiffEntry> srcs,
89 List<DiffEntry> dsts) {
90 this.reader = reader;
91 this.srcs = srcs;
92 this.dsts = dsts;
93 }
94
95 void setRenameScore(int score) {
96 renameScore = score;
97 }
98
99 void compute(ProgressMonitor pm) throws IOException, CancelledException {
100 if (pm == null)
101 pm = NullProgressMonitor.INSTANCE;
102
103 pm.beginTask(JGitText.get().renamesFindingByContent,
104 2 * srcs.size() * dsts.size());
105
106 int mNext = buildMatrix(pm);
107 out = new ArrayList<>(Math.min(mNext, dsts.size()));
108
109
110
111
112 for (--mNext; mNext >= 0; mNext--) {
113 if (pm.isCancelled()) {
114
115
116 throw new CancelledException(JGitText.get().renameCancelled);
117 }
118 long ent = matrix[mNext];
119 int sIdx = srcFile(ent);
120 int dIdx = dstFile(ent);
121 DiffEntry s = srcs.get(sIdx);
122 DiffEntry d = dsts.get(dIdx);
123
124 if (d == null) {
125 pm.update(1);
126 continue;
127 }
128
129 ChangeType type;
130 if (s.changeType == ChangeType.DELETE) {
131
132
133
134
135 s.changeType = ChangeType.RENAME;
136 type = ChangeType.RENAME;
137 } else {
138 type = ChangeType.COPY;
139 }
140
141 out.add(DiffEntry.pair(type, s, d, score(ent)));
142 dsts.set(dIdx, null);
143 pm.update(1);
144 }
145
146 srcs = compactSrcList(srcs);
147 dsts = compactDstList(dsts);
148 pm.endTask();
149 }
150
151 List<DiffEntry> getMatches() {
152 return out;
153 }
154
155 List<DiffEntry> getLeftOverSources() {
156 return srcs;
157 }
158
159 List<DiffEntry> getLeftOverDestinations() {
160 return dsts;
161 }
162
163 boolean isTableOverflow() {
164 return tableOverflow;
165 }
166
167 private static List<DiffEntry> compactSrcList(List<DiffEntry> in) {
168 ArrayList<DiffEntry> r = new ArrayList<>(in.size());
169 for (DiffEntry e : in) {
170 if (e.changeType == ChangeType.DELETE)
171 r.add(e);
172 }
173 return r;
174 }
175
176 private static List<DiffEntry> compactDstList(List<DiffEntry> in) {
177 ArrayList<DiffEntry> r = new ArrayList<>(in.size());
178 for (DiffEntry e : in) {
179 if (e != null)
180 r.add(e);
181 }
182 return r;
183 }
184
185 private int buildMatrix(ProgressMonitor pm)
186 throws IOException, CancelledException {
187
188
189
190 matrix = new long[srcs.size() * dsts.size()];
191
192 long[] srcSizes = new long[srcs.size()];
193 long[] dstSizes = new long[dsts.size()];
194 BitSet dstTooLarge = null;
195
196
197
198
199
200 int mNext = 0;
201 SRC: for (int srcIdx = 0; srcIdx < srcs.size(); srcIdx++) {
202 DiffEntry srcEnt = srcs.get(srcIdx);
203 if (!isFile(srcEnt.oldMode)) {
204 pm.update(dsts.size());
205 continue;
206 }
207
208 SimilarityIndex s = null;
209
210 for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) {
211 if (pm.isCancelled()) {
212
213
214
215 throw new CancelledException(
216 JGitText.get().renameCancelled);
217 }
218
219 DiffEntry dstEnt = dsts.get(dstIdx);
220
221 if (!isFile(dstEnt.newMode)) {
222 pm.update(1);
223 continue;
224 }
225
226 if (!RenameDetector.sameType(srcEnt.oldMode, dstEnt.newMode)) {
227 pm.update(1);
228 continue;
229 }
230
231 if (dstTooLarge != null && dstTooLarge.get(dstIdx)) {
232 pm.update(1);
233 continue;
234 }
235
236 long srcSize = srcSizes[srcIdx];
237 if (srcSize == 0) {
238 srcSize = size(OLD, srcEnt) + 1;
239 srcSizes[srcIdx] = srcSize;
240 }
241
242 long dstSize = dstSizes[dstIdx];
243 if (dstSize == 0) {
244 dstSize = size(NEW, dstEnt) + 1;
245 dstSizes[dstIdx] = dstSize;
246 }
247
248 long max = Math.max(srcSize, dstSize);
249 long min = Math.min(srcSize, dstSize);
250 if (min * 100 / max < renameScore) {
251
252 pm.update(1);
253 continue;
254 }
255
256 if (s == null) {
257 try {
258 s = hash(OLD, srcEnt);
259 } catch (TableFullException tableFull) {
260 tableOverflow = true;
261 continue SRC;
262 }
263 }
264
265 SimilarityIndex d;
266 try {
267 d = hash(NEW, dstEnt);
268 } catch (TableFullException tableFull) {
269 if (dstTooLarge == null)
270 dstTooLarge = new BitSet(dsts.size());
271 dstTooLarge.set(dstIdx);
272 tableOverflow = true;
273 pm.update(1);
274 continue;
275 }
276
277 int contentScore = s.score(d, 10000);
278
279
280
281
282 int nameScore = nameScore(srcEnt.oldPath, dstEnt.newPath) * 100;
283
284 int score = (contentScore * 99 + nameScore * 1) / 10000;
285
286 if (score < renameScore) {
287 pm.update(1);
288 continue;
289 }
290
291 matrix[mNext++] = encode(score, srcIdx, dstIdx);
292 pm.update(1);
293 }
294 }
295
296
297
298
299
300 Arrays.sort(matrix, 0, mNext);
301 return mNext;
302 }
303
304 static int nameScore(String a, String b) {
305 int aDirLen = a.lastIndexOf('/') + 1;
306 int bDirLen = b.lastIndexOf('/') + 1;
307
308 int dirMin = Math.min(aDirLen, bDirLen);
309 int dirMax = Math.max(aDirLen, bDirLen);
310
311 final int dirScoreLtr;
312 final int dirScoreRtl;
313
314 if (dirMax == 0) {
315 dirScoreLtr = 100;
316 dirScoreRtl = 100;
317 } else {
318 int dirSim = 0;
319 for (; dirSim < dirMin; dirSim++) {
320 if (a.charAt(dirSim) != b.charAt(dirSim))
321 break;
322 }
323 dirScoreLtr = (dirSim * 100) / dirMax;
324
325 if (dirScoreLtr == 100) {
326 dirScoreRtl = 100;
327 } else {
328 for (dirSim = 0; dirSim < dirMin; dirSim++) {
329 if (a.charAt(aDirLen - 1 - dirSim) != b.charAt(bDirLen - 1
330 - dirSim))
331 break;
332 }
333 dirScoreRtl = (dirSim * 100) / dirMax;
334 }
335 }
336
337 int fileMin = Math.min(a.length() - aDirLen, b.length() - bDirLen);
338 int fileMax = Math.max(a.length() - aDirLen, b.length() - bDirLen);
339
340 int fileSim = 0;
341 for (; fileSim < fileMin; fileSim++) {
342 if (a.charAt(a.length() - 1 - fileSim) != b.charAt(b.length() - 1
343 - fileSim))
344 break;
345 }
346 int fileScore = (fileSim * 100) / fileMax;
347
348 return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
349 }
350
351 private SimilarityIndex hash(DiffEntry.Side side, DiffEntry ent)
352 throws IOException, TableFullException {
353 SimilarityIndex r = new SimilarityIndex();
354 r.hash(reader.open(side, ent));
355 r.sort();
356 return r;
357 }
358
359 private long size(DiffEntry.Side side, DiffEntry ent) throws IOException {
360 return reader.size(side, ent);
361 }
362
363 private static int score(long value) {
364 return (int) (value >>> SCORE_SHIFT);
365 }
366
367 static int srcFile(long value) {
368 return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK);
369 }
370
371 static int dstFile(long value) {
372 return decodeFile(((int) value) & INDEX_MASK);
373 }
374
375 static long encode(int score, int srcIdx, int dstIdx) {
376 return (((long) score) << SCORE_SHIFT)
377 | (encodeFile(srcIdx) << BITS_PER_INDEX)
378 | encodeFile(dstIdx);
379 }
380
381 private static long encodeFile(int idx) {
382
383
384
385
386 return INDEX_MASK - idx;
387 }
388
389 private static int decodeFile(int v) {
390 return INDEX_MASK - v;
391 }
392
393 private static boolean isFile(FileMode mode) {
394 return (mode.getBits() & FileMode.TYPE_MASK) == FileMode.TYPE_FILE;
395 }
396 }