1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  package org.eclipse.jgit.diff;
12  
13  import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
14  import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
15  import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
16  
17  import java.io.IOException;
18  import java.util.ArrayList;
19  import java.util.Arrays;
20  import java.util.BitSet;
21  import java.util.List;
22  
23  import org.eclipse.jgit.api.errors.CanceledException;
24  import org.eclipse.jgit.diff.DiffEntry.ChangeType;
25  import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
26  import org.eclipse.jgit.internal.JGitText;
27  import org.eclipse.jgit.lib.FileMode;
28  import org.eclipse.jgit.lib.NullProgressMonitor;
29  import org.eclipse.jgit.lib.ObjectLoader;
30  import org.eclipse.jgit.lib.ProgressMonitor;
31  
32  class SimilarityRenameDetector {
33  	
34  
35  
36  
37  
38  
39  
40  
41  	private static final int BITS_PER_INDEX = 28;
42  
43  	private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1;
44  
45  	private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX;
46  
47  	private ContentSource.Pair reader;
48  
49  	
50  
51  
52  
53  
54  
55  
56  	private List<DiffEntry> srcs;
57  
58  	
59  
60  
61  
62  
63  
64  
65  	private List<DiffEntry> dsts;
66  
67  	
68  
69  
70  
71  
72  
73  
74  
75  
76  
77  
78  
79  
80  	private long[] matrix;
81  
82  	
83  	private int renameScore = 60;
84  
85  	
86  
87  
88  
89  	private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
90  
91  	
92  	private boolean skipBinaryFiles = false;
93  
94  	
95  	private boolean tableOverflow;
96  
97  	private List<DiffEntry> out;
98  
99  	SimilarityRenameDetector(ContentSource.Pair reader, List<DiffEntry> srcs,
100 			List<DiffEntry> dsts) {
101 		this.reader = reader;
102 		this.srcs = srcs;
103 		this.dsts = dsts;
104 	}
105 
106 	void setRenameScore(int score) {
107 		renameScore = score;
108 	}
109 
110 	void setBigFileThreshold(int threshold) {
111 		bigFileThreshold = threshold;
112 	}
113 
114 	void setSkipBinaryFiles(boolean value) {
115 		skipBinaryFiles = value;
116 	}
117 
118 	void compute(ProgressMonitor pm) throws IOException, CanceledException {
119 		if (pm == null)
120 			pm = NullProgressMonitor.INSTANCE;
121 
122 		pm.beginTask(JGitText.get().renamesFindingByContent, 
123 				2 * srcs.size() * dsts.size());
124 
125 		int mNext = buildMatrix(pm);
126 		out = new ArrayList<>(Math.min(mNext, dsts.size()));
127 
128 		
129 		
130 		
131 		for (--mNext; mNext >= 0; mNext--) {
132 			if (pm.isCancelled()) {
133 				throw new CanceledException(JGitText.get().renameCancelled);
134 			}
135 			long ent = matrix[mNext];
136 			int sIdx = srcFile(ent);
137 			int dIdx = dstFile(ent);
138 			DiffEntry s = srcs.get(sIdx);
139 			DiffEntry d = dsts.get(dIdx);
140 
141 			if (d == null) {
142 				pm.update(1);
143 				continue; 
144 			}
145 
146 			ChangeType type;
147 			if (s.changeType == ChangeType.DELETE) {
148 				
149 				
150 				
151 				
152 				s.changeType = ChangeType.RENAME;
153 				type = ChangeType.RENAME;
154 			} else {
155 				type = ChangeType.COPY;
156 			}
157 
158 			out.add(DiffEntry.pair(type, s, d, score(ent)));
159 			dsts.set(dIdx, null); 
160 			pm.update(1);
161 		}
162 
163 		srcs = compactSrcList(srcs);
164 		dsts = compactDstList(dsts);
165 		pm.endTask();
166 	}
167 
168 	List<DiffEntry> getMatches() {
169 		return out;
170 	}
171 
172 	List<DiffEntry> getLeftOverSources() {
173 		return srcs;
174 	}
175 
176 	List<DiffEntry> getLeftOverDestinations() {
177 		return dsts;
178 	}
179 
180 	boolean isTableOverflow() {
181 		return tableOverflow;
182 	}
183 
184 	private static List<DiffEntry> compactSrcList(List<DiffEntry> in) {
185 		ArrayList<DiffEntry> r = new ArrayList<>(in.size());
186 		for (DiffEntry e : in) {
187 			if (e.changeType == ChangeType.DELETE)
188 				r.add(e);
189 		}
190 		return r;
191 	}
192 
193 	private static List<DiffEntry> compactDstList(List<DiffEntry> in) {
194 		ArrayList<DiffEntry> r = new ArrayList<>(in.size());
195 		for (DiffEntry e : in) {
196 			if (e != null)
197 				r.add(e);
198 		}
199 		return r;
200 	}
201 
202 	private int buildMatrix(ProgressMonitor pm)
203 			throws IOException, CanceledException {
204 		
205 		
206 		
207 		matrix = new long[srcs.size() * dsts.size()];
208 
209 		long[] srcSizes = new long[srcs.size()];
210 		long[] dstSizes = new long[dsts.size()];
211 		BitSet dstTooLarge = null;
212 
213 		
214 		
215 		
216 		
217 		int mNext = 0;
218 		SRC: for (int srcIdx = 0; srcIdx < srcs.size(); srcIdx++) {
219 			DiffEntry srcEnt = srcs.get(srcIdx);
220 			if (!isFile(srcEnt.oldMode)) {
221 				pm.update(dsts.size());
222 				continue;
223 			}
224 
225 			SimilarityIndex s = null;
226 
227 			for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) {
228 				if (pm.isCancelled()) {
229 					throw new CanceledException(
230 							JGitText.get().renameCancelled);
231 				}
232 
233 				DiffEntry dstEnt = dsts.get(dstIdx);
234 
235 				if (!isFile(dstEnt.newMode)) {
236 					pm.update(1);
237 					continue;
238 				}
239 
240 				if (!RenameDetector.sameType(srcEnt.oldMode, dstEnt.newMode)) {
241 					pm.update(1);
242 					continue;
243 				}
244 
245 				if (dstTooLarge != null && dstTooLarge.get(dstIdx)) {
246 					pm.update(1);
247 					continue;
248 				}
249 
250 				long srcSize = srcSizes[srcIdx];
251 				if (srcSize == 0) {
252 					srcSize = size(OLD, srcEnt) + 1;
253 					srcSizes[srcIdx] = srcSize;
254 				}
255 
256 				long dstSize = dstSizes[dstIdx];
257 				if (dstSize == 0) {
258 					dstSize = size(NEW, dstEnt) + 1;
259 					dstSizes[dstIdx] = dstSize;
260 				}
261 
262 				long max = Math.max(srcSize, dstSize);
263 				long min = Math.min(srcSize, dstSize);
264 				if (min * 100 / max < renameScore) {
265 					
266 					pm.update(1);
267 					continue;
268 				}
269 
270 				if (max > bigFileThreshold) {
271 					pm.update(1);
272 					continue;
273 				}
274 
275 				if (s == null) {
276 					try {
277 						ObjectLoader loader = reader.open(OLD, srcEnt);
278 						if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
279 							pm.update(1);
280 							continue SRC;
281 						}
282 						s = hash(loader);
283 					} catch (TableFullException tableFull) {
284 						tableOverflow = true;
285 						continue SRC;
286 					}
287 				}
288 
289 				SimilarityIndex d;
290 				try {
291 					ObjectLoader loader = reader.open(NEW, dstEnt);
292 					if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
293 						pm.update(1);
294 						continue;
295 					}
296 					d = hash(loader);
297 				} catch (TableFullException tableFull) {
298 					if (dstTooLarge == null)
299 						dstTooLarge = new BitSet(dsts.size());
300 					dstTooLarge.set(dstIdx);
301 					tableOverflow = true;
302 					pm.update(1);
303 					continue;
304 				}
305 
306 				int contentScore = s.score(d, 10000);
307 
308 				
309 				
310 				
311 				int nameScore = nameScore(srcEnt.oldPath, dstEnt.newPath) * 100;
312 
313 				int score = (contentScore * 99 + nameScore * 1) / 10000;
314 
315 				if (score < renameScore) {
316 					pm.update(1);
317 					continue;
318 				}
319 
320 				matrix[mNext++] = encode(score, srcIdx, dstIdx);
321 				pm.update(1);
322 			}
323 		}
324 
325 		
326 		
327 		
328 		
329 		Arrays.sort(matrix, 0, mNext);
330 		return mNext;
331 	}
332 
333 	static int nameScore(String a, String b) {
334 		int aDirLen = a.lastIndexOf('/') + 1;
335 		int bDirLen = b.lastIndexOf('/') + 1;
336 
337 		int dirMin = Math.min(aDirLen, bDirLen);
338 		int dirMax = Math.max(aDirLen, bDirLen);
339 
340 		final int dirScoreLtr;
341 		final int dirScoreRtl;
342 
343 		if (dirMax == 0) {
344 			dirScoreLtr = 100;
345 			dirScoreRtl = 100;
346 		} else {
347 			int dirSim = 0;
348 			for (; dirSim < dirMin; dirSim++) {
349 				if (a.charAt(dirSim) != b.charAt(dirSim))
350 					break;
351 			}
352 			dirScoreLtr = (dirSim * 100) / dirMax;
353 
354 			if (dirScoreLtr == 100) {
355 				dirScoreRtl = 100;
356 			} else {
357 				for (dirSim = 0; dirSim < dirMin; dirSim++) {
358 					if (a.charAt(aDirLen - 1 - dirSim) != b.charAt(bDirLen - 1
359 							- dirSim))
360 						break;
361 				}
362 				dirScoreRtl = (dirSim * 100) / dirMax;
363 			}
364 		}
365 
366 		int fileMin = Math.min(a.length() - aDirLen, b.length() - bDirLen);
367 		int fileMax = Math.max(a.length() - aDirLen, b.length() - bDirLen);
368 
369 		int fileSim = 0;
370 		for (; fileSim < fileMin; fileSim++) {
371 			if (a.charAt(a.length() - 1 - fileSim) != b.charAt(b.length() - 1
372 					- fileSim))
373 				break;
374 		}
375 		int fileScore = (fileSim * 100) / fileMax;
376 
377 		return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
378 	}
379 
380 	private SimilarityIndex hash(ObjectLoader objectLoader)
381 			throws IOException, TableFullException {
382 		SimilarityIndex r = new SimilarityIndex();
383 		r.hash(objectLoader);
384 		r.sort();
385 		return r;
386 	}
387 
388 	private long size(DiffEntry.Side side, DiffEntry ent) throws IOException {
389 		return reader.size(side, ent);
390 	}
391 
392 	private static int score(long value) {
393 		return (int) (value >>> SCORE_SHIFT);
394 	}
395 
396 	static int srcFile(long value) {
397 		return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK);
398 	}
399 
400 	static int dstFile(long value) {
401 		return decodeFile(((int) value) & INDEX_MASK);
402 	}
403 
404 	static long encode(int score, int srcIdx, int dstIdx) {
405 		return (((long) score) << SCORE_SHIFT) 
406 				| (encodeFile(srcIdx) << BITS_PER_INDEX) 
407 				| encodeFile(dstIdx);
408 	}
409 
410 	private static long encodeFile(int idx) {
411 		
412 		
413 		
414 		
415 		return INDEX_MASK - idx;
416 	}
417 
418 	private static int decodeFile(int v) {
419 		return INDEX_MASK - v;
420 	}
421 
422 	private static boolean isFile(FileMode mode) {
423 		return (mode.getBits() & FileMode.TYPE_MASK) == FileMode.TYPE_FILE;
424 	}
425 }