1
2
3
4
5
6
7
8
9
10
11 package org.eclipse.jgit.diff;
12
13 import java.io.EOFException;
14 import java.io.IOException;
15 import java.io.InputStream;
16 import java.util.Arrays;
17
18 import org.eclipse.jgit.errors.MissingObjectException;
19 import org.eclipse.jgit.lib.ObjectLoader;
20 import org.eclipse.jgit.lib.ObjectStream;
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 public class SimilarityIndex {
38
39 public static final TableFullException
40 TABLE_FULL_OUT_OF_MEMORY = new TableFullException();
41
42
43
44
45
46
47
48 private static final int KEY_SHIFT = 32;
49
50
51 private static final long MAX_COUNT = (1L << KEY_SHIFT) - 1;
52
53
54
55
56
57 private long hashedCnt;
58
59
60 private int idSize;
61
62
63 private int idGrowAt;
64
65
66
67
68
69
70
71
72
73
74 private long[] idHash;
75
76
77 private int idHashBits;
78
79
80
81
82
83
84
85
86
87
88
89
90
91 public static SimilarityIndex create(ObjectLoader obj) throws IOException,
92 TableFullException {
93 SimilarityIndex idx = new SimilarityIndex();
94 idx.hash(obj);
95 idx.sort();
96 return idx;
97 }
98
99 SimilarityIndex() {
100 idHashBits = 8;
101 idHash = new long[1 << idHashBits];
102 idGrowAt = growAt(idHashBits);
103 }
104
105 void hash(ObjectLoader obj) throws MissingObjectException, IOException,
106 TableFullException {
107 if (obj.isLarge()) {
108 hashLargeObject(obj);
109 } else {
110 byte[] raw = obj.getCachedBytes();
111 hash(raw, 0, raw.length);
112 }
113 }
114
115 private void hashLargeObject(ObjectLoader obj) throws IOException,
116 TableFullException {
117 boolean text;
118 try (ObjectStream in1 = obj.openStream()) {
119 text = !RawText.isBinary(in1);
120 }
121
122 try (ObjectStream in2 = obj.openStream()) {
123 hash(in2, in2.getSize(), text);
124 }
125 }
126
127 void hash(byte[] raw, int ptr, int end) throws TableFullException {
128 final boolean text = !RawText.isBinary(raw);
129 hashedCnt = 0;
130 while (ptr < end) {
131 int hash = 5381;
132 int blockHashedCnt = 0;
133 int start = ptr;
134
135
136 do {
137 int c = raw[ptr++] & 0xff;
138
139 if (text && c == '\r' && ptr < end && raw[ptr] == '\n')
140 continue;
141 blockHashedCnt++;
142 if (c == '\n')
143 break;
144 hash = (hash << 5) + hash + c;
145 } while (ptr < end && ptr - start < 64);
146 hashedCnt += blockHashedCnt;
147 add(hash, blockHashedCnt);
148 }
149 }
150
151 void hash(InputStream in, long remaining, boolean text) throws IOException,
152 TableFullException {
153 byte[] buf = new byte[4096];
154 int ptr = 0;
155 int cnt = 0;
156
157 while (0 < remaining) {
158 int hash = 5381;
159 int blockHashedCnt = 0;
160
161
162 int n = 0;
163 do {
164 if (ptr == cnt) {
165 ptr = 0;
166 cnt = in.read(buf, 0, buf.length);
167 if (cnt <= 0)
168 throw new EOFException();
169 }
170
171 n++;
172 int c = buf[ptr++] & 0xff;
173
174 if (text && c == '\r' && ptr < cnt && buf[ptr] == '\n')
175 continue;
176 blockHashedCnt++;
177 if (c == '\n')
178 break;
179 hash = (hash << 5) + hash + c;
180 } while (n < 64 && n < remaining);
181 hashedCnt += blockHashedCnt;
182 add(hash, blockHashedCnt);
183 remaining -= n;
184 }
185 }
186
187
188
189
190
191
192 void sort() {
193
194
195
196
197 Arrays.sort(idHash);
198 }
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220 public int score(SimilarityIndex dst, int maxScore) {
221 long max = Math.max(hashedCnt, dst.hashedCnt);
222 if (max == 0)
223 return maxScore;
224 return (int) ((common(dst) * maxScore) / max);
225 }
226
227 long common(SimilarityIndex dst) {
228 return common(this, dst);
229 }
230
231 private static long common(SimilarityIndex./../../org/eclipse/jgit/diff/SimilarityIndex.html#SimilarityIndex">SimilarityIndex src, SimilarityIndex dst) {
232 int srcIdx = src.packedIndex(0);
233 int dstIdx = dst.packedIndex(0);
234 long[] srcHash = src.idHash;
235 long[] dstHash = dst.idHash;
236 return common(srcHash, srcIdx, dstHash, dstIdx);
237 }
238
239 private static long common(long[] srcHash, int srcIdx,
240 long[] dstHash, int dstIdx) {
241 if (srcIdx == srcHash.length || dstIdx == dstHash.length)
242 return 0;
243
244 long common = 0;
245 int srcKey = keyOf(srcHash[srcIdx]);
246 int dstKey = keyOf(dstHash[dstIdx]);
247
248 for (;;) {
249 if (srcKey == dstKey) {
250 common += Math.min(countOf(srcHash[srcIdx]),
251 countOf(dstHash[dstIdx]));
252
253 if (++srcIdx == srcHash.length)
254 break;
255 srcKey = keyOf(srcHash[srcIdx]);
256
257 if (++dstIdx == dstHash.length)
258 break;
259 dstKey = keyOf(dstHash[dstIdx]);
260
261 } else if (srcKey < dstKey) {
262
263 if (++srcIdx == srcHash.length)
264 break;
265 srcKey = keyOf(srcHash[srcIdx]);
266
267 } else {
268
269 if (++dstIdx == dstHash.length)
270 break;
271 dstKey = keyOf(dstHash[dstIdx]);
272 }
273 }
274
275 return common;
276 }
277
278
279 int size() {
280 return idSize;
281 }
282
283
284 int key(int idx) {
285 return keyOf(idHash[packedIndex(idx)]);
286 }
287
288
289 long count(int idx) {
290 return countOf(idHash[packedIndex(idx)]);
291 }
292
293
294 int findIndex(int key) {
295 for (int i = 0; i < idSize; i++)
296 if (key(i) == key)
297 return i;
298 return -1;
299 }
300
301 private int packedIndex(int idx) {
302 return (idHash.length - idSize) + idx;
303 }
304
305 void add(int key, int cnt) throws TableFullException {
306 key = (key * 0x9e370001) >>> 1;
307
308 int j = slot(key);
309 for (;;) {
310 long v = idHash[j];
311 if (v == 0) {
312
313 if (idGrowAt <= idSize) {
314 grow();
315 j = slot(key);
316 continue;
317 }
318 idHash[j] = pair(key, cnt);
319 idSize++;
320 return;
321
322 } else if (keyOf(v) == key) {
323
324
325
326 idHash[j] = pair(key, countOf(v) + cnt);
327 return;
328
329 } else if (++j >= idHash.length) {
330 j = 0;
331 }
332 }
333 }
334
335 private static long pair(int key, long cnt) throws TableFullException {
336 if (MAX_COUNT < cnt)
337 throw new TableFullException();
338 return (((long) key) << KEY_SHIFT) | cnt;
339 }
340
341 private int slot(int key) {
342
343
344
345
346 return key >>> (31 - idHashBits);
347 }
348
349 private static int growAt(int idHashBits) {
350 return (1 << idHashBits) * (idHashBits - 3) / idHashBits;
351 }
352
353 @SuppressWarnings("UnusedException")
354 private void grow() throws TableFullException {
355 if (idHashBits == 30)
356 throw new TableFullException();
357
358 long[] oldHash = idHash;
359 int oldSize = idHash.length;
360
361 idHashBits++;
362 idGrowAt = growAt(idHashBits);
363
364 try {
365 idHash = new long[1 << idHashBits];
366 } catch (OutOfMemoryError noMemory) {
367 throw TABLE_FULL_OUT_OF_MEMORY;
368 }
369
370 for (int i = 0; i < oldSize; i++) {
371 long v = oldHash[i];
372 if (v != 0) {
373 int j = slot(keyOf(v));
374 while (idHash[j] != 0)
375 if (++j >= idHash.length)
376 j = 0;
377 idHash[j] = v;
378 }
379 }
380 }
381
382 private static int keyOf(long v) {
383 return (int) (v >>> KEY_SHIFT);
384 }
385
386 private static long countOf(long v) {
387 return v & MAX_COUNT;
388 }
389
390
391 public static class TableFullException extends Exception {
392 private static final long serialVersionUID = 1L;
393 }
394 }