1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44 package org.eclipse.jgit.diff;
45
46 import java.io.EOFException;
47 import java.io.IOException;
48 import java.io.InputStream;
49 import java.util.Arrays;
50
51 import org.eclipse.jgit.errors.MissingObjectException;
52 import org.eclipse.jgit.lib.ObjectLoader;
53 import org.eclipse.jgit.lib.ObjectStream;
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70 public class SimilarityIndex {
71
72 public static final TableFullException
73 TABLE_FULL_OUT_OF_MEMORY = new TableFullException();
74
75
76
77
78
79
80
81 private static final int KEY_SHIFT = 32;
82
83
84 private static final long MAX_COUNT = (1L << KEY_SHIFT) - 1;
85
86
87
88
89
90 private long hashedCnt;
91
92
93 private int idSize;
94
95
96 private int idGrowAt;
97
98
99
100
101
102
103
104
105
106
107 private long[] idHash;
108
109
110 private int idHashBits;
111
112
113
114
115
116
117
118
119
120
121
122
123
124 public static SimilarityIndex create(ObjectLoader obj) throws IOException,
125 TableFullException {
126 SimilarityIndex idx = new SimilarityIndex();
127 idx.hash(obj);
128 idx.sort();
129 return idx;
130 }
131
132 SimilarityIndex() {
133 idHashBits = 8;
134 idHash = new long[1 << idHashBits];
135 idGrowAt = growAt(idHashBits);
136 }
137
138 void hash(ObjectLoader obj) throws MissingObjectException, IOException,
139 TableFullException {
140 if (obj.isLarge()) {
141 hashLargeObject(obj);
142 } else {
143 byte[] raw = obj.getCachedBytes();
144 hash(raw, 0, raw.length);
145 }
146 }
147
148 private void hashLargeObject(ObjectLoader obj) throws IOException,
149 TableFullException {
150 boolean text;
151 try (ObjectStream in1 = obj.openStream()) {
152 text = !RawText.isBinary(in1);
153 }
154
155 try (ObjectStream in2 = obj.openStream()) {
156 hash(in2, in2.getSize(), text);
157 }
158 }
159
160 void hash(byte[] raw, int ptr, int end) throws TableFullException {
161 final boolean text = !RawText.isBinary(raw);
162 hashedCnt = 0;
163 while (ptr < end) {
164 int hash = 5381;
165 int blockHashedCnt = 0;
166 int start = ptr;
167
168
169 do {
170 int c = raw[ptr++] & 0xff;
171
172 if (text && c == '\r' && ptr < end && raw[ptr] == '\n')
173 continue;
174 blockHashedCnt++;
175 if (c == '\n')
176 break;
177 hash = (hash << 5) + hash + c;
178 } while (ptr < end && ptr - start < 64);
179 hashedCnt += blockHashedCnt;
180 add(hash, blockHashedCnt);
181 }
182 }
183
184 void hash(InputStream in, long remaining, boolean text) throws IOException,
185 TableFullException {
186 byte[] buf = new byte[4096];
187 int ptr = 0;
188 int cnt = 0;
189
190 while (0 < remaining) {
191 int hash = 5381;
192 int blockHashedCnt = 0;
193
194
195 int n = 0;
196 do {
197 if (ptr == cnt) {
198 ptr = 0;
199 cnt = in.read(buf, 0, buf.length);
200 if (cnt <= 0)
201 throw new EOFException();
202 }
203
204 n++;
205 int c = buf[ptr++] & 0xff;
206
207 if (text && c == '\r' && ptr < cnt && buf[ptr] == '\n')
208 continue;
209 blockHashedCnt++;
210 if (c == '\n')
211 break;
212 hash = (hash << 5) + hash + c;
213 } while (n < 64 && n < remaining);
214 hashedCnt += blockHashedCnt;
215 add(hash, blockHashedCnt);
216 remaining -= n;
217 }
218 }
219
220
221
222
223
224
225 void sort() {
226
227
228
229
230 Arrays.sort(idHash);
231 }
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253 public int score(SimilarityIndex dst, int maxScore) {
254 long max = Math.max(hashedCnt, dst.hashedCnt);
255 if (max == 0)
256 return maxScore;
257 return (int) ((common(dst) * maxScore) / max);
258 }
259
260 long common(SimilarityIndex dst) {
261 return common(this, dst);
262 }
263
264 private static long common(SimilarityIndex./../../org/eclipse/jgit/diff/SimilarityIndex.html#SimilarityIndex">SimilarityIndex src, SimilarityIndex dst) {
265 int srcIdx = src.packedIndex(0);
266 int dstIdx = dst.packedIndex(0);
267 long[] srcHash = src.idHash;
268 long[] dstHash = dst.idHash;
269 return common(srcHash, srcIdx, dstHash, dstIdx);
270 }
271
272 private static long common(long[] srcHash, int srcIdx,
273 long[] dstHash, int dstIdx) {
274 if (srcIdx == srcHash.length || dstIdx == dstHash.length)
275 return 0;
276
277 long common = 0;
278 int srcKey = keyOf(srcHash[srcIdx]);
279 int dstKey = keyOf(dstHash[dstIdx]);
280
281 for (;;) {
282 if (srcKey == dstKey) {
283 common += Math.min(countOf(srcHash[srcIdx]),
284 countOf(dstHash[dstIdx]));
285
286 if (++srcIdx == srcHash.length)
287 break;
288 srcKey = keyOf(srcHash[srcIdx]);
289
290 if (++dstIdx == dstHash.length)
291 break;
292 dstKey = keyOf(dstHash[dstIdx]);
293
294 } else if (srcKey < dstKey) {
295
296 if (++srcIdx == srcHash.length)
297 break;
298 srcKey = keyOf(srcHash[srcIdx]);
299
300 } else {
301
302 if (++dstIdx == dstHash.length)
303 break;
304 dstKey = keyOf(dstHash[dstIdx]);
305 }
306 }
307
308 return common;
309 }
310
311
312 int size() {
313 return idSize;
314 }
315
316
317 int key(int idx) {
318 return keyOf(idHash[packedIndex(idx)]);
319 }
320
321
322 long count(int idx) {
323 return countOf(idHash[packedIndex(idx)]);
324 }
325
326
327 int findIndex(int key) {
328 for (int i = 0; i < idSize; i++)
329 if (key(i) == key)
330 return i;
331 return -1;
332 }
333
334 private int packedIndex(int idx) {
335 return (idHash.length - idSize) + idx;
336 }
337
338 void add(int key, int cnt) throws TableFullException {
339 key = (key * 0x9e370001) >>> 1;
340
341 int j = slot(key);
342 for (;;) {
343 long v = idHash[j];
344 if (v == 0) {
345
346 if (idGrowAt <= idSize) {
347 grow();
348 j = slot(key);
349 continue;
350 }
351 idHash[j] = pair(key, cnt);
352 idSize++;
353 return;
354
355 } else if (keyOf(v) == key) {
356
357
358
359 idHash[j] = pair(key, countOf(v) + cnt);
360 return;
361
362 } else if (++j >= idHash.length) {
363 j = 0;
364 }
365 }
366 }
367
368 private static long pair(int key, long cnt) throws TableFullException {
369 if (MAX_COUNT < cnt)
370 throw new TableFullException();
371 return (((long) key) << KEY_SHIFT) | cnt;
372 }
373
374 private int slot(int key) {
375
376
377
378
379 return key >>> (31 - idHashBits);
380 }
381
382 private static int growAt(int idHashBits) {
383 return (1 << idHashBits) * (idHashBits - 3) / idHashBits;
384 }
385
386 private void grow() throws TableFullException {
387 if (idHashBits == 30)
388 throw new TableFullException();
389
390 long[] oldHash = idHash;
391 int oldSize = idHash.length;
392
393 idHashBits++;
394 idGrowAt = growAt(idHashBits);
395
396 try {
397 idHash = new long[1 << idHashBits];
398 } catch (OutOfMemoryError noMemory) {
399 throw TABLE_FULL_OUT_OF_MEMORY;
400 }
401
402 for (int i = 0; i < oldSize; i++) {
403 long v = oldHash[i];
404 if (v != 0) {
405 int j = slot(keyOf(v));
406 while (idHash[j] != 0)
407 if (++j >= idHash.length)
408 j = 0;
409 idHash[j] = v;
410 }
411 }
412 }
413
414 private static int keyOf(long v) {
415 return (int) (v >>> KEY_SHIFT);
416 }
417
418 private static long countOf(long v) {
419 return v & MAX_COUNT;
420 }
421
422
423 public static class TableFullException extends Exception {
424 private static final long serialVersionUID = 1L;
425 }
426 }