1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44 package org.eclipse.jgit.pgm.debug;
45
46 import static java.lang.Integer.valueOf;
47 import static java.lang.Long.valueOf;
48
49 import java.io.File;
50 import java.lang.reflect.Field;
51 import java.security.MessageDigest;
52 import java.util.ArrayList;
53 import java.util.Arrays;
54 import java.util.HashSet;
55 import java.util.List;
56
57 import org.eclipse.jgit.diff.RawText;
58 import org.eclipse.jgit.diff.RawTextComparator;
59 import org.eclipse.jgit.errors.LargeObjectException;
60 import org.eclipse.jgit.lib.Constants;
61 import org.eclipse.jgit.lib.FileMode;
62 import org.eclipse.jgit.lib.MutableObjectId;
63 import org.eclipse.jgit.lib.ObjectReader;
64 import org.eclipse.jgit.lib.Repository;
65 import org.eclipse.jgit.lib.RepositoryBuilder;
66 import org.eclipse.jgit.lib.RepositoryCache;
67 import org.eclipse.jgit.pgm.Command;
68 import org.eclipse.jgit.pgm.TextBuiltin;
69 import org.eclipse.jgit.pgm.internal.CLIText;
70 import org.eclipse.jgit.revwalk.RevWalk;
71 import org.eclipse.jgit.treewalk.TreeWalk;
72 import org.eclipse.jgit.util.FS;
73 import org.eclipse.jgit.util.NB;
74 import org.kohsuke.args4j.Option;
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94 @Command(usage = "usage_TextHashFunctions")
95 class TextHashFunctions extends TextBuiltin {
96
97
98 final Hash sha1 = new Hash() {
99 private final MessageDigest md = Constants.newMessageDigest();
100
101 @Override
102 protected int hashRegion(byte[] raw, int ptr, int end) {
103 md.reset();
104 md.update(raw, ptr, end - ptr);
105 return NB.decodeInt32(md.digest(), 0);
106 }
107 };
108
109
110 final Hash djb = new Hash() {
111 @Override
112 protected int hashRegion(byte[] raw, int ptr, int end) {
113 int hash = 5381;
114 for (; ptr < end; ptr++)
115 hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
116 return hash;
117 }
118 };
119
120
121 final Hash string_hash31 = new Hash() {
122 @Override
123 protected int hashRegion(byte[] raw, int ptr, int end) {
124 int hash = 0;
125 for (; ptr < end; ptr++)
126 hash = 31 * hash + (raw[ptr] & 0xff);
127 return hash;
128 }
129 };
130
131
132 final Hash rabin_DeltaIndex = new Hash() {
133 private final byte[] buf16 = new byte[16];
134
135 @Override
136 protected int hashRegion(byte[] raw, int ptr, int end) {
137 if (end - ptr < 16) {
138 Arrays.fill(buf16, (byte) 0);
139 System.arraycopy(raw, ptr, buf16, 0, end - ptr);
140 return rabin(buf16, 0);
141 } else {
142 return rabin(raw, ptr);
143 }
144 }
145
146 private int rabin(byte[] raw, int ptr) {
147 int hash;
148
149
150
151
152 hash = ((raw[ptr] & 0xff) << 24)
153 | ((raw[ptr + 1] & 0xff) << 16)
154 | ((raw[ptr + 2] & 0xff) << 8)
155 | (raw[ptr + 3] & 0xff);
156 hash ^= T[hash >>> 31];
157
158 hash = ((hash << 8) | (raw[ptr + 4] & 0xff)) ^ T[hash >>> 23];
159 hash = ((hash << 8) | (raw[ptr + 5] & 0xff)) ^ T[hash >>> 23];
160 hash = ((hash << 8) | (raw[ptr + 6] & 0xff)) ^ T[hash >>> 23];
161 hash = ((hash << 8) | (raw[ptr + 7] & 0xff)) ^ T[hash >>> 23];
162
163 hash = ((hash << 8) | (raw[ptr + 8] & 0xff)) ^ T[hash >>> 23];
164 hash = ((hash << 8) | (raw[ptr + 9] & 0xff)) ^ T[hash >>> 23];
165 hash = ((hash << 8) | (raw[ptr + 10] & 0xff)) ^ T[hash >>> 23];
166 hash = ((hash << 8) | (raw[ptr + 11] & 0xff)) ^ T[hash >>> 23];
167
168 hash = ((hash << 8) | (raw[ptr + 12] & 0xff)) ^ T[hash >>> 23];
169 hash = ((hash << 8) | (raw[ptr + 13] & 0xff)) ^ T[hash >>> 23];
170 hash = ((hash << 8) | (raw[ptr + 14] & 0xff)) ^ T[hash >>> 23];
171 hash = ((hash << 8) | (raw[ptr + 15] & 0xff)) ^ T[hash >>> 23];
172
173 return hash;
174 }
175
176 private final int[] T = { 0x00000000, 0xd4c6b32d, 0x7d4bd577,
177 0xa98d665a, 0x2e5119c3, 0xfa97aaee, 0x531accb4, 0x87dc7f99,
178 0x5ca23386, 0x886480ab, 0x21e9e6f1, 0xf52f55dc, 0x72f32a45,
179 0xa6359968, 0x0fb8ff32, 0xdb7e4c1f, 0x6d82d421, 0xb944670c,
180 0x10c90156, 0xc40fb27b, 0x43d3cde2, 0x97157ecf, 0x3e981895,
181 0xea5eabb8, 0x3120e7a7, 0xe5e6548a, 0x4c6b32d0, 0x98ad81fd,
182 0x1f71fe64, 0xcbb74d49, 0x623a2b13, 0xb6fc983e, 0x0fc31b6f,
183 0xdb05a842, 0x7288ce18, 0xa64e7d35, 0x219202ac, 0xf554b181,
184 0x5cd9d7db, 0x881f64f6, 0x536128e9, 0x87a79bc4, 0x2e2afd9e,
185 0xfaec4eb3, 0x7d30312a, 0xa9f68207, 0x007be45d, 0xd4bd5770,
186 0x6241cf4e, 0xb6877c63, 0x1f0a1a39, 0xcbcca914, 0x4c10d68d,
187 0x98d665a0, 0x315b03fa, 0xe59db0d7, 0x3ee3fcc8, 0xea254fe5,
188 0x43a829bf, 0x976e9a92, 0x10b2e50b, 0xc4745626, 0x6df9307c,
189 0xb93f8351, 0x1f8636de, 0xcb4085f3, 0x62cde3a9, 0xb60b5084,
190 0x31d72f1d, 0xe5119c30, 0x4c9cfa6a, 0x985a4947, 0x43240558,
191 0x97e2b675, 0x3e6fd02f, 0xeaa96302, 0x6d751c9b, 0xb9b3afb6,
192 0x103ec9ec, 0xc4f87ac1, 0x7204e2ff, 0xa6c251d2, 0x0f4f3788,
193 0xdb8984a5, 0x5c55fb3c, 0x88934811, 0x211e2e4b, 0xf5d89d66,
194 0x2ea6d179, 0xfa606254, 0x53ed040e, 0x872bb723, 0x00f7c8ba,
195 0xd4317b97, 0x7dbc1dcd, 0xa97aaee0, 0x10452db1, 0xc4839e9c,
196 0x6d0ef8c6, 0xb9c84beb, 0x3e143472, 0xead2875f, 0x435fe105,
197 0x97995228, 0x4ce71e37, 0x9821ad1a, 0x31accb40, 0xe56a786d,
198 0x62b607f4, 0xb670b4d9, 0x1ffdd283, 0xcb3b61ae, 0x7dc7f990,
199 0xa9014abd, 0x008c2ce7, 0xd44a9fca, 0x5396e053, 0x8750537e,
200 0x2edd3524, 0xfa1b8609, 0x2165ca16, 0xf5a3793b, 0x5c2e1f61,
201 0x88e8ac4c, 0x0f34d3d5, 0xdbf260f8, 0x727f06a2, 0xa6b9b58f,
202 0x3f0c6dbc, 0xebcade91, 0x4247b8cb, 0x96810be6, 0x115d747f,
203 0xc59bc752, 0x6c16a108, 0xb8d01225, 0x63ae5e3a, 0xb768ed17,
204 0x1ee58b4d, 0xca233860, 0x4dff47f9, 0x9939f4d4, 0x30b4928e,
205 0xe47221a3, 0x528eb99d, 0x86480ab0, 0x2fc56cea, 0xfb03dfc7,
206 0x7cdfa05e, 0xa8191373, 0x01947529, 0xd552c604, 0x0e2c8a1b,
207 0xdaea3936, 0x73675f6c, 0xa7a1ec41, 0x207d93d8, 0xf4bb20f5,
208 0x5d3646af, 0x89f0f582, 0x30cf76d3, 0xe409c5fe, 0x4d84a3a4,
209 0x99421089, 0x1e9e6f10, 0xca58dc3d, 0x63d5ba67, 0xb713094a,
210 0x6c6d4555, 0xb8abf678, 0x11269022, 0xc5e0230f, 0x423c5c96,
211 0x96faefbb, 0x3f7789e1, 0xebb13acc, 0x5d4da2f2, 0x898b11df,
212 0x20067785, 0xf4c0c4a8, 0x731cbb31, 0xa7da081c, 0x0e576e46,
213 0xda91dd6b, 0x01ef9174, 0xd5292259, 0x7ca44403, 0xa862f72e,
214 0x2fbe88b7, 0xfb783b9a, 0x52f55dc0, 0x8633eeed, 0x208a5b62,
215 0xf44ce84f, 0x5dc18e15, 0x89073d38, 0x0edb42a1, 0xda1df18c,
216 0x739097d6, 0xa75624fb, 0x7c2868e4, 0xa8eedbc9, 0x0163bd93,
217 0xd5a50ebe, 0x52797127, 0x86bfc20a, 0x2f32a450, 0xfbf4177d,
218 0x4d088f43, 0x99ce3c6e, 0x30435a34, 0xe485e919, 0x63599680,
219 0xb79f25ad, 0x1e1243f7, 0xcad4f0da, 0x11aabcc5, 0xc56c0fe8,
220 0x6ce169b2, 0xb827da9f, 0x3ffba506, 0xeb3d162b, 0x42b07071,
221 0x9676c35c, 0x2f49400d, 0xfb8ff320, 0x5202957a, 0x86c42657,
222 0x011859ce, 0xd5deeae3, 0x7c538cb9, 0xa8953f94, 0x73eb738b,
223 0xa72dc0a6, 0x0ea0a6fc, 0xda6615d1, 0x5dba6a48, 0x897cd965,
224 0x20f1bf3f, 0xf4370c12, 0x42cb942c, 0x960d2701, 0x3f80415b,
225 0xeb46f276, 0x6c9a8def, 0xb85c3ec2, 0x11d15898, 0xc517ebb5,
226 0x1e69a7aa, 0xcaaf1487, 0x632272dd, 0xb7e4c1f0, 0x3038be69,
227 0xe4fe0d44, 0x4d736b1e, 0x99b5d833 };
228 };
229
230
231 final Fold truncate = new Fold() {
232 @Override
233 public int fold(int hash, int bits) {
234 return hash & ((1 << bits) - 1);
235 }
236 };
237
238
239 final Fold golden_ratio = new Fold() {
240 @Override
241 public int fold(int hash, int bits) {
242
243 return (hash * 0x9e370001) >>> (32 - bits);
244 }
245 };
246
247
248
249
250
251
252
253 @Option(name = "--hash", multiValued = true, metaVar = "NAME", usage = "Enable hash function(s)")
254 List<String> hashFunctions = new ArrayList<String>();
255
256 @Option(name = "--fold", multiValued = true, metaVar = "NAME", usage = "Enable fold function(s)")
257 List<String> foldFunctions = new ArrayList<String>();
258
259 @Option(name = "--text-limit", metaVar = "LIMIT", usage = "Maximum size in KiB to scan")
260 int textLimit = 15 * 1024;
261
262 @Option(name = "--repository", aliases = { "-r" }, multiValued = true, metaVar = "GIT_DIR", usage = "Repository to scan")
263 List<File> gitDirs = new ArrayList<File>();
264
265 @Override
266 protected boolean requiresRepository() {
267 return false;
268 }
269
270 @Override
271 protected void run() throws Exception {
272 if (gitDirs.isEmpty()) {
273 RepositoryBuilder rb = new RepositoryBuilder()
274 .setGitDir(new File(gitdir))
275 .readEnvironment()
276 .findGitDir();
277 if (rb.getGitDir() == null)
278 throw die(CLIText.get().cantFindGitDirectory);
279 gitDirs.add(rb.getGitDir());
280 }
281
282 for (File dir : gitDirs) {
283 RepositoryBuilder rb = new RepositoryBuilder();
284 if (RepositoryCache.FileKey.isGitRepository(dir, FS.DETECTED))
285 rb.setGitDir(dir);
286 else
287 rb.findGitDir(dir);
288
289 Repository repo = rb.build();
290 try {
291 run(repo);
292 } finally {
293 repo.close();
294 }
295 }
296 }
297
298 private void run(Repository repo) throws Exception {
299 List<Function> all = init();
300
301 long fileCnt = 0;
302 long lineCnt = 0;
303 try (ObjectReader or = repo.newObjectReader();
304 RevWalk rw = new RevWalk(or);
305 TreeWalk tw = new TreeWalk(or)) {
306 final MutableObjectId id = new MutableObjectId();
307 tw.reset(rw.parseTree(repo.resolve(Constants.HEAD)));
308 tw.setRecursive(true);
309
310 while (tw.next()) {
311 FileMode fm = tw.getFileMode(0);
312 if (!FileMode.REGULAR_FILE.equals(fm)
313 && !FileMode.EXECUTABLE_FILE.equals(fm))
314 continue;
315
316 byte[] raw;
317 try {
318 tw.getObjectId(id, 0);
319 raw = or.open(id).getCachedBytes(textLimit * 1024);
320 } catch (LargeObjectException tooBig) {
321 continue;
322 }
323
324 if (RawText.isBinary(raw))
325 continue;
326
327 RawText txt = new RawText(raw);
328 int[] lines = new int[txt.size()];
329 int cnt = 0;
330 HashSet<Line> u = new HashSet<Line>();
331 for (int i = 0; i < txt.size(); i++) {
332 if (u.add(new Line(txt, i)))
333 lines[cnt++] = i;
334 }
335
336 fileCnt++;
337 lineCnt += cnt;
338
339 for (Function fun : all)
340 testOne(fun, txt, lines, cnt);
341 }
342 }
343
344 File directory = repo.getDirectory();
345 if (directory != null) {
346 String name = directory.getName();
347 File parent = directory.getParentFile();
348 if (name.equals(Constants.DOT_GIT) && parent != null)
349 name = parent.getName();
350 outw.println(name + ":");
351 }
352 outw.format(" %6d files; %5d avg. unique lines/file\n",
353 valueOf(fileCnt),
354 valueOf(lineCnt / fileCnt));
355 outw.format("%-20s %-15s %9s\n", "Hash", "Fold", "Max Len");
356 outw.println("-----------------------------------------------");
357 String lastHashName = null;
358 for (Function fun : all) {
359 String hashName = fun.hash.name;
360 if (hashName.equals(lastHashName))
361 hashName = "";
362 outw.format("%-20s %-15s %9d\n",
363 hashName,
364 fun.fold.name,
365 valueOf(fun.maxChainLength));
366 lastHashName = fun.hash.name;
367 }
368 outw.println();
369 outw.flush();
370 }
371
372 private static void testOne(Function fun, RawText txt, int[] elements,
373 int cnt) {
374 final Hash cmp = fun.hash;
375 final Fold fold = fun.fold;
376
377 final int bits = tableBits(cnt);
378 final int[] buckets = new int[1 << bits];
379 for (int i = 0; i < cnt; i++)
380 buckets[fold.fold(cmp.hash(txt, elements[i]), bits)]++;
381
382 int maxChainLength = 0;
383 for (int i = 0; i < buckets.length; i++)
384 maxChainLength = Math.max(maxChainLength, buckets[i]);
385 fun.maxChainLength = Math.max(fun.maxChainLength, maxChainLength);
386 }
387
388 private List<Function> init() {
389 List<Hash> hashes = new ArrayList<Hash>();
390 List<Fold> folds = new ArrayList<Fold>();
391
392 try {
393 for (Field f : TextHashFunctions.class.getDeclaredFields()) {
394 if (f.getType() == Hash.class) {
395 f.setAccessible(true);
396 Hash cmp = (Hash) f.get(this);
397 cmp.name = f.getName();
398 hashes.add(cmp);
399
400 } else if (f.getType() == Fold.class) {
401 f.setAccessible(true);
402 Fold fold = (Fold) f.get(this);
403 fold.name = f.getName();
404 folds.add(fold);
405 }
406 }
407 } catch (IllegalArgumentException e) {
408 throw new RuntimeException("Cannot determine names", e);
409 } catch (IllegalAccessException e) {
410 throw new RuntimeException("Cannot determine names", e);
411 }
412
413 List<Function> all = new ArrayList<Function>();
414 for (Hash cmp : hashes) {
415 if (include(cmp.name, hashFunctions)) {
416 for (Fold f : folds) {
417 if (include(f.name, foldFunctions)) {
418 all.add(new Function(cmp, f));
419 }
420 }
421 }
422 }
423 return all;
424 }
425
426 private static boolean include(String name, List<String> want) {
427 if (want.isEmpty())
428 return true;
429 for (String s : want) {
430 if (s.equalsIgnoreCase(name))
431 return true;
432 }
433 return false;
434 }
435
436 private static class Function {
437 final Hash hash;
438
439 final Fold fold;
440
441 int maxChainLength;
442
443 Function(Hash cmp, Fold fold) {
444 this.hash = cmp;
445 this.fold = fold;
446 }
447 }
448
449
450 private static abstract class Hash extends RawTextComparator {
451 String name;
452
453 @Override
454 public boolean equals(RawText a, int ai, RawText b, int bi) {
455 return RawTextComparator.DEFAULT.equals(a, ai, b, bi);
456 }
457 }
458
459
460 private static abstract class Fold {
461 String name;
462
463
464
465
466
467
468
469
470
471
472
473
474 abstract int fold(int hash, int bits);
475 }
476
477
478 private class Line {
479 private final RawText txt;
480
481 private final int pos;
482
483 Line(RawText txt, int pos) {
484 this.txt = txt;
485 this.pos = pos;
486 }
487
488 @Override
489 public int hashCode() {
490 return RawTextComparator.DEFAULT.hash(txt, pos);
491 }
492
493 @Override
494 public boolean equals(Object obj) {
495 if (obj instanceof Line) {
496 Line e = (Line) obj;
497 return RawTextComparator.DEFAULT.equals(txt, pos, e.txt, e.pos);
498 }
499 return false;
500 }
501 }
502
503 private static int tableBits(final int sz) {
504 int bits = 31 - Integer.numberOfLeadingZeros(sz);
505 if (bits == 0)
506 bits = 1;
507 if (1 << bits < sz)
508 bits++;
509 return bits;
510 }
511 }