1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44 package org.eclipse.jgit.pgm.debug;
45
46 import static java.lang.Integer.valueOf;
47 import static java.lang.Long.valueOf;
48
49 import java.io.File;
50 import java.lang.reflect.Field;
51 import java.security.MessageDigest;
52 import java.util.ArrayList;
53 import java.util.Arrays;
54 import java.util.HashSet;
55 import java.util.List;
56
57 import org.eclipse.jgit.diff.RawText;
58 import org.eclipse.jgit.diff.RawTextComparator;
59 import org.eclipse.jgit.errors.LargeObjectException;
60 import org.eclipse.jgit.lib.Constants;
61 import org.eclipse.jgit.lib.FileMode;
62 import org.eclipse.jgit.lib.MutableObjectId;
63 import org.eclipse.jgit.lib.ObjectReader;
64 import org.eclipse.jgit.lib.Repository;
65 import org.eclipse.jgit.lib.RepositoryBuilder;
66 import org.eclipse.jgit.lib.RepositoryCache;
67 import org.eclipse.jgit.pgm.Command;
68 import org.eclipse.jgit.pgm.TextBuiltin;
69 import org.eclipse.jgit.pgm.internal.CLIText;
70 import org.eclipse.jgit.revwalk.RevWalk;
71 import org.eclipse.jgit.treewalk.TreeWalk;
72 import org.eclipse.jgit.util.FS;
73 import org.eclipse.jgit.util.NB;
74 import org.kohsuke.args4j.Option;
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94 @Command(usage = "usage_TextHashFunctions")
95 class TextHashFunctions extends TextBuiltin {
96
97
98 final Hash sha1 = new Hash() {
99 private final MessageDigest md = Constants.newMessageDigest();
100
101 @Override
102 protected int hashRegion(byte[] raw, int ptr, int end) {
103 md.reset();
104 md.update(raw, ptr, end - ptr);
105 return NB.decodeInt32(md.digest(), 0);
106 }
107 };
108
109
110 final Hash djb = new Hash() {
111 @Override
112 protected int hashRegion(byte[] raw, int ptr, int end) {
113 int hash = 5381;
114 for (; ptr < end; ptr++)
115 hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
116 return hash;
117 }
118 };
119
120
121 final Hash string_hash31 = new Hash() {
122 @Override
123 protected int hashRegion(byte[] raw, int ptr, int end) {
124 int hash = 0;
125 for (; ptr < end; ptr++)
126 hash = 31 * hash + (raw[ptr] & 0xff);
127 return hash;
128 }
129 };
130
131
132 final Hash rabin_DeltaIndex = new Hash() {
133 private final byte[] buf16 = new byte[16];
134
135 @Override
136 protected int hashRegion(byte[] raw, int ptr, int end) {
137 if (end - ptr < 16) {
138 Arrays.fill(buf16, (byte) 0);
139 System.arraycopy(raw, ptr, buf16, 0, end - ptr);
140 return rabin(buf16, 0);
141 } else {
142 return rabin(raw, ptr);
143 }
144 }
145
146 private int rabin(byte[] raw, int ptr) {
147 int hash;
148
149
150
151
152 hash = ((raw[ptr] & 0xff) << 24)
153 | ((raw[ptr + 1] & 0xff) << 16)
154 | ((raw[ptr + 2] & 0xff) << 8)
155 | (raw[ptr + 3] & 0xff);
156 hash ^= T[hash >>> 31];
157
158 hash = ((hash << 8) | (raw[ptr + 4] & 0xff)) ^ T[hash >>> 23];
159 hash = ((hash << 8) | (raw[ptr + 5] & 0xff)) ^ T[hash >>> 23];
160 hash = ((hash << 8) | (raw[ptr + 6] & 0xff)) ^ T[hash >>> 23];
161 hash = ((hash << 8) | (raw[ptr + 7] & 0xff)) ^ T[hash >>> 23];
162
163 hash = ((hash << 8) | (raw[ptr + 8] & 0xff)) ^ T[hash >>> 23];
164 hash = ((hash << 8) | (raw[ptr + 9] & 0xff)) ^ T[hash >>> 23];
165 hash = ((hash << 8) | (raw[ptr + 10] & 0xff)) ^ T[hash >>> 23];
166 hash = ((hash << 8) | (raw[ptr + 11] & 0xff)) ^ T[hash >>> 23];
167
168 hash = ((hash << 8) | (raw[ptr + 12] & 0xff)) ^ T[hash >>> 23];
169 hash = ((hash << 8) | (raw[ptr + 13] & 0xff)) ^ T[hash >>> 23];
170 hash = ((hash << 8) | (raw[ptr + 14] & 0xff)) ^ T[hash >>> 23];
171 hash = ((hash << 8) | (raw[ptr + 15] & 0xff)) ^ T[hash >>> 23];
172
173 return hash;
174 }
175
176 private final int[] T = { 0x00000000, 0xd4c6b32d, 0x7d4bd577,
177 0xa98d665a, 0x2e5119c3, 0xfa97aaee, 0x531accb4, 0x87dc7f99,
178 0x5ca23386, 0x886480ab, 0x21e9e6f1, 0xf52f55dc, 0x72f32a45,
179 0xa6359968, 0x0fb8ff32, 0xdb7e4c1f, 0x6d82d421, 0xb944670c,
180 0x10c90156, 0xc40fb27b, 0x43d3cde2, 0x97157ecf, 0x3e981895,
181 0xea5eabb8, 0x3120e7a7, 0xe5e6548a, 0x4c6b32d0, 0x98ad81fd,
182 0x1f71fe64, 0xcbb74d49, 0x623a2b13, 0xb6fc983e, 0x0fc31b6f,
183 0xdb05a842, 0x7288ce18, 0xa64e7d35, 0x219202ac, 0xf554b181,
184 0x5cd9d7db, 0x881f64f6, 0x536128e9, 0x87a79bc4, 0x2e2afd9e,
185 0xfaec4eb3, 0x7d30312a, 0xa9f68207, 0x007be45d, 0xd4bd5770,
186 0x6241cf4e, 0xb6877c63, 0x1f0a1a39, 0xcbcca914, 0x4c10d68d,
187 0x98d665a0, 0x315b03fa, 0xe59db0d7, 0x3ee3fcc8, 0xea254fe5,
188 0x43a829bf, 0x976e9a92, 0x10b2e50b, 0xc4745626, 0x6df9307c,
189 0xb93f8351, 0x1f8636de, 0xcb4085f3, 0x62cde3a9, 0xb60b5084,
190 0x31d72f1d, 0xe5119c30, 0x4c9cfa6a, 0x985a4947, 0x43240558,
191 0x97e2b675, 0x3e6fd02f, 0xeaa96302, 0x6d751c9b, 0xb9b3afb6,
192 0x103ec9ec, 0xc4f87ac1, 0x7204e2ff, 0xa6c251d2, 0x0f4f3788,
193 0xdb8984a5, 0x5c55fb3c, 0x88934811, 0x211e2e4b, 0xf5d89d66,
194 0x2ea6d179, 0xfa606254, 0x53ed040e, 0x872bb723, 0x00f7c8ba,
195 0xd4317b97, 0x7dbc1dcd, 0xa97aaee0, 0x10452db1, 0xc4839e9c,
196 0x6d0ef8c6, 0xb9c84beb, 0x3e143472, 0xead2875f, 0x435fe105,
197 0x97995228, 0x4ce71e37, 0x9821ad1a, 0x31accb40, 0xe56a786d,
198 0x62b607f4, 0xb670b4d9, 0x1ffdd283, 0xcb3b61ae, 0x7dc7f990,
199 0xa9014abd, 0x008c2ce7, 0xd44a9fca, 0x5396e053, 0x8750537e,
200 0x2edd3524, 0xfa1b8609, 0x2165ca16, 0xf5a3793b, 0x5c2e1f61,
201 0x88e8ac4c, 0x0f34d3d5, 0xdbf260f8, 0x727f06a2, 0xa6b9b58f,
202 0x3f0c6dbc, 0xebcade91, 0x4247b8cb, 0x96810be6, 0x115d747f,
203 0xc59bc752, 0x6c16a108, 0xb8d01225, 0x63ae5e3a, 0xb768ed17,
204 0x1ee58b4d, 0xca233860, 0x4dff47f9, 0x9939f4d4, 0x30b4928e,
205 0xe47221a3, 0x528eb99d, 0x86480ab0, 0x2fc56cea, 0xfb03dfc7,
206 0x7cdfa05e, 0xa8191373, 0x01947529, 0xd552c604, 0x0e2c8a1b,
207 0xdaea3936, 0x73675f6c, 0xa7a1ec41, 0x207d93d8, 0xf4bb20f5,
208 0x5d3646af, 0x89f0f582, 0x30cf76d3, 0xe409c5fe, 0x4d84a3a4,
209 0x99421089, 0x1e9e6f10, 0xca58dc3d, 0x63d5ba67, 0xb713094a,
210 0x6c6d4555, 0xb8abf678, 0x11269022, 0xc5e0230f, 0x423c5c96,
211 0x96faefbb, 0x3f7789e1, 0xebb13acc, 0x5d4da2f2, 0x898b11df,
212 0x20067785, 0xf4c0c4a8, 0x731cbb31, 0xa7da081c, 0x0e576e46,
213 0xda91dd6b, 0x01ef9174, 0xd5292259, 0x7ca44403, 0xa862f72e,
214 0x2fbe88b7, 0xfb783b9a, 0x52f55dc0, 0x8633eeed, 0x208a5b62,
215 0xf44ce84f, 0x5dc18e15, 0x89073d38, 0x0edb42a1, 0xda1df18c,
216 0x739097d6, 0xa75624fb, 0x7c2868e4, 0xa8eedbc9, 0x0163bd93,
217 0xd5a50ebe, 0x52797127, 0x86bfc20a, 0x2f32a450, 0xfbf4177d,
218 0x4d088f43, 0x99ce3c6e, 0x30435a34, 0xe485e919, 0x63599680,
219 0xb79f25ad, 0x1e1243f7, 0xcad4f0da, 0x11aabcc5, 0xc56c0fe8,
220 0x6ce169b2, 0xb827da9f, 0x3ffba506, 0xeb3d162b, 0x42b07071,
221 0x9676c35c, 0x2f49400d, 0xfb8ff320, 0x5202957a, 0x86c42657,
222 0x011859ce, 0xd5deeae3, 0x7c538cb9, 0xa8953f94, 0x73eb738b,
223 0xa72dc0a6, 0x0ea0a6fc, 0xda6615d1, 0x5dba6a48, 0x897cd965,
224 0x20f1bf3f, 0xf4370c12, 0x42cb942c, 0x960d2701, 0x3f80415b,
225 0xeb46f276, 0x6c9a8def, 0xb85c3ec2, 0x11d15898, 0xc517ebb5,
226 0x1e69a7aa, 0xcaaf1487, 0x632272dd, 0xb7e4c1f0, 0x3038be69,
227 0xe4fe0d44, 0x4d736b1e, 0x99b5d833 };
228 };
229
230
231 final Fold truncate = new Fold() {
232 @Override
233 public int fold(int hash, int bits) {
234 return hash & ((1 << bits) - 1);
235 }
236 };
237
238
239 final Fold golden_ratio = new Fold() {
240 @Override
241 public int fold(int hash, int bits) {
242
243 return (hash * 0x9e370001) >>> (32 - bits);
244 }
245 };
246
247
248
249
250
251
252
253 @Option(name = "--hash", metaVar = "NAME", usage = "Enable hash function(s)")
254 List<String> hashFunctions = new ArrayList<>();
255
256 @Option(name = "--fold", metaVar = "NAME", usage = "Enable fold function(s)")
257 List<String> foldFunctions = new ArrayList<>();
258
259 @Option(name = "--text-limit", metaVar = "LIMIT", usage = "Maximum size in KiB to scan")
260 int textLimit = 15 * 1024;
261
262 @Option(name = "--repository", aliases = { "-r" }, metaVar = "GIT_DIR", usage = "Repository to scan")
263 List<File> gitDirs = new ArrayList<>();
264
265
266 @Override
267 protected boolean requiresRepository() {
268 return false;
269 }
270
271
272 @Override
273 protected void run() throws Exception {
274 if (gitDirs.isEmpty()) {
275 RepositoryBuilder rb = new RepositoryBuilder()
276 .setGitDir(new File(gitdir))
277 .readEnvironment()
278 .findGitDir();
279 if (rb.getGitDir() == null)
280 throw die(CLIText.get().cantFindGitDirectory);
281 gitDirs.add(rb.getGitDir());
282 }
283
284 for (File dir : gitDirs) {
285 RepositoryBuilder rb = new RepositoryBuilder();
286 if (RepositoryCache.FileKey.isGitRepository(dir, FS.DETECTED))
287 rb.setGitDir(dir);
288 else
289 rb.findGitDir(dir);
290
291 try (Repository repo = rb.build()) {
292 run(repo);
293 }
294 }
295 }
296
297 private void run(Repository repo) throws Exception {
298 List<Function> all = init();
299
300 long fileCnt = 0;
301 long lineCnt = 0;
302 try (ObjectReader or = repo.newObjectReader();
303 RevWalk rw = new RevWalk(or);
304 TreeWalk tw = new TreeWalk(or)) {
305 final MutableObjectId id = new MutableObjectId();
306 tw.reset(rw.parseTree(repo.resolve(Constants.HEAD)));
307 tw.setRecursive(true);
308
309 while (tw.next()) {
310 FileMode fm = tw.getFileMode(0);
311 if (!FileMode.REGULAR_FILE.equals(fm)
312 && !FileMode.EXECUTABLE_FILE.equals(fm))
313 continue;
314
315 byte[] raw;
316 try {
317 tw.getObjectId(id, 0);
318 raw = or.open(id).getCachedBytes(textLimit * 1024);
319 } catch (LargeObjectException tooBig) {
320 continue;
321 }
322
323 if (RawText.isBinary(raw))
324 continue;
325
326 RawText txt = new RawText(raw);
327 int[] lines = new int[txt.size()];
328 int cnt = 0;
329 HashSet<Line> u = new HashSet<>();
330 for (int i = 0; i < txt.size(); i++) {
331 if (u.add(new Line(txt, i)))
332 lines[cnt++] = i;
333 }
334
335 fileCnt++;
336 lineCnt += cnt;
337
338 for (Function fun : all)
339 testOne(fun, txt, lines, cnt);
340 }
341 }
342
343 File directory = repo.getDirectory();
344 if (directory != null) {
345 String name = directory.getName();
346 File parent = directory.getParentFile();
347 if (name.equals(Constants.DOT_GIT) && parent != null)
348 name = parent.getName();
349 outw.println(name + ":");
350 }
351 outw.format(" %6d files; %5d avg. unique lines/file\n",
352 valueOf(fileCnt),
353 valueOf(lineCnt / fileCnt));
354 outw.format("%-20s %-15s %9s\n", "Hash", "Fold", "Max Len");
355 outw.println("-----------------------------------------------");
356 String lastHashName = null;
357 for (Function fun : all) {
358 String hashName = fun.hash.name;
359 if (hashName.equals(lastHashName))
360 hashName = "";
361 outw.format("%-20s %-15s %9d\n",
362 hashName,
363 fun.fold.name,
364 valueOf(fun.maxChainLength));
365 lastHashName = fun.hash.name;
366 }
367 outw.println();
368 outw.flush();
369 }
370
371 private static void testOne(Function fun, RawText txt, int[] elements,
372 int cnt) {
373 final Hash cmp = fun.hash;
374 final Fold fold = fun.fold;
375
376 final int bits = tableBits(cnt);
377 final int[] buckets = new int[1 << bits];
378 for (int i = 0; i < cnt; i++)
379 buckets[fold.fold(cmp.hash(txt, elements[i]), bits)]++;
380
381 int maxChainLength = 0;
382 for (int i = 0; i < buckets.length; i++)
383 maxChainLength = Math.max(maxChainLength, buckets[i]);
384 fun.maxChainLength = Math.max(fun.maxChainLength, maxChainLength);
385 }
386
387 private List<Function> init() {
388 List<Hash> hashes = new ArrayList<>();
389 List<Fold> folds = new ArrayList<>();
390
391 try {
392 for (Field f : TextHashFunctions.class.getDeclaredFields()) {
393 if (f.getType() == Hash.class) {
394 f.setAccessible(true);
395 Hash cmp = (Hash) f.get(this);
396 cmp.name = f.getName();
397 hashes.add(cmp);
398
399 } else if (f.getType() == Fold.class) {
400 f.setAccessible(true);
401 Fold fold = (Fold) f.get(this);
402 fold.name = f.getName();
403 folds.add(fold);
404 }
405 }
406 } catch (IllegalArgumentException e) {
407 throw new RuntimeException("Cannot determine names", e);
408 } catch (IllegalAccessException e) {
409 throw new RuntimeException("Cannot determine names", e);
410 }
411
412 List<Function> all = new ArrayList<>();
413 for (Hash cmp : hashes) {
414 if (include(cmp.name, hashFunctions)) {
415 for (Fold f : folds) {
416 if (include(f.name, foldFunctions)) {
417 all.add(new Function(cmp, f));
418 }
419 }
420 }
421 }
422 return all;
423 }
424
425 private static boolean include(String name, List<String> want) {
426 if (want.isEmpty())
427 return true;
428 for (String s : want) {
429 if (s.equalsIgnoreCase(name))
430 return true;
431 }
432 return false;
433 }
434
435 private static class Function {
436 final Hash hash;
437
438 final Fold fold;
439
440 int maxChainLength;
441
442 Function(Hash cmp, Fold fold) {
443 this.hash = cmp;
444 this.fold = fold;
445 }
446 }
447
448
449 private static abstract class Hash extends RawTextComparator {
450 String name;
451
452 @Override
453 public boolean equals(RawText a, int ai, RawText b, int bi) {
454 return RawTextComparator.DEFAULT.equals(a, ai, b, bi);
455 }
456 }
457
458
459 private static abstract class Fold {
460 String name;
461
462
463
464
465
466
467
468
469
470
471
472
473 abstract int fold(int hash, int bits);
474 }
475
476
477 private class Line {
478 private final RawText txt;
479
480 private final int pos;
481
482 Line(RawText txt, int pos) {
483 this.txt = txt;
484 this.pos = pos;
485 }
486
487 @Override
488 public int hashCode() {
489 return RawTextComparator.DEFAULT.hash(txt, pos);
490 }
491
492 @Override
493 public boolean equals(Object obj) {
494 if (obj instanceof Line) {
495 Line e = (Line) obj;
496 return RawTextComparator.DEFAULT.equals(txt, pos, e.txt, e.pos);
497 }
498 return false;
499 }
500 }
501
502 private static int tableBits(int sz) {
503 int bits = 31 - Integer.numberOfLeadingZeros(sz);
504 if (bits == 0)
505 bits = 1;
506 if (1 << bits < sz)
507 bits++;
508 return bits;
509 }
510 }