1
2
3
4
5
6
7
8
9
10
11 package org.eclipse.jgit.pgm.debug;
12
13 import static java.lang.Integer.valueOf;
14 import static java.lang.Long.valueOf;
15
16 import java.io.File;
17 import java.lang.reflect.Field;
18 import java.security.MessageDigest;
19 import java.util.ArrayList;
20 import java.util.Arrays;
21 import java.util.HashSet;
22 import java.util.List;
23
24 import org.eclipse.jgit.diff.RawText;
25 import org.eclipse.jgit.diff.RawTextComparator;
26 import org.eclipse.jgit.errors.LargeObjectException;
27 import org.eclipse.jgit.lib.Constants;
28 import org.eclipse.jgit.lib.FileMode;
29 import org.eclipse.jgit.lib.MutableObjectId;
30 import org.eclipse.jgit.lib.ObjectReader;
31 import org.eclipse.jgit.lib.Repository;
32 import org.eclipse.jgit.lib.RepositoryBuilder;
33 import org.eclipse.jgit.lib.RepositoryCache;
34 import org.eclipse.jgit.pgm.Command;
35 import org.eclipse.jgit.pgm.TextBuiltin;
36 import org.eclipse.jgit.pgm.internal.CLIText;
37 import org.eclipse.jgit.revwalk.RevWalk;
38 import org.eclipse.jgit.treewalk.TreeWalk;
39 import org.eclipse.jgit.util.FS;
40 import org.eclipse.jgit.util.NB;
41 import org.kohsuke.args4j.Option;
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61 @Command(usage = "usage_TextHashFunctions")
62 class TextHashFunctions extends TextBuiltin {
63
64
65 final Hash sha1 = new Hash() {
66 private final MessageDigest md = Constants.newMessageDigest();
67
68 @Override
69 protected int hashRegion(byte[] raw, int ptr, int end) {
70 md.reset();
71 md.update(raw, ptr, end - ptr);
72 return NB.decodeInt32(md.digest(), 0);
73 }
74 };
75
76
77 final Hash djb = new Hash() {
78 @Override
79 protected int hashRegion(byte[] raw, int ptr, int end) {
80 int hash = 5381;
81 for (; ptr < end; ptr++)
82 hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
83 return hash;
84 }
85 };
86
87
88 final Hash string_hash31 = new Hash() {
89 @Override
90 protected int hashRegion(byte[] raw, int ptr, int end) {
91 int hash = 0;
92 for (; ptr < end; ptr++)
93 hash = 31 * hash + (raw[ptr] & 0xff);
94 return hash;
95 }
96 };
97
98
99 final Hash rabin_DeltaIndex = new Hash() {
100 private final byte[] buf16 = new byte[16];
101
102 @Override
103 protected int hashRegion(byte[] raw, int ptr, int end) {
104 if (end - ptr < 16) {
105 Arrays.fill(buf16, (byte) 0);
106 System.arraycopy(raw, ptr, buf16, 0, end - ptr);
107 return rabin(buf16, 0);
108 }
109 return rabin(raw, ptr);
110 }
111
112 private int rabin(byte[] raw, int ptr) {
113 int hash;
114
115
116
117
118 hash = ((raw[ptr] & 0xff) << 24)
119 | ((raw[ptr + 1] & 0xff) << 16)
120 | ((raw[ptr + 2] & 0xff) << 8)
121 | (raw[ptr + 3] & 0xff);
122 hash ^= T[hash >>> 31];
123
124 hash = ((hash << 8) | (raw[ptr + 4] & 0xff)) ^ T[hash >>> 23];
125 hash = ((hash << 8) | (raw[ptr + 5] & 0xff)) ^ T[hash >>> 23];
126 hash = ((hash << 8) | (raw[ptr + 6] & 0xff)) ^ T[hash >>> 23];
127 hash = ((hash << 8) | (raw[ptr + 7] & 0xff)) ^ T[hash >>> 23];
128
129 hash = ((hash << 8) | (raw[ptr + 8] & 0xff)) ^ T[hash >>> 23];
130 hash = ((hash << 8) | (raw[ptr + 9] & 0xff)) ^ T[hash >>> 23];
131 hash = ((hash << 8) | (raw[ptr + 10] & 0xff)) ^ T[hash >>> 23];
132 hash = ((hash << 8) | (raw[ptr + 11] & 0xff)) ^ T[hash >>> 23];
133
134 hash = ((hash << 8) | (raw[ptr + 12] & 0xff)) ^ T[hash >>> 23];
135 hash = ((hash << 8) | (raw[ptr + 13] & 0xff)) ^ T[hash >>> 23];
136 hash = ((hash << 8) | (raw[ptr + 14] & 0xff)) ^ T[hash >>> 23];
137 hash = ((hash << 8) | (raw[ptr + 15] & 0xff)) ^ T[hash >>> 23];
138
139 return hash;
140 }
141
142 private final int[] T = { 0x00000000, 0xd4c6b32d, 0x7d4bd577,
143 0xa98d665a, 0x2e5119c3, 0xfa97aaee, 0x531accb4, 0x87dc7f99,
144 0x5ca23386, 0x886480ab, 0x21e9e6f1, 0xf52f55dc, 0x72f32a45,
145 0xa6359968, 0x0fb8ff32, 0xdb7e4c1f, 0x6d82d421, 0xb944670c,
146 0x10c90156, 0xc40fb27b, 0x43d3cde2, 0x97157ecf, 0x3e981895,
147 0xea5eabb8, 0x3120e7a7, 0xe5e6548a, 0x4c6b32d0, 0x98ad81fd,
148 0x1f71fe64, 0xcbb74d49, 0x623a2b13, 0xb6fc983e, 0x0fc31b6f,
149 0xdb05a842, 0x7288ce18, 0xa64e7d35, 0x219202ac, 0xf554b181,
150 0x5cd9d7db, 0x881f64f6, 0x536128e9, 0x87a79bc4, 0x2e2afd9e,
151 0xfaec4eb3, 0x7d30312a, 0xa9f68207, 0x007be45d, 0xd4bd5770,
152 0x6241cf4e, 0xb6877c63, 0x1f0a1a39, 0xcbcca914, 0x4c10d68d,
153 0x98d665a0, 0x315b03fa, 0xe59db0d7, 0x3ee3fcc8, 0xea254fe5,
154 0x43a829bf, 0x976e9a92, 0x10b2e50b, 0xc4745626, 0x6df9307c,
155 0xb93f8351, 0x1f8636de, 0xcb4085f3, 0x62cde3a9, 0xb60b5084,
156 0x31d72f1d, 0xe5119c30, 0x4c9cfa6a, 0x985a4947, 0x43240558,
157 0x97e2b675, 0x3e6fd02f, 0xeaa96302, 0x6d751c9b, 0xb9b3afb6,
158 0x103ec9ec, 0xc4f87ac1, 0x7204e2ff, 0xa6c251d2, 0x0f4f3788,
159 0xdb8984a5, 0x5c55fb3c, 0x88934811, 0x211e2e4b, 0xf5d89d66,
160 0x2ea6d179, 0xfa606254, 0x53ed040e, 0x872bb723, 0x00f7c8ba,
161 0xd4317b97, 0x7dbc1dcd, 0xa97aaee0, 0x10452db1, 0xc4839e9c,
162 0x6d0ef8c6, 0xb9c84beb, 0x3e143472, 0xead2875f, 0x435fe105,
163 0x97995228, 0x4ce71e37, 0x9821ad1a, 0x31accb40, 0xe56a786d,
164 0x62b607f4, 0xb670b4d9, 0x1ffdd283, 0xcb3b61ae, 0x7dc7f990,
165 0xa9014abd, 0x008c2ce7, 0xd44a9fca, 0x5396e053, 0x8750537e,
166 0x2edd3524, 0xfa1b8609, 0x2165ca16, 0xf5a3793b, 0x5c2e1f61,
167 0x88e8ac4c, 0x0f34d3d5, 0xdbf260f8, 0x727f06a2, 0xa6b9b58f,
168 0x3f0c6dbc, 0xebcade91, 0x4247b8cb, 0x96810be6, 0x115d747f,
169 0xc59bc752, 0x6c16a108, 0xb8d01225, 0x63ae5e3a, 0xb768ed17,
170 0x1ee58b4d, 0xca233860, 0x4dff47f9, 0x9939f4d4, 0x30b4928e,
171 0xe47221a3, 0x528eb99d, 0x86480ab0, 0x2fc56cea, 0xfb03dfc7,
172 0x7cdfa05e, 0xa8191373, 0x01947529, 0xd552c604, 0x0e2c8a1b,
173 0xdaea3936, 0x73675f6c, 0xa7a1ec41, 0x207d93d8, 0xf4bb20f5,
174 0x5d3646af, 0x89f0f582, 0x30cf76d3, 0xe409c5fe, 0x4d84a3a4,
175 0x99421089, 0x1e9e6f10, 0xca58dc3d, 0x63d5ba67, 0xb713094a,
176 0x6c6d4555, 0xb8abf678, 0x11269022, 0xc5e0230f, 0x423c5c96,
177 0x96faefbb, 0x3f7789e1, 0xebb13acc, 0x5d4da2f2, 0x898b11df,
178 0x20067785, 0xf4c0c4a8, 0x731cbb31, 0xa7da081c, 0x0e576e46,
179 0xda91dd6b, 0x01ef9174, 0xd5292259, 0x7ca44403, 0xa862f72e,
180 0x2fbe88b7, 0xfb783b9a, 0x52f55dc0, 0x8633eeed, 0x208a5b62,
181 0xf44ce84f, 0x5dc18e15, 0x89073d38, 0x0edb42a1, 0xda1df18c,
182 0x739097d6, 0xa75624fb, 0x7c2868e4, 0xa8eedbc9, 0x0163bd93,
183 0xd5a50ebe, 0x52797127, 0x86bfc20a, 0x2f32a450, 0xfbf4177d,
184 0x4d088f43, 0x99ce3c6e, 0x30435a34, 0xe485e919, 0x63599680,
185 0xb79f25ad, 0x1e1243f7, 0xcad4f0da, 0x11aabcc5, 0xc56c0fe8,
186 0x6ce169b2, 0xb827da9f, 0x3ffba506, 0xeb3d162b, 0x42b07071,
187 0x9676c35c, 0x2f49400d, 0xfb8ff320, 0x5202957a, 0x86c42657,
188 0x011859ce, 0xd5deeae3, 0x7c538cb9, 0xa8953f94, 0x73eb738b,
189 0xa72dc0a6, 0x0ea0a6fc, 0xda6615d1, 0x5dba6a48, 0x897cd965,
190 0x20f1bf3f, 0xf4370c12, 0x42cb942c, 0x960d2701, 0x3f80415b,
191 0xeb46f276, 0x6c9a8def, 0xb85c3ec2, 0x11d15898, 0xc517ebb5,
192 0x1e69a7aa, 0xcaaf1487, 0x632272dd, 0xb7e4c1f0, 0x3038be69,
193 0xe4fe0d44, 0x4d736b1e, 0x99b5d833 };
194 };
195
196
197 final Fold truncate = new Fold() {
198 @Override
199 public int fold(int hash, int bits) {
200 return hash & ((1 << bits) - 1);
201 }
202 };
203
204
205 final Fold golden_ratio = new Fold() {
206 @Override
207 public int fold(int hash, int bits) {
208
209 return (hash * 0x9e370001) >>> (32 - bits);
210 }
211 };
212
213
214
215
216
217
218
219 @Option(name = "--hash", metaVar = "NAME", usage = "Enable hash function(s)")
220 List<String> hashFunctions = new ArrayList<>();
221
222 @Option(name = "--fold", metaVar = "NAME", usage = "Enable fold function(s)")
223 List<String> foldFunctions = new ArrayList<>();
224
225 @Option(name = "--text-limit", metaVar = "LIMIT", usage = "Maximum size in KiB to scan")
226 int textLimit = 15 * 1024;
227
228 @Option(name = "--repository", aliases = { "-r" }, metaVar = "GIT_DIR", usage = "Repository to scan")
229 List<File> gitDirs = new ArrayList<>();
230
231
232 @Override
233 protected boolean requiresRepository() {
234 return false;
235 }
236
237
238 @Override
239 protected void run() throws Exception {
240 if (gitDirs.isEmpty()) {
241 RepositoryBuilder rb = new RepositoryBuilder()
242 .setGitDir(new File(gitdir))
243 .readEnvironment()
244 .findGitDir();
245 if (rb.getGitDir() == null)
246 throw die(CLIText.get().cantFindGitDirectory);
247 gitDirs.add(rb.getGitDir());
248 }
249
250 for (File dir : gitDirs) {
251 RepositoryBuilder rb = new RepositoryBuilder();
252 if (RepositoryCache.FileKey.isGitRepository(dir, FS.DETECTED))
253 rb.setGitDir(dir);
254 else
255 rb.findGitDir(dir);
256
257 try (Repository repo = rb.build()) {
258 run(repo);
259 }
260 }
261 }
262
263 private void run(Repository repo) throws Exception {
264 List<Function> all = init();
265
266 long fileCnt = 0;
267 long lineCnt = 0;
268 try (ObjectReader or = repo.newObjectReader();
269 RevWalk rw = new RevWalk(or);
270 TreeWalk tw = new TreeWalk(or)) {
271 final MutableObjectId id = new MutableObjectId();
272 tw.reset(rw.parseTree(repo.resolve(Constants.HEAD)));
273 tw.setRecursive(true);
274
275 while (tw.next()) {
276 FileMode fm = tw.getFileMode(0);
277 if (!FileMode.REGULAR_FILE.equals(fm)
278 && !FileMode.EXECUTABLE_FILE.equals(fm))
279 continue;
280
281 byte[] raw;
282 try {
283 tw.getObjectId(id, 0);
284 raw = or.open(id).getCachedBytes(textLimit * 1024);
285 } catch (LargeObjectException tooBig) {
286 continue;
287 }
288
289 if (RawText.isBinary(raw, raw.length, true))
290 continue;
291
292 RawText txt = new RawText(raw);
293 int[] lines = new int[txt.size()];
294 int cnt = 0;
295 HashSet<Line> u = new HashSet<>();
296 for (int i = 0; i < txt.size(); i++) {
297 if (u.add(new Line(txt, i)))
298 lines[cnt++] = i;
299 }
300
301 fileCnt++;
302 lineCnt += cnt;
303
304 for (Function fun : all)
305 testOne(fun, txt, lines, cnt);
306 }
307 }
308
309 File directory = repo.getDirectory();
310 if (directory != null) {
311 String name = directory.getName();
312 File parent = directory.getParentFile();
313 if (name.equals(Constants.DOT_GIT) && parent != null)
314 name = parent.getName();
315 outw.println(name + ":");
316 }
317 outw.format(" %6d files; %5d avg. unique lines/file\n",
318 valueOf(fileCnt),
319 valueOf(lineCnt / fileCnt));
320 outw.format("%-20s %-15s %9s\n", "Hash", "Fold", "Max Len");
321 outw.println("-----------------------------------------------");
322 String lastHashName = null;
323 for (Function fun : all) {
324 String hashName = fun.hash.name;
325 if (hashName.equals(lastHashName))
326 hashName = "";
327 outw.format("%-20s %-15s %9d\n",
328 hashName,
329 fun.fold.name,
330 valueOf(fun.maxChainLength));
331 lastHashName = fun.hash.name;
332 }
333 outw.println();
334 outw.flush();
335 }
336
337 private static void testOne(Function fun, RawText txt, int[] elements,
338 int cnt) {
339 final Hash cmp = fun.hash;
340 final Fold fold = fun.fold;
341
342 final int bits = tableBits(cnt);
343 final int[] buckets = new int[1 << bits];
344 for (int i = 0; i < cnt; i++)
345 buckets[fold.fold(cmp.hash(txt, elements[i]), bits)]++;
346
347 int maxChainLength = 0;
348 for (int i = 0; i < buckets.length; i++)
349 maxChainLength = Math.max(maxChainLength, buckets[i]);
350 fun.maxChainLength = Math.max(fun.maxChainLength, maxChainLength);
351 }
352
353 private List<Function> init() {
354 List<Hash> hashes = new ArrayList<>();
355 List<Fold> folds = new ArrayList<>();
356
357 try {
358 for (Field f : TextHashFunctions.class.getDeclaredFields()) {
359 if (f.getType() == Hash.class) {
360 f.setAccessible(true);
361 Hash cmp = (Hash) f.get(this);
362 cmp.name = f.getName();
363 hashes.add(cmp);
364
365 } else if (f.getType() == Fold.class) {
366 f.setAccessible(true);
367 Fold fold = (Fold) f.get(this);
368 fold.name = f.getName();
369 folds.add(fold);
370 }
371 }
372 } catch (IllegalArgumentException | IllegalAccessException e) {
373 throw new RuntimeException("Cannot determine names", e);
374 }
375
376 List<Function> all = new ArrayList<>();
377 for (Hash cmp : hashes) {
378 if (include(cmp.name, hashFunctions)) {
379 for (Fold f : folds) {
380 if (include(f.name, foldFunctions)) {
381 all.add(new Function(cmp, f));
382 }
383 }
384 }
385 }
386 return all;
387 }
388
389 private static boolean include(String name, List<String> want) {
390 if (want.isEmpty())
391 return true;
392 for (String s : want) {
393 if (s.equalsIgnoreCase(name))
394 return true;
395 }
396 return false;
397 }
398
399 private static class Function {
400 final Hash hash;
401
402 final Fold fold;
403
404 int maxChainLength;
405
406 Function(Hash cmp, Fold fold) {
407 this.hash = cmp;
408 this.fold = fold;
409 }
410 }
411
412
413 private abstract static class Hash extends RawTextComparator {
414 String name;
415
416 @Override
417 public boolean equals(RawText a, int ai, RawText b, int bi) {
418 return RawTextComparator.DEFAULT.equals(a, ai, b, bi);
419 }
420 }
421
422
423 private abstract static class Fold {
424 String name;
425
426
427
428
429
430
431
432
433
434
435
436
437 abstract int fold(int hash, int bits);
438 }
439
440
441 private static class Line {
442 private final RawText txt;
443
444 private final int pos;
445
446 Line(RawText txt, int pos) {
447 this.txt = txt;
448 this.pos = pos;
449 }
450
451 @Override
452 public int hashCode() {
453 return RawTextComparator.DEFAULT.hash(txt, pos);
454 }
455
456 @Override
457 public boolean equals(Object obj) {
458 if (obj instanceof Line) {
459 Line e = (Line) obj;
460 return RawTextComparator.DEFAULT.equals(txt, pos, e.txt, e.pos);
461 }
462 return false;
463 }
464 }
465
466 private static int tableBits(int sz) {
467 int bits = 31 - Integer.numberOfLeadingZeros(sz);
468 if (bits == 0)
469 bits = 1;
470 if (1 << bits < sz)
471 bits++;
472 return bits;
473 }
474 }