DiffAlgorithms.java
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.pgm.debug;
import static java.lang.Integer.valueOf;
import static java.lang.Long.valueOf;
import java.io.File;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadMXBean;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.eclipse.jgit.diff.DiffAlgorithm;
import org.eclipse.jgit.diff.HistogramDiff;
import org.eclipse.jgit.diff.MyersDiff;
import org.eclipse.jgit.diff.RawText;
import org.eclipse.jgit.diff.RawTextComparator;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.MutableObjectId;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.RepositoryBuilder;
import org.eclipse.jgit.lib.RepositoryCache;
import org.eclipse.jgit.pgm.Command;
import org.eclipse.jgit.pgm.TextBuiltin;
import org.eclipse.jgit.pgm.internal.CLIText;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
import org.eclipse.jgit.util.FS;
import org.kohsuke.args4j.Option;
@Command(usage = "usage_DiffAlgorithms")
class DiffAlgorithms extends TextBuiltin {
/** Myers diff; every call hands out the shared {@code MyersDiff.INSTANCE}. */
final Algorithm myers = new Algorithm() {
	@Override
	DiffAlgorithm create() {
		return MyersDiff.INSTANCE;
	}
};

/** Histogram diff whose fallback algorithm is set to {@code null}. */
final Algorithm histogram = new Algorithm() {
	@Override
	DiffAlgorithm create() {
		return newHistogram(null);
	}
};

/** Histogram diff whose fallback algorithm is {@code MyersDiff.INSTANCE}. */
final Algorithm histogram_myers = new Algorithm() {
	@Override
	DiffAlgorithm create() {
		return newHistogram(MyersDiff.INSTANCE);
	}
};

/**
 * Create a fresh histogram diff configured with the given fallback.
 *
 * @param fallback
 *            value passed to {@code setFallbackAlgorithm}; may be
 *            {@code null}
 * @return a new, independently configured instance
 */
private static HistogramDiff newHistogram(DiffAlgorithm fallback) {
	HistogramDiff configured = new HistogramDiff();
	configured.setFallbackAlgorithm(fallback);
	return configured;
}
// -----------------------------------------------------------------------
//
// Implementation of the suite lives below this line.
//
//
// Names of the algorithms to benchmark. init() matches them
// case-insensitively against the names of the Algorithm fields of this
// class; an empty list enables every algorithm.
@Option(name = "--algorithm", metaVar = "NAME", usage = "Enable algorithm(s)")
List<String> algorithms = new ArrayList<>();
// Per-blob size limit, expressed in KiB. It is multiplied by 1024 to get
// bytes at the point where blobs are loaded.
@Option(name = "--text-limit", metaVar = "LIMIT", usage = "Maximum size in KiB to scan per file revision")
int textLimit = 15 * 1024; // 15 * 1024 KiB = 15 MiB by default
// Repositories to scan, one report each. When left empty, run() falls
// back to locating a single repository through RepositoryBuilder.
@Option(name = "--repository", aliases = { "-r" }, metaVar = "GIT_DIR", usage = "Repository to scan")
List<File> gitDirs = new ArrayList<>();
// Limit on compared file revisions; it is only checked after a commit
// has been fully processed.
@Option(name = "--count", metaVar = "LIMIT", usage = "Number of file revisions to be compared")
int count = 0; // 0 (or any non-positive value) means unlimited
// Comparator handed to every diff invocation in testOne().
private final RawTextComparator cmp = RawTextComparator.DEFAULT;
// Source of the per-thread CPU clock used for timing; assigned in run().
private ThreadMXBean mxBean;
/** {@inheritDoc} */
@Override
protected boolean requiresRepository() {
// run() builds and opens every repository it scans by itself (from
// --repository or by discovery); presumably returning false keeps the
// command runner from opening one up front -- confirm in TextBuiltin.
return false;
}
/**
 * {@inheritDoc}
 * <p>
 * Obtains the thread CPU clock, determines the repositories to scan and
 * runs the benchmark once per repository. When no {@code --repository}
 * was given, a single repository is located from the inherited
 * {@code gitdir} (if set), the environment and directory discovery.
 */
@Override
protected void run() throws Exception {
	mxBean = ManagementFactory.getThreadMXBean();
	if (!mxBean.isCurrentThreadCpuTimeSupported())
		throw die("Current thread CPU time not supported on this JRE"); //$NON-NLS-1$

	if (gitDirs.isEmpty()) {
		RepositoryBuilder rb = new RepositoryBuilder();
		// Guard against an unset gitdir: new File((String) null) throws
		// NullPointerException before any discovery could take place.
		if (gitdir != null)
			rb.setGitDir(new File(gitdir));
		rb = rb.readEnvironment().findGitDir();
		if (rb.getGitDir() == null)
			throw die(CLIText.get().cantFindGitDirectory);
		gitDirs.add(rb.getGitDir());
	}

	for (File dir : gitDirs) {
		RepositoryBuilder rb = new RepositoryBuilder();
		if (RepositoryCache.FileKey.isGitRepository(dir, FS.DETECTED))
			rb.setGitDir(dir);
		else
			rb.findGitDir(dir);
		// Same failure report as the discovery branch above, instead of
		// letting build() fail on a builder that has no git directory.
		if (rb.getGitDir() == null)
			throw die(CLIText.get().cantFindGitDirectory);
		try (Repository repo = rb.build()) {
			run(repo);
		}
	}
}
/**
 * Benchmark the enabled algorithms on one repository and print a report.
 * <p>
 * Walks the commits reachable from HEAD. For every commit with exactly one
 * parent, the entries selected by {@code TreeFilter.ANY_DIFF} between the
 * parent tree and the commit tree are visited; an entry is used only when
 * it is a regular or executable file on both sides, both blobs fit into
 * the text limit and neither is detected as binary. Each remaining pair is
 * passed to {@link #testOne(Test, RawText, RawText)} once per algorithm.
 * <p>
 * The {@code --count} limit is checked after each commit, so the number of
 * compared files can exceed it by the remaining files of the last commit.
 *
 * @param repo
 *            repository to scan
 * @throws Exception
 *             if HEAD cannot be resolved or reading objects fails
 */
private void run(Repository repo) throws Exception {
	List<Test> all = init();

	// --text-limit is given in KiB. Widen before scaling so that a large
	// value saturates instead of wrapping around to a negative int.
	final int byteLimit = (int) Math.min(textLimit * 1024L,
			Integer.MAX_VALUE);

	long files = 0;
	int commits = 0;
	int minN = Integer.MAX_VALUE;
	int maxN = 0;

	AbbreviatedObjectId startId;
	try (ObjectReader or = repo.newObjectReader();
			RevWalk rw = new RevWalk(or);
			TreeWalk tw = new TreeWalk(or)) {
		final MutableObjectId id = new MutableObjectId();
		tw.setFilter(TreeFilter.ANY_DIFF);
		tw.setRecursive(true);

		ObjectId start = repo.resolve(Constants.HEAD);
		if (start == null)
			throw die("Cannot resolve " + Constants.HEAD); //$NON-NLS-1$
		startId = or.abbreviate(start);
		rw.markStart(rw.parseCommit(start));
		for (;;) {
			final RevCommit c = rw.next();
			if (c == null)
				break;
			commits++;
			// Only commits with a single parent have one unambiguous
			// "before" tree to diff against.
			if (c.getParentCount() != 1)
				continue;

			RevCommit p = c.getParent(0);
			rw.parseHeaders(p);
			tw.reset(p.getTree(), c.getTree());
			while (tw.next()) {
				if (!isFile(tw, 0) || !isFile(tw, 1))
					continue;

				RawText txt0 = loadText(or, tw, id, 0, byteLimit);
				if (txt0 == null)
					continue;
				RawText txt1 = loadText(or, tw, id, 1, byteLimit);
				if (txt1 == null)
					continue;

				final int n = txt0.size() + txt1.size();
				minN = Math.min(minN, n);
				maxN = Math.max(maxN, n);
				for (Test test : all)
					testOne(test, txt0, txt1);
				files++;
			}
			if (count > 0 && files >= count)
				break;
		}
	}

	// Nothing was compared: report 0 rather than the MAX_VALUE sentinel.
	if (files == 0)
		minN = 0;

	Collections.sort(all, new Comparator<Test>() {
		@Override
		public int compare(Test a, Test b) {
			// Long.compare avoids the overflow of subtracting two longs.
			int result = Long.compare(a.runningTimeNanos,
					b.runningTimeNanos);
			if (result == 0) {
				result = a.algorithm.name.compareTo(b.algorithm.name);
			}
			return result;
		}
	});

	File directory = repo.getDirectory();
	if (directory != null) {
		String name = directory.getName();
		File parent = directory.getParentFile();
		// For "<project>/.git" show the project directory name instead.
		if (name.equals(Constants.DOT_GIT) && parent != null)
			name = parent.getName();
		outw.println(name + ": start at " + startId.name()); //$NON-NLS-1$
	}

	outw.format(" %12d files, %8d commits\n", valueOf(files), //$NON-NLS-1$
			valueOf(commits));
	outw.format(" N=%10d min lines, %8d max lines\n", valueOf(minN), //$NON-NLS-1$
			valueOf(maxN));

	outw.format("%-25s %12s ( %12s %12s )\n", //$NON-NLS-1$
			"Algorithm", "Time(ns)", "Time(ns) on", "Time(ns) on"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
	outw.format("%-25s %12s ( %12s %12s )\n", //$NON-NLS-1$
			"", "", "N=" + minN, "N=" + maxN); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
	outw.println("-----------------------------------------------------" //$NON-NLS-1$
			+ "----------------"); //$NON-NLS-1$

	for (Test test : all) {
		outw.format("%-25s %12d ( %12d %12d )", // //$NON-NLS-1$
				test.algorithm.name, //
				valueOf(test.runningTimeNanos), //
				valueOf(timeOf(test.minN)), //
				valueOf(timeOf(test.maxN)));
		outw.println();
	}
	outw.println();
	outw.flush();
}

/**
 * Load one side of the current tree walk entry as text.
 *
 * @param or
 *            reader used to open the blob
 * @param tw
 *            walk positioned on the entry to load
 * @param id
 *            scratch buffer that receives the entry's object id
 * @param ithTree
 *            index of the tree (side) to load
 * @param byteLimit
 *            size limit in bytes passed to {@code getCachedBytes}
 * @return the text, or {@code null} if the blob exceeds the limit or is
 *         detected as binary
 * @throws Exception
 *             if the object cannot be read
 */
private static RawText loadText(ObjectReader or, TreeWalk tw,
		MutableObjectId id, int ithTree, int byteLimit) throws Exception {
	byte[] raw;
	try {
		tw.getObjectId(id, ithTree);
		raw = or.open(id).getCachedBytes(byteLimit);
	} catch (LargeObjectException tooBig) {
		return null;
	}
	return RawText.isBinary(raw) ? null : new RawText(raw);
}

/**
 * Running time of a recorded run, tolerating a run that was never set.
 *
 * @param run
 *            recorded run, or {@code null} if no file pair was compared
 * @return the run's time in nanoseconds, or 0 for {@code null}
 */
private static long timeOf(Run run) {
	return run != null ? run.runningTimeNanos : 0;
}
/**
 * Tell whether the current entry is a plain file in the given tree.
 *
 * @param tw
 *            walk positioned on the entry to inspect
 * @param ithTree
 *            index of the tree whose file mode is examined
 * @return {@code true} if the mode equals {@code FileMode.REGULAR_FILE} or
 *         {@code FileMode.EXECUTABLE_FILE}
 */
private static boolean isFile(TreeWalk tw, int ithTree) {
	final FileMode mode = tw.getFileMode(ithTree);
	if (FileMode.REGULAR_FILE.equals(mode)) {
		return true;
	}
	return FileMode.EXECUTABLE_FILE.equals(mode);
}
/**
 * Number of times the thread CPU clock must be observed to change before
 * one measurement in {@link #testOne(Test, RawText, RawText)} ends.
 */
private static final int minCPUTimerTicks = 10;

/**
 * Time one algorithm on one pair of texts and fold the result into the
 * test's statistics.
 * <p>
 * The diff is repeated until the current thread's CPU time has changed
 * {@link #minCPUTimerTicks} times; the recorded time is the elapsed CPU
 * time divided by the number of repetitions. Presumably the repetition
 * compensates for a coarse CPU clock on short diffs.
 *
 * @param test
 *            accumulator to update; supplies the algorithm to run
 * @param a
 *            first text
 * @param b
 *            second text
 */
private void testOne(Test test, RawText a, RawText b) {
	final DiffAlgorithm da = test.algorithm.create();
	int cpuTimeChanges = 0;
	int cnt = 0;

	final long startTime = mxBean.getCurrentThreadCpuTime();
	// getCurrentThreadCpuTime() reports -1 while CPU time measurement is
	// disabled. The clock would then never change and the loop below
	// would never terminate, so fail fast instead.
	if (startTime < 0)
		throw die("Thread CPU time measurement is disabled on this JRE"); //$NON-NLS-1$

	long lastTime = startTime;
	while (cpuTimeChanges < minCPUTimerTicks) {
		da.diff(cmp, a, b);
		cnt++;
		long interimTime = mxBean.getCurrentThreadCpuTime();
		if (interimTime != lastTime) {
			cpuTimeChanges++;
			lastTime = interimTime;
		}
	}
	final long stopTime = mxBean.getCurrentThreadCpuTime();
	// cnt is at least 1 here because the loop body ran at least once.
	final long runTime = (stopTime - startTime) / cnt;

	test.runningTimeNanos += runTime;

	final int n = a.size() + b.size();
	if (test.minN == null || n < test.minN.n) {
		test.minN = new Run();
		test.minN.n = n;
		test.minN.runningTimeNanos = runTime;
	}
	if (test.maxN == null || n > test.maxN.n) {
		test.maxN = new Run();
		test.maxN.n = n;
		test.maxN.runningTimeNanos = runTime;
	}
}
/**
 * Collect the algorithms to benchmark.
 * <p>
 * Every field declared by this class whose type is {@code Algorithm} is
 * read reflectively; the algorithm is named after its field and kept if
 * that name passes {@link #included(String, List)}.
 *
 * @return one fresh {@code Test} per enabled algorithm
 */
private List<Test> init() {
	final List<Test> tests = new ArrayList<>();
	try {
		for (Field field : DiffAlgorithms.class.getDeclaredFields()) {
			if (field.getType() != Algorithm.class) {
				continue;
			}
			field.setAccessible(true);
			final Algorithm algorithm = (Algorithm) field.get(this);
			algorithm.name = field.getName();
			if (!included(algorithm.name, algorithms)) {
				continue;
			}
			final Test test = new Test();
			test.algorithm = algorithm;
			tests.add(test);
		}
	} catch (IllegalArgumentException | IllegalAccessException e) {
		throw die("Cannot determine names", e); //$NON-NLS-1$
	}
	return tests;
}
/**
 * Decide whether an algorithm was requested.
 *
 * @param name
 *            name of the algorithm
 * @param want
 *            requested names; an empty list requests everything
 * @return {@code true} if {@code want} is empty or contains {@code name},
 *         ignoring case
 */
private static boolean included(String name, List<String> want) {
	// An empty request list selects every algorithm.
	boolean selected = want.isEmpty();
	for (int i = 0; !selected && i < want.size(); i++) {
		selected = want.get(i).equalsIgnoreCase(name);
	}
	return selected;
}
/** A named factory for the diff implementation to benchmark. */
private static abstract class Algorithm {
// Report label; init() sets it to the name of the field holding this
// instance.
String name;
// Called once per measurement in testOne() to obtain the implementation.
abstract DiffAlgorithm create();
}
/** Accumulated measurements for one algorithm. */
private static class Test {
// Algorithm being measured.
Algorithm algorithm;
// Sum of the per-pair averaged run times that testOne() recorded.
long runningTimeNanos;
// Measurement for the pair with the smallest combined size seen so far;
// null until the first pair has been measured.
Run minN;
// Measurement for the pair with the largest combined size seen so far;
// null until the first pair has been measured.
Run maxN;
}
/** A single measurement taken on one pair of texts. */
private static class Run {
// Combined size of both texts, a.size() + b.size(); the report labels
// this value as lines.
int n;
// Averaged CPU time of one diff of that pair, in nanoseconds.
long runningTimeNanos;
}
}