QuotedString.java
/*
* Copyright (C) 2008, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.util;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.util.Arrays;
import org.eclipse.jgit.lib.Constants;
/**
* Utility functions related to quoted string handling.
*/
public abstract class QuotedString {
/** Quoting style that obeys the rules Git applies to file names */
public static final GitPathStyle GIT_PATH = new GitPathStyle();
/**
* Quoting style used by the Bourne shell.
* <p>
* Quotes are unconditionally inserted during {@link #quote(String)}. This
* protects shell meta-characters like <code>$</code> or <code>~</code> from
* being recognized as special.
*/
public static final BourneStyle BOURNE = new BourneStyle();
/** Bourne style, but permits <code>~user</code> at the start of the string. */
public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
/**
* Quote an input string by the quoting rules.
* <p>
* If the input string does not require any quoting, the same String
* reference is returned to the caller.
* <p>
* Otherwise a quoted string is returned, including the opening and closing
* quotation marks at the start and end of the string. If the style does not
* permit raw Unicode characters then the string will first be encoded in
* UTF-8, with unprintable sequences possibly escaped by the rules.
*
* @param in
* any non-null Unicode string.
* @return a quoted string. See above for details.
*/
public abstract String quote(String in);
/**
* Clean a previously quoted input, decoding the result via UTF-8.
* <p>
* This method must match quote such that:
*
* <pre>
* a.equals(dequote(quote(a)));
* </pre>
*
* is true for any <code>a</code>.
*
* @param in
* a Unicode string to remove quoting from.
* @return the cleaned string.
* @see #dequote(byte[], int, int)
*/
public String dequote(String in) {
final byte[] b = Constants.encode(in);
return dequote(b, 0, b.length);
}
/**
* Decode a previously quoted input, scanning a UTF-8 encoded buffer.
* <p>
* This method must match quote such that:
*
* <pre>
* a.equals(dequote(Constants.encode(quote(a))));
* </pre>
*
* is true for any <code>a</code>.
* <p>
* This method removes any opening/closing quotation marks added by
* {@link #quote(String)}.
*
* @param in
* the input buffer to parse.
* @param offset
* first position within <code>in</code> to scan.
* @param end
* one position past in <code>in</code> to scan.
* @return the cleaned string.
*/
public abstract String dequote(byte[] in, int offset, int end);
/**
* Quoting style used by the Bourne shell.
* <p>
* Quotes are unconditionally inserted during {@link #quote(String)}. This
* protects shell meta-characters like <code>$</code> or <code>~</code> from
* being recognized as special.
*/
public static class BourneStyle extends QuotedString {
@Override
public String quote(String in) {
final StringBuilder r = new StringBuilder();
r.append('\'');
int start = 0, i = 0;
for (; i < in.length(); i++) {
switch (in.charAt(i)) {
case '\'':
case '!':
r.append(in, start, i);
r.append('\'');
r.append('\\');
r.append(in.charAt(i));
r.append('\'');
start = i + 1;
break;
}
}
r.append(in, start, i);
r.append('\'');
return r.toString();
}
@Override
public String dequote(byte[] in, int ip, int ie) {
boolean inquote = false;
final byte[] r = new byte[ie - ip];
int rPtr = 0;
while (ip < ie) {
final byte b = in[ip++];
switch (b) {
case '\'':
inquote = !inquote;
continue;
case '\\':
if (inquote || ip == ie)
r[rPtr++] = b; // literal within a quote
else
r[rPtr++] = in[ip++];
continue;
default:
r[rPtr++] = b;
continue;
}
}
return RawParseUtils.decode(UTF_8, r, 0, rPtr);
}
}
/** Bourne style, but permits <code>~user</code> at the start of the string. */
public static class BourneUserPathStyle extends BourneStyle {
@Override
public String quote(String in) {
if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$
// If the string is just "~user" we can assume they
// mean "~user/".
//
return in + "/"; //$NON-NLS-1$
}
if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$
// If the string is of "~/path" or "~user/path"
// we must not escape ~/ or ~user/ from the shell.
//
final int i = in.indexOf('/') + 1;
if (i == in.length())
return in;
return in.substring(0, i) + super.quote(in.substring(i));
}
return super.quote(in);
}
}
/** Quoting style that obeys the rules Git applies to file names */
public static final class GitPathStyle extends QuotedString {
private static final byte[] quote;
static {
quote = new byte[128];
Arrays.fill(quote, (byte) -1);
for (int i = '0'; i <= '9'; i++)
quote[i] = 0;
for (int i = 'a'; i <= 'z'; i++)
quote[i] = 0;
for (int i = 'A'; i <= 'Z'; i++)
quote[i] = 0;
quote[' '] = 0;
quote['$'] = 0;
quote['%'] = 0;
quote['&'] = 0;
quote['*'] = 0;
quote['+'] = 0;
quote[','] = 0;
quote['-'] = 0;
quote['.'] = 0;
quote['/'] = 0;
quote[':'] = 0;
quote[';'] = 0;
quote['='] = 0;
quote['?'] = 0;
quote['@'] = 0;
quote['_'] = 0;
quote['^'] = 0;
quote['|'] = 0;
quote['~'] = 0;
quote['\u0007'] = 'a';
quote['\b'] = 'b';
quote['\f'] = 'f';
quote['\n'] = 'n';
quote['\r'] = 'r';
quote['\t'] = 't';
quote['\u000B'] = 'v';
quote['\\'] = '\\';
quote['"'] = '"';
}
@Override
public String quote(String instr) {
if (instr.length() == 0)
return "\"\""; //$NON-NLS-1$
boolean reuse = true;
final byte[] in = Constants.encode(instr);
final StringBuilder r = new StringBuilder(2 + in.length);
r.append('"');
for (int i = 0; i < in.length; i++) {
final int c = in[i] & 0xff;
if (c < quote.length) {
final byte style = quote[c];
if (style == 0) {
r.append((char) c);
continue;
}
if (style > 0) {
reuse = false;
r.append('\\');
r.append((char) style);
continue;
}
}
reuse = false;
r.append('\\');
r.append((char) (((c >> 6) & 03) + '0'));
r.append((char) (((c >> 3) & 07) + '0'));
r.append((char) (((c >> 0) & 07) + '0'));
}
if (reuse)
return instr;
r.append('"');
return r.toString();
}
@Override
public String dequote(byte[] in, int inPtr, int inEnd) {
if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
return dq(in, inPtr + 1, inEnd - 1);
return RawParseUtils.decode(UTF_8, in, inPtr, inEnd);
}
private static String dq(byte[] in, int inPtr, int inEnd) {
final byte[] r = new byte[inEnd - inPtr];
int rPtr = 0;
while (inPtr < inEnd) {
final byte b = in[inPtr++];
if (b != '\\') {
r[rPtr++] = b;
continue;
}
if (inPtr == inEnd) {
// Lone trailing backslash. Treat it as a literal.
//
r[rPtr++] = '\\';
break;
}
switch (in[inPtr++]) {
case 'a':
r[rPtr++] = 0x07 /* \a = BEL */;
continue;
case 'b':
r[rPtr++] = '\b';
continue;
case 'f':
r[rPtr++] = '\f';
continue;
case 'n':
r[rPtr++] = '\n';
continue;
case 'r':
r[rPtr++] = '\r';
continue;
case 't':
r[rPtr++] = '\t';
continue;
case 'v':
r[rPtr++] = 0x0B/* \v = VT */;
continue;
case '\\':
case '"':
r[rPtr++] = in[inPtr - 1];
continue;
case '0':
case '1':
case '2':
case '3': {
int cp = in[inPtr - 1] - '0';
for (int n = 1; n < 3 && inPtr < inEnd; n++) {
final byte c = in[inPtr];
if ('0' <= c && c <= '7') {
cp <<= 3;
cp |= c - '0';
inPtr++;
} else {
break;
}
}
r[rPtr++] = (byte) cp;
continue;
}
default:
// Any other code is taken literally.
//
r[rPtr++] = '\\';
r[rPtr++] = in[inPtr - 1];
continue;
}
}
return RawParseUtils.decode(UTF_8, r, 0, rPtr);
}
private GitPathStyle() {
// Singleton
}
}
}