View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2013 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.websocket.api.util;
20  
21  import java.io.IOException;
22  import java.util.Arrays;
23  import java.util.Iterator;
24  import java.util.NoSuchElementException;
25  
/**
 * Provide some consistent Http header value and Extension configuration parameter quoting support.
 * <p>
 * While QuotedStringTokenizer exists in jetty-util, and works great with http header values, using it in websocket-api is undesired:
 * <ul>
 * <li>Using QuotedStringTokenizer would introduce a dependency to jetty-util that would need to be exposed via the WebAppContext classloader</li>
 * <li>The ABNF defined extension parameter parsing requirements of RFC-6455 (WebSocket) are slightly different than the ABNF parsing defined in RFC-2616
 * (HTTP/1.1).</li>
 * <li>Future HTTPbis ABNF changes for parsing will impact QuotedStringTokenizer</li>
 * </ul>
 * It was decided to keep this implementation separate for the above reasons.
 */
public class QuoteUtil
{
    /**
     * Iterator over the delimiter separated parts of a string.
     * <p>
     * Delimiters found inside single or double quoted sections do not split the input. Within a quoted section a backslash is dropped and the character
     * following it is taken literally. Each returned part is trimmed and, when it starts and ends with the same quote character, has that surrounding pair of
     * quotes removed. Parts that are empty or contain only whitespace are skipped.
     */
    private static class DeQuotingStringIterator implements Iterator<String>
    {
        private enum State
        {
            START,
            TOKEN,
            QUOTE_SINGLE,
            QUOTE_DOUBLE
        }

        private static final boolean DEBUG = false;

        private final String input;
        private final String delims;
        private final StringBuilder token;
        private boolean hasToken = false;
        private int i = 0;

        public DeQuotingStringIterator(String input, String delims)
        {
            this.input = input;
            this.delims = delims;
            int len = input.length();
            token = new StringBuilder(len > 1024?512:len / 2);
        }

        /**
         * Append a character to the pending token, dropping whitespace that appears before the first non-whitespace character of the token.
         */
        private void appendToken(char c)
        {
            if (!hasToken && Character.isWhitespace(c))
            {
                return; // skip whitespace at start of token.
            }
            token.append(c);
            hasToken = true;
        }

        private void debug(String format, Object... args)
        {
            if (DEBUG)
            {
                System.out.printf(format,args);
            }
        }

        @Override
        public boolean hasNext()
        {
            // already found a token
            if (hasToken)
            {
                return true;
            }

            State state = State.START;
            boolean escape = false;
            int inputLen = input.length();

            while (i < inputLen)
            {
                char c = input.charAt(i++);

                switch (state)
                {
                    case START:
                    {
                        if (c == '\'')
                        {
                            state = State.QUOTE_SINGLE;
                            appendToken(c);
                        }
                        else if (c == '\"')
                        {
                            state = State.QUOTE_DOUBLE;
                            appendToken(c);
                        }
                        else if (delims.indexOf(c) >= 0)
                        {
                            // A delimiter before any token content marks an empty part: skip it
                            // rather than letting the delimiter leak into the next token.
                            state = State.START;
                        }
                        else
                        {
                            appendToken(c);
                            state = State.TOKEN;
                        }
                        break;
                    }
                    case TOKEN:
                    {
                        if (delims.indexOf(c) >= 0)
                        {
                            if (hasToken)
                            {
                                debug("hasNext/t: %b [%s]%n",hasToken,token);
                                return true;
                            }
                            // Only whitespace was seen since the previous delimiter. Keep scanning,
                            // otherwise the remaining parts of the input would be silently lost.
                            state = State.START;
                        }
                        else
                        {
                            if (c == '\'')
                            {
                                state = State.QUOTE_SINGLE;
                            }
                            else if (c == '\"')
                            {
                                state = State.QUOTE_DOUBLE;
                            }
                            appendToken(c);
                        }
                        break;
                    }
                    case QUOTE_SINGLE:
                    case QUOTE_DOUBLE:
                    {
                        char quote = (state == State.QUOTE_SINGLE)?'\'':'\"';
                        if (escape)
                        {
                            // character following a backslash is taken literally
                            escape = false;
                            appendToken(c);
                        }
                        else if (c == quote)
                        {
                            // closing quote: keep it (dequote() decides later) and leave the quoted section
                            appendToken(c);
                            state = State.TOKEN;
                        }
                        else if (c == '\\')
                        {
                            escape = true;
                        }
                        else
                        {
                            appendToken(c);
                        }
                        break;
                    }
                }
                debug("%s <%s> : [%s]%n",state,c,token);
            }

            debug("hasNext/e: %b [%s]%n",hasToken,token);
            return hasToken;
        }

        @Override
        public String next()
        {
            if (!hasNext())
            {
                throw new NoSuchElementException();
            }
            String ret = token.toString();
            token.setLength(0);
            hasToken = false;
            return QuoteUtil.dequote(ret.trim());
        }

        @Override
        public void remove()
        {
            throw new UnsupportedOperationException("Remove not supported with this iterator");
        }
    }

    /**
     * ABNF from RFC 2616, RFC 822, and RFC 6455 specified characters requiring quoting.
     */
    public static final String ABNF_REQUIRED_QUOTING = "\"'\\\n\r\t\f\b%+ ;=";

    /** Marker stored in {@link #escapes} for control characters that have no short escape and need the 4 hex digit form. */
    private static final char UNICODE_TAG = 0xFFFF;
    /** Short escape letter for each control character below 32, or {@link #UNICODE_TAG}. */
    private static final char[] escapes = new char[32];

    static
    {
        Arrays.fill(escapes,UNICODE_TAG);
        // non-unicode
        escapes['\b'] = 'b';
        escapes['\t'] = 't';
        escapes['\n'] = 'n';
        escapes['\f'] = 'f';
        escapes['\r'] = 'r';
    }

    /**
     * Convert a single ASCII hex digit into its numeric value.
     * 
     * @param c
     *            the character to convert
     * @return the value (0-15) of the hex digit
     * @throws IllegalArgumentException
     *             if the character is not one of 0-9, a-f or A-F
     */
    private static int dehex(char c)
    {
        if ((c >= '0') && (c <= '9'))
        {
            return c - '0';
        }
        if ((c >= 'a') && (c <= 'f'))
        {
            return (c - 'a') + 10;
        }
        if ((c >= 'A') && (c <= 'F'))
        {
            return (c - 'A') + 10;
        }
        throw new IllegalArgumentException("!hex:" + Integer.toHexString(c));
    }

    /**
     * Remove quotes from a string, only if the input string starts with and ends with the same quote character.
     * <p>
     * A <code>null</code> string, an empty string, or a string of a single character cannot carry a pair of quotes and is returned as-is.
     * 
     * @param str
     *            the string to remove surrounding quotes from
     * @return the de-quoted string
     */
    public static String dequote(String str)
    {
        if ((str == null) || (str.length() < 2))
        {
            // too short to hold both an opening and a closing quote
            return str;
        }

        char start = str.charAt(0);
        int last = str.length() - 1;
        if (((start == '\'') || (start == '\"')) && (str.charAt(last) == start))
        {
            // dequote
            return str.substring(1,last);
        }
        return str;
    }

    /**
     * Append the string to the buffer, escaping the characters that are not allowed to appear as-is inside of a double quoted string.
     * <p>
     * Double quote and backslash are prefixed with a backslash. Control characters (below 32) are written using their short escape (b, t, n, f, r) where one
     * exists, otherwise as a backslash followed by 'u' and 4 hex digits.
     * 
     * @param buf
     *            the StringBuilder to append to
     * @param str
     *            the string to escape
     */
    public static void escape(StringBuilder buf, String str)
    {
        int len = str.length();
        for (int idx = 0; idx < len; idx++)
        {
            char c = str.charAt(idx);
            if (c >= 32)
            {
                // non special character
                if ((c == '"') || (c == '\\'))
                {
                    buf.append('\\');
                }
                buf.append(c);
                continue;
            }

            // special characters, requiring escaping
            char escaped = escapes[c];
            if (escaped == UNICODE_TAG)
            {
                // no short form available, use the 4 hex digit form
                buf.append("\\u00");
                if (c < 0x10)
                {
                    buf.append('0');
                }
                buf.append(Integer.toString(c,16)); // hex
            }
            else
            {
                // normal escape
                buf.append('\\').append(escaped);
            }
        }
    }

    /**
     * Simple quote of a string, escaping where needed.
     * 
     * @param buf
     *            the StringBuilder to append to
     * @param str
     *            the string to quote
     */
    public static void quote(StringBuilder buf, String str)
    {
        buf.append('"');
        escape(buf,str);
        buf.append('"');
    }

    /**
     * Append into buf the provided string, adding quotes if needed.
     * <p>
     * Quoting is determined if any of the characters in the <code>delim</code> are found in the input <code>str</code>.
     * 
     * @param buf
     *            the buffer to append to
     * @param str
     *            the string to possibly quote (nothing is appended if this is <code>null</code>)
     * @param delim
     *            the delimiter characters that will trigger automatic quoting
     */
    public static void quoteIfNeeded(StringBuilder buf, String str, String delim)
    {
        if (str == null)
        {
            // nothing to append
            return;
        }

        // check for delimiters in input string
        int len = str.length();
        int ch;
        for (int i = 0; i < len; i += Character.charCount(ch))
        {
            // step by whole code points so a surrogate pair is examined once, as a unit
            ch = str.codePointAt(i);
            if (delim.indexOf(ch) >= 0)
            {
                // found a delimiter codepoint. we need to quote it.
                quote(buf,str);
                return;
            }
        }

        // no special delimiters used, no quote needed.
        buf.append(str);
    }

    /**
     * Create an iterator of the input string, breaking apart the string at the provided delimiters, removing quotes and trimming the parts of the string as
     * needed.
     * <p>
     * Parts that are empty, or that consist only of whitespace, are not returned by the iterator.
     * 
     * @param str
     *            the input string to split apart
     * @param delims
     *            the delimiter characters to split the string on
     * @return the iterator of the parts of the string, trimmed, with quotes around the string part removed, and unescaped
     */
    public static Iterator<String> splitAt(String str, String delims)
    {
        return new DeQuotingStringIterator(str.trim(),delims);
    }

    /**
     * Reverse the escaping performed by {@link #escape(StringBuilder, String)}.
     * <p>
     * A backslash followed by one of n, r, t, f, b is replaced by the matching control character. A backslash followed by 'u' and 4 hex digits is replaced by
     * the character with that code. A backslash followed by any other character yields that character. A trailing lone backslash is dropped.
     * 
     * @param str
     *            the string to unescape (may be <code>null</code>)
     * @return the unescaped string, or <code>null</code> if the input was <code>null</code>
     * @throws IllegalArgumentException
     *             if a 'u' escape is not followed by 4 hex digits
     */
    public static String unescape(String str)
    {
        if (str == null)
        {
            // nothing there
            return null;
        }

        int len = str.length();
        if (len <= 1)
        {
            // impossible to be escaped
            return str;
        }

        StringBuilder ret = new StringBuilder(len);
        boolean escaped = false;
        char c;
        for (int i = 0; i < len; i++)
        {
            c = str.charAt(i);
            if (escaped)
            {
                escaped = false;
                switch (c)
                {
                    case 'n':
                        ret.append('\n');
                        break;
                    case 'r':
                        ret.append('\r');
                        break;
                    case 't':
                        ret.append('\t');
                        break;
                    case 'f':
                        ret.append('\f');
                        break;
                    case 'b':
                        ret.append('\b');
                        break;
                    case '\\':
                        ret.append('\\');
                        break;
                    case '/':
                        ret.append('/');
                        break;
                    case '"':
                        ret.append('"');
                        break;
                    case 'u':
                    {
                        // i is the index of the 'u'; the 4 hex digits follow at i+1 .. i+4
                        if ((i + 4) >= len)
                        {
                            throw new IllegalArgumentException("Truncated unicode escape at index " + (i - 1));
                        }
                        int code = (dehex(str.charAt(i + 1)) << 12) | (dehex(str.charAt(i + 2)) << 8) | (dehex(str.charAt(i + 3)) << 4)
                                | dehex(str.charAt(i + 4));
                        ret.append((char)code);
                        i += 4; // consume the hex digits; the loop increment moves past the last one
                        break;
                    }
                    default:
                        ret.append(c);
                }
            }
            else if (c == '\\')
            {
                escaped = true;
            }
            else
            {
                ret.append(c);
            }
        }
        return ret.toString();
    }
}