View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2016 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.util;
20  
21  import java.io.UnsupportedEncodingException;
22  import java.nio.charset.StandardCharsets;
23  import java.util.ArrayList;
24  import java.util.List;
25  
26  import org.eclipse.jetty.util.log.Log;
27  import org.eclipse.jetty.util.log.Logger;
28  
29  /** Fast String Utilities.
30   *
31   * These string utilities provide both convenience methods and
32   * performance improvements over most standard library versions. The
33   * main aim of the optimizations is to avoid object creation unless
34   * absolutely required.
35   *
36   * 
37   */
38  public class StringUtil
39  {
    // Logger for this class, obtained from the Jetty Log facade.
    private static final Logger LOG = Log.getLogger(StringUtil.class);
    
    
    // Lookup from accepted charset spellings to the normalized names declared below.
    // Populated once by the static initializer and read by normalizeCharset().
    private final static Trie<String> CHARSETS= new ArrayTrie<>(256);
    
    /** The literal <code>0.0.0.0</code>; presumably denotes "bind to every interface" - confirm against callers. */
    public static final String ALL_INTERFACES="0.0.0.0";
    /** Carriage return (octal 015) followed by line feed (octal 012). */
    public static final String CRLF="\015\012";
    
    /** @deprecated use {@link System#lineSeparator()} instead */
    @Deprecated
    public static final String __LINE_SEPARATOR = System.lineSeparator();
       
    /** Normalized (lower case) name used by this class for ISO-8859-1. */
    public static final String __ISO_8859_1="iso-8859-1";
    /** Normalized (lower case) name used by this class for UTF-8. */
    public final static String __UTF8="utf-8";
    /** Normalized (lower case) name used by this class for UTF-16. */
    public final static String __UTF16="utf-16";
    
    // Register each accepted spelling (with and without separator) against its normalized name.
    static
    {
        CHARSETS.put("utf-8",__UTF8);
        CHARSETS.put("utf8",__UTF8);
        CHARSETS.put("utf-16",__UTF16);
        CHARSETS.put("utf16",__UTF16);
        CHARSETS.put("iso-8859-1",__ISO_8859_1);
        CHARSETS.put("iso_8859_1",__ISO_8859_1);
    }
65      
66      /* ------------------------------------------------------------ */
67      /** Convert alternate charset names (eg utf8) to normalized
68       * name (eg UTF-8).
69       * @param s the charset to normalize
70       * @return the normalized charset (or null if normalized version not found)
71       */
72      public static String normalizeCharset(String s)
73      {
74          String n=CHARSETS.get(s);
75          return (n==null)?s:n;
76      }
77      
78      /* ------------------------------------------------------------ */
79      /** Convert alternate charset names (eg utf8) to normalized
80       * name (eg UTF-8).
81       * @param s the charset to normalize
82       * @param offset the offset in the charset
83       * @param length the length of the charset in the input param
84       * @return the normalized charset (or null if not found)
85       */
86      public static String normalizeCharset(String s,int offset,int length)
87      {
88          String n=CHARSETS.get(s,offset,length);       
89          return (n==null)?s.substring(offset,offset+length):n;
90      }
91      
92  
93      /* ------------------------------------------------------------ */
    /**
     * ASCII lower-casing table, indexed by character code 0-127 (entries are
     * written in octal). The entries for 'A'-'Z' (octal 101-132) hold the
     * matching lower case letter (octal 141-172); every other entry holds
     * its own index, so those characters map to themselves.
     */
    public static final char[] lowercases = {
          '\000','\001','\002','\003','\004','\005','\006','\007',
          '\010','\011','\012','\013','\014','\015','\016','\017',
          '\020','\021','\022','\023','\024','\025','\026','\027',
          '\030','\031','\032','\033','\034','\035','\036','\037',
          '\040','\041','\042','\043','\044','\045','\046','\047',
          '\050','\051','\052','\053','\054','\055','\056','\057',
          '\060','\061','\062','\063','\064','\065','\066','\067',
          '\070','\071','\072','\073','\074','\075','\076','\077',
          '\100','\141','\142','\143','\144','\145','\146','\147',
          '\150','\151','\152','\153','\154','\155','\156','\157',
          '\160','\161','\162','\163','\164','\165','\166','\167',
          '\170','\171','\172','\133','\134','\135','\136','\137',
          '\140','\141','\142','\143','\144','\145','\146','\147',
          '\150','\151','\152','\153','\154','\155','\156','\157',
          '\160','\161','\162','\163','\164','\165','\166','\167',
          '\170','\171','\172','\173','\174','\175','\176','\177' };
111 
112     /* ------------------------------------------------------------ */
113     /**
114      * fast lower case conversion. Only works on ascii (not unicode)
115      * @param s the string to convert
116      * @return a lower case version of s
117      */
118     public static String asciiToLowerCase(String s)
119     {
120         char[] c = null;
121         int i=s.length();
122 
123         // look for first conversion
124         while (i-->0)
125         {
126             char c1=s.charAt(i);
127             if (c1<=127)
128             {
129                 char c2=lowercases[c1];
130                 if (c1!=c2)
131                 {
132                     c=s.toCharArray();
133                     c[i]=c2;
134                     break;
135                 }
136             }
137         }
138 
139         while (i-->0)
140         {
141             if(c[i]<=127)
142                 c[i] = lowercases[c[i]];
143         }
144         
145         return c==null?s:new String(c);
146     }
147 
148 
149     /* ------------------------------------------------------------ */
150     public static boolean startsWithIgnoreCase(String s,String w)
151     {
152         if (w==null)
153             return true;
154         
155         if (s==null || s.length()<w.length())
156             return false;
157         
158         for (int i=0;i<w.length();i++)
159         {
160             char c1=s.charAt(i);
161             char c2=w.charAt(i);
162             if (c1!=c2)
163             {
164                 if (c1<=127)
165                     c1=lowercases[c1];
166                 if (c2<=127)
167                     c2=lowercases[c2];
168                 if (c1!=c2)
169                     return false;
170             }
171         }
172         return true;
173     }
174     
175     /* ------------------------------------------------------------ */
176     public static boolean endsWithIgnoreCase(String s,String w)
177     {
178         if (w==null)
179             return true;
180 
181         if (s==null)
182             return false;
183             
184         int sl=s.length();
185         int wl=w.length();
186         
187         if (sl<wl)
188             return false;
189         
190         for (int i=wl;i-->0;)
191         {
192             char c1=s.charAt(--sl);
193             char c2=w.charAt(i);
194             if (c1!=c2)
195             {
196                 if (c1<=127)
197                     c1=lowercases[c1];
198                 if (c2<=127)
199                     c2=lowercases[c2];
200                 if (c1!=c2)
201                     return false;
202             }
203         }
204         return true;
205     }
206     
207     /* ------------------------------------------------------------ */
208     /**
209      * returns the next index of a character from the chars string
210      * @param s the input string to search
211      * @param chars the chars to look for
212      * @return the index of the character in the input stream found.
213      */
214     public static int indexFrom(String s,String chars)
215     {
216         for (int i=0;i<s.length();i++)
217            if (chars.indexOf(s.charAt(i))>=0)
218               return i;
219         return -1;
220     }
221     
222     /* ------------------------------------------------------------ */
223     /**
224      * replace substrings within string.
225      * @param s the input string
226      * @param sub the string to look for
227      * @param with the string to replace with
228      * @return the now replaced string
229      */
230     public static String replace(String s, String sub, String with)
231     {
232         int c=0;
233         int i=s.indexOf(sub,c);
234         if (i == -1)
235             return s;
236     
237         StringBuilder buf = new StringBuilder(s.length()+with.length());
238 
239         do
240         {
241             buf.append(s.substring(c,i));
242             buf.append(with);
243             c=i+sub.length();
244         } while ((i=s.indexOf(sub,c))!=-1);
245 
246         if (c<s.length())
247             buf.append(s.substring(c,s.length()));
248 
249         return buf.toString();
250         
251     }
252 
253 
254     /* ------------------------------------------------------------ */
255     /** Remove single or double quotes.
256      * @param s the input string
257      * @return the string with quotes removed
258      */
259     public static String unquote(String s)
260     {
261         return QuotedStringTokenizer.unquote(s);
262     }
263 
264 
265     /* ------------------------------------------------------------ */
266     /** Append substring to StringBuilder 
267      * @param buf StringBuilder to append to
268      * @param s String to append from
269      * @param offset The offset of the substring
270      * @param length The length of the substring
271      */
272     public static void append(StringBuilder buf,
273                               String s,
274                               int offset,
275                               int length)
276     {
277         synchronized(buf)
278         {
279             int end=offset+length;
280             for (int i=offset; i<end;i++)
281             {
282                 if (i>=s.length())
283                     break;
284                 buf.append(s.charAt(i));
285             }
286         }
287     }
288 
289     
290     /* ------------------------------------------------------------ */
291     /**
292      * append hex digit
293      * @param buf the buffer to append to
294      * @param b the byte to append
295      * @param base the base of the hex output (almost always 16).
296      * 
297      */
298     public static void append(StringBuilder buf,byte b,int base)
299     {
300         int bi=0xff&b;
301         int c='0'+(bi/base)%base;
302         if (c>'9')
303             c= 'a'+(c-'0'-10);
304         buf.append((char)c);
305         c='0'+bi%base;
306         if (c>'9')
307             c= 'a'+(c-'0'-10);
308         buf.append((char)c);
309     }
310 
311     /* ------------------------------------------------------------ */
312     /**
313      * Append 2 digits (zero padded) to the StringBuffer
314      * 
315      * @param buf the buffer to append to
316      * @param i the value to append
317      */
318     public static void append2digits(StringBuffer buf,int i)
319     {
320         if (i<100)
321         {
322             buf.append((char)(i/10+'0'));
323             buf.append((char)(i%10+'0'));
324         }
325     }
326     
327     /* ------------------------------------------------------------ */
328     /**
329      * Append 2 digits (zero padded) to the StringBuilder
330      * 
331      * @param buf the buffer to append to
332      * @param i the value to append
333      */
334     public static void append2digits(StringBuilder buf,int i)
335     {
336         if (i<100)
337         {
338             buf.append((char)(i/10+'0'));
339             buf.append((char)(i%10+'0'));
340         }
341     }
342     
343     /* ------------------------------------------------------------ */
344     /** Return a non null string.
345      * @param s String
346      * @return The string passed in or empty string if it is null. 
347      */
348     public static String nonNull(String s)
349     {
350         if (s==null)
351             return "";
352         return s;
353     }
354     
355     /* ------------------------------------------------------------ */
356     public static boolean equals(String s,char[] buf, int offset, int length)
357     {
358         if (s.length()!=length)
359             return false;
360         for (int i=0;i<length;i++)
361             if (buf[offset+i]!=s.charAt(i))
362                 return false;
363         return true;
364     }
365 
366     /* ------------------------------------------------------------ */
367     public static String toUTF8String(byte[] b,int offset,int length)
368     {
369         return new String(b,offset,length,StandardCharsets.UTF_8);
370     }
371 
372     /* ------------------------------------------------------------ */
373     public static String toString(byte[] b,int offset,int length,String charset)
374     {
375         try
376         {
377             return new String(b,offset,length,charset);
378         }
379         catch (UnsupportedEncodingException e)
380         {
381             throw new IllegalArgumentException(e);
382         }
383     }
384 
385     /* ------------------------------------------------------------ */
386     /**
387      * Test if a string is null or only has whitespace characters in it.
388      * <p>
389      * Note: uses codepoint version of {@link Character#isWhitespace(int)} to support Unicode better.
390      * 
391      * <pre>
392      *   isBlank(null)   == true
393      *   isBlank("")     == true
394      *   isBlank("\r\n") == true
395      *   isBlank("\t")   == true
396      *   isBlank("   ")  == true
397      *   isBlank("a")    == false
398      *   isBlank(".")    == false
399      *   isBlank(";\n")  == false
400      * </pre>
401      * 
402      * @param str
403      *            the string to test.
404      * @return true if string is null or only whitespace characters, false if non-whitespace characters encountered.
405      */
406     public static boolean isBlank(String str)
407     {
408         if (str == null)
409         {
410             return true;
411         }
412         int len = str.length();
413         for (int i = 0; i < len; i++)
414         {
415             if (!Character.isWhitespace(str.codePointAt(i)))
416             {
417                 // found a non-whitespace, we can stop searching  now
418                 return false;
419             }
420         }
421         // only whitespace
422         return true;
423     }
424     
425     /* ------------------------------------------------------------ */
426     /**
427      * Test if a string is not null and contains at least 1 non-whitespace characters in it.
428      * <p>
429      * Note: uses codepoint version of {@link Character#isWhitespace(int)} to support Unicode better.
430      * 
431      * <pre>
432      *   isNotBlank(null)   == false
433      *   isNotBlank("")     == false
434      *   isNotBlank("\r\n") == false
435      *   isNotBlank("\t")   == false
436      *   isNotBlank("   ")  == false
437      *   isNotBlank("a")    == true
438      *   isNotBlank(".")    == true
439      *   isNotBlank(";\n")  == true
440      * </pre>
441      * 
442      * @param str
443      *            the string to test.
444      * @return true if string is not null and has at least 1 non-whitespace character, false if null or all-whitespace characters.
445      */
446     public static boolean isNotBlank(String str)
447     {
448         if (str == null)
449         {
450             return false;
451         }
452         int len = str.length();
453         for (int i = 0; i < len; i++)
454         {
455             if (!Character.isWhitespace(str.codePointAt(i)))
456             {
457                 // found a non-whitespace, we can stop searching  now
458                 return true;
459             }
460         }
461         // only whitespace
462         return false;
463     }
464 
465     /* ------------------------------------------------------------ */
466     public static boolean isUTF8(String charset)
467     {
468         return __UTF8.equalsIgnoreCase(charset)||__UTF8.equalsIgnoreCase(normalizeCharset(charset));
469     }
470 
471 
472     /* ------------------------------------------------------------ */
473     public static String printable(String name)
474     {
475         if (name==null)
476             return null;
477         StringBuilder buf = new StringBuilder(name.length());
478         for (int i=0;i<name.length();i++)
479         {
480             char c=name.charAt(i);
481             if (!Character.isISOControl(c))
482                 buf.append(c);
483         }
484         return buf.toString();
485     }
486     
487     /* ------------------------------------------------------------ */
488     public static String printable(byte[] b)
489     {
490         StringBuilder buf = new StringBuilder();
491         for (int i=0;i<b.length;i++)
492         {
493             char c=(char)b[i];
494             if (Character.isWhitespace(c)|| c>' ' && c<0x7f)
495                 buf.append(c);
496             else 
497             {
498                 buf.append("0x");
499                 TypeUtil.toHex(b[i],buf);
500             }
501         }
502         return buf.toString();
503     }
504     
505     public static byte[] getBytes(String s)
506     {
507         return s.getBytes(StandardCharsets.ISO_8859_1);
508     }
509     
510     public static byte[] getUtf8Bytes(String s)
511     {
512         return s.getBytes(StandardCharsets.UTF_8);
513     }
514     
515     public static byte[] getBytes(String s,String charset)
516     {
517         try
518         {
519             return s.getBytes(charset);
520         }
521         catch(Exception e)
522         {
523             LOG.warn(e);
524             return s.getBytes();
525         }
526     }
527     
528     
529     
530     /**
531      * Converts a binary SID to a string SID
532      * 
533      * http://en.wikipedia.org/wiki/Security_Identifier
534      * 
535      * S-1-IdentifierAuthority-SubAuthority1-SubAuthority2-...-SubAuthorityn
536      * @param sidBytes the SID bytes to build from
537      * @return the string SID
538      */
539     public static String sidBytesToString(byte[] sidBytes)
540     {
541         StringBuilder sidString = new StringBuilder();
542         
543         // Identify this as a SID
544         sidString.append("S-");
545         
546         // Add SID revision level (expect 1 but may change someday)
547         sidString.append(Byte.toString(sidBytes[0])).append('-');
548         
549         StringBuilder tmpBuilder = new StringBuilder();
550         
551         // crunch the six bytes of issuing authority value
552         for (int i = 2; i <= 7; ++i)
553         {
554             tmpBuilder.append(Integer.toHexString(sidBytes[i] & 0xFF));
555         }
556         
557         sidString.append(Long.parseLong(tmpBuilder.toString(), 16)); // '-' is in the subauth loop
558    
559         // the number of subAuthorities we need to attach
560         int subAuthorityCount = sidBytes[1];
561 
562         // attach each of the subAuthorities
563         for (int i = 0; i < subAuthorityCount; ++i)
564         {
565             int offset = i * 4;
566             tmpBuilder.setLength(0);
567             // these need to be zero padded hex and little endian
568             tmpBuilder.append(String.format("%02X%02X%02X%02X", 
569                     (sidBytes[11 + offset] & 0xFF),
570                     (sidBytes[10 + offset] & 0xFF),
571                     (sidBytes[9 + offset] & 0xFF),
572                     (sidBytes[8 + offset] & 0xFF)));  
573             sidString.append('-').append(Long.parseLong(tmpBuilder.toString(), 16));
574         }
575         
576         return sidString.toString();
577     }
578     
579     /**
580      * Converts a string SID to a binary SID
581      * 
582      * http://en.wikipedia.org/wiki/Security_Identifier
583      * 
584      * S-1-IdentifierAuthority-SubAuthority1-SubAuthority2-...-SubAuthorityn
585      * @param sidString the string SID
586      * @return the binary SID
587      */
588     public static byte[] sidStringToBytes( String sidString )
589     {
590         String[] sidTokens = sidString.split("-");
591         
592         int subAuthorityCount = sidTokens.length - 3; // S-Rev-IdAuth-
593         
594         int byteCount = 0;
595         byte[] sidBytes = new byte[1 + 1 + 6 + (4 * subAuthorityCount)];
596         
597         // the revision byte
598         sidBytes[byteCount++] = (byte)Integer.parseInt(sidTokens[1]);
599 
600         // the # of sub authorities byte
601         sidBytes[byteCount++] = (byte)subAuthorityCount;
602 
603         // the certAuthority
604         String hexStr = Long.toHexString(Long.parseLong(sidTokens[2]));
605         
606         while( hexStr.length() < 12) // pad to 12 characters
607         {
608             hexStr = "0" + hexStr;
609         }
610 
611         // place the certAuthority 6 bytes
612         for ( int i = 0 ; i < hexStr.length(); i = i + 2)
613         {
614             sidBytes[byteCount++] = (byte)Integer.parseInt(hexStr.substring(i, i + 2),16);
615         }
616                 
617         
618         for ( int i = 3; i < sidTokens.length ; ++i)
619         {
620             hexStr = Long.toHexString(Long.parseLong(sidTokens[i]));
621             
622             while( hexStr.length() < 8) // pad to 8 characters
623             {
624                 hexStr = "0" + hexStr;
625             }     
626             
627             // place the inverted sub authorities, 4 bytes each
628             for ( int j = hexStr.length(); j > 0; j = j - 2)
629             {          
630                 sidBytes[byteCount++] = (byte)Integer.parseInt(hexStr.substring(j-2, j),16);
631             }
632         }
633       
634         return sidBytes;
635     }
636     
637 
638     /**
639      * Convert String to an integer. Parses up to the first non-numeric character. If no number is found an IllegalArgumentException is thrown
640      * 
641      * @param string A String containing an integer.
642      * @param from The index to start parsing from
643      * @return an int
644      */
645     public static int toInt(String string,int from)
646     {
647         int val = 0;
648         boolean started = false;
649         boolean minus = false;
650 
651         for (int i = from; i < string.length(); i++)
652         {
653             char b = string.charAt(i);
654             if (b <= ' ')
655             {
656                 if (started)
657                     break;
658             }
659             else if (b >= '0' && b <= '9')
660             {
661                 val = val * 10 + (b - '0');
662                 started = true;
663             }
664             else if (b == '-' && !started)
665             {
666                 minus = true;
667             }
668             else
669                 break;
670         }
671 
672         if (started)
673             return minus?(-val):val;
674         throw new NumberFormatException(string);
675     }
676     
677     /**
678      * Convert String to an long. Parses up to the first non-numeric character. If no number is found an IllegalArgumentException is thrown
679      * 
680      * @param string
681      *            A String containing an integer.
682      * @return an int
683      */
684     public static long toLong(String string)
685     {
686         long val = 0;
687         boolean started = false;
688         boolean minus = false;
689 
690         for (int i = 0; i < string.length(); i++)
691         {
692             char b = string.charAt(i);
693             if (b <= ' ')
694             {
695                 if (started)
696                     break;
697             }
698             else if (b >= '0' && b <= '9')
699             {
700                 val = val * 10L + (b - '0');
701                 started = true;
702             }
703             else if (b == '-' && !started)
704             {
705                 minus = true;
706             }
707             else
708                 break;
709         }
710 
711         if (started)
712             return minus?(-val):val;
713         throw new NumberFormatException(string);
714     }
715     
716     /**
717      * Truncate a string to a max size.
718      * 
719      * @param str the string to possibly truncate
720      * @param maxSize the maximum size of the string
721      * @return the truncated string.  if <code>str</code> param is null, then the returned string will also be null.
722      */
723     public static String truncate(String str, int maxSize)
724     {
725         if (str == null)
726         {
727             return null;
728         }
729 
730         if (str.length() <= maxSize)
731         {
732             return str;
733         }
734 
735         return str.substring(0,maxSize);
736     }
737 
738     /**
739     * Parse the string representation of a list using {@link #csvSplit(List,String,int,int)}
740     * @param s The string to parse, expected to be enclosed as '[...]'
741     * @return An array of parsed values.
742     */
743     public static String[] arrayFromString(String s) 
744     {
745         if (s==null)
746             return new String[]{};
747 
748         if (!s.startsWith("[") || !s.endsWith("]"))
749             throw new IllegalArgumentException();
750         if (s.length()==2)
751             return new String[]{};
752 
753         return csvSplit(s,1,s.length()-2);
754     }
755     
756     /**
757     * Parse a CSV string using {@link #csvSplit(List,String, int, int)}
758     * @param s The string to parse
759     * @return An array of parsed values.
760     */
761     public static String[] csvSplit(String s)
762     {
763         if (s==null)
764             return null;
765         return csvSplit(s,0,s.length());
766     }
767     
768     /**
769      * Parse a CSV string using {@link #csvSplit(List,String, int, int)}
770      * @param s The string to parse
771      * @param off The offset into the string to start parsing
772      * @param len The len in characters to parse
773      * @return An array of parsed values.
774      */
775     public static String[] csvSplit(String s, int off,int len)
776     {
777         if (s==null)
778             return null;
779         if (off<0 || len<0 || off>s.length())
780             throw new IllegalArgumentException();
781 
782         List<String> list = new ArrayList<>();
783         csvSplit(list,s,off,len);
784         return list.toArray(new String[list.size()]);
785     }
786 
787     enum CsvSplitState { PRE_DATA, QUOTE, SLOSH, DATA, WHITE, POST_DATA };
788 
789     /** Split a quoted comma separated string to a list
790      * <p>Handle <a href="https://www.ietf.org/rfc/rfc4180.txt">rfc4180</a>-like 
791      * CSV strings, with the exceptions:<ul>
792      * <li>quoted values may contain double quotes escaped with back-slash
793      * <li>Non-quoted values are trimmed of leading trailing white space
794      * <li>trailing commas are ignored
795      * <li>double commas result in a empty string value
796      * </ul>  
797      * @param list The Collection to split to (or null to get a new list)
798      * @param s The string to parse
799      * @param off The offset into the string to start parsing
800      * @param len The len in characters to parse
801      * @return list containing the parsed list values
802      */
803     public static List<String> csvSplit(List<String> list,String s, int off,int len)
804     {
805         if (list==null)
806             list=new ArrayList<>();
807         CsvSplitState state = CsvSplitState.PRE_DATA;
808         StringBuilder out = new StringBuilder();
809         int last=-1;
810         while (len>0)
811         {
812             char ch = s.charAt(off++);
813             len--;
814             
815             switch(state)
816             {
817                 case PRE_DATA:
818                     if (Character.isWhitespace(ch))
819                         continue;
820 
821                     if ('"'==ch)
822                     {
823                         state=CsvSplitState.QUOTE;
824                         continue;
825                     }
826                     
827                     if (','==ch)
828                     {
829                         list.add("");
830                         continue;
831                     }
832 
833                     state=CsvSplitState.DATA;
834                     out.append(ch);
835                     continue;
836 
837                 case DATA:
838                     if (Character.isWhitespace(ch))
839                     {
840                         last=out.length();
841                         out.append(ch);
842                         state=CsvSplitState.WHITE;
843                         continue;
844                     }
845                     
846                     if (','==ch)
847                     {
848                         list.add(out.toString());
849                         out.setLength(0);
850                         state=CsvSplitState.PRE_DATA;
851                         continue;
852                     }
853 
854                     out.append(ch);
855                     continue;
856                     
857                 case WHITE:
858                     if (Character.isWhitespace(ch))
859                     {
860                         out.append(ch);
861                         continue;
862                     }
863                     
864                     if (','==ch)
865                     {
866                         out.setLength(last);
867                         list.add(out.toString());
868                         out.setLength(0);
869                         state=CsvSplitState.PRE_DATA;
870                         continue;
871                     }
872                     
873                     state=CsvSplitState.DATA;
874                     out.append(ch);
875                     last=-1;
876                     continue;
877 
878                 case QUOTE:
879                     if ('\\'==ch)
880                     {
881                         state=CsvSplitState.SLOSH;
882                         continue;
883                     }
884                     if ('"'==ch)
885                     {
886                         list.add(out.toString());
887                         out.setLength(0);
888                         state=CsvSplitState.POST_DATA;
889                         continue;
890                     }
891                     out.append(ch);
892                     continue;
893                     
894                 case SLOSH:
895                     out.append(ch);
896                     state=CsvSplitState.QUOTE;
897                     continue;
898                     
899                 case POST_DATA:
900                     if (','==ch)
901                     {
902                         state=CsvSplitState.PRE_DATA;
903                         continue;
904                     }
905                     continue;
906             }
907         }
908 
909         switch(state)
910         {
911             case PRE_DATA:
912             case POST_DATA:
913                 break;
914 
915             case DATA:
916             case QUOTE:
917             case SLOSH:
918                 list.add(out.toString());
919                 break;
920                 
921             case WHITE:
922                 out.setLength(last);
923                 list.add(out.toString());
924                 break;
925         }
926         
927         return list;
928     }
929 
930     public static String sanitizeXmlString(String html)
931     {
932         if (html==null)
933             return null;
934         
935         int i=0;
936         
937         // Are there any characters that need sanitizing?
938         loop: for (;i<html.length();i++)
939         {
940             char c=html.charAt(i);
941 
942             switch(c)
943             {
944                 case '&' :
945                 case '<' :
946                 case '>' :
947                 case '\'':
948                 case '"':
949                     break loop;
950 
951                 default:
952                     if (Character.isISOControl(c) && !Character.isWhitespace(c))
953                         break loop;
954             }
955         }
956 
957         // No characters need sanitizing, so return original string
958         if (i==html.length())
959             return html;
960         
961         // Create builder with OK content so far 
962         StringBuilder out = new StringBuilder(html.length()*4/3);
963         out.append(html,0,i);
964         
965         // sanitize remaining content
966         for (;i<html.length();i++)
967         {
968             char c=html.charAt(i);
969 
970             switch(c)
971             {
972                 case '&' :
973                     out.append("&amp;");
974                     break;
975                 case '<' :
976                     out.append("&lt;");
977                     break;
978                 case '>' :
979                     out.append("&gt;");
980                     break;
981                 case '\'':
982                     out.append("&apos;");
983                     break;
984                 case '"':
985                     out.append("&quot;");
986                     break;
987 
988                 default:
989                     if (Character.isISOControl(c) && !Character.isWhitespace(c))
990                         out.append('?');
991                     else
992                         out.append(c);
993             }
994         }
995         return out.toString();
996     }
997     
998     /* ------------------------------------------------------------ */
999     /** The String value of an Object
1000      * <p>This method calls {@link String#valueOf(Object)} unless the object is null,
1001      * in which case null is returned</p>
1002      * @param object The object
1003      * @return String value or null
1004      */
1005     public static String valueOf(Object object)
1006     {
1007         return object==null?null:String.valueOf(object);
1008     }
1009 
1010 }