View Javadoc

1   // ========================================================================
2   // Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at 
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses. 
12  // ========================================================================
13  
14  package org.eclipse.jetty.util;
15  
/* ------------------------------------------------------------ */
/** UTF-8 StringBuilder.
 *
 * This class wraps a standard {@link java.lang.StringBuilder} and provides methods to append 
 * UTF-8 encoded bytes, which are converted into characters as they arrive.
 * 
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before 
 * a character is appended to the underlying string builder.
 * 
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
 * The UTF-8 code was inspired by http://javolution.org
 * 
 * Handling of malformed input:
 * <ul>
 * <li>A continuation byte (10xxxxxx) received when no sequence is in progress is 
 *     replaced by '?'.</li>
 * <li>An ASCII byte received in the middle of a multi-byte sequence causes '?' to be 
 *     appended for the truncated sequence, followed by the ASCII character itself.</li>
 * <li>A lead byte (11xxxxxx) received in the middle of a multi-byte sequence causes '?' 
 *     to be appended, the decoder state to be cleared and an 
 *     {@link IllegalArgumentException} to be thrown.</li>
 * <li>The bytes 0xFE and 0xFF, and sequences that decode to a value that is not a valid 
 *     Unicode code point, cause an {@link IllegalArgumentException}.</li>
 * </ul>
 * 
 * NOTE(review): overlong encodings and encoded surrogate code points are not rejected by 
 * this decoder; callers that decode untrusted input for security decisions should be aware.
 */
public class Utf8StringBuilder 
{
    /** Destination for decoded characters. */
    StringBuilder _buffer;
    /** Number of continuation bytes still expected for the sequence in progress (0 if none). */
    int _more;
    /** Code point bits accumulated so far for the sequence in progress. */
    int _bits;
    
    /* ------------------------------------------------------------ */
    /** Create a builder backed by a StringBuilder of default capacity.
     */
    public Utf8StringBuilder()
    {
        _buffer=new StringBuilder();
    }
    
    /* ------------------------------------------------------------ */
    /** Create a builder backed by a StringBuilder of the given initial capacity.
     * @param capacity initial capacity, in chars, of the underlying StringBuilder
     */
    public Utf8StringBuilder(int capacity)
    {
        _buffer=new StringBuilder(capacity);
    }

    /* ------------------------------------------------------------ */
    /** Append a range of UTF-8 encoded bytes.
     * Bytes are decoded one at a time with {@link #append(byte)}, so if an exception 
     * is thrown part way through, the preceding bytes have already been consumed.
     * @param b the array holding the bytes
     * @param offset index of the first byte to append
     * @param length number of bytes to append
     * @throws IllegalArgumentException if {@link #append(byte)} rejects one of the bytes
     */
    public void append(byte[] b,int offset, int length)
    {
        int end=offset+length;
        for (int i=offset; i<end;i++)
            append(b[i]);
    }
    
    /* ------------------------------------------------------------ */
    /** Append a single UTF-8 encoded byte.
     * A character is only added to the underlying StringBuilder once the final byte 
     * of its encoding has been received.
     * @param b the next byte of the UTF-8 stream
     * @throws IllegalArgumentException if b is 0xFE or 0xFF, if b is a lead byte 
     * received while continuation bytes were still expected, or if the completed 
     * sequence does not decode to a valid Unicode code point
     */
    public void append(byte b)
    {
        if (b>=0)
        {
            // 0xxxxxxx - plain ASCII
            if (_more>0)
            {
                // The pending multi-byte sequence was cut short: mark it, then fall 
                // through so that the ASCII character itself is not lost.
                _buffer.append('?');
                _more=0;
                _bits=0;
            }
            _buffer.append((char)(0x7f&b));
        }
        else if (_more==0)
        {
            // No sequence in progress, so this byte must be a lead byte.
            if ((b&0xc0)!=0xc0)
            {
                // 10xxxxxx - stray continuation byte
                _buffer.append('?');
                _more=0;
                _bits=0;
            }
            else
            { 
                if ((b & 0xe0) == 0xc0)
                {
                    //110xxxxx
                    _more=1;
                    _bits=b&0x1f;
                }
                else if ((b & 0xf0) == 0xe0)
                {
                    //1110xxxx
                    _more=2;
                    _bits=b&0x0f;
                }
                else if ((b & 0xf8) == 0xf0)
                {
                    //11110xxx
                    _more=3;
                    _bits=b&0x07;
                }
                else if ((b & 0xfc) == 0xf8)
                {
                    //111110xx
                    _more=4;
                    _bits=b&0x03;
                }
                else if ((b & 0xfe) == 0xfc) 
                {
                    //1111110x
                    _more=5;
                    _bits=b&0x01;
                }
                else
                {
                    // 0xFE or 0xFF
                    throw new IllegalArgumentException("!utf8");
                }
            }
        }
        else
        {
            // A sequence is in progress, so this byte must be a continuation byte.
            if ((b&0xc0)==0xc0)
            {    // 11??????
                _buffer.append('?');
                _more=0;
                _bits=0;
                throw new IllegalArgumentException("!utf8");
            }
            else
            {
                // 10xxxxxx
                _bits=(_bits<<6)|(b&0x3f);
                if (--_more==0)
                {
                    // Appends one char, or a surrogate pair for supplementary code 
                    // points, without allocating a temporary char[]. Throws 
                    // IllegalArgumentException if _bits is not a valid code point.
                    _buffer.appendCodePoint(_bits);
                }
            }
        }
    }
    
    /* ------------------------------------------------------------ */
    /** 
     * @return the number of chars decoded so far; bytes of an incomplete sequence 
     * are not counted
     */
    public int length()
    {
        return _buffer.length();
    }
    
    /* ------------------------------------------------------------ */
    /** Discard all decoded characters and any partially decoded sequence.
     */
    public void reset()
    {
        _buffer.setLength(0);
        _more=0;
        _bits=0;
    }
    
    /* ------------------------------------------------------------ */
    /** 
     * @return the underlying StringBuilder itself (not a copy)
     * @throws IllegalStateException if a multi-byte sequence is incomplete
     */
    public StringBuilder getStringBuilder()
    {
        if (_more!=0)
            throw new IllegalStateException("!utf8");
        return _buffer;
    }
    
    /* ------------------------------------------------------------ */
    /** 
     * @return the characters decoded so far as a String
     * @throws IllegalStateException if a multi-byte sequence is incomplete
     */
    @Override
    public String toString()
    {
        if (_more!=0)
            throw new IllegalStateException("!utf8");
        return _buffer.toString();
    }
}