1 // ========================================================================
2 // Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
3 // ------------------------------------------------------------------------
4 // All rights reserved. This program and the accompanying materials
5 // are made available under the terms of the Eclipse Public License v1.0
6 // and Apache License v2.0 which accompanies this distribution.
7 // The Eclipse Public License is available at
8 // http://www.eclipse.org/legal/epl-v10.html
9 // The Apache License v2.0 is available at
10 // http://www.opensource.org/licenses/apache2.0.php
11 // You may elect to redistribute this code under either of these licenses.
12 // ========================================================================
13
14 package org.eclipse.jetty.util;
15
/* ------------------------------------------------------------ */
/** UTF-8 StringBuilder.
 *
 * This class wraps a standard {@link java.lang.StringBuilder} and provides methods to append
 * UTF-8 encoded bytes, which are converted into characters.
 *
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before
 * a character is appended to the string builder.
 *
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
 * The UTF-8 code was inspired by http://javolution.org
 *
 * Malformed input is handled as follows:
 * <ul>
 * <li>a continuation byte with no sequence in progress appends <code>'?'</code>;</li>
 * <li>an ASCII byte arriving in the middle of a sequence appends a single <code>'?'</code>
 *     (the ASCII byte itself is consumed) and the partial sequence is discarded;</li>
 * <li>a lead byte arriving in the middle of a sequence appends <code>'?'</code> and throws
 *     {@link IllegalArgumentException};</li>
 * <li>the bytes 0xFE and 0xFF throw {@link IllegalArgumentException};</li>
 * <li>a completed sequence that is not the shortest encoding of its code point throws
 *     {@link IllegalArgumentException} and appends nothing;</li>
 * <li>a completed sequence that decodes to a value above U+10FFFF appends <code>'?'</code>.</li>
 * </ul>
 */
public class Utf8StringBuilder
{
    StringBuilder _buffer;
    /** Number of continuation bytes still expected for the sequence in progress. */
    int _more;
    /** Payload bits accumulated so far for the sequence in progress. */
    int _bits;
    /** Smallest code point that legitimately needs the current sequence length. */
    private int _min;

    /** Creates a decoder backed by a StringBuilder of default capacity. */
    public Utf8StringBuilder()
    {
        _buffer=new StringBuilder();
    }

    /**
     * Creates a decoder backed by a StringBuilder with the given initial capacity.
     * @param capacity initial capacity passed to {@link StringBuilder#StringBuilder(int)}
     */
    public Utf8StringBuilder(int capacity)
    {
        _buffer=new StringBuilder(capacity);
    }

    /**
     * Decodes a range of UTF-8 bytes by feeding each one to {@link #append(byte)}.
     * @param b the source array
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @throws IllegalArgumentException as described for {@link #append(byte)}
     */
    public void append(byte[] b,int offset, int length)
    {
        int end=offset+length;
        for (int i=offset; i<end;i++)
            append(b[i]);
    }

    /**
     * Decodes one more UTF-8 byte, appending a character once a sequence is complete.
     * @param b the next byte of the UTF-8 stream
     * @throws IllegalArgumentException if the byte is 0xFE or 0xFF, if a lead byte arrives
     * while a sequence is still in progress, or if the byte completes a non-shortest
     * (overlong) encoding
     */
    public void append(byte b)
    {
        if (b>=0)
        {
            // 0xxxxxxx
            if (_more>0)
            {
                // ASCII byte interrupting a multi-byte sequence: replace the lot with '?'
                _buffer.append('?');
                clearState();
            }
            else
                _buffer.append((char)(0x7f&b));
        }
        else if (_more==0)
        {
            if ((b&0xc0)!=0xc0)
            {
                // 10xxxxxx: continuation byte without a lead byte
                _buffer.append('?');
                clearState();
            }
            else if ((b&0xe0)==0xc0)
            {
                // 110xxxxx
                _more=1;
                _bits=b&0x1f;
                _min=0x80;
            }
            else if ((b&0xf0)==0xe0)
            {
                // 1110xxxx
                _more=2;
                _bits=b&0x0f;
                _min=0x800;
            }
            else if ((b&0xf8)==0xf0)
            {
                // 11110xxx
                _more=3;
                _bits=b&0x07;
                _min=0x10000;
            }
            else if ((b&0xfc)==0xf8)
            {
                // 111110xx
                _more=4;
                _bits=b&0x03;
                _min=0x200000;
            }
            else if ((b&0xfe)==0xfc)
            {
                // 1111110x
                _more=5;
                _bits=b&0x01;
                _min=0x4000000;
            }
            else
            {
                // 0xFE and 0xFF never appear in UTF-8
                throw new IllegalArgumentException();
            }
        }
        else if ((b&0xc0)==0xc0)
        {
            // 11??????: a new lead byte before the previous sequence finished
            _buffer.append('?');
            clearState();
            throw new IllegalArgumentException();
        }
        else
        {
            // 10xxxxxx
            _bits=(_bits<<6)|(b&0x3f);
            if (--_more==0)
            {
                int codePoint=_bits;
                int min=_min;
                // Clear before validating so the decoder is usable after an exception.
                clearState();

                // Overlong forms can only be detected once the whole value is known:
                // a zero payload in the lead byte is legal (e.g. E0 A0 80 is U+0800).
                if (codePoint<min)
                    throw new IllegalArgumentException("non-shortest UTF-8 form");

                if (codePoint>Character.MAX_CODE_POINT)
                    _buffer.append('?'); // 4, 5 and 6 byte forms can exceed U+10FFFF
                else
                    _buffer.appendCodePoint(codePoint); // emits a surrogate pair above U+FFFF
            }
        }
    }

    /** Discards any partially decoded sequence without touching the decoded text. */
    private void clearState()
    {
        _more=0;
        _bits=0;
        _min=0;
    }

    /**
     * @return the number of chars decoded so far, as reported by the wrapped StringBuilder
     */
    public int length()
    {
        return _buffer.length();
    }

    /** Empties the decoded text and discards any partially decoded sequence. */
    public void reset()
    {
        _buffer.setLength(0);
        clearState();
    }

    /**
     * @return the wrapped StringBuilder itself (not a copy)
     * @throws IllegalStateException if a multi-byte sequence is still incomplete
     */
    public StringBuilder getStringBuilder()
    {
        if (_more!=0)
            throw new IllegalStateException();
        return _buffer;
    }

    /**
     * @return the text decoded so far
     * @throws IllegalStateException if a multi-byte sequence is still incomplete
     */
    @Override
    public String toString()
    {
        if (_more!=0)
            throw new IllegalStateException();
        return _buffer.toString();
    }
}