1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.eclipse.jetty.util;
20
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23
24 import org.eclipse.jetty.util.log.Log;
25 import org.eclipse.jetty.util.log.Logger;
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 public abstract class Utf8Appendable
52 {
53 protected static final Logger LOG = Log.getLogger(Utf8Appendable.class);
54 public static final char REPLACEMENT = '\ufffd';
55 public static final byte[] REPLACEMENT_UTF8 = new byte[] {(byte)0xEF,(byte)0xBF,(byte)0xBD };
56 private static final int UTF8_ACCEPT = 0;
57 private static final int UTF8_REJECT = 12;
58
59 protected final Appendable _appendable;
60 protected int _state = UTF8_ACCEPT;
61
62 private static final byte[] BYTE_TABLE =
63 {
64
65
66 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
67 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
68 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
69 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
70 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
71 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
72 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
73 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
74 };
75
76 private static final byte[] TRANS_TABLE =
77 {
78
79
80 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
81 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
82 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
83 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
84 12,36,12,12,12,12,12,12,12,12,12,12
85 };
86
87 private int _codep;
88
89 public Utf8Appendable(Appendable appendable)
90 {
91 _appendable = appendable;
92 }
93
94 public abstract int length();
95
96 protected void reset()
97 {
98 _state = UTF8_ACCEPT;
99 }
100
101
102 private void checkCharAppend() throws IOException
103 {
104 if (_state != UTF8_ACCEPT)
105 {
106 _appendable.append(REPLACEMENT);
107 int state=_state;
108 _state=UTF8_ACCEPT;
109 throw new NotUtf8Exception("char appended in state "+state);
110 }
111 }
112
113 public void append(char c)
114 {
115 try
116 {
117 checkCharAppend();
118 _appendable.append(c);
119 }
120 catch (IOException e)
121 {
122 throw new RuntimeException(e);
123 }
124 }
125
126 public void append(String s)
127 {
128 try
129 {
130 checkCharAppend();
131 _appendable.append(s);
132 }
133 catch (IOException e)
134 {
135 throw new RuntimeException(e);
136 }
137 }
138
139 public void append(String s,int offset,int length)
140 {
141 try
142 {
143 checkCharAppend();
144 _appendable.append(s,offset,offset+length);
145 }
146 catch (IOException e)
147 {
148 throw new RuntimeException(e);
149 }
150 }
151
152
153 public void append(byte b)
154 {
155 try
156 {
157 appendByte(b);
158 }
159 catch (IOException e)
160 {
161 throw new RuntimeException(e);
162 }
163 }
164
165 public void append(ByteBuffer buf)
166 {
167 try
168 {
169 while (buf.remaining() > 0)
170 {
171 appendByte(buf.get());
172 }
173 }
174 catch (IOException e)
175 {
176 throw new RuntimeException(e);
177 }
178 }
179
180 public void append(byte[] b, int offset, int length)
181 {
182 try
183 {
184 int end = offset + length;
185 for (int i = offset; i < end; i++)
186 appendByte(b[i]);
187 }
188 catch (IOException e)
189 {
190 throw new RuntimeException(e);
191 }
192 }
193
194 public boolean append(byte[] b, int offset, int length, int maxChars)
195 {
196 try
197 {
198 int end = offset + length;
199 for (int i = offset; i < end; i++)
200 {
201 if (length() > maxChars)
202 return false;
203 appendByte(b[i]);
204 }
205 return true;
206 }
207 catch (IOException e)
208 {
209 throw new RuntimeException(e);
210 }
211 }
212
213 protected void appendByte(byte b) throws IOException
214 {
215
216 if (b > 0 && _state == UTF8_ACCEPT)
217 {
218 _appendable.append((char)(b & 0xFF));
219 }
220 else
221 {
222 int i = b & 0xFF;
223 int type = BYTE_TABLE[i];
224 _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
225 int next = TRANS_TABLE[_state + type];
226
227 switch(next)
228 {
229 case UTF8_ACCEPT:
230 _state=next;
231 if (_codep < Character.MIN_HIGH_SURROGATE)
232 {
233 _appendable.append((char)_codep);
234 }
235 else
236 {
237 for (char c : Character.toChars(_codep))
238 _appendable.append(c);
239 }
240 break;
241
242 case UTF8_REJECT:
243 String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12);
244 _codep=0;
245 _state = UTF8_ACCEPT;
246 _appendable.append(REPLACEMENT);
247 throw new NotUtf8Exception(reason);
248
249 default:
250 _state=next;
251
252 }
253 }
254 }
255
256 public boolean isUtf8SequenceComplete()
257 {
258 return _state == UTF8_ACCEPT;
259 }
260
261 @SuppressWarnings("serial")
262 public static class NotUtf8Exception extends IllegalArgumentException
263 {
264 public NotUtf8Exception(String reason)
265 {
266 super("Not valid UTF8! "+reason);
267 }
268 }
269
270 protected void checkState()
271 {
272 if (!isUtf8SequenceComplete())
273 {
274 _codep=0;
275 _state = UTF8_ACCEPT;
276 try
277 {
278 _appendable.append(REPLACEMENT);
279 }
280 catch(IOException e)
281 {
282 throw new RuntimeException(e);
283 }
284 throw new NotUtf8Exception("incomplete UTF8 sequence");
285 }
286 }
287
288 public String toReplacedString()
289 {
290 if (!isUtf8SequenceComplete())
291 {
292 _codep=0;
293 _state = UTF8_ACCEPT;
294 try
295 {
296 _appendable.append(REPLACEMENT);
297 }
298 catch(IOException e)
299 {
300 throw new RuntimeException(e);
301 }
302 Throwable th= new NotUtf8Exception("incomplete UTF8 sequence");
303 LOG.warn(th.toString());
304 LOG.debug(th);
305 }
306 return _appendable.toString();
307 }
308 }