1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.eclipse.jetty.util;
20
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23
24 import org.eclipse.jetty.util.log.Log;
25 import org.eclipse.jetty.util.log.Logger;
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 public abstract class Utf8Appendable
52 {
53 protected static final Logger LOG = Log.getLogger(Utf8Appendable.class);
54 public static final char REPLACEMENT = '\ufffd';
55 private static final int UTF8_ACCEPT = 0;
56 private static final int UTF8_REJECT = 12;
57
58 protected final Appendable _appendable;
59 protected int _state = UTF8_ACCEPT;
60
61 private static final byte[] BYTE_TABLE =
62 {
63
64
65 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
66 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
67 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
68 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
69 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
70 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
71 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
72 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
73 };
74
75 private static final byte[] TRANS_TABLE =
76 {
77
78
79 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
80 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
81 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
82 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
83 12,36,12,12,12,12,12,12,12,12,12,12
84 };
85
86 private int _codep;
87
88 public Utf8Appendable(Appendable appendable)
89 {
90 _appendable = appendable;
91 }
92
93 public abstract int length();
94
95 protected void reset()
96 {
97 _state = UTF8_ACCEPT;
98 }
99
100 public void append(byte b)
101 {
102 try
103 {
104 appendByte(b);
105 }
106 catch (IOException e)
107 {
108 throw new RuntimeException(e);
109 }
110 }
111
112 public void append(ByteBuffer buf)
113 {
114 try
115 {
116 while (buf.remaining() > 0)
117 {
118 appendByte(buf.get());
119 }
120 }
121 catch (IOException e)
122 {
123 throw new RuntimeException(e);
124 }
125 }
126
127 public void append(byte[] b, int offset, int length)
128 {
129 try
130 {
131 int end = offset + length;
132 for (int i = offset; i < end; i++)
133 appendByte(b[i]);
134 }
135 catch (IOException e)
136 {
137 throw new RuntimeException(e);
138 }
139 }
140
141 public boolean append(byte[] b, int offset, int length, int maxChars)
142 {
143 try
144 {
145 int end = offset + length;
146 for (int i = offset; i < end; i++)
147 {
148 if (length() > maxChars)
149 return false;
150 appendByte(b[i]);
151 }
152 return true;
153 }
154 catch (IOException e)
155 {
156 throw new RuntimeException(e);
157 }
158 }
159
160 protected void appendByte(byte b) throws IOException
161 {
162
163 if (b > 0 && _state == UTF8_ACCEPT)
164 {
165 _appendable.append((char)(b & 0xFF));
166 }
167 else
168 {
169 int i = b & 0xFF;
170 int type = BYTE_TABLE[i];
171 _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
172 int next = TRANS_TABLE[_state + type];
173
174 switch(next)
175 {
176 case UTF8_ACCEPT:
177 _state=next;
178 if (_codep < Character.MIN_HIGH_SURROGATE)
179 {
180 _appendable.append((char)_codep);
181 }
182 else
183 {
184 for (char c : Character.toChars(_codep))
185 _appendable.append(c);
186 }
187 break;
188
189 case UTF8_REJECT:
190 String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12);
191 _codep=0;
192 _state = UTF8_ACCEPT;
193 _appendable.append(REPLACEMENT);
194 throw new NotUtf8Exception(reason);
195
196 default:
197 _state=next;
198
199 }
200 }
201 }
202
203 public boolean isUtf8SequenceComplete()
204 {
205 return _state == UTF8_ACCEPT;
206 }
207
208 @SuppressWarnings("serial")
209 public static class NotUtf8Exception extends IllegalArgumentException
210 {
211 public NotUtf8Exception(String reason)
212 {
213 super("Not valid UTF8! "+reason);
214 }
215 }
216
217 protected void checkState()
218 {
219 if (!isUtf8SequenceComplete())
220 {
221 _codep=0;
222 _state = UTF8_ACCEPT;
223 try
224 {
225 _appendable.append(REPLACEMENT);
226 }
227 catch(IOException e)
228 {
229 throw new RuntimeException(e);
230 }
231 throw new NotUtf8Exception("incomplete UTF8 sequence");
232 }
233 }
234
235 public String toReplacedString()
236 {
237 if (!isUtf8SequenceComplete())
238 {
239 _codep=0;
240 _state = UTF8_ACCEPT;
241 try
242 {
243 _appendable.append(REPLACEMENT);
244 }
245 catch(IOException e)
246 {
247 throw new RuntimeException(e);
248 }
249 Throwable th= new NotUtf8Exception("incomplete UTF8 sequence");
250 LOG.warn(th.toString());
251 LOG.debug(th);
252 }
253 return _appendable.toString();
254 }
255 }