1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.eclipse.jetty.util;
20
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23
24 import org.eclipse.jetty.util.log.Log;
25 import org.eclipse.jetty.util.log.Logger;
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 public abstract class Utf8Appendable
52 {
53 protected static final Logger LOG = Log.getLogger(Utf8Appendable.class);
54 public static final char REPLACEMENT = '\ufffd';
55 public static final byte[] REPLACEMENT_UTF8 = new byte[] {(byte)0xEF,(byte)0xBF,(byte)0xBD };
56 private static final int UTF8_ACCEPT = 0;
57 private static final int UTF8_REJECT = 12;
58
59 protected final Appendable _appendable;
60 protected int _state = UTF8_ACCEPT;
61
62 private static final byte[] BYTE_TABLE =
63 {
64
65
66 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
67 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
68 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
69 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
70 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
71 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
72 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
73 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
74 };
75
76 private static final byte[] TRANS_TABLE =
77 {
78
79
80 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
81 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
82 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
83 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
84 12,36,12,12,12,12,12,12,12,12,12,12
85 };
86
87 private int _codep;
88
89 public Utf8Appendable(Appendable appendable)
90 {
91 _appendable = appendable;
92 }
93
94 public abstract int length();
95
96 protected void reset()
97 {
98 _state = UTF8_ACCEPT;
99 }
100
101 public void append(byte b)
102 {
103 try
104 {
105 appendByte(b);
106 }
107 catch (IOException e)
108 {
109 throw new RuntimeException(e);
110 }
111 }
112
113 public void append(ByteBuffer buf)
114 {
115 try
116 {
117 while (buf.remaining() > 0)
118 {
119 appendByte(buf.get());
120 }
121 }
122 catch (IOException e)
123 {
124 throw new RuntimeException(e);
125 }
126 }
127
128 public void append(byte[] b, int offset, int length)
129 {
130 try
131 {
132 int end = offset + length;
133 for (int i = offset; i < end; i++)
134 appendByte(b[i]);
135 }
136 catch (IOException e)
137 {
138 throw new RuntimeException(e);
139 }
140 }
141
142 public boolean append(byte[] b, int offset, int length, int maxChars)
143 {
144 try
145 {
146 int end = offset + length;
147 for (int i = offset; i < end; i++)
148 {
149 if (length() > maxChars)
150 return false;
151 appendByte(b[i]);
152 }
153 return true;
154 }
155 catch (IOException e)
156 {
157 throw new RuntimeException(e);
158 }
159 }
160
161 protected void appendByte(byte b) throws IOException
162 {
163
164 if (b > 0 && _state == UTF8_ACCEPT)
165 {
166 _appendable.append((char)(b & 0xFF));
167 }
168 else
169 {
170 int i = b & 0xFF;
171 int type = BYTE_TABLE[i];
172 _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
173 int next = TRANS_TABLE[_state + type];
174
175 switch(next)
176 {
177 case UTF8_ACCEPT:
178 _state=next;
179 if (_codep < Character.MIN_HIGH_SURROGATE)
180 {
181 _appendable.append((char)_codep);
182 }
183 else
184 {
185 for (char c : Character.toChars(_codep))
186 _appendable.append(c);
187 }
188 break;
189
190 case UTF8_REJECT:
191 String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12);
192 _codep=0;
193 _state = UTF8_ACCEPT;
194 _appendable.append(REPLACEMENT);
195 throw new NotUtf8Exception(reason);
196
197 default:
198 _state=next;
199
200 }
201 }
202 }
203
204 public boolean isUtf8SequenceComplete()
205 {
206 return _state == UTF8_ACCEPT;
207 }
208
209 @SuppressWarnings("serial")
210 public static class NotUtf8Exception extends IllegalArgumentException
211 {
212 public NotUtf8Exception(String reason)
213 {
214 super("Not valid UTF8! "+reason);
215 }
216 }
217
218 protected void checkState()
219 {
220 if (!isUtf8SequenceComplete())
221 {
222 _codep=0;
223 _state = UTF8_ACCEPT;
224 try
225 {
226 _appendable.append(REPLACEMENT);
227 }
228 catch(IOException e)
229 {
230 throw new RuntimeException(e);
231 }
232 throw new NotUtf8Exception("incomplete UTF8 sequence");
233 }
234 }
235
236 public String toReplacedString()
237 {
238 if (!isUtf8SequenceComplete())
239 {
240 _codep=0;
241 _state = UTF8_ACCEPT;
242 try
243 {
244 _appendable.append(REPLACEMENT);
245 }
246 catch(IOException e)
247 {
248 throw new RuntimeException(e);
249 }
250 Throwable th= new NotUtf8Exception("incomplete UTF8 sequence");
251 LOG.warn(th.toString());
252 LOG.debug(th);
253 }
254 return _appendable.toString();
255 }
256 }