1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.eclipse.jetty.util;
20
21 import java.io.IOException;
22
23 import org.eclipse.jetty.util.log.Log;
24 import org.eclipse.jetty.util.log.Logger;
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50 public abstract class Utf8Appendable
51 {
52 protected static final Logger LOG = Log.getLogger(Utf8Appendable.class);
53 public static final char REPLACEMENT = '\ufffd';
54 private static final int UTF8_ACCEPT = 0;
55 private static final int UTF8_REJECT = 12;
56
57 protected final Appendable _appendable;
58 protected int _state = UTF8_ACCEPT;
59
60 private static final byte[] BYTE_TABLE =
61 {
62
63
64 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
65 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
66 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
67 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
68 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
69 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
70 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
71 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
72 };
73
74 private static final byte[] TRANS_TABLE =
75 {
76
77
78 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
79 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
80 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
81 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
82 12,36,12,12,12,12,12,12,12,12,12,12
83 };
84
85 private int _codep;
86
87 public Utf8Appendable(Appendable appendable)
88 {
89 _appendable = appendable;
90 }
91
92 public abstract int length();
93
94 protected void reset()
95 {
96 _state = UTF8_ACCEPT;
97 }
98
99 public void append(byte b)
100 {
101 try
102 {
103 appendByte(b);
104 }
105 catch (IOException e)
106 {
107 throw new RuntimeException(e);
108 }
109 }
110
111 public void append(byte[] b, int offset, int length)
112 {
113 try
114 {
115 int end = offset + length;
116 for (int i = offset; i < end; i++)
117 appendByte(b[i]);
118 }
119 catch (IOException e)
120 {
121 throw new RuntimeException(e);
122 }
123 }
124
125 public boolean append(byte[] b, int offset, int length, int maxChars)
126 {
127 try
128 {
129 int end = offset + length;
130 for (int i = offset; i < end; i++)
131 {
132 if (length() > maxChars)
133 return false;
134 appendByte(b[i]);
135 }
136 return true;
137 }
138 catch (IOException e)
139 {
140 throw new RuntimeException(e);
141 }
142 }
143
144 protected void appendByte(byte b) throws IOException
145 {
146
147 if (b > 0 && _state == UTF8_ACCEPT)
148 {
149 _appendable.append((char)(b & 0xFF));
150 }
151 else
152 {
153 int i = b & 0xFF;
154 int type = BYTE_TABLE[i];
155 _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
156 int next = TRANS_TABLE[_state + type];
157
158 switch(next)
159 {
160 case UTF8_ACCEPT:
161 _state=next;
162 if (_codep < Character.MIN_HIGH_SURROGATE)
163 {
164 _appendable.append((char)_codep);
165 }
166 else
167 {
168 for (char c : Character.toChars(_codep))
169 _appendable.append(c);
170 }
171 break;
172
173 case UTF8_REJECT:
174 String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12);
175 _codep=0;
176 _state = UTF8_ACCEPT;
177 _appendable.append(REPLACEMENT);
178 throw new NotUtf8Exception(reason);
179
180 default:
181 _state=next;
182
183 }
184 }
185 }
186
187 public boolean isUtf8SequenceComplete()
188 {
189 return _state == UTF8_ACCEPT;
190 }
191
192 public static class NotUtf8Exception extends IllegalArgumentException
193 {
194 public NotUtf8Exception(String reason)
195 {
196 super("Not valid UTF8! "+reason);
197 }
198 }
199
200 protected void checkState()
201 {
202 if (!isUtf8SequenceComplete())
203 {
204 _codep=0;
205 _state = UTF8_ACCEPT;
206 try
207 {
208 _appendable.append(REPLACEMENT);
209 }
210 catch(IOException e)
211 {
212 throw new RuntimeException(e);
213 }
214 throw new NotUtf8Exception("incomplete UTF8 sequence");
215 }
216 }
217
218 public String toReplacedString()
219 {
220 if (!isUtf8SequenceComplete())
221 {
222 _codep=0;
223 _state = UTF8_ACCEPT;
224 try
225 {
226 _appendable.append(REPLACEMENT);
227 }
228 catch(IOException e)
229 {
230 throw new RuntimeException(e);
231 }
232 Throwable th= new NotUtf8Exception("incomplete UTF8 sequence");
233 LOG.warn(th.toString());
234 LOG.debug(th);
235 }
236 return _appendable.toString();
237 }
238 }