1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.eclipse.jetty.util;
20
21 import java.io.IOException;
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47 public abstract class Utf8Appendable
48 {
49 public static final char REPLACEMENT = '\ufffd';
50 private static final int UTF8_ACCEPT = 0;
51 private static final int UTF8_REJECT = 12;
52
53 protected final Appendable _appendable;
54 protected int _state = UTF8_ACCEPT;
55
56 private static final byte[] BYTE_TABLE =
57 {
58
59
60 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
61 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
62 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
63 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
64 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
65 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
66 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
67 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
68 };
69
70 private static final byte[] TRANS_TABLE =
71 {
72
73
74 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
75 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
76 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
77 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
78 12,36,12,12,12,12,12,12,12,12,12,12
79 };
80
81 private int _codep;
82
83 public Utf8Appendable(Appendable appendable)
84 {
85 _appendable = appendable;
86 }
87
88 public abstract int length();
89
90 protected void reset()
91 {
92 _state = UTF8_ACCEPT;
93 }
94
95 public void append(byte b)
96 {
97 try
98 {
99 appendByte(b);
100 }
101 catch (IOException e)
102 {
103 throw new RuntimeException(e);
104 }
105 }
106
107 public void append(byte[] b, int offset, int length)
108 {
109 try
110 {
111 int end = offset + length;
112 for (int i = offset; i < end; i++)
113 appendByte(b[i]);
114 }
115 catch (IOException e)
116 {
117 throw new RuntimeException(e);
118 }
119 }
120
121 public boolean append(byte[] b, int offset, int length, int maxChars)
122 {
123 try
124 {
125 int end = offset + length;
126 for (int i = offset; i < end; i++)
127 {
128 if (length() > maxChars)
129 return false;
130 appendByte(b[i]);
131 }
132 return true;
133 }
134 catch (IOException e)
135 {
136 throw new RuntimeException(e);
137 }
138 }
139
140 protected void appendByte(byte b) throws IOException
141 {
142
143 if (b > 0 && _state == UTF8_ACCEPT)
144 {
145 _appendable.append((char)(b & 0xFF));
146 }
147 else
148 {
149 int i = b & 0xFF;
150 int type = BYTE_TABLE[i];
151 _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
152 int next = TRANS_TABLE[_state + type];
153
154 switch(next)
155 {
156 case UTF8_ACCEPT:
157 _state=next;
158 if (_codep < Character.MIN_HIGH_SURROGATE)
159 {
160 _appendable.append((char)_codep);
161 }
162 else
163 {
164 for (char c : Character.toChars(_codep))
165 _appendable.append(c);
166 }
167 break;
168
169 case UTF8_REJECT:
170 String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12);
171 _codep=0;
172 _state = UTF8_ACCEPT;
173 _appendable.append(REPLACEMENT);
174 throw new NotUtf8Exception(reason);
175
176 default:
177 _state=next;
178
179 }
180 }
181 }
182
183 public boolean isUtf8SequenceComplete()
184 {
185 return _state == UTF8_ACCEPT;
186 }
187
188 public static class NotUtf8Exception extends IllegalArgumentException
189 {
190 public NotUtf8Exception(String reason)
191 {
192 super("Not valid UTF8! "+reason);
193 }
194 }
195 }