1
2
3
4
5
6
7
8
9
10
11
12
13 package org.eclipse.jetty.util;
14
15 import java.io.IOException;
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41 public abstract class Utf8Appendable
42 {
43 private final char REPLACEMENT = '\ufffd';
44 private static final int UTF8_ACCEPT = 0;
45 private static final int UTF8_REJECT = 12;
46
47 protected final Appendable _appendable;
48 protected int _state = UTF8_ACCEPT;
49
50 private static final byte[] BYTE_TABLE =
51 {
52
53
54 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
55 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
56 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
57 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
58 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
59 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
60 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
61 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
62 };
63
64 private static final byte[] TRANS_TABLE =
65 {
66
67
68 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
69 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
70 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
71 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
72 12,36,12,12,12,12,12,12,12,12,12,12
73 };
74
75 private int _codep;
76
77 public Utf8Appendable(Appendable appendable)
78 {
79 _appendable = appendable;
80 }
81
82 public abstract int length();
83
84 protected void reset()
85 {
86 _state = UTF8_ACCEPT;
87 }
88
89 public void append(byte b)
90 {
91 try
92 {
93 appendByte(b);
94 }
95 catch (IOException e)
96 {
97 throw new RuntimeException(e);
98 }
99 }
100
101 public void append(byte[] b, int offset, int length)
102 {
103 try
104 {
105 int end = offset + length;
106 for (int i = offset; i < end; i++)
107 appendByte(b[i]);
108 }
109 catch (IOException e)
110 {
111 throw new RuntimeException(e);
112 }
113 }
114
115 public boolean append(byte[] b, int offset, int length, int maxChars)
116 {
117 try
118 {
119 int end = offset + length;
120 for (int i = offset; i < end; i++)
121 {
122 if (length() > maxChars)
123 return false;
124 appendByte(b[i]);
125 }
126 return true;
127 }
128 catch (IOException e)
129 {
130 throw new RuntimeException(e);
131 }
132 }
133
134 protected void appendByte(byte b) throws IOException
135 {
136
137 if (b > 0 && _state == UTF8_ACCEPT)
138 {
139 _appendable.append((char)(b & 0xFF));
140 }
141 else
142 {
143 int i = b & 0xFF;
144 int type = BYTE_TABLE[i];
145 _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
146 int next = TRANS_TABLE[_state + type];
147
148 switch(next)
149 {
150 case UTF8_ACCEPT:
151 _state=next;
152 if (_codep < Character.MIN_HIGH_SURROGATE)
153 {
154 _appendable.append((char)_codep);
155 }
156 else
157 {
158 for (char c : Character.toChars(_codep))
159 _appendable.append(c);
160 }
161 break;
162
163 case UTF8_REJECT:
164 String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12);
165 _codep=0;
166 _state = UTF8_ACCEPT;
167 _appendable.append(REPLACEMENT);
168 throw new NotUtf8Exception(reason);
169
170 default:
171 _state=next;
172
173 }
174 }
175 }
176
177 public boolean isUtf8SequenceComplete()
178 {
179 return _state == UTF8_ACCEPT;
180 }
181
182 public static class NotUtf8Exception extends IllegalArgumentException
183 {
184 public NotUtf8Exception(String reason)
185 {
186 super("Not valid UTF8! "+reason);
187 }
188 }
189 }