View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2013 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.server;
20  
21  import java.io.IOException;
22  
23  /** OutputWriter.
24   * A writer that can wrap a {@link HttpOutput} stream and provide
25   * character encodings.
26   *
27   * The UTF-8 encoding is done by this class and no additional
28   * buffers or Writers are used.
29   * The UTF-8 code was inspired by http://javolution.org
30   */
31  public class Utf8HttpWriter extends HttpWriter
32  {
33      int _surrogate=0;
34  
35      /* ------------------------------------------------------------ */
36      public Utf8HttpWriter(HttpOutput out)
37      {
38          super(out);
39      }
40  
41      /* ------------------------------------------------------------ */
42      @Override
43      public void write (char[] s,int offset, int length) throws IOException
44      {
45          HttpOutput out = _out;
46          if (length==0)
47              out.closeIfAllContentWritten();
48  
49          while (length > 0)
50          {
51              _bytes.reset();
52              int chars = length>MAX_OUTPUT_CHARS?MAX_OUTPUT_CHARS:length;
53  
54              byte[] buffer=_bytes.getBuf();
55              int bytes=_bytes.getCount();
56  
57              if (bytes+chars>buffer.length)
58                  chars=buffer.length-bytes;
59  
60              for (int i = 0; i < chars; i++)
61              {
62                  int code = s[offset+i];
63  
64                  // Do we already have a surrogate?
65                  if(_surrogate==0)
66                  {
67                      // No - is this char code a surrogate?
68                      if(Character.isHighSurrogate((char)code))
69                      {
70                          _surrogate=code; // UCS-?
71                          continue;
72                      }
73                  }
74                  // else handle a low surrogate
75                  else if(Character.isLowSurrogate((char)code))
76                  {
77                      code = Character.toCodePoint((char)_surrogate, (char)code); // UCS-4
78                  }
79                  // else UCS-2
80                  else
81                  {
82                      code=_surrogate; // UCS-2
83                      _surrogate=0; // USED
84                      i--;
85                  }
86  
87                  if ((code & 0xffffff80) == 0)
88                  {
89                      // 1b
90                      if (bytes>=buffer.length)
91                      {
92                          chars=i;
93                          break;
94                      }
95                      buffer[bytes++]=(byte)(code);
96                  }
97                  else
98                  {
99                      if((code&0xfffff800)==0)
100                     {
101                         // 2b
102                         if (bytes+2>buffer.length)
103                         {
104                             chars=i;
105                             break;
106                         }
107                         buffer[bytes++]=(byte)(0xc0|(code>>6));
108                         buffer[bytes++]=(byte)(0x80|(code&0x3f));
109                     }
110                     else if((code&0xffff0000)==0)
111                     {
112                         // 3b
113                         if (bytes+3>buffer.length)
114                         {
115                             chars=i;
116                             break;
117                         }
118                         buffer[bytes++]=(byte)(0xe0|(code>>12));
119                         buffer[bytes++]=(byte)(0x80|((code>>6)&0x3f));
120                         buffer[bytes++]=(byte)(0x80|(code&0x3f));
121                     }
122                     else if((code&0xff200000)==0)
123                     {
124                         // 4b
125                         if (bytes+4>buffer.length)
126                         {
127                             chars=i;
128                             break;
129                         }
130                         buffer[bytes++]=(byte)(0xf0|(code>>18));
131                         buffer[bytes++]=(byte)(0x80|((code>>12)&0x3f));
132                         buffer[bytes++]=(byte)(0x80|((code>>6)&0x3f));
133                         buffer[bytes++]=(byte)(0x80|(code&0x3f));
134                     }
135                     else if((code&0xf4000000)==0)
136                     {
137                         // 5b
138                         if (bytes+5>buffer.length)
139                         {
140                             chars=i;
141                             break;
142                         }
143                         buffer[bytes++]=(byte)(0xf8|(code>>24));
144                         buffer[bytes++]=(byte)(0x80|((code>>18)&0x3f));
145                         buffer[bytes++]=(byte)(0x80|((code>>12)&0x3f));
146                         buffer[bytes++]=(byte)(0x80|((code>>6)&0x3f));
147                         buffer[bytes++]=(byte)(0x80|(code&0x3f));
148                     }
149                     else if((code&0x80000000)==0)
150                     {
151                         // 6b
152                         if (bytes+6>buffer.length)
153                         {
154                             chars=i;
155                             break;
156                         }
157                         buffer[bytes++]=(byte)(0xfc|(code>>30));
158                         buffer[bytes++]=(byte)(0x80|((code>>24)&0x3f));
159                         buffer[bytes++]=(byte)(0x80|((code>>18)&0x3f));
160                         buffer[bytes++]=(byte)(0x80|((code>>12)&0x3f));
161                         buffer[bytes++]=(byte)(0x80|((code>>6)&0x3f));
162                         buffer[bytes++]=(byte)(0x80|(code&0x3f));
163                     }
164                     else
165                     {
166                         buffer[bytes++]=(byte)('?');
167                     }
168 
169                     _surrogate=0; // USED
170 
171                     if (bytes==buffer.length)
172                     {
173                         chars=i+1;
174                         break;
175                     }
176                 }
177             }
178             _bytes.setCount(bytes);
179 
180             _bytes.writeTo(out);
181             length-=chars;
182             offset+=chars;
183         }
184     }
185 }