View Javadoc

1   // ========================================================================
2   // Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at 
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses. 
12  // ========================================================================
13  
14  package org.eclipse.jetty.http;
15  
16  import java.io.UnsupportedEncodingException;
17  
18  import org.eclipse.jetty.util.MultiMap;
19  import org.eclipse.jetty.util.StringUtil;
20  import org.eclipse.jetty.util.TypeUtil;
21  import org.eclipse.jetty.util.URIUtil;
22  import org.eclipse.jetty.util.UrlEncoded;
23  import org.eclipse.jetty.util.Utf8StringBuilder;
24  
25  
26  /* ------------------------------------------------------------ */
27  /** Http URI.
28   * Parse a HTTP URI from a string or byte array.  Given a URI
29   * <code>http://user@host:port/path/info;param?query#fragment</code>
30   * this class will split it into the following undecoded optional elements:<ul>
31   * <li>{@link #getScheme()} - http:</li>
32   * <li>{@link #getAuthority()} - //name@host:port</li>
33   * <li>{@link #getHost()} - host</li>
34   * <li>{@link #getPort()} - port</li>
35   * <li>{@link #getPath()} - /path/info</li>
36   * <li>{@link #getParam()} - param</li>
37   * <li>{@link #getQuery()} - query</li>
38   * <li>{@link #getFragment()} - fragment</li>
39   * </ul>
40   * 
41   */
42  public class HttpURI
43  {
44      private static final byte[] __empty={}; 
45      private final static int 
46      START=0,
47      AUTH_OR_PATH=1,
48      SCHEME_OR_PATH=2,
49      AUTH=4,
50      IPV6=5,
51      PORT=6,
52      PATH=7,
53      PARAM=8,
54      QUERY=9,
55      ASTERISK=10;
56      
57      boolean _partial=false;
58      byte[] _raw=__empty;
59      String _rawString;
60      int _scheme;
61      int _authority;
62      int _host;
63      int _port;
64      int _path;
65      int _param;
66      int _query;
67      int _fragment;
68      int _end;
69      boolean _encoded=false;
70      
71      final Utf8StringBuilder _utf8b = new Utf8StringBuilder(64);
72      
73      public HttpURI()
74      {
75          
76      } 
77      
78      /* ------------------------------------------------------------ */
79      /**
80       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
81       */
82      public HttpURI(boolean parsePartialAuth)
83      {
84          _partial=parsePartialAuth;
85      }
86      
87      public HttpURI(String raw)
88      {
89          _rawString=raw;
90          byte[] b = raw.getBytes();
91          parse(b,0,b.length);
92      }
93      
94      public HttpURI(byte[] raw,int offset, int length)
95      {
96          parse2(raw,offset,length);
97      }
98      
99      public void parse(String raw)
100     {
101         byte[] b = raw.getBytes();
102         parse2(b,0,b.length);
103         _rawString=raw;
104     }
105     
106     public void parse(byte[] raw,int offset, int length)
107     {
108         _rawString=null;
109         parse2(raw,offset,length);
110     }
111     
112     private void parse2(byte[] raw,int offset, int length)
113     {
114         _encoded=false;
115         _raw=raw;
116         int i=offset;
117         int e=offset+length;
118         int state=START;
119         int m=offset;
120         _end=offset+length;
121         _scheme=offset;
122         _authority=offset;
123         _host=offset;
124         _port=offset;
125         _path=offset;
126         _param=_end;
127         _query=_end;
128         _fragment=_end;
129         while (i<e)
130         {
131             char c=(char)(0xff&_raw[i]);
132             int s=i++;
133             
134             state: switch (state)
135             {
136                 case START:
137                 {
138                     m=s;
139                     switch(c)
140                     {
141                         case '/':
142                             state=AUTH_OR_PATH;
143                             break;
144                         case ';':
145                             _param=s;
146                             state=PARAM;
147                             break;
148                         case '?':
149                             _param=s;
150                             _query=s;
151                             state=QUERY;
152                             break;
153                         case '#':
154                             _param=s;
155                             _query=s;
156                             _fragment=s;
157                             break;
158                         case '*':
159                             _path=s;
160                             state=ASTERISK;
161                             break;
162                             
163                         default:
164                             if (Character.isLetterOrDigit(c))
165                                 state=SCHEME_OR_PATH;
166                             else
167                                 throw new IllegalArgumentException(StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
168                     }
169                     
170                     continue;
171                 }
172 
173                 case AUTH_OR_PATH:
174                 {
175                     if ((_partial||_scheme!=_authority) && c=='/')
176                     {
177                         _host=i;
178                         _port=_end;
179                         _path=_end;
180                         state=AUTH;
181                     }
182                     else if (c==';' || c=='?' || c=='#')
183                     {
184                         i--;
185                         state=PATH;
186                     }  
187                     else
188                     {
189                         _host=m;
190                         _port=m;
191                         state=PATH;
192                     }  
193                     continue;
194                 }
195                 
196                 case SCHEME_OR_PATH:
197                 {
198                     // short cut for http and https
199                     if (length>6 && c=='t')
200                     {
201                         if (_raw[offset+3]==':')
202                         {
203                             s=offset+3;
204                             i=offset+4;
205                             c=':';
206                         }
207                         else if (_raw[offset+4]==':')
208                         {
209                             s=offset+4;
210                             i=offset+5;
211                             c=':';
212                         }
213                         else if (_raw[offset+5]==':')
214                         {
215                             s=offset+5;
216                             i=offset+6;
217                             c=':';
218                         }
219                     }
220                     
221                     switch (c)
222                     {
223                         case ':':
224                         {
225                             m = i++;
226                             _authority = m;
227                             _path = m;
228                             c = (char)(0xff & _raw[i]);
229                             if (c == '/')
230                                 state = AUTH_OR_PATH;
231                             else
232                             {
233                                 _host = m;
234                                 _port = m;
235                                 state = PATH;
236                             }
237                             break;
238                         }
239                         
240                         case '/':
241                         {
242                             state = PATH;
243                             break;
244                         }
245                         
246                         case ';':
247                         {
248                             _param = s;
249                             state = PARAM;
250                             break;
251                         }
252                         
253                         case '?':
254                         {
255                             _param = s;
256                             _query = s;
257                             state = QUERY;
258                             break;
259                         }
260                         
261                         case '#':
262                         {
263                             _param = s;
264                             _query = s;
265                             _fragment = s;
266                             break;
267                         }
268                     }
269                     continue;
270                 }
271                 
272                 case AUTH:
273                 {
274                     switch (c)
275                     {
276 
277                         case '/':
278                         {
279                             m = s;
280                             _path = m;
281                             _port = _path;
282                             state = PATH;
283                             break;
284                         }
285                         case '@':
286                         {
287                             _host = i;
288                             break;
289                         }
290                         case ':':
291                         {
292                             _port = s;
293                             state = PORT;
294                             break;
295                         }
296                         case '[':
297                         {
298                             state = IPV6;
299                             break;
300                         }
301                     }
302                     continue;
303                 }
304 
305                 case IPV6:
306                 {
307                     switch (c)
308                     {
309                         case '/':
310                         {
311                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
312                         }
313                         case ']':
314                         {
315                             state = AUTH;
316                             break;
317                         }
318                     }
319 
320                     continue;
321                 }
322                 
323                 case PORT:
324                 {
325                     if (c=='/')
326                     {
327                         m=s;
328                         _path=m;
329                         if (_port<=_authority)
330                             _port=_path;
331                         state=PATH;
332                     }
333                     continue;
334                 }
335                 
336                 case PATH:
337                 {
338                     switch (c)
339                     {
340                         case ';':
341                         {
342                             _param = s;
343                             state = PARAM;
344                             break;
345                         }
346                         case '?':
347                         {
348                             _param = s;
349                             _query = s;
350                             state = QUERY;
351                             break;
352                         }
353                         case '#':
354                         {
355                             _param = s;
356                             _query = s;
357                             _fragment = s;
358                             break state;
359                         }
360                         case '%':
361                         {
362                             _encoded=true;
363                         }
364                     }
365                     continue;
366                 }
367                 
368                 case PARAM:
369                 {
370                     switch (c)
371                     {
372                         case '?':
373                         {
374                             _query = s;
375                             state = QUERY;
376                             break;
377                         }
378                         case '#':
379                         {
380                             _query = s;
381                             _fragment = s;
382                             break state;
383                         }
384                     }
385                     continue;
386                 }
387                 
388                 case QUERY:
389                 {
390                     if (c=='#')
391                     {
392                         _fragment=s;
393                         break state;
394                     }
395                     continue;
396                 }
397                 
398                 case ASTERISK:
399                 {
400                     throw new IllegalArgumentException("only '*'");
401                 }
402             }
403         }
404     }
405     
406     private String toUtf8String(int offset,int length)
407     {
408         _utf8b.reset();
409         _utf8b.append(_raw,offset,length);
410         return _utf8b.toString();
411     }
412     
413     public String getScheme()
414     {
415         if (_scheme==_authority)
416             return null;
417         int l=_authority-_scheme;
418         if (l==5 && 
419             _raw[_scheme]=='h' && 
420             _raw[_scheme+1]=='t' && 
421             _raw[_scheme+2]=='t' && 
422             _raw[_scheme+3]=='p' )
423             return HttpSchemes.HTTP;
424         if (l==6 && 
425             _raw[_scheme]=='h' && 
426             _raw[_scheme+1]=='t' && 
427             _raw[_scheme+2]=='t' && 
428             _raw[_scheme+3]=='p' && 
429             _raw[_scheme+4]=='s' )
430             return HttpSchemes.HTTPS;
431         
432         return toUtf8String(_scheme,_authority-_scheme-1);
433     }
434     
435     public String getAuthority()
436     {
437         if (_authority==_path)
438             return null;
439         return toUtf8String(_authority,_path-_authority);
440     }
441     
442     public String getHost()
443     {
444         if (_host==_port)
445             return null;
446         return toUtf8String(_host,_port-_host);
447     }
448     
449     public int getPort()
450     {
451         if (_port==_path)
452             return -1;
453         return TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
454     }
455     
456     public String getPath()
457     {
458         if (_path==_param)
459             return null;
460         return toUtf8String(_path,_param-_path);
461     }
462     
463     public String getDecodedPath()
464     {
465         if (_path==_param)
466             return null;
467 
468         int length = _param-_path;
469         byte[] bytes=null;
470         int n=0;
471 
472         for (int i=_path;i<_param;i++)
473         {
474             byte b = _raw[i];
475             
476             if (b=='%' && (i+2)<_param)
477             {
478                 b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
479                 i+=2;
480             }
481             else if (bytes==null)
482             {
483                 n++;
484                 continue;
485             }
486             
487             if (bytes==null)
488             {
489                 bytes=new byte[length];
490                 System.arraycopy(_raw,_path,bytes,0,n);
491             }
492             
493             bytes[n++]=b;
494         }
495 
496         if (bytes==null)
497             return toUtf8String(_path,length);
498 
499         _utf8b.reset();
500         _utf8b.append(bytes,0,n);
501         return _utf8b.toString();
502     }
503     
504     public String getPathAndParam()
505     {
506         if (_path==_query)
507             return null;
508         return toUtf8String(_path,_query-_path);
509     }
510     
511     public String getCompletePath()
512     {
513         if (_path==_end)
514             return null;
515         return toUtf8String(_path,_end-_path);
516     }
517     
518     public String getParam()
519     {
520         if (_param==_query)
521             return null;
522         return toUtf8String(_param+1,_query-_param-1);
523     }
524     
525     public String getQuery()
526     {
527         if (_query==_fragment)
528             return null;
529         return toUtf8String(_query+1,_fragment-_query-1);
530     }
531     
532     public String getQuery(String encoding)
533     {
534         if (_query==_fragment)
535             return null;
536         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
537     }
538     
539     public boolean hasQuery()
540     {
541         return (_fragment>_query);
542     }
543     
544     public String getFragment()
545     {
546         if (_fragment==_end)
547             return null;
548         return toUtf8String(_fragment+1,_end-_fragment-1);
549     }
550 
551     public void decodeQueryTo(MultiMap parameters) 
552     {
553         if (_query==_fragment)
554             return;
555         _utf8b.reset();
556         UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters,_utf8b);
557     }
558 
559     public void decodeQueryTo(MultiMap parameters, String encoding) 
560         throws UnsupportedEncodingException
561     {
562         if (_query==_fragment)
563             return;
564        
565         if (encoding==null || StringUtil.isUTF8(encoding))
566             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
567         else
568             UrlEncoded.decodeTo(toUtf8String(_query+1,_fragment-_query-1),parameters,encoding);
569     }
570 
571     public void clear()
572     {
573         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
574         _raw=__empty;
575         _rawString="";
576         _encoded=false;
577     }
578     
579     public String toString()
580     {
581         if (_rawString==null)
582             _rawString=toUtf8String(_scheme,_end-_scheme);
583         return _rawString;
584     }
585     
586     public void writeTo(Utf8StringBuilder buf)
587     {
588         buf.append(_raw,_scheme,_end-_scheme);
589     }
590     
591 }