View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2013 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.http;
20  
21  import java.io.UnsupportedEncodingException;
22  import java.net.URI;
23  import java.nio.charset.Charset;
24  
25  import org.eclipse.jetty.util.MultiMap;
26  import org.eclipse.jetty.util.StringUtil;
27  import org.eclipse.jetty.util.TypeUtil;
28  import org.eclipse.jetty.util.URIUtil;
29  import org.eclipse.jetty.util.UrlEncoded;
30  import org.eclipse.jetty.util.Utf8StringBuilder;
31  
32  
33  /* ------------------------------------------------------------ */
34  /** Http URI.
35   * Parse a HTTP URI from a string or byte array.  Given a URI
36   * <code>http://user@host:port/path/info;param?query#fragment</code>
37   * this class will split it into the following undecoded optional elements:<ul>
38   * <li>{@link #getScheme()} - http:</li>
39   * <li>{@link #getAuthority()} - //name@host:port</li>
40   * <li>{@link #getHost()} - host</li>
41   * <li>{@link #getPort()} - port</li>
42   * <li>{@link #getPath()} - /path/info</li>
43   * <li>{@link #getParam()} - param</li>
44   * <li>{@link #getQuery()} - query</li>
45   * <li>{@link #getFragment()} - fragment</li>
46   * </ul>
47   *
48   */
49  public class HttpURI
50  {
51      private static final byte[] __empty={};
52      private final static int
53      START=0,
54      AUTH_OR_PATH=1,
55      SCHEME_OR_PATH=2,
56      AUTH=4,
57      IPV6=5,
58      PORT=6,
59      PATH=7,
60      PARAM=8,
61      QUERY=9,
62      ASTERISK=10;
63  
64      final Charset _charset;
65      boolean _partial=false;
66      byte[] _raw=__empty;
67      String _rawString;
68      int _scheme;
69      int _authority;
70      int _host;
71      int _port;
72      int _portValue;
73      int _path;
74      int _param;
75      int _query;
76      int _fragment;
77      int _end;
78      boolean _encoded=false;
79  
80      public HttpURI()
81      {
82          _charset = URIUtil.__CHARSET;
83      }
84  
85      public HttpURI(Charset charset)
86      {
87          _charset = charset;
88      }
89  
90      /* ------------------------------------------------------------ */
91      /**
92       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
93       */
94      public HttpURI(boolean parsePartialAuth)
95      {
96          _partial=parsePartialAuth;
97          _charset = URIUtil.__CHARSET;
98      }
99  
100     public HttpURI(String raw)
101     {
102         _rawString=raw;
103         byte[] b;
104         try
105         {
106             b = raw.getBytes(StringUtil.__UTF8);
107         }
108         catch (UnsupportedEncodingException e)
109         {
110            throw new RuntimeException(e.getMessage());
111         }
112         parse(b,0,b.length);
113         _charset = URIUtil.__CHARSET;
114     }
115 
116     public HttpURI(byte[] raw,int offset, int length)
117     {
118         parse2(raw,offset,length);
119         _charset = URIUtil.__CHARSET;
120     }
121 
122     public HttpURI(URI uri)
123     {
124         parse(uri.toASCIIString());
125         _charset = URIUtil.__CHARSET;
126     }
127 
128     public void parse(String raw)
129     {
130         byte[] b = StringUtil.getUtf8Bytes(raw);
131         parse2(b,0,b.length);
132         _rawString=raw;
133     }
134 
135     public void parseConnect(String raw)
136     {
137         byte[] b = StringUtil.getBytes(raw);
138         parseConnect(b,0,b.length);
139         _rawString=raw;
140     }
141 
142     public void parse(byte[] raw,int offset, int length)
143     {
144         _rawString=null;
145         parse2(raw,offset,length);
146     }
147 
148 
149     public void parseConnect(byte[] raw,int offset, int length)
150     {
151         _rawString=null;
152         _encoded=false;
153         _raw=raw;
154         int i=offset;
155         int e=offset+length;
156         int state=AUTH;
157         _end=offset+length;
158         _scheme=offset;
159         _authority=offset;
160         _host=offset;
161         _port=_end;
162         _portValue=-1;
163         _path=_end;
164         _param=_end;
165         _query=_end;
166         _fragment=_end;
167 
168         loop: while (i<e)
169         {
170             char c=(char)(0xff&_raw[i]);
171             int s=i++;
172 
173             switch (state)
174             {
175                 case AUTH:
176                 {
177                     switch (c)
178                     {
179                         case ':':
180                         {
181                             _port = s;
182                             break loop;
183                         }
184                         case '[':
185                         {
186                             state = IPV6;
187                             break;
188                         }
189                     }
190                     continue;
191                 }
192 
193                 case IPV6:
194                 {
195                     switch (c)
196                     {
197                         case '/':
198                         {
199                             throw new IllegalArgumentException("No closing ']' for " + new String(_raw,offset,length,_charset));
200                         }
201                         case ']':
202                         {
203                             state = AUTH;
204                             break;
205                         }
206                     }
207 
208                     continue;
209                 }
210             }
211         }
212 
213         if (_port<_path)
214             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
215         else
216             throw new IllegalArgumentException("No port");
217         _path=offset;
218     }
219 
220 
221     private void parse2(byte[] raw,int offset, int length)
222     {
223         _encoded=false;
224         _raw=raw;
225         int i=offset;
226         int e=offset+length;
227         int state=START;
228         int m=offset;
229         _end=offset+length;
230         _scheme=offset;
231         _authority=offset;
232         _host=offset;
233         _port=offset;
234         _portValue=-1;
235         _path=offset;
236         _param=_end;
237         _query=_end;
238         _fragment=_end;
239         while (i<e)
240         {
241             char c=(char)(0xff&_raw[i]);
242             int s=i++;
243 
244             state: switch (state)
245             {
246                 case START:
247                 {
248                     m=s;
249                     switch(c)
250                     {
251                         case '/':
252                             state=AUTH_OR_PATH;
253                             break;
254                         case ';':
255                             _param=s;
256                             state=PARAM;
257                             break;
258                         case '?':
259                             _param=s;
260                             _query=s;
261                             state=QUERY;
262                             break;
263                         case '#':
264                             _param=s;
265                             _query=s;
266                             _fragment=s;
267                             break;
268                         case '*':
269                             _path=s;
270                             state=ASTERISK;
271                             break;
272 
273                         default:
274                             state=SCHEME_OR_PATH;
275                     }
276 
277                     continue;
278                 }
279 
280                 case AUTH_OR_PATH:
281                 {
282                     if ((_partial||_scheme!=_authority) && c=='/')
283                     {
284                         _host=i;
285                         _port=_end;
286                         _path=_end;
287                         state=AUTH;
288                     }
289                     else if (c==';' || c=='?' || c=='#')
290                     {
291                         i--;
292                         state=PATH;
293                     }
294                     else
295                     {
296                         _host=m;
297                         _port=m;
298                         state=PATH;
299                     }
300                     continue;
301                 }
302 
303                 case SCHEME_OR_PATH:
304                 {
305                     // short cut for http and https
306                     if (length>6 && c=='t')
307                     {
308                         if (_raw[offset+3]==':')
309                         {
310                             s=offset+3;
311                             i=offset+4;
312                             c=':';
313                         }
314                         else if (_raw[offset+4]==':')
315                         {
316                             s=offset+4;
317                             i=offset+5;
318                             c=':';
319                         }
320                         else if (_raw[offset+5]==':')
321                         {
322                             s=offset+5;
323                             i=offset+6;
324                             c=':';
325                         }
326                     }
327 
328                     switch (c)
329                     {
330                         case ':':
331                         {
332                             m = i++;
333                             _authority = m;
334                             _path = m;
335                             c = (char)(0xff & _raw[i]);
336                             if (c == '/')
337                                 state = AUTH_OR_PATH;
338                             else
339                             {
340                                 _host = m;
341                                 _port = m;
342                                 state = PATH;
343                             }
344                             break;
345                         }
346 
347                         case '/':
348                         {
349                             state = PATH;
350                             break;
351                         }
352 
353                         case ';':
354                         {
355                             _param = s;
356                             state = PARAM;
357                             break;
358                         }
359 
360                         case '?':
361                         {
362                             _param = s;
363                             _query = s;
364                             state = QUERY;
365                             break;
366                         }
367 
368                         case '#':
369                         {
370                             _param = s;
371                             _query = s;
372                             _fragment = s;
373                             break;
374                         }
375                     }
376                     continue;
377                 }
378 
379                 case AUTH:
380                 {
381                     switch (c)
382                     {
383 
384                         case '/':
385                         {
386                             m = s;
387                             _path = m;
388                             _port = _path;
389                             state = PATH;
390                             break;
391                         }
392                         case '@':
393                         {
394                             _host = i;
395                             break;
396                         }
397                         case ':':
398                         {
399                             _port = s;
400                             state = PORT;
401                             break;
402                         }
403                         case '[':
404                         {
405                             state = IPV6;
406                             break;
407                         }
408                     }
409                     continue;
410                 }
411 
412                 case IPV6:
413                 {
414                     switch (c)
415                     {
416                         case '/':
417                         {
418                             throw new IllegalArgumentException("No closing ']' for " + new String(_raw,offset,length,_charset));
419                         }
420                         case ']':
421                         {
422                             state = AUTH;
423                             break;
424                         }
425                     }
426 
427                     continue;
428                 }
429 
430                 case PORT:
431                 {
432                     if (c=='/')
433                     {
434                         m=s;
435                         _path=m;
436                         if (_port<=_authority)
437                             _port=_path;
438                         state=PATH;
439                     }
440                     continue;
441                 }
442 
443                 case PATH:
444                 {
445                     switch (c)
446                     {
447                         case ';':
448                         {
449                             _param = s;
450                             state = PARAM;
451                             break;
452                         }
453                         case '?':
454                         {
455                             _param = s;
456                             _query = s;
457                             state = QUERY;
458                             break;
459                         }
460                         case '#':
461                         {
462                             _param = s;
463                             _query = s;
464                             _fragment = s;
465                             break state;
466                         }
467                         case '%':
468                         {
469                             _encoded=true;
470                         }
471                     }
472                     continue;
473                 }
474 
475                 case PARAM:
476                 {
477                     switch (c)
478                     {
479                         case '?':
480                         {
481                             _query = s;
482                             state = QUERY;
483                             break;
484                         }
485                         case '#':
486                         {
487                             _query = s;
488                             _fragment = s;
489                             break state;
490                         }
491                     }
492                     continue;
493                 }
494 
495                 case QUERY:
496                 {
497                     if (c=='#')
498                     {
499                         _fragment=s;
500                         break state;
501                     }
502                     continue;
503                 }
504 
505                 case ASTERISK:
506                 {
507                     throw new IllegalArgumentException("only '*'");
508                 }
509             }
510         }
511 
512         if (_port<_path)
513             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
514     }
515 
516     public String getScheme()
517     {
518         if (_scheme==_authority)
519             return null;
520         int l=_authority-_scheme;
521         if (l==5 &&
522                 _raw[_scheme]=='h' &&
523                 _raw[_scheme+1]=='t' &&
524                 _raw[_scheme+2]=='t' &&
525                 _raw[_scheme+3]=='p' )
526             return HttpScheme.HTTP.asString();
527         if (l==6 &&
528                 _raw[_scheme]=='h' &&
529                 _raw[_scheme+1]=='t' &&
530                 _raw[_scheme+2]=='t' &&
531                 _raw[_scheme+3]=='p' &&
532                 _raw[_scheme+4]=='s' )
533             return HttpScheme.HTTPS.asString();
534 
535         return new String(_raw,_scheme,_authority-_scheme-1,_charset);
536     }
537 
538     public String getAuthority()
539     {
540         if (_authority==_path)
541             return null;
542         return new String(_raw,_authority,_path-_authority,_charset);
543     }
544 
545     public String getHost()
546     {
547         if (_host==_port)
548             return null;
549         return new String(_raw,_host,_port-_host,_charset);
550     }
551 
552     public int getPort()
553     {
554         return _portValue;
555     }
556 
557     public String getPath()
558     {
559         if (_path==_param)
560             return null;
561         return new String(_raw,_path,_param-_path,_charset);
562     }
563 
564     public String getDecodedPath()
565     {
566         if (_path==_param)
567             return null;
568 
569         Utf8StringBuilder utf8b=null;
570 
571         for (int i=_path;i<_param;i++)
572         {
573             byte b = _raw[i];
574 
575             if (b=='%')
576             {
577                 if (utf8b==null)
578                 {
579                     utf8b=new Utf8StringBuilder();
580                     utf8b.append(_raw,_path,i-_path);
581                 }
582                 
583                 if ((i+2)>=_param)
584                     throw new IllegalArgumentException("Bad % encoding: "+this);
585                 if (_raw[i+1]=='u')
586                 {
587                     if ((i+5)>=_param)
588                         throw new IllegalArgumentException("Bad %u encoding: "+this);
589                     try
590                     {
591                         String unicode = new String(Character.toChars(TypeUtil.parseInt(_raw,i+2,4,16)));
592                         utf8b.getStringBuilder().append(unicode);
593                         i+=5;
594                     }
595                     catch(Exception e)
596                     {
597                         throw new RuntimeException(e);
598                     }
599                 }
600                 else
601                 {
602                     b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
603                     utf8b.append(b);
604                     i+=2;
605                 }
606                 continue;
607             }
608             else if (utf8b!=null)
609             {
610                 utf8b.append(b);
611             }
612         }
613 
614         if (utf8b==null)
615             return StringUtil.toUTF8String(_raw, _path, _param-_path);
616         return utf8b.toString();
617     }
618     
619     public String getDecodedPath(String encoding)
620     {
621         if (_path==_param)
622             return null;
623 
624         int length = _param-_path;
625         byte[] bytes=null;
626         int n=0;
627 
628         for (int i=_path;i<_param;i++)
629         {
630             byte b = _raw[i];
631 
632             if (b=='%')
633             {
634                 if (bytes==null)
635                 {
636                     bytes=new byte[length];
637                     System.arraycopy(_raw,_path,bytes,0,n);
638                 }
639                 
640                 if ((i+2)>=_param)
641                     throw new IllegalArgumentException("Bad % encoding: "+this);
642                 if (_raw[i+1]=='u')
643                 {
644                     if ((i+5)>=_param)
645                         throw new IllegalArgumentException("Bad %u encoding: "+this);
646 
647                     try
648                     {
649                         String unicode = new String(Character.toChars(TypeUtil.parseInt(_raw,i+2,4,16)));
650                         byte[] encoded = unicode.getBytes(encoding);
651                         System.arraycopy(encoded,0,bytes,n,encoded.length);
652                         n+=encoded.length;
653                         i+=5;
654                     }
655                     catch(Exception e)
656                     {
657                         throw new RuntimeException(e);
658                     }
659                 }
660                 else
661                 {
662                     b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
663                     bytes[n++]=b;
664                     i+=2;
665                 }
666                 continue;
667             }
668             else if (bytes==null)
669             {
670                 n++;
671                 continue;
672             }
673 
674             bytes[n++]=b;
675         }
676 
677 
678         if (bytes==null)
679             return StringUtil.toString(_raw,_path,_param-_path,encoding);
680 
681         return StringUtil.toString(bytes,0,n,encoding);
682     }
683 
684     public String getPathAndParam()
685     {
686         if (_path==_query)
687             return null;
688         return new String(_raw,_path,_query-_path,_charset);
689     }
690 
691     public String getCompletePath()
692     {
693         if (_path==_end)
694             return null;
695         return new String(_raw,_path,_end-_path,_charset);
696     }
697 
698     public String getParam()
699     {
700         if (_param==_query)
701             return null;
702         return new String(_raw,_param+1,_query-_param-1,_charset);
703     }
704 
705     public String getQuery()
706     {
707         if (_query==_fragment)
708             return null;
709         return new String(_raw,_query+1,_fragment-_query-1,_charset);
710     }
711 
712     public String getQuery(String encoding)
713     {
714         if (_query==_fragment)
715             return null;
716         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
717     }
718 
719     public boolean hasQuery()
720     {
721         return (_fragment>_query);
722     }
723 
724     public String getFragment()
725     {
726         if (_fragment==_end)
727             return null;
728         return new String(_raw,_fragment+1,_end-_fragment-1,_charset);
729     }
730 
731     public void decodeQueryTo(MultiMap<String> parameters)
732     {
733         if (_query==_fragment)
734             return;
735         if (_charset==StringUtil.__UTF8_CHARSET)
736             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
737         else
738             UrlEncoded.decodeTo(StringUtil.toString(_raw,_query+1,_fragment-_query-1,_charset.toString()),parameters,_charset.toString(),-1);
739     }
740 
741     public void decodeQueryTo(MultiMap<String> parameters, String encoding) throws UnsupportedEncodingException
742     {
743         if (_query==_fragment)
744             return;
745 
746         if (encoding==null || StringUtil.isUTF8(encoding))
747             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
748         else
749             UrlEncoded.decodeTo(StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding),parameters,encoding,-1);
750     }
751 
752     public void clear()
753     {
754         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
755         _raw=__empty;
756         _rawString="";
757         _encoded=false;
758     }
759 
760     @Override
761     public String toString()
762     {
763         if (_rawString==null)
764             _rawString=new String(_raw,_scheme,_end-_scheme,_charset);
765         return _rawString;
766     }
767 
768     public void writeTo(Utf8StringBuilder buf)
769     {
770         buf.append(_raw,_scheme,_end-_scheme);
771     }
772 
773 }