View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2013 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.http;
20  
21  import java.io.UnsupportedEncodingException;
22  import java.net.URI;
23  
24  import org.eclipse.jetty.util.MultiMap;
25  import org.eclipse.jetty.util.StringUtil;
26  import org.eclipse.jetty.util.TypeUtil;
27  import org.eclipse.jetty.util.URIUtil;
28  import org.eclipse.jetty.util.UrlEncoded;
29  import org.eclipse.jetty.util.Utf8StringBuilder;
30  
31  
32  /* ------------------------------------------------------------ */
33  /** Http URI.
 * Parse an HTTP URI from a string or byte array.  Given a URI
 * <code>http://user@host:port/path/info;param?query#fragment</code>
 * this class will split it into the following undecoded optional elements:<ul>
 * <li>{@link #getScheme()} - http</li>
 * <li>{@link #getAuthority()} - //user@host:port</li>
39   * <li>{@link #getHost()} - host</li>
40   * <li>{@link #getPort()} - port</li>
41   * <li>{@link #getPath()} - /path/info</li>
42   * <li>{@link #getParam()} - param</li>
43   * <li>{@link #getQuery()} - query</li>
44   * <li>{@link #getFragment()} - fragment</li>
45   * </ul>
46   *
47   */
48  public class HttpURI
49  {
50      private static final byte[] __empty={};
51      private final static int
52      START=0,
53      AUTH_OR_PATH=1,
54      SCHEME_OR_PATH=2,
55      AUTH=4,
56      IPV6=5,
57      PORT=6,
58      PATH=7,
59      PARAM=8,
60      QUERY=9,
61      ASTERISK=10;
62  
63      boolean _partial=false;
64      byte[] _raw=__empty;
65      String _rawString;
66      int _scheme;
67      int _authority;
68      int _host;
69      int _port;
70      int _portValue;
71      int _path;
72      int _param;
73      int _query;
74      int _fragment;
75      int _end;
76      boolean _encoded=false;
77  
78      final Utf8StringBuilder _utf8b = new Utf8StringBuilder(64);
79  
80      public HttpURI()
81      {
82  
83      }
84  
85      /* ------------------------------------------------------------ */
86      /**
87       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
88       */
89      public HttpURI(boolean parsePartialAuth)
90      {
91          _partial=parsePartialAuth;
92      }
93  
94      public HttpURI(String raw)
95      {
96          _rawString=raw;
97          byte[] b;
98          try
99          {
100             b = raw.getBytes(StringUtil.__UTF8);
101         }
102         catch (UnsupportedEncodingException e)
103         {
104            throw new RuntimeException(e.getMessage());
105         }
106         parse(b,0,b.length);
107     }
108 
109     public HttpURI(byte[] raw,int offset, int length)
110     {
111         parse2(raw,offset,length);
112     }
113     
114     public HttpURI(URI uri)
115     {
116         parse(uri.toASCIIString());
117     }
118 
119     public void parse(String raw)
120     {
121         byte[] b = raw.getBytes();
122         parse2(b,0,b.length);
123         _rawString=raw;
124     }
125 
126     public void parse(byte[] raw,int offset, int length)
127     {
128         _rawString=null;
129         parse2(raw,offset,length);
130     }
131 
132 
133     public void parseConnect(byte[] raw,int offset, int length)
134     {
135         _rawString=null;
136         _encoded=false;
137         _raw=raw;
138         int i=offset;
139         int e=offset+length;
140         int state=AUTH;
141         _end=offset+length;
142         _scheme=offset;
143         _authority=offset;
144         _host=offset;
145         _port=_end;
146         _portValue=-1;
147         _path=_end;
148         _param=_end;
149         _query=_end;
150         _fragment=_end;
151 
152         loop: while (i<e)
153         {
154             char c=(char)(0xff&_raw[i]);
155             int s=i++;
156 
157             switch (state)
158             {
159                 case AUTH:
160                 {
161                     switch (c)
162                     {
163                         case ':':
164                         {
165                             _port = s;
166                             break loop;
167                         }
168                         case '[':
169                         {
170                             state = IPV6;
171                             break;
172                         }
173                     }
174                     continue;
175                 }
176 
177                 case IPV6:
178                 {
179                     switch (c)
180                     {
181                         case '/':
182                         {
183                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
184                         }
185                         case ']':
186                         {
187                             state = AUTH;
188                             break;
189                         }
190                     }
191 
192                     continue;
193                 }
194             }
195         }
196 
197         if (_port<_path)
198             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
199         else
200             throw new IllegalArgumentException("No port");
201         _path=offset;
202     }
203 
204 
205     private void parse2(byte[] raw,int offset, int length)
206     {
207         _encoded=false;
208         _raw=raw;
209         int i=offset;
210         int e=offset+length;
211         int state=START;
212         int m=offset;
213         _end=offset+length;
214         _scheme=offset;
215         _authority=offset;
216         _host=offset;
217         _port=offset;
218         _portValue=-1;
219         _path=offset;
220         _param=_end;
221         _query=_end;
222         _fragment=_end;
223         while (i<e)
224         {
225             char c=(char)(0xff&_raw[i]);
226             int s=i++;
227 
228             state: switch (state)
229             {
230                 case START:
231                 {
232                     m=s;
233                     switch(c)
234                     {
235                         case '/':
236                             state=AUTH_OR_PATH;
237                             break;
238                         case ';':
239                             _param=s;
240                             state=PARAM;
241                             break;
242                         case '?':
243                             _param=s;
244                             _query=s;
245                             state=QUERY;
246                             break;
247                         case '#':
248                             _param=s;
249                             _query=s;
250                             _fragment=s;
251                             break;
252                         case '*':
253                             _path=s;
254                             state=ASTERISK;
255                             break;
256 
257                         default:
258                             state=SCHEME_OR_PATH;
259                     }
260 
261                     continue;
262                 }
263 
264                 case AUTH_OR_PATH:
265                 {
266                     if ((_partial||_scheme!=_authority) && c=='/')
267                     {
268                         _host=i;
269                         _port=_end;
270                         _path=_end;
271                         state=AUTH;
272                     }
273                     else if (c==';' || c=='?' || c=='#')
274                     {
275                         i--;
276                         state=PATH;
277                     }
278                     else
279                     {
280                         _host=m;
281                         _port=m;
282                         state=PATH;
283                     }
284                     continue;
285                 }
286 
287                 case SCHEME_OR_PATH:
288                 {
289                     // short cut for http and https
290                     if (length>6 && c=='t')
291                     {
292                         if (_raw[offset+3]==':')
293                         {
294                             s=offset+3;
295                             i=offset+4;
296                             c=':';
297                         }
298                         else if (_raw[offset+4]==':')
299                         {
300                             s=offset+4;
301                             i=offset+5;
302                             c=':';
303                         }
304                         else if (_raw[offset+5]==':')
305                         {
306                             s=offset+5;
307                             i=offset+6;
308                             c=':';
309                         }
310                     }
311 
312                     switch (c)
313                     {
314                         case ':':
315                         {
316                             m = i++;
317                             _authority = m;
318                             _path = m;
319                             c = (char)(0xff & _raw[i]);
320                             if (c == '/')
321                                 state = AUTH_OR_PATH;
322                             else
323                             {
324                                 _host = m;
325                                 _port = m;
326                                 state = PATH;
327                             }
328                             break;
329                         }
330 
331                         case '/':
332                         {
333                             state = PATH;
334                             break;
335                         }
336 
337                         case ';':
338                         {
339                             _param = s;
340                             state = PARAM;
341                             break;
342                         }
343 
344                         case '?':
345                         {
346                             _param = s;
347                             _query = s;
348                             state = QUERY;
349                             break;
350                         }
351 
352                         case '#':
353                         {
354                             _param = s;
355                             _query = s;
356                             _fragment = s;
357                             break;
358                         }
359                     }
360                     continue;
361                 }
362 
363                 case AUTH:
364                 {
365                     switch (c)
366                     {
367 
368                         case '/':
369                         {
370                             m = s;
371                             _path = m;
372                             _port = _path;
373                             state = PATH;
374                             break;
375                         }
376                         case '@':
377                         {
378                             _host = i;
379                             break;
380                         }
381                         case ':':
382                         {
383                             _port = s;
384                             state = PORT;
385                             break;
386                         }
387                         case '[':
388                         {
389                             state = IPV6;
390                             break;
391                         }
392                     }
393                     continue;
394                 }
395 
396                 case IPV6:
397                 {
398                     switch (c)
399                     {
400                         case '/':
401                         {
402                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
403                         }
404                         case ']':
405                         {
406                             state = AUTH;
407                             break;
408                         }
409                     }
410 
411                     continue;
412                 }
413 
414                 case PORT:
415                 {
416                     if (c=='/')
417                     {
418                         m=s;
419                         _path=m;
420                         if (_port<=_authority)
421                             _port=_path;
422                         state=PATH;
423                     }
424                     continue;
425                 }
426 
427                 case PATH:
428                 {
429                     switch (c)
430                     {
431                         case ';':
432                         {
433                             _param = s;
434                             state = PARAM;
435                             break;
436                         }
437                         case '?':
438                         {
439                             _param = s;
440                             _query = s;
441                             state = QUERY;
442                             break;
443                         }
444                         case '#':
445                         {
446                             _param = s;
447                             _query = s;
448                             _fragment = s;
449                             break state;
450                         }
451                         case '%':
452                         {
453                             _encoded=true;
454                         }
455                     }
456                     continue;
457                 }
458 
459                 case PARAM:
460                 {
461                     switch (c)
462                     {
463                         case '?':
464                         {
465                             _query = s;
466                             state = QUERY;
467                             break;
468                         }
469                         case '#':
470                         {
471                             _query = s;
472                             _fragment = s;
473                             break state;
474                         }
475                     }
476                     continue;
477                 }
478 
479                 case QUERY:
480                 {
481                     if (c=='#')
482                     {
483                         _fragment=s;
484                         break state;
485                     }
486                     continue;
487                 }
488 
489                 case ASTERISK:
490                 {
491                     throw new IllegalArgumentException("only '*'");
492                 }
493             }
494         }
495 
496         if (_port<_path)
497             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
498     }
499 
500     private String toUtf8String(int offset,int length)
501     {
502         _utf8b.reset();
503         _utf8b.append(_raw,offset,length);
504         return _utf8b.toString();
505     }
506 
507     public String getScheme()
508     {
509         if (_scheme==_authority)
510             return null;
511         int l=_authority-_scheme;
512         if (l==5 &&
513             _raw[_scheme]=='h' &&
514             _raw[_scheme+1]=='t' &&
515             _raw[_scheme+2]=='t' &&
516             _raw[_scheme+3]=='p' )
517             return HttpSchemes.HTTP;
518         if (l==6 &&
519             _raw[_scheme]=='h' &&
520             _raw[_scheme+1]=='t' &&
521             _raw[_scheme+2]=='t' &&
522             _raw[_scheme+3]=='p' &&
523             _raw[_scheme+4]=='s' )
524             return HttpSchemes.HTTPS;
525 
526         return toUtf8String(_scheme,_authority-_scheme-1);
527     }
528 
529     public String getAuthority()
530     {
531         if (_authority==_path)
532             return null;
533         return toUtf8String(_authority,_path-_authority);
534     }
535 
536     public String getHost()
537     {
538         if (_host==_port)
539             return null;
540         return toUtf8String(_host,_port-_host);
541     }
542 
543     public int getPort()
544     {
545         return _portValue;
546     }
547 
548     public String getPath()
549     {
550         if (_path==_param)
551             return null;
552         return toUtf8String(_path,_param-_path);
553     }
554 
555     public String getDecodedPath()
556     {
557         if (_path==_param)
558             return null;
559 
560         int length = _param-_path;
561         boolean decoding=false;
562 
563         for (int i=_path;i<_param;i++)
564         {
565             byte b = _raw[i];
566 
567             if (b=='%')
568             {
569                 if (!decoding)
570                 {
571                     _utf8b.reset();
572                     _utf8b.append(_raw,_path,i-_path);
573                     decoding=true;
574                 }
575                 
576                 if ((i+2)>=_param)
577                     throw new IllegalArgumentException("Bad % encoding: "+this);
578                 if (_raw[i+1]=='u')
579                 {
580                     if ((i+5)>=_param)
581                         throw new IllegalArgumentException("Bad %u encoding: "+this);
582                     try
583                     {
584                         String unicode = new String(Character.toChars(TypeUtil.parseInt(_raw,i+2,4,16)));
585                         _utf8b.getStringBuilder().append(unicode);
586                         i+=5;
587                     }
588                     catch(Exception e)
589                     {
590                         throw new RuntimeException(e);
591                     }
592                 }
593                 else
594                 {
595                     b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
596                     _utf8b.append(b);
597                     i+=2;
598                 }
599                 continue;
600             }
601             else if (decoding)
602             {
603                 _utf8b.append(b);
604             }
605         }
606 
607         if (!decoding)
608             return toUtf8String(_path,length);
609         return _utf8b.toString();
610     }
611     
612     public String getDecodedPath(String encoding)
613     {
614         if (_path==_param)
615             return null;
616 
617         int length = _param-_path;
618         byte[] bytes=null;
619         int n=0;
620 
621         for (int i=_path;i<_param;i++)
622         {
623             byte b = _raw[i];
624 
625             if (b=='%')
626             {
627                 if (bytes==null)
628                 {
629                     bytes=new byte[length];
630                     System.arraycopy(_raw,_path,bytes,0,n);
631                 }
632                 
633                 if ((i+2)>=_param)
634                     throw new IllegalArgumentException("Bad % encoding: "+this);
635                 if (_raw[i+1]=='u')
636                 {
637                     if ((i+5)>=_param)
638                         throw new IllegalArgumentException("Bad %u encoding: "+this);
639 
640                     try
641                     {
642                         String unicode = new String(Character.toChars(TypeUtil.parseInt(_raw,i+2,4,16)));
643                         byte[] encoded = unicode.getBytes(encoding);
644                         System.arraycopy(encoded,0,bytes,n,encoded.length);
645                         n+=encoded.length;
646                         i+=5;
647                     }
648                     catch(Exception e)
649                     {
650                         throw new RuntimeException(e);
651                     }
652                 }
653                 else
654                 {
655                     b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
656                     bytes[n++]=b;
657                     i+=2;
658                 }
659                 continue;
660             }
661             else if (bytes==null)
662             {
663                 n++;
664                 continue;
665             }
666 
667             bytes[n++]=b;
668         }
669 
670 
671         if (bytes==null)
672             return StringUtil.toString(_raw,_path,_param-_path,encoding);
673 
674         return StringUtil.toString(bytes,0,n,encoding);
675     }
676     
677     
678     
679     
680     
681 
682 
683     public String getPathAndParam()
684     {
685         if (_path==_query)
686             return null;
687         return toUtf8String(_path,_query-_path);
688     }
689 
690     public String getCompletePath()
691     {
692         if (_path==_end)
693             return null;
694         return toUtf8String(_path,_end-_path);
695     }
696 
697     public String getParam()
698     {
699         if (_param==_query)
700             return null;
701         return toUtf8String(_param+1,_query-_param-1);
702     }
703 
704     public String getQuery()
705     {
706         if (_query==_fragment)
707             return null;
708         return toUtf8String(_query+1,_fragment-_query-1);
709     }
710 
711     public String getQuery(String encoding)
712     {
713         if (_query==_fragment)
714             return null;
715         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
716     }
717 
718     public boolean hasQuery()
719     {
720         return (_fragment>_query);
721     }
722 
723     public String getFragment()
724     {
725         if (_fragment==_end)
726             return null;
727         return toUtf8String(_fragment+1,_end-_fragment-1);
728     }
729 
730     public void decodeQueryTo(MultiMap parameters)
731     {
732         if (_query==_fragment)
733             return;
734         _utf8b.reset();
735         UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters,_utf8b);
736     }
737 
738     public void decodeQueryTo(MultiMap parameters, String encoding)
739         throws UnsupportedEncodingException
740     {
741         if (_query==_fragment)
742             return;
743 
744         if (encoding==null || StringUtil.isUTF8(encoding))
745             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
746         else
747             UrlEncoded.decodeTo(StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding),parameters,encoding);
748     }
749 
750     public void clear()
751     {
752         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
753         _raw=__empty;
754         _rawString="";
755         _encoded=false;
756     }
757 
758     @Override
759     public String toString()
760     {
761         if (_rawString==null)
762             _rawString=toUtf8String(_scheme,_end-_scheme);
763         return _rawString;
764     }
765 
766     public void writeTo(Utf8StringBuilder buf)
767     {
768         buf.append(_raw,_scheme,_end-_scheme);
769     }
770 
771 }