View Javadoc

1   // ========================================================================
2   // Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses.
12  // ========================================================================
13  
14  package org.eclipse.jetty.http;
15  
16  import java.io.UnsupportedEncodingException;
17  import java.net.URI;
18  
19  import org.eclipse.jetty.util.MultiMap;
20  import org.eclipse.jetty.util.StringUtil;
21  import org.eclipse.jetty.util.TypeUtil;
22  import org.eclipse.jetty.util.URIUtil;
23  import org.eclipse.jetty.util.UrlEncoded;
24  import org.eclipse.jetty.util.Utf8StringBuilder;
25  
26  
27  /* ------------------------------------------------------------ */
/** Http URI.
 * Parse an HTTP URI from a string or byte array.  Given a URI
 * <code>http://user@host:port/path/info;param?query#fragment</code>
 * this class will split it into the following undecoded optional elements:<ul>
 * <li>{@link #getScheme()} - http</li>
 * <li>{@link #getAuthority()} - //user@host:port</li>
 * <li>{@link #getHost()} - host</li>
 * <li>{@link #getPort()} - port</li>
 * <li>{@link #getPath()} - /path/info</li>
 * <li>{@link #getParam()} - param</li>
 * <li>{@link #getQuery()} - query</li>
 * <li>{@link #getFragment()} - fragment</li>
 * </ul>
 *
 */
43  public class HttpURI
44  {
45      private static final byte[] __empty={};
46      private final static int
47      START=0,
48      AUTH_OR_PATH=1,
49      SCHEME_OR_PATH=2,
50      AUTH=4,
51      IPV6=5,
52      PORT=6,
53      PATH=7,
54      PARAM=8,
55      QUERY=9,
56      ASTERISK=10;
57  
58      boolean _partial=false;
59      byte[] _raw=__empty;
60      String _rawString;
61      int _scheme;
62      int _authority;
63      int _host;
64      int _port;
65      int _portValue;
66      int _path;
67      int _param;
68      int _query;
69      int _fragment;
70      int _end;
71      boolean _encoded=false;
72  
73      final Utf8StringBuilder _utf8b = new Utf8StringBuilder(64);
74  
75      public HttpURI()
76      {
77  
78      }
79  
80      /* ------------------------------------------------------------ */
81      /**
82       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
83       */
84      public HttpURI(boolean parsePartialAuth)
85      {
86          _partial=parsePartialAuth;
87      }
88  
89      public HttpURI(String raw)
90      {
91          _rawString=raw;
92          byte[] b = raw.getBytes();
93          parse(b,0,b.length);
94      }
95  
96      public HttpURI(byte[] raw,int offset, int length)
97      {
98          parse2(raw,offset,length);
99      }
100     
101     public HttpURI(URI uri)
102     {
103         parse(uri.toASCIIString());
104     }
105 
106     public void parse(String raw)
107     {
108         byte[] b = raw.getBytes();
109         parse2(b,0,b.length);
110         _rawString=raw;
111     }
112 
113     public void parse(byte[] raw,int offset, int length)
114     {
115         _rawString=null;
116         parse2(raw,offset,length);
117     }
118 
119 
120     public void parseConnect(byte[] raw,int offset, int length)
121     {
122         _rawString=null;
123         _encoded=false;
124         _raw=raw;
125         int i=offset;
126         int e=offset+length;
127         int state=AUTH;
128         _end=offset+length;
129         _scheme=offset;
130         _authority=offset;
131         _host=offset;
132         _port=_end;
133         _portValue=-1;
134         _path=_end;
135         _param=_end;
136         _query=_end;
137         _fragment=_end;
138 
139         loop: while (i<e)
140         {
141             char c=(char)(0xff&_raw[i]);
142             int s=i++;
143 
144             switch (state)
145             {
146                 case AUTH:
147                 {
148                     switch (c)
149                     {
150                         case ':':
151                         {
152                             _port = s;
153                             break loop;
154                         }
155                         case '[':
156                         {
157                             state = IPV6;
158                             break;
159                         }
160                     }
161                     continue;
162                 }
163 
164                 case IPV6:
165                 {
166                     switch (c)
167                     {
168                         case '/':
169                         {
170                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
171                         }
172                         case ']':
173                         {
174                             state = AUTH;
175                             break;
176                         }
177                     }
178 
179                     continue;
180                 }
181             }
182         }
183 
184         if (_port<_path)
185             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
186         else
187             throw new IllegalArgumentException("No port");
188         _path=offset;
189     }
190 
191 
192     private void parse2(byte[] raw,int offset, int length)
193     {
194         _encoded=false;
195         _raw=raw;
196         int i=offset;
197         int e=offset+length;
198         int state=START;
199         int m=offset;
200         _end=offset+length;
201         _scheme=offset;
202         _authority=offset;
203         _host=offset;
204         _port=offset;
205         _portValue=-1;
206         _path=offset;
207         _param=_end;
208         _query=_end;
209         _fragment=_end;
210         while (i<e)
211         {
212             char c=(char)(0xff&_raw[i]);
213             int s=i++;
214 
215             state: switch (state)
216             {
217                 case START:
218                 {
219                     m=s;
220                     switch(c)
221                     {
222                         case '/':
223                             state=AUTH_OR_PATH;
224                             break;
225                         case ';':
226                             _param=s;
227                             state=PARAM;
228                             break;
229                         case '?':
230                             _param=s;
231                             _query=s;
232                             state=QUERY;
233                             break;
234                         case '#':
235                             _param=s;
236                             _query=s;
237                             _fragment=s;
238                             break;
239                         case '*':
240                             _path=s;
241                             state=ASTERISK;
242                             break;
243 
244                         default:
245                             state=SCHEME_OR_PATH;
246                     }
247 
248                     continue;
249                 }
250 
251                 case AUTH_OR_PATH:
252                 {
253                     if ((_partial||_scheme!=_authority) && c=='/')
254                     {
255                         _host=i;
256                         _port=_end;
257                         _path=_end;
258                         state=AUTH;
259                     }
260                     else if (c==';' || c=='?' || c=='#')
261                     {
262                         i--;
263                         state=PATH;
264                     }
265                     else
266                     {
267                         _host=m;
268                         _port=m;
269                         state=PATH;
270                     }
271                     continue;
272                 }
273 
274                 case SCHEME_OR_PATH:
275                 {
276                     // short cut for http and https
277                     if (length>6 && c=='t')
278                     {
279                         if (_raw[offset+3]==':')
280                         {
281                             s=offset+3;
282                             i=offset+4;
283                             c=':';
284                         }
285                         else if (_raw[offset+4]==':')
286                         {
287                             s=offset+4;
288                             i=offset+5;
289                             c=':';
290                         }
291                         else if (_raw[offset+5]==':')
292                         {
293                             s=offset+5;
294                             i=offset+6;
295                             c=':';
296                         }
297                     }
298 
299                     switch (c)
300                     {
301                         case ':':
302                         {
303                             m = i++;
304                             _authority = m;
305                             _path = m;
306                             c = (char)(0xff & _raw[i]);
307                             if (c == '/')
308                                 state = AUTH_OR_PATH;
309                             else
310                             {
311                                 _host = m;
312                                 _port = m;
313                                 state = PATH;
314                             }
315                             break;
316                         }
317 
318                         case '/':
319                         {
320                             state = PATH;
321                             break;
322                         }
323 
324                         case ';':
325                         {
326                             _param = s;
327                             state = PARAM;
328                             break;
329                         }
330 
331                         case '?':
332                         {
333                             _param = s;
334                             _query = s;
335                             state = QUERY;
336                             break;
337                         }
338 
339                         case '#':
340                         {
341                             _param = s;
342                             _query = s;
343                             _fragment = s;
344                             break;
345                         }
346                     }
347                     continue;
348                 }
349 
350                 case AUTH:
351                 {
352                     switch (c)
353                     {
354 
355                         case '/':
356                         {
357                             m = s;
358                             _path = m;
359                             _port = _path;
360                             state = PATH;
361                             break;
362                         }
363                         case '@':
364                         {
365                             _host = i;
366                             break;
367                         }
368                         case ':':
369                         {
370                             _port = s;
371                             state = PORT;
372                             break;
373                         }
374                         case '[':
375                         {
376                             state = IPV6;
377                             break;
378                         }
379                     }
380                     continue;
381                 }
382 
383                 case IPV6:
384                 {
385                     switch (c)
386                     {
387                         case '/':
388                         {
389                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
390                         }
391                         case ']':
392                         {
393                             state = AUTH;
394                             break;
395                         }
396                     }
397 
398                     continue;
399                 }
400 
401                 case PORT:
402                 {
403                     if (c=='/')
404                     {
405                         m=s;
406                         _path=m;
407                         if (_port<=_authority)
408                             _port=_path;
409                         state=PATH;
410                     }
411                     continue;
412                 }
413 
414                 case PATH:
415                 {
416                     switch (c)
417                     {
418                         case ';':
419                         {
420                             _param = s;
421                             state = PARAM;
422                             break;
423                         }
424                         case '?':
425                         {
426                             _param = s;
427                             _query = s;
428                             state = QUERY;
429                             break;
430                         }
431                         case '#':
432                         {
433                             _param = s;
434                             _query = s;
435                             _fragment = s;
436                             break state;
437                         }
438                         case '%':
439                         {
440                             _encoded=true;
441                         }
442                     }
443                     continue;
444                 }
445 
446                 case PARAM:
447                 {
448                     switch (c)
449                     {
450                         case '?':
451                         {
452                             _query = s;
453                             state = QUERY;
454                             break;
455                         }
456                         case '#':
457                         {
458                             _query = s;
459                             _fragment = s;
460                             break state;
461                         }
462                     }
463                     continue;
464                 }
465 
466                 case QUERY:
467                 {
468                     if (c=='#')
469                     {
470                         _fragment=s;
471                         break state;
472                     }
473                     continue;
474                 }
475 
476                 case ASTERISK:
477                 {
478                     throw new IllegalArgumentException("only '*'");
479                 }
480             }
481         }
482 
483         if (_port<_path)
484             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
485     }
486 
487     private String toUtf8String(int offset,int length)
488     {
489         _utf8b.reset();
490         _utf8b.append(_raw,offset,length);
491         return _utf8b.toString();
492     }
493 
494     public String getScheme()
495     {
496         if (_scheme==_authority)
497             return null;
498         int l=_authority-_scheme;
499         if (l==5 &&
500             _raw[_scheme]=='h' &&
501             _raw[_scheme+1]=='t' &&
502             _raw[_scheme+2]=='t' &&
503             _raw[_scheme+3]=='p' )
504             return HttpSchemes.HTTP;
505         if (l==6 &&
506             _raw[_scheme]=='h' &&
507             _raw[_scheme+1]=='t' &&
508             _raw[_scheme+2]=='t' &&
509             _raw[_scheme+3]=='p' &&
510             _raw[_scheme+4]=='s' )
511             return HttpSchemes.HTTPS;
512 
513         return toUtf8String(_scheme,_authority-_scheme-1);
514     }
515 
516     public String getAuthority()
517     {
518         if (_authority==_path)
519             return null;
520         return toUtf8String(_authority,_path-_authority);
521     }
522 
523     public String getHost()
524     {
525         if (_host==_port)
526             return null;
527         return toUtf8String(_host,_port-_host);
528     }
529 
530     public int getPort()
531     {
532         return _portValue;
533     }
534 
535     public String getPath()
536     {
537         if (_path==_param)
538             return null;
539         return toUtf8String(_path,_param-_path);
540     }
541 
542     public String getDecodedPath()
543     {
544         if (_path==_param)
545             return null;
546 
547         int length = _param-_path;
548         byte[] bytes=null;
549         int n=0;
550 
551         for (int i=_path;i<_param;i++)
552         {
553             byte b = _raw[i];
554 
555             if (b=='%')
556             {
557                 if ((i+2)>=_param)
558                     throw new IllegalArgumentException("Bad % encoding: "+this);
559                 b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
560                 i+=2;
561             }
562             else if (bytes==null)
563             {
564                 n++;
565                 continue;
566             }
567 
568             if (bytes==null)
569             {
570                 bytes=new byte[length];
571                 System.arraycopy(_raw,_path,bytes,0,n);
572             }
573 
574             bytes[n++]=b;
575         }
576 
577         if (bytes==null)
578             return toUtf8String(_path,length);
579 
580         _utf8b.reset();
581         _utf8b.append(bytes,0,n);
582         return _utf8b.toString();
583     }
584 
585     public String getPathAndParam()
586     {
587         if (_path==_query)
588             return null;
589         return toUtf8String(_path,_query-_path);
590     }
591 
592     public String getCompletePath()
593     {
594         if (_path==_end)
595             return null;
596         return toUtf8String(_path,_end-_path);
597     }
598 
599     public String getParam()
600     {
601         if (_param==_query)
602             return null;
603         return toUtf8String(_param+1,_query-_param-1);
604     }
605 
606     public String getQuery()
607     {
608         if (_query==_fragment)
609             return null;
610         return toUtf8String(_query+1,_fragment-_query-1);
611     }
612 
613     public String getQuery(String encoding)
614     {
615         if (_query==_fragment)
616             return null;
617         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
618     }
619 
620     public boolean hasQuery()
621     {
622         return (_fragment>_query);
623     }
624 
625     public String getFragment()
626     {
627         if (_fragment==_end)
628             return null;
629         return toUtf8String(_fragment+1,_end-_fragment-1);
630     }
631 
632     public void decodeQueryTo(MultiMap parameters)
633     {
634         if (_query==_fragment)
635             return;
636         _utf8b.reset();
637         UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters,_utf8b);
638     }
639 
640     public void decodeQueryTo(MultiMap parameters, String encoding)
641         throws UnsupportedEncodingException
642     {
643         if (_query==_fragment)
644             return;
645 
646         if (encoding==null || StringUtil.isUTF8(encoding))
647             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
648         else
649             UrlEncoded.decodeTo(toUtf8String(_query+1,_fragment-_query-1),parameters,encoding);
650     }
651 
652     public void clear()
653     {
654         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
655         _raw=__empty;
656         _rawString="";
657         _encoded=false;
658     }
659 
660     @Override
661     public String toString()
662     {
663         if (_rawString==null)
664             _rawString=toUtf8String(_scheme,_end-_scheme);
665         return _rawString;
666     }
667 
668     public void writeTo(Utf8StringBuilder buf)
669     {
670         buf.append(_raw,_scheme,_end-_scheme);
671     }
672 
673 }