View Javadoc

1   // ========================================================================
2   // Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses.
12  // ========================================================================
13  
14  package org.eclipse.jetty.http;
15  
16  import java.io.UnsupportedEncodingException;
17  
18  import org.eclipse.jetty.util.MultiMap;
19  import org.eclipse.jetty.util.StringUtil;
20  import org.eclipse.jetty.util.TypeUtil;
21  import org.eclipse.jetty.util.URIUtil;
22  import org.eclipse.jetty.util.UrlEncoded;
23  import org.eclipse.jetty.util.Utf8StringBuilder;
24  
25  
26  /* ------------------------------------------------------------ */
27  /** Http URI.
28   * Parse a HTTP URI from a string or byte array.  Given a URI
29   * <code>http://user@host:port/path/info;param?query#fragment</code>
30   * this class will split it into the following undecoded optional elements:<ul>
31   * <li>{@link #getScheme()} - http</li>
32   * <li>{@link #getAuthority()} - //user@host:port</li>
33   * <li>{@link #getHost()} - host</li>
34   * <li>{@link #getPort()} - port</li>
35   * <li>{@link #getPath()} - /path/info</li>
36   * <li>{@link #getParam()} - param</li>
37   * <li>{@link #getQuery()} - query</li>
38   * <li>{@link #getFragment()} - fragment</li>
39   * </ul>
40   *
41   */
42  public class HttpURI
43  {
44      private static final byte[] __empty={};
45      private final static int
46      START=0,
47      AUTH_OR_PATH=1,
48      SCHEME_OR_PATH=2,
49      AUTH=4,
50      IPV6=5,
51      PORT=6,
52      PATH=7,
53      PARAM=8,
54      QUERY=9,
55      ASTERISK=10;
56  
57      boolean _partial=false;
58      byte[] _raw=__empty;
59      String _rawString;
60      int _scheme;
61      int _authority;
62      int _host;
63      int _port;
64      int _portValue;
65      int _path;
66      int _param;
67      int _query;
68      int _fragment;
69      int _end;
70      boolean _encoded=false;
71  
72      final Utf8StringBuilder _utf8b = new Utf8StringBuilder(64);
73  
74      public HttpURI()
75      {
76  
77      }
78  
79      /* ------------------------------------------------------------ */
80      /**
81       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
82       */
83      public HttpURI(boolean parsePartialAuth)
84      {
85          _partial=parsePartialAuth;
86      }
87  
88      public HttpURI(String raw)
89      {
90          _rawString=raw;
91          byte[] b = raw.getBytes();
92          parse(b,0,b.length);
93      }
94  
95      public HttpURI(byte[] raw,int offset, int length)
96      {
97          parse2(raw,offset,length);
98      }
99  
100     public void parse(String raw)
101     {
102         byte[] b = raw.getBytes();
103         parse2(b,0,b.length);
104         _rawString=raw;
105     }
106 
107     public void parse(byte[] raw,int offset, int length)
108     {
109         _rawString=null;
110         parse2(raw,offset,length);
111     }
112 
113 
114     public void parseConnect(byte[] raw,int offset, int length)
115     {
116         _rawString=null;
117         _encoded=false;
118         _raw=raw;
119         int i=offset;
120         int e=offset+length;
121         int state=AUTH;
122         int m=offset;
123         _end=offset+length;
124         _scheme=offset;
125         _authority=offset;
126         _host=offset;
127         _port=_end;
128         _portValue=-1;
129         _path=_end;
130         _param=_end;
131         _query=_end;
132         _fragment=_end;
133 
134         loop: while (i<e)
135         {
136             char c=(char)(0xff&_raw[i]);
137             int s=i++;
138 
139             switch (state)
140             {
141                 case AUTH:
142                 {
143                     switch (c)
144                     {
145                         case ':':
146                         {
147                             _port = s;
148                             break loop;
149                         }
150                         case '[':
151                         {
152                             state = IPV6;
153                             break;
154                         }
155                     }
156                     continue;
157                 }
158 
159                 case IPV6:
160                 {
161                     switch (c)
162                     {
163                         case '/':
164                         {
165                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
166                         }
167                         case ']':
168                         {
169                             state = AUTH;
170                             break;
171                         }
172                     }
173 
174                     continue;
175                 }
176             }
177         }
178 
179         if (_port<_path)
180             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
181         else
182             throw new IllegalArgumentException("No port");
183         _path=offset;
184     }
185 
186 
187     private void parse2(byte[] raw,int offset, int length)
188     {
189         _encoded=false;
190         _raw=raw;
191         int i=offset;
192         int e=offset+length;
193         int state=START;
194         int m=offset;
195         _end=offset+length;
196         _scheme=offset;
197         _authority=offset;
198         _host=offset;
199         _port=offset;
200         _portValue=-1;
201         _path=offset;
202         _param=_end;
203         _query=_end;
204         _fragment=_end;
205         while (i<e)
206         {
207             char c=(char)(0xff&_raw[i]);
208             int s=i++;
209 
210             state: switch (state)
211             {
212                 case START:
213                 {
214                     m=s;
215                     switch(c)
216                     {
217                         case '/':
218                             state=AUTH_OR_PATH;
219                             break;
220                         case ';':
221                             _param=s;
222                             state=PARAM;
223                             break;
224                         case '?':
225                             _param=s;
226                             _query=s;
227                             state=QUERY;
228                             break;
229                         case '#':
230                             _param=s;
231                             _query=s;
232                             _fragment=s;
233                             break;
234                         case '*':
235                             _path=s;
236                             state=ASTERISK;
237                             break;
238 
239                         default:
240                             if (Character.isLetterOrDigit(c))
241                                 state=SCHEME_OR_PATH;
242                             else
243                                 throw new IllegalArgumentException("!(SCHEME|PATH|AUTH):"+StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
244                     }
245 
246                     continue;
247                 }
248 
249                 case AUTH_OR_PATH:
250                 {
251                     if ((_partial||_scheme!=_authority) && c=='/')
252                     {
253                         _host=i;
254                         _port=_end;
255                         _path=_end;
256                         state=AUTH;
257                     }
258                     else if (c==';' || c=='?' || c=='#')
259                     {
260                         i--;
261                         state=PATH;
262                     }
263                     else
264                     {
265                         _host=m;
266                         _port=m;
267                         state=PATH;
268                     }
269                     continue;
270                 }
271 
272                 case SCHEME_OR_PATH:
273                 {
274                     // short cut for http and https
275                     if (length>6 && c=='t')
276                     {
277                         if (_raw[offset+3]==':')
278                         {
279                             s=offset+3;
280                             i=offset+4;
281                             c=':';
282                         }
283                         else if (_raw[offset+4]==':')
284                         {
285                             s=offset+4;
286                             i=offset+5;
287                             c=':';
288                         }
289                         else if (_raw[offset+5]==':')
290                         {
291                             s=offset+5;
292                             i=offset+6;
293                             c=':';
294                         }
295                     }
296 
297                     switch (c)
298                     {
299                         case ':':
300                         {
301                             m = i++;
302                             _authority = m;
303                             _path = m;
304                             c = (char)(0xff & _raw[i]);
305                             if (c == '/')
306                                 state = AUTH_OR_PATH;
307                             else
308                             {
309                                 _host = m;
310                                 _port = m;
311                                 state = PATH;
312                             }
313                             break;
314                         }
315 
316                         case '/':
317                         {
318                             state = PATH;
319                             break;
320                         }
321 
322                         case ';':
323                         {
324                             _param = s;
325                             state = PARAM;
326                             break;
327                         }
328 
329                         case '?':
330                         {
331                             _param = s;
332                             _query = s;
333                             state = QUERY;
334                             break;
335                         }
336 
337                         case '#':
338                         {
339                             _param = s;
340                             _query = s;
341                             _fragment = s;
342                             break;
343                         }
344                     }
345                     continue;
346                 }
347 
348                 case AUTH:
349                 {
350                     switch (c)
351                     {
352 
353                         case '/':
354                         {
355                             m = s;
356                             _path = m;
357                             _port = _path;
358                             state = PATH;
359                             break;
360                         }
361                         case '@':
362                         {
363                             _host = i;
364                             break;
365                         }
366                         case ':':
367                         {
368                             _port = s;
369                             state = PORT;
370                             break;
371                         }
372                         case '[':
373                         {
374                             state = IPV6;
375                             break;
376                         }
377                     }
378                     continue;
379                 }
380 
381                 case IPV6:
382                 {
383                     switch (c)
384                     {
385                         case '/':
386                         {
387                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
388                         }
389                         case ']':
390                         {
391                             state = AUTH;
392                             break;
393                         }
394                     }
395 
396                     continue;
397                 }
398 
399                 case PORT:
400                 {
401                     if (c=='/')
402                     {
403                         m=s;
404                         _path=m;
405                         if (_port<=_authority)
406                             _port=_path;
407                         state=PATH;
408                     }
409                     continue;
410                 }
411 
412                 case PATH:
413                 {
414                     switch (c)
415                     {
416                         case ';':
417                         {
418                             _param = s;
419                             state = PARAM;
420                             break;
421                         }
422                         case '?':
423                         {
424                             _param = s;
425                             _query = s;
426                             state = QUERY;
427                             break;
428                         }
429                         case '#':
430                         {
431                             _param = s;
432                             _query = s;
433                             _fragment = s;
434                             break state;
435                         }
436                         case '%':
437                         {
438                             _encoded=true;
439                         }
440                     }
441                     continue;
442                 }
443 
444                 case PARAM:
445                 {
446                     switch (c)
447                     {
448                         case '?':
449                         {
450                             _query = s;
451                             state = QUERY;
452                             break;
453                         }
454                         case '#':
455                         {
456                             _query = s;
457                             _fragment = s;
458                             break state;
459                         }
460                     }
461                     continue;
462                 }
463 
464                 case QUERY:
465                 {
466                     if (c=='#')
467                     {
468                         _fragment=s;
469                         break state;
470                     }
471                     continue;
472                 }
473 
474                 case ASTERISK:
475                 {
476                     throw new IllegalArgumentException("only '*'");
477                 }
478             }
479         }
480 
481         if (_port<_path)
482             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
483     }
484 
485     private String toUtf8String(int offset,int length)
486     {
487         _utf8b.reset();
488         _utf8b.append(_raw,offset,length);
489         return _utf8b.toString();
490     }
491 
492     public String getScheme()
493     {
494         if (_scheme==_authority)
495             return null;
496         int l=_authority-_scheme;
497         if (l==5 &&
498             _raw[_scheme]=='h' &&
499             _raw[_scheme+1]=='t' &&
500             _raw[_scheme+2]=='t' &&
501             _raw[_scheme+3]=='p' )
502             return HttpSchemes.HTTP;
503         if (l==6 &&
504             _raw[_scheme]=='h' &&
505             _raw[_scheme+1]=='t' &&
506             _raw[_scheme+2]=='t' &&
507             _raw[_scheme+3]=='p' &&
508             _raw[_scheme+4]=='s' )
509             return HttpSchemes.HTTPS;
510 
511         return toUtf8String(_scheme,_authority-_scheme-1);
512     }
513 
514     public String getAuthority()
515     {
516         if (_authority==_path)
517             return null;
518         return toUtf8String(_authority,_path-_authority);
519     }
520 
521     public String getHost()
522     {
523         if (_host==_port)
524             return null;
525         return toUtf8String(_host,_port-_host);
526     }
527 
528     public int getPort()
529     {
530         return _portValue;
531     }
532 
533     public String getPath()
534     {
535         if (_path==_param)
536             return null;
537         return toUtf8String(_path,_param-_path);
538     }
539 
540     public String getDecodedPath()
541     {
542         if (_path==_param)
543             return null;
544 
545         int length = _param-_path;
546         byte[] bytes=null;
547         int n=0;
548 
549         for (int i=_path;i<_param;i++)
550         {
551             byte b = _raw[i];
552 
553             if (b=='%')
554             {
555                 if ((i+2)>=_param)
556                     throw new IllegalArgumentException("Bad % encoding: "+this);
557                 b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
558                 i+=2;
559             }
560             else if (bytes==null)
561             {
562                 n++;
563                 continue;
564             }
565 
566             if (bytes==null)
567             {
568                 bytes=new byte[length];
569                 System.arraycopy(_raw,_path,bytes,0,n);
570             }
571 
572             bytes[n++]=b;
573         }
574 
575         if (bytes==null)
576             return toUtf8String(_path,length);
577 
578         _utf8b.reset();
579         _utf8b.append(bytes,0,n);
580         return _utf8b.toString();
581     }
582 
583     public String getPathAndParam()
584     {
585         if (_path==_query)
586             return null;
587         return toUtf8String(_path,_query-_path);
588     }
589 
590     public String getCompletePath()
591     {
592         if (_path==_end)
593             return null;
594         return toUtf8String(_path,_end-_path);
595     }
596 
597     public String getParam()
598     {
599         if (_param==_query)
600             return null;
601         return toUtf8String(_param+1,_query-_param-1);
602     }
603 
604     public String getQuery()
605     {
606         if (_query==_fragment)
607             return null;
608         return toUtf8String(_query+1,_fragment-_query-1);
609     }
610 
611     public String getQuery(String encoding)
612     {
613         if (_query==_fragment)
614             return null;
615         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
616     }
617 
618     public boolean hasQuery()
619     {
620         return (_fragment>_query);
621     }
622 
623     public String getFragment()
624     {
625         if (_fragment==_end)
626             return null;
627         return toUtf8String(_fragment+1,_end-_fragment-1);
628     }
629 
630     public void decodeQueryTo(MultiMap parameters)
631     {
632         if (_query==_fragment)
633             return;
634         _utf8b.reset();
635         UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters,_utf8b);
636     }
637 
638     public void decodeQueryTo(MultiMap parameters, String encoding)
639         throws UnsupportedEncodingException
640     {
641         if (_query==_fragment)
642             return;
643 
644         if (encoding==null || StringUtil.isUTF8(encoding))
645             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
646         else
647             UrlEncoded.decodeTo(toUtf8String(_query+1,_fragment-_query-1),parameters,encoding);
648     }
649 
650     public void clear()
651     {
652         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
653         _raw=__empty;
654         _rawString="";
655         _encoded=false;
656     }
657 
658     @Override
659     public String toString()
660     {
661         if (_rawString==null)
662             _rawString=toUtf8String(_scheme,_end-_scheme);
663         return _rawString;
664     }
665 
666     public void writeTo(Utf8StringBuilder buf)
667     {
668         buf.append(_raw,_scheme,_end-_scheme);
669     }
670 
671 }