View Javadoc

1   // ========================================================================
2   // Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses.
12  // ========================================================================
13  
14  package org.eclipse.jetty.http;
15  
16  import java.io.UnsupportedEncodingException;
17  import java.net.URI;
18  
19  import org.eclipse.jetty.util.MultiMap;
20  import org.eclipse.jetty.util.StringUtil;
21  import org.eclipse.jetty.util.TypeUtil;
22  import org.eclipse.jetty.util.URIUtil;
23  import org.eclipse.jetty.util.UrlEncoded;
24  import org.eclipse.jetty.util.Utf8StringBuilder;
25  
26  
27  /* ------------------------------------------------------------ */
28  /** Http URI.
29   * Parse a HTTP URI from a string or byte array.  Given a URI
30   * <code>http://user@host:port/path/info;param?query#fragment</code>
31   * this class will split it into the following undecoded optional elements:<ul>
32   * <li>{@link #getScheme()} - http:</li>
33   * <li>{@link #getAuthority()} - //name@host:port</li>
34   * <li>{@link #getHost()} - host</li>
35   * <li>{@link #getPort()} - port</li>
36   * <li>{@link #getPath()} - /path/info</li>
37   * <li>{@link #getParam()} - param</li>
38   * <li>{@link #getQuery()} - query</li>
39   * <li>{@link #getFragment()} - fragment</li>
40   * </ul>
41   *
42   */
43  public class HttpURI
44  {
45      private static final byte[] __empty={};
46      private final static int
47      START=0,
48      AUTH_OR_PATH=1,
49      SCHEME_OR_PATH=2,
50      AUTH=4,
51      IPV6=5,
52      PORT=6,
53      PATH=7,
54      PARAM=8,
55      QUERY=9,
56      ASTERISK=10;
57  
58      boolean _partial=false;
59      byte[] _raw=__empty;
60      String _rawString;
61      int _scheme;
62      int _authority;
63      int _host;
64      int _port;
65      int _portValue;
66      int _path;
67      int _param;
68      int _query;
69      int _fragment;
70      int _end;
71      boolean _encoded=false;
72  
73      final Utf8StringBuilder _utf8b = new Utf8StringBuilder(64);
74  
75      public HttpURI()
76      {
77  
78      }
79  
80      /* ------------------------------------------------------------ */
81      /**
82       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
83       */
84      public HttpURI(boolean parsePartialAuth)
85      {
86          _partial=parsePartialAuth;
87      }
88  
89      public HttpURI(String raw)
90      {
91          _rawString=raw;
92          byte[] b = raw.getBytes();
93          parse(b,0,b.length);
94      }
95  
96      public HttpURI(byte[] raw,int offset, int length)
97      {
98          parse2(raw,offset,length);
99      }
100     
101     public HttpURI(URI uri)
102     {
103         parse(uri.toASCIIString());
104     }
105 
106     public void parse(String raw)
107     {
108         byte[] b = raw.getBytes();
109         parse2(b,0,b.length);
110         _rawString=raw;
111     }
112 
113     public void parse(byte[] raw,int offset, int length)
114     {
115         _rawString=null;
116         parse2(raw,offset,length);
117     }
118 
119 
120     public void parseConnect(byte[] raw,int offset, int length)
121     {
122         _rawString=null;
123         _encoded=false;
124         _raw=raw;
125         int i=offset;
126         int e=offset+length;
127         int state=AUTH;
128         int m=offset;
129         _end=offset+length;
130         _scheme=offset;
131         _authority=offset;
132         _host=offset;
133         _port=_end;
134         _portValue=-1;
135         _path=_end;
136         _param=_end;
137         _query=_end;
138         _fragment=_end;
139 
140         loop: while (i<e)
141         {
142             char c=(char)(0xff&_raw[i]);
143             int s=i++;
144 
145             switch (state)
146             {
147                 case AUTH:
148                 {
149                     switch (c)
150                     {
151                         case ':':
152                         {
153                             _port = s;
154                             break loop;
155                         }
156                         case '[':
157                         {
158                             state = IPV6;
159                             break;
160                         }
161                     }
162                     continue;
163                 }
164 
165                 case IPV6:
166                 {
167                     switch (c)
168                     {
169                         case '/':
170                         {
171                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
172                         }
173                         case ']':
174                         {
175                             state = AUTH;
176                             break;
177                         }
178                     }
179 
180                     continue;
181                 }
182             }
183         }
184 
185         if (_port<_path)
186             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
187         else
188             throw new IllegalArgumentException("No port");
189         _path=offset;
190     }
191 
192 
193     private void parse2(byte[] raw,int offset, int length)
194     {
195         _encoded=false;
196         _raw=raw;
197         int i=offset;
198         int e=offset+length;
199         int state=START;
200         int m=offset;
201         _end=offset+length;
202         _scheme=offset;
203         _authority=offset;
204         _host=offset;
205         _port=offset;
206         _portValue=-1;
207         _path=offset;
208         _param=_end;
209         _query=_end;
210         _fragment=_end;
211         while (i<e)
212         {
213             char c=(char)(0xff&_raw[i]);
214             int s=i++;
215 
216             state: switch (state)
217             {
218                 case START:
219                 {
220                     m=s;
221                     switch(c)
222                     {
223                         case '/':
224                             state=AUTH_OR_PATH;
225                             break;
226                         case ';':
227                             _param=s;
228                             state=PARAM;
229                             break;
230                         case '?':
231                             _param=s;
232                             _query=s;
233                             state=QUERY;
234                             break;
235                         case '#':
236                             _param=s;
237                             _query=s;
238                             _fragment=s;
239                             break;
240                         case '*':
241                             _path=s;
242                             state=ASTERISK;
243                             break;
244 
245                         default:
246                             state=SCHEME_OR_PATH;
247                     }
248 
249                     continue;
250                 }
251 
252                 case AUTH_OR_PATH:
253                 {
254                     if ((_partial||_scheme!=_authority) && c=='/')
255                     {
256                         _host=i;
257                         _port=_end;
258                         _path=_end;
259                         state=AUTH;
260                     }
261                     else if (c==';' || c=='?' || c=='#')
262                     {
263                         i--;
264                         state=PATH;
265                     }
266                     else
267                     {
268                         _host=m;
269                         _port=m;
270                         state=PATH;
271                     }
272                     continue;
273                 }
274 
275                 case SCHEME_OR_PATH:
276                 {
277                     // short cut for http and https
278                     if (length>6 && c=='t')
279                     {
280                         if (_raw[offset+3]==':')
281                         {
282                             s=offset+3;
283                             i=offset+4;
284                             c=':';
285                         }
286                         else if (_raw[offset+4]==':')
287                         {
288                             s=offset+4;
289                             i=offset+5;
290                             c=':';
291                         }
292                         else if (_raw[offset+5]==':')
293                         {
294                             s=offset+5;
295                             i=offset+6;
296                             c=':';
297                         }
298                     }
299 
300                     switch (c)
301                     {
302                         case ':':
303                         {
304                             m = i++;
305                             _authority = m;
306                             _path = m;
307                             c = (char)(0xff & _raw[i]);
308                             if (c == '/')
309                                 state = AUTH_OR_PATH;
310                             else
311                             {
312                                 _host = m;
313                                 _port = m;
314                                 state = PATH;
315                             }
316                             break;
317                         }
318 
319                         case '/':
320                         {
321                             state = PATH;
322                             break;
323                         }
324 
325                         case ';':
326                         {
327                             _param = s;
328                             state = PARAM;
329                             break;
330                         }
331 
332                         case '?':
333                         {
334                             _param = s;
335                             _query = s;
336                             state = QUERY;
337                             break;
338                         }
339 
340                         case '#':
341                         {
342                             _param = s;
343                             _query = s;
344                             _fragment = s;
345                             break;
346                         }
347                     }
348                     continue;
349                 }
350 
351                 case AUTH:
352                 {
353                     switch (c)
354                     {
355 
356                         case '/':
357                         {
358                             m = s;
359                             _path = m;
360                             _port = _path;
361                             state = PATH;
362                             break;
363                         }
364                         case '@':
365                         {
366                             _host = i;
367                             break;
368                         }
369                         case ':':
370                         {
371                             _port = s;
372                             state = PORT;
373                             break;
374                         }
375                         case '[':
376                         {
377                             state = IPV6;
378                             break;
379                         }
380                     }
381                     continue;
382                 }
383 
384                 case IPV6:
385                 {
386                     switch (c)
387                     {
388                         case '/':
389                         {
390                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
391                         }
392                         case ']':
393                         {
394                             state = AUTH;
395                             break;
396                         }
397                     }
398 
399                     continue;
400                 }
401 
402                 case PORT:
403                 {
404                     if (c=='/')
405                     {
406                         m=s;
407                         _path=m;
408                         if (_port<=_authority)
409                             _port=_path;
410                         state=PATH;
411                     }
412                     continue;
413                 }
414 
415                 case PATH:
416                 {
417                     switch (c)
418                     {
419                         case ';':
420                         {
421                             _param = s;
422                             state = PARAM;
423                             break;
424                         }
425                         case '?':
426                         {
427                             _param = s;
428                             _query = s;
429                             state = QUERY;
430                             break;
431                         }
432                         case '#':
433                         {
434                             _param = s;
435                             _query = s;
436                             _fragment = s;
437                             break state;
438                         }
439                         case '%':
440                         {
441                             _encoded=true;
442                         }
443                     }
444                     continue;
445                 }
446 
447                 case PARAM:
448                 {
449                     switch (c)
450                     {
451                         case '?':
452                         {
453                             _query = s;
454                             state = QUERY;
455                             break;
456                         }
457                         case '#':
458                         {
459                             _query = s;
460                             _fragment = s;
461                             break state;
462                         }
463                     }
464                     continue;
465                 }
466 
467                 case QUERY:
468                 {
469                     if (c=='#')
470                     {
471                         _fragment=s;
472                         break state;
473                     }
474                     continue;
475                 }
476 
477                 case ASTERISK:
478                 {
479                     throw new IllegalArgumentException("only '*'");
480                 }
481             }
482         }
483 
484         if (_port<_path)
485             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
486     }
487 
488     private String toUtf8String(int offset,int length)
489     {
490         _utf8b.reset();
491         _utf8b.append(_raw,offset,length);
492         return _utf8b.toString();
493     }
494 
495     public String getScheme()
496     {
497         if (_scheme==_authority)
498             return null;
499         int l=_authority-_scheme;
500         if (l==5 &&
501             _raw[_scheme]=='h' &&
502             _raw[_scheme+1]=='t' &&
503             _raw[_scheme+2]=='t' &&
504             _raw[_scheme+3]=='p' )
505             return HttpSchemes.HTTP;
506         if (l==6 &&
507             _raw[_scheme]=='h' &&
508             _raw[_scheme+1]=='t' &&
509             _raw[_scheme+2]=='t' &&
510             _raw[_scheme+3]=='p' &&
511             _raw[_scheme+4]=='s' )
512             return HttpSchemes.HTTPS;
513 
514         return toUtf8String(_scheme,_authority-_scheme-1);
515     }
516 
517     public String getAuthority()
518     {
519         if (_authority==_path)
520             return null;
521         return toUtf8String(_authority,_path-_authority);
522     }
523 
524     public String getHost()
525     {
526         if (_host==_port)
527             return null;
528         return toUtf8String(_host,_port-_host);
529     }
530 
531     public int getPort()
532     {
533         return _portValue;
534     }
535 
536     public String getPath()
537     {
538         if (_path==_param)
539             return null;
540         return toUtf8String(_path,_param-_path);
541     }
542 
543     public String getDecodedPath()
544     {
545         if (_path==_param)
546             return null;
547 
548         int length = _param-_path;
549         byte[] bytes=null;
550         int n=0;
551 
552         for (int i=_path;i<_param;i++)
553         {
554             byte b = _raw[i];
555 
556             if (b=='%')
557             {
558                 if ((i+2)>=_param)
559                     throw new IllegalArgumentException("Bad % encoding: "+this);
560                 b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
561                 i+=2;
562             }
563             else if (bytes==null)
564             {
565                 n++;
566                 continue;
567             }
568 
569             if (bytes==null)
570             {
571                 bytes=new byte[length];
572                 System.arraycopy(_raw,_path,bytes,0,n);
573             }
574 
575             bytes[n++]=b;
576         }
577 
578         if (bytes==null)
579             return toUtf8String(_path,length);
580 
581         _utf8b.reset();
582         _utf8b.append(bytes,0,n);
583         return _utf8b.toString();
584     }
585 
586     public String getPathAndParam()
587     {
588         if (_path==_query)
589             return null;
590         return toUtf8String(_path,_query-_path);
591     }
592 
593     public String getCompletePath()
594     {
595         if (_path==_end)
596             return null;
597         return toUtf8String(_path,_end-_path);
598     }
599 
600     public String getParam()
601     {
602         if (_param==_query)
603             return null;
604         return toUtf8String(_param+1,_query-_param-1);
605     }
606 
607     public String getQuery()
608     {
609         if (_query==_fragment)
610             return null;
611         return toUtf8String(_query+1,_fragment-_query-1);
612     }
613 
614     public String getQuery(String encoding)
615     {
616         if (_query==_fragment)
617             return null;
618         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
619     }
620 
621     public boolean hasQuery()
622     {
623         return (_fragment>_query);
624     }
625 
626     public String getFragment()
627     {
628         if (_fragment==_end)
629             return null;
630         return toUtf8String(_fragment+1,_end-_fragment-1);
631     }
632 
633     public void decodeQueryTo(MultiMap parameters)
634     {
635         if (_query==_fragment)
636             return;
637         _utf8b.reset();
638         UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters,_utf8b);
639     }
640 
641     public void decodeQueryTo(MultiMap parameters, String encoding)
642         throws UnsupportedEncodingException
643     {
644         if (_query==_fragment)
645             return;
646 
647         if (encoding==null || StringUtil.isUTF8(encoding))
648             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
649         else
650             UrlEncoded.decodeTo(toUtf8String(_query+1,_fragment-_query-1),parameters,encoding);
651     }
652 
653     public void clear()
654     {
655         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
656         _raw=__empty;
657         _rawString="";
658         _encoded=false;
659     }
660 
661     @Override
662     public String toString()
663     {
664         if (_rawString==null)
665             _rawString=toUtf8String(_scheme,_end-_scheme);
666         return _rawString;
667     }
668 
669     public void writeTo(Utf8StringBuilder buf)
670     {
671         buf.append(_raw,_scheme,_end-_scheme);
672     }
673 
674 }