View Javadoc

// ========================================================================
// Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
// ------------------------------------------------------------------------
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
// You may elect to redistribute this code under either of these licenses.
// ========================================================================
13  
14  package org.eclipse.jetty.http;
15  
16  import java.io.UnsupportedEncodingException;
17  
18  import org.eclipse.jetty.util.MultiMap;
19  import org.eclipse.jetty.util.StringUtil;
20  import org.eclipse.jetty.util.TypeUtil;
21  import org.eclipse.jetty.util.URIUtil;
22  import org.eclipse.jetty.util.UrlEncoded;
23  import org.eclipse.jetty.util.Utf8StringBuilder;
24  
25  
26  /* ------------------------------------------------------------ */
27  /** Http URI.
28   * Parse a HTTP URI from a string or byte array.  Given a URI
29   * <code>http://user@host:port/path/info;param?query#fragment</code>
30   * this class will split it into the following undecoded optional elements:<ul>
31   * <li>{@link #getScheme()} - http:</li>
32   * <li>{@link #getAuthority()} - //name@host:port</li>
33   * <li>{@link #getHost()} - host</li>
34   * <li>{@link #getPort()} - port</li>
35   * <li>{@link #getPath()} - /path/info</li>
36   * <li>{@link #getParam()} - param</li>
37   * <li>{@link #getQuery()} - query</li>
38   * <li>{@link #getFragment()} - fragment</li>
39   * </ul>
40   *
41   */
42  public class HttpURI
43  {
44      private static final byte[] __empty={};
45      private final static int
46      START=0,
47      AUTH_OR_PATH=1,
48      SCHEME_OR_PATH=2,
49      AUTH=4,
50      IPV6=5,
51      PORT=6,
52      PATH=7,
53      PARAM=8,
54      QUERY=9,
55      ASTERISK=10;
56  
57      boolean _partial=false;
58      byte[] _raw=__empty;
59      String _rawString;
60      int _scheme;
61      int _authority;
62      int _host;
63      int _port;
64      int _portValue;
65      int _path;
66      int _param;
67      int _query;
68      int _fragment;
69      int _end;
70      boolean _encoded=false;
71  
72      final Utf8StringBuilder _utf8b = new Utf8StringBuilder(64);
73  
74      public HttpURI()
75      {
76  
77      }
78  
79      /* ------------------------------------------------------------ */
80      /**
81       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
82       */
83      public HttpURI(boolean parsePartialAuth)
84      {
85          _partial=parsePartialAuth;
86      }
87  
88      public HttpURI(String raw)
89      {
90          _rawString=raw;
91          byte[] b = raw.getBytes();
92          parse(b,0,b.length);
93      }
94  
95      public HttpURI(byte[] raw,int offset, int length)
96      {
97          parse2(raw,offset,length);
98      }
99  
100     public void parse(String raw)
101     {
102         byte[] b = raw.getBytes();
103         parse2(b,0,b.length);
104         _rawString=raw;
105     }
106 
107     public void parse(byte[] raw,int offset, int length)
108     {
109         _rawString=null;
110         parse2(raw,offset,length);
111     }
112 
113 
114     public void parseConnect(byte[] raw,int offset, int length)
115     {
116         _rawString=null;
117         _encoded=false;
118         _raw=raw;
119         int i=offset;
120         int e=offset+length;
121         int state=AUTH;
122         int m=offset;
123         _end=offset+length;
124         _scheme=offset;
125         _authority=offset;
126         _host=offset;
127         _port=_end;
128         _portValue=-1;
129         _path=_end;
130         _param=_end;
131         _query=_end;
132         _fragment=_end;
133 
134         loop: while (i<e)
135         {
136             char c=(char)(0xff&_raw[i]);
137             int s=i++;
138 
139             switch (state)
140             {
141                 case AUTH:
142                 {
143                     switch (c)
144                     {
145                         case ':':
146                         {
147                             _port = s;
148                             break loop;
149                         }
150                         case '[':
151                         {
152                             state = IPV6;
153                             break;
154                         }
155                     }
156                     continue;
157                 }
158 
159                 case IPV6:
160                 {
161                     switch (c)
162                     {
163                         case '/':
164                         {
165                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
166                         }
167                         case ']':
168                         {
169                             state = AUTH;
170                             break;
171                         }
172                     }
173 
174                     continue;
175                 }
176             }
177         }
178 
179         if (_port<_path)
180             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
181         else
182             throw new IllegalArgumentException("No port");
183         _path=offset;
184     }
185 
186 
187     private void parse2(byte[] raw,int offset, int length)
188     {
189         _encoded=false;
190         _raw=raw;
191         int i=offset;
192         int e=offset+length;
193         int state=START;
194         int m=offset;
195         _end=offset+length;
196         _scheme=offset;
197         _authority=offset;
198         _host=offset;
199         _port=offset;
200         _portValue=-1;
201         _path=offset;
202         _param=_end;
203         _query=_end;
204         _fragment=_end;
205         while (i<e)
206         {
207             char c=(char)(0xff&_raw[i]);
208             int s=i++;
209 
210             state: switch (state)
211             {
212                 case START:
213                 {
214                     m=s;
215                     switch(c)
216                     {
217                         case '/':
218                             state=AUTH_OR_PATH;
219                             break;
220                         case ';':
221                             _param=s;
222                             state=PARAM;
223                             break;
224                         case '?':
225                             _param=s;
226                             _query=s;
227                             state=QUERY;
228                             break;
229                         case '#':
230                             _param=s;
231                             _query=s;
232                             _fragment=s;
233                             break;
234                         case '*':
235                             _path=s;
236                             state=ASTERISK;
237                             break;
238 
239                         default:
240                             state=SCHEME_OR_PATH;
241                     }
242 
243                     continue;
244                 }
245 
246                 case AUTH_OR_PATH:
247                 {
248                     if ((_partial||_scheme!=_authority) && c=='/')
249                     {
250                         _host=i;
251                         _port=_end;
252                         _path=_end;
253                         state=AUTH;
254                     }
255                     else if (c==';' || c=='?' || c=='#')
256                     {
257                         i--;
258                         state=PATH;
259                     }
260                     else
261                     {
262                         _host=m;
263                         _port=m;
264                         state=PATH;
265                     }
266                     continue;
267                 }
268 
269                 case SCHEME_OR_PATH:
270                 {
271                     // short cut for http and https
272                     if (length>6 && c=='t')
273                     {
274                         if (_raw[offset+3]==':')
275                         {
276                             s=offset+3;
277                             i=offset+4;
278                             c=':';
279                         }
280                         else if (_raw[offset+4]==':')
281                         {
282                             s=offset+4;
283                             i=offset+5;
284                             c=':';
285                         }
286                         else if (_raw[offset+5]==':')
287                         {
288                             s=offset+5;
289                             i=offset+6;
290                             c=':';
291                         }
292                     }
293 
294                     switch (c)
295                     {
296                         case ':':
297                         {
298                             m = i++;
299                             _authority = m;
300                             _path = m;
301                             c = (char)(0xff & _raw[i]);
302                             if (c == '/')
303                                 state = AUTH_OR_PATH;
304                             else
305                             {
306                                 _host = m;
307                                 _port = m;
308                                 state = PATH;
309                             }
310                             break;
311                         }
312 
313                         case '/':
314                         {
315                             state = PATH;
316                             break;
317                         }
318 
319                         case ';':
320                         {
321                             _param = s;
322                             state = PARAM;
323                             break;
324                         }
325 
326                         case '?':
327                         {
328                             _param = s;
329                             _query = s;
330                             state = QUERY;
331                             break;
332                         }
333 
334                         case '#':
335                         {
336                             _param = s;
337                             _query = s;
338                             _fragment = s;
339                             break;
340                         }
341                     }
342                     continue;
343                 }
344 
345                 case AUTH:
346                 {
347                     switch (c)
348                     {
349 
350                         case '/':
351                         {
352                             m = s;
353                             _path = m;
354                             _port = _path;
355                             state = PATH;
356                             break;
357                         }
358                         case '@':
359                         {
360                             _host = i;
361                             break;
362                         }
363                         case ':':
364                         {
365                             _port = s;
366                             state = PORT;
367                             break;
368                         }
369                         case '[':
370                         {
371                             state = IPV6;
372                             break;
373                         }
374                     }
375                     continue;
376                 }
377 
378                 case IPV6:
379                 {
380                     switch (c)
381                     {
382                         case '/':
383                         {
384                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
385                         }
386                         case ']':
387                         {
388                             state = AUTH;
389                             break;
390                         }
391                     }
392 
393                     continue;
394                 }
395 
396                 case PORT:
397                 {
398                     if (c=='/')
399                     {
400                         m=s;
401                         _path=m;
402                         if (_port<=_authority)
403                             _port=_path;
404                         state=PATH;
405                     }
406                     continue;
407                 }
408 
409                 case PATH:
410                 {
411                     switch (c)
412                     {
413                         case ';':
414                         {
415                             _param = s;
416                             state = PARAM;
417                             break;
418                         }
419                         case '?':
420                         {
421                             _param = s;
422                             _query = s;
423                             state = QUERY;
424                             break;
425                         }
426                         case '#':
427                         {
428                             _param = s;
429                             _query = s;
430                             _fragment = s;
431                             break state;
432                         }
433                         case '%':
434                         {
435                             _encoded=true;
436                         }
437                     }
438                     continue;
439                 }
440 
441                 case PARAM:
442                 {
443                     switch (c)
444                     {
445                         case '?':
446                         {
447                             _query = s;
448                             state = QUERY;
449                             break;
450                         }
451                         case '#':
452                         {
453                             _query = s;
454                             _fragment = s;
455                             break state;
456                         }
457                     }
458                     continue;
459                 }
460 
461                 case QUERY:
462                 {
463                     if (c=='#')
464                     {
465                         _fragment=s;
466                         break state;
467                     }
468                     continue;
469                 }
470 
471                 case ASTERISK:
472                 {
473                     throw new IllegalArgumentException("only '*'");
474                 }
475             }
476         }
477 
478         if (_port<_path)
479             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
480     }
481 
482     private String toUtf8String(int offset,int length)
483     {
484         _utf8b.reset();
485         _utf8b.append(_raw,offset,length);
486         return _utf8b.toString();
487     }
488 
489     public String getScheme()
490     {
491         if (_scheme==_authority)
492             return null;
493         int l=_authority-_scheme;
494         if (l==5 &&
495             _raw[_scheme]=='h' &&
496             _raw[_scheme+1]=='t' &&
497             _raw[_scheme+2]=='t' &&
498             _raw[_scheme+3]=='p' )
499             return HttpSchemes.HTTP;
500         if (l==6 &&
501             _raw[_scheme]=='h' &&
502             _raw[_scheme+1]=='t' &&
503             _raw[_scheme+2]=='t' &&
504             _raw[_scheme+3]=='p' &&
505             _raw[_scheme+4]=='s' )
506             return HttpSchemes.HTTPS;
507 
508         return toUtf8String(_scheme,_authority-_scheme-1);
509     }
510 
511     public String getAuthority()
512     {
513         if (_authority==_path)
514             return null;
515         return toUtf8String(_authority,_path-_authority);
516     }
517 
518     public String getHost()
519     {
520         if (_host==_port)
521             return null;
522         return toUtf8String(_host,_port-_host);
523     }
524 
525     public int getPort()
526     {
527         return _portValue;
528     }
529 
530     public String getPath()
531     {
532         if (_path==_param)
533             return null;
534         return toUtf8String(_path,_param-_path);
535     }
536 
537     public String getDecodedPath()
538     {
539         if (_path==_param)
540             return null;
541 
542         int length = _param-_path;
543         byte[] bytes=null;
544         int n=0;
545 
546         for (int i=_path;i<_param;i++)
547         {
548             byte b = _raw[i];
549 
550             if (b=='%')
551             {
552                 if ((i+2)>=_param)
553                     throw new IllegalArgumentException("Bad % encoding: "+this);
554                 b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
555                 i+=2;
556             }
557             else if (bytes==null)
558             {
559                 n++;
560                 continue;
561             }
562 
563             if (bytes==null)
564             {
565                 bytes=new byte[length];
566                 System.arraycopy(_raw,_path,bytes,0,n);
567             }
568 
569             bytes[n++]=b;
570         }
571 
572         if (bytes==null)
573             return toUtf8String(_path,length);
574 
575         _utf8b.reset();
576         _utf8b.append(bytes,0,n);
577         return _utf8b.toString();
578     }
579 
580     public String getPathAndParam()
581     {
582         if (_path==_query)
583             return null;
584         return toUtf8String(_path,_query-_path);
585     }
586 
587     public String getCompletePath()
588     {
589         if (_path==_end)
590             return null;
591         return toUtf8String(_path,_end-_path);
592     }
593 
594     public String getParam()
595     {
596         if (_param==_query)
597             return null;
598         return toUtf8String(_param+1,_query-_param-1);
599     }
600 
601     public String getQuery()
602     {
603         if (_query==_fragment)
604             return null;
605         return toUtf8String(_query+1,_fragment-_query-1);
606     }
607 
608     public String getQuery(String encoding)
609     {
610         if (_query==_fragment)
611             return null;
612         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
613     }
614 
615     public boolean hasQuery()
616     {
617         return (_fragment>_query);
618     }
619 
620     public String getFragment()
621     {
622         if (_fragment==_end)
623             return null;
624         return toUtf8String(_fragment+1,_end-_fragment-1);
625     }
626 
627     public void decodeQueryTo(MultiMap parameters)
628     {
629         if (_query==_fragment)
630             return;
631         _utf8b.reset();
632         UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters,_utf8b);
633     }
634 
635     public void decodeQueryTo(MultiMap parameters, String encoding)
636         throws UnsupportedEncodingException
637     {
638         if (_query==_fragment)
639             return;
640 
641         if (encoding==null || StringUtil.isUTF8(encoding))
642             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
643         else
644             UrlEncoded.decodeTo(toUtf8String(_query+1,_fragment-_query-1),parameters,encoding);
645     }
646 
647     public void clear()
648     {
649         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
650         _raw=__empty;
651         _rawString="";
652         _encoded=false;
653     }
654 
655     @Override
656     public String toString()
657     {
658         if (_rawString==null)
659             _rawString=toUtf8String(_scheme,_end-_scheme);
660         return _rawString;
661     }
662 
663     public void writeTo(Utf8StringBuilder buf)
664     {
665         buf.append(_raw,_scheme,_end-_scheme);
666     }
667 
668 }