View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2016 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.http;
20  
21  import java.io.UnsupportedEncodingException;
22  import java.net.URI;
23  import java.net.URISyntaxException;
24  import java.nio.charset.Charset;
25  import java.nio.charset.StandardCharsets;
26  
27  import org.eclipse.jetty.util.MultiMap;
28  import org.eclipse.jetty.util.TypeUtil;
29  import org.eclipse.jetty.util.URIUtil;
30  import org.eclipse.jetty.util.UrlEncoded;
31  
32  
33  /* ------------------------------------------------------------ */
34  /** Http URI.
35   * Parse a HTTP URI from a string or byte array.  Given a URI
36   * <code>http://user@host:port/path/info;param?query#fragment</code>
37   * this class will split it into the following undecoded optional elements:<ul>
38   * <li>{@link #getScheme()} - http:</li>
39   * <li>{@link #getAuthority()} - //name@host:port</li>
40   * <li>{@link #getHost()} - host</li>
41   * <li>{@link #getPort()} - port</li>
42   * <li>{@link #getPath()} - /path/info</li>
43   * <li>{@link #getParam()} - param</li>
44   * <li>{@link #getQuery()} - query</li>
45   * <li>{@link #getFragment()} - fragment</li>
46   * </ul>
47   * 
48   * <p>Any parameters will be returned from {@link #getPath()}, but are excluded from the
49   * return value of {@link #getDecodedPath()}.   If there are multiple parameters, the 
50   * {@link #getParam()} method returns only the last one.
51   */
52  public class HttpURI
53  {
54      private enum State {
55      START,
56      HOST_OR_PATH,
57      SCHEME_OR_PATH,
58      HOST,
59      IPV6,
60      PORT,
61      PATH,
62      PARAM,
63      QUERY,
64      FRAGMENT,
65      ASTERISK};
66  
67      private String _scheme;
68      private String _user;
69      private String _host;
70      private int _port;
71      private String _path;
72      private String _param;
73      private String _query;
74      private String _fragment;
75      
76      String _uri;
77      String _decodedPath;
78  
79      /* ------------------------------------------------------------ */
80      /**
81       * Construct a normalized URI.
82       * Port is not set if it is the default port.
83       * @param scheme the URI scheme
84       * @param host the URI hose
85       * @param port the URI port
86       * @param path the URI path
87       * @param param the URI param
88       * @param query the URI query
89       * @param fragment the URI fragment
90       * @return the normalized URI
91       */
92      public static HttpURI createHttpURI(String scheme, String host, int port, String path, String param, String query, String fragment)
93      {
94          if (port==80 && HttpScheme.HTTP.is(scheme))
95              port=0;
96          if (port==443 && HttpScheme.HTTPS.is(scheme))
97              port=0;
98          return new HttpURI(scheme,host,port,path,param,query,fragment);
99      }
100     
101     /* ------------------------------------------------------------ */
102     public HttpURI()
103     {
104     }
105 
106     /* ------------------------------------------------------------ */
107     public HttpURI(String scheme, String host, int port, String path, String param, String query, String fragment)
108     {
109         _scheme = scheme;
110         _host = host;
111         _port = port;
112         _path = path;
113         _param = param;
114         _query = query;
115         _fragment = fragment;
116     }
117 
118     /* ------------------------------------------------------------ */
119     public HttpURI(HttpURI uri)
120     {
121         this(uri._scheme,uri._host,uri._port,uri._path,uri._param,uri._query,uri._fragment);
122     }
123     
124     /* ------------------------------------------------------------ */
125     public HttpURI(String uri)
126     {
127         _port=-1;
128         parse(State.START,uri,0,uri.length());
129     }
130 
131     /* ------------------------------------------------------------ */
132     public HttpURI(URI uri)
133     {
134         _uri=null;
135         
136         _scheme=uri.getScheme();
137         _host=uri.getHost();
138         if (_host==null && uri.getRawSchemeSpecificPart().startsWith("//"))
139             _host="";
140         _port=uri.getPort();
141         _user = uri.getUserInfo();
142         _path=uri.getRawPath();
143         
144         _decodedPath = uri.getPath();
145         if (_decodedPath != null)
146         {
147             int p = _decodedPath.lastIndexOf(';');
148             if (p >= 0)
149                 _param = _decodedPath.substring(p + 1);
150         }
151         _query=uri.getRawQuery();
152         _fragment=uri.getFragment();
153         
154         _decodedPath=null;
155     }
156 
157     /* ------------------------------------------------------------ */
158     public HttpURI(String scheme, String host, int port, String pathQuery)
159     {
160         _uri=null;
161         
162         _scheme=scheme;
163         _host=host;
164         _port=port;
165 
166         parse(State.PATH,pathQuery,0,pathQuery.length());
167         
168     }
169 
170     /* ------------------------------------------------------------ */
171     public void parse(String uri)
172     {
173         clear();
174         _uri=uri;
175         parse(State.START,uri,0,uri.length());
176     }
177 
178     /* ------------------------------------------------------------ */
179     /** Parse according to https://tools.ietf.org/html/rfc7230#section-5.3
180      * @param method The method to parse the URI against (used to allow CONNECT exceptions)
181      * @param uri The URI to parse
182      */
183     public void parseRequestTarget(String method,String uri)
184     {
185         clear();
186         _uri=uri;
187 
188         if (HttpMethod.CONNECT.is(method))
189             _path=uri;
190         else
191             parse(uri.startsWith("/")?State.PATH:State.START,uri,0,uri.length());
192     }
193 
194     /* ------------------------------------------------------------ */
195     @Deprecated
196     public void parseConnect(String uri)
197     {
198         clear();
199         _uri=uri;
200         _path=uri;
201     }
202 
203     /* ------------------------------------------------------------ */
204     public void parse(String uri, int offset, int length)
205     {
206         clear();
207         int end=offset+length;
208         _uri=uri.substring(offset,end);
209         parse(State.START,uri,offset,end);
210     }
211 
212     /* ------------------------------------------------------------ */
213     private void parse(State state, final String uri, final int offset, final int end)
214     {
215         boolean encoded=false;
216         int mark=offset;
217         int path_mark=0;
218         
219         for (int i=offset; i<end; i++)
220         {
221             char c=uri.charAt(i);
222 
223             switch (state)
224             {
225                 case START:
226                 {
227                     switch(c)
228                     {
229                         case '/':
230                             mark = i;
231                             state = State.HOST_OR_PATH;
232                             break;
233                         case ';':
234                             mark=i+1;
235                             state=State.PARAM;
236                             break;
237                         case '?':
238                             // assume empty path (if seen at start)
239                             _path = "";
240                             mark=i+1;
241                             state=State.QUERY;
242                             break;
243                         case '#':
244                             mark=i+1;
245                             state=State.FRAGMENT;
246                             break;
247                         case '*':
248                             _path="*";
249                             state=State.ASTERISK;
250                             break;
251 
252                         default:
253                             mark=i;
254                             if (_scheme==null)
255                                 state=State.SCHEME_OR_PATH;
256                             else
257                             {
258                                 path_mark=i;
259                                 state=State.PATH;
260                             }
261                     }
262 
263                     continue;
264                 }
265 
266                 case SCHEME_OR_PATH:
267                 {
268                     switch (c)
269                     {
270                         case ':':
271                             // must have been a scheme
272                             _scheme=uri.substring(mark,i);
273                             // Start again with scheme set
274                             state=State.START;
275                             break;
276 
277                         case '/':
278                             // must have been in a path and still are
279                             state=State.PATH;
280                             break;
281 
282                         case ';':
283                             // must have been in a path 
284                             mark=i+1;
285                             state=State.PARAM;
286                             break;
287 
288                         case '?':
289                             // must have been in a path 
290                             _path=uri.substring(mark,i);
291                             mark=i+1;
292                             state=State.QUERY;
293                             break;
294 
295                         case '%':
296                             // must have be in an encoded path 
297                             encoded=true;
298                             state=State.PATH;
299                             break;
300                         
301                         case '#':
302                             // must have been in a path 
303                             _path=uri.substring(mark,i);
304                             state=State.FRAGMENT;
305                             break;
306                     }
307                     continue;
308                 }
309                 
310                 case HOST_OR_PATH:
311                 {
312                     switch(c)
313                     {
314                         case '/':
315                             _host="";
316                             mark=i+1;
317                             state=State.HOST;
318                             break;
319                             
320                         case '@':
321                         case ';':
322                         case '?':
323                         case '#':
324                             // was a path, look again
325                             i--;
326                             path_mark=mark;
327                             state=State.PATH;
328                             break;
329                         default:
330                             // it is a path
331                             path_mark=mark;
332                             state=State.PATH;
333                     }
334                     continue;
335                 }
336 
337                 case HOST:
338                 {
339                     switch (c)
340                     {
341                         case '/':
342                             _host = uri.substring(mark,i);
343                             path_mark=mark=i;
344                             state=State.PATH;
345                             break;
346                         case ':':
347                             if (i > mark)
348                                 _host=uri.substring(mark,i);
349                             mark=i+1;
350                             state=State.PORT;
351                             break;
352                         case '@':
353                             if (_user!=null)
354                                 throw new IllegalArgumentException("Bad authority");
355                             _user=uri.substring(mark,i);
356                             mark=i+1;
357                             break;
358                             
359                         case '[':
360                             state=State.IPV6;
361                             break;
362                     }
363                     continue;
364                 }
365 
366                 case IPV6:
367                 {
368                     switch (c)
369                     {
370                         case '/':
371                             throw new IllegalArgumentException("No closing ']' for ipv6 in " + uri);
372                         case ']':
373                             c = uri.charAt(++i);
374                             _host=uri.substring(mark,i);
375                             if (c == ':')
376                             {
377                                 mark=i+1;
378                                 state=State.PORT;
379                             }
380                             else
381                             {
382                                 path_mark=mark=i;
383                                 state=State.PATH;
384                             }
385                             break;
386                     }
387 
388                     continue;
389                 }
390 
391                 case PORT:
392                 {
393                     if (c=='@')
394                     {
395                         if (_user!=null)
396                             throw new IllegalArgumentException("Bad authority");
397                         // It wasn't a port, but a password!
398                         _user=_host+":"+uri.substring(mark,i);
399                         mark=i+1;
400                         state=State.HOST;
401                     }
402                     else if (c=='/')
403                     {
404                         _port=TypeUtil.parseInt(uri,mark,i-mark,10);
405                         path_mark=mark=i;
406                         state=State.PATH;
407                     }
408                     continue;
409                 }
410 
411                 case PATH:
412                 {
413                     switch (c)
414                     {
415                         case ';':
416                             mark=i+1;
417                             state=State.PARAM;
418                             break;
419                         case '?':
420                             _path=uri.substring(path_mark,i);
421                             mark=i+1;
422                             state=State.QUERY;
423                             break;
424                         case '#':
425                             _path=uri.substring(path_mark,i);
426                             mark=i+1;
427                             state=State.FRAGMENT;
428                             break;
429                         case '%':
430                             encoded=true;
431                             break;
432                     }
433                     continue;
434                 }
435 
436                 case PARAM:
437                 {
438                     switch (c)
439                     {
440                         case '?':
441                             _path=uri.substring(path_mark,i);
442                             _param=uri.substring(mark,i);
443                             mark=i+1;
444                             state=State.QUERY;
445                             break;
446                         case '#':
447                             _path=uri.substring(path_mark,i);
448                             _param=uri.substring(mark,i);
449                             mark=i+1;
450                             state=State.FRAGMENT;
451                             break;
452                         case '/':
453                             encoded=true;
454                             // ignore internal params
455                             state=State.PATH;
456                             break;
457                         case ';':
458                             // multiple parameters
459                             mark=i+1;
460                             break;
461                     }
462                     continue;
463                 }
464 
465                 case QUERY:
466                 {
467                     if (c=='#')
468                     {
469                         _query=uri.substring(mark,i);
470                         mark=i+1;
471                         state=State.FRAGMENT;
472                     }
473                     continue;
474                 }
475 
476                 case ASTERISK:
477                 {
478                     throw new IllegalArgumentException("only '*'");
479                 }
480                 
481                 case FRAGMENT:
482                 {
483                     _fragment=uri.substring(mark,end);
484                     i=end;
485                 }
486             }
487         }
488 
489         
490         switch(state)
491         {
492             case START:
493                 break;
494             case SCHEME_OR_PATH:
495                 _path=uri.substring(mark,end);
496                 break;
497 
498             case HOST_OR_PATH:
499                 _path=uri.substring(mark,end);
500                 break;
501                 
502             case HOST:
503                 if(end>mark)
504                     _host=uri.substring(mark,end);
505                 break;
506                 
507             case IPV6:
508                 throw new IllegalArgumentException("No closing ']' for ipv6 in " + uri);
509 
510             case PORT:
511                 _port=TypeUtil.parseInt(uri,mark,end-mark,10);
512                 break;
513                 
514             case ASTERISK:
515                 break;
516                 
517             case FRAGMENT:
518                 _fragment=uri.substring(mark,end);
519                 break;
520                 
521             case PARAM:
522                 _path=uri.substring(path_mark,end);
523                 _param=uri.substring(mark,end);
524                 break;
525                 
526             case PATH:
527                 _path=uri.substring(path_mark,end);
528                 break;
529                 
530             case QUERY:
531                 _query=uri.substring(mark,end);
532                 break;
533         }
534         
535         if (!encoded)
536         {
537             if (_param==null)
538                 _decodedPath=_path;
539             else
540                 _decodedPath=_path.substring(0,_path.length()-_param.length()-1);
541         }
542     }
543 
544     /* ------------------------------------------------------------ */
545     public String getScheme()
546     {
547         return _scheme;
548     }
549 
550     /* ------------------------------------------------------------ */
551     public String getHost()
552     {
553         // Return null for empty host to retain compatibility with java.net.URI
554         if (_host!=null && _host.length()==0)
555             return null;
556         return _host;
557     }
558 
559     /* ------------------------------------------------------------ */
560     public int getPort()
561     {
562         return _port;
563     }
564 
565     /* ------------------------------------------------------------ */
566     /**
567      * The parsed Path.
568      * 
569      * @return the path as parsed on valid URI.  null for invalid URI.
570      */
571     public String getPath()
572     {
573         return _path;
574     }
575 
576     /* ------------------------------------------------------------ */
577     public String getDecodedPath()
578     {
579         if (_decodedPath==null && _path!=null)
580             _decodedPath=URIUtil.decodePath(_path);
581         return _decodedPath;
582     }
583 
584     /* ------------------------------------------------------------ */
585     public String getParam()
586     {
587         return _param;
588     }
589 
590     /* ------------------------------------------------------------ */
591     public String getQuery()
592     {
593         return _query;
594     }
595 
596     /* ------------------------------------------------------------ */
597     public boolean hasQuery()
598     {
599         return _query!=null && _query.length()>0;
600     }
601 
602     /* ------------------------------------------------------------ */
603     public String getFragment()
604     {
605         return _fragment;
606     }
607 
608     /* ------------------------------------------------------------ */
609     public void decodeQueryTo(MultiMap<String> parameters)
610     {
611         if (_query==_fragment)
612             return;
613         UrlEncoded.decodeUtf8To(_query,parameters);
614     }
615 
616     /* ------------------------------------------------------------ */
617     public void decodeQueryTo(MultiMap<String> parameters, String encoding) throws UnsupportedEncodingException
618     {
619         decodeQueryTo(parameters,Charset.forName(encoding));
620     }
621 
622     /* ------------------------------------------------------------ */
623     public void decodeQueryTo(MultiMap<String> parameters, Charset encoding) throws UnsupportedEncodingException
624     {
625         if (_query==_fragment)
626             return;
627 
628         if (encoding==null || StandardCharsets.UTF_8.equals(encoding))
629             UrlEncoded.decodeUtf8To(_query,parameters);
630         else
631             UrlEncoded.decodeTo(_query,parameters,encoding);
632     }
633 
634     /* ------------------------------------------------------------ */
635     public void clear()
636     {
637         _uri=null;
638 
639         _scheme=null;
640         _host=null;
641         _port=-1;
642         _path=null;
643         _param=null;
644         _query=null;
645         _fragment=null;
646 
647         _decodedPath=null;
648     }
649 
650     /* ------------------------------------------------------------ */
651     public boolean isAbsolute()
652     {
653         return _scheme!=null && _scheme.length()>0;
654     }
655     
656     /* ------------------------------------------------------------ */
657     @Override
658     public String toString()
659     {
660         if (_uri==null)
661         {
662             StringBuilder out = new StringBuilder();
663             
664             if (_scheme!=null)
665                 out.append(_scheme).append(':');
666             
667             if (_host != null)
668             {
669                 out.append("//");
670                 if (_user != null)
671                     out.append(_user).append('@');
672                 out.append(_host);
673             }
674             
675             if (_port>0)
676                 out.append(':').append(_port);
677             
678             if (_path!=null)
679                 out.append(_path);
680             
681             if (_query!=null)
682                 out.append('?').append(_query);
683             
684             if (_fragment!=null)
685                 out.append('#').append(_fragment);
686             
687             if (out.length()>0)
688                 _uri=out.toString();
689             else
690                 _uri="";
691         }
692         return _uri;
693     }
694 
695     /* ------------------------------------------------------------ */
696     public boolean equals(Object o)
697     {
698         if (o==this)
699             return true;
700         if (!(o instanceof HttpURI))
701             return false;
702         return toString().equals(o.toString());
703     }
704 
705     /* ------------------------------------------------------------ */
706     public void setScheme(String scheme)
707     {
708         _scheme=scheme;
709         _uri=null;
710     }
711     
712     /* ------------------------------------------------------------ */
713     /**
714      * @param host the host
715      * @param port the port
716      */
717     public void setAuthority(String host, int port)
718     {
719         _host=host;
720         _port=port;
721         _uri=null;
722     }
723 
724     /* ------------------------------------------------------------ */
725     /**
726      * @param path the path
727      */
728     public void setPath(String path)
729     {
730         _uri=null;
731         _path=path;
732         _decodedPath=null;
733     }
734     
735     /* ------------------------------------------------------------ */
736     public void setPathQuery(String path)
737     {
738         _uri=null;
739         _path=null;
740         _decodedPath=null;
741         _param=null;
742         _fragment=null;
743         if (path!=null)
744             parse(State.PATH,path,0,path.length());
745     }
746     
747     /* ------------------------------------------------------------ */
748     public void setQuery(String query)
749     {
750         _query=query;
751         _uri=null;
752     }
753     
754     /* ------------------------------------------------------------ */
755     public URI toURI() throws URISyntaxException
756     {
757         return new URI(_scheme,null,_host,_port,_path,_query==null?null:UrlEncoded.decodeString(_query),_fragment);
758     }
759 
760     /* ------------------------------------------------------------ */
761     public String getPathQuery()
762     {
763         if (_query==null)
764             return _path;
765         return _path+"?"+_query;
766     }
767     
768     /* ------------------------------------------------------------ */
769     public String getAuthority()
770     {
771         if (_port>0)
772             return _host+":"+_port;
773         return _host;
774     }
775     
776     /* ------------------------------------------------------------ */
777     public String getUser()
778     {
779         return _user;
780     }
781 
782 
783 }